# -*- coding: utf-8 -*-
"""
    pygments.lexers.robotframework
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token
from pygments.util import text_type

__all__ = ['RobotFrameworkLexer']


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string
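# Illustrative note (not part of the original module): normalize() lower-cases
# its input and drops spaces plus any characters given via ``remove``, e.g.
#   normalize('Test Template')          -> 'testtemplate'
#   normalize('*** Settings ***', '*')  -> 'settings'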


class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    .. versionadded:: 1.6
    """
    name = 'RobotFramework'
    aliases = ['robotframework']
    filenames = ['*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, text_type(value)
                        index += len(value)


class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%&')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token
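# Illustrative example (not in the original source): for the cell
# 'Hello ${name}!' entering as ARGUMENT, VariableTokenizer yields
#   ('Hello ', ARGUMENT), ('${', SYNTAX), ('name', VARIABLE),
#   ('}', SYNTAX), ('!', ARGUMENT)
# and list/dict accesses such as '@{items}[0]' additionally yield the
# '[', index and ']' pieces.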


class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token


class RowSplitter(object):
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest
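# Illustrative example (not in the original source): split() alternates
# separator and cell values, starting with a (possibly empty) separator
# and always ending with a newline:
#   list(RowSplitter().split('| Keyword | arg |'))
#   -> ['| ', 'Keyword', ' | ', 'arg', ' |', '', '\n']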


class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@&')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]
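# Illustrative example (not in the original source): a leading Gherkin
# prefix is split off and emphasized separately:
#   GherkinTokenizer().tokenize('Given user is logged in', KEYWORD)
#   -> [('Given ', GHERKIN), ('user is logged in', KEYWORD)]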


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_or_dict_variable_index():
            self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
            self.end += len(self._list_and_dict_variable_index_chars)

    def _has_list_or_dict_variable_index(self):
        return self._list_and_dict_variable_index_chars \
            and self._list_and_dict_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_and_dict_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers \
            and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_or_dict_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_or_dict_variable(self):
        return self._variable_chars[0] in ('@', '&')

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_and_dict_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_and_dict_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
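

# A minimal usage sketch (not part of the original module): drive the lexer
# directly over a small, hypothetical space-separated test case. Any *.robot
# content works the same way.
if __name__ == '__main__':
    SAMPLE = ('*** Test Cases ***\n'
              'Login\n'
              '    Open Browser    ${URL}\n')
    lexer = RobotFrameworkLexer()
    for index, token, value in lexer.get_tokens_unprocessed(SAMPLE):
        print(index, token, repr(value))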