# -*- coding: utf-8 -*-
"""
    pygments.lexers.robotframework
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token
from pygments.util import text_type

__all__ = ['RobotFrameworkLexer']


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string
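
# For example, normalize('Test Template') returns 'testtemplate' and
# normalize('*** Settings ***', remove='*') returns 'settings'; this is how
# table headers and setting names are matched below.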


class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    .. versionadded:: 1.6
    """
    name = 'RobotFramework'
    aliases = ['robotframework']
    filenames = ['*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, text_type(value)
                        index += len(value)
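
# Tokenization happens in two stages: RowTokenizer splits each row into
# separators and cells and decides which table (settings, variables, test
# cases, keywords) the row belongs to, and VariableTokenizer then splits
# ${scalar}, @{list}, &{dict} and %{environment} variables out of every cell
# so they can be highlighted separately.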


class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%&')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token
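
# Roughly, a cell such as '@{items}[0]' is emitted as '@{' (syntax), 'items'
# (variable), '}' (syntax), '[' (syntax), '0' (variable) and ']' (syntax),
# while any surrounding text keeps the token of the original cell.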


class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token


class RowSplitter(object):
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest
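
# Each row is split so that separators and cells alternate: runs of two or
# more spaces in the space-separated format, or pipes with their surrounding
# spaces in the pipe-separated format. A trailing '\n' is always emitted so
# the generated tokens cover the complete input line.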


class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@&')
        return var.start == 0 and var.end == len(value)
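
# Subclasses list the token types for consecutive cells in _tokens; once the
# cell index runs past the end of the tuple, the last type repeats. Setting,
# for instance, highlights the first cell as SETTING and everything after it
# as ARGUMENT. _is_assign() recognizes cells such as '${result} =' that
# assign keyword return values.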


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]
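
# For example, tokenize('Given user is logged in', KEYWORD) returns
# [('Given ', GHERKIN), ('user is logged in', KEYWORD)], so the Gherkin
# prefix gets emphasized separately from the keyword name.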


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.


class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_or_dict_variable_index():
            self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
            self.end += len(self._list_and_dict_variable_index_chars)

    def _has_list_or_dict_variable_index(self):
        return self._list_and_dict_variable_index_chars\
            and self._list_and_dict_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_and_dict_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers\
            and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_or_dict_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_or_dict_variable(self):
        return self._variable_chars[0] in ('@', '&')

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_and_dict_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_and_dict_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
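

# A minimal usage sketch (not part of the original module): the lexer plugs
# into the standard Pygments API, so it can be iterated over directly or used
# with pygments.highlight(). The sample test data below is only illustrative.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    sample = (
        '*** Test Cases ***\n'
        'Valid Login\n'
        '    Given user "${USERNAME}" is on the login page\n'
        '    When they submit valid credentials\n'
        '    Then the welcome page should be open\n'
    )
    # Print the (token, value) pairs produced by the lexer...
    for token, value in RobotFrameworkLexer().get_tokens(sample):
        print(token, repr(value))
    # ...or render the same data as HTML through the normal Pygments pipeline.
    print(highlight(sample, RobotFrameworkLexer(), HtmlFormatter()))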