Browse Source

[jsinterp] Some optimizations and refactoring

Motivated by:

Authored by: dirkf, pukkandan
pukkandan 2 years ago
1 changed file with 98 additions and 109 deletions
  1. 98 109

+ 98 - 109

@@ -6,22 +6,19 @@ import re
 from .utils import ExtractorError, remove_quotes
-_OPERATORS = [
-    ('|', operator.or_),
-    ('^', operator.xor),
-    ('&', operator.and_),
-    ('>>', operator.rshift),
-    ('<<', operator.lshift),
-    ('-', operator.sub),
-    ('+', operator.add),
-    ('%', operator.mod),
-    ('/', operator.truediv),
-    ('*', operator.mul),
-]
-_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
-_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
+_OPERATORS = {
+    '|': operator.or_,
+    '^': operator.xor,
+    '&': operator.and_,
+    '>>': operator.rshift,
+    '<<': operator.lshift,
+    '-': operator.sub,
+    '+': operator.add,
+    '%': operator.mod,
+    '/': operator.truediv,
+    '*': operator.mul,
+}
 _MATCHING_PARENS = dict(zip('({[', ')}]'))
 _QUOTES = '\'"'
@@ -50,13 +47,11 @@ class LocalNameSpace(collections.ChainMap):
 class JSInterpreter:
+    __named_object_counter = 0
     def __init__(self, code, objects=None):
-        if objects is None:
-            objects = {}
-        self.code = code
-        self._functions = {}
-        self._objects = objects
-        self.__named_object_counter = 0
+        self.code, self._functions = code, {}
+        self._objects = {} if objects is None else objects
     def _named_object(self, namespace, obj):
         self.__named_object_counter += 1
@@ -93,9 +88,9 @@ class JSInterpreter:
         yield expr[start:]
-    @staticmethod
-    def _separate_at_paren(expr, delim):
-        separated = list(JSInterpreter._separate(expr, delim, 1))
+    @classmethod
+    def _separate_at_paren(cls, expr, delim):
+        separated = list(cls._separate(expr, delim, 1))
         if len(separated) < 2:
             raise ExtractorError(f'No terminating paren {delim} in {expr}')
         return separated[0][1:].strip(), separated[1].strip()
@@ -104,33 +99,29 @@ class JSInterpreter:
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')
-        sub_statements = list(self._separate(stmt, ';'))
-        stmt = (sub_statements or ['']).pop()
+        should_abort = False
+        sub_statements = list(self._separate(stmt, ';')) or ['']
+        stmt = sub_statements.pop().lstrip()
         for sub_stmt in sub_statements:
             ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
             if should_abort:
-                return ret
+                return ret, should_abort
-        should_abort = False
-        stmt = stmt.lstrip()
-        stmt_m = re.match(r'var\s', stmt)
-        if stmt_m:
-            expr = stmt[len(stmt_m.group(0)):]
+        m = re.match(r'(?P<var>var\s)|return(?:\s+|$)', stmt)
+        if not m:  # Try interpreting it as an expression
+            expr = stmt
+        elif m.group('var'):
+            expr = stmt[len(m.group(0)):]
         else:
-            return_m = re.match(r'return(?:\s+|$)', stmt)
-            if return_m:
-                expr = stmt[len(return_m.group(0)):]
-                should_abort = True
-            else:
-                # Try interpreting it as an expression
-                expr = stmt
+            expr = stmt[len(m.group(0)):]
+            should_abort = True
-        v = self.interpret_expression(expr, local_vars, allow_recursion)
-        return v, should_abort
+        return self.interpret_expression(expr, local_vars, allow_recursion), should_abort
     def interpret_expression(self, expr, local_vars, allow_recursion):
         expr = expr.strip()
-        if expr == '':  # Empty expression
+        if not expr:
             return None
         if expr.startswith('{'):
@@ -156,8 +147,8 @@ class JSInterpreter:
                 for item in self._separate(inner)])
             expr = name + outer
-        m = re.match(r'try\s*', expr)
-        if m:
+        m = re.match(r'(?P<try>try)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+        if m and m.group('try'):
             if expr[m.end()] == '{':
                 try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
@@ -167,21 +158,19 @@ class JSInterpreter:
                 return ret
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
-        m = re.match(r'catch\s*\(', expr)
-        if m:
+        elif m and m.group('catch'):
             # We ignore the catch block
             _, expr = self._separate_at_paren(expr, '}')
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
-        m = re.match(r'for\s*\(', expr)
-        if m:
+        elif m and m.group('for'):
             constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining, '}')
-                m = re.match(r'switch\s*\(', remaining)  # FIXME
-                if m:
-                    switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+                switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
+                if switch_m:
+                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
                     body, expr = self._separate_at_paren(remaining, '}')
                     body = 'switch(%s){%s}' % (switch_val, body)
@@ -206,8 +195,7 @@ class JSInterpreter:
                         f'Premature return in the initialization of a for loop in {constructor!r}')
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
-        m = re.match(r'switch\s*\(', expr)
-        if m:
+        elif m and m.group('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
             body, expr = self._separate_at_paren(remaining, '}')
@@ -250,55 +238,63 @@ class JSInterpreter:
                 ret = local_vars[var]
             expr = expr[:start] + json.dumps(ret) + expr[end:]
-        for op, opfunc in _ASSIGN_OPERATORS:
-            m = re.match(rf'''(?x)
-                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?
-                \s*{re.escape(op)}
-                (?P<expr>.*)$''', expr)
-            if not m:
-                continue
-            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
+        if not expr:
+            return None
-            if m.groupdict().get('index'):
-                lvar = local_vars[m.group('out')]
-                idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
-                if not isinstance(idx, int):
-                    raise ExtractorError(f'List indices must be integers: {idx}')
-                cur = lvar[idx]
-                val = opfunc(cur, right_val)
-                lvar[idx] = val
-                return val
+        m = re.match(fr'''(?x)
+            (?P<assign>
+                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+                (?P<op>{"|".join(map(re.escape, _OPERATORS))})?
+                =(?P<expr>.*)$
+            )|(?P<return>
+                (?!if|return|true|false|null)(?P<name>{_NAME_RE})$
+            )|(?P<indexing>
+                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+            )|(?P<attribute>
+                (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+            )|(?P<function>
+                (?P<fname>{_NAME_RE})\((?P<args>[\w$,]*)\)$
+            )''', expr)
+        if m and m.group('assign'):
+            if not m.group('op'):
+                opfunc = lambda curr, right: right
             else:
-                cur = local_vars.get(m.group('out'))
-                val = opfunc(cur, right_val)
-                local_vars[m.group('out')] = val
-                return val
+                opfunc = _OPERATORS[m.group('op')]
+            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
+            left_val = local_vars.get(m.group('out'))
+            if not m.group('index'):
+                local_vars[m.group('out')] = opfunc(left_val, right_val)
+                return local_vars[m.group('out')]
+            elif left_val is None:
+                raise ExtractorError(f'Cannot index undefined variable: {m.group("out")}')
+            idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+            if not isinstance(idx, int):
+                raise ExtractorError(f'List indices must be integers: {idx}')
+            left_val[idx] = opfunc(left_val[idx], right_val)
+            return left_val[idx]
-        if expr.isdigit():
+        elif expr.isdigit():
             return int(expr)
-        if expr == 'break':
+        elif expr == 'break':
             raise JS_Break()
         elif expr == 'continue':
             raise JS_Continue()
-        var_m = re.match(
-            r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
-            expr)
-        if var_m:
-            return local_vars[var_m.group('name')]
+        elif m and m.group('return'):
+            return local_vars[m.group('name')]
         with contextlib.suppress(ValueError):
             return json.loads(expr)
-        m = re.match(
-            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
-        if m:
+        if m and m.group('indexing'):
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
             return val[idx]
-        for op, opfunc in _OPERATORS:
+        for op, opfunc in _OPERATORS.items():
             separated = list(self._separate(expr, op))
             if len(separated) < 2:
@@ -314,10 +310,7 @@ class JSInterpreter:
                 raise ExtractorError(f'Premature right-side return of {op} in {expr!r}')
             return opfunc(left_val or 0, right_val)
-        m = re.match(
-            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
-            expr)
-        if m:
+        if m and m.group('attribute'):
             variable = m.group('var')
             member = remove_quotes(m.group('member') or m.group('member2'))
             arg_str = expr[m.end():]
@@ -332,7 +325,6 @@ class JSInterpreter:
                     raise ExtractorError(f'{member} {msg}: {expr}')
             def eval_method():
-                nonlocal member
                 if variable == 'String':
                     obj = str
                 elif variable in local_vars:
@@ -342,8 +334,8 @@ class JSInterpreter:
                         self._objects[variable] = self.extract_object(variable)
                     obj = self._objects[variable]
+                # Member access
                 if arg_str is None:
-                    # Member access
                     if member == 'length':
                         return len(obj)
                     return obj[member]
@@ -418,9 +410,7 @@ class JSInterpreter:
                     except ValueError:
                         return -1
-                if isinstance(obj, list):
-                    member = int(member)
-                return obj[member](argvals)
+                return obj[int(member) if isinstance(obj, list) else member](argvals)
             if remaining:
                 return self.interpret_expression(
@@ -429,9 +419,8 @@ class JSInterpreter:
                 return eval_method()
-        m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
-        if m:
-            fname = m.group('func')
+        elif m and m.group('function'):
+            fname = m.group('fname')
             argvals = tuple(
                 int(v) if v.isdigit() else local_vars[v]
                 for v in self._separate(m.group('args')))
@@ -441,8 +430,7 @@ class JSInterpreter:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
-        if expr:
-            raise ExtractorError('Unsupported JS expression %r' % expr)
+        raise ExtractorError(f'Unsupported JS expression {expr!r}')
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
@@ -471,14 +459,17 @@ class JSInterpreter:
         """ @returns argnames, code """
         func_m =
-                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+                (?:
+                    function\s+%(name)s|
+                    [{;,]\s*%(name)s\s*=\s*function|
+                    var\s+%(name)s\s*=\s*function
+                )\s*
-                (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % (
-                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+                (?P<code>{(?:(?!};)[^"]|"([^"]|\\")*")+})''' % {'name': re.escape(funcname)},
         code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
         if func_m is None:
-            raise ExtractorError('Could not find JS function %r' % funcname)
+            raise ExtractorError(f'Could not find JS function "{funcname}"')
         return func_m.group('args').split(','), code
     def extract_function(self, funcname):
@@ -492,11 +483,9 @@ class JSInterpreter:
             start, body_start = mobj.span()
             body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
-            name = self._named_object(
-                local_vars,
-                self.extract_function_from_code(
-                    [str.strip(x) for x in mobj.group('args').split(',')],
-                    body, local_vars, *global_stack))
+            name = self._named_object(local_vars, self.extract_function_from_code(
+                [x.strip() for x in mobj.group('args').split(',')],
+                body, local_vars, *global_stack))
             code = code[:start] + name + remaining
         return self.build_function(argnames, code, local_vars, *global_stack)