|
@@ -35,5 +35,9 @@ CollapsedColumns
|
|
|
Whitespace
|
|
|
= Whitespace:[\n\t\r ]+ { return { type: 'Whitespace', content: Whitespace.join("") } }
|
|
|
|
|
|
+// \u00A0-\uFFFF is the Unicode BMP above U+009F, _including_ surrogates
|
|
|
+// (U+D800-U+DFFF) and unassigned code points, which aren't parse-able
|
|
|
+// naively. A more precise approach would be to define all valid Unicode
|
|
|
+// ranges exactly, but for permissive parsing we don't mind the imprecision.
|
|
|
GenericToken
|
|
|
- = GenericToken:[a-zA-Z0-9"'`_\-.=><:,*;!\[\]?$%|/\\@#&~^+{}]+ { return { type: 'GenericToken', content: GenericToken.join('') } }
|
|
|
+ = GenericToken:[a-zA-Z0-9\u00A0-\uFFFF"'`_\-.=><:,*;!\[\]?$%|/\\@#&~^+{}]+ { return { type: 'GenericToken', content: GenericToken.join('') } }
|