sas.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. """
  2. pygments.lexers.sas
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexer for SAS.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, words
  10. from pygments.token import Comment, Keyword, Name, Number, String, Text, \
  11. Other, Generic
  12. __all__ = ['SASLexer']
  13. class SASLexer(RegexLexer):
  14. """
  15. For SAS files.
  16. """
  17. # Syntax from syntax/sas.vim by James Kidd <james.kidd@covance.com>
  18. name = 'SAS'
  19. aliases = ['sas']
  20. filenames = ['*.SAS', '*.sas']
  21. mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
  22. url = 'https://en.wikipedia.org/wiki/SAS_(software)'
  23. version_added = '2.2'
  24. flags = re.IGNORECASE | re.MULTILINE
  25. builtins_macros = (
  26. "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
  27. "display", "do", "else", "end", "eval", "global", "goto", "if",
  28. "index", "input", "keydef", "label", "left", "length", "let",
  29. "local", "lowcase", "macro", "mend", "nrquote",
  30. "nrstr", "put", "qleft", "qlowcase", "qscan",
  31. "qsubstr", "qsysfunc", "qtrim", "quote", "qupcase", "scan",
  32. "str", "substr", "superq", "syscall", "sysevalf", "sysexec",
  33. "sysfunc", "sysget", "syslput", "sysprod", "sysrc", "sysrput",
  34. "then", "to", "trim", "unquote", "until", "upcase", "verify",
  35. "while", "window"
  36. )
  37. builtins_conditionals = (
  38. "do", "if", "then", "else", "end", "until", "while"
  39. )
  40. builtins_statements = (
  41. "abort", "array", "attrib", "by", "call", "cards", "cards4",
  42. "catname", "continue", "datalines", "datalines4", "delete", "delim",
  43. "delimiter", "display", "dm", "drop", "endsas", "error", "file",
  44. "filename", "footnote", "format", "goto", "in", "infile", "informat",
  45. "input", "keep", "label", "leave", "length", "libname", "link",
  46. "list", "lostcard", "merge", "missing", "modify", "options", "output",
  47. "out", "page", "put", "redirect", "remove", "rename", "replace",
  48. "retain", "return", "select", "set", "skip", "startsas", "stop",
  49. "title", "update", "waitsas", "where", "window", "x", "systask"
  50. )
  51. builtins_sql = (
  52. "add", "and", "alter", "as", "cascade", "check", "create",
  53. "delete", "describe", "distinct", "drop", "foreign", "from",
  54. "group", "having", "index", "insert", "into", "in", "key", "like",
  55. "message", "modify", "msgtype", "not", "null", "on", "or",
  56. "order", "primary", "references", "reset", "restrict", "select",
  57. "set", "table", "unique", "update", "validate", "view", "where"
  58. )
  59. builtins_functions = (
  60. "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
  61. "attrn", "band", "betainv", "blshift", "bnot", "bor",
  62. "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
  63. "close", "cnonct", "collate", "compbl", "compound",
  64. "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
  65. "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
  66. "datejul", "datepart", "datetime", "day", "dclose", "depdb",
  67. "depdbsl", "depsl", "depsyd",
  68. "deptab", "dequote", "dhms", "dif", "digamma",
  69. "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
  70. "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
  71. "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
  72. "fexist", "fget", "fileexist", "filename", "fileref",
  73. "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
  74. "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
  75. "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
  76. "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
  77. "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
  78. "indexc", "indexw", "input", "inputc", "inputn", "int",
  79. "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
  80. "kurtosis", "lag", "lbound", "left", "length", "lgamma",
  81. "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
  82. "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
  83. "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
  84. "normal", "note", "npv", "open", "ordinal", "pathname",
  85. "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
  86. "probbeta", "probbnml", "probchi", "probf", "probgam",
  87. "probhypr", "probit", "probnegb", "probnorm", "probt",
  88. "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
  89. "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
  90. "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
  91. "rewind", "right", "round", "saving", "scan", "sdf", "second",
  92. "sign", "sin", "sinh", "skewness", "soundex", "spedis",
  93. "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
  94. "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
  95. "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
  96. "tnonct", "today", "translate", "tranwrd", "trigamma",
  97. "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
  98. "varfmt", "varinfmt", "varlabel", "varlen", "varname",
  99. "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
  100. "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
  101. "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
  102. "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
  103. "vinformatw", "vinformatwx", "vinformatx", "vlabel",
  104. "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
  105. "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
  106. "zipnamel", "zipstate"
  107. )
  108. tokens = {
  109. 'root': [
  110. include('comments'),
  111. include('proc-data'),
  112. include('cards-datalines'),
  113. include('logs'),
  114. include('general'),
  115. (r'.', Text),
  116. ],
  117. # SAS is multi-line regardless, but * is ended by ;
  118. 'comments': [
  119. (r'^\s*\*.*?;', Comment),
  120. (r'/\*.*?\*/', Comment),
  121. (r'^\s*\*(.|\n)*?;', Comment.Multiline),
  122. (r'/[*](.|\n)*?[*]/', Comment.Multiline),
  123. ],
  124. # Special highlight for proc, data, quit, run
  125. 'proc-data': [
  126. (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]',
  127. Keyword.Reserved),
  128. ],
  129. # Special highlight cards and datalines
  130. 'cards-datalines': [
  131. (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
  132. ],
  133. 'data': [
  134. (r'(.|\n)*^\s*;\s*$', Other, '#pop'),
  135. ],
  136. # Special highlight for put NOTE|ERROR|WARNING (order matters)
  137. 'logs': [
  138. (r'\n?^\s*%?put ', Keyword, 'log-messages'),
  139. ],
  140. 'log-messages': [
  141. (r'NOTE(:|-).*', Generic, '#pop'),
  142. (r'WARNING(:|-).*', Generic.Emph, '#pop'),
  143. (r'ERROR(:|-).*', Generic.Error, '#pop'),
  144. include('general'),
  145. ],
  146. 'general': [
  147. include('keywords'),
  148. include('vars-strings'),
  149. include('special'),
  150. include('numbers'),
  151. ],
  152. # Keywords, statements, functions, macros
  153. 'keywords': [
  154. (words(builtins_statements,
  155. prefix = r'\b',
  156. suffix = r'\b'),
  157. Keyword),
  158. (words(builtins_sql,
  159. prefix = r'\b',
  160. suffix = r'\b'),
  161. Keyword),
  162. (words(builtins_conditionals,
  163. prefix = r'\b',
  164. suffix = r'\b'),
  165. Keyword),
  166. (words(builtins_macros,
  167. prefix = r'%',
  168. suffix = r'\b'),
  169. Name.Builtin),
  170. (words(builtins_functions,
  171. prefix = r'\b',
  172. suffix = r'\('),
  173. Name.Builtin),
  174. ],
  175. # Strings and user-defined variables and macros (order matters)
  176. 'vars-strings': [
  177. (r'&[a-z_]\w{0,31}\.?', Name.Variable),
  178. (r'%[a-z_]\w{0,31}', Name.Function),
  179. (r'\'', String, 'string_squote'),
  180. (r'"', String, 'string_dquote'),
  181. ],
  182. 'string_squote': [
  183. ('\'', String, '#pop'),
  184. (r'\\\\|\\"|\\\n', String.Escape),
  185. # AFAIK, macro variables are not evaluated in single quotes
  186. # (r'&', Name.Variable, 'validvar'),
  187. (r'[^$\'\\]+', String),
  188. (r'[$\'\\]', String),
  189. ],
  190. 'string_dquote': [
  191. (r'"', String, '#pop'),
  192. (r'\\\\|\\"|\\\n', String.Escape),
  193. (r'&', Name.Variable, 'validvar'),
  194. (r'[^$&"\\]+', String),
  195. (r'[$"\\]', String),
  196. ],
  197. 'validvar': [
  198. (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
  199. ],
  200. # SAS numbers and special variables
  201. 'numbers': [
  202. (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
  203. Number),
  204. ],
  205. 'special': [
  206. (r'(null|missing|_all_|_automatic_|_character_|_n_|'
  207. r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)',
  208. Keyword.Constant),
  209. ],
  210. # 'operators': [
  211. # (r'(-|=|<=|>=|<|>|<>|&|!=|'
  212. # r'\||\*|\+|\^|/|!|~|~=)', Operator)
  213. # ],
  214. }