tokenizeSearch.tsx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. import {escapeDoubleQuotes} from 'app/utils';
  2. export enum TokenType {
  3. OP,
  4. TAG,
  5. QUERY,
  6. }
  7. export type Token = {
  8. type: TokenType;
  9. value: string;
  10. key?: string;
  11. };
  12. function isOp(t: Token) {
  13. return t.type === TokenType.OP;
  14. }
  15. function isBooleanOp(value: string) {
  16. return ['OR', 'AND'].includes(value.toUpperCase());
  17. }
  18. function isParen(token: Token, character: '(' | ')') {
  19. return (
  20. token !== undefined &&
  21. isOp(token) &&
  22. ['(', ')'].includes(token.value) &&
  23. token.value === character
  24. );
  25. }
  26. export class QueryResults {
  27. tagValues: Record<string, string[]>;
  28. tokens: Token[];
  29. constructor(strTokens: string[]) {
  30. this.tokens = [];
  31. this.tagValues = {};
  32. for (let token of strTokens) {
  33. let tokenState = TokenType.QUERY;
  34. if (isBooleanOp(token)) {
  35. this.addOp(token.toUpperCase());
  36. continue;
  37. }
  38. if (token.startsWith('(')) {
  39. const parenMatch = token.match(/^\(+/g);
  40. if (parenMatch) {
  41. parenMatch[0].split('').map(paren => this.addOp(paren));
  42. token = token.replace(/^\(+/g, '');
  43. }
  44. }
  45. // Traverse the token and determine if it is a tag
  46. // condition or bare words.
  47. for (let i = 0, len = token.length; i < len; i++) {
  48. const char = token[i];
  49. if (i === 0 && (char === '"' || char === ':')) {
  50. break;
  51. }
  52. // We may have entered a tag condition
  53. if (char === ':') {
  54. const nextChar = token[i + 1] || '';
  55. if ([':', ' '].includes(nextChar)) {
  56. tokenState = TokenType.QUERY;
  57. } else {
  58. tokenState = TokenType.TAG;
  59. }
  60. break;
  61. }
  62. }
  63. let trailingParen = '';
  64. if (token.endsWith(')') && !token.includes('(')) {
  65. const parenMatch = token.match(/\)+$/g);
  66. if (parenMatch) {
  67. trailingParen = parenMatch[0];
  68. token = token.replace(/\)+$/g, '');
  69. }
  70. }
  71. if (tokenState === TokenType.QUERY && token.length) {
  72. this.addQuery(token);
  73. } else if (tokenState === TokenType.TAG) {
  74. this.addStringTag(token, false);
  75. }
  76. if (trailingParen !== '') {
  77. trailingParen.split('').map(paren => this.addOp(paren));
  78. }
  79. }
  80. }
  81. formatString() {
  82. const formattedTokens: string[] = [];
  83. for (const token of this.tokens) {
  84. switch (token.type) {
  85. case TokenType.TAG:
  86. if (token.value === '' || token.value === null) {
  87. formattedTokens.push(`${token.key}:""`);
  88. } else if (/[\s\(\)\\"]/g.test(token.value)) {
  89. formattedTokens.push(`${token.key}:"${escapeDoubleQuotes(token.value)}"`);
  90. } else {
  91. formattedTokens.push(`${token.key}:${token.value}`);
  92. }
  93. break;
  94. case TokenType.QUERY:
  95. if (/[\s\(\)\\"]/g.test(token.value)) {
  96. formattedTokens.push(`"${escapeDoubleQuotes(token.value)}"`);
  97. } else {
  98. formattedTokens.push(token.value);
  99. }
  100. break;
  101. default:
  102. formattedTokens.push(token.value);
  103. }
  104. }
  105. return formattedTokens.join(' ').trim();
  106. }
  107. addStringTag(value: string, shouldEscape = true) {
  108. const [key, tag] = formatTag(value);
  109. this.addTagValues(key, [tag], shouldEscape);
  110. return this;
  111. }
  112. addTagValues(tag: string, tagValues: string[], shouldEscape = true) {
  113. for (const t of tagValues) {
  114. // Tag values that we insert through the UI can contain special characters
  115. // that need to escaped. User entered filters should not be escaped.
  116. const escaped = shouldEscape ? escapeTagValue(t) : t;
  117. this.tagValues[tag] = Array.isArray(this.tagValues[tag])
  118. ? [...this.tagValues[tag], escaped]
  119. : [escaped];
  120. const token: Token = {type: TokenType.TAG, key: tag, value: escaped};
  121. this.tokens.push(token);
  122. }
  123. return this;
  124. }
  125. setTagValues(tag: string, tagValues: string[], shouldEscape = true) {
  126. this.removeTag(tag);
  127. this.addTagValues(tag, tagValues, shouldEscape);
  128. return this;
  129. }
  130. getTagValues(tag: string) {
  131. return this.tagValues[tag] ?? [];
  132. }
  133. getTagKeys() {
  134. return Object.keys(this.tagValues);
  135. }
  136. hasTag(tag: string): boolean {
  137. const tags = this.getTagValues(tag);
  138. return !!(tags && tags.length);
  139. }
  140. removeTag(key: string) {
  141. this.tokens = this.tokens.filter(token => token.key !== key);
  142. delete this.tagValues[key];
  143. // Now the really complicated part: removing parens that only have one element in them.
  144. // Since parens are themselves tokens, this gets tricky. In summary, loop through the
  145. // tokens until we find the innermost open paren. Then forward search through the rest of the tokens
  146. // to see if that open paren corresponds to a closed paren with one or fewer items inside.
  147. // If it does, delete those parens, and loop again until there are no more parens to delete.
  148. let parensToDelete: number[] = [];
  149. const cleanParens = (_, idx: number) => !parensToDelete.includes(idx);
  150. do {
  151. if (parensToDelete.length) {
  152. this.tokens = this.tokens.filter(cleanParens);
  153. }
  154. parensToDelete = [];
  155. for (let i = 0; i < this.tokens.length; i++) {
  156. const token = this.tokens[i];
  157. if (!isOp(token) || token.value !== '(') {
  158. continue;
  159. }
  160. let alreadySeen = false;
  161. for (let j = i + 1; j < this.tokens.length; j++) {
  162. const nextToken = this.tokens[j];
  163. if (isOp(nextToken) && nextToken.value === '(') {
  164. // Continue down to the nested parens. We can skip i forward since we know
  165. // everything between i and j is NOT an open paren.
  166. i = j - 1;
  167. break;
  168. } else if (!isOp(nextToken)) {
  169. if (alreadySeen) {
  170. // This has more than one term, no need to delete
  171. break;
  172. }
  173. alreadySeen = true;
  174. } else if (isOp(nextToken) && nextToken.value === ')') {
  175. // We found another paren with zero or one terms inside. Delete the pair.
  176. parensToDelete = [i, j];
  177. break;
  178. }
  179. }
  180. if (parensToDelete.length > 0) {
  181. break;
  182. }
  183. }
  184. } while (parensToDelete.length > 0);
  185. // Now that all erroneous parens are removed we need to remove dangling OR/AND operators.
  186. // I originally removed all the dangling properties in a single loop, but that meant that
  187. // cases like `a OR OR b` would remove both operators, when only one should be removed. So
  188. // instead, we loop until we find an operator to remove, then go back to the start and loop
  189. // again.
  190. let toRemove = -1;
  191. do {
  192. if (toRemove >= 0) {
  193. this.tokens.splice(toRemove, 1);
  194. toRemove = -1;
  195. }
  196. for (let i = 0; i < this.tokens.length; i++) {
  197. const token = this.tokens[i];
  198. const prev = this.tokens[i - 1];
  199. const next = this.tokens[i + 1];
  200. if (isOp(token) && isBooleanOp(token.value)) {
  201. if (prev === undefined || isOp(prev) || next === undefined || isOp(next)) {
  202. // Want to avoid removing `(term) OR (term)`
  203. if (isParen(prev, ')') && isParen(next, '(')) {
  204. continue;
  205. }
  206. toRemove = i;
  207. break;
  208. }
  209. }
  210. }
  211. } while (toRemove >= 0);
  212. return this;
  213. }
  214. removeTagValue(key: string, value: string) {
  215. const values = this.getTagValues(key);
  216. if (Array.isArray(values) && values.length) {
  217. this.setTagValues(
  218. key,
  219. values.filter(item => item !== value)
  220. );
  221. }
  222. }
  223. addQuery(value: string) {
  224. const token: Token = {type: TokenType.QUERY, value: formatQuery(value)};
  225. this.tokens.push(token);
  226. return this;
  227. }
  228. addOp(value: string) {
  229. const token: Token = {type: TokenType.OP, value};
  230. this.tokens.push(token);
  231. return this;
  232. }
  233. get query(): string[] {
  234. return this.tokens.filter(t => t.type === TokenType.QUERY).map(t => t.value);
  235. }
  236. set query(values: string[]) {
  237. this.tokens = this.tokens.filter(t => t.type !== TokenType.QUERY);
  238. for (const v of values) {
  239. this.addQuery(v);
  240. }
  241. }
  242. copy() {
  243. const q = new QueryResults([]);
  244. q.tagValues = {...this.tagValues};
  245. q.tokens = [...this.tokens];
  246. return q;
  247. }
  248. isEmpty() {
  249. return this.tokens.length === 0;
  250. }
  251. }
  252. /**
  253. * Tokenize a search into a QueryResult
  254. *
  255. *
  256. * Should stay in sync with src.sentry.search.utils:tokenize_query
  257. */
  258. export function tokenizeSearch(query: string) {
  259. const tokens = splitSearchIntoTokens(query);
  260. return new QueryResults(tokens);
  261. }
  262. /**
  263. * Splits search strings into tokens for parsing by tokenizeSearch.
  264. *
  265. * Should stay in sync with src.sentry.search.utils:split_query_into_tokens
  266. */
  267. function splitSearchIntoTokens(query: string) {
  268. const queryChars = Array.from(query);
  269. const tokens: string[] = [];
  270. let token = '';
  271. let endOfPrevWord = '';
  272. let quoteType = '';
  273. let quoteEnclosed = false;
  274. for (let idx = 0; idx < queryChars.length; idx++) {
  275. const char = queryChars[idx];
  276. const nextChar = queryChars.length - 1 > idx ? queryChars[idx + 1] : null;
  277. token += char;
  278. if (nextChar !== null && !isSpace(char) && isSpace(nextChar)) {
  279. endOfPrevWord = char;
  280. }
  281. if (isSpace(char) && !quoteEnclosed && endOfPrevWord !== ':' && !isSpace(token)) {
  282. tokens.push(token.trim());
  283. token = '';
  284. }
  285. if (["'", '"'].includes(char) && (!quoteEnclosed || quoteType === char)) {
  286. quoteEnclosed = !quoteEnclosed;
  287. if (quoteEnclosed) {
  288. quoteType = char;
  289. }
  290. }
  291. if (quoteEnclosed && char === '\\' && nextChar === quoteType) {
  292. token += nextChar;
  293. idx++;
  294. }
  295. }
  296. const trimmedToken = token.trim();
  297. if (trimmedToken !== '') {
  298. tokens.push(trimmedToken);
  299. }
  300. return tokens;
  301. }
  302. /**
  303. * Checks if the string is only spaces
  304. */
  305. function isSpace(s: string) {
  306. return s.trim() === '';
  307. }
  308. /**
  309. * Splits tags on ':' and removes enclosing quotes if present, and returns both
  310. * sides of the split as strings.
  311. */
  312. function formatTag(tag: string) {
  313. const idx = tag.indexOf(':');
  314. const key = removeSurroundingQuotes(tag.slice(0, idx));
  315. const value = removeSurroundingQuotes(tag.slice(idx + 1));
  316. return [key, value];
  317. }
  318. function removeSurroundingQuotes(text: string) {
  319. const length = text.length;
  320. if (length <= 1) {
  321. return text;
  322. }
  323. let left = 0;
  324. for (; left <= length / 2; left++) {
  325. if (text.charAt(left) !== '"') {
  326. break;
  327. }
  328. }
  329. let right = length - 1;
  330. for (; right >= length / 2; right--) {
  331. if (text.charAt(right) !== '"' || text.charAt(right - 1) === '\\') {
  332. break;
  333. }
  334. }
  335. return text.slice(left, right + 1);
  336. }
  337. /**
  338. * Strips enclosing quotes and parens from a query, if present.
  339. */
  340. function formatQuery(query: string) {
  341. return query.replace(/^["\(]+|["\)]+$/g, '');
  342. }
  343. /**
  344. * Some characters have special meaning in a tag value. So when they
  345. * are directly added as a tag value, we have to escape them to mean
  346. * the literal.
  347. */
  348. function escapeTagValue(value: string) {
  349. // TODO(txiao): The types here are definitely wrong.
  350. // Need to dig deeper to see where exactly it's wrong.
  351. //
  352. // astericks (*) is used for wildcard searches
  353. return typeof value === 'string' ? value.replace(/([\*])/g, '\\$1') : value;
  354. }