tokenizeSearch.tsx 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. import {escapeDoubleQuotes} from 'sentry/utils';
  /**
   * Filter keys whose values are passed through unescaped by
   * `MutableSearch.fromQueryObject`, so a `*` in the value is kept as-is.
   */
  export const ALLOWED_WILDCARD_FIELDS = ['span.description', 'span.domain', 'span.status_code'];

  /**
   * Sentinel value: `MutableSearch.fromQueryObject` turns a field holding this
   * value into a `!has:<field>` filter.
   */
  export const EMPTY_OPTION_VALUE = '(empty)' as const;
  /**
   * The kinds of token a `MutableSearch` stores.
   */
  export enum TokenType {
    /** Boolean operators (`AND` / `OR`) and parentheses. */
    OPERATOR = 0,
    /** A `key:value` condition. */
    FILTER = 1,
    /** Any term that is neither an operator nor a filter. */
    FREE_TEXT = 2,
  }
  /**
   * A single element of a tokenized search query.
   */
  export type Token = {
    /** Which kind of token this is. */
    type: TokenType;
    /** The operator text, the filter value, or the free-text term. */
    value: string;
    /** The filter key; set on the tokens created by `addFilterValue`. */
    key?: string;
  };
  /**
   * Reports whether the token is an operator token.
   */
  function isOp(t: Token) {
    const {type} = t;
    return type === TokenType.OPERATOR;
  }
  /**
   * Reports whether the text is `OR` or `AND`, compared case-insensitively.
   */
  function isBooleanOp(value: string) {
    const upper = value.toUpperCase();
    return upper === 'OR' || upper === 'AND';
  }
  /**
   * Reports whether `token` is an operator token whose value is exactly the
   * given parenthesis character. An `undefined` token yields `false`.
   */
  function isParen(token: Token, character: '(' | ')') {
    if (token === undefined || !isOp(token)) {
      return false;
    }
    const holdsParen = token.value === '(' || token.value === ')';
    return holdsParen && token.value === character;
  }
  // TODO(epurkhiser): This is legacy from before the existence of
  // searchSyntax/parser. We should absolutely replace the internals of this API
  // with `parseSearch`.
  /**
   * A search query held as a flat, editable list of tokens (operators, filters
   * and free text). Tokens can be added or removed and the query serialized
   * back to a string with `formatString()`.
   */
  export class MutableSearch {
    tokens: Token[];

    /**
     * Creates a `MutableSearch` from a key-value mapping of field:value.
     * This construct doesn't support conditions like `OR` and `AND` or
     * parentheses, so it's only useful for simple queries.
     *
     * Falsy values (`undefined`, `''`, `0`) are skipped. A value equal to
     * `EMPTY_OPTION_VALUE` becomes a `!has:<key>` filter. Values of keys listed
     * in `ALLOWED_WILDCARD_FIELDS` are added without escaping.
     *
     * @param params mapping of filter key to one value or a list of values
     * @returns the populated search
     */
    static fromQueryObject(params: {
      [key: string]: string[] | string | number | undefined;
    }): MutableSearch {
      const query = new MutableSearch('');

      for (const [key, value] of Object.entries(params)) {
        if (!value) {
          continue;
        }

        const shouldEscape = !ALLOWED_WILDCARD_FIELDS.includes(key);

        if (value === EMPTY_OPTION_VALUE) {
          query.addFilterValue('!has', key);
        } else if (Array.isArray(value)) {
          query.addFilterValues(key, value, shouldEscape);
        } else {
          query.addFilterValue(key, value.toString(), shouldEscape);
        }
      }

      return query;
    }

    /**
     * Creates a MutableSearch from a string query
     */
    constructor(query: string);
    /**
     * Creates a mutable search query from a list of query parts
     */
    constructor(queries: string[]);
    constructor(tokensOrQuery: string[] | string) {
      const strTokens = Array.isArray(tokensOrQuery)
        ? tokensOrQuery
        : splitSearchIntoTokens(tokensOrQuery);

      this.tokens = [];

      for (let token of strTokens) {
        let tokenState = TokenType.FREE_TEXT;

        if (isBooleanOp(token)) {
          this.addOp(token.toUpperCase());
          continue;
        }

        // Leading parens each become their own operator token.
        if (token.startsWith('(')) {
          const parenMatch = token.match(/^\(+/g);
          if (parenMatch) {
            for (const paren of parenMatch[0].split('')) {
              this.addOp(paren);
            }
            token = token.replace(/^\(+/g, '');
          }
        }

        // Traverse the token and check if it's a filter condition or free text
        for (let i = 0, len = token.length; i < len; i++) {
          const char = token[i];

          // A token that starts with a quote or a colon is never a filter.
          if (i === 0 && (char === '"' || char === ':')) {
            break;
          }

          // We may have entered a filter condition
          if (char === ':') {
            const nextChar = token[i + 1] || '';
            if ([':', ' '].includes(nextChar)) {
              tokenState = TokenType.FREE_TEXT;
            } else {
              tokenState = TokenType.FILTER;
            }
            break;
          }
        }

        // Trailing parens are only split off when the token contains no open
        // paren of its own.
        let trailingParen = '';
        if (token.endsWith(')') && !token.includes('(')) {
          const parenMatch = token.match(/\)+$/g);
          if (parenMatch) {
            trailingParen = parenMatch[0];
            token = token.replace(/\)+$/g, '');
          }
        }

        if (tokenState === TokenType.FREE_TEXT && token.length) {
          this.addFreeText(token);
        } else if (tokenState === TokenType.FILTER) {
          // User-entered filters are stored as typed, without escaping.
          this.addStringFilter(token, false);
        }

        if (trailingParen !== '') {
          for (const paren of trailingParen.split('')) {
            this.addOp(paren);
          }
        }
      }
    }

    /**
     * Serializes the tokens back into a query string, joined by single spaces.
     * Filter values and free text containing whitespace, parentheses,
     * backslashes or double quotes are wrapped in double quotes, with inner
     * quotes passed through `escapeDoubleQuotes`.
     *
     * @returns the formatted query
     */
    formatString() {
      const needsQuoting = /[\s\(\)\\"]/;
      const formattedTokens: string[] = [];

      for (const token of this.tokens) {
        switch (token.type) {
          case TokenType.FILTER:
            if (token.value === '' || token.value === null) {
              formattedTokens.push(`${token.key}:""`);
            } else if (needsQuoting.test(token.value)) {
              formattedTokens.push(`${token.key}:"${escapeDoubleQuotes(token.value)}"`);
            } else {
              formattedTokens.push(`${token.key}:${token.value}`);
            }
            break;
          case TokenType.FREE_TEXT:
            if (needsQuoting.test(token.value)) {
              formattedTokens.push(`"${escapeDoubleQuotes(token.value)}"`);
            } else {
              formattedTokens.push(token.value);
            }
            break;
          default:
            formattedTokens.push(token.value);
        }
      }

      return formattedTokens.join(' ').trim();
    }

    /**
     * Parses a `key:value` string with `parseFilter` and appends it as a filter.
     *
     * @param filter the raw `key:value` text
     * @param shouldEscape whether to escape the value before storing it
     * @returns this search, for chaining
     */
    addStringFilter(filter: string, shouldEscape = true) {
      const [key, value] = parseFilter(filter);
      this.addFilterValues(key, [value], shouldEscape);
      return this;
    }

    /**
     * Appends one filter token per value under the same key.
     *
     * @param key the filter key
     * @param values the values to add
     * @param shouldEscape whether to escape each value before storing it
     * @returns this search, for chaining
     */
    addFilterValues(key: string, values: string[], shouldEscape = true) {
      for (const value of values) {
        this.addFilterValue(key, value, shouldEscape);
      }
      return this;
    }

    /**
     * Appends a single filter token.
     *
     * @param key the filter key
     * @param value the filter value
     * @param shouldEscape whether to run the value through `escapeFilterValue`
     * @returns this search, for chaining (consistent with the other add methods)
     */
    addFilterValue(key: string, value: string, shouldEscape = true) {
      // Filter values that we insert through the UI can contain special characters
      // that need to escaped. User entered filters should not be escaped.
      const escaped = shouldEscape ? escapeFilterValue(value) : value;
      const token: Token = {type: TokenType.FILTER, key, value: escaped};
      this.tokens.push(token);
      return this;
    }

    /**
     * Replaces every filter for `key` with the given values. The new tokens are
     * appended at the end of the token list.
     *
     * @param key the filter key
     * @param values the replacement values
     * @param shouldEscape whether to escape each value before storing it
     * @returns this search, for chaining
     */
    setFilterValues(key: string, values: string[], shouldEscape = true) {
      this.removeFilter(key);
      this.addFilterValues(key, values, shouldEscape);
      return this;
    }

    /**
     * All filter values grouped by key, as a freshly built plain object.
     */
    get filters() {
      type Filters = Record<string, string[]>;

      // Group in a Map so that keys which collide with Object.prototype members
      // (`constructor`, `toString`, `__proto__`, ...) are treated like any other
      // key instead of resolving to an inherited, non-iterable value.
      const grouped = new Map<string, string[]>();
      for (const token of this.tokens) {
        if (token.type !== TokenType.FILTER) {
          continue;
        }
        const key = String(token.key);
        const values = grouped.get(key);
        if (values) {
          values.push(token.value);
        } else {
          grouped.set(key, [token.value]);
        }
      }

      const result: Filters = Object.fromEntries(grouped);
      return result;
    }

    /**
     * Returns the stored values of every filter token with the given key, in
     * token order. Returns an empty array when there is none.
     */
    getFilterValues(key: string) {
      return this.tokens
        .filter(t => t.type === TokenType.FILTER && String(t.key) === key)
        .map(t => t.value);
    }

    /**
     * Returns the distinct filter keys present in the search.
     */
    getFilterKeys() {
      return Object.keys(this.filters);
    }

    /**
     * Reports whether at least one filter token uses the given key.
     */
    hasFilter(key: string): boolean {
      return this.tokens.some(
        t => t.type === TokenType.FILTER && String(t.key) === key
      );
    }

    /**
     * Removes every token with the given key, then tidies up the boolean
     * operators and parentheses that the removal left dangling.
     *
     * @param key the filter key to remove
     * @returns this search, for chaining
     */
    removeFilter(key: string) {
      const removeErroneousAndOrOps = () => {
        let toRemove = -1;
        do {
          if (toRemove >= 0) {
            this.tokens.splice(toRemove, 1);
            toRemove = -1;
          }

          for (let i = 0; i < this.tokens.length; i++) {
            const token = this.tokens[i];
            const prev = this.tokens[i - 1];
            const next = this.tokens[i + 1];
            if (isOp(token) && isBooleanOp(token.value)) {
              if (prev === undefined || isOp(prev) || next === undefined || isOp(next)) {
                // Want to avoid removing `(term) OR (term)` and `term OR (term)`
                if (
                  prev &&
                  next &&
                  (isParen(prev, ')') || !isOp(prev)) &&
                  (isParen(next, '(') || !isOp(next))
                ) {
                  continue;
                }
                toRemove = i;
                break;
              }
            }
          }
        } while (toRemove >= 0);
      };

      this.tokens = this.tokens.filter(token => token.key !== key);

      // Remove any AND/OR operators that have become erroneous due to filtering out tokens
      removeErroneousAndOrOps();

      // Now the really complicated part: removing parens that only have one element in them.
      // Since parens are themselves tokens, this gets tricky. In summary, loop through the
      // tokens until we find the innermost open paren. Then forward search through the rest of the tokens
      // to see if that open paren corresponds to a closed paren with one or fewer items inside.
      // If it does, delete those parens, and loop again until there are no more parens to delete.
      let parensToDelete: number[] = [];
      const cleanParens = (_: Token, idx: number) => !parensToDelete.includes(idx);
      do {
        if (parensToDelete.length) {
          this.tokens = this.tokens.filter(cleanParens);
        }
        parensToDelete = [];

        for (let i = 0; i < this.tokens.length; i++) {
          const token = this.tokens[i];
          if (!isOp(token) || token.value !== '(') {
            continue;
          }

          let alreadySeen = false;
          for (let j = i + 1; j < this.tokens.length; j++) {
            const nextToken = this.tokens[j];
            if (isOp(nextToken) && nextToken.value === '(') {
              // Continue down to the nested parens. We can skip i forward since we know
              // everything between i and j is NOT an open paren.
              i = j - 1;
              break;
            } else if (!isOp(nextToken)) {
              if (alreadySeen) {
                // This has more than one term, no need to delete
                break;
              }
              alreadySeen = true;
            } else if (isOp(nextToken) && nextToken.value === ')') {
              // We found another paren with zero or one terms inside. Delete the pair.
              parensToDelete = [i, j];
              break;
            }
          }

          if (parensToDelete.length > 0) {
            break;
          }
        }
      } while (parensToDelete.length > 0);

      // Now that all erroneous parens are removed we need to remove dangling OR/AND operators.
      // I originally removed all the dangling properties in a single loop, but that meant that
      // cases like `a OR OR b` would remove both operators, when only one should be removed. So
      // instead, we loop until we find an operator to remove, then go back to the start and loop
      // again.
      removeErroneousAndOrOps();

      return this;
    }

    /**
     * Removes the filters for `key` whose stored value equals `value`, keeping
     * the other values of that key. The kept values are re-added at the end of
     * the token list.
     *
     * @param key the filter key
     * @param value the stored value to drop
     * @returns this search, for chaining
     */
    removeFilterValue(key: string, value: string) {
      const values = this.getFilterValues(key);
      if (values.length) {
        // The retained values are already in their stored form, so they must not
        // be escaped a second time when they are re-added.
        this.setFilterValues(
          key,
          values.filter(item => item !== value),
          false
        );
      }
      return this;
    }

    /**
     * Appends a free-text token. Enclosing quotes and parens are stripped from
     * the value by `formatQuery`.
     *
     * @returns this search, for chaining
     */
    addFreeText(value: string) {
      const token: Token = {type: TokenType.FREE_TEXT, value: formatQuery(value)};
      this.tokens.push(token);
      return this;
    }

    /**
     * Appends an operator token with the given text.
     *
     * @returns this search, for chaining
     */
    addOp(value: string) {
      const token: Token = {type: TokenType.OPERATOR, value};
      this.tokens.push(token);
      return this;
    }

    /**
     * The values of all free-text tokens, in token order.
     */
    get freeText(): string[] {
      return this.tokens.filter(t => t.type === TokenType.FREE_TEXT).map(t => t.value);
    }

    /**
     * Replaces all free-text tokens; the new ones are appended at the end of
     * the token list.
     */
    set freeText(values: string[]) {
      this.tokens = this.tokens.filter(t => t.type !== TokenType.FREE_TEXT);
      for (const v of values) {
        this.addFreeText(v);
      }
    }

    /**
     * Returns a new `MutableSearch` with its own token array. The token objects
     * themselves are shared with this instance.
     */
    copy() {
      const q = new MutableSearch([]);
      q.tokens = [...this.tokens];
      return q;
    }

    /**
     * Reports whether the search holds no tokens.
     */
    isEmpty() {
      return this.tokens.length === 0;
    }
  }
  /**
   * Breaks a raw search string into whitespace-separated token strings.
   *
   * Whitespace inside single- or double-quoted sections does not split, and
   * neither does whitespace that follows a word ending in ':'. Within quotes, a
   * backslash followed by the active quote character is consumed as a pair.
   *
   * Should stay in sync with src.sentry.search.utils:split_query_into_tokens
   */
  function splitSearchIntoTokens(query: string) {
    const chars = Array.from(query);
    const lastIndex = chars.length - 1;
    const result: string[] = [];

    let current = '';
    let lastWordEnd = '';
    let activeQuote = '';
    let insideQuotes = false;

    let pos = 0;
    while (pos < chars.length) {
      const ch = chars[pos];
      const following = lastIndex > pos ? chars[pos + 1] : null;
      current += ch;

      // Remember the last character of the word that just ended.
      if (following !== null && !isSpace(ch) && isSpace(following)) {
        lastWordEnd = ch;
      }

      // Whitespace outside quotes closes the current token, unless the
      // preceding word ended with ':' or nothing but whitespace was collected.
      if (isSpace(ch) && !insideQuotes && lastWordEnd !== ':' && !isSpace(current)) {
        result.push(current.trim());
        current = '';
      }

      // Only the quote character that opened a section can close it.
      const isQuoteChar = ch === "'" || ch === '"';
      if (isQuoteChar && (!insideQuotes || activeQuote === ch)) {
        insideQuotes = !insideQuotes;
        if (insideQuotes) {
          activeQuote = ch;
        }
      }

      // Take an escaped quote together with its backslash so that it does not
      // end the quoted section.
      if (insideQuotes && ch === '\\' && following === activeQuote) {
        current += following;
        pos++;
      }

      pos++;
    }

    const remainder = current.trim();
    if (remainder !== '') {
      result.push(remainder);
    }

    return result;
  }
  /**
   * Reports whether the string is empty or consists solely of whitespace.
   */
  function isSpace(s: string) {
    const stripped = s.trim();
    return stripped === '';
  }
  /**
   * Splits a filter on its first ':' and removes enclosing quotes if present,
   * returning both sides of the split as strings.
   *
   * When the text contains no ':', the whole text is treated as the key and the
   * value is the empty string.
   *
   * @param filter raw filter text, normally of the form `key:value`
   * @returns a `[key, value]` pair
   */
  function parseFilter(filter: string): [string, string] {
    const idx = filter.indexOf(':');

    if (idx === -1) {
      // Without this guard `slice(0, -1)` would drop the last character of the
      // key and the whole text would be returned as the value.
      return [removeSurroundingQuotes(filter), ''];
    }

    const key = removeSurroundingQuotes(filter.slice(0, idx));
    const value = removeSurroundingQuotes(filter.slice(idx + 1));
    return [key, value];
  }
  /**
   * Strips runs of double quotes from both ends of the text. Each scan stops at
   * the midpoint of the string, and a trailing quote preceded by a backslash is
   * kept. Text of length 0 or 1 is returned unchanged.
   */
  function removeSurroundingQuotes(text: string) {
    const size = text.length;
    if (size <= 1) {
      return text;
    }

    const midpoint = size / 2;

    // Advance past leading quotes.
    let start = 0;
    while (start <= midpoint && text.charAt(start) === '"') {
      start++;
    }

    // Walk back over trailing quotes, stopping at one that is escaped.
    let end = size - 1;
    while (
      end >= midpoint &&
      text.charAt(end) === '"' &&
      text.charAt(end - 1) !== '\\'
    ) {
      end--;
    }

    return text.slice(start, end + 1);
  }
  /**
   * Removes any leading run of double quotes / open parens and any trailing run
   * of double quotes / close parens from a query.
   */
  function formatQuery(query: string) {
    const enclosingChars = /^["\(]+|["\)]+$/g;
    const stripped = query.replace(enclosingChars, '');
    return stripped;
  }
  /**
   * Some characters have special meaning in a filter value. So when they are
   * directly added as a value, we have to escape them to mean the literal.
   *
   * Non-string input is returned unchanged.
   */
  export function escapeFilterValue(value: string) {
    // TODO(txiao): The types here are definitely wrong.
    // Need to dig deeper to see where exactly it's wrong.
    if (typeof value !== 'string') {
      return value;
    }

    // asterisks (*) are used for wildcard searches
    return value.replace(/([\*])/g, '\\$1');
  }