tokenizeSearch.tsx 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. import {escapeDoubleQuotes} from 'sentry/utils';
  /**
   * Kinds of tokens a search query is broken into.
   *
   * The numeric values are spelled out explicitly; they are the same values
   * the members would receive implicitly from their declaration order.
   */
  export enum TokenType {
    /** Boolean operators (`AND` / `OR`) and parentheses. */
    OPERATOR = 0,
    /** A `key:value` filter condition. */
    FILTER = 1,
    /** Any other text in the query. */
    FREE_TEXT = 2,
  }
  /**
   * A single parsed piece of a search query.
   */
  export type Token = {
    /** Filter key; only set on `TokenType.FILTER` tokens. */
    key?: string;
    /** Which kind of token this is. */
    type: TokenType;
    /** The token text: operator symbol, filter value, or free text. */
    value: string;
  };
  /**
   * Returns true when the token is an operator token (boolean operator or
   * parenthesis).
   */
  function isOp(t: Token) {
    const {type} = t;
    return type === TokenType.OPERATOR;
  }
  /**
   * Returns true when the string is the boolean operator `OR` or `AND`,
   * compared case-insensitively.
   */
  function isBooleanOp(value: string) {
    const normalized = value.toUpperCase();
    return normalized === 'OR' || normalized === 'AND';
  }
  /**
   * Returns true when `token` is an operator token whose value is exactly the
   * given parenthesis character. An undefined token (e.g. an out-of-range
   * array read) yields false.
   */
  function isParen(token: Token, character: '(' | ')') {
    if (token === undefined || !isOp(token)) {
      return false;
    }
    const {value} = token;
    const valueIsParen = value === '(' || value === ')';
    return valueIsParen && value === character;
  }
  // TODO(epurkhiser): This is legacy from before the existence of
  // searchSyntax/parser. We should absolutely replace the internals of this API
  // with `parseSearch`.
  /**
   * A mutable, token-based representation of a search query.
   *
   * The query is held as a flat list of tokens (operators, `key:value` filters
   * and free text) that can be inspected and edited, then turned back into a
   * query string with `formatString`.
   */
  export class MutableSearch {
    tokens: Token[];

    /**
     * Creates a MutableSearch from a string query
     */
    constructor(query: string);
    /**
     * Creates a mutable search query from a list of query parts
     */
    constructor(queries: string[]);
    constructor(tokensOrQuery: string[] | string) {
      const strTokens = Array.isArray(tokensOrQuery)
        ? tokensOrQuery
        : splitSearchIntoTokens(tokensOrQuery);

      this.tokens = [];

      for (let token of strTokens) {
        let tokenState = TokenType.FREE_TEXT;

        if (isBooleanOp(token)) {
          this.addOp(token.toUpperCase());
          continue;
        }

        // Peel leading open parens off into their own operator tokens.
        if (token.startsWith('(')) {
          const parenMatch = token.match(/^\(+/g);
          if (parenMatch) {
            for (const paren of parenMatch[0]) {
              this.addOp(paren);
            }
            token = token.replace(/^\(+/g, '');
          }
        }

        // Traverse the token and check if it's a filter condition or free text
        for (let i = 0, len = token.length; i < len; i++) {
          const char = token[i];

          // A token that starts with a quote or a colon is always free text.
          if (i === 0 && (char === '"' || char === ':')) {
            break;
          }

          // We may have entered a filter condition
          if (char === ':') {
            const nextChar = token[i + 1] || '';
            if ([':', ' '].includes(nextChar)) {
              tokenState = TokenType.FREE_TEXT;
            } else {
              tokenState = TokenType.FILTER;
            }
            break;
          }
        }

        // Peel trailing close parens off, unless the token itself contains an
        // open paren (e.g. a function-style value such as `count()`).
        let trailingParen = '';
        if (token.endsWith(')') && !token.includes('(')) {
          const parenMatch = token.match(/\)+$/g);
          if (parenMatch) {
            trailingParen = parenMatch[0];
            token = token.replace(/\)+$/g, '');
          }
        }

        if (tokenState === TokenType.FREE_TEXT && token.length) {
          this.addFreeText(token);
        } else if (tokenState === TokenType.FILTER) {
          // User entered filters are stored as typed, without escaping.
          this.addStringFilter(token, false);
        }

        for (const paren of trailingParen) {
          this.addOp(paren);
        }
      }
    }

    /**
     * Serializes the tokens back into a query string. Filter values and free
     * text containing whitespace, parens, backslashes or double quotes are
     * wrapped in double quotes (and passed through `escapeDoubleQuotes`).
     */
    formatString() {
      // No `g` flag: the pattern is only used with `.test()`.
      const needsQuoting = /[\s()\\"]/;
      const formattedTokens: string[] = [];

      for (const token of this.tokens) {
        switch (token.type) {
          case TokenType.FILTER:
            if (token.value === '' || token.value === null) {
              formattedTokens.push(`${token.key}:""`);
            } else if (needsQuoting.test(token.value)) {
              formattedTokens.push(`${token.key}:"${escapeDoubleQuotes(token.value)}"`);
            } else {
              formattedTokens.push(`${token.key}:${token.value}`);
            }
            break;
          case TokenType.FREE_TEXT:
            if (needsQuoting.test(token.value)) {
              formattedTokens.push(`"${escapeDoubleQuotes(token.value)}"`);
            } else {
              formattedTokens.push(token.value);
            }
            break;
          default:
            formattedTokens.push(token.value);
        }
      }

      return formattedTokens.join(' ').trim();
    }

    /**
     * Parses a `key:value` string and appends it as a filter token.
     */
    addStringFilter(filter: string, shouldEscape = true) {
      const [key, value] = parseFilter(filter);
      this.addFilterValues(key, [value], shouldEscape);
      return this;
    }

    /**
     * Appends one filter token per value for the given key.
     */
    addFilterValues(key: string, values: string[], shouldEscape = true) {
      for (const value of values) {
        this.addFilterValue(key, value, shouldEscape);
      }
      return this;
    }

    /**
     * Appends a single filter token.
     */
    addFilterValue(key: string, value: string, shouldEscape = true) {
      // Filter values that we insert through the UI can contain special characters
      // that need to escaped. User entered filters should not be escaped.
      const escaped = shouldEscape ? escapeFilterValue(value) : value;
      const token: Token = {type: TokenType.FILTER, key, value: escaped};
      this.tokens.push(token);
    }

    /**
     * Replaces every filter token for `key` with the given values. The new
     * tokens are appended at the end of the token list.
     */
    setFilterValues(key: string, values: string[], shouldEscape = true) {
      this.removeFilter(key);
      this.addFilterValues(key, values, shouldEscape);
      return this;
    }

    /**
     * Groups the values of all filter tokens by key, in token order.
     *
     * A Map is used rather than a plain object so that keys which collide with
     * `Object.prototype` members (`constructor`, `toString`, `__proto__`, ...)
     * are treated like any other key instead of picking up inherited values.
     */
    private collectFilters(): Map<string, string[]> {
      const grouped = new Map<string, string[]>();
      for (const token of this.tokens) {
        if (token.type !== TokenType.FILTER) {
          continue;
        }
        const key = token.key!;
        const existing = grouped.get(key);
        if (existing === undefined) {
          grouped.set(key, [token.value]);
        } else {
          existing.push(token.value);
        }
      }
      return grouped;
    }

    /**
     * A freshly built record of filter key to the list of its values.
     */
    get filters() {
      // Object.fromEntries defines each key as an own property, so even a key
      // such as `__proto__` ends up as a regular entry.
      const filters: Record<string, string[]> = Object.fromEntries(
        this.collectFilters()
      );
      return filters;
    }

    /**
     * Returns the values of all filter tokens with the given key, or an empty
     * array when there are none.
     */
    getFilterValues(key: string) {
      return this.collectFilters().get(key) ?? [];
    }

    /**
     * Returns the distinct keys of all filter tokens.
     */
    getFilterKeys() {
      return Object.keys(this.filters);
    }

    /**
     * Returns true when at least one filter token has the given key.
     */
    hasFilter(key: string): boolean {
      return this.getFilterValues(key).length > 0;
    }

    /**
     * Removes every filter token with the given key, then tidies up boolean
     * operators and parentheses that the removal left dangling.
     */
    removeFilter(key: string) {
      this.tokens = this.tokens.filter(token => token.key !== key);

      // Remove any AND/OR operators that have become erroneous due to filtering out tokens
      this.removeErroneousBooleanOps();

      this.removeRedundantParens();

      // Now that all erroneous parens are removed we need to remove dangling OR/AND operators.
      this.removeErroneousBooleanOps();

      return this;
    }

    /**
     * Removes dangling AND/OR operators one at a time, rescanning from the
     * start after each removal. Removing them all in a single pass would turn
     * `a OR OR b` into `a b`, when only one operator should be dropped.
     */
    private removeErroneousBooleanOps() {
      let toRemove = this.findErroneousBooleanOp();
      while (toRemove >= 0) {
        this.tokens.splice(toRemove, 1);
        toRemove = this.findErroneousBooleanOp();
      }
    }

    /**
     * Returns the index of the first AND/OR operator that lacks a valid operand
     * on either side, or -1 when there is none. A valid left operand is a
     * non-operator token or `)`; a valid right operand is a non-operator token
     * or `(`. This keeps `(term) OR (term)` and `term OR (term)` intact.
     */
    private findErroneousBooleanOp(): number {
      for (let i = 0; i < this.tokens.length; i++) {
        const token = this.tokens[i];
        if (!isOp(token) || !isBooleanOp(token.value)) {
          continue;
        }

        const prev = this.tokens[i - 1];
        const next = this.tokens[i + 1];
        const hasValidOperands =
          prev !== undefined &&
          next !== undefined &&
          (isParen(prev, ')') || !isOp(prev)) &&
          (isParen(next, '(') || !isOp(next));

        if (!hasValidOperands) {
          return i;
        }
      }
      return -1;
    }

    /**
     * Removes paren pairs that enclose one or fewer non-operator tokens.
     *
     * Since parens are themselves tokens, this gets tricky. In summary, loop
     * through the tokens until we find the innermost open paren. Then forward
     * search through the rest of the tokens to see if that open paren
     * corresponds to a closed paren with one or fewer items inside. If it does,
     * delete those parens, and loop again until there are no more parens to
     * delete.
     */
    private removeRedundantParens() {
      let parensToDelete: number[] = [];
      const cleanParens = (_: Token, idx: number) => !parensToDelete.includes(idx);

      do {
        if (parensToDelete.length) {
          this.tokens = this.tokens.filter(cleanParens);
        }

        parensToDelete = [];

        for (let i = 0; i < this.tokens.length; i++) {
          const token = this.tokens[i];
          if (!isOp(token) || token.value !== '(') {
            continue;
          }

          let alreadySeen = false;
          for (let j = i + 1; j < this.tokens.length; j++) {
            const nextToken = this.tokens[j];
            if (isOp(nextToken) && nextToken.value === '(') {
              // Continue down to the nested parens. We can skip i forward since we know
              // everything between i and j is NOT an open paren.
              i = j - 1;
              break;
            } else if (!isOp(nextToken)) {
              if (alreadySeen) {
                // This has more than one term, no need to delete
                break;
              }
              alreadySeen = true;
            } else if (isOp(nextToken) && nextToken.value === ')') {
              // We found another paren with zero or one terms inside. Delete the pair.
              parensToDelete = [i, j];
              break;
            }
          }

          if (parensToDelete.length > 0) {
            break;
          }
        }
      } while (parensToDelete.length > 0);
    }

    /**
     * Removes the filter tokens for `key` whose stored value equals `value`,
     * keeping the key's other values.
     */
    removeFilterValue(key: string, value: string) {
      const values = this.getFilterValues(key);
      if (Array.isArray(values) && values.length) {
        // The remaining values come straight from existing tokens, so they are
        // already in their stored form. Escaping them again would corrupt them
        // (e.g. `\*` would become `\\*`, and a user-typed wildcard `*` would
        // turn into a literal).
        this.setFilterValues(
          key,
          values.filter(item => item !== value),
          false
        );
      }
    }

    /**
     * Appends a free text token, with enclosing quotes and parens stripped by
     * `formatQuery`.
     */
    addFreeText(value: string) {
      const token: Token = {type: TokenType.FREE_TEXT, value: formatQuery(value)};
      this.tokens.push(token);
      return this;
    }

    /**
     * Appends an operator token (boolean operator or parenthesis).
     */
    addOp(value: string) {
      const token: Token = {type: TokenType.OPERATOR, value};
      this.tokens.push(token);
      return this;
    }

    /**
     * The values of all free text tokens, in token order.
     */
    get freeText(): string[] {
      return this.tokens.filter(t => t.type === TokenType.FREE_TEXT).map(t => t.value);
    }

    /**
     * Replaces all free text tokens; the new ones are appended at the end.
     */
    set freeText(values: string[]) {
      this.tokens = this.tokens.filter(t => t.type !== TokenType.FREE_TEXT);
      for (const v of values) {
        this.addFreeText(v);
      }
    }

    /**
     * Returns an independent copy of this search. Token objects are cloned so
     * that editing a token on one instance cannot affect the other.
     */
    copy() {
      const q = new MutableSearch([]);
      q.tokens = this.tokens.map(token => ({...token}));
      return q;
    }

    /**
     * Returns true when the search has no tokens at all.
     */
    isEmpty() {
      return this.tokens.length === 0;
    }
  }
  /**
   * Breaks a raw search string into whitespace-separated string tokens for
   * tokenizeSearch to classify.
   *
   * Whitespace inside a quoted section does not split, and neither does
   * whitespace that follows a word ending in `:` (so `key: value` stays one
   * token). Inside quotes, a backslash followed by the active quote character
   * is consumed as a pair so the escaped quote does not close the section.
   *
   * Should stay in sync with src.sentry.search.utils:split_query_into_tokens
   */
  function splitSearchIntoTokens(query: string) {
    const chars = Array.from(query);
    const lastIndex = chars.length - 1;
    const result: string[] = [];

    let current = '';
    let lastWordEnd = '';
    let activeQuote = '';
    let insideQuotes = false;

    let pos = 0;
    while (pos < chars.length) {
      const ch = chars[pos];
      const upcoming = pos < lastIndex ? chars[pos + 1] : null;
      const chIsSpace = isSpace(ch);

      current += ch;

      // Remember the last character of the word that is about to end.
      if (upcoming !== null && !chIsSpace && isSpace(upcoming)) {
        lastWordEnd = ch;
      }

      // A space outside quotes ends the token, unless the previous word ended
      // with a colon or nothing but whitespace has been collected so far.
      if (chIsSpace && !insideQuotes && lastWordEnd !== ':' && !isSpace(current)) {
        result.push(current.trim());
        current = '';
      }

      // Open a quoted section, or close it when the same quote char recurs.
      const isQuoteChar = ch === "'" || ch === '"';
      if (isQuoteChar && (!insideQuotes || activeQuote === ch)) {
        insideQuotes = !insideQuotes;
        if (insideQuotes) {
          activeQuote = ch;
        }
      }

      // Swallow an escaped quote so it is not treated as a closing quote.
      if (insideQuotes && ch === '\\' && upcoming === activeQuote) {
        current += upcoming;
        pos++;
      }

      pos++;
    }

    const remainder = current.trim();
    if (remainder !== '') {
      result.push(remainder);
    }

    return result;
  }
  /**
   * Returns true when the string is empty or made up solely of whitespace.
   */
  function isSpace(s: string) {
    const withoutWhitespace = s.trim();
    return withoutWhitespace.length === 0;
  }
  /**
   * Cuts a filter string at its first ':' and returns `[key, value]`, each
   * with surrounding double quotes removed.
   */
  function parseFilter(filter: string): string[] {
    const separatorAt = filter.indexOf(':');
    const rawKey = filter.slice(0, separatorAt);
    const rawValue = filter.slice(separatorAt + 1);
    return [removeSurroundingQuotes(rawKey), removeSurroundingQuotes(rawValue)];
  }
  /**
   * Strips runs of double quotes from both ends of the text.
   *
   * Strings of length 0 or 1 are returned untouched. The scan from each end
   * never goes past the midpoint of the string, and a trailing quote that is
   * preceded by a backslash is kept.
   */
  function removeSurroundingQuotes(text: string) {
    const size = text.length;
    if (size <= 1) {
      return text;
    }

    const midpoint = size / 2;

    let start = 0;
    while (start <= midpoint && text.charAt(start) === '"') {
      start++;
    }

    let end = size - 1;
    while (end >= midpoint && text.charAt(end) === '"' && text.charAt(end - 1) !== '\\') {
      end--;
    }

    return text.slice(start, end + 1);
  }
  /**
   * Removes any leading run of `"` / `(` and any trailing run of `"` / `)`
   * from a query.
   */
  function formatQuery(query: string) {
    const withoutLeading = query.replace(/^["\(]+/, '');
    return withoutLeading.replace(/["\)]+$/, '');
  }
  /**
   * Escapes characters that carry special meaning inside a filter value so
   * that a value added directly is matched literally.
   */
  export function escapeFilterValue(value: string) {
    // TODO(txiao): The types here are definitely wrong.
    // Need to dig deeper to see where exactly it's wrong.
    //
    // Until then, anything that is not a string is handed back untouched.
    if (typeof value !== 'string') {
      return value;
    }

    // asterisks (*) are used for wildcard searches
    return value.replace(/\*/g, '\\*');
  }