Browse Source

feat(apm): Use Jaro–Winkler distance when comparing span descriptions for matching spans (#20224)

Alberto Leal 4 years ago
parent
commit
2aca93d3fe
3 changed files with 78 additions and 13 deletions
  1. 2 1
      package.json
  2. 71 12
      src/sentry/static/sentry/app/views/performance/compare/utils.tsx
  3. 5 0
      yarn.lock

+ 2 - 1
package.json

@@ -20,11 +20,11 @@
     "@emotion/babel-preset-css-prop": "^10.0.27",
     "@emotion/core": "^10.0.27",
     "@emotion/styled": "^10.0.27",
-    "@sentry/tracing": "5.20.1",
     "@sentry/integrations": "5.20.1",
     "@sentry/react": "5.20.1",
     "@sentry/release-parser": "^0.6.0",
     "@sentry/rrweb": "^0.1.1",
+    "@sentry/tracing": "5.20.1",
     "@sentry/utils": "5.20.1",
     "@types/classnames": "^2.2.0",
     "@types/clipboard": "^2.0.1",
@@ -128,6 +128,7 @@
     "webpack": "4.42.1",
     "webpack-cli": "3.3.11",
     "webpack-fix-style-only-entries": "^0.2.0",
+    "wink-jaro-distance": "^2.0.0",
     "zxcvbn": "^4.4.2"
   },
   "devDependencies": {

+ 71 - 12
src/sentry/static/sentry/app/views/performance/compare/utils.tsx

@@ -1,3 +1,5 @@
+import jaro from 'wink-jaro-distance';
+
 import {SentryTransactionEvent} from 'app/types';
 import {RawSpanType, SpanType} from 'app/components/events/interfaces/spans/types';
 import {
@@ -7,6 +9,9 @@ import {
   toPercent,
 } from 'app/components/events/interfaces/spans/utils';
 
+// Minimum threshold score for descriptions that are similar.
+const COMMON_SIMILARITY_DESCRIPTION_THRESHOLD = 0.8;
+
 export function isTransactionEvent(event: any): event is SentryTransactionEvent {
   if (!event) {
     return false;
@@ -124,7 +129,7 @@ export function diffTransactions({
     //
     // baselineSpan and regressionSpan have equivalent depth levels due to the nature of the tree traversal algorithm.
 
-    if (!matchableSpans({baselineSpan, regressionSpan})) {
+    if (matchableSpans({baselineSpan, regressionSpan}) === 0) {
       if (currentSpans.type === 'root') {
         const spanComparisonResults: [DiffSpanType, DiffSpanType] = [
           {
@@ -209,14 +214,16 @@ function createChildPairs({
 
     const candidates = remainingRegressionChildren.reduce(
       (
-        acc: Array<{regressionSpan: SpanType; index: number}>,
+        acc: Array<{regressionSpan: SpanType; index: number; matchScore: number}>,
         regressionSpan: SpanType,
         index: number
       ) => {
-        if (matchableSpans({baselineSpan, regressionSpan})) {
+        const matchScore = matchableSpans({baselineSpan, regressionSpan});
+        if (matchScore !== 0) {
           acc.push({
             regressionSpan,
             index,
+            matchScore,
           });
         }
 
@@ -241,8 +248,8 @@ function createChildPairs({
     );
 
     const {regressionSpan, index} = candidates.reduce((bestCandidate, nextCandidate) => {
-      const {regressionSpan: thisSpan} = bestCandidate;
-      const {regressionSpan: otherSpan} = nextCandidate;
+      const {regressionSpan: thisSpan, matchScore: thisSpanMatchScore} = bestCandidate;
+      const {regressionSpan: otherSpan, matchScore: otherSpanMatchScore} = nextCandidate;
 
       // calculate the deltas of the start timestamps relative to baselineSpan's
       // start timestamp
@@ -264,16 +271,17 @@ function createChildPairs({
       const deltaDurationThisSpan = Math.abs(thisSpanDuration - baselineSpanDuration);
       const deltaDurationOtherSpan = Math.abs(otherSpanDuration - baselineSpanDuration);
 
-      const thisSpanScore = deltaDurationThisSpan + deltaStartTimestampThisSpan;
-      const otherSpanScore = deltaDurationOtherSpan + deltaStartTimestampOtherSpan;
+      const thisSpanScore =
+        deltaDurationThisSpan + deltaStartTimestampThisSpan + (1 - thisSpanMatchScore);
+
+      const otherSpanScore =
+        deltaDurationOtherSpan + deltaStartTimestampOtherSpan + (1 - otherSpanMatchScore);
 
       if (thisSpanScore < otherSpanScore) {
-        // sort thisSpan before otherSpan
         return bestCandidate;
       }
 
       if (thisSpanScore > otherSpanScore) {
-        // sort thisSpan after otherSpan
         return nextCandidate;
       }
 
@@ -318,17 +326,68 @@ function createChildPairs({
   };
 }
 
+function jaroSimilarity(thisString: string, otherString: string): number {
+  // based on https://winkjs.org/wink-distance/string-jaro-winkler.js.html
+  // and https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+  if (thisString === otherString) {
+    return 1;
+  }
+
+  let jaroDistance: number = jaro(thisString, otherString).distance;
+
+  // Constant scaling factor for how much the score is adjusted upwards for having common prefixes.
+  // This is only used for the Jaro–Winkler Similarity procedure.
+  const scalingFactor = 0.1;
+
+  // boostThreshold is the upper bound threshold of which if the Jaro score was less-than or equal
+  // to boostThreshold, then the Jaro–Winkler Similarity procedure is applied. Otherwise,
+  // 1 - jaroDistance is returned.
+  const boostThreshold = 0.3;
+
+  if (jaroDistance > boostThreshold) {
+    return 1 - jaroDistance;
+  }
+
+  const pLimit = Math.min(thisString.length, otherString.length, 4);
+  let l = 0;
+
+  for (let i = 0; i < pLimit; i += 1) {
+    if (thisString[i] === otherString[i]) {
+      l += 1;
+    } else {
+      break;
+    }
+  }
+
+  jaroDistance -= l * scalingFactor * jaroDistance;
+  return 1 - jaroDistance;
+}
+
 function matchableSpans({
   baselineSpan,
   regressionSpan,
 }: {
   baselineSpan: SpanType;
   regressionSpan: SpanType;
-}): boolean {
+}): number {
   const opNamesEqual = baselineSpan.op === regressionSpan.op;
-  const descriptionsEqual = baselineSpan.description === regressionSpan.description;
 
-  return opNamesEqual && descriptionsEqual;
+  if (!opNamesEqual) {
+    return 0;
+  }
+
+  // remove whitespace and convert string to lower case as the individual characters
+  // adds noise to the edit distance function
+  const baselineDescription = (baselineSpan.description || '')
+    .replace(/\s+/g, '')
+    .toLowerCase();
+  const regressionDescription = (regressionSpan.description || '')
+    .replace(/\s+/g, '')
+    .toLowerCase();
+
+  const score = jaroSimilarity(baselineDescription, regressionDescription);
+
+  return score >= COMMON_SIMILARITY_DESCRIPTION_THRESHOLD ? score : 0;
 }
 
 function generateMergedSpanId({

+ 5 - 0
yarn.lock

@@ -16670,6 +16670,11 @@ window-size@0.1.0:
   resolved "https://registry.yarnpkg.com/window-size/-/window-size-0.1.0.tgz#5438cd2ea93b202efa3a19fe8887aee7c94f9c9d"
   integrity sha1-VDjNLqk7IC76Ohn+iIeu58lPnJ0=
 
+wink-jaro-distance@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/wink-jaro-distance/-/wink-jaro-distance-2.0.0.tgz#96bc3fd69ec5dd48ee74055217f0dcd313148049"
+  integrity sha512-9bcUaXCi9N8iYpGWbFkf83OsBkg17r4hEyxusEzl+nnReLRPqxhB9YNeRn3g54SYnVRNXP029lY3HDsbdxTAuA==
+
 with@^5.0.0:
   version "5.1.1"
   resolved "https://registry.yarnpkg.com/with/-/with-5.1.1.tgz#fa4daa92daf32c4ea94ed453c81f04686b575dfe"