Add tests retry (#6973)

Maxim Yurchuk 7 months ago
commit 39cb9cff24

+ 251 - 204
.github/actions/test_ya/action.yml

@@ -52,6 +52,10 @@ inputs:
     type: boolean
     default: true
     description: "run tests"
+  test_retry_count:
+    type: string
+    default: ""
+    description: "how many times to retry failed tests"
 outputs:
   success:
     value: ${{ steps.build.outputs.status }}
@@ -83,85 +87,13 @@ runs:
         echo "PUBLIC_DIR_URL=$PUBLIC_DIR_URL" >> $GITHUB_ENV
         mkdir -p $PUBLIC_DIR
 
-        export YA_MAKE_OUT_DIR=$TMP_DIR/out
-        echo "YA_MAKE_OUT_DIR=$YA_MAKE_OUT_DIR" >> $GITHUB_ENV
-        mkdir -p $YA_MAKE_OUT_DIR
-
-        echo "JUNIT_REPORT_XML=$PUBLIC_DIR/junit.xml" >> $GITHUB_ENV
+        echo "LAST_JUNIT_REPORT_XML=$PUBLIC_DIR/last_junit.xml" >> $GITHUB_ENV
         echo "TESTMO_URL=${{ inputs.testman_url }}" >> $GITHUB_ENV
         echo "SUMMARY_LINKS=$PUBLIC_DIR/summary_links.txt" >> $GITHUB_ENV
         echo "BUILD_PRESET=${{ inputs.build_preset }}" >> $GITHUB_ENV
-    
-
-    - name: Upload tests result to testmo
-      id: th
-      if: inputs.testman_token && inputs.run_tests
-      shell: bash
-      env:
-        PR_NUMBER: ${{ github.event.number }}
-        TESTMO_TOKEN: ${{ inputs.testman_token }}
-      run: |
-        set -x
-        RUN_URL="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
-        BRANCH_TAG="$GITHUB_REF_NAME"
-        ARCH="${{ runner.arch == 'X64' && 'x86-64' || runner.arch == 'ARM64' && 'arm64' || 'unknown' }}"
-        
-        # install test mo
-        npm install -g @testmo/testmo-cli
-
-        case "$BUILD_PRESET" in
-          relwithdebinfo)
-            TESTMO_SOURCE="ya-${ARCH}"
-            ;;
-          debug)
-            TESTMO_SOURCE="ya-${ARCH}-debug"
-            ;;
-          release-*)
-            TESTMO_SOURCE="ya-${ARCH}-${BUILD_PRESET/release-/}"
-            ;;
-          *)
-            echo "Invalid preset: $BUILD_PRESET"
-            exit 1
-            ;;
-        esac
-        
-        case $GITHUB_EVENT_NAME in
-          workflow_dispatch)
-            TESTMO_RUN_NAME="${{ github.run_id }} manual"
-            EXTRA_TAG="manual"
-            ;;
-          pull_request | pull_request_target)
-            TESTMO_RUN_NAME="${{ github.run_id }} PR #${PR_NUMBER}"
-            EXTRA_TAG="pr"
-            BRANCH_TAG=""
-            ;;
-          schedule)
-            TESTMO_RUN_NAME="${{ github.run_id }} schedule"
-            EXTRA_TAG="schedule"
-            ;;
-          push)
-            TESTMO_RUN_NAME="${{ github.run_id }} POST"
-            EXTRA_TAG="post-commit"
-            ;;
-          *)
-            TESTMO_RUN_NAME="${{ github.run_id }}"
-            EXTRA_TAG=""
-            ;;
-        esac
-        echo "TESTMO_RUN_NAME=$TESTMO_RUN_NAME" >> $GITHUB_ENV 
-        testmo automation:resources:add-link --name build --url "$RUN_URL" --resources testmo.json
-        testmo automation:resources:add-field --name git-sha --type string --value "${GITHUB_SHA:0:7}" --resources testmo.json
-        RUN_ID=$(
-        testmo automation:run:create --instance "$TESTMO_URL" --project-id ${{ inputs.testman_project_id }} \
-          --name "$TESTMO_RUN_NAME" --source "$TESTMO_SOURCE" --resources testmo.json \
-          --tags "$BRANCH_TAG" --tags "$EXTRA_TAG"
-        )
-        echo "runid=${RUN_ID}" >> $GITHUB_OUTPUT
-        TEST_HISTORY_URL="${TESTMO_URL}/automation/runs/view/${RUN_ID}"
-        echo "TEST_HISTORY_URL=$TEST_HISTORY_URL" >> $GITHUB_ENV
 
-        # Print test history link
-        echo "10 [Test history](${TEST_HISTORY_URL})" >> $SUMMARY_LINKS
+        python3 -m pip install ydb ydb[yc] codeowners
+    
 
     - name: set environment variables required by some tests
       if: inputs.run_tests
@@ -184,6 +116,8 @@ runs:
           --test-threads "${{ inputs.test_threads }}" --link-threads "${{ inputs.link_threads }}"
           -DUSE_EAT_MY_DATA
         )
+
+        TEST_RETRY_COUNT=${{ inputs.test_retry_count }}
         
         case "$BUILD_PRESET" in
           debug)
@@ -206,25 +140,39 @@ runs:
               --build "release" --sanitize="address"
               -DDEBUGINFO_LINES_ONLY
             )
+            if [ -z "$TEST_RETRY_COUNT" ]; then
+              TEST_RETRY_COUNT=1
+            fi
             ;;
           release-tsan)
             params+=(
               --build "release" --sanitize="thread"
               -DDEBUGINFO_LINES_ONLY
             )
+            if [ -z "$TEST_RETRY_COUNT" ]; then
+              TEST_RETRY_COUNT=1
+            fi
             ;;
           release-msan)
             params+=(
               --build "release" --sanitize="memory"
               -DDEBUGINFO_LINES_ONLY
             )
+            if [ -z "$TEST_RETRY_COUNT" ]; then
+              TEST_RETRY_COUNT=1
+            fi
             ;;
           *)
             echo "Invalid preset: $BUILD_PRESET"
             exit 1
             ;;
         esac
-        
+
+        if [ -z "$TEST_RETRY_COUNT" ]; then
+          # default is 3 for ordinary builds and 1 for sanitizer builds
+          TEST_RETRY_COUNT=3
+        fi
+    
         if [ ! -z "${{ inputs.additional_ya_make_args }}" ]; then
           params+=(${{ inputs.additional_ya_make_args }})
         fi
@@ -244,32 +192,222 @@ runs:
           params+=(-A)
         fi
 
+        params+=(
+          --stat -DCONSISTENT_DEBUG --no-dir-outputs
+          --test-failure-code 0 --build-all 
+          --cache-size 2TB --force-build-depends
+        )
+
+
+        TESTMO_BRANCH_TAG="$GITHUB_REF_NAME"
+        TESTMO_ARCH="${{ runner.arch == 'X64' && 'x86-64' || runner.arch == 'ARM64' && 'arm64' || 'unknown' }}"
+        TESTMO_PR_NUMBER=${{ github.event.number }}
+        
+        # install testmo
+        npm install -g @testmo/testmo-cli
+
+        case "$BUILD_PRESET" in
+          relwithdebinfo)
+            TESTMO_SOURCE="ya-${TESTMO_ARCH}"
+            ;;
+          debug)
+            TESTMO_SOURCE="ya-${TESTMO_ARCH}-debug"
+            ;;
+          release-*)
+            TESTMO_SOURCE="ya-${TESTMO_ARCH}-${BUILD_PRESET/release-/}"
+            ;;
+          *)
+            echo "Invalid preset: $BUILD_PRESET"
+            exit 1
+            ;;
+        esac
+        
+        case $GITHUB_EVENT_NAME in
+          workflow_dispatch)
+            TESTMO_RUN_NAME="${{ github.run_id }} manual"
+            TESTMO_EXTRA_TAG="manual"
+            ;;
+          pull_request | pull_request_target)
+            TESTMO_RUN_NAME="${{ github.run_id }} PR #${TESTMO_PR_NUMBER}"
+            TESTMO_EXTRA_TAG="pr"
+            TESTMO_BRANCH_TAG=""
+            ;;
+          schedule)
+            TESTMO_RUN_NAME="${{ github.run_id }} schedule"
+            TESTMO_EXTRA_TAG="schedule"
+            ;;
+          push)
+            TESTMO_RUN_NAME="${{ github.run_id }} POST"
+            TESTMO_EXTRA_TAG="post-commit"
+            ;;
+          *)
+            TESTMO_RUN_NAME="${{ github.run_id }}"
+            TESTMO_EXTRA_TAG=""
+            ;;
+        esac
+        echo "TESTMO_RUN_NAME=$TESTMO_RUN_NAME" >> $GITHUB_ENV 
+
         echo "::debug::get version"
         ./ya --version
 
+        YA_MAKE_OUT_DIR=$TMP_DIR/out
+
         YA_MAKE_OUTPUT="$PUBLIC_DIR/ya_make_output.log"
         YA_MAKE_OUTPUT_URL="$PUBLIC_DIR_URL/ya_make_output.log"
-        echo "10 [Ya make output]($YA_MAKE_OUTPUT_URL)" >> $SUMMARY_LINKS
+        echo "20 [Ya make output]($YA_MAKE_OUTPUT_URL)" >> $SUMMARY_LINKS
         echo "YA_MAKE_OUTPUT_URL=$YA_MAKE_OUTPUT_URL" >> $GITHUB_ENV 
 
-        echo "Build+Tests **{platform_name}-${BUILD_PRESET}** is running..." | GITHUB_TOKEN="${{ github.token }}" .github/scripts/tests/comment-pr.py
-        set +ex
-        (./ya make ${{ inputs.build_target }} "${params[@]}" \
-          --stat --log-file "$PUBLIC_DIR/ya_log.log" -DCONSISTENT_DEBUG \
-          --no-dir-outputs --test-failure-code 0 --build-all \
-          --cache-size 2TB --force-build-depends --evlog-file "$PUBLIC_DIR/ya_evlog.jsonl" \
-          --junit "$JUNIT_REPORT_XML" --output "$YA_MAKE_OUT_DIR"; echo $? > exit_code) |& tee $YA_MAKE_OUTPUT
-        set -e
-        RC=`cat exit_code`
-
-
-        if [ $RC -ne 0 ]; then
-          echo "ya make returned $RC, build failed"
-          echo "status=failed" >> $GITHUB_OUTPUT
-        else
+        BUILD_FAILED=0
+
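+        # Retry loop: each attempt writes its junit.xml and artifacts under
+        # $PUBLIC_DIR/try_$RETRY; the loop stops early when the build fails,
+        # when all tests pass, or when too many tests fail to be worth retrying.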
+        for RETRY in $(seq 1 $TEST_RETRY_COUNT)
+        do
+          if [ $RETRY = $TEST_RETRY_COUNT ]; then
+            IS_LAST_RETRY=1
+          else
+            IS_LAST_RETRY=0
+          fi
+
+          if [ $RETRY != 1 ]; then
+            IS_RETRY=1
+          else
+            IS_RETRY=0
+          fi
+
+          if [ -n "${{ inputs.testman_token }}" ]; then
+            # initialize testmo session
+            TESTMO_RUN_URL="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/${GITHUB_RUN_ID}_$RETRY"
+            TESTMO_TOKEN=${{ inputs.testman_token }} testmo automation:resources:add-link --name build --url "$TESTMO_RUN_URL" --resources testmo.json
+            TESTMO_TOKEN=${{ inputs.testman_token }} testmo automation:resources:add-field --name git-sha --type string --value "${GITHUB_SHA:0:7}" --resources testmo.json
+            TESTMO_RUN_ID=$(
+              TESTMO_TOKEN=${{ inputs.testman_token }} testmo automation:run:create --instance "$TESTMO_URL" --project-id ${{ inputs.testman_project_id }} \
+                --name "$TESTMO_RUN_NAME" --source "$TESTMO_SOURCE" --resources testmo.json \
+                --tags "$TESTMO_BRANCH_TAG" --tags "$TESTMO_EXTRA_TAG"
+            )
+            echo "runid=${TESTMO_RUN_ID}" >> $GITHUB_OUTPUT
+            TESTMO_HISTORY_URL="${TESTMO_URL}/automation/runs/view/${TESTMO_RUN_ID}"
+
+            # Replace test history link
+            cat $SUMMARY_LINKS | (grep -v "Test history"  || true) > $TMP_DIR/tmp_summary
+            mv $TMP_DIR/tmp_summary $SUMMARY_LINKS
+            echo "10 [Test history](${TESTMO_HISTORY_URL})" >> $SUMMARY_LINKS
+          fi
+
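+          # First attempt runs the full test set; retries set RERUN_FAILED_OPT=-X
+          # so ya make re-runs only the tests that failed on the previous attempt.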
+          CURRENT_MESSAGE="**{platform_name}-${BUILD_PRESET}** is running..." 
+          if [ $IS_RETRY = 0 ]; then
+            CURRENT_MESSAGE="Check $CURRENT_MESSAGE"
+            RERUN_FAILED_TESTS=""
+          else
+            CURRENT_MESSAGE="Failed tests rerun (try $RETRY) $CURRENT_MESSAGE"
+            RERUN_FAILED_OPT="-X"
+          fi
+          echo $CURRENT_MESSAGE | GITHUB_TOKEN="${{ github.token }}" .github/scripts/tests/comment-pr.py
+
+          CURRENT_PUBLIC_DIR_RELATIVE=try_$RETRY
+          CURRENT_PUBLIC_DIR=$PUBLIC_DIR/$CURRENT_PUBLIC_DIR_RELATIVE
+          mkdir $CURRENT_PUBLIC_DIR
+
+          CURRENT_JUNIT_XML_PATH=$CURRENT_PUBLIC_DIR/junit.xml
+          set +ex
+          (./ya make ${{ inputs.build_target }} "${params[@]}" \
+            $RERUN_FAILED_OPT --log-file "$PUBLIC_DIR/ya_log.log"  \
+            --evlog-file "$PUBLIC_DIR/ya_evlog.jsonl" \
+            --junit "$CURRENT_JUNIT_XML_PATH" --output "$YA_MAKE_OUT_DIR"; echo $? > exit_code) |& tee -a $YA_MAKE_OUTPUT
+          set -ex
+          RC=`cat exit_code`
+
+          if [ $RC -ne 0 ]; then
+            echo "ya make returned $RC, build failed"
+            echo "status=failed" >> $GITHUB_OUTPUT
+            BUILD_FAILED=1
+            break
+          fi
+
+          # fix junit files (add links, logs etc)
+          # archive unittest reports (orig)
+          gzip -c $CURRENT_JUNIT_XML_PATH > $CURRENT_PUBLIC_DIR/orig_junit.xml.gz
+
+          # postprocess junit report
+          .github/scripts/tests/transform-ya-junit.py -i \
+            -m .github/config/muted_ya.txt \
+            --ya_out "$YA_MAKE_OUT_DIR" \
+            --public_dir "$PUBLIC_DIR" \
+            --public_dir_url "$PUBLIC_DIR_URL" \
+            --log_out_dir "$CURRENT_PUBLIC_DIR_RELATIVE/artifacts/logs/" \
+            --test_stuff_out "$CURRENT_PUBLIC_DIR_RELATIVE/test_artifacts/" \
+            "$CURRENT_JUNIT_XML_PATH"
+          cp $CURRENT_JUNIT_XML_PATH $LAST_JUNIT_REPORT_XML
+
+          if [ "${{ inputs.run_tests }}" = "true" ]; then
+            GITHUB_TOKEN=${{ github.token }} .github/scripts/tests/generate-summary.py \
+              --summary_links "$SUMMARY_LINKS" \
+              --public_dir "$PUBLIC_DIR" \
+              --public_dir_url "$PUBLIC_DIR_URL" \
+              --build_preset "$BUILD_PRESET" \
+              --status_report_file statusrep.txt \
+              --is_retry $IS_RETRY \
+              --is_last_retry $IS_LAST_RETRY \
+              "Tests" $CURRENT_PUBLIC_DIR/ya-test.html "$CURRENT_JUNIT_XML_PATH"
+          fi
+            
+          # upload tests results to YDB
+          ydb_upload_run_name="${TESTMO_RUN_NAME// /"_"}"
+          result=`.github/scripts/upload_tests_results.py --test-results-file ${CURRENT_JUNIT_XML_PATH} --run-timestamp $(date +%s) --commit $(git rev-parse HEAD) --build-type ${BUILD_PRESET} --pull $ydb_upload_run_name --job-name "${{ github.workflow }}" --job-id "${{ github.run_id }}" --branch ${GITHUB_REF_NAME}`
+
+          if [ -n "${{ inputs.testman_token }}" ]; then
+            # finish testmo session
+
+            # split large junit_report
+            export TESTMO_JUNIT_REPORT_PARTS=$TMP_DIR/junit-split
+            mkdir -p $TESTMO_JUNIT_REPORT_PARTS
+            .github/scripts/tests/split-junit.py -o "$TESTMO_JUNIT_REPORT_PARTS" "$CURRENT_JUNIT_XML_PATH"
+            # archive unittest reports (transformed)
+            tar -C $TESTMO_JUNIT_REPORT_PARTS/.. -czf $PUBLIC_DIR/junit_parts.xml.tar.gz $(basename $TESTMO_JUNIT_REPORT_PARTS)
+
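+            # Submit results through a local HTTPS proxy (self-signed cert) so
+            # per-request timeouts towards the Testmo instance can be enforced.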
+            TESTMO_PROXY_ADDR=127.0.0.1:8888
+
+            openssl req -x509 -newkey rsa:2048 \
+              -keyout $TMP_DIR/key.pem -out $TMP_DIR/cert.pem \
+              -sha256 -days 1 -nodes -subj "/CN=127.0.0.1"
+            
+            TESTMO_TOKEN=${{ inputs.testman_token }} ./ydb/ci/testmo-proxy/testmo-proxy.py -l $TESTMO_PROXY_ADDR \
+              --cert-file "$TMP_DIR/cert.pem" \
+              --cert-key "$TMP_DIR/key.pem" \
+              --target-timeout 3,10 \
+              --max-request-time 55 \
+              "$TESTMO_URL" &
+            
+            testmo_proxy_pid=$!
+            
+            TESTMO_TOKEN=${{ inputs.testman_token }} NODE_TLS_REJECT_UNAUTHORIZED=0 testmo automation:run:submit-thread \
+              --instance "https://$TESTMO_PROXY_ADDR" --run-id "$TESTMO_RUN_ID" \
+              --results "$TESTMO_JUNIT_REPORT_PARTS/*.xml"
+            
+            kill $testmo_proxy_pid
+
+            TESTMO_TOKEN=${{ inputs.testman_token }} testmo automation:run:complete --instance "$TESTMO_URL" --run-id $TESTMO_RUN_ID
+            echo "runid=" >> $GITHUB_OUTPUT
+          fi
+
+          TESTS_RESULT=0
+          .github/scripts/tests/fail-checker.py "$CURRENT_JUNIT_XML_PATH" --output_path $CURRENT_PUBLIC_DIR/failed_count.txt || TESTS_RESULT=$?
+
+          FAILED_TESTS_COUNT=$(cat $CURRENT_PUBLIC_DIR/failed_count.txt)
+
+          if [ $FAILED_TESTS_COUNT -gt 500 ]; then 
+            TOO_MANY_FAILED="Too many tests failed, NOT going to retry"
+            echo $TOO_MANY_FAILED | GITHUB_TOKEN="${{ github.token }}" .github/scripts/tests/comment-pr.py --fail
+            break
+          fi
+
+          if [ $TESTS_RESULT = 0 ]; then
+            break
+          fi
+        done
+
+        if [ $BUILD_FAILED = 0 ]; then
           echo "status=true" >> $GITHUB_OUTPUT
         fi
-
+        
 
     - name: comment-build-status
       if: github.event_name == 'pull_request' || github.event_name == 'pull_request_target'
@@ -290,133 +428,42 @@ runs:
             -d '{"state":"success","description":"The check has been completed successfully","context":"build_${{inputs.build_preset}}"}'
           echo "Build successful." | .github/scripts/tests/comment-pr.py --ok
         fi
-
-    - name: process reports
-      if: inputs.run_tests
-      shell: bash
-      run: |
-        set -x
-        # archive unitest reports (orig) 
-        gzip -c $JUNIT_REPORT_XML > $PUBLIC_DIR/orig_junit.xml.gz
-
-        # postprocess junit report
-        .github/scripts/tests/transform-ya-junit.py -i \
-          -m .github/config/muted_ya.txt \
-          --ya_out "$YA_MAKE_OUT_DIR" \
-          --public_dir "$PUBLIC_DIR" \
-          --public_dir_url "$PUBLIC_DIR_URL" \
-          --log_out_dir "artifacts/logs/" \
-          --test_stuff_out "test_artifacts/" \
-          "$JUNIT_REPORT_XML"
     
-    - name: Test history upload results to YDB
-      if: inputs.run_tests
-      continue-on-error: true
-      shell: bash
-      env:
-        PR_NUMBER: ${{ github.event.number }}
-        JOB_NAME: ${{ github.workflow }}
-        JOB_ID: ${{github.run_id }}
-      run: |
-        set -x
-        python3 -m pip install ydb ydb[yc] codeowners
-
-        #Variables
-        echo "JUNIT_REPORT_XML=${JUNIT_REPORT_XML}"
-        echo "BUILD_PRESET=${BUILD_PRESET}"
-        echo "PR_NUMBER=${PR_NUMBER}"
-        echo "JOB_NAME=${JOB_NAME}"
-        echo "JOB_ID=${JOB_ID}"
-        echo "GITHUB_REF_NAME=${GITHUB_REF_NAME}"
 
-        run_name="${TESTMO_RUN_NAME// /"_"}"
-        result=`.github/scripts/upload_tests_results.py --test-results-file ${JUNIT_REPORT_XML} --run-timestamp $(date +%s) --commit $(git rev-parse HEAD) --build-type ${BUILD_PRESET} --pull $run_name --job-name ${JOB_NAME} --job-id ${JOB_ID} --branch ${GITHUB_REF_NAME}`
-
-    - name: Unit test history upload results
-      if: inputs.testman_token && inputs.run_tests
-      shell: bash
-      env:
-        TESTMO_TOKEN: ${{ inputs.testman_token }}
-      run: |
-        set -x
-
-        # split large junit_report
-        export JUNIT_REPORT_PARTS=$TMP_DIR/junit-split
-        mkdir -p $JUNIT_REPORT_PARTS
-        .github/scripts/tests/split-junit.py -o "$JUNIT_REPORT_PARTS" "$JUNIT_REPORT_XML"
-        # archive unitest reports (transformed)
-        tar -C $JUNIT_REPORT_PARTS/.. -czf $PUBLIC_DIR/junit_parts.xml.tar.gz $(basename $JUNIT_REPORT_PARTS)
-
-        PROXY_ADDR=127.0.0.1:8888
-
-        openssl req -x509 -newkey rsa:2048 \
-          -keyout $TMP_DIR/key.pem -out $TMP_DIR/cert.pem \
-          -sha256 -days 1 -nodes -subj "/CN=127.0.0.1"
-        
-        ./ydb/ci/testmo-proxy/testmo-proxy.py -l $PROXY_ADDR \
-          --cert-file "$TMP_DIR/cert.pem" \
-          --cert-key "$TMP_DIR/key.pem" \
-          --target-timeout 3,10 \
-          --max-request-time 55 \
-          "$TESTMO_URL" &
-        
-        proxy_pid=$!
-        
-        NODE_TLS_REJECT_UNAUTHORIZED=0 testmo automation:run:submit-thread \
-          --instance "https://$PROXY_ADDR" --run-id "${{ steps.th.outputs.runid }}" \
-          --results "$JUNIT_REPORT_PARTS/*.xml"
-        
-        kill $proxy_pid
-
-    - name: Test history run complete
-      if: always() && inputs.testman_token && inputs.run_tests
+    - name: Clean up unfinished testmo sessions
+      if: always()
       shell: bash
-      env:
-        TESTMO_TOKEN: ${{ inputs.testman_token }}
       run: |
-        testmo automation:run:complete --instance "$TESTMO_URL" --run-id ${{ steps.th.outputs.runid }}
-    
+        if [ -n "${{ steps.build.outputs.runid }}" ]; then
+          TESTMO_TOKEN=${{ inputs.testman_token }} testmo automation:run:complete --instance "$TESTMO_URL" --run-id ${{ steps.build.outputs.runid }}
+        fi
     - name: analyze tests results
       shell: bash
-      if: always() && inputs.run_tests
       env:
         GITHUB_TOKEN: ${{ github.token }}
       run: |
         set -x
-
-        if [ "${{ inputs.run_tests }}" != "true" ]; then
-          # I have no idea why 'always() && inputs.run_tests' not working 
-          # Probably because of implicit bool->str cast? 
-          exit 0
-        fi
-
-        .github/scripts/tests/generate-summary.py \
-          --summary_links "$SUMMARY_LINKS" \
-          --public_dir "$PUBLIC_DIR" \
-          --public_dir_url "$PUBLIC_DIR_URL" \
-          --build_preset "$BUILD_PRESET" \
-          --status_report_file statusrep.txt \
-          "Tests" ya-test.html "$JUNIT_REPORT_XML"
-
-        teststatus=$(cat statusrep.txt)
-        if [[ $teststatus == "success" ]];then
-          testmessage="The check has been completed successfully"
-        else
-          testmessage="The check has been failed"
-        fi
-        curl -L -X POST -H "Accept: application/vnd.github+json" -H "Authorization: Bearer ${{github.token}}" -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/${{github.repository}}/statuses/${{github.event.pull_request.head.sha}} \
-            -d '{"state":"'$teststatus'","description":"'"$testmessage"'","context":"test_${{inputs.build_preset}}"}'
-        
-        if [[ $teststatus != "success" ]];then
-          echo "status=failed" >> $GITHUB_OUTPUT
+        if [ "${{ inputs.run_tests }}" = "true" ]; then
+          teststatus=$(cat statusrep.txt)
+          if [[ $teststatus == "success" ]];then
+            testmessage="The check has been completed successfully"
+          else
+            testmessage="The check has been failed"
+          fi
+          curl -L -X POST -H "Accept: application/vnd.github+json" -H "Authorization: Bearer ${{github.token}}" -H "X-GitHub-Api-Version: 2022-11-28" \
+              https://api.github.com/repos/${{github.repository}}/statuses/${{github.event.pull_request.head.sha}} \
+              -d '{"state":"'$teststatus'","description":"'"$testmessage"'","context":"test_${{inputs.build_preset}}"}'
+          
+          if [[ $teststatus != "success" ]];then
+            echo "status=failed" >> $GITHUB_OUTPUT
+          fi
         fi
     
     - name: check test results
       if: inputs.run_tests
       shell: bash
       run: |
-        .github/scripts/tests/fail-checker.py "$JUNIT_REPORT_XML"
+        .github/scripts/tests/fail-checker.py "$LAST_JUNIT_REPORT_XML"
 
     - name: sync results to s3 and publish links
       if: always()

+ 23 - 7
.github/scripts/tests/fail-checker.py

@@ -4,7 +4,7 @@ from typing import List
 from junit_utils import iter_xml_files
 
 
-def check_for_fail(paths: List[str]):
+def check_for_fail(paths: List[str], output_path: str):
     failed_list = []
     error_list = []
     for path in paths:
@@ -17,21 +17,37 @@ def check_for_fail(paths: List[str]):
                 failed_list.append((test_name, fn))
             elif is_error:
                 error_list.append((test_name, fn))
+    failed_test_count = 0
 
-    if failed_list or error_list:
+    for t, fn in failed_list:
+        print(f"failure: {t} ({fn})")
+        failed_test_count += 1
+    for t, fn in error_list:
+        print(f"error: {t} ({fn})")
+        failed_test_count += 1
+
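+    # Persist the failure count so the caller (e.g. the retry loop in
+    # .github/actions/test_ya) can read it back and decide whether to retry.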
+    if output_path:
+        with open(output_path, "w") as f:
+            f.write("{}".format(failed_test_count))
+
+    if failed_test_count:
         print(f"::error::You have failed tests")
-        for t, fn in failed_list:
-            print(f"failure: {t} ({fn})")
-        for t, fn in error_list:
-            print(f"error: {t} ({fn})")
         raise SystemExit(-1)
 
 
 def main():
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-o",
+        "--output_path",
+        metavar="OUTPUT",
+        help=(
+            "Output file with count of failed tests"
+        ),
+    )
     parser.add_argument("path", nargs="+", help="jsuite xml reports directories")
     args = parser.parse_args()
-    check_for_fail(args.path)
+    check_for_fail(args.path, args.output_path)
 
 
 if __name__ == "__main__":

+ 36 - 18
.github/scripts/tests/generate-summary.py

@@ -143,9 +143,10 @@ class TestSummaryLine:
 
 
 class TestSummary:
-    def __init__(self):
+    def __init__(self, is_retry: bool):
         self.lines: List[TestSummaryLine] = []
         self.is_failed = False
+        self.is_retry = is_retry
 
     def add_line(self, line: TestSummaryLine):
         self.is_failed |= line.is_failed
@@ -158,7 +159,7 @@ class TestSummary:
     def render_line(self, items):
         return f"| {' | '.join(items)} |"
 
-    def render(self, add_footnote=False):
+    def render(self, add_footnote=False, is_retry=False):
         github_srv = os.environ.get("GITHUB_SERVER_URL", "https://github.com")
         repo = os.environ.get("GITHUB_REPOSITORY", "ydb-platform/ydb")
 
@@ -175,9 +176,9 @@ class TestSummary:
         if need_first_column:
             columns.insert(0, "")
 
-        result = [
-            self.render_line(columns),
-        ]
+        result = []
+
+        result.append(self.render_line(columns))
 
         if need_first_column:
             result.append(self.render_line([':---'] + ['---:'] * (len(columns) - 1)))
@@ -190,7 +191,7 @@ class TestSummary:
             if need_first_column:
                 row.append(line.title)
             row.extend([
-                render_pm(line.test_count, f"{report_url}", 0),
+                render_pm(f"{line.test_count}" + (" (only retried tests)" if self.is_retry else ""), f"{report_url}", 0),
                 render_pm(line.passed, f"{report_url}#PASS", 0),
                 render_pm(line.errors, f"{report_url}#ERROR", 0),
                 render_pm(line.failed, f"{report_url}#FAIL", 0),
@@ -202,8 +203,7 @@ class TestSummary:
         if add_footnote:
             result.append("")
             result.append(f"[^1]: All mute rules are defined [here]({footnote_url}).")
-            
-        result.append("")
+        
         return result
 
 
@@ -274,8 +274,8 @@ def write_summary(summary: TestSummary):
         fp.close()
 
 
-def gen_summary(public_dir, public_dir_url, paths):
-    summary = TestSummary()
+def gen_summary(public_dir, public_dir_url, paths, is_retry: bool):
+    summary = TestSummary(is_retry=is_retry)
 
     for title, html_fn, path in paths:
         summary_line = TestSummaryLine(title)
@@ -286,7 +286,9 @@ def gen_summary(public_dir, public_dir_url, paths):
 
         if not summary_line.tests:
             continue
-
+        
+        if os.path.isabs(html_fn):
+            html_fn = os.path.relpath(html_fn, public_dir)
         report_url = f"{public_dir_url}/{html_fn}"
 
         render_testlist_html(summary_line.tests, os.path.join(public_dir, html_fn))
@@ -296,19 +298,26 @@ def gen_summary(public_dir, public_dir_url, paths):
     return summary
 
 
-def get_comment_text(pr: PullRequest, summary: TestSummary, summary_links: str):
+def get_comment_text(pr: PullRequest, summary: TestSummary, summary_links: str, is_last_retry: bool):
     if summary.is_empty:
         return [
             f"Test run completed, no test results found for commit {pr.head.sha}. "
         ]
     elif summary.is_failed:
         result = f"Some tests failed, follow the links below."
+        if not is_last_retry:
+            result += " Going to retry failed tests..."
     else:
         result = f"Tests successful."
 
-    body = [
-        result
-    ]
+    body = []
+
+    body.append(result)
+
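+    # On intermediate attempts the report is wrapped in a collapsed <details>
+    # block so the PR comment stays compact until the final attempt.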
+    if not is_last_retry:
+        body.append("")
+        body.append("<details>")
+        body.append("")
 
     with open(summary_links) as f:
         links = f.readlines()
@@ -319,9 +328,16 @@ def get_comment_text(pr: PullRequest, summary: TestSummary, summary_links: str):
     if links:
         body.append("")
         body.append(" | ".join(links))
-
+    
     body.extend(summary.render())
 
+    if not is_last_retry:
+        body.append("")
+        body.append("</details>")
+        body.append("")
+    else:
+        body.append("")
+
     return body
 
 
@@ -332,6 +348,8 @@ def main():
     parser.add_argument("--summary_links", required=True)
     parser.add_argument('--build_preset', default="default-linux-x86-64-relwithdebinfo", required=False)
     parser.add_argument('--status_report_file', required=False)
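+    # 0/1 flags passed by the retry loop in .github/actions/test_ya: whether this
+    # run is a retry pass and whether it is the final scheduled attempt.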
+    parser.add_argument('--is_retry', required=True, type=int)
+    parser.add_argument('--is_last_retry', required=True, type=int)
     parser.add_argument("args", nargs="+", metavar="TITLE html_out path")
     args = parser.parse_args()
 
@@ -342,7 +360,7 @@ def main():
     paths = iter(args.args)
     title_path = list(zip(paths, paths, paths))
 
-    summary = gen_summary(args.public_dir, args.public_dir_url, title_path)
+    summary = gen_summary(args.public_dir, args.public_dir_url, title_path, is_retry=bool(args.is_retry))
     write_summary(summary)
 
     if summary.is_empty | summary.is_failed:
@@ -360,7 +378,7 @@ def main():
             event = json.load(fp)
 
         pr = gh.create_from_raw_data(PullRequest, event["pull_request"])
-        text = get_comment_text(pr, summary, args.summary_links)
+        text = get_comment_text(pr, summary, args.summary_links, is_last_retry=bool(args.is_last_retry))
 
         update_pr_comment_text(pr, args.build_preset, run_number, color, text='\n'.join(text), rewrite=False)
 

+ 1 - 0
.github/workflows/acceptance_run.yml

@@ -50,6 +50,7 @@ jobs:
         build_target: "ydb/tests/acceptance"
         increment: false
         run_tests: true
+        test_retry_count: 1
         test_size: small,medium,large
         test_type: unittest,py3test,py2test,pytest
         test_threads: 1

+ 1 - 0
.github/workflows/build_analytics.yml

@@ -49,6 +49,7 @@ jobs:
         build_target: ${{ inputs.build_target || 'ydb/apps/ydbd'}}
         increment: false
         run_tests: false
+        test_retry_count: 1
         put_build_results_to_cache: false
         additional_ya_make_args: "-DDUMP_LINKER_MAP -DCOMPILER_TIME_TRACE --add-result .json"
         secs: ${{ format('{{"TESTMO_TOKEN2":"{0}","AWS_KEY_ID":"{1}","AWS_KEY_VALUE":"{2}","REMOTE_CACHE_USERNAME":"{3}","REMOTE_CACHE_PASSWORD":"{4}"}}',

+ 5 - 0
.github/workflows/build_and_test_ya.yml

@@ -36,6 +36,10 @@ on:
       test_size:
         type: string
         default: "small,medium,large"
+      test_retry_count:
+        type: string
+        default: ""
+        description: "how many times to retry failed tests"
       test_type:
         type: string
         default: ""
@@ -79,6 +83,7 @@ jobs:
         run_tests: ${{ inputs.run_tests }}
         test_size: ${{ inputs.test_size }}
         test_type: ${{ inputs.test_type }}
+        test_retry_count: ${{ inputs.test_retry_count }}
         test_threads: ${{ inputs.test_threads }}
         put_build_results_to_cache: ${{ inputs.put_build_results_to_cache }}
         secs: ${{ format('{{"TESTMO_TOKEN2":"{0}","AWS_KEY_ID":"{1}","AWS_KEY_VALUE":"{2}","REMOTE_CACHE_USERNAME":"{3}","REMOTE_CACHE_PASSWORD":"{4}"}}',

+ 1 - 0
.github/workflows/pr_check.yml

@@ -228,6 +228,7 @@ jobs:
         run_tests: ${{ contains(fromJSON('["relwithdebinfo", "release-asan"]'), matrix.build_preset) }}
         test_size: "small,medium"
         test_threads: 52
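+        # run tests in a single attempt; when unset, the test_ya action defaults
+        # to 3 attempts (1 for sanitizer builds)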
+        test_retry_count: 1
         put_build_results_to_cache: true
         secs: ${{ format('{{"TESTMO_TOKEN2":"{0}","AWS_KEY_ID":"{1}","AWS_KEY_VALUE":"{2}","REMOTE_CACHE_USERNAME":"{3}","REMOTE_CACHE_PASSWORD":"{4}"}}',
           secrets.TESTMO_TOKEN2, secrets.AWS_KEY_ID, secrets.AWS_KEY_VALUE, secrets.REMOTE_CACHE_USERNAME, secrets.REMOTE_CACHE_PASSWORD ) }}