#!/usr/bin/env bash
# Usage: bash .woodpecker/_retry_failed.sh <test command args...>
#
# Runs `python manage.py test "$@"`. If any tests fail/error, parses the
# failure labels out of the captured output (stdout and stderr combined)
# and re-runs ONLY those tests — so a single Selenium flake at test 90/93
# costs ~22s on retry instead of the full 35-minute step.
#
# Django's unittest-based runner prints failures in a predictable shape:
#
#     ERROR: test_method (full.dotted.path.TestClass.test_method)
#     FAIL: test_method (full.dotted.path.TestClass.test_method)
#
# The dotted path inside the parens is exactly what `manage.py test`
# accepts as a label. We grep for those lines + re-run that list.
# (This is the Python 3.11+ format; older versions print only the class
# path inside the parens.)
#
# Exit semantics:
#   - First run green → exit 0, no retry.
#   - First run failed AND label parse found nothing (crashed before any
#     test reported, e.g. ImportError) → propagate first-run exit code,
#     no retry. Genuine infra problems shouldn't be silently re-run.
#   - First run failed AND labels parsed → retry just those; exit with
#     the retry's exit code. A real (not-flaky) regression fails twice
#     → step still red, with the focused retry log as the authoritative
#     report (no need to scroll past the noisy first-run output).
#
# Run from inside `src/` (Woodpecker preserves cwd across `commands:`,
# so the upstream `cd ./src` carries through).

# Errexit stays off on purpose: a failing test run is exactly the case this
# script exists to handle, so it must not abort the shell.
set +e

# Scratch file that receives a copy of everything the first run prints.
# The template ends in the X's (no suffix after them) so it is accepted by
# mktemp implementations that require a trailing XXXXXX. Without a log there
# is nothing to parse for a retry, so a mktemp failure is fatal.
LOG=$(mktemp "${TMPDIR:-/tmp}/ft-retry.XXXXXX") || {
  echo "mktemp failed; cannot capture test output for retry parsing" >&2
  exit 1
}
# Remove the scratch file on every exit path.
trap 'rm -f -- "$LOG"' EXIT

echo "──── First run ────"
# stderr is folded into stdout so the whole run is shown live and saved to
# $LOG by tee.
python manage.py test "$@" 2>&1 | tee "$LOG"
# PIPESTATUS[0] is python's exit status; plain $? would report tee's.
FIRST=${PIPESTATUS[0]}

# Green first run: nothing to retry.
if [[ "$FIRST" -eq 0 ]]; then
  exit 0
fi

#######################################
# Print the unique test labels named by FAIL:/ERROR: report lines.
#
# A report line looks like
#   FAIL: test_method (full.dotted.path.TestClass.test_method)
# and, for a failure inside unittest's subTest(), carries extra trailing
# text such as "(i=1)" or "[msg] (i=1)". The label is therefore the FIRST
# parenthesised group after the test name, not the last one.
#
# Lines that start with FAIL:/ERROR: but do not have that shape (for
# example an application log line) are dropped instead of being passed
# through as bogus labels: sed runs with -n and prints only on a match.
#
# Arguments: $1 - path to the captured test output
# Outputs:   labels on one line, each followed by a single space
#            (empty output when nothing matched); sorted and de-duplicated,
#            since a test can be reported more than once (e.g. once per
#            failing subTest)
#######################################
extract_failed_labels() {
  local log_file=$1
  sed -nE 's/^(FAIL|ERROR): [^ ]+ \(([^()]+)\).*$/\2/p' "$log_file" \
    | LC_ALL=C sort -u \
    | tr '\n' ' '
}

# Space-separated list of labels to retry; empty when none could be parsed.
FAILED=$(extract_failed_labels "$LOG")

# Nothing parseable to retry: say so and hand back the first run's exit
# status unchanged rather than re-running anything.
if [[ -z "$FAILED" ]]; then
  printf '%s\n' \
    "──── First run failed, but no FAIL/ERROR labels parseable ────" \
    "──── Not retrying — likely an infra problem, not a test flake ────"
  exit "$FIRST"
fi

# $FAILED is a whitespace-separated label list. Split it exactly once into
# an array so the labels can be counted, listed and passed on as separate,
# quoted arguments — no reliance on unquoted expansion (which would also
# glob-expand) and no stray blank entry from the list's trailing space.
read -r -a failed_labels <<<"$FAILED"
NUM=${#failed_labels[@]}

echo ""
echo "──── Retry ($NUM failed test(s) from first run) ────"
printf ' %s\n' "${failed_labels[@]}"
echo "─────────────────────────────────────────────────────"
echo ""

# Re-run only the failed tests. Keep this as the final command: its exit
# status is what the script exits with.
python manage.py test "${failed_labels[@]}"