python-tdd/.woodpecker/_retry_failed.sh

#!/usr/bin/env bash
# Usage: bash .woodpecker/_retry_failed.sh <test command args...>
#
# Runs `python manage.py test "$@"`. If any tests fail/error, parses the
# failure labels out of stdout and re-runs ONLY those tests — so a single
# Selenium flake at test 90/93 costs ~22s on retry instead of the full
# 35-minute step.
#
# Django's unittest-based runner prints failures in a predictable shape:
#
#   ERROR: test_method (full.dotted.path.TestClass.test_method)
#   FAIL:  test_method (full.dotted.path.TestClass.test_method)
#
# The dotted path inside the parens is exactly what `manage.py test`
# accepts as a label. We grep for those lines + re-run that list.
#
# Exit semantics:
#   - First run green → exit 0, no retry.
#   - First run failed AND label parse found nothing (crashed before any
#     test reported, e.g. ImportError) → propagate first-run exit code,
#     no retry. Genuine infra problems shouldn't be silently re-run.
#   - First run failed AND labels parsed → retry just those; exit with
#     the retry's exit code. A real (not-flaky) regression fails twice
#     → step still red, with the focused retry log as the authoritative
#     report (no need to scroll past the noisy first-run output).
#
# Run from inside `src/` (Woodpecker preserves cwd across `commands:`,
# so the upstream `cd ./src` carries through).

set +e  # do NOT bail on first failure; we WANT to handle it

# Capture file for the first run's combined stdout/stderr.
# The template ends in the X's on purpose: GNU mktemp tolerates a suffix
# after them (e.g. ".log"), but BusyBox's mktemp (Alpine-based images)
# rejects such a template, so the suffix-free form is the portable one.
if ! LOG=$(mktemp "${TMPDIR:-/tmp}/ft-retry.XXXXXX"); then
    # Without a capture file we cannot parse labels. Degrade to a plain,
    # retry-less run rather than failing the step for an unrelated reason.
    echo "mktemp failed; running tests once without retry support" >&2
    python manage.py test "$@"
    exit $?
fi
trap 'rm -f -- "$LOG"' EXIT

echo "──── First run ────"
python manage.py test "$@" 2>&1 | tee "$LOG"
# PIPESTATUS[0] is the test runner's status; plain $? would be tee's.
FIRST=${PIPESTATUS[0]}

if [[ "$FIRST" -eq 0 ]]; then
    exit 0
fi

# Parse failure labels from the FAIL:/ERROR: header lines.
#
# The label is the FIRST parenthesised group, directly after the test
# name. Anchoring there (instead of on the last parens of the line)
# matters for subtest failures, which are printed as
#     FAIL: test_x (pkg.tests.Class.test_x) (i=1)
# and would otherwise yield "i=1" as the label.
#
# `sed -n … p` drops header lines that do not have that shape, so stray
# text can never be passed to the runner as a label. `sort -u` dedupes,
# e.g. when several subtests of one test fail.
#
# Labels are kept in an array so each one reaches the runner as exactly
# one argument, with no reliance on unquoted word splitting.
FAILED=()
while IFS= read -r label; do
    FAILED+=("$label")
done < <(
    sed -nE 's/^(FAIL|ERROR): [^ ]+ \(([^()[:space:]]+)\).*$/\2/p' "$LOG" \
        | sort -u
)

if [[ ${#FAILED[@]} -eq 0 ]]; then
    echo "──── First run failed, but no FAIL/ERROR labels parseable ────"
    echo "──── Not retrying — likely an infra problem, not a test flake ────"
    exit "$FIRST"
fi

echo ""
echo "──── Retry (${#FAILED[@]} failed test(s) from first run) ────"
printf '  %s\n' "${FAILED[@]}"
echo "─────────────────────────────────────────────────────"
echo ""

# NOTE(review): only the parsed labels are passed to the retry; options
# given in "$@" (e.g. --settings, --tag) are NOT forwarded. Confirm the
# CI steps that call this script do not depend on such options.
#
# This is the last command, so its exit status becomes the script's exit
# status (the EXIT trap does not alter it).
python manage.py test "${FAILED[@]}"
CI: `_retry_failed.sh` wraps both FT steps — single-flake retries cost ~22s instead of a full 35-min step re-run. Parses Django's `FAIL:/ERROR: test_method (full.dotted.path)` lines from stdout, re-runs only those labels (deduped + sorted). Green first runs skip the retry; first-run crashes w. no parseable labels propagate the original exit code without masking infra problems Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-20 13:14:26 -04:00			`#!/usr/bin/env bash`
			`# Usage: bash .woodpecker/_retry_failed.sh <test command args...>`
			`#`
			# Runs `python manage.py test "$@"`. If any tests fail/error, parses the
			`# failure labels out of stdout and re-runs ONLY those tests — so a single`
			`# Selenium flake at test 90/93 costs ~22s on retry instead of the full`
			`# 35-minute step.`
			`#`
			`# Django's unittest-based runner prints failures in a predictable shape:`
			`#`
			`# ERROR: test_method (full.dotted.path.TestClass.test_method)`
			`# FAIL: test_method (full.dotted.path.TestClass.test_method)`
			`#`
			# The dotted path inside the parens is exactly what `manage.py test`
			`# accepts as a label. We grep for those lines + re-run that list.`
			`#`
			`# Exit semantics:`
			`# - First run green → exit 0, no retry.`
			`# - First run failed AND label parse found nothing (crashed before any`
			`# test reported, e.g. ImportError) → propagate first-run exit code,`
			`# no retry. Genuine infra problems shouldn't be silently re-run.`
			`# - First run failed AND labels parsed → retry just those; exit with`
			`# the retry's exit code. A real (not-flaky) regression fails twice`
			`# → step still red, with the focused retry log as the authoritative`
			`# report (no need to scroll past the noisy first-run output).`
			`#`
			# Run from inside `src/` (Woodpecker preserves cwd across `commands:`,
			# so the upstream `cd ./src` carries through).

			`set +e # do NOT bail on first failure; we WANT to handle it`

			`LOG=$(mktemp -t ft-retry.XXXXXX.log)`
			`trap 'rm -f "$LOG"' EXIT`

			`echo "──── First run ────"`
			`python manage.py test "$@" 2>&1 \| tee "$LOG"`
			`FIRST=${PIPESTATUS[0]}`

			`if [ "$FIRST" -eq 0 ]; then`
			`exit 0`
			`fi`

			`# Parse failure labels. Match both FAIL: and ERROR: lines; the dotted`
			# path lives inside the trailing parens. `sort -u` dedupes if a single
			`# test produces multiple lines (rare but possible).`
			`FAILED=$(grep -E '^(FAIL\|ERROR): ' "$LOG" \`
			`\| sed -E 's/^.*\(([^)]+)\)[^()]*$/\1/' \`
			`\| sort -u \`
			`\| tr '\n' ' ')`

			`if [ -z "$FAILED" ]; then`
			`echo "──── First run failed, but no FAIL/ERROR labels parseable ────"`
			`echo "──── Not retrying — likely an infra problem, not a test flake ────"`
			`exit "$FIRST"`
			`fi`

			`NUM=$(echo "$FAILED" \| wc -w \| tr -d ' ')`
			`echo ""`
			`echo "──── Retry ($NUM failed test(s) from first run) ────"`
			`echo "$FAILED" \| tr ' ' '\n' \| sed 's/^/ /'`
			`echo "─────────────────────────────────────────────────────"`
			`echo ""`

			`python manage.py test $FAILED`