Files
gitea-ci-library/git-pages/files/retention-cleanup.sh
T
moilanik 5c6d4fd636
CI Feature / Load example-gitea-env.conf to pipeline env (push) Successful in 21s
acc-tests Cucumber test report
CI Feature / Cucumber tests (push) Successful in 1m16s
unit-tests Bats test report
CI Feature / Bats tests (push) Successful in 1m37s
CI Feature / Report Summary (push) Successful in 4s
git pages bug fix
2026-06-21 08:34:29 +03:00

266 lines
7.7 KiB
Bash

#!/usr/bin/env bash
#
# retention-cleanup.sh - remove outdated report directories from a pages site.
#
# The script reads the site manifest, decides which report directories to
# drop, rebuilds the site without them and uploads the result with one PUT.
#
# Environment:
#   PAGES_URL         Base URL of the pages server
#                     (default: http://localhost:3000).
#   PAGES_HOST        Required. Sent as the HTTP Host header on pages requests.
#   RETENTION_CONFIG  Path to the retention rules JSON file
#                     (default: /etc/retention/retention.json).
#   GITEA_API_URL     Optional. When empty, branch existence checks are skipped.
#   GITEA_TOKEN       Optional. When empty, branch existence checks are skipped.

# Exit on the first unhandled failure; a pipeline fails if any stage fails.
set -eo pipefail
PAGES_URL="${PAGES_URL:-http://localhost:3000}"
# ":?" aborts the script with this message when PAGES_HOST is unset or empty.
PAGES_HOST="${PAGES_HOST:?PAGES_HOST is required}"
CONFIG="${RETENTION_CONFIG:-/etc/retention/retention.json}"
# Both Gitea settings default to empty.
GITEA_API_URL="${GITEA_API_URL:-}"
GITEA_TOKEN="${GITEA_TOKEN:-}"
# Invoke curl in silent mode with errors still shown (-sS) and with the Host
# header set to PAGES_HOST. Every argument is forwarded to curl unchanged.
curl_with_host() {
  local -a base_args=(-sS -H "Host: ${PAGES_HOST}")
  curl "${base_args[@]}" "$@"
}
# The retention rules file is mandatory: stop before doing any work without it.
if [ ! -f "$CONFIG" ]; then
  echo "ERROR: config missing: $CONFIG" >&2
  exit 1
fi
# Per-run cache of branch lookups, keyed by "owner/repo/branch".
#   "1" = branch exists, or must be treated as existing (API kept failing)
#   "0" = the API answered 404 for the branch
declare -A BRANCH_CACHE

#######################################
# Check whether a branch still exists in Gitea.
# Globals:
#   GITEA_API_URL, GITEA_TOKEN (read) - if either is empty no lookup is made
#   BRANCH_CACHE (read, written)      - avoids repeated lookups per branch
#   BRANCH_CHECK_RETRY_DELAY (read)   - optional seconds to wait between
#                                       attempts, default 10
# Arguments:
#   $1 - owner, $2 - repo, $3 - branch
# Outputs:
#   One WARN line on stdout when every attempt ended in an API error.
# Returns:
#   0 when the branch exists, the check is disabled, or the API kept failing
#     (fail-safe: the caller keeps the report);
#   1 when the API answered 404.
#######################################
branch_exists() {
  local owner="$1" repo="$2" branch="$3"
  local key="${owner}/${repo}/${branch}"
  local -r max_attempts=3
  local retry_delay="${BRANCH_CHECK_RETRY_DELAY:-10}"
  local status="" attempt

  [ -n "$GITEA_API_URL" ] || return 0
  [ -n "$GITEA_TOKEN" ] || return 0

  # Positive and negative answers are both cached, so a deleted branch with
  # many reports costs one API call instead of one per report.
  case "${BRANCH_CACHE[$key]:-}" in
    1) return 0 ;;
    0) return 1 ;;
  esac

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # On a transport failure curl has usually already written its own code
    # through -w, so the status is replaced rather than appended to (appending
    # produced values such as "000000" in the warning below).
    status=$(curl -sS -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${GITEA_TOKEN}" \
      "${GITEA_API_URL}/api/v1/repos/${owner}/${repo}/branches/${branch}" \
      2>/dev/null) || status="000"

    case "$status" in
      200)
        BRANCH_CACHE[$key]=1
        return 0
        ;;
      404)
        BRANCH_CACHE[$key]=0
        return 1
        ;;
    esac

    # Any other status is an API error: wait and retry unless this was the
    # last attempt.
    if (( attempt < max_attempts )); then
      sleep "$retry_delay"
    fi
  done

  # All attempts failed - keep report (fail-safe)
  echo " WARN: Gitea API error for ${owner}/${repo}/${branch} (status ${status}) after ${max_attempts} attempts - KEEPING report"
  BRANCH_CACHE[$key]=1
  return 0
}
# Fallback limits taken from the "default" entry under .branches in the config;
# 90 (maxAgeDays) and 5 (keepMin) are used when the config does not set them.
default_max_age="$(jq --raw-output '.branches.default.maxAgeDays // 90' "$CONFIG")"
default_keep_min="$(jq --raw-output '.branches.default.keepMin // 5' "$CONFIG")"
# Print the maxAgeDays limit for branch $1: the value configured for that
# branch name when there is one, otherwise the global default_max_age.
rule_max_age() {
  local branch="$1"
  local override
  override=$(jq -r --arg b "$branch" '.branches[$b].maxAgeDays // empty' "$CONFIG")
  echo "${override:-$default_max_age}"
}
# Print the keepMin limit for branch $1: the value configured for that branch
# name when there is one, otherwise the global default_keep_min.
rule_keep_min() {
  local branch="$1"
  local override
  override=$(jq -r --arg b "$branch" '.branches[$b].keepMin // empty' "$CONFIG")
  echo "${override:-$default_keep_min}"
}
# Print the whole number of days between timestamp $1 and now (UTC).
# $1 is handed to `date -d`; when it cannot be parsed, or parses to epoch 0,
# 99999 is printed instead.
age_days() {
  local published="$1"
  local pub_epoch now_epoch

  if ! pub_epoch=$(date -u -d "$published" +%s 2>/dev/null); then
    pub_epoch=0
  fi
  if [ "$pub_epoch" -eq 0 ]; then
    echo 99999
    return
  fi

  now_epoch=$(date -u +%s)
  echo $(( (now_epoch - pub_epoch) / 86400 ))
}
#######################################
# Split a site-relative path into its first two components.
# Globals:
#   OWNER (written) - text before the first "/"
#   REPO  (written) - text between the first and second "/"
# Arguments:
#   $1 - relative path, e.g. "owner/repo/..."
#######################################
parse_path() {
  local rel="$1"
  # Scratch value kept local so the function no longer clobbers a global
  # named "rest".
  local rest
  OWNER="${rel%%/*}"
  rest="${rel#*/}"
  REPO="${rest%%/*}"
}
echo "Fetching manifest from ${PAGES_URL}/.git-pages/manifest.json"
MANIFEST="$(curl_with_host "${PAGES_URL}/.git-pages/manifest.json")"
echo "Manifest loaded"

# Keys of .contents that contain "/reports/" and end in "/.meta".
# jq errors are discarded, so an unreadable manifest yields an empty list.
META_PATHS="$(
  jq -r '.contents | to_entries[] | select(.key | test("/reports/")) | select(.key | endswith("/.meta")) | .key' \
    <<< "$MANIFEST" 2>/dev/null || true
)"

[ -n "$META_PATHS" ] || {
  echo "No .meta files found under /reports/ — nothing to clean"
  exit 0
}
echo ""
echo "=== Phase 1: collect reports ==="
# SEEN_REPORTS: report directories already handled.
# REPORTS: one "dir|owner|repo|branch|days" record per usable report.
declare -A SEEN_REPORTS
declare -a REPORTS
while IFS= read -r meta_path; do
  report_dir=$(dirname "$meta_path")

  # Each report directory is processed once.
  if [ -n "${SEEN_REPORTS[$report_dir]:-}" ]; then
    continue
  fi
  SEEN_REPORTS[$report_dir]=1
  parse_path "$report_dir"

  # A report whose .meta cannot be fetched or lacks a field is skipped with
  # a warning; it is neither kept nor deleted by the later phases.
  meta_content=$(curl_with_host "${PAGES_URL}/${meta_path}" 2>/dev/null || true)
  if [ -z "$meta_content" ]; then
    echo " WARN: could not fetch $meta_path"
    continue
  fi

  branch=$(jq -r '.branch // empty' <<< "$meta_content" 2>/dev/null || true)
  published=$(jq -r '.published_at // empty' <<< "$meta_content" 2>/dev/null || true)
  if [ -z "$branch" ]; then
    echo " WARN: no branch in $meta_path"
    continue
  fi
  if [ -z "$published" ]; then
    echo " WARN: no published_at in $meta_path"
    continue
  fi

  days=$(age_days "$published")
  REPORTS+=("${report_dir}|${OWNER}|${REPO}|${branch}|${days}")
  echo " ${OWNER}/${REPO} branch=${branch} age=${days}d"
done <<< "$META_PATHS"

if [ "${#REPORTS[@]}" -eq 0 ]; then
  echo "No actionable reports"
  exit 0
fi
echo ""
echo "=== Phase 2: check branches in Gitea ==="
# TO_DELETE: report directories to remove.
# KEEP: records handed on to the retention rules.
declare -a TO_DELETE
declare -a KEEP
for entry in "${REPORTS[@]}"; do
  IFS='|' read -r dir owner repo branch days <<< "$entry"

  # Without both Gitea settings no lookup is made; the report moves on
  # silently.
  if [ -z "$GITEA_API_URL" ] || [ -z "$GITEA_TOKEN" ]; then
    KEEP+=("${dir}|${owner}|${repo}|${branch}|${days}")
    continue
  fi

  if ! branch_exists "$owner" "$repo" "$branch"; then
    echo " BRANCH DELETED: ${owner}/${repo}/${branch} -> DELETE"
    TO_DELETE+=("$dir")
    continue
  fi

  echo " BRANCH EXISTS: ${owner}/${repo}/${branch}"
  KEEP+=("${dir}|${owner}|${repo}|${branch}|${days}")
done
echo ""
echo "=== Phase 3: apply retention rules to remaining reports ==="
# Number of non-expired reports seen so far, keyed by "owner/repo/branch".
declare -A BRANCH_COUNTS

#######################################
# Apply the age and count limits to every record in KEEP.
# A report is marked for deletion when it is older than the branch's
# maxAgeDays, or when keepMin newer non-expired reports of the same
# owner/repo/branch have already been seen.
# NOTE(review): keepMin is applied as an upper bound on retained reports,
# although the name suggests a lower bound - confirm the intended meaning.
# Globals:
#   KEEP (read)             - "dir|owner|repo|branch|days" records
#   TO_DELETE (appended)    - report directories to remove
#   BRANCH_COUNTS (written) - running per-branch counters
# Outputs:
#   One " DELETE: ..." line on stdout per report marked for deletion.
#######################################
apply_retention_rules() {
  local dir owner repo branch days max_age keep_min key count

  [ "${#KEEP[@]}" -gt 0 ] || return 0

  # Records are visited newest first (ascending age) inside each
  # owner/repo/branch group, so the quota keeps the most recent reports.
  # The loop reads from fd 3 so commands in the body cannot consume the list.
  while IFS='|' read -r -u 3 dir owner repo branch days; do
    [ -n "$dir" ] || continue
    max_age=$(rule_max_age "$branch")
    keep_min=$(rule_keep_min "$branch")

    if [ "$days" -gt "$max_age" ]; then
      echo " DELETE: ${dir} (age ${days}d > maxAge ${max_age}d, branch ${branch})"
      TO_DELETE+=("$dir")
      continue
    fi

    # Count per repository and branch: a bare branch name would make every
    # repository with e.g. a "main" branch share a single quota.
    key="${owner}/${repo}/${branch}"
    count=$(( ${BRANCH_COUNTS[$key]:-0} + 1 ))
    BRANCH_COUNTS[$key]=$count

    if [ "$count" -gt "$keep_min" ]; then
      echo " DELETE: ${dir} (kept ${keep_min}/${count}, exceeds keepMin, branch ${branch})"
      TO_DELETE+=("$dir")
    fi
  done 3< <(printf '%s\n' "${KEEP[@]}" \
    | LC_ALL=C sort -t'|' -k2,2 -k3,3 -k4,4 -k5,5n)
}

apply_retention_rules
# No report was marked in the earlier phases: leave the site untouched.
[ "${#TO_DELETE[@]}" -gt 0 ] || {
  echo "Nothing to delete"
  exit 0
}
echo ""
echo "=== Phase 4: full site rebuild ==="
echo "Rebuilding site (${#TO_DELETE[@]} report(s) to delete)..."

# Scratch paths start empty so the cleanup handler is safe to run even when
# one of the mktemp calls below fails.
ARCHIVE_FILE=""
SITE_DIR=""
NEW_TAR=""

# Remove the phase 4 scratch files and directory; paths that were never
# created (still empty) are skipped.
cleanup_phase4() {
  [ -z "$ARCHIVE_FILE" ] || rm -f -- "$ARCHIVE_FILE"
  [ -z "$NEW_TAR" ] || rm -f -- "$NEW_TAR"
  [ -z "$SITE_DIR" ] || rm -rf -- "$SITE_DIR"
}

# Installed before anything is created: with `set -e`, a failing mktemp exits
# the script, and files made by the earlier mktemp calls would otherwise leak.
trap cleanup_phase4 EXIT

ARCHIVE_FILE=$(mktemp)
SITE_DIR=$(mktemp -d)
NEW_TAR=$(mktemp)
# Build the new site content in SITE_DIR: prefer the archive.tar export and
# fall back to downloading every kept file listed in the manifest.

# Try archive.tar first
echo "Downloading archive.tar..."
# A transport-level curl failure must not abort the script under `set -e`;
# whatever code curl printed (normally 000) then selects the fallback below.
HTTP_CODE=$(curl_with_host -o "$ARCHIVE_FILE" -w "%{http_code}" -sS "${PAGES_URL}/.git-pages/archive.tar") || true
if [ "$HTTP_CODE" = "200" ] && tar -tf "$ARCHIVE_FILE" >/dev/null 2>&1; then
  echo "Extracting archive..."
  tar -xf "$ARCHIVE_FILE" -C "$SITE_DIR"
  for dir in "${TO_DELETE[@]}"; do
    if [ -d "$SITE_DIR/$dir" ]; then
      echo " Removing: $dir"
      # ":?" refuses to run if SITE_DIR is ever empty.
      rm -rf -- "${SITE_DIR:?}/$dir"
    fi
  done
else
  echo "archive.tar failed (HTTP ${HTTP_CODE}) - falling back to manifest-based rebuild"
  ALL_PATHS=$(jq -r '.contents | keys[]' <<< "$MANIFEST" 2>/dev/null || true)
  if [ -z "$ALL_PATHS" ]; then
    echo "ERROR: no files in manifest - cannot rebuild" >&2
    exit 1
  fi

  # Drop every path that lies under a directory marked for deletion.
  # The comparison is a literal prefix match on "<dir>/": report directory
  # names are data, so using them as a regex would let "v1.0" also match
  # "v1x0" and exclude paths that must be kept.
  KEEP_PATHS=$(
    awk '
      FILENAME == ARGV[1] { if ($0 != "") doomed[$0 "/"] = 1; next }
      {
        for (prefix in doomed) if (index($0, prefix) == 1) next
        print
      }
    ' <(printf '%s\n' "${TO_DELETE[@]}") <(printf '%s\n' "$ALL_PATHS")
  )

  if [ -z "$KEEP_PATHS" ]; then
    echo "No files to keep - site will be empty"
    mkdir -p "$SITE_DIR/__placeholder__"
    echo "placeholder" > "$SITE_DIR/__placeholder__/index.html"
  else
    FILE_COUNT=$(printf '%s\n' "$KEEP_PATHS" | wc -l | tr -d ' ')
    echo "Downloading ${FILE_COUNT} file(s)..."
    DOWNLOAD_FAILURES=0
    while IFS= read -r path; do
      [ -n "$path" ] || continue
      mkdir -p "$(dirname "$SITE_DIR/$path")"
      # --fail (-f): an HTTP error must count as a failure instead of saving
      # the error page as the file's content.
      if ! curl_with_host -f -o "$SITE_DIR/$path" -sS "${PAGES_URL}/${path}"; then
        echo " WARN: failed to download ${path}"
        DOWNLOAD_FAILURES=$((DOWNLOAD_FAILURES + 1))
      fi
    done <<< "$KEEP_PATHS"

    # The result replaces the whole site, so an incomplete copy would delete
    # content that was meant to be kept. Stop instead of publishing it.
    if [ "$DOWNLOAD_FAILURES" -gt 0 ]; then
      echo "ERROR: ${DOWNLOAD_FAILURES} file(s) could not be downloaded - aborting rebuild to avoid publishing a partial site" >&2
      exit 1
    fi
  fi
fi
# When SITE_DIR has no entries, add a placeholder page before packing.
[ -n "$(ls -A "$SITE_DIR" 2>/dev/null)" ] || {
  echo "Site is empty - creating placeholder"
  mkdir -p "$SITE_DIR/__placeholder__"
  echo "placeholder" > "$SITE_DIR/__placeholder__/index.html"
}

# Pack the directory contents (paths relative to SITE_DIR) for upload.
tar -cf "$NEW_TAR" -C "$SITE_DIR" .

echo "PUT: replacing site contents..."
HTTP_CODE=$(
  curl_with_host -X PUT "${PAGES_URL}/" \
    -H "Content-Type: application/x-tar" \
    --data-binary @"${NEW_TAR}" \
    -w "%{http_code}" \
    -o /dev/null
)
echo "HTTP ${HTTP_CODE}"

# 200, 201 and 204 count as success; any other status fails the run.
case "$HTTP_CODE" in
  200 | 201 | 204)
    echo "Site rebuild completed."
    ;;
  *)
    echo "ERROR: PUT HTTP ${HTTP_CODE}" >&2
    exit 1
    ;;
esac