#!/usr/bin/env bash set -euo pipefail DATA_ROOT="${DATA_ROOT:-/app/data}" CONFIG="${RETENTION_CONFIG:-/etc/retention/retention.json}" GITEA_API_URL="${GITEA_API_URL:?GITEA_API_URL is required}" GITEA_TOKEN="${GITEA_TOKEN:?GITEA_TOKEN is required}" [ -d "$DATA_ROOT" ] || { echo "ERROR: data root missing: $DATA_ROOT" >&2; exit 1; } [ -f "$CONFIG" ] || { echo "ERROR: config missing: $CONFIG" >&2; exit 1; } default_max_age=$(jq -r '.default.maxAgeDays' "$CONFIG") default_keep_min=$(jq -r '.default.keepMin' "$CONFIG") gitea_get() { curl -fsS -H "Authorization: token ${GITEA_TOKEN}" \ -H "Accept: application/json" "${GITEA_API_URL}${1}" } branch_names_for_repo() { local owner="$1" repo="$2" page=1 while true; do local resp count resp=$(gitea_get "/api/v1/repos/${owner}/${repo}/branches?limit=50&page=${page}") count=$(echo "$resp" | jq 'length') [ "$count" -eq 0 ] && break echo "$resp" | jq -r '.[].name' [ "$count" -lt 50 ] && break page=$((page + 1)) done } rule_max_age() { local branch="$1" v v=$(jq -r --arg b "$branch" '.branches[$b].maxAgeDays // empty' "$CONFIG") [ -n "$v" ] && echo "$v" || echo "$default_max_age" } rule_keep_min() { local branch="$1" v v=$(jq -r --arg b "$branch" '.branches[$b].keepMin // empty' "$CONFIG") [ -n "$v" ] && echo "$v" || echo "$default_keep_min" } delete_report_dir() { echo "DELETE $1 ($2)" rm -rf "$1" } age_days() { local meta="$1" published epoch_pub now published=$(jq -r '.published_at // empty' "$meta") if [ -z "$published" ]; then epoch_pub=$(stat -c %Y "$(dirname "$meta")" 2>/dev/null || stat -f %m "$(dirname "$meta")") else epoch_pub=$(date -u -d "$published" +%s 2>/dev/null || echo 0) fi now=$(date -u +%s) [ "$epoch_pub" -eq 0 ] && echo 99999 && return echo $(( (now - epoch_pub) / 86400 )) } CACHE_DIR=$(mktemp -d) trap 'rm -rf "$CACHE_DIR"' EXIT active_branches_file() { local owner="$1" repo="$2" local cache="${CACHE_DIR}/${owner}__${repo}.branches" if [ ! -f "$cache" ]; then branch_names_for_repo "$owner" "$repo" | sort -u > "$cache" fi echo "$cache" } branch_active() { grep -Fxq "$2" "$1" } parse_report_path() { local report_dir="$1" rel rest rel="${report_dir#"${DATA_ROOT}/"}" REPO_OWNER="${rel%%/*}" rest="${rel#*/}" REPO_NAME="${rest%%/*}" } # Pass 1: deleted branches (always — no config) while IFS= read -r meta; do report_dir=$(dirname "$meta") parse_report_path "$report_dir" branch=$(jq -r '.branch // empty' "$meta") [ -n "$branch" ] || continue cache=$(active_branches_file "$REPO_OWNER" "$REPO_NAME") if ! branch_active "$cache" "$branch"; then delete_report_dir "$report_dir" "branch deleted: ${branch}" fi done < <(find "$DATA_ROOT" -path '*/reports/*/.meta' -type f 2>/dev/null) # Pass 2: age + keepMin for active branches (newest first) SURVIVORS="${CACHE_DIR}/survivors.tsv" : > "$SURVIVORS" while IFS= read -r meta; do report_dir=$(dirname "$meta") [ -d "$report_dir" ] || continue parse_report_path "$report_dir" branch=$(jq -r '.branch // empty' "$meta") [ -n "$branch" ] || continue cache=$(active_branches_file "$REPO_OWNER" "$REPO_NAME") branch_active "$cache" "$branch" || continue printf '%s\t%s\t%s\t%s\n' "${REPO_OWNER}/${REPO_NAME}" "$branch" "$report_dir" "$(age_days "$meta")" >> "$SURVIVORS" done < <(find "$DATA_ROOT" -path '*/reports/*/.meta' -type f 2>/dev/null) current_key="" kept=0 while IFS=$'\t' read -r repo_key branch report_dir days; do key="${repo_key}|${branch}" if [ "$key" != "$current_key" ]; then current_key="$key" kept=0 max_age=$(rule_max_age "$branch") keep_min=$(rule_keep_min "$branch") fi if [ "$days" -gt "$max_age" ]; then delete_report_dir "$report_dir" "older than ${max_age}d (branch ${branch})" continue fi kept=$((kept + 1)) if [ "$kept" -gt "$keep_min" ]; then delete_report_dir "$report_dir" "exceeds keepMin ${keep_min} (branch ${branch})" fi done < <(sort -t $'\t' -k1,1 -k2,2 -k4,4n "$SURVIVORS") echo "Retention cleanup finished."