- ci-engine.yml: 2 dummy test stepiä + agnostinen publish-stage (skannaa .meta-tiedostot, PATCH raportit, postaa status + linkki) - publish-git-pages.sh: palauta BASE URL (ilman index.html) - .meta-formaatti: lisää context, description, state kentät
This commit is contained in:
@@ -1,34 +1,43 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
set -eo pipefail
|
||||
|
||||
DATA_ROOT="${DATA_ROOT:-/app/data}"
|
||||
PAGES_URL="${PAGES_URL:-http://localhost:3000}"
|
||||
PAGES_HOST="${PAGES_HOST:?PAGES_HOST is required}"
|
||||
CONFIG="${RETENTION_CONFIG:-/etc/retention/retention.json}"
|
||||
GITEA_API_URL="${GITEA_API_URL:?GITEA_API_URL is required}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:?GITEA_TOKEN is required}"
|
||||
GITEA_API_URL="${GITEA_API_URL:-}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||
|
||||
# Invoke curl quietly (errors still shown) with the Host header forced to
# PAGES_HOST; every argument is forwarded to curl unchanged.
#
# Globals:   PAGES_HOST (read)
# Arguments: any curl options and the URL
curl_with_host() {
  local host_header="Host: ${PAGES_HOST}"
  curl -sS -H "$host_header" "$@"
}
|
||||
|
||||
[ -d "$DATA_ROOT" ] || { echo "ERROR: data root missing: $DATA_ROOT" >&2; exit 1; }
|
||||
[ -f "$CONFIG" ] || { echo "ERROR: config missing: $CONFIG" >&2; exit 1; }
|
||||
|
||||
default_max_age=$(jq -r '.default.maxAgeDays' "$CONFIG")
|
||||
default_keep_min=$(jq -r '.default.keepMin' "$CONFIG")
|
||||
BRANCH_CACHE=""
|
||||
branch_exists() {
|
||||
local owner="$1" repo="$2" branch="$3" key="${owner}/${repo}/${branch}"
|
||||
local status
|
||||
|
||||
gitea_get() {
|
||||
curl -fsS -H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Accept: application/json" "${GITEA_API_URL}${1}"
|
||||
[ -z "$GITEA_API_URL" ] && return 0
|
||||
[ -z "$GITEA_TOKEN" ] && return 0
|
||||
|
||||
if grep -q "^${key}$" <<< "$BRANCH_CACHE" 2>/dev/null; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
status=$(curl -sS -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
"${GITEA_API_URL}/api/v1/repos/${owner}/${repo}/branches/${branch}" 2>/dev/null || echo "000")
|
||||
|
||||
if [ "$status" = "200" ]; then
|
||||
BRANCH_CACHE="${BRANCH_CACHE}${key}"$'\n'
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
# Print every branch name of a repository, one per line, by walking the
# paginated branch listing of the Gitea API.
#
# Arguments: $1 - repository owner
#            $2 - repository name
# Uses:      gitea_get (defined elsewhere in this file) for the HTTP call
# Outputs:   branch names on stdout
# Returns:   0 once an empty page is reached, 1 when a request fails or a
#            response is not a JSON array
branch_names_for_repo() {
  local owner="$1" repo="$2" page=1
  local resp names

  while true; do
    # A failed request must end the loop. Without this check an empty
    # response made the numeric page-size tests error out and the loop
    # spin forever whenever errexit is not in effect.
    resp=$(gitea_get "/api/v1/repos/${owner}/${repo}/branches?limit=50&page=${page}") \
      || return 1

    # Reject anything that is not a list (e.g. a JSON error object) instead
    # of misreading its key count as a number of branches.
    names=$(jq -r 'if type == "array" then .[].name else error("expected a JSON array") end' \
      <<< "$resp") || return 1

    # Stop only on an empty page. The server may cap the page size below
    # the requested 50, so a short page does not prove it was the last one.
    [ -n "$names" ] || break
    printf '%s\n' "$names"
    page=$((page + 1))
  done
}
|
||||
default_max_age=$(jq -r '.branches.default.maxAgeDays // 90' "$CONFIG")
|
||||
default_keep_min=$(jq -r '.branches.default.keepMin // 5' "$CONFIG")
|
||||
|
||||
rule_max_age() {
|
||||
local branch="$1" v
|
||||
@@ -42,99 +51,147 @@ rule_keep_min() {
|
||||
[ -n "$v" ] && echo "$v" || echo "$default_keep_min"
|
||||
}
|
||||
|
||||
# Log and recursively remove one report directory.
#
# Arguments: $1 - directory to delete (must be non-empty)
#            $2 - human-readable reason, shown in the log line
# Outputs:   "DELETE <dir> (<reason>)" on stdout
delete_report_dir() {
  # Abort on an empty path rather than handing rm an empty argument.
  local target="${1:?delete_report_dir: empty path}"
  local reason="${2:-}"

  echo "DELETE ${target} (${reason})"
  # "--" keeps a path that starts with "-" from being parsed as an option.
  rm -rf -- "$target"
}
|
||||
|
||||
age_days() {
|
||||
local meta="$1" published epoch_pub now
|
||||
published=$(jq -r '.published_at // empty' "$meta")
|
||||
if [ -z "$published" ]; then
|
||||
epoch_pub=$(stat -c %Y "$(dirname "$meta")" 2>/dev/null || stat -f %m "$(dirname "$meta")")
|
||||
else
|
||||
epoch_pub=$(date -u -d "$published" +%s 2>/dev/null || echo 0)
|
||||
fi
|
||||
now=$(date -u +%s)
|
||||
local published="$1" epoch_pub now
|
||||
epoch_pub=$(date -u -d "$published" +%s 2>/dev/null || echo 0)
|
||||
[ "$epoch_pub" -eq 0 ] && echo 99999 && return
|
||||
now=$(date -u +%s)
|
||||
echo $(( (now - epoch_pub) / 86400 ))
|
||||
}
|
||||
|
||||
CACHE_DIR=$(mktemp -d)
|
||||
trap 'rm -rf "$CACHE_DIR"' EXIT
|
||||
|
||||
# Print the path of a file holding the sorted, de-duplicated branch names of
# a repository, fetching the list on first use and reusing it afterwards.
#
# Globals:   CACHE_DIR (read) - directory that holds the per-repo files
# Arguments: $1 - repository owner
#            $2 - repository name
# Uses:      branch_names_for_repo (defined elsewhere in this file)
# Outputs:   cache file path on stdout
# Returns:   0 on success, 1 when the branch list could not be produced
active_branches_file() {
  local owner="$1" repo="$2"
  local cache="${CACHE_DIR}/${owner}__${repo}.branches"
  local tmp="${cache}.tmp"

  if [ ! -f "$cache" ]; then
    # Build the list in a scratch file and publish it only when complete.
    # A failed or partial listing must never be cached: callers would read
    # the missing names as deleted branches.
    if ! branch_names_for_repo "$owner" "$repo" > "$tmp" \
      || ! sort -u -o "$tmp" -- "$tmp"; then
      rm -f -- "$tmp"
      echo "ERROR: could not list branches for ${owner}/${repo}" >&2
      return 1
    fi
    mv -- "$tmp" "$cache"
  fi

  echo "$cache"
}
|
||||
|
||||
# Test whether a branch name appears as a whole line in a branch list file.
#
# Arguments: $1 - file with one branch name per line
#            $2 - branch name to look for (matched literally, whole line)
# Returns:   0 when listed, non-zero otherwise
branch_active() {
  # The name is read from report metadata; "--" stops a value that starts
  # with "-" from being interpreted as a grep option.
  grep -Fxq -- "$2" "$1"
}
|
||||
|
||||
parse_report_path() {
|
||||
local report_dir="$1" rel rest
|
||||
rel="${report_dir#"${DATA_ROOT}/"}"
|
||||
REPO_OWNER="${rel%%/*}"
|
||||
parse_path() {
|
||||
local rel="$1"
|
||||
OWNER="${rel%%/*}"
|
||||
rest="${rel#*/}"
|
||||
REPO_NAME="${rest%%/*}"
|
||||
REPO="${rest%%/*}"
|
||||
}
|
||||
|
||||
# Pass 1: deleted branches (always — no config)
|
||||
while IFS= read -r meta; do
|
||||
report_dir=$(dirname "$meta")
|
||||
parse_report_path "$report_dir"
|
||||
branch=$(jq -r '.branch // empty' "$meta")
|
||||
[ -n "$branch" ] || continue
|
||||
echo "Fetching manifest from ${PAGES_URL}/.git-pages/manifest.json"
|
||||
MANIFEST=$(curl_with_host "${PAGES_URL}/.git-pages/manifest.json")
|
||||
echo "Manifest loaded"
|
||||
|
||||
cache=$(active_branches_file "$REPO_OWNER" "$REPO_NAME")
|
||||
if ! branch_active "$cache" "$branch"; then
|
||||
delete_report_dir "$report_dir" "branch deleted: ${branch}"
|
||||
META_PATHS=$(echo "$MANIFEST" | jq -r '.contents | to_entries[] | select(.key | test("/reports/")) | select(.key | endswith("/.meta")) | .key' 2>/dev/null || true)
|
||||
|
||||
if [ -z "$META_PATHS" ]; then
|
||||
echo "No .meta files found under /reports/ — nothing to clean"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Phase 1: collect reports ==="
|
||||
declare -a REPORTS
|
||||
while IFS= read -r meta_path; do
|
||||
report_dir=$(dirname "$meta_path")
|
||||
parse_path "$report_dir"
|
||||
meta_content=$(curl_with_host "${PAGES_URL}/${meta_path}" 2>/dev/null || true)
|
||||
[ -n "$meta_content" ] || { echo " WARN: could not fetch $meta_path"; continue; }
|
||||
|
||||
branch=$(echo "$meta_content" | jq -r '.branch // empty' 2>/dev/null || true)
|
||||
published=$(echo "$meta_content" | jq -r '.published_at // empty' 2>/dev/null || true)
|
||||
|
||||
[ -n "$branch" ] || { echo " WARN: no branch in $meta_path"; continue; }
|
||||
[ -n "$published" ] || { echo " WARN: no published_at in $meta_path"; continue; }
|
||||
|
||||
days=$(age_days "$published")
|
||||
REPORTS+=("${report_dir}|${OWNER}|${REPO}|${branch}|${days}")
|
||||
echo " ${OWNER}/${REPO} branch=${branch} age=${days}d"
|
||||
done <<< "$META_PATHS"
|
||||
|
||||
[ "${#REPORTS[@]}" -eq 0 ] && { echo "No actionable reports"; exit 0; }
|
||||
|
||||
echo ""
|
||||
echo "=== Phase 2: check branches in Gitea ==="
|
||||
declare -a TO_DELETE
|
||||
declare -a KEEP
|
||||
for entry in "${REPORTS[@]}"; do
|
||||
IFS='|' read -r dir owner repo branch days <<< "$entry"
|
||||
|
||||
if [ -n "$GITEA_API_URL" ] && [ -n "$GITEA_TOKEN" ]; then
|
||||
if branch_exists "$owner" "$repo" "$branch"; then
|
||||
echo " BRANCH EXISTS: ${owner}/${repo}/${branch}"
|
||||
KEEP+=("${dir}|${owner}|${repo}|${branch}|${days}")
|
||||
else
|
||||
echo " BRANCH DELETED: ${owner}/${repo}/${branch} -> DELETE"
|
||||
TO_DELETE+=("$dir")
|
||||
fi
|
||||
else
|
||||
KEEP+=("${dir}|${owner}|${repo}|${branch}|${days}")
|
||||
fi
|
||||
done < <(find "$DATA_ROOT" -path '*/reports/*/.meta' -type f 2>/dev/null)
|
||||
done
|
||||
|
||||
# Pass 2: age + keepMin for active branches (newest first)
|
||||
SURVIVORS="${CACHE_DIR}/survivors.tsv"
|
||||
: > "$SURVIVORS"
|
||||
|
||||
while IFS= read -r meta; do
|
||||
report_dir=$(dirname "$meta")
|
||||
[ -d "$report_dir" ] || continue
|
||||
parse_report_path "$report_dir"
|
||||
branch=$(jq -r '.branch // empty' "$meta")
|
||||
[ -n "$branch" ] || continue
|
||||
|
||||
cache=$(active_branches_file "$REPO_OWNER" "$REPO_NAME")
|
||||
branch_active "$cache" "$branch" || continue
|
||||
|
||||
printf '%s\t%s\t%s\t%s\n' "${REPO_OWNER}/${REPO_NAME}" "$branch" "$report_dir" "$(age_days "$meta")" >> "$SURVIVORS"
|
||||
done < <(find "$DATA_ROOT" -path '*/reports/*/.meta' -type f 2>/dev/null)
|
||||
|
||||
current_key=""
|
||||
kept=0
|
||||
|
||||
while IFS=$'\t' read -r repo_key branch report_dir days; do
|
||||
key="${repo_key}|${branch}"
|
||||
if [ "$key" != "$current_key" ]; then
|
||||
current_key="$key"
|
||||
kept=0
|
||||
echo ""
|
||||
echo "=== Phase 3: apply retention rules to remaining reports ==="
|
||||
if [ "${#KEEP[@]}" -gt 0 ]; then
|
||||
IFS=$'\n'
|
||||
for entry in $(printf '%s\n' "${KEEP[@]}" | sort -t'|' -k4,4 -k5,5rn); do
|
||||
IFS='|' read -r dir owner repo branch days <<< "$entry"
|
||||
max_age=$(rule_max_age "$branch")
|
||||
keep_min=$(rule_keep_min "$branch")
|
||||
fi
|
||||
|
||||
if [ "$days" -gt "$max_age" ]; then
|
||||
delete_report_dir "$report_dir" "older than ${max_age}d (branch ${branch})"
|
||||
continue
|
||||
fi
|
||||
if [ "$days" -gt "$max_age" ]; then
|
||||
echo " DELETE: ${dir} (age ${days}d > maxAge ${max_age}d, branch ${branch})"
|
||||
TO_DELETE+=("$dir")
|
||||
continue
|
||||
fi
|
||||
|
||||
kept=$((kept + 1))
|
||||
if [ "$kept" -gt "$keep_min" ]; then
|
||||
delete_report_dir "$report_dir" "exceeds keepMin ${keep_min} (branch ${branch})"
|
||||
fi
|
||||
done < <(sort -t $'\t' -k1,1 -k2,2 -k4,4n "$SURVIVORS")
|
||||
key="${branch}"
|
||||
count="${BRANCH_COUNTS[$key]:-0}"
|
||||
count=$((count + 1))
|
||||
BRANCH_COUNTS["$key"]=$count
|
||||
if [ "$count" -gt "$keep_min" ]; then
|
||||
echo " DELETE: ${dir} (kept ${keep_min}/${count}, exceeds keepMin, branch ${branch})"
|
||||
TO_DELETE+=("$dir")
|
||||
fi
|
||||
done
|
||||
unset IFS
|
||||
fi
|
||||
|
||||
echo "Retention cleanup finished."
|
||||
if [ "${#TO_DELETE[@]}" -eq 0 ]; then
|
||||
echo "Nothing to delete"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Phase 4: remove the selected report directories from the pages server.
# A tar archive with one character-device entry (device numbers 0/0) per
# directory in TO_DELETE is built, then sent to the server root with PATCH.
echo ""
echo "=== Phase 4: whiteout deletion ==="
echo "Creating whiteout tar for ${#TO_DELETE[@]} report(s)..."

WHITEOUT_TAR=$(mktemp)
trap 'rm -f "$WHITEOUT_TAR"' EXIT

# The archive path and the directory names are passed as arguments instead
# of being spliced into the Python source, so no value can alter the program.
python3 - "$WHITEOUT_TAR" "${TO_DELETE[@]}" <<'PY'
import sys
import tarfile

tar_path = sys.argv[1]
dirs = {d.strip() for d in sys.argv[2:]}

with tarfile.open(name=tar_path, mode='w') as tar:
    # Longest paths first, same ordering as before.
    for d in sorted(dirs, key=len, reverse=True):
        # A fresh header per entry rather than one shared, mutated object.
        info = tarfile.TarInfo(name=d)
        info.type = tarfile.CHRTYPE
        info.devmajor = 0
        info.devminor = 0
        tar.addfile(info)
PY

echo "Patching ${PAGES_URL}/ with whiteout tar..."
# "|| true": a transport-level curl failure should reach the error branch
# below instead of aborting here through errexit.
HTTP_CODE=$(curl_with_host -X PATCH "${PAGES_URL}/" \
  -H "Content-Type: application/x-tar" \
  -H "Atomic: no" \
  --data-binary @"${WHITEOUT_TAR}" \
  -w "%{http_code}" \
  -o /dev/null) || true
HTTP_CODE="${HTTP_CODE:-000}"

echo "HTTP $HTTP_CODE"
if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "204" ]; then
  echo "Retention cleanup finished."
else
  echo "ERROR: retention HTTP ${HTTP_CODE}" >&2
  exit 1
fi
|
||||
|
||||
Reference in New Issue
Block a user