diff --git a/git-pages/files/retention-cleanup.sh b/git-pages/files/retention-cleanup.sh
index 82bb30b..653fd6b 100644
--- a/git-pages/files/retention-cleanup.sh
+++ b/git-pages/files/retention-cleanup.sh
@@ -176,46 +176,112 @@ if [ "${#TO_DELETE[@]}" -eq 0 ]; then
 fi
 
 echo ""
-echo "=== Phase 4: whiteout deletion ==="
-echo "Creating whiteout tar for ${#TO_DELETE[@]} report(s)..."
+echo "=== Phase 4: full site rebuild ==="
+echo "Rebuilding site (${#TO_DELETE[@]} report(s) to delete)..."
 
-WHITEOUT_TAR=$(mktemp)
-trap 'rm -f "$WHITEOUT_TAR"' EXIT
+# Phase 4 stages a copy of the site without the TO_DELETE directories and
+# uploads it with a single PUT that replaces the site contents. Anything
+# missing from the staging dir would be dropped by that PUT, so the staging
+# steps below abort on failure instead of continuing with a partial tree.
+ARCHIVE_FILE=$(mktemp)
+SITE_DIR=$(mktemp -d)
+NEW_TAR=$(mktemp)
+cleanup_phase4() {
+  rm -f "$ARCHIVE_FILE" "$NEW_TAR"
+  rm -rf "$SITE_DIR"
+}
+trap cleanup_phase4 EXIT
 
-python3 -c "
-import tarfile, sys
+# Try archive.tar first. "|| true" keeps a failed transfer from ending the
+# script before the manifest fallback can run, should errexit be enabled
+# (not visible here); HTTP_CODE then simply fails the "200" check below.
+echo "Downloading archive.tar..."
+HTTP_CODE=$(curl_with_host -o "$ARCHIVE_FILE" -w "%{http_code}" -sS "${PAGES_URL}/.git-pages/archive.tar") || true
 
-tar = tarfile.open(name='${WHITEOUT_TAR}', mode='w')
+if [ "$HTTP_CODE" = "200" ] && tar -tf "$ARCHIVE_FILE" >/dev/null 2>&1; then
+  echo "Extracting archive..."
+  tar -xf "$ARCHIVE_FILE" -C "$SITE_DIR" || {
+    echo "ERROR: failed to extract archive.tar" >&2
+    exit 1
+  }
 
-dirs = set()
-for d in sys.argv[1:]:
-    dirs.add(d.strip())
+  for dir in "${TO_DELETE[@]}"; do
+    # An empty entry would expand to "$SITE_DIR/" and wipe the whole staged
+    # site, so skip it.
+    [ -n "$dir" ] || continue
+    if [ -d "$SITE_DIR/$dir" ]; then
+      echo "  Removing: $dir"
+      rm -rf -- "${SITE_DIR:?}/${dir}"
+    fi
+  done
+else
+  echo "archive.tar failed (HTTP ${HTTP_CODE}) - falling back to manifest-based rebuild"
 
-tarinfo = tarfile.TarInfo()
-tarinfo.type = tarfile.CHRTYPE
-tarinfo.devmajor = 0
-tarinfo.devminor = 0
+  ALL_PATHS=$(echo "$MANIFEST" | jq -r '.contents | keys[]' 2>/dev/null || true)
 
-for d in sorted(dirs, key=len, reverse=True):
-    info = tarinfo
-    info.name = d
-    tar.addfile(info)
+  if [ -z "$ALL_PATHS" ]; then
+    echo "ERROR: no files in manifest - cannot rebuild" >&2
+    exit 1
+  fi
 
-tar.close()
-" "${TO_DELETE[@]}"
+  # Keep every manifest path that is not under a directory scheduled for
+  # deletion. The prefix is compared literally (quoted case pattern), so
+  # characters such as "." in a directory name are not treated as regex.
+  KEEP_PATHS=()
+  while IFS= read -r path; do
+    [ -z "$path" ] && continue
+    keep=1
+    for dir in "${TO_DELETE[@]}"; do
+      [ -n "$dir" ] || continue
+      case "$path" in
+        "$dir"/*) keep=0; break ;;
+      esac
+    done
+    if [ "$keep" -eq 1 ]; then
+      KEEP_PATHS+=("$path")
+    fi
+  done <<< "$ALL_PATHS"
 
-echo "Patching ${PAGES_URL}/ with whiteout tar..."
-HTTP_CODE=$(curl_with_host -X PATCH "${PAGES_URL}/" \
+  if [ "${#KEEP_PATHS[@]}" -eq 0 ]; then
+    echo "No files to keep - site will be empty"
+  else
+    echo "Downloading ${#KEEP_PATHS[@]} file(s)..."
+    for path in "${KEEP_PATHS[@]}"; do
+      mkdir -p "$(dirname "$SITE_DIR/$path")"
+      # -f makes curl exit non-zero on an HTTP error instead of saving the
+      # error body as the file. A file missing here would be dropped from
+      # the site by the PUT below, so abort rather than upload a partial tree.
+      curl_with_host -f -o "$SITE_DIR/$path" -sS "${PAGES_URL}/${path}" || {
+        echo "ERROR: failed to download ${path} - aborting rebuild" >&2
+        exit 1
+      }
+    done
+  fi
+fi
+
+# If nothing is left in the staging dir, upload a placeholder page instead.
+if [ -z "$(ls -A "$SITE_DIR" 2>/dev/null)" ]; then
+  echo "Site is empty - creating placeholder"
+  mkdir -p "$SITE_DIR/__placeholder__"
+  echo "placeholder" > "$SITE_DIR/__placeholder__/index.html"
+fi
+
+tar -cf "$NEW_TAR" -C "$SITE_DIR" . || {
+  echo "ERROR: failed to create site tar" >&2
+  exit 1
+}
+
+echo "PUT: replacing site contents..."
+HTTP_CODE=$(curl_with_host -X PUT "${PAGES_URL}/" \
   -H "Content-Type: application/x-tar" \
-  -H "Atomic: no" \
-  --data-binary @"${WHITEOUT_TAR}" \
+  --data-binary @"${NEW_TAR}" \
   -w "%{http_code}" \
   -o /dev/null)
 
-echo "HTTP $HTTP_CODE"
-if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "204" ]; then
-  echo "Retention cleanup finished."
+echo "HTTP ${HTTP_CODE}"
+if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "204" ]; then
+  echo "Site rebuild completed."
 else
-  echo "ERROR: retention HTTP ${HTTP_CODE}" >&2
+  echo "ERROR: PUT HTTP ${HTTP_CODE}" >&2
   exit 1
 fi
diff --git a/git-pages/templates/init-job.yaml b/git-pages/templates/init-job.yaml
index ef6403b..9cc1605 100644
--- a/git-pages/templates/init-job.yaml
+++ b/git-pages/templates/init-job.yaml
@@ -6,7 +6,7 @@ metadata:
   labels:
     {{- include "git-pages.componentLabels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install, post-upgrade
+    "helm.sh/hook": post-install
     "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
 spec:
   backoffLimit: 5
@@ -32,6 +32,16 @@ spec:
               -H "Host: {{ .Values.ingress.host }}" \
               -o /dev/null "http://git-pages:3000/.git-pages/health"
             do sleep 2; done
+            echo "Init: checking if site already exists..."
+            MANIFEST=$(curl -sf \
+              -H "Host: {{ .Values.ingress.host }}" \
+              "http://git-pages:3000/.git-pages/manifest.json" 2>/dev/null || echo "")
+
+            if echo "$MANIFEST" | grep -q '"contents"'; then
+              echo "Init: site already initialized, skipping"
+              exit 0
+            fi
+
             echo "Init: creating placeholder site..."
             WORK=$(mktemp -d)
             mkdir -p "$WORK/__init__"