joeblack2k · terafin · Jun 12, 2026 · Jun 12, 2026
diff --git a/.github/workflows/build-from-source.yml b/.github/workflows/build-from-source.yml
@@ -0,0 +1,202 @@
+# UNIVERSAL build-from-source workflow — BYTE-IDENTICAL across every Model B fork.
+#
+# DO NOT customize this file per-fork. If a fork needs different behavior, use
+# GitHub Actions repository Variables (Settings → Secrets and variables →
+# Actions → Variables):
+#
+#   DOCKERFILE      override auto-detected path (default: root Dockerfile, then shallowest)
+#   BUILD_CONTEXT   override docker build context (default: . — repo root)
+#   PLATFORMS       override platform list (default: linux/amd64,linux/arm64)
+#   BUILD_ARGS      multi-line KEY=VALUE pairs passed to docker build (default: none).
+#                   Used by forks whose upstream Dockerfile declares an ARG with no
+#                   default that the upstream's own CI passes externally (e.g.
+#                   tika-docker's TIKA_VERSION via republish-images.sh).
+#   IS_SOURCE_BUILT job-level gate: build runs only when set to 'true'. Plain-mirror
+#                   forks set 'false' so the workflow file stays installed (uniform
+#                   scaffolding) but the job no-ops; fork-publish.yml owns :latest.
+#   FREE_DISK_SPACE opt-in (set 'true') for forks whose images blow past the
+#                   14GB default disk on ubuntu-latest (vllm-class CUDA images).
+#                   Adds ~3 min per build; reclaims ~30GB by removing pre-installed
+#                   Android SDK / .NET / Haskell / large npm caches.
+#   RUNS_ON         per-fork runner-label override (default: "ubuntu-latest").
+#                   JSON-encoded — single string for one label, or JSON array for
+#                   multiple. Set to '["self-hosted","lan-docker","big-build"]'
+#                   for forks that exceed hosted-runner capacity (vllm).
+#   BUILD_TIMEOUT_MINUTES  per-fork job-timeout override (default 360, the
+#                   GitHub-hosted-runner ceiling). Raise as a ONE-SHOT bootstrap
+#                   on forks whose first cold-cache build exceeds 360min (vllm
+#                   single-arch CUDA: ~5-6h first run, ~30-45m once :buildcache
+#                   is populated). Because 360 is the hosted-runner ceiling, a
+#                   higher value only takes effect together with a self-hosted
+#                   RUNS_ON. Revert to default after first green run.
+#                   Honest-fact #97.
+#
+# If a fork's Dockerfile needs out-of-context pre-build steps, the right fix is
+# to cherry-pick a multi-stage self-contained Dockerfile from upstream into our
+# intarweb-dev — NOT to add pre-build logic here.
+#
+# Codified in oss-contributing:ghcr-fork-mirror skill honest-fact #52.
+
+name: Build from source → GHCR
+
+# Runs on every push to intarweb-dev and on manual dispatch.
+on:
+  push:
+    branches:
+      - intarweb-dev
+  workflow_dispatch:
+
+# One build at a time per repository. Schedule-driven sync pushes, Heal K
+# dispatches and manual workflow_dispatch runs can all arrive together. Builds
+# are expensive (vllm 30-45 min, bifrost 45 min), so the in-flight run is left
+# to finish and the next one queues behind it rather than cancelling it.
+# Honest-fact #74.
+concurrency:
+  cancel-in-progress: false
+  group: build-from-source-${{ github.repository }}
+
+# packages: write is needed because the job logs in to ghcr.io with
+# GITHUB_TOKEN and pushes the built image there.
+permissions:
+  packages: write
+  contents: read
+
+jobs:
+  build:
+    # Fleet-wide source-build vs plain-mirror switch. The job runs only when
+    # the repo Variable IS_SOURCE_BUILT is exactly 'true'; any other value, or
+    # leaving it unset, skips the build entirely. This is what lets the
+    # WORKFLOW FILE stay byte-identical across every fork (uniform infra
+    # scaffolding): operators flip a per-fork Variable instead of removing
+    # files or disabling workflows. Plain-mirror forks (docker-neo4j etc) keep
+    # build-from-source.yml installed, the job no-ops, and fork-publish.yml
+    # owns :latest for them.
+    if: vars.IS_SOURCE_BUILT == 'true'
+    # Job timeout, overridable per fork via repo Variable BUILD_TIMEOUT_MINUTES
+    # (default 360, the GitHub-hosted-runner hard ceiling). Raise to 720 as a
+    # ONE-SHOT bootstrap on forks whose first build has to populate
+    # :buildcache from scratch (vllm at single-arch CUDA: ~5-6h cold-cache vs
+    # ~30-45m warm), then revert to 360 once warm. Since 360 is the hosted
+    # ceiling, a larger value presumably only helps when RUNS_ON selects a
+    # self-hosted runner. Codified after 27340784996 / 27373307060 cancelled
+    # at 360min on the vllm fork's first cold-cache run. Honest-fact #91.
+    timeout-minutes: ${{ fromJSON(vars.BUILD_TIMEOUT_MINUTES || '360') }}
+    # Runner labels, overridable per fork via repo Variable RUNS_ON (default:
+    # ubuntu-latest hosted). The value goes through fromJSON, so it must be
+    # JSON: a quoted string for one label, or a single-line array such as
+    # '["self-hosted","lan-docker","big-build"]' for forks whose builds exceed
+    # hosted-runner capacity (16GB RAM / 14GB disk — vllm's CUDA csrc-build is
+    # the canonical case). Honest-fact #89.
+    runs-on: ${{ fromJSON(vars.RUNS_ON || '"ubuntu-latest"') }}
+    steps:
+      # Check out the pushed ref; persist-credentials is switched off.
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      # Opt-in disk reclamation, gated behind the FREE_DISK_SPACE Variable so
+      # that non-vllm-class forks don't pay the ~3 min it costs. Meant for
+      # forks whose images blow past the 14GB default disk on ubuntu-latest
+      # hosted runners (vllm is the canonical offender — its CUDA build
+      # artifacts + intermediate layers easily exceed 20GB). Frees ~30GB by
+      # removing pre-installed Android SDK, .NET, Haskell, large npm caches,
+      # and codeql databases.
+      - name: 🧹 Free disk space (opt-in via FREE_DISK_SPACE Variable)
+        uses: jlumbroso/free-disk-space@v1.3.1
+        if: vars.FREE_DISK_SPACE == 'true'
+        with:
+          # Options switched on.
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          tool-cache: true
+          # Options switched off.
+          docker-images: false
+          swap-storage: false
+
+      - name: 🔍 Resolve Dockerfile
+        id: df
+        env:
+          DOCKERFILE_VAR: ${{ vars.DOCKERFILE }}
+        run: |
+          set -euo pipefail
+          # Step outputs:
+          #   path  Dockerfile to build (only written when one was resolved)
+          #   skip  'true' when there is nothing to build, 'false' otherwise
+
+          # Record a resolved Dockerfile path and end the step successfully.
+          use_dockerfile() {
+            echo "path=$1" >> "$GITHUB_OUTPUT"
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          }
+
+          # 1. Explicit override (repo Variable) always wins. A path that does
+          #    not exist in the checkout fails here, with a clear message,
+          #    instead of surfacing later as an opaque build error after
+          #    QEMU/Buildx/login have already run.
+          if [ -n "${DOCKERFILE_VAR}" ]; then
+            if [ ! -f "${DOCKERFILE_VAR}" ]; then
+              echo "::error::DOCKERFILE override '${DOCKERFILE_VAR}' not found in the checkout"
+              exit 1
+            fi
+            echo "  ✓ using DOCKERFILE override: ${DOCKERFILE_VAR}"
+            use_dockerfile "${DOCKERFILE_VAR}"
+          fi
+          # 2. Prefer a root Dockerfile.
+          if [ -f Dockerfile ]; then
+            echo "  ✓ found root Dockerfile"
+            use_dockerfile Dockerfile
+          fi
+          # 3. Deterministic discovery: shallowest non-test Dockerfile.
+          #    awk prefixes each path with its number of '/' separators,
+          #    sort -n orders by that depth, head keeps the shallowest, and
+          #    cut strips the depth prefix again.
+          cand="$(find . -maxdepth 4 -type f -name Dockerfile \
+                    -not -path './node_modules/*' \
+                    -not -path './.git/*' \
+                    -not -path '*/test/*' \
+                    -not -path '*/tests/*' \
+                    -not -path '*/example*/*' \
+                    -not -path './vendor/*' \
+                    -not -path './third_party/*' \
+                  | sed 's|^\./||' \
+                  | awk '{print gsub(/\//,"/"), $0}' \
+                  | sort -n | head -n1 | cut -d' ' -f2- || true)"
+          if [ -z "$cand" ]; then
+            echo "  ✗ no Dockerfile found → plain-mirror fallback (no image built)"
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "  ✓ auto-found: $cand"
+          use_dockerfile "$cand"
+
+      # The three setup steps below are skipped when the Resolve Dockerfile
+      # step reported skip == 'true' (no Dockerfile, nothing to build).
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        if: steps.df.outputs.skip != 'true'
+
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@v3
+        if: steps.df.outputs.skip != 'true'
+
+      # Authenticates against ghcr.io as the triggering actor using the
+      # workflow's GITHUB_TOKEN.
+      - name: Log in to ghcr.io
+        uses: docker/login-action@v3
+        if: steps.df.outputs.skip != 'true'
+        with:
+          password: ${{ secrets.GITHUB_TOKEN }}
+          username: ${{ github.actor }}
+          registry: ghcr.io
+
+      - name: 🔡 Compute image (lowercase — GHCR requires lowercase repo names)
+        id: imgname
+        # Same gate as the other build steps: nothing to compute when no
+        # Dockerfile was resolved.
+        if: steps.df.outputs.skip != 'true'
+        env:
+          # Handed over via the environment instead of being ${{ }}-expanded
+          # inside the script, so the value can never be parsed as shell.
+          RAW_NAME: ${{ vars.IMAGE_NAME || github.event.repository.name }}
+        run: |
+          set -euo pipefail
+          # vars.IMAGE_NAME may already be lowercase (set as such manually), but
+          # the github.event.repository.name fallback can be CamelCase if the fork
+          # repo name has uppercase letters (e.g. RetroSaveManager). docker/build-push
+          # rejects ANY uppercase in the image path, and that includes the owner
+          # segment — so the WHOLE reference is lowercased, not just the name.
+          # fork-publish.yml lowercases its full image reference via tr as well.
+          # Honest-fact #63.
+          IMAGE_REF="ghcr.io/${GITHUB_REPOSITORY_OWNER}/${RAW_NAME}"
+          IMAGE_REF_LOWER=$(printf '%s' "$IMAGE_REF" | tr '[:upper:]' '[:lower:]')
+          echo "image=${IMAGE_REF_LOWER}" >> "$GITHUB_OUTPUT"
+
+      - name: 🏷️ Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        if: steps.df.outputs.skip != 'true'
+        with:
+          # Image reference comes from the imgname step, which honours the
+          # per-fork repo Variable IMAGE_NAME — for forks where the published
+          # image name differs from the repo name (docker-autoheal repo →
+          # autoheal image) — and falls back to the repo name. Same as
+          # fork-publish.yml so there is ONE image per fork. It is lowercased
+          # in that step because metadata-action's lowercase only applies to
+          # tags, not to the images input — uppercase repo names slip through
+          # and break build-push with "repository name must be lowercase".
+          images: ${{ steps.imgname.outputs.image }}
+          # :latest is enabled only when the run's ref is intarweb-dev; every
+          # build additionally gets a short-SHA tag prefixed with sha-.
+          tags: |
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/intarweb-dev' }}
+            type=sha,prefix=sha-,format=short
+
+      - name: 🚀 Build & push
+        if: steps.df.outputs.skip != 'true'
+        uses: docker/build-push-action@v6
+        with:
+          # Context, Dockerfile and platform list, each overridable per fork
+          # (BUILD_CONTEXT / DOCKERFILE via the df step / PLATFORMS).
+          context: ${{ vars.BUILD_CONTEXT || '.' }}
+          file: ${{ steps.df.outputs.path }}
+          platforms: ${{ vars.PLATFORMS || 'linux/amd64,linux/arm64' }}
+          # Per-fork override via repo Variable BUILD_ARGS — multi-line, one
+          # KEY=VALUE per line. Used by forks whose Dockerfile declares an ARG
+          # with no default (e.g. tika-docker's TIKA_VERSION). Honest-fact #88.
+          build-args: ${{ vars.BUILD_ARGS || '' }}
+          # Registry-backed layer cache stored next to the image as the
+          # :buildcache tag. The timeout notes in this workflow rely on it
+          # (cold-cache vs "once :buildcache is populated"), but without these
+          # two inputs nothing is ever imported or exported and every build
+          # starts cold. mode=max also caches intermediate stages. A missing
+          # cache on the very first run is expected to be non-fatal.
+          cache-from: type=registry,ref=${{ steps.imgname.outputs.image }}:buildcache
+          cache-to: type=registry,ref=${{ steps.imgname.outputs.image }}:buildcache,mode=max
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          provenance: false
diff --git a/.github/workflows/fold-on-push.yml b/.github/workflows/fold-on-push.yml
@@ -0,0 +1,64 @@
+# UNIVERSAL fold-on-push trigger — byte-identical across every Model B fork.
+#
+# Purpose: when a push lands on ANY branch other than the default branch or
+# intarweb-dev, dispatch sync-upstream.yml to re-fold + re-publish. This is
+# the event-driven counterpart to the hourly schedule — covers "new commit
+# landed on a PR head branch" and "operator pushed a new branch they're
+# about to PR" instantly, without waiting up to 1 hour.
+#
+# Why every-branch-except-defaults: PR head branches use varied naming
+# (feat/*, fix/*, test/*, docs/*, etc). Enumerating patterns means missing
+# new ones (build-overlay.yml's pattern misses feat/* and docs/*, MEASURED
+# 2026-06-10). Wildcard with explicit exclusions for self-trigger paths is
+# the simplest correct shape.
+#
+# Loop prevention: branches-ignore covers EVERY ref this workflow's downstream
+# (sync-upstream.yml) pushes — main, master, intarweb-dev. Pushes from the
+# hard-reset/ops-overlay/intarweb-dev-regen logic therefore do NOT re-fire
+# this workflow.
+#
+# Cadence bypass: the dispatch into sync-upstream.yml fires it as a
+# workflow_dispatch event. sync-upstream's cadence gate explicitly bypasses
+# for non-schedule events (see its ⏱️ Cadence gate step). So a push triggers
+# an immediate fold regardless of vars.SYNC_CADENCE_HOURS.
+#
+# Codified in oss-contributing:ghcr-fork-mirror skill honest-fact #64.
+
+name: Fold on PR head-branch push
+
+# Fires on pushes to every branch except the ones listed here.
+on:
+  push:
+    branches-ignore: [main, master, intarweb-dev]
+
+permissions:
+  # required for `gh workflow run` dispatch into sync-upstream
+  # NOTE(review): the dispatch step authenticates with the minted app token
+  # rather than GITHUB_TOKEN — confirm this grant is still needed.
+  actions: write
+
+jobs:
+  dispatch-sync:
+    # Second loop guard on top of branches-ignore: `on:` filters cannot use
+    # expressions, so a fork whose default branch is not literally main or
+    # master would otherwise re-trigger itself when sync-upstream pushes to
+    # the default branch.
+    if: github.ref_name != github.event.repository.default_branch
+    runs-on: ubuntu-latest
+    steps:
+      # Mint an installation token for the org bot app; the dispatch below
+      # authenticates with it.
+      - name: 🤖 Mint org-bot app token
+        id: app-token
+        uses: actions/create-github-app-token@v1
+        with:
+          app-id: ${{ vars.SYNC_APP_ID }}
+          private-key: ${{ secrets.SYNC_APP_PRIVATE_KEY }}
+
+      - name: 🌿 Dispatch sync-upstream (full re-fold)
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token }}
+          # Branch names are chosen by whoever pushes and may contain shell
+          # metacharacters ($(...), backticks, ;). They are handed to the
+          # script as environment variables and never ${{ }}-expanded inside
+          # it, so they cannot be executed while GH_TOKEN is in scope.
+          PUSHED_BRANCH: ${{ github.ref_name }}
+          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+        run: |
+          set -euo pipefail
+          echo "  push detected on branch: ${PUSHED_BRANCH}"
+          echo "  triggering full re-fold via sync-upstream.yml"
+          # The workflow_dispatch event is whitelisted by sync-upstream's
+          # cadence gate — runs immediately, bypassing SYNC_CADENCE_HOURS.
+          # --ref points at the workflow file location (default branch),
+          # NOT the branch that was pushed. sync-upstream itself always
+          # operates on default branch + folds open PRs regardless of
+          # which branch triggered this dispatch.
+          gh workflow run "Sync from upstream + auto-regen intarweb-dev" \
+            --repo "${GITHUB_REPOSITORY}" \
+            --ref "${DEFAULT_BRANCH}"
diff --git a/.github/workflows/fork-publish.yml b/.github/workflows/fork-publish.yml
@@ -0,0 +1,115 @@
+# UNIVERSAL fork-publish workflow — BYTE-IDENTICAL across every fork.
+#
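+# Mirrors upstream's released versioned tags to ghcr.io/<owner>/<image>, where
+# <image> is the IMAGE_NAME repo Variable or, failing that, the repo name
+# (lowercased with tr in the mirror step). Same image namespace as
+# build-from-source.yml — ONE concept, one image per fork.
+#
+# On source-built forks (IS_SOURCE_BUILT=true) :latest is owned by
+# build-from-source.yml (built from intarweb-dev) and this workflow publishes
+# VERSIONED tags only: each discovered release (e.g. 1.2.3) plus its major (1).
+# On plain-mirror forks it additionally aliases :latest to the newest version
+# it mirrored.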
+#
+# Codified in oss-contributing:ghcr-fork-mirror skill honest-fact #57.
+
+name: Fork-publish (mirror upstream → GHCR)
+
+on:
+  workflow_dispatch:
+  schedule:
+    # daily 06:17 UTC — well before sync-upstream's :00 cron
+    - cron: "17 6 * * *"
+
+permissions:
+  packages: write
+  contents: read
+
+env:
+  # Target image, shared with build-from-source.yml so there is ONE image per
+  # fork. The repo Variable IMAGE_NAME overrides the name for forks where
+  # upstream's published image name differs from our repo name (e.g.
+  # docker-autoheal repo → autoheal image); otherwise the repo name is used.
+  # NOTE(review): github.event.repository does not appear to be populated on
+  # `schedule` runs, which would leave the fallback empty on the cron trigger
+  # unless IMAGE_NAME is set — confirm, and prefer deriving the name from
+  # GITHUB_REPOSITORY where this value is consumed.
+  IMAGE: ghcr.io/${{ github.repository_owner }}/${{ vars.IMAGE_NAME || github.event.repository.name }}
+
+jobs:
+  mirror:
+    runs-on: ubuntu-latest
+    steps:
+      # Buildx provides the `docker buildx imagetools` commands the mirror
+      # step relies on.
+      - uses: docker/setup-buildx-action@v3
+
+      # Authenticate against ghcr.io as the triggering actor with the
+      # workflow's GITHUB_TOKEN.
+      - name: 🔑 Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          password: ${{ secrets.GITHUB_TOKEN }}
+          username: ${{ github.actor }}
+          registry: ghcr.io
+
+      - name: 🔍 Discover upstream tags
+        id: tags
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Optional override, read from the environment rather than being
+          # ${{ }}-expanded inside the script.
+          UPSTREAM_IMAGE_VAR: ${{ vars.UPSTREAM_IMAGE }}
+        run: |
+          set -euo pipefail
+          # Step outputs:
+          #   upstream_image  image reference to mirror FROM (lowercased)
+          #   tags            space-separated versions, followed by their majors
+
+          # GitHub repo (for `gh release list` to discover versioned tags)
+          UPSTREAM_REPO="joeblack2k/RetroSaveManager"
+          # Docker image source (where imagetools mirrors FROM). Defaults to
+          # GitHub repo path; override via repo Variable UPSTREAM_IMAGE for forks
+          # where Docker Hub path differs (e.g. acmesh-official/acme.sh on GitHub
+          # but neilpang/acme.sh on Docker Hub). Lowercased because image
+          # references must be lowercase: a CamelCase GitHub path would make
+          # every inspect fail and every tag be skipped.
+          UPSTREAM_IMAGE=$(printf '%s' "${UPSTREAM_IMAGE_VAR:-$UPSTREAM_REPO}" | tr '[:upper:]' '[:lower:]')
+
+          # Keep only names that are syntactically valid image tags. Upstream
+          # controls these strings; anything else could never be mirrored and
+          # must not travel further down the pipeline.
+          only_valid_tags() { grep -E '^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$' || true; }
+
+          RELEASES=$(gh release list --repo "$UPSTREAM_REPO" --limit 3 --json tagName \
+            --jq '.[].tagName | sub("^v"; "")' 2>/dev/null \
+            | only_valid_tags | tr '\n' ' ' || echo "")
+          # Fallback: if upstream uses raw git tags without GH Release objects
+          # (e.g. neo4j/docker-neo4j), gh release list returns empty. Use the
+          # git tags API and filter for version-shaped tags (must contain at least
+          # one N.N component; skips purely descriptive words like
+          # community/latest/enterprise/nightly/dev/main). Honest-fact #86.
+          if [ -z "$(echo "$RELEASES" | tr -d ' ')" ]; then
+            RELEASES=$(gh api "repos/$UPSTREAM_REPO/tags?per_page=10" \
+              --jq '.[].name' 2>/dev/null \
+              | grep -E '[0-9]+\.[0-9]+' \
+              | sed 's/^v//' \
+              | only_valid_tags \
+              | head -3 \
+              | tr '\n' ' ' || echo "")
+            [ -n "$(echo "$RELEASES" | tr -d ' ')" ] && \
+              echo "  (release-list was empty; fell back to git tags API)"
+          fi
+          # Major aliases: everything before the first dot of each version.
+          MAJORS=$(for r in $RELEASES; do echo "${r%%.*}"; done | sort -u | tr '\n' ' ')
+          TAGS="$RELEASES $MAJORS"
+          echo "upstream_image=$UPSTREAM_IMAGE" >> "$GITHUB_OUTPUT"
+          echo "tags=$TAGS" >> "$GITHUB_OUTPUT"
+          echo "  will mirror: $TAGS"
+
+      - name: 🪞 Mirror tags to GHCR
+        env:
+          # All inputs arrive through the environment and are never
+          # ${{ }}-expanded inside the script. The tag list is derived from
+          # upstream's tag names; expanding it into the script text would let
+          # a crafted upstream tag run commands with packages: write.
+          UPSTREAM: ${{ steps.tags.outputs.upstream_image }}
+          TAGS: ${{ steps.tags.outputs.tags }}
+          IMAGE_NAME_VAR: ${{ vars.IMAGE_NAME }}
+          IS_SOURCE_BUILT: ${{ vars.IS_SOURCE_BUILT }}
+        run: |
+          set -euo pipefail
+          # Target image: IMAGE_NAME override, else the repo name taken from
+          # GITHUB_REPOSITORY (owner/name). The workflow-level IMAGE value is
+          # deliberately not used: its fallback reads
+          # github.event.repository.name, which is not available on the cron
+          # trigger. Lowercased because Docker requires lowercase repo names.
+          REPO_NAME="${IMAGE_NAME_VAR:-${GITHUB_REPOSITORY#*/}}"
+          TARGET=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${REPO_NAME}" | tr '[:upper:]' '[:lower:]')
+          NEWEST_VERSIONED=""
+          # Intentionally unquoted: TAGS is a space-separated list.
+          for tag in $TAGS; do
+            [ -z "$tag" ] && continue
+            echo "::group::Mirror $UPSTREAM:$tag → $TARGET:$tag"
+            if docker buildx imagetools inspect "$UPSTREAM:$tag" >/dev/null 2>&1; then
+              docker buildx imagetools create --tag "$TARGET:$tag" "$UPSTREAM:$tag"
+              echo "  ✓ mirrored $tag"
+              # Track the first (newest) versioned tag for :latest aliasing below
+              if [ -z "$NEWEST_VERSIONED" ]; then NEWEST_VERSIONED="$tag"; fi
+            else
+              echo "  - upstream tag $tag does not exist, skipping"
+            fi
+            echo "::endgroup::"
+          done
+
+          # Also publish :latest pointing to the newest successfully-mirrored version,
+          # ONLY if this fork is plain-mirror (i.e. has no source-build). Source-built
+          # forks (vars.IS_SOURCE_BUILT=true) own :latest via build-from-source.yml,
+          # which builds from intarweb-dev (= upstream + open PRs cherry-picked + ops
+          # overlay). Letting fork-publish stomp :latest on those forks would clobber
+          # our patch stack with upstream-pristine, breaking the contract.
+          # Plain-mirror forks (var unset) keep the historical behavior — fork-publish
+          # IS the :latest source. Honest-fact #72.
+          if [ -n "$NEWEST_VERSIONED" ] && [ "$IS_SOURCE_BUILT" != "true" ]; then
+            echo "::group::Alias :latest → :$NEWEST_VERSIONED"
+            docker buildx imagetools create --tag "$TARGET:latest" "$UPSTREAM:$NEWEST_VERSIONED"
+            echo "  ✓ :latest → :$NEWEST_VERSIONED"
+            echo "::endgroup::"
+          elif [ "$IS_SOURCE_BUILT" = "true" ]; then
+            echo "  - skipping :latest alias (IS_SOURCE_BUILT=true; build-from-source.yml owns :latest)"
+          fi