-
-
Notifications
You must be signed in to change notification settings - Fork 126
fix(backend): clean stuck manifest backlog #2257
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
riderx
wants to merge
7
commits into
main
Choose a base branch
from
codex/manifest-cleanup-health
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
ed1a77d
fix(backend): clean stuck manifest backlog
riderx c7f2de0
fix(backend): validate manifest cleanup db tls
riderx c9f320a
test(backend): guard shared manifest cleanup
riderx a900db1
test(backend): document manifest cleanup batch size
riderx d3ec77d
Merge remote-tracking branch 'origin/main' into codex/manifest-cleanu…
riderx 0c7bd17
fix(backend): clean file handler comments
riderx 4a3669f
test(backend): align file read availability checks
riderx File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,248 @@ | ||
| /* | ||
| * Audit and clear old manifest rows stuck behind soft-deleted bundles. | ||
| * | ||
| * Dry run: | ||
| * bun run admin:cleanup-stuck-manifest-backlog | ||
| * | ||
| * Apply: | ||
| * bun run admin:cleanup-stuck-manifest-backlog --apply | ||
| * | ||
| * Optional: | ||
| * bun run admin:cleanup-stuck-manifest-backlog --apply --db-url="$DATABASE_URL" | ||
| * bun run admin:cleanup-stuck-manifest-backlog --apply --env-file=./internal/cloudflare/.env.prod | ||
| * bun run admin:cleanup-stuck-manifest-backlog --apply --max-batches=1000 --pause-ms=250 | ||
| * bun run admin:cleanup-stuck-manifest-backlog --apply --skip-vacuum | ||
| */ | ||
| import process from 'node:process' | ||
| import { setTimeout as sleep } from 'node:timers/promises' | ||
| import { Client } from 'pg' | ||
| import { DEFAULT_ENV_FILE, getArgValue, loadEnv, parsePositiveInteger } from './admin_stripe_backfill_utils.ts' | ||
|
|
||
/**
 * Human-readable sizes of `public.manifest`, as produced by the
 * `pg_size_pretty(...)` columns selected in `getTableSize`.
 */
interface TableSizeRow {
  /** Combined size of the table including indexes (`pg_total_relation_size`). */
  total: string
  /** Size of the table's main relation (`pg_relation_size`). */
  heap: string
  /** Size of all indexes on the table (`pg_indexes_size`). */
  indexes: string
}

/**
 * Row read from `pg_stat_user_tables` for `public.manifest`.
 * Counters are declared as strings; timestamps are `null` when the column is NULL.
 */
interface VacuumStatsRow {
  n_live_tup: string
  n_dead_tup: string
  last_autovacuum: string | null
  last_autoanalyze: string | null
}

/**
 * One aggregate bucket from `getManifestBuckets`: the bucket label
 * (`active`, `past_hard_delete` or `soft_deleted_waiting`) with its counts cast to text.
 */
interface BucketRow {
  bucket: string
  /** Number of manifest rows in the bucket (`count(*)::text`). */
  manifest_rows: string
  /** Number of distinct app versions in the bucket (`count(DISTINCT av.id)::text`). */
  versions: string
}

/** Single-row result of the eligible-version count query (`count(*)::text`). */
interface EligibleVersionRow {
  eligible_versions: string
}
|
|
||
/** Batch cap used when `--max-batches` is not passed on the command line. */
const DEFAULT_MAX_BATCHES = 1_000
/** Delay between batches, in milliseconds, used when `--pause-ms` is not passed. */
const DEFAULT_PAUSE_MS = 250
|
|
||
/**
 * Write the command-line usage text to stdout.
 *
 * The text interpolates the default env file path and the default values for
 * `--max-batches` and `--pause-ms` so the help always matches the constants in use.
 */
function printHelp() {
  const usageText = `Audit and clear old manifest rows stuck behind soft-deleted bundles.

Usage:
bun run admin:cleanup-stuck-manifest-backlog [options]

Options:
--apply Delete old soft-deleted versions by calling public.delete_old_deleted_versions().
--db-url=URL Postgres connection string. Overrides env file values.
--env-file=PATH Env file to load. Default: ${DEFAULT_ENV_FILE}.
--max-batches=N Maximum cleanup batches to run. Default: ${DEFAULT_MAX_BATCHES}.
--pause-ms=N Delay between batches. Default: ${DEFAULT_PAUSE_MS}.
--skip-vacuum Do not run VACUUM (ANALYZE) public.manifest after apply.
--help Show this help.

Required env:
DATABASE_URL, SUPABASE_DB_URL, POSTGRES_URL, or PGDATABASE_URL
`
  console.log(usageText)
}
|
|
||
/**
 * Parse an optional command-line value as a non-negative integer.
 *
 * @param value Raw argument text, or `null` when the flag was not supplied.
 * @param label Flag name used in the error message.
 * @param fallback Value returned when `value` is `null`.
 * @returns The parsed integer, or `fallback` when `value` is `null`.
 * @throws Error when `value` is not a plain decimal, non-negative, safe integer.
 */
function parseNonNegativeInteger(value: string | null, label: string, fallback: number) {
  if (value === null)
    return fallback

  // Number.parseInt stops at the first non-digit, so "250ms" or "1.5" would be
  // silently truncated to 250 / 1. Require the whole trimmed value to be digits.
  const text = value.trim()
  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN
  if (!Number.isSafeInteger(parsed) || parsed < 0)
    throw new Error(`${label} must be a non-negative integer`)

  return parsed
}
|
|
||
/**
 * Resolve the Postgres connection string.
 *
 * An explicit `--db-url` argument always wins and is returned unchanged. Otherwise the
 * first non-blank value among `DATABASE_URL`, `SUPABASE_DB_URL`, `POSTGRES_URL` and
 * `PGDATABASE_URL` (in that order) is returned, trimmed.
 *
 * @param env Environment map to read the URL variables from.
 * @param args Command-line arguments to search for `--db-url`.
 * @returns The connection string, or `null` when no source provides one.
 */
function getDatabaseUrl(env: Record<string, string | undefined>, args: string[]) {
  const fromArgs = getArgValue(args, '--db-url')
  if (fromArgs != null)
    return fromArgs

  // A variable set to "" (common in env files) is not nullish, so a `??` chain would
  // stop on it and hide a later, valid variable. Skip blank values explicitly.
  for (const key of ['DATABASE_URL', 'SUPABASE_DB_URL', 'POSTGRES_URL', 'PGDATABASE_URL']) {
    const candidate = env[key]?.trim()
    if (candidate)
      return candidate
  }

  return null
}
|
|
||
/**
 * Decide whether the connection should be opened with TLS.
 *
 * TLS is skipped when the URL carries `sslmode=disable` or when the host is a loopback
 * name/address (`localhost`, `127.0.0.1`, or IPv6 `::1`); every other host uses TLS.
 *
 * @param databaseUrl Postgres connection string in URL form.
 * @returns `true` when TLS should be used.
 * @throws TypeError when `databaseUrl` cannot be parsed by `URL`.
 */
function shouldUseSsl(databaseUrl: string) {
  const url = new URL(databaseUrl)
  if (url.searchParams.get('sslmode') === 'disable')
    return false

  // `URL` keeps the brackets on IPv6 literals and does not lowercase hosts of
  // non-special schemes such as postgres://, so normalise before comparing.
  const host = url.hostname.toLowerCase()
  const loopbackHosts = new Set(['localhost', '127.0.0.1', '[::1]', '::1'])
  return !loopbackHosts.has(host)
}
|
|
||
/**
 * Fetch pretty-printed heap, index and total sizes of `public.manifest`.
 *
 * @param client Connected Postgres client used to run the query.
 * @returns The first row of the size query.
 */
async function getTableSize(client: Client) {
  const sizeSql = `
    SELECT
      pg_size_pretty(pg_relation_size('public.manifest')) AS heap,
      pg_size_pretty(pg_indexes_size('public.manifest')) AS indexes,
      pg_size_pretty(pg_total_relation_size('public.manifest')) AS total
  `
  const { rows } = await client.query<TableSizeRow>(sizeSql)
  return rows[0]
}
|
|
||
/**
 * Read live/dead tuple counters and the last autovacuum/autoanalyze timestamps for
 * `public.manifest` from `pg_stat_user_tables`.
 *
 * @param client Connected Postgres client used to run the query.
 * @returns The first matching row, or `undefined` when the query returns no rows.
 */
async function getVacuumStats(client: Client) {
  const statsSql = `
    SELECT n_live_tup, n_dead_tup, last_autovacuum, last_autoanalyze
    FROM pg_stat_user_tables
    WHERE schemaname = 'public' AND relname = 'manifest'
  `
  const { rows } = await client.query<VacuumStatsRow>(statsSql)
  return rows[0]
}
|
|
||
/**
 * Group manifest rows by the state of their owning app version.
 *
 * Buckets: `active` (version not deleted), `past_hard_delete` (deleted more than three
 * months ago) and `soft_deleted_waiting` (everything else), ordered by row count descending.
 *
 * @param client Connected Postgres client used to run the query.
 * @returns One row per bucket with manifest-row and distinct-version counts.
 */
async function getManifestBuckets(client: Client) {
  const bucketSql = `
    SELECT
      CASE
        WHEN av.deleted = false THEN 'active'
        WHEN av.deleted_at < now() - interval '3 months' THEN 'past_hard_delete'
        ELSE 'soft_deleted_waiting'
      END AS bucket,
      count(*)::text AS manifest_rows,
      count(DISTINCT av.id)::text AS versions
    FROM public.manifest m
    JOIN public.app_versions av ON av.id = m.app_version_id
    GROUP BY 1
    ORDER BY count(*) DESC
  `
  const { rows } = await client.query<BucketRow>(bucketSql)
  return rows
}
|
|
||
/**
 * Count app versions that were soft-deleted more than three months ago, are not named
 * `builtin` or `unknown`, and are not referenced by any channel.
 *
 * @param client Connected Postgres client used to run the query.
 * @returns The count as a number; `0` when the query yields no row.
 */
async function getEligibleVersionCount(client: Client) {
  const countSql = `
    SELECT count(*)::text AS eligible_versions
    FROM public.app_versions av
    WHERE av.deleted_at IS NOT NULL
      AND av.deleted_at < now() - interval '3 months'
      AND av.name NOT IN ('builtin', 'unknown')
      AND NOT EXISTS (
        SELECT 1
        FROM public.channels
        WHERE channels.version = av.id
      )
  `
  const { rows } = await client.query<EligibleVersionRow>(countSql)
  const countText = rows[0]?.eligible_versions ?? '0'
  return Number.parseInt(countText, 10)
}
|
|
||
/**
 * Print one audit section: a title line followed by tables for the relation size,
 * the vacuum statistics (only when available) and the manifest buckets.
 *
 * @param title Heading printed above the tables.
 * @param size Size row for the manifest table.
 * @param stats Vacuum statistics row, or `undefined` to omit that table.
 * @param buckets Bucket rows to print as the final table.
 */
function printAudit(title: string, size: TableSizeRow, stats: VacuumStatsRow | undefined, buckets: BucketRow[]) {
  console.log(`\n${title}`)

  const sections: unknown[][] = stats
    ? [[size], [stats], buckets]
    : [[size], buckets]
  for (const section of sections)
    console.table(section)
}
|
|
||
/**
 * Repeatedly call `public.delete_old_deleted_versions()` until no eligible versions
 * remain, the batch cap is reached, or a batch makes no progress.
 *
 * Progress is measured by re-counting eligible versions after every batch, so the
 * per-batch "deleted" figure is an estimate (hence "about" in the log line).
 *
 * @param client Connected Postgres client used for the cleanup call and the counts.
 * @param maxBatches Maximum number of cleanup calls to issue.
 * @param pauseMs Delay between consecutive batches in milliseconds; `0` disables it.
 * @returns The number of batches executed and the most recent eligible-version count.
 */
async function runCleanupLoop(client: Client, maxBatches: number, pauseMs: number) {
  let batches = 0
  let remaining = await getEligibleVersionCount(client)
  console.log(`Eligible old deleted versions before cleanup: ${remaining}`)

  while (remaining > 0 && batches < maxBatches) {
    batches += 1
    await client.query('SELECT public.delete_old_deleted_versions()')

    const previousRemaining = remaining
    // Always keep the latest count so the value returned to the caller is never stale,
    // even when the loop stops because the count did not shrink (or grew).
    remaining = await getEligibleVersionCount(client)
    const deleted = Math.max(previousRemaining - remaining, 0)
    console.log(`Batch ${batches}: deleted about ${deleted} versions, ${remaining} eligible remain`)

    if (remaining >= previousRemaining) {
      console.log('No progress detected; stopping to avoid looping on a blocked cleanup.')
      break
    }

    // Only pause when another batch will actually run; sleeping after the final batch
    // just delays the caller.
    const anotherBatchFollows = remaining > 0 && batches < maxBatches
    if (anotherBatchFollows && pauseMs > 0)
      await sleep(pauseMs)
  }

  return {
    batches,
    remaining,
  }
}
|
|
||
/**
 * Script entry point: audit `public.manifest`, optionally run the cleanup loop, and
 * print a second audit afterwards.
 *
 * Flow:
 * 1. `--help` prints usage and exits.
 * 2. Environment is built from `process.env` overlaid with the env file (file values win);
 *    `--db-url` takes precedence over both when resolving the connection string.
 * 3. Without `--apply` only the "Before cleanup" audit is printed (dry run).
 * 4. With `--apply` the cleanup loop runs, then `VACUUM (ANALYZE)` unless `--skip-vacuum`
 *    was passed, then the "After cleanup" audit is printed.
 *
 * @throws Error when no database URL can be resolved or a numeric flag is invalid.
 */
async function main() {
  const args = Bun.argv.slice(2)
  if (args.includes('--help')) {
    printHelp()
    return
  }

  const apply = args.includes('--apply')
  const skipVacuum = args.includes('--skip-vacuum')
  const envFile = getArgValue(args, '--env-file') ?? DEFAULT_ENV_FILE
  const env = { ...process.env, ...await loadEnv(envFile) }
  const databaseUrl = getDatabaseUrl(env, args)
  if (!databaseUrl)
    throw new Error('Missing database URL. Set DATABASE_URL, SUPABASE_DB_URL, POSTGRES_URL, PGDATABASE_URL, or pass --db-url.')

  const maxBatches = parsePositiveInteger(getArgValue(args, '--max-batches'), '--max-batches', DEFAULT_MAX_BATCHES)
  const pauseMs = parseNonNegativeInteger(getArgValue(args, '--pause-ms'), '--pause-ms', DEFAULT_PAUSE_MS)
  const client = new Client({
    application_name: 'capgo_cleanup_stuck_manifest_backlog',
    connectionString: databaseUrl,
    // Verify the server certificate whenever TLS is used.
    ssl: shouldUseSsl(databaseUrl) ? { rejectUnauthorized: true } : undefined,
  })

  await client.connect()
  try {
    // Session-level limits: bound each statement and avoid waiting indefinitely on locks.
    await client.query('SELECT set_config($1, $2, false)', ['statement_timeout', '15min'])
    await client.query('SELECT set_config($1, $2, false)', ['lock_timeout', '10s'])

    printAudit(
      'Before cleanup',
      await getTableSize(client),
      await getVacuumStats(client),
      await getManifestBuckets(client),
    )

    if (!apply) {
      console.log('\nDry run only. Re-run with --apply to delete old soft-deleted versions and cascade stuck manifest rows.')
      return
    }

    const cleanup = await runCleanupLoop(client, maxBatches, pauseMs)
    if (cleanup.remaining > 0) {
      // Only suggest raising --max-batches when the cap was actually what stopped the loop;
      // otherwise the loop stopped because a batch made no progress, and more batches will not help.
      const hint = cleanup.batches >= maxBatches
        ? 'Increase --max-batches after checking database load.'
        : 'Cleanup stopped making progress; check for locked or blocked rows before retrying.'
      console.log(`Stopped with ${cleanup.remaining} eligible versions still remaining. ${hint}`)
    }

    if (!skipVacuum) {
      console.log('\nRunning VACUUM (ANALYZE) public.manifest...')
      await client.query('VACUUM (ANALYZE) public.manifest')
    }

    printAudit(
      'After cleanup',
      await getTableSize(client),
      await getVacuumStats(client),
      await getManifestBuckets(client),
    )
  }
  finally {
    // Always release the connection, including on the dry-run early return and on errors.
    await client.end()
  }
}

await main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
supabase/migrations/20260513003104_manifest_cleanup_health.sql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
-- Bound each hard-delete pass so that manifest cascades never pile up into a
-- single very large delete transaction when the retention job has a backlog.
CREATE OR REPLACE FUNCTION public.delete_old_deleted_versions() RETURNS void
LANGUAGE plpgsql
SECURITY DEFINER
SET search_path = ''
AS $$
DECLARE
  purged_count bigint;
BEGIN
  -- Pick at most 500 of the oldest eligible versions, skipping rows that another
  -- session currently holds locked, then delete exactly those rows.
  WITH deleted_versions AS (
    SELECT av.id
    FROM public.app_versions AS av
    WHERE av.deleted_at IS NOT NULL
      AND av.deleted_at < now() - INTERVAL '3 months'
      AND av.name NOT IN ('builtin', 'unknown')
      -- Versions still referenced by a channel are never removed.
      AND NOT EXISTS (
        SELECT 1
        FROM public.channels AS ch
        WHERE ch.version = av.id
      )
    ORDER BY av.deleted_at, av.id
    LIMIT 500
    FOR UPDATE SKIP LOCKED
  )
  DELETE FROM public.app_versions AS target
  USING deleted_versions
  WHERE target.id = deleted_versions.id;

  GET DIAGNOSTICS purged_count = ROW_COUNT;

  -- Stay quiet on empty runs; only report when something was removed.
  IF purged_count > 0 THEN
    RAISE NOTICE 'delete_old_deleted_versions: permanently deleted % app versions', purged_count;
  END IF;
END;
$$;

ALTER FUNCTION public.delete_old_deleted_versions() OWNER TO postgres;
COMMENT ON FUNCTION public.delete_old_deleted_versions() IS 'Permanently deletes up to 500 soft-deleted app versions older than 3 months per run; related manifest rows cascade through foreign keys.';

-- Execution is restricted to service_role.
REVOKE ALL ON FUNCTION public.delete_old_deleted_versions() FROM PUBLIC;
REVOKE ALL ON FUNCTION public.delete_old_deleted_versions() FROM anon;
REVOKE ALL ON FUNCTION public.delete_old_deleted_versions() FROM authenticated;
GRANT EXECUTE ON FUNCTION public.delete_old_deleted_versions() TO service_role;
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.