Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 99 additions & 25 deletions assets/apps_script/Code.gs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@
* Set CACHE_SPREADSHEET_ID to a valid Google Sheet ID (must be owned by
* the same account). When enabled, public GET requests are stored in the
* sheet and served from there on repeat visits, reducing UrlFetchApp
* quota consumption. The cache is Vary-aware (Accept-Encoding and
* Accept-Language are hashed into the compound cache key). Leave
* CACHE_SPREADSHEET_ID as-is to disable caching entirely — zero overhead.
* quota consumption. Bodies are gzipped before base64 storage so larger
* responses fit under the per-cell character limit, and persistent
* 4xx (404/410/451) get a short negative-cache TTL so buggy clients
* that hammer dead URLs cost zero quota; 5xx is never cached so a
* flapping upstream cannot poison a 24h slot with a transient outage.
* The cache is Vary-aware (Accept-Encoding and Accept-Language are
* hashed into the compound cache key). Leave CACHE_SPREADSHEET_ID as-is
* to disable caching entirely — zero overhead.
*
* DEPLOYMENT:
* 1. Go to https://script.google.com → New project
Expand Down Expand Up @@ -54,6 +59,22 @@ const CACHE_MAX_ROWS = 5000; // circular buffer capacity
const CACHE_MAX_BODY_BYTES = 35000; // skip responses larger than ~35 KB
const CACHE_DEFAULT_TTL_SECONDS = 86400; // 24-hour fallback when no Cache-Control

// ── Negative Caching ────────────────────────────────────────
// Persistent 4xx errors get a short TTL when the upstream is silent on
// Cache-Control. Buggy clients hammer dead URLs (favicons, telemetry
// pixels, dev-tools probes); a 5-minute default TTL absorbs the storm
// at zero quota cost while letting transient 404s self-heal quickly.
// 5xx is never cached — see _fetchAndCache.
// Object-as-set lookup table: a truthy value marks the status code as
// eligible for negative caching.
const NEGATIVE_CACHE_STATUSES = { 404: 1, 410: 1, 451: 1 };
// TTL applied to those statuses only when the upstream sends no
// Cache-Control; an explicit max-age from the origin is honored instead.
const NEGATIVE_CACHE_TTL_SECONDS = 300;

// ── Body Compression ────────────────────────────────────────
// Bodies are gzipped before base64 storage when worthwhile. Gzip has
// ~20 bytes of header overhead, so very small payloads can bloat;
// skip below this threshold. Already-encoded responses (gzip/br/etc.)
// are stored as-is to avoid double-compression.
// Raw bodies smaller than this many bytes are stored uncompressed (Z=0).
const GZIP_MIN_BYTES = 256;

// ── Vary-Aware Cache Key ────────────────────────────────────
// These request headers are hashed into the compound cache key
// alongside the URL so that responses with different encodings
Expand Down Expand Up @@ -320,9 +341,12 @@ function _initCacheSheet() {
var sheet = ss.getSheetByName(CACHE_SHEET_NAME);
if (!sheet) {
sheet = ss.insertSheet(CACHE_SHEET_NAME);
// Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At
sheet.getRange(1, 1, 1, 7).setValues([[
"URL_Hash", "URL", "Status", "Headers", "Body", "Timestamp", "Expires_At"
// Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At | Z
// Z is 1 when Body is base64(gzip(rawBytes)), 0/empty when base64(rawBytes).
// Legacy 7-column rows from older deployments read back with an empty Z
// cell, which the cache hit path treats as "not gzipped" — fully compatible.
sheet.getRange(1, 1, 1, 8).setValues([[
"URL_Hash", "URL", "Status", "Headers", "Body", "Timestamp", "Expires_At", "Z"
]]);
}
return sheet;
Expand Down Expand Up @@ -539,27 +563,46 @@ function _getFromCache(url, reqHeaders) {
var found = finder.findNext();

if (found) {
var row = sheet.getRange(found.getRow(), 1, 1, 7).getValues()[0];
// 8-column read. Legacy 7-column rows yield an empty string in the Z slot
// (getValues() returns "" for empty cells), which is falsy and falls through
// the not-gzipped branch below — fully compatible with caches written
// before the gzip-storage change.
var row = sheet.getRange(found.getRow(), 1, 1, 8).getValues()[0];

var expiresAt = row[6];
if (expiresAt && expiresAt instanceof Date && expiresAt < new Date()) {
return null;
}

var storedBody = row[4];
var body;
if (row[7]) {
// Stored as base64(gzip(rawBytes)). The relay protocol's `b` field
// is base64(rawBytes), so decompress and re-encode for the wire.
var gzipped = Utilities.base64Decode(storedBody);
var raw = Utilities
.ungzip(Utilities.newBlob(gzipped, "application/x-gzip"))
.getBytes();
body = Utilities.base64Encode(raw);
} else {
body = storedBody;
}

return {
status: row[2],
headers: _refreshCachedHeaders(row[3], row[5]),
body: row[4],
body: body,
};
}
return null;
}

/**
* Fetch a URL and store the response in the spreadsheet cache
* using a circular buffer (O(1) writes). Skips storage when the
* encoded body exceeds CACHE_MAX_BODY_BYTES or when Cache-Control
* forbids caching. Returns the fetch result regardless.
* using a circular buffer (O(1) writes). Skips storage on 5xx
* (transient outages must not poison a 24h slot), when Cache-Control
* forbids caching, or when the post-compression body exceeds
* CACHE_MAX_BODY_BYTES. Always returns the fetch result so the caller
* can serve the live response even when the cache write is skipped.
*/
function _fetchAndCache(url, reqHeaders) {
var sheet = _initCacheSheet();
Expand All @@ -569,22 +612,52 @@ function _fetchAndCache(url, reqHeaders) {
var response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
var status = response.getResponseCode();
var headers = _respHeaders(response);
var body = Utilities.base64Encode(response.getContent());
var bodyBytes = response.getContent();
var rawB64 = Utilities.base64Encode(bodyBytes);
var headersJson = JSON.stringify(headers);
var liveResult = { status: status, headers: headersJson, body: rawB64 };

// Cell-size safety gate
if (body.length > CACHE_MAX_BODY_BYTES) {
return { status: status, headers: JSON.stringify(headers), body: body };
}
// 5xx never enters the cache. A flapping upstream returning 503 once
// would otherwise pin that response for 24h and break the URL for
// every subsequent client until expiry.
if (status >= 500) return liveResult;

// TTL extraction
var cacheControl =
headers["Cache-Control"] || headers["cache-control"] || null;
var ttlSeconds = _parseMaxAge(cacheControl);

if (ttlSeconds === 0) {
return { status: status, headers: JSON.stringify(headers), body: body };
if (ttlSeconds === 0) return liveResult;

// Negative caching: on persistent 4xx with no Cache-Control, override the
// 24-hour default with a short negative-cache TTL. If the origin explicitly
// stated a max-age for the 404, we honor it instead — the origin knows best
// when it spoke up.
if (NEGATIVE_CACHE_STATUSES[status] && !cacheControl) {
ttlSeconds = NEGATIVE_CACHE_TTL_SECONDS;
}

// Decide whether to gzip-store. Skip when upstream is already encoded
// (avoids double-compressing gzip/br/zstd payloads) and when the body
// is too small to overcome gzip's header overhead.
var contentEncoding = String(
headers["Content-Encoding"] || headers["content-encoding"] || ""
).toLowerCase();
var alreadyEncoded = contentEncoding && contentEncoding !== "identity";
var storedBody;
var storedZ;
if (alreadyEncoded || bodyBytes.length < GZIP_MIN_BYTES) {
storedBody = rawB64;
storedZ = 0;
} else {
storedBody = Utilities.base64Encode(
Utilities.gzip(Utilities.newBlob(bodyBytes)).getBytes()
);
storedZ = 1;
}

// Cell-size safety gate, applied after compression so that a 100 KB
// text body that gzips to ~15 KB now fits where it previously bailed.
if (storedBody.length > CACHE_MAX_BODY_BYTES) return liveResult;

var hash = _getCacheKey(url, reqHeaders);
var timestamp = new Date();
var expiresAt = new Date(timestamp.getTime() + ttlSeconds * 1000);
Expand All @@ -598,25 +671,26 @@ function _fetchAndCache(url, reqHeaders) {
hash,
url,
status,
JSON.stringify(headers),
body,
headersJson,
storedBody,
timestamp.toISOString(),
expiresAt,
storedZ,
];

// Circular buffer write (O(1))
var metaSheet = _getMetaSheet();
if (metaSheet) {
_ensureRowsAllocated(sheet);
var writeRow = _getNextCursor(sheet, metaSheet);
sheet.getRange(writeRow, 1, 1, 7).setValues([rowData]);
sheet.getRange(writeRow, 1, 1, 8).setValues([rowData]);
_advanceCursor(metaSheet, writeRow);
} else {
// Fallback: simple append if meta sheet is unavailable
sheet.appendRow(rowData);
}

return { status: status, headers: JSON.stringify(headers), body: body };
return liveResult;
} catch (e) {
return null;
}
Expand Down Expand Up @@ -684,7 +758,7 @@ function clearExpiredCache() {
}

for (var j = 0; j < rowsToClear.length; j++) {
sheet.getRange(rowsToClear[j], 1, 1, 7).clearContent();
sheet.getRange(rowsToClear[j], 1, 1, 8).clearContent();
}

console.log("Cleared " + rowsToClear.length + " expired entries (" +
Expand All @@ -696,7 +770,7 @@ function clearEntireCache() {
if (sheet) {
var totalRows = sheet.getDataRange().getNumRows();
if (totalRows > 1) {
sheet.getRange(2, 1, totalRows - 1, 7).clearContent();
sheet.getRange(2, 1, totalRows - 1, 8).clearContent();
}
}

Expand Down
Loading