diff --git a/bench/baselines/quality/current.json b/bench/baselines/quality/current.json index 26bd26c..18cf88f 100644 --- a/bench/baselines/quality/current.json +++ b/bench/baselines/quality/current.json @@ -1,7 +1,7 @@ { - "version": "1.3.0", - "gitRef": "0e7aab2fe3c65661d7735303b15a7010e280a649", - "generated": "2026-03-21T14:11:05.599Z", + "version": "1.4.0", + "gitRef": "b983bb4283b942c045c3ff01c93933625fb41ff2", + "generated": "2026-03-22T16:49:09.591Z", "results": { "scenarios": { "Coding assistant": { @@ -100,7 +100,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.0003203430451127824 }, "Long Q&A": { "ratio": 4.902912621359223, @@ -190,7 +191,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00006223777662517271 }, "Tool-heavy": { "ratio": 1.4009797060881735, @@ -249,7 +251,8 @@ "entityRetention": 0.6, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00009947913793103351 }, "Deep conversation": { "ratio": 2.5041568769202964, @@ -752,7 +755,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.000041621278337531585 }, "Technical explanation": { "ratio": 1.2398561890087314, @@ -824,7 +828,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.000056277095375722494 }, "Structured content": { "ratio": 1.2595769010863351, @@ -879,7 +884,8 @@ "entityRetention": 0.6, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.000031551811023622734 }, "Agentic coding session": { "ratio": 1.004950495049505, @@ -925,7 +931,8 @@ "entityRetention": 0.2857142857142857, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00012199074074074208 }, "Single-char messages": { "ratio": 1, @@ -953,7 +960,8 @@ ], "negativeCompressions": 0, "coherenceIssues": 0, - "messages": [] + "messages": [], + "overheadRatio": 0.00007007500000000277 }, "Giant single message": { "ratio": 2.828036762263315, @@ -999,7 +1007,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00002075325774184707 }, "Code-only conversation": { "ratio": 1, @@ -1031,7 +1040,8 @@ ], "negativeCompressions": 0, "coherenceIssues": 0, - "messages": [] + "messages": [], + "overheadRatio": 0.000018653536585363587 }, "Entity-dense technical": { "ratio": 1.5571321882001494, @@ -1107,7 +1117,8 @@ "entityRetention": 0.6666666666666666, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00011654841402337068 }, "Prose-only conversation": { "ratio": 3.367965367965368, @@ -1167,7 +1178,8 @@ "entityRetention": 1, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.000045656333830104156 }, "Mixed languages": { "ratio": 1.0689134808853118, @@ -1213,7 +1225,51 @@ "entityRetention": 0.6666666666666666, "codeBlocksIntact": true } - ] + ], + "overheadRatio": 0.00003091133004926051 + }, + "High-entropy content": { + "ratio": 1.3451492537313432, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1, + "compressedQualityScore": 1, + "probesPassed": 4, + "probesTotal": 4, + "probePassRate": 1, + "probeResults": [ + { + "label": "Hex block verbatim", + "passed": true + }, + { + "label": "UUID array verbatim", + "passed": true + }, + { + "label": "Base64 blob verbatim", + "passed": true + }, + { + "label": "Mixed entropy+prose preserves entropy", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "50042", + "action": "compressed", + "inputChars": 808, + "outputChars": 253, + "localRatio": 3.1936758893280635, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000058397177419353755 } }, "tradeoff": { diff --git a/bench/baselines/quality/history/b983bb42.json b/bench/baselines/quality/history/b983bb42.json new file mode 100644 index 0000000..18cf88f --- /dev/null +++ b/bench/baselines/quality/history/b983bb42.json @@ -0,0 +1,1733 @@ +{ + "version": "1.4.0", + "gitRef": "b983bb4283b942c045c3ff01c93933625fb41ff2", + "generated": "2026-03-22T16:49:09.591Z", + "results": { + "scenarios": { + "Coding assistant": { + "ratio": 1.9385451505016722, + "avgEntityRetention": 0.9380952380952381, + "minEntityRetention": 0.8333333333333334, + "codeBlockIntegrity": 1, + "informationDensity": 1.9408267576707483, + "compressedQualityScore": 1, + "probesPassed": 9, + "probesTotal": 9, + "probePassRate": 1, + "probeResults": [ + { + "label": "JWT_SECRET env var", + "passed": true + }, + { + "label": "jwt.verify in code", + "passed": true + }, + { + "label": "15m access expiry", + "passed": true + }, + { + "label": "7d refresh expiry", + "passed": true + }, + { + "label": "rateLimit in code", + "passed": true + }, + { + "label": "authMiddleware function", + "passed": true + }, + { + "label": "express-rate-limit import", + "passed": true + }, + { + "label": "Redis/ioredis mention", + "passed": true + }, + { + "label": "min output ≥ 2000 chars", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "3", + "action": "code_split", + "inputChars": 912, + "outputChars": 564, + "localRatio": 1.6170212765957446, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "5", + "action": "code_split", + "inputChars": 1057, + "outputChars": 530, + "localRatio": 1.9943396226415093, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "7", + "action": "code_split", + "inputChars": 824, + "outputChars": 297, + "localRatio": 2.774410774410774, + "entityRetention": 0.8333333333333334, + "codeBlocksIntact": true + }, + { + "messageId": "9", + "action": "code_split", + "inputChars": 828, + "outputChars": 480, + "localRatio": 1.725, + "entityRetention": 0.8571428571428571, + "codeBlocksIntact": true + }, + { + "messageId": "13", + "action": "compressed", + "inputChars": 713, + "outputChars": 218, + "localRatio": 3.270642201834862, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.0003203430451127824 + }, + "Long Q&A": { + "ratio": 4.902912621359223, + "avgEntityRetention": 0.8, + "minEntityRetention": 0, + "codeBlockIntegrity": 1, + "informationDensity": 4.258064516129032, + "compressedQualityScore": 1, + "probesPassed": 7, + "probesTotal": 7, + "probePassRate": 1, + "probeResults": [ + { + "label": "event sourcing", + "passed": true + }, + { + "label": "circuit breaker", + "passed": true + }, + { + "label": "eventual consistency", + "passed": true + }, + { + "label": "saga pattern", + "passed": true + }, + { + "label": "choreography", + "passed": true + }, + { + "label": "orchestration", + "passed": true + }, + { + "label": "min output ≥ 800 chars", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 5, + "messages": [ + { + "messageId": "16", + "action": "deduped", + "inputChars": 1800, + "outputChars": 28, + "localRatio": 64.28571428571429, + "entityRetention": 0, + "codeBlocksIntact": true + }, + { + "messageId": "18", + "action": "compressed", + "inputChars": 2250, + "outputChars": 493, + "localRatio": 4.563894523326572, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "20", + "action": "compressed", + "inputChars": 1800, + "outputChars": 493, + "localRatio": 3.6511156186612577, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "22", + "action": "compressed", + "inputChars": 2700, + "outputChars": 493, + "localRatio": 5.476673427991886, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "24", + "action": "compressed", + "inputChars": 1350, + "outputChars": 353, + "localRatio": 3.8243626062322944, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00006223777662517271 + }, + "Tool-heavy": { + "ratio": 1.4009797060881735, + "avgEntityRetention": 0.8, + "minEntityRetention": 0.6, + "codeBlockIntegrity": 1, + "informationDensity": 1.6052416052416052, + "compressedQualityScore": 0.8666666666666667, + "probesPassed": 6, + "probesTotal": 6, + "probePassRate": 1, + "probeResults": [ + { + "label": "JSON array preserved", + "passed": true + }, + { + "label": "SQL SELECT preserved", + "passed": true + }, + { + "label": "STRIPE_SECRET_KEY", + "passed": true + }, + { + "label": "GITHUB_TOKEN", + "passed": true + }, + { + "label": "code blocks present", + "passed": true + }, + { + "label": "DATABASE_URL", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 2, + "messages": [ + { + "messageId": "30", + "action": "compressed", + "inputChars": 744, + "outputChars": 235, + "localRatio": 3.1659574468085108, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "36", + "action": "compressed", + "inputChars": 236, + "outputChars": 172, + "localRatio": 1.372093023255814, + "entityRetention": 0.6, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00009947913793103351 + }, + "Deep conversation": { + "ratio": 2.5041568769202964, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1, + "compressedQualityScore": 1, + "probesPassed": 3, + "probesTotal": 9, + "probePassRate": 0.3333333333333333, + "probeResults": [ + { + "label": "≥15/25 topics survive", + "passed": false + }, + { + "label": "topic: database schema", + "passed": true + }, + { + "label": "topic: authentication", + "passed": false + }, + { + "label": "topic: caching", + "passed": false + }, + { + "label": "topic: monitoring", + "passed": false + }, + { + "label": "topic: testing", + "passed": false + }, + { + "label": "topic: deployment", + "passed": false + }, + { + "label": "topic: error handling", + "passed": true + }, + { + "label": "min output ≥ 3000 chars", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 6, + "messages": [ + { + "messageId": "44", + "action": "compressed", + "inputChars": 306, + "outputChars": 168, + "localRatio": 1.8214285714285714, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "45", + "action": "compressed", + "inputChars": 809, + "outputChars": 246, + "localRatio": 3.2886178861788617, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "46", + "action": "compressed", + "inputChars": 306, + "outputChars": 168, + "localRatio": 1.8214285714285714, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "47", + "action": "compressed", + "inputChars": 809, + "outputChars": 246, + "localRatio": 3.2886178861788617, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "48", + "action": "compressed", + "inputChars": 303, + "outputChars": 202, + "localRatio": 1.5, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "49", + "action": "compressed", + "inputChars": 806, + "outputChars": 246, + "localRatio": 3.2764227642276422, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "50", + "action": "compressed", + "inputChars": 307, + "outputChars": 169, + "localRatio": 1.816568047337278, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "51", + "action": "compressed", + "inputChars": 810, + "outputChars": 246, + "localRatio": 3.292682926829268, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "52", + "action": "compressed", + "inputChars": 297, + "outputChars": 202, + "localRatio": 1.4702970297029703, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "53", + "action": "compressed", + "inputChars": 800, + "outputChars": 246, + "localRatio": 3.252032520325203, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "54", + "action": "compressed", + "inputChars": 303, + "outputChars": 202, + "localRatio": 1.5, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "55", + "action": "compressed", + "inputChars": 806, + "outputChars": 246, + "localRatio": 3.2764227642276422, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "56", + "action": "compressed", + "inputChars": 300, + "outputChars": 202, + "localRatio": 1.4851485148514851, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "57", + "action": "compressed", + "inputChars": 803, + "outputChars": 246, + "localRatio": 3.2642276422764227, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "58", + "action": "compressed", + "inputChars": 300, + "outputChars": 202, + "localRatio": 1.4851485148514851, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "59", + "action": "compressed", + "inputChars": 803, + "outputChars": 246, + "localRatio": 3.2642276422764227, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "60", + "action": "compressed", + "inputChars": 303, + "outputChars": 202, + "localRatio": 1.5, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "61", + "action": "compressed", + "inputChars": 806, + "outputChars": 246, + "localRatio": 3.2764227642276422, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "62", + "action": "compressed", + "inputChars": 307, + "outputChars": 169, + "localRatio": 1.816568047337278, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "63", + "action": "compressed", + "inputChars": 810, + "outputChars": 246, + "localRatio": 3.292682926829268, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "64", + "action": "compressed", + "inputChars": 305, + "outputChars": 167, + "localRatio": 1.8263473053892216, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "65", + "action": "compressed", + "inputChars": 808, + "outputChars": 246, + "localRatio": 3.2845528455284554, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "66", + "action": "compressed", + "inputChars": 300, + "outputChars": 202, + "localRatio": 1.4851485148514851, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "67", + "action": "compressed", + "inputChars": 803, + "outputChars": 246, + "localRatio": 3.2642276422764227, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "68", + "action": "compressed", + "inputChars": 297, + "outputChars": 202, + "localRatio": 1.4702970297029703, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "69", + "action": "compressed", + "inputChars": 800, + "outputChars": 246, + "localRatio": 3.252032520325203, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "70", + "action": "compressed", + "inputChars": 298, + "outputChars": 202, + "localRatio": 1.4752475247524752, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "71", + "action": "compressed", + "inputChars": 801, + "outputChars": 246, + "localRatio": 3.2560975609756095, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "72", + "action": "compressed", + "inputChars": 298, + "outputChars": 202, + "localRatio": 1.4752475247524752, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "73", + "action": "compressed", + "inputChars": 801, + "outputChars": 246, + "localRatio": 3.2560975609756095, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "74", + "action": "compressed", + "inputChars": 300, + "outputChars": 202, + "localRatio": 1.4851485148514851, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "75", + "action": "compressed", + "inputChars": 803, + "outputChars": 246, + "localRatio": 3.2642276422764227, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "76", + "action": "compressed", + "inputChars": 299, + "outputChars": 202, + "localRatio": 1.4801980198019802, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "77", + "action": "compressed", + "inputChars": 802, + "outputChars": 246, + "localRatio": 3.2601626016260163, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "78", + "action": "compressed", + "inputChars": 302, + "outputChars": 202, + "localRatio": 1.495049504950495, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "79", + "action": "compressed", + "inputChars": 805, + "outputChars": 246, + "localRatio": 3.272357723577236, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "80", + "action": "compressed", + "inputChars": 298, + "outputChars": 202, + "localRatio": 1.4752475247524752, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "81", + "action": "compressed", + "inputChars": 801, + "outputChars": 246, + "localRatio": 3.2560975609756095, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "82", + "action": "compressed", + "inputChars": 307, + "outputChars": 169, + "localRatio": 1.816568047337278, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "83", + "action": "compressed", + "inputChars": 810, + "outputChars": 246, + "localRatio": 3.292682926829268, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "84", + "action": "compressed", + "inputChars": 301, + "outputChars": 202, + "localRatio": 1.49009900990099, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "85", + "action": "compressed", + "inputChars": 804, + "outputChars": 246, + "localRatio": 3.268292682926829, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "86", + "action": "compressed", + "inputChars": 297, + "outputChars": 202, + "localRatio": 1.4702970297029703, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "87", + "action": "compressed", + "inputChars": 800, + "outputChars": 246, + "localRatio": 3.252032520325203, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "88", + "action": "compressed", + "inputChars": 301, + "outputChars": 202, + "localRatio": 1.49009900990099, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "89", + "action": "compressed", + "inputChars": 804, + "outputChars": 246, + "localRatio": 3.268292682926829, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "90", + "action": "compressed", + "inputChars": 301, + "outputChars": 202, + "localRatio": 1.49009900990099, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "91", + "action": "compressed", + "inputChars": 804, + "outputChars": 246, + "localRatio": 3.268292682926829, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "92", + "action": "compressed", + "inputChars": 298, + "outputChars": 202, + "localRatio": 1.4752475247524752, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "93", + "action": "compressed", + "inputChars": 801, + "outputChars": 246, + "localRatio": 3.2560975609756095, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000041621278337531585 + }, + "Technical explanation": { + "ratio": 1.2398561890087314, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1.7915254237288134, + "compressedQualityScore": 1, + "probesPassed": 6, + "probesTotal": 7, + "probePassRate": 0.8571428571428571, + "probeResults": [ + { + "label": "OrderPlaced event", + "passed": true + }, + { + "label": "temporal decoupling", + "passed": true + }, + { + "label": "schema version", + "passed": false + }, + { + "label": "partition ordering", + "passed": true + }, + { + "label": "at-least-once delivery", + "passed": true + }, + { + "label": "dead letter queue", + "passed": true + }, + { + "label": "idempotent consumers", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 3, + "messages": [ + { + "messageId": "98", + "action": "compressed", + "inputChars": 483, + "outputChars": 203, + "localRatio": 2.3793103448275863, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "100", + "action": "compressed", + "inputChars": 347, + "outputChars": 209, + "localRatio": 1.6602870813397128, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "102", + "action": "compressed", + "inputChars": 227, + "outputChars": 178, + "localRatio": 1.2752808988764044, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000056277095375722494 + }, + "Structured content": { + "ratio": 1.2595769010863351, + "avgEntityRetention": 0.675, + "minEntityRetention": 0.6, + "codeBlockIntegrity": 1, + "informationDensity": 1.3318681318681318, + "compressedQualityScore": 0.8666666666666667, + "probesPassed": 5, + "probesTotal": 5, + "probePassRate": 1, + "probeResults": [ + { + "label": "API keys preserved", + "passed": true + }, + { + "label": "CREATE TABLE preserved", + "passed": true + }, + { + "label": "JSON code block", + "passed": true + }, + { + "label": "AWS_ACCESS_KEY_ID", + "passed": true + }, + { + "label": "SENDGRID_API_KEY", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "109", + "action": "compressed", + "inputChars": 494, + "outputChars": 230, + "localRatio": 2.1478260869565218, + "entityRetention": 0.75, + "codeBlocksIntact": true + }, + { + "messageId": "111", + "action": "compressed", + "inputChars": 415, + "outputChars": 225, + "localRatio": 1.8444444444444446, + "entityRetention": 0.6, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000031551811023622734 + }, + "Agentic coding session": { + "ratio": 1.004950495049505, + "avgEntityRetention": 0.2857142857142857, + "minEntityRetention": 0.2857142857142857, + "codeBlockIntegrity": 1, + "informationDensity": 0.30398671096345514, + "compressedQualityScore": 0.7142857142857144, + "probesPassed": 4, + "probesTotal": 5, + "probePassRate": 0.8, + "probeResults": [ + { + "label": "AuthService in code", + "passed": true + }, + { + "label": "verify or validateToken", + "passed": true + }, + { + "label": "grep results", + "passed": false + }, + { + "label": "test counts", + "passed": true + }, + { + "label": "jwt.sign in code", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "122", + "action": "compressed", + "inputChars": 183, + "outputChars": 172, + "localRatio": 1.063953488372093, + "entityRetention": 0.2857142857142857, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00012199074074074208 + }, + "Single-char messages": { + "ratio": 1, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1, + "compressedQualityScore": 1, + "probesPassed": 3, + "probesTotal": 3, + "probePassRate": 1, + "probeResults": [ + { + "label": "output count = input count", + "passed": true + }, + { + "label": "\"y\" present", + "passed": true + }, + { + "label": "\"n\" present", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 0, + "messages": [], + "overheadRatio": 0.00007007500000000277 + }, + "Giant single message": { + "ratio": 2.828036762263315, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 2.8382140073488475, + "compressedQualityScore": 1, + "probesPassed": 5, + "probesTotal": 5, + "probePassRate": 1, + "probeResults": [ + { + "label": "TracingService in code", + "passed": true + }, + { + "label": "traceId identifier", + "passed": true + }, + { + "label": "spanId identifier", + "passed": true + }, + { + "label": "startSpan in code", + "passed": true + }, + { + "label": "min output ≥ 10000 chars", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "50012", + "action": "code_split", + "inputChars": 50980, + "outputChars": 17962, + "localRatio": 2.8382140073488475, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00002075325774184707 + }, + "Code-only conversation": { + "ratio": 1, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1, + "compressedQualityScore": 1, + "probesPassed": 4, + "probesTotal": 4, + "probePassRate": 1, + "probeResults": [ + { + "label": "TypeScript code blocks", + "passed": true + }, + { + "label": "Python code blocks", + "passed": true + }, + { + "label": "SQL code blocks", + "passed": true + }, + { + "label": "all code preserved verbatim", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 0, + "messages": [], + "overheadRatio": 0.000018653536585363587 + }, + "Entity-dense technical": { + "ratio": 1.5571321882001494, + "avgEntityRetention": 0.5292397660818713, + "minEntityRetention": 0.42105263157894735, + "codeBlockIntegrity": 1, + "informationDensity": 0.9882198952879582, + "compressedQualityScore": 0.7945945945945947, + "probesPassed": 5, + "probesTotal": 8, + "probePassRate": 0.625, + "probeResults": [ + { + "label": "file paths present", + "passed": true + }, + { + "label": "redis-prod-001", + "passed": false + }, + { + "label": "v22.3.0 version", + "passed": false + }, + { + "label": "max_connections", + "passed": true + }, + { + "label": "PR #142", + "passed": false + }, + { + "label": "orderService.ts", + "passed": true + }, + { + "label": "idx_orders_user_created", + "passed": true + }, + { + "label": "p99 latency", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 2, + "messages": [ + { + "messageId": "50022", + "action": "compressed", + "inputChars": 466, + "outputChars": 253, + "localRatio": 1.841897233201581, + "entityRetention": 0.5, + "codeBlocksIntact": true + }, + { + "messageId": "50023", + "action": "compressed", + "inputChars": 641, + "outputChars": 242, + "localRatio": 2.6487603305785123, + "entityRetention": 0.42105263157894735, + "codeBlocksIntact": true + }, + { + "messageId": "50024", + "action": "compressed", + "inputChars": 403, + "outputChars": 269, + "localRatio": 1.4981412639405205, + "entityRetention": 0.6666666666666666, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00011654841402337068 + }, + "Prose-only conversation": { + "ratio": 3.367965367965368, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 4.348979591836734, + "compressedQualityScore": 1, + "probesPassed": 2, + "probesTotal": 4, + "probePassRate": 0.5, + "probeResults": [ + { + "label": "hiring topic", + "passed": false + }, + { + "label": "review topic", + "passed": true + }, + { + "label": "onboarding topic", + "passed": false + }, + { + "label": "min output ≥ 400 chars", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 2, + "messages": [ + { + "messageId": "50028", + "action": "compressed", + "inputChars": 684, + "outputChars": 113, + "localRatio": 6.053097345132743, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "50030", + "action": "compressed", + "inputChars": 736, + "outputChars": 257, + "localRatio": 2.8638132295719845, + "entityRetention": 1, + "codeBlocksIntact": true + }, + { + "messageId": "50032", + "action": "compressed", + "inputChars": 711, + "outputChars": 120, + "localRatio": 5.925, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000045656333830104156 + }, + "Mixed languages": { + "ratio": 1.0689134808853118, + "avgEntityRetention": 0.6666666666666666, + "minEntityRetention": 0.6666666666666666, + "codeBlockIntegrity": 1, + "informationDensity": 1.050420168067227, + "compressedQualityScore": 0.8666666666666667, + "probesPassed": 5, + "probesTotal": 5, + "probePassRate": 1, + "probeResults": [ + { + "label": "Python code block", + "passed": true + }, + { + "label": "SQL code block", + "passed": true + }, + { + "label": "JSON code block", + "passed": true + }, + { + "label": "YAML code block", + "passed": true + }, + { + "label": "metrics-processor name", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 0, + "messages": [ + { + "messageId": "50039", + "action": "compressed", + "inputChars": 375, + "outputChars": 238, + "localRatio": 1.5756302521008403, + "entityRetention": 0.6666666666666666, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.00003091133004926051 + }, + "High-entropy content": { + "ratio": 1.3451492537313432, + "avgEntityRetention": 1, + "minEntityRetention": 1, + "codeBlockIntegrity": 1, + "informationDensity": 1, + "compressedQualityScore": 1, + "probesPassed": 4, + "probesTotal": 4, + "probePassRate": 1, + "probeResults": [ + { + "label": "Hex block verbatim", + "passed": true + }, + { + "label": "UUID array verbatim", + "passed": true + }, + { + "label": "Base64 blob verbatim", + "passed": true + }, + { + "label": "Mixed entropy+prose preserves entropy", + "passed": true + } + ], + "negativeCompressions": 0, + "coherenceIssues": 1, + "messages": [ + { + "messageId": "50042", + "action": "compressed", + "inputChars": 808, + "outputChars": 253, + "localRatio": 3.1936758893280635, + "entityRetention": 1, + "codeBlocksIntact": true + } + ], + "overheadRatio": 0.000058397177419353755 + } + }, + "tradeoff": { + "Coding assistant": { + "points": [ + { + "recencyWindow": 0, + "ratio": 1.9385451505016722, + "entityRetention": 1, + "informationDensity": 1.9408267576707483, + "qualityScore": 1 + }, + { + "recencyWindow": 1, + "ratio": 1.6061655697956356, + "entityRetention": 1, + "informationDensity": 1.7970909368557686, + "qualityScore": 1 + }, + { + "recencyWindow": 2, + "ratio": 1.6061655697956356, + "entityRetention": 1, + "informationDensity": 1.7970909368557686, + "qualityScore": 1 + }, + { + "recencyWindow": 3, + "ratio": 1.6061655697956356, + "entityRetention": 1, + "informationDensity": 1.7970909368557686, + "qualityScore": 1 + }, + { + "recencyWindow": 4, + "ratio": 1.6061655697956356, + "entityRetention": 1, + "informationDensity": 1.7970909368557686, + "qualityScore": 1 + }, + { + "recencyWindow": 5, + "ratio": 1.4333848531684699, + "entityRetention": 1, + "informationDensity": 1.9122933141624732, + "qualityScore": 1 + }, + { + "recencyWindow": 6, + "ratio": 1.4333848531684699, + "entityRetention": 1, + "informationDensity": 1.9122933141624732, + "qualityScore": 1 + }, + { + "recencyWindow": 7, + "ratio": 1.232589048378522, + "entityRetention": 1, + "informationDensity": 1.79981718464351, + "qualityScore": 1 + }, + { + "recencyWindow": 8, + "ratio": 1.232589048378522, + "entityRetention": 1, + "informationDensity": 1.79981718464351, + "qualityScore": 1 + }, + { + "recencyWindow": 9, + "ratio": 1.0811377943576592, + "entityRetention": 1, + "informationDensity": 1.6170212765957448, + "qualityScore": 1 + }, + { + "recencyWindow": 10, + "ratio": 1.0811377943576592, + "entityRetention": 1, + "informationDensity": 1.6170212765957448, + "qualityScore": 1 + }, + { + "recencyWindow": 11, + "ratio": 1, + "entityRetention": 1, + "informationDensity": 1, + "qualityScore": 1 + } + ], + "qualityAt2x": 1, + "qualityAt3x": null, + "maxRatioAbove80pctQuality": 1.9385451505016722 + }, + "Deep conversation": { + "points": [ + { + "recencyWindow": 0, + "ratio": 2.5041568769202964, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 2, + "ratio": 2.3650251770931128, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 4, + "ratio": 2.2394536932277354, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 6, + "ratio": 2.1265443941370576, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 8, + "ratio": 2.025657894736842, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 10, + "ratio": 1.9328311362209667, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 12, + "ratio": 1.8426092160383005, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 14, + "ratio": 1.7661567877629063, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 16, + "ratio": 1.6949660529696007, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 18, + "ratio": 1.629867074461828, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 20, + "ratio": 1.569405901342244, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 22, + "ratio": 1.5136006117544243, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 24, + "ratio": 1.4616277229811698, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 26, + "ratio": 1.413249694002448, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 28, + "ratio": 1.3675665005181858, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 30, + "ratio": 1.3219004913418881, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 32, + "ratio": 1.2790676205861988, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 34, + "ratio": 1.2411986025262027, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 36, + "ratio": 1.2058222009486097, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 38, + "ratio": 1.1724064985615164, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 40, + "ratio": 1.1405111742190395, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 42, + "ratio": 1.110839413132366, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 44, + "ratio": 1.0804351216469121, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 46, + "ratio": 1.053289748755179, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 48, + "ratio": 1.0259533506108849, + "entityRetention": 0.6666666666666666, + "informationDensity": 1, + "qualityScore": 1 + }, + { + "recencyWindow": 50, + "ratio": 1, + "entityRetention": 1, + "informationDensity": 1, + "qualityScore": 1 + } + ], + "qualityAt2x": 1, + "qualityAt3x": 1, + "maxRatioAbove80pctQuality": 2.5041568769202964 + }, + "Technical explanation": { + "points": [ + { + "recencyWindow": 0, + "ratio": 1.2398561890087314, + "entityRetention": 0.8571428571428571, + "informationDensity": 1.7915254237288134, + "qualityScore": 1 + }, + { + "recencyWindow": 1, + "ratio": 1.2094188376753507, + "entityRetention": 0.8, + "informationDensity": 2.0145631067961163, + "qualityScore": 1 + }, + { + "recencyWindow": 2, + "ratio": 1.2094188376753507, + "entityRetention": 0.8, + "informationDensity": 2.0145631067961163, + "qualityScore": 1 + }, + { + "recencyWindow": 3, + "ratio": 1.1312089971883785, + "entityRetention": 0.6666666666666666, + "informationDensity": 2.379310344827586, + "qualityScore": 1 + }, + { + "recencyWindow": 4, + "ratio": 1.1312089971883785, + "entityRetention": 0.6666666666666666, + "informationDensity": 2.379310344827586, + "qualityScore": 1 + }, + { + "recencyWindow": 5, + "ratio": 1, + "entityRetention": 1, + "informationDensity": 1, + "qualityScore": 1 + } + ], + "qualityAt2x": null, + "qualityAt3x": null, + "maxRatioAbove80pctQuality": 1.2398561890087314 + }, + "Agentic coding session": { + "points": [ + { + "recencyWindow": 0, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 1, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 2, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 3, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 4, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 5, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 6, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 7, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 8, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 9, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 10, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 11, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 12, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 13, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 14, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 15, + "ratio": 1.004950495049505, + "entityRetention": 0, + "informationDensity": 0.30398671096345514, + "qualityScore": 0.956 + }, + { + "recencyWindow": 16, + "ratio": 1, + "entityRetention": 1, + "informationDensity": 1, + "qualityScore": 1 + } + ], + "qualityAt2x": null, + "qualityAt3x": null, + "maxRatioAbove80pctQuality": 1.004950495049505 + } + } + } +} diff --git a/bench/quality-analysis.ts b/bench/quality-analysis.ts index 5dfc576..4742df2 100644 --- a/bench/quality-analysis.ts +++ b/bench/quality-analysis.ts @@ -42,6 +42,7 @@ export interface QualityResult { probeResults: ProbeResult[]; negativeCompressions: number; coherenceIssues: number; + overheadRatio?: number; messages: MessageQuality[]; } @@ -533,6 +534,31 @@ export function analyzeQuality( }; } +// --------------------------------------------------------------------------- +// Compression overhead ratio +// --------------------------------------------------------------------------- + +/** + * Compute compression overhead ratio: how much time the compression takes + * relative to the time those tokens would take in an LLM inference pass. + * + * A ratio of 0.1 means compression took 10% of the LLM processing time + * for the same token count — i.e. compression is 10x cheaper. + * + * @param compressionTimeMs - wall-clock time for the compress() call + * @param originalTokens - estimated token count of the original messages + * @param msPerToken - assumed LLM inference cost per token (default: 20ms) + */ +export function computeOverheadRatio( + compressionTimeMs: number, + originalTokens: number, + msPerToken: number = 20, +): number { + const llmTime = originalTokens * msPerToken; + if (llmTime <= 0) return 0; + return compressionTimeMs / llmTime; +} + // --------------------------------------------------------------------------- // Baseline comparison // --------------------------------------------------------------------------- diff --git a/bench/quality-scenarios.ts b/bench/quality-scenarios.ts index b7cdc1d..d33e63f 100644 --- a/bench/quality-scenarios.ts +++ b/bench/quality-scenarios.ts @@ -300,6 +300,29 @@ export function getProbesForScenario(name: string): ProbeDefinition[] { }, ]; + case 'High-entropy content': + return [ + { + label: 'Hex block verbatim', + check: (ms) => anyMessageMatches(ms, /[a-f0-9]{64}/i), + }, + { + label: 'UUID array verbatim', + check: (ms) => + anyMessageMatches(ms, /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i), + }, + { + label: 'Base64 blob verbatim', + check: (ms) => anyMessageMatches(ms, /[A-Za-z0-9+/]{40,}={0,2}/), + }, + { + label: 'Mixed entropy+prose preserves entropy', + check: (ms) => + anyMessageMatches(ms, /[A-Za-z0-9+/]{40,}={0,2}/) && + anyMessageMatches(ms, /preserved exactly|configuration/), + }, + ]; + default: return []; } @@ -644,6 +667,51 @@ export function mixedLanguages(): Scenario { }; } +/** + * Messages containing high-entropy content: Base64, hex dumps, UUID arrays. + * Tests that the engine preserves opaque binary/encoded data verbatim. + */ +export function highEntropyContent(): Scenario { + const hexLines = Array.from({ length: 10 }, (_, i) => + Array.from({ length: 32 }, (_, j) => ((i * 32 + j) % 256).toString(16).padStart(2, '0')).join( + '', + ), + ).join('\n'); + + const uuids = Array.from( + { length: 20 }, + (_, i) => + `${(i * 1111).toString(16).padStart(8, '0')}-` + + `${(i * 22).toString(16).padStart(4, '0')}-4${(i * 3).toString(16).padStart(3, '0')}-` + + `a${(i * 5).toString(16).padStart(3, '0')}-${(i * 777777).toString(16).padStart(12, '0')}`, + ); + + const base64Blob = + 'U29tZSBiYXNlNjQgZW5jb2RlZCBkYXRhIHRoYXQgaXMgbG9uZyBlbm91Z2ggdG8gZXhjZWVkIHRoZSBmb3J0eSBjaGFyYWN0ZXIgdGhyZXNob2xkIGFuZCBzaG91bGQgYmUgcHJlc2VydmVkIHZlcmJhdGlt'; + + return { + name: 'High-entropy content', + messages: [ + msg('system', 'You are a data analysis assistant.'), + msg('user', 'Here is the hex dump from the binary:\n\n' + hexLines), + msg( + 'assistant', + 'I see the hex data. Here is the UUID list from the database export:\n\n' + + uuids.join('\n'), + ), + msg('user', 'And here is the Base64 encoded certificate:\n\n' + base64Blob), + msg( + 'assistant', + 'The certificate data looks valid. Let me also note that the configuration ' + + 'contains this embedded payload which must be preserved exactly:\n\n' + + base64Blob + + '\n\nThe rest of the configuration uses standard JSON format and includes ' + + 'several environment variables for the staging deployment pipeline.', + ), + ], + }; +} + // --------------------------------------------------------------------------- // Builder // --------------------------------------------------------------------------- @@ -657,5 +725,6 @@ export function buildEdgeCaseScenarios(): Scenario[] { entityDenseTechnical(), proseOnlyConversation(), mixedLanguages(), + highEntropyContent(), ]; } diff --git a/bench/quality.ts b/bench/quality.ts index 067e293..790ee69 100644 --- a/bench/quality.ts +++ b/bench/quality.ts @@ -9,6 +9,7 @@ import { sweepTradeoff, summarizeTradeoff, compareQualityResults, + computeOverheadRatio, runLlmJudge, type QualityBaseline, type QualityResult, @@ -468,6 +469,7 @@ async function run(): Promise { 'NegCp'.padStart(6), 'Coher'.padStart(6), 'CmpQ'.padStart(6), + 'OvhdR'.padStart(7), ].join(' '); const qSep = '-'.repeat(qHeader.length); @@ -479,7 +481,18 @@ async function run(): Promise { for (const scenario of allScenarios) { const probes = getProbesForScenario(scenario.name); + + // Time the compression for overhead ratio + const inputTokens = scenario.messages.reduce((sum, m) => { + const len = typeof m.content === 'string' ? m.content.length : 0; + return sum + Math.ceil(len / 3.5); + }, 0); + const t0 = performance.now(); const q = analyzeQuality(scenario.messages, probes); + const elapsed = performance.now() - t0; + const overhead = computeOverheadRatio(elapsed, inputTokens); + q.overheadRatio = overhead; + qualityResults[scenario.name] = q; console.log( @@ -494,6 +507,7 @@ async function run(): Promise { String(q.negativeCompressions).padStart(6), String(q.coherenceIssues).padStart(6), fix(q.compressedQualityScore).padStart(6), + fix(overhead, 3).padStart(7), ].join(' '), ); } diff --git a/demo/index.html b/demo/index.html index 27c0171..cdc7c72 100644 --- a/demo/index.html +++ b/demo/index.html @@ -324,6 +324,34 @@ cursor: not-allowed; } + .settings select { + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: var(--text); + padding: 4px 8px; + font-family: var(--mono); + font-size: 11px; + outline: none; + cursor: pointer; + transition: + border-color 0.2s, + box-shadow 0.2s; + } + + .settings select:focus { + border-color: var(--accent); + box-shadow: 0 0 0 3px var(--accent-glow); + } + + .settings-divider { + width: 1px; + height: 24px; + background: var(--border); + margin: 0 4px; + flex-shrink: 0; + } + /* ─── Main Panels ─── */ .panels { @@ -509,7 +537,7 @@ } .stats-bar.visible { - max-height: 60px; + max-height: 100px; padding: 10px 20px; } @@ -928,6 +956,83 @@

Context Compression Engine / demo

+ + + + +
+ + +
+ +
+ + + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ @@ -988,6 +1093,68 @@

Context Compression Engine / demo

+
V2 Features
+ +
+
depth
+
+ Compression aggressiveness. gentle (default) = standard sentence + selection. moderate = tighter budgets, +20% ratio at zero quality cost. + aggressive = entity-only stubs. auto = progressive + escalation until budget fits. +
+
+
+
relevance
+
+ Drops low-value messages to stubs. The number sets the sentence score threshold + — 3 is a good default. Messages below this get replaced with + [N messages omitted]. +
+
+
+
flow
+
+ Groups Q&A pairs, request→action chains, and corrections into compression + units. Produces summaries like Q: ... → A: .... +
+
+
+
importance
+
+ Scores messages by forward-reference density and decision content. High-importance + messages are preserved even outside the recency window. +
+
+
+
contradiction
+
+ Detects later messages that correct earlier ones. Superseded messages get compressed + with a [cce:superseded by ...] annotation. +
+
+
+
coref
+
+ Inlines entity definitions into compressed summaries when a preserved message + references an entity defined only in a compressed message. +
+
+
+
cluster
+
+ Groups consecutive messages by topic using TF-IDF similarity and compresses each + cluster as a unit. Best for long, topic-scattered conversations. +
+
+
+
strategy
+
+ Budget fitting strategy. tiered keeps recency window fixed and is 7-8x + faster than binary-search at 500+ messages. +
+
+
Output
@@ -1026,6 +1193,8 @@

Context Compression Engine / demo

+ + role: message — blank lines separate @@ -1253,6 +1422,54 @@

Context Compression Engine / demo

A: 12`, + corrections: `system: You are a backend engineer helping with infrastructure decisions. + +user: What should we use for the caching layer? + +A: Use Redis for the caching layer with a TTL of 3600 seconds for all session data and user preferences. Configure the connection pool with a maximum of 20 connections per service instance. Redis gives you data structures beyond simple key-value pairs, which is useful for leaderboards, rate limiting counters, and pub-sub messaging between services. + +user: How should we handle the connection pooling? + +A: Set up the Redis connection pool with a minimum of 5 idle connections and a maximum of 20 active connections. Use a connection timeout of 5 seconds and an idle timeout of 300 seconds. Enable TCP keepalive to detect dead connections early. The pool should use FIFO ordering so connections are reused evenly and none sit idle long enough to be dropped by network infrastructure. + +user: Actually, use Memcached instead of Redis. Redis is overkill for simple key-value session storage and the operational overhead of running a Redis cluster isn't justified for our use case. Memcached is simpler to operate and has lower memory overhead per key. + +user: What about cache invalidation? + +A: For cache invalidation with Memcached, use a TTL-based approach as the primary strategy. Set session data TTL to 3600 seconds and user preferences to 7200 seconds. For write-through invalidation, delete the cache key immediately after a database write succeeds. Use cache stampede protection by implementing a locking mechanism where only one request regenerates a stale cache entry while others wait or serve the stale value briefly. + +user: Wait, I think we should reconsider. Let's use DynamoDB DAX instead. We already run on AWS and the team has more experience with DynamoDB. DAX provides microsecond latency for reads and we wouldn't need to manage any caching infrastructure ourselves. + +user: OK so with DynamoDB DAX, how do we handle the session storage? + +A: With DynamoDB DAX, session storage becomes straightforward. Create a sessions table with user_id as the partition key and session_id as the sort key. DAX automatically caches reads, so repeated session lookups hit the cache without any application-level caching code. Set a TTL attribute on each session record for automatic expiration. For write-heavy patterns like updating last_active timestamps, use DynamoDB conditional writes to avoid overwriting concurrent updates.`, + + scattered: `system: You are a senior full-stack developer helping with a large project. + +user: The fetchData function in the API layer needs retry logic. Right now it fails silently on network errors. + +A: The fetchData function should implement exponential backoff with jitter for transient failures. Start with a 200ms base delay, double on each retry up to a maximum of 5 attempts. Add random jitter of plus or minus 20% to prevent thundering herd when multiple clients retry simultaneously. Categorize errors into transient (503, 429, network timeout) vs permanent (400, 401, 404) and only retry transient ones. + +user: The dashboard CSS is broken on mobile. The sidebar overlaps the main content below 768px. + +A: The sidebar layout needs a responsive breakpoint. At 768px and below, switch the sidebar from a fixed position column to a slide-out drawer triggered by a hamburger menu button. Use a CSS media query to change the grid layout from two columns to a single column stack. The drawer should use transform translateX for smooth animation and a semi-transparent backdrop overlay that closes the drawer on tap. + +user: The fetchData retry logic also needs a circuit breaker pattern. If a service returns 503 five times in a row, we should stop calling it for 30 seconds. + +A: Add a circuit breaker state machine to fetchData with three states: closed (normal), open (blocking requests), and half-open (testing recovery). Track failures in a sliding window of 60 seconds. When 5 failures accumulate, transition to open and reject requests immediately for 30 seconds. After the timeout, transition to half-open and allow one probe request through. If it succeeds, close the circuit. If it fails, reopen for another 30 seconds. + +user: For the dashboard mobile fix, we also need to handle the navigation tabs. They currently overflow horizontally. + +A: Convert the navigation tabs to a horizontally scrollable container on mobile. Use overflow-x auto with scroll-snap-align start on each tab for smooth snap scrolling. Hide the scrollbar with webkit-scrollbar display none while keeping the scroll functionality. Add subtle gradient fade indicators on the left and right edges to hint that more tabs are available. Each tab should have a min-width to prevent text wrapping. + +user: Back to the API — we need to add request deduplication to fetchData. If the same URL is requested twice within 100ms, return the same promise. + +A: Implement request deduplication using a Map keyed by the request URL plus a hash of the request body. When fetchData is called, check if an in-flight request exists for the same key. If yes, return the existing promise. If no, create the promise, store it in the map, and attach a finally handler that removes it from the map after completion. Set a 100ms debounce window where subsequent identical requests join the existing one rather than creating new network calls. + +user: The dashboard also needs a dark mode toggle. The current colors are hardcoded. + +A: Extract all color values into CSS custom properties on the root element. Create a data-theme attribute on the html element that switches between light and dark variable sets. Store the user preference in localStorage and check it on page load before first paint to avoid a flash of wrong theme. Use prefers-color-scheme media query as the default when no stored preference exists. The toggle button should use a smooth CSS transition on all color properties with a 200ms duration.`, + deep: `system: You are a software architecture consultant. user: Let's discuss the database schema design. We need to handle user profiles, posts, and comments with proper relationships and indexing. @@ -1373,12 +1590,29 @@

Context Compression Engine / demo

$budgetEnabled.addEventListener('change', () => { $tokenBudget.disabled = !$budgetEnabled.checked; + $budgetStrategyGroup.style.display = $budgetEnabled.checked ? 'flex' : 'none'; }); $fuzzyDedup.addEventListener('change', () => { $fuzzyThresholdGroup.style.display = $fuzzyDedup.checked ? 'flex' : 'none'; }); + // V2 controls + const $compressionDepth = document.getElementById('compressionDepth'); + const $relevanceEnabled = document.getElementById('relevanceEnabled'); + const $relevanceThreshold = document.getElementById('relevanceThreshold'); + const $conversationFlow = document.getElementById('conversationFlow'); + const $importanceScoring = document.getElementById('importanceScoring'); + const $contradictionDetection = document.getElementById('contradictionDetection'); + const $coreference = document.getElementById('coreference'); + const $semanticClustering = document.getElementById('semanticClustering'); + const $budgetStrategy = document.getElementById('budgetStrategy'); + const $budgetStrategyGroup = document.getElementById('budgetStrategyGroup'); + + $relevanceEnabled.addEventListener('change', () => { + $relevanceThreshold.disabled = !$relevanceEnabled.checked; + }); + // ── Line-level diff (LCS) ── function lineDiff(oldText, newText) { @@ -1474,8 +1708,22 @@

Context Compression Engine / demo

if ($budgetEnabled.checked) { opts.tokenBudget = parseInt($tokenBudget.value, 10); + opts.budgetStrategy = $budgetStrategy.value; } + // V2 features + if ($compressionDepth.value) { + opts.compressionDepth = $compressionDepth.value; + } + if ($relevanceEnabled.checked) { + opts.relevanceThreshold = parseInt($relevanceThreshold.value, 10); + } + if ($conversationFlow.checked) opts.conversationFlow = true; + if ($importanceScoring.checked) opts.importanceScoring = true; + if ($contradictionDetection.checked) opts.contradictionDetection = true; + if ($coreference.checked) opts.coreference = true; + if ($semanticClustering.checked) opts.semanticClustering = true; + try { const result = CCE.compress(inputMessages, opts); renderResult(result, inputMessages); @@ -1580,6 +1828,35 @@

Context Compression Engine / demo

if (result.recencyWindow != null) chips += chip('auto window', result.recencyWindow, 'neutral', i++); + // V2 quality metrics + if (c.quality_score != null) + chips += chip( + 'quality', + c.quality_score.toFixed(3), + c.quality_score >= 0.9 ? 'good' : c.quality_score >= 0.7 ? 'warn' : 'bad', + i++, + ); + if (c.entity_retention != null) + chips += chip( + 'entities', + (c.entity_retention * 100).toFixed(0) + '%', + c.entity_retention >= 0.9 ? 'good' : c.entity_retention >= 0.7 ? 'warn' : 'bad', + i++, + ); + if (c.structural_integrity != null) + chips += chip( + 'structure', + (c.structural_integrity * 100).toFixed(0) + '%', + c.structural_integrity >= 0.9 ? 'good' : 'warn', + i++, + ); + if (c.messages_relevance_dropped != null) + chips += chip('relevance dropped', c.messages_relevance_dropped, 'neutral', i++); + if (c.messages_importance_preserved != null) + chips += chip('importance kept', c.messages_importance_preserved, 'neutral', i++); + if (c.messages_contradicted != null) + chips += chip('contradicted', c.messages_contradicted, 'neutral', i++); + $stats.innerHTML = chips; $stats.classList.add('visible'); } diff --git a/src/classify.ts b/src/classify.ts index 219241d..395ec27 100644 --- a/src/classify.ts +++ b/src/classify.ts @@ -194,6 +194,7 @@ const FORCE_T0_PATTERNS: Array<{ re: RegExp; label: string }> = [ { re: /\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/, label: 'phone' }, { re: /\b(v\d+\.\d+(\.\d+)?|version\s+\d+)\b/i, label: 'version_number' }, { re: /[a-f0-9]{40,64}/i, label: 'hash_or_sha' }, + { re: /[A-Za-z0-9+/]{40,}={0,2}/, label: 'base64_content' }, { re: /(?:\/[\w.-]+){2,}/, label: 'file_path' }, { re: /\b\d+(\.\d+){1,5}\b/, label: 'ip_or_semver' }, { re: /"[^"]{3,}"(?:\s*[,:])/, label: 'quoted_key' }, @@ -280,6 +281,7 @@ export const HARD_T0_REASONS = new Set([ 'sql_content', 'verse_pattern', 'reasoning_chain', + 'base64_content', ]); export function classifyMessage(content: string): ClassifyResult { diff --git a/tests/classify.test.ts b/tests/classify.test.ts index dc42a0c..183d9a1 100644 --- a/tests/classify.test.ts +++ b/tests/classify.test.ts @@ -825,6 +825,66 @@ describe('classifyMessage', () => { }); }); + describe('high-entropy content', () => { + it('classifies Base64 blob as T0 via base64_content', () => { + const base64 = + 'U29tZSBiYXNlNjQgZW5jb2RlZCBkYXRhIHRoYXQgaXMgbG9uZyBlbm91Z2ggdG8gZXhjZWVkIHRoZSBmb3J0eSBjaGFyYWN0ZXIgdGhyZXNob2xkIGFuZCBzaG91bGQgYmUgcHJlc2VydmVkIHZlcmJhdGlt'; + const r = classifyMessage(base64); + expect(r.decision).toBe('T0'); + expect(r.reasons).toContain('base64_content'); + }); + + it('classifies hex dump as T0 via hash_or_sha', () => { + const hexLines = Array.from({ length: 10 }, (_, i) => + Array.from({ length: 32 }, (_, j) => + ((i * 32 + j) % 256).toString(16).padStart(2, '0'), + ).join(''), + ).join('\n'); + const r = classifyMessage(hexLines); + expect(r.decision).toBe('T0'); + expect(r.reasons).toContain('hash_or_sha'); + }); + + it('classifies standalone UUID array as T3 (known gap — UUIDs are 32 hex chars, below 40-char threshold)', () => { + // UUIDs without dashes are 32 hex chars, under the 40-char hash_or_sha minimum. + // This documents a known classification gap for pure UUID blocks. + const uuids = Array.from( + { length: 20 }, + (_, i) => + `${(i * 1111).toString(16).padStart(8, '0')}-` + + `${(i * 22).toString(16).padStart(4, '0')}-4${(i * 3).toString(16).padStart(3, '0')}-` + + `a${(i * 5).toString(16).padStart(3, '0')}-${(i * 777777).toString(16).padStart(12, '0')}`, + ).join('\n'); + const r = classifyMessage(uuids); + expect(r.decision).toBe('T3'); + }); + + it('classifies mixed Base64 + prose as T0', () => { + const base64 = 'U29tZSBiYXNlNjQgZW5jb2RlZCBkYXRhIHRoYXQgaXMgbG9uZyBlbm91Z2ggdG8gZXhjZWVk'; + const r = classifyMessage( + 'Here is the encoded payload:\n\n' + base64 + '\n\nPlease decode and process it.', + ); + expect(r.decision).toBe('T0'); + expect(r.reasons).toContain('base64_content'); + }); + + it('does not false-positive on short alphanumeric words', () => { + const r = classifyMessage('The quick brown fox jumps over the lazy dog near the river bank.'); + expect(r.reasons).not.toContain('base64_content'); + }); + + it('long camelCase identifier triggers base64_content (known limitation)', () => { + // Long identifiers without spaces match [A-Za-z0-9+/]{40,}. + // In practice these also trigger other T0 signals (camelCase detection), + // so the classification outcome (T0) is correct even if the reason is imprecise. + const r = classifyMessage( + 'The class MyVeryLongClassNameThatExceedsFortyCharactersEasily extends Base.', + ); + expect(r.decision).toBe('T0'); + expect(r.reasons).toContain('base64_content'); + }); + }); + describe('performance', () => { it('completes in under 5ms', () => { const start = performance.now(); diff --git a/tests/compress.test.ts b/tests/compress.test.ts index af1e798..ed728dd 100644 --- a/tests/compress.test.ts +++ b/tests/compress.test.ts @@ -3241,4 +3241,34 @@ describe('compression decision audit trail (trace)', () => { expect(result.compression.messages_compressed).toBeGreaterThan(0); }); }); + + describe('high-entropy content preservation', () => { + it('preserves hex dump verbatim in output', () => { + const hexDump = Array.from({ length: 10 }, (_, i) => + Array.from({ length: 32 }, (_, j) => + ((i * 32 + j) % 256).toString(16).padStart(2, '0'), + ).join(''), + ).join('\n'); + const messages: Message[] = [ + msg({ id: '1', index: 0, role: 'user', content: 'Analyze this hex dump:\n\n' + hexDump }), + msg({ id: '2', index: 1, role: 'assistant', content: 'I see the hex data.' }), + ]; + const result = compress(messages, { recencyWindow: 0 }); + // Hex dump should survive — classified as T0 via hash_or_sha + const allContent = result.messages.map((m) => m.content).join('\n'); + expect(allContent).toContain(hexDump.slice(0, 64)); + }); + + it('preserves Base64 blob in output', () => { + const base64 = + 'U29tZSBiYXNlNjQgZW5jb2RlZCBkYXRhIHRoYXQgaXMgbG9uZyBlbm91Z2ggdG8gZXhjZWVkIHRoZSBmb3J0eSBjaGFyYWN0ZXIgdGhyZXNob2xkIGFuZCBzaG91bGQgYmUgcHJlc2VydmVkIHZlcmJhdGlt'; + const messages: Message[] = [ + msg({ id: '1', index: 0, role: 'user', content: 'Here is the cert:\n\n' + base64 }), + msg({ id: '2', index: 1, role: 'assistant', content: 'Certificate received.' }), + ]; + const result = compress(messages, { recencyWindow: 0 }); + const allContent = result.messages.map((m) => m.content).join('\n'); + expect(allContent).toContain(base64); + }); + }); });