Skip to content

Commit 5af0e8c

Browse files
mbaynton and claude committed
Fix stream processor failing when Central Directory is encountered
The downloader was failing with "Failed to parse frame at offset X" when processing archives where a download part contains the Central Directory. The stream processor didn't recognize the CD header signature (0x02014b50) and would error when encountering it.

Changes:
- Add FRAME_ZIP_CENTRAL_DIRECTORY to frame type enum
- Detect Central Directory header signature in parse_next_frame()
- Handle CD in STATE_EXPECT_LOCAL_HEADER and STATE_PROCESSING_FRAMES by transitioning to STATE_DONE
- Add validation that compares actual CD offset with expected offset from EOCD, printing a warning if they differ
- Enhance error messages to include part index and archive offset
- Add unit tests for CD detection in frame parser and stream processor

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent acb8b93 commit 5af0e8c

5 files changed

Lines changed: 213 additions & 2 deletions

File tree

include/stream_processor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ struct frame_info {
149149
FRAME_BURST_START_OF_PART,
150150
FRAME_ZIP_LOCAL_HEADER,
151151
FRAME_ZIP_DATA_DESCRIPTOR,
152+
FRAME_ZIP_CENTRAL_DIRECTORY,
152153
FRAME_UNKNOWN
153154
} type;
154155

src/downloader/frame_parser.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ int parse_next_frame(const uint8_t *buffer, size_t buffer_len, struct frame_info
3939
return STREAM_PROC_SUCCESS;
4040
}
4141

42+
if (magic == ZIP_CENTRAL_DIR_HEADER_SIG) {
43+
// Central Directory reached - signals end of file data in this part
44+
info->type = FRAME_ZIP_CENTRAL_DIRECTORY;
45+
info->frame_size = 0; // Not consumed - just signals end
46+
return STREAM_PROC_SUCCESS;
47+
}
48+
4249
if (magic == ZSTD_MAGIC_NUMBER) {
4350
info->type = FRAME_ZSTD_COMPRESSED;
4451

src/downloader/stream_processor.c

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,11 @@ int part_processor_process_data(
236236
}
237237
if (rc != STREAM_PROC_SUCCESS) {
238238
snprintf(state->error_message, sizeof(state->error_message),
239-
"Failed to parse frame at offset %zu", offset);
239+
"Failed to parse frame at offset %zu (part %u, part_start=%llu, bytes_processed=%llu, archive_offset=%llu)",
240+
offset, state->part_index,
241+
(unsigned long long)state->part_start_offset,
242+
(unsigned long long)state->bytes_processed,
243+
(unsigned long long)(state->part_start_offset + state->bytes_processed + offset));
240244
state->state = STATE_ERROR;
241245
state->error_code = rc;
242246
return rc;
@@ -249,6 +253,19 @@ int part_processor_process_data(
249253
continue;
250254
}
251255

256+
// Central Directory reached - stop processing this part
257+
if (info.type == FRAME_ZIP_CENTRAL_DIRECTORY) {
258+
// Validate CD location matches expected offset
259+
uint64_t actual_cd_offset = state->part_start_offset + state->bytes_processed + offset;
260+
if (actual_cd_offset != state->cd_result->central_dir_offset) {
261+
fprintf(stderr, "Warning: Central Directory found at offset %llu, expected %llu\n",
262+
(unsigned long long)actual_cd_offset,
263+
(unsigned long long)state->cd_result->central_dir_offset);
264+
}
265+
state->state = STATE_DONE;
266+
break;
267+
}
268+
252269
// Should be local header
253270
if (info.type != FRAME_ZIP_LOCAL_HEADER) {
254271
snprintf(state->error_message, sizeof(state->error_message),
@@ -296,7 +313,11 @@ int part_processor_process_data(
296313
}
297314
if (rc != STREAM_PROC_SUCCESS) {
298315
snprintf(state->error_message, sizeof(state->error_message),
299-
"Failed to parse frame at offset %zu", offset);
316+
"Failed to parse frame at offset %zu (part %u, part_start=%llu, bytes_processed=%llu, archive_offset=%llu)",
317+
offset, state->part_index,
318+
(unsigned long long)state->part_start_offset,
319+
(unsigned long long)state->bytes_processed,
320+
(unsigned long long)(state->part_start_offset + state->bytes_processed + offset));
300321
state->state = STATE_ERROR;
301322
state->error_code = rc;
302323
return rc;
@@ -380,6 +401,23 @@ int part_processor_process_data(
380401
// Don't advance offset - re-parse in new state
381402
break;
382403

404+
case FRAME_ZIP_CENTRAL_DIRECTORY: {
405+
// Central Directory reached - stop processing this part
406+
// Validate CD location matches expected offset
407+
uint64_t actual_cd_offset = state->part_start_offset + state->bytes_processed + offset;
408+
if (actual_cd_offset != state->cd_result->central_dir_offset) {
409+
fprintf(stderr, "Warning: Central Directory found at offset %llu, expected %llu\n",
410+
(unsigned long long)actual_cd_offset,
411+
(unsigned long long)state->cd_result->central_dir_offset);
412+
}
413+
rc = close_output_file(state);
414+
if (rc != STREAM_PROC_SUCCESS) {
415+
return rc;
416+
}
417+
state->state = STATE_DONE;
418+
break;
419+
}
420+
383421
default:
384422
snprintf(state->error_message, sizeof(state->error_message),
385423
"Unknown frame type %d", info.type);

tests/unit/test_frame_parser.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,22 @@ void test_parse_zip_data_descriptor(void) {
187187
TEST_ASSERT_EQUAL(16, info.frame_size);
188188
}
189189

190+
void test_parse_zip_central_directory(void) {
191+
uint8_t buffer[64];
192+
// Central Directory header signature: 0x02014b50
193+
uint32_t sig = 0x02014b50;
194+
memcpy(buffer, &sig, 4);
195+
// Fill rest with zeros (minimum CD header is 46 bytes but we only need 4 for detection)
196+
memset(buffer + 4, 0, 60);
197+
198+
struct frame_info info;
199+
int rc = parse_next_frame(buffer, 64, &info);
200+
201+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
202+
TEST_ASSERT_EQUAL(FRAME_ZIP_CENTRAL_DIRECTORY, info.type);
203+
TEST_ASSERT_EQUAL(0, info.frame_size); // frame_size is 0 - signals end, not consumed
204+
}
205+
190206
void test_parse_zstd_frame(void) {
191207
uint8_t buffer[64];
192208
size_t uncompressed_size = 1000;
@@ -448,6 +464,7 @@ int main(void) {
448464
RUN_TEST(test_parse_zip_local_header);
449465
RUN_TEST(test_parse_zip_local_header_long_filename);
450466
RUN_TEST(test_parse_zip_data_descriptor);
467+
RUN_TEST(test_parse_zip_central_directory);
451468
RUN_TEST(test_parse_zstd_frame);
452469
RUN_TEST(test_parse_burst_padding);
453470
RUN_TEST(test_parse_burst_start_of_part);

tests/unit/test_stream_processor.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,148 @@ void test_split_at_multiple_boundaries(void) {
12831283
free_test_cd_result(cd);
12841284
}
12851285

1286+
// =============================================================================
1287+
// Central Directory Detection Tests
1288+
// =============================================================================
1289+
1290+
// Helper to create a Central Directory header signature.
//
// Writes the 4-byte ZIP Central Directory header signature (0x02014b50) at
// `buffer` and returns the number of bytes written (always 4). The caller must
// supply at least 4 writable bytes.
//
// ZIP stores the signature little-endian, so the bytes are emitted explicitly
// rather than copied from a host-endian uint32_t; this keeps the fixture
// identical on big-endian hosts.
static size_t create_central_dir_header(uint8_t *buffer) {
    static const uint8_t cd_signature[4] = { 0x50, 0x4b, 0x01, 0x02 };

    memcpy(buffer, cd_signature, sizeof cd_signature);
    return sizeof cd_signature;
}
1296+
1297+
void test_central_directory_at_expected_offset(void) {
1298+
uint8_t buffer[1024];
1299+
size_t offset = 0;
1300+
1301+
// Create: local header + zstd frame + data descriptor + CD header
1302+
offset += create_local_header(buffer + offset, "test.txt");
1303+
size_t zstd_size = create_test_zstd_frame(buffer + offset, sizeof(buffer) - offset, 100);
1304+
offset += zstd_size;
1305+
offset += create_data_descriptor(buffer + offset, 0, (uint32_t)zstd_size, 100);
1306+
1307+
// Record where CD starts
1308+
size_t cd_offset = offset;
1309+
1310+
// Add CD header signature
1311+
offset += create_central_dir_header(buffer + offset);
1312+
1313+
// Create cd_result with matching central_dir_offset
1314+
struct central_dir_parse_result *cd = create_test_cd_result("test.txt", 0, zstd_size, 100);
1315+
cd->central_dir_offset = cd_offset; // Set expected CD location
1316+
1317+
struct part_processor_state *state = part_processor_create(0, cd, test_output_dir, 8 * 1024 * 1024);
1318+
1319+
int rc = part_processor_process_data(state, buffer, offset);
1320+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1321+
1322+
rc = part_processor_finalize(state);
1323+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1324+
1325+
part_processor_destroy(state);
1326+
free_test_cd_result(cd);
1327+
}
1328+
1329+
void test_central_directory_before_expected_offset(void) {
1330+
uint8_t buffer[1024];
1331+
size_t offset = 0;
1332+
1333+
// Create: local header + zstd frame + data descriptor + CD header
1334+
offset += create_local_header(buffer + offset, "test.txt");
1335+
size_t zstd_size = create_test_zstd_frame(buffer + offset, sizeof(buffer) - offset, 100);
1336+
offset += zstd_size;
1337+
offset += create_data_descriptor(buffer + offset, 0, (uint32_t)zstd_size, 100);
1338+
1339+
// Record where CD starts
1340+
size_t cd_offset = offset;
1341+
1342+
// Add CD header signature
1343+
offset += create_central_dir_header(buffer + offset);
1344+
1345+
// Create cd_result with central_dir_offset set LARGER than actual
1346+
// (CD found earlier than expected - e.g., truncated archive)
1347+
struct central_dir_parse_result *cd = create_test_cd_result("test.txt", 0, zstd_size, 100);
1348+
cd->central_dir_offset = cd_offset + 1000; // Expected later than actual
1349+
1350+
struct part_processor_state *state = part_processor_create(0, cd, test_output_dir, 8 * 1024 * 1024);
1351+
1352+
// Should still succeed (warning printed to stderr but no error)
1353+
int rc = part_processor_process_data(state, buffer, offset);
1354+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1355+
1356+
rc = part_processor_finalize(state);
1357+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1358+
1359+
part_processor_destroy(state);
1360+
free_test_cd_result(cd);
1361+
}
1362+
1363+
void test_central_directory_after_expected_offset(void) {
1364+
uint8_t buffer[1024];
1365+
size_t offset = 0;
1366+
1367+
// Create: local header + zstd frame + data descriptor + CD header
1368+
offset += create_local_header(buffer + offset, "test.txt");
1369+
size_t zstd_size = create_test_zstd_frame(buffer + offset, sizeof(buffer) - offset, 100);
1370+
offset += zstd_size;
1371+
offset += create_data_descriptor(buffer + offset, 0, (uint32_t)zstd_size, 100);
1372+
1373+
// Record where CD starts
1374+
size_t cd_offset = offset;
1375+
1376+
// Add CD header signature
1377+
offset += create_central_dir_header(buffer + offset);
1378+
1379+
// Create cd_result with central_dir_offset set SMALLER than actual
1380+
// (CD found later than expected - e.g., extra padding before CD)
1381+
struct central_dir_parse_result *cd = create_test_cd_result("test.txt", 0, zstd_size, 100);
1382+
cd->central_dir_offset = cd_offset > 100 ? cd_offset - 100 : 0; // Expected earlier than actual
1383+
1384+
struct part_processor_state *state = part_processor_create(0, cd, test_output_dir, 8 * 1024 * 1024);
1385+
1386+
// Should still succeed (warning printed to stderr but no error)
1387+
int rc = part_processor_process_data(state, buffer, offset);
1388+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1389+
1390+
rc = part_processor_finalize(state);
1391+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1392+
1393+
part_processor_destroy(state);
1394+
free_test_cd_result(cd);
1395+
}
1396+
1397+
void test_central_directory_in_processing_frames_state(void) {
1398+
// Test CD detection when in STATE_PROCESSING_FRAMES (with open file)
1399+
uint8_t buffer[1024];
1400+
size_t offset = 0;
1401+
1402+
// Create: local header + zstd frame (no data descriptor) + CD header
1403+
// This tests the FRAME_ZIP_CENTRAL_DIRECTORY case in STATE_PROCESSING_FRAMES
1404+
offset += create_local_header(buffer + offset, "test.txt");
1405+
size_t zstd_size = create_test_zstd_frame(buffer + offset, sizeof(buffer) - offset, 100);
1406+
offset += zstd_size;
1407+
1408+
// No data descriptor - go directly to CD
1409+
size_t cd_offset = offset;
1410+
offset += create_central_dir_header(buffer + offset);
1411+
1412+
// Create cd_result - file uses no data descriptor (for this test)
1413+
struct central_dir_parse_result *cd = create_test_cd_result("test.txt", 0, zstd_size, 100);
1414+
cd->central_dir_offset = cd_offset;
1415+
1416+
struct part_processor_state *state = part_processor_create(0, cd, test_output_dir, 8 * 1024 * 1024);
1417+
1418+
int rc = part_processor_process_data(state, buffer, offset);
1419+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1420+
1421+
rc = part_processor_finalize(state);
1422+
TEST_ASSERT_EQUAL(STREAM_PROC_SUCCESS, rc);
1423+
1424+
part_processor_destroy(state);
1425+
free_test_cd_result(cd);
1426+
}
1427+
12861428
int main(void) {
12871429
UNITY_BEGIN();
12881430

@@ -1330,5 +1472,11 @@ int main(void) {
13301472
RUN_TEST(test_split_mid_local_header_variable_fields);
13311473
RUN_TEST(test_split_at_multiple_boundaries);
13321474

1475+
// Central Directory detection tests
1476+
RUN_TEST(test_central_directory_at_expected_offset);
1477+
RUN_TEST(test_central_directory_before_expected_offset);
1478+
RUN_TEST(test_central_directory_after_expected_offset);
1479+
RUN_TEST(test_central_directory_in_processing_frames_state);
1480+
13331481
return UNITY_END();
13341482
}

0 commit comments

Comments
 (0)