@@ -401,4 +401,208 @@ function defineBinlogStreamTests(factory: storage.TestStorageFactory) {
401401 ] ) ;
402402 }
403403 } ) ;
404+
// Verifies that a snapshot_filter on the users source table limits which
// pre-existing rows end up in the bucket after the initial snapshot.
// The data query below has no WHERE clause of its own, so the expectation of a
// single row relies on the snapshot filter to leave out the inactive user.
405+ test ( 'Snapshot filter - replicate only filtered rows' , async ( ) => {
406+ await using context = await BinlogStreamTestContext . open ( factory ) ;
407+ const { connectionManager } = context ;
// One bucket (active_users) selecting id, name and status from users, with a
// snapshot_filter of status = active declared under source_tables.
408+ await context . updateSyncRules ( `
409+ bucket_definitions:
410+ active_users:
411+ data:
412+ - SELECT id, name, status FROM "users"
413+ source_tables:
414+ users:
415+ snapshot_filter: "status = 'active'"` ) ;
416+
417+ await connectionManager . query (
418+ `CREATE TABLE users (id CHAR(36) PRIMARY KEY, name TEXT, status VARCHAR(20))`
419+ ) ;
420+
421+ // Insert one active and one inactive row before replicateSnapshot is called
422+ const activeId = uuid ( ) ;
423+ const inactiveId = uuid ( ) ;
424+ await connectionManager . query (
425+ `INSERT INTO users(id, name, status) VALUES('${ activeId } ', 'Active User', 'active')`
426+ ) ;
427+ await connectionManager . query (
428+ `INSERT INTO users(id, name, status) VALUES('${ inactiveId } ', 'Inactive User', 'inactive')`
429+ ) ;
430+
431+ await context . replicateSnapshot ( ) ;
432+
// Read back the contents of the active_users bucket.
433+ const data = await context . getBucketData ( 'active_users[]' ) ;
434+
435+ // Should only have the active user, not the inactive one
436+ expect ( data ) . toMatchObject ( [ putOp ( 'users' , { id : activeId , name : 'Active User' , status : 'active' } ) ] ) ;
// Explicit length check so an extra op for the inactive user fails the test.
437+ expect ( data . length ) . toBe ( 1 ) ;
438+ } ) ;
439+
// Verifies snapshot behaviour when two bucket definitions each declare a
// different snapshot_filter for the same users table (status = active in one,
// is_admin = true in the other). Three rows are inserted: one matching only the
// first filter, one matching only the second, and one matching neither.
// NOTE(review): both data queries also carry a WHERE clause equal to their
// snapshot filter, so presumably the third row would be kept out of both
// buckets by the queries alone - confirm this test actually isolates the
// combined (OR) snapshot filter.
440+ test ( 'Snapshot filter - ORs multiple bucket filters' , async ( ) => {
441+ await using context = await BinlogStreamTestContext . open ( factory ) ;
442+ const { connectionManager } = context ;
// Two buckets over the same table, each with its own snapshot_filter.
443+ await context . updateSyncRules ( `
444+ bucket_definitions:
445+ active_users:
446+ data:
447+ - SELECT id, name, status FROM "users" WHERE status = 'active'
448+ source_tables:
449+ users:
450+ snapshot_filter: "status = 'active'"
451+
452+ admin_users:
453+ data:
454+ - SELECT id, name, is_admin FROM "users" WHERE is_admin = true
455+ source_tables:
456+ users:
457+ snapshot_filter: "is_admin = true"` ) ;
458+
459+ await connectionManager . query (
460+ `CREATE TABLE users (id CHAR(36) PRIMARY KEY, name TEXT, status VARCHAR(20), is_admin BOOLEAN)`
461+ ) ;
462+
463+ // Insert test data: active non-admin, inactive admin, inactive non-admin
464+ const activeUserId = uuid ( ) ;
465+ const adminUserId = uuid ( ) ;
466+ const regularUserId = uuid ( ) ;
467+
468+ await connectionManager . query (
469+ `INSERT INTO users(id, name, status, is_admin) VALUES('${ activeUserId } ', 'Active User', 'active', false)`
470+ ) ;
471+ await connectionManager . query (
472+ `INSERT INTO users(id, name, status, is_admin) VALUES('${ adminUserId } ', 'Admin User', 'inactive', true)`
473+ ) ;
474+ await connectionManager . query (
475+ `INSERT INTO users(id, name, status, is_admin) VALUES('${ regularUserId } ', 'Regular User', 'inactive', false)`
476+ ) ;
477+
478+ await context . replicateSnapshot ( ) ;
479+
// Read each bucket separately.
480+ const activeData = await context . getBucketData ( 'active_users[]' ) ;
481+ const adminData = await context . getBucketData ( 'admin_users[]' ) ;
482+
483+ // Active bucket should have the active user
// NOTE(review): the expected op includes is_admin, which the active_users data
// query does not select - confirm how putOp and toMatchObject compare row data.
484+ expect ( activeData ) . toMatchObject ( [
485+ putOp ( 'users' , { id : activeUserId , name : 'Active User' , status : 'active' , is_admin : 0n } )
486+ ] ) ;
487+
488+ // Admin bucket should have the admin user
// NOTE(review): the expected op includes status, which the admin_users data
// query does not select - same caveat as for the active bucket.
489+ expect ( adminData ) . toMatchObject ( [
490+ putOp ( 'users' , { id : adminUserId , name : 'Admin User' , status : 'inactive' , is_admin : 1n } )
491+ ] ) ;
492+
493+ // Regular user should not be in either bucket (filtered out by snapshot filter)
// NOTE(review): no explicit assertion follows this comment and no length check
// is made here; confirm the array matchers above reject extra entries.
494+ } ) ;
495+
// Verifies bucket contents when changes arrive after the initial snapshot:
// an active row is snapshotted, streaming is started, then an inactive row is
// inserted and the active row is renamed. The bucket is expected to hold only
// the active row with its updated name.
// NOTE(review): the data query has WHERE status = active, so presumably the
// inactive insert is excluded by the query itself rather than by the snapshot
// filter - confirm which mechanism this test is meant to exercise.
496+ test ( 'Snapshot filter - CDC changes only affect filtered rows' , async ( ) => {
497+ await using context = await BinlogStreamTestContext . open ( factory ) ;
498+ const { connectionManager } = context ;
// Single bucket whose data query and snapshot_filter both select active users.
499+ await context . updateSyncRules ( `
500+ bucket_definitions:
501+ active_users:
502+ data:
503+ - SELECT id, name, status FROM "users" WHERE status = 'active'
504+ source_tables:
505+ users:
506+ snapshot_filter: "status = 'active'"` ) ;
507+
508+ await connectionManager . query (
509+ `CREATE TABLE users (id CHAR(36) PRIMARY KEY, name TEXT, status VARCHAR(20))`
510+ ) ;
511+
512+ // Insert an active user before snapshot
513+ const activeId = uuid ( ) ;
514+ await connectionManager . query (
515+ `INSERT INTO users(id, name, status) VALUES('${ activeId } ', 'Active User', 'active')`
516+ ) ;
517+
// Take the initial snapshot first, then begin streaming; the statements below
// are issued after startStreaming has been called.
518+ await context . replicateSnapshot ( ) ;
519+ await context . startStreaming ( ) ;
520+
521+ // Insert an inactive user - should not appear in bucket
522+ const inactiveId = uuid ( ) ;
523+ await connectionManager . query (
524+ `INSERT INTO users(id, name, status) VALUES('${ inactiveId } ', 'Inactive User', 'inactive')`
525+ ) ;
526+
527+ // Update the active user - should appear in bucket
528+ await connectionManager . query ( `UPDATE users SET name = 'Updated Active' WHERE id = '${ activeId } '` ) ;
529+
530+ const data = await context . getBucketData ( 'active_users[]' ) ;
531+
532+ // Should only have the active user with updated name
// NOTE(review): exactly one op is expected although the row is written once by
// the snapshot and again by the UPDATE - confirm getBucketData does not return
// both the original and the updated PUT.
533+ expect ( data ) . toMatchObject ( [ putOp ( 'users' , { id : activeId , name : 'Updated Active' , status : 'active' } ) ] ) ;
534+ expect ( data . length ) . toBe ( 1 ) ;
535+ } ) ;
536+
// Verifies a snapshot_filter that combines a date function with an equality
// check: created_at within the last 7 days AND status = active.
// Three rows are inserted (recent and active, 30 days old and active, recent
// and inactive); only the first satisfies both conditions. The data query has
// no WHERE clause, so the single-row expectation relies on the snapshot filter.
537+ test ( 'Snapshot filter - complex WHERE clause' , async ( ) => {
538+ await using context = await BinlogStreamTestContext . open ( factory ) ;
539+ const { connectionManager } = context ;
// The filter expression is written in source-database SQL (DATE_SUB, NOW).
540+ await context . updateSyncRules ( `
541+ bucket_definitions:
542+ recent_active_users:
543+ data:
544+ - SELECT id, name, created_at FROM "users"
545+ source_tables:
546+ users:
547+ snapshot_filter: "created_at > DATE_SUB(NOW(), INTERVAL 7 DAY) AND status = 'active'"` ) ;
548+
549+ await connectionManager . query (
550+ `CREATE TABLE users (id CHAR(36) PRIMARY KEY, name TEXT, status VARCHAR(20), created_at DATETIME)`
551+ ) ;
552+
553+ // Insert recent active user (matches both conditions)
554+ const recentActiveId = uuid ( ) ;
555+ await connectionManager . query (
556+ `INSERT INTO users(id, name, status, created_at) VALUES('${ recentActiveId } ', 'Recent Active', 'active', NOW())`
557+ ) ;
558+
559+ // Insert old active user (created 30 days ago, outside the 7 day window)
560+ const oldActiveId = uuid ( ) ;
561+ await connectionManager . query (
562+ `INSERT INTO users(id, name, status, created_at) VALUES('${ oldActiveId } ', 'Old Active', 'active', DATE_SUB(NOW(), INTERVAL 30 DAY))`
563+ ) ;
564+
565+ // Insert recent inactive user (inside the window but wrong status)
566+ const recentInactiveId = uuid ( ) ;
567+ await connectionManager . query (
568+ `INSERT INTO users(id, name, status, created_at) VALUES('${ recentInactiveId } ', 'Recent Inactive', 'inactive', NOW())`
569+ ) ;
570+
571+ await context . replicateSnapshot ( ) ;
572+
573+ const data = await context . getBucketData ( 'recent_active_users[]' ) ;
574+
575+ // Should only have the recent active user
576+ expect ( data . length ) . toBe ( 1 ) ;
// Only op, object_type and object_id are compared here; the row payload
// (including created_at, which comes from NOW()) is not asserted.
577+ expect ( data [ 0 ] ) . toMatchObject ( {
578+ op : 'PUT' ,
579+ object_type : 'users' ,
580+ object_id : recentActiveId
581+ } ) ;
582+ } ) ;
583+
// Baseline case: the sync rules declare no source_tables section and therefore
// no snapshot_filter. Two rows are inserted before the snapshot and both are
// expected in the bucket afterwards.
584+ test ( 'Snapshot filter - no filter means all rows replicated' , async ( ) => {
585+ await using context = await BinlogStreamTestContext . open ( factory ) ;
586+ const { connectionManager } = context ;
// Single bucket with an unfiltered data query and no snapshot filter.
587+ await context . updateSyncRules ( `
588+ bucket_definitions:
589+ all_users:
590+ data:
591+ - SELECT id, name FROM "users"` ) ;
592+
593+ await connectionManager . query ( `CREATE TABLE users (id CHAR(36) PRIMARY KEY, name TEXT)` ) ;
594+
595+ // Insert multiple users
596+ const user1Id = uuid ( ) ;
597+ const user2Id = uuid ( ) ;
598+ await connectionManager . query ( `INSERT INTO users(id, name) VALUES('${ user1Id } ', 'User 1')` ) ;
599+ await connectionManager . query ( `INSERT INTO users(id, name) VALUES('${ user2Id } ', 'User 2')` ) ;
600+
601+ await context . replicateSnapshot ( ) ;
602+
603+ const data = await context . getBucketData ( 'all_users[]' ) ;
604+
605+ // Should have both users when no filter is specified
// Only the count is checked; the individual ops are not compared.
606+ expect ( data . length ) . toBe ( 2 ) ;
607+ } ) ;
404608}
0 commit comments