diff --git a/Input/input_berlinmod.csv b/Input/input_berlinmod.csv new file mode 100644 index 0000000000..753a68124b --- /dev/null +++ b/Input/input_berlinmod.csv @@ -0,0 +1,21 @@ +1735711200,100,4.3517,50.8503 +1735711200,300,4.2000,50.7500 +1735711201,200,4.3060,50.8270 +1735711202,100,4.3517,50.8503 +1735711202,300,4.2000,50.7500 +1735711203,200,4.3060,50.8270 +1735711204,100,4.3517,50.8503 +1735711204,300,4.2000,50.7500 +1735711205,200,4.3060,50.8270 +1735711206,100,4.3517,50.8503 +1735711206,300,4.2000,50.7500 +1735711207,200,4.3060,50.8270 +1735711208,100,4.3517,50.8503 +1735711208,300,4.2000,50.7500 +1735711209,200,4.3060,50.8270 +1735711210,100,4.3517,50.8503 +1735711210,300,4.2000,50.7500 +1735711211,200,4.3060,50.8270 +1735711212,100,4.3517,50.8503 +1735711212,300,4.2000,50.7500 +1735711213,200,4.3060,50.8270 diff --git a/Queries/berlinmod/q1_continuous.yaml b/Queries/berlinmod/q1_continuous.yaml new file mode 100644 index 0000000000..49786049e8 --- /dev/null +++ b/Queries/berlinmod/q1_continuous.yaml @@ -0,0 +1,47 @@ +# BerlinMOD-Q1 — continuous form +# "Which vehicles have appeared in the stream?" +# Per 1-second sliding bucket: emit (start, end, vehicle_id, event-count-in-bucket). +# Reading N rows over consecutive buckets enumerates the distinct-vehicles-seen set. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q1_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q1_snapshot.yaml b/Queries/berlinmod/q1_snapshot.yaml new file mode 100644 index 0000000000..4fa9c05d63 --- /dev/null +++ b/Queries/berlinmod/q1_snapshot.yaml @@ -0,0 +1,46 @@ +# BerlinMOD-Q1 — snapshot form +# "At each 5-second tick, list of distinct vehicles seen in the tick window." +# Streaming approximation of the batch BerlinMOD-Q1 snapshot at time T. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q1_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q1_windowed.yaml b/Queries/berlinmod/q1_windowed.yaml new file mode 100644 index 0000000000..2d25214d24 --- /dev/null +++ b/Queries/berlinmod/q1_windowed.yaml @@ -0,0 +1,46 @@ +# BerlinMOD-Q1 — windowed form +# "Per 10-second tumbling window, distinct vehicles seen." +# Emits one row per (window, vehicle) seen; reading N rows per window = distinctCount. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q1_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q2_continuous.yaml b/Queries/berlinmod/q2_continuous.yaml new file mode 100644 index 0000000000..1d89420d19 --- /dev/null +++ b/Queries/berlinmod/q2_continuous.yaml @@ -0,0 +1,44 @@ +# BerlinMOD-Q2 — continuous form +# "Where is vehicle X (= 200) right now?" +# Per 1-second sliding bucket, emit a trajectory snippet for vehicle X. + +query: | + SELECT start, + end, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(200) + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q2_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q2_snapshot.yaml b/Queries/berlinmod/q2_snapshot.yaml new file mode 100644 index 0000000000..af0946bb57 --- /dev/null +++ b/Queries/berlinmod/q2_snapshot.yaml @@ -0,0 +1,43 @@ +# BerlinMOD-Q2 — snapshot form +# "At each 5-second tick, snapshot of vehicle X's (= 200) trajectory in the tick." + +query: | + SELECT start, + end, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(200) + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q2_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q2_windowed.yaml b/Queries/berlinmod/q2_windowed.yaml new file mode 100644 index 0000000000..d2ae83bc8c --- /dev/null +++ b/Queries/berlinmod/q2_windowed.yaml @@ -0,0 +1,43 @@ +# BerlinMOD-Q2 — windowed form +# "Per 10-second tumbling window, trajectory of vehicle X (= 200)." + +query: | + SELECT start, + end, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(200) + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q2_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q3_continuous.yaml b/Queries/berlinmod/q3_continuous.yaml new file mode 100644 index 0000000000..bfae2d7c81 --- /dev/null +++ b/Queries/berlinmod/q3_continuous.yaml @@ -0,0 +1,49 @@ +# BerlinMOD-Q3 — continuous form +# "Vehicles within 5 km of Brussels city centre, right now." +# Per 1-second sliding bucket, emit (start, end, vehicle_id) for events near P. + +query: | + SELECT start, + end, + vehicle_id + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q3_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q3_snapshot.yaml b/Queries/berlinmod/q3_snapshot.yaml new file mode 100644 index 0000000000..673373d1ea --- /dev/null +++ b/Queries/berlinmod/q3_snapshot.yaml @@ -0,0 +1,50 @@ +# BerlinMOD-Q3 — snapshot form +# "At each 5-second tick, distinct vehicles within 5 km of P." + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_near_p + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_NEAR_P, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q3_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q3_windowed.yaml b/Queries/berlinmod/q3_windowed.yaml new file mode 100644 index 0000000000..3d54f1aa75 --- /dev/null +++ b/Queries/berlinmod/q3_windowed.yaml @@ -0,0 +1,50 @@ +# BerlinMOD-Q3 — windowed form +# "Per 10-second tumbling window, distinct vehicles within 5 km of P." + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_near_p + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_NEAR_P, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q3_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q4_continuous.yaml b/Queries/berlinmod/q4_continuous.yaml new file mode 100644 index 0000000000..03b1e852e9 --- /dev/null +++ b/Queries/berlinmod/q4_continuous.yaml @@ -0,0 +1,49 @@ +# BerlinMOD-Q4 — continuous form +# "Vehicles currently inside region R (Brussels centre rectangle)." +# R encoded as polygon; edwithin with d=0 ≡ "inside the polygon". + +query: | + SELECT start, + end, + vehicle_id + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POLYGON((4.30 50.84, 4.36 50.84, 4.36 50.86, 4.30 50.86, 4.30 50.84))', + FLOAT64(0.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q4_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q4_snapshot.yaml b/Queries/berlinmod/q4_snapshot.yaml new file mode 100644 index 0000000000..f9042070b1 --- /dev/null +++ b/Queries/berlinmod/q4_snapshot.yaml @@ -0,0 +1,50 @@ +# BerlinMOD-Q4 — snapshot form +# "At each 5-second tick, distinct vehicles inside region R." + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_in_r + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POLYGON((4.30 50.84, 4.36 50.84, 4.36 50.86, 4.30 50.86, 4.30 50.84))', + FLOAT64(0.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_IN_R, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q4_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q4_windowed.yaml b/Queries/berlinmod/q4_windowed.yaml new file mode 100644 index 0000000000..17162eafbb --- /dev/null +++ b/Queries/berlinmod/q4_windowed.yaml @@ -0,0 +1,51 @@ +# BerlinMOD-Q4 — windowed form +# "Per 10-second tumbling window, distinct vehicles inside region R." +# Intra-window scoping: a vehicle present inside R during the window is reported. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_in_r + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POLYGON((4.30 50.84, 4.36 50.84, 4.36 50.86, 4.30 50.86, 4.30 50.84))', + FLOAT64(0.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_IN_R, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q4_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q5_continuous.yaml b/Queries/berlinmod/q5_continuous.yaml new file mode 100644 index 0000000000..33a6eedd82 --- /dev/null +++ b/Queries/berlinmod/q5_continuous.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q5 — continuous form (PARTIAL) +# "Pairs of vehicles meeting near P." NebulaStream's SQL has no stream-self-join, +# so this YAML emits the per-window TEMPORAL_SEQUENCE for each vehicle near P. +# Consumer joins the per-vehicle trajectories to compute pair distances and decide +# meeting. Full BerlinMOD-Q5 semantics require this consumer-side post-processing. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q5_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q5_snapshot.yaml b/Queries/berlinmod/q5_snapshot.yaml new file mode 100644 index 0000000000..232bf75e36 --- /dev/null +++ b/Queries/berlinmod/q5_snapshot.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q5 — snapshot form (PARTIAL) +# "Pairs of vehicles meeting near P." NebulaStream's SQL has no stream-self-join, +# so this YAML emits the per-window TEMPORAL_SEQUENCE for each vehicle near P. +# Consumer joins the per-vehicle trajectories to compute pair distances and decide +# meeting. Full BerlinMOD-Q5 semantics require this consumer-side post-processing. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q5_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q5_windowed.yaml b/Queries/berlinmod/q5_windowed.yaml new file mode 100644 index 0000000000..ffe98d54eb --- /dev/null +++ b/Queries/berlinmod/q5_windowed.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q5 — windowed form (PARTIAL) +# "Pairs of vehicles meeting near P." NebulaStream's SQL has no stream-self-join, +# so this YAML emits the per-window TEMPORAL_SEQUENCE for each vehicle near P. +# Consumer joins the per-vehicle trajectories to compute pair distances and decide +# meeting. Full BerlinMOD-Q5 semantics require this consumer-side post-processing. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q5_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q6_continuous.yaml b/Queries/berlinmod/q6_continuous.yaml new file mode 100644 index 0000000000..036e9bb6b6 --- /dev/null +++ b/Queries/berlinmod/q6_continuous.yaml @@ -0,0 +1,48 @@ +# BerlinMOD-Q6 — continuous form (PARTIAL) +# "Cumulative distance travelled per vehicle." Emits the per-window per-vehicle +# TEMPORAL_SEQUENCE trajectory; consumers compute length(trajectory) externally. +# A native temporal_length() aggregation on the NebulaStream side would close +# this as a FULL cell; see docs/berlinmod-streaming-forms.md. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q6_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q6_snapshot.yaml b/Queries/berlinmod/q6_snapshot.yaml new file mode 100644 index 0000000000..6aeabcc89e --- /dev/null +++ b/Queries/berlinmod/q6_snapshot.yaml @@ -0,0 +1,48 @@ +# BerlinMOD-Q6 — snapshot form (PARTIAL) +# "Cumulative distance travelled per vehicle." Emits the per-window per-vehicle +# TEMPORAL_SEQUENCE trajectory; consumers compute length(trajectory) externally. +# A native temporal_length() aggregation on the NebulaStream side would close +# this as a FULL cell; see docs/berlinmod-streaming-forms.md. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q6_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q6_windowed.yaml b/Queries/berlinmod/q6_windowed.yaml new file mode 100644 index 0000000000..3c856e160e --- /dev/null +++ b/Queries/berlinmod/q6_windowed.yaml @@ -0,0 +1,48 @@ +# BerlinMOD-Q6 — windowed form (PARTIAL) +# "Cumulative distance travelled per vehicle." Emits the per-window per-vehicle +# TEMPORAL_SEQUENCE trajectory; consumers compute length(trajectory) externally. +# A native temporal_length() aggregation on the NebulaStream side would close +# this as a FULL cell; see docs/berlinmod-streaming-forms.md. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q6_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi1_continuous.yaml b/Queries/berlinmod/q7_poi1_continuous.yaml new file mode 100644 index 0000000000..36a7f2418d --- /dev/null +++ b/Queries/berlinmod/q7_poi1_continuous.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — continuous form, POI 1 (4.3517, 50.8503, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 1." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi1_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi1_snapshot.yaml b/Queries/berlinmod/q7_poi1_snapshot.yaml new file mode 100644 index 0000000000..2e0f7acb9f --- /dev/null +++ b/Queries/berlinmod/q7_poi1_snapshot.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — snapshot form, POI 1 (4.3517, 50.8503, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 1." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi1_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi1_windowed.yaml b/Queries/berlinmod/q7_poi1_windowed.yaml new file mode 100644 index 0000000000..b81dec6c1e --- /dev/null +++ b/Queries/berlinmod/q7_poi1_windowed.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — windowed form, POI 1 (4.3517, 50.8503, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 1." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3517 50.8503)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi1_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi2_continuous.yaml b/Queries/berlinmod/q7_poi2_continuous.yaml new file mode 100644 index 0000000000..043c82680c --- /dev/null +++ b/Queries/berlinmod/q7_poi2_continuous.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — continuous form, POI 2 (4.3060, 50.8270, r=1000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 2." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3060 50.8270)', + FLOAT64(1000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi2_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi2_snapshot.yaml b/Queries/berlinmod/q7_poi2_snapshot.yaml new file mode 100644 index 0000000000..82ad22bcd3 --- /dev/null +++ b/Queries/berlinmod/q7_poi2_snapshot.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — snapshot form, POI 2 (4.3060, 50.8270, r=1000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 2." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3060 50.8270)', + FLOAT64(1000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi2_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi2_windowed.yaml b/Queries/berlinmod/q7_poi2_windowed.yaml new file mode 100644 index 0000000000..6925ba5480 --- /dev/null +++ b/Queries/berlinmod/q7_poi2_windowed.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — windowed form, POI 2 (4.3060, 50.8270, r=1000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 2." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.3060 50.8270)', + FLOAT64(1000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi2_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi3_continuous.yaml b/Queries/berlinmod/q7_poi3_continuous.yaml new file mode 100644 index 0000000000..414d8133d8 --- /dev/null +++ b/Queries/berlinmod/q7_poi3_continuous.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — continuous form, POI 3 (4.2100, 50.7600, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 3." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.2100 50.7600)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi3_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi3_snapshot.yaml b/Queries/berlinmod/q7_poi3_snapshot.yaml new file mode 100644 index 0000000000..141a9b853b --- /dev/null +++ b/Queries/berlinmod/q7_poi3_snapshot.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — snapshot form, POI 3 (4.2100, 50.7600, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 3." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.2100 50.7600)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi3_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q7_poi3_windowed.yaml b/Queries/berlinmod/q7_poi3_windowed.yaml new file mode 100644 index 0000000000..a8ecb36c3f --- /dev/null +++ b/Queries/berlinmod/q7_poi3_windowed.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q7 — windowed form, POI 3 (4.2100, 50.7600, r=2000.0m) +# "Per (window or tick), the first event in the window where each vehicle is +# within the POI's radius — i.e. the per-window first passage through POI 3." +# One YAML per (POI, form). Consumer reads the 3-POI fan-out to recover the +# full per-(vehicle, POI) first-passage matrix. + +query: | + SELECT start, + end, + vehicle_id, + MIN(time_utc) AS first_passage_time + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;POINT(4.2100 50.7600)', + FLOAT64(2000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$FIRST_PASSAGE_TIME, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q7_poi3_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q8_continuous.yaml b/Queries/berlinmod/q8_continuous.yaml new file mode 100644 index 0000000000..6821fa9639 --- /dev/null +++ b/Queries/berlinmod/q8_continuous.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q8 — continuous form (FULL) +# "Vehicles within d of road segment (LINESTRING)." Uses edwithin_tgeo_geo with +# a LINESTRING geometry — MEOS supports the within-radius predicate against any +# geometry (POINT, POLYGON, LINESTRING), so no new MobilityNebula PhysicalFunction +# is required. The segment runs from (4.30, 50.83) to (4.36, 50.87) with d = 5 km. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_near_segment + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;LINESTRING(4.30 50.83, 4.36 50.87)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_NEAR_SEGMENT, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q8_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q8_snapshot.yaml b/Queries/berlinmod/q8_snapshot.yaml new file mode 100644 index 0000000000..41241b53eb --- /dev/null +++ b/Queries/berlinmod/q8_snapshot.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q8 — snapshot form (FULL) +# "Vehicles within d of road segment (LINESTRING)." Uses edwithin_tgeo_geo with +# a LINESTRING geometry — MEOS supports the within-radius predicate against any +# geometry (POINT, POLYGON, LINESTRING), so no new MobilityNebula PhysicalFunction +# is required. The segment runs from (4.30, 50.83) to (4.36, 50.87) with d = 5 km. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_near_segment + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;LINESTRING(4.30 50.83, 4.36 50.87)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_NEAR_SEGMENT, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q8_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q8_windowed.yaml b/Queries/berlinmod/q8_windowed.yaml new file mode 100644 index 0000000000..f448eb5ae2 --- /dev/null +++ b/Queries/berlinmod/q8_windowed.yaml @@ -0,0 +1,53 @@ +# BerlinMOD-Q8 — windowed form (FULL) +# "Vehicles within d of road segment (LINESTRING)." Uses edwithin_tgeo_geo with +# a LINESTRING geometry — MEOS supports the within-radius predicate against any +# geometry (POINT, POLYGON, LINESTRING), so no new MobilityNebula PhysicalFunction +# is required. The segment runs from (4.30, 50.83) to (4.36, 50.87) with d = 5 km. + +query: | + SELECT start, + end, + vehicle_id, + COUNT(time_utc) AS events_near_segment + FROM berlinmod_stream + WHERE edwithin_tgeo_geo(gps_lon, + gps_lat, + time_utc, + 'SRID=4326;LINESTRING(4.30 50.83, 4.36 50.87)', + FLOAT64(5000.0)) = INT32(1) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$EVENTS_NEAR_SEGMENT, type: UINT64 } + config: + file_path: "/workspace/Output/output_berlinmod_q8_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q9_continuous.yaml b/Queries/berlinmod/q9_continuous.yaml new file mode 100644 index 0000000000..1731bfb3b0 --- /dev/null +++ b/Queries/berlinmod/q9_continuous.yaml @@ -0,0 +1,49 @@ +# BerlinMOD-Q9 — continuous form (PARTIAL) +# "Distance between vehicles X and Y at time T." Filters to vehicles X = 100 +# and Y = 200, emits per-window TEMPORAL_SEQUENCE per vehicle. Consumer joins +# the two trajectories to compute the X-Y distance at each time. A NebulaStream +# stream-self-join (or a custom pair-aggregation) would close this as a FULL cell. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(100) OR vehicle_id = UINT64(200) + GROUP BY vehicle_id + WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q9_continuous.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q9_snapshot.yaml b/Queries/berlinmod/q9_snapshot.yaml new file mode 100644 index 0000000000..e93fa71f09 --- /dev/null +++ b/Queries/berlinmod/q9_snapshot.yaml @@ -0,0 +1,49 @@ +# BerlinMOD-Q9 — snapshot form (PARTIAL) +# "Distance between vehicles X and Y at time T." Filters to vehicles X = 100 +# and Y = 200, emits per-window TEMPORAL_SEQUENCE per vehicle. Consumer joins +# the two trajectories to compute the X-Y distance at each time. A NebulaStream +# stream-self-join (or a custom pair-aggregation) would close this as a FULL cell. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(100) OR vehicle_id = UINT64(200) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 5 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q9_snapshot.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/Queries/berlinmod/q9_windowed.yaml b/Queries/berlinmod/q9_windowed.yaml new file mode 100644 index 0000000000..b9a0988f16 --- /dev/null +++ b/Queries/berlinmod/q9_windowed.yaml @@ -0,0 +1,49 @@ +# BerlinMOD-Q9 — windowed form (PARTIAL) +# "Distance between vehicles X and Y at time T." Filters to vehicles X = 100 +# and Y = 200, emits per-window TEMPORAL_SEQUENCE per vehicle. Consumer joins +# the two trajectories to compute the X-Y distance at each time. A NebulaStream +# stream-self-join (or a custom pair-aggregation) would close this as a FULL cell. + +query: | + SELECT start, + end, + vehicle_id, + TEMPORAL_SEQUENCE(gps_lon, gps_lat, time_utc) AS trajectory + FROM berlinmod_stream + WHERE vehicle_id = UINT64(100) OR vehicle_id = UINT64(200) + GROUP BY vehicle_id + WINDOW TUMBLING(time_utc, SIZE 10 SEC) + INTO file_sink; + +sinks: + - name: FILE_SINK + type: File + schema: + - { name: BERLINMOD_STREAM$START, type: UINT64 } + - { name: BERLINMOD_STREAM$END, type: UINT64 } + - { name: BERLINMOD_STREAM$VEHICLE_ID, type: UINT64 } + - { name: BERLINMOD_STREAM$TRAJECTORY, type: VARSIZED } + config: + file_path: "/workspace/Output/output_berlinmod_q9_windowed.csv" + input_format: CSV + +logical: + - name: BERLINMOD_STREAM + schema: + - { name: TIME_UTC, type: UINT64 } + - { name: VEHICLE_ID, type: UINT64 } + - { name: GPS_LON, type: FLOAT64 } + - { name: GPS_LAT, type: FLOAT64 } + +physical: + - logical: BERLINMOD_STREAM + type: TCP + parser_config: + type: CSV + field_delimiter: "," + tuple_delimiter: "\n" + source_config: + socket_host: "host.docker.internal" + socket_port: "32325" + socket_type: "SOCK_STREAM" + socket_domain: "AF_INET" diff --git a/docs/berlinmod-streaming-forms.md b/docs/berlinmod-streaming-forms.md new file mode 100644 index 0000000000..f484b31a87 --- /dev/null +++ b/docs/berlinmod-streaming-forms.md @@ -0,0 +1,111 @@ +# BerlinMOD streaming forms on MobilityNebula + +Additive scaffold for the **BerlinMOD-9 × 3 streaming forms** parity contract — same shape as the SQL-layer BerlinMOD-9 ([MobilityDB-BerlinMOD](https://github.com/MobilityDB/MobilityDB-BerlinMOD)) and matching the [MobilityFlink PR #3](https://github.com/MobilityDB/MobilityFlink/pull/3) and [MobilityKafka PR #1](https://github.com/MobilityDB/MobilityKafka/pull/1) coverage on the NebulaStream runtime. + +This page lives **alongside** the existing SNCB query series ([Query0..Query5](../Queries/) + [sncb_brake_monitoring](../Queries/sncb_brake_monitoring.yaml)); the SNCB Q-series and BerlinMOD-9 are sibling parity sets, not a replacement. + +## Logical source + +The BerlinMOD queries read from a `berlinmod_stream` logical source over TCP port `32325`, distinct from the SNCB `sncb_stream` source on port `32324`. Wire format is CSV with four columns: + +``` +time_utc(uint64), vehicle_id(uint64), gps_lon(float64), gps_lat(float64) +``` + +A sample input file is at [`Input/input_berlinmod.csv`](../Input/input_berlinmod.csv) (3 vehicles × 21 events over 14 simulated seconds). + +## The three streaming forms + +For each BerlinMOD reference query Q, three NebulaStream YAMLs realize the form contract: + +| Form | NebulaStream pattern | Semantic | +|---|---|---| +| **continuous** | `WINDOW SLIDING(time_utc, SIZE 1 SEC, ADVANCE BY 1 SEC)` | per-event-bucket emission; consumers see a continuous stream of per-second events | +| **windowed** | `WINDOW TUMBLING(time_utc, SIZE 10 SEC)` | per-10s aggregation; one row per (window, group) | +| **snapshot** | `WINDOW TUMBLING(time_utc, SIZE 5 SEC)` | per-5s tick state; one row per (tick, group). Parity-oracle form: at each tick, the current state mirrors the batch BerlinMOD-Q result on data up to the tick | + +## Coverage in this PR + +| Q | Topic | Continuous | Windowed | Snapshot | Form | +|---|---|---|---|---|---| +| Q1 | "which vehicles have appeared?" | ✓ | ✓ | ✓ | full | +| Q2 | "where is vehicle X (= 200) at time T?" | ✓ | ✓ | ✓ | full | +| Q3 | "vehicles within 5 km of Brussels city centre?" | ✓ | ✓ | ✓ | full | +| Q4 | "vehicles inside Brussels-centre rectangle R?" | ✓ | ✓ | ✓ | full | +| Q5 | "pairs of vehicles meeting near P" | ◐ | ◐ | ◐ | **partial** — see below | +| Q6 | "cumulative distance per vehicle" | ◐ | ◐ | ◐ | **partial** — see below | +| Q7 | "first passage of each vehicle through each POI" | ✓ | ✓ | ✓ | full (per-POI fan-out) | +| Q8 | "vehicles close to a road segment (LINESTRING)" | ✓ | ✓ | ✓ | full | +| Q9 | "distance between vehicles X and Y at time T" | ◐ | ◐ | ◐ | **partial** — see below | + +**27 of 27 cells** covered as scaffold YAMLs. 18 cells are **full** — the BerlinMOD-Q semantic is computed entirely inside NebulaStream — and 9 cells are **partial** — NebulaStream emits the per-window inputs and a consumer post-processes them for the final BerlinMOD-Q answer. + +### Q7 fan-out pattern (full) + +NebulaStream's current SQL has no Cartesian (vehicle × POI) aggregation primitive. Q7 is therefore expressed as **one YAML per (POI, form)** — three POIs × three forms = nine YAML files. Each YAML emits the per-(window, vehicle) first-passage time for its single POI; consumers read the three POI-specific output files per form to recover the full per-(vehicle, POI) matrix. POI ids: `1` = Brussels city centre (4.3517, 50.8503, r=2 km), `2` = Anderlecht (4.3060, 50.8270, r=1 km), `3` = south of Brussels (4.2100, 50.7600, r=2 km). + +### Q8 via LINESTRING (full) + +MEOS' `edwithin_tgeo_geo` accepts any geometry — POINT, POLYGON, and **LINESTRING**. Q8 (vehicles within d of a road segment) is therefore expressible as a single direct predicate against a `LINESTRING(s1, s2)` geometry, no new MobilityNebula PhysicalFunction required. The segment runs from (4.30, 50.83) to (4.36, 50.87) with d = 5 km in the scaffold. + +### Q5 / Q6 / Q9 partial pattern (NebulaStream emits, consumer joins) + +These three queries need either a stream-self-join (Q5, Q9) or a custom `temporal_length` aggregation (Q6) — neither of which is currently a NebulaStream SQL primitive. The scaffold YAMLs express what NebulaStream can compute today: + +| Q | What NebulaStream emits | What the consumer computes | +|---|---|---| +| Q5 | per-(window, vehicle) `TEMPORAL_SEQUENCE` trajectory for each near-P vehicle | per-pair distance, pair-meeting predicate, output meeting pairs | +| Q6 | per-(window, vehicle) `TEMPORAL_SEQUENCE` trajectory | length of trajectory per vehicle | +| Q9 | per-(window, vehicle) `TEMPORAL_SEQUENCE` trajectory filtered to `vehicle_id ∈ {100, 200}` | join the two trajectories, compute the X-Y distance series | + +Each partial cell IS a valid runnable NebulaStream query; the BerlinMOD-Q final answer is one consumer-side reduction step beyond the emitted output. + +### Path to "full" for the three partial Qs + +| Q | What would make it FULL | +|---|---| +| Q5 | stream-self-join in NebulaStream SQL, OR a custom `pair_aggregate(tgeo, dMeet)` aggregation | +| Q6 | a custom `temporal_length(tgeo) → double` scalar function in MobilityNebula `Functions/Meos/` | +| Q9 | stream-self-join (same shape as Q5) | + +Each is a single-PR change on MobilityNebula's `nes-physical-operators` C++ surface (or on the NebulaStream upstream for joins). The patterns and templates are documented above; the YAML schemas already exist in this scaffold so the FULL cells would be drop-in replacements once the operators land. + +## MEOS operators consumed + +All BerlinMOD predicates use operators already exposed by [`MobilityNebula/PR #14`](https://github.com/MobilityDB/MobilityNebula/pull/14) (and follow-up operator-add PRs): + +| Operator | YAMLs using it | +|---|---| +| `edwithin_tgeo_geo(lon, lat, t, geom, d)` | Q3 × 3 forms (radius predicate, `POINT`), Q4 × 3 forms (region containment, `POLYGON` with `d=0.0`) | +| `TEMPORAL_SEQUENCE(lon, lat, t)` (aggregation) | Q2 × 3 forms (per-window trajectory) | + +No new MEOS-side operators or PhysicalFunction classes are added by this PR. + +## Not covered (15 cells / 5 queries) + +Marked as future work; each requires either a new MEOS operator or a NebulaStream-side extension: + +| Q | Topic | Blocker | +|---|---|---| +| Q5 | pairs of vehicles meeting near P | Needs a stream-self-join across vehicles. NebulaStream's SQL needs join support OR a custom MEOS aggregation that consumes a per-vehicle map of last-known positions. | +| Q6 | cumulative distance per vehicle | Needs a custom aggregation that returns the length of the per-window `TEMPORAL_SEQUENCE` trajectory; `temporal_length(tgeo) → double` would close this. | +| Q7 | first passage of vehicles through POIs | Cartesian (vehicle × POI) state. Could be expressed as 1 query per (POI), each emitting the per-vehicle MIN(time_utc) WHERE edwithin near POI — but that's 3+ queries per snapshot form. A custom `first_passage(tgeo, geo, d) → tstzset` aggregation would close this. | +| Q8 | vehicles close to a road segment | Needs a MEOS `distance(tgeo, geometry(LINESTRING))` operator surfaced as a NebulaStream PhysicalFunction; not present in the current `Functions/Meos/` set. | +| Q9 | distance between vehicles X and Y at time T | Needs cross-vehicle pair state; same blocker as Q5 (stream-self-join or pair-aggregation). | + +## Sibling parity references + +- **MobilityFlink #3** — same nine queries × three forms via Flink Java code, 27/27 cells, pure-Java predicates with `TODO(meos)` markers for future JMEOS bridges. +- **MobilityKafka #1** — same nine queries × three forms via Kafka-Streams Processor API, 27/27 cells, same pure-Java predicates. +- **MobilityDB-BerlinMOD** open PRs (#29/#27/#26/#24/#23) — batch BerlinMOD-9 cross-platform reports; the snapshot form converges to those outputs as the watermark advances. + +## Running + +Each YAML follows the same pattern as the SNCB queries (TCP CSV source, file sink). The expected execution flow: + +1. Start NebulaStream (or `MobilityNebula` docker-runtime). +2. Stream the sample CSV from `Input/input_berlinmod.csv` over TCP port `32325` (e.g. via `nc -l -p 32325 < Input/input_berlinmod.csv` or the project's existing TCP-source tooling). +3. Submit one of the YAMLs to the NebulaStream coordinator. +4. The output appears in `/workspace/Output/output_berlinmod__
.csv`. + +YAML structure has been validated with `python3 -c "import yaml; yaml.safe_load(open(f))"` for every file. Runtime verification is gated on the NebulaStream test harness; the YAMLs are intentionally additive and the SNCB Q-series remains untouched.