-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrip_unlinked.sql
More file actions
365 lines (323 loc) · 11.7 KB
/
trip_unlinked.sql
File metadata and controls
365 lines (323 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
-------------------------------------------------------------------------------------
-- preprocessing/setting up certain columns before final selection
-------------------------------------------------------------------------------------
-- get vehicle lookup table
WITH vehicle_lookup AS (
SELECT hh_id, person_id, trip_id, vehicle_id
FROM `wfrc-modeling-data.prd_tdm_hts_2023.vehicle-trip-crosswalk`
),
-- get school_id from person table and vehicle id from vehicle lookup table
trip_base AS (
SELECT
t.*,
p.school_type,
v.vehicle_id -- 1. Add this line
FROM `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_trip` AS t
LEFT JOIN `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_person` AS p
ON t.person_id = p.person_id
LEFT JOIN vehicle_lookup AS v -- 2. Add this join
ON t.hh_id = v.hh_id
AND t.person_id = v.person_id
AND t.trip_id = v.trip_id
),
-- calculate 'oCO_TAZID_USTMv3' origin co_tazid
origin_taz_v3 AS (
SELECT
t.trip_id,
ARRAY_AGG(taz.CO_TAZID LIMIT 1)[OFFSET(0)] AS oCO_TAZID_USTMv3
FROM `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_trip` AS t
JOIN `wfrc-modeling-data.prd_tdm_taz.ustm_v3_taz_2021_09_22_geo` AS taz
ON ST_INTERSECTS(st_geogpoint(t.o_lon, t.o_lat), taz.geometry)
GROUP BY t.trip_id
),
-- calculate 'dCO_TAZID_USTMv3' destination co_tazid
destination_taz_v3 AS (
SELECT
t.trip_id,
ARRAY_AGG(taz.CO_TAZID LIMIT 1)[OFFSET(0)] AS dCO_TAZID_USTMv3
FROM `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_trip` AS t
JOIN `wfrc-modeling-data.prd_tdm_taz.ustm_v3_taz_2021_09_22_geo` AS taz
ON ST_INTERSECTS(st_geogpoint(t.d_lon, t.d_lat), taz.geometry)
GROUP BY t.trip_id
),
origin_taz_v4 AS (
SELECT
t.trip_id,
ARRAY_AGG(taz.CO_TAZID LIMIT 1)[OFFSET(0)] AS oCO_TAZID_USTMv4,
SAFE_CAST(ARRAY_AGG(taz.SUBAREAID LIMIT 1)[OFFSET(0)] AS INT64) AS oSUBAREAID
FROM `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_trip` AS t
JOIN `wfrc-modeling-data.prd_tdm_taz.ustm_v4_taz_2025_07_29_geo` AS taz
ON ST_INTERSECTS(st_geogpoint(t.o_lon, t.o_lat), taz.geometry)
GROUP BY t.trip_id
),
-- calculate 'dCO_TAZID_USTMv4' destination co_tazid
destination_taz_v4 AS (
SELECT
t.trip_id,
ARRAY_AGG(taz.CO_TAZID LIMIT 1)[OFFSET(0)] AS dCO_TAZID_USTMv4,
SAFE_CAST(ARRAY_AGG(taz.SUBAREAID LIMIT 1)[OFFSET(0)] AS INT64) AS dSUBAREAID
FROM `wfrc-modeling-data.src_rsg_household_travel_survey_2023.core_trip` AS t
JOIN `wfrc-modeling-data.prd_tdm_taz.ustm_v4_taz_2025_07_29_geo` AS taz
ON ST_INTERSECTS(st_geogpoint(t.d_lon, t.d_lat), taz.geometry)
GROUP BY t.trip_id
),
-- calculate 'PA_AP' field using o/d purposes
trip_taz_pa AS (
SELECT
tb.*,
ot3.oCO_TAZID_USTMv3,
dt3.dCO_TAZID_USTMv3,
ot4.oCO_TAZID_USTMv4,
dt4.dCO_TAZID_USTMv4,
dt4.dSUBAREAID,
ot4.oSUBAREAID,
CASE
WHEN tb.o_purpose_category = 1 THEN 'PA'
WHEN tb.d_purpose_category = 1 THEN 'AP'
WHEN tb.o_purpose_category = 2 AND
CASE
WHEN tb.d_purpose_category = 1 THEN 'Home'
WHEN tb.d_purpose_category = 2 THEN 'Work'
WHEN tb.d_purpose_category IN (995, -1) THEN 'Undefined'
ELSE 'Other'
END = 'Other' THEN 'PA'
WHEN CASE
WHEN tb.o_purpose_category = 1 THEN 'Home'
WHEN tb.o_purpose_category = 2 THEN 'Work'
WHEN tb.o_purpose_category IN (995, -1) THEN 'Undefined'
ELSE 'Other'
END = 'Other'
AND tb.d_purpose_category = 2 THEN 'AP'
WHEN tb.o_purpose_type = 995 OR tb.d_purpose_type = 995 THEN 'Undefined'
ELSE 'PA'
END AS PA_AP
FROM trip_base tb
LEFT JOIN origin_taz_v3 AS ot3 ON tb.trip_id = ot3.trip_id
LEFT JOIN destination_taz_v3 AS dt3 ON tb.trip_id = dt3.trip_id
LEFT JOIN origin_taz_v4 AS ot4 ON tb.trip_id = ot4.trip_id
LEFT JOIN destination_taz_v4 AS dt4 ON tb.trip_id = dt4.trip_id
),
-- calculate 'PURP7_Sch' field to better understand school purpose
trip_taz_pa_sch AS (
SELECT
t.*,
CASE
WHEN t.trip_type = 2 AND (t.o_purpose = 22 OR t.d_purpose = 22) THEN 7
WHEN t.trip_type = 2 AND (t.o_purpose = 21 OR t.d_purpose = 21) THEN 2
WHEN t.trip_type = 2 AND (t.o_purpose IN (23,24,25,26) OR t.d_purpose IN (23,24,25,26)) THEN 4
WHEN t.trip_type = 2 AND ((t.o_purpose = 60 OR t.d_purpose = 60) AND t.school_type IN (11,12,13)) THEN 7
WHEN t.trip_type = 2 AND ((t.o_purpose = 60 OR t.d_purpose = 60) AND t.school_type IN (5,6,7)) THEN 2
WHEN t.trip_type = 2 THEN 4
ELSE NULL
END AS PURP7_Sch
FROM trip_taz_pa t
),
-- calculate 'PURP7' using 'PURP7_Sch' to divide purpose into 7 categories
trip_taz_pa_sch_purp AS (
SELECT
t.*,
CASE
WHEN t.trip_type = 1 THEN 1
WHEN t.trip_type = 2 AND t.PURP7_Sch = 2 THEN 2
WHEN t.trip_type = 2 AND t.PURP7_Sch = 4 THEN 4
WHEN t.trip_type = 2 AND t.PURP7_Sch = 7 THEN 7
WHEN t.trip_type = 3 THEN 3
WHEN t.trip_type = 4 THEN 4
WHEN t.trip_type = 5 THEN 4
WHEN t.trip_type = 6 THEN 5
WHEN t.trip_type = 7 THEN 6
WHEN t.trip_type = 995 THEN 995
ELSE 10
END AS PURP7
FROM trip_taz_pa_sch t
)
-------------------------------------------------------------------------------------
-- calculate remaining trip fields using preprocessed tables from above
-------------------------------------------------------------------------------------
SELECT
t.* EXCEPT(Unnamed__0, segment_type,
o_taz,o_lon,o_lat,o_x,o_y,
d_taz,d_lon,d_lat,d_x,d_y,
PURP7_Sch,
trip_weight_v2,
trip_weight,
trip_weight_fri,
trip_weight_sat,
trip_weight_sun,
trip_weight_aggregated_v2
),
-- Replace 'Supplemental' with 'CBS' in segment_type
CASE
WHEN t.segment_type = 'Supplemental' THEN 'CBS'
ELSE t.segment_type
END AS segment_type_cleaned,
-- Group origin purpose to category of 3
CASE
WHEN t.o_purpose_category = 1 THEN 'Home'
WHEN t.o_purpose_category = 2 THEN 'Work'
WHEN t.o_purpose_category IN (995, -1) THEN 'Undefined'
ELSE 'Other'
END AS o_purpose_3cat,
-- Group destination purpose to category of 3
CASE
WHEN t.d_purpose_category = 1 THEN 'Home'
WHEN t.d_purpose_category = 2 THEN 'Work'
WHEN t.d_purpose_category IN (995, -1) THEN 'Undefined'
ELSE 'Other'
END AS d_purpose_3cat,
-- Departure hour period
CASE
WHEN t.depart_hour BETWEEN 6 AND 8 THEN "AM"
WHEN t.depart_hour BETWEEN 9 AND 14 THEN "MD"
WHEN t.depart_hour BETWEEN 15 AND 17 THEN "PM"
ELSE "EV"
END AS depart_per,
-- Departure/arrival time in specific formats
SAFE_CAST(FORMAT('%02d%02d', t.depart_hour, t.depart_minute) AS INT64) AS depart_hhm,
t.depart_hour * 60 + t.depart_minute AS depart_mam,
SAFE_CAST(FORMAT('%02d%02d', t.arrive_hour, t.arrive_minute) AS INT64) AS arrive_hhm,
t.arrive_hour * 60 + t.arrive_minute AS arrive_mam,
-- Integer distance
CAST(FLOOR(IFNULL(t.distance_miles, 0)) AS INT64) AS INT_Dist,
-- Optional duration column (uncomment if needed)
-- CAST(FLOOR(IFNULL(t.duration_minutes, 0)) AS INT64) AS INT_Dur,
-- Mode groupings
CASE
WHEN t.mode_type_broad = 3 AND t.num_travelers = 1 THEN 3.1
WHEN t.mode_type_broad = 3 AND t.num_travelers = 2 THEN 3.2
WHEN t.mode_type_broad = 3 AND t.num_travelers >= 3 THEN 3.3
ELSE CAST(t.mode_type_broad AS FLOAT64)
END AS mode_auto,
-- College trips
CASE
WHEN segment_type = 'College' THEN t.trip_weight_v2
ELSE NULL
END AS trip_weight_col_enrol,
-- Non-College trips
CASE
WHEN segment_type != 'College' OR segment_type IS NULL THEN t.trip_weight_v2
ELSE NULL
END AS trip_weight,
-- Compute activ_dur using LAG function
LAG(t.arrive_hour * 60 + t.arrive_minute) OVER (
PARTITION BY t.person_id
ORDER BY t.hh_id, t.person_id, t.trip_id
) AS lag_arrive_mam,
CASE
WHEN (t.depart_hour * 60 + t.depart_minute) -
LAG(t.arrive_hour * 60 + t.arrive_minute) OVER (
PARTITION BY t.person_id
ORDER BY t.hh_id, t.person_id, t.trip_id
) < 0
THEN -1
ELSE IFNULL(
(t.depart_hour * 60 + t.depart_minute) -
LAG(t.arrive_hour * 60 + t.arrive_minute) OVER (
PARTITION BY t.person_id
ORDER BY t.hh_id, t.person_id, t.trip_id
),
0
)
END AS activ_dur,
-- calculate purpose as text using 'PURP7'
CASE
WHEN PURP7 = 1 THEN 'HBW'
WHEN PURP7 = 2 THEN 'HBSch'
WHEN PURP7 = 3 THEN 'HBShp'
WHEN PURP7 = 4 THEN 'HBOth'
WHEN PURP7 = 5 THEN 'NHBW'
WHEN PURP7 = 6 THEN 'NHBNW'
WHEN PURP7 = 7 THEN 'HBC'
WHEN PURP7 = 995 THEN 'Missing Response'
ELSE 'HBOth'
END AS PURP7_t,
-- calculate prupose as text (more divisions) -- trip_pur_t
CASE
WHEN PURP7 = 4 AND trip_type = 4 THEN 'HBPb'
WHEN PURP7 = 4 AND trip_type != 4 THEN 'HBOth'
ELSE
CASE
WHEN PURP7 = 1 THEN 'HBW'
WHEN PURP7 = 2 THEN 'HBSch'
WHEN PURP7 = 3 THEN 'HBShp'
WHEN PURP7 = 5 THEN 'NHBW'
WHEN PURP7 = 6 THEN 'NHBNW'
WHEN PURP7 = 7 THEN 'HBC'
WHEN PURP7 = 995 THEN 'Missing Response'
ELSE 'HBOth'
END
END AS trip_pur_t,
-- Specify primary/secondary schooling
CASE
WHEN PURP7 = 2 AND school_type = 5 THEN 'primary'
WHEN PURP7 = 2 AND school_type IN (6, 7) THEN 'secondary'
WHEN PURP7 = 2 THEN 'undefined'
ELSE NULL
END AS HBSch_lev,
-- Compute PURP_PER using 'depart_hour' and 'PURP7'
CASE
WHEN t.PURP7 = 995 OR
CASE
WHEN t.depart_hour BETWEEN 6 AND 8 THEN 1
WHEN t.depart_hour BETWEEN 9 AND 14 THEN 2
WHEN t.depart_hour BETWEEN 15 AND 17 THEN 3
ELSE 4
END = 995 THEN 995
ELSE t.PURP7 + (
(CASE
WHEN t.depart_hour BETWEEN 6 AND 8 THEN 1
WHEN t.depart_hour BETWEEN 9 AND 14 THEN 2
WHEN t.depart_hour BETWEEN 15 AND 17 THEN 3
ELSE 4
END) - 1
) * 7
END AS PURP_PER,
-- calculate production CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.oCO_TAZID_USTMv3
WHEN t.PA_AP = 'AP' THEN t.dCO_TAZID_USTMv3
ELSE NULL
END AS pCO_TAZID_USTMv3,
-- calculate attraciton CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.dCO_TAZID_USTMv3
WHEN t.PA_AP = 'AP' THEN t.oCO_TAZID_USTMv3
ELSE NULL
END AS aCO_TAZID_USTMv3,
-- calculate production CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.oCO_TAZID_USTMv4
WHEN t.PA_AP = 'AP' THEN t.dCO_TAZID_USTMv4
ELSE NULL
END AS pCO_TAZID_USTMv4,
-- calculate attraciton CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.dCO_TAZID_USTMv4
WHEN t.PA_AP = 'AP' THEN t.oCO_TAZID_USTMv4
ELSE NULL
END AS aCO_TAZID_USTMv4,
-- calculate production CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.oSUBAREAID
WHEN t.PA_AP = 'AP' THEN t.dSUBAREAID
ELSE NULL
END AS pSUBAREAID,
-- calculate attraciton CO_TAZID
CASE
WHEN t.PA_AP = 'PA' THEN t.dSUBAREAID
WHEN t.PA_AP = 'AP' THEN t.oSUBAREAID
ELSE NULL
END AS aSUBAREAID,
-- calculate production BG
CASE
WHEN t.PA_AP = 'PA' THEN t.o_bg_2020
WHEN t.PA_AP = 'AP' THEN t.d_bg_2020
ELSE NULL
END AS p_bg_2020,
-- calculate attraciton BG
CASE
WHEN t.PA_AP = 'PA' THEN t.d_bg_2020
WHEN t.PA_AP = 'AP' THEN t.o_bg_2020
ELSE NULL
END AS a_bg_2020,
FROM trip_taz_pa_sch_purp AS t