Skip to content

Commit 9398ab9

Browse files
Merge pull request #68 from huisman/speedup_process_map
Speed up process_map function by using data.table
2 parents (21777bd + de545ce), commit 9398ab9

1 file changed

Lines changed: 24 additions & 16 deletions

File tree

R/process_map.R

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -162,29 +162,37 @@ process_map.eventlog <- function(log,
162162
as.data.frame() -> base_log
163163
}
164164

165-
#create end points for graph
166-
167-
base_log %>%
168-
group_by(CASE_CLASSIFIER_) %>%
169-
arrange(start_time, min_order) -> points_temp
170-
171-
points_temp %>%
172-
slice(1) %>%
173-
mutate(ACTIVITY_CLASSIFIER_ = "ARTIFICIAL_START",
174-
end_time = start_time,
175-
min_order = -Inf) -> end_points_start
176-
points_temp %>%
177-
slice(n()) %>%
178-
mutate(ACTIVITY_CLASSIFIER_ = "ARTIFICIAL_END",
179-
start_time = end_time,
180-
min_order = Inf) -> end_points_end
165+
#create end points for graph using data.table slicing
166+
dt <- as.data.table(base_log)
167+
#set correct ordering within groups
168+
data.table::setorder(dt, CASE_CLASSIFIER_, start_time, min_order, na.last=TRUE)
169+
#specifying order of columns to reorder end_points_start and end_points_end
170+
colorder <- c('ACTIVITY_CLASSIFIER_', 'ACTIVITY_INSTANCE_CLASSIFIER_', 'CASE_CLASSIFIER_', 'start_time', 'end_time', 'min_order')
171+
172+
end_points_start <- dt[, .SD[1], by = CASE_CLASSIFIER_]
173+
end_points_start[, min_order := as.numeric(min_order)] #prevents warning when setting value to -Inf
174+
end_points_start[, `:=`(ACTIVITY_CLASSIFIER_ = "ARTIFICIAL_START",
175+
end_time = start_time,
176+
min_order = -Inf)]
177+
data.table::setcolorder(end_points_start, colorder)
178+
179+
end_points_end <- dt[, .SD[.N], by = CASE_CLASSIFIER_]
180+
end_points_end[, min_order := as.numeric(min_order)] #prevents warning when setting value to Inf
181+
end_points_end[, `:=`(ACTIVITY_CLASSIFIER_ = "ARTIFICIAL_END",
182+
start_time = end_time,
183+
min_order = Inf)]
184+
data.table::setcolorder(end_points_end, colorder)
181185

182186
#add endpoints to base log
183187

184188
suppressWarnings(
185189
bind_rows(end_points_start, end_points_end, base_log) %>%
186190
ungroup() -> base_log
187191
)
192+
193+
#converting ACTIVITY_CLASSIFIER_ to character to keep `base_log` identical to the previous dplyr method
194+
base_log <- base_log %>%
195+
mutate(ACTIVITY_CLASSIFIER_ = as.character(ACTIVITY_CLASSIFIER_))
188196

189197
#create base nodes list
190198

0 commit comments

Comments (0)