@@ -60,23 +60,9 @@ def segment_into_trips(self, timeseries, time_query):
60
60
t_get_filtered_points .elapsed
61
61
)
62
62
63
- with ect .Timer () as t_mark_valid :
64
- self .filtered_points_df .loc [:, "valid" ] = True
65
- esds .store_pipeline_time (
66
- user_id ,
67
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/mark_valid" ,
68
- time .time (),
69
- t_mark_valid .elapsed
70
- )
63
+ self .filtered_points_df .loc [:, "valid" ] = True
71
64
72
- with ect .Timer () as t_get_transition_df :
73
- self .transition_df = timeseries .get_data_df ("statemachine/transition" , time_query )
74
- esds .store_pipeline_time (
75
- user_id ,
76
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/get_transition_df" ,
77
- time .time (),
78
- t_get_transition_df .elapsed
79
- )
65
+ self .transition_df = timeseries .get_data_df ("statemachine/transition" , time_query )
80
66
81
67
if len (self .transition_df ) > 0 :
82
68
logging .debug ("self.transition_df = %s" % self .transition_df [["fmt_time" , "transition" ]])
@@ -103,14 +89,7 @@ def segment_into_trips(self, timeseries, time_query):
103
89
# segmentation_points.append(currPoint)
104
90
105
91
if just_ended :
106
- with ect .Timer () as t_continue_just_ended :
107
- continue_flag = self .continue_just_ended (idx , currPoint , self .filtered_points_df )
108
- esds .store_pipeline_time (
109
- user_id ,
110
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/continue_just_ended" ,
111
- time .time (),
112
- t_continue_just_ended .elapsed
113
- )
92
+ continue_flag = self .continue_just_ended (idx , currPoint , self .filtered_points_df )
114
93
115
94
if continue_flag :
116
95
# We have "processed" the currPoint by deciding to glom it
@@ -119,14 +98,7 @@ def segment_into_trips(self, timeseries, time_query):
119
98
# else:
120
99
sel_point = currPoint
121
100
logging .debug ("Setting new trip start point %s with idx %s" % (sel_point , sel_point .idx ))
122
- with ect .Timer () as t_set_start_point :
123
- curr_trip_start_point = sel_point
124
- esds .store_pipeline_time (
125
- user_id ,
126
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/set_new_trip_start_point" ,
127
- time .time (),
128
- t_set_start_point .elapsed
129
- )
101
+ curr_trip_start_point = sel_point
130
102
just_ended = False
131
103
else :
132
104
with ect .Timer () as t_process_trip :
@@ -137,106 +109,72 @@ def segment_into_trips(self, timeseries, time_query):
137
109
max (idx - self .point_threshold , curr_trip_start_point .idx ):idx + 1
138
110
]
139
111
lastPoint = self .find_last_valid_point (idx )
140
- with ect .Timer () as t_has_trip_ended :
141
- trip_ended = self .has_trip_ended (lastPoint , currPoint , timeseries )
142
- esds .store_pipeline_time (
143
- user_id ,
144
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/has_trip_ended" ,
145
- time .time (),
146
- t_has_trip_ended .elapsed
147
- )
112
+ trip_ended = self .has_trip_ended (lastPoint , currPoint , timeseries )
148
113
149
114
if trip_ended :
150
- with ect .Timer () as t_get_last_trip_end_point :
151
- last_trip_end_point = lastPoint
152
- logging .debug ("Appending last_trip_end_point %s with index %s " %
153
- (last_trip_end_point , idx - 1 ))
154
- segmentation_points .append ((curr_trip_start_point , last_trip_end_point ))
155
- logging .info ("Found trip end at %s" % last_trip_end_point .fmt_time )
156
- # We have processed everything up to the trip end by marking it as a completed trip
157
- self .last_ts_processed = currPoint .metadata_write_ts
158
- esds .store_pipeline_time (
159
- user_id ,
160
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/get_last_trip_end_point" ,
161
- time .time (),
162
- t_get_last_trip_end_point .elapsed
163
- )
164
-
165
- with ect .Timer () as t_handle_trip_end :
166
- just_ended = True
167
- # Now, we have finished processing the previous point as a trip
168
- # end or not. But we still need to process this point by seeing
169
- # whether it should represent a new trip start, or a glom to the
170
- # previous trip
171
- if not self .continue_just_ended (idx , currPoint , self .filtered_points_df ):
172
- sel_point = currPoint
173
- logging .debug ("Setting new trip start point %s with idx %s" % (sel_point , sel_point .idx ))
174
- curr_trip_start_point = sel_point
175
- just_ended = False
176
- esds .store_pipeline_time (
177
- user_id ,
178
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/handle_trip_end" ,
179
- time .time (),
180
- t_handle_trip_end .elapsed
181
- )
115
+ last_trip_end_point = lastPoint
116
+ logging .debug ("Appending last_trip_end_point %s with index %s " %
117
+ (last_trip_end_point , idx - 1 ))
118
+ segmentation_points .append ((curr_trip_start_point , last_trip_end_point ))
119
+ logging .info ("Found trip end at %s" % last_trip_end_point .fmt_time )
120
+ # We have processed everything up to the trip end by marking it as a completed trip
121
+ self .last_ts_processed = currPoint .metadata_write_ts
122
+ just_ended = True
123
+ # Now, we have finished processing the previous point as a trip
124
+ # end or not. But we still need to process this point by seeing
125
+ # whether it should represent a new trip start, or a glom to the
126
+ # previous trip
127
+ if not self .continue_just_ended (idx , currPoint , self .filtered_points_df ):
128
+ sel_point = currPoint
129
+ logging .debug ("Setting new trip start point %s with idx %s" % (sel_point , sel_point .idx ))
130
+ curr_trip_start_point = sel_point
131
+ just_ended = False
132
+
182
133
esds .store_pipeline_time (
183
134
user_id ,
184
135
ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/loop" ,
185
136
time .time (),
186
137
t_loop .elapsed
187
138
)
188
139
189
- with ect .Timer () as t_post_loop :
190
- # Since we only end a trip when we start a new trip, this means that
191
- # the last trip that was pushed is ignored. Consider the example of
192
- # 2016-02-22 when I took kids to karate. We arrived shortly after 4pm,
193
- # so during that remote push, a trip end was not detected. And we got
194
- # back home shortly after 5pm, so the trip end was only detected on the
195
- # phone at 6pm. At that time, the following points were pushed:
196
- # ..., 2016-02-22T16:04:02, 2016-02-22T16:49:34, 2016-02-22T16:49:50,
197
- # ..., 2016-02-22T16:57:04
198
- # Then, on the server, while iterating through the points, we detected
199
- # a trip end at 16:04, and a new trip start at 16:49. But we did not
200
- # detect the trip end at 16:57, because we didn't start a new point.
201
- # This has two issues:
202
- # - we won't see this trip until the next trip start, which may be on the next day
203
- # - we won't see this trip at all, because when we run the pipeline the
204
- # next time, we will only look at points from that time onwards. These
205
- # points have been marked as "processed", so they won't even be considered.
206
-
207
- # There are multiple potential fixes:
208
- # - we can mark only the completed trips as processed. This will solve (2) above, but not (1)
209
- # - we can mark a trip end based on the fact that we only push data
210
- # when a trip ends, so if we have data, it means that the trip has been
211
- # detected as ended on the phone.
212
- # This seems a bit fragile - what if we start pushing incomplete trip
213
- # data for efficiency reasons? Therefore, we also check to see if there
214
- # is a trip_end_detected in this timeframe after the last point. If so,
215
- # then we end the trip at the last point that we have.
216
- if not just_ended and len (self .transition_df ) > 0 :
217
- with ect .Timer () as t_check_transitions :
218
- stopped_moving_after_last = self .transition_df [
219
- (self .transition_df .ts > currPoint .ts ) & (self .transition_df .transition == 2 )
220
- ]
221
- logging .debug ("stopped_moving_after_last = %s" % stopped_moving_after_last [["fmt_time" , "transition" ]])
222
- if len (stopped_moving_after_last ) > 0 :
223
- logging .debug ("Found %d transitions after last point, ending trip..." % len (stopped_moving_after_last ))
224
- segmentation_points .append ((curr_trip_start_point , currPoint ))
225
- self .last_ts_processed = currPoint .metadata_write_ts
226
- else :
227
- logging .debug ("Found %d transitions after last point, not ending trip..." % len (stopped_moving_after_last ))
228
- esds .store_pipeline_time (
229
- user_id ,
230
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/check_transitions_post_loop" ,
231
- time .time (),
232
- t_check_transitions .elapsed
233
- )
234
- esds .store_pipeline_time (
235
- user_id ,
236
- ecwp .PipelineStages .TRIP_SEGMENTATION .name + "/segment_into_trips_dist/post_loop" ,
237
- time .time (),
238
- t_post_loop .elapsed
239
- )
140
+
141
+ # Since we only end a trip when we start a new trip, this means that
142
+ # the last trip that was pushed is ignored. Consider the example of
143
+ # 2016-02-22 when I took kids to karate. We arrived shortly after 4pm,
144
+ # so during that remote push, a trip end was not detected. And we got
145
+ # back home shortly after 5pm, so the trip end was only detected on the
146
+ # phone at 6pm. At that time, the following points were pushed:
147
+ # ..., 2016-02-22T16:04:02, 2016-02-22T16:49:34, 2016-02-22T16:49:50,
148
+ # ..., 2016-02-22T16:57:04
149
+ # Then, on the server, while iterating through the points, we detected
150
+ # a trip end at 16:04, and a new trip start at 16:49. But we did not
151
+ # detect the trip end at 16:57, because we didn't start a new point.
152
+ # This has two issues:
153
+ # - we won't see this trip until the next trip start, which may be on the next day
154
+ # - we won't see this trip at all, because when we run the pipeline the
155
+ # next time, we will only look at points from that time onwards. These
156
+ # points have been marked as "processed", so they won't even be considered.
157
+
158
+ # There are multiple potential fixes:
159
+ # - we can mark only the completed trips as processed. This will solve (2) above, but not (1)
160
+ # - we can mark a trip end based on the fact that we only push data
161
+ # when a trip ends, so if we have data, it means that the trip has been
162
+ # detected as ended on the phone.
163
+ # This seems a bit fragile - what if we start pushing incomplete trip
164
+ # data for efficiency reasons? Therefore, we also check to see if there
165
+ # is a trip_end_detected in this timeframe after the last point. If so,
166
+ # then we end the trip at the last point that we have.
167
+ if not just_ended and len (self .transition_df ) > 0 :
168
+ stopped_moving_after_last = self .transition_df [
169
+ (self .transition_df .ts > currPoint .ts ) & (self .transition_df .transition == 2 )
170
+ ]
171
+ logging .debug ("stopped_moving_after_last = %s" % stopped_moving_after_last [["fmt_time" , "transition" ]])
172
+ if len (stopped_moving_after_last ) > 0 :
173
+ logging .debug ("Found %d transitions after last point, ending trip..." % len (stopped_moving_after_last ))
174
+ segmentation_points .append ((curr_trip_start_point , currPoint ))
175
+ self .last_ts_processed = currPoint .metadata_write_ts
176
+ else :
177
+ logging .debug ("Found %d transitions after last point, not ending trip..." % len (stopped_moving_after_last ))
240
178
241
179
return segmentation_points
242
180
0 commit comments