-from __future__ import print_function
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-# Exports all data for the particular user for the particular day
-# Used for debugging issues with trip and section generation
-from future import standard_library
-standard_library.install_aliases()
-from builtins import *
-import sys
 import logging
 logging.basicConfig(level=logging.DEBUG)
 import gzip

 import uuid
-import datetime as pydt
 import json
 import bson.json_util as bju
-import arrow
-import argparse

-import emission.core.wrapper.user as ecwu
 import emission.storage.timeseries.abstract_timeseries as esta
-import emission.storage.timeseries.timequery as estt
-import emission.storage.decorations.user_queries as esdu
-import emission.storage.timeseries.cache_series as estcs
-# only needed to read the motion_activity
-# https://github.com/e-mission/e-mission-docs/issues/356#issuecomment-520630934
-import emission.net.usercache.abstract_usercache as enua
+#import emission.storage.timeseries.cache_series as estcs

-
-def export(loc_time_query, trip_time_query, place_time_query, ma_entry_list, user_id, file_name):
-    ts = esta.TimeSeries.get_time_series(user_id)
-    loc_entry_list = list(estcs.find_entries(user_id, key_list=None, time_query=loc_time_query))
-    trip_entry_list = list(ts.find_entries(key_list=None, time_query=trip_time_query))
-    place_entry_list = list(ts.find_entries(key_list=None, time_query=place_time_query))
-    first_place_extra_query = {'$and': [{'data.enter_ts': {'$exists': False}},
-                                        {'data.exit_ts': {'$exists': True}}]}
-    first_place_entry_list = list(ts.find_entries(key_list=None, time_query=None, extra_query_list=[first_place_extra_query]))
-    logging.info("First place entry list = %s" % first_place_entry_list)
+def export(loc_entry_list, trip_entry_list, place_entry_list, ma_entry_list, user_id, file_name, ts):
+    first_place_extra_query = {'$and': [{'data.enter_ts': {'$exists': False}},{'data.exit_ts': {'$exists': True}}]}
+    first_place_entry_list = list(ts.find_entries(key_list=None, time_query=None, extra_query_list=[first_place_extra_query]))
+    logging.info("First place entry list = %s" % first_place_entry_list)
     combined_list = ma_entry_list + loc_entry_list + trip_entry_list + place_entry_list + first_place_entry_list

     logging.info("Found %d loc entries, %d motion entries, %d trip-like entries, %d place-like entries = %d total entries" %
         (len(loc_entry_list), len(ma_entry_list), len(trip_entry_list), len(place_entry_list), len(combined_list)))
-
-    validate_truncation(loc_entry_list, trip_entry_list, place_entry_list)
-
-    unique_key_list = set([e["metadata"]["key"] for e in combined_list])
-    logging.info("timeline has unique keys = %s" % unique_key_list)
-    if len(combined_list) == 0 or unique_key_list == set(['stats/pipeline_time']):
-        logging.info("No entries found in range for user %s, skipping save" % user_id)
-    else:
-        # Also dump the pipeline state, since that's where we have analysis results upto
-        # This allows us to copy data to a different *live system*, not just
-        # duplicate for analysis
-        combined_filename = "%s_%s.gz" % (file_name, user_id)
-        with gzip.open(combined_filename, "wt") as gcfd:
-            json.dump(combined_list,gcfd, default=bju.default, allow_nan=False, indent=4)
-
-        import emission.core.get_database as edb
-        pipeline_state_list = list(edb.get_pipeline_state_db().find({"user_id": user_id}))
-        logging.info("Found %d pipeline states %s" %
-            (len(pipeline_state_list),
-             list([ps["pipeline_stage"] for ps in pipeline_state_list])))
-
-        pipeline_filename = "%s_pipelinestate_%s.gz" % (file_name, user_id)
-        with gzip.open(pipeline_filename, "wt") as gpfd:
-            json.dump(pipeline_state_list,
-                gpfd, default=bju.default, allow_nan=False, indent=4)
+    validate_truncation(loc_entry_list, trip_entry_list, place_entry_list)
+
+    unique_key_list = set([e["metadata"]["key"] for e in combined_list])
+    logging.info("timeline has unique keys = %s" % unique_key_list)
+    if len(combined_list) == 0 or unique_key_list == set(['stats/pipeline_time']):
+        logging.info("No entries found in range for user %s, skipping save" % user_id)
+    else:
+        # Also dump the pipeline state, since that's where we have analysis results upto
+        # This allows us to copy data to a different *live system*, not just
+        # duplicate for analysis
+        combined_filename = "%s_%s.gz" % (file_name, user_id)
+        with gzip.open(combined_filename, "wt") as gcfd:
+            json.dump(combined_list,gcfd, default=bju.default, allow_nan=False, indent=4)

 def validate_truncation(loc_entry_list, trip_entry_list, place_entry_list):
     MAX_LIMIT = 25 * 10000
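
The net effect of the change to export() is a narrower responsibility: it no longer builds the entry lists itself (the esta.TimeSeries.get_time_series() and estcs.find_entries() / ts.find_entries() calls for locations, trips and places are gone, and so is the pipeline-state dump), and it instead expects the caller to pass in loc_entry_list, trip_entry_list, place_entry_list, ma_entry_list and the ts handle. A minimal sketch of what a caller might now look like, reusing the fetch calls from the old body; the wrapper name export_timeline, its start_ts/end_ts parameters, and the specific estt.TimeQuery fields are illustrative assumptions, not part of this diff:

import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
import emission.storage.timeseries.cache_series as estcs

def export_timeline(user_id, start_ts, end_ts, file_name):
    # Hypothetical caller: build the entry lists that export() previously built internally.
    ts = esta.TimeSeries.get_time_series(user_id)
    loc_entry_list = list(estcs.find_entries(user_id, key_list=None,
        time_query=estt.TimeQuery("data.ts", start_ts, end_ts)))
    trip_entry_list = list(ts.find_entries(key_list=None,
        time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts)))
    place_entry_list = list(ts.find_entries(key_list=None,
        time_query=estt.TimeQuery("data.enter_ts", start_ts, end_ts)))
    # motion_activity entries came from the usercache in the original script; left empty here
    ma_entry_list = []

    export(loc_entry_list, trip_entry_list, place_entry_list, ma_entry_list,
           user_id, file_name, ts)

Passing ts in explicitly also lets a caller reuse a single time series handle across several export() calls, although the diff itself does not state that motivation.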
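
Both before and after this change, the combined dump is written with json.dump(..., default=bju.default), so reading it back needs the matching bson decoder. A small sketch of a reader, assuming the "%s_%s.gz" % (file_name, user_id) naming used above; load_exported_entries is a hypothetical helper, not part of this change:

import gzip
import json
import bson.json_util as bju

def load_exported_entries(combined_filename):
    # Hypothetical helper: read a "<file_name>_<user_id>.gz" dump back into entry dicts,
    # using bju.object_hook to reverse the bju.default encoding used on export
    with gzip.open(combined_filename, "rt") as gcfd:
        return json.load(gcfd, object_hook=bju.object_hook)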