Skip to content

Commit 265dbe2

Browse files
committed
Merge branch 'master' of https://github.com/e-mission/e-mission-server into batch_overpass
2 parents 3e1def0 + 88610f4 commit 265dbe2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+937
-2185
lines changed

.github/workflows/test-with-manual-install.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ jobs:
2626
strategy:
2727
matrix:
2828
os: [ubuntu-latest]
29+
env:
30+
USE_HINTS: True
2931

3032
# Steps represent a sequence of tasks that will be executed as part of the job
3133
steps:
@@ -35,7 +37,7 @@ jobs:
3537
- name: Install and start MongoDB
3638
uses: supercharge/[email protected]
3739
with:
38-
mongodb-version: 4.4.0
40+
mongodb-version: 8.0.4
3941

4042
- name: Check existing version of miniconda
4143
shell: bash -l {0}

Timeseries_Sample.ipynb

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@
135135
"outputs": [],
136136
"source": [
137137
"# Get all cleaned trips for the first user\n",
138-
"entry_it = ts.find_entries([\"analysis/cleaned_trip\"], time_query=None)"
138+
"entries = ts.find_entries([\"analysis/cleaned_trip\"], time_query=None)"
139139
]
140140
},
141141
{
@@ -152,11 +152,11 @@
152152
"metadata": {},
153153
"outputs": [],
154154
"source": [
155-
"for ct in entry_it:\n",
155+
"for ct in entries:\n",
156156
" cte = ecwe.Entry(ct)\n",
157157
" print(\"=== Trip:\", cte.data.start_loc, \"->\", cte.data.end_loc)\n",
158-
" section_it = esdt.get_sections_for_trip(\"analysis/cleaned_section\", test_user_id, cte.get_id())\n",
159-
" for sec in section_it:\n",
158+
" sections = esdt.get_sections_for_trip(\"analysis/cleaned_section\", test_user_id, cte.get_id())\n",
159+
" for sec in sections:\n",
160160
" print(\" --- Section:\", sec.data.start_loc, \"->\", sec.data.end_loc, \" on \", sec.data.sensed_mode)"
161161
]
162162
},
@@ -169,7 +169,7 @@
169169
"outputs": [],
170170
"source": [
171171
"# Get all cleaned trips for the second user\n",
172-
"entry_it = ts_2.find_entries([\"analysis/cleaned_trip\"], time_query=None)"
172+
"entries = ts_2.find_entries([\"analysis/cleaned_trip\"], time_query=None)"
173173
]
174174
},
175175
{
@@ -178,11 +178,11 @@
178178
"metadata": {},
179179
"outputs": [],
180180
"source": [
181-
"for ct in entry_it:\n",
181+
"for ct in entries:\n",
182182
" cte = ecwe.Entry(ct)\n",
183183
" print(\"=== Trip:\", cte.data.start_loc, \"->\", cte.data.end_loc)\n",
184-
" section_it = esdt.get_sections_for_trip(\"analysis/cleaned_section\", test_user_id, cte.get_id())\n",
185-
" for sec in section_it:\n",
184+
" sections = esdt.get_sections_for_trip(\"analysis/cleaned_section\", test_user_id, cte.get_id())\n",
185+
" for sec in sections:\n",
186186
" print(\" --- Section:\", sec.data.start_loc, \"->\", sec.data.end_loc, \" on \", sec.data.sensed_mode)"
187187
]
188188
},
@@ -198,10 +198,10 @@
198198
"aug_1_tq = estt.TimeQuery(\"data.start_ts\",\n",
199199
" arrow.get(\"2017-08-01\").timestamp, # start of range\n",
200200
" arrow.get(\"2017-08-02\").timestamp) # end of range\n",
201-
"entry_it = ts.find_entries([\"analysis/cleaned_trip\"], time_query=aug_1_tq)\n",
202-
"entry_it_2 = ts_2.find_entries([\"analysis/cleaned_trip\"], time_query=aug_1_tq)\n",
201+
"entries = ts.find_entries([\"analysis/cleaned_trip\"], time_query=aug_1_tq)\n",
202+
"entries2 = ts_2.find_entries([\"analysis/cleaned_trip\"], time_query=aug_1_tq)\n",
203203
"print(\"From %s -> %s, user %s had %d trips and user %s had %d trips\" %\n",
204-
" (aug_1_tq.startTs, aug_1_tq.endTs, test_user_id, len(list(entry_it)), test_user_id_2, len(list(entry_it_2))))"
204+
" (aug_1_tq.startTs, aug_1_tq.endTs, test_user_id, len(entries), test_user_id_2, len(entries2)))"
205205
]
206206
},
207207
{

bin/debug/save_ground_truth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def save_diary(args):
1818
def save_ct_list(args):
1919
print("Saving confirmed trip list for %s to file %s" % (args.sel_uuid, args.file_name))
2020
ts = esta.TimeSeries.get_time_series(args.sel_uuid)
21-
analysis_objects = list(ts.find_entries(args.key_list, None))
21+
analysis_objects = ts.find_entries(args.key_list, None)
2222
print("Retrieved object is of length %s" % len(analysis_objects))
2323
json.dump(analysis_objects, open(args.file_name, "w"), indent=4, default=esj.wrapped_default)
2424

bin/historical/migrations/_common.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,25 @@
2626
]
2727
print(f"PROD_LIST: {PROD_LIST}")
2828

29-
def run_on_all_deployments(fn_to_run, *args):
    """
    Run the given function on the database for each deployment by setting the
    DB_HOST environment variable in between each function call.
    The list of deployments (PROD_LIST) is retrieved from the
    nrel-openpath-deploy-configs repo upon initialization of this module.
    """
    print(f'About to run {fn_to_run.__name__}{args} on {len(PROD_LIST)} deployments. Proceed? [y/n]')
    if input() != 'y':
        print("Aborting")
        return
    for raw_name in PROD_LIST:
        # e-bikes-for-essentials has a typo; treat as special case
        prod = 'ebikes-for-essentials' if raw_name == 'e-bikes-for-essentials' else raw_name
        prod_db_name = prod.replace("-", "_")
        print(f"Running {fn_to_run.__name__} for {prod} on DB {prod_db_name}")
        os.environ['DB_HOST'] = DB_HOST_TEMPLATE.replace("REPLACEME", prod_db_name)
        # Re-import the DB module so the updated DB_HOST takes effect for
        # every query made inside fn_to_run
        importlib.reload(edb)
        fn_to_run(*args)

bin/historical/migrations/add_sections_and_summaries_to_trips.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ def add_sections_to_trips(process_number, uuid_list, skip_if_no_new_data):
3333

3434
def add_sections_to_trips_for_user(uuid):
3535
ts = esta.TimeSeries.get_time_series(uuid)
36-
cleaned_trips = list(ts.find_entries([esda.CLEANED_TRIP_KEY]))
37-
confirmed_trips = list(ts.find_entries([esda.CONFIRMED_TRIP_KEY]))
38-
composite_trips = list(ts.find_entries([esda.COMPOSITE_TRIP_KEY]))
36+
cleaned_trips = ts.find_entries([esda.CLEANED_TRIP_KEY])
37+
confirmed_trips = ts.find_entries([esda.CONFIRMED_TRIP_KEY])
38+
composite_trips = ts.find_entries([esda.COMPOSITE_TRIP_KEY])
3939
cleaned_trips_map = dict((t["_id"], t) for t in cleaned_trips)
4040
composite_trips_map = dict((t["data"]["confirmed_trip"], t) for t in composite_trips)
4141
# This script is slow due to DB queries

bin/historical/migrations/inactive.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import arrow
2+
import pymongo
3+
import emission.core.get_database as edb
4+
import emission.storage.timeseries.abstract_timeseries as esta
5+
import bin.debug.common as common
6+
from _common import run_on_all_deployments
7+
8+
NOW_SECONDS = arrow.now().timestamp()
9+
10+
def find_inactive_uuids(uuids_entries, threshold):
    """
    Return the list of UUIDs considered inactive.

    A user is inactive when neither their last server API call nor their last
    recorded location is more recent than `threshold` seconds before NOW_SECONDS.

    :param uuids_entries: iterable of uuid-db documents (each has a 'uuid' key)
    :param threshold: inactivity window in seconds
    :return: list of inactive user UUIDs
    """
    inactive_uuids = []
    for u in uuids_entries:
        print(f'Checking activity for user {u["uuid"]}')
        # Query the profile DB by the actual uuid, not the whole uuid-db
        # document; the original {'user_id': u} filter could never match,
        # forcing the slow timeseries fallback for every user.
        profile_data = edb.get_profile_db().find_one({'user_id': u['uuid']})
        ts = esta.TimeSeries.get_time_series(u['uuid'])

        if profile_data:
            last_call_ts = profile_data.get('last_call_ts')
        else:
            last_call_ts = ts.get_first_value_for_field(
                key='stats/server_api_time',
                field='data.ts',
                sort_order=pymongo.DESCENDING
            )

        print(f'for user {u["uuid"]}, last call was {last_call_ts}')
        # A missing timestamp (None) means no recorded activity, which counts
        # as inactive; guard so we never compare None against a number.
        if last_call_ts is not None and last_call_ts > NOW_SECONDS - threshold:
            continue

        if profile_data:
            last_loc_ts = profile_data.get('last_loc_ts')
        else:
            last_loc_ts = ts.get_first_value_for_field(
                key='background/location',
                field='data.ts',
                sort_order=pymongo.DESCENDING
            )

        print(f'for user {u["uuid"]}, last location was {last_loc_ts}')
        if last_loc_ts is not None and last_loc_ts > NOW_SECONDS - threshold:
            continue

        print(f'User {u["uuid"]} is inactive')
        inactive_uuids.append(u['uuid'])

    return inactive_uuids
47+
48+
def purge_users(uuids):
    """
    Interactively confirm, then purge all data for each of the given UUIDs
    via common.purge_entries_for_user. Aborts without changes unless the
    operator answers 'y'.
    """
    print(f'About to remove {len(uuids)} users. Proceed? [y/n]')
    if input() == 'y':
        for user_id in uuids:
            print(f'Purging user {user_id}')
            common.purge_entries_for_user(user_id, True)
    else:
        print("Aborting")
56+
57+
def start_inactive(threshold_s, purge):
    """
    Per-deployment entry point: report all users inactive for longer than
    threshold_s seconds, and optionally purge them when `purge` is truthy.
    """
    uuid_db = edb.get_uuid_db()
    total_users = uuid_db.count_documents({})
    print(f'Total users: {total_users}')
    print('Finding inactive users...')
    inactive_uuids = find_inactive_uuids(uuid_db.find(), threshold_s)
    print(f'Of {total_users} users, found {len(inactive_uuids)} inactive users:')
    print(inactive_uuids)

    if purge:
        purge_users(inactive_uuids)
68+
69+
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        prog='inactive_users',
        description='Identify and perform actions on inactive users'
    )
    # type=int + required: the original accepted a missing/non-numeric value
    # and crashed later on int(args.threshold) with a TypeError/ValueError;
    # argparse now rejects bad input with a proper usage message instead.
    parser.add_argument('-t', '--threshold', type=int, required=True,
                        help='amount of time in days that defines an inactive user')
    parser.add_argument('-p', '--purge', action='store_true', help='purge inactive users')
    args = parser.parse_args()

    # Convert the day-based threshold to seconds for timestamp comparisons
    threshold_s = 60 * 60 * 24 * args.threshold

    run_on_all_deployments(start_inactive, threshold_s, args.purge)
82+
83+

0 commit comments

Comments
 (0)