@@ -1,7 +1,8 @@
-# This tests the label inference pipeline. It uses real data and placeholder inference algorithms
 import unittest
 import numpy as np
 import time
+import logging
+import bson.objectid as boi
 import emission.analysis.classification.inference.labels.pipeline as eacilp
 import emission.analysis.classification.inference.labels.inferrers as eacili
 import emission.core.wrapper.labelprediction as ecwl
@@ -11,30 +12,29 @@
 import emission.core.get_database as edb
 import emission.tests.common as etc
 import emission.pipeline.intake_stage as epi
-import logging
-import bson.objectid as boi
-
 import emission.analysis.modelling.trip_model.config as eamtc
-
 import emission.analysis.modelling.trip_model.run_model as eamur
 import emission.analysis.modelling.trip_model.model_type as eamumt
 import emission.analysis.modelling.trip_model.model_storage as eamums
 import emission.tests.modellingTests.modellingTestAssets as etmm
 import emission.storage.timeseries.abstract_timeseries as esta
 
-
 class TestForestModelIntegration(unittest.TestCase):
-    # Test if the forest model for label prediction is smoothly integrated with the inference pipeline.
-    # In the initial setup, build a dummy forest model. Then run the pipeline on real example data.
-    # Finally in the test, assert the type of label predictions expected.
-
+    """
+    Tests the label inference pipeline using real data and placeholder inference algorithms,
+    checking that the forest model for label prediction integrates smoothly with the pipeline.
+    setUp() builds a dummy forest model and then runs the pipeline on real example data;
+    the test itself asserts the expected types of the label predictions.
+    The label_data dict and mock_trip_data are copied over from TestRunGreedyModel.py.
+    """
     def setUp(self):
         np.random.seed(91)
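+        # seeding numpy's RNG keeps the randomly-generated mock trips below reproducible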
         self.test_algorithms = eacilp.primary_algorithms
         forest_model_config = eamtc.get_config_value_or_raise('model_parameters.forest')
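+        # (get_config_value_or_raise fails fast if 'model_parameters.forest' is missing from the config)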
-
         etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-07-22") ##maybe use a different file
         ts = esta.TimeSeries.get_time_series(self.testUUID)
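+        # timeseries handle for the test user created by setupRealExample;
+        # the mock trips generated below are inserted through it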
+
+        # Generate labels with a known sample weight that we can rely on in the test
         label_data = {
             "mode_confirm": ['ebike', 'bike'],
             "purpose_confirm": ['happy-hour', 'dog-park'],
@@ -43,11 +43,10 @@ def setUp(self):
             "purpose_weights": [0.1, 0.9]
         }
 
-        self.total_trips = 100
-        ## generate mock trips
-        train = etmm.generate_mock_trips(
+        # Configuration values for randomly-generated test data copied over from TestRunGreedyModel.py
+        mock_trip_data = etmm.generate_mock_trips(
             user_id=self.testUUID,
-            trips=self.total_trips,
+            trips=100,
             origin=(-105.1705977, 39.7402654),
             destination=(-105.1755606, 39.7673075),
             trip_part='od',
@@ -56,61 +55,80 @@ def setUp(self):
             threshold=0.004,  # ~400m
             has_label_p=0.9
         )
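+        # has_label_p controls the fraction of generated trips that carry user labels
+        # (~90% here), so a few trips stay unlabeled, as in real data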
-        ## Required for Forest model inference
-        for result_entry in train:
+
+        # Required for Forest model inference
+        for result_entry in mock_trip_data:
             result_entry['data']['start_local_dt'] = result_entry['metadata']['write_local_dt']
             result_entry['data']['end_local_dt'] = result_entry['metadata']['write_local_dt']
             result_entry['data']['start_place'] = boi.ObjectId()
             result_entry['data']['end_place'] = boi.ObjectId()
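+        # (generate_mock_trips leaves these fields unset, so the loop above fills
+        # them with synthetic local datetimes and place IDs)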
-        ts.bulk_insert(train)
-        # confirm data write did not fail
-        check_data = esda.get_entries(key="analysis/confirmed_trip", user_id=self.testUUID, time_query=None)
-        if len(check_data) != self.total_trips:
-            logging.debug(f'test invariant failed after generating test data')
-            self.fail()
-        else:
-            logging.debug(f'found {self.total_trips} trips in database')
-        ## Build an already existing model or a new model
+
+        split = int(len(mock_trip_data) * 0.7)
+        mock_train_data = mock_trip_data[:split]
+        self.mock_test_data = mock_trip_data[split:]
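+        # 70/30 split: only the training portion is inserted into the timeseries,
+        # so the model never sees the held-out trips in self.mock_test_data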
+
+        ts.bulk_insert(mock_train_data)
+
+        # Build and train model
+        logging.debug(f'(TRAIN) creating a model based on trips in database')
         eamur.update_trip_model(
             user_id=self.testUUID,
             model_type=eamumt.ModelType.RANDOM_FOREST_CLASSIFIER,
             model_storage=eamums.ModelStorage.DOCUMENT_DATABASE,
-            min_trips=4,
+            min_trips=14,
             model_config=forest_model_config
         )
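+        # min_trips is the minimum number of labeled trips required before a model is
+        # trained; with 70 inserted trips at ~90% labeled, the threshold of 14 is met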
-        ## run inference pipeline
+
+        # Run inference pipeline
         self.run_pipeline(self.test_algorithms)
         time_range = estt.TimeQuery("metadata.write_ts", None, time.time())
         self.inferred_trips = esda.get_entries(esda.INFERRED_TRIP_KEY, self.testUUID, time_query=time_range)
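+        # a write_ts query from None to now matches every trip inferred by the pipeline run above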
 
     def tearDown(self):
-        self.reset_all()
+        etc.dropAllCollections(edb._get_current_db())
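+        # dropping every collection resets timeseries, model, and pipeline state,
+        # so each test starts from a clean database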
 
     def run_pipeline(self, algorithms):
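+        # Temporarily swap the pipeline's primary algorithms for the ones under test,
+        # then restore the module-level default after the run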
         default_primary_algorithms = eacilp.primary_algorithms
         eacilp.primary_algorithms = algorithms
         epi.run_intake_pipeline_for_user(self.testUUID, skip_if_no_new_data=False)
         eacilp.primary_algorithms = default_primary_algorithms
 
-    def reset_all(self):
-        edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUUID})
-        edb.get_model_db().delete_many({'user_id': self.testUUID})
-        edb.get_pipeline_state_db().delete_many({'user_id': self.testUUID})
-
-
-    # Tests that forest algorithm being tested runs successfully
     def testForestAlgorithm(self):
+        """
+        Tests that the forest algorithm runs successfully when called from the analysis pipeline.
+        The tests are based on the existing tests in TestLabelInferencePipeline.py.
+        """
+        valid_modes = ['ebike', 'bike']
+        valid_purposes = ['happy-hour', 'dog-park']
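+        # these mirror the mode_confirm / purpose_confirm values in setUp's label_data;
+        # every predicted label should come from this set (checked in Test 6 below)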
+
         for trip in self.inferred_trips:
             entries = esdt.get_sections_for_trip("inference/labels", self.testUUID, trip.get_id())
             self.assertEqual(len(entries), len(self.test_algorithms))
             for entry in entries:
-                self.assertGreater(len(entry["data"]["prediction"]), 0)
+                # Test 1: Check that a non-empty prediction list is generated
+                self.assertGreater(len(entry["data"]["prediction"]), 0, "Prediction list should not be empty - model failed to generate any predictions")
+
+                # Test 2: Check that the trip's inferred labels match the prediction value in the entry
+                self.assertEqual(trip["data"]["inferred_labels"], entry["data"]["prediction"])
+
+                # Test 3: Check that the prediction value in the entry equals the prediction generated by the algorithm
+                this_algorithm = ecwl.AlgorithmTypes(entry["data"]["algorithm_id"])
+                self.assertIn(this_algorithm, self.test_algorithms)
+                self.assertEqual(entry["data"]["prediction"], self.test_algorithms[this_algorithm]([trip])[0])
+
                 for singleprediction in entry["data"]["prediction"]:
-                    self.assertIsInstance(singleprediction, dict, " should be an instance of the dictionary class")
-                    self.assertIsInstance(singleprediction['labels'], dict, " should be an instance of the dictionary class")
-                    self.assertIn('mode_confirm', singleprediction['labels'].keys())
-                    self.assertIn('replaced_mode', singleprediction['labels'].keys())
-                    self.assertIn('purpose_confirm', singleprediction['labels'].keys())
+                    # Test 4: Check that each prediction and its labels are dictionaries
+                    self.assertIsInstance(singleprediction, dict, "should be an instance of the dictionary class")
+                    self.assertIsInstance(singleprediction['labels'], dict, "should be an instance of the dictionary class")
+
+                    # Test 5: Check that the labels dictionary contains the required keys
+                    self.assertIn('mode_confirm', singleprediction['labels'].keys())
+                    self.assertIn('replaced_mode', singleprediction['labels'].keys())
+                    self.assertIn('purpose_confirm', singleprediction['labels'].keys())
+
+                    # Test 6: Check that the predicted labels come from the label set used to generate the mock data
+                    self.assertIn(singleprediction['labels']['mode_confirm'], valid_modes)
+                    self.assertIn(singleprediction['labels']['purpose_confirm'], valid_purposes)
 
 def main():
     etc.configLogging()