@@ -165,15 +165,20 @@ def test_generate_dupes_with_duplicates():
165165 ["2" , "Bob" , "31 Acacia Avenue" , "W1A 1AA" ],
166166 ["3" , "Charlotte" , "33 Acacia Avenue" , "W1A 1AA" ],
167167 ["4" , "David" , "33 Acacia Avenue" , "W1B 1BB" ],
168+ ["5" , "Edward" , "33 Zoological Street" , "Z1Z 1ZZ" ],
169+ ]
170+ people_selected_rows = [
171+ ["id" , "name" , "address_line_1" , "postcode" ],
172+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" ],
168173 ]
169174 settings = Settings (
170175 id_column = "id" ,
171176 columns_to_keep = ["name" ],
172177 check_same_address = True ,
173178 check_same_address_columns = ["address_line_1" , "postcode" ],
174179 )
175- dupes = generate_dupes (people_remaining_rows , settings )
176- assert dupes == [1 , 3 ]
180+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
181+ assert dupes == [1 , 3 , 5 ]
177182
178183
179184def test_generate_dupes_no_duplicates ():
@@ -186,13 +191,17 @@ def test_generate_dupes_no_duplicates():
186191 ["2" , "Bob" , "31 Acacia Avenue" , "W1A 1BB" ],
187192 ["3" , "Charlotte" , "35 Acacia Avenue" , "W1A 1CC" ],
188193 ]
194+ people_selected_rows = [
195+ ["id" , "name" , "address_line_1" , "postcode" ],
196+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" ],
197+ ]
189198 settings = Settings (
190199 id_column = "id" ,
191200 columns_to_keep = ["name" ],
192201 check_same_address = True ,
193202 check_same_address_columns = ["address_line_1" , "postcode" ],
194203 )
195- dupes = generate_dupes (people_remaining_rows , settings )
204+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
196205 assert dupes == []
197206
198207
@@ -204,14 +213,19 @@ def test_generate_dupes_check_disabled():
204213 ["id" , "name" , "address_line_1" , "postcode" ],
205214 ["1" , "Alice" , "33 Acacia Avenue" , "W1A 1AA" ],
206215 ["2" , "Bob" , "33 Acacia Avenue" , "W1A 1AA" ],
216+ ["5" , "Edward" , "33 Zoological Street" , "Z1Z 1ZZ" ],
217+ ]
218+ people_selected_rows = [
219+ ["id" , "name" , "address_line_1" , "postcode" ],
220+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" ],
207221 ]
208222 settings = Settings (
209223 id_column = "id" ,
210224 columns_to_keep = ["name" ],
211225 check_same_address = False ,
212226 check_same_address_columns = ["address_line_1" , "postcode" ],
213227 )
214- dupes = generate_dupes (people_remaining_rows , settings )
228+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
215229 assert dupes == []
216230
217231
@@ -227,13 +241,17 @@ def test_generate_dupes_multiple_groups():
227241 ["4" , "David" , "15 Oak Street" , "W2B 2BB" ],
228242 ["5" , "Eve" , "99 Pine Road" , "W3C 3CC" ],
229243 ]
244+ people_selected_rows = [
245+ ["id" , "name" , "address_line_1" , "postcode" ],
246+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" ],
247+ ]
230248 settings = Settings (
231249 id_column = "id" ,
232250 columns_to_keep = ["name" ],
233251 check_same_address = True ,
234252 check_same_address_columns = ["address_line_1" , "postcode" ],
235253 )
236- dupes = generate_dupes (people_remaining_rows , settings )
254+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
237255 assert dupes == [1 , 2 , 3 , 4 ]
238256
239257
@@ -247,13 +265,17 @@ def test_generate_dupes_three_at_same_address():
247265 ["2" , "Bob" , "33 Acacia Avenue" , "W1A 1AA" ],
248266 ["3" , "Charlotte" , "33 Acacia Avenue" , "W1A 1AA" ],
249267 ]
268+ people_selected_rows = [
269+ ["id" , "name" , "address_line_1" , "postcode" ],
270+ ["11" , "Zoe" , "33 Acacia Avenue" , "W1A 1AA" ],
271+ ]
250272 settings = Settings (
251273 id_column = "id" ,
252274 columns_to_keep = ["name" ],
253275 check_same_address = True ,
254276 check_same_address_columns = ["address_line_1" , "postcode" ],
255277 )
256- dupes = generate_dupes (people_remaining_rows , settings )
278+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
257279 assert dupes == [1 , 2 , 3 ]
258280
259281
@@ -266,15 +288,21 @@ def test_generate_dupes_single_address_column():
266288 ["1" , "Alice" , "W1A 1AA" ],
267289 ["2" , "Bob" , "W1A 1AA" ],
268290 ["3" , "Charlotte" , "W1B 1BB" ],
291+ ["4" , "David" , "Z1Z 1ZZ" ],
292+ ["5" , "Edwina" , "W1C 1CC" ],
293+ ]
294+ people_selected_rows = [
295+ ["id" , "name" , "postcode" ],
296+ ["11" , "Zoe" , "Z1Z 1ZZ" ],
269297 ]
270298 settings = Settings (
271299 id_column = "id" ,
272300 columns_to_keep = ["name" ],
273301 check_same_address = True ,
274302 check_same_address_columns = ["postcode" ],
275303 )
276- dupes = generate_dupes (people_remaining_rows , settings )
277- assert dupes == [1 , 2 ]
304+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
305+ assert dupes == [1 , 2 , 4 ]
278306
279307
280308def test_generate_dupes_ignores_non_address_columns ():
@@ -287,13 +315,17 @@ def test_generate_dupes_ignores_non_address_columns():
287315 ["2" , "Bob" , "33 Acacia Avenue" , "W1A 1AA" , "456" ],
288316 ["3" , "Charlotte" , "35 Acacia Avenue" , "W1B 1BB" , "789" ],
289317 ]
318+ people_selected_rows = [
319+ ["id" , "name" , "address_line_1" , "postcode" , "phone" ],
320+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" , "789" ],
321+ ]
290322 settings = Settings (
291323 id_column = "id" ,
292324 columns_to_keep = ["name" ],
293325 check_same_address = True ,
294326 check_same_address_columns = ["address_line_1" , "postcode" ],
295327 )
296- dupes = generate_dupes (people_remaining_rows , settings )
328+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
297329 # Alice and Bob have same address_line_1 and postcode, despite different phone
298330 assert dupes == [1 , 2 ]
299331
@@ -303,13 +335,14 @@ def test_generate_dupes_only_header_row():
303335 Test that generate_dupes returns an empty list when only the header row is present.
304336 """
305337 people_remaining_rows = [["id" , "name" , "address_line_1" , "postcode" ]]
338+ people_selected_rows = [["id" , "name" , "address_line_1" , "postcode" ]]
306339 settings = Settings (
307340 id_column = "id" ,
308341 columns_to_keep = ["name" ],
309342 check_same_address = True ,
310343 check_same_address_columns = ["address_line_1" , "postcode" ],
311344 )
312- dupes = generate_dupes (people_remaining_rows , settings )
345+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
313346 assert dupes == []
314347
315348
@@ -323,13 +356,17 @@ def test_generate_dupes_partial_address_match():
323356 ["2" , "Bob" , "33 Acacia Avenue" , "W1B 1BB" ],
324357 ["3" , "Charlotte" , "31 Acacia Avenue" , "W1A 1AA" ],
325358 ]
359+ people_selected_rows = [
360+ ["id" , "name" , "address_line_1" , "postcode" ],
361+ ["11" , "Zoe" , "33 Zoological Street" , "Z1Z 1ZZ" ],
362+ ]
326363 settings = Settings (
327364 id_column = "id" ,
328365 columns_to_keep = ["name" ],
329366 check_same_address = True ,
330367 check_same_address_columns = ["address_line_1" , "postcode" ],
331368 )
332- dupes = generate_dupes (people_remaining_rows , settings )
369+ dupes = generate_dupes (people_remaining_rows , people_selected_rows , settings )
333370 assert dupes == []
334371
335372
0 commit comments