Skip to content

Commit 9993235

Browse files
committed
Updated as_vector to catch cases where padding was poorly defined. Added new test cases where binary representation was invalid.
1 parent 8a94de1 commit 9993235

File tree

3 files changed

+38
-17
lines changed

3 files changed

+38
-17
lines changed

bson/binary.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ def as_vector(self) -> BinaryVector:
490490
dtype = BinaryVectorDtype(dtype)
491491
n_values = len(self) - position
492492

493+
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
494+
raise ValueError(
495+
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
496+
)
497+
493498
if dtype == BinaryVectorDtype.INT8:
494499
dtype_format = "b"
495500
format_string = f"<{n_values}{dtype_format}"
@@ -510,6 +515,10 @@ def as_vector(self) -> BinaryVector:
510515

511516
elif dtype == BinaryVectorDtype.PACKED_BIT:
512517
# data packed as uint8
518+
if padding and not n_values:
519+
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
520+
if padding > 7 or padding < 0:
521+
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
513522
dtype_format = "B"
514523
format_string = f"<{n_values}{dtype_format}"
515524
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))

test/bson_binary_vector/packed_bit.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,22 @@
2121
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
2222
},
2323
{
24-
"description": "Empty Vector PACKED_BIT",
24+
"description": "PACKED_BIT with padding",
2525
"valid": true,
26-
"vector": [],
26+
"vector": [127, 8],
2727
"dtype_hex": "0x10",
2828
"dtype_alias": "PACKED_BIT",
29-
"padding": 0,
30-
"canonical_bson": "1400000005766563746F72000200000009100000"
29+
"padding": 3,
30+
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
3131
},
3232
{
33-
"description": "PACKED_BIT with padding",
33+
"description": "Empty Vector PACKED_BIT",
3434
"valid": true,
35-
"vector": [127, 7],
35+
"vector": [],
3636
"dtype_hex": "0x10",
3737
"dtype_alias": "PACKED_BIT",
38-
"padding": 3,
39-
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
38+
"padding": 0,
39+
"canonical_bson": "1400000005766563746F72000200000009100000"
4040
},
4141
{
4242
"description": "Overflow Vector PACKED_BIT",

test/test_bson_binary_vector.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def create_test(case_spec):
4848
def run_test(self):
4949
for test_case in case_spec.get("tests", []):
5050
description = test_case["description"]
51-
vector_exp = test_case.get("vector", [])
51+
vector_exp = test_case.get("vector")
5252
dtype_hex_exp = test_case["dtype_hex"]
5353
dtype_alias_exp = test_case.get("dtype_alias")
5454
padding_exp = test_case.get("padding", 0)
@@ -85,14 +85,26 @@ def run_test(self):
8585
self.assertEqual(cB_obs, canonical_bson_exp, description)
8686

8787
else:
88-
with self.assertRaises((struct.error, ValueError), msg=description):
89-
# Tests Binary.from_vector
90-
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
91-
# Tests Binary.as_vector
92-
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
93-
decoded_doc = decode(cB_exp)
94-
binary_obs = decoded_doc[test_key]
95-
binary_obs.as_vector()
88+
"""
89+
#### To prove correct in an invalid case (`valid:false`), one MUST
90+
- (encoding case) if the vector field is present, raise an exception
91+
when attempting to encode a document from the numeric values, dtype, and padding.
92+
- (decoding case) if the canonical_bson field is present, raise an exception
93+
when attempting to deserialize it into the corresponding
94+
numeric values, as the field contains corrupted data.
95+
"""
96+
# Tests Binary.from_vector()
97+
if vector_exp is not None:
98+
with self.assertRaises((struct.error, ValueError), msg=description):
99+
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
100+
101+
# Tests Binary.as_vector()
102+
if canonical_bson_exp is not None:
103+
with self.assertRaises((struct.error, ValueError), msg=description):
104+
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
105+
decoded_doc = decode(cB_exp)
106+
binary_obs = decoded_doc[test_key]
107+
binary_obs.as_vector()
96108

97109
return run_test
98110

0 commit comments

Comments
 (0)