Skip to content

Commit 66ee73a

Browse files
committed
remove cast_mode option
1 parent 7fa0477 commit 66ee73a

File tree

8 files changed

+28
-204
lines changed

8 files changed

+28
-204
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ Alternatively, you can use the following environment variables when starting pos
193193

194194
`pg_parquet` supports the following options in the `COPY FROM` command:
195195
- `format parquet`: you need to specify this option to read or write Parquet files that do not end with the `.parquet[.<compression>]` extension,
196-
- `cast_mode <string>`: Specifies the casting behavior, which can be set to either `strict` or `relaxed`. This determines whether lossy conversions are allowed. By default, the mode is `strict`, which does not permit lossy conversions (e.g., `bigint => int` causes a schema mismatch error during schema validation). When set to `relaxed`, lossy conversions are allowed, and errors will only be raised at runtime if a value cannot be properly converted. This option provides flexibility to handle schema mismatches by deferring error checks to runtime.
197196

198197
## Configuration
199198
There is currently only one GUC parameter, which enables/disables `pg_parquet`:

src/arrow_parquet.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
pub(crate) mod arrow_to_pg;
22
pub(crate) mod arrow_utils;
3-
pub(crate) mod cast_mode;
43
pub(crate) mod compression;
54
pub(crate) mod parquet_reader;
65
pub(crate) mod parquet_writer;

src/arrow_parquet/cast_mode.rs

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/arrow_parquet/parquet_reader.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use crate::{
2424

2525
use super::{
2626
arrow_to_pg::{collect_arrow_to_pg_attribute_contexts, ArrowToPgAttributeContext},
27-
cast_mode::CastMode,
2827
schema_parser::{
2928
ensure_arrow_schema_match_tupledesc_schema, parse_arrow_schema_from_attributes,
3029
},
@@ -42,7 +41,7 @@ pub(crate) struct ParquetReaderContext {
4241
}
4342

4443
impl ParquetReaderContext {
45-
pub(crate) fn new(uri: Url, cast_mode: CastMode, tupledesc: &PgTupleDesc) -> Self {
44+
pub(crate) fn new(uri: Url, tupledesc: &PgTupleDesc) -> Self {
4645
// Postgis and Map contexts are used throughout reading the parquet file.
4746
// We need to reset them to avoid reading stale data (e.g. the extension could have been dropped).
4847
reset_postgis_context();
@@ -70,7 +69,6 @@ impl ParquetReaderContext {
7069
parquet_file_schema.clone(),
7170
tupledesc_schema.clone(),
7271
&attributes,
73-
cast_mode,
7472
);
7573

7674
let attribute_contexts = collect_arrow_to_pg_attribute_contexts(

src/arrow_parquet/schema_parser.rs

Lines changed: 15 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use arrow_schema::{DataType, FieldRef};
66
use parquet::arrow::{arrow_to_parquet_schema, PARQUET_FIELD_ID_META_KEY};
77
use pg_sys::{
88
can_coerce_type,
9-
CoercionContext::{self, COERCION_EXPLICIT, COERCION_IMPLICIT},
9+
CoercionContext::{self, COERCION_EXPLICIT},
1010
FormData_pg_attribute, InvalidOid, Oid, BOOLOID, BYTEAOID, CHAROID, DATEOID, FLOAT4OID,
1111
FLOAT8OID, INT2OID, INT4OID, INT8OID, NUMERICOID, OIDOID, TEXTOID, TIMEOID, TIMESTAMPOID,
1212
TIMESTAMPTZOID, TIMETZOID,
@@ -27,8 +27,6 @@ use crate::{
2727
},
2828
};
2929

30-
use super::cast_mode::CastMode;
31-
3230
pub(crate) fn parquet_schema_string_from_attributes(
3331
attributes: &[FormData_pg_attribute],
3432
) -> String {
@@ -351,7 +349,6 @@ pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
351349
arrow_schema: Arc<Schema>,
352350
tupledesc_schema: Arc<Schema>,
353351
attributes: &[FormData_pg_attribute],
354-
cast_mode: CastMode,
355352
) -> Vec<Option<DataType>> {
356353
let mut cast_to_types = Vec::new();
357354

@@ -376,37 +373,12 @@ pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
376373
continue;
377374
}
378375

379-
if let Err(coercion_error) = is_coercible(
380-
from_type,
381-
to_type,
382-
attribute.atttypid,
383-
attribute.atttypmod,
384-
cast_mode,
385-
) {
386-
let type_mismatch_message = format!(
376+
if !is_coercible(from_type, to_type, attribute.atttypid, attribute.atttypmod) {
377+
panic!(
387378
"type mismatch for column \"{}\" between table and parquet file.\n\n\
388379
table has \"{}\"\n\nparquet file has \"{}\"",
389380
field_name, to_type, from_type
390381
);
391-
392-
match coercion_error {
393-
CoercionError::NoStrictCoercionPath => ereport!(
394-
pgrx::PgLogLevel::ERROR,
395-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
396-
type_mismatch_message,
397-
"Try COPY FROM '..' WITH (cast_mode 'relaxed') to allow lossy casts with runtime checks."
398-
),
399-
CoercionError::NoCoercionPath => ereport!(
400-
pgrx::PgLogLevel::ERROR,
401-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
402-
type_mismatch_message
403-
),
404-
CoercionError::MapEntriesNullable => ereport!(
405-
pgrx::PgLogLevel::ERROR,
406-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
407-
format!("entries field in map type cannot be nullable for column \"{}\"", field_name)
408-
),
409-
}
410382
}
411383

412384
pgrx::debug2!(
@@ -422,12 +394,6 @@ pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
422394
cast_to_types
423395
}
424396

425-
enum CoercionError {
426-
NoStrictCoercionPath,
427-
NoCoercionPath,
428-
MapEntriesNullable,
429-
}
430-
431397
// is_coercible first checks if "from_type" can be cast to "to_type" by arrow-cast.
432398
// Then, it checks if the cast is meaningful in Postgres by checking whether there is
433399
// an explicit coercion from "from_typoid" to "to_typoid".
@@ -436,17 +402,11 @@ enum CoercionError {
436402
// Arrow supports casting struct fields by field position instead of field name,
437403
// which is not the intended behavior for pg_parquet. Hence, we make sure the field names
438404
// match for structs.
439-
fn is_coercible(
440-
from_type: &DataType,
441-
to_type: &DataType,
442-
to_typoid: Oid,
443-
to_typmod: i32,
444-
cast_mode: CastMode,
445-
) -> Result<(), CoercionError> {
405+
fn is_coercible(from_type: &DataType, to_type: &DataType, to_typoid: Oid, to_typmod: i32) -> bool {
446406
match (from_type, to_type) {
447407
(DataType::Struct(from_fields), DataType::Struct(to_fields)) => {
448408
if from_fields.len() != to_fields.len() {
449-
return Err(CoercionError::NoCoercionPath);
409+
return false;
450410
}
451411

452412
let tupledesc = tuple_desc(to_typoid, to_typmod);
@@ -458,19 +418,20 @@ fn is_coercible(
458418
.zip(to_fields.iter().zip(attributes.iter()))
459419
{
460420
if from_field.name() != to_field.name() {
461-
return Err(CoercionError::NoCoercionPath);
421+
return false;
462422
}
463423

464-
is_coercible(
424+
if !is_coercible(
465425
from_field.data_type(),
466426
to_field.data_type(),
467427
to_attribute.type_oid().value(),
468428
to_attribute.type_mod(),
469-
cast_mode,
470-
)?;
429+
) {
430+
return false;
431+
}
471432
}
472433

473-
Ok(())
434+
true
474435
}
475436
(DataType::List(from_field), DataType::List(to_field)) => {
476437
let element_oid = array_element_typoid(to_typoid);
@@ -481,13 +442,12 @@ fn is_coercible(
481442
to_field.data_type(),
482443
element_oid,
483444
element_typmod,
484-
cast_mode,
485445
)
486446
}
487447
(DataType::Map(from_entries_field, _), DataType::Map(to_entries_field, _)) => {
488448
// entries field cannot be null
489449
if from_entries_field.is_nullable() {
490-
return Err(CoercionError::MapEntriesNullable);
450+
return false;
491451
}
492452

493453
let entries_typoid = domain_array_base_elem_typoid(to_typoid);
@@ -497,47 +457,23 @@ fn is_coercible(
497457
to_entries_field.data_type(),
498458
entries_typoid,
499459
to_typmod,
500-
cast_mode,
501460
)
502461
}
503462
_ => {
504463
// check if arrow-cast can cast the types
505464
if !can_cast_types(from_type, to_type) {
506-
return Err(CoercionError::NoCoercionPath);
465+
return false;
507466
}
508467

509468
let from_typoid = pg_type_for_arrow_primitive_type(from_type);
510469

511470
// pg_parquet could not recognize that arrow type
512471
if from_typoid == InvalidOid {
513-
return Err(CoercionError::NoCoercionPath);
472+
return false;
514473
}
515474

516-
let can_coerce_via_relaxed_mode =
517-
can_pg_coerce_types(from_typoid, to_typoid, COERCION_EXPLICIT);
518-
519475
// check if coercion is meaningful at Postgres (it has a coercion path)
520-
match cast_mode {
521-
CastMode::Strict => {
522-
let can_coerce_via_strict_mode =
523-
can_pg_coerce_types(from_typoid, to_typoid, COERCION_IMPLICIT);
524-
525-
if !can_coerce_via_strict_mode && can_coerce_via_relaxed_mode {
526-
Err(CoercionError::NoStrictCoercionPath)
527-
} else if !can_coerce_via_strict_mode {
528-
Err(CoercionError::NoCoercionPath)
529-
} else {
530-
Ok(())
531-
}
532-
}
533-
CastMode::Relaxed => {
534-
if !can_coerce_via_relaxed_mode {
535-
Err(CoercionError::NoCoercionPath)
536-
} else {
537-
Ok(())
538-
}
539-
}
540-
}
476+
can_pg_coerce_types(from_typoid, to_typoid, COERCION_EXPLICIT)
541477
}
542478
}
543479
}

src/lib.rs

Lines changed: 8 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,7 +1499,7 @@ mod tests {
14991499
let create_table = "CREATE TABLE test_table (x real)";
15001500
Spi::run(create_table).unwrap();
15011501

1502-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
1502+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
15031503
Spi::run(copy_from).unwrap();
15041504

15051505
let value = Spi::get_one::<f32>("SELECT x FROM test_table LIMIT 1")
@@ -1588,7 +1588,7 @@ mod tests {
15881588
let create_table = "CREATE TABLE test_table (x timestamp)";
15891589
Spi::run(create_table).unwrap();
15901590

1591-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
1591+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
15921592
Spi::run(copy_from).unwrap();
15931593

15941594
let value = Spi::get_one::<Timestamp>("SELECT x FROM test_table LIMIT 1")
@@ -1768,7 +1768,7 @@ mod tests {
17681768
let create_table = "CREATE TABLE test_table (x int)";
17691769
Spi::run(create_table).unwrap();
17701770

1771-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
1771+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
17721772
Spi::run(copy_from).unwrap();
17731773

17741774
let value = Spi::get_one::<i32>("SELECT x FROM test_table LIMIT 1")
@@ -1820,7 +1820,7 @@ mod tests {
18201820
let create_table = "CREATE TABLE test_table (x text)";
18211821
Spi::run(create_table).unwrap();
18221822

1823-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
1823+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
18241824
Spi::run(copy_from).unwrap();
18251825

18261826
let value = Spi::get_one::<String>("SELECT x FROM test_table LIMIT 1")
@@ -2078,7 +2078,7 @@ mod tests {
20782078
let create_table = "CREATE TABLE test_table (x test_type[])";
20792079
Spi::run(create_table).unwrap();
20802080

2081-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
2081+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
20822082
Spi::run(copy_from).unwrap();
20832083

20842084
let value =
@@ -2108,7 +2108,7 @@ mod tests {
21082108
let create_table = "CREATE TABLE test_table (x test_type)";
21092109
Spi::run(create_table).unwrap();
21102110

2111-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
2111+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
21122112
Spi::run(copy_from).unwrap();
21132113
}
21142114

@@ -2141,7 +2141,7 @@ mod tests {
21412141
let create_table = "CREATE TABLE test_table (x test_type)";
21422142
Spi::run(create_table).unwrap();
21432143

2144-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
2144+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
21452145
Spi::run(copy_from).unwrap();
21462146
}
21472147

@@ -2174,7 +2174,7 @@ mod tests {
21742174
let create_table = "CREATE TABLE test_table (x test_type)";
21752175
Spi::run(create_table).unwrap();
21762176

2177-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
2177+
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
21782178
Spi::run(copy_from).unwrap();
21792179
}
21802180

@@ -2359,38 +2359,6 @@ mod tests {
23592359
assert_eq!(result, (Some("hello"), Some(1)));
23602360
}
23612361

2362-
#[pg_test]
2363-
fn test_table_with_relaxed_cast() {
2364-
// INT64 => int
2365-
let copy_to = "COPY (SELECT 1::bigint as x) TO '/tmp/test.parquet'";
2366-
Spi::run(copy_to).unwrap();
2367-
2368-
let create_table = "CREATE TABLE test_table (x int)";
2369-
Spi::run(create_table).unwrap();
2370-
2371-
let copy_from = "COPY test_table FROM '/tmp/test.parquet' WITH (cast_mode 'relaxed')";
2372-
Spi::run(copy_from).unwrap();
2373-
2374-
let result = Spi::get_one::<i32>("SELECT x FROM test_table LIMIT 1")
2375-
.unwrap()
2376-
.unwrap();
2377-
assert_eq!(result, 1);
2378-
}
2379-
2380-
#[pg_test]
2381-
#[should_panic(expected = "type mismatch for column \"x\" between table and parquet file.")]
2382-
fn test_table_with_strict_cast_fail() {
2383-
// INT64 => int
2384-
let copy_to = "COPY (SELECT 1::bigint as x) TO '/tmp/test.parquet'";
2385-
Spi::run(copy_to).unwrap();
2386-
2387-
let create_table = "CREATE TABLE test_table (x int)";
2388-
Spi::run(create_table).unwrap();
2389-
2390-
let copy_from = "COPY test_table FROM '/tmp/test.parquet'";
2391-
Spi::run(copy_from).unwrap();
2392-
}
2393-
23942362
#[pg_test]
23952363
#[should_panic(expected = "type mismatch for column \"x\" between table and parquet file.")]
23962364
fn test_coerce_custom_cast_fail() {
@@ -2731,20 +2699,6 @@ mod tests {
27312699
test_helper(test_table);
27322700
}
27332701

2734-
#[pg_test]
2735-
#[should_panic(expected = "invalid_cast_mode is not a valid cast_mode")]
2736-
fn test_invalid_cast_mode_copy_from() {
2737-
let mut copy_options = HashMap::new();
2738-
copy_options.insert(
2739-
"cast_mode".to_string(),
2740-
CopyOptionValue::StringOption("invalid_cast_mode".to_string()),
2741-
);
2742-
2743-
let test_table = TestTable::<i32>::new("int4".into()).with_copy_from_options(copy_options);
2744-
test_table.insert("INSERT INTO test_expected (a) VALUES (1), (2), (null);");
2745-
test_helper(test_table);
2746-
}
2747-
27482702
#[pg_test]
27492703
#[should_panic(expected = "nonexisted is not a valid option for \"copy from parquet\".")]
27502704
fn test_nonexistent_copy_from_option() {

0 commit comments

Comments
 (0)