Skip to content

Commit d48996c

Browse files
committed
remove cast_mode option
1 parent 7fa0477 commit d48996c

File tree

8 files changed

+46
-220
lines changed

8 files changed

+46
-220
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ Alternatively, you can use the following environment variables when starting pos
193193

194194
`pg_parquet` supports the following options in the `COPY FROM` command:
195195
- `format parquet`: you need to specify this option to read or write Parquet files which do not end with the `.parquet[.<compression>]` extension,
196-
- `cast_mode <string>`: Specifies the casting behavior, which can be set to either `strict` or `relaxed`. This determines whether lossy conversions are allowed. By default, the mode is `strict`, which does not permit lossy conversions (e.g., `bigint => int` causes a schema mismatch error during schema validation). When set to `relaxed`, lossy conversions are allowed, and errors will only be raised at runtime if a value cannot be properly converted. This option provides flexibility to handle schema mismatches by deferring error checks to runtime.
197196

198197
## Configuration
199198
There is currently only one GUC parameter to enable/disable `pg_parquet`:

src/arrow_parquet.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
pub(crate) mod arrow_to_pg;
22
pub(crate) mod arrow_utils;
3-
pub(crate) mod cast_mode;
43
pub(crate) mod compression;
54
pub(crate) mod parquet_reader;
65
pub(crate) mod parquet_writer;

src/arrow_parquet/cast_mode.rs

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/arrow_parquet/parquet_reader.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ use crate::{
2424

2525
use super::{
2626
arrow_to_pg::{collect_arrow_to_pg_attribute_contexts, ArrowToPgAttributeContext},
27-
cast_mode::CastMode,
2827
schema_parser::{
29-
ensure_arrow_schema_match_tupledesc_schema, parse_arrow_schema_from_attributes,
28+
ensure_file_schema_match_tupledesc_schema, parse_arrow_schema_from_attributes,
3029
},
3130
uri_utils::{parquet_reader_from_uri, PG_BACKEND_TOKIO_RUNTIME},
3231
};
@@ -42,7 +41,7 @@ pub(crate) struct ParquetReaderContext {
4241
}
4342

4443
impl ParquetReaderContext {
45-
pub(crate) fn new(uri: Url, cast_mode: CastMode, tupledesc: &PgTupleDesc) -> Self {
44+
pub(crate) fn new(uri: Url, tupledesc: &PgTupleDesc) -> Self {
4645
// Postgis and Map contexts are used throughout reading the parquet file.
4746
// We need to reset them to avoid reading stale data (e.g. the extension could have been dropped).
4847
reset_postgis_context();
@@ -63,14 +62,13 @@ impl ParquetReaderContext {
6362

6463
let tupledesc_schema = Arc::new(tupledesc_schema);
6564

66-
// Ensure that the arrow schema matches the tupledesc.
65+
// Ensure that the file schema matches the tupledesc schema.
6766
// Gets cast_to_types for each attribute if a cast is needed for the attribute's columnar array
6867
// to match the expected columnar array for its tupledesc type.
69-
let cast_to_types = ensure_arrow_schema_match_tupledesc_schema(
68+
let cast_to_types = ensure_file_schema_match_tupledesc_schema(
7069
parquet_file_schema.clone(),
7170
tupledesc_schema.clone(),
7271
&attributes,
73-
cast_mode,
7472
);
7573

7674
let attribute_contexts = collect_arrow_to_pg_attribute_contexts(

src/arrow_parquet/schema_parser.rs

Lines changed: 30 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use arrow_schema::{DataType, FieldRef};
66
use parquet::arrow::{arrow_to_parquet_schema, PARQUET_FIELD_ID_META_KEY};
77
use pg_sys::{
88
can_coerce_type,
9-
CoercionContext::{self, COERCION_EXPLICIT, COERCION_IMPLICIT},
9+
CoercionContext::{self, COERCION_EXPLICIT},
1010
FormData_pg_attribute, InvalidOid, Oid, BOOLOID, BYTEAOID, CHAROID, DATEOID, FLOAT4OID,
1111
FLOAT8OID, INT2OID, INT4OID, INT8OID, NUMERICOID, OIDOID, TEXTOID, TIMEOID, TIMESTAMPOID,
1212
TIMESTAMPTZOID, TIMETZOID,
@@ -27,8 +27,6 @@ use crate::{
2727
},
2828
};
2929

30-
use super::cast_mode::CastMode;
31-
3230
pub(crate) fn parquet_schema_string_from_attributes(
3331
attributes: &[FormData_pg_attribute],
3432
) -> String {
@@ -344,69 +342,45 @@ fn adjust_map_entries_field(field: FieldRef) -> FieldRef {
344342
Arc::new(entries_field)
345343
}
346344

347-
// ensure_arrow_schema_match_tupledesc_schema throws an error if the arrow schema does not match the table schema.
348-
// If the arrow schema is castable to the table schema, it returns a vector of Option<DataType> to cast to
349-
// for each field.
350-
pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
351-
arrow_schema: Arc<Schema>,
345+
// ensure_file_schema_match_tupledesc_schema throws an error if the file's schema does not match the table schema.
346+
// If the file's arrow schema is castable to the table's arrow schema, it returns a vector of Option<DataType>
347+
// to cast to for each field.
348+
pub(crate) fn ensure_file_schema_match_tupledesc_schema(
349+
file_schema: Arc<Schema>,
352350
tupledesc_schema: Arc<Schema>,
353351
attributes: &[FormData_pg_attribute],
354-
cast_mode: CastMode,
355352
) -> Vec<Option<DataType>> {
356353
let mut cast_to_types = Vec::new();
357354

358-
for (tupledesc_field, attribute) in tupledesc_schema.fields().iter().zip(attributes.iter()) {
359-
let field_name = tupledesc_field.name();
355+
for (tupledesc_schema_field, attribute) in
356+
tupledesc_schema.fields().iter().zip(attributes.iter())
357+
{
358+
let field_name = tupledesc_schema_field.name();
360359

361-
let arrow_field = arrow_schema.column_with_name(field_name);
360+
let file_schema_field = file_schema.column_with_name(field_name);
362361

363-
if arrow_field.is_none() {
362+
if file_schema_field.is_none() {
364363
panic!("column \"{}\" is not found in parquet file", field_name);
365364
}
366365

367-
let (_, arrow_field) = arrow_field.unwrap();
368-
let arrow_field = Arc::new(arrow_field.clone());
366+
let (_, file_schema_field) = file_schema_field.unwrap();
367+
let file_schema_field = Arc::new(file_schema_field.clone());
369368

370-
let from_type = arrow_field.data_type();
371-
let to_type = tupledesc_field.data_type();
369+
let from_type = file_schema_field.data_type();
370+
let to_type = tupledesc_schema_field.data_type();
372371

373372
// no cast needed
374373
if from_type == to_type {
375374
cast_to_types.push(None);
376375
continue;
377376
}
378377

379-
if let Err(coercion_error) = is_coercible(
380-
from_type,
381-
to_type,
382-
attribute.atttypid,
383-
attribute.atttypmod,
384-
cast_mode,
385-
) {
386-
let type_mismatch_message = format!(
378+
if !is_coercible(from_type, to_type, attribute.atttypid, attribute.atttypmod) {
379+
panic!(
387380
"type mismatch for column \"{}\" between table and parquet file.\n\n\
388381
table has \"{}\"\n\nparquet file has \"{}\"",
389382
field_name, to_type, from_type
390383
);
391-
392-
match coercion_error {
393-
CoercionError::NoStrictCoercionPath => ereport!(
394-
pgrx::PgLogLevel::ERROR,
395-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
396-
type_mismatch_message,
397-
"Try COPY FROM '..' WITH (cast_mode 'relaxed') to allow lossy casts with runtime checks."
398-
),
399-
CoercionError::NoCoercionPath => ereport!(
400-
pgrx::PgLogLevel::ERROR,
401-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
402-
type_mismatch_message
403-
),
404-
CoercionError::MapEntriesNullable => ereport!(
405-
pgrx::PgLogLevel::ERROR,
406-
PgSqlErrorCode::ERRCODE_CANNOT_COERCE,
407-
format!("entries field in map type cannot be nullable for column \"{}\"", field_name)
408-
),
409-
}
410384
}
411385

412386
pgrx::debug2!(
@@ -422,12 +396,6 @@ pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
422396
cast_to_types
423397
}
424398

425-
enum CoercionError {
426-
NoStrictCoercionPath,
427-
NoCoercionPath,
428-
MapEntriesNullable,
429-
}
430-
431399
// is_coercible first checks if "from_type" can be cast to "to_type" by arrow-cast.
432400
// Then, it checks if the cast is meaningful in Postgres by seeing if there is
433401
// an explicit coercion from "from_typoid" to "to_typoid".
@@ -436,17 +404,11 @@ enum CoercionError {
436404
// Arrow supports casting struct fields by field position instead of field name,
437405
// which is not the intended behavior for pg_parquet. Hence, we make sure the field names
438406
// match for structs.
439-
fn is_coercible(
440-
from_type: &DataType,
441-
to_type: &DataType,
442-
to_typoid: Oid,
443-
to_typmod: i32,
444-
cast_mode: CastMode,
445-
) -> Result<(), CoercionError> {
407+
fn is_coercible(from_type: &DataType, to_type: &DataType, to_typoid: Oid, to_typmod: i32) -> bool {
446408
match (from_type, to_type) {
447409
(DataType::Struct(from_fields), DataType::Struct(to_fields)) => {
448410
if from_fields.len() != to_fields.len() {
449-
return Err(CoercionError::NoCoercionPath);
411+
return false;
450412
}
451413

452414
let tupledesc = tuple_desc(to_typoid, to_typmod);
@@ -458,19 +420,20 @@ fn is_coercible(
458420
.zip(to_fields.iter().zip(attributes.iter()))
459421
{
460422
if from_field.name() != to_field.name() {
461-
return Err(CoercionError::NoCoercionPath);
423+
return false;
462424
}
463425

464-
is_coercible(
426+
if !is_coercible(
465427
from_field.data_type(),
466428
to_field.data_type(),
467429
to_attribute.type_oid().value(),
468430
to_attribute.type_mod(),
469-
cast_mode,
470-
)?;
431+
) {
432+
return false;
433+
}
471434
}
472435

473-
Ok(())
436+
true
474437
}
475438
(DataType::List(from_field), DataType::List(to_field)) => {
476439
let element_oid = array_element_typoid(to_typoid);
@@ -481,13 +444,12 @@ fn is_coercible(
481444
to_field.data_type(),
482445
element_oid,
483446
element_typmod,
484-
cast_mode,
485447
)
486448
}
487449
(DataType::Map(from_entries_field, _), DataType::Map(to_entries_field, _)) => {
488450
// entries field cannot be null
489451
if from_entries_field.is_nullable() {
490-
return Err(CoercionError::MapEntriesNullable);
452+
return false;
491453
}
492454

493455
let entries_typoid = domain_array_base_elem_typoid(to_typoid);
@@ -497,47 +459,23 @@ fn is_coercible(
497459
to_entries_field.data_type(),
498460
entries_typoid,
499461
to_typmod,
500-
cast_mode,
501462
)
502463
}
503464
_ => {
504465
// check if arrow-cast can cast the types
505466
if !can_cast_types(from_type, to_type) {
506-
return Err(CoercionError::NoCoercionPath);
467+
return false;
507468
}
508469

509470
let from_typoid = pg_type_for_arrow_primitive_type(from_type);
510471

511472
// pg_parquet could not recognize that arrow type
512473
if from_typoid == InvalidOid {
513-
return Err(CoercionError::NoCoercionPath);
474+
return false;
514475
}
515476

516-
let can_coerce_via_relaxed_mode =
517-
can_pg_coerce_types(from_typoid, to_typoid, COERCION_EXPLICIT);
518-
519477
// check if coercion is meaningful at Postgres (it has a coercion path)
520-
match cast_mode {
521-
CastMode::Strict => {
522-
let can_coerce_via_strict_mode =
523-
can_pg_coerce_types(from_typoid, to_typoid, COERCION_IMPLICIT);
524-
525-
if !can_coerce_via_strict_mode && can_coerce_via_relaxed_mode {
526-
Err(CoercionError::NoStrictCoercionPath)
527-
} else if !can_coerce_via_strict_mode {
528-
Err(CoercionError::NoCoercionPath)
529-
} else {
530-
Ok(())
531-
}
532-
}
533-
CastMode::Relaxed => {
534-
if !can_coerce_via_relaxed_mode {
535-
Err(CoercionError::NoCoercionPath)
536-
} else {
537-
Ok(())
538-
}
539-
}
540-
}
478+
can_pg_coerce_types(from_typoid, to_typoid, COERCION_EXPLICIT)
541479
}
542480
}
543481
}

0 commit comments

Comments
 (0)