@@ -6,7 +6,7 @@ use arrow_schema::{DataType, FieldRef};
66use parquet:: arrow:: { arrow_to_parquet_schema, PARQUET_FIELD_ID_META_KEY } ;
77use pg_sys:: {
88 can_coerce_type,
9- CoercionContext :: { self , COERCION_EXPLICIT , COERCION_IMPLICIT } ,
9+ CoercionContext :: { self , COERCION_EXPLICIT } ,
1010 FormData_pg_attribute , InvalidOid , Oid , BOOLOID , BYTEAOID , CHAROID , DATEOID , FLOAT4OID ,
1111 FLOAT8OID , INT2OID , INT4OID , INT8OID , NUMERICOID , OIDOID , TEXTOID , TIMEOID , TIMESTAMPOID ,
1212 TIMESTAMPTZOID , TIMETZOID ,
@@ -27,8 +27,6 @@ use crate::{
2727 } ,
2828} ;
2929
30- use super :: cast_mode:: CastMode ;
31-
3230pub ( crate ) fn parquet_schema_string_from_attributes (
3331 attributes : & [ FormData_pg_attribute ] ,
3432) -> String {
@@ -344,69 +342,45 @@ fn adjust_map_entries_field(field: FieldRef) -> FieldRef {
344342 Arc :: new ( entries_field)
345343}
346344
347- // ensure_arrow_schema_match_tupledesc_schema throws an error if the arrow schema does not match the table schema.
348- // If the arrow schema is castable to the table schema, it returns a vector of Option<DataType> to cast to
349- // for each field.
350- pub ( crate ) fn ensure_arrow_schema_match_tupledesc_schema (
351- arrow_schema : Arc < Schema > ,
345+ // ensure_file_schema_match_tupledesc_schema throws an error if the file's schema does not match the table schema.
346+ // If the file's arrow schema is castable to the table's arrow schema, it returns a vector of Option<DataType>
347+ // to cast to for each field.
348+ pub ( crate ) fn ensure_file_schema_match_tupledesc_schema (
349+ file_schema : Arc < Schema > ,
352350 tupledesc_schema : Arc < Schema > ,
353351 attributes : & [ FormData_pg_attribute ] ,
354- cast_mode : CastMode ,
355352) -> Vec < Option < DataType > > {
356353 let mut cast_to_types = Vec :: new ( ) ;
357354
358- for ( tupledesc_field, attribute) in tupledesc_schema. fields ( ) . iter ( ) . zip ( attributes. iter ( ) ) {
359- let field_name = tupledesc_field. name ( ) ;
355+ for ( tupledesc_schema_field, attribute) in
356+ tupledesc_schema. fields ( ) . iter ( ) . zip ( attributes. iter ( ) )
357+ {
358+ let field_name = tupledesc_schema_field. name ( ) ;
360359
361- let arrow_field = arrow_schema . column_with_name ( field_name) ;
360+ let file_schema_field = file_schema . column_with_name ( field_name) ;
362361
363- if arrow_field . is_none ( ) {
362+ if file_schema_field . is_none ( ) {
364363 panic ! ( "column \" {}\" is not found in parquet file" , field_name) ;
365364 }
366365
367- let ( _, arrow_field ) = arrow_field . unwrap ( ) ;
368- let arrow_field = Arc :: new ( arrow_field . clone ( ) ) ;
366+ let ( _, file_schema_field ) = file_schema_field . unwrap ( ) ;
367+ let file_schema_field = Arc :: new ( file_schema_field . clone ( ) ) ;
369368
370- let from_type = arrow_field . data_type ( ) ;
371- let to_type = tupledesc_field . data_type ( ) ;
369+ let from_type = file_schema_field . data_type ( ) ;
370+ let to_type = tupledesc_schema_field . data_type ( ) ;
372371
373372 // no cast needed
374373 if from_type == to_type {
375374 cast_to_types. push ( None ) ;
376375 continue ;
377376 }
378377
379- if let Err ( coercion_error) = is_coercible (
380- from_type,
381- to_type,
382- attribute. atttypid ,
383- attribute. atttypmod ,
384- cast_mode,
385- ) {
386- let type_mismatch_message = format ! (
378+ if !is_coercible ( from_type, to_type, attribute. atttypid , attribute. atttypmod ) {
379+ panic ! (
387380 "type mismatch for column \" {}\" between table and parquet file.\n \n \
388381 table has \" {}\" \n \n parquet file has \" {}\" ",
389382 field_name, to_type, from_type
390383 ) ;
391-
392- match coercion_error {
393- CoercionError :: NoStrictCoercionPath => ereport ! (
394- pgrx:: PgLogLevel :: ERROR ,
395- PgSqlErrorCode :: ERRCODE_CANNOT_COERCE ,
396- type_mismatch_message,
397- "Try COPY FROM '..' WITH (cast_mode 'relaxed') to allow lossy casts with runtime checks."
398- ) ,
399- CoercionError :: NoCoercionPath => ereport ! (
400- pgrx:: PgLogLevel :: ERROR ,
401- PgSqlErrorCode :: ERRCODE_CANNOT_COERCE ,
402- type_mismatch_message
403- ) ,
404- CoercionError :: MapEntriesNullable => ereport ! (
405- pgrx:: PgLogLevel :: ERROR ,
406- PgSqlErrorCode :: ERRCODE_CANNOT_COERCE ,
407- format!( "entries field in map type cannot be nullable for column \" {}\" " , field_name)
408- ) ,
409- }
410384 }
411385
412386 pgrx:: debug2!(
@@ -422,12 +396,6 @@ pub(crate) fn ensure_arrow_schema_match_tupledesc_schema(
422396 cast_to_types
423397}
424398
425- enum CoercionError {
426- NoStrictCoercionPath ,
427- NoCoercionPath ,
428- MapEntriesNullable ,
429- }
430-
431399// is_coercible first checks if "from_type" can be cast to "to_type" by arrow-cast.
432400// Then, it checks if the cast is meaningful in Postgres by seeing if there is
433401// an explicit coercion from "from_typoid" to "to_typoid".
@@ -436,17 +404,11 @@ enum CoercionError {
436404// Arrow supports casting struct fields by field position instead of field name,
437405// which is not the intended behavior for pg_parquet. Hence, we make sure the field names
438406// match for structs.
439- fn is_coercible (
440- from_type : & DataType ,
441- to_type : & DataType ,
442- to_typoid : Oid ,
443- to_typmod : i32 ,
444- cast_mode : CastMode ,
445- ) -> Result < ( ) , CoercionError > {
407+ fn is_coercible ( from_type : & DataType , to_type : & DataType , to_typoid : Oid , to_typmod : i32 ) -> bool {
446408 match ( from_type, to_type) {
447409 ( DataType :: Struct ( from_fields) , DataType :: Struct ( to_fields) ) => {
448410 if from_fields. len ( ) != to_fields. len ( ) {
449- return Err ( CoercionError :: NoCoercionPath ) ;
411+ return false ;
450412 }
451413
452414 let tupledesc = tuple_desc ( to_typoid, to_typmod) ;
@@ -458,19 +420,20 @@ fn is_coercible(
458420 . zip ( to_fields. iter ( ) . zip ( attributes. iter ( ) ) )
459421 {
460422 if from_field. name ( ) != to_field. name ( ) {
461- return Err ( CoercionError :: NoCoercionPath ) ;
423+ return false ;
462424 }
463425
464- is_coercible (
426+ if ! is_coercible (
465427 from_field. data_type ( ) ,
466428 to_field. data_type ( ) ,
467429 to_attribute. type_oid ( ) . value ( ) ,
468430 to_attribute. type_mod ( ) ,
469- cast_mode,
470- ) ?;
431+ ) {
432+ return false ;
433+ }
471434 }
472435
473- Ok ( ( ) )
436+ true
474437 }
475438 ( DataType :: List ( from_field) , DataType :: List ( to_field) ) => {
476439 let element_oid = array_element_typoid ( to_typoid) ;
@@ -481,13 +444,12 @@ fn is_coercible(
481444 to_field. data_type ( ) ,
482445 element_oid,
483446 element_typmod,
484- cast_mode,
485447 )
486448 }
487449 ( DataType :: Map ( from_entries_field, _) , DataType :: Map ( to_entries_field, _) ) => {
488450 // entries field cannot be null
489451 if from_entries_field. is_nullable ( ) {
490- return Err ( CoercionError :: MapEntriesNullable ) ;
452+ return false ;
491453 }
492454
493455 let entries_typoid = domain_array_base_elem_typoid ( to_typoid) ;
@@ -497,47 +459,23 @@ fn is_coercible(
497459 to_entries_field. data_type ( ) ,
498460 entries_typoid,
499461 to_typmod,
500- cast_mode,
501462 )
502463 }
503464 _ => {
504465 // check if arrow-cast can cast the types
505466 if !can_cast_types ( from_type, to_type) {
506- return Err ( CoercionError :: NoCoercionPath ) ;
467+ return false ;
507468 }
508469
509470 let from_typoid = pg_type_for_arrow_primitive_type ( from_type) ;
510471
511472 // pg_parquet could not recognize that arrow type
512473 if from_typoid == InvalidOid {
513- return Err ( CoercionError :: NoCoercionPath ) ;
474+ return false ;
514475 }
515476
516- let can_coerce_via_relaxed_mode =
517- can_pg_coerce_types ( from_typoid, to_typoid, COERCION_EXPLICIT ) ;
518-
519477 // check if coercion is meaningful at Postgres (it has a coercion path)
520- match cast_mode {
521- CastMode :: Strict => {
522- let can_coerce_via_strict_mode =
523- can_pg_coerce_types ( from_typoid, to_typoid, COERCION_IMPLICIT ) ;
524-
525- if !can_coerce_via_strict_mode && can_coerce_via_relaxed_mode {
526- Err ( CoercionError :: NoStrictCoercionPath )
527- } else if !can_coerce_via_strict_mode {
528- Err ( CoercionError :: NoCoercionPath )
529- } else {
530- Ok ( ( ) )
531- }
532- }
533- CastMode :: Relaxed => {
534- if !can_coerce_via_relaxed_mode {
535- Err ( CoercionError :: NoCoercionPath )
536- } else {
537- Ok ( ( ) )
538- }
539- }
540- }
478+ can_pg_coerce_types ( from_typoid, to_typoid, COERCION_EXPLICIT )
541479 }
542480 }
543481}
0 commit comments