Changes from 2 commits
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -21,6 +21,7 @@ pg_test = []

[dependencies]
arrow = {version = "53", default-features = false}
+arrow-cast = {version = "53", default-features = false}
arrow-schema = {version = "53", default-features = false}
aws-config = { version = "1.5", default-features = false, features = ["rustls"]}
aws-credential-types = {version = "1.2", default-features = false}
10 changes: 7 additions & 3 deletions README.md
@@ -110,7 +110,7 @@ SELECT * FROM parquet.schema('/tmp/product_example.parquet') LIMIT 10;
/tmp/product_example.parquet | name | BYTE_ARRAY | | OPTIONAL | | UTF8 | | | 3 | STRING
/tmp/product_example.parquet | items | | | OPTIONAL | 1 | LIST | | | 4 | LIST
/tmp/product_example.parquet | list | | | REPEATED | 1 | | | | |
-/tmp/product_example.parquet | items | | | OPTIONAL | 3 | | | | 5 |
+/tmp/product_example.parquet | element | | | OPTIONAL | 3 | | | | 5 |
/tmp/product_example.parquet | id | INT32 | | OPTIONAL | | | | | 6 |
/tmp/product_example.parquet | name | BYTE_ARRAY | | OPTIONAL | | UTF8 | | | 7 | STRING
(10 rows)
@@ -185,12 +185,16 @@ Alternatively, you can use the following environment variables when starting postgres

## Copy Options
`pg_parquet` supports the following options in the `COPY TO` command:
-- `format parquet`: you need to specify this option to read or write Parquet files that do not end with the `.parquet[.<compression>]` extension. (This is the only option that the `COPY FROM` command supports.),
+- `format parquet`: you need to specify this option to read or write Parquet files that do not end with the `.parquet[.<compression>]` extension,
- `row_group_size <int>`: the number of rows in each row group while writing Parquet files. The default row group size is `122880`,
- `row_group_size_bytes <int>`: the total byte size of rows in each row group while writing Parquet files. The default is `row_group_size * 1024` bytes,
-- `compression <string>`: the compression format to use while writing Parquet files. The supported compression formats are `uncompressed`, `snappy`, `gzip`, `brotli`, `lz4`, `lz4raw` and `zstd`. The default compression format is `snappy`. If not specified, the compression format is determined by the file extension.
+- `compression <string>`: the compression format to use while writing Parquet files. The supported compression formats are `uncompressed`, `snappy`, `gzip`, `brotli`, `lz4`, `lz4raw` and `zstd`. The default compression format is `snappy`. If not specified, the compression format is determined by the file extension,
- `compression_level <int>`: the compression level to use while writing Parquet files. Compression levels are only supported for the `gzip`, `zstd` and `brotli` compression formats. The default compression level is `6` for `gzip (0-10)`, `1` for `zstd (1-22)` and `1` for `brotli (0-11)`. An example `COPY TO` command follows this list.
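
A minimal sketch of these options used together; the table name `product_example` comes from the schema example above, and the option values (`zstd` at level `3`) are illustrative, not recommendations:

```sql
-- Write a table to Parquet with explicit copy options.
COPY product_example TO '/tmp/product_example.parquet'
WITH (
    format parquet,        -- required only when the path lacks a .parquet extension
    compression 'zstd',    -- one of the supported formats listed above
    compression_level 3,   -- valid range for zstd is 1-22 per the list above
    row_group_size 122880  -- rows per row group (the documented default)
);
```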

+`pg_parquet` supports the following options in the `COPY FROM` command:
+- `format parquet`: you need to specify this option to read or write Parquet files that do not end with the `.parquet[.<compression>]` extension,
+- `cast_mode <string>`: specifies the casting behavior, either `strict` or `relaxed`, which determines whether lossy conversions are allowed. The default, `strict`, does not permit lossy conversions (e.g., `bigint => int` causes a schema mismatch error during schema validation). When set to `relaxed`, lossy conversions are allowed and an error is raised only at runtime, if a value cannot be properly converted. This provides flexibility to handle schema mismatches by deferring error checks to runtime (see the sketch after this list).
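
A hedged sketch of the two modes, using the option spelling documented above; it assumes the target table declares a column as `int` while the Parquet file stores it as `bigint`, with names reused from the earlier examples:

```sql
-- strict (the default): the bigint => int mismatch is rejected up front
-- during schema validation, before any rows are read.
COPY product_example FROM '/tmp/product_example.parquet'
WITH (format parquet, cast_mode 'strict');

-- relaxed: schema validation passes and values are cast row by row;
-- Postgres raises "integer out of range" only if a value actually overflows.
COPY product_example FROM '/tmp/product_example.parquet'
WITH (format parquet, cast_mode 'relaxed');
```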
**Collaborator** commented:

> does not permit lossy conversions (e.g., `bigint => int`)

why is that a lossy conversion? Does it overflow?

**Member Author** replied:

when `cast_mode = strict`, it is not allowed at schema comparison time.
when `cast_mode = relaxed`, it is allowed but might fail at runtime if an overflow occurs. pg throws an error, e.g.

```
pg_parquet=# select 123123123123::bigint::int;
ERROR:  integer out of range
```

**Member Author** replied:

maybe the word "lossy" is confusing. If the error were not thrown, it would be lossy, but pg throws the error.

**Member Author** replied:

got rid of `cast_mode` and now always cast in the relaxed way.


## Configuration
There is currently only one GUC parameter to enable/disable `pg_parquet`:
- `pg_parquet.enable_copy_hooks`: you can set this parameter to `on` or `off` to enable or disable the `pg_parquet` extension, as shown below. The default value is `on`.
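
A minimal sketch of toggling the hook in a session, assuming nothing beyond the GUC documented above:

```sql
-- Hand COPY back to the stock PostgreSQL handlers for this session.
SET pg_parquet.enable_copy_hooks = off;

-- Restore pg_parquet's Parquet COPY handling (the default).
SET pg_parquet.enable_copy_hooks = on;
```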
1 change: 1 addition & 0 deletions src/arrow_parquet.rs
@@ -1,5 +1,6 @@
pub(crate) mod arrow_to_pg;
pub(crate) mod arrow_utils;
+pub(crate) mod cast_mode;
pub(crate) mod compression;
pub(crate) mod parquet_reader;
pub(crate) mod parquet_writer;