Commit 8c2a38d
Supports field_ids during COPY TO
Adds the `field_ids` option, which lets you specify how field ids are assigned during COPY TO. Supported values:

- `none` (default) => no field ids are written into the parquet metadata.
- `auto` => pg_parquet generates field ids starting from 0.
- `<json string>` => pg_parquet will use the given field ids, e.g.

```sql
create table test_table(a int, b text[]);
copy test_table to '/tmp/test.parquet' with (field_ids '{"a": 1, "b": {"__root_field_id": 2, "element": 3}}');
```

Closes #106.
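To see what actually lands in the file, a standalone sketch like the following (hypothetical, not part of this commit; it assumes the `parquet` crate as a dependency and the `/tmp/test.parquet` path from the example above) reads the field ids back from the file's schema:

```rust
use std::fs::File;

use parquet::file::reader::{FileReader, SerializedFileReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Open the file written by COPY TO above.
    let file = File::open("/tmp/test.parquet")?;
    let reader = SerializedFileReader::new(file)?;

    // The root schema is a group type; walk its immediate children.
    // (Nested ids, e.g. the list element of `b`, would need recursion.)
    let root = reader.metadata().file_metadata().schema_descr().root_schema();
    for field in root.get_fields() {
        let info = field.get_basic_info();
        if info.has_id() {
            println!("{} => field id {}", info.name(), info.id());
        }
    }
    Ok(())
}
```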
1 parent: 5b5725d

File tree

13 files changed: +897 −173 lines


README.md

Lines changed: 2 additions & 1 deletion
@@ -242,7 +242,8 @@ Supported authorization methods' priority order is shown below:
 ## Copy Options
 `pg_parquet` supports the following options in the `COPY TO` command:
 - `format parquet`: you need to specify this option to read or write Parquet files that do not end with the `.parquet[.<compression>]` extension,
-- `file_size_bytes <int>`: the total byte size per Parquet file. When set, the parquet files with the target size are created under a parent directory (named the same as the file name without the file extension). By default, when not specified, a single file is generated without creating a parent folder.
+- `file_size_bytes <int>`: the total byte size per Parquet file. When set, the parquet files with the target size are created under a parent directory (named the same as the file name without the file extension). By default, when not specified, a single file is generated without creating a parent folder,
+- `field_ids <string>`: field ids that are assigned to the fields in the Parquet file schema. By default, no field ids are assigned. Pass `auto` to let pg_parquet generate field ids, or pass a JSON string to assign the field ids explicitly,
 - `row_group_size <int>`: the number of rows in each row group while writing Parquet files. The default row group size is `122880`,
 - `row_group_size_bytes <int>`: the total byte size of rows in each row group while writing Parquet files. The default row group size bytes is `row_group_size * 1024`,
 - `compression <string>`: the compression format to use while writing Parquet files. The supported compression formats are `uncompressed`, `snappy`, `gzip`, `brotli`, `lz4`, `lz4raw` and `zstd`. The default compression format is `snappy`. If not specified, the compression format is determined by the file extension,

src/arrow_parquet.rs

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 pub(crate) mod arrow_to_pg;
 pub(crate) mod arrow_utils;
 pub(crate) mod compression;
+pub(crate) mod field_ids;
 pub(crate) mod match_by;
 pub(crate) mod parquet_reader;
 pub(crate) mod parquet_writer;

src/arrow_parquet/field_ids.rs

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+use std::{collections::HashMap, fmt::Display, str::FromStr};
+
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Default)]
+pub(crate) enum FieldIds {
+    #[default]
+    None,
+    Auto,
+    Explicit(FieldIdMapping),
+}
+
+impl FromStr for FieldIds {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "none" => Ok(FieldIds::None),
+            "auto" => Ok(FieldIds::Auto),
+            field_ids => Ok(FieldIds::Explicit(field_id_mapping_from_json_string(
+                field_ids,
+            )?)),
+        }
+    }
+}
+
+impl Display for FieldIds {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            FieldIds::None => write!(f, "none"),
+            FieldIds::Auto => write!(f, "auto"),
+            FieldIds::Explicit(field_id_mapping) => {
+                write!(f, "{}", field_id_mapping_to_json_string(field_id_mapping))
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+enum FieldIdMappingItem {
+    FieldId(i32),
+    FieldIdMapping(FieldIdMapping),
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub(crate) struct FieldIdMapping {
+    #[serde(flatten)]
+    fields: HashMap<String, FieldIdMappingItem>,
+}
+
+impl FieldIdMapping {
+    pub(crate) fn field_id(&self, field_path: &[String]) -> Option<i32> {
+        if field_path.is_empty() {
+            panic!("Field path is empty");
+        }
+
+        let field_name = &field_path[0];
+
+        match self.fields.get(field_name) {
+            Some(FieldIdMappingItem::FieldId(field_id)) => Some(*field_id),
+            Some(FieldIdMappingItem::FieldIdMapping(field_id_mapping)) => {
+                field_id_mapping.field_id(&field_path[1..])
+            }
+            None => None,
+        }
+    }
+}
+
+pub(crate) fn field_id_mapping_from_json_string(
+    json_string: &str,
+) -> Result<FieldIdMapping, String> {
+    serde_json::from_str(json_string).map_err(|_| "invalid JSON string for field_ids".into())
+}
+
+fn field_id_mapping_to_json_string(field_id_mapping: &FieldIdMapping) -> String {
+    serde_json::to_string(field_id_mapping).unwrap()
+}
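The commit adds no tests for this module. A unit test along the following lines (a sketch, not part of the diff; it would have to live inside the crate since the types are `pub(crate)`, and the test name is mine) would exercise the parsing and nested lookup, mirroring the commit message's example:

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_and_resolves_field_ids() {
        // "none" and "auto" map to the dedicated variants.
        assert!(matches!("none".parse::<FieldIds>(), Ok(FieldIds::None)));
        assert!(matches!("auto".parse::<FieldIds>(), Ok(FieldIds::Auto)));

        // Any other string is parsed as a JSON mapping; this one mirrors
        // the example for `create table test_table(a int, b text[])`.
        let field_ids: FieldIds = r#"{"a": 1, "b": {"__root_field_id": 2, "element": 3}}"#
            .parse()
            .expect("valid field_ids JSON");

        let FieldIds::Explicit(mapping) = field_ids else {
            panic!("expected an explicit mapping");
        };

        // A top-level field resolves directly to its id.
        assert_eq!(mapping.field_id(&["a".into()]), Some(1));
        // Nested paths recurse; `__root_field_id` is itself a plain key.
        assert_eq!(mapping.field_id(&["b".into(), "__root_field_id".into()]), Some(2));
        assert_eq!(mapping.field_id(&["b".into(), "element".into()]), Some(3));
        // Unknown fields yield None.
        assert_eq!(mapping.field_id(&["c".into()]), None);
    }
}
```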

src/arrow_parquet/parquet_reader.rs

Lines changed: 3 additions & 1 deletion
@@ -17,6 +17,7 @@ use pgrx::{
 use crate::{
     arrow_parquet::{
         arrow_to_pg::{context::collect_arrow_to_pg_attribute_contexts, to_pg_datum},
+        field_ids::FieldIds,
         schema_parser::{
             error_if_copy_from_match_by_position_with_generated_columns,
             parquet_schema_string_from_attributes,
@@ -68,7 +69,8 @@ impl ParquetReaderContext {
             parquet_schema_string_from_attributes(&attributes)
         );

-        let tupledesc_schema = parse_arrow_schema_from_attributes(&attributes);
+        let tupledesc_schema =
+            parse_arrow_schema_from_attributes(&attributes, FieldIds::None);

         let tupledesc_schema = Arc::new(tupledesc_schema);

src/arrow_parquet/parquet_writer.rs

Lines changed: 3 additions & 1 deletion
@@ -28,6 +28,7 @@ use crate::{
 };

 use super::{
+    field_ids::FieldIds,
     pg_to_arrow::{context::PgToArrowAttributeContext, to_arrow_array},
     uri_utils::ParsedUriInfo,
 };
@@ -46,6 +47,7 @@ impl ParquetWriterContext {
     pub(crate) fn new(
         uri_info: ParsedUriInfo,
         options: CopyToParquetOptions,
+        field_ids: FieldIds,
         tupledesc: &PgTupleDesc,
     ) -> ParquetWriterContext {
         // Postgis and Map contexts are used throughout writing the parquet file.
@@ -60,7 +62,7 @@ impl ParquetWriterContext {
             parquet_schema_string_from_attributes(&attributes)
        );

-        let schema = parse_arrow_schema_from_attributes(&attributes);
+        let schema = parse_arrow_schema_from_attributes(&attributes, field_ids);
         let schema = Arc::new(schema);

         let writer_props = Self::writer_props(tupledesc, options);
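The diff above does not show the COPY option plumbing itself, only that `ParquetWriterContext::new` now takes a `FieldIds` while the reader path always passes `FieldIds::None`. As a hedged sketch of how the option string would be turned into a `FieldIds` before constructing the writer context (the function name and `raw_option` are assumptions, not pg_parquet's actual code):

```rust
use std::str::FromStr;

use crate::arrow_parquet::field_ids::FieldIds;

// Hypothetical helper: maps an absent COPY option to the default
// (no field ids written) and parses any provided value via FromStr,
// which accepts "none", "auto", or a JSON mapping string.
fn field_ids_from_copy_option(raw_option: Option<&str>) -> Result<FieldIds, String> {
    match raw_option {
        None => Ok(FieldIds::default()),
        Some(s) => FieldIds::from_str(s),
    }
}
```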
