Commit f8c3d62

Support file_size_bytes option (#100)
COPY TO parquet now supports a new option, `file_size_bytes`, which sets a target size for the generated Parquet files. When the file being written exceeds the target size, it is flushed and a new Parquet file is started. The files are written under a parent directory named after the destination path without the `.parquet` extension, e.g.

```sql
COPY (select 'hellooooo' || i from generate_series(1, 1000000) i)
to '/tmp/test.parquet' with (file_size_bytes '1MB');
```

```bash
> ls -alh /tmp/test.parquet/
1.4M data_0.parquet
1.4M data_1.parquet
1.4M data_2.parquet
1.4M data_3.parquet
114K data_4.parquet
```

Closes #107.

1 parent b626eb4 commit f8c3d62

12 files changed, with 620 additions and 151 deletions.

README.md

Lines changed: 3 additions & 2 deletions
```diff
@@ -273,8 +273,9 @@ Supported authorization methods' priority order is shown below:
 ## Copy Options
 
 `pg_parquet` supports the following options in the `COPY TO` command:
 - `format parquet`: you need to specify this option to read or write Parquet files which does not end with `.parquet[.<compression>]` extension,
-- `row_group_size <int>`: the number of rows in each row group while writing Parquet files. The default row group size is `122880`,
-- `row_group_size_bytes <int>`: the total byte size of rows in each row group while writing Parquet files. The default row group size bytes is `row_group_size * 1024`,
+- `file_size_bytes <string>`: the total file size per Parquet file. When set, the parquet files, with target size, are created under parent directory (named the same as file name). By default, when not specified, a single file is generated without creating a parent folder. You can specify total bytes without unit like `file_size_bytes 2000000` or with unit (KB, MB, or GB) like `file_size_bytes '1MB'`,
+- `row_group_size <int64>`: the number of rows in each row group while writing Parquet files. The default row group size is `122880`,
+- `row_group_size_bytes <int64>`: the total byte size of rows in each row group while writing Parquet files. The default row group size bytes is `row_group_size * 1024`,
 - `compression <string>`: the compression format to use while writing Parquet files. The supported compression formats are `uncompressed`, `snappy`, `gzip`, `brotli`, `lz4`, `lz4raw` and `zstd`. The default compression format is `snappy`. If not specified, the compression format is determined by the file extension,
 - `compression_level <int>`: the compression level to use while writing Parquet files. The supported compression levels are only supported for `gzip`, `zstd` and `brotli` compression formats. The default compression level is `6` for `gzip (0-10)`, `1` for `zstd (1-22)` and `1` for `brotli (0-11)`.
```
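
The README entry above says `file_size_bytes` accepts either a plain byte count or a value with a `KB`, `MB`, or `GB` suffix. The option parsing itself is not part of the hunks shown on this page; the snippet below is only a minimal sketch of how such a value could be normalized to bytes, assuming binary (1024-based) units. The function name and the unit multipliers are illustrative assumptions, not the extension's actual code.

```rust
// A sketch only: normalize a `file_size_bytes` value such as "2000000" or "1MB"
// into a byte count. The helper name and the 1024-based unit multipliers are
// assumptions for illustration; the extension's real option parsing is not shown here.
fn parse_file_size_bytes(value: &str) -> Result<i64, String> {
    let value = value.trim();

    // split the trailing unit (KB, MB, GB), if any, from the numeric part
    let (digits, unit) = match value.find(|c: char| !c.is_ascii_digit()) {
        Some(idx) => (&value[..idx], value[idx..].trim()),
        None => (value, ""),
    };

    let number: i64 = digits
        .parse()
        .map_err(|e| format!("invalid file_size_bytes value '{value}': {e}"))?;

    let multiplier: i64 = match unit.to_ascii_uppercase().as_str() {
        "" => 1,
        "KB" => 1024,
        "MB" => 1024 * 1024,
        "GB" => 1024 * 1024 * 1024,
        other => return Err(format!("unsupported file_size_bytes unit '{other}'")),
    };

    Ok(number * multiplier)
}

fn main() {
    assert_eq!(parse_file_size_bytes("2000000"), Ok(2_000_000));
    assert_eq!(parse_file_size_bytes("1MB"), Ok(1024 * 1024));
}
```

Under these assumptions, `file_size_bytes '1MB'` would translate to `1048576` bytes, i.e. the target used in the commit message example.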

src/arrow_parquet/parquet_writer.rs

Lines changed: 31 additions & 22 deletions
```diff
@@ -11,13 +11,14 @@ use pgrx::{heap_tuple::PgHeapTuple, AllocatedByRust, PgTupleDesc};
 
 use crate::{
     arrow_parquet::{
-        compression::{PgParquetCompression, PgParquetCompressionWithLevel},
+        compression::PgParquetCompressionWithLevel,
         pg_to_arrow::context::collect_pg_to_arrow_attribute_contexts,
         schema_parser::{
             parquet_schema_string_from_attributes, parse_arrow_schema_from_attributes,
         },
         uri_utils::parquet_writer_from_uri,
     },
+    parquet_copy_hook::copy_to_split_dest_receiver::CopyToParquetOptions,
     pgrx_utils::{collect_attributes_for, CollectAttributesFor},
     type_compat::{
         geometry::{geoparquet_metadata_json_from_tupledesc, reset_postgis_context},
@@ -38,13 +39,13 @@ pub(crate) struct ParquetWriterContext {
     parquet_writer: AsyncArrowWriter<ParquetObjectWriter>,
     schema: SchemaRef,
     attribute_contexts: Vec<PgToArrowAttributeContext>,
+    options: CopyToParquetOptions,
 }
 
 impl ParquetWriterContext {
     pub(crate) fn new(
         uri_info: ParsedUriInfo,
-        compression: PgParquetCompression,
-        compression_level: i32,
+        options: CopyToParquetOptions,
         tupledesc: &PgTupleDesc,
     ) -> ParquetWriterContext {
         // Postgis and Map contexts are used throughout writing the parquet file.
@@ -62,7 +63,7 @@ impl ParquetWriterContext {
         let schema = parse_arrow_schema_from_attributes(&attributes);
         let schema = Arc::new(schema);
 
-        let writer_props = Self::writer_props(tupledesc, compression, compression_level);
+        let writer_props = Self::writer_props(tupledesc, options);
 
         let parquet_writer = parquet_writer_from_uri(uri_info, schema.clone(), writer_props);
 
@@ -73,22 +74,20 @@ impl ParquetWriterContext {
             parquet_writer,
             schema,
             attribute_contexts,
+            options,
         }
     }
 
-    fn writer_props(
-        tupledesc: &PgTupleDesc,
-        compression: PgParquetCompression,
-        compression_level: i32,
-    ) -> WriterProperties {
+    fn writer_props(tupledesc: &PgTupleDesc, options: CopyToParquetOptions) -> WriterProperties {
         let compression = PgParquetCompressionWithLevel {
-            compression,
-            compression_level,
+            compression: options.compression,
+            compression_level: options.compression_level,
         };
 
         let mut writer_props_builder = WriterProperties::builder()
             .set_statistics_enabled(EnabledStatistics::Page)
             .set_compression(compression.into())
+            .set_max_row_group_size(options.row_group_size as usize)
             .set_created_by("pg_parquet".to_string());
 
         let geometry_columns_metadata_value = geoparquet_metadata_json_from_tupledesc(tupledesc);
@@ -103,10 +102,9 @@ impl ParquetWriterContext {
         writer_props_builder.build()
     }
 
-    pub(crate) fn write_new_row_group(
-        &mut self,
-        tuples: Vec<Option<PgHeapTuple<AllocatedByRust>>>,
-    ) {
+    // write_tuples writes the tuples to the parquet file. It flushes the in progress rows to a new row group
+    // if the row group size is reached.
+    pub(crate) fn write_tuples(&mut self, tuples: Vec<Option<PgHeapTuple<AllocatedByRust>>>) {
         let record_batch =
             Self::pg_tuples_to_record_batch(tuples, &self.attribute_contexts, self.schema.clone());
 
@@ -116,9 +114,24 @@ impl ParquetWriterContext {
             .block_on(parquet_writer.write(&record_batch))
             .unwrap_or_else(|e| panic!("failed to write record batch: {}", e));
 
+        if parquet_writer.in_progress_rows() >= self.options.row_group_size as _
+            || parquet_writer.in_progress_size() >= self.options.row_group_size_bytes as _
+        {
+            PG_BACKEND_TOKIO_RUNTIME
+                .block_on(parquet_writer.flush())
+                .unwrap_or_else(|e| panic!("failed to flush record batch: {}", e));
+        }
+    }
+
+    // finalize flushes the in progress rows to a new row group and finally writes metadata to the file.
+    fn finalize(&mut self) {
         PG_BACKEND_TOKIO_RUNTIME
-            .block_on(parquet_writer.flush())
-            .unwrap_or_else(|e| panic!("failed to flush record batch: {}", e));
+            .block_on(self.parquet_writer.finish())
+            .unwrap_or_else(|e| panic!("failed to finish parquet writer: {}", e));
+    }
+
+    pub(crate) fn bytes_written(&self) -> usize {
+        self.parquet_writer.bytes_written()
     }
 
     fn pg_tuples_to_record_batch(
@@ -140,10 +153,6 @@ impl ParquetWriterContext {
 
 impl Drop for ParquetWriterContext {
     fn drop(&mut self) {
-        PG_BACKEND_TOKIO_RUNTIME
-            .block_on(self.parquet_writer.finish())
-            .unwrap_or_else(|e| {
-                panic!("failed to close parquet writer: {}", e);
-            });
+        self.finalize();
     }
 }
```
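
The hunks above give `ParquetWriterContext` the surface a caller needs to enforce a per-file size limit: `write_tuples` appends a batch, `bytes_written` reports how large the current file already is, and dropping the context finalizes the file. The rotation itself lives in the new `copy_to_split_dest_receiver.rs`, which is not shown on this page. The snippet below is only a self-contained sketch of that idea: `FakeWriter`, `copy_with_file_size_limit`, and the byte counts are stand-ins rather than the extension's API, while the `data_<n>.parquet` naming and the parent directory derived from the destination path follow the behaviour described in the commit message.

```rust
use std::path::PathBuf;

// Stand-in for ParquetWriterContext: just enough surface for the sketch.
// (The real writer is created per output file and finalized on drop.)
struct FakeWriter {
    bytes: usize,
}

impl FakeWriter {
    fn new(_path: &PathBuf) -> Self {
        FakeWriter { bytes: 0 }
    }

    fn write_tuples(&mut self, batch_bytes: usize) {
        self.bytes += batch_bytes;
    }

    fn bytes_written(&self) -> usize {
        self.bytes
    }
}

// Hypothetical rotation loop: once the current file reaches the target size,
// start "data_<n>.parquet" under the parent directory, i.e. the destination
// path without its ".parquet" extension.
fn copy_with_file_size_limit(
    dest: &str,
    target_file_size_bytes: usize,
    batches: &[usize],
) -> Vec<PathBuf> {
    let parent = PathBuf::from(dest.trim_end_matches(".parquet"));

    let mut file_index = 0;
    let mut files = vec![parent.join(format!("data_{file_index}.parquet"))];
    let mut writer = FakeWriter::new(files.last().unwrap());

    for &batch_bytes in batches {
        writer.write_tuples(batch_bytes);

        // rotate after the current file exceeds the target size
        if writer.bytes_written() >= target_file_size_bytes {
            file_index += 1;
            files.push(parent.join(format!("data_{file_index}.parquet")));
            // in the real code, dropping the previous ParquetWriterContext finalizes its file
            writer = FakeWriter::new(files.last().unwrap());
        }
    }

    files
}

fn main() {
    // a 1 MB target and three ~600 KB batches produce two output files
    let files =
        copy_with_file_size_limit("/tmp/test.parquet", 1024 * 1024, &[600_000, 600_000, 600_000]);
    println!("{files:?}");
}
```

Running the sketch with a 1 MB target and three ~600 KB batches yields two output paths, mirroring how the commit message example splits a single logical destination into several `data_*.parquet` files.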

src/arrow_parquet/uri_utils.rs

Lines changed: 27 additions & 2 deletions
```diff
@@ -18,7 +18,6 @@ use pgrx::{
 use url::Url;
 
 use crate::{
-    arrow_parquet::parquet_writer::DEFAULT_ROW_GROUP_SIZE,
     object_store::{
         aws::parse_s3_bucket, azure::parse_azure_blob_container, http::parse_http_base_uri,
         object_store_cache::get_or_create_object_store,
@@ -143,6 +142,9 @@ pub(crate) fn parquet_metadata_from_uri(uri_info: ParsedUriInfo) -> Arc<ParquetM
     })
 }
 
+// default # of records per batch during arrow-parquet conversions (RecordBatch api)
+pub(crate) const RECORD_BATCH_SIZE: i64 = 1024;
+
 pub(crate) fn parquet_reader_from_uri(
     uri_info: ParsedUriInfo,
 ) -> ParquetRecordBatchStream<ParquetObjectReader> {
@@ -169,13 +171,36 @@ pub(crate) fn parquet_reader_from_uri(
 
         pgrx::debug2!("Converted arrow schema is: {}", builder.schema());
 
+        let batch_size = calculate_reader_batch_size(builder.metadata());
+
         builder
-            .with_batch_size(DEFAULT_ROW_GROUP_SIZE as usize)
+            .with_batch_size(batch_size)
             .build()
             .unwrap_or_else(|e| panic!("{}", e))
     })
 }
 
+fn calculate_reader_batch_size(metadata: &Arc<ParquetMetaData>) -> usize {
+    const MAX_ARROW_ARRAY_SIZE: i64 = i32::MAX as _;
+
+    for row_group in metadata.row_groups() {
+        for column in row_group.columns() {
+            // try our best to get the size of the column
+            let column_size = column
+                .unencoded_byte_array_data_bytes()
+                .unwrap_or(column.uncompressed_size());
+
+            if column_size > MAX_ARROW_ARRAY_SIZE {
+                // to prevent decoding large arrays into memory, process one row at a time
+                return 1;
+            }
+        }
+    }
+
+    // default batch size
+    RECORD_BATCH_SIZE as _
+}
+
 pub(crate) fn parquet_writer_from_uri(
     uri_info: ParsedUriInfo,
     arrow_schema: SchemaRef,
```

src/lib.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -18,7 +18,7 @@ mod type_compat;
 #[allow(unused_imports)]
 pub use crate::arrow_parquet::compression::PgParquetCompression;
 #[allow(unused_imports)]
-pub use crate::parquet_copy_hook::copy_to_dest_receiver::create_copy_to_parquet_dest_receiver;
+pub use crate::parquet_copy_hook::copy_to_split_dest_receiver::create_copy_to_parquet_split_dest_receiver;
 
 pgrx::pg_module_magic!();
```

src/object_store/local_file.rs

Lines changed: 7 additions & 0 deletions
```diff
@@ -13,6 +13,13 @@ pub(crate) fn create_local_file_object_store(
     let path = uri_as_string(uri);
 
     if !copy_from {
+        // create parent folder if it doesn't exist
+        let parent = std::path::Path::new(&path)
+            .parent()
+            .unwrap_or_else(|| panic!("invalid parent for path: {}", path));
+
+        std::fs::create_dir_all(parent).unwrap_or_else(|e| panic!("{}", e));
+
         // create or overwrite the local file
         std::fs::OpenOptions::new()
             .write(true)
```

src/parquet_copy_hook.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -1,6 +1,7 @@
 pub(crate) mod copy_from;
 pub(crate) mod copy_to;
 pub(crate) mod copy_to_dest_receiver;
+pub(crate) mod copy_to_split_dest_receiver;
 pub(crate) mod copy_utils;
 pub(crate) mod hook;
 pub(crate) mod pg_compat;
```
