|
1 | | -use ::parquet::file::statistics::Statistics; |
2 | 1 | use pgrx::{iter::TableIterator, name, pg_extern, pg_schema}; |
3 | 2 |
|
4 | | -use crate::arrow_parquet::uri_utils::{ |
5 | | - ensure_access_privilege_to_uri, parquet_metadata_from_uri, uri_as_string, ParsedUriInfo, |
| 3 | +use crate::{ |
| 4 | + arrow_parquet::uri_utils::{ |
| 5 | + ensure_access_privilege_to_uri, parquet_metadata_from_uri, uri_as_string, ParsedUriInfo, |
| 6 | + }, |
| 7 | + parquet_udfs::stats::{stats_max_value_to_pg_str, stats_min_value_to_pg_str}, |
6 | 8 | }; |
7 | 9 |
|
8 | 10 | #[pg_schema] |
@@ -69,10 +71,12 @@ mod parquet { |
69 | 71 | let mut stats_null_count = None; |
70 | 72 | let mut stats_distinct_count = None; |
71 | 73 |
|
| 74 | + let column_descriptor = column.column_descr(); |
| 75 | + |
72 | 76 | if let Some(statistics) = column.statistics() { |
73 | | - stats_min = stats_min_value_to_str(statistics); |
| 77 | + stats_min = stats_min_value_to_pg_str(statistics, column_descriptor); |
74 | 78 |
|
75 | | - stats_max = stats_max_value_to_str(statistics); |
| 79 | + stats_max = stats_max_value_to_pg_str(statistics, column_descriptor); |
76 | 80 |
|
77 | 81 | stats_null_count = statistics.null_count_opt().map(|v| v as i64); |
78 | 82 |
|
@@ -215,45 +219,3 @@ mod parquet { |
215 | 219 | TableIterator::new(rows) |
216 | 220 | } |
217 | 221 | } |
218 | | - |
219 | | -fn stats_min_value_to_str(statistics: &Statistics) -> Option<String> { |
220 | | - match &statistics { |
221 | | - Statistics::Boolean(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
222 | | - Statistics::Int32(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
223 | | - Statistics::Int64(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
224 | | - Statistics::Int96(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
225 | | - Statistics::Float(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
226 | | - Statistics::Double(val_stats) => val_stats.min_opt().map(|v| v.to_string()), |
227 | | - Statistics::ByteArray(val_stats) => val_stats.min_opt().map(|v| match v.as_utf8() { |
228 | | - Ok(v) => v.to_string(), |
229 | | - Err(_) => v.to_string(), |
230 | | - }), |
231 | | - Statistics::FixedLenByteArray(val_stats) => { |
232 | | - val_stats.min_opt().map(|v| match v.as_utf8() { |
233 | | - Ok(v) => v.to_string(), |
234 | | - Err(_) => v.to_string(), |
235 | | - }) |
236 | | - } |
237 | | - } |
238 | | -} |
239 | | - |
240 | | -fn stats_max_value_to_str(statistics: &Statistics) -> Option<String> { |
241 | | - match statistics { |
242 | | - Statistics::Boolean(statistics) => statistics.max_opt().map(|v| v.to_string()), |
243 | | - Statistics::Int32(statistics) => statistics.max_opt().map(|v| v.to_string()), |
244 | | - Statistics::Int64(statistics) => statistics.max_opt().map(|v| v.to_string()), |
245 | | - Statistics::Int96(statistics) => statistics.max_opt().map(|v| v.to_string()), |
246 | | - Statistics::Float(statistics) => statistics.max_opt().map(|v| v.to_string()), |
247 | | - Statistics::Double(statistics) => statistics.max_opt().map(|v| v.to_string()), |
248 | | - Statistics::ByteArray(statistics) => statistics.max_opt().map(|v| match v.as_utf8() { |
249 | | - Ok(v) => v.to_string(), |
250 | | - Err(_) => v.to_string(), |
251 | | - }), |
252 | | - Statistics::FixedLenByteArray(statistics) => { |
253 | | - statistics.max_opt().map(|v| match v.as_utf8() { |
254 | | - Ok(v) => v.to_string(), |
255 | | - Err(_) => v.to_string(), |
256 | | - }) |
257 | | - } |
258 | | - } |
259 | | -} |
0 commit comments