-
Notifications
You must be signed in to change notification settings - Fork 3.8k
ARROW-13806: [C++][Python] Add support for new MonthDayNano Interval Type #11302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
108638b
f3f0273
47aa6f7
faae971
299d97b
096797b
67527dc
9845763
51fa76b
f5b2750
a9ca3ed
88a97db
f1a6d15
19d4072
3cac885
b4c4501
c640152
b5aadb1
c2aa56e
0e47a98
3b2db24
5a5c73b
9467051
93108a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include "arrow/array.h" | ||
#include "arrow/python/platform.h" | ||
|
||
namespace arrow { | ||
namespace py { | ||
namespace internal { | ||
// TODO(ARROW-12976): See if we can refactor Pandas ObjectWriter logic | ||
// to the .cc file and move this there as well if we can. | ||
|
||
// Converts array to a sequency of python objects. | ||
template <typename ArrayType, typename WriteValue, typename Assigner> | ||
inline Status WriteArrayObjects(const ArrayType& arr, WriteValue&& write_func, | ||
Assigner out_values) { | ||
// TODO(ARROW-12976): Use visitor here? | ||
const bool has_nulls = arr.null_count() > 0; | ||
for (int64_t i = 0; i < arr.length(); ++i) { | ||
if (has_nulls && arr.IsNull(i)) { | ||
Py_INCREF(Py_None); | ||
*out_values = Py_None; | ||
} else { | ||
RETURN_NOT_OK(write_func(arr.GetView(i), out_values)); | ||
} | ||
++out_values; | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
} // namespace internal | ||
} // namespace py | ||
} // namespace arrow |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,9 +20,12 @@ | |
#include <chrono> | ||
#include <iomanip> | ||
|
||
#include "arrow/array.h" | ||
#include "arrow/python/arrow_to_python_internal.h" | ||
#include "arrow/python/common.h" | ||
#include "arrow/python/helpers.h" | ||
#include "arrow/python/platform.h" | ||
#include "arrow/scalar.h" | ||
#include "arrow/status.h" | ||
#include "arrow/type.h" | ||
#include "arrow/util/logging.h" | ||
|
@@ -71,6 +74,26 @@ bool MatchFixedOffset(const std::string& tz, util::string_view* sign, | |
return iter == (tz.data() + tz.size()); | ||
} | ||
|
||
// Type object backing the MonthDayNano struct sequence. It starts out | ||
// zero-initialized; NewMonthDayNanoTupleType() fills it in through | ||
// PyStructSequence_InitType2 and treats a null tp_name as "not initialized yet". | ||
static PyTypeObject MonthDayNanoTupleType = {}; | ||
|
||
constexpr char* NonConst(const char* st) { | ||
// Casts away the constness of `st` and returns the same pointer. | ||
// Hack for Python versions < 3.7, where the string members of the | ||
// PyStructSequence structs were non-const (C++ doesn't like assigning | ||
// string literals to these types). | ||
return const_cast<char*>(st); | ||
Comment on lines
+80
to
+82
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we still support python versions < 3.7? I thought we stopped shipping binary wheels for these versions but maybe we still support building from source. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we do, I asked that we ddidn't drop it last release. I would also ask that we don't drop it this release (i.e. keep it through its full python support cycle. There are a number of consumers of pyarrow that try to keep support for python versions until they dropped it which is end of this year). This was caught because we run it in CI. |
||
} | ||
|
||
// Field table for the MonthDayNano struct sequence: one {name, doc} entry per
// field, terminated by a {nullptr, nullptr} sentinel. The doc strings are
// user-visible on the Python side.
static PyStructSequence_Field MonthDayNanoField[] = {
    {NonConst("months"), NonConst("The number of months in the interval")},
    {NonConst("days"), NonConst("The number of days in the interval")},
    {NonConst("nanoseconds"), NonConst("The number of nanoseconds in the interval")},
    {nullptr, nullptr}};
|
||
// Descriptor passed to PyStructSequence_InitType2 when the MonthDayNano type
// object is initialized. All three fields listed in MonthDayNanoField are part
// of the visible sequence.
static PyStructSequence_Desc MonthDayNanoTupleDesc = {
    /*name=*/NonConst("MonthDayNano"),
    /*doc=*/
    NonConst("A calendar interval consisting of months, days and nanoseconds."),
    /*fields=*/MonthDayNanoField,
    /*n_in_sequence=*/3,
};
|
||
} // namespace | ||
|
||
PyDateTime_CAPI* datetime_api = nullptr; | ||
|
@@ -270,6 +293,16 @@ static inline Status PyDate_convert_int(int64_t val, const DateUnit unit, int64_ | |
return Status::OK(); | ||
} | ||
|
||
// Returns a new reference to the MonthDayNano struct-sequence type object,
// initializing the static type object on the first call.
//
// Initialization failure is treated as unrecoverable and aborts the process
// through Py_FatalError.
PyObject* NewMonthDayNanoTupleType() {
  // A null tp_name marks the static type object as not yet initialized.
  const bool needs_init = MonthDayNanoTupleType.tp_name == nullptr;
  if (needs_init &&
      PyStructSequence_InitType2(&MonthDayNanoTupleType, &MonthDayNanoTupleDesc) != 0) {
    Py_FatalError("Could not initialize MonthDayNanoTuple");
  }
  PyObject* type_object = reinterpret_cast<PyObject*>(&MonthDayNanoTupleType);
  Py_INCREF(type_object);
  return type_object;
}
|
||
Status PyTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out) { | ||
int64_t hour = 0, minute = 0, second = 0, microsecond = 0; | ||
RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond)); | ||
|
@@ -450,6 +483,84 @@ Result<std::string> TzinfoToString(PyObject* tzinfo) { | |
return PyTZInfo_utcoffset_hhmm(tzinfo); | ||
} | ||
|
||
// Builds a MonthDayNano struct-sequence instance (months, days, nanoseconds)
// from `interval`.
//
// Returns a new reference on success. Returns nullptr, with the Python error
// indicator left as set by the failing C-API call, if the tuple or any of its
// integer fields cannot be allocated.
//
// This function does not initialize MonthDayNanoTupleType itself; it relies on
// the type object having been set up beforehand (see NewMonthDayNanoTupleType).
PyObject* MonthDayNanoIntervalToNamedTuple(
    const MonthDayNanoIntervalType::MonthDayNanos& interval) {
  OwnedRef tuple(PyStructSequence_New(&MonthDayNanoTupleType));
  if (ARROW_PREDICT_FALSE(tuple.obj() == nullptr)) {
    return nullptr;
  }
  // Stores `value` at `pos` unless its creation failed. A failed PyLong_From*
  // call returns nullptr; storing it would hand back a tuple with an empty
  // field while an exception is pending.
  const auto set_field = [&tuple](Py_ssize_t pos, PyObject* value) -> bool {
    if (ARROW_PREDICT_FALSE(value == nullptr)) {
      return false;
    }
    PyStructSequence_SetItem(tuple.obj(), pos, value);
    return true;
  };
  // Short-circuiting stops at the first failure, so no further C-API calls are
  // made while an exception is pending. `tuple` (and any fields already stored
  // in it) is released by OwnedRef on the early return.
  if (!set_field(/*pos=*/0, PyLong_FromLong(interval.months)) ||
      !set_field(/*pos=*/1, PyLong_FromLong(interval.days)) ||
      !set_field(/*pos=*/2, PyLong_FromLongLong(interval.nanoseconds))) {
    return nullptr;
  }
  return tuple.detach();
}
|
||
namespace { | ||
|
||
// Wrapper around a Python list object that mimics dereference and assignment | ||
// operations. | ||
struct PyListAssigner { | ||
public: | ||
explicit PyListAssigner(PyObject* list) : list_(list) { DCHECK(PyList_Check(list_)); } | ||
|
||
PyListAssigner& operator*() { return *this; } | ||
|
||
void operator=(PyObject* obj) { | ||
if (ARROW_PREDICT_FALSE(PyList_SetItem(list_, current_index_, obj) == -1)) { | ||
Py_FatalError("list did not have the correct preallocated size."); | ||
} | ||
} | ||
|
||
PyListAssigner& operator++() { | ||
current_index_++; | ||
return *this; | ||
} | ||
|
||
PyListAssigner& operator+=(int64_t offset) { | ||
current_index_ += offset; | ||
return *this; | ||
} | ||
|
||
private: | ||
PyObject* list_; | ||
int64_t current_index_ = 0; | ||
}; | ||
|
||
} // namespace | ||
|
||
// Converts a MonthDayNanoIntervalArray into a new Python list with one entry
// per slot: Py_None for nulls and a MonthDayNano struct-sequence instance
// otherwise.
//
// Returns a new reference to the list, or an error Status if list allocation
// or any element conversion raised a Python error.
Result<PyObject*> MonthDayNanoIntervalArrayToPyList(
    const MonthDayNanoIntervalArray& array) {
  OwnedRef result_list(PyList_New(array.length()));
  RETURN_IF_PYERROR();
  PyListAssigner cursor(result_list.obj());

  // Converts one non-null interval and stores it at the cursor's position.
  auto write_interval = [](const MonthDayNanoIntervalType::MonthDayNanos& interval,
                           PyListAssigner& out) {
    PyObject* tuple = internal::MonthDayNanoIntervalToNamedTuple(interval);
    if (ARROW_PREDICT_FALSE(tuple == nullptr)) {
      RETURN_IF_PYERROR();
    }

    *out = tuple;
    return Status::OK();
  };

  RETURN_NOT_OK(internal::WriteArrayObjects(array, write_interval, cursor));
  return result_list.detach();
}
|
||
// Converts a MonthDayNanoIntervalScalar to a Python object.
//
// Returns a new reference to Py_None for an invalid (null) scalar, otherwise a
// new MonthDayNano struct-sequence instance. If building the tuple fails with a
// Python error, that error is returned as a Status, matching the handling in
// MonthDayNanoIntervalArrayToPyList, instead of a "successful" Result holding
// nullptr.
Result<PyObject*> MonthDayNanoIntervalScalarToPyObject(
    const MonthDayNanoIntervalScalar& scalar) {
  if (!scalar.is_valid) {
    Py_INCREF(Py_None);
    return Py_None;
  }
  PyObject* tuple = internal::MonthDayNanoIntervalToNamedTuple(scalar.value);
  if (ARROW_PREDICT_FALSE(tuple == nullptr)) {
    RETURN_IF_PYERROR();
  }
  return tuple;
}
|
||
} // namespace internal | ||
} // namespace py | ||
} // namespace arrow |
Uh oh!
There was an error while loading. Please reload this page.