Skip to content

Commit 520c8e8

Browse files
authored
[Spark] Add DeltaTable.addFeatureSupport API to PySpark (#3786)
#### Which Delta project/connector is this regarding?

- [X] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

This PR introduces the `DeltaTable.addFeatureSupport` API, which was missing in PySpark. This API adds support for a table feature to a given Delta table.

## How was this patch tested?

A new test was added.

## Does this PR introduce _any_ user-facing changes?

Yes. See the `Description` section above.
1 parent cb352c2 commit 520c8e8

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

python/delta/tables.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,19 @@ def upgradeTableProtocol(self, readerVersion: int, writerVersion: int) -> None:
581581
type(writerVersion))
582582
jdt.upgradeTableProtocol(readerVersion, writerVersion)
583583

584+
@since(3.3)  # type: ignore[arg-type]
def addFeatureSupport(self, featureName: str) -> None:
    """
    Add support for a table feature to this table's protocol.

    When the table does not yet support table features, the protocol is upgraded
    automatically: a writer-only feature raises the table's writer version to `7`,
    whereas a reader-writer feature raises both reader and writer versions, to `(3, 7)`.

    See online documentation and Delta's protocol specification at PROTOCOL.md for more
    details.

    :param featureName: Name of the table feature to add support for.
    """
    # Check the argument type on the Python side before delegating to `_jdt`.
    DeltaTable._verify_type_str(featureName, "featureName")
    jdt = self._jdt
    jdt.addFeatureSupport(featureName)
596+
584597
@since(1.2) # type: ignore[arg-type]
585598
def restoreToVersion(self, version: int) -> DataFrame:
586599
"""

python/delta/tests/test_deltatable.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from multiprocessing.pool import ThreadPool
2424
from typing import List, Set, Dict, Optional, Any, Callable, Union, Tuple
2525

26+
from py4j.protocol import Py4JJavaError
2627
from pyspark.errors.exceptions.base import UnsupportedOperationException
2728
from pyspark.sql import DataFrame, Row
2829
from pyspark.sql.functions import col, lit, expr, floor
@@ -1187,6 +1188,42 @@ def test_protocolUpgrade(self) -> None:
11871188
with self.assertRaisesRegex(ValueError, "writerVersion"):
11881189
dt.upgradeTableProtocol(1, {}) # type: ignore[arg-type]
11891190

1191+
def test_addFeatureSupport(self) -> None:
    """
    Exercise `DeltaTable.addFeatureSupport` with invalid and valid feature names.

    The table is created with protocol (1, 2). The test then checks that bad arguments
    are rejected, that adding `appendOnly` leaves the protocol versions untouched, and
    that adding `deletionVectors` upgrades the protocol to (3, 7).
    """
    try:
        # Pin the protocol versions used when the table is created.
        self.spark.conf.set('spark.databricks.delta.minReaderVersion', '1')
        self.spark.conf.set('spark.databricks.delta.minWriterVersion', '2')
        self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)])
        dt = DeltaTable.forPath(self.spark, self.tempFile)
    finally:
        # Always restore the session configs so other tests are not affected.
        self.spark.conf.unset('spark.databricks.delta.minReaderVersion')
        self.spark.conf.unset('spark.databricks.delta.minWriterVersion')

    # bad args
    with self.assertRaisesRegex(Py4JJavaError, "DELTA_UNSUPPORTED_FEATURES_IN_CONFIG"):
        dt.addFeatureSupport("abc")
    with self.assertRaisesRegex(ValueError, "featureName needs to be a string"):
        dt.addFeatureSupport(12345)  # type: ignore[arg-type]
    with self.assertRaisesRegex(ValueError, "featureName needs to be a string"):
        dt.addFeatureSupport([12345])  # type: ignore[arg-type]
    with self.assertRaisesRegex(ValueError, "featureName needs to be a string"):
        dt.addFeatureSupport({})  # type: ignore[arg-type]
    with self.assertRaisesRegex(ValueError, "featureName needs to be a string"):
        dt.addFeatureSupport([])  # type: ignore[arg-type]

    # good args
    dt.addFeatureSupport("appendOnly")
    dt_details = dt.detail().collect()[0].asDict()
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertEqual(dt_details["minReaderVersion"], 1, "The upgrade should be a no-op")
    self.assertEqual(dt_details["minWriterVersion"], 2, "The upgrade should be a no-op")
    self.assertEqual(sorted(dt_details["tableFeatures"]), ["appendOnly", "invariants"])

    dt.addFeatureSupport("deletionVectors")
    dt_details = dt.detail().collect()[0].asDict()
    self.assertEqual(dt_details["minReaderVersion"], 3, "DV requires reader version 3")
    self.assertEqual(dt_details["minWriterVersion"], 7, "DV requires writer version 7")
    self.assertEqual(sorted(dt_details["tableFeatures"]),
                     ["appendOnly", "deletionVectors", "invariants"])
1226+
11901227
def test_restore_to_version(self) -> None:
11911228
self.__writeDeltaTable([('a', 1), ('b', 2)])
11921229
self.__overwriteDeltaTable([('a', 3), ('b', 2)],

0 commit comments

Comments
 (0)