
[GH-1918] Spark 4 support #1919


Merged
34 commits merged on Jun 25, 2025
Changes from 21 commits
Commits
34 commits
53c2dd6
Add Spark 4 support
Kimahriman Dec 11, 2024
5899a21
Use staging repo
Kimahriman Feb 28, 2025
3a86979
Try to run tests
Kimahriman Feb 28, 2025
a803f22
Work on Spark 4 support
Kimahriman Mar 7, 2025
baf6671
Use shims instead of reflection
Kimahriman Mar 10, 2025
772fd07
Bump hadoop version
Kimahriman Mar 10, 2025
3392a9e
Add 4.0 copy, add null intolerant shim, and ignore dbscan tests
Kimahriman Apr 4, 2025
1887a1d
Fully shade jiffle and antlr
Kimahriman Apr 8, 2025
500872f
Add back specific exclusions to jiffle
Kimahriman Apr 9, 2025
6750e7d
Fix test
Kimahriman Apr 9, 2025
c2942ce
Bump to RC4
Kimahriman Apr 11, 2025
de401ab
Undo workflow change and add comment to shading
Kimahriman Apr 13, 2025
5531f2b
Run pre-commit
Kimahriman Apr 13, 2025
e784494
Comment in pom
Kimahriman Apr 13, 2025
ad0411d
Merge branch 'master' into spark-4-support
Kimahriman Apr 13, 2025
81edc3c
Update snapshot version
Kimahriman Apr 13, 2025
ba1c79a
Fix strategy import
Kimahriman Apr 13, 2025
531d4dd
Re-sync new spark-4.0 module and update arrow eval for 4.0 changes
Kimahriman Apr 14, 2025
ad28e5d
Rework off of current arrow eval python
Kimahriman Apr 14, 2025
fa9d218
Install pyspark for udf test
Kimahriman Apr 15, 2025
2a9fc55
Generate sources before scala doc
Kimahriman Apr 16, 2025
04c93be
Merge branch 'master' into spark-4-support
Kimahriman May 21, 2025
ffebe86
Use official 4.0 release
Kimahriman May 23, 2025
8c606df
Use official python release and add to python test matrix
Kimahriman May 23, 2025
1fdfbef
Set java version for python tests
Kimahriman May 23, 2025
8befa97
Bump pandas to 2.x
Kimahriman May 23, 2025
4771e52
Bump min python to 3.8
Kimahriman May 23, 2025
daac5f1
Try to fix Python tests
Kimahriman May 23, 2025
1d15cfd
Remove jiffle from spark common since it's not used and causes spark-…
Kimahriman May 23, 2025
d9326b0
Exclude jiffle in spark-shaded
Kimahriman May 23, 2025
acfc191
Don't include connect for Spark 4+
Kimahriman May 24, 2025
770ef84
Merge branch 'master' into spark-4-support
Kimahriman Jun 3, 2025
7bf2a25
Merge branch 'master' into spark-4-support
Kimahriman Jun 9, 2025
b2362a0
pre-check
Kimahriman Jun 10, 2025
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
@@ -44,7 +44,7 @@ jobs:
- name: Compile JavaDoc
run: mvn -q clean install -DskipTests && mkdir -p docs/api/javadoc/spark && cp -r spark/common/target/apidocs/* docs/api/javadoc/spark/
- name: Compile ScalaDoc
run: mvn scala:doc && mkdir -p docs/api/scaladoc/spark && cp -r spark/common/target/site/scaladocs/* docs/api/scaladoc/spark
run: mvn generate-sources scala:doc && mkdir -p docs/api/scaladoc/spark && cp -r spark/common/target/site/scaladocs/* docs/api/scaladoc/spark
Contributor Author:
This was the only way I could figure out to get the Scala docs to be aware of the additional source directory.

- uses: actions/setup-python@v5
with:
python-version: 3.x
8 changes: 8 additions & 0 deletions .github/workflows/java.yml
@@ -57,6 +57,9 @@ jobs:
fail-fast: true
matrix:
include:
- spark: 4.0.0
scala: 2.13.8
jdk: '17'
- spark: 3.5.0
scala: 2.13.8
jdk: '8'
@@ -116,6 +119,11 @@
export SPARK_HOME=$(python -c "import pyspark; print(pyspark.__path__[0])")
fi

if [ "${SPARK_VERSION}" == "4.0.0" ]; then
pip install https://dist.apache.org/repos/dist/dev/spark/v4.0.0-rc4-bin/pyspark-4.0.0.tar.gz pandas shapely apache-sedona pyarrow
export SPARK_HOME=$(python -c "import pyspark; print(pyspark.__path__[0])")
fi

mvn -q clean install -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dspark.version=${SPARK_VERSION} ${SKIP_TESTS}
- run: mkdir staging
- run: cp spark-shaded/target/sedona-*.jar staging
67 changes: 61 additions & 6 deletions common/pom.xml
@@ -106,12 +106,6 @@
<artifactId>jt-jiffle-language</artifactId>
</dependency>
<!-- These test dependencies are for running map algebra tests -->
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr-runtime.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
@@ -134,6 +128,67 @@
<target>8</target>
</configuration>
</plugin>
<plugin>
<!-- Skip running resolved-pom-maven-plugin since shade will
generate a dependency-reduced pom which substitutes property
values. resolved-pom-maven-plugin will break pom
installation when working with maven-shade-plugin. -->
<groupId>io.paradoxical</groupId>
<artifactId>resolved-pom-maven-plugin</artifactId>
<version>1.0</version>
<executions>
<execution>
<id>resolve-my-pom</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<!-- We need to shade jiffle and its antlr dependency because Spark 4 uses an
incompatible version of antlr at runtime. -->
Member:
Can we shade it in geotools-wrapper so that no dependency-reduced pom will be generated when building sedona-common? @jiayuasu

Contributor Author:
It definitely needs to be shaded locally for the tests to work. I'm not 100% sure whether the release could just be shaded into geotools-wrapper or not. My concern was that if you somehow have jiffle as a separate dependency, those classes would be used with the provided antlr and not the relocated antlr dependency.

<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<includes>
<include>it.geosolutions.jaiext.jiffle:*</include>
<include>org.antlr:*</include>
</includes>
</artifactSet>
<relocations>
<relocation>
<pattern>it.geosolutions.jaiext.jiffle</pattern>
<shadedPattern>org.shaded.jiffle</shadedPattern>
<excludes>
<exclude>it.geosolutions.jaiext.jiffle.runtime.*</exclude>
</excludes>
</relocation>
<relocation>
<pattern>org.antlr.v4.runtime</pattern>
<shadedPattern>org.shaded.antlr</shadedPattern>
</relocation>
</relocations>
<filters>
<!-- filter to address "Invalid signature file" issue - see http://stackoverflow.com/a/6743609/589215 -->
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
42 changes: 39 additions & 3 deletions pom.xml
@@ -76,14 +76,14 @@
<spatial4j.version>0.8</spatial4j.version>

<jt-jiffle.version>1.1.24</jt-jiffle.version>
<antlr-runtime.version>4.9.3</antlr-runtime.version>
<janino-version>3.1.9</janino-version>

<!-- Actual scala, spark and log4j version will be set by activated profiles.
Setting a default value helps IDE:s that can't make sense of profiles. -->
<scala.compat.version>2.12</scala.compat.version>
<spark.version>3.3.0</spark.version>
<spark.compat.version>3.3</spark.compat.version>
<spark.major.version>3</spark.major.version>
<log4j.version>2.17.2</log4j.version>
<graphframe.version>0.8.3-spark3.4</graphframe.version>

@@ -272,12 +272,19 @@
<groupId>it.geosolutions.jaiext.jiffle</groupId>
<artifactId>jt-jiffle-language</artifactId>
<version>${jt-jiffle.version}</version>
<scope>${geotools.scope}</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<groupId>javax.media</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>it.geosolutions.jaiext.utilities</groupId>
<artifactId>jt-utilities</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@@ -745,6 +752,35 @@
<skip.deploy.common.modules>true</skip.deploy.common.modules>
</properties>
</profile>
<profile>
<id>sedona-spark-4.0</id>
<activation>
<property>
<name>spark</name>
<value>4.0</value>
</property>
</activation>
<repositories>
<repository>
<id>spark-4.0.0-rc4</id>
<name>Spark 4.0.0 RC4 Staging</name>
<url>https://repository.apache.org/content/repositories/orgapachespark-1480/</url>
</repository>
</repositories>
<properties>
<spark.version>4.0.0</spark.version>
<spark.compat.version>4.0</spark.compat.version>
<spark.major.version>4</spark.major.version>
<hadoop.version>3.4.1</hadoop.version>
<log4j.version>2.24.3</log4j.version>
<slf4j.version>2.0.16</slf4j.version>
<graphframe.version>0.8.3-spark3.5</graphframe.version>
<scala.version>2.13.12</scala.version>
<scala.compat.version>2.13</scala.compat.version>
<!-- Skip deploying parent module. it will be deployed with sedona-spark-3.3 -->
<skip.deploy.common.modules>true</skip.deploy.common.modules>
</properties>
</profile>
<profile>
<id>scala2.13</id>
<activation>
38 changes: 38 additions & 0 deletions spark/common/pom.xml
@@ -272,6 +272,25 @@
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>src/main/scala-spark-${spark.major.version}</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
@@ -294,4 +313,23 @@
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>sedona-spark-4.0</id>
<activation>
<property>
<name>spark</name>
<value>4.0</value>
</property>
</activation>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql-api_${scala.compat.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
(file header not captured — diff converting the DataFrameAPI trait into the DataFrameShims object)
@@ -16,57 +16,59 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.spark.sql.sedona_sql.expressions
package org.apache.spark.sql.sedona_sql

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.Column
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.types.StructType

trait DataFrameAPI {
protected def wrapExpression[E <: Expression: ClassTag](args: Any*): Column = {
val exprArgs = args.map(_ match {
case c: Column => c.expr
case s: String => Column(s).expr
case e: Expression => e
case x: Any => Literal(x)
case null => Literal(null)
})
val expressionConstructor =
implicitly[ClassTag[E]].runtimeClass.getConstructor(classOf[Seq[Expression]])
val expressionInstance = expressionConstructor.newInstance(exprArgs).asInstanceOf[E]
Column(expressionInstance)
object DataFrameShims {

private[sedona_sql] def wrapExpression[E <: Expression: ClassTag](args: Any*): Column = {
wrapVarArgExpression[E](args)
}

protected def wrapVarArgExpression[E <: Expression: ClassTag](arg: Seq[Any]): Column = {
private[sedona_sql] def wrapVarArgExpression[E <: Expression: ClassTag](arg: Seq[Any]): Column = {
val runtimeClass = implicitly[ClassTag[E]].runtimeClass
val exprArgs = arg.map(_ match {
case c: Column => c.expr
case s: String => Column(s).expr
case e: Expression => e
case x: Any => Literal(x)
case null => Literal(null)
})
val expressionConstructor =
implicitly[ClassTag[E]].runtimeClass.getConstructor(classOf[Seq[Expression]])
val expressionConstructor = runtimeClass.getConstructor(classOf[Seq[Expression]])
val expressionInstance = expressionConstructor.newInstance(exprArgs).asInstanceOf[E]
Column(expressionInstance)
}

protected def wrapAggregator[A <: UserDefinedAggregateFunction: ClassTag](arg: Any*): Column = {
private[sedona_sql] def wrapAggregator[A <: UserDefinedAggregateFunction: ClassTag](arg: Any*): Column = {
val runtimeClass = implicitly[ClassTag[A]].runtimeClass
val exprArgs = arg.map(_ match {
case c: Column => c.expr
case s: String => Column(s).expr
case e: Expression => e
case x: Any => Literal(x)
case null => Literal(null)
})
val aggregatorClass = implicitly[ClassTag[A]].runtimeClass
val aggregatorConstructor = aggregatorClass.getConstructor()
val aggregatorConstructor = runtimeClass.getConstructor()
val aggregatorInstance =
aggregatorConstructor.newInstance().asInstanceOf[UserDefinedAggregateFunction]
val scalaAggregator = ScalaUDAF(exprArgs, aggregatorInstance)
Column(scalaAggregator)
}

private[sedona_sql] def createDataFrame(
sparkSession: SparkSession,
rdd: RDD[InternalRow],
schema: StructType): DataFrame = {
sparkSession.internalCreateDataFrame(rdd, schema)
}
}
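
For context, here is a minimal sketch of how the DataFrame API layer would call into this per-Spark-version shim object after the refactor. The st_functions_sketch object, its package placement, and the exact ST_Buffer signature are illustrative assumptions rather than part of this diff:

// Hypothetical caller, placed under the sedona_sql package so the
// private[sedona_sql] shim methods are visible.
package org.apache.spark.sql.sedona_sql.expressions

import org.apache.spark.sql.Column
import org.apache.spark.sql.sedona_sql.DataFrameShims

object st_functions_sketch {
  // The DataFrame API delegates Column construction to the version-specific
  // DataFrameShims object instead of mixing in the old DataFrameAPI trait.
  def ST_Buffer(geometry: Column, buffer: Double): Column =
    DataFrameShims.wrapExpression[ST_Buffer](geometry, buffer)
}

Because the shim lives in a spark.major.version-specific source directory (added by build-helper-maven-plugin above), the same caller compiles against whichever expression-to-Column construction path the active Spark profile requires.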
(file header not captured — new file adding the NullIntolerantShim trait)
@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.spark.sql.sedona_sql.expressions

import org.apache.spark.sql.catalyst.expressions.NullIntolerant

trait NullIntolerantShim extends NullIntolerant
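
The file above is presumably the Spark 3 copy of this shim. A plausible sketch of the spark-4 counterpart follows, assuming Spark 4 removed the NullIntolerant marker trait in favor of an overridable nullIntolerant flag on Expression; the actual spark-4.0 source in this PR is not shown here and may differ:

package org.apache.spark.sql.sedona_sql.expressions

import org.apache.spark.sql.catalyst.expressions.Expression

// Assumed Spark 4 variant: with no NullIntolerant trait to mix in, the shim
// re-exposes the same behavior by overriding Expression.nullIntolerant.
trait NullIntolerantShim extends Expression {
  override def nullIntolerant: Boolean = true
}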