Skip to content

Commit bf33945

Browse files
authored
Do not bloat spark image with ENV variables (#2081)
* Do not bloat spark image with ENV variables
* Remove HadoopVersionTagger
1 parent d57bf95 commit bf33945

File tree

3 files changed: 6 additions and 27 deletions.

3 files changed

+6
-27
lines changed

images/pyspark-notebook/Dockerfile

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,18 @@ ARG scala_version
3434
# But it seems to be slower, that's why we use the recommended site for download
3535
ARG spark_download_url="https://dlcdn.apache.org/spark/"
3636

37-
# Configure Spark
38-
ENV SPARK_VERSION="${spark_version}" \
39-
HADOOP_VERSION="${hadoop_version}" \
40-
SCALA_VERSION="${scala_version}" \
41-
SPARK_DOWNLOAD_URL="${spark_download_url}"
42-
4337
ENV SPARK_HOME=/usr/local/spark
4438
ENV PATH="${PATH}:${SPARK_HOME}/bin"
4539
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
4640

4741
COPY setup_spark.py /opt/setup-scripts/
4842

4943
# Setup Spark
50-
RUN /opt/setup-scripts/setup_spark.py
44+
RUN SPARK_VERSION="${spark_version}" \
45+
HADOOP_VERSION="${hadoop_version}" \
46+
SCALA_VERSION="${scala_version}" \
47+
SPARK_DOWNLOAD_URL="${spark_download_url}" \
48+
/opt/setup-scripts/setup_spark.py
5149

5250
# Configure IPython system-wide
5351
COPY ipython_kernel_config.py "/etc/ipython/"

tagging/images_hierarchy.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414
from tagging.taggers import (
1515
DateTagger,
16-
HadoopVersionTagger,
1716
JavaVersionTagger,
1817
JuliaVersionTagger,
1918
JupyterHubVersionTagger,
@@ -83,7 +82,7 @@ class ImageDescription:
8382
),
8483
"pyspark-notebook": ImageDescription(
8584
parent_image="scipy-notebook",
86-
taggers=[SparkVersionTagger(), HadoopVersionTagger(), JavaVersionTagger()],
85+
taggers=[SparkVersionTagger(), JavaVersionTagger()],
8786
manifests=[SparkInfoManifest()],
8887
),
8988
"all-spark-notebook": ImageDescription(

tagging/taggers.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,6 @@ def _get_program_version(container: Container, program: str) -> str:
1212
return DockerRunner.run_simple_command(container, cmd=f"{program} --version")
1313

1414

15-
def _get_env_variable(container: Container, variable: str) -> str:
16-
env = DockerRunner.run_simple_command(
17-
container,
18-
cmd="env",
19-
print_result=False,
20-
).split()
21-
for env_entry in env:
22-
if env_entry.startswith(variable):
23-
return env_entry[len(variable) + 1 :]
24-
raise KeyError(variable)
25-
26-
2715
def _get_pip_package_version(container: Container, package: str) -> str:
2816
PIP_VERSION_PREFIX = "Version: "
2917

@@ -136,12 +124,6 @@ def tag_value(container: Container) -> str:
136124
return "spark-" + version_line.split(" ")[-1]
137125

138126

139-
class HadoopVersionTagger(TaggerInterface):
140-
@staticmethod
141-
def tag_value(container: Container) -> str:
142-
return "hadoop-" + _get_env_variable(container, "HADOOP_VERSION")
143-
144-
145127
class JavaVersionTagger(TaggerInterface):
146128
@staticmethod
147129
def tag_value(container: Container) -> str:

0 commit comments

Comments
 (0)