Spark (starting with version 2.3) ships with a Dockerfile that can be used
to build the Spark Docker image, or customized to match an individual application’s needs. It can be found in the kubernetes/dockerfiles/spark/ directory of the distribution.

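Spark 2.3 also ships a bin/docker-image-tool.sh script in the distribution, a thin wrapper around docker build and docker push for this Dockerfile. A minimal sketch, run from the top-level directory of the unpacked distribution (<repo> is a placeholder registry):

./bin/docker-image-tool.sh -r <repo> -t v2.3.2 build
./bin/docker-image-tool.sh -r <repo> -t v2.3.2 push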

Download Apache Spark

##
#  http://spark.apache.org/downloads.html
#  mkdir: create a working directory
#  wget: download the release archive
#  tar -xvf: extract it

[root@ec-k8s-m1 ~]# mkdir -p /home/e.c.com/spark
[root@ec-k8s-m1 ~]# cd /home/e.c.com/spark/
[root@ec-k8s-m1 spark]# wget http://mirrors.shu.edu.cn/apache/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz

[root@ec-k8s-m1 spark]# tar -xvf spark-2.3.2-bin-hadoop2.7.tgz

[root@ec-k8s-m1 spark]# ls
spark-2.3.2-bin-hadoop2.7  spark-2.3.2-bin-hadoop2.7.tgz
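Optionally, verify the archive against the checksum published on the Apache archive. A sketch, assuming the standard Apache dist layout for the .sha512 file (compare the two digests by eye or script):

[root@ec-k8s-m1 spark]# wget https://archive.apache.org/dist/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz.sha512
[root@ec-k8s-m1 spark]# sha512sum spark-2.3.2-bin-hadoop2.7.tgz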

Spark Dockerfile

##
#
[root@ec-k8s-m1 spark]# cd spark-2.3.2-bin-hadoop2.7/

[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# ll kubernetes/dockerfiles/spark/
total 8
-rw-rw-r-- 1 e.c.com e.c.com 1934 Oct 30 16:19 Dockerfile
-rwxrwxr-x 1 e.c.com e.c.com 2878 Sep 16 20:00 entrypoint.sh
## Dockerfile contents
#
[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# vim kubernetes/dockerfiles/spark/Dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Base image: OpenJDK 8 on Alpine Linux
FROM openjdk:8-alpine

ARG spark_jars=jars
ARG img_path=kubernetes/dockerfiles

# Before building the docker image, first build and make a Spark distribution following
# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
# If this docker file is being used in the context of building your images from a Spark
# distribution, the docker build command should be invoked from the top level directory
# of the Spark distribution. E.g.:
# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .

# RUN executes commands in a new layer on top of the current image
RUN set -ex && \
    apk upgrade --no-cache && \
    apk add --no-cache bash tini libc6-compat && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd

# COPY copies files or directories into the image
COPY ${spark_jars} /opt/spark/jars
COPY bin /opt/spark/bin
COPY sbin /opt/spark/sbin
COPY conf /opt/spark/conf
COPY ${img_path}/spark/entrypoint.sh /opt/
COPY examples /opt/spark/examples
COPY data /opt/spark/data

# ENV sets an environment variable
ENV SPARK_HOME /opt/spark

# WORKDIR sets the working directory for the instructions that follow; the directory is created if it does not exist
WORKDIR /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]
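
Because this is an ordinary Dockerfile, application dependencies can be layered on top of the stock image rather than editing it in place. A minimal sketch of a derived image, assuming the spark:v2.3.2 tag built below and a hypothetical application jar:

# Derived image: stock Spark plus one application jar (illustrative only)
FROM spark:v2.3.2
COPY target/my-app.jar /opt/spark/jars/
# Extra OS packages, if the application needs them
RUN apk add --no-cache python3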

## The entrypoint.sh bash script
#
[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# vim kubernetes/dockerfiles/spark/entrypoint.sh
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# echo commands to the terminal output
# -e: exit the shell immediately if a command returns a non-zero status
# -x: print each command and its arguments before executing it
set -ex

# Check whether there is a passwd entry for the container UID
myuid=$(id -u)
mygid=$(id -g)

# Get entries from administrative database.
uidentry=$(getent passwd $myuid)

# If there is no passwd entry for the container UID, attempt to create one
# [ -z "$string" ] is true if string is empty
if [ -z "$uidentry" ] ; then
    if [ -w /etc/passwd ] ; then
        echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
    else
        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
    fi
fi

SPARK_K8S_CMD="$1"
if [ -z "$SPARK_K8S_CMD" ]; then
  echo "No command to execute has been provided." 1>&2
  exit 1
fi
shift 1

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt
readarray -t SPARK_JAVA_OPTS < /tmp/java_opts.txt
# [ -n "$string" ] is true if string is non-empty
if [ -n "$SPARK_MOUNTED_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_MOUNTED_CLASSPATH"
fi
if [ -n "$SPARK_MOUNTED_FILES_DIR" ]; then
  cp -R "$SPARK_MOUNTED_FILES_DIR/." .
fi

case "$SPARK_K8S_CMD" in
  driver)
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_JAVA_OPTS[@]}"
      -cp "$SPARK_CLASSPATH"
      -Xms$SPARK_DRIVER_MEMORY
      -Xmx$SPARK_DRIVER_MEMORY
      -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS
      $SPARK_DRIVER_CLASS
      $SPARK_DRIVER_ARGS
    )
    ;;

  executor)
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_JAVA_OPTS[@]}"
      -Xms$SPARK_EXECUTOR_MEMORY
      -Xmx$SPARK_EXECUTOR_MEMORY
      -cp "$SPARK_CLASSPATH"
      org.apache.spark.executor.CoarseGrainedExecutorBackend
      --driver-url $SPARK_DRIVER_URL
      --executor-id $SPARK_EXECUTOR_ID
      --cores $SPARK_EXECUTOR_CORES
      --app-id $SPARK_APPLICATION_ID
      --hostname $SPARK_EXECUTOR_POD_IP
    )
    ;;

  init)
    CMD=(
      "$SPARK_HOME/bin/spark-class"
      "org.apache.spark.deploy.k8s.SparkPodInitContainer"
      "$@"
    )
    ;;

  *)
    echo "Unknown command: $SPARK_K8S_CMD" 1>&2
    exit 1
esac

# Execute the container CMD under tini for better hygiene
exec /sbin/tini -s -- "${CMD[@]}"
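
The SPARK_JAVA_OPT_ pipeline above turns numbered environment variables (injected by Spark's Kubernetes scheduler backend) into an ordered list of JVM options: sort -t_ -k4 -n orders them by their numeric suffix (the fourth _-separated field) and sed strips the variable name, keeping only the value. A quick illustration with made-up values:

# hypothetical variables, for illustration only
export SPARK_JAVA_OPT_0='-Dspark.app.name=demo'
export SPARK_JAVA_OPT_1='-Dspark.driver.port=7078'
env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g'
# prints:
# -Dspark.app.name=demo
# -Dspark.driver.port=7078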

Building the Image

##
#
[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# docker build -t spark:v2.3.2 -f kubernetes/dockerfiles/spark/Dockerfile .

Sending build context to Docker daemon  256.6MB
Step 1/14 : FROM openjdk:8-alpine
 ---> 97bc1352afde
Step 2/14 : ARG spark_jars=jars
 ---> Using cache
 ---> c5766899e12f
Step 3/14 : ARG img_path=kubernetes/dockerfiles
 ---> Using cache
 ---> 205adcda7d41
Step 4/14 : RUN set -ex &&     apk upgrade --no-cache &&     apk add --no-cache bash tini libc6-compat &&     mkdir -p /opt/spark &&     mkdir -p /opt/spark/work-dir &&     touch /opt/spark/RELEASE &&     rm /bin/sh &&     ln -sv /bin/bash /bin/sh &&     chgrp root /etc/passwd && chmod ug+rw /etc/passwd
 ---> Using cache
 ---> d87294bae12a
Step 5/14 : COPY ${spark_jars} /opt/spark/jars
 ---> e6dd6c6a1004
Step 6/14 : COPY bin /opt/spark/bin
 ---> 5f922e95894d
Step 7/14 : COPY sbin /opt/spark/sbin
 ---> 219596902819
Step 8/14 : COPY conf /opt/spark/conf
 ---> 49c77795c810
Step 9/14 : COPY ${img_path}/spark/entrypoint.sh /opt/
 ---> 1ba1d2efd9cf
Step 10/14 : COPY examples /opt/spark/examples
 ---> c90b67aeb802
Step 11/14 : COPY data /opt/spark/data
 ---> 331bf7b79de2
Step 12/14 : ENV SPARK_HOME /opt/spark
 ---> Running in fd2b10d3dfd1
 ---> 5c35ef59d554
Removing intermediate container fd2b10d3dfd1
Step 13/14 : WORKDIR /opt/spark/work-dir
 ---> ea7ca87df71d
Removing intermediate container 8dd16a87c4d1
Step 14/14 : ENTRYPOINT /opt/entrypoint.sh
 ---> Running in c9780b266a1b
 ---> 9c672c188d7e
Removing intermediate container c9780b266a1b
Successfully built 9c672c188d7e
Successfully tagged spark:v2.3.2
[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# docker images
REPOSITORY   TAG           IMAGE ID         CREATED              SIZE
spark        v2.3.2        9c672c188d7e     About a minute ago   346MB
.....
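
With the image built, push it to a registry the cluster nodes can pull from, then reference it from spark-submit. A sketch, with <registry> and the API server address as placeholders (see the running-on-kubernetes guide in the references below):

[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# docker tag spark:v2.3.2 <registry>/spark:v2.3.2
[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# docker push <registry>/spark:v2.3.2

[root@ec-k8s-m1 spark-2.3.2-bin-hadoop2.7]# bin/spark-submit \
    --master k8s://https://<apiserver-host>:<port> \
    --deploy-mode cluster \
    --name spark-pi \
    --class org.apache.spark.examples.SparkPi \
    --conf spark.executor.instances=2 \
    --conf spark.kubernetes.container.image=<registry>/spark:v2.3.2 \
    local:///opt/spark/examples/jars/spark-examples_2.11-2.3.2.jar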

References:
http://spark.apache.org/docs/latest/
http://spark.apache.org/docs/latest/running-on-kubernetes.html#docker-images
https://docs.docker.com/engine/reference/commandline/docker/#child-commands
https://docs.docker.com/engine/reference/builder/