Skip to content

Commit 9c65abd

Browse files
committed
Initial commit
0 parents  commit 9c65abd

File tree

8 files changed

+147
-0
lines changed

8 files changed

+147
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Keep delimited data files (CSV / TSV) out of version control.
*.csv
*.tsv

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
# spark
3+
4+
A `progrium/busybox`-based [Spark](http://spark.apache.org) container. Use it in a standalone cluster with the accompanying `docker-compose.yml`, or as a base for more complex recipes.
5+
6+
## example
7+
8+
To create a standalone cluster with [docker-compose](http://docs.docker.com/compose):
9+
10+
docker-compose up
11+
12+
The Spark master web UI will be running at `http://${YOUR_DOCKER_HOST}:8080` with one worker listed. To run `spark-shell`, exec into the master container:
13+
14+
docker exec -it dockerspark_master_1 /bin/bash
15+
/usr/spark/bin/spark-shell --master spark://master:7077
16+
17+
## license
18+
19+
MIT

conf/master/spark-defaults.conf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Pin Spark's service ports to fixed values so they can be exposed by the
# container. 7001-7006 match the `expose` list of the `master` service in
# docker-compose.yml.
spark.driver.port 7001
spark.fileserver.port 7002
spark.broadcast.port 7003
spark.replClassServer.port 7004
spark.blockManager.port 7005
spark.executor.port 7006

# Whitespace-separated like every other entry in this file (the properties
# format accepts either `=` or whitespace between key and value).
spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory
# Upper bound on how many times Spark retries binding a port.
# NOTE(review): confirm the retry behaviour cannot walk a service onto a
# neighbouring fixed port above.
spark.port.maxRetries 4

conf/worker/spark-defaults.conf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Pin Spark's service ports to fixed values so they can be exposed by the
# container. 7012-7016 match the `expose` list of the `worker` service in
# docker-compose.yml. The driver port is intentionally left unset here.
#spark.driver.port 7101
spark.fileserver.port 7012
spark.broadcast.port 7013
spark.replClassServer.port 7014
spark.blockManager.port 7015
spark.executor.port 7016

# Whitespace-separated like every other entry in this file (the properties
# format accepts either `=` or whitespace between key and value).
spark.broadcast.factory org.apache.spark.broadcast.HttpBroadcastFactory
# Upper bound on how many times Spark retries binding a port.
# NOTE(review): confirm the retry behaviour cannot walk a service onto a
# neighbouring fixed port above.
spark.port.maxRetries 4

data/.gitkeep

Whitespace-only changes.

docker-compose.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Standalone Spark cluster: one master and one worker built from the same image.
# Both containers read their Spark configuration from /conf (SPARK_CONF_DIR),
# which is bind-mounted from ./conf/<role>, and share ./data at /tmp/data.

master:
  image: gettyimages/spark:1.3.1-hadoop-2.4
  command: /usr/spark/bin/spark-class org.apache.spark.deploy.master.Master
  hostname: master
  environment:
    SPARK_CONF_DIR: /conf
  # 7001-7006 are the fixed ports set in conf/master/spark-defaults.conf.
  expose:
    - 7001
    - 7002
    - 7003
    - 7004
    - 7005
    - 7006
    - 7077
    - 6066
  # HOST:CONTAINER mappings are quoted so YAML always reads them as strings.
  # 8080 is the web UI mentioned in the README; 7077 is the spark:// endpoint
  # the worker and spark-shell connect to.
  ports:
    - "4040:4040"
    - "6066:6066"
    - "7077:7077"
    - "8080:8080"
  volumes:
    - ./conf/master:/conf
    - ./data:/tmp/data

worker:
  image: gettyimages/spark:1.3.1-hadoop-2.4
  # Registers with the master through the `master` link/hostname.
  command: /usr/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
  hostname: worker
  environment:
    SPARK_CONF_DIR: /conf
    SPARK_WORKER_CORES: 2
    SPARK_WORKER_MEMORY: 1g
    SPARK_WORKER_PORT: 8881
    SPARK_WORKER_WEBUI_PORT: 8081
  links:
    - master
  # 7012-7016 are the fixed ports set in conf/worker/spark-defaults.conf;
  # 8881 is SPARK_WORKER_PORT above.
  expose:
    - 7012
    - 7013
    - 7014
    - 7015
    - 7016
    - 8881
  # Publishes SPARK_WORKER_WEBUI_PORT on the host.
  ports:
    - "8081:8081"
  volumes:
    - ./conf/worker:/conf
    - ./data:/tmp/data

spark-1.3.0-hadoop-2.4/Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Spark 1.3.0 (binary package built for Hadoop 2.4) with an Oracle server JRE,
# on top of a busybox base image.
FROM progrium/busybox

# Tools needed to fetch and unpack the JRE and Spark tarballs. bash is also the
# shell the download steps below run under.
RUN opkg-install curl ca-certificates bash gzip zlib

# Run the remaining RUN steps under bash with pipefail, so a failed download or
# decompression fails the build instead of being hidden behind the exit status
# of the last command in the pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# JAVA
ENV JAVA_HOME=/usr/jdk1.8.0_31
ENV PATH=$PATH:$JAVA_HOME/bin
# --fail makes curl exit non-zero on an HTTP error rather than piping the error
# page into gunzip. The man pages are removed in the same layer that unpacks
# them so they never add to the image size.
# NOTE(review): --insecure disables TLS certificate verification for this
# download; drop it if the CA bundle installed above is sufficient.
RUN curl -fsSL --retry 3 --insecure \
      --header "Cookie: oraclelicense=accept-securebackup-cookie;" \
      "http://download.oracle.com/otn-pub/java/jdk/8u31-b13/server-jre-8u31-linux-x64.tar.gz" \
    | gunzip \
    | tar x -C /usr/ \
 && ln -s "$JAVA_HOME" /usr/java \
 && rm -rf "$JAVA_HOME/man"

# SPARK
# Each value below is derived from the ones before it, so they stay in separate
# ENV instructions (an ENV only sees variables set by earlier instructions).
ENV SPARK_VERSION=1.3.0
ENV HADOOP_VERSION=2.4
ENV SPARK_PACKAGE=$SPARK_VERSION-bin-hadoop$HADOOP_VERSION
ENV SPARK_HOME=/usr/spark-$SPARK_PACKAGE
ENV PATH=$PATH:$SPARK_HOME/bin
# /usr/spark is a version-independent path to the unpacked distribution.
RUN curl -fsSL --retry 3 \
      "http://mirrors.ibiblio.org/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_PACKAGE.tgz" \
    | gunzip \
    | tar x -C /usr/ \
 && ln -s "$SPARK_HOME" /usr/spark

# Default to running a standalone master. Exec form starts spark-class directly
# instead of wrapping it in a shell.
CMD ["/usr/spark/bin/spark-class", "org.apache.spark.deploy.master.Master"]

spark-1.3.1-hadoop-2.4/Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Spark 1.3.1 (binary package built for Hadoop 2.4) with an Oracle server JRE,
# on top of a busybox base image.
FROM progrium/busybox

# Tools needed to fetch and unpack the JRE and Spark tarballs. bash is also the
# shell the download steps below run under.
RUN opkg-install curl ca-certificates bash gzip zlib

# Run the remaining RUN steps under bash with pipefail, so a failed download or
# decompression fails the build instead of being hidden behind the exit status
# of the last command in the pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# JAVA
ENV JAVA_HOME=/usr/jdk1.8.0_31
ENV PATH=$PATH:$JAVA_HOME/bin
# --fail makes curl exit non-zero on an HTTP error rather than piping the error
# page into gunzip. The man pages are removed in the same layer that unpacks
# them so they never add to the image size.
# NOTE(review): --insecure disables TLS certificate verification for this
# download; drop it if the CA bundle installed above is sufficient.
RUN curl -fsSL --retry 3 --insecure \
      --header "Cookie: oraclelicense=accept-securebackup-cookie;" \
      "http://download.oracle.com/otn-pub/java/jdk/8u31-b13/server-jre-8u31-linux-x64.tar.gz" \
    | gunzip \
    | tar x -C /usr/ \
 && ln -s "$JAVA_HOME" /usr/java \
 && rm -rf "$JAVA_HOME/man"

# SPARK
# Each value below is derived from the ones before it, so they stay in separate
# ENV instructions (an ENV only sees variables set by earlier instructions).
ENV SPARK_VERSION=1.3.1
ENV HADOOP_VERSION=2.4
ENV SPARK_PACKAGE=$SPARK_VERSION-bin-hadoop$HADOOP_VERSION
ENV SPARK_HOME=/usr/spark-$SPARK_PACKAGE
ENV PATH=$PATH:$SPARK_HOME/bin
# /usr/spark is a version-independent path to the unpacked distribution; the
# compose file and README refer to it.
RUN curl -fsSL --retry 3 \
      "http://mirrors.ibiblio.org/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_PACKAGE.tgz" \
    | gunzip \
    | tar x -C /usr/ \
 && ln -s "$SPARK_HOME" /usr/spark

# Default to running a standalone master. Exec form starts spark-class directly
# instead of wrapping it in a shell.
CMD ["/usr/spark/bin/spark-class", "org.apache.spark.deploy.master.Master"]

0 commit comments

Comments
 (0)