File tree Expand file tree Collapse file tree 8 files changed +147
-0
lines changed Expand file tree Collapse file tree 8 files changed +147
-0
lines changed Original file line number Diff line number Diff line change
1
+ *.tsv
2
+ *.csv
Original file line number Diff line number Diff line change
1
+
2
+ # spark
3
+
4
+ A `progrium/busybox`-based [Spark](http://spark.apache.org) container. Use it in a standalone cluster with the accompanying `docker-compose.yml`, or as a base for more complex recipes.
5
+
6
+ ## example
7
+
8
+ To create a standalone cluster with [docker-compose](http://docs.docker.com/compose):
9
+
10
+ docker-compose up
11
+
12
+ The Spark UI will be running at `http://${YOUR_DOCKER_HOST}:8080` with one worker listed. To run `spark-shell`, exec into the master container:
13
+
14
+ docker exec -it dockerspark_master_1 /bin/bash
15
+ /usr/spark/bin/spark-shell --master spark://master:7077
16
+
17
+ ## license
18
+
19
+ MIT
Original file line number Diff line number Diff line change
1
+ # Default system properties included when running spark-submit.
2
+ # This is useful for setting default environmental settings.
3
+
4
+ spark.driver.port 7001
5
+ spark.fileserver.port 7002
6
+ spark.broadcast.port 7003
7
+ spark.replClassServer.port 7004
8
+ spark.blockManager.port 7005
9
+ spark.executor.port 7006
10
+
11
+ spark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory
12
+ spark.port.maxRetries 4
Original file line number Diff line number Diff line change
1
+ # Default system properties included when running spark-submit.
2
+ # This is useful for setting default environmental settings.
3
+
4
+ #spark.driver.port 7101
5
+ spark.fileserver.port 7012
6
+ spark.broadcast.port 7013
7
+ spark.replClassServer.port 7014
8
+ spark.blockManager.port 7015
9
+ spark.executor.port 7016
10
+
11
+ spark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory
12
+ spark.port.maxRetries 4
Original file line number Diff line number Diff line change
1
+ master :
2
+ image : gettyimages/spark:1.3.1-hadoop-2.4
3
+ command : /usr/spark/bin/spark-class org.apache.spark.deploy.master.Master
4
+ hostname : master
5
+ environment :
6
+ SPARK_CONF_DIR : /conf
7
+ expose :
8
+ - 7001
9
+ - 7002
10
+ - 7003
11
+ - 7004
12
+ - 7005
13
+ - 7006
14
+ - 7077
15
+ - 6066
16
+ ports :
17
+ - 4040:4040
18
+ - 6066:6066
19
+ - 7077:7077
20
+ - 8080:8080
21
+ volumes :
22
+ - ./conf/master:/conf
23
+ - ./data:/tmp/data
24
+
25
+ worker :
26
+ image : gettyimages/spark:1.3.1-hadoop-2.4
27
+ command : /usr/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
28
+ hostname : worker
29
+ environment :
30
+ SPARK_CONF_DIR : /conf
31
+ SPARK_WORKER_CORES : 2
32
+ SPARK_WORKER_MEMORY : 1g
33
+ SPARK_WORKER_PORT : 8881
34
+ SPARK_WORKER_WEBUI_PORT : 8081
35
+ links :
36
+ - master
37
+ expose :
38
+ - 7012
39
+ - 7013
40
+ - 7014
41
+ - 7015
42
+ - 7016
43
+ - 8881
44
+ ports :
45
+ - 8081:8081
46
+ volumes :
47
+ - ./conf/worker:/conf
48
+ - ./data:/tmp/data
Original file line number Diff line number Diff line change
1
+ FROM progrium/busybox
2
+ RUN opkg-install curl ca-certificates bash gzip zlib
3
+
4
+ # JAVA
5
+ ENV JAVA_HOME /usr/jdk1.8.0_31
6
+ ENV PATH $PATH:$JAVA_HOME/bin
7
+ RUN curl -sL --retry 3 --insecure \
8
+ --header "Cookie: oraclelicense=accept-securebackup-cookie;" \
9
+ "http://download.oracle.com/otn-pub/java/jdk/8u31-b13/server-jre-8u31-linux-x64.tar.gz" \
10
+ | gunzip \
11
+ | tar x -C /usr/ \
12
+ && ln -s $JAVA_HOME /usr/java \
13
+ && rm -rf $JAVA_HOME/man
14
+
15
+ # SPARK
16
+ ENV SPARK_VERSION 1.3.0
17
+ ENV HADOOP_VERSION 2.4
18
+ ENV SPARK_PACKAGE $SPARK_VERSION-bin-hadoop$HADOOP_VERSION
19
+ ENV SPARK_HOME /usr/spark-$SPARK_PACKAGE
20
+ ENV PATH $PATH:$SPARK_HOME/bin
21
+ RUN curl -sL --retry 3 \
22
+ "http://mirrors.ibiblio.org/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_PACKAGE.tgz" \
23
+ | gunzip \
24
+ | tar x -C /usr/ \
25
+ && ln -s $SPARK_HOME /usr/spark
26
+
27
+ CMD /usr/spark/bin/spark-class org.apache.spark.deploy.master.Master
Original file line number Diff line number Diff line change
1
+ FROM progrium/busybox
2
+ RUN opkg-install curl ca-certificates bash gzip zlib
3
+
4
+ # JAVA
5
+ ENV JAVA_HOME /usr/jdk1.8.0_31
6
+ ENV PATH $PATH:$JAVA_HOME/bin
7
+ RUN curl -sL --retry 3 --insecure \
8
+ --header "Cookie: oraclelicense=accept-securebackup-cookie;" \
9
+ "http://download.oracle.com/otn-pub/java/jdk/8u31-b13/server-jre-8u31-linux-x64.tar.gz" \
10
+ | gunzip \
11
+ | tar x -C /usr/ \
12
+ && ln -s $JAVA_HOME /usr/java \
13
+ && rm -rf $JAVA_HOME/man
14
+
15
+ # SPARK
16
+ ENV SPARK_VERSION 1.3.1
17
+ ENV HADOOP_VERSION 2.4
18
+ ENV SPARK_PACKAGE $SPARK_VERSION-bin-hadoop$HADOOP_VERSION
19
+ ENV SPARK_HOME /usr/spark-$SPARK_PACKAGE
20
+ ENV PATH $PATH:$SPARK_HOME/bin
21
+ RUN curl -sL --retry 3 \
22
+ "http://mirrors.ibiblio.org/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_PACKAGE.tgz" \
23
+ | gunzip \
24
+ | tar x -C /usr/ \
25
+ && ln -s $SPARK_HOME /usr/spark
26
+
27
+ CMD /usr/spark/bin/spark-class org.apache.spark.deploy.master.Master
You can’t perform that action at this time.
0 commit comments