
Commit 08680bf

First commit. Image working and tested. Enjoy!!!!
1 parent 63ab4c9 commit 08680bf

8 files changed: +359 -0 lines

Dockerfile (new file, +156 lines)

FROM ubuntu:14.04
MAINTAINER Angel Cervera Claudio <angelcervera@gmail.com>

USER root
WORKDIR /root

ENV HADOOP_VERSION 2.7.1
ENV HADOOP_PREFIX /opt/hadoop

# Install all dependencies
RUN apt-get update && apt-get install -y wget ssh rsync openjdk-7-jdk

# Download Hadoop
RUN wget -O /tmp/hadoop-${HADOOP_VERSION}.tar.gz http://mirrors.whoishostingthis.com/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz \
    && wget -O /tmp/hadoop-${HADOOP_VERSION}.tar.gz.mds http://mirrors.whoishostingthis.com/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz.mds

# Install Hadoop
RUN tar -C /opt -xf /tmp/hadoop-${HADOOP_VERSION}.tar.gz \
    && ln -s /opt/hadoop-${HADOOP_VERSION} ${HADOOP_PREFIX} \
    && mkdir /var/lib/hadoop

# Install an ssh key for passwordless login to localhost
RUN ssh-keygen -q -t dsa -P '' -f /root/.ssh/id_dsa \
    && cat /root/.ssh/id_dsa.pub >> /root/.ssh/authorized_keys

# Configure ssh to accept connections from unknown hosts
COPY config/ssh_config /root/.ssh/config

# Copy Hadoop config files
COPY config/hadoop-env.sh ${HADOOP_PREFIX}/etc/hadoop/
COPY config/core-site.xml ${HADOOP_PREFIX}/etc/hadoop/
COPY config/hdfs-site.xml ${HADOOP_PREFIX}/etc/hadoop/
COPY config/mapred-site.xml ${HADOOP_PREFIX}/etc/hadoop/
COPY config/yarn-site.xml ${HADOOP_PREFIX}/etc/hadoop/

# Format HDFS
RUN ${HADOOP_PREFIX}/bin/hdfs namenode -format

# Copy the entry point script
COPY config/docker_entrypoint.sh /root/
RUN chmod a+x /root/docker_entrypoint.sh

# Folder to share files
RUN mkdir /root/shared && \
    chmod a+rwX /root/shared

# Clean up
RUN rm -r /var/cache/apt /var/lib/apt/lists /tmp/hadoop-${HADOOP_VERSION}.tar*


################### Expose ports

### Core

# Zookeeper
EXPOSE 2181

# NameNode metadata service (fs.defaultFS)
EXPOSE 9000

# FTP filesystem impl. (fs.ftp.host.port)
EXPOSE 21

### HDFS ports (reference: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml)

# NameNode Web UI: current HDFS status and file system browser (dfs.namenode.http-address / dfs.namenode.https-address)
EXPOSE 50070 50470

# DataNode Web UI: status, logs, etc. (dfs.datanode.http.address / dfs.datanode.https.address)
EXPOSE 50075 50475

# DataNode data transfer and IPC (dfs.datanode.address / dfs.datanode.ipc.address)
EXPOSE 50010 50020

# Secondary NameNode (dfs.namenode.secondary.http-address / dfs.namenode.secondary.https-address)
EXPOSE 50090 50091

# Backup node (dfs.namenode.backup.address / dfs.namenode.backup.http-address)
EXPOSE 50100 50105

# Journal node (dfs.journalnode.rpc-address / dfs.journalnode.http-address / dfs.journalnode.https-address)
EXPOSE 8485 8480 8481

### MapReduce ports (reference: https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml)

# Task Tracker Web UI and shuffle (mapreduce.tasktracker.http.address)
EXPOSE 50060

# Job Tracker Web UI (mapreduce.jobtracker.http.address)
EXPOSE 50030

# Job History Web UI (mapreduce.jobhistory.webapp.address)
EXPOSE 19888

# Job History admin interface (mapreduce.jobhistory.admin.address)
EXPOSE 10033

# Job History IPC (mapreduce.jobhistory.address)
EXPOSE 10020

### YARN ports (reference: https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-common/yarn-default.xml)

# Applications manager interface (yarn.resourcemanager.address)
EXPOSE 8032

# Scheduler interface (yarn.resourcemanager.scheduler.address)
EXPOSE 8030

# Resource Manager Web UI (yarn.resourcemanager.webapp.address / yarn.resourcemanager.webapp.https.address)
EXPOSE 8088 8090

# Resource tracker interface used by NodeManagers (yarn.resourcemanager.resource-tracker.address)
EXPOSE 8031

# Resource Manager admin interface (yarn.resourcemanager.admin.address)
EXPOSE 8033

# Localizer IPC (yarn.nodemanager.localizer.address)
EXPOSE 8040

# Node Manager Web UI (yarn.nodemanager.webapp.address)
EXPOSE 8042

# Timeline service RPC (yarn.timeline-service.address)
EXPOSE 10200

# Timeline service Web UI (yarn.timeline-service.webapp.address / yarn.timeline-service.webapp.https.address)
EXPOSE 8188 8190

# Shared Cache Manager admin interface (yarn.sharedcache.admin.address)
EXPOSE 8047

# Shared Cache Web UI (yarn.sharedcache.webapp.address)
EXPOSE 8788

# Shared Cache node manager interface (yarn.sharedcache.uploader.server.address)
EXPOSE 8046

# Shared Cache client interface (yarn.sharedcache.client-server.address)
EXPOSE 8045

### Other ports

# SSH
EXPOSE 22


################### Expose volumes
VOLUME ["/opt/hadoop/logs", "/var/lib/hadoop", "/root/shared"]


################### Entry point
ENTRYPOINT [ "/root/docker_entrypoint.sh" ]
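
For context, a minimal sketch of how an image built from this Dockerfile might be used. The image tag and the choice of published ports below are illustrative assumptions, not part of this commit:

# Build the image from the repository root (tag name is an assumption).
docker build -t hadoop-single-node:2.7.1 .

# Run it interactively; the entry point ends in /bin/bash, so -it keeps the
# container alive. Publish the NameNode and ResourceManager web UIs and mount
# a host folder onto the /root/shared volume.
docker run -it \
    -p 50070:50070 -p 8088:8088 \
    -v "$(pwd)/shared:/root/shared" \
    hadoop-single-node:2.7.1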

config/core-site.xml (new file, +26 lines)

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/var/lib/hadoop</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
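
Since fs.defaultFS points at hdfs://localhost:9000 and hadoop.tmp.dir lives on the /var/lib/hadoop volume, HDFS can be exercised from a shell inside the running container. A short, purely illustrative sketch (the file and directory names are assumptions):

# Inside the container
/opt/hadoop/bin/hdfs dfs -mkdir -p /user/root
/opt/hadoop/bin/hdfs dfs -put /root/shared/example.txt /user/root/
/opt/hadoop/bin/hdfs dfs -ls /user/root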

config/docker_entrypoint.sh (new file, +9 lines)

#!/bin/bash

service ssh start
/opt/hadoop/sbin/start-dfs.sh
/opt/hadoop/sbin/start-yarn.sh

# TODO: open a shell so the Docker container does not stop. There may be a better/more elegant solution.
/bin/bash
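
Regarding the TODO above: one common alternative, not part of this commit, is to keep the container in the foreground by following the daemon logs instead of opening an interactive shell. A sketch under that assumption:

#!/bin/bash

service ssh start
/opt/hadoop/sbin/start-dfs.sh
/opt/hadoop/sbin/start-yarn.sh

# Keep the container running by tailing the Hadoop daemon logs
# written under /opt/hadoop/logs once the daemons are up.
tail -F /opt/hadoop/logs/*.log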

config/hadoop-env.sh (new file, +100 lines)

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.
export HADOOP_PREFIX=/opt/hadoop

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
#       the user that will run the hadoop daemons. Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
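
A quick way to confirm that JAVA_HOME and HADOOP_PREFIX resolve correctly is to run the Hadoop CLI, which sources this file via hadoop-config.sh. The commands below are standard subcommands, shown here only as an illustration:

# Inside the container
/opt/hadoop/bin/hadoop version         # fails early if JAVA_HOME is wrong
/opt/hadoop/bin/hadoop checknative -a  # reports which native libraries are available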

config/hdfs-site.xml (new file, +22 lines)

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
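
With dfs.replication set to 1 for this single-node setup, every block keeps a single replica. The effective replication factor can be inspected or adjusted per file with standard HDFS commands (the path below is only an example):

/opt/hadoop/bin/hdfs dfs -stat "replication=%r" /user/root/example.txt
/opt/hadoop/bin/hdfs dfs -setrep -w 1 /user/root/example.txt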

config/mapred-site.xml (new file, +22 lines)

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
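
Because mapreduce.framework.name is yarn, MapReduce jobs are submitted to the ResourceManager rather than run locally. A standard way to verify this is the examples jar shipped inside the Hadoop 2.7.1 tarball; running it here is just an illustration:

/opt/hadoop/bin/yarn jar \
    /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 2 10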

config/ssh_config (new file, +3 lines)

Host *
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null

config/yarn-site.xml (new file, +21 lines)

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
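
The mapreduce_shuffle auxiliary service is what lets reducers fetch map output from each NodeManager. Once the daemons are up, the YARN CLI and the ResourceManager REST API (port 8088, exposed in the Dockerfile) give a quick health check; the commands are standard and shown only as an illustration:

/opt/hadoop/bin/yarn node -list
curl -s http://localhost:8088/ws/v1/cluster/info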
