# Dockerfile (177 lines, 156 loc, 5.45 KB)
# Note: the repository this file comes from was archived by its owner on
# Jan 19, 2020 and is now read-only.
FROM airdock/oraclejdk:1.8 AS metabrainz-spark-base

# Compile and install a specific version of Python.
# The JDK image is based on Debian jessie, which ships Python 3.4; that
# version is not supported anymore. We install Python 3.6 here because
# 3.7 needs a version of OpenSSL that is not available in jessie.
# Based on https://github.com/docker-library/python/blob/master/3.6/jessie/Dockerfile

# Ensure that the local Python build is preferred over whatever might come with the base image.
ENV PATH=/usr/local/bin:$PATH

# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG=C.UTF-8
# Runtime packages. The list contains the non-dev counterparts of the build
# dependencies used when compiling Python further down; installing them
# explicitly keeps them in place when the -dev packages are later removed with
# `purge --auto-remove`. wget is installed here as well because the download
# steps that follow rely on it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        libbz2-1.0 \
        libexpat1 \
        libffi6 \
        libgdbm3 \
        liblzma5 \
        libncursesw5 \
        libreadline6 \
        libsqlite3-0 \
        libssl1.0.0 \
        libuuid1 \
        netbase \
        tcl \
        tk \
        wget \
        zlib1g \
    && rm -rf /var/lib/apt/lists/*
# Fingerprint of the GPG key used to verify the Python source tarball, and the
# Python release that gets built.
ENV GPG_KEY=0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION=3.6.9

# Build Python from source in a single layer: install the build dependencies,
# download the tarball and its signature, verify the signature, compile and
# install, strip tests and bytecode, then purge everything that was only
# needed for the build.
#
# The signing key is fetched over plain HKP from keyserver.ubuntu.com, falling
# back to pgp.mit.edu. The SKS pool (ha.pool.sks-keyservers.net) that was used
# before has been shut down, so `gpg --recv-keys` against it fails.
#
# The list of build dependencies comes from the python-docker slim version:
# https://github.com/docker-library/python/blob/408f7b8130/3.7/stretch/slim/Dockerfile#L29
RUN set -ex \
    && buildDeps=' \
        build-essential \
        libbz2-dev \
        libexpat1-dev \
        libffi-dev \
        libgdbm-dev \
        liblzma-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        tk-dev \
        tcl-dev \
        uuid-dev \
        xz-utils \
        zlib1g-dev \
    ' \
    && apt-get update \
    && apt-get install -y $buildDeps --no-install-recommends \
    \
    && wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
    && wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
    && export GNUPGHOME="$(mktemp -d)" \
    && { gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$GPG_KEY" \
        || gpg --batch --keyserver hkp://pgp.mit.edu:80 --recv-keys "$GPG_KEY"; } \
    && gpg --batch --verify python.tar.xz.asc python.tar.xz \
    && { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
    && rm -rf "$GNUPGHOME" python.tar.xz.asc \
    && mkdir -p /usr/src/python \
    && tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
    && rm python.tar.xz \
    \
    && cd /usr/src/python \
    && gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
    && ./configure \
        --build="$gnuArch" \
        --enable-loadable-sqlite-extensions \
        --enable-shared \
        --with-system-expat \
        --with-system-ffi \
        --without-ensurepip \
    && make -j "$(nproc)" \
    && make install \
    && ldconfig \
    \
    && find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + \
    && rm -rf /usr/src/python \
    \
    && apt-get purge -y --auto-remove $buildDeps \
    && rm -rf /var/lib/apt/lists/* \
    \
    && python3 --version
# Expose the unversioned command names (idle, pydoc, python, python-config)
# next to their "3"-suffixed counterparts in /usr/local/bin. The link targets
# are relative, so each link resolves within that same directory.
RUN ln -s idle3 /usr/local/bin/idle \
    && ln -s pydoc3 /usr/local/bin/pydoc \
    && ln -s python3 /usr/local/bin/python \
    && ln -s python3-config /usr/local/bin/python-config
# Install pip at a pinned version using the get-pip.py bootstrap script.
# The script is taken from the Python 3.6 specific location: the unversioned
# https://bootstrap.pypa.io/get-pip.py only supports newer interpreters and
# refuses to run on 3.6. Keep the "3.6" in the URL in sync with the minor
# series of PYTHON_VERSION.
# After installation, test directories and bytecode files under /usr/local are
# removed and the bootstrap script is deleted.
ENV PYTHON_PIP_VERSION=19.2.3
RUN set -ex; \
    \
    wget -O get-pip.py 'https://bootstrap.pypa.io/pip/3.6/get-pip.py'; \
    \
    python get-pip.py \
        --disable-pip-version-check \
        --no-cache-dir \
        "pip==$PYTHON_PIP_VERSION" \
    ; \
    pip --version; \
    \
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' +; \
    rm -f get-pip.py
# Install Scala together with assorted networking, compression and general
# utility packages, then drop the apt package lists in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bsdmainutils \
        dnsutils \
        net-tools \
        pxz \
        scala \
        wget \
        xz-utils \
        zip \
    && rm -rf /var/lib/apt/lists/*
# Download the pinned dockerize release archive, unpack it into
# /usr/local/bin and delete the archive again in the same layer.
ENV DOCKERIZE_VERSION=v0.6.1
RUN tarball="dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz" \
    && wget "https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/$tarball" \
    && tar -C /usr/local/bin -xzvf "$tarball" \
    && rm "$tarball"
# Install Spark under /usr/local and expose it at the stable path
# /usr/local/spark through a symlink.
# The archive is fetched by the project's apache-download.sh helper, which is
# invoked from /usr/local; the tar step then reads the archive from that
# directory (presumably the helper saves it there - confirm against the script).
# The archive is deleted in the same layer once it has been unpacked, so it
# does not remain in the image.
COPY docker/apache-download.sh /apache-download.sh
ENV SPARK_VERSION=2.4.1
ENV HADOOP_VERSION=2.7
RUN cd /usr/local \
    && sparkDist="spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION" \
    && /apache-download.sh "spark/spark-$SPARK_VERSION/$sparkDist.tgz" \
    && tar xzf "$sparkDist.tgz" \
    && rm "$sparkDist.tgz" \
    && ln -s "$sparkDist" spark
# /rec is the working directory of the image; WORKDIR creates it when it does
# not exist yet, so no separate mkdir step is needed.
WORKDIR /rec

# Only requirements.txt is copied before installing, so this layer is rebuilt
# only when that file changes. --no-cache-dir keeps pip's download cache out
# of the image.
COPY requirements.txt /rec/requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt
# Master stage: the base image with Spark's start-master.sh as the default
# command. Exec form is used so the script is started directly instead of
# through `/bin/sh -c`.
FROM metabrainz-spark-base AS metabrainz-spark-master
CMD ["/usr/local/spark/sbin/start-master.sh"]
# Worker stage: dockerize waits for tcp://spark-master:7077 (timeout 9999s)
# and then runs start-slave.sh against spark://spark-master:7077.
# Exec form is used so dockerize is started directly instead of through
# `/bin/sh -c`; the command needs no shell expansion.
FROM metabrainz-spark-base AS metabrainz-spark-worker
CMD ["dockerize", "-wait", "tcp://spark-master:7077", "-timeout", "9999s", "/usr/local/spark/sbin/start-slave.sh", "spark://spark-master:7077"]
# Jobs stage: the base image plus the build context copied into /rec.
# No CMD is set in this stage.
FROM metabrainz-spark-base AS metabrainz-spark-jobs
COPY . /rec/
# Dev stage: the base image plus the build context copied into /rec.
# No CMD is set in this stage.
FROM metabrainz-spark-base AS metabrainz-spark-dev
COPY . /rec/
# Request-consumer stage: the base image plus the build context copied into
# /rec. No CMD is set in this stage.
FROM metabrainz-spark-base AS metabrainz-spark-request-consumer
COPY . /rec/