-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #551 from boozallen/540-harden-hive-image
[#540] update jars/libs in hive image
- Loading branch information
Showing
15 changed files
with
434 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 54 additions & 11 deletions
65
extensions/extensions-docker/aissemble-hive-service/src/main/resources/docker/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,75 @@ | ||
ARG METASTORE_VERSION | ||
FROM docker.io/apache/hive:${METASTORE_VERSION} AS appsource | ||
FROM docker.io/eclipse-temurin:17-jre AS builder | ||
|
||
ARG METASTORE_VERSION | ||
ARG HADOOP_VERSION | ||
ARG JARS_DIR | ||
ARG BIN_DIR | ||
ARG PATCH_DIR | ||
|
||
ENV HADOOP_HOME=/opt/hadoop | ||
ENV HIVE_HOME=/opt/hive | ||
ENV HIVE_VER=$METASTORE_VERSION | ||
|
||
# Install hadoop | ||
# Downloads the "lean" Hadoop ${HADOOP_VERSION} tarball from archive.apache.org and pipes it | ||
# directly into tar (wget writes to stdout via "-O -", so the archive itself is never written to disk). | ||
# The unpacked hadoop-${HADOOP_VERSION} directory is then moved to $HADOOP_HOME. | ||
RUN cd /tmp \ | ||
&& wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}-lean.tar.gz -O - \ | ||
| tar -xzf - \ | ||
&& mv hadoop-${HADOOP_VERSION} $HADOOP_HOME | ||
|
||
# Use standalone binary instead of full-service version on original image | ||
# The tarball is taken from the build context (${BIN_DIR}) rather than downloaded. | ||
COPY ${BIN_DIR}/hive-standalone-metastore-server-bin.tar.gz /tmp | ||
# Unpack, move the extracted apache-hive-metastore-${METASTORE_VERSION}-bin directory to $HIVE_HOME, | ||
# and delete the tarball once it is no longer needed. | ||
RUN cd /tmp \ | ||
&& tar -xf /tmp/hive-standalone-metastore-server-bin.tar.gz \ | ||
&& mv apache-hive-metastore-${METASTORE_VERSION}-bin $HIVE_HOME \ | ||
&& rm /tmp/hive-standalone-metastore-server-bin.tar.gz | ||
|
||
# Update library jars used by Hive and Hadoop | ||
COPY --chmod=755 ./src/main/resources/scripts/setup.sh $HIVE_HOME/setup.sh | ||
ADD ${PATCH_DIR}/* /tmp/patch-jars | ||
RUN $HIVE_HOME/setup.sh /tmp/patch-jars && rm -rf /tmp/patch-jars | ||
|
||
# Hadoop ships with source jars, which aren't necessary and bloat the image. Also remove YARN support, as we only support K8s. | ||
RUN rm -rf $HADOOP_HOME/share/hadoop/common/sources \ | ||
$HADOOP_HOME/share/hadoop/hdfs/sources \ | ||
$HADOOP_HOME/share/hadoop/mapreduce/sources \ | ||
$HADOOP_HOME/share/hadoop/tools/sources \ | ||
$HADOOP_HOME/share/hadoop/yarn/* | ||
|
||
# Patch for HIVE-28487: https://github.com/apache/hive/pull/5419 | ||
RUN sed -i \ | ||
's/org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' \ | ||
"$HIVE_HOME/bin/ext/schemaTool.sh" | ||
|
||
FROM docker.io/eclipse-temurin:17-jre AS final | ||
|
||
LABEL org.opencontainers.image.source="https://github.com/boozallen/aissemble" | ||
|
||
WORKDIR /opt | ||
|
||
ARG METASTORE_VERSION | ||
ARG HADOOP_VERSION | ||
ARG JARS_DIR | ||
ARG BIN_DIR | ||
ARG PATCH_DIR | ||
|
||
ENV HADOOP_HOME=/opt/hadoop | ||
ENV HIVE_HOME=/opt/hive | ||
ENV HIVE_VER=$METASTORE_VERSION | ||
|
||
COPY --from=builder $HADOOP_HOME $HADOOP_HOME | ||
COPY --from=builder $HIVE_HOME $HIVE_HOME | ||
|
||
COPY --from=appsource /opt/hadoop $HADOOP_HOME | ||
COPY --from=appsource /opt/hive $HIVE_HOME | ||
COPY --chmod=755 src/main/resources/scripts/entrypoint.sh /entrypoint.sh | ||
|
||
RUN groupadd -rf hive --gid=1000 && \ | ||
useradd --home $HIVE_HOME -g hive --shell /usr/sbin/nologin --uid 1000 hive -o && \ | ||
chown hive:hive -R $HIVE_HOME && \ | ||
ln -s $JAVA_HOME /opt/jre | ||
chown hive:hive -R $HIVE_HOME | ||
|
||
ADD ${JARS_DIR}/* $HIVE_HOME/lib/ | ||
|
||
# Remove jars with open vulnerabilities. These jars are included in the apache hive image but not necessary | ||
# when running the hive metastore only | ||
RUN rm ${HIVE_HOME}/lib/avatica-1.12.0.jar ${HIVE_HOME}/lib/htrace-core-3.1.0-incubating.jar \ | ||
${HADOOP_HOME}/share/hadoop/yarn/timelineservice/lib/htrace-core-3.1.0-incubating.jar | ||
|
||
USER hive | ||
WORKDIR $HIVE_HOME | ||
|
||
ENTRYPOINT ["/opt/hive/bin/hive", "--skiphadoopversion", "--skiphbasecp", "--verbose", "--service", "metastore"] | ||
ENV VERBOSE=true | ||
ENTRYPOINT ["/entrypoint.sh"] |
65 changes: 65 additions & 0 deletions
65
extensions/extensions-docker/aissemble-hive-service/src/main/resources/scripts/entrypoint.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/bash | ||
|
||
### | ||
# #%L | ||
# aiSSEMBLE::Extensions::Docker::Hive Service | ||
# %% | ||
# Copyright (C) 2021 Booz Allen | ||
# %% | ||
# This software package is licensed under the Booz Allen Public License. All Rights Reserved. | ||
# #L% | ||
### | ||
|
||
|
||
# DERIVED FROM apache/spark image entrypoint script | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
set -x | ||
|
||
# Database type passed to schematool's -dbType; falls back to derby when DB_DRIVER is unset or empty. | ||
DB_DRIVER=${DB_DRIVER:-derby} | ||
# VERBOSE_MODE holds the flag added to the metastore launch command: | ||
# "--verbose" only when VERBOSE is exactly "true", otherwise an empty string. | ||
if [[ $VERBOSE = "true" ]]; then | ||
VERBOSE_MODE="--verbose" | ||
else | ||
VERBOSE_MODE="" | ||
fi | ||
|
||
# Runs $HIVE_HOME/bin/schematool against the database type in $DB_DRIVER. | ||
# The schematool command is -initOrUpgradeSchema by default, or -initSchema when the | ||
# major version taken from $HIVE_VER is lower than 4. | ||
# Takes no arguments. Prints a success message when schematool succeeds; otherwise prints | ||
# a failure message and exits the whole script with status 1. | ||
function initialize_hive { | ||
COMMAND="-initOrUpgradeSchema" | ||
# Major version = the text before the first '.' in $HIVE_VER | ||
if [ "$(echo "$HIVE_VER" | cut -d '.' -f1)" -lt "4" ]; then | ||
COMMAND="-initSchema" | ||
fi | ||
# Don't honor verbose mode and dump errors because the 4.0.0 mysql schema generates a ton of deprecation warnings | ||
if "$HIVE_HOME/bin/schematool" -dbType $DB_DRIVER $COMMAND; then | ||
echo "Initialized schema successfully.." | ||
else | ||
echo "Schema initialization failed!" | ||
exit 1 | ||
fi | ||
} | ||
|
||
# Hive always reads its configuration from $HIVE_HOME/conf. | ||
export HIVE_CONF_DIR=$HIVE_HOME/conf | ||
# When HIVE_CUSTOM_CONF_DIR names an existing directory, symlink every regular file found | ||
# anywhere beneath it into $HIVE_CONF_DIR. Links are created by file name only, so files from | ||
# subdirectories land directly in $HIVE_CONF_DIR, and -f replaces entries that already exist. | ||
# Hadoop and Tez are then pointed at the same configuration directory. | ||
if [ -d "${HIVE_CUSTOM_CONF_DIR:-}" ]; then | ||
find "${HIVE_CUSTOM_CONF_DIR}" -type f -exec \ | ||
ln -sfn {} "${HIVE_CONF_DIR}"/ \; | ||
export HADOOP_CONF_DIR=$HIVE_CONF_DIR | ||
export TEZ_CONF_DIR=$HIVE_CONF_DIR | ||
fi | ||
|
||
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx1G $SERVICE_OPTS" | ||
|
||
# Schema initialization runs unless IS_RESUME is set to a non-empty value; | ||
# any non-empty value skips it and is echoed in the log message. | ||
if [ -z "$IS_RESUME" ]; then | ||
echo "Initializing (or upgrading) schema" | ||
initialize_hive | ||
else | ||
echo "Skip schema initialization ($IS_RESUME)" | ||
fi | ||
|
||
# Default the exported metastore port to 9083 when METASTORE_PORT is unset or empty. | ||
export METASTORE_PORT=${METASTORE_PORT:-9083} | ||
# exec replaces this shell with the metastore launcher, so nothing after this line runs. | ||
# $VERBOSE_MODE is unquoted, so an empty value expands to no argument at all. | ||
exec "$HIVE_HOME/bin/base" --skiphadoopversion $VERBOSE_MODE --service metastore |
Oops, something went wrong.