[FLINK-30024][tests] Build local test KDC docker image

pull/21200/head
Gabor Somogyi 2 years ago committed by MartijnVisser
parent e5762a558f
commit e822cfdbc9

@ -1 +1,2 @@
test-scripts/temp-test-directory*
test-scripts/docker-hadoop-secure-cluster/hadoop/hadoop.tar.gz

@ -100,13 +100,10 @@ function build_image() {
echo "Pre-downloading Hadoop tarball"
local cache_path
cache_path=$(get_artifact "http://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz")
ln "${cache_path}" "${END_TO_END_DIR}/test-scripts/docker-hadoop-secure-cluster/hadoop-2.8.5.tar.gz"
ln "${cache_path}" "${END_TO_END_DIR}/test-scripts/docker-hadoop-secure-cluster/hadoop/hadoop.tar.gz"
echo "Building Hadoop Docker container"
docker build --build-arg HADOOP_VERSION=2.8.5 \
-f "${END_TO_END_DIR}/test-scripts/docker-hadoop-secure-cluster/Dockerfile" \
-t flink/docker-hadoop-secure-cluster:latest \
"${END_TO_END_DIR}/test-scripts/docker-hadoop-secure-cluster/"
docker-compose -f "${END_TO_END_DIR}/test-scripts/docker-hadoop-secure-cluster/docker-compose.yml" build
}
function start_hadoop_cluster_and_prepare_flink() {

@ -1,15 +1,7 @@
# Apache Hadoop Docker image with Kerberos enabled
This image is a modified version of Knappek/docker-hadoop-secure
* Knappek/docker-hadoop-secure <https://github.com/Knappek/docker-hadoop-secure>
With bits and pieces added from Lewuathe/docker-hadoop-cluster so that it starts a proper kerberized Hadoop cluster:
* Lewuathe/docker-hadoop-cluster <https://github.com/Lewuathe/docker-hadoop-cluster>
And a number of additions that turn this into an actual, properly configured, kerberized cluster with a proper user/permission structure.
Versions
--------
Required versions
-----------------
* JDK8
* Hadoop 2.8.5
@ -17,21 +9,23 @@ Versions
Default Environment Variables
-----------------------------
| Name | Value | Description |
| ---- | ---- | ---- |
| `KRB_REALM` | `EXAMPLE.COM` | The Kerberos Realm, more information [here](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html#) |
| `DOMAIN_REALM` | `example.com` | The Kerberos Domain Realm, more information [here](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html#) |
| `KERBEROS_ADMIN` | `admin/admin` | The KDC admin user |
| `KERBEROS_ADMIN_PASSWORD` | `admin` | The KDC admin password |
| Name | Value | Description |
|---------------------------| ---- |-------------------------------------------------------------------------------------------------------------------------------------------------|
| `KRB_REALM` | `EXAMPLE.COM` | The Kerberos Realm, more information [here](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html#) |
| `DOMAIN_REALM` | `example.com` | The Kerberos Domain Realm, more information [here](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html#) |
| `KERB_MASTER_KEY` | `masterkey` | The Kerberos master database password, more information [here](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/admin_commands/kdb5_util.html#) |
| `KERBEROS_ADMIN_USER` | `admin/admin` | The KDC admin user |
| `KERBEROS_ADMIN_PASSWORD` | `admin` | The KDC admin password |
You can simply define these variables in the `docker-compose.yml`.
Run image
---------
Clone the [project](https://git-wip-us.apache.org/repos/asf/flink.git) and run
```
cd flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster
wget -O hadoop/hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
docker-compose build
docker-compose up
```
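The compose file (shown further down in this diff) parameterizes the Hadoop image name via `${DOCKER_HADOOP_IMAGE_NAME:-flink/test-hadoop:latest}`, so a locally built image can be substituted without editing the file. A minimal sketch, assuming the commands are run from the same directory:
```
# Build both the KDC and Hadoop images declared in docker-compose.yml,
# then start the cluster with an explicitly chosen Hadoop image tag; any
# locally built tag can be substituted for the default used here.
docker-compose build
DOCKER_HADOOP_IMAGE_NAME=flink/test-hadoop:latest docker-compose up -d

# Check that the kdc, master and worker containers are up.
docker-compose ps
```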
@ -44,7 +38,6 @@ Get the container name with `docker ps` and login to the container with
docker exec -it <container-name> /bin/bash
```
To obtain a Kerberos ticket, execute
```
@ -57,7 +50,6 @@ Afterwards you can use `hdfs` CLI like
hdfs dfs -ls /
```
Known issues
------------
@ -74,17 +66,6 @@ Login failure for nn/hadoop.docker.com@EXAMPLE.COM from keytab /etc/security/key
Stop the containers with `docker-compose down` and start again with `docker-compose up -d`.
### JDK 8
Make sure you download a JDK version that is still available. Old versions can be deprecated by Oracle, in which case the download link no longer works.
Get the latest JDK8 download URL with
```
curl -s https://lv.binarybabel.org/catalog-api/java/jdk8.json
```
### Java Keystore
If the keystore has expired, create a new `keystore.jks`:

@ -23,9 +23,10 @@ networks:
services:
kdc:
build: kdc
container_name: "kdc"
hostname: kdc.kerberos.com
image: sequenceiq/kerberos
image: flink/test-kdc:latest
networks:
- docker-hadoop-cluster-network
environment:
@ -33,7 +34,8 @@ services:
DOMAIN_REALM: kdc.kerberos.com
master:
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/docker-hadoop-secure-cluster:latest}
build: hadoop
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/test-hadoop:latest}
command: master
depends_on:
- kdc
@ -46,7 +48,8 @@ services:
DOMAIN_REALM: kdc.kerberos.com
worker1:
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/docker-hadoop-secure-cluster:latest}
build: hadoop
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/test-hadoop:latest}
command: worker
depends_on:
- kdc
@ -60,7 +63,8 @@ services:
DOMAIN_REALM: kdc.kerberos.com
worker2:
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/docker-hadoop-secure-cluster:latest}
build: hadoop
image: ${DOCKER_HADOOP_IMAGE_NAME:-flink/test-hadoop:latest}
command: worker
depends_on:
- kdc

@ -53,18 +53,15 @@ RUN set -x \
RUN sed -i 's/^#crypto.policy=unlimited/crypto.policy=unlimited/' $JAVA_HOME/jre/lib/security/java.security
ARG HADOOP_VERSION=2.8.5
COPY hadoop-${HADOOP_VERSION}.tar.gz /tmp/hadoop.tar.gz
COPY hadoop.tar.gz /tmp/hadoop.tar.gz
RUN set -x \
&& tar -xf /tmp/hadoop.tar.gz -C /usr/local/ \
&& mkdir -p /usr/local/hadoop \
&& tar -xf /tmp/hadoop.tar.gz --strip-components=1 -C /usr/local/hadoop \
&& rm /tmp/hadoop.tar.gz*
WORKDIR /usr/local
RUN set -x \
&& ln -s /usr/local/hadoop-${HADOOP_VERSION} /usr/local/hadoop \
&& chown root:root -R /usr/local/hadoop-${HADOOP_VERSION}/ \
&& chown root:root -R /usr/local/hadoop/ \
&& chown root:yarn /usr/local/hadoop/bin/container-executor \
&& chmod 6050 /usr/local/hadoop/bin/container-executor \
@ -90,7 +87,7 @@ ENV PATH $PATH:$HADOOP_BIN_HOME
ENV KRB_REALM EXAMPLE.COM
ENV DOMAIN_REALM example.com
ENV KERBEROS_ADMIN admin/admin
ENV KERBEROS_ADMIN_USER admin/admin
ENV KERBEROS_ADMIN_PASSWORD admin
ENV KEYTAB_DIR /etc/security/keytabs
@ -136,11 +133,9 @@ EXPOSE 8030 8031 8032 8033 8040 8042 8088 8188
# Other ports
EXPOSE 49707 2122
ADD bootstrap.sh /etc/bootstrap.sh
RUN chown root:root /etc/bootstrap.sh
RUN chmod 700 /etc/bootstrap.sh
ENV BOOTSTRAP /etc/bootstrap.sh
ADD entrypoint.sh /entrypoint.sh
RUN chown root:root /entrypoint.sh
RUN chmod 700 /entrypoint.sh
ENTRYPOINT ["/etc/bootstrap.sh"]
ENTRYPOINT ["/entrypoint.sh"]
CMD ["-h"]

@ -53,7 +53,7 @@ sed -i "s#/usr/local/hadoop/bin/container-executor#${NM_CONTAINER_EXECUTOR_PATH}
# we retry the first call because it can happen that Kerberos is not ready in
# time
start_time=$(date +%s)
until kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey hdfs/$(hostname -f)@${KRB_REALM}"; do
until kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey hdfs/$(hostname -f)@${KRB_REALM}"; do
current_time=$(date +%s)
time_diff=$((current_time - start_time))
@ -66,13 +66,13 @@ until kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -ra
fi
done
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey mapred/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey yarn/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey HTTP/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey mapred/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey yarn/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey HTTP/$(hostname -f)@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k hdfs.keytab hdfs/$(hostname -f) HTTP/$(hostname -f)"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k mapred.keytab mapred/$(hostname -f) HTTP/$(hostname -f)"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k yarn.keytab yarn/$(hostname -f) HTTP/$(hostname -f)"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k hdfs.keytab hdfs/$(hostname -f) HTTP/$(hostname -f)"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k mapred.keytab mapred/$(hostname -f) HTTP/$(hostname -f)"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k yarn.keytab yarn/$(hostname -f) HTTP/$(hostname -f)"
mkdir -p ${KEYTAB_DIR}
mv hdfs.keytab ${KEYTAB_DIR}
@ -104,11 +104,11 @@ elif [ "$1" == "master" ]; then
nohup sudo -E -u mapred $HADOOP_PREFIX/bin/mapred historyserver 2>> /var/log/hadoop/historyserver.err >> /var/log/hadoop/historyserver.out &
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey root@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k /root/root.keytab root"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -randkey root@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k /root/root.keytab root"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -pw hadoop-user hadoop-user@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k /home/hadoop-user/hadoop-user.keytab hadoop-user"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "addprinc -pw hadoop-user hadoop-user@${KRB_REALM}"
kadmin -p ${KERBEROS_ADMIN_USER} -w ${KERBEROS_ADMIN_PASSWORD} -q "xst -k /home/hadoop-user/hadoop-user.keytab hadoop-user"
chown hadoop-user:hadoop-user /home/hadoop-user/hadoop-user.keytab
kinit -kt /root/root.keytab root
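The `hadoop-user` principal and keytab created above are what the tests (and the README's ticket-obtaining step) rely on. A sketch of authenticating as that user inside the master container, assuming the defaults from this script:
```
# Obtain a ticket with the keytab generated by bootstrap.sh ...
kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user

# ... or interactively, using the password set via "addprinc -pw hadoop-user".
kinit hadoop-user

# The ticket cache should now show hadoop-user@EXAMPLE.COM (or ${KRB_REALM}),
# after which the kerberized HDFS is usable.
klist
hdfs dfs -ls /
```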

@ -0,0 +1,33 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
FROM debian:bullseye
ENV DEBIAN_FRONTEND noninteractive
# The -qq implies --yes
RUN apt-get -qq update
RUN apt-get -qq install locales krb5-kdc krb5-admin-server
RUN apt-get -qq clean
RUN locale-gen "en_US.UTF-8"
RUN echo "LC_ALL=\"en_US.UTF-8\"" >> /etc/default/locale
EXPOSE 88 749
ADD entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
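Assuming this Dockerfile lives in a `kdc/` sub-directory next to `docker-compose.yml` (as its `build: kdc` entry suggests), the image can also be built and smoke-tested on its own; a sketch, not part of the committed scripts:
```
# Build the KDC image from its sub-directory, the same way docker-compose.yml's
# "build: kdc" entry does, and give it the tag the compose file expects.
docker build -t flink/test-kdc:latest kdc/

# Run it standalone; the entrypoint falls back to EXAMPLE.COM / example.com
# when these variables are not set.
docker run --rm \
    -p 88:88 -p 88:88/udp -p 749:749 \
    -e KRB_REALM=EXAMPLE.COM \
    -e DOMAIN_REALM=example.com \
    flink/test-kdc:latest
```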

@ -0,0 +1,78 @@
#!/bin/bash -x
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
: "${KRB_REALM:=EXAMPLE.COM}"
: "${DOMAIN_REALM:=example.com}"
: "${KERB_MASTER_KEY:=masterkey}"
: "${KERB_ADMIN_USER:=admin}"
: "${KERB_ADMIN_PASS:=admin}"
create_config() {
: "${KDC_ADDRESS:=$(hostname -f)}"
cat>/etc/krb5.conf<<EOF
[logging]
default = FILE:/var/log/kerberos/krb5libs.log
kdc = FILE:/var/log/kerberos/krb5kdc.log
admin_server = FILE:/var/log/kerberos/kadmind.log
[libdefaults]
default_realm = ${KRB_REALM}
dns_lookup_realm = false
dns_lookup_kdc = false
ticket_lifetime = 24h
renew_lifetime = 7d
forwardable = true
[realms]
${KRB_REALM} = {
kdc = ${KDC_ADDRESS}
admin_server = ${KDC_ADDRESS}
}
[domain_realm]
.$DOMAIN_REALM = ${KRB_REALM}
$DOMAIN_REALM = ${KRB_REALM}
EOF
}
create_db() {
/usr/sbin/kdb5_util -P ${KERB_MASTER_KEY} -r ${KRB_REALM} create -s
}
create_admin_user() {
kadmin.local -q "addprinc -pw ${KERB_ADMIN_PASS} ${KERB_ADMIN_USER}/admin"
echo "*/admin@${KRB_REALM} *" > /etc/krb5kdc/kadm5.acl
}
start_kdc() {
/usr/sbin/krb5kdc
/usr/sbin/kadmind
}
main() {
mkdir -p /var/log/kerberos
create_config
create_db
create_admin_user
start_kdc
tail -F /var/log/kerberos/krb5kdc.log
}
main "$@"

@ -1489,7 +1489,7 @@ under the License.
<exclude>flink-table/flink-table-planner/src/test/resources/json/*.json</exclude>
<exclude>flink-yarn/src/test/resources/krb5.keytab</exclude>
<exclude>flink-end-to-end-tests/test-scripts/test-data/**</exclude>
<exclude>flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/config/keystore.jks</exclude>
<exclude>flink-end-to-end-tests/test-scripts/docker-hadoop-secure-cluster/hadoop/config/keystore.jks</exclude>
<exclude>flink-connectors/flink-connector-kafka/src/test/resources/**</exclude>
<exclude>flink-connectors/flink-connector-hive/src/test/resources/**</exclude>
<exclude>flink-end-to-end-tests/flink-tpcds-test/tpcds-tool/answer_set/*</exclude>
