#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
###################################
# Starts a docker container of the aws cli.
#
# To improve performance of s3_get_number_of_lines_by_prefix, one docker container will be reused for several aws
# commands. An interactive python shell keeps the container busy such that it can be reused to issue several commands.
#
# Globals:
# TEST_INFRA_DIR
# Exports:
# AWSCLI_CONTAINER_ID
###################################
function aws_cli_start() {
  local CONTAINER_ID
  # Run the awscli image detached with a python REPL as the entrypoint: the
  # interactive shell keeps the container alive so that several aws commands
  # can reuse a single container (see file header).
  if ! CONTAINER_ID=$(docker run -d \
      --network host \
      --mount type=bind,source="$TEST_INFRA_DIR",target=/hostdir \
      -e AWS_REGION -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
      --entrypoint python \
      -it banst/awscli); then
    echo "running aws cli container failed"
    # If docker created a container despite returning non-zero, clean it up
    # so we do not leave an orphan behind.
    if [ -n "$CONTAINER_ID" ]; then
      docker kill "$CONTAINER_ID"
      docker rm "$CONTAINER_ID"
    fi
    # 'return' (not 'exit') so the caller can retry the start once.
    return 1
  fi
  export AWSCLI_CONTAINER_ID="$CONTAINER_ID"
  # Busy-wait until docker reports the container as running. Note: this must
  # be a string comparison (!=); the original arithmetic -ne treated both
  # "true" and "false" as 0 and exited the loop immediately.
  while [[ "$(docker inspect -f '{{.State.Running}}' "$AWSCLI_CONTAINER_ID")" != "true" ]]; do
    sleep 0.1
  done
  # on_exit is a helper provided by the sourcing test framework.
  on_exit aws_cli_stop
}
###################################
# Stops the docker container of the aws cli.
#
# Globals:
# AWSCLI_CONTAINER_ID
###################################
function aws_cli_stop() {
  # Tear down the shared aws cli container and clear the exported id so that
  # a subsequent start is forced to create a fresh container.
  local container_id="$AWSCLI_CONTAINER_ID"
  docker kill "$container_id"
  docker rm "$container_id"
  export AWSCLI_CONTAINER_ID=
}
# always start it while sourcing, so that AWSCLI_CONTAINER_ID is available from parent script
if [[ $AWSCLI_CONTAINER_ID ]]; then
  aws_cli_stop
fi
# Naive retry: a transient failure (e.g. a network error while pulling the
# image) gets one second attempt before the whole script gives up.
if ! { aws_cli_start || aws_cli_start; }; then
  echo "running the aws cli container failed"
  exit 1
fi
###################################
# Runs an aws command on the previously started container.
#
# Globals:
# AWSCLI_CONTAINER_ID
###################################
function aws_cli() {
  # Build the optional endpoint flag as an array so that an endpoint URL
  # containing shell-special characters is passed through intact (the old
  # unquoted-string expansion relied on word-splitting).
  local -a endpoint=()
  if [[ $S3_ENDPOINT ]]; then
    endpoint=(--endpoint-url "$S3_ENDPOINT")
  fi
  if ! docker exec "$AWSCLI_CONTAINER_ID" aws "${endpoint[@]}" "$@"; then
    echo "Error executing aws command: $*"
    return 1
  fi
}
###################################
# Download s3 objects to folder by full path prefix.
#
# Globals:
# IT_CASE_S3_BUCKET
# TEST_INFRA_DIR
# Arguments:
# $1 - local path to save folder with files
# $2 - s3 key full path prefix
# $3 - recursive?
# $4 - s3 file name prefix w/o directory to filter files by name (optional)
# Returns:
# None
###################################
function s3_get_by_full_path_and_filename_prefix() {
  # Build optional flags as an array: the previous string-based version
  # embedded literal single quotes ("--exclude '*'") which are NOT stripped
  # on unquoted expansion, so aws received the quotes as part of the pattern.
  local -a args=()
  if [[ "$3" == true ]]; then
    args+=(--recursive)
  fi
  if [[ $4 ]]; then
    # Only match files whose basename starts with the prefix; [!/] keeps the
    # match within a single directory level.
    args+=(--exclude "*" --include "*/${4}[!/]*")
  fi
  # Map the local target under TEST_INFRA_DIR onto the container's /hostdir
  # bind mount.
  local relative_dir=${1#$TEST_INFRA_DIR}
  aws_cli s3 cp --quiet "s3://$IT_CASE_S3_BUCKET/$2" "/hostdir/${relative_dir}" "${args[@]}"
}
###################################
# Delete s3 objects by full path prefix.
#
# Globals:
# IT_CASE_S3_BUCKET
# Arguments:
# $1 - s3 key full path prefix
# Returns:
# None
###################################
function s3_delete_by_full_path_prefix() {
  # Recursively delete every object under the given key prefix.
  local key_prefix="$1"
  aws_cli s3 rm --quiet "s3://$IT_CASE_S3_BUCKET/$key_prefix" --recursive
}
###################################
# Count number of lines in files of s3 objects filtered by prefix.
# The lines has to be simple to comply with CSV format
# because SQL is used to query the s3 objects.
#
# Globals:
# IT_CASE_S3_BUCKET
# Arguments:
# $1 - s3 key prefix
# $2 - s3 file name prefix w/o directory to filter files by name (optional)
# Returns:
# line number in part files
###################################
function s3_get_number_of_lines_by_prefix() {
  local file_prefix="${2-}"
  # Declare work variables local so repeated calls do not leak/reuse globals
  # (in particular the throttling counter 'i' previously carried over).
  local parts N i old_state part
  # find all files that have the given prefix
  parts=$(aws_cli s3api list-objects --bucket "$IT_CASE_S3_BUCKET" --prefix "$1" |
    docker run -i --rm ghcr.io/jqlang/jq:1.7.1 -r '[.Contents[].Key] | join(" ")')
  # in parallel (N tasks), query the number of lines, store result in a file named lines
  N=10
  i=0
  echo "0" >lines
  # turn off job control, so that there is no noise when starting/finishing bg tasks
  old_state=$(set +o)
  set +m
  # NOTE(review): $parts is intentionally unquoted — jq joined the keys with
  # spaces and word-splitting iterates them; assumes keys contain no spaces.
  for part in $parts; do
    # unquoted $file_prefix* is a deliberate glob pattern match
    if [[ $(basename "${part}") == $file_prefix* ]]; then
      # throttle: wait for the current batch after every N background jobs
      ((i = i % N))
      ((i++ == 0)) && wait
      aws_cli s3api select-object-content --bucket "$IT_CASE_S3_BUCKET" --key "$part" \
        --expression "select count(*) from s3object" --expression-type "SQL" \
        --input-serialization='{"CSV": {}}' --output-serialization='{"CSV": {}}' /dev/stdout >>lines &
    fi
  done
  wait
  # restore old settings
  eval "$old_state"
  # add number of lines of each part
  paste -sd+ lines | bc
}