#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

###################################
# Starts a docker container of the AWS CLI.
#
# To improve performance of s3_get_number_of_lines_by_prefix, one docker container is reused for several aws
# commands. An interactive python shell keeps the container busy so that it can be reused to issue several commands.
#
# Globals:
#   TEST_INFRA_DIR
# Exports:
#   AWSCLI_CONTAINER_ID
###################################
function aws_cli_start() {
  local CONTAINER_ID
  CONTAINER_ID=$(docker run -d \
    --network host \
    --mount type=bind,source="$TEST_INFRA_DIR",target=/hostdir \
    -e AWS_REGION -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
    --entrypoint python \
    -it banst/awscli)
  if [ $? -ne 0 ]; then
    echo "running aws cli container failed"
    if [ -n "$CONTAINER_ID" ]
    then
      docker kill "$CONTAINER_ID"
      docker rm "$CONTAINER_ID"
    fi
    return 1
  fi
  export AWSCLI_CONTAINER_ID="$CONTAINER_ID"
  while [[ "$(docker inspect -f '{{.State.Running}}' "$AWSCLI_CONTAINER_ID")" != "true" ]]; do
    sleep 0.1
  done
  on_exit aws_cli_stop
}

###################################
# Stops the docker container of the AWS CLI.
#
# Globals:
#   AWSCLI_CONTAINER_ID
###################################
function aws_cli_stop() {
  docker kill "$AWSCLI_CONTAINER_ID"
  docker rm "$AWSCLI_CONTAINER_ID"
  export AWSCLI_CONTAINER_ID=
}

# always start it while sourcing, so that AWSCLI_CONTAINER_ID is available from the parent script
if [[ $AWSCLI_CONTAINER_ID ]]; then
  aws_cli_stop
fi
aws_cli_start || aws_cli_start
if [ $? -ne 0 ]; then
  echo "running the aws cli container failed"
  exit 1
fi

###################################
# Runs an aws command on the previously started container.
#
# Globals:
#   AWSCLI_CONTAINER_ID
###################################
function aws_cli() {
  local endpoint=""
  if [[ $S3_ENDPOINT ]]; then
    endpoint="--endpoint-url $S3_ENDPOINT"
  fi
  if ! docker exec "$AWSCLI_CONTAINER_ID" aws $endpoint "$@"; then
    echo "Error executing aws command: $@"
    return 1
  fi
}
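###################################
# Illustrative calls of aws_cli (commented out, not executed while sourcing;
# the "temp/" key prefix below is a placeholder, not something this script
# defines). Failures of the wrapped aws command are reported and returned as
# a non-zero exit code, so callers can chain error handling with ||.
#
#   aws_cli s3 ls "s3://$IT_CASE_S3_BUCKET/temp/"
#   aws_cli s3api head-object --bucket "$IT_CASE_S3_BUCKET" --key "temp/part-0" || echo "object missing"
###################################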
###################################
# Download s3 objects to a folder by full path prefix.
#
# Globals:
#   IT_CASE_S3_BUCKET
#   TEST_INFRA_DIR
# Arguments:
#   $1 - local path to save folder with files
#   $2 - s3 key full path prefix
#   $3 - recursive?
#   $4 - s3 file name prefix w/o directory to filter files by name (optional)
# Returns:
#   None
###################################
function s3_get_by_full_path_and_filename_prefix() {
  local args=
  if [[ "$3" == true ]]; then
    args="$args --recursive"
  fi
  if [[ $4 ]]; then
    args="$args --exclude '*' --include '*/${4}[!/]*'"
  fi
  local relative_dir=${1#$TEST_INFRA_DIR}
  aws_cli s3 cp --quiet "s3://$IT_CASE_S3_BUCKET/$2" "/hostdir/${relative_dir}" $args
}

###################################
# Delete s3 objects by full path prefix.
#
# Globals:
#   IT_CASE_S3_BUCKET
# Arguments:
#   $1 - s3 key full path prefix
# Returns:
#   None
###################################
function s3_delete_by_full_path_prefix() {
  aws_cli s3 rm --quiet "s3://$IT_CASE_S3_BUCKET/$1" --recursive
}

###################################
# Count the number of lines in files of s3 objects filtered by prefix.
# The lines have to be simple to comply with the CSV format
# because SQL is used to query the s3 objects.
#
# Globals:
#   IT_CASE_S3_BUCKET
# Arguments:
#   $1 - s3 key prefix
#   $2 - s3 file name prefix w/o directory to filter files by name (optional)
# Returns:
#   line number in part files
###################################
function s3_get_number_of_lines_by_prefix() {
  local file_prefix="${2-}"

  # find all files that have the given prefix
  parts=$(aws_cli s3api list-objects --bucket "$IT_CASE_S3_BUCKET" --prefix "$1" |
    docker run -i --rm ghcr.io/jqlang/jq:1.7.1 -r '[.Contents[].Key] | join(" ")')

  # in parallel (N tasks), query the number of lines, store the result in a file named lines
  N=10
  echo "0" > lines
  # turn off job control, so that there is no noise when starting/finishing bg tasks
  old_state=$(set +o)
  set +m
  for part in $parts; do
    if [[ $(basename "${part}") == $file_prefix* ]]; then
      ((i = i % N))
      ((i++ == 0)) && wait
      aws_cli s3api select-object-content --bucket "$IT_CASE_S3_BUCKET" --key "$part" \
        --expression "select count(*) from s3object" --expression-type "SQL" \
        --input-serialization='{"CSV": {}}' --output-serialization='{"CSV": {}}' /dev/stdout >> lines &
    fi
  done
  wait
  # restore old settings
  eval "$old_state"

  # add up the number of lines of each part
  paste -sd+ lines | bc
}
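###################################
# Sketch of how a test script might use these helpers after sourcing this
# file (commented out; the "temp/test-data" prefix, the "part-" file name
# prefix, and the output directory are hypothetical, not defined here).
# Note that the download target must live under TEST_INFRA_DIR, because the
# container only mounts that directory as /hostdir.
#
#   # download all "part-"* files under the prefix, recursively
#   s3_get_by_full_path_and_filename_prefix "$TEST_INFRA_DIR/out" "temp/test-data" true "part-"
#
#   # count the lines written by the job, then clean up the prefix
#   lines=$(s3_get_number_of_lines_by_prefix "temp/test-data" "part-")
#   s3_delete_by_full_path_prefix "temp/test-data"
###################################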