You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
flink/flink-python/setup.py

343 lines
14 KiB
Python

################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
from __future__ import print_function
import glob
import io
import os
import platform
import subprocess
import sys
from distutils.command.build_ext import build_ext
from shutil import copytree, copy, rmtree
from setuptools import setup, Extension
if sys.version_info < (3, 5):
print("Python versions prior to 3.5 are not supported for PyFlink.",
file=sys.stderr)
sys.exit(-1)
def remove_if_exists(file_path):
if os.path.exists(file_path):
if os.path.islink(file_path) or os.path.isfile(file_path):
os.remove(file_path)
else:
assert os.path.isdir(file_path)
rmtree(file_path)
def find_file_path(pattern):
files = glob.glob(pattern)
if len(files) < 1:
print("Failed to find the file %s." % pattern)
exit(-1)
if len(files) > 1:
print("The file pattern %s is ambiguous: %s" % (pattern, files))
exit(-1)
return files[0]
# Currently Cython optimizing doesn't support Windows.
if platform.system() == 'Windows':
extensions = ([])
else:
try:
from Cython.Build import cythonize
extensions = cythonize([
Extension(
name="pyflink.fn_execution.coder_impl_fast",
sources=["pyflink/fn_execution/coder_impl_fast.pyx"],
include_dirs=["pyflink/fn_execution/"]),
Extension(
name="pyflink.fn_execution.stream",
sources=["pyflink/fn_execution/stream.pyx"],
include_dirs=["pyflink/fn_execution/"]),
Extension(
name="pyflink.fn_execution.beam.beam_stream",
sources=["pyflink/fn_execution/beam/beam_stream.pyx"],
include_dirs=["pyflink/fn_execution/beam"]),
Extension(
name="pyflink.fn_execution.beam.beam_coder_impl_fast",
sources=["pyflink/fn_execution/beam/beam_coder_impl_fast.pyx"],
include_dirs=["pyflink/fn_execution/beam"]),
Extension(
name="pyflink.fn_execution.beam.beam_operations_fast",
sources=["pyflink/fn_execution/beam/beam_operations_fast.pyx"],
include_dirs=["pyflink/fn_execution/beam"]),
])
except ImportError:
if os.path.exists("pyflink/fn_execution/coder_impl_fast.c"):
extensions = ([
Extension(
name="pyflink.fn_execution.coder_impl_fast",
sources=["pyflink/fn_execution/coder_impl_fast.c"],
include_dirs=["pyflink/fn_execution/"]),
Extension(
name="pyflink.fn_execution.stream",
sources=["pyflink/fn_execution/stream.c"],
include_dirs=["pyflink/fn_execution/"]),
Extension(
name="pyflink.fn_execution.beam.beam_stream",
sources=["pyflink/fn_execution/beam/beam_stream.c"],
include_dirs=["pyflink/fn_execution/beam"]),
Extension(
name="pyflink.fn_execution.beam.beam_coder_impl_fast",
sources=["pyflink/fn_execution/beam/beam_coder_impl_fast.c"],
include_dirs=["pyflink/fn_execution/beam"]),
Extension(
name="pyflink.fn_execution.beam.beam_operations_fast",
sources=["pyflink/fn_execution/beam/beam_operations_fast.c"],
include_dirs=["pyflink/fn_execution/beam"]),
])
else:
extensions = ([])
this_directory = os.path.abspath(os.path.dirname(__file__))
version_file = os.path.join(this_directory, 'pyflink/version.py')
try:
exec(open(version_file).read())
except IOError:
print("Failed to load PyFlink version file for packaging. " +
"'%s' not found!" % version_file,
file=sys.stderr)
sys.exit(-1)
VERSION = __version__ # noqa
with io.open(os.path.join(this_directory, 'README.md'), 'r', encoding='utf-8') as f:
long_description = f.read()
TEMP_PATH = "deps"
LIB_TEMP_PATH = os.path.join(TEMP_PATH, "lib")
OPT_TEMP_PATH = os.path.join(TEMP_PATH, "opt")
CONF_TEMP_PATH = os.path.join(TEMP_PATH, "conf")
LOG_TEMP_PATH = os.path.join(TEMP_PATH, "log")
EXAMPLES_TEMP_PATH = os.path.join(TEMP_PATH, "examples")
LICENSES_TEMP_PATH = os.path.join(TEMP_PATH, "licenses")
PLUGINS_TEMP_PATH = os.path.join(TEMP_PATH, "plugins")
SCRIPTS_TEMP_PATH = os.path.join(TEMP_PATH, "bin")
LICENSE_FILE_TEMP_PATH = os.path.join(this_directory, "LICENSE")
NOTICE_FILE_TEMP_PATH = os.path.join(this_directory, "NOTICE")
README_FILE_TEMP_PATH = os.path.join("pyflink", "README.txt")
PYFLINK_UDF_RUNNER_SH = "pyflink-udf-runner.sh"
PYFLINK_UDF_RUNNER_BAT = "pyflink-udf-runner.bat"
in_flink_source = os.path.isfile("../flink-java/src/main/java/org/apache/flink/api/java/"
"ExecutionEnvironment.java")
# Due to changes in FLINK-14008, the licenses directory and NOTICE file may not exist in
# build-target folder. Just ignore them in this case.
exist_licenses = None
try:
if in_flink_source:
try:
os.mkdir(TEMP_PATH)
except:
print("Temp path for symlink to parent already exists {0}".format(TEMP_PATH),
file=sys.stderr)
sys.exit(-1)
flink_version = VERSION.replace(".dev0", "-SNAPSHOT")
FLINK_HOME = os.path.abspath(
"../flink-dist/target/flink-%s-bin/flink-%s" % (flink_version, flink_version))
incorrect_invocation_message = """
If you are installing pyflink from flink source, you must first build Flink and
run sdist.
To build Flink with maven you can run:
mvn -DskipTests clean package
Building the source dist is done in the flink-python directory:
cd flink-python
python setup.py sdist
pip install dist/*.tar.gz"""
LIB_PATH = os.path.join(FLINK_HOME, "lib")
OPT_PATH = os.path.join(FLINK_HOME, "opt")
OPT_PYTHON_JAR_NAME = os.path.basename(
find_file_path(os.path.join(OPT_PATH, "flink-python_*.jar")))
OPT_SQL_CLIENT_JAR_NAME = os.path.basename(
find_file_path(os.path.join(OPT_PATH, "flink-sql-client_*.jar")))
CONF_PATH = os.path.join(FLINK_HOME, "conf")
EXAMPLES_PATH = os.path.join(FLINK_HOME, "examples")
LICENSES_PATH = os.path.join(FLINK_HOME, "licenses")
PLUGINS_PATH = os.path.join(FLINK_HOME, "plugins")
SCRIPTS_PATH = os.path.join(FLINK_HOME, "bin")
LICENSE_FILE_PATH = os.path.join(FLINK_HOME, "LICENSE")
README_FILE_PATH = os.path.join(FLINK_HOME, "README.txt")
exist_licenses = os.path.exists(LICENSES_PATH)
if not os.path.isdir(LIB_PATH):
print(incorrect_invocation_message, file=sys.stderr)
sys.exit(-1)
try:
os.symlink(LIB_PATH, LIB_TEMP_PATH)
support_symlinks = True
except BaseException: # pylint: disable=broad-except
support_symlinks = False
os.mkdir(OPT_TEMP_PATH)
if support_symlinks:
os.symlink(os.path.join(OPT_PATH, OPT_PYTHON_JAR_NAME),
os.path.join(OPT_TEMP_PATH, OPT_PYTHON_JAR_NAME))
os.symlink(os.path.join(OPT_PATH, OPT_SQL_CLIENT_JAR_NAME),
os.path.join(OPT_TEMP_PATH, OPT_SQL_CLIENT_JAR_NAME))
os.symlink(CONF_PATH, CONF_TEMP_PATH)
os.symlink(EXAMPLES_PATH, EXAMPLES_TEMP_PATH)
os.symlink(PLUGINS_PATH, PLUGINS_TEMP_PATH)
os.symlink(LICENSE_FILE_PATH, LICENSE_FILE_TEMP_PATH)
os.symlink(README_FILE_PATH, README_FILE_TEMP_PATH)
else:
copytree(LIB_PATH, LIB_TEMP_PATH)
copy(os.path.join(OPT_PATH, OPT_PYTHON_JAR_NAME),
os.path.join(OPT_TEMP_PATH, OPT_PYTHON_JAR_NAME))
copy(os.path.join(OPT_PATH, OPT_SQL_CLIENT_JAR_NAME),
os.path.join(OPT_TEMP_PATH, OPT_SQL_CLIENT_JAR_NAME))
copytree(CONF_PATH, CONF_TEMP_PATH)
copytree(EXAMPLES_PATH, EXAMPLES_TEMP_PATH)
copytree(PLUGINS_PATH, PLUGINS_TEMP_PATH)
copy(LICENSE_FILE_PATH, LICENSE_FILE_TEMP_PATH)
copy(README_FILE_PATH, README_FILE_TEMP_PATH)
os.mkdir(LOG_TEMP_PATH)
with open(os.path.join(LOG_TEMP_PATH, "empty.txt"), 'w') as f:
f.write("This file is used to force setuptools to include the log directory. "
"You can delete it at any time after installation.")
# copy the udf runner scripts
copytree(SCRIPTS_PATH, SCRIPTS_TEMP_PATH)
copy(os.path.join(this_directory, "bin", PYFLINK_UDF_RUNNER_SH),
os.path.join(SCRIPTS_TEMP_PATH, PYFLINK_UDF_RUNNER_SH))
copy(os.path.join(this_directory, "bin", PYFLINK_UDF_RUNNER_BAT),
os.path.join(SCRIPTS_TEMP_PATH, PYFLINK_UDF_RUNNER_BAT))
if exist_licenses and platform.system() != "Windows":
# regenerate the licenses directory and NOTICE file as we only copy part of the
# flink binary distribution.
collect_licenses_file_sh = os.path.abspath(os.path.join(
this_directory, "..", "tools", "releasing", "collect_license_files.sh"))
subprocess.check_output([collect_licenses_file_sh, TEMP_PATH, TEMP_PATH])
# move the NOTICE file to the root of the package
GENERATED_NOTICE_FILE_PATH = os.path.join(TEMP_PATH, "NOTICE")
os.rename(GENERATED_NOTICE_FILE_PATH, NOTICE_FILE_TEMP_PATH)
else:
if not os.path.isdir(LIB_TEMP_PATH) or not os.path.isdir(OPT_TEMP_PATH) \
or not os.path.isdir(SCRIPTS_TEMP_PATH):
print("The flink core files are not found. Please make sure your installation package "
"is complete, or do this in the flink-python directory of the flink source "
"directory.")
sys.exit(-1)
exist_licenses = os.path.exists(LICENSES_TEMP_PATH)
script_names = ["pyflink-shell.sh", "find-flink-home.sh"]
scripts = [os.path.join(SCRIPTS_TEMP_PATH, script) for script in script_names]
scripts.append("pyflink/find_flink_home.py")
PACKAGES = ['pyflink',
'pyflink.table',
'pyflink.util',
'pyflink.datastream',
'pyflink.dataset',
'pyflink.common',
'pyflink.fn_execution',
'pyflink.fn_execution.beam',
'pyflink.metrics',
'pyflink.ml',
'pyflink.ml.api',
'pyflink.ml.api.param',
'pyflink.ml.lib',
'pyflink.ml.lib.param',
'pyflink.lib',
'pyflink.opt',
'pyflink.conf',
'pyflink.log',
'pyflink.examples',
'pyflink.plugins',
'pyflink.bin']
PACKAGE_DIR = {
'pyflink.lib': TEMP_PATH + '/lib',
'pyflink.opt': TEMP_PATH + '/opt',
'pyflink.conf': TEMP_PATH + '/conf',
'pyflink.log': TEMP_PATH + '/log',
'pyflink.examples': TEMP_PATH + '/examples',
'pyflink.plugins': TEMP_PATH + '/plugins',
'pyflink.bin': TEMP_PATH + '/bin'}
PACKAGE_DATA = {
'pyflink': ['README.txt'],
'pyflink.lib': ['*.jar'],
'pyflink.opt': ['*.*', '*/*'],
'pyflink.conf': ['*'],
'pyflink.log': ['*'],
'pyflink.examples': ['*.py', '*/*.py'],
'pyflink.plugins': ['*', '*/*'],
'pyflink.bin': ['*']}
if exist_licenses and platform.system() != "Windows":
PACKAGES.append('pyflink.licenses')
PACKAGE_DIR['pyflink.licenses'] = TEMP_PATH + '/licenses'
PACKAGE_DATA['pyflink.licenses'] = ['*']
setup(
name='apache-flink',
version=VERSION,
packages=PACKAGES,
include_package_data=True,
package_dir=PACKAGE_DIR,
package_data=PACKAGE_DATA,
scripts=scripts,
url='https://flink.apache.org',
license='https://www.apache.org/licenses/LICENSE-2.0',
author='Apache Software Foundation',
author_email='dev@flink.apache.org',
python_requires='>=3.5',
install_requires=['py4j==0.10.8.1', 'python-dateutil==2.8.0', 'apache-beam==2.23.0',
'cloudpickle==1.2.2', 'avro-python3>=1.8.1,<=1.9.1', 'jsonpickle==1.2',
'pandas>=0.24.2,<1; python_full_version < "3.5.3"',
'pandas>=0.25.2,<1; python_full_version >= "3.5.3"',
'pyarrow>=0.15.1,<0.18.0', 'pytz>=2018.3'],
cmdclass={'build_ext': build_ext},
tests_require=['pytest==4.4.1'],
description='Apache Flink Python API',
long_description=long_description,
long_description_content_type='text/markdown',
zip_safe=False,
classifiers=[
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8'],
ext_modules=extensions
)
finally:
if in_flink_source:
remove_if_exists(TEMP_PATH)
remove_if_exists(LICENSE_FILE_TEMP_PATH)
remove_if_exists(NOTICE_FILE_TEMP_PATH)
remove_if_exists(README_FILE_TEMP_PATH)