mirror of https://github.com/apache/flink.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
258 lines
9.1 KiB
Python
258 lines
9.1 KiB
Python
################################################################################
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
################################################################################
|
|
from typing import Dict, Union, List, Optional
|
|
|
|
from pyflink.common.config_options import ConfigOption
|
|
from pyflink.java_gateway import get_gateway
|
|
from pyflink.table.schema import Schema
|
|
from pyflink.util.java_utils import to_jarray
|
|
|
|
# Names exported by a wildcard import (``from ... import *``) of this module.
__all__ = ['TableDescriptor', 'FormatDescriptor']
|
|
|
|
|
|
class TableDescriptor(object):
    """
    Describes a CatalogTable representing a source or sink.

    A TableDescriptor acts as a template from which a CatalogTable instance is created. It is
    close to the "CREATE TABLE" SQL DDL statement: it carries the schema, the connector options
    and further characteristics. Because tables in Flink are typically backed by external
    systems, the descriptor states how a connector (and possibly its format) is configured.

    A descriptor can be used to register a table in the Table API, see
    :func:`create_temporary_table` in TableEnvironment.
    """

    def __init__(self, j_table_descriptor):
        # Handle to the wrapped Java TableDescriptor; every method below delegates to it.
        self._j_table_descriptor = j_table_descriptor

    @staticmethod
    def for_connector(connector: str) -> 'TableDescriptor.Builder':
        """
        Creates a new :class:`~pyflink.table.TableDescriptor.Builder` for a table using the given
        connector.

        :param connector: The factory identifier for the connector.
        :return: A builder wrapping the Java builder obtained from ``forConnector``.
        """
        return TableDescriptor.Builder(
            get_gateway().jvm.TableDescriptor.forConnector(connector))

    def get_schema(self) -> Optional[Schema]:
        """
        Returns the schema of this descriptor wrapped in a :class:`Schema`, or None when the
        Java side reports that no schema is present.
        """
        j_optional_schema = self._j_table_descriptor.getSchema()
        return Schema(j_optional_schema.get()) if j_optional_schema.isPresent() else None

    def get_options(self) -> Dict[str, str]:
        """
        Returns the options of this descriptor, exactly as handed back by ``getOptions()`` of
        the wrapped Java object.
        """
        # NOTE(review): the gateway result is passed through unconverted; presumably it is a
        # map-like proxy rather than a plain dict - confirm before relying on dict-only behavior.
        return self._j_table_descriptor.getOptions()

    def get_partition_keys(self) -> List[str]:
        """
        Returns the partition keys of this descriptor, exactly as handed back by
        ``getPartitionKeys()`` of the wrapped Java object.
        """
        return self._j_table_descriptor.getPartitionKeys()

    def get_comment(self) -> Optional[str]:
        """
        Returns the comment of this descriptor, or None when the Java side reports that no
        comment is present.
        """
        j_optional_comment = self._j_table_descriptor.getComment()
        return j_optional_comment.get() if j_optional_comment.isPresent() else None

    def __str__(self):
        # The textual form is whatever the Java object's toString() produces.
        return self._j_table_descriptor.toString()

    def __eq__(self, other):
        # Objects of a different class never compare equal; for the same class the decision
        # is left to the Java equals() of the wrapped descriptors.
        if self.__class__ != other.__class__:
            return False
        return self._j_table_descriptor.equals(other._j_table_descriptor)

    def __hash__(self):
        # Delegates to the Java hashCode(), mirroring the delegation to equals() in __eq__.
        return self._j_table_descriptor.hashCode()

    class Builder(object):
        """
        Builder for TableDescriptor.

        Each configuration method forwards to the wrapped Java builder and returns this Python
        builder, so calls can be chained.
        """

        def __init__(self, j_builder):
            # Handle to the wrapped Java builder.
            self._j_builder = j_builder

        def schema(self, schema: Schema) -> 'TableDescriptor.Builder':
            """
            Define the schema of the TableDescriptor.

            :param schema: The schema whose underlying Java schema is passed to the builder.
            :return: This builder.
            """
            self._j_builder.schema(schema._j_schema)
            return self

        def option(self, key: Union[str, ConfigOption], value) -> 'TableDescriptor.Builder':
            """
            Sets the given option on the table.

            Option keys must be fully specified. When defining options for a Format, use
            format(FormatDescriptor) instead.

            Example:
            ::

                >>> (TableDescriptor.for_connector("kafka")
                ...     .option("scan.startup.mode", "latest-offset")
                ...     .build())

            :param key: Either the option key as a string or a ConfigOption.
            :param value: The value to set for the option.
            :return: This builder.
            """
            # A string key is handed over as is; a ConfigOption is unwrapped to its Java object.
            j_key = key if isinstance(key, str) else key._j_config_option
            self._j_builder.option(j_key, value)
            return self

        def format(self,
                   format: Union[str, 'FormatDescriptor'],
                   format_option: ConfigOption[str] = None) -> 'TableDescriptor.Builder':
            """
            Defines the format to be used for this table.

            Note that not every connector requires a format to be specified, while others may use
            multiple formats.

            Example:
            ::

                >>> (TableDescriptor.for_connector("kafka")
                ...     .format(FormatDescriptor.for_format("json")
                ...             .option("ignore-parse-errors", "true")
                ...             .build()))

                will result in the options:

                    'format' = 'json'
                    'json.ignore-parse-errors' = 'true'

            :param format: Either the format identifier as a string or a FormatDescriptor.
            :param format_option: Optional ConfigOption; when given, its Java object is passed to
                                  the Java builder ahead of the format.
            :return: This builder.
            """
            # Assemble the Java call arguments: the optional config option first, then the
            # format (a plain string, or the Java object behind a FormatDescriptor).
            j_args = [] if format_option is None else [format_option._j_config_option]
            j_args.append(format if isinstance(format, str) else format._j_format_descriptor)
            self._j_builder.format(*j_args)
            return self

        def partitioned_by(self, *partition_keys: str) -> 'TableDescriptor.Builder':
            """
            Define which columns this table is partitioned by.

            :param partition_keys: The names of the partition columns.
            :return: This builder.
            """
            # The keys are converted to a Java String array before being passed on.
            j_string_class = get_gateway().jvm.java.lang.String
            j_partition_keys = to_jarray(j_string_class, partition_keys)
            self._j_builder.partitionedBy(j_partition_keys)
            return self

        def comment(self, comment: str) -> 'TableDescriptor.Builder':
            """
            Define the comment for this table.

            :param comment: The comment text.
            :return: This builder.
            """
            self._j_builder.comment(comment)
            return self

        def build(self) -> 'TableDescriptor':
            """
            Returns an immutable instance of :class:`~pyflink.table.TableDescriptor`.
            """
            j_table_descriptor = self._j_builder.build()
            return TableDescriptor(j_table_descriptor)
|
|
|
|
|
|
class FormatDescriptor(object):
    """
    Describes a Format and its options for use with :class:`~pyflink.table.TableDescriptor`.

    Formats take care of encoding and decoding data in table connectors. Not every connector
    has a format, while others may have several (e.g. the Kafka connector has separate formats
    for keys and values). Common formats are "json", "csv", "avro", etc.
    """

    def __init__(self, j_format_descriptor):
        # Handle to the wrapped Java FormatDescriptor; every method below delegates to it.
        self._j_format_descriptor = j_format_descriptor

    @staticmethod
    def for_format(format: str) -> 'FormatDescriptor.Builder':
        """
        Creates a new :class:`~pyflink.table.FormatDescriptor.Builder` describing a format with the
        given format identifier.

        :param format: The factory identifier for the format.
        :return: A builder wrapping the Java builder obtained from ``forFormat``.
        """
        return FormatDescriptor.Builder(
            get_gateway().jvm.FormatDescriptor.forFormat(format))

    def get_format(self) -> str:
        """
        Returns the format of this descriptor as reported by ``getFormat()`` of the wrapped
        Java object.
        """
        return self._j_format_descriptor.getFormat()

    def get_options(self) -> Dict[str, str]:
        """
        Returns the options of this descriptor, exactly as handed back by ``getOptions()`` of
        the wrapped Java object.
        """
        # NOTE(review): the gateway result is passed through unconverted; presumably it is a
        # map-like proxy rather than a plain dict - confirm before relying on dict-only behavior.
        return self._j_format_descriptor.getOptions()

    def __str__(self):
        # The textual form is whatever the Java object's toString() produces.
        return self._j_format_descriptor.toString()

    def __eq__(self, other):
        # Objects of a different class never compare equal; for the same class the decision
        # is left to the Java equals() of the wrapped descriptors.
        if self.__class__ != other.__class__:
            return False
        return self._j_format_descriptor.equals(other._j_format_descriptor)

    def __hash__(self):
        # Delegates to the Java hashCode(), mirroring the delegation to equals() in __eq__.
        return self._j_format_descriptor.hashCode()

    class Builder(object):
        """
        Builder for FormatDescriptor.

        ``option`` forwards to the wrapped Java builder and returns this Python builder, so
        calls can be chained.
        """

        def __init__(self, j_builder):
            # Handle to the wrapped Java builder.
            self._j_builder = j_builder

        def option(self, key: Union[str, ConfigOption], value) -> 'FormatDescriptor.Builder':
            """
            Sets the given option on the format.

            Note that format options must not be prefixed with the format identifier itself here.

            Example:
            ::

                >>> (FormatDescriptor.for_format("json")
                ...     .option("ignore-parse-errors", "true")
                ...     .build())

                will automatically be converted into its prefixed form:

                    'format' = 'json'
                    'json.ignore-parse-errors' = 'true'

            :param key: Either the option key as a string or a ConfigOption.
            :param value: The value to set for the option.
            :return: This builder.
            """
            # A string key is handed over as is; a ConfigOption is unwrapped to its Java object.
            j_key = key if isinstance(key, str) else key._j_config_option
            self._j_builder.option(j_key, value)
            return self

        def build(self) -> 'FormatDescriptor':
            """
            Returns an immutable instance of :class:`~pyflink.table.FormatDescriptor`.
            """
            j_format_descriptor = self._j_builder.build()
            return FormatDescriptor(j_format_descriptor)
|