mirror of https://github.com/apache/flink.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
4.7 KiB
Cython
125 lines
4.7 KiB
Cython
################################################################################
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
################################################################################
|
|
# cython: language_level = 3
|
|
# cython: infer_types = True
|
|
# cython: profile=True
|
|
# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
|
|
|
|
from libc.stdlib cimport realloc
|
|
from libc.string cimport memcpy
|
|
|
|
from pyflink.fn_execution.utils.operation_utils import PeriodicThread
|
|
|
|
cdef class BeamInputStream(LengthPrefixInputStream):
    """
    Reader of var-int length-prefixed records from the raw buffer of a Beam input stream.

    The wrapped stream's buffer pointer (``allc``) and position (``pos``) are captured at
    construction time; every call to ``read`` advances the local position and writes it
    back to the wrapped stream.
    """

    def __cinit__(self, input_stream, size):
        # ``size`` is stored as the total buffer size that ``available`` measures against.
        self._input_buffer_size = size
        self._input_stream = input_stream
        self._parse_input_stream(input_stream)

    cdef size_t read(self, char** data):
        # Decodes the var-int length prefix at the current position, points ``data[0]`` at
        # the first payload byte (no copy is made) and returns the payload length. The
        # position is moved past the payload and mirrored to the wrapped stream.
        cdef size_t length = 0
        cdef size_t shift = 0
        cdef unsigned char current
        # read the var-int size: 7 payload bits per byte, high bit set means "more follows"
        while True:
            current = <unsigned char> self._input_data[self._input_pos]
            self._input_pos += 1
            # Widen to size_t *before* shifting: shifting the int-promoted byte overflows
            # once shift reaches 25, which corrupts the decoded length of large records.
            length |= (<size_t> (current & 0x7F)) << shift
            shift += 7
            if not (current & 0x80):
                break
        data[0] = self._input_data + self._input_pos
        self._input_pos += length
        self._input_stream.pos = self._input_pos
        return length

    cdef size_t available(self):
        # Number of bytes between the current position and the configured buffer size.
        return self._input_buffer_size - self._input_pos

    cdef void _parse_input_stream(self, BInputStream input_stream):
        # Capture the wrapped stream's buffer pointer and its current read position.
        self._input_data = input_stream.allc
        self._input_pos = input_stream.pos
|
|
|
|
cdef class BeamSizeBasedOutputStream(LengthPrefixOutputStream):
    """
    Writer of var-int length-prefixed records into the buffer of a Beam output stream.

    The buffer, position and capacity are taken from the wrapped stream in
    ``reset_output_stream`` and written back to it whenever they change. ``maybe_flush``
    flushes once more than 10,000,000 bytes are pending.
    """

    cdef void write(self, char*data, size_t length):
        # Appends the var-int encoding of ``length`` followed by ``length`` bytes copied
        # from ``data``, growing the buffer first when it is too small.
        cdef char bits
        cdef size_t size = length
        cdef size_t i
        cdef size_t new_size
        cdef char* new_data
        # Reserve 9 bytes for the prefix: a var-int carries 7 bits per byte, so 9 bytes
        # cover every length below 2**63.
        if self._output_buffer_size < self._output_pos + length + 9:
            new_size = self._output_buffer_size + length + 9
            new_data = <char*> realloc(self._output_data, new_size)
            if new_data == NULL:
                # realloc leaves the old block untouched on failure; keep all state as it
                # was instead of overwriting the pointer (leak) and writing through NULL.
                raise MemoryError("Failed to grow the output buffer to %d bytes." % new_size)
            self._output_buffer_size = new_size
            self._output_data = new_data
            self._output_stream.buffer_size = self._output_buffer_size
            self._output_stream.data = self._output_data
        # write variable prefix length
        # NOTE(review): for length == 0 this loop does not run, so no prefix byte is
        # emitted at all - confirm that callers never write empty records.
        while size:
            bits = size & 0x7F
            size >>= 7
            if size:
                # more 7-bit groups follow: set the continuation bit
                bits |= 0x80
            self._output_data[self._output_pos] = bits
            self._output_pos += 1

        if length < 8:
            # This is faster than memcpy when the string is short.
            for i in range(length):
                self._output_data[self._output_pos + i] = data[i]
        else:
            memcpy(self._output_data + self._output_pos, data, length)
        self._output_pos += length
        # keep the wrapped stream's position in sync with the local one
        self._output_stream.pos = self._output_pos

    cpdef void flush(self):
        """Flush the wrapped output stream and rewind the local write position to 0."""
        self._output_stream.flush()
        self._output_pos = 0

    cdef void reset_output_stream(self, BOutputStream output_stream):
        # Re-targets this writer at ``output_stream`` and adopts its buffer, position and
        # capacity.
        self._output_stream = output_stream
        self._output_data = output_stream.data
        self._output_pos = output_stream.pos
        self._output_buffer_size = output_stream.buffer_size

    cpdef bint maybe_flush(self):
        """
        Flush when more than 10,000,000 bytes have been written since the last flush.

        :return: True if a flush was performed, False otherwise.
        """
        if self._output_pos > 10_000_000:
            self.flush()
            return True
        return False
|
|
|
|
cdef class BeamTimeBasedOutputStream(BeamSizeBasedOutputStream):
    """
    Size-based output stream that additionally flushes when a periodic timer has fired.

    A daemon ``PeriodicThread`` calls ``notify_flush``, which only raises a flag; the
    actual flush happens on the next ``maybe_flush`` call made by the writer.
    """

    def __init__(self, *args, **kwargs):
        self._flush_event = False
        # presumably the first argument is the period in seconds - confirm against
        # PeriodicThread
        self._periodic_flusher = PeriodicThread(1, self.notify_flush)
        self._periodic_flusher.daemon = True
        self._periodic_flusher.start()

    cpdef void notify_flush(self):
        """Mark that a flush is due; invoked by the periodic flusher thread."""
        self._flush_event = True

    cpdef void close(self):
        """Cancel the periodic flusher, if any, and drop the reference to it."""
        if self._periodic_flusher:
            self._periodic_flusher.cancel()
            self._periodic_flusher = None

    cpdef bint maybe_flush(self):
        """
        Flush if the timer has requested it, otherwise fall back to the size-based check.

        :return: True if a flush was performed, False otherwise. This matches the contract
                 of ``BeamSizeBasedOutputStream.maybe_flush``; previously no value was
                 returned, so the result was always False.
        """
        if self._flush_event:
            self.flush()
            self._flush_event = False
            return True
        if BeamSizeBasedOutputStream.maybe_flush(self):
            # a size-triggered flush also satisfies any timer request raised meanwhile
            self._flush_event = False
            return True
        return False
|