From 50b82e026df0a3e2d1c287ba32075c752e97efb4 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Mon, 6 Jul 2015 16:49:55 -0700 Subject: [PATCH 001/291] Add metrics to KafkaClient. --- kafka/client.py | 27 ++++++++++++++++++++++++++- kafka/consumer/kafka.py | 21 ++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 817c62152..88cba501e 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -19,6 +19,22 @@ log = logging.getLogger(__name__) +def time_metric(metric_name): + def decorator(fn): + @functools.wraps(fn) + def wrapper(self, *args, **kwargs): + start_time = time.time() + ret = fn(self, *args, **kwargs) + + if self.metrics: + metric = getattr(self.metrics, metric_name) + metric.addValue(time.time() - start_time) + + return ret + return wrapper + return decorator + + class KafkaClient(object): CLIENT_ID = b'kafka-python' @@ -28,12 +44,14 @@ class KafkaClient(object): # socket timeout. def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, - correlation_id=0): + correlation_id=0, + metrics=None): # We need one connection to bootstrap self.client_id = kafka_bytestring(client_id) self.timeout = timeout self.hosts = collect_hosts(hosts) self.correlation_id = correlation_id + self.metrics = metrics # create connections only when we need them self.conns = {} @@ -48,6 +66,7 @@ def __init__(self, hosts, client_id=CLIENT_ID, # Private API # ################## + def _get_conn(self, host, port): """Get or create a connection to a broker using host and port""" host_key = (host, port) @@ -422,6 +441,7 @@ def load_metadata_for_topics(self, *topics): leader, None, None ) + @time_metric('metadata_request_timer') def send_metadata_request(self, payloads=[], fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_metadata_request @@ -429,6 +449,7 @@ def send_metadata_request(self, payloads=[], fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) + @time_metric('produce_request_timer') def send_produce_request(self, payloads=[], acks=1, timeout=1000, fail_on_error=True, callback=None): """ @@ -479,6 +500,7 @@ def send_produce_request(self, payloads=[], acks=1, timeout=1000, if resp is not None and (not fail_on_error or not self._raise_on_response_error(resp))] + @time_metric('fetch_request_timer') def send_fetch_request(self, payloads=[], fail_on_error=True, callback=None, max_wait_time=100, min_bytes=4096): """ @@ -499,6 +521,7 @@ def send_fetch_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_request_timer') def send_offset_request(self, payloads=[], fail_on_error=True, callback=None): resps = self._send_broker_aware_request( @@ -509,6 +532,7 @@ def send_offset_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_commit_request_timer') def send_offset_commit_request(self, group, payloads=[], fail_on_error=True, callback=None): encoder = functools.partial(KafkaProtocol.encode_offset_commit_request, @@ -519,6 +543,7 @@ def send_offset_commit_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_fetch_request_timer') def 
send_offset_fetch_request(self, group, payloads=[], fail_on_error=True, callback=None): diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index 11c4221b9..b62141c17 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -38,6 +38,8 @@ 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, + 'enable_metrics': False, + # Currently unused 'socket_receive_buffer_bytes': 64 * 1024, 'num_consumer_fetchers': 1, @@ -135,10 +137,23 @@ def configure(self, **configs): 'bootstrap_servers required to configure KafkaConsumer' ) + if self._config['enable_metrics']: + from greplin import scales + metrics = scales.collection('kafka', + scales.PmfStat('metadata_request_timer'), + scales.PmfStat('produce_request_timer'), + scales.PmfStat('fetch_request_timer'), + scales.PmfStat('offset_request_timer'), + scales.PmfStat('offset_commit_request_timer'), + scales.PmfStat('offset_fetch_request_timer')) + else: + metrics = None + self._client = KafkaClient( self._config['bootstrap_servers'], client_id=self._config['client_id'], - timeout=(self._config['socket_timeout_ms'] / 1000.0) + timeout=(self._config['socket_timeout_ms'] / 1000.0), + metrics=metrics ) def set_topic_partitions(self, *topics): @@ -566,6 +581,10 @@ def commit(self): logger.info('No new offsets found to commit in group %s', self._config['group_id']) return False + @property + def metrics(self): + return self._client.metrics + # # Topic/partition management private methods # From 0460cddeecf59f4ebad6c3c74e9d039ad829a7b1 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Thu, 9 Jul 2015 13:14:52 -0700 Subject: [PATCH 002/291] Have metrics count number of each errors in fetch_messages --- kafka/consumer/kafka.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index b62141c17..ab773e0fc 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -139,21 +139,26 @@ def configure(self, **configs): if self._config['enable_metrics']: from greplin import scales - metrics = scales.collection('kafka', + self.metrics = scales.collection('kafka', scales.PmfStat('metadata_request_timer'), scales.PmfStat('produce_request_timer'), scales.PmfStat('fetch_request_timer'), scales.PmfStat('offset_request_timer'), scales.PmfStat('offset_commit_request_timer'), - scales.PmfStat('offset_fetch_request_timer')) + scales.PmfStat('offset_fetch_request_timer'), + + scales.IntStat('failed_payloads_count'), + scales.IntStat('offset_out_of_range_count'), + scales.IntStat('not_leader_for_partition_count'), + scales.IntStat('request_timed_out_count')) else: - metrics = None + self.metrics = None self._client = KafkaClient( self._config['bootstrap_servers'], client_id=self._config['client_id'], timeout=(self._config['socket_timeout_ms'] / 1000.0), - metrics=metrics + metrics=self.metrics ) def set_topic_partitions(self, *topics): @@ -359,6 +364,9 @@ def fetch_messages(self): for resp in responses: if isinstance(resp, FailedPayloadsError): + if self.metrics: + self.metrics.failed_payloads_count += 1 + logger.warning('FailedPayloadsError attempting to fetch data') self._refresh_metadata_on_error() continue @@ -368,6 +376,9 @@ def fetch_messages(self): try: check_error(resp) except OffsetOutOfRangeError: + if self.metrics: + self.metrics.offset_out_of_range_count += 1 + logger.warning('OffsetOutOfRange: topic %s, partition %d, ' 'offset %d (Highwatermark: %d)', topic, partition, @@ -380,6 +391,9 @@ def fetch_messages(self): continue 
except NotLeaderForPartitionError: + if self.metrics: + self.metrics.not_leader_for_partition_count += 1 + logger.warning("NotLeaderForPartitionError for %s - %d. " "Metadata may be out of date", topic, partition) @@ -387,6 +401,9 @@ def fetch_messages(self): continue except RequestTimedOutError: + if self.metrics: + self.metrics.request_timed_out_count += 1 + logger.warning("RequestTimedOutError for %s - %d", topic, partition) continue @@ -581,10 +598,6 @@ def commit(self): logger.info('No new offsets found to commit in group %s', self._config['group_id']) return False - @property - def metrics(self): - return self._client.metrics - # # Topic/partition management private methods # From 92dbc525307bd01e62d84bc2c4c95ae919673c1e Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Thu, 9 Jul 2015 17:51:25 -0700 Subject: [PATCH 003/291] Use a responder function rather than a metrics library. --- kafka/client.py | 9 ++++----- kafka/consumer/kafka.py | 37 +++++++++++-------------------------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 88cba501e..f356e326e 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -26,9 +26,8 @@ def wrapper(self, *args, **kwargs): start_time = time.time() ret = fn(self, *args, **kwargs) - if self.metrics: - metric = getattr(self.metrics, metric_name) - metric.addValue(time.time() - start_time) + if self.metrics_responder: + self.metrics_responder(metric_name, time.time() - start_time) return ret return wrapper @@ -45,13 +44,13 @@ class KafkaClient(object): def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, correlation_id=0, - metrics=None): + metrics_responder=None): # We need one connection to bootstrap self.client_id = kafka_bytestring(client_id) self.timeout = timeout self.hosts = collect_hosts(hosts) self.correlation_id = correlation_id - self.metrics = metrics + self.metrics_responder = metrics_responder # create connections only when we need them self.conns = {} diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index ab773e0fc..9cf225659 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -38,7 +38,7 @@ 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, - 'enable_metrics': False, + 'metrics_responder': None, # Currently unused 'socket_receive_buffer_bytes': 64 * 1024, @@ -137,28 +137,13 @@ def configure(self, **configs): 'bootstrap_servers required to configure KafkaConsumer' ) - if self._config['enable_metrics']: - from greplin import scales - self.metrics = scales.collection('kafka', - scales.PmfStat('metadata_request_timer'), - scales.PmfStat('produce_request_timer'), - scales.PmfStat('fetch_request_timer'), - scales.PmfStat('offset_request_timer'), - scales.PmfStat('offset_commit_request_timer'), - scales.PmfStat('offset_fetch_request_timer'), - - scales.IntStat('failed_payloads_count'), - scales.IntStat('offset_out_of_range_count'), - scales.IntStat('not_leader_for_partition_count'), - scales.IntStat('request_timed_out_count')) - else: - self.metrics = None + self.metrics_responder = self._config['metrics_responder'] self._client = KafkaClient( self._config['bootstrap_servers'], client_id=self._config['client_id'], timeout=(self._config['socket_timeout_ms'] / 1000.0), - metrics=self.metrics + metrics_responder=self.metrics_responder ) def set_topic_partitions(self, *topics): @@ -364,8 +349,8 @@ def fetch_messages(self): for resp in responses: if isinstance(resp, FailedPayloadsError): - if self.metrics: - 
self.metrics.failed_payloads_count += 1 + if self.metrics_responder: + self.metrics_responder('failed_payloads_count', 1) logger.warning('FailedPayloadsError attempting to fetch data') self._refresh_metadata_on_error() @@ -376,8 +361,8 @@ def fetch_messages(self): try: check_error(resp) except OffsetOutOfRangeError: - if self.metrics: - self.metrics.offset_out_of_range_count += 1 + if self.metrics_responder: + self.metrics_responder('offset_out_of_range_count', 1) logger.warning('OffsetOutOfRange: topic %s, partition %d, ' 'offset %d (Highwatermark: %d)', @@ -391,8 +376,8 @@ def fetch_messages(self): continue except NotLeaderForPartitionError: - if self.metrics: - self.metrics.not_leader_for_partition_count += 1 + if self.metrics_responder: + self.metrics_responder('not_leader_for_partition_count', 1) logger.warning("NotLeaderForPartitionError for %s - %d. " "Metadata may be out of date", @@ -401,8 +386,8 @@ def fetch_messages(self): continue except RequestTimedOutError: - if self.metrics: - self.metrics.request_timed_out_count += 1 + if self.metrics_responder: + self.metrics_responder('request_timed_out_count', 1) logger.warning("RequestTimedOutError for %s - %d", topic, partition) From c01aa454aa8f7f37ef9a650bef3f5b02ca5419c9 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 8 Sep 2015 17:23:27 -0700 Subject: [PATCH 004/291] Bump version --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index cd64b48dc..edf30172b 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5-dev' +__version__ = '0.9.4.post1' From 64185688afb1f01851f4eec0ea6440c38a399eba Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Mon, 12 Oct 2015 00:11:18 -0700 Subject: [PATCH 005/291] Unblocking broker aware request --- kafka/client.py | 48 ++++++++++++++++++++++++++++++------------------ kafka/conn.py | 5 +++++ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 1ac0aae58..e59a780e6 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -2,6 +2,7 @@ import copy import functools import logging +import select import time import kafka.common @@ -190,6 +191,10 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): # and collect the responses and errors responses = {} broker_failures = [] + + # For each KafkaConnection we store the real socket so that we can use + # a select to perform unblocking I/O + socket_connection = {} for broker, payloads in payloads_by_broker.items(): requestId = self._next_id() log.debug('Request %s to %s: %s', requestId, broker, payloads) @@ -223,27 +228,34 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): topic_partition = (payload.topic, payload.partition) responses[topic_partition] = None continue + else: + socket_connection[conn.get_connected_socket()] = (conn, broker) - try: - response = conn.recv(requestId) - except ConnectionError as e: - broker_failures.append(broker) - log.warning('ConnectionError attempting to receive a ' - 'response to request %s from server %s: %s', - requestId, broker, e) + conn = None + while socket_connection: + sockets = socket_connection.keys() + rlist, _, _ = select.select(sockets, [], [], None) + conn, broker = socket_connection.pop(rlist[0]) + try: + response = conn.recv(requestId) + except ConnectionError as e: + broker_failures.append(broker) + log.warning('ConnectionError attempting to receive a ' + 'response to request %s from server %s: %s', + 
requestId, broker, e) - for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = FailedPayloadsError(payload) + for payload in payloads: + topic_partition = (payload.topic, payload.partition) + responses[topic_partition] = FailedPayloadsError(payload) - else: - _resps = [] - for payload_response in decoder_fn(response): - topic_partition = (payload_response.topic, - payload_response.partition) - responses[topic_partition] = payload_response - _resps.append(payload_response) - log.debug('Response %s: %s', requestId, _resps) + else: + _resps = [] + for payload_response in decoder_fn(response): + topic_partition = (payload_response.topic, + payload_response.partition) + responses[topic_partition] = payload_response + _resps.append(payload_response) + log.debug('Response %s: %s', requestId, _resps) # Connection errors generally mean stale metadata # although sometimes it means incorrect api request diff --git a/kafka/conn.py b/kafka/conn.py index 432e10b0c..f1a12dc9b 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -118,6 +118,11 @@ def _read_bytes(self, num_bytes): # TODO multiplex socket communication to allow for multi-threaded clients + def get_connected_socket(self): + if not self._sock: + self.reinit() + return self._sock + def send(self, request_id, payload): """ Send a request to Kafka From 1ad887ca5bd2d7a8fdb8da3744ae5fe9df040200 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Sat, 24 Oct 2015 16:50:46 -0700 Subject: [PATCH 006/291] Add tests. Bug fix. Rename socket_conn dict. --- kafka/client.py | 14 +++++++------- test/test_conn.py | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index e59a780e6..63129f196 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -192,9 +192,9 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): responses = {} broker_failures = [] - # For each KafkaConnection we store the real socket so that we can use + # For each KafkaConnection keep the real socket so that we can use # a select to perform unblocking I/O - socket_connection = {} + connections_by_socket = {} for broker, payloads in payloads_by_broker.items(): requestId = self._next_id() log.debug('Request %s to %s: %s', requestId, broker, payloads) @@ -229,13 +229,13 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): responses[topic_partition] = None continue else: - socket_connection[conn.get_connected_socket()] = (conn, broker) + connections_by_socket[conn.get_connected_socket()] = (conn, broker) conn = None - while socket_connection: - sockets = socket_connection.keys() + while connections_by_socket: + sockets = connections_by_socket.keys() rlist, _, _ = select.select(sockets, [], [], None) - conn, broker = socket_connection.pop(rlist[0]) + conn, broker = connections_by_socket.pop(rlist[0]) try: response = conn.recv(requestId) except ConnectionError as e: @@ -244,7 +244,7 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): 'response to request %s from server %s: %s', requestId, broker, e) - for payload in payloads: + for payload in payloads_by_broker[broker]: topic_partition = (payload.topic, payload.partition) responses[topic_partition] = FailedPayloadsError(payload) diff --git a/test/test_conn.py b/test/test_conn.py index 2b7034461..1bdfc1eb0 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -165,6 +165,23 @@ def test_recv__doesnt_consume_extra_data_in_stream(self): 
self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload']) self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload2']) + def test_get_connected_socket(self): + s = self.conn.get_connected_socket() + + self.assertEqual(s, self.MockCreateConn()) + + def test_get_connected_socket_on_dirty_conn(self): + # Dirty the connection + try: + self.conn._raise_connection_error() + except ConnectionError: + pass + + # Test that get_connected_socket tries to connect + self.assertEqual(self.MockCreateConn.call_count, 0) + self.conn.get_connected_socket() + self.assertEqual(self.MockCreateConn.call_count, 1) + def test_close__object_is_reusable(self): # test that sending to a closed connection From c9cb664bbfef58f9cb2318e77402f7e63a9ca6d8 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Fri, 30 Oct 2015 14:19:19 -0700 Subject: [PATCH 007/291] Bump version to .post2 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index edf30172b..6c50be88e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.4.post1' +__version__ = '0.9.4.post2' From eebc69a43a04e286cd5d11672f0ded370aa2358d Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Mon, 11 Apr 2016 12:21:08 -0700 Subject: [PATCH 008/291] Bump version 0.9.5.post1 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 9272695b3..30013491c 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5' +__version__ = '0.9.5.post1' From 9f73af1b7ac0a9eba66906eadac0526609de319e Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Wed, 23 Mar 2016 03:07:41 -0700 Subject: [PATCH 009/291] Add dual commit --- kafka/client.py | 11 +++++++++-- kafka/consumer/kafka.py | 5 +++-- kafka/protocol.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 638fcbd63..45f007726 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -668,12 +668,19 @@ def send_offset_request(self, payloads=[], fail_on_error=True, @time_metric('offset_commit_request_timer') def send_offset_commit_request(self, group, payloads=[], - fail_on_error=True, callback=None): + fail_on_error=True, callback=None, dual_commit=True): encoder = functools.partial(KafkaProtocol.encode_offset_commit_request, - group=group) + group=group) decoder = KafkaProtocol.decode_offset_commit_response resps = self._send_broker_aware_request(payloads, encoder, decoder) + if dual_commit: + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request_kafka, + group=group, + ) + resps += self._send_consumer_aware_request(group, payloads, encoder, decoder) + return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index c68719bd3..419ee192f 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -37,8 +37,8 @@ 'auto_commit_interval_ms': 60 * 1000, 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, - 'metrics_responder': None, + 'dual_commit': False, # Currently unused 'socket_receive_buffer_bytes': 64 * 1024, @@ -570,7 +570,8 @@ def commit(self): logger.info('committing consumer offsets to group %s', self._config['group_id']) resps = self._client.send_offset_commit_request( 
kafka_bytestring(self._config['group_id']), commits, - fail_on_error=False + fail_on_error=False, + dual_commit=self._config['dual_commit'], ) for r in resps: diff --git a/kafka/protocol.py b/kafka/protocol.py index 412a95794..f488b0417 100644 --- a/kafka/protocol.py +++ b/kafka/protocol.py @@ -495,6 +495,41 @@ def encode_offset_commit_request(cls, client_id, correlation_id, msg = b''.join(message) return struct.pack('>i%ds' % len(msg), len(msg), msg) + @classmethod + def encode_offset_commit_request_kafka(cls, client_id, correlation_id, + group, payloads): + """ + Encode some OffsetCommitRequest structs + + Arguments: + client_id: string + correlation_id: int + group: string, the consumer group you are committing offsets for + payloads: list of OffsetCommitRequest + """ + grouped_payloads = group_by_topic_and_partition(payloads) + + message = [] + message.append(cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.OFFSET_COMMIT_KEY, + version=2)) + message.append(write_short_string(group)) + message.append(struct.pack('>i', -1)) # ConsumerGroupGenerationId + message.append(write_short_string('')) # ConsumerId + message.append(struct.pack('>q', -1)) # Retention time + message.append(struct.pack('>i', len(grouped_payloads))) + + for topic, topic_payloads in grouped_payloads.items(): + message.append(write_short_string(topic)) + message.append(struct.pack('>i', len(topic_payloads))) + + for partition, payload in topic_payloads.items(): + message.append(struct.pack('>iq', partition, payload.offset)) + message.append(write_short_string(payload.metadata)) + + msg = b''.join(message) + return struct.pack('>i%ds' % len(msg), len(msg), msg) + @classmethod def decode_offset_commit_response(cls, data): """ From ab8443d8da4b1c9a88827b9ffd51858fa6dca095 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Thu, 7 Apr 2016 04:51:40 -0700 Subject: [PATCH 010/291] Add integration test for dual commit --- kafka/client.py | 2 +- test/test_client_integration.py | 36 +++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 45f007726..0b4081e2a 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -668,7 +668,7 @@ def send_offset_request(self, payloads=[], fail_on_error=True, @time_metric('offset_commit_request_timer') def send_offset_commit_request(self, group, payloads=[], - fail_on_error=True, callback=None, dual_commit=True): + fail_on_error=True, callback=None, dual_commit=False): encoder = functools.partial(KafkaProtocol.encode_offset_commit_request, group=group) decoder = KafkaProtocol.decode_offset_commit_response diff --git a/test/test_client_integration.py b/test/test_client_integration.py index 8853350fa..59690a72c 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -1,8 +1,9 @@ import os +import time from kafka.common import ( FetchRequest, OffsetCommitRequest, OffsetFetchRequest, - KafkaTimeoutError, ProduceRequest + KafkaTimeoutError, ProduceRequest, ConsumerCoordinatorNotAvailableCode ) from kafka.protocol import create_message @@ -83,7 +84,7 @@ def test_send_produce_request_maintains_request_response_order(self): # Offset Tests # #################### - @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1") + @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1", "0.9.0.0") def test_commit_fetch_offsets(self): req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata") (resp,) = self.client.send_offset_commit_request(b"group", [req]) @@ -94,3 +95,34 @@ def 
test_commit_fetch_offsets(self): self.assertEqual(resp.error, 0) self.assertEqual(resp.offset, 42) self.assertEqual(resp.metadata, b"") # Metadata isn't stored for now + + @kafka_versions("0.9.0.0") + def test_commit_fetch_offsets_dual(self): + for _ in range(10): + try: + self.client._get_coordinator_for_group(b"group") + except ConsumerCoordinatorNotAvailableCode: + time.sleep(.5) + continue + break + + req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata") + (resp_zk, resp_kafka,) = self.client.send_offset_commit_request( + b"group", + [req], + dual_commit=True, + ) + self.assertEqual(resp_zk.error, 0) + self.assertEqual(resp_kafka.error, 0) + + req = OffsetFetchRequest(self.bytes_topic, 0) + (resp,) = self.client.send_offset_fetch_request(b"group", [req]) + self.assertEqual(resp.error, 0) + self.assertEqual(resp.offset, 42) + self.assertEqual(resp.metadata, b"") # Metadata isn't stored for now + + (resp,) = self.client.send_offset_fetch_request_kafka(b"group", [req]) + self.assertEqual(resp.error, 0) + self.assertEqual(resp.offset, 42) + # Metadata is stored in kafka + self.assertEqual(resp.metadata, b"metadata") From de2f81361e682745cf8c0dcda5cdcc26d67dc15d Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Thu, 7 Apr 2016 07:48:12 -0700 Subject: [PATCH 011/291] Add unit test for encode_offset_commit_request_kafka --- test/test_protocol.py | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/test/test_protocol.py b/test/test_protocol.py index ac7bea6c8..8cdb523f7 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -698,6 +698,54 @@ def test_encode_offset_commit_request(self): self.assertIn(encoded, [ expected1, expected2 ]) + + def test_encode_offset_commit_request_kafka(self): + + header = b"".join([ + struct.pack('>i', 113), # Total message length + + struct.pack('>h', 8), # Message type = offset commit + struct.pack('>h', 2), # API version + struct.pack('>i', 42), # Correlation ID + struct.pack('>h9s', 9, b"client_id"),# The client ID + struct.pack('>h8s', 8, b"group_id"), # The group to commit for + struct.pack('>i', -1), # Consumer group generation id + struct.pack(">h0s", 0, b""), # Consumer id + struct.pack('>q', -1), # Retention time + struct.pack('>i', 2), # Num topics + ]) + + topic1 = b"".join([ + struct.pack(">h6s", 6, b"topic1"), # Topic for the request + struct.pack(">i", 2), # Two partitions + struct.pack(">i", 0), # Partition 0 + struct.pack(">q", 123), # Offset 123 + struct.pack(">h", -1), # Null metadata + struct.pack(">i", 1), # Partition 1 + struct.pack(">q", 234), # Offset 234 + struct.pack(">h", -1), # Null metadata + ]) + + topic2 = b"".join([ + struct.pack(">h6s", 6, b"topic2"), # Topic for the request + struct.pack(">i", 1), # One partition + struct.pack(">i", 2), # Partition 2 + struct.pack(">q", 345), # Offset 345 + struct.pack(">h", -1), # Null metadata + ]) + + expected1 = b"".join([ header, topic1, topic2 ]) + expected2 = b"".join([ header, topic2, topic1 ]) + + encoded = KafkaProtocol.encode_offset_commit_request_kafka(b"client_id", 42, b"group_id", [ + OffsetCommitRequest(b"topic1", 0, 123, None), + OffsetCommitRequest(b"topic1", 1, 234, None), + OffsetCommitRequest(b"topic2", 2, 345, None), + ]) + + self.assertIn(encoded, [ expected1, expected2 ]) + + def test_decode_offset_commit_response(self): encoded = b"".join([ struct.pack(">i", 42), # Correlation ID From 9a9272103290906a5dc6ab6817ef9a9a82bb0e09 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Thu, 7 Apr 2016 10:21:35 -0700 
Subject: [PATCH 012/291] Rename dual commit parameter --- kafka/client.py | 14 +++++++++----- kafka/consumer/kafka.py | 4 ++-- test/test_client_integration.py | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 0b4081e2a..253c929bd 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -668,13 +668,17 @@ def send_offset_request(self, payloads=[], fail_on_error=True, @time_metric('offset_commit_request_timer') def send_offset_commit_request(self, group, payloads=[], - fail_on_error=True, callback=None, dual_commit=False): - encoder = functools.partial(KafkaProtocol.encode_offset_commit_request, - group=group) + fail_on_error=True, callback=None, offset_storage='zookeeper'): + resps = [] decoder = KafkaProtocol.decode_offset_commit_response - resps = self._send_broker_aware_request(payloads, encoder, decoder) + if offset_storage in ['zookeeper', 'dual']: + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request, + group=group, + ) + resps += self._send_broker_aware_request(payloads, encoder, decoder) - if dual_commit: + if offset_storage in ['kafka', 'dual']: encoder = functools.partial( KafkaProtocol.encode_offset_commit_request_kafka, group=group, diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index 419ee192f..c4739b6c0 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -38,7 +38,7 @@ 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, 'metrics_responder': None, - 'dual_commit': False, + 'offset_storage': False, # Currently unused 'socket_receive_buffer_bytes': 64 * 1024, @@ -571,7 +571,7 @@ def commit(self): resps = self._client.send_offset_commit_request( kafka_bytestring(self._config['group_id']), commits, fail_on_error=False, - dual_commit=self._config['dual_commit'], + offset_storage=self._config['offset_storage'], ) for r in resps: diff --git a/test/test_client_integration.py b/test/test_client_integration.py index 59690a72c..f6e279337 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -110,7 +110,7 @@ def test_commit_fetch_offsets_dual(self): (resp_zk, resp_kafka,) = self.client.send_offset_commit_request( b"group", [req], - dual_commit=True, + offset_storage='dual', ) self.assertEqual(resp_zk.error, 0) self.assertEqual(resp_kafka.error, 0) From 4d864a5f40b386969aeddcca329e7003ca5681f3 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Mon, 11 Apr 2016 06:24:36 -0700 Subject: [PATCH 013/291] Add dual commit fetch --- kafka/client.py | 55 ++++++++++++++++++++++--------- kafka/consumer/kafka.py | 42 +++++++++++++++-------- test/test_client_integration.py | 24 ++------------ test/test_consumer_integration.py | 47 +++++++++++++++++++++++++- 4 files changed, 116 insertions(+), 52 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 253c929bd..8392e0dcd 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -10,13 +10,22 @@ ConnectionError, FailedPayloadsError, KafkaTimeoutError, KafkaUnavailableError, LeaderNotAvailableError, UnknownTopicOrPartitionError, - NotLeaderForPartitionError, ReplicaNotAvailableError) + NotLeaderForPartitionError, ReplicaNotAvailableError, + ConsumerCoordinatorNotAvailableCode, OffsetsLoadInProgressCode, +) from kafka.conn import collect_hosts, KafkaConnection, DEFAULT_SOCKET_TIMEOUT_SECONDS from kafka.protocol import KafkaProtocol from kafka.util import kafka_bytestring +# If the __consumer_offsets topic is missing, the first consumer coordinator +# request will fail and it will trigger the 
creation of the topic; for this +# reason, we will retry few times until the creation is completed. +CONSUMER_OFFSET_TOPIC_CREATION_RETRIES = 20 +CONSUMER_OFFSET_RETRY_INTERVAL_SEC = 0.5 + + log = logging.getLogger(__name__) @@ -322,7 +331,16 @@ def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn): # so we need to keep this so we can rebuild order before returning original_ordering = [(p.topic, p.partition) for p in payloads] - broker = self._get_coordinator_for_group(group) + retries = 0 + broker = None + while not broker: + try: + broker = self._get_coordinator_for_group(group) + except (ConsumerCoordinatorNotAvailableCode, OffsetsLoadInProgressCode) as e: + if retries == CONSUMER_OFFSET_TOPIC_CREATION_RETRIES: + raise e + time.sleep(CONSUMER_OFFSET_RETRY_INTERVAL_SEC) + retries += 1 # Send the list of request payloads and collect the responses and # errors @@ -668,22 +686,26 @@ def send_offset_request(self, payloads=[], fail_on_error=True, @time_metric('offset_commit_request_timer') def send_offset_commit_request(self, group, payloads=[], - fail_on_error=True, callback=None, offset_storage='zookeeper'): - resps = [] + fail_on_error=True, callback=None): + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request, + group=group, + ) decoder = KafkaProtocol.decode_offset_commit_response - if offset_storage in ['zookeeper', 'dual']: - encoder = functools.partial( - KafkaProtocol.encode_offset_commit_request, - group=group, - ) - resps += self._send_broker_aware_request(payloads, encoder, decoder) + resps = self._send_broker_aware_request(payloads, encoder, decoder) - if offset_storage in ['kafka', 'dual']: - encoder = functools.partial( - KafkaProtocol.encode_offset_commit_request_kafka, - group=group, - ) - resps += self._send_consumer_aware_request(group, payloads, encoder, decoder) + return [resp if not callback else callback(resp) for resp in resps + if not fail_on_error or not self._raise_on_response_error(resp)] + + @time_metric('offset_commit_request_timer_kafka') + def send_offset_commit_request_kafka(self, group, payloads=[], + fail_on_error=True, callback=None): + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request_kafka, + group=group, + ) + decoder = KafkaProtocol.decode_offset_commit_response + resps = self._send_consumer_aware_request(group, payloads, encoder, decoder) return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] @@ -700,6 +722,7 @@ def send_offset_fetch_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_fetch_request_timer_kafka') def send_offset_fetch_request_kafka(self, group, payloads=[], fail_on_error=True, callback=None): diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index c4739b6c0..7d5d2168e 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -38,7 +38,7 @@ 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, 'metrics_responder': None, - 'offset_storage': False, + 'offset_storage': 'zookeeper', # Currently unused 'socket_receive_buffer_bytes': 64 * 1024, @@ -568,11 +568,18 @@ def commit(self): if commits: logger.info('committing consumer offsets to group %s', self._config['group_id']) - resps = self._client.send_offset_commit_request( - kafka_bytestring(self._config['group_id']), commits, - fail_on_error=False, - 
offset_storage=self._config['offset_storage'], - ) + + resps = [] + if self._config['offset_storage'] in ['zookeeper', 'dual']: + resps += self._client.send_offset_commit_request( + kafka_bytestring(self._config['group_id']), commits, + fail_on_error=False, + ) + if self._config['offset_storage'] in ['kafka', 'dual']: + resps += self._client.send_offset_commit_request_kafka( + kafka_bytestring(self._config['group_id']), commits, + fail_on_error=False, + ) for r in resps: check_error(r) @@ -633,25 +640,34 @@ def _refresh_metadata_on_error(self): def _get_commit_offsets(self): logger.info("Consumer fetching stored offsets") for topic_partition in self._topics: - (resp,) = self._client.send_offset_fetch_request( - kafka_bytestring(self._config['group_id']), - [OffsetFetchRequest(topic_partition[0], topic_partition[1])], - fail_on_error=False) + resps = [] + if self._config['offset_storage'] in ['zookeeper', 'dual']: + resps += self._client.send_offset_fetch_request( + kafka_bytestring(self._config['group_id']), + [OffsetFetchRequest(topic_partition[0], topic_partition[1])], + fail_on_error=False) + if self._config['offset_storage'] in ['kafka', 'dual']: + resps += self._client.send_offset_fetch_request_kafka( + kafka_bytestring(self._config['group_id']), + [OffsetFetchRequest(topic_partition[0], topic_partition[1])], + fail_on_error=False) try: - check_error(resp) + for r in resps: + check_error(r) # API spec says server wont set an error here # but 0.8.1.1 does actually... except UnknownTopicOrPartitionError: pass # -1 offset signals no commit is currently stored - if resp.offset == -1: + max_offset = max(r.offset for r in resps) + if max_offset == -1: self._offsets.commit[topic_partition] = None # Otherwise we committed the stored offset # and need to fetch the next one else: - self._offsets.commit[topic_partition] = resp.offset + self._offsets.commit[topic_partition] = max_offset def _reset_highwater_offsets(self): for topic_partition in self._topics: diff --git a/test/test_client_integration.py b/test/test_client_integration.py index f6e279337..d4d268c6c 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -1,9 +1,8 @@ import os -import time from kafka.common import ( FetchRequest, OffsetCommitRequest, OffsetFetchRequest, - KafkaTimeoutError, ProduceRequest, ConsumerCoordinatorNotAvailableCode + KafkaTimeoutError, ProduceRequest ) from kafka.protocol import create_message @@ -98,28 +97,9 @@ def test_commit_fetch_offsets(self): @kafka_versions("0.9.0.0") def test_commit_fetch_offsets_dual(self): - for _ in range(10): - try: - self.client._get_coordinator_for_group(b"group") - except ConsumerCoordinatorNotAvailableCode: - time.sleep(.5) - continue - break - req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata") - (resp_zk, resp_kafka,) = self.client.send_offset_commit_request( - b"group", - [req], - offset_storage='dual', - ) - self.assertEqual(resp_zk.error, 0) - self.assertEqual(resp_kafka.error, 0) - - req = OffsetFetchRequest(self.bytes_topic, 0) - (resp,) = self.client.send_offset_fetch_request(b"group", [req]) + (resp,) = self.client.send_offset_commit_request_kafka(b"group", [req]) self.assertEqual(resp.error, 0) - self.assertEqual(resp.offset, 42) - self.assertEqual(resp.metadata, b"") # Metadata isn't stored for now (resp,) = self.client.send_offset_fetch_request_kafka(b"group", [req]) self.assertEqual(resp.error, 0) diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index fee53f5a8..13b2c2d15 100644 --- 
a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -509,7 +509,7 @@ def test_kafka_consumer__blocking(self): self.assertEqual(len(messages), 5) self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1") + @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1", "0.9.0.0") def test_kafka_consumer__offset_commit_resume(self): GROUP_ID = random_string(10).encode('utf-8') @@ -551,3 +551,48 @@ def test_kafka_consumer__offset_commit_resume(self): output_msgs2.append(m) self.assert_message_count(output_msgs2, 20) self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) + + @kafka_versions("0.9.0.0") + def test_kafka_consumer__offset_commit_resume_dual(self): + GROUP_ID = random_string(10).encode('utf-8') + + self.send_messages(0, range(0, 100)) + self.send_messages(1, range(100, 200)) + + # Start a consumer + consumer1 = self.kafka_consumer( + group_id = GROUP_ID, + auto_commit_enable = True, + auto_commit_interval_ms = None, + auto_commit_interval_messages = 20, + auto_offset_reset='smallest', + offset_storage='kafka', + ) + + # Grab the first 195 messages + output_msgs1 = [] + for _ in xrange(195): + m = consumer1.next() + output_msgs1.append(m) + consumer1.task_done(m) + self.assert_message_count(output_msgs1, 195) + + # The total offset across both partitions should be at 180 + consumer2 = self.kafka_consumer( + group_id = GROUP_ID, + auto_commit_enable = True, + auto_commit_interval_ms = None, + auto_commit_interval_messages = 20, + consumer_timeout_ms = 100, + auto_offset_reset='smallest', + offset_storage='dual', + ) + + # 181-200 + output_msgs2 = [] + with self.assertRaises(ConsumerTimeout): + while True: + m = consumer2.next() + output_msgs2.append(m) + self.assert_message_count(output_msgs2, 20) + self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) From b22ae70ae6a93ac32d481db4f730760ef6b9c3b1 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Tue, 12 Apr 2016 04:12:30 -0700 Subject: [PATCH 014/291] Bump version, use tuple instead of list for config selection --- kafka/consumer/kafka.py | 4 ++-- kafka/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index 7d5d2168e..8fcc31fe1 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -641,12 +641,12 @@ def _get_commit_offsets(self): logger.info("Consumer fetching stored offsets") for topic_partition in self._topics: resps = [] - if self._config['offset_storage'] in ['zookeeper', 'dual']: + if self._config['offset_storage'] in ('zookeeper', 'dual'): resps += self._client.send_offset_fetch_request( kafka_bytestring(self._config['group_id']), [OffsetFetchRequest(topic_partition[0], topic_partition[1])], fail_on_error=False) - if self._config['offset_storage'] in ['kafka', 'dual']: + if self._config['offset_storage'] in ('kafka', 'dual'): resps += self._client.send_offset_fetch_request_kafka( kafka_bytestring(self._config['group_id']), [OffsetFetchRequest(topic_partition[0], topic_partition[1])], diff --git a/kafka/version.py b/kafka/version.py index 30013491c..9ff8b5c47 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5.post1' +__version__ = '0.9.5.post2' From 468682a41303325ec8ee54f9b6cfbc59e8dee95f Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Tue, 21 Jun 2016 07:40:04 -0700 Subject: [PATCH 015/291] Add offset storage option for simple consumer --- kafka/consumer/base.py | 9 +++++++-- kafka/consumer/simple.py | 10 
++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/kafka/consumer/base.py b/kafka/consumer/base.py index c9f6e4866..1f05b9094 100644 --- a/kafka/consumer/base.py +++ b/kafka/consumer/base.py @@ -44,7 +44,8 @@ class Consumer(object): """ def __init__(self, client, group, topic, partitions=None, auto_commit=True, auto_commit_every_n=AUTO_COMMIT_MSG_COUNT, - auto_commit_every_t=AUTO_COMMIT_INTERVAL): + auto_commit_every_t=AUTO_COMMIT_INTERVAL, + offset_storage='zookeeper'): self.client = client self.topic = kafka_bytestring(topic) @@ -64,6 +65,7 @@ def __init__(self, client, group, topic, partitions=None, auto_commit=True, self.auto_commit = auto_commit self.auto_commit_every_n = auto_commit_every_n self.auto_commit_every_t = auto_commit_every_t + self.offset_storage = offset_storage # Set up the auto-commit timer if auto_commit is True and auto_commit_every_t is not None: @@ -159,7 +161,10 @@ def commit(self, partitions=None): offset, None)) try: - self.client.send_offset_commit_request(self.group, reqs) + if self.offset_storage in ['zookeeper', 'dual']: + self.client.send_offset_commit_request(self.group, reqs) + if self.offset_storage in ['zookeeper', 'dual']: + self.client.send_offset_commit_request_kafka(self.group, reqs) except KafkaError as e: log.error('%s saving offsets: %s', e.__class__.__name__, e) return False diff --git a/kafka/consumer/simple.py b/kafka/consumer/simple.py index 7c6324644..9dcd3e1c5 100644 --- a/kafka/consumer/simple.py +++ b/kafka/consumer/simple.py @@ -104,6 +104,10 @@ class SimpleConsumer(Consumer): OffsetOutOfRangeError. Valid values are largest and smallest. Otherwise, do not reset the offsets and raise OffsetOutOfRangeError. + offset_storage:. default zookeeper. Specifies that offset storage that + will be used to fetch and commit the offsets. Valid values are + 'zookeeper', 'kafka', or 'dual'. + Auto commit details: If both auto_commit_every_n and auto_commit_every_t are set, they will reset one another when one is triggered. 
These triggers simply call the @@ -117,13 +121,15 @@ def __init__(self, client, group, topic, auto_commit=True, partitions=None, buffer_size=FETCH_BUFFER_SIZE_BYTES, max_buffer_size=MAX_FETCH_BUFFER_SIZE_BYTES, iter_timeout=None, - auto_offset_reset='largest'): + auto_offset_reset='largest', + offset_storage='zookeeper'): super(SimpleConsumer, self).__init__( client, group, topic, partitions=partitions, auto_commit=auto_commit, auto_commit_every_n=auto_commit_every_n, - auto_commit_every_t=auto_commit_every_t) + auto_commit_every_t=auto_commit_every_t, + offset_storage=offset_storage) if max_buffer_size is not None and buffer_size > max_buffer_size: raise ValueError('buffer_size (%d) is greater than ' From 06f21bf19578e38177ea0d1ab50a717a7495e841 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Tue, 21 Jun 2016 07:48:23 -0700 Subject: [PATCH 016/291] Fix typos --- kafka/consumer/base.py | 2 +- kafka/consumer/simple.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka/consumer/base.py b/kafka/consumer/base.py index 1f05b9094..1febbd053 100644 --- a/kafka/consumer/base.py +++ b/kafka/consumer/base.py @@ -163,7 +163,7 @@ def commit(self, partitions=None): try: if self.offset_storage in ['zookeeper', 'dual']: self.client.send_offset_commit_request(self.group, reqs) - if self.offset_storage in ['zookeeper', 'dual']: + if self.offset_storage in ['kafka', 'dual']: self.client.send_offset_commit_request_kafka(self.group, reqs) except KafkaError as e: log.error('%s saving offsets: %s', e.__class__.__name__, e) diff --git a/kafka/consumer/simple.py b/kafka/consumer/simple.py index 9dcd3e1c5..7b6794dc1 100644 --- a/kafka/consumer/simple.py +++ b/kafka/consumer/simple.py @@ -104,7 +104,7 @@ class SimpleConsumer(Consumer): OffsetOutOfRangeError. Valid values are largest and smallest. Otherwise, do not reset the offsets and raise OffsetOutOfRangeError. - offset_storage:. default zookeeper. Specifies that offset storage that + offset_storage: default zookeeper. Specifies that offset storage that will be used to fetch and commit the offsets. Valid values are 'zookeeper', 'kafka', or 'dual'. 
From 7e2419fbcb258aa17275a08f47341c5ca525c979 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 21 Jun 2016 15:54:33 -0700 Subject: [PATCH 017/291] Bump version 0.9.5.post3 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 9ff8b5c47..df69f04ae 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5.post2' +__version__ = '0.9.5.post3' From 8511b80d58ead8e4087e1653bc4d1da4210a6422 Mon Sep 17 00:00:00 2001 From: Federico Giraud Date: Mon, 25 Jul 2016 07:00:21 -0700 Subject: [PATCH 018/291] Add offset storage based fetch for simple consumer --- kafka/consumer/base.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/kafka/consumer/base.py b/kafka/consumer/base.py index 1febbd053..d8a209034 100644 --- a/kafka/consumer/base.py +++ b/kafka/consumer/base.py @@ -101,11 +101,19 @@ def fetch_last_known_offsets(self, partitions=None): if partitions is None: partitions = self.client.get_partition_ids_for_topic(self.topic) - responses = self.client.send_offset_fetch_request( - self.group, - [OffsetFetchRequest(self.topic, p) for p in partitions], - fail_on_error=False - ) + responses = [] + if self.offset_storage in ['zookeeper', 'dual']: + responses += self.client.send_offset_fetch_request( + self.group, + [OffsetFetchRequest(self.topic, p) for p in partitions], + fail_on_error=False + ) + if self.offset_storage in ['kafka', 'dual']: + responses += self.client.send_offset_fetch_request_kafka( + self.group, + [OffsetFetchRequest(self.topic, p) for p in partitions], + fail_on_error=False + ) for resp in responses: try: @@ -115,14 +123,15 @@ def fetch_last_known_offsets(self, partitions=None): except UnknownTopicOrPartitionError: pass + prev = self.offsets.get(resp.partition, 0) # -1 offset signals no commit is currently stored if resp.offset == -1: - self.offsets[resp.partition] = 0 + self.offsets[resp.partition] = prev # Otherwise we committed the stored offset # and need to fetch the next one else: - self.offsets[resp.partition] = resp.offset + self.offsets[resp.partition] = max(prev, resp.offset) def commit(self, partitions=None): """Commit stored offsets to Kafka via OffsetCommitRequest (v0) From 78e7184d246b795cfcac45eda8baee8ca303a86a Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Wed, 27 Jul 2016 15:52:44 -0700 Subject: [PATCH 019/291] Bump version 0.9.5.post4 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index df69f04ae..858c0419d 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5.post3' +__version__ = '0.9.5.post4' From 3be034ddb898ac0d2984847c852b7c3e27efcf45 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Fri, 26 Feb 2016 17:37:41 -0800 Subject: [PATCH 020/291] Update base.py Our app uses `SimpleProducer` and logs lots of these warnings (from line 438): producer.stop() called, but producer is not async This destructor appears to be the cause. (Also, is it wise to do a thread join from a destructor?) 
--- kafka/producer/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/producer/base.py b/kafka/producer/base.py index 39b1f8402..267ac79ed 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -458,5 +458,5 @@ def stop(self, timeout=None): self.stopped = True def __del__(self): - if not self.stopped: + if self.async and not self.stopped: self.stop() From 95356f65e1b33bf5e64c19cd92540dc038a7be33 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 2 Aug 2016 17:03:23 -0700 Subject: [PATCH 021/291] Bump version 0.9.5.post5 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 858c0419d..63dfad098 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5.post4' +__version__ = '0.9.5.post5' From 78548bf79e4ffef6ce88ce9ccf5874ead4165c59 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Fri, 19 Aug 2016 11:42:35 -0700 Subject: [PATCH 022/291] Add select timeout and reset connections when expired --- kafka/client.py | 53 ++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 8392e0dcd..daa8080d5 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -268,28 +268,41 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): conn = None while connections_by_socket: sockets = connections_by_socket.keys() - rlist, _, _ = select.select(sockets, [], [], None) - conn, broker, requestId = connections_by_socket.pop(rlist[0]) - try: - response = conn.recv(requestId) - except ConnectionError as e: - broker_failures.append(broker) - log.warning('ConnectionError attempting to receive a ' - 'response to request %s from server %s: %s', - requestId, broker, e) - - for payload in payloads_by_broker[broker]: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = FailedPayloadsError(payload) + rlist, _, _ = select.select(sockets, [], [], self.timeout) + if rlist: + conn, broker, requestId = connections_by_socket.pop(rlist[0]) + try: + response = conn.recv(requestId) + except ConnectionError as e: + broker_failures.append(broker) + log.warning('ConnectionError attempting to receive a ' + 'response to request %s from server %s: %s', + requestId, broker, e) + + for payload in payloads_by_broker[broker]: + topic_partition = (payload.topic, payload.partition) + responses[topic_partition] = FailedPayloadsError(payload) + else: + _resps = [] + for payload_response in decoder_fn(response): + topic_partition = (payload_response.topic, + payload_response.partition) + responses[topic_partition] = payload_response + _resps.append(payload_response) + log.debug('Response %s: %s', requestId, _resps) + # If the timeout expires rlist is empty and + # all pending requests are considered failed else: - _resps = [] - for payload_response in decoder_fn(response): - topic_partition = (payload_response.topic, - payload_response.partition) - responses[topic_partition] = payload_response - _resps.append(payload_response) - log.debug('Response %s: %s', requestId, _resps) + for conn, broker, requestId in connections_by_socket.values(): + conn.close() + broker_failures.append(broker) + log.warning('Socket timeout error attempting to receive a ' + 'response to request %s from server %s', + requestId, broker) + for payload in payloads_by_broker[broker]: + topic_partition = (payload.topic, payload.partition) + responses[topic_partition] = 
FailedPayloadsError(payload) # Connection errors generally mean stale metadata # although sometimes it means incorrect api request From 3165dd855be5def08eb9b78722a181b60ed7b846 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Fri, 19 Aug 2016 14:20:11 -0700 Subject: [PATCH 023/291] Bump version 0.9.5.post6 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 63dfad098..94b565426 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '0.9.5.post5' +__version__ = '0.9.5.post6' From 590ac446f91e0c14d1ec82aceb5f58399fcd8bc6 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Tue, 4 Oct 2016 13:13:26 -0700 Subject: [PATCH 024/291] Undo removal of str conversation and --no-certificate in wget --- build_integration.sh | 2 +- kafka/client.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/build_integration.sh b/build_integration.sh index 4bd100071..53c25bfda 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -55,7 +55,7 @@ pushd servers if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then echo "Downloading kafka ${kafka} tarball" if hash wget 2>/dev/null; then - wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz --no-check-certificate || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz --no-check-certificate + wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz else echo "wget not found... using curl" if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then diff --git a/kafka/client.py b/kafka/client.py index 1b96c411e..7a2fe68ca 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -260,8 +260,8 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): def failed_payloads(payloads): for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[(topic_partition)] = FailedPayloadsError(payload) + topic_partition = (str(payload.topic), payload.partition) + responses[topic_partition] = FailedPayloadsError(payload) # For each BrokerConnection keep the real socket so that we can use # a select to perform unblocking I/O @@ -294,7 +294,7 @@ def failed_payloads(payloads): if not expect_response: for payload in broker_payloads: - topic_partition = (payload.topic, payload.partition) + topic_partition = (str(payload.topic), payload.partition) responses[topic_partition] = None continue @@ -317,7 +317,7 @@ def failed_payloads(payloads): else: for payload_response in decoder_fn(future.value): - topic_partition = (payload_response.topic, + topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = payload_response @@ -374,8 +374,8 @@ def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn): def failed_payloads(payloads): for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[(topic_partition)] = FailedPayloadsError(payload) + topic_partition = (str(payload.topic), payload.partition) + responses[topic_partition] = FailedPayloadsError(payload) host, port, afi = get_ip_port_afi(broker.host) try: @@ -402,7 +402,7 @@ def failed_payloads(payloads): else: for payload_response in decoder_fn(future.value): - topic_partition = (payload_response.topic, + topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = 
payload_response From 5a127b24365ade70333207ce450fee22366eedcb Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Tue, 4 Oct 2016 14:35:35 -0700 Subject: [PATCH 025/291] Undo hack to make tests run in docker since using travis build now --- test/fixtures.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/fixtures.py b/test/fixtures.py index bfb01b8e5..e50ce12cf 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -272,9 +272,6 @@ def open(self): "kafka-python") env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # Fix bugs where integration tests hang in docker container by sending - # blank line to stdin - proc.communicate('') if proc.wait() != 0: self.out("Failed to create Zookeeper chroot node") From b3a8a3853438bb771e69103ec659bf22d5c3eff5 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 5 Oct 2016 11:55:03 -0700 Subject: [PATCH 026/291] Generalize Dockerfile for public usage --- Dockerfile | 31 +++++++++++++++++++------------ test/fixtures.py | 3 +++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index df303d43d..a3abb264b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,25 +1,28 @@ -FROM docker-dev.yelpcorp.com/xenial_yelp:latest -MAINTAINER Team Distributed Systems - +FROM ubuntu:xenial RUN /usr/sbin/locale-gen en_US.UTF-8 ENV LANG en_US.UTF-8 -ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle-1.8.0.20" -ENV PATH="$PATH:$JAVA_HOME/bin" ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get install -y python2.7-dev \ +RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" >> /etc/apt/sources.list +RUN echo "deb http://ppa.launchpad.net/fkrull/deadsnakes/ubuntu precise main" >> /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5BB92C09DB82666C C2518248EEA14886 +RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections +RUN apt-get update && apt-get install -y python2.7-dev \ python3.5-dev \ - pypy \ pypy-dev \ python-pkg-resources \ - python-pip \ python-setuptools \ python-virtualenv \ - python-tox \ libsnappy-dev \ - java-8u20-oracle \ - wget + oracle-java8-installer \ + wget\ + g++ \ + ca-certificates \ + python-pip \ + python-tox +ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle" +ENV PATH="$PATH:$JAVA_HOME/bin" COPY servers /work/servers COPY kafka /work/kafka @@ -31,5 +34,9 @@ COPY build_integration.sh /work COPY setup.cfg /work COPY setup.py /work COPY tox.ini /work -WORKDIR /work +COPY LICENSE /work +COPY AUTHORS.md /work +COPY CHANGES.md /work +COPY MANIFEST.in /work +WORKDIR /work diff --git a/test/fixtures.py b/test/fixtures.py index e50ce12cf..bfb01b8e5 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -272,6 +272,9 @@ def open(self): "kafka-python") env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Fix bugs where integration tests hang in docker container by sending + # blank line to stdin + proc.communicate('') if proc.wait() != 0: self.out("Failed to create Zookeeper chroot node") From 79a0a02f327454a6983d7c2b0592ac9fc0b10858 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 5 Oct 2016 14:43:05 -0700 Subject: [PATCH 027/291] Try using curl over wget to get past 503 --- build_integration.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build_integration.sh b/build_integration.sh index 53c25bfda..952d5aebe 100755 --- a/build_integration.sh +++ 
b/build_integration.sh @@ -54,14 +54,14 @@ pushd servers fi if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then echo "Downloading kafka ${kafka} tarball" - if hash wget 2>/dev/null; then - wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz + if hash curl 2>/dev/null; then + curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz else echo "wget not found... using curl" if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" else - curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz + wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz fi fi echo From 00339f753c5d9d423052878b2e087e1779d80d61 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 5 Oct 2016 14:55:36 -0700 Subject: [PATCH 028/291] Retry downloading tar --- build_integration.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/build_integration.sh b/build_integration.sh index 952d5aebe..d4d5877dd 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -55,9 +55,22 @@ pushd servers if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then echo "Downloading kafka ${kafka} tarball" if hash curl 2>/dev/null; then - curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz + curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz else - echo "wget not found... using curl" + echo "curl not found... using wget" + if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then + echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" + else + wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz + fi + fi + # Retry download again + if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then + echo "Downloading kafka ${kafka} tarball" + if hash curl 2>/dev/null; then + curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz + else + echo "curl not found... using wget" if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" else From 664cb781b7a31da2390add132c9d25ce72072347 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 5 Oct 2016 15:18:46 -0700 Subject: [PATCH 029/291] Fix missing fi in build_integration --- build_integration.sh | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/build_integration.sh b/build_integration.sh index d4d5877dd..635beab50 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -65,16 +65,17 @@ pushd servers fi fi # Retry download again - if [ ! 
-f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then - echo "Downloading kafka ${kafka} tarball" - if hash curl 2>/dev/null; then - curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz - else - echo "curl not found... using wget" - if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then - echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" + if [ ! -f "${KAFKA_ARTIFACT}.tar.gz" && ! -f "${KAFKA_ARTIFACT}.tgz"]; then + echo "Downloading kafka ${kafka} tarball" + if hash curl 2>/dev/null; then + curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz else - wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz + echo "curl not found... using wget" + if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then + echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" + else + wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz + fi fi fi echo From 6c1a3253bc43881b0db7effa3007abadcae96237 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 5 Oct 2016 15:39:00 -0700 Subject: [PATCH 030/291] Oops this is bash... --- build_integration.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_integration.sh b/build_integration.sh index 635beab50..0a4d5eb5d 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -65,7 +65,7 @@ pushd servers fi fi # Retry download again - if [ ! -f "${KAFKA_ARTIFACT}.tar.gz" && ! -f "${KAFKA_ARTIFACT}.tgz"]; then + if [ ! -f "${KAFKA_ARTIFACT}.tar.gz" ] && [ ! -f "${KAFKA_ARTIFACT}.tgz" ]; then echo "Downloading kafka ${kafka} tarball" if hash curl 2>/dev/null; then curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz From b99a4e9893a7443f32c84e79e9f9aa0208117301 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 6 Oct 2016 07:40:07 -0700 Subject: [PATCH 031/291] More retries and try no proxy --- build_integration.sh | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/build_integration.sh b/build_integration.sh index 0a4d5eb5d..a017607bb 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,7 +1,7 @@ #!/bin/bash # Versions available for testing via binary distributions -OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.1 0.10.0.0" +OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.1 0.10.0.1" # Useful configuration vars, with sensible defaults if [ -z "$SCALA_VERSION" ]; then @@ -9,7 +9,7 @@ if [ -z "$SCALA_VERSION" ]; then fi # On travis CI, empty KAFKA_VERSION means skip integration tests -# so we dont try to get binaries +# so we dont try to get binaries # Otherwise it means test all official releases, so we get all of them! if [ -z "$KAFKA_VERSION" -a -z "$TRAVIS" ]; then KAFKA_VERSION=$OFFICIAL_RELEASES @@ -54,28 +54,14 @@ pushd servers fi if [ ! 
-f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then echo "Downloading kafka ${kafka} tarball" - if hash curl 2>/dev/null; then - curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz + if hash wget 2>/dev/null; then + wget --no-proxy --tries=3 -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget --no-proxy --tries=3 -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz else - echo "curl not found... using wget" + echo "wget not found... using curl" if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" else - wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz - fi - fi - # Retry download again - if [ ! -f "${KAFKA_ARTIFACT}.tar.gz" ] && [ ! -f "${KAFKA_ARTIFACT}.tgz" ]; then - echo "Downloading kafka ${kafka} tarball" - if hash curl 2>/dev/null; then curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz -o ${KAFKA_ARTIFACT}.tar.gz || curl -f https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz -o ${KAFKA_ARTIFACT}.tar.gz - else - echo "curl not found... using wget" - if [ -f "${KAFKA_ARTIFACT}.tar.gz" ]; then - echo "Using cached artifact: ${KAFKA_ARTIFACT}.tar.gz" - else - wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tgz || wget -N https://archive.apache.org/dist/kafka/$kafka/${KAFKA_ARTIFACT}.tar.gz - fi fi fi echo From 051157a04b45677ab34da9b553956b211a85721b Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 6 Oct 2016 11:50:35 -0700 Subject: [PATCH 032/291] exposing to send metrics by pluging in metric_reporters --- kafka/metrics/dict_reporter.py | 6 ++++++ kafka/metrics/metrics.py | 9 +++++++-- kafka/metrics/metrics_reporter.py | 8 ++++++++ kafka/metrics/stats/sensor.py | 6 +++++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 0b98fe1e4..781f15b64 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -81,3 +81,9 @@ def configure(self, configs): def close(self): pass + + def record(self, emitter, value): + pass + + def get_emitter(self, metric): + pass diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index e9c465deb..2d7a20112 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -140,11 +140,15 @@ def sensor(self, name, config=None, if sensor: return sensor + with self._lock: sensor = self.get_sensor(name) if not sensor: - sensor = Sensor(self, name, parents, config or self.config, - inactive_sensor_expiration_time_seconds) + sensor = Sensor( + self, name, parents, config or self.config, + inactive_sensor_expiration_time_seconds, + self._reporters + ) self._sensors[name] = sensor if parents: for parent in parents: @@ -156,6 +160,7 @@ def sensor(self, name, config=None, logger.debug('Added sensor with name %s', name) return sensor + def remove_sensor(self, name): """ Remove a sensor (if it exists), associated metrics and its children. 
diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index d8bd12b3b..0eb7dac66 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -55,3 +55,11 @@ def configure(self, configs): def close(self): """Called when the metrics repository is closed.""" raise NotImplementedError + + @abc.abstractmethod + def get_emitter(self, metric): + """Called to return an instance of an emitter like meteorite etc""" + + @abc.abstractmethod + def record(self, emitter, value): + """ called to record and emit metrics""" diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index ca9979bc7..73eee3633 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -16,7 +16,7 @@ class Sensor(object): of metrics about request sizes such as the average or max. """ def __init__(self, registry, name, parents, config, - inactive_sensor_expiration_time_seconds): + inactive_sensor_expiration_time_seconds, reporters): if not name: raise ValueError('name must be non-empty') self._lock = threading.RLock() @@ -30,6 +30,8 @@ def __init__(self, registry, name, parents, config, inactive_sensor_expiration_time_seconds * 1000) self._last_record_time = time.time() * 1000 self._check_forest(set()) + self._emitters = dict( (reporter, reporter.get_emitter(name)) for + reporter in reporters ) def _check_forest(self, sensors): """Validate that this sensor doesn't end up referencing itself.""" @@ -64,6 +66,8 @@ def record(self, value=1.0, time_ms=None): QuotaViolationException: if recording this value moves a metric beyond its configured maximum or minimum bound """ + for reporter, emitter in self._emitters.items(): + reporter.record(emitter, value) if time_ms is None: time_ms = time.time() * 1000 self._last_record_time = time_ms From 31b061dc47d583d58caa814a5a6aa31a8a23242e Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 6 Oct 2016 13:13:19 -0700 Subject: [PATCH 033/291] refactoring and changing doc string --- kafka/metrics/metrics.py | 1 - kafka/metrics/metrics_reporter.py | 15 +++++++++++++-- kafka/metrics/stats/sensor.py | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index 2d7a20112..e9e31d9d3 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -160,7 +160,6 @@ def sensor(self, name, config=None, logger.debug('Added sensor with name %s', name) return sensor - def remove_sensor(self, name): """ Remove a sensor (if it exists), associated metrics and its children. 
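[Editorial sketch, not part of the changeset] Taken together with the previous patch, the reporter hooks can be exercised end to end. The snippet below is a rough usage sketch only: it reuses the hypothetical ListEmitterReporter from the sketch above, the sensor name is arbitrary, and no stats are attached to the sensor, so the only observable effect is the reporter callback.

from kafka.metrics.metrics import Metrics

reporter = ListEmitterReporter()              # hypothetical, sketched above
metrics = Metrics(reporters=[reporter])

# Creating the sensor triggers reporter.get_emitter('fetch-latency');
# each record() call is then forwarded to reporter.record(emitter, value).
latency = metrics.sensor('fetch-latency')
latency.record(12.5)
latency.record(8.0)
assert reporter._buffers['fetch-latency'] == [12.5, 8.0]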
diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 0eb7dac66..91689520c 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -58,8 +58,19 @@ def close(self): @abc.abstractmethod def get_emitter(self, metric): - """Called to return an instance of an emitter like meteorite etc""" + """ + Called to return an instance of an emitter like meteorite etc + + Arguments: + metric (str): the name of the metric + """ @abc.abstractmethod def record(self, emitter, value): - """ called to record and emit metrics""" + """ + Called to record and emit metrics + + Arguments: + emitter: reference to an emitter + value(float): value to be emitted + """ diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 73eee3633..4e81e2503 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -30,7 +30,7 @@ def __init__(self, registry, name, parents, config, inactive_sensor_expiration_time_seconds * 1000) self._last_record_time = time.time() * 1000 self._check_forest(set()) - self._emitters = dict( (reporter, reporter.get_emitter(name)) for + self._emitters = dict((reporter, reporter.get_emitter(name)) for reporter in reporters ) def _check_forest(self, sensors): From 112d64987fdfaa3e1bf922b5a8e7b9f8f52624ce Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 6 Oct 2016 14:17:13 -0700 Subject: [PATCH 034/291] Rename metric and limit travis builds to needed python and kafka versions --- .travis.yml | 8 +------- kafka/client.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ffd64d92..3491533b4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,19 +1,13 @@ language: python python: - - 2.6 - 2.7 - - 3.3 - - 3.4 - 3.5 - pypy env: - - KAFKA_VERSION=0.8.0 - - KAFKA_VERSION=0.8.1.1 - - KAFKA_VERSION=0.8.2.2 - KAFKA_VERSION=0.9.0.1 - - KAFKA_VERSION=0.10.0.0 + - KAFKA_VERSION=0.10.0.1 sudo: false diff --git a/kafka/client.py b/kafka/client.py index 7a2fe68ca..a75c513bc 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -710,7 +710,7 @@ def send_offset_commit_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_commit_request_timer_kafka') + @time_metric('offset_commit_request_kafka_timer') def send_offset_commit_request_kafka(self, group, payloads=[], fail_on_error=True, callback=None): encoder = functools.partial( From b9d3458db1b5166c22b140f238efab652f2638e6 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 6 Oct 2016 14:54:47 -0700 Subject: [PATCH 035/291] Add resources for 0.10.1 itests --- servers/0.10.0.1/resources/kafka.properties | 142 ++++++++++++++++++ servers/0.10.0.1/resources/log4j.properties | 25 +++ .../0.10.0.1/resources/zookeeper.properties | 21 +++ 3 files changed, 188 insertions(+) create mode 100644 servers/0.10.0.1/resources/kafka.properties create mode 100644 servers/0.10.0.1/resources/log4j.properties create mode 100644 servers/0.10.0.1/resources/zookeeper.properties diff --git a/servers/0.10.0.1/resources/kafka.properties b/servers/0.10.0.1/resources/kafka.properties new file mode 100644 index 000000000..7237f454e --- /dev/null +++ b/servers/0.10.0.1/resources/kafka.properties @@ -0,0 +1,142 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/server.truststore.jks +ssl.truststore.password=foobar + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + + # The number of threads doing disk I/O + num.io.threads=8 + + # The send buffer (SO_SNDBUF) used by the socket server + socket.send.buffer.bytes=102400 + + # The receive buffer (SO_RCVBUF) used by the socket server + socket.receive.buffer.bytes=102400 + + # The maximum size of a request that the socket server will accept (protection against OOM) + socket.request.max.bytes=104857600 + + + ############################# Log Basics ############################# + + # A comma seperated list of directories under which to store log files + log.dirs={tmp_dir}/data + + # The default number of log partitions per topic. More partitions allow greater + # parallelism for consumption, but this will also result in more files across + # the brokers. + num.partitions={partitions} + default.replication.factor={replicas} + + ## Short Replica Lag -- Drops failed brokers out of ISR + replica.lag.time.max.ms=1000 + replica.socket.timeout.ms=1000 + + ############################# Log Flush Policy ############################# + + # Messages are immediately written to the filesystem but by default we only fsync() to sync + # the OS cache lazily. The following configurations control the flush of data to disk. + # There are a few important trade-offs here: + # 1. Durability: Unflushed data may be lost if you are not using replication. + # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. + # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 
+ # The settings below allow one to configure the flush policy to flush data after a period of time or + # every N messages (or both). This can be done globally and overridden on a per-topic basis. + + # The number of messages to accept before forcing a flush of data to disk + #log.flush.interval.messages=10000 + + # The maximum amount of time a message can sit in a log before we force a flush + #log.flush.interval.ms=1000 + + ############################# Log Retention Policy ############################# + + # The following configurations control the disposal of log segments. The policy can + # be set to delete segments after a period of time, or after a given size has accumulated. + # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens + # from the end of the log. + + # The minimum age of a log file to be eligible for deletion + log.retention.hours=168 + + # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining + # segments don't drop below log.retention.bytes. + #log.retention.bytes=1073741824 + + # The maximum size of a log segment file. When this size is reached a new log segment will be created. + log.segment.bytes=1073741824 + + # The interval at which log segments are checked to see if they can be deleted according + # to the retention policies + log.retention.check.interval.ms=300000 + + # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. + # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. + log.cleaner.enable=false + + # tune down offset topics to reduce setup time in tests + offsets.commit.timeout.ms=500 + offsets.topic.num.partitions=2 + offsets.topic.replication.factor=2 + + # Allow shorter session timeouts for tests + group.min.session.timeout.ms=1000 + + + ############################# Zookeeper ############################# + + # Zookeeper connection string (see zookeeper docs for details). + # This is a comma separated host:port pairs, each corresponding to a zk + # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". + # You can also append an optional chroot string to the urls to specify the + # root directory for all kafka znodes. + zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + + # Timeout in ms for connecting to zookeeper + zookeeper.connection.timeout.ms=30000 + # We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly + diff --git a/servers/0.10.0.1/resources/log4j.properties b/servers/0.10.0.1/resources/log4j.properties new file mode 100644 index 000000000..52c0bad87 --- /dev/null +++ b/servers/0.10.0.1/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.0.1/resources/zookeeper.properties b/servers/0.10.0.1/resources/zookeeper.properties new file mode 100644 index 000000000..756eb1e77 --- /dev/null +++ b/servers/0.10.0.1/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 From 192eb7b9ffffb424b5bee403b3691fff66fd229d Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 6 Oct 2016 15:35:00 -0700 Subject: [PATCH 036/291] passing a reference to time a value is recorded at, change in docstring --- kafka/metrics/dict_reporter.py | 2 +- kafka/metrics/metrics_reporter.py | 11 ++++++----- kafka/metrics/stats/sensor.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 781f15b64..24b91775e 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -82,7 +82,7 @@ def configure(self, configs): def close(self): pass - def record(self, emitter, value): + def record(self, emitter, value, timestamp): pass def get_emitter(self, metric): diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 91689520c..a6a7239f5 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -59,18 +59,19 @@ def close(self): @abc.abstractmethod def get_emitter(self, metric): """ - Called to return an instance of an emitter like meteorite etc - + Called to return an instance of an emitter + Arguments: metric (str): the name of the metric """ @abc.abstractmethod - def record(self, emitter, value): + def record(self, emitter, value, timestamp): """ Called to record and emit metrics - + Arguments: - emitter: reference to an emitter + emitter: reference to an emitter value(float): value to be emitted + timestamp: the time the value was recorded at """ diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 4e81e2503..2bf7b644e 100644 --- 
a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -66,10 +66,10 @@ def record(self, value=1.0, time_ms=None): QuotaViolationException: if recording this value moves a metric beyond its configured maximum or minimum bound """ - for reporter, emitter in self._emitters.items(): - reporter.record(emitter, value) if time_ms is None: time_ms = time.time() * 1000 + for reporter, emitter in self._emitters.items(): + reporter.record(emitter, value, time_ms) self._last_record_time = time_ms with self._lock: # XXX high volume, might be performance issue # increment all the stats From 56e1e303284d128971687a78d36159ff21b93c1b Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 6 Oct 2016 17:31:02 -0700 Subject: [PATCH 037/291] itest improvements when running in docker --- Dockerfile | 4 ++++ Makefile | 5 +---- run_itest.sh | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 run_itest.sh diff --git a/Dockerfile b/Dockerfile index a3abb264b..baae5277f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,5 +38,9 @@ COPY LICENSE /work COPY AUTHORS.md /work COPY CHANGES.md /work COPY MANIFEST.in /work +COPY run_itest.sh /work +RUN chmod +x /work/run_itest.sh WORKDIR /work + +CMD ["./run_itest.sh"] diff --git a/Makefile b/Makefile index 213e84574..18034b48e 100644 --- a/Makefile +++ b/Makefile @@ -8,10 +8,7 @@ test: itest: docker build -t kafka_python_test . - docker run -i -t kafka_python_test /bin/bash -c "export KAFKA_VERSION='0.9.0.1'; ./build_integration.sh; \ - tox -e py27; tox -e py35; tox -e pypy; \ - export KAFKA_VERSION='0.10.0.0'; ./build_integration.sh; \ - tox -e py27; tox -e py35; tox -e pypy; exit $?" + docker run kafka_python_test clean: rm -rf kafka-python.egg-info/ .tox/ diff --git a/run_itest.sh b/run_itest.sh new file mode 100644 index 000000000..a55802b52 --- /dev/null +++ b/run_itest.sh @@ -0,0 +1,14 @@ +#!/bin/bash -e + + +export KAFKA_VERSION='0.9.0.1' +./build_integration.sh +tox -e py27 +tox -e py35 +tox -e pypy + +export KAFKA_VERSION='0.10.0.0' +./build_integration.sh +tox -e py27 +tox -e py35 +tox -e pypy From be1a56d825ac620e585f77a3e40ee48c3eced8de Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 13 Oct 2016 14:11:56 -0700 Subject: [PATCH 038/291] Make metrics name in kafka consistent --- kafka/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index a75c513bc..be4da7d6e 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -735,7 +735,7 @@ def send_offset_fetch_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_fetch_request_timer_kafka') + @time_metric('offset_fetch_request_kafka_timer') def send_offset_fetch_request_kafka(self, group, payloads=[], fail_on_error=True, callback=None): From 70ae1a8a95930cf201d3c917f7942e61d54cc765 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 13 Oct 2016 14:31:56 -0700 Subject: [PATCH 039/291] Bump version --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 6860d5abd..d63db2570 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.1.post1' +__version__ = '1.3.1.post2' From 4e323b74475acd2fa59d1f3d051a2df3ca29f71c Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Wed, 2 Nov 2016 18:05:01 -0700 Subject: [PATCH 040/291] making a common way to report metrics --- 
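[Editorial sketch, not part of the changeset] After the diff below, the same object passed as metrics_responder receives both the client-level request timings from the time_metric decorator (record(name, elapsed_seconds)) and the per-connection sensor values routed through Metrics(reporters=[...]) (record(name, value, time_ms)), so a single name-based record() with an optional timestamp covers both call shapes. The wiring a caller might plug in could look roughly like this; TimerReporter is hypothetical (it only reuses DictReporter so the other reporter hooks are already satisfied) and the broker address is a placeholder.

from kafka import SimpleClient
from kafka.metrics.dict_reporter import DictReporter


class TimerReporter(DictReporter):
    """Hypothetical responder: DictReporter provides the abstract hooks,
    only the name-based record() used after this patch is added."""

    def record(self, metric_name, value, timestamp=None):
        # Decorator values arrive in seconds; connection sensors report
        # their own units, so just echo whatever was recorded.
        print('%s recorded %s' % (metric_name, value))


responder = TimerReporter()
client = SimpleClient('localhost:9092', metrics_responder=responder)

# Decorated request methods now report their latency, e.g. this call
# records a value under 'metadata_request_timer'.
client.send_metadata_request()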
kafka/client.py | 11 +++++++++-- kafka/metrics/stats/sensor.py | 7 +++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index be4da7d6e..395da5888 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -16,6 +16,7 @@ NotLeaderForPartitionError, ReplicaNotAvailableError, GroupCoordinatorNotAvailableError, GroupLoadInProgressError) from kafka.structs import TopicPartition, BrokerMetadata +from kafka.metrics.metrics import Metrics from kafka.conn import ( collect_hosts, BrokerConnection, @@ -46,7 +47,7 @@ def wrapper(self, *args, **kwargs): ret = fn(self, *args, **kwargs) if self.metrics_responder: - self.metrics_responder(metric_name, time.time() - start_time) + self.metrics_responder.record(metric_name, time.time() - start_time) return ret return wrapper @@ -87,10 +88,16 @@ def _get_conn(self, host, port, afi): """Get or create a connection to a broker using host and port""" host_key = (host, port) if host_key not in self._conns: + metrics = None + if self.metrics_responder: + metrics = Metrics( + reporters=[self.metrics_responder] + ) self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, - client_id=self.client_id + client_id=self.client_id, + metrics=metrics ) conn = self._conns[host_key] diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 2bf7b644e..0d3458690 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -30,8 +30,7 @@ def __init__(self, registry, name, parents, config, inactive_sensor_expiration_time_seconds * 1000) self._last_record_time = time.time() * 1000 self._check_forest(set()) - self._emitters = dict((reporter, reporter.get_emitter(name)) for - reporter in reporters ) + self.reporters = reporters def _check_forest(self, sensors): """Validate that this sensor doesn't end up referencing itself.""" @@ -68,8 +67,8 @@ def record(self, value=1.0, time_ms=None): """ if time_ms is None: time_ms = time.time() * 1000 - for reporter, emitter in self._emitters.items(): - reporter.record(emitter, value, time_ms) + for reporter in self.reporters: + reporter.record(self._name, value, time_ms) self._last_record_time = time_ms with self._lock: # XXX high volume, might be performance issue # increment all the stats From f33bce6a8913fe7531fbe2659f415ed34e28036b Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 3 Nov 2016 18:59:39 -0700 Subject: [PATCH 041/291] change the interface, and adding docstring --- kafka/metrics/dict_reporter.py | 2 +- kafka/metrics/metrics_reporter.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 24b91775e..72a9cda51 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -82,7 +82,7 @@ def configure(self, configs): def close(self): pass - def record(self, emitter, value, timestamp): + def record(self, name, value, timestamp): pass def get_emitter(self, metric): diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index a6a7239f5..0f4802627 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -66,12 +66,12 @@ def get_emitter(self, metric): """ @abc.abstractmethod - def record(self, emitter, value, timestamp): + def record(self, name, value, timestamp): """ Called to record and emit metrics Arguments: - emitter: reference to an emitter + name: name of the metric to be recorded value(float): value to be emitted timestamp: the 
time the value was recorded at """ From 4fd045234af77dfe275dead394a25795cbdb2b9f Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 3 Nov 2016 21:38:08 -0700 Subject: [PATCH 042/291] changing docstring and interface --- kafka/metrics/dict_reporter.py | 4 ++-- kafka/metrics/metrics_reporter.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 72a9cda51..d2a5c60b3 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -82,8 +82,8 @@ def configure(self, configs): def close(self): pass - def record(self, name, value, timestamp): + def record(self, metric_name, value, timestamp=None): pass - def get_emitter(self, metric): + def get_emitter(self, metric_name, prefix='', default_dimensions=None): pass diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 0f4802627..457fdab0e 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -57,21 +57,23 @@ def close(self): raise NotImplementedError @abc.abstractmethod - def get_emitter(self, metric): + def get_emitter(self, metric_name, prefix='', default_dimensions=None): """ Called to return an instance of an emitter Arguments: - metric (str): the name of the metric + metric_name (str): the name of the metric + prefix (str): the prefix attached to the metric for reporting + default_dimensions: the extra dimensions provided for the metric """ @abc.abstractmethod - def record(self, name, value, timestamp): + def record(self, metric_name, value, timestamp=None): """ Called to record and emit metrics Arguments: - name: name of the metric to be recorded + metric_name: name of the metric to be recorded value(float): value to be emitted timestamp: the time the value was recorded at """ From 4f2f0e5cde1c472efbcf9ceda5e5c47b6eab0bfc Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Thu, 3 Nov 2016 21:57:29 -0700 Subject: [PATCH 043/291] removing prefix and dimensions as these should be defined at the class level, and should be dealt in __init__ --- kafka/metrics/dict_reporter.py | 2 +- kafka/metrics/metrics_reporter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index d2a5c60b3..241295a7f 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -85,5 +85,5 @@ def close(self): def record(self, metric_name, value, timestamp=None): pass - def get_emitter(self, metric_name, prefix='', default_dimensions=None): + def get_emitter(self, metric_name): pass diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 457fdab0e..62f60c067 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -57,7 +57,7 @@ def close(self): raise NotImplementedError @abc.abstractmethod - def get_emitter(self, metric_name, prefix='', default_dimensions=None): + def get_emitter(self, metric_name): """ Called to return an instance of an emitter From 9ed0ce8c46730bb6076171ddb26054778adb6522 Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Wed, 9 Nov 2016 11:00:38 -0800 Subject: [PATCH 044/291] changing doc string to remove old params, and moving lines to one lone as it dosent violate pep8 --- kafka/client.py | 4 +--- kafka/metrics/metrics_reporter.py | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 395da5888..928f6b416 100644 --- a/kafka/client.py +++ 
b/kafka/client.py @@ -90,9 +90,7 @@ def _get_conn(self, host, port, afi): if host_key not in self._conns: metrics = None if self.metrics_responder: - metrics = Metrics( - reporters=[self.metrics_responder] - ) + metrics = Metrics(reporters=[self.metrics_responder]) self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 62f60c067..88b0a7eaf 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -63,8 +63,6 @@ def get_emitter(self, metric_name): Arguments: metric_name (str): the name of the metric - prefix (str): the prefix attached to the metric for reporting - default_dimensions: the extra dimensions provided for the metric """ @abc.abstractmethod From ad4215af5e257a26dcf2b3f0c99fbef778eb6690 Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Wed, 30 Nov 2016 04:00:12 -0800 Subject: [PATCH 045/291] changing from responder to reporter to make things consistent --- kafka/client.py | 12 ++++++------ kafka/consumer/kafka.py | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 928f6b416..8b8a3a896 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -46,8 +46,8 @@ def wrapper(self, *args, **kwargs): start_time = time.time() ret = fn(self, *args, **kwargs) - if self.metrics_responder: - self.metrics_responder.record(metric_name, time.time() - start_time) + if self.metrics_reporter: + self.metrics_reporter.record(metric_name, time.time() - start_time) return ret return wrapper @@ -65,13 +65,13 @@ class SimpleClient(object): # socket timeout. def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, - correlation_id=0, metrics_responder=None): + correlation_id=0, metrics_reporter=None): # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout self.hosts = collect_hosts(hosts) self.correlation_id = correlation_id - self.metrics_responder = metrics_responder + self.metrics_reporter = metrics_reporter self._conns = {} self.brokers = {} # broker_id -> BrokerMetadata @@ -89,8 +89,8 @@ def _get_conn(self, host, port, afi): host_key = (host, port) if host_key not in self._conns: metrics = None - if self.metrics_responder: - metrics = Metrics(reporters=[self.metrics_responder]) + if self.metrics_reporter: + metrics = Metrics(reporters=[self.metrics_reporter]) self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index 5876c986c..f44e459bc 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -39,7 +39,7 @@ 'auto_commit_interval_ms': 60 * 1000, 'auto_commit_interval_messages': None, 'consumer_timeout_ms': -1, - 'metrics_responder': None, + 'metrics_reporter': None, 'offset_storage': 'zookeeper', # Currently unused @@ -141,13 +141,13 @@ def configure(self, **configs): 'bootstrap_servers required to configure KafkaConsumer' ) - self.metrics_responder = self._config['metrics_responder'] + self.metrics_reporter = self._config['metrics_reporter'] self._client = SimpleClient( self._config['bootstrap_servers'], client_id=self._config['client_id'], timeout=(self._config['socket_timeout_ms'] / 1000.0), - metrics_responder=self.metrics_responder + metrics_reporter=self.metrics_reporter ) def set_topic_partitions(self, *topics): @@ -357,8 +357,8 @@ def fetch_messages(self): for resp in 
responses: if isinstance(resp, FailedPayloadsError): - if self.metrics_responder: - self.metrics_responder('failed_payloads_count', 1) + if self.metrics_reporter: + self.metrics_reporter('failed_payloads_count', 1) logger.warning('FailedPayloadsError attempting to fetch data') self._refresh_metadata_on_error() @@ -369,8 +369,8 @@ def fetch_messages(self): try: check_error(resp) except OffsetOutOfRangeError: - if self.metrics_responder: - self.metrics_responder('offset_out_of_range_count', 1) + if self.metrics_reporter: + self.metrics_reporter('offset_out_of_range_count', 1) logger.warning('OffsetOutOfRange: topic %s, partition %d, ' 'offset %d (Highwatermark: %d)', @@ -384,8 +384,8 @@ def fetch_messages(self): continue except NotLeaderForPartitionError: - if self.metrics_responder: - self.metrics_responder('not_leader_for_partition_count', 1) + if self.metrics_reporter: + self.metrics_reporter('not_leader_for_partition_count', 1) logger.warning("NotLeaderForPartitionError for %s - %d. " "Metadata may be out of date", @@ -394,8 +394,8 @@ def fetch_messages(self): continue except RequestTimedOutError: - if self.metrics_responder: - self.metrics_responder('request_timed_out_count', 1) + if self.metrics_reporter: + self.metrics_reporter('request_timed_out_count', 1) logger.warning("RequestTimedOutError for %s - %d", topic, partition) From 5ab13bec44b8420d60469425a2292076f83e7f42 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 1 Dec 2016 13:52:17 -0800 Subject: [PATCH 046/291] Bump version 1.3.2.post2 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index c3e43da95..6ea006ef7 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post1' +__version__ = '1.3.2.post2' From 3d3a9d93b8842100d9bfd11a20a3ee7058d5b313 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Fri, 2 Dec 2016 16:14:06 -0800 Subject: [PATCH 047/291] Refactor metrics reporter interface --- kafka/metrics/dict_reporter.py | 5 +---- kafka/metrics/metrics_reporter.py | 16 ++++------------ kafka/metrics/stats/sensor.py | 8 ++++---- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 241295a7f..71ba8f9be 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -82,8 +82,5 @@ def configure(self, configs): def close(self): pass - def record(self, metric_name, value, timestamp=None): - pass - - def get_emitter(self, metric_name): + def record(self, metric, value, timestamp, config): pass diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 88b0a7eaf..2542a7366 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -57,21 +57,13 @@ def close(self): raise NotImplementedError @abc.abstractmethod - def get_emitter(self, metric_name): - """ - Called to return an instance of an emitter - - Arguments: - metric_name (str): the name of the metric - """ - - @abc.abstractmethod - def record(self, metric_name, value, timestamp=None): + def record(self, metric, value, timestamp, config): """ Called to record and emit metrics Arguments: - metric_name: name of the metric to be recorded - value(float): value to be emitted + metric: KafkaMetric object of the metric to be recorded + value(float): value to be recorded timestamp: the time the value was recorded at + config: sensor config """ diff --git a/kafka/metrics/stats/sensor.py 
b/kafka/metrics/stats/sensor.py index 09f8dc70f..eaa850025 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -67,13 +67,13 @@ def record(self, value=1.0, time_ms=None): """ if time_ms is None: time_ms = time.time() * 1000 - for reporter in self.reporters: - reporter.record(self._name, value, time_ms) self._last_record_time = time_ms with self._lock: # XXX high volume, might be performance issue # increment all the stats - for stat in self._stats: - stat.record(self._config, value, time_ms) + for metric in self._metrics: + metric.measurable.record(self._config, value, time_ms) + for reporter in self.reporters: + reporter.record(metric, value, time_ms, self._config) self._check_quotas(time_ms) for parent in self._parents: parent.record(value, time_ms) From 59fefd46ae38690de370148c2dfd365e17de5fc4 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 8 Dec 2016 15:39:31 -0800 Subject: [PATCH 048/291] Refactor metrics reporter base class --- kafka/metrics/dict_reporter.py | 2 +- kafka/metrics/metrics_reporter.py | 3 ++- kafka/metrics/stats/sensor.py | 5 +++-- kafka/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 71ba8f9be..abf30ea76 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -82,5 +82,5 @@ def configure(self, configs): def close(self): pass - def record(self, metric, value, timestamp, config): + def record(self, sensor_name, metric, value, timestamp, config): pass diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index 2542a7366..3c0ba334b 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -57,11 +57,12 @@ def close(self): raise NotImplementedError @abc.abstractmethod - def record(self, metric, value, timestamp, config): + def record(self, sensor_name, metric, value, timestamp, config): """ Called to record and emit metrics Arguments: + sensor_name: name of the sensor metric: KafkaMetric object of the metric to be recorded value(float): value to be recorded timestamp: the time the value was recorded at diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index eaa850025..9f55e64fc 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -71,9 +71,10 @@ def record(self, value=1.0, time_ms=None): with self._lock: # XXX high volume, might be performance issue # increment all the stats for metric in self._metrics: - metric.measurable.record(self._config, value, time_ms) + if hasattr(metric, 'measurable'): + metric.measurable.record(self._config, value, time_ms) for reporter in self.reporters: - reporter.record(metric, value, time_ms, self._config) + reporter.record(self._name, metric, value, time_ms, self._config) self._check_quotas(time_ms) for parent in self._parents: parent.record(value, time_ms) diff --git a/kafka/version.py b/kafka/version.py index 6ea006ef7..8a17aafc3 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post2' +__version__ = '1.3.2.post3' From c0ecaab7e2c75abadfc32d394a9c32ec9e7ea173 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 8 Dec 2016 17:13:37 -0800 Subject: [PATCH 049/291] Fix metrics reporting for metrics that are not stats --- kafka/metrics/stats/sensor.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 
9f55e64fc..913015cb3 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -24,7 +24,7 @@ def __init__(self, registry, name, parents, config, self._name = name self._parents = parents or [] self._metrics = [] - self._stats = [] + self._stats = set() self._config = config self._inactive_sensor_expiration_time_ms = ( inactive_sensor_expiration_time_seconds * 1000) @@ -71,10 +71,18 @@ def record(self, value=1.0, time_ms=None): with self._lock: # XXX high volume, might be performance issue # increment all the stats for metric in self._metrics: - if hasattr(metric, 'measurable'): + # Some metrics are not stats and they don't have any measurable + # we cannot report them. + if metric in self._stats: + for reporter in self.reporters: + reporter.record( + self._name, + metric, + value, + time_ms, + self._config, + ) metric.measurable.record(self._config, value, time_ms) - for reporter in self.reporters: - reporter.record(self._name, metric, value, time_ms, self._config) self._check_quotas(time_ms) for parent in self._parents: parent.record(value, time_ms) @@ -107,7 +115,7 @@ def add_compound(self, compound_stat, config=None): """ if not compound_stat: raise ValueError('compound stat must be non-empty') - self._stats.append(compound_stat) + self._stats.add(compound_stat) for named_measurable in compound_stat.stats(): metric = KafkaMetric(named_measurable.name, named_measurable.stat, config or self._config) @@ -128,7 +136,7 @@ def add(self, metric_name, stat, config=None): metric = KafkaMetric(metric_name, stat, config or self._config) self._registry.register_metric(metric) self._metrics.append(metric) - self._stats.append(stat) + self._stats.add(stat) def has_expired(self): """ From 5ef5c9098acf22db32658d8ea0fbfad45d969f2c Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 8 Dec 2016 17:40:37 -0800 Subject: [PATCH 050/291] Fix metrics reporting for real --- kafka/metrics/stats/sensor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 913015cb3..d8e2af08b 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -24,7 +24,7 @@ def __init__(self, registry, name, parents, config, self._name = name self._parents = parents or [] self._metrics = [] - self._stats = set() + self._stats = [] self._config = config self._inactive_sensor_expiration_time_ms = ( inactive_sensor_expiration_time_seconds * 1000) @@ -73,7 +73,7 @@ def record(self, value=1.0, time_ms=None): for metric in self._metrics: # Some metrics are not stats and they don't have any measurable # we cannot report them. 
- if metric in self._stats: + if hasattr(metric, 'measurable'): for reporter in self.reporters: reporter.record( self._name, @@ -82,7 +82,8 @@ def record(self, value=1.0, time_ms=None): time_ms, self._config, ) - metric.measurable.record(self._config, value, time_ms) + for stat in self._stats: + stat.record(self._config, value, time_ms) self._check_quotas(time_ms) for parent in self._parents: parent.record(value, time_ms) @@ -115,7 +116,7 @@ def add_compound(self, compound_stat, config=None): """ if not compound_stat: raise ValueError('compound stat must be non-empty') - self._stats.add(compound_stat) + self._stats.append(compound_stat) for named_measurable in compound_stat.stats(): metric = KafkaMetric(named_measurable.name, named_measurable.stat, config or self._config) @@ -136,7 +137,7 @@ def add(self, metric_name, stat, config=None): metric = KafkaMetric(metric_name, stat, config or self._config) self._registry.register_metric(metric) self._metrics.append(metric) - self._stats.add(stat) + self._stats.append(stat) def has_expired(self): """ From 6953591abcdc687028f0fcc2a6883a6169506b5d Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 13 Dec 2016 11:09:13 -0800 Subject: [PATCH 051/291] Fix double -rate --- kafka/client.py | 40 ++++++++++++++++++++++++++++++++-------- kafka/consumer/kafka.py | 41 +++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 8b8a3a896..38afdd8c0 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -17,6 +17,7 @@ GroupCoordinatorNotAvailableError, GroupLoadInProgressError) from kafka.structs import TopicPartition, BrokerMetadata from kafka.metrics.metrics import Metrics +from kafka.metrics.stat.avg import Avg from kafka.conn import ( collect_hosts, BrokerConnection, @@ -46,8 +47,10 @@ def wrapper(self, *args, **kwargs): start_time = time.time() ret = fn(self, *args, **kwargs) - if self.metrics_reporter: - self.metrics_reporter.record(metric_name, time.time() - start_time) + self.metrics.record( + metric_name, + (time.time() - start_time) * 1000, + ) return ret return wrapper @@ -65,13 +68,14 @@ class SimpleClient(object): # socket timeout. 
def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, - correlation_id=0, metrics_reporter=None): + correlation_id=0, metrics=None): # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout self.hosts = collect_hosts(hosts) self.correlation_id = correlation_id - self.metrics_reporter = metrics_reporter + self._metrics = metrics + self.metrics = SimpleClientMetrics(metrics if metrics else Metrics()) self._conns = {} self.brokers = {} # broker_id -> BrokerMetadata @@ -88,14 +92,11 @@ def _get_conn(self, host, port, afi): """Get or create a connection to a broker using host and port""" host_key = (host, port) if host_key not in self._conns: - metrics = None - if self.metrics_reporter: - metrics = Metrics(reporters=[self.metrics_reporter]) self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, client_id=self.client_id, - metrics=metrics + metrics=self._metrics ) conn = self._conns[host_key] @@ -751,3 +752,26 @@ def send_offset_fetch_request_kafka(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + + +class SimpleClientMetrics(object): + + def __init__(self, metrics): + self.metrics = metrics + self.group_name = 'simple-client' + self.request_timers = {} + + def record(self, request_name, value): + timer = self.request_timers.setdefault( + request_name, + self.metrics.sensor(request_name.replace('_', '-').add( + self.metrics.metric_name( + 'request-time-avg', + self.group, + "Time latency for request {}".format(request_name), + {'request-type': request_name.replace('_', '-')}, + ), + Avg(), + )), + ) + timer.record(value) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index f44e459bc..fdb70faec 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -15,6 +15,8 @@ OffsetOutOfRangeError, RequestTimedOutError, KafkaMessage, ConsumerTimeout, FailedPayloadsError, KafkaUnavailableError, KafkaConfigurationError ) +from kafka.metrics.metrics import Metrics +from kafka.metrics.stats.rate import Rate from kafka.structs import ( FetchRequestPayload, OffsetCommitRequestPayload, OffsetFetchRequestPayload, OffsetRequestPayload @@ -141,13 +143,14 @@ def configure(self, **configs): 'bootstrap_servers required to configure KafkaConsumer' ) - self.metrics_reporter = self._config['metrics_reporter'] + metrics = Metrics(reporters=[self._config['metrics_reporter']()]) + self.metrics = KafkaConsumerMetrics(metrics) self._client = SimpleClient( self._config['bootstrap_servers'], client_id=self._config['client_id'], timeout=(self._config['socket_timeout_ms'] / 1000.0), - metrics_reporter=self.metrics_reporter + metrics=metrics, ) def set_topic_partitions(self, *topics): @@ -357,8 +360,7 @@ def fetch_messages(self): for resp in responses: if isinstance(resp, FailedPayloadsError): - if self.metrics_reporter: - self.metrics_reporter('failed_payloads_count', 1) + self.metrics.record_count('failed-payloads', 1) logger.warning('FailedPayloadsError attempting to fetch data') self._refresh_metadata_on_error() @@ -369,8 +371,7 @@ def fetch_messages(self): try: check_error(resp) except OffsetOutOfRangeError: - if self.metrics_reporter: - self.metrics_reporter('offset_out_of_range_count', 1) + self.metrics.record_count('offset-out-of-range', 1) logger.warning('OffsetOutOfRange: topic %s, partition %d, ' 'offset %d (Highwatermark: %d)', @@ -384,8 +385,7 @@ def fetch_messages(self): 
continue except NotLeaderForPartitionError: - if self.metrics_reporter: - self.metrics_reporter('not_leader_for_partition_count', 1) + self.metrics.record_count('not-leader-for-partition', 1) logger.warning("NotLeaderForPartitionError for %s - %d. " "Metadata may be out of date", @@ -394,8 +394,7 @@ def fetch_messages(self): continue except RequestTimedOutError: - if self.metrics_reporter: - self.metrics_reporter('request_timed_out_count', 1) + self.metrics.record_count('request-timed-out', 1) logger.warning("RequestTimedOutError for %s - %d", topic, partition) @@ -807,3 +806,25 @@ def _deprecate_configs(self, **configs): if new not in configs: configs[new] = old_value return configs + + +class KafkaConsumerMetrics(object): + + def __init__(self, metrics): + self._metrics = metrics + self.group_name = 'legacy-kafka-consumer' + self.counters = {} + + def record_count(self, counter_name, value): + counter = self.counters.setdefault( + counter_name, + self.metrics.sensor(counter_name).add( + self.metrics.metric_name( + counter_name + '-rate', + self.group, + "Rate of {}".format(counter_name), + ), + Rate(), + )), + ) + counter.record(value) From 825dd34c3debb1654e3e34fc749b8e0a78320da3 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Wed, 14 Dec 2016 16:00:35 -0800 Subject: [PATCH 052/291] Fix sensor creation --- kafka/client.py | 15 ++++++++------- kafka/consumer/kafka.py | 33 +++++++++++++++++---------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 38afdd8c0..92ce1192b 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -17,7 +17,7 @@ GroupCoordinatorNotAvailableError, GroupLoadInProgressError) from kafka.structs import TopicPartition, BrokerMetadata from kafka.metrics.metrics import Metrics -from kafka.metrics.stat.avg import Avg +from kafka.metrics.stats.avg import Avg from kafka.conn import ( collect_hosts, BrokerConnection, @@ -762,16 +762,17 @@ def __init__(self, metrics): self.request_timers = {} def record(self, request_name, value): - timer = self.request_timers.setdefault( - request_name, - self.metrics.sensor(request_name.replace('_', '-').add( + timer = self.request_timers.get(request_name) + if not timer: + timer = self.metrics.sensor(request_name.replace('_', '-')) + timer.add( self.metrics.metric_name( 'request-time-avg', - self.group, + self.group_name, "Time latency for request {}".format(request_name), {'request-type': request_name.replace('_', '-')}, ), Avg(), - )), - ) + ) + self.request_timers[request_name] = timer timer.record(value) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index fdb70faec..fc9083ba1 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -360,7 +360,7 @@ def fetch_messages(self): for resp in responses: if isinstance(resp, FailedPayloadsError): - self.metrics.record_count('failed-payloads', 1) + self.metrics.record('failed-payloads', 1) logger.warning('FailedPayloadsError attempting to fetch data') self._refresh_metadata_on_error() @@ -371,7 +371,7 @@ def fetch_messages(self): try: check_error(resp) except OffsetOutOfRangeError: - self.metrics.record_count('offset-out-of-range', 1) + self.metrics.record('offset-out-of-range', 1) logger.warning('OffsetOutOfRange: topic %s, partition %d, ' 'offset %d (Highwatermark: %d)', @@ -385,7 +385,7 @@ def fetch_messages(self): continue except NotLeaderForPartitionError: - self.metrics.record_count('not-leader-for-partition', 1) + self.metrics.record('not-leader-for-partition', 1) 
logger.warning("NotLeaderForPartitionError for %s - %d. " "Metadata may be out of date", @@ -394,7 +394,7 @@ def fetch_messages(self): continue except RequestTimedOutError: - self.metrics.record_count('request-timed-out', 1) + self.metrics.record('request-timed-out', 1) logger.warning("RequestTimedOutError for %s - %d", topic, partition) @@ -811,20 +811,21 @@ def _deprecate_configs(self, **configs): class KafkaConsumerMetrics(object): def __init__(self, metrics): - self._metrics = metrics + self.metrics = metrics self.group_name = 'legacy-kafka-consumer' - self.counters = {} + self.sensors = {} - def record_count(self, counter_name, value): - counter = self.counters.setdefault( - counter_name, - self.metrics.sensor(counter_name).add( + def record(self, sensor_name, value): + sensor = self.sensors.get(sensor_name) + if not sensor: + sensor = self.metrics.sensor(sensor_name) + sensor.add( self.metrics.metric_name( - counter_name + '-rate', - self.group, - "Rate of {}".format(counter_name), + sensor_name + '-rate', + self.group_name, + "Rate of {}".format(sensor_name), ), Rate(), - )), - ) - counter.record(value) + ) + self.sensors[sensor_name] = sensor + sensor.record(value) From 43d755a2c32c7db6e2b2a16019cebe1b60441b26 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 15 Dec 2016 05:58:58 -0800 Subject: [PATCH 053/291] Fix SimpleClient deep copy --- kafka/client.py | 17 +++++++++++++++-- kafka/consumer/kafka.py | 4 +++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 92ce1192b..b7623c906 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -74,7 +74,7 @@ def __init__(self, hosts, client_id=CLIENT_ID, self.timeout = timeout self.hosts = collect_hosts(hosts) self.correlation_id = correlation_id - self._metrics = metrics + self._metrics_registry = metrics self.metrics = SimpleClientMetrics(metrics if metrics else Metrics()) self._conns = {} @@ -96,7 +96,7 @@ def _get_conn(self, host, port, afi): host, port, afi, request_timeout_ms=self.timeout * 1000, client_id=self.client_id, - metrics=self._metrics + metrics=self._metrics_registry ) conn = self._conns[host_key] @@ -451,8 +451,17 @@ def copy(self): """ _conns = self._conns self._conns = {} + _metrics_registry = self._metrics_registry + self._metrics_registry = None + _metrics = self.metrics + self.metrics = None + c = copy.deepcopy(self) self._conns = _conns + self.metrics = _metrics + self._metrics_registry = _metrics_registry + c.metrics = _metrics + c._metrics_registry = _metrics_registry return c def reinit(self): @@ -762,6 +771,10 @@ def __init__(self, metrics): self.request_timers = {} def record(self, request_name, value): + # Note: there is a possible race condition here when using async simple + # producer. A metric can be added twice to the same sensor and reported + # twice. This case should be extremely rare and shouldn't be too bad for + # metrics. 
timer = self.request_timers.get(request_name) if not timer: timer = self.metrics.sensor(request_name.replace('_', '-')) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index fc9083ba1..5f54cfa0a 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -143,7 +143,9 @@ def configure(self, **configs): 'bootstrap_servers required to configure KafkaConsumer' ) - metrics = Metrics(reporters=[self._config['metrics_reporter']()]) + reporters = [self._config['metrics_reporter']()] if \ + self._config['metrics_reporter'] else [] + metrics = Metrics(reporters=reporters) self.metrics = KafkaConsumerMetrics(metrics) self._client = SimpleClient( From 105933e0b232d088aedbb98cba6386a997460de5 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 15 Dec 2016 09:34:02 -0800 Subject: [PATCH 054/291] Bump version 1.3.2.post4 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 8a17aafc3..1065bfcef 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post3' +__version__ = '1.3.2.post4' From 36df78a178e9f64b69b226ee5af8b5259c0357e3 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Fri, 16 Dec 2016 08:48:45 -0800 Subject: [PATCH 055/291] Fix request name --- kafka/client.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index b7623c906..c27c1d2b8 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -613,7 +613,7 @@ def load_metadata_for_topics(self, *topics, **kwargs): leader, None, None, None ) - @time_metric('metadata_request_timer') + @time_metric('metadata') def send_metadata_request(self, payloads=[], fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_metadata_request @@ -621,7 +621,7 @@ def send_metadata_request(self, payloads=[], fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) - @time_metric('consumer_metadata_request_timer') + @time_metric('consumer_metadata') def send_consumer_metadata_request(self, payloads=[], fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_consumer_metadata_request @@ -629,7 +629,7 @@ def send_consumer_metadata_request(self, payloads=[], fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) - @time_metric('produce_request_timer') + @time_metric('produce') def send_produce_request(self, payloads=[], acks=1, timeout=1000, fail_on_error=True, callback=None): """ @@ -680,7 +680,7 @@ def send_produce_request(self, payloads=[], acks=1, timeout=1000, if resp is not None and (not fail_on_error or not self._raise_on_response_error(resp))] - @time_metric('fetch_request_timer') + @time_metric('fetch') def send_fetch_request(self, payloads=[], fail_on_error=True, callback=None, max_wait_time=100, min_bytes=4096): """ @@ -701,7 +701,7 @@ def send_fetch_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_request_timer') + @time_metric('offset') def send_offset_request(self, payloads=[], fail_on_error=True, callback=None): resps = self._send_broker_aware_request( @@ -712,7 +712,7 @@ def send_offset_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_commit_request_timer') + 
@time_metric('offset_commit') def send_offset_commit_request(self, group, payloads=[], fail_on_error=True, callback=None): encoder = functools.partial( @@ -725,7 +725,7 @@ def send_offset_commit_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_commit_request_kafka_timer') + @time_metric('offset_commit_kafka') def send_offset_commit_request_kafka(self, group, payloads=[], fail_on_error=True, callback=None): encoder = functools.partial( @@ -738,7 +738,7 @@ def send_offset_commit_request_kafka(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_fetch_request_timer') + @time_metric('offset_fetch') def send_offset_fetch_request(self, group, payloads=[], fail_on_error=True, callback=None): @@ -750,7 +750,7 @@ def send_offset_fetch_request(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_fetch_request_kafka_timer') + @time_metric('offset_fetch_kafka') def send_offset_fetch_request_kafka(self, group, payloads=[], fail_on_error=True, callback=None): From 70e8fc01098cf47407086c5f0fec8330452782a0 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Mon, 19 Dec 2016 06:09:02 -0800 Subject: [PATCH 056/291] Add metrics init to metrics registry --- kafka/metrics/metrics.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index e9e31d9d3..29a0076f8 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -261,3 +261,7 @@ def close(self): """Close this metrics repository.""" for reporter in self._reporters: reporter.close() + + def init(self): + for reporter in self._reporters: + reporter.init() From b1630b13bc691734e774deafcb7bfe3d11424d62 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 20 Dec 2016 04:25:27 -0800 Subject: [PATCH 057/291] Bump version 1.3.2.post5 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 1065bfcef..2e929d0bb 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post4' +__version__ = '1.3.2.post5' From 3733ede976d1168d1adb43ae96135de3b58e9a39 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 25 Jan 2017 16:55:31 -0800 Subject: [PATCH 058/291] Remove partial message from response messages --- kafka/consumer/kafka.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py index 5f54cfa0a..e16227f19 100644 --- a/kafka/consumer/kafka.py +++ b/kafka/consumer/kafka.py @@ -17,6 +17,7 @@ ) from kafka.metrics.metrics import Metrics from kafka.metrics.stats.rate import Rate +from kafka.protocol.message import PartialMessage from kafka.structs import ( FetchRequestPayload, OffsetCommitRequestPayload, OffsetFetchRequestPayload, OffsetRequestPayload @@ -405,6 +406,10 @@ def fetch_messages(self): # Track server highwater mark self._offsets.highwater[(topic, partition)] = resp.highwaterMark + # Check for partial message and remove + if resp.messages and isinstance(resp.messages[-1].message, PartialMessage): + resp.messages.pop() + # Yield each message # Kafka-python could raise an exception during iteration # we are not catching -- user will need to 
address From 2b348d660a2f4e8867cb42f26d8ba2cdc1596360 Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Thu, 26 Jan 2017 16:11:30 -0800 Subject: [PATCH 059/291] Bump version v1.3.2.post6 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 2e929d0bb..d7f251cdb 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post5' +__version__ = '1.3.2.post6' From c8659a1020fb8152a1a4934f20effa9d8accabb0 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 15 Dec 2016 09:33:06 -0800 Subject: [PATCH 060/291] Run unit tests on Docker --- Dockerfile | 4 ++-- Makefile | 6 +++++- run_utest.sh | 5 +++++ 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 run_utest.sh diff --git a/Dockerfile b/Dockerfile index baae5277f..6d777a999 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,8 +39,8 @@ COPY AUTHORS.md /work COPY CHANGES.md /work COPY MANIFEST.in /work COPY run_itest.sh /work +COPY run_utest.sh /work RUN chmod +x /work/run_itest.sh +RUN chmod +x /work/run_utest.sh WORKDIR /work - -CMD ["./run_itest.sh"] diff --git a/Makefile b/Makefile index 045cab27f..1e65744cb 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,13 @@ test: tox -e py27 tox -e py35 +unit_test_docker: + docker build -t kafka_python_test . + docker run kafka_python_test /work/run_utest.sh + itest: docker build -t kafka_python_test . - docker run kafka_python_test + docker run kafka_python_test /work/run_itest.sh clean: rm -rf kafka-python.egg-info/ .tox/ diff --git a/run_utest.sh b/run_utest.sh new file mode 100644 index 000000000..3922ab3a0 --- /dev/null +++ b/run_utest.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e + +tox -e py27 +tox -e py35 +tox -e pypy From 7237f1747f111b1cb0b4eba503343a751b5e159e Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 14 Feb 2017 17:36:23 -0800 Subject: [PATCH 061/291] Use selectors in simple client to avoid wasting CPU cycles --- kafka/client.py | 54 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index c27c1d2b8..4d2fb457c 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -7,6 +7,13 @@ import random import time +# selectors in stdlib as of py3.4 +try: + import selectors # pylint: disable=import-error +except ImportError: + # vendored backport module + from .vendor import selectors34 as selectors + from kafka.vendor import six import kafka.errors @@ -92,11 +99,12 @@ def _get_conn(self, host, port, afi): """Get or create a connection to a broker using host and port""" host_key = (host, port) if host_key not in self._conns: + self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, client_id=self.client_id, - metrics=self._metrics_registry + metrics=self._metrics_registry, ) conn = self._conns[host_key] @@ -202,7 +210,8 @@ def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn): request = encoder_fn(payloads=payloads) future = conn.send(request) - # Block + # Block, also waste CPU cycle here, but broker unaware requests + # shouldn't be very frequent. 
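The broker-unaware path here stays a simple blocking loop, while the broker-aware request path rewritten further down in this patch registers each connection's socket with a selector so that many brokers can be serviced without busy-waiting. A minimal sketch of that selectors pattern on plain, already-connected non-blocking sockets (not Kafka connections):

    import selectors
    import time

    def wait_for_readable(socks, timeout):
        # Return {socket: bytes} for every socket that became readable
        # before the overall timeout expired.
        selector = selectors.DefaultSelector()
        for sock in socks:
            selector.register(sock, selectors.EVENT_READ, data=sock)

        results = {}
        pending = set(socks)
        remaining = timeout
        while pending and remaining > 0:
            start = time.time()
            for key, _ in selector.select(remaining):
                sock = key.data
                results[sock] = sock.recv(4096)
                selector.unregister(sock)
                pending.discard(sock)
            remaining -= time.time() - start

        selector.close()
        return results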
while not future.is_done: conn.recv() @@ -269,9 +278,9 @@ def failed_payloads(payloads): topic_partition = (str(payload.topic), payload.partition) responses[topic_partition] = FailedPayloadsError(payload) - # For each BrokerConnection keep the real socket so that we can use - # a select to perform unblocking I/O - connections_by_future = {} + futures_by_connection = {} + selector = selectors.DefaultSelector() + for broker, broker_payloads in six.iteritems(payloads_by_broker): if broker is None: failed_payloads(broker_payloads) @@ -291,9 +300,13 @@ def failed_payloads(payloads): # send a response. This probably only applies to # ProduceRequest w/ acks = 0 expect_response = (decoder_fn is not None) + if expect_response: + selector.register(conn._sock, selectors.EVENT_READ, conn) future = conn.send(request, expect_response=expect_response) if future.failed(): + log.error("Request failed: %s", future.exception) + selector.unregister(conn._sock) refresh_metadata = True failed_payloads(broker_payloads) continue @@ -304,20 +317,24 @@ def failed_payloads(payloads): responses[topic_partition] = None continue - connections_by_future[future] = (conn, broker) + futures_by_connection[conn] = (future, broker) + + timeout = self.timeout + while futures_by_connection: + start_time = time.time() - conn = None - while connections_by_future: - futures = list(connections_by_future.keys()) - for future in futures: + ready = selector.select(timeout) - if not future.is_done: - conn, _ = connections_by_future[future] + for key, _ in ready: + + conn = key.data + future, _ = futures_by_connection[conn] + while not future.is_done: conn.recv() - continue + _, broker = futures_by_connection.pop(conn) - _, broker = connections_by_future.pop(future) if future.failed(): + log.error("Request failed: %s", future.exception) refresh_metadata = True failed_payloads(payloads_by_broker[broker]) @@ -327,9 +344,18 @@ def failed_payloads(payloads): payload_response.partition) responses[topic_partition] = payload_response + timeout -= time.time() - start_time + if timeout < 0: + log.error("%s requests timed out.", len(futures_by_connection)) + for _, broker in six.itervalues(futures_by_connection): + failed_payloads(payloads_by_broker[broker]) + refresh_metadata = True + break + if refresh_metadata: self.reset_all_metadata() + selector.close() # Return responses in the same order as provided return [responses[tp] for tp in original_ordering] From 4c049d0388f8e885fb4bc015b9ceb1a7cf6a5c3d Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Thu, 16 Feb 2017 14:35:03 -0800 Subject: [PATCH 062/291] Bump version to 1.3.2.post7 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index d7f251cdb..2d186c623 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post6' +__version__ = '1.3.2.post7' From 49d1be35dbf35ef4539e8be5e3e3ec28ec86700a Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 15 Mar 2017 16:14:46 -0700 Subject: [PATCH 063/291] Change warning to debug in conn.py Warning conflicts with our waf setup so set it to debug --- kafka/conn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index 6af0d8f19..1d964a9fc 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -36,7 +36,8 @@ ssl.SSLWantWriteError ssl.SSLZeroReturnError except: - log.warning('old ssl module detected.' + # Don't use warning as incompatible with our waf setup for ym + log.debug('old ssl module detected.' 
' ssl error handling may not operate cleanly.' ' Consider upgrading to python 3.5 or 2.7') ssl.SSLWantReadError = ssl.SSLError From bf345cdcba7a308581a20a39962ca592a347d8bd Mon Sep 17 00:00:00 2001 From: Nicholas Ngorok Date: Wed, 15 Mar 2017 16:42:23 -0700 Subject: [PATCH 064/291] Bump version to 1.3.2.post8 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 2d186c623..68617fd9e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post7' +__version__ = '1.3.2.post8' From f6a1e35ea5b0d6fbb9464d86eb48fdd1807c2839 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Tue, 28 Mar 2017 17:50:00 -0700 Subject: [PATCH 065/291] Fix group prefix in simple-client --- kafka/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/client.py b/kafka/client.py index 4d2fb457c..81251b104 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -105,6 +105,7 @@ def _get_conn(self, host, port, afi): request_timeout_ms=self.timeout * 1000, client_id=self.client_id, metrics=self._metrics_registry, + metric_group_prefix='simple-client', ) conn = self._conns[host_key] From cbf4b4e6773bbcf3cf6d6c90aabcd0b288fbf234 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Wed, 29 Mar 2017 13:33:31 -0700 Subject: [PATCH 066/291] Fix tests to support node_id in SimpleClient --- kafka/client.py | 17 +++++++++-------- kafka/version.py | 2 +- test/test_client.py | 28 +++++++++++++++++----------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 81251b104..ed673e9b3 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -79,7 +79,7 @@ def __init__(self, hosts, client_id=CLIENT_ID, # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout - self.hosts = collect_hosts(hosts) + self.hosts = [host + ('bootstrap',) for host in collect_hosts(hosts)] self.correlation_id = correlation_id self._metrics_registry = metrics self.metrics = SimpleClientMetrics(metrics if metrics else Metrics()) @@ -95,7 +95,7 @@ def __init__(self, hosts, client_id=CLIENT_ID, # Private API # ################## - def _get_conn(self, host, port, afi): + def _get_conn(self, host, port, afi, node_id='bootstrap'): """Get or create a connection to a broker using host and port""" host_key = (host, port) if host_key not in self._conns: @@ -106,6 +106,7 @@ def _get_conn(self, host, port, afi): client_id=self.client_id, metrics=self._metrics_registry, metric_group_prefix='simple-client', + node_id=node_id, ) conn = self._conns[host_key] @@ -193,17 +194,17 @@ def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn): brokers. Keep trying until you succeed. 
""" hosts = set() - for broker in self.brokers.values(): + for node_id, broker in self.brokers.items(): host, port, afi = get_ip_port_afi(broker.host) - hosts.add((host, broker.port, afi)) + hosts.add((host, broker.port, afi, node_id)) hosts.update(self.hosts) hosts = list(hosts) random.shuffle(hosts) - for (host, port, afi) in hosts: + for (host, port, afi, node_id) in hosts: try: - conn = self._get_conn(host, port, afi) + conn = self._get_conn(host, port, afi, node_id) except ConnectionError: log.warning("Skipping unconnected connection: %s:%s (AFI %s)", host, port, afi) @@ -290,7 +291,7 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: - conn = self._get_conn(host, broker.port, afi) + conn = self._get_conn(host, broker.port, afi, broker.nodeId) except ConnectionError: refresh_metadata = True failed_payloads(broker_payloads) @@ -412,7 +413,7 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: - conn = self._get_conn(host, broker.port, afi) + conn = self._get_conn(host, broker.port, afi, broker.nodeId) except ConnectionError: failed_payloads(payloads) diff --git a/kafka/version.py b/kafka/version.py index 2d186c623..68617fd9e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post7' +__version__ = '1.3.2.post8' diff --git a/test/test_client.py b/test/test_client.py index 79ac8bedf..da28cabb5 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -36,27 +36,33 @@ def test_init_with_list(self): client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092', 'kafka03:9092']) self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) def test_init_with_csv(self): with patch.object(SimpleClient, 'load_metadata_for_topics'): client = SimpleClient(hosts='kafka01:9092,kafka02:9092,kafka03:9092') self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) def test_init_with_unicode_csv(self): with patch.object(SimpleClient, 'load_metadata_for_topics'): client = SimpleClient(hosts=u'kafka01:9092,kafka02:9092,kafka03:9092') self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) @patch.object(SimpleClient, '_get_conn') @patch.object(SimpleClient, 'load_metadata_for_topics') @@ -68,7 +74,7 @@ def test_send_broker_unaware_request_fail(self, load_metadata, conn): for val in mocked_conns.values(): mock_conn(val, success=False) - def mock_get_conn(host, port, afi): + def mock_get_conn(host, port, afi, node_id='bootstrap'): return mocked_conns[(host, port)] conn.side_effect = mock_get_conn @@ -96,7 +102,7 @@ def test_send_broker_unaware_request(self): mocked_conns[('kafka02', 9092)].send.return_value = 
future mocked_conns[('kafka02', 9092)].recv.side_effect = lambda: future.success('valid response') - def mock_get_conn(host, port, afi): + def mock_get_conn(host, port, afi, node_id='bootstrap'): return mocked_conns[(host, port)] # patch to avoid making requests before we want it From daa545ff53f13e2fda9481136c01a8369f33ed63 Mon Sep 17 00:00:00 2001 From: Enrico Canzonieri Date: Wed, 29 Mar 2017 16:15:58 -0700 Subject: [PATCH 067/291] Bump version 1.3.2.post9 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 68617fd9e..f0db02f68 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post8' +__version__ = '1.3.2.post9' From ab6e7bea99ac2379d8ed3e9f79add08e6f0a7a4f Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 29 Dec 2016 09:17:22 -0800 Subject: [PATCH 068/291] Resolve commits --- CHANGES.md | 50 +++++++++++++++++++++++++++++++++++++++ docs/changelog.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 5cefcb82d..b0e01f2dc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,53 @@ +# 1.3.2 (Dec 28, 2016) + +Core +* Add kafka.serializer interfaces (dpkp 912) +* from kafka import ConsumerRebalanceListener, OffsetAndMetadata +* Use 0.10.0.1 for integration tests (dpkp 803) + +Consumer +* KAFKA-3007: KafkaConsumer max_poll_records (dpkp 831) +* Raise exception if given a non-str topic (ssaamm 824) +* Immediately update metadata for pattern subscription (laz2 915) + +Producer +* Update Partitioners for use with KafkaProducer (barrotsteindev 827) +* Sort partitions before calling partitioner (ms7s 905) +* Added ssl_password config option to KafkaProducer class (kierkegaard13 830) + +Client +* Always check for request timeouts (dpkp 887) +* When hostname lookup is necessary, do every connect (benauthor 812) + +Bugfixes +* Fix errorcode check when socket.connect_ex raises an exception (guojh 907) +* Fix fetcher bug when processing offset out of range (sibiryakov 860) +* Fix possible request draining in ensure_active_group (dpkp 896) +* Fix metadata refresh handling with 0.10+ brokers when topic list is empty (sibiryakov 867) +* KafkaProducer should set timestamp in Message if provided (Drizzt1991 875) +* Fix murmur2 bug handling python2 bytes that do not ascii encode (dpkp 815) +* Monkeypatch max_in_flight_requests_per_connection when checking broker version (dpkp 834) +* Fix message timestamp_type (qix 828) + +Logging / Error Messages +* Always include an error for logging when the coordinator is marked dead (dpkp 890) +* Only string-ify BrokerResponseError args if provided (dpkp 889) +* Update warning re advertised.listeners / advertised.host.name (jeffwidman 878) +* Fix unrecognized sasl_mechanism error message (sharego 883) + +Documentation +* Add docstring for max_records (jeffwidman 897) +* Fixup doc references to max_in_flight_requests_per_connection +* Fix typo: passowrd --> password (jeffwidman 901) +* Fix documentation typo 'Defualt' -> 'Default'. 
(rolando 895) +* Added doc for `max_poll_records` option (Drizzt1991 881) +* Remove old design notes from Kafka 8 era (jeffwidman 876) +* Fix documentation typos (jeffwidman 874) +* Fix quota violation exception message (dpkp 809) +* Add comment for round robin partitioner with different subscriptions +* Improve KafkaProducer docstring for retries configuration + + # 1.3.1 (Aug 8, 2016) Bugfixes diff --git a/docs/changelog.rst b/docs/changelog.rst index c56a432e1..9d265388c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,64 @@ Changelog ========= +1.3.2 (Dec 28, 2016) +#################### + +Core +---- +* Add kafka.serializer interfaces (dpkp 912) +* from kafka import ConsumerRebalanceListener, OffsetAndMetadata +* Use 0.10.0.1 for integration tests (dpkp 803) + +Consumer +-------- +* KAFKA-3007: KafkaConsumer max_poll_records (dpkp 831) +* Raise exception if given a non-str topic (ssaamm 824) +* Immediately update metadata for pattern subscription (laz2 915) + +Producer +-------- +* Update Partitioners for use with KafkaProducer (barrotsteindev 827) +* Sort partitions before calling partitioner (ms7s 905) +* Added ssl_password config option to KafkaProducer class (kierkegaard13 830) + +Client +------ +* Always check for request timeouts (dpkp 887) +* When hostname lookup is necessary, do every connect (benauthor 812) + +Bugfixes +-------- +* Fix errorcode check when socket.connect_ex raises an exception (guojh 907) +* Fix fetcher bug when processing offset out of range (sibiryakov 860) +* Fix possible request draining in ensure_active_group (dpkp 896) +* Fix metadata refresh handling with 0.10+ brokers when topic list is empty (sibiryakov 867) +* KafkaProducer should set timestamp in Message if provided (Drizzt1991 875) +* Fix murmur2 bug handling python2 bytes that do not ascii encode (dpkp 815) +* Monkeypatch max_in_flight_requests_per_connection when checking broker version (dpkp 834) +* Fix message timestamp_type (qix 828) + +Logging / Error Messages +------------------------ +* Always include an error for logging when the coordinator is marked dead (dpkp 890) +* Only string-ify BrokerResponseError args if provided (dpkp 889) +* Update warning re advertised.listeners / advertised.host.name (jeffwidman 878) +* Fix unrecognized sasl_mechanism error message (sharego 883) + +Documentation +------------- +* Add docstring for max_records (jeffwidman 897) +* Fixup doc references to max_in_flight_requests_per_connection +* Fix typo: passowrd --> password (jeffwidman 901) +* Fix documentation typo 'Defualt' -> 'Default'. 
(rolando 895) +* Added doc for `max_poll_records` option (Drizzt1991 881) +* Remove old design notes from Kafka 8 era (jeffwidman 876) +* Fix documentation typos (jeffwidman 874) +* Fix quota violation exception message (dpkp 809) +* Add comment for round robin partitioner with different subscriptions +* Improve KafkaProducer docstring for retries configuration + + 1.3.1 (Aug 8, 2016) ################### From 4ca3d4aadfd600301dea37f03b92a3004125eaec Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 29 Dec 2016 09:18:31 -0800 Subject: [PATCH 069/291] Bump version for development --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index f0db02f68..58419f85f 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.2.post9' +__version__ = '1.3.3.dev' From 0d48361b31563951dc02967643263ca51375bc15 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 4 Jan 2017 14:49:05 -0800 Subject: [PATCH 070/291] Make SSL warning list the correct Python versions (#924) --- kafka/conn.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 1d964a9fc..c9ce76fe8 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -36,10 +36,9 @@ ssl.SSLWantWriteError ssl.SSLZeroReturnError except: - # Don't use warning as incompatible with our waf setup for ym - log.debug('old ssl module detected.' - ' ssl error handling may not operate cleanly.' - ' Consider upgrading to python 3.5 or 2.7') + log.debug('Old SSL module detected.' + ' SSL error handling may not operate cleanly.' + ' Consider upgrading to Python 3.3 or 2.7.9') ssl.SSLWantReadError = ssl.SSLError ssl.SSLWantWriteError = ssl.SSLError ssl.SSLZeroReturnError = ssl.SSLError From 6d6b7c2013e6d9a53761968a1c36ced880f41e0f Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 11 Jan 2017 17:18:02 -0800 Subject: [PATCH 071/291] Fix typo: coorelation --> correlation (#929) --- kafka/conn.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index c9ce76fe8..cc860caea 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -686,13 +686,13 @@ def _process_response(self, read_buffer): ifr.correlation_id != 0 and recv_correlation_id == 0): log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse' - ' coorelation id does not match request. This' + ' Correlation ID does not match request. This' ' should go away once at least one topic has been' - ' initialized on the broker') + ' initialized on the broker.') elif ifr.correlation_id != recv_correlation_id: error = Errors.CorrelationIdError( - '%s: Correlation ids do not match: sent %d, recv %d' + '%s: Correlation IDs do not match: sent %d, recv %d' % (str(self), ifr.correlation_id, recv_correlation_id)) ifr.future.failure(error) self.close() @@ -748,7 +748,7 @@ def check_version(self, timeout=2, strict=False): stashed[key] = self.config[key] self.config[key] = override_config[key] - # kafka kills the connection when it doesnt recognize an API request + # kafka kills the connection when it doesn't recognize an API request # so we can send a test request and then follow immediately with a # vanilla MetadataRequest. If the server did not recognize the first # request, both will be failed with a ConnectionError that wraps @@ -1014,7 +1014,7 @@ def get_ip_port_afi(host_and_port_str): return host_and_port_str, DEFAULT_KAFKA_PORT, socket.AF_INET6 except AttributeError: log.warning('socket.inet_pton not available on this platform.' 
- ' consider pip install win_inet_pton') + ' consider `pip install win_inet_pton`') pass except (ValueError, socket.error): # it's a host:port pair From ef6aeca4647c7deca98b17f310e82d32b630727c Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 11 Jan 2017 17:18:28 -0800 Subject: [PATCH 072/291] Update pytest fixtures to new yield syntax (#919) --- test/conftest.py | 12 ++++-------- test/test_metrics.py | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 79ad0742c..e85b977c8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -18,10 +18,8 @@ def version(): def zookeeper(version, request): assert version zk = ZookeeperFixture.instance() - def fin(): - zk.close() - request.addfinalizer(fin) - return zk + yield zk + zk.close() @pytest.fixture(scope="module") @@ -29,10 +27,8 @@ def kafka_broker(version, zookeeper, request): assert version k = KafkaFixture.instance(0, zookeeper.host, zookeeper.port, partitions=4) - def fin(): - k.close() - request.addfinalizer(fin) - return k + yield k + k.close() @pytest.fixture diff --git a/test/test_metrics.py b/test/test_metrics.py index e4757d66f..8d35f5534 100644 --- a/test/test_metrics.py +++ b/test/test_metrics.py @@ -32,8 +32,8 @@ def reporter(): @pytest.fixture def metrics(request, config, reporter): metrics = Metrics(config, [reporter], enable_expiration=True) - request.addfinalizer(lambda: metrics.close()) - return metrics + yield metrics + metrics.close() def test_MetricName(): From 44c679773fa70eacfd1a20ece71557028a3fd172 Mon Sep 17 00:00:00 2001 From: melissacrawford396 Date: Wed, 11 Jan 2017 20:19:38 -0500 Subject: [PATCH 073/291] Spelling and grammar changes (#923) --- kafka/consumer/base.py | 2 +- kafka/consumer/group.py | 179 +++++++++++++++++++------------------- test/test_client_async.py | 2 +- test/testutil.py | 2 +- 4 files changed, 92 insertions(+), 93 deletions(-) diff --git a/kafka/consumer/base.py b/kafka/consumer/base.py index 28250f67c..9eb695fe5 100644 --- a/kafka/consumer/base.py +++ b/kafka/consumer/base.py @@ -120,7 +120,7 @@ def fetch_last_known_offsets(self, partitions=None): for resp in responses: try: check_error(resp) - # API spec says server wont set an error here + # API spec says server won't set an error here # but 0.8.1.1 does actually... except UnknownTopicOrPartitionError: pass diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 3ab68a7d3..f38ff39f8 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -42,12 +42,12 @@ class KafkaConsumer(six.Iterator): It just needs to have at least one broker that will respond to a Metadata API Request. Default port is 9092. If no servers are specified, will default to localhost:9092. - client_id (str): a name for this client. This string is passed in + client_id (str): A name for this client. This string is passed in each request to servers and can be used to identify specific server-side log entries that correspond to this client. Also submitted to GroupCoordinator for logging with respect to consumer group administration. Default: 'kafka-python-{version}' - group_id (str or None): name of the consumer group to join for dynamic + group_id (str or None): The name of the consumer group to join for dynamic partition assignment (if enabled), and to use for fetching and committing offsets. If None, auto-partition assignment (via group coordinator) and offset commits are disabled. 
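The fixture changes in the test patch above replace request.addfinalizer with pytest's yield syntax, where everything after the yield runs as teardown once the fixture goes out of scope. A small self-contained example of the idiom (the Resource class is purely illustrative):

    import pytest

    class Resource(object):
        def __init__(self):
            self.closed = False

        def close(self):
            self.closed = True

    @pytest.fixture
    def resource():
        r = Resource()
        yield r      # the value handed to the test
        r.close()    # teardown; replaces request.addfinalizer(r.close)

    def test_resource_starts_open(resource):
        assert not resource.closed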
@@ -84,21 +84,21 @@ class KafkaConsumer(six.Iterator): auto_offset_reset (str): A policy for resetting offsets on OffsetOutOfRange errors: 'earliest' will move to the oldest available message, 'latest' will move to the most recent. Any - ofther value will raise the exception. Default: 'latest'. - enable_auto_commit (bool): If true the consumer's offset will be + other value will raise the exception. Default: 'latest'. + enable_auto_commit (bool): If True, the consumer's offset will be periodically committed in the background. Default: True. - auto_commit_interval_ms (int): milliseconds between automatic + auto_commit_interval_ms (int): Number of milliseconds between automatic offset commits, if enable_auto_commit is True. Default: 5000. - default_offset_commit_callback (callable): called as + default_offset_commit_callback (callable): Called as callback(offsets, response) response will be either an Exception - or a OffsetCommitResponse struct. This callback can be used to + or an OffsetCommitResponse struct. This callback can be used to trigger custom actions when a commit request completes. check_crcs (bool): Automatically check the CRC32 of the records consumed. This ensures no on-the-wire or on-disk corruption to the messages occurred. This check adds some overhead, so it may be disabled in cases seeking extreme performance. Default: True metadata_max_age_ms (int): The period of time in milliseconds after - which we force a refresh of metadata even if we haven't seen any + which we force a refresh of metadata, even if we haven't seen any partition leadership changes to proactively discover any new brokers or partitions. Default: 300000 partition_assignment_strategy (list): List of objects to use to @@ -115,8 +115,9 @@ class KafkaConsumer(six.Iterator): adjusted even lower to control the expected time for normal rebalances. Default: 3000 session_timeout_ms (int): The timeout used to detect failures when - using Kafka's group managementment facilities. Default: 30000 - max_poll_records (int): .... + using Kafka's group management facilities. Default: 30000 + max_poll_records (int): The maximum number of records returned in a + single call to poll(). receive_buffer_bytes (int): The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. Default: None (relies on system defaults). The java client defaults to 32768. @@ -138,27 +139,27 @@ class KafkaConsumer(six.Iterator): set this option to True. Default: False. security_protocol (str): Protocol used to communicate with brokers. Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. - ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping + ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping socket connections. If provided, all other ssl_* configurations will be ignored. Default: None. - ssl_check_hostname (bool): flag to configure whether ssl handshake + ssl_check_hostname (bool): Flag to configure whether ssl handshake should verify that the certificate matches the brokers hostname. - default: true. - ssl_cafile (str): optional filename of ca file to use in certificate - verification. default: none. - ssl_certfile (str): optional filename of file in pem format containing + Default: True. + ssl_cafile (str): Optional filename of ca file to use in certificate + verification. Default: None. + ssl_certfile (str): Optional filename of file in pem format containing the client certificate, as well as any ca certificates needed to - establish the certificate's authenticity. default: none. 
- ssl_keyfile (str): optional filename containing the client private key. - default: none. - ssl_password (str): optional password to be used when loading the - certificate chain. default: None. - ssl_crlfile (str): optional filename containing the CRL to check for + establish the certificate's authenticity. Default: None. + ssl_keyfile (str): Optional filename containing the client private key. + Default: None. + ssl_password (str): Optional password to be used when loading the + certificate chain. Default: None. + ssl_crlfile (str): Optional filename containing the CRL to check for certificate expiration. By default, no CRL check is done. When providing a file, only the leaf certificate will be checked against this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. - default: none. - api_version (tuple): specify which kafka API version to use. + Default: None. + api_version (tuple): Specify which kafka API version to use. If set to None, the client will attempt to infer the broker version by probing various APIs. Default: None Examples: @@ -188,13 +189,13 @@ class KafkaConsumer(six.Iterator): (such as offsets) should be exposed to the consumer. If set to True the only way to receive records from an internal topic is subscribing to it. Requires 0.10+ Default: True - sasl_mechanism (str): string picking sasl mechanism when security_protocol + sasl_mechanism (str): String picking sasl mechanism when security_protocol is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. Default: None - sasl_plain_username (str): username for sasl PLAIN authentication. + sasl_plain_username (str): Username for sasl PLAIN authentication. + Default: None + sasl_plain_password (str): Password for sasl PLAIN authentication. Default: None - sasl_plain_password (str): password for sasl PLAIN authentication. - Defualt: None Note: Configuration parameters are described in more detail at @@ -238,7 +239,7 @@ class KafkaConsumer(six.Iterator): 'ssl_password': None, 'api_version': None, 'api_version_auto_timeout_ms': 2000, - 'connections_max_idle_ms': 9 * 60 * 1000, # not implemented yet + 'connections_max_idle_ms': 9 * 60 * 1000, # Not implemented yet 'metric_reporters': [], 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, @@ -274,7 +275,7 @@ def __init__(self, *topics, **configs): self._metrics = Metrics(metric_config, reporters) # TODO _metrics likely needs to be passed to KafkaClient, etc. - # api_version was previously a str. accept old format for now + # api_version was previously a str. Accept old format for now if isinstance(self.config['api_version'], str): str_version = self.config['api_version'] if str_version == 'auto': @@ -309,10 +310,10 @@ def assign(self, partitions): """Manually assign a list of TopicPartitions to this consumer. Arguments: - partitions (list of TopicPartition): assignment for this instance. + partitions (list of TopicPartition): Assignment for this instance. Raises: - IllegalStateError: if consumer has already called subscribe() + IllegalStateError: If consumer has already called subscribe() Warning: It is not possible to use both manual partition assignment with @@ -338,7 +339,7 @@ def assignment(self): simply return the same partitions that were previously assigned. 
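The ssl_* options documented in the hunk above map one-to-one onto KafkaConsumer keyword arguments. A configuration sketch with placeholder broker address and certificate paths (none of these values come from this repository):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(
        'secure-topic',                           # placeholder topic
        bootstrap_servers='broker.example:9093',  # placeholder address
        security_protocol='SSL',
        ssl_check_hostname=True,
        ssl_cafile='/path/to/ca.pem',             # placeholder paths
        ssl_certfile='/path/to/client-cert.pem',
        ssl_keyfile='/path/to/client-key.pem',
    )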
If topics were subscribed using subscribe(), then this will give the set of topic partitions currently assigned to the consumer (which may - be none if the assignment hasn't happened yet, or if the partitions are + be None if the assignment hasn't happened yet, or if the partitions are in the process of being reassigned). Returns: @@ -366,7 +367,7 @@ def close(self): log.debug("The KafkaConsumer has closed.") def commit_async(self, offsets=None, callback=None): - """Commit offsets to kafka asynchronously, optionally firing callback + """Commit offsets to kafka asynchronously, optionally firing callback. This commits offsets only to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance and also on @@ -380,10 +381,10 @@ def commit_async(self, offsets=None, callback=None): Arguments: offsets (dict, optional): {TopicPartition: OffsetAndMetadata} dict - to commit with the configured group_id. Defaults to current + to commit with the configured group_id. Defaults to currently consumed offsets for all subscribed partitions. - callback (callable, optional): called as callback(offsets, response) - with response as either an Exception or a OffsetCommitResponse + callback (callable, optional): Called as callback(offsets, response) + with response as either an Exception or an OffsetCommitResponse struct. This callback can be used to trigger custom actions when a commit request completes. @@ -400,7 +401,7 @@ def commit_async(self, offsets=None, callback=None): return future def commit(self, offsets=None): - """Commit offsets to kafka, blocking until success or error + """Commit offsets to kafka, blocking until success or error. This commits offsets only to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance and also on @@ -412,11 +413,11 @@ def commit(self, offsets=None): Blocks until either the commit succeeds or an unrecoverable error is encountered (in which case it is thrown to the caller). - Currently only supports kafka-topic offset storage (not zookeeper) + Currently only supports kafka-topic offset storage (not zookeeper). Arguments: offsets (dict, optional): {TopicPartition: OffsetAndMetadata} dict - to commit with the configured group_id. Defaults to current + to commit with the configured group_id. Defaults to currently consumed offsets for all subscribed partitions. """ assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1' @@ -426,7 +427,7 @@ def commit(self, offsets=None): self._coordinator.commit_offsets_sync(offsets) def committed(self, partition): - """Get the last committed offset for the given partition + """Get the last committed offset for the given partition. This offset will be used as the position for the consumer in the event of a failure. @@ -436,7 +437,7 @@ def committed(self, partition): initialized its cache of committed offsets. Arguments: - partition (TopicPartition): the partition to check + partition (TopicPartition): The partition to check. Returns: The last committed offset, or None if there was no prior commit. @@ -479,10 +480,10 @@ def partitions_for_topic(self, topic): """Get metadata about the partitions for a given topic. Arguments: - topic (str): topic to check + topic (str): Topic to check. Returns: - set: partition ids + set: Partition ids """ return self._client.cluster.partitions_for_topic(topic) @@ -498,20 +499,20 @@ def poll(self, timeout_ms=0, max_records=None): Incompatible with iterator interface -- use one or the other, not both. 
Arguments: - timeout_ms (int, optional): milliseconds spent waiting in poll if + timeout_ms (int, optional): Milliseconds spent waiting in poll if data is not available in the buffer. If 0, returns immediately with any records that are available currently in the buffer, else returns empty. Must not be negative. Default: 0 Returns: - dict: topic to list of records since the last fetch for the - subscribed list of topics and partitions + dict: Topic to list of records since the last fetch for the + subscribed list of topics and partitions. """ assert timeout_ms >= 0, 'Timeout must not be negative' if max_records is None: max_records = self.config['max_poll_records'] - # poll for new data until the timeout expires + # Poll for new data until the timeout expires start = time.time() remaining = timeout_ms while True: @@ -526,15 +527,14 @@ def poll(self, timeout_ms=0, max_records=None): return {} def _poll_once(self, timeout_ms, max_records): - """ - Do one round of polling. In addition to checking for new data, this does + """Do one round of polling. In addition to checking for new data, this does any needed heart-beating, auto-commits, and offset updates. Arguments: - timeout_ms (int): The maximum time in milliseconds to block + timeout_ms (int): The maximum time in milliseconds to block. Returns: - dict: map of topic to list of records (may be empty) + dict: Map of topic to list of records (may be empty). """ if self._use_consumer_group(): self._coordinator.ensure_coordinator_known() @@ -544,16 +544,16 @@ def _poll_once(self, timeout_ms, max_records): elif self.config['group_id'] is not None and self.config['api_version'] >= (0, 8, 2): self._coordinator.ensure_coordinator_known() - # fetch positions if we have partitions we're subscribed to that we + # Fetch positions if we have partitions we're subscribed to that we # don't know the offset for if not self._subscription.has_all_fetch_positions(): self._update_fetch_positions(self._subscription.missing_fetch_positions()) - # if data is available already, e.g. from a previous network client + # If data is available already, e.g. from a previous network client # poll() call to commit, then just return it immediately records, partial = self._fetcher.fetched_records(max_records) if records: - # before returning the fetched records, we can send off the + # Before returning the fetched records, we can send off the # next round of fetches and avoid block waiting for their # responses to enable pipelining while the user is handling the # fetched records. @@ -561,7 +561,7 @@ def _poll_once(self, timeout_ms, max_records): self._fetcher.send_fetches() return records - # send any new fetches (won't resend pending fetches) + # Send any new fetches (won't resend pending fetches) self._fetcher.send_fetches() self._client.poll(timeout_ms=timeout_ms, sleep=True) @@ -572,10 +572,10 @@ def position(self, partition): """Get the offset of the next record that will be fetched Arguments: - partition (TopicPartition): partition to check + partition (TopicPartition): Partition to check Returns: - int: offset + int: Offset """ if not isinstance(partition, TopicPartition): raise TypeError('partition must be a TopicPartition namedtuple') @@ -587,7 +587,7 @@ def position(self, partition): return offset def highwater(self, partition): - """Last known highwater offset for a partition + """Last known highwater offset for a partition. A highwater offset is the offset that will be assigned to the next message that is produced. 
It may be useful for calculating lag, by @@ -600,10 +600,10 @@ def highwater(self, partition): yet. Arguments: - partition (TopicPartition): partition to check + partition (TopicPartition): Partition to check Returns: - int or None: offset if available + int or None: Offset if available """ if not isinstance(partition, TopicPartition): raise TypeError('partition must be a TopicPartition namedtuple') @@ -619,7 +619,7 @@ def pause(self, *partitions): group rebalance when automatic assignment is used. Arguments: - *partitions (TopicPartition): partitions to pause + *partitions (TopicPartition): Partitions to pause. """ if not all([isinstance(p, TopicPartition) for p in partitions]): raise TypeError('partitions must be TopicPartition namedtuples') @@ -639,7 +639,7 @@ def resume(self, *partitions): """Resume fetching from the specified (paused) partitions. Arguments: - *partitions (TopicPartition): partitions to resume + *partitions (TopicPartition): Partitions to resume. """ if not all([isinstance(p, TopicPartition) for p in partitions]): raise TypeError('partitions must be TopicPartition namedtuples') @@ -657,11 +657,11 @@ def seek(self, partition, offset): to reset the fetch offsets. Arguments: - partition (TopicPartition): partition for seek operation - offset (int): message offset in partition + partition (TopicPartition): Partition for seek operation + offset (int): Message offset in partition Raises: - AssertionError: if offset is not an int >= 0; or if partition is not + AssertionError: If offset is not an int >= 0; or if partition is not currently assigned. """ if not isinstance(partition, TopicPartition): @@ -675,12 +675,12 @@ def seek_to_beginning(self, *partitions): """Seek to the oldest available offset for partitions. Arguments: - *partitions: optionally provide specific TopicPartitions, otherwise - default to all assigned partitions + *partitions: Optionally provide specific TopicPartitions, otherwise + default to all assigned partitions. Raises: - AssertionError: if any partition is not currently assigned, or if - no partitions are assigned + AssertionError: If any partition is not currently assigned, or if + no partitions are assigned. """ if not all([isinstance(p, TopicPartition) for p in partitions]): raise TypeError('partitions must be TopicPartition namedtuples') @@ -699,12 +699,12 @@ def seek_to_end(self, *partitions): """Seek to the most recent available offset for partitions. Arguments: - *partitions: optionally provide specific TopicPartitions, otherwise - default to all assigned partitions + *partitions: Optionally provide specific TopicPartitions, otherwise + default to all assigned partitions. Raises: - AssertionError: if any partition is not currently assigned, or if - no partitions are assigned + AssertionError: If any partition is not currently assigned, or if + no partitions are assigned. """ if not all([isinstance(p, TopicPartition) for p in partitions]): raise TypeError('partitions must be TopicPartition namedtuples') @@ -720,13 +720,13 @@ def seek_to_end(self, *partitions): self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST) def subscribe(self, topics=(), pattern=None, listener=None): - """Subscribe to a list of topics, or a topic regex pattern + """Subscribe to a list of topics, or a topic regex pattern. Partitions will be dynamically assigned via a group coordinator. Topic subscriptions are not incremental: this list will replace the current assignment (if there is one). 
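The seek and assignment docstrings being polished here describe the manual-assignment workflow, which is mutually exclusive with subscribe(). A short sketch of how those calls fit together (broker address and topic are placeholders):

    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
    tp = TopicPartition('my-topic', 0)                            # placeholder topic

    consumer.assign([tp])           # manual assignment; incompatible with subscribe()
    consumer.seek_to_beginning(tp)  # reset to the oldest available offset
    consumer.seek(tp, 42)           # or jump to an explicit offset for the next fetch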
- This method is incompatible with assign() + This method is incompatible with assign(). Arguments: topics (list): List of topics for subscription. @@ -755,16 +755,16 @@ def subscribe(self, topics=(), pattern=None, listener=None): through this interface are from topics subscribed in this call. Raises: - IllegalStateError: if called after previously calling assign() - AssertionError: if neither topics or pattern is provided - TypeError: if listener is not a ConsumerRebalanceListener + IllegalStateError: If called after previously calling assign(). + AssertionError: If neither topics or pattern is provided. + TypeError: If listener is not a ConsumerRebalanceListener. """ # SubscriptionState handles error checking self._subscription.subscribe(topics=topics, pattern=pattern, listener=listener) - # regex will need all topic metadata + # Regex will need all topic metadata if pattern is not None: self._client.cluster.need_all_topic_metadata = True self._client.set_topics([]) @@ -816,25 +816,24 @@ def _use_consumer_group(self): return True def _update_fetch_positions(self, partitions): - """ - Set the fetch position to the committed position (if there is one) + """Set the fetch position to the committed position (if there is one) or reset it using the offset reset policy the user has configured. Arguments: partitions (List[TopicPartition]): The partitions that need - updating fetch positions + updating fetch positions. Raises: NoOffsetForPartitionError: If no offset is stored for a given - partition and no offset reset policy is defined + partition and no offset reset policy is defined. """ if (self.config['api_version'] >= (0, 8, 1) and self.config['group_id'] is not None): - # refresh commits for all assigned partitions + # Refresh commits for all assigned partitions self._coordinator.refresh_committed_offsets_if_needed() - # then do any offset lookups in case some positions are not known + # Then, do any offset lookups in case some positions are not known self._fetcher.update_fetch_positions(partitions) def _message_generator(self): @@ -849,7 +848,7 @@ def _message_generator(self): elif self.config['group_id'] is not None and self.config['api_version'] >= (0, 8, 2): self._coordinator.ensure_coordinator_known() - # fetch offsets for any subscribed partitions that we arent tracking yet + # Fetch offsets for any subscribed partitions that we arent tracking yet if not self._subscription.has_all_fetch_positions(): partitions = self._subscription.missing_fetch_positions() self._update_fetch_positions(partitions) @@ -884,9 +883,9 @@ def _message_generator(self): log.debug("internal iterator timeout - breaking for poll") break - # an else block on a for loop only executes if there was no break + # An else block on a for loop only executes if there was no break # so this should only be called on a StopIteration from the fetcher - # and we assume that it is safe to init_fetches when fetcher is done + # We assume that it is safe to init_fetches when fetcher is done # i.e., there are no more records stored internally else: self._fetcher.send_fetches() @@ -928,7 +927,7 @@ def _set_consumer_timeout(self): self._consumer_timeout = time.time() + ( self.config['consumer_timeout_ms'] / 1000.0) - # old KafkaConsumer methods are deprecated + # Old KafkaConsumer methods are deprecated def configure(self, **configs): raise NotImplementedError( 'deprecated -- initialize a new consumer') diff --git a/test/test_client_async.py b/test/test_client_async.py index b165f931e..b3873cad4 100644 --- a/test/test_client_async.py 
+++ b/test/test_client_async.py @@ -72,7 +72,7 @@ def test_bootstrap_failure(conn): def test_can_connect(cli, conn): - # Node is not in broker metadata - cant connect + # Node is not in broker metadata - can't connect assert not cli._can_connect(2) # Node is in broker metadata but not in _conns diff --git a/test/testutil.py b/test/testutil.py index a6f4421c6..c247e6ad7 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -114,7 +114,7 @@ def current_offset(self, topic, partition): try: offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)]) except: - # XXX: We've seen some UnknownErrors here and cant debug w/o server logs + # XXX: We've seen some UnknownErrors here and can't debug w/o server logs self.zk.child.dump_logs() self.server.child.dump_logs() raise From 1b253bc4aabb6f79aab848b904016d93f49e4f7c Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 12 Jan 2017 18:03:05 -0800 Subject: [PATCH 074/291] Remove non-pip install instructions (#940) --- docs/install.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 4dca5d06a..9720d65a1 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -22,20 +22,6 @@ Bleeding-Edge git clone https://github.com/dpkp/kafka-python pip install ./kafka-python -Setuptools: - -.. code:: bash - - git clone https://github.com/dpkp/kafka-python - easy_install ./kafka-python - -Using `setup.py` directly: - -.. code:: bash - - git clone https://github.com/dpkp/kafka-python - cd kafka-python - python setup.py install Optional LZ4 install ******************** From 676df5321a3da3e136b60a0a6ac37cdf27fe75a0 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 18 Jan 2017 16:51:03 -0800 Subject: [PATCH 075/291] Default max_poll_records to Java default of 500 (#947) --- kafka/consumer/group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index f38ff39f8..670b540ff 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -117,7 +117,7 @@ class KafkaConsumer(six.Iterator): session_timeout_ms (int): The timeout used to detect failures when using Kafka's group management facilities. Default: 30000 max_poll_records (int): The maximum number of records returned in a - single call to poll(). + single call to poll(). Default: 500 receive_buffer_bytes (int): The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. Default: None (relies on system defaults). The java client defaults to 32768. 
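# Illustrative usage sketch, not part of the patch: it shows the effect of the
# Java-aligned max_poll_records default of 500 introduced by this change. The
# broker address and topic name below are assumptions made only for the example.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'example-topic',
    bootstrap_servers='localhost:9092',
    max_poll_records=500,  # explicit here only for clarity; 500 is now the default
)
# poll() returns at most max_poll_records messages per call,
# as a dict of {TopicPartition: [ConsumerRecord, ...]}
batch = consumer.poll(timeout_ms=1000)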
@@ -223,7 +223,7 @@ class KafkaConsumer(six.Iterator): 'partition_assignment_strategy': (RangePartitionAssignor, RoundRobinPartitionAssignor), 'heartbeat_interval_ms': 3000, 'session_timeout_ms': 30000, - 'max_poll_records': sys.maxsize, + 'max_poll_records': 500, 'receive_buffer_bytes': None, 'send_buffer_bytes': None, 'socket_options': [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)], From 1f11d2f44762602aedba6251c16d6be3a1b2b4c8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 18 Jan 2017 16:51:58 -0800 Subject: [PATCH 076/291] Add CreateTopics / DeleteTopics Structs (#944) --- kafka/protocol/admin.py | 62 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index 747684fe9..99ec1770e 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from .struct import Struct -from .types import Array, Bytes, Int16, Schema, String +from .types import Array, Bytes, Int16, Int32, Schema, String class ApiVersionResponse_v0(Struct): @@ -12,7 +12,8 @@ class ApiVersionResponse_v0(Struct): ('api_versions', Array( ('api_key', Int16), ('min_version', Int16), - ('max_version', Int16)))) + ('max_version', Int16))) + ) class ApiVersionRequest_v0(Struct): @@ -26,6 +27,63 @@ class ApiVersionRequest_v0(Struct): ApiVersionResponse = [ApiVersionResponse_v0] +class CreateTopicsResponse_v0(Struct): + API_KEY = 19 + API_VERSION = 0 + SCHEMA = Schema( + ('topic_error_codes', Array( + ('topic', String('utf-8')), + ('error_code', Int16))) + ) + + +class CreateTopicsRequest_v0(Struct): + API_KEY = 19 + API_VERSION = 0 + RESPONSE_TYPE = CreateTopicsResponse_v0 + SCHEMA = Schema( + ('create_topic_requests', Array( + ('topic', String('utf-8')), + ('num_partitions', Int32), + ('replication_factor', Int16), + ('replica_assignment', Array( + ('partition_id', Int32), + ('replicas', Array(Int32)))), + ('configs', Array( + ('config_key', String('utf-8')), + ('config_value', String('utf-8')))))), + ('timeout', Int32) + ) + + +CreateTopicsRequest = [CreateTopicsRequest_v0] +CreateTopicsResponse = [CreateTopicsResponse_v0] + + +class DeleteTopicsResponse_v0(Struct): + API_KEY = 20 + API_VERSION = 0 + SCHEMA = Schema( + ('topic_error_codes', Array( + ('topic', String('utf-8')), + ('error_code', Int16))) + ) + + +class DeleteTopicsRequest_v0(Struct): + API_KEY = 20 + API_VERSION = 0 + RESPONSE_TYPE = DeleteTopicsResponse_v0 + SCHEMA = Schema( + ('topics', Array(String('utf-8'))), + ('timeout', Int32) + ) + + +DeleteTopicsRequest = [DeleteTopicsRequest_v0] +DeleteTopicsResponse = [DeleteTopicsResponse_v0] + + class ListGroupsResponse_v0(Struct): API_KEY = 16 API_VERSION = 0 From fc81fb972b6d38b80c5c7dc6d628b438a868c0d9 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 18 Jan 2017 16:52:29 -0800 Subject: [PATCH 077/291] Add FetchRequest/Response_v3 structs (#943) --- kafka/protocol/fetch.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index 7df6627dd..79b010fe2 100644 --- a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -40,6 +40,12 @@ class FetchResponse_v2(Struct): SCHEMA = FetchResponse_v1.SCHEMA # message format changed internally +class FetchResponse_v3(Struct): + API_KEY = 1 + API_VERSION = 3 + SCHEMA = FetchResponse_v2.SCHEMA + + class FetchRequest_v0(Struct): API_KEY = 1 API_VERSION = 0 @@ -71,5 +77,25 @@ class FetchRequest_v2(Struct): SCHEMA = 
FetchRequest_v1.SCHEMA -FetchRequest = [FetchRequest_v0, FetchRequest_v1, FetchRequest_v2] -FetchResponse = [FetchResponse_v0, FetchResponse_v1, FetchResponse_v2] +class FetchRequest_v3(Struct): + API_KEY = 1 + API_VERSION = 3 + RESPONSE_TYPE = FetchResponse_v3 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), # This new field is only difference from FR_v2 + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('offset', Int64), + ('max_bytes', Int32))))) + ) + + +FetchRequest = [FetchRequest_v0, FetchRequest_v1, FetchRequest_v2, + FetchRequest_v3] +FetchResponse = [FetchResponse_v0, FetchResponse_v1, FetchResponse_v2, + FetchResponse_v3] From 85a76c905b2f97052cac64cc61dfcf12578eea79 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 17 Dec 2016 10:37:28 -0800 Subject: [PATCH 078/291] Pass error to BrokerConnection.close() --- kafka/client_async.py | 2 +- kafka/conn.py | 38 +++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 0849c7bad..097de71f4 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -556,7 +556,7 @@ def _poll(self, timeout, sleep=True): log.warning('Protocol out of sync on %r, closing', conn) except socket.error: pass - conn.close() + conn.close(Errors.ConnectionError('Socket EVENT_READ without in-flight-requests')) continue # Accumulate as many responses as the connection has pending diff --git a/kafka/conn.py b/kafka/conn.py index cc860caea..25a3fa034 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -9,6 +9,7 @@ import socket import ssl import time +import traceback from kafka.vendor import six @@ -289,12 +290,12 @@ def connect(self): elif ret not in (errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK, 10022): log.error('Connect attempt to %s returned error %s.' ' Disconnecting.', self, ret) - self.close() + self.close(Errors.ConnectionError(ret)) # Connection timedout elif time.time() > request_timeout + self.last_attempt: log.error('Connection attempt to %s timed out', self) - self.close() # error=TimeoutError ? + self.close(Errors.ConnectionError('timeout')) # Needs retry else: @@ -341,9 +342,9 @@ def _wrap_ssl(self): password=self.config['ssl_password']) if self.config['ssl_crlfile']: if not hasattr(ssl, 'VERIFY_CRL_CHECK_LEAF'): - log.error('%s: No CRL support with this version of Python.' - ' Disconnecting.', self) - self.close() + error = 'No CRL support with this version of Python.' + log.error('%s: %s Disconnecting.', self, error) + self.close(Errors.ConnectionError(error)) return log.info('%s: Loading SSL CRL from %s', str(self), self.config['ssl_crlfile']) self._ssl_context.load_verify_locations(self.config['ssl_crlfile']) @@ -355,9 +356,9 @@ def _wrap_ssl(self): self._sock, server_hostname=self.hostname, do_handshake_on_connect=False) - except ssl.SSLError: + except ssl.SSLError as e: log.exception('%s: Failed to wrap socket in SSLContext!', str(self)) - self.close() + self.close(e) self.last_failure = time.time() def _try_handshake(self): @@ -370,7 +371,7 @@ def _try_handshake(self): pass except ssl.SSLZeroReturnError: log.warning('SSL connection closed by server during handshake.') - self.close() + self.close(Errors.ConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user return False @@ -478,9 +479,15 @@ def close(self, error=None): will be failed with this exception. 
Default: kafka.errors.ConnectionError. """ - if self.state is not ConnectionStates.DISCONNECTED: - self.state = ConnectionStates.DISCONNECTING - self.config['state_change_callback'](self) + if self.state is ConnectionStates.DISCONNECTED: + if error is not None: + log.warning('%s: close() called on disconnected connection with error: %s', self, error) + traceback.print_stack() + return + + log.info('%s: Closing connection. %s', self, error or '') + self.state = ConnectionStates.DISCONNECTING + self.config['state_change_callback'](self) if self._sock: self._sock.close() self._sock = None @@ -568,7 +575,7 @@ def recv(self): # If requests are pending, we should close the socket and # fail all the pending request futures if self.in_flight_requests: - self.close() + self.close(Errors.ConnectionError('Socket not connected during recv with in-flight-requests')) return None elif not self.in_flight_requests: @@ -695,7 +702,7 @@ def _process_response(self, read_buffer): '%s: Correlation IDs do not match: sent %d, recv %d' % (str(self), ifr.correlation_id, recv_correlation_id)) ifr.future.failure(error) - self.close() + self.close(error) self._processing = False return None @@ -709,8 +716,9 @@ def _process_response(self, read_buffer): ' Unable to decode %d-byte buffer: %r', self, ifr.correlation_id, ifr.response_type, ifr.request, len(buf), buf) - ifr.future.failure(Errors.UnknownError('Unable to decode response')) - self.close() + error = Errors.UnknownError('Unable to decode response') + ifr.future.failure(error) + self.close(error) self._processing = False return None From e5f9a546fc1a413183906f1681ffecca7275087a Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 17 Dec 2016 10:39:06 -0800 Subject: [PATCH 079/291] Use connection state functions where possible --- kafka/client_async.py | 6 +++--- kafka/conn.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 097de71f4..4c5fab0ca 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -229,7 +229,7 @@ def _bootstrap(self, hosts): bootstrap.connect() while bootstrap.connecting(): bootstrap.connect() - if bootstrap.state is not ConnectionStates.CONNECTED: + if not bootstrap.connected(): bootstrap.close() continue future = bootstrap.send(metadata_request) @@ -261,7 +261,7 @@ def _can_connect(self, node_id): return True return False conn = self._conns[node_id] - return conn.state is ConnectionStates.DISCONNECTED and not conn.blacked_out() + return conn.disconnected() and not conn.blacked_out() def _conn_state_change(self, node_id, conn): if conn.connecting(): @@ -398,7 +398,7 @@ def connection_delay(self, node_id): conn = self._conns[node_id] time_waited_ms = time.time() - (conn.last_attempt or 0) - if conn.state is ConnectionStates.DISCONNECTED: + if conn.disconnected(): return max(self.config['reconnect_backoff_ms'] - time_waited_ms, 0) elif conn.connecting(): return 0 diff --git a/kafka/conn.py b/kafka/conn.py index 25a3fa034..7cb92dd66 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -519,6 +519,7 @@ def send(self, request, expect_response=True): return self._send(request, expect_response=expect_response) def _send(self, request, expect_response=True): + assert self.state in (ConnectionStates.AUTHENTICATING, ConnectionStates.CONNECTED) future = Future() correlation_id = self._next_correlation_id() header = RequestHeader(request, From ba81bbaa5009d766cee8d28dfc56c0ed1af7c6d9 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 17 Dec 2016 10:39:34 -0800 Subject: 
[PATCH 080/291] Drop unused last_failure time from BrokerConnection --- kafka/conn.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 7cb92dd66..f99cb2b86 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -154,8 +154,8 @@ def __init__(self, host, port, afi, **configs): sasl_plain_password (str): passowrd for sasl PLAIN authentication. Defualt: None """ - self.host = host self.hostname = host + self.host = host self.port = port self.afi = afi self.in_flight_requests = collections.deque() @@ -191,7 +191,6 @@ def __init__(self, host, port, afi, **configs): self._receiving = False self._next_payload_bytes = 0 self.last_attempt = 0 - self.last_failure = 0 self._processing = False self._correlation_id = 0 self._gai = None @@ -359,7 +358,6 @@ def _wrap_ssl(self): except ssl.SSLError as e: log.exception('%s: Failed to wrap socket in SSLContext!', str(self)) self.close(e) - self.last_failure = time.time() def _try_handshake(self): assert self.config['security_protocol'] in ('SSL', 'SASL_SSL') @@ -492,7 +490,6 @@ def close(self, error=None): self._sock.close() self._sock = None self.state = ConnectionStates.DISCONNECTED - self.last_failure = time.time() self._receiving = False self._next_payload_bytes = 0 self._rbuffer.seek(0) From 632e7d32c31de96f66e0d69d2b3bf1e3baa2603d Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 17 Dec 2016 10:40:07 -0800 Subject: [PATCH 081/291] Do not re-close a disconnected connection --- kafka/conn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index f99cb2b86..98bad57d8 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -204,7 +204,6 @@ def __init__(self, host, port, afi, **configs): def connect(self): """Attempt to connect and return ConnectionState""" if self.state is ConnectionStates.DISCONNECTED: - self.close() log.debug('%s: creating new socket', str(self)) # if self.afi is set to AF_UNSPEC, then we need to do a name # resolution and try all available address families From e54581609cbf0ca2011e24f8e8259ee808619053 Mon Sep 17 00:00:00 2001 From: Julien Lafaye Date: Thu, 19 Jan 2017 22:10:26 +0100 Subject: [PATCH 082/291] Add support for offsetRequestV1 messages (#951) --- kafka/protocol/offset.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 8d660635c..77a6a09d3 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -22,6 +22,19 @@ class OffsetResponse_v0(Struct): ('offsets', Array(Int64)))))) ) +class OffsetResponse_v1(Struct): + API_KEY = 2 + API_VERSION = 1 + SCHEMA = Schema( + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('timestamp', Int64), + ('offset', Int64))))) + ) + class OffsetRequest_v0(Struct): API_KEY = 2 @@ -33,13 +46,29 @@ class OffsetRequest_v0(Struct): ('topic', String('utf-8')), ('partitions', Array( ('partition', Int32), - ('time', Int64), + ('timestamp', Int64), ('max_offsets', Int32))))) ) DEFAULTS = { 'replica_id': -1 } +class OffsetRequest_v1(Struct): + API_KEY = 2 + API_VERSION = 1 + RESPONSE_TYPE = OffsetResponse_v1 + SCHEMA = Schema( + ('replica_id', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('time', Int64))))) + ) + DEFAULTS = { + 'replica_id': -1 + } + -OffsetRequest = [OffsetRequest_v0] -OffsetResponse = [OffsetResponse_v0] +OffsetRequest = [OffsetRequest_v0, 
OffsetRequest_v1] +OffsetResponse = [OffsetResponse_v0, OffsetResponse_v1] From 59eb18a462ab52742d4d2c11ce2a0cbd7c10d939 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sat, 21 Jan 2017 18:09:53 -0800 Subject: [PATCH 083/291] Move docstring so it shows up in Sphinx/RTD (#952) --- kafka/client_async.py | 166 +++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 4c5fab0ca..68d5579b6 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -41,12 +41,92 @@ class KafkaClient(object): """ - A network client for asynchronous request/response network i/o. - This is an internal class used to implement the - user-facing producer and consumer clients. + A network client for asynchronous request/response network I/O. + + This is an internal class used to implement the user-facing producer and + consumer clients. This class is not thread-safe! + + Keyword Arguments: + bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' + strings) that the consumer should contact to bootstrap initial + cluster metadata. This does not have to be the full node list. + It just needs to have at least one broker that will respond to a + Metadata API Request. Default port is 9092. If no servers are + specified, will default to localhost:9092. + client_id (str): a name for this client. This string is passed in + each request to servers and can be used to identify specific + server-side log entries that correspond to this client. Also + submitted to GroupCoordinator for logging with respect to + consumer group administration. Default: 'kafka-python-{version}' + reconnect_backoff_ms (int): The amount of time in milliseconds to + wait before attempting to reconnect to a given host. + Default: 50. + request_timeout_ms (int): Client request timeout in milliseconds. + Default: 40000. + retry_backoff_ms (int): Milliseconds to backoff when retrying on + errors. Default: 100. + max_in_flight_requests_per_connection (int): Requests are pipelined + to kafka brokers up to this number of maximum requests per + broker connection. Default: 5. + receive_buffer_bytes (int): The size of the TCP receive buffer + (SO_RCVBUF) to use when reading data. Default: None (relies on + system defaults). Java client defaults to 32768. + send_buffer_bytes (int): The size of the TCP send buffer + (SO_SNDBUF) to use when sending data. Default: None (relies on + system defaults). Java client defaults to 131072. + socket_options (list): List of tuple-arguments to socket.setsockopt + to apply to broker connection sockets. Default: + [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + metadata_max_age_ms (int): The period of time in milliseconds after + which we force a refresh of metadata even if we haven't seen any + partition leadership changes to proactively discover any new + brokers or partitions. Default: 300000 + security_protocol (str): Protocol used to communicate with brokers. + Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping + socket connections. If provided, all other ssl_* configurations + will be ignored. Default: None. + ssl_check_hostname (bool): flag to configure whether ssl handshake + should verify that the certificate matches the brokers hostname. + default: true. + ssl_cafile (str): optional filename of ca file to use in certificate + veriication. default: none. 
+ ssl_certfile (str): optional filename of file in pem format containing + the client certificate, as well as any ca certificates needed to + establish the certificate's authenticity. default: none. + ssl_keyfile (str): optional filename containing the client private key. + default: none. + ssl_password (str): optional password to be used when loading the + certificate chain. default: none. + ssl_crlfile (str): optional filename containing the CRL to check for + certificate expiration. By default, no CRL check is done. When + providing a file, only the leaf certificate will be checked against + this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. + default: none. + api_version (tuple): specify which kafka API version to use. Accepted + values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) + If None, KafkaClient will attempt to infer the broker + version by probing various APIs. Default: None + api_version_auto_timeout_ms (int): number of milliseconds to throw a + timeout exception from the constructor when checking the broker + api version. Only applies if api_version is None + selector (selectors.BaseSelector): Provide a specific selector + implementation to use for I/O multiplexing. + Default: selectors.DefaultSelector + metrics (kafka.metrics.Metrics): Optionally provide a metrics + instance for capturing network IO stats. Default: None. + metric_group_prefix (str): Prefix for metric names. Default: '' + sasl_mechanism (str): string picking sasl mechanism when security_protocol + is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. + Default: None + sasl_plain_username (str): username for sasl PLAIN authentication. + Default: None + sasl_plain_password (str): password for sasl PLAIN authentication. + Default: None """ + DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', 'client_id': 'kafka-python-' + __version__, @@ -84,86 +164,6 @@ class KafkaClient(object): ] def __init__(self, **configs): - """Initialize an asynchronous kafka client - - Keyword Arguments: - bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' - strings) that the consumer should contact to bootstrap initial - cluster metadata. This does not have to be the full node list. - It just needs to have at least one broker that will respond to a - Metadata API Request. Default port is 9092. If no servers are - specified, will default to localhost:9092. - client_id (str): a name for this client. This string is passed in - each request to servers and can be used to identify specific - server-side log entries that correspond to this client. Also - submitted to GroupCoordinator for logging with respect to - consumer group administration. Default: 'kafka-python-{version}' - reconnect_backoff_ms (int): The amount of time in milliseconds to - wait before attempting to reconnect to a given host. - Default: 50. - request_timeout_ms (int): Client request timeout in milliseconds. - Default: 40000. - retry_backoff_ms (int): Milliseconds to backoff when retrying on - errors. Default: 100. - max_in_flight_requests_per_connection (int): Requests are pipelined - to kafka brokers up to this number of maximum requests per - broker connection. Default: 5. - receive_buffer_bytes (int): The size of the TCP receive buffer - (SO_RCVBUF) to use when reading data. Default: None (relies on - system defaults). Java client defaults to 32768. - send_buffer_bytes (int): The size of the TCP send buffer - (SO_SNDBUF) to use when sending data. Default: None (relies on - system defaults). 
Java client defaults to 131072. - socket_options (list): List of tuple-arguments to socket.setsockopt - to apply to broker connection sockets. Default: - [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] - metadata_max_age_ms (int): The period of time in milliseconds after - which we force a refresh of metadata even if we haven't seen any - partition leadership changes to proactively discover any new - brokers or partitions. Default: 300000 - security_protocol (str): Protocol used to communicate with brokers. - Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. - ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping - socket connections. If provided, all other ssl_* configurations - will be ignored. Default: None. - ssl_check_hostname (bool): flag to configure whether ssl handshake - should verify that the certificate matches the brokers hostname. - default: true. - ssl_cafile (str): optional filename of ca file to use in certificate - veriication. default: none. - ssl_certfile (str): optional filename of file in pem format containing - the client certificate, as well as any ca certificates needed to - establish the certificate's authenticity. default: none. - ssl_keyfile (str): optional filename containing the client private key. - default: none. - ssl_password (str): optional password to be used when loading the - certificate chain. default: none. - ssl_crlfile (str): optional filename containing the CRL to check for - certificate expiration. By default, no CRL check is done. When - providing a file, only the leaf certificate will be checked against - this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. - default: none. - api_version (tuple): specify which kafka API version to use. Accepted - values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) - If None, KafkaClient will attempt to infer the broker - version by probing various APIs. Default: None - api_version_auto_timeout_ms (int): number of milliseconds to throw a - timeout exception from the constructor when checking the broker - api version. Only applies if api_version is None - selector (selectors.BaseSelector): Provide a specific selector - implementation to use for I/O multiplexing. - Default: selectors.DefaultSelector - metrics (kafka.metrics.Metrics): Optionally provide a metrics - instance for capturing network IO stats. Default: None. - metric_group_prefix (str): Prefix for metric names. Default: '' - sasl_mechanism (str): string picking sasl mechanism when security_protocol - is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. - Default: None - sasl_plain_username (str): username for sasl PLAIN authentication. - Default: None - sasl_plain_password (str): password for sasl PLAIN authentication. 
- Defualt: None - """ self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: if key in configs: From cebea0c1e255ce2327e94d1768207aa8239a2dab Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 23 Jan 2017 10:00:23 -0800 Subject: [PATCH 084/291] Update to 0.10.1.1 for integration testing (#953) --- .travis.yml | 2 +- build_integration.sh | 2 +- docs/tests.rst | 4 +- servers/0.10.1.1/resources/kafka.properties | 142 ++++++++++++++++++ servers/0.10.1.1/resources/log4j.properties | 25 +++ .../0.10.1.1/resources/zookeeper.properties | 21 +++ 6 files changed, 192 insertions(+), 4 deletions(-) create mode 100644 servers/0.10.1.1/resources/kafka.properties create mode 100644 servers/0.10.1.1/resources/log4j.properties create mode 100644 servers/0.10.1.1/resources/zookeeper.properties diff --git a/.travis.yml b/.travis.yml index 3491533b4..a31d62f30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ python: env: - KAFKA_VERSION=0.9.0.1 - - KAFKA_VERSION=0.10.0.1 + - KAFKA_VERSION=0.10.1.1 sudo: false diff --git a/build_integration.sh b/build_integration.sh index a017607bb..9b436ea96 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,7 +1,7 @@ #!/bin/bash # Versions available for testing via binary distributions -OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.1 0.10.0.1" +OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.1 0.10.1.1" # Useful configuration vars, with sensible defaults if [ -z "$SCALA_VERSION" ]; then diff --git a/docs/tests.rst b/docs/tests.rst index 983790df4..5b093c3d4 100644 --- a/docs/tests.rst +++ b/docs/tests.rst @@ -49,7 +49,7 @@ Integration tests .. code:: bash - KAFKA_VERSION=0.10.0.1 tox -e py27 + KAFKA_VERSION=0.10.1.1 tox -e py27 KAFKA_VERSION=0.8.2.2 tox -e py35 @@ -60,7 +60,7 @@ kafka server binaries: ./build_integration.sh -By default, this will install 0.8.1.1, 0.8.2.2, 0.9.0.1, and 0.10.0.1 brokers into the +By default, this will install 0.8.1.1, 0.8.2.2, 0.9.0.1, and 0.10.1.1 brokers into the servers/ directory. To install a specific version, set `KAFKA_VERSION=0.9.0.0`: .. code:: bash diff --git a/servers/0.10.1.1/resources/kafka.properties b/servers/0.10.1.1/resources/kafka.properties new file mode 100644 index 000000000..7a19a1187 --- /dev/null +++ b/servers/0.10.1.1/resources/kafka.properties @@ -0,0 +1,142 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/server.truststore.jks +ssl.truststore.password=foobar + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=2 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.1.1/resources/log4j.properties b/servers/0.10.1.1/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/0.10.1.1/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.1.1/resources/zookeeper.properties b/servers/0.10.1.1/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/0.10.1.1/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 From 85be7447b9367a193896a6a381ff9604bb25630c Mon Sep 17 00:00:00 2001 From: adamwen Date: Tue, 24 Jan 2017 02:01:04 +0800 Subject: [PATCH 085/291] use select.select without instance bounding (#949) berkerpeksag/selectors34@6128e06 --- kafka/vendor/selectors34.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/vendor/selectors34.py b/kafka/vendor/selectors34.py index 71ac60cb9..2a6e55628 100644 --- a/kafka/vendor/selectors34.py +++ b/kafka/vendor/selectors34.py @@ -331,7 +331,7 @@ def _select(self, r, w, _, timeout=None): r, w, x = select.select(r, w, w, timeout) return r, w + x, [] else: - _select = select.select + _select = staticmethod(select.select) def select(self, timeout=None): timeout = None if timeout is None else max(timeout, 0) From 052d50332045efcb76f9818d484343f23c141e12 Mon Sep 17 00:00:00 2001 From: Sho Minagawa Date: Sat, 4 Feb 2017 02:44:29 +0900 Subject: [PATCH 086/291] Add support for Python built without ssl (#939) (#954) --- kafka/conn.py | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 98bad57d8..14740bcb4 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -7,7 +7,6 @@ import io from random import shuffle import socket -import ssl import time import traceback @@ -31,19 +30,28 @@ DEFAULT_KAFKA_PORT = 9092 -# support older ssl libraries try: - ssl.SSLWantReadError - ssl.SSLWantWriteError - ssl.SSLZeroReturnError -except: - log.debug('Old SSL module detected.' - ' SSL error handling may not operate cleanly.' 
- ' Consider upgrading to Python 3.3 or 2.7.9') - ssl.SSLWantReadError = ssl.SSLError - ssl.SSLWantWriteError = ssl.SSLError - ssl.SSLZeroReturnError = ssl.SSLError - + import ssl + ssl_available = True + try: + SSLWantReadError = ssl.SSLWantReadError + SSLWantWriteError = ssl.SSLWantWriteError + SSLZeroReturnError = ssl.SSLZeroReturnError + except: + # support older ssl libraries + log.debug('Old SSL module detected.' + ' SSL error handling may not operate cleanly.' + ' Consider upgrading to Python 3.3 or 2.7.9') + SSLWantReadError = ssl.SSLError + SSLWantWriteError = ssl.SSLError + SSLZeroReturnError = ssl.SSLError +except ImportError: + # support Python without ssl libraries + ssl_available = False + class SSLWantReadError(Exception): + pass + class SSLWantWriteError(Exception): + pass class ConnectionStates(object): DISCONNECTING = '' @@ -174,6 +182,9 @@ def __init__(self, host, port, afi, **configs): (socket.SOL_SOCKET, socket.SO_SNDBUF, self.config['send_buffer_bytes'])) + if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): + assert ssl_available, "Python wasn't built with SSL support" + if self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL'): assert self.config['sasl_mechanism'] in self.SASL_MECHANISMS, ( 'sasl_mechanism must be in ' + self.SASL_MECHANISMS) @@ -364,9 +375,9 @@ def _try_handshake(self): self._sock.do_handshake() return True # old ssl in python2.6 will swallow all SSLErrors here... - except (ssl.SSLWantReadError, ssl.SSLWantWriteError): + except (SSLWantReadError, SSLWantWriteError): pass - except ssl.SSLZeroReturnError: + except SSLZeroReturnError: log.warning('SSL connection closed by server during handshake.') self.close(Errors.ConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user @@ -604,7 +615,7 @@ def _recv(self): self.close(error=Errors.ConnectionError('socket disconnected')) return None self._rbuffer.write(data) - except ssl.SSLWantReadError: + except SSLWantReadError: return None except ConnectionError as e: if six.PY2 and e.errno == errno.EWOULDBLOCK: @@ -642,7 +653,7 @@ def _recv(self): self.close(error=Errors.ConnectionError('socket disconnected')) return None self._rbuffer.write(data) - except ssl.SSLWantReadError: + except SSLWantReadError: return None except ConnectionError as e: # Extremely small chance that we have exactly 4 bytes for a From 15d227a1da35fe59e868058e69ff22b8b44a3d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0ebek?= Date: Fri, 3 Feb 2017 18:45:57 +0100 Subject: [PATCH 087/291] Fix raise exception from SubscriptionState.assign_from_subscribed (#960) --- kafka/consumer/subscription_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/consumer/subscription_state.py b/kafka/consumer/subscription_state.py index fac1a9825..6025fefc0 100644 --- a/kafka/consumer/subscription_state.py +++ b/kafka/consumer/subscription_state.py @@ -218,7 +218,7 @@ def assign_from_subscribed(self, assignments): for tp in assignments: if tp.topic not in self.subscription: - raise ValueError("Assigned partition %s for non-subscribed topic." % tp) + raise ValueError("Assigned partition %s for non-subscribed topic." 
% str(tp)) self.assignment.clear() for tp in assignments: self._add_assigned_partition(tp) From 7201ca2d89d0b2c5ed350b01a1fbc0de473bbf1b Mon Sep 17 00:00:00 2001 From: Jianbin Wei Date: Fri, 3 Feb 2017 10:06:12 -0800 Subject: [PATCH 088/291] Use select to poll sockets for read to reduce CPU usage (#958) --- kafka/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/client.py b/kafka/client.py index ed673e9b3..0b1176697 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -6,6 +6,7 @@ import logging import random import time +import select # selectors in stdlib as of py3.4 try: From c899b54bf37b67842e5cc2e4ac161ee74c61a2c5 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 3 Feb 2017 10:07:01 -0800 Subject: [PATCH 089/291] time --> timestamp to match Java API (#969) --- kafka/protocol/offset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 77a6a09d3..5182d63ee 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -63,7 +63,7 @@ class OffsetRequest_v1(Struct): ('topic', String('utf-8')), ('partitions', Array( ('partition', Int32), - ('time', Int64))))) + ('timestamp', Int64))))) ) DEFAULTS = { 'replica_id': -1 From 8dbe1d9f4f58a18aa518ed201ab8c9d581822c64 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 3 Feb 2017 10:08:12 -0800 Subject: [PATCH 090/291] DRY-up the OffsetFetch structs (#964) --- kafka/protocol/commit.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 69201bee2..7891267e9 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -116,20 +116,12 @@ class OffsetFetchResponse_v0(Struct): class OffsetFetchResponse_v1(Struct): API_KEY = 9 API_VERSION = 1 - SCHEMA = Schema( - ('topics', Array( - ('topic', String('utf-8')), - ('partitions', Array( - ('partition', Int32), - ('offset', Int64), - ('metadata', String('utf-8')), - ('error_code', Int16))))) - ) + SCHEMA = OffsetFetchResponse_v0.SCHEMA class OffsetFetchRequest_v0(Struct): API_KEY = 9 - API_VERSION = 0 # zookeeper-backed storage + API_VERSION = 0 # zookeeper-backed storage RESPONSE_TYPE = OffsetFetchResponse_v0 SCHEMA = Schema( ('consumer_group', String('utf-8')), @@ -141,14 +133,9 @@ class OffsetFetchRequest_v0(Struct): class OffsetFetchRequest_v1(Struct): API_KEY = 9 - API_VERSION = 1 # kafka-backed storage + API_VERSION = 1 # kafka-backed storage RESPONSE_TYPE = OffsetFetchResponse_v1 - SCHEMA = Schema( - ('consumer_group', String('utf-8')), - ('topics', Array( - ('topic', String('utf-8')), - ('partitions', Array(Int32)))) - ) + SCHEMA = OffsetFetchRequest_v0.SCHEMA OffsetFetchRequest = [OffsetFetchRequest_v0, OffsetFetchRequest_v1] From c09e3b711560985dea367ff2e149e31dee6d653c Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 3 Feb 2017 10:10:58 -0800 Subject: [PATCH 091/291] DRY-up the OffsetCommitResponse Structs (#970) --- kafka/protocol/commit.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 7891267e9..effba1e1f 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -19,30 +19,18 @@ class OffsetCommitResponse_v0(Struct): class OffsetCommitResponse_v1(Struct): API_KEY = 8 API_VERSION = 1 - SCHEMA = Schema( - ('topics', Array( - ('topic', String('utf-8')), - ('partitions', Array( - ('partition', Int32), - ('error_code', Int16))))) - ) + SCHEMA = 
OffsetCommitResponse_v0.SCHEMA class OffsetCommitResponse_v2(Struct): API_KEY = 8 API_VERSION = 2 - SCHEMA = Schema( - ('topics', Array( - ('topic', String('utf-8')), - ('partitions', Array( - ('partition', Int32), - ('error_code', Int16))))) - ) + SCHEMA = OffsetCommitResponse_v1.SCHEMA class OffsetCommitRequest_v0(Struct): API_KEY = 8 - API_VERSION = 0 # Zookeeper-backed storage + API_VERSION = 0 # Zookeeper-backed storage RESPONSE_TYPE = OffsetCommitResponse_v0 SCHEMA = Schema( ('consumer_group', String('utf-8')), @@ -57,7 +45,7 @@ class OffsetCommitRequest_v0(Struct): class OffsetCommitRequest_v1(Struct): API_KEY = 8 - API_VERSION = 1 # Kafka-backed storage + API_VERSION = 1 # Kafka-backed storage RESPONSE_TYPE = OffsetCommitResponse_v1 SCHEMA = Schema( ('consumer_group', String('utf-8')), @@ -75,7 +63,7 @@ class OffsetCommitRequest_v1(Struct): class OffsetCommitRequest_v2(Struct): API_KEY = 8 - API_VERSION = 2 # added retention_time, dropped timestamp + API_VERSION = 2 # added retention_time, dropped timestamp RESPONSE_TYPE = OffsetCommitResponse_v2 SCHEMA = Schema( ('consumer_group', String('utf-8')), From 8837562bac166d585afc7a5d3f33dbb28795239a Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 8 Feb 2017 17:16:10 -0800 Subject: [PATCH 092/291] Add JoinGroup v1 structs (#965) The JoinGroupRequest struct added a rebalance_timeout as part of KIP-62 / KAFKA-3888 --- kafka/protocol/group.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/group.py b/kafka/protocol/group.py index 98715f39e..0e0b70e19 100644 --- a/kafka/protocol/group.py +++ b/kafka/protocol/group.py @@ -19,6 +19,12 @@ class JoinGroupResponse_v0(Struct): ) +class JoinGroupResponse_v1(Struct): + API_KEY = 11 + API_VERSION = 1 + SCHEMA = JoinGroupResponse_v0.SCHEMA + + class JoinGroupRequest_v0(Struct): API_KEY = 11 API_VERSION = 0 @@ -35,8 +41,25 @@ class JoinGroupRequest_v0(Struct): UNKNOWN_MEMBER_ID = '' -JoinGroupRequest = [JoinGroupRequest_v0] -JoinGroupResponse = [JoinGroupResponse_v0] +class JoinGroupRequest_v1(Struct): + API_KEY = 11 + API_VERSION = 1 + RESPONSE_TYPE = JoinGroupResponse_v1 + SCHEMA = Schema( + ('group', String('utf-8')), + ('session_timeout', Int32), + ('rebalance_timeout', Int32), + ('member_id', String('utf-8')), + ('protocol_type', String('utf-8')), + ('group_protocols', Array( + ('protocol_name', String('utf-8')), + ('protocol_metadata', Bytes))) + ) + UNKNOWN_MEMBER_ID = '' + + +JoinGroupRequest = [JoinGroupRequest_v0, JoinGroupRequest_v1] +JoinGroupResponse = [JoinGroupResponse_v0, JoinGroupResponse_v1] class ProtocolMetadata(Struct): From 57ffa5f3618cb6b5eacf4d8ca753f2c2069c4ffe Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 8 Feb 2017 17:17:21 -0800 Subject: [PATCH 093/291] DRY-up the MetadataRequest_v1 struct (#966) --- kafka/protocol/metadata.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kafka/protocol/metadata.py b/kafka/protocol/metadata.py index 7a04104a2..bc106df25 100644 --- a/kafka/protocol/metadata.py +++ b/kafka/protocol/metadata.py @@ -52,17 +52,18 @@ class MetadataRequest_v0(Struct): API_VERSION = 0 RESPONSE_TYPE = MetadataResponse_v0 SCHEMA = Schema( - ('topics', Array(String('utf-8'))) # Empty Array (len 0) for all topics + ('topics', Array(String('utf-8'))) ) + ALL_TOPICS = None # Empty Array (len 0) for topics returns all topics class MetadataRequest_v1(Struct): API_KEY = 3 API_VERSION = 1 RESPONSE_TYPE = MetadataResponse_v1 - SCHEMA = Schema( - ('topics', 
Array(String('utf-8'))) # Null Array (len -1) for all topics - ) + SCHEMA = MetadataRequest_v0.SCHEMA + ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics + NO_TOPICS = None # Empty array (len 0) for topics returns no topics MetadataRequest = [MetadataRequest_v0, MetadataRequest_v1] From 638c38617f3c710327d211fb1ef8548f8685425d Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 8 Feb 2017 17:18:31 -0800 Subject: [PATCH 094/291] Move BrokerConnection docstring to class (#968) --- kafka/conn.py | 137 +++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 68 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 14740bcb4..11ede9f9e 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -66,6 +66,75 @@ class ConnectionStates(object): class BrokerConnection(object): + """Initialize a Kafka broker connection + + Keyword Arguments: + client_id (str): a name for this client. This string is passed in + each request to servers and can be used to identify specific + server-side log entries that correspond to this client. Also + submitted to GroupCoordinator for logging with respect to + consumer group administration. Default: 'kafka-python-{version}' + reconnect_backoff_ms (int): The amount of time in milliseconds to + wait before attempting to reconnect to a given host. + Default: 50. + request_timeout_ms (int): Client request timeout in milliseconds. + Default: 40000. + max_in_flight_requests_per_connection (int): Requests are pipelined + to kafka brokers up to this number of maximum requests per + broker connection. Default: 5. + receive_buffer_bytes (int): The size of the TCP receive buffer + (SO_RCVBUF) to use when reading data. Default: None (relies on + system defaults). Java client defaults to 32768. + send_buffer_bytes (int): The size of the TCP send buffer + (SO_SNDBUF) to use when sending data. Default: None (relies on + system defaults). Java client defaults to 131072. + socket_options (list): List of tuple-arguments to socket.setsockopt + to apply to broker connection sockets. Default: + [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + security_protocol (str): Protocol used to communicate with brokers. + Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping + socket connections. If provided, all other ssl_* configurations + will be ignored. Default: None. + ssl_check_hostname (bool): flag to configure whether ssl handshake + should verify that the certificate matches the brokers hostname. + default: True. + ssl_cafile (str): optional filename of ca file to use in certificate + veriication. default: None. + ssl_certfile (str): optional filename of file in pem format containing + the client certificate, as well as any ca certificates needed to + establish the certificate's authenticity. default: None. + ssl_keyfile (str): optional filename containing the client private key. + default: None. + ssl_password (callable, str, bytes, bytearray): optional password or + callable function that returns a password, for decrypting the + client private key. Default: None. + ssl_crlfile (str): optional filename containing the CRL to check for + certificate expiration. By default, no CRL check is done. When + providing a file, only the leaf certificate will be checked against + this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. + default: None. + api_version (tuple): specify which Kafka API version to use. 
Accepted + values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) + If None, KafkaClient will attempt to infer the broker + version by probing various APIs. Default: None + api_version_auto_timeout_ms (int): number of milliseconds to throw a + timeout exception from the constructor when checking the broker + api version. Only applies if api_version is None + state_change_callback (callable): function to be called when the + connection state changes from CONNECTING to CONNECTED etc. + metrics (kafka.metrics.Metrics): Optionally provide a metrics + instance for capturing network IO stats. Default: None. + metric_group_prefix (str): Prefix for metric names. Default: '' + sasl_mechanism (str): string picking sasl mechanism when security_protocol + is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. + Default: None + sasl_plain_username (str): username for sasl PLAIN authentication. + Default: None + sasl_plain_password (str): password for sasl PLAIN authentication. + Default: None + """ + DEFAULT_CONFIG = { 'client_id': 'kafka-python-' + __version__, 'node_id': 0, @@ -94,74 +163,6 @@ class BrokerConnection(object): SASL_MECHANISMS = ('PLAIN',) def __init__(self, host, port, afi, **configs): - """Initialize a kafka broker connection - - Keyword Arguments: - client_id (str): a name for this client. This string is passed in - each request to servers and can be used to identify specific - server-side log entries that correspond to this client. Also - submitted to GroupCoordinator for logging with respect to - consumer group administration. Default: 'kafka-python-{version}' - reconnect_backoff_ms (int): The amount of time in milliseconds to - wait before attempting to reconnect to a given host. - Default: 50. - request_timeout_ms (int): Client request timeout in milliseconds. - Default: 40000. - max_in_flight_requests_per_connection (int): Requests are pipelined - to kafka brokers up to this number of maximum requests per - broker connection. Default: 5. - receive_buffer_bytes (int): The size of the TCP receive buffer - (SO_RCVBUF) to use when reading data. Default: None (relies on - system defaults). Java client defaults to 32768. - send_buffer_bytes (int): The size of the TCP send buffer - (SO_SNDBUF) to use when sending data. Default: None (relies on - system defaults). Java client defaults to 131072. - socket_options (list): List of tuple-arguments to socket.setsockopt - to apply to broker connection sockets. Default: - [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] - security_protocol (str): Protocol used to communicate with brokers. - Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. - ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping - socket connections. If provided, all other ssl_* configurations - will be ignored. Default: None. - ssl_check_hostname (bool): flag to configure whether ssl handshake - should verify that the certificate matches the brokers hostname. - default: True. - ssl_cafile (str): optional filename of ca file to use in certificate - veriication. default: None. - ssl_certfile (str): optional filename of file in pem format containing - the client certificate, as well as any ca certificates needed to - establish the certificate's authenticity. default: None. - ssl_keyfile (str): optional filename containing the client private key. - default: None. - ssl_password (callable, str, bytes, bytearray): optional password or - callable function that returns a password, for decrypting the - client private key. Default: None. 
- ssl_crlfile (str): optional filename containing the CRL to check for - certificate expiration. By default, no CRL check is done. When - providing a file, only the leaf certificate will be checked against - this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. - default: None. - api_version (tuple): specify which kafka API version to use. Accepted - values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) - If None, KafkaClient will attempt to infer the broker - version by probing various APIs. Default: None - api_version_auto_timeout_ms (int): number of milliseconds to throw a - timeout exception from the constructor when checking the broker - api version. Only applies if api_version is None - state_chance_callback (callable): function to be called when the - connection state changes from CONNECTING to CONNECTED etc. - metrics (kafka.metrics.Metrics): Optionally provide a metrics - instance for capturing network IO stats. Default: None. - metric_group_prefix (str): Prefix for metric names. Default: '' - sasl_mechanism (str): string picking sasl mechanism when security_protocol - is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. - Default: None - sasl_plain_username (str): username for sasl PLAIN authentication. - Default: None - sasl_plain_password (str): passowrd for sasl PLAIN authentication. - Defualt: None - """ self.hostname = host self.host = host self.port = port From 2c0a76806ef49aea30b56dd6f11e387bb89a2599 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 8 Feb 2017 17:23:39 -0800 Subject: [PATCH 095/291] KIP-88 / KAFKA-3853: OffsetFetch v2 structs (#971) --- kafka/protocol/commit.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index effba1e1f..564537240 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -107,6 +107,22 @@ class OffsetFetchResponse_v1(Struct): SCHEMA = OffsetFetchResponse_v0.SCHEMA +class OffsetFetchResponse_v2(Struct): + # Added in KIP-88 + API_KEY = 9 + API_VERSION = 2 + SCHEMA = Schema( + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('offset', Int64), + ('metadata', String('utf-8')), + ('error_code', Int16))))), + ('error_code', Int16) + ) + + class OffsetFetchRequest_v0(Struct): API_KEY = 9 API_VERSION = 0 # zookeeper-backed storage @@ -126,8 +142,20 @@ class OffsetFetchRequest_v1(Struct): SCHEMA = OffsetFetchRequest_v0.SCHEMA -OffsetFetchRequest = [OffsetFetchRequest_v0, OffsetFetchRequest_v1] -OffsetFetchResponse = [OffsetFetchResponse_v0, OffsetFetchResponse_v1] +class OffsetFetchRequest_v2(Struct): + # KIP-88: Allows passing null topics to return offsets for all partitions + # that the consumer group has a stored offset for, even if no consumer in + # the group is currently consuming that partition. 
+ API_KEY = 9 + API_VERSION = 2 + RESPONSE_TYPE = OffsetFetchResponse_v2 + SCHEMA = OffsetFetchRequest_v1.SCHEMA + + +OffsetFetchRequest = [OffsetFetchRequest_v0, OffsetFetchRequest_v1, + OffsetFetchRequest_v2] +OffsetFetchResponse = [OffsetFetchResponse_v0, OffsetFetchResponse_v1, + OffsetFetchResponse_v2] class GroupCoordinatorResponse_v0(Struct): From d0db38825d532951d4e93c5d2385a62a4d2cf314 Mon Sep 17 00:00:00 2001 From: Taras Voinarovskyi Date: Thu, 9 Feb 2017 20:56:58 +0200 Subject: [PATCH 096/291] Add MetadataRequest_v2 and MetadataResponse_v2 structures for KIP-78 (#974) --- kafka/protocol/metadata.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/metadata.py b/kafka/protocol/metadata.py index bc106df25..e017c5904 100644 --- a/kafka/protocol/metadata.py +++ b/kafka/protocol/metadata.py @@ -47,6 +47,30 @@ class MetadataResponse_v1(Struct): ) +class MetadataResponse_v2(Struct): + API_KEY = 3 + API_VERSION = 2 + SCHEMA = Schema( + ('brokers', Array( + ('node_id', Int32), + ('host', String('utf-8')), + ('port', Int32), + ('rack', String('utf-8')))), + ('cluster_id', String('utf-8')), # <-- Added cluster_id field in v2 + ('controller_id', Int32), + ('topics', Array( + ('error_code', Int16), + ('topic', String('utf-8')), + ('is_internal', Boolean), + ('partitions', Array( + ('error_code', Int16), + ('partition', Int32), + ('leader', Int32), + ('replicas', Array(Int32)), + ('isr', Array(Int32)))))) + ) + + class MetadataRequest_v0(Struct): API_KEY = 3 API_VERSION = 0 @@ -66,5 +90,13 @@ class MetadataRequest_v1(Struct): NO_TOPICS = None # Empty array (len 0) for topics returns no topics -MetadataRequest = [MetadataRequest_v0, MetadataRequest_v1] -MetadataResponse = [MetadataResponse_v0, MetadataResponse_v1] +class MetadataRequest_v2(Struct): + API_KEY = 3 + API_VERSION = 2 + RESPONSE_TYPE = MetadataResponse_v2 + SCHEMA = MetadataRequest_v1.SCHEMA + + +MetadataRequest = [MetadataRequest_v0, MetadataRequest_v1, MetadataRequest_v2] +MetadataResponse = [ + MetadataResponse_v0, MetadataResponse_v1, MetadataResponse_v2] From b33490b91f5d3703028cbf643f8574043dc30de2 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 9 Feb 2017 12:27:16 -0800 Subject: [PATCH 097/291] PEP-8: Spacing & removed unused imports (#899) --- kafka/client.py | 7 +++---- kafka/client_async.py | 22 +++++++++++----------- kafka/consumer/fetcher.py | 16 ++++++++-------- kafka/consumer/group.py | 6 +++--- kafka/coordinator/base.py | 12 ++++++------ kafka/producer/base.py | 4 ++-- kafka/producer/kafka.py | 8 ++++---- kafka/protocol/fetch.py | 2 +- kafka/protocol/legacy.py | 14 +++++--------- kafka/protocol/message.py | 7 +++---- kafka/protocol/struct.py | 3 +-- test/test_fetcher.py | 6 ++---- 12 files changed, 49 insertions(+), 58 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 0b1176697..d4b55afd5 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -289,7 +289,6 @@ def failed_payloads(payloads): failed_payloads(broker_payloads) continue - host, port, afi = get_ip_port_afi(broker.host) try: conn = self._get_conn(host, broker.port, afi, broker.nodeId) @@ -522,8 +521,8 @@ def reset_all_metadata(self): def has_metadata_for_topic(self, topic): return ( - topic in self.topic_partitions - and len(self.topic_partitions[topic]) > 0 + topic in self.topic_partitions + and len(self.topic_partitions[topic]) > 0 ) def get_partition_ids_for_topic(self, topic): @@ -536,7 +535,7 @@ def get_partition_ids_for_topic(self, topic): def topics(self): return 
list(self.topic_partitions.keys()) - def ensure_topic_exists(self, topic, timeout = 30): + def ensure_topic_exists(self, topic, timeout=30): start_time = time.time() while not self.has_metadata_for_topic(topic): diff --git a/kafka/client_async.py b/kafka/client_async.py index 68d5579b6..f326cf74f 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -10,7 +10,7 @@ # selectors in stdlib as of py3.4 try: - import selectors # pylint: disable=import-error + import selectors # pylint: disable=import-error except ImportError: # vendored backport module from .vendor import selectors34 as selectors @@ -175,7 +175,7 @@ def __init__(self, **configs): self.config['api_version'], str(self.API_VERSIONS))) self.cluster = ClusterMetadata(**self.config) - self._topics = set() # empty set will fetch all topic metadata + self._topics = set() # empty set will fetch all topic metadata self._metadata_refresh_in_progress = False self._last_no_node_available_ms = 0 self._selector = self.config['selector']() @@ -343,7 +343,7 @@ def connected(self, node_id): return self._conns[node_id].connected() def close(self, node_id=None): - """Closes one or all broker connections. + """Close one or all broker connections. Arguments: node_id (int, optional): the id of the node to close @@ -381,7 +381,7 @@ def is_disconnected(self, node_id): def connection_delay(self, node_id): """ - Returns the number of milliseconds to wait, based on the connection + Return the number of milliseconds to wait, based on the connection state, before attempting to send data. When disconnected, this respects the reconnect backoff time. When connecting, returns 0 to allow non-blocking connect to finish. When connected, returns a very large @@ -506,7 +506,7 @@ def poll(self, timeout_ms=None, future=None, sleep=True): metadata_timeout_ms, self._delayed_tasks.next_at() * 1000, self.config['request_timeout_ms']) - timeout = max(0, timeout / 1000.0) # avoid negative timeouts + timeout = max(0, timeout / 1000.0) # avoid negative timeouts responses.extend(self._poll(timeout, sleep=sleep)) @@ -561,7 +561,7 @@ def _poll(self, timeout, sleep=True): # Accumulate as many responses as the connection has pending while conn.in_flight_requests: - response = conn.recv() # Note: conn.recv runs callbacks / errbacks + response = conn.recv() # Note: conn.recv runs callbacks / errbacks # Incomplete responses are buffered internally # while conn.in_flight_requests retains the request @@ -768,9 +768,9 @@ def unschedule(self, task): self._delayed_tasks.remove(task) def check_version(self, node_id=None, timeout=2, strict=False): - """Attempt to guess a broker version + """Attempt to guess the version of a Kafka broker. - Note: it is possible that this method blocks longer than the + Note: It is possible that this method blocks longer than the specified timeout. This can happen if the entire cluster is down and the client enters a bootstrap backoff sleep. This is only possible if node_id is None. @@ -829,9 +829,9 @@ def _clear_wake_fd(self): class DelayedTaskQueue(object): # see https://docs.python.org/2/library/heapq.html def __init__(self): - self._tasks = [] # list of entries arranged in a heap - self._task_map = {} # mapping of tasks to entries - self._counter = itertools.count() # unique sequence count + self._tasks = [] # list of entries arranged in a heap + self._task_map = {} # mapping of tasks to entries + self._counter = itertools.count() # unique sequence count def add(self, task, at): """Add a task to run at a later time. 
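
For readers unfamiliar with the recipe referenced by the heapq docs link above: DelayedTaskQueue follows the standard "heap of (time, tiebreaker, item) entries plus an entry map" pattern. The standalone sketch below is illustrative only; it is not part of this patch, and the top-level add/next_at helpers are not kafka-python APIs, they simply mirror the attributes and methods visible in the hunk above.

import heapq
import itertools
import time

_tasks = []                   # heap of (at, count, task) entries, earliest first
_task_map = {}                # task -> entry, for O(1) membership/removal checks
_counter = itertools.count()  # tiebreaker so equal timestamps never compare tasks

def add(task, at):
    """Schedule `task` (any callable) to run at unix timestamp `at`."""
    entry = (at, next(_counter), task)
    _task_map[task] = entry
    heapq.heappush(_tasks, entry)

def next_at():
    """Seconds until the earliest scheduled task is due, or None if empty."""
    if not _tasks:
        return None
    return max(0, _tasks[0][0] - time.time())

add(lambda: None, time.time() + 1.0)
print(next_at())  # prints roughly 1.0
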
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index d09f9da9a..802f92efb 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -43,7 +43,7 @@ class Fetcher(six.Iterator): 'max_poll_records': sys.maxsize, 'check_crcs': True, 'skip_double_compressed_messages': False, - 'iterator_refetch_records': 1, # undocumented -- interface may change + 'iterator_refetch_records': 1, # undocumented -- interface may change 'metric_group_prefix': 'consumer', 'api_version': (0, 8, 0), } @@ -90,10 +90,10 @@ def __init__(self, client, subscriptions, metrics, **configs): self._client = client self._subscriptions = subscriptions - self._records = collections.deque() # (offset, topic_partition, messages) + self._records = collections.deque() # (offset, topic_partition, messages) self._unauthorized_topics = set() - self._offset_out_of_range_partitions = dict() # {topic_partition: offset} - self._record_too_large_partitions = dict() # {topic_partition: offset} + self._offset_out_of_range_partitions = dict() # {topic_partition: offset} + self._record_too_large_partitions = dict() # {topic_partition: offset} self._iterator = None self._fetch_futures = collections.deque() self._sensors = FetchManagerMetrics(metrics, self.config['metric_group_prefix']) @@ -216,7 +216,7 @@ def _offset(self, partition, timestamp): return future.value if not future.retriable(): - raise future.exception # pylint: disable-msg=raising-bad-type + raise future.exception # pylint: disable-msg=raising-bad-type if future.exception.invalid_metadata: refresh_future = self._client.cluster.request_update() @@ -493,10 +493,10 @@ def _unpack_message_set(self, tp, messages): # of a compressed message depends on the # typestamp type of the wrapper message: - if msg.timestamp_type == 0: # CREATE_TIME (0) + if msg.timestamp_type == 0: # CREATE_TIME (0) inner_timestamp = inner_msg.timestamp - elif msg.timestamp_type == 1: # LOG_APPEND_TIME (1) + elif msg.timestamp_type == 1: # LOG_APPEND_TIME (1) inner_timestamp = msg.timestamp else: @@ -666,7 +666,7 @@ def _create_fetch_requests(self): requests = {} for node_id, partition_data in six.iteritems(fetchable): requests[node_id] = FetchRequest[version]( - -1, # replica_id + -1, # replica_id self.config['fetch_max_wait_ms'], self.config['fetch_min_bytes'], partition_data.items()) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 670b540ff..d362c7278 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -239,7 +239,7 @@ class KafkaConsumer(six.Iterator): 'ssl_password': None, 'api_version': None, 'api_version_auto_timeout_ms': 2000, - 'connections_max_idle_ms': 9 * 60 * 1000, # Not implemented yet + 'connections_max_idle_ms': 9 * 60 * 1000, # Not implemented yet 'metric_reporters': [], 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, @@ -827,8 +827,8 @@ def _update_fetch_positions(self, partitions): NoOffsetForPartitionError: If no offset is stored for a given partition and no offset reset policy is defined. 
""" - if (self.config['api_version'] >= (0, 8, 1) - and self.config['group_id'] is not None): + if (self.config['api_version'] >= (0, 8, 1) and + self.config['group_id'] is not None): # Refresh commits for all assigned partitions self._coordinator.refresh_committed_offsets_if_needed() diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 5f60aa321..68de8df83 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -15,7 +15,7 @@ from ..metrics.stats import Avg, Count, Max, Rate from ..protocol.commit import GroupCoordinatorRequest, OffsetCommitRequest from ..protocol.group import (HeartbeatRequest, JoinGroupRequest, - LeaveGroupRequest, SyncGroupRequest) + LeaveGroupRequest, SyncGroupRequest) log = logging.getLogger('kafka.coordinator') @@ -220,7 +220,7 @@ def ensure_coordinator_known(self): metadata_update = self._client.cluster.request_update() self._client.poll(future=metadata_update) else: - raise future.exception # pylint: disable-msg=raising-bad-type + raise future.exception # pylint: disable-msg=raising-bad-type def need_rejoin(self): """Check whether the group should be rejoined (e.g. if metadata changes) @@ -267,7 +267,7 @@ def ensure_active_group(self): Errors.IllegalGenerationError)): continue elif not future.retriable(): - raise exception # pylint: disable-msg=raising-bad-type + raise exception # pylint: disable-msg=raising-bad-type time.sleep(self.config['retry_backoff_ms'] / 1000) def _send_join_group_request(self): @@ -425,7 +425,7 @@ def _handle_sync_group_response(self, future, send_time, response): error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: log.info("Successfully joined group %s with generation %s", - self.group_id, self.generation) + self.group_id, self.generation) self.sensors.sync_latency.record((time.time() - send_time) * 1000) future.success(response.member_assignment) return @@ -551,7 +551,7 @@ def _handle_leave_group_response(self, response): def _send_heartbeat_request(self): """Send a heartbeat request""" request = HeartbeatRequest[0](self.group_id, self.generation, self.member_id) - log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id) #pylint: disable-msg=no-member + log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id) # pylint: disable-msg=no-member future = Future() _f = self._client.send(self.coordinator_id, request) _f.add_callback(self._handle_heartbeat_response, future, time.time()) @@ -624,7 +624,7 @@ def reset(self): def __call__(self): if (self._coordinator.generation < 0 or - self._coordinator.need_rejoin()): + self._coordinator.need_rejoin()): # no need to send the heartbeat we're not using auto-assignment # or if we are awaiting a rebalance log.info("Skipping heartbeat: no auto-assignment" diff --git a/kafka/producer/base.py b/kafka/producer/base.py index 8471818a2..91d3ee7df 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -282,7 +282,7 @@ def __init__(self, client, codec_compresslevel=None, sync_fail_on_error=SYNC_FAIL_ON_ERROR_DEFAULT, async=False, - batch_send=False, # deprecated, use async + batch_send=False, # deprecated, use async batch_send_every_n=BATCH_SEND_MSG_COUNT, batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL, async_retry_limit=ASYNC_RETRY_LIMIT, @@ -452,7 +452,7 @@ def stop(self, timeout=None): # py3 supports unregistering if hasattr(atexit, 'unregister'): - atexit.unregister(self._cleanup_func) # pylint: disable=no-member + atexit.unregister(self._cleanup_func) # 
pylint: disable=no-member # py2 requires removing from private attribute... else: diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index b13db8623..5f2b21d17 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -264,7 +264,7 @@ class KafkaProducer(object): 'linger_ms': 0, 'partitioner': DefaultPartitioner(), 'buffer_memory': 33554432, - 'connections_max_idle_ms': 600000, # not implemented yet + 'connections_max_idle_ms': 600000, # not implemented yet 'max_block_ms': 60000, 'max_request_size': 1048576, 'metadata_max_age_ms': 300000, @@ -295,7 +295,7 @@ class KafkaProducer(object): } def __init__(self, **configs): - log.debug("Starting the Kafka producer") # trace + log.debug("Starting the Kafka producer") # trace self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: if key in configs: @@ -368,7 +368,7 @@ def wrapper(): def _unregister_cleanup(self): if getattr(self, '_cleanup', None): if hasattr(atexit, 'unregister'): - atexit.unregister(self._cleanup) # pylint: disable=no-member + atexit.unregister(self._cleanup) # pylint: disable=no-member # py2 requires removing from private attribute... else: @@ -543,7 +543,7 @@ def flush(self, timeout=None): Arguments: timeout (float, optional): timeout in seconds to wait for completion. """ - log.debug("Flushing accumulated records in producer.") # trace + log.debug("Flushing accumulated records in producer.") # trace self._accumulator.begin_flush() self._sender.wakeup() self._accumulator.await_flush_completion(timeout=timeout) diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index 79b010fe2..6a9ad5b99 100644 --- a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -37,7 +37,7 @@ class FetchResponse_v1(Struct): class FetchResponse_v2(Struct): API_KEY = 1 API_VERSION = 2 - SCHEMA = FetchResponse_v1.SCHEMA # message format changed internally + SCHEMA = FetchResponse_v1.SCHEMA # message format changed internally class FetchResponse_v3(Struct): diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index c43a5edc3..e1cfbd0c5 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -3,9 +3,7 @@ import logging import struct -from kafka.vendor import six # pylint: disable=import-error - -from kafka.vendor.six.moves import xrange # pylint: disable=import-error +from kafka.vendor import six # pylint: disable=import-error import kafka.protocol.commit import kafka.protocol.fetch @@ -15,13 +13,12 @@ import kafka.protocol.produce import kafka.structs -from kafka.codec import ( - gzip_encode, gzip_decode, snappy_encode, snappy_decode) -from kafka.errors import ProtocolError, ChecksumError, UnsupportedCodecError +from kafka.codec import gzip_encode, snappy_encode +from kafka.errors import ProtocolError, UnsupportedCodecError from kafka.structs import ConsumerMetadataResponse from kafka.util import ( - crc32, read_short_string, read_int_string, relative_unpack, - write_short_string, write_int_string, group_by_topic_and_partition) + crc32, read_short_string, relative_unpack, + write_int_string, group_by_topic_and_partition) log = logging.getLogger(__name__) @@ -315,7 +312,6 @@ def encode_offset_commit_request(cls, group, payloads): for partition, payload in six.iteritems(topic_payloads)]) for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) - @classmethod def encode_offset_commit_request_kafka(cls, group, payloads): """ diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py index 36f03ca92..bfad1275d 100644 --- 
a/kafka/protocol/message.py +++ b/kafka/protocol/message.py @@ -6,7 +6,6 @@ from ..codec import (has_gzip, has_snappy, has_lz4, gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka) -from . import pickle from .struct import Struct from .types import ( Int8, Int32, Int64, Bytes, Schema, AbstractType @@ -36,7 +35,7 @@ class Message(Struct): CODEC_SNAPPY = 0x02 CODEC_LZ4 = 0x03 TIMESTAMP_TYPE_MASK = 0x08 - HEADER_SIZE = 22 # crc(4), magic(1), attributes(1), timestamp(8), key+value size(4*2) + HEADER_SIZE = 22 # crc(4), magic(1), attributes(1), timestamp(8), key+value size(4*2) def __init__(self, value, key=None, magic=0, attributes=0, crc=0, timestamp=None): @@ -127,7 +126,7 @@ def decompress(self): else: raw_bytes = lz4_decode(self.value) else: - raise Exception('This should be impossible') + raise Exception('This should be impossible') return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes)) @@ -145,7 +144,7 @@ class MessageSet(AbstractType): ('offset', Int64), ('message', Bytes) ) - HEADER_SIZE = 12 # offset + message_size + HEADER_SIZE = 12 # offset + message_size @classmethod def encode(cls, items): diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index 602cfb8d4..a3d28d76c 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -1,6 +1,5 @@ from __future__ import absolute_import -#from collections import namedtuple from io import BytesIO from .abstract import AbstractType @@ -23,7 +22,7 @@ def __init__(self, *args, **kwargs): self.encode = self._encode_self @classmethod - def encode(cls, item): # pylint: disable=E0202 + def encode(cls, item): # pylint: disable=E0202 bits = [] for i, field in enumerate(cls.SCHEMA.fields): bits.append(field.encode(item[i])) diff --git a/test/test_fetcher.py b/test/test_fetcher.py index fea3f7d7b..984de8883 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -6,11 +6,9 @@ from kafka.client_async import KafkaClient from kafka.consumer.fetcher import Fetcher from kafka.consumer.subscription_state import SubscriptionState -import kafka.errors as Errors -from kafka.future import Future from kafka.metrics import Metrics from kafka.protocol.fetch import FetchRequest -from kafka.structs import TopicPartition, OffsetAndMetadata +from kafka.structs import TopicPartition @pytest.fixture @@ -51,7 +49,7 @@ def test_send_fetches(fetcher, mocker): ] mocker.patch.object(fetcher, '_create_fetch_requests', - return_value = dict(enumerate(fetch_requests))) + return_value=dict(enumerate(fetch_requests))) ret = fetcher.send_fetches() for node, request in enumerate(fetch_requests): From 8579a6c9fe203a4d19a87e2332c922d7737dd4c3 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 24 Feb 2017 09:53:44 -0800 Subject: [PATCH 098/291] Drop bad disconnect test -- just use the mocked-socket test (#982) --- test/test_conn.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/test/test_conn.py b/test/test_conn.py index 4f2b12f60..358a57678 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -165,33 +165,7 @@ def test_can_send_more(conn): assert conn.can_send_more() is False -def test_recv_disconnected(): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(('127.0.0.1', 0)) - port = sock.getsockname()[1] - sock.listen(5) - - conn = BrokerConnection('127.0.0.1', port, socket.AF_INET) - timeout = time.time() + 1 - while time.time() < timeout: - conn.connect() - if conn.connected(): - break - else: - assert False, 'Connection attempt to local socket timed-out 
?' - - conn.send(MetadataRequest[0]([])) - - # Disconnect server socket - sock.close() - - # Attempt to receive should mark connection as disconnected - assert conn.connected() - conn.recv() - assert conn.disconnected() - - -def test_recv_disconnected_too(_socket, conn): +def test_recv_disconnected(_socket, conn): conn.connect() assert conn.connected() From 3ff8d75892ed2b6517bbf4b09e39e7fcb6aaf9de Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 28 Feb 2017 20:13:28 +0200 Subject: [PATCH 099/291] Fail-fast on timeout constraint violations during KafkaConsumer creation (#986) --- kafka/consumer/group.py | 13 +++++++++++++ test/test_consumer.py | 8 ++++++++ 2 files changed, 21 insertions(+) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index d362c7278..fecd8865d 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -6,6 +6,8 @@ import sys import time +from kafka.errors import KafkaConfigurationError + from kafka.vendor import six from kafka.client_async import KafkaClient, selectors @@ -267,6 +269,17 @@ def __init__(self, *topics, **configs): new_config, self.config['auto_offset_reset']) self.config['auto_offset_reset'] = new_config + request_timeout_ms = self.config['request_timeout_ms'] + session_timeout_ms = self.config['session_timeout_ms'] + fetch_max_wait_ms = self.config['fetch_max_wait_ms'] + if request_timeout_ms <= session_timeout_ms: + raise KafkaConfigurationError( + "Request timeout (%s) must be larger than session timeout (%s)" % + (request_timeout_ms, session_timeout_ms)) + if request_timeout_ms <= fetch_max_wait_ms: + raise KafkaConfigurationError("Request timeout (%s) must be larger than fetch-max-wait-ms (%s)" % + (request_timeout_ms, fetch_max_wait_ms)) + metrics_tags = {'client-id': self.config['client_id']} metric_config = MetricConfig(samples=self.config['metrics_num_samples'], time_window_ms=self.config['metrics_sample_window_ms'], diff --git a/test/test_consumer.py b/test/test_consumer.py index 21a30a016..cc9d074c3 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -20,6 +20,14 @@ def test_broker_list_required(self): with self.assertRaises(KafkaConfigurationError): OldKafkaConsumer() + def test_session_timeout_larger_than_request_timeout_raises(self): + with self.assertRaises(KafkaConfigurationError): + KafkaConsumer(bootstrap_servers='localhost:9092', session_timeout_ms=60000, request_timeout_ms=40000) + + def test_fetch_max_wait_larger_than_request_timeout_raises(self): + with self.assertRaises(KafkaConfigurationError): + KafkaConsumer(bootstrap_servers='localhost:9092', fetch_max_wait_ms=41000, request_timeout_ms=40000) + class TestMultiProcessConsumer(unittest.TestCase): @unittest.skipIf(sys.platform.startswith('win'), 'test mocking fails on windows') From 46de9b62b6614a2caaab65a0153b208b3bb265f8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 28 Feb 2017 10:18:02 -0800 Subject: [PATCH 100/291] Remove dead code (#967) --- kafka/util.py | 32 -------------------------------- test/test_util.py | 46 ---------------------------------------------- 2 files changed, 78 deletions(-) diff --git a/kafka/util.py b/kafka/util.py index bc011540a..de8f2280e 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -4,7 +4,6 @@ import binascii import collections import struct -import sys from threading import Thread, Event import weakref @@ -33,19 +32,6 @@ def write_int_string(s): return struct.pack('>i%ds' % len(s), len(s), s) -def write_short_string(s): - if s is not None and not isinstance(s, six.binary_type): - raise 
TypeError('Expected "%s" to be bytes\n' - 'data=%s' % (type(s), repr(s))) - if s is None: - return struct.pack('>h', -1) - elif len(s) > 32767 and sys.version_info < (2, 7): - # Python 2.6 issues a deprecation warning instead of a struct error - raise struct.error(len(s)) - else: - return struct.pack('>h%ds' % len(s), len(s), s) - - def read_short_string(data, cur): if len(data) < cur + 2: raise BufferUnderflowError("Not enough data left") @@ -62,24 +48,6 @@ def read_short_string(data, cur): return out, cur + strlen -def read_int_string(data, cur): - if len(data) < cur + 4: - raise BufferUnderflowError( - "Not enough data left to read string len (%d < %d)" % - (len(data), cur + 4)) - - (strlen,) = struct.unpack('>i', data[cur:cur + 4]) - if strlen == -1: - return None, cur + 4 - - cur += 4 - if len(data) < cur + strlen: - raise BufferUnderflowError("Not enough data left") - - out = data[cur:cur + strlen] - return out, cur + strlen - - def relative_unpack(fmt, data, cur): size = struct.calcsize(fmt) if len(data) < cur + size: diff --git a/test/test_util.py b/test/test_util.py index 5fc3f69f2..58e5ab840 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -43,57 +43,11 @@ def test_write_int_string__null(self): b'\xff\xff\xff\xff' ) - def test_read_int_string(self): - self.assertEqual(kafka.util.read_int_string(b'\xff\xff\xff\xff', 0), (None, 4)) - self.assertEqual(kafka.util.read_int_string(b'\x00\x00\x00\x00', 0), (b'', 4)) - self.assertEqual(kafka.util.read_int_string(b'\x00\x00\x00\x0bsome string', 0), (b'some string', 15)) - - def test_read_int_string__insufficient_data(self): - with self.assertRaises(kafka.errors.BufferUnderflowError): - kafka.util.read_int_string(b'\x00\x00\x00\x021', 0) - - def test_write_short_string(self): - self.assertEqual( - kafka.util.write_short_string(b'some string'), - b'\x00\x0bsome string' - ) - - def test_write_short_string__unicode(self): - with self.assertRaises(TypeError) as cm: - kafka.util.write_short_string(u'hello') - #: :type: TypeError - te = cm.exception - if six.PY2: - self.assertIn('unicode', str(te)) - else: - self.assertIn('str', str(te)) - self.assertIn('to be bytes', str(te)) - - def test_write_short_string__empty(self): - self.assertEqual( - kafka.util.write_short_string(b''), - b'\x00\x00' - ) - - def test_write_short_string__null(self): - self.assertEqual( - kafka.util.write_short_string(None), - b'\xff\xff' - ) - - def test_write_short_string__too_long(self): - with self.assertRaises(struct.error): - kafka.util.write_short_string(b' ' * 33000) - def test_read_short_string(self): self.assertEqual(kafka.util.read_short_string(b'\xff\xff', 0), (None, 2)) self.assertEqual(kafka.util.read_short_string(b'\x00\x00', 0), (b'', 2)) self.assertEqual(kafka.util.read_short_string(b'\x00\x0bsome string', 0), (b'some string', 13)) - def test_read_int_string__insufficient_data2(self): - with self.assertRaises(kafka.errors.BufferUnderflowError): - kafka.util.read_int_string('\x00\x021', 0) - def test_relative_unpack2(self): self.assertEqual( kafka.util.relative_unpack('>hh', b'\x00\x01\x00\x00\x02', 0), From cb6930bf167e1699e313486d5e1f7ad8bbd0b3e7 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 28 Feb 2017 14:35:53 -0800 Subject: [PATCH 101/291] Fix BrokerConnection api_version docs default (#909) --- kafka/client_async.py | 8 ++++---- kafka/conn.py | 7 +++---- kafka/consumer/group.py | 9 +++++---- kafka/producer/kafka.py | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/kafka/client_async.py 
b/kafka/client_async.py index f326cf74f..5c69905da 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -105,10 +105,10 @@ class KafkaClient(object): providing a file, only the leaf certificate will be checked against this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. default: none. - api_version (tuple): specify which kafka API version to use. Accepted - values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) - If None, KafkaClient will attempt to infer the broker - version by probing various APIs. Default: None + api_version (tuple): Specify which Kafka API version to use. If set + to None, KafkaClient will attempt to infer the broker version by + probing various APIs. For the full list of supported versions, + see KafkaClient.API_VERSIONS. Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version is None diff --git a/kafka/conn.py b/kafka/conn.py index 11ede9f9e..12c59ff8b 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -114,10 +114,9 @@ class BrokerConnection(object): providing a file, only the leaf certificate will be checked against this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. default: None. - api_version (tuple): specify which Kafka API version to use. Accepted - values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), (0, 10) - If None, KafkaClient will attempt to infer the broker - version by probing various APIs. Default: None + api_version (tuple): Specify which Kafka API version to use. + Accepted values are: (0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9), + (0, 10). Default: (0, 8, 2) api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version is None diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index fecd8865d..65f97b6b1 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -161,9 +161,9 @@ class KafkaConsumer(six.Iterator): providing a file, only the leaf certificate will be checked against this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. Default: None. - api_version (tuple): Specify which kafka API version to use. - If set to None, the client will attempt to infer the broker version - by probing various APIs. Default: None + api_version (tuple): Specify which Kafka API version to use. If set to + None, the client will attempt to infer the broker version by probing + various APIs. Different versions enable different functionality. Examples: (0, 9) enables full group coordination features with automatic partition assignment and rebalancing, @@ -173,7 +173,8 @@ class KafkaConsumer(six.Iterator): partition assignment only, (0, 8, 0) enables basic functionality but requires manual partition assignment and offset management. - For a full list of supported versions, see KafkaClient.API_VERSIONS + For the full list of supported versions, see + KafkaClient.API_VERSIONS. Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to 'auto' diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 5f2b21d17..25a0acbd7 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -223,10 +223,10 @@ class KafkaProducer(object): providing a file, only the leaf certificate will be checked against this CRL. 
The CRL can only be checked with Python 3.4+ or 2.7.9+. default: none. - api_version (tuple): specify which kafka API version to use. - For a full list of supported versions, see KafkaClient.API_VERSIONS - If set to None, the client will attempt to infer the broker version - by probing various APIs. Default: None + api_version (tuple): Specify which Kafka API version to use. If set to + None, the client will attempt to infer the broker version by probing + various APIs. For a full list of supported versions, see + KafkaClient.API_VERSIONS. Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to 'auto' From 8973d44ddeb085dd012ed9c1ba1dbfe88d4dfef4 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 2 Mar 2017 11:27:07 -0800 Subject: [PATCH 102/291] Issue 985: Clear memory wait condition before raising Exception (#999) --- kafka/producer/buffer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/producer/buffer.py b/kafka/producer/buffer.py index 422d47c66..d1eeaf1a6 100644 --- a/kafka/producer/buffer.py +++ b/kafka/producer/buffer.py @@ -197,6 +197,7 @@ def allocate(self, size, max_time_to_block_ms): if self._free: buf = self._free.popleft() else: + self._waiters.remove(more_memory) raise Errors.KafkaTimeoutError( "Failed to allocate memory within the configured" " max blocking time") From 9c2d69c0512a63b307bb256aac0a9e4cc6a54705 Mon Sep 17 00:00:00 2001 From: charsyam Date: Fri, 3 Mar 2017 07:15:01 +0900 Subject: [PATCH 103/291] Add send_list_offset_request for searching offset by timestamp (#1001), --- kafka/client.py | 11 +++++++++++ kafka/protocol/legacy.py | 29 +++++++++++++++++++++++++++++ kafka/structs.py | 6 ++++++ 3 files changed, 46 insertions(+) diff --git a/kafka/client.py b/kafka/client.py index d4b55afd5..3aa0e496c 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -740,6 +740,17 @@ def send_offset_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_commit') + def send_list_offset_request(self, payloads=[], fail_on_error=True, + callback=None): + resps = self._send_broker_aware_request( + payloads, + KafkaProtocol.encode_list_offset_request, + KafkaProtocol.decode_list_offset_response) + + return [resp if not callback else callback(resp) for resp in resps + if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_commit') def send_offset_commit_request(self, group, payloads=[], fail_on_error=True, callback=None): diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index e1cfbd0c5..1776c020c 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -248,6 +248,35 @@ def decode_offset_response(cls, response): for partition, error, offsets in partitions ] + @classmethod + def encode_list_offset_request(cls, payloads=()): + return kafka.protocol.offset.OffsetRequest[1]( + replica_id=-1, + topics=[( + topic, + [( + partition, + payload.time) + for partition, payload in six.iteritems(topic_payloads)]) + for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) + + @classmethod + def decode_list_offset_response(cls, response): + """ + Decode OffsetResponse_v2 into ListOffsetResponsePayloads + + Arguments: + response: OffsetResponse_v2 + + Returns: list of ListOffsetResponsePayloads + """ + return [ + 
kafka.structs.ListOffsetResponsePayload(topic, partition, error, timestamp, offset) + for topic, partitions in response.topics + for partition, error, timestamp, offset in partitions + ] + + @classmethod def encode_metadata_request(cls, topics=(), payloads=None): """ diff --git a/kafka/structs.py b/kafka/structs.py index 7d1d96a44..48321e718 100644 --- a/kafka/structs.py +++ b/kafka/structs.py @@ -37,9 +37,15 @@ OffsetRequestPayload = namedtuple("OffsetRequestPayload", ["topic", "partition", "time", "max_offsets"]) +ListOffsetRequestPayload = namedtuple("ListOffsetRequestPayload", + ["topic", "partition", "time"]) + OffsetResponsePayload = namedtuple("OffsetResponsePayload", ["topic", "partition", "error", "offsets"]) +ListOffsetResponsePayload = namedtuple("ListOffsetResponsePayload", + ["topic", "partition", "error", "timestamp", "offset"]) + # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI OffsetCommitRequestPayload = namedtuple("OffsetCommitRequestPayload", ["topic", "partition", "offset", "metadata"]) From ff923da62485051849f675e780f1cf3b02089a68 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 10:07:59 -0800 Subject: [PATCH 104/291] Fix sasl reconnect bug: auth future must be reset on close (#1003) --- kafka/conn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/conn.py b/kafka/conn.py index 12c59ff8b..95b67e8b8 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -500,6 +500,7 @@ def close(self, error=None): self._sock.close() self._sock = None self.state = ConnectionStates.DISCONNECTED + self._sasl_auth_future = None self._receiving = False self._next_payload_bytes = 0 self._rbuffer.seek(0) From b59a49aa19fb8e8090c5497d878f25f48cfa9724 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 3 Mar 2017 18:13:26 +0000 Subject: [PATCH 105/291] Replace %s with %r in producer debug log message (#973) --- kafka/producer/kafka.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 25a0acbd7..f299f04dd 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -499,7 +499,7 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): tp = TopicPartition(topic, partition) if timestamp_ms is None: timestamp_ms = int(time.time() * 1000) - log.debug("Sending (key=%s value=%s) to %s", key, value, tp) + log.debug("Sending (key=%r value=%r) to %s", key, value, tp) result = self._accumulator.append(tp, timestamp_ms, key_bytes, value_bytes, self.config['max_block_ms']) From 5403e8ecf32871681345323ac9b842c3ef1afac6 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 3 Mar 2017 10:14:54 -0800 Subject: [PATCH 106/291] Add sphinx formatting to hyperlink methods (#898) --- kafka/client.py | 14 ++++++------ kafka/consumer/group.py | 45 +++++++++++++++++++++------------------ kafka/coordinator/base.py | 10 ++++----- kafka/producer/base.py | 8 ++++--- kafka/producer/kafka.py | 16 +++++++------- 5 files changed, 49 insertions(+), 44 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 3aa0e496c..5192af6f9 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -131,11 +131,11 @@ def _get_leader_for_partition(self, topic, partition): Returns the leader for a partition or None if the partition exists but has no leader. - UnknownTopicOrPartitionError will be raised if the topic or partition - is not part of the metadata. 
- - LeaderNotAvailableError is raised if server has metadata, but there is - no current leader + Raises: + UnknownTopicOrPartitionError: If the topic or partition is not part + of the metadata. + LeaderNotAvailableError: If the server has metadata, but there is no + current leader. """ key = TopicPartition(topic, partition) @@ -474,8 +474,8 @@ def copy(self): Create an inactive copy of the client object, suitable for passing to a separate thread. - Note that the copied connections are not initialized, so reinit() must - be called on the returned copy. + Note that the copied connections are not initialized, so :meth:`.reinit` + must be called on the returned copy. """ _conns = self._conns self._conns = {} diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 65f97b6b1..96a6bf768 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -35,7 +35,7 @@ class KafkaConsumer(six.Iterator): Arguments: *topics (str): optional list of topics to subscribe to. If not set, - call subscribe() or assign() before consuming records. + call :meth:`.subscribe` or :meth:`.assign` before consuming records. Keyword Arguments: bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' @@ -119,7 +119,7 @@ class KafkaConsumer(six.Iterator): session_timeout_ms (int): The timeout used to detect failures when using Kafka's group management facilities. Default: 30000 max_poll_records (int): The maximum number of records returned in a - single call to poll(). Default: 500 + single call to :meth:`.poll`. Default: 500 receive_buffer_bytes (int): The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. Default: None (relies on system defaults). The java client defaults to 32768. @@ -327,11 +327,11 @@ def assign(self, partitions): partitions (list of TopicPartition): Assignment for this instance. Raises: - IllegalStateError: If consumer has already called subscribe() + IllegalStateError: If consumer has already called :meth:`.subscribe`. Warning: It is not possible to use both manual partition assignment with - assign() and group assignment with subscribe(). + :meth:`.assign` and group assignment with :meth:`.subscribe`. Note: This interface does not support incremental assignment and will @@ -349,12 +349,12 @@ def assign(self, partitions): def assignment(self): """Get the TopicPartitions currently assigned to this consumer. - If partitions were directly assigned using assign(), then this will - simply return the same partitions that were previously assigned. - If topics were subscribed using subscribe(), then this will give the - set of topic partitions currently assigned to the consumer (which may - be None if the assignment hasn't happened yet, or if the partitions are - in the process of being reassigned). + If partitions were directly assigned using :meth:`.assign`, then this + will simply return the same partitions that were previously assigned. + If topics were subscribed using :meth:`.subscribe`, then this will give + the set of topic partitions currently assigned to the consumer (which + may be None if the assignment hasn't happened yet, or if the partitions + are in the process of being reassigned). Returns: set: {TopicPartition, ...} @@ -517,6 +517,9 @@ def poll(self, timeout_ms=0, max_records=None): data is not available in the buffer. If 0, returns immediately with any records that are available currently in the buffer, else returns empty. Must not be negative. 
Default: 0 + max_records (int, optional): The maximum number of records returned + in a single call to :meth:`.poll`. Default: Inherit value from + max_poll_records. Returns: dict: Topic to list of records since the last fetch for the @@ -627,10 +630,10 @@ def highwater(self, partition): def pause(self, *partitions): """Suspend fetching from the requested partitions. - Future calls to poll() will not return any records from these partitions - until they have been resumed using resume(). Note that this method does - not affect partition subscription. In particular, it does not cause a - group rebalance when automatic assignment is used. + Future calls to :meth:`.poll` will not return any records from these + partitions until they have been resumed using :meth:`.resume`. Note that + this method does not affect partition subscription. In particular, it + does not cause a group rebalance when automatic assignment is used. Arguments: *partitions (TopicPartition): Partitions to pause. @@ -642,7 +645,7 @@ def pause(self, *partitions): self._subscription.pause(partition) def paused(self): - """Get the partitions that were previously paused by a call to pause(). + """Get the partitions that were previously paused using :meth:`.pause`. Returns: set: {partition (TopicPartition), ...} @@ -665,10 +668,10 @@ def seek(self, partition, offset): """Manually specify the fetch offset for a TopicPartition. Overrides the fetch offsets that the consumer will use on the next - poll(). If this API is invoked for the same partition more than once, - the latest offset will be used on the next poll(). Note that you may - lose data if this API is arbitrarily used in the middle of consumption, - to reset the fetch offsets. + :meth:`.poll`. If this API is invoked for the same partition more than + once, the latest offset will be used on the next :meth:`.poll`. Note + that you may lose data if this API is arbitrarily used in the middle of + consumption, to reset the fetch offsets. Arguments: partition (TopicPartition): Partition for seek operation @@ -740,7 +743,7 @@ def subscribe(self, topics=(), pattern=None, listener=None): Topic subscriptions are not incremental: this list will replace the current assignment (if there is one). - This method is incompatible with assign(). + This method is incompatible with :meth:`.assign`. Arguments: topics (list): List of topics for subscription. @@ -769,7 +772,7 @@ def subscribe(self, topics=(), pattern=None, listener=None): through this interface are from topics subscribed in this call. Raises: - IllegalStateError: If called after previously calling assign(). + IllegalStateError: If called after previously calling :meth:`.assign`. AssertionError: If neither topics or pattern is provided. TypeError: If listener is not a ConsumerRebalanceListener. """ diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 68de8df83..48449a243 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -43,10 +43,10 @@ class BaseCoordinator(object): leader and begins processing. To leverage this protocol, an implementation must define the format of - metadata provided by each member for group registration in group_protocols() - and the format of the state assignment provided by the leader in - _perform_assignment() and which becomes available to members in - _on_join_complete(). 
+ metadata provided by each member for group registration in + :meth:`.group_protocols` and the format of the state assignment provided by + the leader in :meth:`._perform_assignment` and which becomes available to + members in :meth:`._on_join_complete`. """ DEFAULT_CONFIG = { @@ -274,7 +274,7 @@ def _send_join_group_request(self): """Join the group and return the assignment for the next generation. This function handles both JoinGroup and SyncGroup, delegating to - _perform_assignment() if elected leader by the coordinator. + :meth:`._perform_assignment` if elected leader by the coordinator. Returns: Future: resolves to the encoded-bytes assignment returned from the diff --git a/kafka/producer/base.py b/kafka/producer/base.py index 91d3ee7df..3c8904050 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -56,7 +56,8 @@ def _send_upstream(queue, client, codec, batch_time, batch_size, Messages placed on the queue should be tuples that conform to this format: ((topic, partition), message, key) - Currently does not mark messages with task_done. Do not attempt to join()! + Currently does not mark messages with task_done. Do not attempt to + :meth:`join`! Arguments: queue (threading.Queue): the queue from which to get messages @@ -227,7 +228,8 @@ class Producer(object): Arguments: client (kafka.SimpleClient): instance to use for broker communications. If async=True, the background thread will use - client.copy(), which is expected to return a thread-safe object. + :meth:`client.copy`, which is expected to return a thread-safe + object. codec (kafka.protocol.ALL_CODECS): compression codec to use. req_acks (int, optional): A value indicating the acknowledgements that the server must receive before responding to the request, @@ -263,7 +265,7 @@ class Producer(object): will not allow you to identify the specific message that failed, but it will allow you to match failures with retries. async_stop_timeout (int or float, optional): seconds to continue - attempting to send queued messages after producer.stop(), + attempting to send queued messages after :meth:`producer.stop`, defaults to 30. Deprecated Arguments: diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index f299f04dd..032656b4b 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -34,9 +34,9 @@ class KafkaProducer(object): thread that is responsible for turning these records into requests and transmitting them to the cluster. - The send() method is asynchronous. When called it adds the record to a - buffer of pending record sends and immediately returns. This allows the - producer to batch together individual records for efficiency. + :meth:`.send` is asynchronous. When called it adds the record to a buffer of + pending record sends and immediately returns. This allows the producer to + batch together individual records for efficiency. The 'acks' config controls the criteria under which requests are considered complete. The "all" setting will result in blocking on the full commit of @@ -166,9 +166,9 @@ class KafkaProducer(object): will block up to max_block_ms, raising an exception on timeout. In the current implementation, this setting is an approximation. Default: 33554432 (32MB) - max_block_ms (int): Number of milliseconds to block during send() and - partitions_for(). These methods can be blocked either because the - buffer is full or metadata unavailable. Blocking in the + max_block_ms (int): Number of milliseconds to block during :meth:`.send` + and :meth:`.partitions_for`. 
These methods can be blocked either + because the buffer is full or metadata unavailable. Blocking in the user-supplied serializers or partitioner will not be counted against this timeout. Default: 60000. max_request_size (int): The maximum size of a request. This is also @@ -531,8 +531,8 @@ def flush(self, timeout=None): Invoking this method makes all buffered records immediately available to send (even if linger_ms is greater than 0) and blocks on the completion of the requests associated with these records. The - post-condition of flush() is that any previously sent record will have - completed (e.g. Future.is_done() == True). A request is considered + post-condition of :meth:`.flush` is that any previously sent record will + have completed (e.g. Future.is_done() == True). A request is considered completed when either it is successfully acknowledged according to the 'acks' configuration for the producer, or it results in an error. From 5caa8883dc8b567fad1f7c2e4cf87f0621a2a942 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 10:55:55 -0800 Subject: [PATCH 107/291] Update vendored berkerpeksag/selectors34 to ff61b82 --- kafka/vendor/selectors34.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kafka/vendor/selectors34.py b/kafka/vendor/selectors34.py index 2a6e55628..ebf5d515e 100644 --- a/kafka/vendor/selectors34.py +++ b/kafka/vendor/selectors34.py @@ -1,6 +1,6 @@ # pylint: skip-file # vendored from https://github.com/berkerpeksag/selectors34 -# at commit 5195dd2cbe598047ad0a2e446a829546f6ffc9eb (v1.1) +# at commit ff61b82168d2cc9c4922ae08e2a8bf94aab61ea2 (unreleased, ~1.2) # # Original author: Charles-Francois Natali (c.f.natali[at]gmail.com) # Maintainer: Berker Peksag (berker.peksag[at]gmail.com) @@ -92,8 +92,10 @@ def __getitem__(self, fileobj): def __iter__(self): return iter(self._selector._fd_to_key) - -class BaseSelector(six.with_metaclass(ABCMeta)): +# Using six.add_metaclass() decorator instead of six.with_metaclass() because +# the latter leaks temporary_class to garbage with gc disabled +@six.add_metaclass(ABCMeta) +class BaseSelector(object): """Selector abstract base class. 
A selector supports registering file objects to be monitored for specific From 442b7c3d6a0ffc3d0f4ffab6f20f4b35c3394222 Mon Sep 17 00:00:00 2001 From: ms7s Date: Fri, 3 Mar 2017 21:37:53 +0100 Subject: [PATCH 108/291] Run tests in python3.6 too (#992) * Test with Python 3.6 in Travis CI * Add Python 3.6 environment to tox config * Don't run automated tests on Python 3.3 --- .travis.yml | 1 + tox.ini | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index a31d62f30..e2c627358 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: python python: - 2.7 - 3.5 + - 3.6 - pypy env: diff --git a/tox.ini b/tox.ini index 817b57b77..23ca385ba 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{26,27,33,34,35,py}, docs +envlist = py{26,27,34,35,36,py}, docs [pytest] testpaths = kafka test @@ -12,7 +12,7 @@ deps = pytest pytest-cov pytest-catchlog - py{27,33,34,35,py}: pytest-pylint + py{27,34,35,py}: pytest-pylint pytest-sugar pytest-mock mock @@ -30,6 +30,11 @@ passenv = KAFKA_VERSION # pylint doesn't support python2.6 commands = py.test {posargs:--cov=kafka --cov-config=.covrc} +[testenv:py36] +# pylint doesn't support python3.6 yet +# https://github.com/PyCQA/pylint/issues/1072 +commands = py.test {posargs:--cov=kafka --cov-config=.covrc} + [testenv:pypy] # pylint is super slow on pypy... commands = py.test {posargs:--cov=kafka --cov-config=.covrc} From a6913367b0e0ef9f0f1d35539f1f3156875c8e3c Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 12:51:21 -0800 Subject: [PATCH 109/291] Drop old brokers when rebuilding broker metadata (#1005) --- kafka/cluster.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kafka/cluster.py b/kafka/cluster.py index 4646378bd..0a5c07fea 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -199,20 +199,21 @@ def update_metadata(self, metadata): if not metadata.brokers: log.warning("No broker metadata found in MetadataResponse") + _new_brokers = {} for broker in metadata.brokers: if metadata.API_VERSION == 0: node_id, host, port = broker rack = None else: node_id, host, port, rack = broker - self._brokers.update({ + _new_brokers.update({ node_id: BrokerMetadata(node_id, host, port, rack) }) if metadata.API_VERSION == 0: - self.controller = None + _new_controller = None else: - self.controller = self._brokers.get(metadata.controller_id) + _new_controller = _new_brokers.get(metadata.controller_id) _new_partitions = {} _new_broker_partitions = collections.defaultdict(set) @@ -253,6 +254,8 @@ def update_metadata(self, metadata): topic, error_type) with self._lock: + self._brokers = _new_brokers + self.controller = _new_controller self._partitions = _new_partitions self._broker_partitions = _new_broker_partitions self.unauthorized_topics = _new_unauthorized_topics From d1102afc2e031158f284d1f708a3bc05ef3eb712 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 15:03:37 -0800 Subject: [PATCH 110/291] Dont refresh metadata on failed group coordinator request unless needed (#1006) --- kafka/coordinator/base.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 48449a243..9f94356dc 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -213,12 +213,10 @@ def ensure_coordinator_known(self): self._client.poll(future=future) if future.failed(): - if isinstance(future.exception, - Errors.GroupCoordinatorNotAvailableError): - continue - 
elif future.retriable(): - metadata_update = self._client.cluster.request_update() - self._client.poll(future=metadata_update) + if future.retriable(): + if getattr(future.exception, 'invalid_metadata', False): + metadata_update = self._client.cluster.request_update() + self._client.poll(future=metadata_update) else: raise future.exception # pylint: disable-msg=raising-bad-type From 065114715abf13101ca70b5228ac98c06a11445d Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 16:44:02 -0800 Subject: [PATCH 111/291] Catch socket errors during ssl handshake (#1007) --- kafka/conn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index 95b67e8b8..7af2a49f2 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -377,7 +377,7 @@ def _try_handshake(self): # old ssl in python2.6 will swallow all SSLErrors here... except (SSLWantReadError, SSLWantWriteError): pass - except SSLZeroReturnError: + except (SSLZeroReturnError, ConnectionError): log.warning('SSL connection closed by server during handshake.') self.close(Errors.ConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user From 09b1e8f7d076a2c6920cda411b8878030394b6f3 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 16:53:01 -0800 Subject: [PATCH 112/291] Mark last_attempt time during connection close to fix blackout calculation (#1008) --- kafka/conn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/conn.py b/kafka/conn.py index 7af2a49f2..eb577d9ea 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -500,6 +500,7 @@ def close(self, error=None): self._sock.close() self._sock = None self.state = ConnectionStates.DISCONNECTED + self.last_attempt = time.time() self._sasl_auth_future = None self._receiving = False self._next_payload_bytes = 0 From 8d6354e227734106dfb5e52d5bb4b6958ac50bde Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 16:53:13 -0800 Subject: [PATCH 113/291] Include the node id in BrokerConnection __repr__ (#1009) --- kafka/conn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index eb577d9ea..d1a7dcaf6 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -864,8 +864,8 @@ def connect(): return version def __repr__(self): - return "" % (self.hostname, self.host, - self.port) + return "" % ( + self.config['node_id'], self.hostname, self.host, self.port) class BrokerConnectionMetrics(object): From 22d6ea68102a34548a77c3a3389986d5ce558495 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 17:51:48 -0800 Subject: [PATCH 114/291] When closing a broker connection without error, fail in-flight-requests with Cancelled (#1010) --- kafka/conn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index d1a7dcaf6..462474098 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -507,7 +507,7 @@ def close(self, error=None): self._rbuffer.seek(0) self._rbuffer.truncate() if error is None: - error = Errors.ConnectionError(str(self)) + error = Errors.Cancelled(str(self)) while self.in_flight_requests: ifr = self.in_flight_requests.popleft() ifr.future.failure(error) From 2f243092b378988b0b6cf808cc3720c973f6a793 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 17:00:35 -0800 Subject: [PATCH 115/291] Small style fixes in kafka.errors --- kafka/errors.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kafka/errors.py b/kafka/errors.py index 
069c9e4b2..528b6b678 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -270,7 +270,8 @@ class InconsistentGroupProtocolError(BrokerResponseError): errno = 23 message = 'INCONSISTENT_GROUP_PROTOCOL' description = ('Returned in join group when the member provides a protocol' - ' type or set of protocols which is not compatible with the current group.') + ' type or set of protocols which is not compatible with the' + ' current group.') class InvalidGroupIdError(BrokerResponseError): @@ -333,19 +334,19 @@ class ClusterAuthorizationFailedError(BrokerResponseError): class InvalidTimestampError(BrokerResponseError): errno = 32 message = 'INVALID_TIMESTAMP' - description = ('The timestamp of the message is out of acceptable range.') + description = 'The timestamp of the message is out of acceptable range.' class UnsupportedSaslMechanismError(BrokerResponseError): errno = 33 message = 'UNSUPPORTED_SASL_MECHANISM' - description = ('The broker does not support the requested SASL mechanism.') + description = 'The broker does not support the requested SASL mechanism.' class IllegalSaslStateError(BrokerResponseError): errno = 34 message = 'ILLEGAL_SASL_STATE' - description = ('Request is not valid given the current SASL state.') + description = 'Request is not valid given the current SASL state.' class KafkaUnavailableError(KafkaError): From 4bc0039bef61a8d4f07dd1e0512d51b3d71c651a Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 3 Mar 2017 17:27:54 -0800 Subject: [PATCH 116/291] Add new broker response errors --- kafka/errors.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/kafka/errors.py b/kafka/errors.py index 528b6b678..dd5322a29 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -349,6 +349,71 @@ class IllegalSaslStateError(BrokerResponseError): description = 'Request is not valid given the current SASL state.' +class UnsupportedVersionError(BrokerResponseError): + errno = 35 + message = 'UNSUPPORTED_VERSION' + description = 'The version of API is not supported.' + + +class TopicAlreadyExistsError(BrokerResponseError): + errno = 36 + message = 'TOPIC_ALREADY_EXISTS' + description = 'Topic with this name already exists.' + + +class InvalidPartitionsError(BrokerResponseError): + errno = 37 + message = 'INVALID_PARTITIONS' + description = 'Number of partitions is invalid.' + + +class InvalidReplicationFactorError(BrokerResponseError): + errno = 38 + message = 'INVALID_REPLICATION_FACTOR' + description = 'Replication-factor is invalid.' + + +class InvalidReplicationAssignmentError(BrokerResponseError): + errno = 39 + message = 'INVALID_REPLICATION_ASSIGNMENT' + description = 'Replication assignment is invalid.' + + +class InvalidConfigurationError(BrokerResponseError): + errno = 40 + message = 'INVALID_CONFIG' + description = 'Configuration is invalid.' + + +class NotControllerError(BrokerResponseError): + errno = 41 + message = 'NOT_CONTROLLER' + description = 'This is not the correct controller for this cluster.' + retriable = True + + +class InvalidRequestError(BrokerResponseError): + errno = 42 + message = 'INVALID_REQUEST' + description = ('This most likely occurs because of a request being' + ' malformed by the client library or the message was' + ' sent to an incompatible broker. 
See the broker logs' + ' for more details.') + + +class UnsupportedForMessageFormatError(BrokerResponseError): + errno = 43 + message = 'UNSUPPORTED_FOR_MESSAGE_FORMAT' + description = ('The message format version on the broker does not' + ' support this request.') + + +class PolicyViolationError(BrokerResponseError): + errno = 44 + message = 'POLICY_VIOLATION' + description = 'Request parameters do not satisfy the configured policy.' + + class KafkaUnavailableError(KafkaError): pass From 1ad30ff6900c7e4ab267f573523a561d5eb80830 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 4 Mar 2017 10:53:00 -0800 Subject: [PATCH 117/291] CreateTopicsRequest / Response v1 (#1012) --- kafka/protocol/admin.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index 99ec1770e..89ea73981 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from .struct import Struct -from .types import Array, Bytes, Int16, Int32, Schema, String +from .types import Array, Boolean, Bytes, Int16, Int32, Schema, String class ApiVersionResponse_v0(Struct): @@ -37,6 +37,17 @@ class CreateTopicsResponse_v0(Struct): ) +class CreateTopicsResponse_v1(Struct): + API_KEY = 19 + API_VERSION = 1 + SCHEMA = Schema( + ('topic_error_codes', Array( + ('topic', String('utf-8')), + ('error_code', Int16), + ('error_message', String('utf-8')))) + ) + + class CreateTopicsRequest_v0(Struct): API_KEY = 19 API_VERSION = 0 @@ -56,8 +67,28 @@ class CreateTopicsRequest_v0(Struct): ) -CreateTopicsRequest = [CreateTopicsRequest_v0] -CreateTopicsResponse = [CreateTopicsResponse_v0] +class CreateTopicsRequest_v1(Struct): + API_KEY = 19 + API_VERSION = 1 + RESPONSE_TYPE = CreateTopicsResponse_v1 + SCHEMA = Schema( + ('create_topic_requests', Array( + ('topic', String('utf-8')), + ('num_partitions', Int32), + ('replication_factor', Int16), + ('replica_assignment', Array( + ('partition_id', Int32), + ('replicas', Array(Int32)))), + ('configs', Array( + ('config_key', String('utf-8')), + ('config_value', String('utf-8')))))), + ('timeout', Int32), + ('validate_only', Boolean) + ) + + +CreateTopicsRequest = [CreateTopicsRequest_v0, CreateTopicsRequest_v1] +CreateTopicsResponse = [CreateTopicsResponse_v0, CreateTopicsRequest_v1] class DeleteTopicsResponse_v0(Struct): From 753c381baae3afe8b0f0ec7ddc14a43b596c391c Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 11:01:07 -0800 Subject: [PATCH 118/291] Do not need str(self) when formatting to %s --- kafka/conn.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 462474098..dcecabf4e 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -215,7 +215,7 @@ def __init__(self, host, port, afi, **configs): def connect(self): """Attempt to connect and return ConnectionState""" if self.state is ConnectionStates.DISCONNECTED: - log.debug('%s: creating new socket', str(self)) + log.debug('%s: creating new socket', self) # if self.afi is set to AF_UNSPEC, then we need to do a name # resolution and try all available address families if self.afi == socket.AF_UNSPEC: @@ -284,9 +284,9 @@ def connect(self): # Connection succeeded if not ret or ret == errno.EISCONN: - log.debug('%s: established TCP connection', str(self)) + log.debug('%s: established TCP connection', self) if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): - log.debug('%s: initiating 
SSL handshake', str(self)) + log.debug('%s: initiating SSL handshake', self) self.state = ConnectionStates.HANDSHAKE elif self.config['security_protocol'] == 'SASL_PLAINTEXT': self.state = ConnectionStates.AUTHENTICATING @@ -312,7 +312,7 @@ def connect(self): if self.state is ConnectionStates.HANDSHAKE: if self._try_handshake(): - log.debug('%s: completed SSL handshake.', str(self)) + log.debug('%s: completed SSL handshake.', self) if self.config['security_protocol'] == 'SASL_SSL': self.state = ConnectionStates.AUTHENTICATING else: @@ -322,7 +322,7 @@ def connect(self): if self.state is ConnectionStates.AUTHENTICATING: assert self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL') if self._try_authenticate(): - log.info('%s: Authenticated as %s', str(self), self.config['sasl_plain_username']) + log.info('%s: Authenticated as %s', self, self.config['sasl_plain_username']) self.state = ConnectionStates.CONNECTED self.config['state_change_callback'](self) @@ -331,7 +331,7 @@ def connect(self): def _wrap_ssl(self): assert self.config['security_protocol'] in ('SSL', 'SASL_SSL') if self._ssl_context is None: - log.debug('%s: configuring default SSL Context', str(self)) + log.debug('%s: configuring default SSL Context', self) self._ssl_context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) # pylint: disable=no-member self._ssl_context.options |= ssl.OP_NO_SSLv2 # pylint: disable=no-member self._ssl_context.options |= ssl.OP_NO_SSLv3 # pylint: disable=no-member @@ -339,12 +339,12 @@ def _wrap_ssl(self): if self.config['ssl_check_hostname']: self._ssl_context.check_hostname = True if self.config['ssl_cafile']: - log.info('%s: Loading SSL CA from %s', str(self), self.config['ssl_cafile']) + log.info('%s: Loading SSL CA from %s', self, self.config['ssl_cafile']) self._ssl_context.load_verify_locations(self.config['ssl_cafile']) self._ssl_context.verify_mode = ssl.CERT_REQUIRED if self.config['ssl_certfile'] and self.config['ssl_keyfile']: - log.info('%s: Loading SSL Cert from %s', str(self), self.config['ssl_certfile']) - log.info('%s: Loading SSL Key from %s', str(self), self.config['ssl_keyfile']) + log.info('%s: Loading SSL Cert from %s', self, self.config['ssl_certfile']) + log.info('%s: Loading SSL Key from %s', self, self.config['ssl_keyfile']) self._ssl_context.load_cert_chain( certfile=self.config['ssl_certfile'], keyfile=self.config['ssl_keyfile'], @@ -355,18 +355,18 @@ def _wrap_ssl(self): log.error('%s: %s Disconnecting.', self, error) self.close(Errors.ConnectionError(error)) return - log.info('%s: Loading SSL CRL from %s', str(self), self.config['ssl_crlfile']) + log.info('%s: Loading SSL CRL from %s', self, self.config['ssl_crlfile']) self._ssl_context.load_verify_locations(self.config['ssl_crlfile']) # pylint: disable=no-member self._ssl_context.verify_flags |= ssl.VERIFY_CRL_CHECK_LEAF - log.debug('%s: wrapping socket in ssl context', str(self)) + log.debug('%s: wrapping socket in ssl context', self) try: self._sock = self._ssl_context.wrap_socket( self._sock, server_hostname=self.hostname, do_handshake_on_connect=False) except ssl.SSLError as e: - log.exception('%s: Failed to wrap socket in SSLContext!', str(self)) + log.exception('%s: Failed to wrap socket in SSLContext!', self) self.close(e) def _try_handshake(self): @@ -417,7 +417,7 @@ def _handle_sasl_handshake_response(self, future, response): def _try_authenticate_plain(self, future): if self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.warning('%s: Sending username and password in the clear', str(self)) + 
log.warning('%s: Sending username and password in the clear', self) data = b'' try: @@ -444,7 +444,7 @@ def _try_authenticate_plain(self, future): self._sock.setblocking(False) except (AssertionError, ConnectionError) as e: log.exception("%s: Error receiving reply from server", self) - error = Errors.ConnectionError("%s: %s" % (str(self), e)) + error = Errors.ConnectionError("%s: %s" % (self, e)) future.failure(error) self.close(error=error) @@ -552,7 +552,7 @@ def _send(self, request, expect_response=True): self._sock.setblocking(False) except (AssertionError, ConnectionError) as e: log.exception("Error sending %s to %s", request, self) - error = Errors.ConnectionError("%s: %s" % (str(self), e)) + error = Errors.ConnectionError("%s: %s" % (self, e)) self.close(error=error) return future.failure(error) log.debug('%s Request %d: %s', self, correlation_id, request) @@ -710,7 +710,7 @@ def _process_response(self, read_buffer): elif ifr.correlation_id != recv_correlation_id: error = Errors.CorrelationIdError( '%s: Correlation IDs do not match: sent %d, recv %d' - % (str(self), ifr.correlation_id, recv_correlation_id)) + % (self, ifr.correlation_id, recv_correlation_id)) ifr.future.failure(error) self.close(error) self._processing = False From 5103e0351e0102ebaae9c3cc41d496a49b3c7215 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 11:01:49 -0800 Subject: [PATCH 119/291] Add more debug-level connection logging --- kafka/conn.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kafka/conn.py b/kafka/conn.py index dcecabf4e..2b24e9fba 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -258,11 +258,13 @@ def connect(self): self._sock = socket.socket(self.afi, socket.SOCK_STREAM) for option in self.config['socket_options']: + log.debug('%s: setting socket option %s', self, option) self._sock.setsockopt(*option) self._sock.setblocking(False) if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): self._wrap_ssl() + log.debug('%s: connecting to %s:%d', self, self.host, self.port) self.state = ConnectionStates.CONNECTING self.last_attempt = time.time() self.config['state_change_callback'](self) @@ -289,8 +291,10 @@ def connect(self): log.debug('%s: initiating SSL handshake', self) self.state = ConnectionStates.HANDSHAKE elif self.config['security_protocol'] == 'SASL_PLAINTEXT': + log.debug('%s: initiating SASL authentication', self) self.state = ConnectionStates.AUTHENTICATING else: + log.debug('%s: Connection complete.', self) self.state = ConnectionStates.CONNECTED self.config['state_change_callback'](self) @@ -314,8 +318,10 @@ def connect(self): if self._try_handshake(): log.debug('%s: completed SSL handshake.', self) if self.config['security_protocol'] == 'SASL_SSL': + log.debug('%s: initiating SASL authentication', self) self.state = ConnectionStates.AUTHENTICATING else: + log.debug('%s: Connection complete.', self) self.state = ConnectionStates.CONNECTED self.config['state_change_callback'](self) @@ -323,6 +329,7 @@ def connect(self): assert self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL') if self._try_authenticate(): log.info('%s: Authenticated as %s', self, self.config['sasl_plain_username']) + log.debug('%s: Connection complete.', self) self.state = ConnectionStates.CONNECTED self.config['state_change_callback'](self) From 224b5be6740153d943818c96370bb3ad2ef75b99 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 11:03:17 -0800 Subject: [PATCH 120/291] Minor additional logging for consumer coordinator --- kafka/coordinator/base.py | 2 
++ 1 file changed, 2 insertions(+) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 9f94356dc..923b2c395 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -215,6 +215,7 @@ def ensure_coordinator_known(self): if future.failed(): if future.retriable(): if getattr(future.exception, 'invalid_metadata', False): + log.debug('Requesting metadata for group coordinator request: %s', future.exception) metadata_update = self._client.cluster.request_update() self._client.poll(future=metadata_update) else: @@ -529,6 +530,7 @@ def close(self): if not self.coordinator_unknown() and self.generation > 0: # this is a minimal effort attempt to leave the group. we do not # attempt any resending if the request fails or times out. + log.info('Leaving consumer group (%s).', self.group_id) request = LeaveGroupRequest[0](self.group_id, self.member_id) future = self._client.send(self.coordinator_id, request) future.add_callback(self._handle_leave_group_response) From 79719300e65afb3e7e02039406757fc6d4f93518 Mon Sep 17 00:00:00 2001 From: Taras Voinarovskyi Date: Tue, 7 Mar 2017 00:59:26 +0200 Subject: [PATCH 121/291] Added `max_bytes` option and FetchRequest_v3 usage. (#962) * Added `max_bytes` option and FetchRequest_v3 usage. * Add checks for versions above 0.10 based on ApiVersionResponse --- kafka/client_async.py | 2 ++ kafka/conn.py | 25 ++++++++++++++++- kafka/consumer/fetcher.py | 43 ++++++++++++++++++++++++----- kafka/consumer/group.py | 9 ++++++ test/test_consumer_integration.py | 46 +++++++++++++++++++++++++++++++ test/test_fetcher.py | 3 +- 6 files changed, 119 insertions(+), 9 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 5c69905da..d10644e23 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -156,6 +156,8 @@ class KafkaClient(object): 'sasl_plain_password': None, } API_VERSIONS = [ + (0, 10, 1), + (0, 10, 0), (0, 10), (0, 9), (0, 8, 2), diff --git a/kafka/conn.py b/kafka/conn.py index 2b24e9fba..45aa9a18b 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -18,6 +18,7 @@ from kafka.protocol.api import RequestHeader from kafka.protocol.admin import SaslHandShakeRequest from kafka.protocol.commit import GroupCoordinatorResponse +from kafka.protocol.metadata import MetadataRequest from kafka.protocol.types import Int32 from kafka.version import __version__ @@ -756,6 +757,24 @@ def _next_correlation_id(self): self._correlation_id = (self._correlation_id + 1) % 2**31 return self._correlation_id + def _check_version_above_0_10(self, response): + test_cases = [ + # format (, ) + ((0, 10, 1), MetadataRequest[2]) + ] + + error_type = Errors.for_code(response.error_code) + assert error_type is Errors.NoError, "API version check failed" + max_versions = dict([ + (api_key, max_version) + for api_key, _, max_version in response.api_versions + ]) + # Get the best match of test cases + for broker_version, struct in test_cases: + if max_versions.get(struct.API_KEY, -1) >= struct.API_VERSION: + return broker_version + return (0, 10, 0) + def check_version(self, timeout=2, strict=False): """Attempt to guess the broker version. @@ -780,7 +799,6 @@ def check_version(self, timeout=2, strict=False): # socket.error (32, 54, or 104) from .protocol.admin import ApiVersionRequest, ListGroupsRequest from .protocol.commit import OffsetFetchRequest, GroupCoordinatorRequest - from .protocol.metadata import MetadataRequest # Socket errors are logged as exceptions and can alarm users. 
Mute them from logging import Filter @@ -794,6 +812,7 @@ def filter(self, record): log.addFilter(log_filter) test_cases = [ + # All cases starting from 0.10 will be based on ApiVersionResponse ((0, 10), ApiVersionRequest[0]()), ((0, 9), ListGroupsRequest[0]()), ((0, 8, 2), GroupCoordinatorRequest[0]('kafka-python-default-group')), @@ -834,6 +853,10 @@ def connect(): self._sock.setblocking(False) if f.succeeded(): + if version == (0, 10): + # Starting from 0.10 kafka broker we determine version + # by looking at ApiVersionResponse + version = self._check_version_above_0_10(f.value) log.info('Broker version identifed as %s', '.'.join(map(str, version))) log.info('Set configuration api_version=%s to skip auto' ' check_version requests on startup', version) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 802f92efb..ad703a594 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -39,6 +39,7 @@ class Fetcher(six.Iterator): 'value_deserializer': None, 'fetch_min_bytes': 1, 'fetch_max_wait_ms': 500, + 'fetch_max_bytes': 52428800, 'max_partition_fetch_bytes': 1048576, 'max_poll_records': sys.maxsize, 'check_crcs': True, @@ -63,6 +64,15 @@ def __init__(self, client, subscriptions, metrics, **configs): the server will block before answering the fetch request if there isn't sufficient data to immediately satisfy the requirement given by fetch_min_bytes. Default: 500. + fetch_max_bytes (int): The maximum amount of data the server should + return for a fetch request. This is not an absolute maximum, if + the first message in the first non-empty partition of the fetch + is larger than this value, the message will still be returned + to ensure that the consumer can make progress. NOTE: consumer + performs fetches to multiple brokers in parallel so memory + usage will depend on the number of brokers containing + partitions for the topic. + Supported Kafka version >= 0.10.1.0. Default: 52428800 (50 Mb). max_partition_fetch_bytes (int): The maximum amount of data per-partition the server will return. The maximum total memory used for a request = #partitions * max_partition_fetch_bytes. 
@@ -610,7 +620,7 @@ def _handle_offset_response(self, partition, future, response): log.debug("Fetched offset %d for partition %s", offset, partition) future.success(offset) elif error_type in (Errors.NotLeaderForPartitionError, - Errors.UnknownTopicOrPartitionError): + Errors.UnknownTopicOrPartitionError): log.debug("Attempt to fetch offsets for partition %s failed due" " to obsolete leadership information, retrying.", partition) @@ -657,7 +667,9 @@ def _create_fetch_requests(self): log.debug("Adding fetch request for partition %s at offset %d", partition, position) - if self.config['api_version'] >= (0, 10): + if self.config['api_version'] >= (0, 10, 1): + version = 3 + elif self.config['api_version'] >= (0, 10): version = 2 elif self.config['api_version'] == (0, 9): version = 1 @@ -665,11 +677,28 @@ def _create_fetch_requests(self): version = 0 requests = {} for node_id, partition_data in six.iteritems(fetchable): - requests[node_id] = FetchRequest[version]( - -1, # replica_id - self.config['fetch_max_wait_ms'], - self.config['fetch_min_bytes'], - partition_data.items()) + if version < 3: + requests[node_id] = FetchRequest[version]( + -1, # replica_id + self.config['fetch_max_wait_ms'], + self.config['fetch_min_bytes'], + partition_data.items()) + else: + # As of version == 3 partitions will be returned in order as + # they are requested, so to avoid starvation with + # `fetch_max_bytes` option we need this shuffle + # NOTE: we do have partition_data in random order due to usage + # of unordered structures like dicts, but that does not + # guaranty equal distribution, and starting Python3.6 + # dicts retain insert order. + partition_data = list(partition_data.items()) + random.shuffle(partition_data) + requests[node_id] = FetchRequest[version]( + -1, # replica_id + self.config['fetch_max_wait_ms'], + self.config['fetch_min_bytes'], + self.config['fetch_max_bytes'], + partition_data) return requests def _handle_fetch_response(self, request, send_time, response): diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 96a6bf768..8db403c73 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -65,6 +65,14 @@ class KafkaConsumer(six.Iterator): the server will block before answering the fetch request if there isn't sufficient data to immediately satisfy the requirement given by fetch_min_bytes. Default: 500. + fetch_max_bytes (int): The maximum amount of data the server should + return for a fetch request. This is not an absolute maximum, if the + first message in the first non-empty partition of the fetch is + larger than this value, the message will still be returned to + ensure that the consumer can make progress. NOTE: consumer performs + fetches to multiple brokers in parallel so memory usage will depend + on the number of brokers containing partitions for the topic. + Supported Kafka version >= 0.10.1.0. Default: 52428800 (50 Mb). max_partition_fetch_bytes (int): The maximum amount of data per-partition the server will return. The maximum total memory used for a request = #partitions * max_partition_fetch_bytes. 
@@ -212,6 +220,7 @@ class KafkaConsumer(six.Iterator): 'value_deserializer': None, 'fetch_max_wait_ms': 500, 'fetch_min_bytes': 1, + 'fetch_max_bytes': 52428800, 'max_partition_fetch_bytes': 1 * 1024 * 1024, 'request_timeout_ms': 40 * 1000, 'retry_backoff_ms': 100, diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index 865a399ae..1b97244da 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -2,6 +2,7 @@ import os from six.moves import xrange +import six from . import unittest from kafka import ( @@ -720,3 +721,48 @@ def test_old_kafka_consumer__offset_commit_resume_dual(self): output_msgs2.append(m) self.assert_message_count(output_msgs2, 20) self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) + + @kafka_versions('>=0.10.1') + def test_kafka_consumer_max_bytes_simple(self): + self.send_messages(0, range(100, 200)) + self.send_messages(1, range(200, 300)) + + # Start a consumer + consumer = self.kafka_consumer( + auto_offset_reset='earliest', fetch_max_bytes=300) + fetched_size = 0 + seen_partitions = set([]) + for i in range(10): + poll_res = consumer.poll(timeout_ms=100) + for partition, msgs in six.iteritems(poll_res): + for msg in msgs: + fetched_size += len(msg.value) + seen_partitions.add(partition) + + # Check that we fetched at least 1 message from both partitions + self.assertEqual( + seen_partitions, set([ + TopicPartition(self.topic, 0), TopicPartition(self.topic, 1)])) + self.assertLess(fetched_size, 3000) + + @kafka_versions('>=0.10.1') + def test_kafka_consumer_max_bytes_one_msg(self): + # We send to only 1 partition so we don't have parallel requests to 2 + # nodes for data. + self.send_messages(0, range(100, 200)) + + # Start a consumer. FetchResponse_v3 should always include at least 1 + # full msg, so by setting fetch_max_bytes=1 we must get 1 msg at a time + consumer = self.kafka_consumer( + auto_offset_reset='earliest', fetch_max_bytes=1) + fetched_msgs = [] + # A bit hacky, but we need this in order for message count to be exact + consumer._coordinator.ensure_active_group() + for i in range(10): + poll_res = consumer.poll(timeout_ms=2000) + print(poll_res) + for partition, msgs in six.iteritems(poll_res): + for msg in msgs: + fetched_msgs.append(msg) + + self.assertEqual(len(fetched_msgs), 10) diff --git a/test/test_fetcher.py b/test/test_fetcher.py index 984de8883..dcfba78be 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -58,7 +58,8 @@ def test_send_fetches(fetcher, mocker): @pytest.mark.parametrize(("api_version", "fetch_version"), [ - ((0, 10), 2), + ((0, 10, 1), 3), + ((0, 10, 0), 2), ((0, 9), 1), ((0, 8), 0) ]) From e5c0169410eab3dda4079e32ed9e4527ab51a8c3 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 15:16:05 -0800 Subject: [PATCH 122/291] Small cleanup for #962 --- kafka/conn.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 45aa9a18b..8a3cb8cf4 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -757,7 +757,9 @@ def _next_correlation_id(self): self._correlation_id = (self._correlation_id + 1) % 2**31 return self._correlation_id - def _check_version_above_0_10(self, response): + def _check_api_version_response(self, response): + # The logic here is to check the list of supported request versions + # in descending order. 
As soon as we find one that works, return it test_cases = [ # format (, ) ((0, 10, 1), MetadataRequest[2]) @@ -770,9 +772,12 @@ def _check_version_above_0_10(self, response): for api_key, _, max_version in response.api_versions ]) # Get the best match of test cases - for broker_version, struct in test_cases: + for broker_version, struct in sorted(test_cases, reverse=True): if max_versions.get(struct.API_KEY, -1) >= struct.API_VERSION: return broker_version + + # We know that ApiVersionResponse is only supported in 0.10+ + # so if all else fails, choose that return (0, 10, 0) def check_version(self, timeout=2, strict=False): @@ -853,10 +858,10 @@ def connect(): self._sock.setblocking(False) if f.succeeded(): - if version == (0, 10): + if isinstance(request, ApiVersionRequest[0]): # Starting from 0.10 kafka broker we determine version # by looking at ApiVersionResponse - version = self._check_version_above_0_10(f.value) + version = self._check_api_version_response(f.value) log.info('Broker version identifed as %s', '.'.join(map(str, version))) log.info('Set configuration api_version=%s to skip auto' ' check_version requests on startup', version) From 964efe1333a79bb5f992ad70eb01f77c376f7887 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 15:16:45 -0800 Subject: [PATCH 123/291] Add client info logging re bootstrap; log connection attempts to balance with close --- kafka/client_async.py | 3 +++ kafka/conn.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index d10644e23..8dbae5ded 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -206,6 +206,7 @@ def __init__(self, **configs): self.config['api_version'] = self.check_version(timeout=check_timeout) def _bootstrap(self, hosts): + log.info('Bootstrapping cluster metadata from %s', hosts) # Exponential backoff if bootstrap fails backoff_ms = self.config['reconnect_backoff_ms'] * 2 ** self._bootstrap_fails next_at = self._last_bootstrap + backoff_ms / 1000.0 @@ -241,6 +242,8 @@ def _bootstrap(self, hosts): bootstrap.close() continue self.cluster.update_metadata(future.value) + log.info('Bootstrap succeeded: found %d brokers and %d topics.', + len(self.cluster.brokers()), len(self.cluster.topics())) # A cluster with no topics can return no broker metadata # in that case, we should keep the bootstrap connection diff --git a/kafka/conn.py b/kafka/conn.py index 8a3cb8cf4..ca7899447 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -265,7 +265,7 @@ def connect(self): self._sock.setblocking(False) if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): self._wrap_ssl() - log.debug('%s: connecting to %s:%d', self, self.host, self.port) + log.info('%s: connecting to %s:%d', self, self.host, self.port) self.state = ConnectionStates.CONNECTING self.last_attempt = time.time() self.config['state_change_callback'](self) From aeb46279876ae84d70d9a0cdfcc439ce123bf4c9 Mon Sep 17 00:00:00 2001 From: gaosheng Date: Mon, 20 Feb 2017 23:48:55 +0800 Subject: [PATCH 124/291] change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX --- kafka/producer/kafka.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 032656b4b..59a75048d 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -399,8 +399,12 @@ def close(self, timeout=None): log.info('Kafka producer closed') return if timeout is None: - timeout = 999999999 - assert timeout >= 0 + # threading.TIMEOUT_MAX 
is available in Python3.3+ + timeout = getattr(threading, 'TIMEOUT_MAX', 999999999) + if getattr(threading, 'TIMEOUT_MAX', False): + assert 0 <= timeout <= getattr(threading, 'TIMEOUT_MAX') + else: + assert timeout >= 0 log.info("Closing the Kafka producer with %s secs timeout.", timeout) #first_exception = AtomicReference() # this will keep track of the first encountered exception From 3f75f34c034cedd1881e98d63557f03d8e9ae346 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 16:07:22 -0800 Subject: [PATCH 125/291] Add python3.6 support to pypi metadata --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 95cda28b8..745d57984 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ def run(cls): "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", ] From 748380b606b5dc13ccfd5f6abd3ff74d85457184 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 16:07:43 -0800 Subject: [PATCH 126/291] Update changelog in preparation for release --- CHANGES.md | 75 ++++++++++++++++++++++++++++++++++++++++ docs/changelog.rst | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b0e01f2dc..435da60cb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,78 @@ +# 1.3.3 (Unreleased) + +Core / Protocol +* Added `max_bytes` option and FetchRequest_v3 usage. (Drizzt1991 962) +* CreateTopicsRequest / Response v1 (dpkp 1012) +* Add MetadataRequest_v2 and MetadataResponse_v2 structures for KIP-78 (Drizzt1991 974) +* KIP-88 / KAFKA-3853: OffsetFetch v2 structs (jeffwidman 971) +* DRY-up the MetadataRequest_v1 struct (jeffwidman 966) +* Add JoinGroup v1 structs (jeffwidman 965) +* DRY-up the OffsetCommitResponse Structs (jeffwidman 970) +* DRY-up the OffsetFetch structs (jeffwidman 964) +* time --> timestamp to match Java API (jeffwidman 969) +* Add support for offsetRequestV1 messages (jlafaye 951) +* Add FetchRequest/Response_v3 structs (jeffwidman 943) +* Add CreateTopics / DeleteTopics Structs (jeffwidman 944) + +Test Infrastructure +* Add python3.6 to travis test suite, drop python3.3 (exponea 992) +* Update to 0.10.1.1 for integration testing (dpkp 953) +* Update vendored berkerpeksag/selectors34 to ff61b82 (Mephius 979) +* Remove dead code (jeffwidman 967) +* Update pytest fixtures to new yield syntax (jeffwidman 919) + +Consumer +* Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) +* Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) +* Default max_poll_records to Java default of 500 (jeffwidman 947) + +Producer +* change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) +* Issue 985: Clear memory wait condition before raising Exception (dpkp 999) + +Client +* When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) +* Mark last_attempt time during connection close to fix blackout calculation (dpkp 1008) +* Catch socket errors during ssl handshake (dpkp 1007) +* Drop old brokers when rebuilding broker metadata (dpkp 1005) +* Drop bad disconnect test -- just use the mocked-socket test (dpkp 982) +* Add support for Python built without ssl (minagawa-sho 954) +* Do not re-close a disconnected connection 
(dpkp) +* Drop unused last_failure time from BrokerConnection (dpkp) +* Use connection state functions where possible (dpkp) +* Pass error to BrokerConnection.close() (dpkp) + +Bugfixes +* Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) +* Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) + +Logging / Error Messages +* Add client info logging re bootstrap; log connection attempts to balance with close (dpkp) +* Minor additional logging for consumer coordinator (dpkp) +* Add more debug-level connection logging (dpkp) +* Do not need str(self) when formatting to %s (dpkp) +* Add new broker response errors (dpkp) +* Small style fixes in kafka.errors (dpkp) +* Include the node id in BrokerConnection logging (dpkp 1009) +* Replace %s with %r in producer debug log message (chekunkov 973) + +Documentation +* Add sphinx formatting to hyperlink methods (jeffwidman 898) +* Fix BrokerConnection api_version docs default (jeffwidman 909) +* PEP-8: Spacing & removed unused imports (jeffwidman 899) +* Move BrokerConnection docstring to class (jeffwidman 968) +* Move docstring so it shows up in Sphinx/RTD (jeffwidman 952) +* Remove non-pip install instructions (jeffwidman 940) +* Spelling and grammar changes (melissacrawford396 923) +* Fix typo: coorelation --> correlation (jeffwidman 929) +* Make SSL warning list the correct Python versions (jeffwidman 924) + +Legacy Client +* Add send_list_offset_request for searching offset by timestamp (charsyam 1001) +* Use select to poll sockets for read to reduce CPU usage (jianbin-wei 958) +* Use select.select without instance bounding (adamwen829 949) + + # 1.3.2 (Dec 28, 2016) Core diff --git a/docs/changelog.rst b/docs/changelog.rst index 9d265388c..1bebd181e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,91 @@ Changelog ========= +1.3.3 (Unreleased) +#################### + +Core / Protocol +--------------- +* Added `max_bytes` option and FetchRequest_v3 usage. 
(Drizzt1991 962) +* CreateTopicsRequest / Response v1 (dpkp 1012) +* Add MetadataRequest_v2 and MetadataResponse_v2 structures for KIP-78 (Drizzt1991 974) +* KIP-88 / KAFKA-3853: OffsetFetch v2 structs (jeffwidman 971) +* DRY-up the MetadataRequest_v1 struct (jeffwidman 966) +* Add JoinGroup v1 structs (jeffwidman 965) +* DRY-up the OffsetCommitResponse Structs (jeffwidman 970) +* DRY-up the OffsetFetch structs (jeffwidman 964) +* time --> timestamp to match Java API (jeffwidman 969) +* Add support for offsetRequestV1 messages (jlafaye 951) +* Add FetchRequest/Response_v3 structs (jeffwidman 943) +* Add CreateTopics / DeleteTopics Structs (jeffwidman 944) + +Test Infrastructure +------------------- +* Add python3.6 to travis test suite, drop python3.3 (exponea 992) +* Update to 0.10.1.1 for integration testing (dpkp 953) +* Update vendored berkerpeksag/selectors34 to ff61b82 (Mephius 979) +* Remove dead code (jeffwidman 967) +* Update pytest fixtures to new yield syntax (jeffwidman 919) + +Consumer +-------- +* Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) +* Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) +* Default max_poll_records to Java default of 500 (jeffwidman 947) + +Producer +-------- +* change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) +* Issue 985: Clear memory wait condition before raising Exception (dpkp 999) + +Client +------ +* When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) +* Mark last_attempt time during connection close to fix blackout calculation (dpkp 1008) +* Catch socket errors during ssl handshake (dpkp 1007) +* Drop old brokers when rebuilding broker metadata (dpkp 1005) +* Drop bad disconnect test -- just use the mocked-socket test (dpkp 982) +* Add support for Python built without ssl (minagawa-sho 954) +* Do not re-close a disconnected connection (dpkp) +* Drop unused last_failure time from BrokerConnection (dpkp) +* Use connection state functions where possible (dpkp) +* Pass error to BrokerConnection.close() (dpkp) + +Bugfixes +-------- +* Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) +* Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) + +Logging / Error Messages +------------------------ +* Add client info logging re bootstrap; log connection attempts to balance with close (dpkp) +* Minor additional logging for consumer coordinator (dpkp) +* Add more debug-level connection logging (dpkp) +* Do not need str(self) when formatting to %s (dpkp) +* Add new broker response errors (dpkp) +* Small style fixes in kafka.errors (dpkp) +* Include the node id in BrokerConnection logging (dpkp 1009) +* Replace %s with %r in producer debug log message (chekunkov 973) + +Documentation +------------- +* Add sphinx formatting to hyperlink methods (jeffwidman 898) +* Fix BrokerConnection api_version docs default (jeffwidman 909) +* PEP-8: Spacing & removed unused imports (jeffwidman 899) +* Move BrokerConnection docstring to class (jeffwidman 968) +* Move docstring so it shows up in Sphinx/RTD (jeffwidman 952) +* Remove non-pip install instructions (jeffwidman 940) +* Spelling and grammar changes (melissacrawford396 923) +* Fix typo: coorelation --> correlation (jeffwidman 929) +* Make SSL warning list the correct Python versions (jeffwidman 924) + +Legacy Client +------------- +* Add send_list_offset_request for searching offset by timestamp (charsyam 
1001) +* Use select to poll sockets for read to reduce CPU usage (jianbin-wei 958) +* Use select.select without instance bounding (adamwen829 949) + + 1.3.2 (Dec 28, 2016) #################### From 74d059dfa19e0385b4f2ba042f0d8786304ca1f0 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 16:08:33 -0800 Subject: [PATCH 127/291] Tweak README docs to show use of consumer group (no longer default); clarify producer.flush --- README.rst | 17 +++++++++++++---- docs/index.rst | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index d8367a4e4..e5e37ca75 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,12 @@ that expose basic message attributes: topic, partition, offset, key, and value: >>> for msg in consumer: ... print (msg) +>>> # join a consumer group for dynamic partition assignment and offset commits +>>> from kafka import KafkaConsumer +>>> consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') +>>> for msg in consumer: +... print (msg) + >>> # manually assign the partition list for the consumer >>> from kafka import TopicPartition >>> consumer = KafkaConsumer(bootstrap_servers='localhost:1234') @@ -78,11 +84,14 @@ for more details. >>> for _ in range(100): ... producer.send('foobar', b'some_message_bytes') ->>> # Block until all pending messages are sent ->>> producer.flush() - >>> # Block until a single message is sent (or timeout) ->>> producer.send('foobar', b'another_message').get(timeout=60) +>>> future = producer.send('foobar', b'another_message') +>>> result = future.get(timeout=60) + +>>> # Block until all pending messages are at least put on the network +>>> # NOTE: This does not guarantee delivery or success! It is really +>>> # only useful if you configure internal batching using linger_ms +>>> producer.flush() >>> # Use a key for hashed-partitioning >>> producer.send('foobar', key=b'foo', value=b'bar') diff --git a/docs/index.rst b/docs/index.rst index 5e74d02a6..2cef7fe06 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,6 +51,12 @@ that expose basic message attributes: topic, partition, offset, key, and value: >>> for msg in consumer: ... print (msg) +>>> # join a consumer group for dynamic partition assignment and offset commits +>>> from kafka import KafkaConsumer +>>> consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') +>>> for msg in consumer: +... print (msg) + >>> # manually assign the partition list for the consumer >>> from kafka import TopicPartition >>> consumer = KafkaConsumer(bootstrap_servers='localhost:1234') @@ -76,11 +82,14 @@ client. See `KafkaProducer `_ for more details. >>> for _ in range(100): ... producer.send('foobar', b'some_message_bytes') ->>> # Block until all pending messages are sent ->>> producer.flush() - >>> # Block until a single message is sent (or timeout) ->>> producer.send('foobar', b'another_message').get(timeout=60) +>>> future = producer.send('foobar', b'another_message') +>>> result = future.get(timeout=60) + +>>> # Block until all pending messages are at least put on the network +>>> # NOTE: This does not guarantee delivery or success! 
It is really +>>> # only useful if you configure internal batching using linger_ms +>>> producer.flush() >>> # Use a key for hashed-partitioning >>> producer.send('foobar', key=b'foo', value=b'bar') From 6099a5ac09726a2a4672c195e0793dff7e99d5ae Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 16:09:22 -0800 Subject: [PATCH 128/291] Disable default consumer group (#1016) --- kafka/consumer/group.py | 4 ++-- test/test_consumer_group.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 8db403c73..a78b858b4 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -53,7 +53,7 @@ class KafkaConsumer(six.Iterator): partition assignment (if enabled), and to use for fetching and committing offsets. If None, auto-partition assignment (via group coordinator) and offset commits are disabled. - Default: 'kafka-python-default-group' + Default: None key_deserializer (callable): Any callable that takes a raw message key and returns a deserialized key. value_deserializer (callable): Any callable that takes a @@ -215,7 +215,7 @@ class KafkaConsumer(six.Iterator): DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', 'client_id': 'kafka-python-' + __version__, - 'group_id': 'kafka-python-default-group', + 'group_id': None, 'key_deserializer': None, 'value_deserializer': None, 'fetch_max_wait_ms': 500, diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index 9d9be60e3..885ae832c 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -54,12 +54,14 @@ def test_group(kafka_broker, topic): stop = {} threads = {} messages = collections.defaultdict(list) + group_id = 'test-group-' + random_string(6) def consumer_thread(i): assert i not in consumers assert i not in stop stop[i] = threading.Event() consumers[i] = KafkaConsumer(topic, bootstrap_servers=connect_str, + group_id=group_id, heartbeat_interval_ms=500) while not stop[i].is_set(): for tp, records in six.itervalues(consumers[i].poll(100)): From 1025f96f87e61d7cd56d401f77367f2d7d0d0f9a Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 6 Mar 2017 16:15:23 -0800 Subject: [PATCH 129/291] Recategorize some bugfix changes in docs; add PR 1016 --- CHANGES.md | 5 +++-- docs/changelog.rst | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 435da60cb..da6d4026d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,17 +22,16 @@ Test Infrastructure * Update pytest fixtures to new yield syntax (jeffwidman 919) Consumer +* No longer configure a default consumer group (dpkp 1016) * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) * Default max_poll_records to Java default of 500 (jeffwidman 947) Producer * change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) -* Issue 985: Clear memory wait condition before raising Exception (dpkp 999) Client * When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) -* Mark last_attempt time during connection close to fix blackout calculation (dpkp 1008) * Catch socket errors during ssl handshake (dpkp 1007) * Drop old brokers when rebuilding broker metadata (dpkp 1005) * Drop bad disconnect test -- just use the mocked-socket test (dpkp 982) @@ -45,6 +44,8 @@ Client Bugfixes * Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) * 
Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) +* Fix blackout calculation: mark last_attempt time during connection close (dpkp 1008) +* Fix buffer pool reallocation after raising timeout (dpkp 999) Logging / Error Messages * Add client info logging re bootstrap; log connection attempts to balance with close (dpkp) diff --git a/docs/changelog.rst b/docs/changelog.rst index 1bebd181e..ec377a328 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -29,6 +29,7 @@ Test Infrastructure Consumer -------- +* No longer configure a default consumer group (dpkp 1016) * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) * Default max_poll_records to Java default of 500 (jeffwidman 947) @@ -36,12 +37,10 @@ Consumer Producer -------- * change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) -* Issue 985: Clear memory wait condition before raising Exception (dpkp 999) Client ------ * When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) -* Mark last_attempt time during connection close to fix blackout calculation (dpkp 1008) * Catch socket errors during ssl handshake (dpkp 1007) * Drop old brokers when rebuilding broker metadata (dpkp 1005) * Drop bad disconnect test -- just use the mocked-socket test (dpkp 982) @@ -55,6 +54,8 @@ Bugfixes -------- * Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) * Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) +* Fix blackout calculation: mark last_attempt time during connection close (dpkp 1008) +* Fix buffer pool reallocation after raising timeout (dpkp 999) Logging / Error Messages ------------------------ From d99afad0a230d534e084fe72041e05aadea24ca7 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 11:14:49 -0800 Subject: [PATCH 130/291] Fixup comment reference to _maybe_connect --- kafka/client_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 8dbae5ded..62757d0c6 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -732,7 +732,7 @@ def refresh_done(val_or_error): elif self._can_connect(node_id): log.debug("Initializing connection to node %s for metadata request", node_id) self._maybe_connect(node_id) - # If initiateConnect failed immediately, this node will be put into blackout and we + # If _maybe_connect failed immediately, this node will be put into blackout and we # should allow immediately retrying in case there is another candidate node. If it # is still connecting, the worst case is that we end up setting a longer timeout # on the next round and then wait for the response. 
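For quick reference, a minimal consumer sketch exercising two options touched by the patches above: `fetch_max_bytes` (added in PATCH 121; per its docstring, supported by Kafka >= 0.10.1.0 brokers) and an explicit `group_id` (needed for group coordination now that PATCH 128 removes the default consumer group). The topic name, group id, and broker address below are placeholder values, not taken from the patches:

>>> from kafka import KafkaConsumer
>>> consumer = KafkaConsumer('my_topic', group_id='my_group',
...                          bootstrap_servers='localhost:9092',
...                          auto_offset_reset='earliest',
...                          fetch_max_bytes=1048576)
>>> for msg in consumer:
...     print (msg)

Note that `fetch_max_bytes` caps the data returned per fetch request; fetches to multiple brokers run in parallel, so total memory use still scales with the number of brokers hosting the subscribed partitions.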
From 62b294224b3efe5ae3e8085d20cc346fa34a8b9e Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 11:32:36 -0800 Subject: [PATCH 131/291] For 0.8.2, only attempt connection to coordinator if least_loaded_node succeeds --- kafka/coordinator/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 923b2c395..b2f52c804 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -206,7 +206,8 @@ def ensure_coordinator_known(self): # it as the "coordinator" if self.config['api_version'] < (0, 8, 2): self.coordinator_id = self._client.least_loaded_node() - self._client.ready(self.coordinator_id) + if self.coordinator_id is not None: + self._client.ready(self.coordinator_id) continue future = self._send_group_coordinator_request() From 4a32205c636a14f2c077f80abb6effc4e7bb73a8 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 11:33:23 -0800 Subject: [PATCH 132/291] Add ClusterMetadata documentation --- docs/apidoc/ClusterMetadata.rst | 5 +++++ docs/apidoc/kafka.rst | 10 ++++++++++ docs/apidoc/modules.rst | 1 + kafka/client_async.py | 4 ++++ kafka/cluster.py | 14 ++++++++++++++ 5 files changed, 34 insertions(+) create mode 100644 docs/apidoc/ClusterMetadata.rst diff --git a/docs/apidoc/ClusterMetadata.rst b/docs/apidoc/ClusterMetadata.rst new file mode 100644 index 000000000..4b575b376 --- /dev/null +++ b/docs/apidoc/ClusterMetadata.rst @@ -0,0 +1,5 @@ +ClusterMetadata +=========== + +.. autoclass:: kafka.cluster.ClusterMetadata + :members: diff --git a/docs/apidoc/kafka.rst b/docs/apidoc/kafka.rst index eb04c35b9..a29e06345 100644 --- a/docs/apidoc/kafka.rst +++ b/docs/apidoc/kafka.rst @@ -6,6 +6,7 @@ Subpackages .. toctree:: + kafka.cluster kafka.consumer kafka.partitioner kafka.producer @@ -13,6 +14,15 @@ Subpackages Submodules ---------- +kafka.cluster module +-------------------- + +.. automodule:: kafka.cluster + :members: + :undoc-members: + :show-inheritance: + + kafka.client module ------------------- diff --git a/docs/apidoc/modules.rst b/docs/apidoc/modules.rst index c1c3335f0..947788713 100644 --- a/docs/apidoc/modules.rst +++ b/docs/apidoc/modules.rst @@ -7,3 +7,4 @@ kafka-python API KafkaProducer KafkaClient BrokerConnection + ClusterMetadata diff --git a/kafka/client_async.py b/kafka/client_async.py index 62757d0c6..ebbde4f90 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -48,6 +48,10 @@ class KafkaClient(object): This class is not thread-safe! + Attributes: + cluster (:any:`ClusterMetadata`): Local cache of cluster metadata, retrived + via MetadataRequests during :meth:`.poll`. + Keyword Arguments: bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' strings) that the consumer should contact to bootstrap initial diff --git a/kafka/cluster.py b/kafka/cluster.py index 0a5c07fea..d646fdfee 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -16,6 +16,20 @@ class ClusterMetadata(object): + """ + A class to manage kafka cluster metadata. + + This class does not perform any IO. It simply updates internal state + given API responses (MetadataResponse, GroupCoordinatorResponse). + + Keyword Arguments: + retry_backoff_ms (int): Milliseconds to backoff when retrying on + errors. Default: 100. + metadata_max_age_ms (int): The period of time in milliseconds after + which we force a refresh of metadata even if we haven't seen any + partition leadership changes to proactively discover any new + brokers or partitions. 
Default: 300000 + """ DEFAULT_CONFIG = { 'retry_backoff_ms': 100, 'metadata_max_age_ms': 300000, From 19368cfaac2f822aafe175c8ff8ea033b67f42ea Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 12:25:30 -0800 Subject: [PATCH 133/291] Fixup :meth: sphinx documentation for use in KafkaConsumer.rst etc --- kafka/consumer/group.py | 58 +++++++++++++++++++++++++---------------- kafka/producer/kafka.py | 26 +++++++++--------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index a78b858b4..da963a3c3 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -35,7 +35,8 @@ class KafkaConsumer(six.Iterator): Arguments: *topics (str): optional list of topics to subscribe to. If not set, - call :meth:`.subscribe` or :meth:`.assign` before consuming records. + call :meth:`~kafka.KafkaConsumer.subscribe` or + :meth:`~kafka.KafkaConsumer.assign` before consuming records. Keyword Arguments: bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' @@ -127,7 +128,7 @@ class KafkaConsumer(six.Iterator): session_timeout_ms (int): The timeout used to detect failures when using Kafka's group management facilities. Default: 30000 max_poll_records (int): The maximum number of records returned in a - single call to :meth:`.poll`. Default: 500 + single call to :meth:`~kafka.KafkaConsumer.poll`. Default: 500 receive_buffer_bytes (int): The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. Default: None (relies on system defaults). The java client defaults to 32768. @@ -172,6 +173,7 @@ class KafkaConsumer(six.Iterator): api_version (tuple): Specify which Kafka API version to use. If set to None, the client will attempt to infer the broker version by probing various APIs. Different versions enable different functionality. + Examples: (0, 9) enables full group coordination features with automatic partition assignment and rebalancing, @@ -181,6 +183,7 @@ class KafkaConsumer(six.Iterator): partition assignment only, (0, 8, 0) enables basic functionality but requires manual partition assignment and offset management. + For the full list of supported versions, see KafkaClient.API_VERSIONS. Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a @@ -336,11 +339,13 @@ def assign(self, partitions): partitions (list of TopicPartition): Assignment for this instance. Raises: - IllegalStateError: If consumer has already called :meth:`.subscribe`. + IllegalStateError: If consumer has already called + :meth:`~kafka.KafkaConsumer.subscribe`. Warning: It is not possible to use both manual partition assignment with - :meth:`.assign` and group assignment with :meth:`.subscribe`. + :meth:`~kafka.KafkaConsumer.assign` and group assignment with + :meth:`~kafka.KafkaConsumer.subscribe`. Note: This interface does not support incremental assignment and will @@ -358,12 +363,13 @@ def assign(self, partitions): def assignment(self): """Get the TopicPartitions currently assigned to this consumer. - If partitions were directly assigned using :meth:`.assign`, then this - will simply return the same partitions that were previously assigned. - If topics were subscribed using :meth:`.subscribe`, then this will give - the set of topic partitions currently assigned to the consumer (which - may be None if the assignment hasn't happened yet, or if the partitions - are in the process of being reassigned). 
+ If partitions were directly assigned using + :meth:`~kafka.KafkaConsumer.assign`, then this will simply return the + same partitions that were previously assigned. If topics were + subscribed using :meth:`~kafka.KafkaConsumer.subscribe`, then this will + give the set of topic partitions currently assigned to the consumer + (which may be None if the assignment hasn't happened yet, or if the + partitions are in the process of being reassigned). Returns: set: {TopicPartition, ...} @@ -527,8 +533,8 @@ def poll(self, timeout_ms=0, max_records=None): with any records that are available currently in the buffer, else returns empty. Must not be negative. Default: 0 max_records (int, optional): The maximum number of records returned - in a single call to :meth:`.poll`. Default: Inherit value from - max_poll_records. + in a single call to :meth:`~kafka.KafkaConsumer.poll`. + Default: Inherit value from max_poll_records. Returns: dict: Topic to list of records since the last fetch for the @@ -639,10 +645,12 @@ def highwater(self, partition): def pause(self, *partitions): """Suspend fetching from the requested partitions. - Future calls to :meth:`.poll` will not return any records from these - partitions until they have been resumed using :meth:`.resume`. Note that - this method does not affect partition subscription. In particular, it - does not cause a group rebalance when automatic assignment is used. + Future calls to :meth:`~kafka.KafkaConsumer.poll` will not return any + records from these partitions until they have been resumed using + :meth:`~kafka.KafkaConsumer.resume`. + + Note: This method does not affect partition subscription. In particular, + it does not cause a group rebalance when automatic assignment is used. Arguments: *partitions (TopicPartition): Partitions to pause. @@ -654,7 +662,8 @@ def pause(self, *partitions): self._subscription.pause(partition) def paused(self): - """Get the partitions that were previously paused using :meth:`.pause`. + """Get the partitions that were previously paused using + :meth:`~kafka.KafkaConsumer.pause`. Returns: set: {partition (TopicPartition), ...} @@ -677,10 +686,12 @@ def seek(self, partition, offset): """Manually specify the fetch offset for a TopicPartition. Overrides the fetch offsets that the consumer will use on the next - :meth:`.poll`. If this API is invoked for the same partition more than - once, the latest offset will be used on the next :meth:`.poll`. Note - that you may lose data if this API is arbitrarily used in the middle of - consumption, to reset the fetch offsets. + :meth:`~kafka.KafkaConsumer.poll`. If this API is invoked for the same + partition more than once, the latest offset will be used on the next + :meth:`~kafka.KafkaConsumer.poll`. + + Note: You may lose data if this API is arbitrarily used in the middle of + consumption to reset the fetch offsets. Arguments: partition (TopicPartition): Partition for seek operation @@ -752,7 +763,7 @@ def subscribe(self, topics=(), pattern=None, listener=None): Topic subscriptions are not incremental: this list will replace the current assignment (if there is one). - This method is incompatible with :meth:`.assign`. + This method is incompatible with :meth:`~kafka.KafkaConsumer.assign`. Arguments: topics (list): List of topics for subscription. @@ -781,7 +792,8 @@ def subscribe(self, topics=(), pattern=None, listener=None): through this interface are from topics subscribed in this call. Raises: - IllegalStateError: If called after previously calling :meth:`.assign`. 
+ IllegalStateError: If called after previously calling + :meth:`~kafka.KafkaConsumer.assign`. AssertionError: If neither topics or pattern is provided. TypeError: If listener is not a ConsumerRebalanceListener. """ diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 59a75048d..8762d8b2e 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -34,9 +34,9 @@ class KafkaProducer(object): thread that is responsible for turning these records into requests and transmitting them to the cluster. - :meth:`.send` is asynchronous. When called it adds the record to a buffer of - pending record sends and immediately returns. This allows the producer to - batch together individual records for efficiency. + :meth:`~kafka.KafkaProducer.send` is asynchronous. When called it adds the + record to a buffer of pending record sends and immediately returns. This + allows the producer to batch together individual records for efficiency. The 'acks' config controls the criteria under which requests are considered complete. The "all" setting will result in blocking on the full commit of @@ -166,11 +166,12 @@ class KafkaProducer(object): will block up to max_block_ms, raising an exception on timeout. In the current implementation, this setting is an approximation. Default: 33554432 (32MB) - max_block_ms (int): Number of milliseconds to block during :meth:`.send` - and :meth:`.partitions_for`. These methods can be blocked either - because the buffer is full or metadata unavailable. Blocking in the - user-supplied serializers or partitioner will not be counted against - this timeout. Default: 60000. + max_block_ms (int): Number of milliseconds to block during + :meth:`~kafka.KafkaProducer.send` and + :meth:`~kafka.KafkaProducer.partitions_for`. These methods can be + blocked either because the buffer is full or metadata unavailable. + Blocking in the user-supplied serializers or partitioner will not be + counted against this timeout. Default: 60000. max_request_size (int): The maximum size of a request. This is also effectively a cap on the maximum record size. Note that the server has its own cap on record size which may be different from this. @@ -535,10 +536,11 @@ def flush(self, timeout=None): Invoking this method makes all buffered records immediately available to send (even if linger_ms is greater than 0) and blocks on the completion of the requests associated with these records. The - post-condition of :meth:`.flush` is that any previously sent record will - have completed (e.g. Future.is_done() == True). A request is considered - completed when either it is successfully acknowledged according to the - 'acks' configuration for the producer, or it results in an error. + post-condition of :meth:`~kafka.KafkaProducer.flush` is that any + previously sent record will have completed + (e.g. Future.is_done() == True). A request is considered completed when + either it is successfully acknowledged according to the 'acks' + configuration for the producer, or it results in an error. 
Other threads can continue sending messages while one thread is blocked waiting for a flush call to complete; however, no guarantee is made From a475742ee1b65fb0417f3daf6820c4c4a3bfdf40 Mon Sep 17 00:00:00 2001 From: Max Baryshnikov Date: Tue, 7 Feb 2017 22:31:06 +0300 Subject: [PATCH 134/291] Fixed couple of "leaks" when gc is disabled (#979) --- kafka/protocol/legacy.py | 29 +++++++++++++++++------------ kafka/protocol/message.py | 4 ++-- kafka/protocol/struct.py | 6 +++++- kafka/vendor/six.py | 4 +++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index 1776c020c..d1d43cc0b 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -133,21 +133,26 @@ def encode_produce_request(cls, payloads=(), acks=1, timeout=1000): if acks not in (1, 0, -1): raise ValueError('ProduceRequest acks (%s) must be 1, 0, -1' % acks) + topics = [] + for topic, topic_payloads in group_by_topic_and_partition(payloads).items(): + topic_msgs = [] + for partition, payload in topic_payloads.items(): + partition_msgs = [] + for msg in payload.messages: + m = kafka.protocol.message.Message( + msg.value, key=msg.key, + magic=msg.magic, attributes=msg.attributes + ) + partition_msgs.append((0, m.encode())) + topic_msgs.append((partition, partition_msgs)) + topics.append((topic, topic_msgs)) + + return kafka.protocol.produce.ProduceRequest[0]( required_acks=acks, timeout=timeout, - topics=[( - topic, - [( - partition, - [(0, - kafka.protocol.message.Message( - msg.value, key=msg.key, - magic=msg.magic, attributes=msg.attributes - ).encode()) - for msg in payload.messages]) - for partition, payload in topic_payloads.items()]) - for topic, topic_payloads in group_by_topic_and_partition(payloads).items()]) + topics=topics + ) @classmethod def decode_produce_response(cls, response): diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py index bfad1275d..ec5ee6c1b 100644 --- a/kafka/protocol/message.py +++ b/kafka/protocol/message.py @@ -10,7 +10,7 @@ from .types import ( Int8, Int32, Int64, Bytes, Schema, AbstractType ) -from ..util import crc32 +from ..util import crc32, WeakMethod class Message(Struct): @@ -52,7 +52,7 @@ def __init__(self, value, key=None, magic=0, attributes=0, crc=0, self.attributes = attributes self.key = key self.value = value - self.encode = self._encode_self + self.encode = WeakMethod(self._encode_self) @property def timestamp_type(self): diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index a3d28d76c..4c1afcb0d 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -5,6 +5,8 @@ from .abstract import AbstractType from .types import Schema +from ..util import WeakMethod + class Struct(AbstractType): SCHEMA = Schema() @@ -19,7 +21,9 @@ def __init__(self, *args, **kwargs): self.__dict__.update(kwargs) # overloading encode() to support both class and instance - self.encode = self._encode_self + # Without WeakMethod() this creates circular ref, which + # causes instances to "leak" to garbage + self.encode = WeakMethod(self._encode_self) @classmethod def encode(cls, item): # pylint: disable=E0202 diff --git a/kafka/vendor/six.py b/kafka/vendor/six.py index 808e6510e..a949b9539 100644 --- a/kafka/vendor/six.py +++ b/kafka/vendor/six.py @@ -70,7 +70,9 @@ def __len__(self): else: # 64-bit MAXSIZE = int((1 << 63) - 1) - del X + + # Don't del it here, cause with gc disabled this "leaks" to garbage + # del X def _add_doc(func, doc): From 61fce5c02e28868c65a080d4bd586c6af740116f Mon Sep 
17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 14:50:20 -0800 Subject: [PATCH 135/291] Update changelog --- CHANGES.md | 4 ++++ docs/changelog.rst | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index da6d4026d..8496f0174 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ # 1.3.3 (Unreleased) Core / Protocol +* Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. (Drizzt1991 962) * CreateTopicsRequest / Response v1 (dpkp 1012) * Add MetadataRequest_v2 and MetadataResponse_v2 structures for KIP-78 (Drizzt1991 974) @@ -26,6 +27,7 @@ Consumer * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) * Default max_poll_records to Java default of 500 (jeffwidman 947) +* For 0.8.2, only attempt connection to coordinator if least_loaded_node succeeds (dpkp) Producer * change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) @@ -67,6 +69,8 @@ Documentation * Spelling and grammar changes (melissacrawford396 923) * Fix typo: coorelation --> correlation (jeffwidman 929) * Make SSL warning list the correct Python versions (jeffwidman 924) +* Fixup comment reference to _maybe_connect (dpkp) +* Add ClusterMetadata sphinx documentation (dpkp) Legacy Client * Add send_list_offset_request for searching offset by timestamp (charsyam 1001) diff --git a/docs/changelog.rst b/docs/changelog.rst index ec377a328..29eb9483f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,6 +6,7 @@ Changelog Core / Protocol --------------- +* Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. (Drizzt1991 962) * CreateTopicsRequest / Response v1 (dpkp 1012) * Add MetadataRequest_v2 and MetadataResponse_v2 structures for KIP-78 (Drizzt1991 974) @@ -33,6 +34,7 @@ Consumer * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) * Default max_poll_records to Java default of 500 (jeffwidman 947) +* For 0.8.2, only attempt connection to coordinator if least_loaded_node succeeds (dpkp) Producer -------- @@ -79,6 +81,8 @@ Documentation * Spelling and grammar changes (melissacrawford396 923) * Fix typo: coorelation --> correlation (jeffwidman 929) * Make SSL warning list the correct Python versions (jeffwidman 924) +* Fixup comment reference to _maybe_connect (dpkp) +* Add ClusterMetadata sphinx documentation (dpkp) Legacy Client ------------- From 0838de934e378a28d48d225799a7ac9ca3aad4eb Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 7 Mar 2017 16:27:06 -0800 Subject: [PATCH 136/291] Fix integration test that requires consumer group --- test/test_consumer_integration.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index 1b97244da..cd08e5573 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -753,8 +753,11 @@ def test_kafka_consumer_max_bytes_one_msg(self): # Start a consumer. 
FetchResponse_v3 should always include at least 1 # full msg, so by setting fetch_max_bytes=1 we must get 1 msg at a time + group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5) consumer = self.kafka_consumer( - auto_offset_reset='earliest', fetch_max_bytes=1) + group_id=group, + auto_offset_reset='earliest', + fetch_max_bytes=1) fetched_msgs = [] # A bit hacky, but we need this in order for message count to be exact consumer._coordinator.ensure_active_group() From 058f29d4e614843e0f4a6912c13ce1acbac1b3a7 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 8 Mar 2017 22:52:08 -0800 Subject: [PATCH 137/291] A few Sphinx documentation updates (#1019) --- kafka/consumer/group.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index da963a3c3..907b0a728 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -213,7 +213,7 @@ class KafkaConsumer(six.Iterator): Note: Configuration parameters are described in more detail at - https://kafka.apache.org/0100/configuration.html#newconsumerconfigs + https://kafka.apache.org/documentation/#newconsumerconfigs """ DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', @@ -522,8 +522,8 @@ def poll(self, timeout_ms=0, max_records=None): Records are fetched and returned in batches by topic-partition. On each poll, consumer will try to use the last consumed offset as the starting offset and fetch sequentially. The last consumed offset can be - manually set through seek(partition, offset) or automatically set as - the last committed offset for the subscribed list of partitions. + manually set through :meth:`~kafka.KafkaConsumer.seek` or automatically + set as the last committed offset for the subscribed list of partitions. Incompatible with iterator interface -- use one or the other, not both. From d4fc9822d0ef3fdb7dadb127b0b56cd710800638 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Wed, 8 Mar 2017 22:53:31 -0800 Subject: [PATCH 138/291] Add optional kwarg to ready and is_ready to disable metadata-priority logic (#1017) --- kafka/client_async.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index ebbde4f90..2199abcf3 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -333,17 +333,19 @@ def _maybe_connect(self, node_id): conn.connect() return conn.connected() - def ready(self, node_id): + def ready(self, node_id, metadata_priority=True): """Check whether a node is connected and ok to send more requests. Arguments: node_id (int): the id of the node to check + metadata_priority (bool): Mark node as not-ready if a metadata + refresh is required. Default: True Returns: bool: True if we are ready to send to the given node """ self._maybe_connect(node_id) - return self.is_ready(node_id) + return self.is_ready(node_id, metadata_priority=metadata_priority) def connected(self, node_id): """Return True iff the node_id is connected.""" @@ -414,7 +416,7 @@ def connection_delay(self, node_id): else: return 999999999 - def is_ready(self, node_id): + def is_ready(self, node_id, metadata_priority=True): """Check whether a node is ready to send more requests. In addition to connection-level checks, this method also is used to @@ -422,16 +424,23 @@ def is_ready(self, node_id): Arguments: node_id (int): id of the node to check + metadata_priority (bool): Mark node as not-ready if a metadata + refresh is required. 
Default: True Returns: bool: True if the node is ready and metadata is not refreshing """ + if not self._can_send_request(node_id): + return False + # if we need to update our metadata now declare all requests unready to # make metadata requests first priority - if not self._metadata_refresh_in_progress and not self.cluster.ttl() == 0: - if self._can_send_request(node_id): - return True - return False + if metadata_priority: + if self._metadata_refresh_in_progress: + return False + if self.cluster.ttl() == 0: + return False + return True def _can_send_request(self, node_id): if node_id not in self._conns: From 43e4ebc2666dc1b095f0b4f37fa5b4618ab77a3b Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 9 Mar 2017 12:49:05 -0800 Subject: [PATCH 139/291] Avoid unknown coordinator after client poll (#1023) From 13bfb98e3899bb7ff221b3963bdd8925fe0594ac Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 9 Mar 2017 14:26:09 -0800 Subject: [PATCH 140/291] Short-circuit group coordinator requests when NodeNotReady (#995) --- kafka/coordinator/base.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index b2f52c804..8c160e185 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -284,6 +284,10 @@ def _send_join_group_request(self): e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) return Future().failure(e) + elif not self._client.ready(self.coordinator_id, metadata_priority=False): + e = Errors.NodeNotReadyError(self.coordinator_id) + return Future().failure(e) + # send a join group request to the coordinator log.info("(Re-)joining group %s", self.group_id) request = JoinGroupRequest[0]( @@ -414,6 +418,13 @@ def _send_sync_group_request(self, request): if self.coordinator_unknown(): e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) return Future().failure(e) + + # We assume that coordinator is ready if we're sending SyncGroup + # as it typically follows a successful JoinGroup + # Also note that if client.ready() enforces a metadata priority policy, + # we can get into an infinite loop if the leader assignment process + # itself requests a metadata update + future = Future() _f = self._client.send(self.coordinator_id, request) _f.add_callback(self._handle_sync_group_response, future, time.time()) @@ -465,6 +476,10 @@ def _send_group_coordinator_request(self): if node_id is None: return Future().failure(Errors.NoBrokersAvailable()) + elif not self._client.ready(node_id, metadata_priority=False): + e = Errors.NodeNotReadyError(node_id) + return Future().failure(e) + log.debug("Sending group coordinator request for group %s to broker %s", self.group_id, node_id) request = GroupCoordinatorRequest[0](self.group_id) @@ -551,6 +566,14 @@ def _handle_leave_group_response(self, response): def _send_heartbeat_request(self): """Send a heartbeat request""" + if self.coordinator_unknown(): + e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) + return Future().failure(e) + + elif not self._client.ready(self.coordinator_id, metadata_priority=False): + e = Errors.NodeNotReadyError(self.coordinator_id) + return Future().failure(e) + request = HeartbeatRequest[0](self.group_id, self.generation, self.member_id) log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id) # pylint: disable-msg=no-member future = Future() From 0b20f43d71008fc25ee729e07b824a3debfd2a72 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 9 Mar 2017 15:12:27 
-0800 Subject: [PATCH 141/291] Fix kwarg handing in kafka.protocol.struct.Struct (#1025) --- kafka/protocol/struct.py | 7 ++++++- test/test_protocol.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index 4c1afcb0d..3288172cf 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -18,7 +18,12 @@ def __init__(self, *args, **kwargs): elif len(args) > 0: raise ValueError('Args must be empty or mirror schema') else: - self.__dict__.update(kwargs) + for name in self.SCHEMA.names: + self.__dict__[name] = kwargs.pop(name, None) + if kwargs: + raise ValueError('Keyword(s) not in schema %s: %s' + % (list(self.SCHEMA.names), + ', '.join(kwargs.keys()))) # overloading encode() to support both class and instance # Without WeakMethod() this creates circular ref, which diff --git a/test/test_protocol.py b/test/test_protocol.py index 1c9f0f989..aa3dd17b6 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -7,8 +7,9 @@ from kafka.protocol.api import RequestHeader from kafka.protocol.commit import GroupCoordinatorRequest -from kafka.protocol.fetch import FetchResponse +from kafka.protocol.fetch import FetchRequest, FetchResponse from kafka.protocol.message import Message, MessageSet, PartialMessage +from kafka.protocol.metadata import MetadataRequest from kafka.protocol.types import Int16, Int32, Int64, String @@ -244,3 +245,16 @@ def test_decode_fetch_response_partial(): m1 = partitions[0][3] assert len(m1) == 2 assert m1[1] == (None, None, PartialMessage()) + + +def test_struct_unrecognized_kwargs(): + try: + mr = MetadataRequest[0](topicz='foo') + assert False, 'Structs should not allow unrecognized kwargs' + except ValueError: + pass + + +def test_struct_missing_kwargs(): + fr = FetchRequest[0](max_wait_time=100) + assert fr.min_bytes is None From cb008c969b6f8f719bbf51db2465d44fc130aee5 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 12:42:47 -0700 Subject: [PATCH 142/291] Return copy of consumer subscription set (#1029) --- kafka/consumer/group.py | 2 +- test/test_consumer.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 907b0a728..8ea833f1d 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -818,7 +818,7 @@ def subscription(self): Returns: set: {topic, ...} """ - return self._subscription.subscription + return self._subscription.subscription.copy() def unsubscribe(self): """Unsubscribe from all topics and clear all assigned partitions.""" diff --git a/test/test_consumer.py b/test/test_consumer.py index cc9d074c3..f29f43781 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -28,6 +28,14 @@ def test_fetch_max_wait_larger_than_request_timeout_raises(self): with self.assertRaises(KafkaConfigurationError): KafkaConsumer(bootstrap_servers='localhost:9092', fetch_max_wait_ms=41000, request_timeout_ms=40000) + def test_subscription_copy(self): + consumer = KafkaConsumer('foo', api_version=(0, 10)) + sub = consumer.subscription() + assert sub is not consumer.subscription() + assert sub == set(['foo']) + sub.add('fizz') + assert consumer.subscription() == set(['foo']) + class TestMultiProcessConsumer(unittest.TestCase): @unittest.skipIf(sys.platform.startswith('win'), 'test mocking fails on windows') From 9e9fd8efd2a743a20d76adc4d8c89d32376049f7 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 16:39:23 -0700 Subject: [PATCH 143/291] 
Optionally skip auto-commit during consumer.close (#1031) --- kafka/consumer/group.py | 4 ++-- kafka/coordinator/consumer.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 8ea833f1d..0715f06b5 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -376,13 +376,13 @@ def assignment(self): """ return self._subscription.assigned_partitions() - def close(self): + def close(self, autocommit=True): """Close the consumer, waiting indefinitely for any needed cleanup.""" if self._closed: return log.debug("Closing the KafkaConsumer.") self._closed = True - self._coordinator.close() + self._coordinator.close(autocommit=autocommit) self._metrics.close() self._client.close() try: diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index a600cb471..5ef45bdfb 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -325,9 +325,10 @@ def fetch_committed_offsets(self, partitions): time.sleep(self.config['retry_backoff_ms'] / 1000.0) - def close(self): + def close(self, autocommit=True): try: - self._maybe_auto_commit_offsets_sync() + if autocommit: + self._maybe_auto_commit_offsets_sync() finally: super(ConsumerCoordinator, self).close() From 12c42f9b10310818bb8fe4b459d20ea0b17814cd Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 16:39:53 -0700 Subject: [PATCH 144/291] Additional docstrings for autocommit close option --- kafka/consumer/group.py | 8 +++++++- kafka/coordinator/base.py | 4 ++-- kafka/coordinator/consumer.py | 8 ++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 0715f06b5..ca41385f4 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -377,7 +377,13 @@ def assignment(self): return self._subscription.assigned_partitions() def close(self, autocommit=True): - """Close the consumer, waiting indefinitely for any needed cleanup.""" + """Close the consumer, waiting indefinitely for any needed cleanup. + + Keyword Arguments: + autocommit (bool): If auto-commit is configured for this consumer, + this optional flag causes the consumer to attempt to commit any + pending consumed offsets prior to close. Default: True + """ if self._closed: return log.debug("Closing the KafkaConsumer.") diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 8c160e185..4788e0091 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -536,8 +536,8 @@ def coordinator_dead(self, error=None): self.coordinator_id = None def close(self): - """Close the coordinator, leave the current group - and reset local generation/memberId.""" + """Close the coordinator, leave the current group, + and reset local generation / member_id""" try: self._client.unschedule(self.heartbeat_task) except KeyError: diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index 5ef45bdfb..009fb592f 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -326,6 +326,14 @@ def fetch_committed_offsets(self, partitions): time.sleep(self.config['retry_backoff_ms'] / 1000.0) def close(self, autocommit=True): + """Close the coordinator, leave the current group, + and reset local generation / member_id. + + Keyword Arguments: + autocommit (bool): If auto-commit is configured for this consumer, + this optional flag causes the consumer to attempt to commit any + pending consumed offsets prior to close. 
Default: True + """ try: if autocommit: self._maybe_auto_commit_offsets_sync() From c4e796a6d81f246e7d2c1eb8ac4b97e13014cfa2 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 16:41:38 -0700 Subject: [PATCH 145/291] Avoid re-encoding for message crc check (#1027) --- kafka/protocol/message.py | 18 ++++++++++++------ test/test_protocol.py | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py index ec5ee6c1b..efdf4fc94 100644 --- a/kafka/protocol/message.py +++ b/kafka/protocol/message.py @@ -48,6 +48,7 @@ def __init__(self, value, key=None, magic=0, attributes=0, crc=0, timestamp = int(time.time() * 1000) self.timestamp = timestamp self.crc = crc + self._validated_crc = None self.magic = magic self.attributes = attributes self.key = key @@ -85,7 +86,9 @@ def _encode_self(self, recalc_crc=True): @classmethod def decode(cls, data): + _validated_crc = None if isinstance(data, bytes): + _validated_crc = crc32(data[4:]) data = io.BytesIO(data) # Partial decode required to determine message version base_fields = cls.SCHEMAS[0].fields[0:3] @@ -96,14 +99,17 @@ def decode(cls, data): timestamp = fields[0] else: timestamp = None - return cls(fields[-1], key=fields[-2], - magic=magic, attributes=attributes, crc=crc, - timestamp=timestamp) + msg = cls(fields[-1], key=fields[-2], + magic=magic, attributes=attributes, crc=crc, + timestamp=timestamp) + msg._validated_crc = _validated_crc + return msg def validate_crc(self): - raw_msg = self._encode_self(recalc_crc=False) - crc = crc32(raw_msg[4:]) - if crc == self.crc: + if self._validated_crc is None: + raw_msg = self._encode_self(recalc_crc=False) + self._validated_crc = crc32(raw_msg[4:]) + if self.crc == self._validated_crc: return True return False diff --git a/test/test_protocol.py b/test/test_protocol.py index aa3dd17b6..0203614ed 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -67,6 +67,30 @@ def test_decode_message(): assert decoded_message == msg +def test_decode_message_validate_crc(): + encoded = b''.join([ + struct.pack('>i', -1427009701), # CRC + struct.pack('>bb', 0, 0), # Magic, flags + struct.pack('>i', 3), # Length of key + b'key', # key + struct.pack('>i', 4), # Length of value + b'test', # value + ]) + decoded_message = Message.decode(encoded) + assert decoded_message.validate_crc() is True + + encoded = b''.join([ + struct.pack('>i', 1234), # Incorrect CRC + struct.pack('>bb', 0, 0), # Magic, flags + struct.pack('>i', 3), # Length of key + b'key', # key + struct.pack('>i', 4), # Length of value + b'test', # value + ]) + decoded_message = Message.decode(encoded) + assert decoded_message.validate_crc() is False + + def test_encode_message_set(): messages = [ Message(b'v1', key=b'k1'), From 31e6431556f8fa8b72a2e8a3b626618cc67aa471 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 19:04:02 -0700 Subject: [PATCH 146/291] Update changelog --- CHANGES.md | 8 ++++++++ docs/changelog.rst | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 8496f0174..87ea9875f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ # 1.3.3 (Unreleased) Core / Protocol +* Fix kwarg handing in kafka.protocol.struct.Struct (dpkp 1025) * Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. 
(Drizzt1991 962) * CreateTopicsRequest / Response v1 (dpkp 1012) @@ -23,6 +24,11 @@ Test Infrastructure * Update pytest fixtures to new yield syntax (jeffwidman 919) Consumer +* Avoid re-encoding message for crc check (dpkp 1027) +* Optionally skip auto-commit during consumer.close (dpkp 1031) +* Return copy of consumer subscription set (dpkp 1029) +* Short-circuit group coordinator requests when NodeNotReady (dpkp 995) +* Avoid unknown coordinator after client poll (dpkp 1023) * No longer configure a default consumer group (dpkp 1016) * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) @@ -33,6 +39,7 @@ Producer * change default timeout of KafkaProducer.close() to threading.TIMEOUT_MAX on py3 (mmyjona 991) Client +* Add optional kwarg to ready/is_ready to disable metadata-priority logic (dpkp 1017) * When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) * Catch socket errors during ssl handshake (dpkp 1007) * Drop old brokers when rebuilding broker metadata (dpkp 1005) @@ -60,6 +67,7 @@ Logging / Error Messages * Replace %s with %r in producer debug log message (chekunkov 973) Documentation +* Sphinx documentation updates (jeffwidman 1019) * Add sphinx formatting to hyperlink methods (jeffwidman 898) * Fix BrokerConnection api_version docs default (jeffwidman 909) * PEP-8: Spacing & removed unused imports (jeffwidman 899) diff --git a/docs/changelog.rst b/docs/changelog.rst index 29eb9483f..caf18d67c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,6 +6,7 @@ Changelog Core / Protocol --------------- +* Fix kwarg handing in kafka.protocol.struct.Struct (dpkp 1025) * Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. 
(Drizzt1991 962) * CreateTopicsRequest / Response v1 (dpkp 1012) @@ -30,6 +31,11 @@ Test Infrastructure Consumer -------- +* Avoid re-encoding message for crc check (dpkp 1027) +* Optionally skip auto-commit during consumer.close (dpkp 1031) +* Return copy of consumer subscription set (dpkp 1029) +* Short-circuit group coordinator requests when NodeNotReady (dpkp 995) +* Avoid unknown coordinator after client poll (dpkp 1023) * No longer configure a default consumer group (dpkp 1016) * Dont refresh metadata on failed group coordinator request unless needed (dpkp 1006) * Fail-fast on timeout constraint violations during KafkaConsumer creation (harelba 986) @@ -42,6 +48,7 @@ Producer Client ------ +* Add optional kwarg to ready/is_ready to disable metadata-priority logic (dpkp 1017) * When closing a broker connection without error, fail in-flight-requests with Cancelled (dpkp 1010) * Catch socket errors during ssl handshake (dpkp 1007) * Drop old brokers when rebuilding broker metadata (dpkp 1005) @@ -72,6 +79,7 @@ Logging / Error Messages Documentation ------------- +* Sphinx documentation updates (jeffwidman 1019) * Add sphinx formatting to hyperlink methods (jeffwidman 898) * Fix BrokerConnection api_version docs default (jeffwidman 909) * PEP-8: Spacing & removed unused imports (jeffwidman 899) From 9b8a482d8460e6e546dec17a1d51ab4b9dbcc8e1 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 9 Mar 2017 11:08:48 -0800 Subject: [PATCH 147/291] Free lz4 decompression context to avoid leak --- kafka/codec.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/codec.py b/kafka/codec.py index 1e5710791..4deec49da 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -192,6 +192,7 @@ def lz4_decode(payload): # pylint: disable-msg=no-member ctx = lz4f.createDecompContext() data = lz4f.decompressFrame(payload, ctx) + lz4f.freeDecompContext(ctx) # lz4f python module does not expose how much of the payload was # actually read if the decompression was only partial. From 5d57eecfb2d24be6a30b6488ce696ac28b025ac0 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 13 Mar 2017 14:22:57 -0700 Subject: [PATCH 148/291] Prefer python-lz4 over lz4f if available --- docs/index.rst | 5 ++--- docs/install.rst | 6 ++---- kafka/codec.py | 39 ++++++++++++++++++++++++++++++++------- tox.ini | 2 +- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 2cef7fe06..21cb3b9b8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -113,9 +113,8 @@ Compression *********** kafka-python supports gzip compression/decompression natively. To produce or -consume lz4 compressed messages, you must install lz4tools and xxhash (modules -may not work on python2.6). To enable snappy, install python-snappy (also -requires snappy library). +consume lz4 compressed messages, you should install python-lz4 (pip install lz4). +To enable snappy, install python-snappy (also requires snappy library). See `Installation `_ for more information. 
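As context for the docs change above: switching from lz4tools to python-lz4 does not change the producer-facing API, compression is still selected per producer via compression_type. A minimal sketch, assuming python-lz4 is installed, a broker is reachable on localhost:9092, and 'my-topic' is just a placeholder topic name:

    from kafka import KafkaProducer

    # compression_type='lz4' selects the interoperable LZ4 framing (Kafka >= 0.10);
    # kafka.codec falls back to lz4f only when python-lz4 is not importable.
    producer = KafkaProducer(bootstrap_servers='localhost:9092',
                             compression_type='lz4')
    producer.send('my-topic', b'hello, compressed world')
    producer.flush()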
diff --git a/docs/install.rst b/docs/install.rst index 9720d65a1..cc0e82d68 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -26,12 +26,10 @@ Bleeding-Edge Optional LZ4 install ******************** -To enable LZ4 compression/decompression, install lz4tools and xxhash: +To enable LZ4 compression/decompression, install python-lz4: ->>> pip install lz4tools ->>> pip install xxhash +>>> pip install lz4 -*Note*: these modules do not support python2.6 Optional Snappy install *********************** diff --git a/kafka/codec.py b/kafka/codec.py index 4deec49da..29db48e48 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -16,12 +16,21 @@ except ImportError: snappy = None +try: + import lz4.frame as lz4 +except ImportError: + lz4 = None + try: import lz4f - import xxhash except ImportError: lz4f = None +try: + import xxhash +except ImportError: + xxhash = None + PYPY = bool(platform.python_implementation() == 'PyPy') def has_gzip(): @@ -33,7 +42,11 @@ def has_snappy(): def has_lz4(): - return lz4f is not None + if lz4 is not None: + return True + if lz4f is not None: + return True + return False def gzip_encode(payload, compresslevel=None): @@ -181,13 +194,15 @@ def snappy_decode(payload): return snappy.decompress(payload) -def lz4_encode(payload): - """Encode payload using interoperable LZ4 framing. Requires Kafka >= 0.10""" - # pylint: disable-msg=no-member - return lz4f.compressFrame(payload) +if lz4: + lz4_encode = lz4.compress # pylint: disable-msg=no-member +elif lz4f: + lz4_encode = lz4f.compressFrame # pylint: disable-msg=no-member +else: + lz4_encode = None -def lz4_decode(payload): +def lz4f_decode(payload): """Decode payload using interoperable LZ4 framing. Requires Kafka >= 0.10""" # pylint: disable-msg=no-member ctx = lz4f.createDecompContext() @@ -201,8 +216,17 @@ def lz4_decode(payload): return data['decomp'] +if lz4: + lz4_decode = lz4.decompress # pylint: disable-msg=no-member +elif lz4f: + lz4_decode = lz4f_decode +else: + lz4_decode = None + + def lz4_encode_old_kafka(payload): """Encode payload for 0.8/0.9 brokers -- requires an incorrect header checksum.""" + assert xxhash is not None data = lz4_encode(payload) header_size = 7 if isinstance(data[4], int): @@ -224,6 +248,7 @@ def lz4_encode_old_kafka(payload): def lz4_decode_old_kafka(payload): + assert xxhash is not None # Kafka's LZ4 code has a bug in its header checksum implementation header_size = 7 if isinstance(payload[4], int): diff --git a/tox.ini b/tox.ini index 23ca385ba..03a6893ad 100644 --- a/tox.ini +++ b/tox.ini @@ -17,7 +17,7 @@ deps = pytest-mock mock python-snappy - lz4tools + lz4 xxhash py26: unittest2 commands = From 737fa9d3be1d419255180db4714a2fe6b8d0c056 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 14 Mar 2017 11:01:58 -0700 Subject: [PATCH 149/291] LZ4 support in kafka 0.8/0.9 does not accept a ContentSize header --- kafka/codec.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kafka/codec.py b/kafka/codec.py index 29db48e48..a527b4273 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -229,13 +229,21 @@ def lz4_encode_old_kafka(payload): assert xxhash is not None data = lz4_encode(payload) header_size = 7 - if isinstance(data[4], int): - flg = data[4] - else: - flg = ord(data[4]) + flg = data[4] + if not isinstance(flg, int): + flg = ord(flg) + content_size_bit = ((flg >> 3) & 1) if content_size_bit: - header_size += 8 + # Old kafka does not accept the content-size field + # so we need to discard it and reset the header flag + flg -= 8 + data = 
bytearray(data) + data[4] = flg + data = bytes(data) + payload = data[header_size+8:] + else: + payload = data[header_size:] # This is the incorrect hc hc = xxhash.xxh32(data[0:header_size-1]).digest()[-2:-1] # pylint: disable-msg=no-member @@ -243,7 +251,7 @@ def lz4_encode_old_kafka(payload): return b''.join([ data[0:header_size-1], hc, - data[header_size:] + payload ]) From e2a2cacb1eef25ef4719ab00c5459feb020261aa Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 14 Mar 2017 11:38:49 -0700 Subject: [PATCH 150/291] Alter test skips: python-lz4 works on python26, but not pypy --- test/test_buffer.py | 4 +++- test/test_codec.py | 13 +++++++++---- test/test_producer.py | 4 ++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/test/test_buffer.py b/test/test_buffer.py index c8e283d25..db6cbb37c 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -2,6 +2,7 @@ from __future__ import absolute_import import io +import platform import pytest @@ -34,7 +35,8 @@ def test_buffer_close(): @pytest.mark.parametrize('compression', [ 'gzip', 'snappy', - pytest.mark.skipif("sys.version_info < (2,7)")('lz4'), # lz4tools does not work on py26 + pytest.mark.skipif(platform.python_implementation() == 'PyPy', + reason='python-lz4 crashes on older versions of pypy')('lz4'), ]) def test_compressed_buffer_close(compression): records = MessageSetBuffer(io.BytesIO(), 100000, compression_type=compression) diff --git a/test/test_codec.py b/test/test_codec.py index 906b53c33..d31fc8674 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import + +import platform import struct import pytest @@ -80,7 +83,8 @@ def test_snappy_encode_xerial(): assert compressed == to_ensure -@pytest.mark.skipif(not has_lz4(), reason="LZ4 not available") +@pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', + reason="python-lz4 crashes on old versions of pypy") def test_lz4(): for i in xrange(1000): b1 = random_string(100).encode('utf-8') @@ -89,7 +93,8 @@ def test_lz4(): assert b1 == b2 -@pytest.mark.skipif(not has_lz4(), reason="LZ4 not available") +@pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', + reason="python-lz4 crashes on old versions of pypy") def test_lz4_old(): for i in xrange(1000): b1 = random_string(100).encode('utf-8') @@ -98,8 +103,8 @@ def test_lz4_old(): assert b1 == b2 -@pytest.mark.xfail(reason="lz4tools library doesnt support incremental decompression") -@pytest.mark.skipif(not has_lz4(), reason="LZ4 not available") +@pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', + reason="python-lz4 crashes on old versions of pypy") def test_lz4_incremental(): for i in xrange(1000): # lz4 max single block size is 4MB diff --git a/test/test_producer.py b/test/test_producer.py index 125737b34..54b9db230 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -31,8 +31,8 @@ def test_end_to_end(kafka_broker, compression): # LZ4 requires 0.8.2 if version() < (0, 8, 2): return - # LZ4 python libs dont work on python2.6 - elif sys.version_info < (2, 7): + # python-lz4 crashes on older versions of pypy + elif platform.python_implementation() == 'PyPy': return connect_str = 'localhost:' + str(kafka_broker.port) From 42aa40450d18f0c7f2d0f468ba4054a99853a7f6 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 14 Mar 2017 13:34:37 -0700 Subject: [PATCH 151/291] Derive all api classes from Request / Response base classes (#1030) --- kafka/client.py | 10 ++------ 
kafka/client_async.py | 7 +----- kafka/conn.py | 8 +++---- kafka/protocol/admin.py | 30 +++++++++++------------ kafka/protocol/api.py | 49 ++++++++++++++++++++++++++++++++++++++ kafka/protocol/commit.py | 30 +++++++++++------------ kafka/protocol/fetch.py | 18 +++++++------- kafka/protocol/group.py | 21 ++++++++-------- kafka/protocol/metadata.py | 14 +++++------ kafka/protocol/offset.py | 10 ++++---- kafka/protocol/produce.py | 29 ++++++++++++++++------ test/test_client_async.py | 5 ++-- test/test_conn.py | 5 ++-- 13 files changed, 146 insertions(+), 90 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 5192af6f9..e07a3bba1 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -298,13 +298,7 @@ def failed_payloads(payloads): continue request = encoder_fn(payloads=broker_payloads) - # decoder_fn=None signal that the server is expected to not - # send a response. This probably only applies to - # ProduceRequest w/ acks = 0 - expect_response = (decoder_fn is not None) - if expect_response: - selector.register(conn._sock, selectors.EVENT_READ, conn) - future = conn.send(request, expect_response=expect_response) + future = conn.send(request) if future.failed(): log.error("Request failed: %s", future.exception) @@ -313,7 +307,7 @@ def failed_payloads(payloads): failed_payloads(broker_payloads) continue - if not expect_response: + if not request.expect_response(): for payload in broker_payloads: topic_partition = (str(payload.topic), payload.partition) responses[topic_partition] = None diff --git a/kafka/client_async.py b/kafka/client_async.py index 2199abcf3..754807da6 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -464,12 +464,7 @@ def send(self, node_id, request): if not self._maybe_connect(node_id): return Future().failure(Errors.NodeNotReadyError(node_id)) - # Every request gets a response, except one special case: - expect_response = True - if isinstance(request, tuple(ProduceRequest)) and request.required_acks == 0: - expect_response = False - - return self._conns[node_id].send(request, expect_response=expect_response) + return self._conns[node_id].send(request) def poll(self, timeout_ms=None, future=None, sleep=True): """Try to read and write to sockets. 
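The upshot of this refactor is that the "no response expected" decision now lives on the request object itself instead of being threaded through conn.send() as a keyword. A small sketch of the resulting behaviour, mirroring the expect_response() overrides introduced further down in this patch:

    from kafka.protocol.produce import ProduceRequest
    from kafka.protocol.metadata import MetadataRequest

    # acks=0 produce requests are fire-and-forget: no broker response is expected
    req = ProduceRequest[0](required_acks=0, timeout=0, topics=[])
    assert req.expect_response() is False

    # every other request type keeps the default of True from the Request base class
    assert MetadataRequest[0]([]).expect_response() is True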
diff --git a/kafka/conn.py b/kafka/conn.py index ca7899447..a2f40cc6a 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -521,7 +521,7 @@ def close(self, error=None): ifr.future.failure(error) self.config['state_change_callback'](self) - def send(self, request, expect_response=True): + def send(self, request): """send request, return Future() Can block on network if request is larger than send_buffer_bytes @@ -533,9 +533,9 @@ def send(self, request, expect_response=True): return future.failure(Errors.ConnectionError(str(self))) elif not self.can_send_more(): return future.failure(Errors.TooManyInFlightRequests(str(self))) - return self._send(request, expect_response=expect_response) + return self._send(request) - def _send(self, request, expect_response=True): + def _send(self, request): assert self.state in (ConnectionStates.AUTHENTICATING, ConnectionStates.CONNECTED) future = Future() correlation_id = self._next_correlation_id() @@ -565,7 +565,7 @@ def _send(self, request, expect_response=True): return future.failure(error) log.debug('%s Request %d: %s', self, correlation_id, request) - if expect_response: + if request.expect_response(): ifr = InFlightRequest(request=request, correlation_id=correlation_id, response_type=request.RESPONSE_TYPE, diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index 89ea73981..c5142b3ec 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,10 +1,10 @@ from __future__ import absolute_import -from .struct import Struct +from .api import Request, Response from .types import Array, Boolean, Bytes, Int16, Int32, Schema, String -class ApiVersionResponse_v0(Struct): +class ApiVersionResponse_v0(Response): API_KEY = 18 API_VERSION = 0 SCHEMA = Schema( @@ -16,7 +16,7 @@ class ApiVersionResponse_v0(Struct): ) -class ApiVersionRequest_v0(Struct): +class ApiVersionRequest_v0(Request): API_KEY = 18 API_VERSION = 0 RESPONSE_TYPE = ApiVersionResponse_v0 @@ -27,7 +27,7 @@ class ApiVersionRequest_v0(Struct): ApiVersionResponse = [ApiVersionResponse_v0] -class CreateTopicsResponse_v0(Struct): +class CreateTopicsResponse_v0(Response): API_KEY = 19 API_VERSION = 0 SCHEMA = Schema( @@ -37,7 +37,7 @@ class CreateTopicsResponse_v0(Struct): ) -class CreateTopicsResponse_v1(Struct): +class CreateTopicsResponse_v1(Response): API_KEY = 19 API_VERSION = 1 SCHEMA = Schema( @@ -48,7 +48,7 @@ class CreateTopicsResponse_v1(Struct): ) -class CreateTopicsRequest_v0(Struct): +class CreateTopicsRequest_v0(Request): API_KEY = 19 API_VERSION = 0 RESPONSE_TYPE = CreateTopicsResponse_v0 @@ -67,7 +67,7 @@ class CreateTopicsRequest_v0(Struct): ) -class CreateTopicsRequest_v1(Struct): +class CreateTopicsRequest_v1(Request): API_KEY = 19 API_VERSION = 1 RESPONSE_TYPE = CreateTopicsResponse_v1 @@ -91,7 +91,7 @@ class CreateTopicsRequest_v1(Struct): CreateTopicsResponse = [CreateTopicsResponse_v0, CreateTopicsRequest_v1] -class DeleteTopicsResponse_v0(Struct): +class DeleteTopicsResponse_v0(Response): API_KEY = 20 API_VERSION = 0 SCHEMA = Schema( @@ -101,7 +101,7 @@ class DeleteTopicsResponse_v0(Struct): ) -class DeleteTopicsRequest_v0(Struct): +class DeleteTopicsRequest_v0(Request): API_KEY = 20 API_VERSION = 0 RESPONSE_TYPE = DeleteTopicsResponse_v0 @@ -115,7 +115,7 @@ class DeleteTopicsRequest_v0(Struct): DeleteTopicsResponse = [DeleteTopicsResponse_v0] -class ListGroupsResponse_v0(Struct): +class ListGroupsResponse_v0(Response): API_KEY = 16 API_VERSION = 0 SCHEMA = Schema( @@ -126,7 +126,7 @@ class ListGroupsResponse_v0(Struct): ) -class 
ListGroupsRequest_v0(Struct): +class ListGroupsRequest_v0(Request): API_KEY = 16 API_VERSION = 0 RESPONSE_TYPE = ListGroupsResponse_v0 @@ -137,7 +137,7 @@ class ListGroupsRequest_v0(Struct): ListGroupsResponse = [ListGroupsResponse_v0] -class DescribeGroupsResponse_v0(Struct): +class DescribeGroupsResponse_v0(Response): API_KEY = 15 API_VERSION = 0 SCHEMA = Schema( @@ -156,7 +156,7 @@ class DescribeGroupsResponse_v0(Struct): ) -class DescribeGroupsRequest_v0(Struct): +class DescribeGroupsRequest_v0(Request): API_KEY = 15 API_VERSION = 0 RESPONSE_TYPE = DescribeGroupsResponse_v0 @@ -169,7 +169,7 @@ class DescribeGroupsRequest_v0(Struct): DescribeGroupsResponse = [DescribeGroupsResponse_v0] -class SaslHandShakeResponse_v0(Struct): +class SaslHandShakeResponse_v0(Response): API_KEY = 17 API_VERSION = 0 SCHEMA = Schema( @@ -178,7 +178,7 @@ class SaslHandShakeResponse_v0(Struct): ) -class SaslHandShakeRequest_v0(Struct): +class SaslHandShakeRequest_v0(Request): API_KEY = 17 API_VERSION = 0 RESPONSE_TYPE = SaslHandShakeResponse_v0 diff --git a/kafka/protocol/api.py b/kafka/protocol/api.py index 7779aac9c..ec24a3993 100644 --- a/kafka/protocol/api.py +++ b/kafka/protocol/api.py @@ -1,5 +1,7 @@ from __future__ import absolute_import +import abc + from .struct import Struct from .types import Int16, Int32, String, Schema @@ -16,3 +18,50 @@ def __init__(self, request, correlation_id=0, client_id='kafka-python'): super(RequestHeader, self).__init__( request.API_KEY, request.API_VERSION, correlation_id, client_id ) + + +class Request(Struct): + __metaclass__ = abc.ABCMeta + + @abc.abstractproperty + def API_KEY(self): + """Integer identifier for api request""" + pass + + @abc.abstractproperty + def API_VERSION(self): + """Integer of api request version""" + pass + + @abc.abstractproperty + def SCHEMA(self): + """An instance of Schema() representing the request structure""" + pass + + @abc.abstractproperty + def RESPONSE_TYPE(self): + """The Response class associated with the api request""" + pass + + def expect_response(self): + """Override this method if an api request does not always generate a response""" + return True + + +class Response(Struct): + __metaclass__ = abc.ABCMeta + + @abc.abstractproperty + def API_KEY(self): + """Integer identifier for api request/response""" + pass + + @abc.abstractproperty + def API_VERSION(self): + """Integer of api request/response version""" + pass + + @abc.abstractproperty + def SCHEMA(self): + """An instance of Schema() representing the response structure""" + pass diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 564537240..bcffe67b6 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -1,10 +1,10 @@ from __future__ import absolute_import -from .struct import Struct +from .api import Request, Response from .types import Array, Int16, Int32, Int64, Schema, String -class OffsetCommitResponse_v0(Struct): +class OffsetCommitResponse_v0(Response): API_KEY = 8 API_VERSION = 0 SCHEMA = Schema( @@ -16,19 +16,19 @@ class OffsetCommitResponse_v0(Struct): ) -class OffsetCommitResponse_v1(Struct): +class OffsetCommitResponse_v1(Response): API_KEY = 8 API_VERSION = 1 SCHEMA = OffsetCommitResponse_v0.SCHEMA -class OffsetCommitResponse_v2(Struct): +class OffsetCommitResponse_v2(Response): API_KEY = 8 API_VERSION = 2 SCHEMA = OffsetCommitResponse_v1.SCHEMA -class OffsetCommitRequest_v0(Struct): +class OffsetCommitRequest_v0(Request): API_KEY = 8 API_VERSION = 0 # Zookeeper-backed storage RESPONSE_TYPE = OffsetCommitResponse_v0 @@ -43,7 
+43,7 @@ class OffsetCommitRequest_v0(Struct): ) -class OffsetCommitRequest_v1(Struct): +class OffsetCommitRequest_v1(Request): API_KEY = 8 API_VERSION = 1 # Kafka-backed storage RESPONSE_TYPE = OffsetCommitResponse_v1 @@ -61,7 +61,7 @@ class OffsetCommitRequest_v1(Struct): ) -class OffsetCommitRequest_v2(Struct): +class OffsetCommitRequest_v2(Request): API_KEY = 8 API_VERSION = 2 # added retention_time, dropped timestamp RESPONSE_TYPE = OffsetCommitResponse_v2 @@ -87,7 +87,7 @@ class OffsetCommitRequest_v2(Struct): OffsetCommitResponse_v2] -class OffsetFetchResponse_v0(Struct): +class OffsetFetchResponse_v0(Response): API_KEY = 9 API_VERSION = 0 SCHEMA = Schema( @@ -101,13 +101,13 @@ class OffsetFetchResponse_v0(Struct): ) -class OffsetFetchResponse_v1(Struct): +class OffsetFetchResponse_v1(Response): API_KEY = 9 API_VERSION = 1 SCHEMA = OffsetFetchResponse_v0.SCHEMA -class OffsetFetchResponse_v2(Struct): +class OffsetFetchResponse_v2(Response): # Added in KIP-88 API_KEY = 9 API_VERSION = 2 @@ -123,7 +123,7 @@ class OffsetFetchResponse_v2(Struct): ) -class OffsetFetchRequest_v0(Struct): +class OffsetFetchRequest_v0(Request): API_KEY = 9 API_VERSION = 0 # zookeeper-backed storage RESPONSE_TYPE = OffsetFetchResponse_v0 @@ -135,14 +135,14 @@ class OffsetFetchRequest_v0(Struct): ) -class OffsetFetchRequest_v1(Struct): +class OffsetFetchRequest_v1(Request): API_KEY = 9 API_VERSION = 1 # kafka-backed storage RESPONSE_TYPE = OffsetFetchResponse_v1 SCHEMA = OffsetFetchRequest_v0.SCHEMA -class OffsetFetchRequest_v2(Struct): +class OffsetFetchRequest_v2(Request): # KIP-88: Allows passing null topics to return offsets for all partitions # that the consumer group has a stored offset for, even if no consumer in # the group is currently consuming that partition. 
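For readers following the api.py hunk above, the new base classes make the required surface of a protocol class explicit: API_KEY, API_VERSION, SCHEMA, and (for requests) RESPONSE_TYPE. A hedged sketch of what a subclass looks like; PingRequest/PingResponse and API_KEY 999 are invented purely for illustration and are not real Kafka APIs:

    from kafka.protocol.api import Request, Response
    from kafka.protocol.types import Int16, Schema, String

    class PingResponse_v0(Response):           # hypothetical example only
        API_KEY = 999
        API_VERSION = 0
        SCHEMA = Schema(('error_code', Int16))

    class PingRequest_v0(Request):             # hypothetical example only
        API_KEY = 999
        API_VERSION = 0
        RESPONSE_TYPE = PingResponse_v0
        SCHEMA = Schema(('name', String('utf-8')))
        # expect_response() is inherited from Request and defaults to True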
@@ -158,7 +158,7 @@ class OffsetFetchRequest_v2(Struct): OffsetFetchResponse_v2] -class GroupCoordinatorResponse_v0(Struct): +class GroupCoordinatorResponse_v0(Response): API_KEY = 10 API_VERSION = 0 SCHEMA = Schema( @@ -169,7 +169,7 @@ class GroupCoordinatorResponse_v0(Struct): ) -class GroupCoordinatorRequest_v0(Struct): +class GroupCoordinatorRequest_v0(Request): API_KEY = 10 API_VERSION = 0 RESPONSE_TYPE = GroupCoordinatorResponse_v0 diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index 6a9ad5b99..b441e63f9 100644 --- a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -1,11 +1,11 @@ from __future__ import absolute_import +from .api import Request, Response from .message import MessageSet -from .struct import Struct from .types import Array, Int16, Int32, Int64, Schema, String -class FetchResponse_v0(Struct): +class FetchResponse_v0(Response): API_KEY = 1 API_VERSION = 0 SCHEMA = Schema( @@ -19,7 +19,7 @@ class FetchResponse_v0(Struct): ) -class FetchResponse_v1(Struct): +class FetchResponse_v1(Response): API_KEY = 1 API_VERSION = 1 SCHEMA = Schema( @@ -34,19 +34,19 @@ class FetchResponse_v1(Struct): ) -class FetchResponse_v2(Struct): +class FetchResponse_v2(Response): API_KEY = 1 API_VERSION = 2 SCHEMA = FetchResponse_v1.SCHEMA # message format changed internally -class FetchResponse_v3(Struct): +class FetchResponse_v3(Response): API_KEY = 1 API_VERSION = 3 SCHEMA = FetchResponse_v2.SCHEMA -class FetchRequest_v0(Struct): +class FetchRequest_v0(Request): API_KEY = 1 API_VERSION = 0 RESPONSE_TYPE = FetchResponse_v0 @@ -63,21 +63,21 @@ class FetchRequest_v0(Struct): ) -class FetchRequest_v1(Struct): +class FetchRequest_v1(Request): API_KEY = 1 API_VERSION = 1 RESPONSE_TYPE = FetchResponse_v1 SCHEMA = FetchRequest_v0.SCHEMA -class FetchRequest_v2(Struct): +class FetchRequest_v2(Request): API_KEY = 1 API_VERSION = 2 RESPONSE_TYPE = FetchResponse_v2 SCHEMA = FetchRequest_v1.SCHEMA -class FetchRequest_v3(Struct): +class FetchRequest_v3(Request): API_KEY = 1 API_VERSION = 3 RESPONSE_TYPE = FetchResponse_v3 diff --git a/kafka/protocol/group.py b/kafka/protocol/group.py index 0e0b70e19..5cab75404 100644 --- a/kafka/protocol/group.py +++ b/kafka/protocol/group.py @@ -1,10 +1,11 @@ from __future__ import absolute_import +from .api import Request, Response from .struct import Struct from .types import Array, Bytes, Int16, Int32, Schema, String -class JoinGroupResponse_v0(Struct): +class JoinGroupResponse_v0(Response): API_KEY = 11 API_VERSION = 0 SCHEMA = Schema( @@ -19,13 +20,13 @@ class JoinGroupResponse_v0(Struct): ) -class JoinGroupResponse_v1(Struct): +class JoinGroupResponse_v1(Response): API_KEY = 11 API_VERSION = 1 SCHEMA = JoinGroupResponse_v0.SCHEMA -class JoinGroupRequest_v0(Struct): +class JoinGroupRequest_v0(Request): API_KEY = 11 API_VERSION = 0 RESPONSE_TYPE = JoinGroupResponse_v0 @@ -41,7 +42,7 @@ class JoinGroupRequest_v0(Struct): UNKNOWN_MEMBER_ID = '' -class JoinGroupRequest_v1(Struct): +class JoinGroupRequest_v1(Request): API_KEY = 11 API_VERSION = 1 RESPONSE_TYPE = JoinGroupResponse_v1 @@ -70,7 +71,7 @@ class ProtocolMetadata(Struct): ) -class SyncGroupResponse_v0(Struct): +class SyncGroupResponse_v0(Response): API_KEY = 14 API_VERSION = 0 SCHEMA = Schema( @@ -79,7 +80,7 @@ class SyncGroupResponse_v0(Struct): ) -class SyncGroupRequest_v0(Struct): +class SyncGroupRequest_v0(Request): API_KEY = 14 API_VERSION = 0 RESPONSE_TYPE = SyncGroupResponse_v0 @@ -107,7 +108,7 @@ class MemberAssignment(Struct): ) -class HeartbeatResponse_v0(Struct): +class 
HeartbeatResponse_v0(Response): API_KEY = 12 API_VERSION = 0 SCHEMA = Schema( @@ -115,7 +116,7 @@ class HeartbeatResponse_v0(Struct): ) -class HeartbeatRequest_v0(Struct): +class HeartbeatRequest_v0(Request): API_KEY = 12 API_VERSION = 0 RESPONSE_TYPE = HeartbeatResponse_v0 @@ -130,7 +131,7 @@ class HeartbeatRequest_v0(Struct): HeartbeatResponse = [HeartbeatResponse_v0] -class LeaveGroupResponse_v0(Struct): +class LeaveGroupResponse_v0(Response): API_KEY = 13 API_VERSION = 0 SCHEMA = Schema( @@ -138,7 +139,7 @@ class LeaveGroupResponse_v0(Struct): ) -class LeaveGroupRequest_v0(Struct): +class LeaveGroupRequest_v0(Request): API_KEY = 13 API_VERSION = 0 RESPONSE_TYPE = LeaveGroupResponse_v0 diff --git a/kafka/protocol/metadata.py b/kafka/protocol/metadata.py index e017c5904..907ec2577 100644 --- a/kafka/protocol/metadata.py +++ b/kafka/protocol/metadata.py @@ -1,10 +1,10 @@ from __future__ import absolute_import -from .struct import Struct +from .api import Request, Response from .types import Array, Boolean, Int16, Int32, Schema, String -class MetadataResponse_v0(Struct): +class MetadataResponse_v0(Response): API_KEY = 3 API_VERSION = 0 SCHEMA = Schema( @@ -24,7 +24,7 @@ class MetadataResponse_v0(Struct): ) -class MetadataResponse_v1(Struct): +class MetadataResponse_v1(Response): API_KEY = 3 API_VERSION = 1 SCHEMA = Schema( @@ -47,7 +47,7 @@ class MetadataResponse_v1(Struct): ) -class MetadataResponse_v2(Struct): +class MetadataResponse_v2(Response): API_KEY = 3 API_VERSION = 2 SCHEMA = Schema( @@ -71,7 +71,7 @@ class MetadataResponse_v2(Struct): ) -class MetadataRequest_v0(Struct): +class MetadataRequest_v0(Request): API_KEY = 3 API_VERSION = 0 RESPONSE_TYPE = MetadataResponse_v0 @@ -81,7 +81,7 @@ class MetadataRequest_v0(Struct): ALL_TOPICS = None # Empty Array (len 0) for topics returns all topics -class MetadataRequest_v1(Struct): +class MetadataRequest_v1(Request): API_KEY = 3 API_VERSION = 1 RESPONSE_TYPE = MetadataResponse_v1 @@ -90,7 +90,7 @@ class MetadataRequest_v1(Struct): NO_TOPICS = None # Empty array (len 0) for topics returns no topics -class MetadataRequest_v2(Struct): +class MetadataRequest_v2(Request): API_KEY = 3 API_VERSION = 2 RESPONSE_TYPE = MetadataResponse_v2 diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 5182d63ee..588dfec72 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -from .struct import Struct +from .api import Request, Response from .types import Array, Int16, Int32, Int64, Schema, String @@ -10,7 +10,7 @@ class OffsetResetStrategy(object): NONE = 0 -class OffsetResponse_v0(Struct): +class OffsetResponse_v0(Response): API_KEY = 2 API_VERSION = 0 SCHEMA = Schema( @@ -22,7 +22,7 @@ class OffsetResponse_v0(Struct): ('offsets', Array(Int64)))))) ) -class OffsetResponse_v1(Struct): +class OffsetResponse_v1(Response): API_KEY = 2 API_VERSION = 1 SCHEMA = Schema( @@ -36,7 +36,7 @@ class OffsetResponse_v1(Struct): ) -class OffsetRequest_v0(Struct): +class OffsetRequest_v0(Request): API_KEY = 2 API_VERSION = 0 RESPONSE_TYPE = OffsetResponse_v0 @@ -53,7 +53,7 @@ class OffsetRequest_v0(Struct): 'replica_id': -1 } -class OffsetRequest_v1(Struct): +class OffsetRequest_v1(Request): API_KEY = 2 API_VERSION = 1 RESPONSE_TYPE = OffsetResponse_v1 diff --git a/kafka/protocol/produce.py b/kafka/protocol/produce.py index c1a519ebb..9b03354f2 100644 --- a/kafka/protocol/produce.py +++ b/kafka/protocol/produce.py @@ -1,11 +1,11 @@ from __future__ import absolute_import +from .api 
import Request, Response from .message import MessageSet -from .struct import Struct from .types import Int16, Int32, Int64, String, Array, Schema -class ProduceResponse_v0(Struct): +class ProduceResponse_v0(Response): API_KEY = 0 API_VERSION = 0 SCHEMA = Schema( @@ -18,7 +18,7 @@ class ProduceResponse_v0(Struct): ) -class ProduceResponse_v1(Struct): +class ProduceResponse_v1(Response): API_KEY = 0 API_VERSION = 1 SCHEMA = Schema( @@ -32,7 +32,7 @@ class ProduceResponse_v1(Struct): ) -class ProduceResponse_v2(Struct): +class ProduceResponse_v2(Response): API_KEY = 0 API_VERSION = 2 SCHEMA = Schema( @@ -47,7 +47,7 @@ class ProduceResponse_v2(Struct): ) -class ProduceRequest_v0(Struct): +class ProduceRequest_v0(Request): API_KEY = 0 API_VERSION = 0 RESPONSE_TYPE = ProduceResponse_v0 @@ -61,20 +61,35 @@ class ProduceRequest_v0(Struct): ('messages', MessageSet))))) ) + def expect_response(self): + if self.required_acks == 0: # pylint: disable=no-member + return False + return True -class ProduceRequest_v1(Struct): + +class ProduceRequest_v1(Request): API_KEY = 0 API_VERSION = 1 RESPONSE_TYPE = ProduceResponse_v1 SCHEMA = ProduceRequest_v0.SCHEMA + def expect_response(self): + if self.required_acks == 0: # pylint: disable=no-member + return False + return True + -class ProduceRequest_v2(Struct): +class ProduceRequest_v2(Request): API_KEY = 0 API_VERSION = 2 RESPONSE_TYPE = ProduceResponse_v2 SCHEMA = ProduceRequest_v1.SCHEMA + def expect_response(self): + if self.required_acks == 0: # pylint: disable=no-member + return False + return True + ProduceRequest = [ProduceRequest_v0, ProduceRequest_v1, ProduceRequest_v2] ProduceResponse = [ProduceResponse_v0, ProduceResponse_v1, ProduceResponse_v2] diff --git a/test/test_client_async.py b/test/test_client_async.py index b3873cad4..51cb9415e 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -234,13 +234,14 @@ def test_send(cli, conn): cli._maybe_connect(0) # ProduceRequest w/ 0 required_acks -> no response request = ProduceRequest[0](0, 0, []) + assert request.expect_response() is False ret = cli.send(0, request) - assert conn.send.called_with(request, expect_response=False) + assert conn.send.called_with(request) assert isinstance(ret, Future) request = MetadataRequest[0]([]) cli.send(0, request) - assert conn.send.called_with(request, expect_response=True) + assert conn.send.called_with(request) def test_poll(mocker): diff --git a/test/test_conn.py b/test/test_conn.py index 358a57678..9c7700b99 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -10,6 +10,7 @@ from kafka.conn import BrokerConnection, ConnectionStates, collect_hosts from kafka.protocol.api import RequestHeader from kafka.protocol.metadata import MetadataRequest +from kafka.protocol.produce import ProduceRequest import kafka.common as Errors @@ -111,7 +112,7 @@ def test_send_max_ifr(conn): def test_send_no_response(_socket, conn): conn.connect() assert conn.state is ConnectionStates.CONNECTED - req = MetadataRequest[0]([]) + req = ProduceRequest[0](required_acks=0, timeout=0, topics=[]) header = RequestHeader(req, client_id=conn.config['client_id']) payload_bytes = len(header.encode()) + len(req.encode()) third = payload_bytes // 3 @@ -119,7 +120,7 @@ def test_send_no_response(_socket, conn): _socket.send.side_effect = [4, third, third, third, remainder] assert len(conn.in_flight_requests) == 0 - f = conn.send(req, expect_response=False) + f = conn.send(req) assert f.succeeded() is True assert f.value is None assert len(conn.in_flight_requests) == 0 From 
89813b4d962ecca41d50ca425c21cb627919fd9e Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 14 Mar 2017 13:36:40 -0700 Subject: [PATCH 152/291] Release 1.3.3 --- CHANGES.md | 5 ++++- docs/changelog.rst | 5 ++++- kafka/version.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 87ea9875f..28c6737d7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ -# 1.3.3 (Unreleased) +# 1.3.3 (Mar 14, 2017) Core / Protocol +* Derive all api classes from Request / Response base classes (dpkp 1030) +* Prefer python-lz4 if available (dpkp 1024) * Fix kwarg handing in kafka.protocol.struct.Struct (dpkp 1025) * Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. (Drizzt1991 962) @@ -51,6 +53,7 @@ Client * Pass error to BrokerConnection.close() (dpkp) Bugfixes +* Free lz4 decompression context to avoid leak (dpkp 1024) * Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) * Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) * Fix blackout calculation: mark last_attempt time during connection close (dpkp 1008) diff --git a/docs/changelog.rst b/docs/changelog.rst index caf18d67c..8a0c37647 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,11 +1,13 @@ Changelog ========= -1.3.3 (Unreleased) +1.3.3 (Mar 14, 2017) #################### Core / Protocol --------------- +* Derive all api classes from Request / Response base classes (dpkp 1030) +* Prefer python-lz4 if available (dpkp 1024) * Fix kwarg handing in kafka.protocol.struct.Struct (dpkp 1025) * Fixed couple of "leaks" when gc is disabled (Mephius 979) * Added `max_bytes` option and FetchRequest_v3 usage. (Drizzt1991 962) @@ -61,6 +63,7 @@ Client Bugfixes -------- +* Free lz4 decompression context to avoid leak (dpkp 1024) * Fix sasl reconnect bug: auth future must be reset on close (dpkp 1003) * Fix raise exception from SubscriptionState.assign_from_subscribed (qntln 960) * Fix blackout calculation: mark last_attempt time during connection close (dpkp 1008) diff --git a/kafka/version.py b/kafka/version.py index 58419f85f..07f744ca5 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.dev' +__version__ = '1.3.3' From 9ab5954b6b2159ceaa259b1593b9e546576e6a6f Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Fri, 7 Apr 2017 15:19:36 -0700 Subject: [PATCH 153/291] Upgrade kafka-python version to 1.3.3.post1 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 07f744ca5..1b124b19c 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3' +__version__ = '1.3.3.post1' From a7db0bba61d7fa4e9db7019b78dfd76a4bf22f59 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Tue, 11 Apr 2017 14:42:58 -0700 Subject: [PATCH 154/291] Register connection socket for read event. --- kafka/client.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index e07a3bba1..2a09009f1 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -298,6 +298,8 @@ def failed_payloads(payloads): continue request = encoder_fn(payloads=broker_payloads) + if request.expect_response(): + selector.register(conn._sock, selectors.EVENT_READ, conn) future = conn.send(request) if future.failed(): @@ -416,8 +418,7 @@ def failed_payloads(payloads): # decoder_fn=None signal that the server is expected to not # send a response. 
This probably only applies to # ProduceRequest w/ acks = 0 - expect_response = (decoder_fn is not None) - future = conn.send(request, expect_response=expect_response) + future = conn.send(request) while not future.is_done: conn.recv() @@ -425,7 +426,7 @@ def failed_payloads(payloads): if future.failed(): failed_payloads(payloads) - elif not expect_response: + elif not request.expect_response(): failed_payloads(payloads) else: From fd2dcdc0ae0fba9034e417ea01c2762f7207f70d Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 17 Apr 2017 16:08:40 -0700 Subject: [PATCH 155/291] Change metric name --- docs/changelog.rst | 3 +++ kafka/client.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8a0c37647..b0a11248b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,8 @@ Changelog ========= +1.3.3.post1 (Apr 17, 2017) +########################## +Resolve merge conflicts while merging changes for version 1.3.3 1.3.3 (Mar 14, 2017) #################### diff --git a/kafka/client.py b/kafka/client.py index 2a09009f1..e34601583 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -735,9 +735,9 @@ def send_offset_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('offset_commit') + @time_metric('list_offset') def send_list_offset_request(self, payloads=[], fail_on_error=True, - callback=None): + callback=None): resps = self._send_broker_aware_request( payloads, KafkaProtocol.encode_list_offset_request, From 9311d3c75f0fe509278b8b3d31be13bb24378849 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 17 Apr 2017 18:18:03 -0700 Subject: [PATCH 156/291] Disable pylint error E1129: contextmanager not implementing __enter__ and __exit__ functions --- pylint.rc | 1 + 1 file changed, 1 insertion(+) diff --git a/pylint.rc b/pylint.rc index 7f265074a..fd84f7838 100644 --- a/pylint.rc +++ b/pylint.rc @@ -1,2 +1,3 @@ [TYPECHECK] ignored-classes=SyncManager,_socketobject +disable=E1129 From c34f1bb798574fded90c473412086753e9f23946 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Fri, 21 Apr 2017 10:57:15 -0700 Subject: [PATCH 157/291] Change to offset_list to keep consistent with other metric names --- kafka/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index e34601583..75b103bce 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -735,7 +735,7 @@ def send_offset_request(self, payloads=[], fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] - @time_metric('list_offset') + @time_metric('offset_list') def send_list_offset_request(self, payloads=[], fail_on_error=True, callback=None): resps = self._send_broker_aware_request( From 38782cb9965396ee134d9f5e7d20775abc3e19ab Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 24 Apr 2017 13:48:02 -0700 Subject: [PATCH 158/291] Fix locale-gen not found --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6d777a999..05521eace 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,4 @@ FROM ubuntu:xenial -RUN /usr/sbin/locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 ENV DEBIAN_FRONTEND=noninteractive RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" >> /etc/apt/sources.list @@ -21,6 +19,8 @@ 
RUN apt-get update && apt-get install -y python2.7-dev \ python-pip \ python-tox +RUN /usr/sbin/locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle" ENV PATH="$PATH:$JAVA_HOME/bin" From b701903b3e8a8b53a41cb9279c9451c10e13a0ae Mon Sep 17 00:00:00 2001 From: Vipul Singh Date: Mon, 24 Apr 2017 15:48:11 -0700 Subject: [PATCH 159/291] bump version to 1.3.3.post2 and update changelog --- docs/changelog.rst | 5 +++++ kafka/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index b0a11248b..f3c84a98c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,10 @@ Changelog ========= + +1.3.3.post2 (Apr 24, 2017) +########################## +Fix locale-gen not found + 1.3.3.post1 (Apr 17, 2017) ########################## Resolve merge conflicts while merging changes for version 1.3.3 diff --git a/kafka/version.py b/kafka/version.py index 1b124b19c..cd490eb7e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post1' +__version__ = '1.3.3.post2' From 20cc5cddf1186398a2970754ac14f054497404aa Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sun, 13 Aug 2017 11:47:33 -0700 Subject: [PATCH 160/291] Initialize metadata_snapshot in group coordinator (#1174) --- kafka/coordinator/consumer.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index 009fb592f..bee407b62 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -87,7 +87,7 @@ def __init__(self, client, subscription, metrics, **configs): assert self.config['assignors'], 'Coordinator requires assignors' self._subscription = subscription - self._metadata_snapshot = {} + self._metadata_snapshot = self._build_metadata_snapshot(subscription, client.cluster) self._assignment_snapshot = None self._cluster = client.cluster self._cluster.request_update() @@ -161,15 +161,18 @@ def _handle_metadata_update(self, cluster): for partition in self._metadata_snapshot[topic] ]) - def _subscription_metadata_changed(self, cluster): - if not self._subscription.partitions_auto_assigned(): - return False - + def _build_metadata_snapshot(self, subscription, cluster): metadata_snapshot = {} - for topic in self._subscription.group_subscription(): + for topic in subscription.group_subscription(): partitions = cluster.partitions_for_topic(topic) or [] metadata_snapshot[topic] = set(partitions) + return metadata_snapshot + + def _subscription_metadata_changed(self, cluster): + if not self._subscription.partitions_auto_assigned(): + return False + metadata_snapshot = self._build_metadata_snapshot(self._subscription, cluster) if self._metadata_snapshot != metadata_snapshot: self._metadata_snapshot = metadata_snapshot return True From 1da597bb7f731b27ebf991f5a01079df6fd58503 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 21 Aug 2017 16:46:43 -0700 Subject: [PATCH 161/291] Pin coveralls version to for pypy dependency failure --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 745d57984..5b6abb890 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ def run(cls): description="Pure Python client for Apache Kafka", long_description=README, keywords="apache kafka", + install_requires=["coveralls<=1.1"], classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", From 7703b1a48d1a892388ae98e293cc377e3c22b2d9 Mon Sep 17 00:00:00 2001 
From: Manpreet Singh Date: Mon, 18 Sep 2017 15:32:26 -0700 Subject: [PATCH 162/291] Fix seek logic used with poll method --- kafka/consumer/fetcher.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index ad703a594..39113cb5f 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -356,25 +356,28 @@ def _append(self, drained, part, max_records): " %s since it is no longer fetchable", tp) elif fetch_offset == position: - part_records = part.take(max_records) - if not part_records: - return 0 - next_offset = part_records[-1].offset + 1 + records_found = 0 + while records_found < max_records: + part_records = part.take(max_records - records_found) + if not part_records: + break + next_offset = part_records[-1].offset + 1 - log.log(0, "Returning fetched records at offset %d for assigned" - " partition %s and update position to %s", position, - tp, next_offset) + log.log(0, "Returning fetched records at offset %d for assigned" + " partition %s and update position to %s", position, + tp, next_offset) - for record in part_records: - # Fetched compressed messages may include additional records - if record.offset < fetch_offset: - log.debug("Skipping message offset: %s (expecting %s)", - record.offset, fetch_offset) - continue - drained[tp].append(record) + for record in part_records: + # Fetched compressed messages may include additional records + if record.offset < fetch_offset: + log.debug("Skipping message offset: %s (expecting %s)", + record.offset, fetch_offset) + continue + drained[tp].append(record) + records_found += 1 - self._subscriptions.assignment[tp].position = next_offset - return len(part_records) + self._subscriptions.assignment[tp].position = next_offset + return records_found else: # these records aren't next in line based on the last consumed From b36c72f4bcde1ce172bce49d7218f13eda3c8d8d Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Tue, 19 Sep 2017 15:20:43 -0700 Subject: [PATCH 163/291] Bump version to 1.3.3.post3 --- docs/changelog.rst | 5 +++++ kafka/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index f3c84a98c..cd027868b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,11 @@ Changelog ========= +1.3.3.post3 (Sept 19, 2017) +########################## +* Initialize metadata_snapshot correctly to avoid group rejoin after fixed metadata update +* Fix seek logic used with poll method + 1.3.3.post2 (Apr 24, 2017) ########################## Fix locale-gen not found diff --git a/kafka/version.py b/kafka/version.py index cd490eb7e..45e99f0d8 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post2' +__version__ = '1.3.3.post3' From d5799a403f2f8d0ea7575827dc079deef10256da Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 20 Sep 2017 11:49:56 -0700 Subject: [PATCH 164/291] Remove coveralls dependency --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 5b6abb890..745d57984 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,6 @@ def run(cls): description="Pure Python client for Apache Kafka", long_description=README, keywords="apache kafka", - install_requires=["coveralls<=1.1"], classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", From 1c275d734ed8c4a6fb34512873e02f3433cc1d83 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 20 Sep 2017 
12:50:42 -0700 Subject: [PATCH 165/291] Bump version --- docs/changelog.rst | 5 +++++ kafka/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index cd027868b..ea86a3402 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,11 @@ Changelog ========= +1.3.3.post4 (Sept 20, 2017) +########################## +* Remove coveralls dependency + + 1.3.3.post3 (Sept 19, 2017) ########################## * Initialize metadata_snapshot correctly to avoid group rejoin after fixed metadata update diff --git a/kafka/version.py b/kafka/version.py index 45e99f0d8..e8b61eb01 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post3' +__version__ = '1.3.3.post4' From 9ec83ad8bac37cbc3b7ef32113b039d4b8cf7407 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 25 Sep 2017 19:27:31 -0700 Subject: [PATCH 166/291] Send client poll request on every consumer poll request --- kafka/consumer/group.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index ca41385f4..e39fa22ed 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -597,6 +597,7 @@ def _poll_once(self, timeout_ms, max_records): # fetched records. if not partial: self._fetcher.send_fetches() + self._client.poll(timeout_ms=timeout_ms) return records # Send any new fetches (won't resend pending fetches) From 94ded2222835ba7c3fdb7879a01a6b97ec34dff0 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Fri, 29 Sep 2017 13:22:46 -0700 Subject: [PATCH 167/291] Adjust timeout for polling --- kafka/consumer/group.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index e39fa22ed..32bbb0e0a 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -597,7 +597,9 @@ def _poll_once(self, timeout_ms, max_records): # fetched records. 
if not partial: self._fetcher.send_fetches() - self._client.poll(timeout_ms=timeout_ms) + + # To handle any heartbeat responses + self._client.poll(timeout_ms=1) return records # Send any new fetches (won't resend pending fetches) From 5c940111d3a863d77e58b5c8f2421a1f9a7c5fe1 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Fri, 29 Sep 2017 17:32:47 -0700 Subject: [PATCH 168/291] Change timeout to 0 --- kafka/consumer/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 32bbb0e0a..7d6bb11a5 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -599,7 +599,7 @@ def _poll_once(self, timeout_ms, max_records): self._fetcher.send_fetches() # To handle any heartbeat responses - self._client.poll(timeout_ms=1) + self._client.poll(timeout_ms=0) return records # Send any new fetches (won't resend pending fetches) From 8c6ec6face5eca80410bc88ff0ba0246c0307dd8 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Mon, 2 Oct 2017 17:29:48 -0700 Subject: [PATCH 169/291] Bump version to 1.3.3.post5 --- docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index ea86a3402..2c0d09fac 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,10 @@ Changelog ========= +1.3.3.post5 (Oct 2, 2017) +########################## +* Fix for rebalance not getting triggered (poll) + 1.3.3.post4 (Sept 20, 2017) ########################## * Remove coveralls dependency diff --git a/kafka/version.py b/kafka/version.py index e8b61eb01..3d727e774 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post4' +__version__ = '1.3.3.post5' From c2b2a92d1eda425f7248058d95542045297517d7 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 5 Oct 2017 11:47:58 -0700 Subject: [PATCH 170/291] Revert client poll fix inside consumer poll --- kafka/consumer/group.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 7d6bb11a5..62e26f3e3 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -598,8 +598,6 @@ def _poll_once(self, timeout_ms, max_records): if not partial: self._fetcher.send_fetches() - # To handle any heartbeat responses - self._client.poll(timeout_ms=0) return records # Send any new fetches (won't resend pending fetches) From 1289fcae8874380970edb7aa550dc52bf2996e43 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 5 Oct 2017 13:05:35 -0700 Subject: [PATCH 171/291] Revert fix for poll --- docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2c0d09fac..0a2b03058 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,10 @@ Changelog ========= +1.3.3.post6 (Oct 5, 2017) +########################## +* Revert fix for rebalance not getting triggered (poll) + 1.3.3.post5 (Oct 2, 2017) ########################## * Fix for rebalance not getting triggered (poll) diff --git a/kafka/version.py b/kafka/version.py index 3d727e774..7fb49369e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post5' +__version__ = '1.3.3.post6' From 49e5db8b838148ee0e5e5ac90f9b87dbc66e709c Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 3 Jan 2018 15:21:26 -0800 Subject: [PATCH 172/291] add admin_client api --- kafka/protocol/admin.py | 2 +- run_itest.sh | 6 ++++++ 
test/fixtures.py | 16 +--------------- test/testutil.py | 2 ++ tox.ini | 1 + 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index c5142b3ec..17fa14a51 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -88,7 +88,7 @@ class CreateTopicsRequest_v1(Request): CreateTopicsRequest = [CreateTopicsRequest_v0, CreateTopicsRequest_v1] -CreateTopicsResponse = [CreateTopicsResponse_v0, CreateTopicsRequest_v1] +CreateTopicsResponse = [CreateTopicsResponse_v0, CreateTopicsResponse_v1] class DeleteTopicsResponse_v0(Response): diff --git a/run_itest.sh b/run_itest.sh index a55802b52..fcc78613f 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -12,3 +12,9 @@ export KAFKA_VERSION='0.10.0.0' tox -e py27 tox -e py35 tox -e pypy + +export KAFKA_VERSION='0.10.1.1' +./build_integration.sh +tox -e py27 +tox -e py35 +tox -e pypy diff --git a/test/fixtures.py b/test/fixtures.py index bfb01b8e5..839bae6d9 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -191,22 +191,8 @@ def instance(cls, broker_id, zk_host, zk_port, zk_chroot=None, (host, port) = (parse.hostname, parse.port) fixture = ExternalService(host, port) else: - # force IPv6 here because of a confusing point: - # - # - if the string "localhost" is passed, Kafka will *only* bind to the IPv4 address of localhost - # (127.0.0.1); however, kafka-python will attempt to connect on ::1 and fail - # - # - if the address literal 127.0.0.1 is passed, the metadata request during bootstrap will return - # the name "localhost" and we'll go back to the first case. This is odd! - # - # Ideally, Kafka would bind to all loopback addresses when we tell it to listen on "localhost" the - # way it makes an IPv6 socket bound to both 0.0.0.0/0 and ::/0 when we tell it to bind to "" (that is - # to say, when we make a listener of PLAINTEXT://:port. - # - # Note that even though we specify the bind host in bracket notation, Kafka responds to the bootstrap - # metadata request without square brackets later. if host is None: - host = "[::1]" + host = "localhost" fixture = KafkaFixture(host, port, broker_id, zk_host, zk_port, zk_chroot, transport=transport, diff --git a/test/testutil.py b/test/testutil.py index c247e6ad7..3a9773a8e 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -12,6 +12,7 @@ from . 
import unittest from kafka import SimpleClient +from kafka.client_async import KafkaClient from kafka.structs import OffsetRequestPayload __all__ = [ @@ -97,6 +98,7 @@ def setUp(self): if self.create_client: self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port)) + self.client_async = KafkaClient(bootstrap_servers='%s:%d' % (self.server.host, self.server.port)) self.client.ensure_topic_exists(self.topic) diff --git a/tox.ini b/tox.ini index 03a6893ad..e6ee86ae2 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = + pylint==1.7.1 pytest pytest-cov pytest-catchlog From 607560bbf3ff5950f6fda9186d8b7bd081b055f9 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 3 Jan 2018 15:22:52 -0800 Subject: [PATCH 173/291] add admin_client api --- kafka/admin_client.py | 136 ++++++++++++++++++++++++++ test/test_admin_client.py | 78 +++++++++++++++ test/test_admin_client_integration.py | 46 +++++++++ 3 files changed, 260 insertions(+) create mode 100644 kafka/admin_client.py create mode 100644 test/test_admin_client.py create mode 100644 test/test_admin_client_integration.py diff --git a/kafka/admin_client.py b/kafka/admin_client.py new file mode 100644 index 000000000..69f3d3903 --- /dev/null +++ b/kafka/admin_client.py @@ -0,0 +1,136 @@ +import time +from .errors import NodeNotReadyError +from .protocol.admin import CreateTopicsRequest, DeleteTopicsRequest +from .protocol.metadata import MetadataRequest + +def convert_new_topic_request_format(new_topic): + return ( + new_topic.name, + new_topic.num_partitions, + new_topic.replication_factor, + [ + (partition_id,replicas) + for partition_id, replicas in new_topic.replica_assignments.items() + ], + [ + (config_key, config_value) + for config_key, config_value in new_topic.configs.items() + ], + ) + +class NewTopic(object): + """ A class for new topic creation + + Arguments: + name (string): name of the topic + num_partitions (int): number of partitions + or -1 if replica_assignment has been specified + replication_factor (int): replication factor or -1 if + replica assignment is specified + replica_assignment (dict of int: [int]): A mapping containing + partition id and replicas to assign to it. + topic_configs (dict of str: str): A mapping of config key + and value for the topic. 
+ """ + + def __init__( + self, + name, + num_partitions, + replication_factor, + replica_assignments=None, + configs=None, + ): + self.name = name + self.configs = configs or {} + self.num_partitions = num_partitions + self.replication_factor = replication_factor + self.replica_assignments = replica_assignments or {} + + def __str__(self): + return ":{}, :{}, :{}" \ + ":{}, :{}".format( + self.name, + self.num_partitions, + self.replication_factor, + self.replica_assignments, + self.configs, + ) + +class AdminClient(object): + """ + An api to send CreateTopic requests + + """ + def __init__(self, client): + self.client = client + self.metadata_request = MetadataRequest[1]([]) + self.topic_request = CreateTopicsRequest[0] + self.delete_topics_request = DeleteTopicsRequest[0] + + def _send_controller_request(self): + response = self._send( + self.client.least_loaded_node(), + self.metadata_request, + ) + return response[0].controller_id + + def _send(self, node, request): + future = self.client.send(node, request) + return self.client.poll(future=future) + + def _send_request(self, request): + controller_id = self._send_controller_request() + if not self.client.ready(controller_id): + raise NodeNotReadyError(controller_id) + else: + return self._send(controller_id, request) + + + def create_topics( + self, + topics, + timeout, + ): + """ Create topics on the cluster + + Arguments: + new_topics (list of NewTopic): A list containing new + topics to be created + validate_only (bool): True if we just want to validate the request + timeout (int): timeout in seconds + max_retry (int): num of times we want to retry to send a create + topic request when the controller in not available + + Returns: + CreateTopicResponse: response from the broker + + Raises: + NodeNotReadyError: if controller is not ready + """ + request = self.topic_request( + create_topic_requests=[ + convert_new_topic_request_format(topic) + for topic in topics + ], + timeout=timeout, + ) + return self._send_request(request) + + def delete_topics(self, topics, timeout): + """ Deletes topics on the cluster + + Arguments: + topics (list of topic names): Topics to delete + timeout (int): The requested timeout for this operation + Raises: + NodeNotReadyError: if retry exceeds max_retry + """ + + request = self.delete_topics_request( + topics=topics, + timeout=timeout, + ) + return self._send_request(request) + + diff --git a/test/test_admin_client.py b/test/test_admin_client.py new file mode 100644 index 000000000..4f5d89544 --- /dev/null +++ b/test/test_admin_client.py @@ -0,0 +1,78 @@ +import mock +import pytest +from kafka.client_async import KafkaClient +from kafka.errors import BrokerNotAvailableError +from kafka.protocol.metadata import MetadataResponse +from kafka.protocol.admin import CreateTopicsResponse +from kafka.admin_client import AdminClient +from kafka.admin_client import NewTopic +from kafka.structs import BrokerMetadata +from kafka.future import Future + +@pytest.fixture +def bootstrap_brokers(): + return 'fake-broker:9092' + +@pytest.fixture +def controller_id(): + return 100 + +@pytest.fixture +def mock_least_loaded_node(): + return 2 + +@pytest.fixture +def metadata_response(controller_id): + return [MetadataResponse[1]( + [(1,'host',80,'rack')], controller_id, + [(37,'topic',False,[(7,1,2,[1,2,3],[1,2,3])])] + )] + +@pytest.fixture +def mock_new_topics(): + return [NewTopic('topic',1,1)] + +@pytest.fixture +def topic_response(): + return CreateTopicsResponse[1]([( + 'topic',7,'timeout_exception' + )]) + + +class 
TestTopicAdmin(): + + def test_send_controller_request( + self, + mock_least_loaded_node, + controller_id, + bootstrap_brokers, + metadata_response + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll.return_value = metadata_response + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + mock_kafka_client.connected.return_value = True + admin = AdminClient(mock_kafka_client) + assert admin._send_controller_request() == controller_id + + def test_create_topics( + self, + mock_new_topics, + mock_least_loaded_node, + bootstrap_brokers, + topic_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, topic_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + admin = AdminClient(mock_kafka_client) + response = admin.create_topics(mock_new_topics, False) + assert response == topic_response + diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py new file mode 100644 index 000000000..3a1a748a5 --- /dev/null +++ b/test/test_admin_client_integration.py @@ -0,0 +1,46 @@ +import os +import time +import unittest +from kafka.admin_client import AdminClient, NewTopic +from kafka.protocol.metadata import MetadataRequest +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, kafka_versions + +class TestKafkaAdminClientIntegration(KafkaIntegrationTestCase): + + @classmethod + def setUpClass(cls): + if not os.environ.get('KAFKA_VERSION'): + return + + cls.zk = ZookeeperFixture.instance() + cls.server = KafkaFixture.instance(0, cls.zk.host, cls.zk.port) + + @classmethod + def tearDownClass(cls): + if not os.environ.get('KAFKA_VERSION'): + return + + cls.server.close() + cls.zk.close() + + @kafka_versions('>=0.10.1') + def test_create_delete_topics(self): + admin = AdminClient(self.client_async) + topic = NewTopic( + name='topic', + num_partitions=1, + replication_factor=1, + ) + metadata_request = MetadataRequest[1]() + response = admin.create_topics(topics=[topic], timeout=1) + self.assertTrue( + response[0].topic_error_codes[0][1] == 0 or + response[0].topic_error_codes[0][1] == 7 + ) + time.sleep(1) # allows the topic to be created + delete_response = admin.delete_topics(['topic'], timeout=1) + self.assertTrue( + response[0].topic_error_codes[0][1] == 0 or + response[0].topic_error_codes[0][1] == 7 + ) From 8667c3672e10e09177b295a62446bdf00aa51553 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 3 Jan 2018 16:14:04 -0800 Subject: [PATCH 174/291] remove pylint version pin --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index e6ee86ae2..03a6893ad 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,6 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = - pylint==1.7.1 pytest pytest-cov pytest-catchlog From b4aab4e9d9363fb44666099ca8460c04ff9a38b3 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 3 Jan 2018 16:30:51 -0800 Subject: [PATCH 175/291] turns theres an issue with pylint even on travis pinning pylint again --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 03a6893ad..e6ee86ae2 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s 
[testenv] deps = + pylint==1.7.1 pytest pytest-cov pytest-catchlog From aea7a86e15079d70c503422bed75abe745e10c1d Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 3 Jan 2018 17:07:06 -0800 Subject: [PATCH 176/291] lets see if this passes travis --- tox.ini | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tox.ini b/tox.ini index e6ee86ae2..d07670207 100644 --- a/tox.ini +++ b/tox.ini @@ -9,16 +9,16 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = - pylint==1.7.1 pytest pytest-cov pytest-catchlog - py{27,34,35,py}: pytest-pylint + py{27,34,35,36,py}: pylint==1.8.0 + py{27,34,35,36,py}: pytest-pylint pytest-sugar pytest-mock mock python-snappy - lz4 + lz4==0.11.1 xxhash py26: unittest2 commands = @@ -31,11 +31,6 @@ passenv = KAFKA_VERSION # pylint doesn't support python2.6 commands = py.test {posargs:--cov=kafka --cov-config=.covrc} -[testenv:py36] -# pylint doesn't support python3.6 yet -# https://github.com/PyCQA/pylint/issues/1072 -commands = py.test {posargs:--cov=kafka --cov-config=.covrc} - [testenv:pypy] # pylint is super slow on pypy... commands = py.test {posargs:--cov=kafka --cov-config=.covrc} From bc935a80a349f78637d6b293dd3320d04ea64b04 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Thu, 4 Jan 2018 17:48:12 -0800 Subject: [PATCH 177/291] fix docstrings --- kafka/admin_client.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kafka/admin_client.py b/kafka/admin_client.py index 69f3d3903..67152f9c2 100644 --- a/kafka/admin_client.py +++ b/kafka/admin_client.py @@ -95,12 +95,9 @@ def create_topics( """ Create topics on the cluster Arguments: - new_topics (list of NewTopic): A list containing new + topics (list of NewTopic): A list containing new topics to be created - validate_only (bool): True if we just want to validate the request timeout (int): timeout in seconds - max_retry (int): num of times we want to retry to send a create - topic request when the controller in not available Returns: CreateTopicResponse: response from the broker @@ -132,5 +129,3 @@ def delete_topics(self, topics, timeout): timeout=timeout, ) return self._send_request(request) - - From 8642096864e83890f641bb060f3a809c83e5ae1f Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 10 Jan 2018 16:42:47 -0800 Subject: [PATCH 178/291] add delete_topics unit test --- test/test_admin_client.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/test/test_admin_client.py b/test/test_admin_client.py index 4f5d89544..2b4776246 100644 --- a/test/test_admin_client.py +++ b/test/test_admin_client.py @@ -3,7 +3,7 @@ from kafka.client_async import KafkaClient from kafka.errors import BrokerNotAvailableError from kafka.protocol.metadata import MetadataResponse -from kafka.protocol.admin import CreateTopicsResponse +from kafka.protocol.admin import CreateTopicsResponse, DeleteTopicsResponse from kafka.admin_client import AdminClient from kafka.admin_client import NewTopic from kafka.structs import BrokerMetadata @@ -38,6 +38,11 @@ def topic_response(): 'topic',7,'timeout_exception' )]) +@pytest.fixture +def delete_response(): + return DeleteTopicsResponse[0]([( + 'topic',7 + )]) class TestTopicAdmin(): @@ -73,6 +78,24 @@ def test_create_topics( mock_least_loaded_node mock_kafka_client.send.return_value = Future() admin = AdminClient(mock_kafka_client) - response = admin.create_topics(mock_new_topics, False) + response = admin.create_topics(mock_new_topics, 0) assert response 
== topic_response - + + def delete_topics( + self, + mock_new_topics, + mock_least_loaded_node, + bootstrap_brokers, + delete_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, delete_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + admin = AdminClient(mock_kafka_client) + response = admin.delete_topics(mock_new_topics, 0) + assert response == delete_response From 8db92934697c681d2b9aa9e00a2b1c4181f05ac8 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Wed, 10 Jan 2018 17:46:54 -0800 Subject: [PATCH 179/291] add test_ to delete_topic and add comment in integration test --- test/test_admin_client.py | 2 +- test/test_admin_client_integration.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_admin_client.py b/test/test_admin_client.py index 2b4776246..ccbd16dd4 100644 --- a/test/test_admin_client.py +++ b/test/test_admin_client.py @@ -81,7 +81,7 @@ def test_create_topics( response = admin.create_topics(mock_new_topics, 0) assert response == topic_response - def delete_topics( + def test_delete_topics( self, mock_new_topics, mock_least_loaded_node, diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py index 3a1a748a5..5b1c73589 100644 --- a/test/test_admin_client_integration.py +++ b/test/test_admin_client_integration.py @@ -34,6 +34,10 @@ def test_create_delete_topics(self): ) metadata_request = MetadataRequest[1]() response = admin.create_topics(topics=[topic], timeout=1) + # Error code 7 means that RequestTimedOut but we can safely assume + # that topic is created or will be created eventually. 
+ # see this https://cwiki.apache.org/confluence/display/KAFKA/ + # KIP-4+-+Command+line+and+centralized+administrative+operations self.assertTrue( response[0].topic_error_codes[0][1] == 0 or response[0].topic_error_codes[0][1] == 7 From 0deaff41941d4fd5908a52cec2e139c420ff996a Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Thu, 11 Jan 2018 11:54:12 -0800 Subject: [PATCH 180/291] fix docstring --- kafka/admin_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/admin_client.py b/kafka/admin_client.py index 67152f9c2..30147bc1e 100644 --- a/kafka/admin_client.py +++ b/kafka/admin_client.py @@ -121,7 +121,7 @@ def delete_topics(self, topics, timeout): topics (list of topic names): Topics to delete timeout (int): The requested timeout for this operation Raises: - NodeNotReadyError: if retry exceeds max_retry + NodeNotReadyError: if controller is not ready """ request = self.delete_topics_request( From 993ed3483ad8657069456401ca8b84f7d5fb9468 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 15 Jan 2018 00:22:48 -0800 Subject: [PATCH 181/291] test if the fix works --- test/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fixtures.py b/test/fixtures.py index 839bae6d9..435c36c67 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -192,7 +192,7 @@ def instance(cls, broker_id, zk_host, zk_port, zk_chroot=None, fixture = ExternalService(host, port) else: if host is None: - host = "localhost" + host = "::1" fixture = KafkaFixture(host, port, broker_id, zk_host, zk_port, zk_chroot, transport=transport, From 7a7fb1fbc8d2eed39d3d8c2472ecf9438fafa0af Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 15 Jan 2018 02:19:06 -0800 Subject: [PATCH 182/291] fix failing build locally --- Makefile | 4 +++- test/fixtures.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1e65744cb..1a53ae04b 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,9 @@ unit_test_docker: itest: docker build -t kafka_python_test . 
- docker run kafka_python_test /work/run_itest.sh +# travis build passes because ipv6 is disabled there +# it passes the integration test locally if we disable ipv6 here + docker run --sysctl net.ipv6.conf.all.disable_ipv6=1 kafka_python_test /work/run_itest.sh clean: rm -rf kafka-python.egg-info/ .tox/ diff --git a/test/fixtures.py b/test/fixtures.py index 435c36c67..839bae6d9 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -192,7 +192,7 @@ def instance(cls, broker_id, zk_host, zk_port, zk_chroot=None, fixture = ExternalService(host, port) else: if host is None: - host = "::1" + host = "localhost" fixture = KafkaFixture(host, port, broker_id, zk_host, zk_port, zk_chroot, transport=transport, From bcbe235ea001ad6cac9c616acbaf75c57d62207f Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Tue, 23 Jan 2018 17:17:34 -0800 Subject: [PATCH 183/291] bump version to 1.3.3.post7 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 7fb49369e..cb953153b 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.3.post6' +__version__ = '1.3.3.post7' From 832f7483f22ee315277c78e20b19c727e778f007 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 29 Jan 2018 11:41:02 -0800 Subject: [PATCH 184/291] add changelog --- docs/changelog.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 0a2b03058..0a62b65c6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,10 @@ Changelog ========= +1.3.3.post7 (Jan 29, 2018) +########################## +* Add kafka admin client + 1.3.3.post6 (Oct 5, 2017) ########################## * Revert fix for rebalance not getting triggered (poll) From 26cfc0ade570d349964b3f60469b616b7b95d22f Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Tue, 6 Feb 2018 15:07:57 -0800 Subject: [PATCH 185/291] Release 1.4.0 --- CHANGES.md | 85 ++++++++++++++++++++++++++++++++++++ README.rst | 4 +- docs/changelog.rst | 97 ++++++++++++++++++++++++++++++++++++++++++ docs/compatibility.rst | 4 +- docs/index.rst | 4 +- kafka/version.py | 2 +- 6 files changed, 189 insertions(+), 7 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e9d1e879b..2922023b4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,88 @@ +# 1.4.0 (Feb 6, 2018) + +This is a substantial release. Although there are no known 'showstopper' bugs as of release, +we do recommend you test any planned upgrade to your application prior to running in production. + +Some of the major changes include: +* We have officially dropped python 2.6 support +* The KafkaConsumer now includes a background thread to handle coordinator heartbeats +* API protocol handling has been separated from networking code into a new class, KafkaProtocol +* Added support for kafka message format v2 +* Refactored DNS lookups during kafka broker connections +* SASL authentication is working (we think) +* Removed several circular references to improve gc on close() + +Thanks to all contributors -- the state of the kafka-python community is strong! 
+ +Detailed changelog are listed below: + +Client +* Fixes for SASL support + * Refactor SASL/gssapi support (dpkp #1248 #1249 #1257 #1262 #1280) + * Add security layer negotiation to the GSSAPI authentication (asdaraujo #1283) + * Fix overriding sasl_kerberos_service_name in KafkaConsumer / KafkaProducer (natedogs911 #1264) + * Fix typo in _try_authenticate_plain (everpcpc #1333) + * Fix for Python 3 byte string handling in SASL auth (christophelec #1353) +* Move callback processing from BrokerConnection to KafkaClient (dpkp #1258) +* Use socket timeout of request_timeout_ms to prevent blocking forever on send (dpkp #1281) +* Refactor dns lookup in BrokerConnection (dpkp #1312) +* Read all available socket bytes (dpkp #1332) +* Honor reconnect_backoff in conn.connect() (dpkp #1342) + +Consumer +* KAFKA-3977: Defer fetch parsing for space efficiency, and to raise exceptions to user (dpkp #1245) +* KAFKA-4034: Avoid unnecessary consumer coordinator lookup (dpkp #1254) +* Handle lookup_coordinator send failures (dpkp #1279) +* KAFKA-3888 Use background thread to process consumer heartbeats (dpkp #1266) +* Improve KafkaConsumer cleanup (dpkp #1339) +* Fix coordinator join_future race condition (dpkp #1338) +* Avoid KeyError when filtering fetchable partitions (dpkp #1344) +* Name heartbeat thread with group_id; use backoff when polling (dpkp #1345) +* KAFKA-3949: Avoid race condition when subscription changes during rebalance (dpkp #1364) +* Fix #1239 regression to avoid consuming duplicate compressed messages from mid-batch (dpkp #1367) + +Producer +* Fix timestamp not passed to RecordMetadata (tvoinarovskyi #1273) +* Raise non-API exceptions (jeffwidman #1316) +* Fix reconnect_backoff_max_ms default config bug in KafkaProducer (YaoC #1352) + +Core / Protocol +* Add kafka.protocol.parser.KafkaProtocol w/ receive and send (dpkp #1230) +* Refactor MessageSet and Message into LegacyRecordBatch to later support v2 message format (tvoinarovskyi #1252) +* Add DefaultRecordBatch implementation aka V2 message format parser/builder. 
(tvoinarovskyi #1185) +* optimize util.crc32 (ofek #1304) +* Raise better struct pack/unpack errors (jeffwidman #1320) +* Add Request/Response structs for kafka broker 1.0.0 (dpkp #1368) + +Bugfixes +* use python standard max value (lukekingbru #1303) +* changed for to use enumerate() (TheAtomicOption #1301) +* Explicitly check for None rather than falsey (jeffwidman #1269) +* Minor Exception cleanup (jeffwidman #1317) +* Use non-deprecated exception handling (jeffwidman a699f6a) +* Remove assertion with side effect in client.wakeup() (bgedik #1348) +* use absolute imports everywhere (kevinkjt2000 #1362) + +Test Infrastructure +* Use 0.11.0.2 kafka broker for integration testing (dpkp #1357 #1244) +* Add a Makefile to help build the project, generate docs, and run tests (tvoinarovskyi #1247) +* Add fixture support for 1.0.0 broker (dpkp #1275) +* Add kafka 1.0.0 to travis integration tests (dpkp #1365) +* Change fixture default host to localhost (asdaraujo #1305) +* Minor test cleanups (dpkp #1343) +* Use latest pytest 3.4.0, but drop pytest-sugar due to incompatibility (dpkp #1361) + +Documentation +* Expand metrics docs (jeffwidman #1243) +* Fix docstring (jeffwidman #1261) +* Added controlled thread shutdown to example.py (TheAtomicOption #1268) +* Add license to wheel (jeffwidman #1286) +* Use correct casing for MB (jeffwidman #1298) + +Logging / Error Messages +* Fix two bugs in printing bytes instance (jeffwidman #1296) + + # 1.3.5 (Oct 7, 2017) Bugfixes diff --git a/README.rst b/README.rst index d4fc1a9ad..dcade4339 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Kafka Python client ------------------------ -.. image:: https://img.shields.io/badge/kafka-0.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-1.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -141,7 +141,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a KafkaClient.check_version() method that probes a kafka broker and attempts to identify which version it is running -(0.8.0 to 0.11). +(0.8.0 to 1.0). Low-level ********* diff --git a/docs/changelog.rst b/docs/changelog.rst index dc5ca8523..51f5533a5 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,103 @@ Changelog ========= +1.4.0 (Feb 6, 2018) +################### + +This is a substantial release. Although there are no known 'showstopper' bugs as of release, +we do recommend you test any planned upgrade to your application prior to running in production. + +Some of the major changes include: + +* We have officially dropped python 2.6 support +* The KafkaConsumer now includes a background thread to handle coordinator heartbeats +* API protocol handling has been separated from networking code into a new class, KafkaProtocol +* Added support for kafka message format v2 +* Refactored DNS lookups during kafka broker connections +* SASL authentication is working (we think) +* Removed several circular references to improve gc on close() + +Thanks to all contributors -- the state of the kafka-python community is strong! 
+ +Detailed changelog are listed below: + +Client +------ +* Fixes for SASL support + + * Refactor SASL/gssapi support (dpkp #1248 #1249 #1257 #1262 #1280) + * Add security layer negotiation to the GSSAPI authentication (asdaraujo #1283) + * Fix overriding sasl_kerberos_service_name in KafkaConsumer / KafkaProducer (natedogs911 #1264) + * Fix typo in _try_authenticate_plain (everpcpc #1333) + * Fix for Python 3 byte string handling in SASL auth (christophelec #1353) + +* Move callback processing from BrokerConnection to KafkaClient (dpkp #1258) +* Use socket timeout of request_timeout_ms to prevent blocking forever on send (dpkp #1281) +* Refactor dns lookup in BrokerConnection (dpkp #1312) +* Read all available socket bytes (dpkp #1332) +* Honor reconnect_backoff in conn.connect() (dpkp #1342) + +Consumer +-------- +* KAFKA-3977: Defer fetch parsing for space efficiency, and to raise exceptions to user (dpkp #1245) +* KAFKA-4034: Avoid unnecessary consumer coordinator lookup (dpkp #1254) +* Handle lookup_coordinator send failures (dpkp #1279) +* KAFKA-3888 Use background thread to process consumer heartbeats (dpkp #1266) +* Improve KafkaConsumer cleanup (dpkp #1339) +* Fix coordinator join_future race condition (dpkp #1338) +* Avoid KeyError when filtering fetchable partitions (dpkp #1344) +* Name heartbeat thread with group_id; use backoff when polling (dpkp #1345) +* KAFKA-3949: Avoid race condition when subscription changes during rebalance (dpkp #1364) +* Fix #1239 regression to avoid consuming duplicate compressed messages from mid-batch (dpkp #1367) + +Producer +-------- +* Fix timestamp not passed to RecordMetadata (tvoinarovskyi #1273) +* Raise non-API exceptions (jeffwidman #1316) +* Fix reconnect_backoff_max_ms default config bug in KafkaProducer (YaoC #1352) + +Core / Protocol +--------------- +* Add kafka.protocol.parser.KafkaProtocol w/ receive and send (dpkp #1230) +* Refactor MessageSet and Message into LegacyRecordBatch to later support v2 message format (tvoinarovskyi #1252) +* Add DefaultRecordBatch implementation aka V2 message format parser/builder. 
(tvoinarovskyi #1185) +* optimize util.crc32 (ofek #1304) +* Raise better struct pack/unpack errors (jeffwidman #1320) +* Add Request/Response structs for kafka broker 1.0.0 (dpkp #1368) + +Bugfixes +-------- +* use python standard max value (lukekingbru #1303) +* changed for to use enumerate() (TheAtomicOption #1301) +* Explicitly check for None rather than falsey (jeffwidman #1269) +* Minor Exception cleanup (jeffwidman #1317) +* Use non-deprecated exception handling (jeffwidman a699f6a) +* Remove assertion with side effect in client.wakeup() (bgedik #1348) +* use absolute imports everywhere (kevinkjt2000 #1362) + +Test Infrastructure +------------------- +* Use 0.11.0.2 kafka broker for integration testing (dpkp #1357 #1244) +* Add a Makefile to help build the project, generate docs, and run tests (tvoinarovskyi #1247) +* Add fixture support for 1.0.0 broker (dpkp #1275) +* Add kafka 1.0.0 to travis integration tests (dpkp #1365) +* Change fixture default host to localhost (asdaraujo #1305) +* Minor test cleanups (dpkp #1343) +* Use latest pytest 3.4.0, but drop pytest-sugar due to incompatibility (dpkp #1361) + +Documentation +------------- +* Expand metrics docs (jeffwidman #1243) +* Fix docstring (jeffwidman #1261) +* Added controlled thread shutdown to example.py (TheAtomicOption #1268) +* Add license to wheel (jeffwidman #1286) +* Use correct casing for MB (jeffwidman #1298) + +Logging / Error Messages +------------------------ +* Fix two bugs in printing bytes instance (jeffwidman #1296) + + 1.3.5 (Oct 7, 2017) #################### diff --git a/docs/compatibility.rst b/docs/compatibility.rst index a832ae631..1771d8f76 100644 --- a/docs/compatibility.rst +++ b/docs/compatibility.rst @@ -1,12 +1,12 @@ Compatibility ------------- -.. image:: https://img.shields.io/badge/kafka-0.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-1.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python -kafka-python is compatible with (and tested against) broker versions 0.11 +kafka-python is compatible with (and tested against) broker versions 1.0 through 0.8.0 . kafka-python is not compatible with the 0.8.2-beta release. kafka-python is tested on python 2.7, 3.4, 3.5, 3.6 and pypy. diff --git a/docs/index.rst b/docs/index.rst index f84992a77..51c39f59d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ kafka-python ############ -.. image:: https://img.shields.io/badge/kafka-0.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-1.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -136,7 +136,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a :meth:`~kafka.KafkaClient.check_version()` method that probes a kafka broker and -attempts to identify which version it is running (0.8.0 to 0.11). +attempts to identify which version it is running (0.8.0 to 1.0). 
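The version probe described in this docs hunk can be exercised in a few lines. As a minimal sketch only (assuming a broker reachable at localhost:9092, which is a placeholder address, and that check_version() returns a version tuple as in recent kafka-python releases):

    # Sketch only -- not part of this patch. Probes a broker and prints the
    # inferred broker version. The bootstrap address is a placeholder assumption.
    from kafka.client_async import KafkaClient

    client = KafkaClient(bootstrap_servers='localhost:9092')
    broker_version = client.check_version()  # e.g. (0, 10, 1) or (1, 0, 0)
    print(broker_version)
    client.close()
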
Low-level diff --git a/kafka/version.py b/kafka/version.py index 7ca88b023..96e3ce8d9 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.3.6.dev' +__version__ = '1.4.0' From ee23efd236eccdf42a0f7b147997e11dde2f13e5 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Tue, 13 Feb 2018 17:32:44 -0800 Subject: [PATCH 186/291] fix failing tests --- .pytest_cache/v/cache/lastfailed | 1 + kafka/client_async.py | 9 ++++++ kafka/consumer/fetcher.py | 29 +++++++++-------- kafka/protocol/legacy.py | 22 +++++++++++++ test/test_consumer.py | 22 +------------ test/test_consumer_integration.py | 52 ------------------------------- 6 files changed, 47 insertions(+), 88 deletions(-) create mode 100644 .pytest_cache/v/cache/lastfailed diff --git a/.pytest_cache/v/cache/lastfailed b/.pytest_cache/v/cache/lastfailed new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/.pytest_cache/v/cache/lastfailed @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/kafka/client_async.py b/kafka/client_async.py index f980621f1..4962d9f1d 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -69,6 +69,14 @@ class KafkaClient(object): reconnect_backoff_ms (int): The amount of time in milliseconds to wait before attempting to reconnect to a given host. Default: 50. + reconnect_backoff_max_ms (int): The maximum amount of time in + milliseconds to wait when reconnecting to a broker that has + repeatedly failed to connect. If provided, the backoff per host + will increase exponentially for each consecutive connection + failure, up to this maximum. To avoid connection storms, a + randomization factor of 0.2 will be applied to the backoff + resulting in a random range between 20% below and 20% above + the computed value. Default: 1000. request_timeout_ms (int): Client request timeout in milliseconds. Default: 40000. retry_backoff_ms (int): Milliseconds to backoff when retrying on @@ -515,6 +523,7 @@ def send(self, node_id, request): if not self._maybe_connect(node_id): return Future().failure(Errors.NodeNotReadyError(node_id)) + return self._conns[node_id].send(request) def poll(self, timeout_ms=None, future=None): """Try to read and write to sockets. 
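The reconnect_backoff_max_ms docstring added in the hunk above describes a capped exponential backoff with a 0.2 randomization factor. A standalone sketch of that calculation (illustrative only, not kafka-python's internal code; the doubling factor and the use of random.uniform are assumptions, while the 50 ms and 1000 ms defaults come from the docstring):

    # Illustrative sketch of the backoff policy described in the docstring above;
    # not the library's actual implementation.
    import random

    def reconnect_backoff(failures, base_ms=50, max_ms=1000):
        """Exponential backoff per consecutive failure, capped at max_ms,
        with +/-20% jitter (randomization factor of 0.2)."""
        backoff_ms = min(base_ms * (2 ** (failures - 1)), max_ms)
        return backoff_ms * random.uniform(0.8, 1.2)

    # failures 1..5 -> roughly 50, 100, 200, 400, 800 ms, each varied by +/-20%
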
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index ca50ea701..c9bbb9717 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -367,21 +367,20 @@ def _append(self, drained, part, max_records): part_records = part.take(max_records) next_offset = part_records[-1].offset + 1 - log.log(0, "Returning fetched records at offset %d for assigned" - " partition %s and update position to %s", position, - tp, next_offset) - - for record in part_records: - # Fetched compressed messages may include additional records - if record.offset < fetch_offset: - log.debug("Skipping message offset: %s (expecting %s)", - record.offset, fetch_offset) - continue - drained[tp].append(record) - records_found += 1 - - self._subscriptions.assignment[tp].position = next_offset - return records_found + log.log(0, "Returning fetched records at offset %d for assigned" + " partition %s and update position to %s", position, + tp, next_offset) + + for record in part_records: + # Fetched compressed messages may include additional records + if record.offset < fetch_offset: + log.debug("Skipping message offset: %s (expecting %s)", + record.offset, fetch_offset) + continue + drained[tp].append(record) + + self._subscriptions.assignment[tp].position = next_offset + return len(part_records) else: # these records aren't next in line based on the last consumed diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index 4dfcb8c54..b50f25154 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -347,6 +347,28 @@ def encode_offset_commit_request(cls, group, payloads): payload.metadata) for partition, payload in six.iteritems(topic_payloads)]) for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) + + @classmethod + def encode_offset_commit_request_kafka(cls, group, payloads): + """ + Encode an OffsetCommitRequest struct + Arguments: + group: string, the consumer group you are committing offsets for + payloads: list of OffsetCommitRequestPayload + """ + return kafka.protocol.commit.OffsetCommitRequest[2]( + consumer_group=group, + consumer_group_generation_id=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_GENERATION_ID, + consumer_id='', + retention_time=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_RETENTION_TIME, + topics=[( + topic, + [( + partition, + payload.offset, + payload.metadata) + for partition, payload in six.iteritems(topic_payloads)]) + for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) @classmethod def decode_offset_commit_response(cls, response): diff --git a/test/test_consumer.py b/test/test_consumer.py index 58bf38ffc..013529f05 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -3,7 +3,7 @@ from mock import MagicMock, patch from . 
import unittest -from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer +from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer from kafka.errors import ( FailedPayloadsError, KafkaConfigurationError, NotLeaderForPartitionError, UnknownTopicOrPartitionError) @@ -32,26 +32,6 @@ def test_subscription_copy(self): sub.add('fizz') assert consumer.subscription() == set(['foo']) - def test_broker_list_required(self): - with self.assertRaises(KafkaConfigurationError): - OldKafkaConsumer() - - def test_session_timeout_larger_than_request_timeout_raises(self): - with self.assertRaises(KafkaConfigurationError): - KafkaConsumer(bootstrap_servers='localhost:9092', session_timeout_ms=60000, request_timeout_ms=40000) - - def test_fetch_max_wait_larger_than_request_timeout_raises(self): - with self.assertRaises(KafkaConfigurationError): - KafkaConsumer(bootstrap_servers='localhost:9092', fetch_max_wait_ms=41000, request_timeout_ms=40000) - - def test_subscription_copy(self): - consumer = KafkaConsumer('foo', api_version=(0, 10)) - sub = consumer.subscription() - assert sub is not consumer.subscription() - assert sub == set(['foo']) - sub.add('fizz') - assert consumer.subscription() == set(['foo']) - class TestMultiProcessConsumer(unittest.TestCase): @unittest.skipIf(sys.platform.startswith('win'), 'test mocking fails on windows') diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index b2763a8d5..ded231477 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -519,26 +519,6 @@ def test_kafka_consumer(self): self.assertEqual(len(messages[0]), 100) self.assertEqual(len(messages[1]), 100) - def test_old_kafka_consumer(self): - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer = self.old_kafka_consumer(auto_offset_reset='smallest', - consumer_timeout_ms=5000) - n = 0 - messages = {0: set(), 1: set()} - logging.debug("kafka consumer offsets: %s" % consumer.offsets()) - for m in consumer: - logging.debug("Consumed message %s" % repr(m)) - n += 1 - messages[m.partition].add(m.offset) - if n >= 200: - break - - self.assertEqual(len(messages[0]), 100) - self.assertEqual(len(messages[1]), 100) - def test_kafka_consumer__blocking(self): TIMEOUT_MS = 500 consumer = self.kafka_consumer(auto_offset_reset='earliest', @@ -576,38 +556,6 @@ def test_kafka_consumer__blocking(self): self.assertEqual(len(messages), 5) self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - def test_old_kafka_consumer__blocking(self): - TIMEOUT_MS = 500 - consumer = self.old_kafka_consumer(auto_offset_reset='smallest', - consumer_timeout_ms=TIMEOUT_MS) - - # Ask for 5 messages, nothing in queue, block 500ms - with Timer() as t: - with self.assertRaises(ConsumerTimeout): - msg = consumer.next() - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) - - self.send_messages(0, range(0, 10)) - - # Ask for 5 messages, 10 in queue. 
Get 5 back, no blocking - messages = set() - with Timer() as t: - for i in range(5): - msg = consumer.next() - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertLess(t.interval, TIMEOUT_MS / 1000.0) - - # Ask for 10 messages, get 5 back, block 500ms - messages = set() - with Timer() as t: - with self.assertRaises(ConsumerTimeout): - for i in range(10): - msg = consumer.next() - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) - @kafka_versions('>=0.8.1') def test_kafka_consumer__offset_commit_resume(self): GROUP_ID = random_string(10) From e4ac2efb0030d871a52148fd37b95cb9af701181 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Tue, 13 Feb 2018 17:33:40 -0800 Subject: [PATCH 187/291] remove pytest dir --- .pytest_cache/v/cache/lastfailed | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .pytest_cache/v/cache/lastfailed diff --git a/.pytest_cache/v/cache/lastfailed b/.pytest_cache/v/cache/lastfailed deleted file mode 100644 index 9e26dfeeb..000000000 --- a/.pytest_cache/v/cache/lastfailed +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file From b72ec234fcf4bbd45ac8b4a2bfe426dce32db53d Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Thu, 15 Feb 2018 02:27:03 -0800 Subject: [PATCH 188/291] remove offset dual test --- test/test_client_integration.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/test_client_integration.py b/test/test_client_integration.py index 204d4dbbc..e3e9e2436 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -92,14 +92,3 @@ def test_commit_fetch_offsets(self): self.assertEqual(resp.offset, 42) self.assertEqual(resp.metadata, '') # Metadata isn't stored for now - @kafka_versions('>=0.9.0.0') - def test_commit_fetch_offsets_dual(self): - req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') - (resp,) = self.client.send_offset_commit_request_kafka('group', [req]) - self.assertEqual(resp.error, 0) - - (resp,) = self.client.send_offset_fetch_request_kafka('group', [req]) - self.assertEqual(resp.error, 0) - self.assertEqual(resp.offset, 42) - # Metadata is stored in kafka - self.assertEqual(resp.metadata, 'metadata') From 2aa99345c4c413f07e76da519c6f6d70b1f9a18b Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 23 Apr 2018 13:11:24 -0700 Subject: [PATCH 189/291] fix failing tests --- test/testutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testutil.py b/test/testutil.py index e1d7de087..d6a9339c4 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -8,7 +8,7 @@ import pytest from . import unittest -from kafka import SimpleClient, create_message +from kafka import SimpleClient, KafkaClient, create_message from kafka.errors import LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError from kafka.structs import OffsetRequestPayload, ProduceRequestPayload, \ NotLeaderForPartitionError, UnknownTopicOrPartitionError, \ From 5fd2429819bd41f57c290ea67ba0d1dca399e3a4 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 23 Apr 2018 13:55:14 -0700 Subject: [PATCH 190/291] fix import --- test/testutil.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/testutil.py b/test/testutil.py index d6a9339c4..1a75d264b 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -8,7 +8,8 @@ import pytest from . 
import unittest -from kafka import SimpleClient, KafkaClient, create_message +from kafka import SimpleClient, create_message +from kafka.client_async import KafkaClient from kafka.errors import LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError from kafka.structs import OffsetRequestPayload, ProduceRequestPayload, \ NotLeaderForPartitionError, UnknownTopicOrPartitionError, \ From 91e8e2330ba2f8c01ffcff204eff3d1ec65b966f Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 23 Apr 2018 15:30:07 -0700 Subject: [PATCH 191/291] fix setUpClass --- test/test_admin_client_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py index 5b1c73589..938f476d2 100644 --- a/test/test_admin_client_integration.py +++ b/test/test_admin_client_integration.py @@ -14,7 +14,7 @@ def setUpClass(cls): return cls.zk = ZookeeperFixture.instance() - cls.server = KafkaFixture.instance(0, cls.zk.host, cls.zk.port) + cls.server = KafkaFixture.instance(0, cls.zk) @classmethod def tearDownClass(cls): From 155b0ff8599b99c6315f391c5525304132e3f2a1 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 7 May 2018 15:51:30 -0700 Subject: [PATCH 192/291] bump version --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 98d186bed..dd4c45afb 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.2' +__version__ = '1.4.2.post1' From 120cab8f287529ac4ec8cd777e1fdef1615e2744 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Fri, 22 Jun 2018 09:23:30 -0700 Subject: [PATCH 193/291] bump pypy for making jenkins happy --- Dockerfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 05521eace..1d30c812b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,6 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5BB92C09DB82666C C2 RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections RUN apt-get update && apt-get install -y python2.7-dev \ python3.5-dev \ - pypy-dev \ python-pkg-resources \ python-setuptools \ python-virtualenv \ @@ -17,7 +16,12 @@ RUN apt-get update && apt-get install -y python2.7-dev \ g++ \ ca-certificates \ python-pip \ - python-tox + python-tox + +# python-lz4 crashes with pypy 5.1.2, using v5.8.0 pypy here +RUN wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 +RUN tar xf pypy2-v5.8.0-linux64.tar.bz2 +RUN ln -s $PWD/pypy2-v5.8.0-linux64/bin/pypy /usr/local/bin/pypy RUN /usr/sbin/locale-gen en_US.UTF-8 ENV LANG en_US.UTF-8 From 097ed27370c7d6b07c37d39aed2f01cdc67779c4 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Fri, 22 Jun 2018 18:02:33 -0700 Subject: [PATCH 194/291] change comment --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1d30c812b..899742476 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ RUN apt-get update && apt-get install -y python2.7-dev \ python-pip \ python-tox -# python-lz4 crashes with pypy 5.1.2, using v5.8.0 pypy here +# python-lz4 requires minium pypy version 5.x.y RUN wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 RUN tar xf pypy2-v5.8.0-linux64.tar.bz2 RUN ln -s $PWD/pypy2-v5.8.0-linux64/bin/pypy /usr/local/bin/pypy From 16ffa6bbadac1fbff6c3fbee6685c66a4ae618ff Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Fri, 22 Jun 
2018 18:03:26 -0700 Subject: [PATCH 195/291] change comment --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 899742476..247c51ff3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ RUN apt-get update && apt-get install -y python2.7-dev \ python-pip \ python-tox -# python-lz4 requires minium pypy version 5.x.y +# python-lz4 requires minium pypy version 5.8.0 RUN wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 RUN tar xf pypy2-v5.8.0-linux64.tar.bz2 RUN ln -s $PWD/pypy2-v5.8.0-linux64/bin/pypy /usr/local/bin/pypy From a69b4339aeb8655d9541bff2831b4a091f796a0b Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 25 Jun 2018 09:35:17 -0700 Subject: [PATCH 196/291] bump version to 1.4.2.post2 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index dd4c45afb..3b2ff4db9 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.2.post1' +__version__ = '1.4.2.post2' From 5e7b90c7e056c14252bd5b053eebb4b3f3063b69 Mon Sep 17 00:00:00 2001 From: Ashutosh Singh Date: Mon, 25 Jun 2018 09:35:17 -0700 Subject: [PATCH 197/291] bump version to 1.4.2.post2 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index dd4c45afb..3b2ff4db9 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.2.post1' +__version__ = '1.4.2.post2' From 31f04f1a2a2bf426eda4d6768394f510c44c9e8f Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 18 Jul 2018 15:44:18 -0700 Subject: [PATCH 198/291] Re-upgrade kafka-python for any missing internal/upstream changes in kafka-python 1.4.2.post2 --- Dockerfile | 2 +- Makefile | 17 +--- kafka/client.py | 161 +++++++++++++++++------------- kafka/consumer/group.py | 1 - kafka/coordinator/consumer.py | 1 - kafka/protocol/commit.py | 1 + kafka/protocol/message.py | 1 - kafka/protocol/struct.py | 2 - test/conftest.py | 1 - test/fixtures.py | 3 - test/test_client_integration.py | 12 ++- test/test_consumer.py | 5 +- test/test_consumer_integration.py | 156 ++++++++++++++++++++++++++++- test/test_producer_integration.py | 3 + tox.ini | 3 + 15 files changed, 272 insertions(+), 97 deletions(-) diff --git a/Dockerfile b/Dockerfile index 247c51ff3..43e0e4304 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update && apt-get install -y python2.7-dev \ g++ \ ca-certificates \ python-pip \ - python-tox + python-tox # python-lz4 requires minium pypy version 5.8.0 RUN wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 diff --git a/Makefile b/Makefile index c903014f8..7faf34004 100644 --- a/Makefile +++ b/Makefile @@ -16,16 +16,9 @@ itest: # it passes the integration test locally if we disable ipv6 here docker run --sysctl net.ipv6.conf.all.disable_ipv6=1 kafka_python_test /work/run_itest.sh -clean: - rm -rf kafka-python.egg-info/ .tox/ - find . -name '*.pyc' -delete - find . -name '__pycache__' -delete - docker rmi -f kafka_python_test - docs: tox -e docs -.PHONY: docs test all FLAGS= KAFKA_VERSION=0.11.0.2 @@ -76,9 +69,9 @@ clean: rm -rf docs/_build/ rm -rf cover rm -rf dist + rm -rf kafka-python.egg-info/ .tox/ + find . -name '*.pyc' -delete + find . 
-name '__pycache__' -delete + docker rmi -f kafka_python_test -doc: - make -C docs html - @echo "open file://`pwd`/docs/_build/html/index.html" - -.PHONY: all test36 test27 test-local cov-local clean doc +.PHONY: all test36 test27 test-local cov-local clean doc docs test diff --git a/kafka/client.py b/kafka/client.py index 266757ef9..00877d1b3 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -8,6 +8,13 @@ import time import select +# selectors in stdlib as of py3.4 +try: + import selectors # pylint: disable=import-error +except ImportError: + # vendored backport module + from .vendor import selectors34 as selectors + from kafka.vendor import six import kafka.errors @@ -263,11 +270,11 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): def failed_payloads(payloads): for payload in payloads: topic_partition = (str(payload.topic), payload.partition) - responses[(topic_partition)] = FailedPayloadsError(payload) + responses[topic_partition] = FailedPayloadsError(payload) + + futures_by_connection = {} + selector = selectors.DefaultSelector() - # For each BrokerConnection keep the real socket so that we can use - # a select to perform unblocking I/O - connections_by_future = {} for broker, broker_payloads in six.iteritems(payloads_by_broker): if broker is None: failed_payloads(broker_payloads) @@ -275,16 +282,20 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: - conn = self._get_conn(host, broker.port, afi) + conn = self._get_conn(host, broker.port, afi, broker.nodeId) except ConnectionError: refresh_metadata = True failed_payloads(broker_payloads) continue request = encoder_fn(payloads=broker_payloads) + if request.expect_response(): + selector.register(conn._sock, selectors.EVENT_READ, conn) future = conn.send(request) if future.failed(): + log.error("Request failed: %s", future.exception) + selector.unregister(conn._sock) refresh_metadata = True failed_payloads(broker_payloads) continue @@ -295,42 +306,50 @@ def failed_payloads(payloads): responses[topic_partition] = None continue - connections_by_future[future] = (conn, broker) + futures_by_connection[conn] = (future, broker) - conn = None - while connections_by_future: - futures = list(connections_by_future.keys()) + timeout = self.timeout + while futures_by_connection: + start_time = time.time() - # block until a socket is ready to be read - sockets = [ - conn._sock - for future, (conn, _) in six.iteritems(connections_by_future) - if not future.is_done and conn._sock is not None] - if sockets: - read_socks, _, _ = select.select(sockets, [], []) + ready = selector.select(timeout) - for future in futures: + for key, _ in ready: - if not future.is_done: - conn, _ = connections_by_future[future] + conn = key.data + future, _ = futures_by_connection[conn] + while not future.is_done: for r, f in conn.recv(): f.success(r) - continue - _, broker = connections_by_future.pop(future) + _, broker = futures_by_connection.pop(conn) + if future.failed(): + log.error("Request failed: %s", future.exception) refresh_metadata = True failed_payloads(payloads_by_broker[broker]) else: + _resps = [] for payload_response in decoder_fn(future.value): topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = payload_response + _resps.append(payload_response) + log.debug('Response %s', _resps) + + timeout -= time.time() - start_time + if timeout < 0: + log.error("%s requests timed out.", len(futures_by_connection)) + for _, broker in 
six.itervalues(futures_by_connection): + failed_payloads(payloads_by_broker[broker]) + refresh_metadata = True + break if refresh_metadata: self.reset_all_metadata() + selector.close() # Return responses in the same order as provided return [responses[tp] for tp in original_ordering] @@ -340,83 +359,78 @@ def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn): specified using the supplied encode/decode functions. As the payloads that use consumer-aware requests do not contain the group (e.g. OffsetFetchRequest), all payloads must be for a single group. + Arguments: + group: the name of the consumer group (str) the payloads are for payloads: list of object-like entities with topic (str) and partition (int) attributes; payloads with duplicate topic+partition are not supported. + encode_fn: a method to encode the list of payloads to a request body, must accept client_id, correlation_id, and payloads as keyword arguments + decode_fn: a method to decode a response body into response objects. The response objects must be object-like and have topic and partition attributes + Returns: + List of response objects in the same order as the supplied payloads """ # encoders / decoders do not maintain ordering currently # so we need to keep this so we can rebuild order before returning original_ordering = [(p.topic, p.partition) for p in payloads] - broker = self._get_coordinator_for_group(group) + retries = 0 + broker = None + while not broker: + try: + broker = self._get_coordinator_for_group(group) + except (GroupCoordinatorNotAvailableError, GroupLoadInProgressError) as e: + if retries == CONSUMER_OFFSET_TOPIC_CREATION_RETRIES: + raise e + time.sleep(CONSUMER_OFFSET_RETRY_INTERVAL_SEC) + retries += 1 # Send the list of request payloads and collect the responses and # errors responses = {} - request_id = self._next_id() - log.debug('Request %s to %s: %s', request_id, broker, payloads) - request = encoder_fn(client_id=self.client_id, - correlation_id=request_id, payloads=payloads) - - # Send the request, recv the response - try: - host, port, afi = get_ip_port_afi(broker.host) - conn = self._get_conn(host, broker.port, afi) - except ConnectionError as e: - log.warning('ConnectionError attempting to send request %s ' - 'to server %s: %s', request_id, broker, e) + def failed_payloads(payloads): for payload in payloads: - topic_partition = (payload.topic, payload.partition) + topic_partition = (str(payload.topic), payload.partition) responses[topic_partition] = FailedPayloadsError(payload) - # No exception, try to get response - else: - - future = conn.send(request_id, request) - while not future.is_done: - for r, f in conn.recv(): - f.success(r) + host, port, afi = get_ip_port_afi(broker.host) + try: + conn = self._get_conn(host, broker.port, afi, broker.nodeId) + except ConnectionError: + failed_payloads(payloads) + else: + request = encoder_fn(payloads=payloads) # decoder_fn=None signal that the server is expected to not # send a response. 
This probably only applies to # ProduceRequest w/ acks = 0 - if decoder_fn is None: - log.debug('Request %s does not expect a response ' - '(skipping conn.recv)', request_id) - for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = None - return [] + future = conn.send(request) + while not future.is_done: + for r, f in conn.recv(): + f.success(r) if future.failed(): - log.warning('Error attempting to receive a ' - 'response to request %s from server %s: %s', - request_id, broker, future.exception) + failed_payloads(payloads) - for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = FailedPayloadsError(payload) + elif not request.expect_response(): + failed_payloads(payloads) else: - response = future.value - _resps = [] + response = future.value for payload_response in decoder_fn(response): - topic_partition = (payload_response.topic, + topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = payload_response - _resps.append(payload_response) - log.debug('Response %s: %s', request_id, _resps) # Return responses in the same order as provided return [responses[tp] for tp in original_ordering] @@ -612,7 +626,7 @@ def load_metadata_for_topics(self, *topics, **kwargs): if leader in self.brokers: self.topics_to_brokers[topic_part] = self.brokers[leader] - # If Unknown Broker, fake BrokerMetadata so we don't lose the id + # If Unknown Broker, fake BrokerMetadata so we dont lose the id # (not sure how this could happen. server could be in bad state) else: self.topics_to_brokers[topic_part] = BrokerMetadata( @@ -620,7 +634,7 @@ def load_metadata_for_topics(self, *topics, **kwargs): ) @time_metric('metadata') - def send_metadata_request(self, payloads=[], fail_on_error=True, + def send_metadata_request(self, payloads=(), fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_metadata_request decoder = KafkaProtocol.decode_metadata_response @@ -628,7 +642,7 @@ def send_metadata_request(self, payloads=[], fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) @time_metric('consumer_metadata') - def send_consumer_metadata_request(self, payloads=[], fail_on_error=True, + def send_consumer_metadata_request(self, payloads=(), fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_consumer_metadata_request decoder = KafkaProtocol.decode_consumer_metadata_response @@ -636,13 +650,15 @@ def send_consumer_metadata_request(self, payloads=[], fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) @time_metric('produce') - def send_produce_request(self, payloads=[], acks=1, timeout=1000, + def send_produce_request(self, payloads=(), acks=1, timeout=1000, fail_on_error=True, callback=None): """ Encode and send some ProduceRequests + ProduceRequests will be grouped by (topic, partition) and then sent to a specific broker. Output is a list of responses in the same order as the list of payloads specified + Arguments: payloads (list of ProduceRequest): produce requests to send to kafka ProduceRequest payloads must not contain duplicates for any @@ -662,6 +678,7 @@ def send_produce_request(self, payloads=[], acks=1, timeout=1000, server response errors, defaults to True. callback (function, optional): instead of returning the ProduceResponse, first pass it through this function, defaults to None. 
+ Returns: list of ProduceResponses, or callback results if supplied, in the order of input payloads @@ -684,10 +701,11 @@ def send_produce_request(self, payloads=[], acks=1, timeout=1000, (not fail_on_error or not self._raise_on_response_error(resp))] @time_metric('fetch') - def send_fetch_request(self, payloads=[], fail_on_error=True, + def send_fetch_request(self, payloads=(), fail_on_error=True, callback=None, max_wait_time=100, min_bytes=4096): """ Encode and send a FetchRequest + Payloads are grouped by topic and partition so they can be pipelined to the same brokers. """ @@ -704,7 +722,7 @@ def send_fetch_request(self, payloads=[], fail_on_error=True, if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset') - def send_offset_request(self, payloads=[], fail_on_error=True, + def send_offset_request(self, payloads=(), fail_on_error=True, callback=None): resps = self._send_broker_aware_request( payloads, @@ -715,7 +733,7 @@ def send_offset_request(self, payloads=[], fail_on_error=True, if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset_list') - def send_list_offset_request(self, payloads=[], fail_on_error=True, + def send_list_offset_request(self, payloads=(), fail_on_error=True, callback=None): resps = self._send_broker_aware_request( payloads, @@ -726,7 +744,7 @@ def send_list_offset_request(self, payloads=[], fail_on_error=True, if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset_commit') - def send_offset_commit_request(self, group, payloads=[], + def send_offset_commit_request(self, group, payloads=(), fail_on_error=True, callback=None): encoder = functools.partial( KafkaProtocol.encode_offset_commit_request, @@ -739,7 +757,7 @@ def send_offset_commit_request(self, group, payloads=[], if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset_commit_kafka') - def send_offset_commit_request_kafka(self, group, payloads=[], + def send_offset_commit_request_kafka(self, group, payloads=(), fail_on_error=True, callback=None): encoder = functools.partial( KafkaProtocol.encode_offset_commit_request_kafka, @@ -752,7 +770,7 @@ def send_offset_commit_request_kafka(self, group, payloads=[], if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset_fetch') - def send_offset_fetch_request(self, group, payloads=[], + def send_offset_fetch_request(self, group, payloads=(), fail_on_error=True, callback=None): encoder = functools.partial(KafkaProtocol.encode_offset_fetch_request, @@ -764,7 +782,7 @@ def send_offset_fetch_request(self, group, payloads=[], if not fail_on_error or not self._raise_on_response_error(resp)] @time_metric('offset_fetch_kafka') - def send_offset_fetch_request_kafka(self, group, payloads=[], + def send_offset_fetch_request_kafka(self, group, payloads=(), fail_on_error=True, callback=None): encoder = functools.partial(KafkaProtocol.encode_offset_fetch_request, @@ -775,6 +793,7 @@ def send_offset_fetch_request_kafka(self, group, payloads=[], return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + class SimpleClientMetrics(object): def __init__(self, metrics): diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 49113a787..0d9e95248 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -638,7 +638,6 @@ def _poll_once(self, timeout_ms, max_records): # fetched records. 
if not partial: self._fetcher.send_fetches() - return records # Send any new fetches (won't resend pending fetches) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index 5b80435c5..cb1de0d2e 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -202,7 +202,6 @@ def _build_metadata_snapshot(self, subscription, cluster): metadata_snapshot[topic] = set(partitions) return metadata_snapshot - def _lookup_assignor(self, name): for assignor in self.config['assignors']: if assignor.name == name: diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 22b0f7858..31fc23707 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -162,6 +162,7 @@ class OffsetFetchResponse_v3(Response): ('error_code', Int16) ) + class OffsetFetchRequest_v0(Request): API_KEY = 9 API_VERSION = 0 # zookeeper-backed storage diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py index a7100f4aa..19dcbd9de 100644 --- a/kafka/protocol/message.py +++ b/kafka/protocol/message.py @@ -6,7 +6,6 @@ from kafka.codec import (has_gzip, has_snappy, has_lz4, gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka) -from ..util import crc32, WeakMethod from kafka.protocol.frame import KafkaBytes from kafka.protocol.struct import Struct from kafka.protocol.types import ( diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index 75c728a11..676de1ba4 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -7,8 +7,6 @@ from kafka.util import WeakMethod -from ..util import WeakMethod - class Struct(AbstractType): SCHEMA = Schema() diff --git a/test/conftest.py b/test/conftest.py index 52ebfb4ea..ad45f2c0d 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -3,7 +3,6 @@ import inspect import pytest -from decorator import decorate from test.fixtures import KafkaFixture, ZookeeperFixture from test.testutil import kafka_version, random_string diff --git a/test/fixtures.py b/test/fixtures.py index 922627d38..493a664a5 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -296,9 +296,6 @@ def _create_zk_chroot(self): "kafka-python") env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # Fix bugs where integration tests hang in docker container by sending - # blank line to stdin - proc.communicate('') if proc.wait() != 0 or proc.returncode != 0: self.out("Failed to create Zookeeper chroot node") diff --git a/test/test_client_integration.py b/test/test_client_integration.py index 88d4f88f6..4d204eb86 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -80,7 +80,6 @@ def test_send_produce_request_maintains_request_response_order(self): # Offset Tests # #################### - @kafka_versions('>=0.8.1') def test_commit_fetch_offsets(self): req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') (resp,) = self.client.send_offset_commit_request('group', [req]) @@ -92,3 +91,14 @@ def test_commit_fetch_offsets(self): self.assertEqual(resp.offset, 42) self.assertEqual(resp.metadata, '') # Metadata isn't stored for now + @kafka_versions('>=0.9.0.0') + def test_commit_fetch_offsets_dual(self): + req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') + (resp,) = self.client.send_offset_commit_request_kafka('group', [req]) + self.assertEqual(resp.error, 0) + + (resp,) = self.client.send_offset_fetch_request_kafka('group', [req]) + self.assertEqual(resp.error, 0) + self.assertEqual(resp.offset, 42) + # 
Metadata is stored in kafka + self.assertEqual(resp.metadata, 'metadata') diff --git a/test/test_consumer.py b/test/test_consumer.py index 013529f05..9b7ca4d65 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -3,7 +3,7 @@ from mock import MagicMock, patch from . import unittest -from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer +from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer from kafka.errors import ( FailedPayloadsError, KafkaConfigurationError, NotLeaderForPartitionError, UnknownTopicOrPartitionError) @@ -32,6 +32,9 @@ def test_subscription_copy(self): sub.add('fizz') assert consumer.subscription() == set(['foo']) + def test_broker_list_required(self): + with self.assertRaises(KafkaConfigurationError): + OldKafkaConsumer() class TestMultiProcessConsumer(unittest.TestCase): @unittest.skipIf(sys.platform.startswith('win'), 'test mocking fails on windows') diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index 78a8a3c1e..c00f54cca 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -2,29 +2,32 @@ import os import time +import pytest from six.moves import xrange import six from . import unittest from kafka import ( - KafkaConsumer, MultiProcessConsumer, SimpleConsumer, create_message, + KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer, SimpleConsumer, create_message, create_gzip_message, KafkaProducer ) from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES from kafka.errors import ( - ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, UnsupportedVersionError, + ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, ConsumerTimeout, UnsupportedVersionError, KafkaTimeoutError ) from kafka.structs import ( ProduceRequestPayload, TopicPartition, OffsetAndTimestamp ) +from test.conftest import version from test.fixtures import ZookeeperFixture, KafkaFixture from test.testutil import ( KafkaIntegrationTestCase, kafka_versions, random_string, Timer, send_messages ) +@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") def test_kafka_consumer(simple_client, topic, kafka_consumer_factory): """Test KafkaConsumer """ @@ -761,3 +764,152 @@ def test_kafka_consumer_offsets_for_times_errors(self): with self.assertRaises(KafkaTimeoutError): consumer.offsets_for_times({bad_tp: 0}) + + def old_kafka_consumer(self, **configs): + brokers = '%s:%d' % (self.server.host, self.server.port) + consumer = OldKafkaConsumer(self.topic, + bootstrap_servers=brokers, + **configs) + return consumer + + def test_old_kafka_consumer(self): + self.send_messages(0, range(0, 100)) + self.send_messages(1, range(100, 200)) + + # Start a consumer + consumer = self.old_kafka_consumer(auto_offset_reset='smallest', + consumer_timeout_ms=5000) + n = 0 + messages = {0: set(), 1: set()} + logging.debug("kafka consumer offsets: %s" % consumer.offsets()) + for m in consumer: + logging.debug("Consumed message %s" % repr(m)) + n += 1 + messages[m.partition].add(m.offset) + if n == 200: + break + + self.assertEqual(len(messages[0]), 100) + self.assertEqual(len(messages[1]), 100) + + def test_old_kafka_consumer__blocking(self): + TIMEOUT_MS = 500 + consumer = self.old_kafka_consumer( + auto_offset_reset='smallest', + consumer_timeout_ms=TIMEOUT_MS, + ) + + # Ask for 5 messages, nothing in queue, block 500ms + with Timer() as t: + with self.assertRaises(ConsumerTimeout): + msg = consumer.next() + self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) + + self.send_messages(0, 
range(0, 10)) + + # Ask for 5 messages, 10 in queue. Get 5 back, no blocking + messages = set() + with Timer() as t: + for i in range(5): + msg = consumer.next() + messages.add((msg.partition, msg.offset)) + self.assertEqual(len(messages), 5) + self.assertLess(t.interval, TIMEOUT_MS / 1000.0) + + # Ask for 10 messages, get 5 back, block 500ms + messages = set() + with Timer() as t: + with self.assertRaises(ConsumerTimeout): + for i in range(10): + msg = consumer.next() + messages.add((msg.partition, msg.offset)) + self.assertEqual(len(messages), 5) + self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) + + @kafka_versions('=0.8.1') + def test_old_kafka_consumer__offset_commit_resume(self): + GROUP_ID = random_string(10).encode('utf-8') + + self.send_messages(0, range(0, 100)) + self.send_messages(1, range(100, 200)) + + # Start a consumer + consumer1 = self.old_kafka_consumer( + group_id=GROUP_ID, + auto_commit_enable=True, + auto_commit_interval_ms=None, + auto_commit_interval_messages=20, + auto_offset_reset='smallest', + ) + + # Grab the first 195 messages + output_msgs1 = [] + for _ in xrange(195): + m = consumer1.next() + output_msgs1.append(m) + consumer1.task_done(m) + self.assert_message_count(output_msgs1, 195) + + # The total offset across both partitions should be at 180 + consumer2 = self.old_kafka_consumer( + group_id=GROUP_ID, + auto_commit_enable=True, + auto_commit_interval_ms=None, + auto_commit_interval_messages=20, + consumer_timeout_ms=100, + auto_offset_reset='smallest', + ) + + # 181-200 + output_msgs2 = [] + with self.assertRaises(ConsumerTimeout): + while True: + m = consumer2.next() + output_msgs2.append(m) + self.assert_message_count(output_msgs2, 20) + self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) + + @kafka_versions("=0.9.0.0") + def test_old_kafka_consumer__offset_commit_resume_dual(self): + GROUP_ID = random_string(10).encode('utf-8') + + self.send_messages(0, range(0, 100)) + self.send_messages(1, range(100, 200)) + + # Start a consumer + consumer1 = self.old_kafka_consumer( + group_id=GROUP_ID, + auto_commit_enable=True, + auto_commit_interval_ms=None, + auto_commit_interval_messages=20, + auto_offset_reset='smallest', + offset_storage='kafka', + ) + + # Grab the first 195 messages + output_msgs1 = [] + for _ in xrange(195): + m = consumer1.next() + output_msgs1.append(m) + consumer1.task_done(m) + self.assert_message_count(output_msgs1, 195) + + # The total offset across both partitions should be at 180 + consumer2 = self.old_kafka_consumer( + group_id=GROUP_ID, + auto_commit_enable=True, + auto_commit_interval_ms=None, + auto_commit_interval_messages=20, + consumer_timeout_ms=100, + auto_offset_reset='smallest', + offset_storage='dual', + ) + + # 181-200 + output_msgs2 = [] + with self.assertRaises(ConsumerTimeout): + while True: + m = consumer2.next() + output_msgs2.append(m) + self.assert_message_count(output_msgs2, 20) + self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index ca0da6abd..dffa8d8d1 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -2,6 +2,7 @@ import time import uuid +import pytest from six.moves import range from kafka import ( @@ -14,6 +15,7 @@ from kafka.producer.base import Producer from kafka.structs import FetchRequestPayload, ProduceRequestPayload +from test.conftest import version from test.fixtures import ZookeeperFixture, KafkaFixture from test.testutil import 
KafkaIntegrationTestCase, kafka_versions, current_offset @@ -39,6 +41,7 @@ def assert_produce_response(resp, initial_offset): assert resp[0].error == 0 assert resp[0].offset == initial_offset +@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") def test_produce_many_simple(simple_client, topic): """Test multiple produces using the SimpleClient """ diff --git a/tox.ini b/tox.ini index ad95f9374..1c24473bf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,7 @@ [tox] envlist = py{26,27,34,35,36,py}, docs +tox_pip_extensions_ext_pip_custom_platform = true +tox_pip_extensions_ext_venv_update = true [pytest] testpaths = kafka test @@ -21,6 +23,7 @@ deps = crc32c py26: unittest2 decorator + tox-pip-extensions==1.2.1 commands = py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} setenv = From 290009963933df92d7c3b1ae9108a74e2cc843fe Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 18 Jul 2018 16:57:59 -0700 Subject: [PATCH 199/291] Pin versions of packages in tox, add support for 1.0.1 itests --- .dockerignore | 2 ++ run_itest.sh | 13 ++++++------- tox.ini | 22 +++++++++++----------- 3 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..766836180 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +**/__pycache__ +**/*.pyc diff --git a/run_itest.sh b/run_itest.sh index fcc78613f..012604a50 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -1,20 +1,19 @@ #!/bin/bash -e - -export KAFKA_VERSION='0.9.0.1' +export KAFKA_VERSION='0.10.1.1' ./build_integration.sh tox -e py27 tox -e py35 tox -e pypy -export KAFKA_VERSION='0.10.0.0' +export KAFKA_VERSION='0.11.0.2' ./build_integration.sh tox -e py27 tox -e py35 tox -e pypy -export KAFKA_VERSION='0.10.1.1' +export KAFKA_VERSION='1.0.1' ./build_integration.sh -tox -e py27 -tox -e py35 -tox -e pypy +tox -e py27 +tox -e py35 +tox -e pypy diff --git a/tox.ini b/tox.ini index 1c24473bf..b9b02f381 100644 --- a/tox.ini +++ b/tox.ini @@ -11,18 +11,18 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = - pytest - pytest-cov + pytest==3.6.3 + pytest-cov==2.5.1 py{27,34,35,36,py}: pylint==1.8.2 - py{27,34,35,36,py}: pytest-pylint - pytest-mock - mock - python-snappy - lz4 - xxhash - crc32c - py26: unittest2 - decorator + py{27,34,35,36,py}: pytest-pylint==0.11.0 + pytest-mock==1.10.0 + mock==2.0.0 + python-snappy==0.5.2 + lz4==1.1.0 + xxhash==1.0.1 + crc32c==1.4 + py26: unittest2==1.1.0 + decorator==4.3.0 tox-pip-extensions==1.2.1 commands = py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} From 12d2765d7ccec21205bd6d1c92d07b2af571d572 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 25 Jul 2018 13:31:43 -0700 Subject: [PATCH 200/291] Bump version to v1.4.2.post3 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 3b2ff4db9..626f52b92 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.2.post2' +__version__ = '1.4.2.post3' From e8ccee7948cc692481bf7625622c73968d5b2395 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 05:56:11 -0700 Subject: [PATCH 201/291] Check for immediate failure when looking up coordinator in heartbeat thread (#1457) --- kafka/coordinator/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kafka/coordinator/base.py 
b/kafka/coordinator/base.py index bff628669..9f67d6b6d 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -945,7 +945,11 @@ def _run_once(self): self.coordinator._client.poll(timeout_ms=0) if self.coordinator.coordinator_unknown(): - if not self.coordinator.lookup_coordinator().is_done: + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) elif self.coordinator.heartbeat.session_timeout_expired(): From 03488f397832b685bd09dcd733b766a80996cfdb Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 05:58:55 -0700 Subject: [PATCH 202/291] Change SimpleProducer to use async_send (async is reserved in py37) (#1454) --- docs/simple.rst | 8 +++---- kafka/producer/base.py | 38 +++++++++++++++++++------------ kafka/producer/keyed.py | 2 +- kafka/producer/simple.py | 2 +- test/test_failover_integration.py | 8 +++---- test/test_producer_integration.py | 8 +++---- test/test_producer_legacy.py | 10 ++++---- 7 files changed, 42 insertions(+), 34 deletions(-) diff --git a/docs/simple.rst b/docs/simple.rst index 8192a8b76..afdb9756c 100644 --- a/docs/simple.rst +++ b/docs/simple.rst @@ -49,7 +49,7 @@ Asynchronous Mode # To send messages asynchronously client = SimpleClient('localhost:9092') - producer = SimpleProducer(client, async=True) + producer = SimpleProducer(client, async_send=True) producer.send_messages('my-topic', b'async message') # To send messages in batch. You can use any of the available @@ -60,7 +60,7 @@ Asynchronous Mode # * If the producer dies before the messages are sent, there will be losses # * Call producer.stop() to send the messages and cleanup producer = SimpleProducer(client, - async=True, + async_send=True, batch_send_every_n=20, batch_send_every_t=60) @@ -73,7 +73,7 @@ Synchronous Mode # To send messages synchronously client = SimpleClient('localhost:9092') - producer = SimpleProducer(client, async=False) + producer = SimpleProducer(client, async_send=False) # Note that the application is responsible for encoding messages to type bytes producer.send_messages('my-topic', b'some message') @@ -88,7 +88,7 @@ Synchronous Mode # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed # by all in sync replicas before sending a response producer = SimpleProducer(client, - async=False, + async_send=False, req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=2000, sync_fail_on_error=False) diff --git a/kafka/producer/base.py b/kafka/producer/base.py index c038bd3a0..e8d6c3d27 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -226,7 +226,7 @@ class Producer(object): Arguments: client (kafka.SimpleClient): instance to use for broker - communications. If async=True, the background thread will use + communications. If async_send=True, the background thread will use :meth:`client.copy`, which is expected to return a thread-safe object. codec (kafka.protocol.ALL_CODECS): compression codec to use. @@ -238,11 +238,11 @@ class Producer(object): sync_fail_on_error (bool, optional): whether sync producer should raise exceptions (True), or just return errors (False), defaults to True. - async (bool, optional): send message using a background thread, + async_send (bool, optional): send message using a background thread, defaults to False. 
- batch_send_every_n (int, optional): If async is True, messages are + batch_send_every_n (int, optional): If async_send is True, messages are sent in batches of this size, defaults to 20. - batch_send_every_t (int or float, optional): If async is True, + batch_send_every_t (int or float, optional): If async_send is True, messages are sent immediately after this timeout in seconds, even if there are fewer than batch_send_every_n, defaults to 20. async_retry_limit (int, optional): number of retries for failed messages @@ -268,8 +268,10 @@ class Producer(object): defaults to 30. Deprecated Arguments: + async (bool, optional): send message using a background thread, + defaults to False. Deprecated, use 'async_send' batch_send (bool, optional): If True, messages are sent by a background - thread in batches, defaults to False. Deprecated, use 'async' + thread in batches, defaults to False. Deprecated, use 'async_send' """ ACK_NOT_REQUIRED = 0 # No ack is required ACK_AFTER_LOCAL_WRITE = 1 # Send response after it is written to log @@ -282,8 +284,8 @@ def __init__(self, client, codec=None, codec_compresslevel=None, sync_fail_on_error=SYNC_FAIL_ON_ERROR_DEFAULT, - async=False, - batch_send=False, # deprecated, use async + async_send=False, + batch_send=False, # deprecated, use async_send batch_send_every_n=BATCH_SEND_MSG_COUNT, batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL, async_retry_limit=ASYNC_RETRY_LIMIT, @@ -292,15 +294,21 @@ def __init__(self, client, async_queue_maxsize=ASYNC_QUEUE_MAXSIZE, async_queue_put_timeout=ASYNC_QUEUE_PUT_TIMEOUT, async_log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR, - async_stop_timeout=ASYNC_STOP_TIMEOUT_SECS): + async_stop_timeout=ASYNC_STOP_TIMEOUT_SECS, + **kwargs): + + # async renamed async_send for python3.7 support + if 'async' in kwargs: + log.warning('Deprecated async option found -- use async_send') + async_send = kwargs['async'] - if async: + if async_send: assert batch_send_every_n > 0 assert batch_send_every_t > 0 assert async_queue_maxsize >= 0 self.client = client - self.async = async + self.async_send = async_send self.req_acks = req_acks self.ack_timeout = ack_timeout self.stopped = False @@ -313,7 +321,7 @@ def __init__(self, client, self.codec = codec self.codec_compresslevel = codec_compresslevel - if self.async: + if self.async_send: # Messages are sent through this queue self.queue = Queue(async_queue_maxsize) self.async_queue_put_timeout = async_queue_put_timeout @@ -400,7 +408,7 @@ def _send_messages(self, topic, partition, *msg, **kwargs): if key is not None and not isinstance(key, six.binary_type): raise TypeError("the key must be type bytes") - if self.async: + if self.async_send: for idx, m in enumerate(msg): try: item = (TopicPartition(topic, partition), m, key) @@ -435,7 +443,7 @@ def stop(self, timeout=None): log.warning('timeout argument to stop() is deprecated - ' 'it will be removed in future release') - if not self.async: + if not self.async_send: log.warning('producer.stop() called, but producer is not async') return @@ -443,7 +451,7 @@ def stop(self, timeout=None): log.warning('producer.stop() called, but producer is already stopped') return - if self.async: + if self.async_send: self.queue.put((STOP_ASYNC_PRODUCER, None, None)) self.thread_stop_event.set() self.thread.join() @@ -471,5 +479,5 @@ def stop(self, timeout=None): self.stopped = True def __del__(self): - if self.async and not self.stopped: + if self.async_send and not self.stopped: self.stop() diff --git a/kafka/producer/keyed.py b/kafka/producer/keyed.py index 
8de3ad80f..62bb733fc 100644 --- a/kafka/producer/keyed.py +++ b/kafka/producer/keyed.py @@ -46,4 +46,4 @@ def send(self, topic, key, msg): return self.send_messages(topic, key, msg) def __repr__(self): - return '' % self.async + return '' % self.async_send diff --git a/kafka/producer/simple.py b/kafka/producer/simple.py index 589363c93..91e0abc4c 100644 --- a/kafka/producer/simple.py +++ b/kafka/producer/simple.py @@ -51,4 +51,4 @@ def send_messages(self, topic, *msg): ) def __repr__(self): - return '' % self.async + return '' % self.async_send diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index 8531cfbe8..797e1c8ea 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -60,7 +60,7 @@ def test_switch_leader(self): # require that the server commit messages to all in-sync replicas # so that failover doesn't lose any messages on server-side # and we can assert that server-side message count equals client-side - producer = Producer(self.client, async=False, + producer = Producer(self.client, async_send=False, req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT) # Send 100 random messages to a specific partition @@ -101,7 +101,7 @@ def test_switch_leader_async(self): partition = 0 # Test the base class Producer -- send_messages to a specific partition - producer = Producer(self.client, async=True, + producer = Producer(self.client, async_send=True, batch_send_every_n=15, batch_send_every_t=3, req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT, @@ -146,7 +146,7 @@ def test_switch_leader_async(self): def test_switch_leader_keyed_producer(self): topic = self.topic - producer = KeyedProducer(self.client, async=False) + producer = KeyedProducer(self.client, async_send=False) # Send 10 random messages for _ in range(10): @@ -182,7 +182,7 @@ def test_switch_leader_keyed_producer(self): producer.send_messages(topic, key, msg) def test_switch_leader_simple_consumer(self): - producer = Producer(self.client, async=False) + producer = Producer(self.client, async_send=False) consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10) self._send_random_messages(producer, self.topic, 0, 2) consumer.get_messages() diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index dffa8d8d1..3a25de2cf 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -213,7 +213,7 @@ def test_async_simple_producer(self): partition = self.client.get_partition_ids_for_topic(self.topic)[0] start_offset = self.current_offset(self.topic, partition) - producer = SimpleProducer(self.client, async=True, random_start=False) + producer = SimpleProducer(self.client, async_send=True, random_start=False) resp = producer.send_messages(self.topic, self.msg("one")) self.assertEqual(len(resp), 0) @@ -232,7 +232,7 @@ def test_batched_simple_producer__triggers_by_message(self): batch_interval = 5 producer = SimpleProducer( self.client, - async=True, + async_send=True, batch_send_every_n=batch_messages, batch_send_every_t=batch_interval, random_start=False) @@ -297,7 +297,7 @@ def test_batched_simple_producer__triggers_by_time(self): batch_interval = 5 producer = SimpleProducer( self.client, - async=True, + async_send=True, batch_send_every_n=100, batch_send_every_t=batch_interval, random_start=False) @@ -429,7 +429,7 @@ def test_async_keyed_producer(self): producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner, - async=True, + async_send=True, batch_send_every_t=1) 
resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one")) diff --git a/test/test_producer_legacy.py b/test/test_producer_legacy.py index 9b87c7664..6d00116c3 100644 --- a/test/test_producer_legacy.py +++ b/test/test_producer_legacy.py @@ -73,7 +73,7 @@ def partitions(topic): @patch('kafka.producer.base._send_upstream') def test_producer_async_queue_overfilled(self, mock): queue_size = 2 - producer = Producer(MagicMock(), async=True, + producer = Producer(MagicMock(), async_send=True, async_queue_maxsize=queue_size) topic = b'test-topic' @@ -95,25 +95,25 @@ def test_producer_sync_fail_on_error(self): with patch.object(SimpleClient, '_send_broker_aware_request', return_value = [error]): client = SimpleClient(MagicMock()) - producer = SimpleProducer(client, async=False, sync_fail_on_error=False) + producer = SimpleProducer(client, async_send=False, sync_fail_on_error=False) # This should not raise (response,) = producer.send_messages('foobar', b'test message') self.assertEqual(response, error) - producer = SimpleProducer(client, async=False, sync_fail_on_error=True) + producer = SimpleProducer(client, async_send=False, sync_fail_on_error=True) with self.assertRaises(FailedPayloadsError): producer.send_messages('foobar', b'test message') def test_cleanup_is_not_called_on_stopped_producer(self): - producer = Producer(MagicMock(), async=True) + producer = Producer(MagicMock(), async_send=True) producer.stopped = True with patch.object(producer, 'stop') as mocked_stop: producer._cleanup_func(producer) self.assertEqual(mocked_stop.call_count, 0) def test_cleanup_is_called_on_running_producer(self): - producer = Producer(MagicMock(), async=True) + producer = Producer(MagicMock(), async_send=True) producer.stopped = False with patch.object(producer, 'stop') as mocked_stop: producer._cleanup_func(producer) From df77a1523434de835ed4c6634a72dbbdc088fafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Rosa=20Furtado?= Date: Sun, 15 Apr 2018 22:30:21 -0300 Subject: [PATCH 203/291] Improve BrokerConnection initialization (#1475) --- kafka/conn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index 2320eeade..726142130 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -949,9 +949,9 @@ def check_version(self, timeout=2, strict=False): selector = self.config['selector']() selector.register(self._sock, selectors.EVENT_READ) while not (f.is_done and mr.is_done): + selector.select(1) for response, future in self.recv(): future.success(response) - selector.select(1) selector.close() if f.succeeded(): From fbf34e0793502d2a0267b174b530efde3aade73d Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 08:43:25 -0700 Subject: [PATCH 204/291] Ignore MetadataResponses with empty broker list (#1506) --- kafka/cluster.py | 3 ++- kafka/errors.py | 4 ++++ test/test_cluster.py | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/test_cluster.py diff --git a/kafka/cluster.py b/kafka/cluster.py index 5be3c2f65..f3c48550f 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -211,7 +211,8 @@ def update_metadata(self, metadata): return self.failed_update(error) if not metadata.brokers: - log.warning("No broker metadata found in MetadataResponse") + log.warning("No broker metadata found in MetadataResponse -- ignoring.") + return self.failed_update(Errors.MetadataEmptyBrokerList(metadata)) _new_brokers = {} for broker in metadata.brokers: diff --git a/kafka/errors.py b/kafka/errors.py index 
c70853c69..f7510b8c1 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -54,6 +54,10 @@ class StaleMetadata(KafkaError): invalid_metadata = True +class MetadataEmptyBrokerList(KafkaError): + retriable = True + + class UnrecognizedBrokerVersion(KafkaError): pass diff --git a/test/test_cluster.py b/test/test_cluster.py new file mode 100644 index 000000000..f010c4f71 --- /dev/null +++ b/test/test_cluster.py @@ -0,0 +1,22 @@ +# pylint: skip-file +from __future__ import absolute_import + +import pytest + +from kafka.cluster import ClusterMetadata +from kafka.protocol.metadata import MetadataResponse + + +def test_empty_broker_list(): + cluster = ClusterMetadata() + assert len(cluster.brokers()) == 0 + + cluster.update_metadata(MetadataResponse[0]( + [(0, 'foo', 12), (1, 'bar', 34)], [])) + assert len(cluster.brokers()) == 2 + + # empty broker list response should be ignored + cluster.update_metadata(MetadataResponse[0]( + [], # empty brokers + [(17, 'foo', []), (17, 'bar', [])])) # topics w/ error + assert len(cluster.brokers()) == 2 From ee8b943f78ea00e4ee43bbfb97c69f66af00446d Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 08:48:41 -0700 Subject: [PATCH 205/291] Improve connection handling when bootstrap list is invalid (#1507) * only perform single dns lookup for connect_blocking() * fix blocking timeout in check_version() --- kafka/conn.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kafka/conn.py b/kafka/conn.py index 726142130..68e5b73b7 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -292,11 +292,7 @@ def connect_blocking(self, timeout=float('inf')): # First attempt to perform dns lookup # note that the underlying interface, socket.getaddrinfo, # has no explicit timeout so we may exceed the user-specified timeout - while time.time() < timeout: - if self._dns_lookup(): - break - else: - return False + self._dns_lookup() # Loop once over all returned dns entries selector = None @@ -903,6 +899,7 @@ def check_version(self, timeout=2, strict=False): Returns: version tuple, i.e. (0, 10), (0, 9), (0, 8, 2), ... 
""" + timeout_at = time.time() + timeout log.info('Probing node %s broker version', self.node_id) # Monkeypatch some connection configurations to avoid timeouts override_config = { @@ -932,7 +929,7 @@ def check_version(self, timeout=2, strict=False): ] for version, request in test_cases: - if not self.connect_blocking(timeout): + if not self.connect_blocking(timeout_at - time.time()): raise Errors.NodeNotReadyError() f = self.send(request) # HACK: sleeping to wait for socket to send bytes From 17fcdb11a9255d32cdee3e6455a26cac458e2289 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Wed, 18 Apr 2018 08:29:19 -0700 Subject: [PATCH 206/291] Always acquire client lock before coordinator lock to avoid deadlocks (#1464) --- kafka/coordinator/base.py | 123 ++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 9f67d6b6d..14b8c3f4a 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -231,20 +231,19 @@ def coordinator(self): Returns: the current coordinator id or None if it is unknown """ - with self._lock: - if self.coordinator_id is None: - return None - elif self._client.is_disconnected(self.coordinator_id): - self.coordinator_dead('Node Disconnected') - return None - else: - return self.coordinator_id + if self.coordinator_id is None: + return None + elif self._client.is_disconnected(self.coordinator_id): + self.coordinator_dead('Node Disconnected') + return None + else: + return self.coordinator_id def ensure_coordinator_ready(self): """Block until the coordinator for this group is known (and we have an active connection -- java client uses unsent queue). """ - with self._lock: + with self._client._lock, self._lock: while self.coordinator_unknown(): # Prior to 0.8.2 there was no group coordinator @@ -274,17 +273,18 @@ def _reset_find_coordinator_future(self, result): self._find_coordinator_future = None def lookup_coordinator(self): - if self._find_coordinator_future is not None: - return self._find_coordinator_future - - # If there is an error sending the group coordinator request - # then _reset_find_coordinator_future will immediately fire and - # set _find_coordinator_future = None - # To avoid returning None, we capture the future in a local variable - self._find_coordinator_future = self._send_group_coordinator_request() - future = self._find_coordinator_future - self._find_coordinator_future.add_both(self._reset_find_coordinator_future) - return future + with self._client._lock, self._lock: + if self._find_coordinator_future is not None: + return self._find_coordinator_future + + # If there is an error sending the group coordinator request + # then _reset_find_coordinator_future will immediately fire and + # set _find_coordinator_future = None + # To avoid returning None, we capture the future in a local variable + future = self._send_group_coordinator_request() + self._find_coordinator_future = future + self._find_coordinator_future.add_both(self._reset_find_coordinator_future) + return future def need_rejoin(self): """Check whether the group should be rejoined (e.g. 
if metadata changes) @@ -487,7 +487,7 @@ def _handle_join_group_response(self, future, send_time, response): log.debug("Received successful JoinGroup response for group %s: %s", self.group_id, response) self.sensors.join_latency.record((time.time() - send_time) * 1000) - with self._lock: + with self._client._lock, self._lock: if self.state is not MemberState.REBALANCING: # if the consumer was woken up before a rebalance completes, # we may have already left the group. In this case, we do @@ -663,7 +663,7 @@ def _handle_group_coordinator_response(self, future, response): error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - with self._lock: + with self._client._lock, self._lock: ok = self._client.cluster.add_group_coordinator(self.group_id, response) if not ok: # This could happen if coordinator metadata is different @@ -693,11 +693,10 @@ def _handle_group_coordinator_response(self, future, response): def coordinator_dead(self, error): """Mark the current coordinator as dead.""" - with self._lock: - if self.coordinator_id is not None: - log.warning("Marking the coordinator dead (node %s) for group %s: %s.", - self.coordinator_id, self.group_id, error) - self.coordinator_id = None + if self.coordinator_id is not None: + log.warning("Marking the coordinator dead (node %s) for group %s: %s.", + self.coordinator_id, self.group_id, error) + self.coordinator_id = None def generation(self): """Get the current generation state if the group is stable. @@ -741,13 +740,13 @@ def __del__(self): def close(self): """Close the coordinator, leave the current group, and reset local generation / member_id""" - with self._lock: + with self._client._lock, self._lock: self._close_heartbeat_thread() self.maybe_leave_group() def maybe_leave_group(self): """Leave the current group and reset local generation/memberId.""" - with self._lock: + with self._client._lock, self._lock: if (not self.coordinator_unknown() and self.state is not MemberState.UNJOINED and self._generation is not Generation.NO_GENERATION): @@ -939,40 +938,46 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - self.coordinator._client.poll(timeout_ms=0) - - if self.coordinator.coordinator_unknown(): - future = self.coordinator.lookup_coordinator() - if not future.is_done or future.failed(): - # the immediate future check ensures that we backoff - # properly in the case that no brokers are available - # to connect to (and the future is automatically failed). + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + # + # Release coordinator lock during client poll to avoid deadlocks + # if/when connection errback needs coordinator lock + self.coordinator._client.poll(timeout_ms=0) + + if self.coordinator.coordinator_unknown(): + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). 
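# Illustrative aside on the lock ordering in this patch -- a minimal sketch
# with stand-in locks, not the real client or coordinator objects. A deadlock
# needs two threads acquiring the same pair of locks in opposite orders, so
# taking the client lock before the coordinator lock everywhere removes that
# possibility:
#
#     import threading
#     client_lock = threading.RLock()
#     coordinator_lock = threading.Condition()
#
#     def foreground_poll():
#         with client_lock, coordinator_lock:   # client lock always first
#             pass
#
#     def heartbeat_poll():
#         with client_lock, coordinator_lock:   # same order, so no deadlock
#             pass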
+ with self.coordinator._lock: self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - elif self.coordinator.heartbeat.session_timeout_expired(): - # the session timeout has expired without seeing a - # successful heartbeat, so we should probably make sure - # the coordinator is still healthy. - log.debug('Heartbeat session expired, marking coordinator dead') - self.coordinator.coordinator_dead('Heartbeat session expired') - - elif self.coordinator.heartbeat.poll_timeout_expired(): - # the poll timeout has expired, which means that the - # foreground thread has stalled in between calls to - # poll(), so we explicitly leave the group. - log.debug('Heartbeat poll expired, leaving group') - self.coordinator.maybe_leave_group() - - elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.debug('Not ready to heartbeat, waiting') + elif self.coordinator.heartbeat.session_timeout_expired(): + # the session timeout has expired without seeing a + # successful heartbeat, so we should probably make sure + # the coordinator is still healthy. + log.warning('Heartbeat session expired, marking coordinator dead') + self.coordinator.coordinator_dead('Heartbeat session expired') + + elif self.coordinator.heartbeat.poll_timeout_expired(): + # the poll timeout has expired, which means that the + # foreground thread has stalled in between calls to + # poll(), so we explicitly leave the group. + log.warning('Heartbeat poll expired, leaving group') + self.coordinator.maybe_leave_group() + + elif not self.coordinator.heartbeat.should_heartbeat(): + # poll again after waiting for the retry backoff in case + # the heartbeat failed or the coordinator disconnected + log.log(0, 'Not ready to heartbeat, waiting') + with self.coordinator._lock: self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - else: + else: + with self.coordinator._client._lock, self.coordinator._lock: self.coordinator.heartbeat.sent_heartbeat() future = self.coordinator._send_heartbeat_request() future.add_callback(self._handle_heartbeat_success) From 4ee87c7e6ceb0dc3532ccdf09476acc53432e1e4 Mon Sep 17 00:00:00 2001 From: Stephen SORRIAUX Date: Wed, 18 Apr 2018 23:55:49 +0200 Subject: [PATCH 207/291] Added AlterConfigs and DescribeConfigs apis (#1472) --- kafka/protocol/admin.py | 97 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index 40963acb1..f22a8b6ae 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Boolean, Bytes, Int16, Int32, Schema, String +from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Schema, String class ApiVersionResponse_v0(Response): @@ -310,6 +310,101 @@ class SaslHandShakeRequest_v1(Request): SaslHandShakeRequest = [SaslHandShakeRequest_v0, SaslHandShakeRequest_v1] SaslHandShakeResponse = [SaslHandShakeResponse_v0, SaslHandShakeResponse_v1] +class AlterConfigsResponse_v0(Response): + API_KEY = 33 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')))) + ) + +class AlterConfigsRequest_v0(Request): + API_KEY 
= 33 + API_VERSION = 0 + RESPONSE_TYPE = AlterConfigsResponse_v0 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')))))), + ('validate_only', Boolean) + ) + +AlterConfigsRequest = [AlterConfigsRequest_v0] +AlterConfigsResponse = [AlterConfigsResponse_v0] + + +class DescribeConfigsResponse_v0(Response): + API_KEY = 32 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('is_default', Boolean), + ('is_sensitive', Boolean))))) + ) + +class DescribeConfigsResponse_v1(Response): + API_KEY = 32 + API_VERSION = 1 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('is_default', Boolean), + ('is_sensitive', Boolean), + ('config_synonyms', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')), + ('config_source', Int8))))))) + ) + +class DescribeConfigsRequest_v0(Request): + API_KEY = 32 + API_VERSION = 0 + RESPONSE_TYPE = DescribeConfigsResponse_v0 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_names', Array(String('utf-8'))))) + ) + +class DescribeConfigsRequest_v1(Request): + API_KEY = 32 + API_VERSION = 1 + RESPONSE_TYPE = DescribeConfigsResponse_v1 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_names', Array(String('utf-8'))))), + ('include_synonyms', Boolean) + ) + +DescribeConfigsRequest = [DescribeConfigsRequest_v0, DescribeConfigsRequest_v1] +DescribeConfigsResponse = [DescribeConfigsResponse_v0, DescribeConfigsResponse_v1] class SaslAuthenticateResponse_v0(Request): API_KEY = 36 From ea7187d0baf8f3fe4011df570cf524df8822cae9 Mon Sep 17 00:00:00 2001 From: Stephen SORRIAUX Date: Thu, 5 Apr 2018 22:44:54 +0200 Subject: [PATCH 208/291] Fix CreatePartitionsRequest_v0 (#1469) --- kafka/protocol/admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index f22a8b6ae..ed9026a52 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -450,7 +450,7 @@ class CreatePartitionsRequest_v0(Request): ('topic', String('utf-8')), ('new_partitions', Schema( ('count', Int32), - ('assignment', Array(Int32)))))), + ('assignment', Array(Array(Int32))))))), ('timeout', Int32), ('validate_only', Boolean) ) From ecc77e6310a422b26b56d3e279a6f3367a8beb34 Mon Sep 17 00:00:00 2001 From: "Michael P. 
Nitowski" Date: Wed, 18 Apr 2018 20:31:14 -0400 Subject: [PATCH 209/291] Force lz4 to disable Kafka-unsupported block linking when encoding (#1476) --- kafka/codec.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kafka/codec.py b/kafka/codec.py index de15e7928..4d180ddd3 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -18,6 +18,18 @@ try: import lz4.frame as lz4 + + def _lz4_compress(payload, **kwargs): + # Kafka does not support LZ4 dependent blocks + try: + # For lz4>=0.12.0 + kwargs.pop('block_linked', None) + return lz4.compress(payload, block_linked=False, **kwargs) + except TypeError: + # For earlier versions of lz4 + kwargs.pop('block_mode', None) + return lz4.compress(payload, block_mode=1, **kwargs) + except ImportError: lz4 = None @@ -202,7 +214,7 @@ def snappy_decode(payload): if lz4: - lz4_encode = lz4.compress # pylint: disable-msg=no-member + lz4_encode = _lz4_compress # pylint: disable-msg=no-member elif lz4f: lz4_encode = lz4f.compressFrame # pylint: disable-msg=no-member elif lz4framed: From 6eafb9c7f07f9b246cf2ef8be423bca2449ecda0 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 10 May 2018 16:12:19 -0700 Subject: [PATCH 210/291] Stop shadowing `ConnectionError` In Python3, `ConnectionError` is a native exception. So rename our custom one to `KafkaConnectionError` to prevent accidentally shadowing the native one. Note that there are still valid uses of `ConnectionError` in this code. They already expect a native Python3 `ConnectionError`, and also already handle the Python2 compatibility issues. --- kafka/client.py | 8 ++++---- kafka/client_async.py | 2 +- kafka/conn.py | 28 ++++++++++++++-------------- kafka/errors.py | 6 +++--- kafka/producer/base.py | 1 - test/test_client.py | 2 +- test/test_conn.py | 4 ++-- test/test_failover_integration.py | 6 +++--- 8 files changed, 28 insertions(+), 29 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 00877d1b3..8225aa784 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -18,7 +18,7 @@ from kafka.vendor import six import kafka.errors -from kafka.errors import (UnknownError, ConnectionError, FailedPayloadsError, +from kafka.errors import (UnknownError, KafkaConnectionError, FailedPayloadsError, KafkaTimeoutError, KafkaUnavailableError, LeaderNotAvailableError, UnknownTopicOrPartitionError, NotLeaderForPartitionError, ReplicaNotAvailableError, @@ -113,7 +113,7 @@ def _get_conn(self, host, port, afi, node_id='bootstrap'): conn = self._conns[host_key] if not conn.connect_blocking(self.timeout): conn.close() - raise ConnectionError("%s:%s (%s)" % (host, port, afi)) + raise KafkaConnectionError("%s:%s (%s)" % (host, port, afi)) return conn def _get_leader_for_partition(self, topic, partition): @@ -196,7 +196,7 @@ def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn): for (host, port, afi, node_id) in hosts: try: conn = self._get_conn(host, port, afi, node_id) - except ConnectionError: + except KafkaConnectionError: log.warning("Skipping unconnected connection: %s:%s (AFI %s)", host, port, afi) continue @@ -283,7 +283,7 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: conn = self._get_conn(host, broker.port, afi, broker.nodeId) - except ConnectionError: + except KafkaConnectionError: refresh_metadata = True failed_payloads(broker_payloads) continue diff --git a/kafka/client_async.py b/kafka/client_async.py index ff9730eb2..9b1c1c497 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -602,7 +602,7 @@ def 
_poll(self, timeout): log.warning('Protocol out of sync on %r, closing', conn) except socket.error: pass - conn.close(Errors.ConnectionError('Socket EVENT_READ without in-flight-requests')) + conn.close(Errors.KafkaConnectionError('Socket EVENT_READ without in-flight-requests')) continue self._idle_expiry_manager.update(conn.node_id) diff --git a/kafka/conn.py b/kafka/conn.py index 68e5b73b7..4f0085c61 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -323,7 +323,7 @@ def connect(self): self.last_attempt = time.time() next_lookup = self._next_afi_sockaddr() if not next_lookup: - self.close(Errors.ConnectionError('DNS failure')) + self.close(Errors.KafkaConnectionError('DNS failure')) return else: log.debug('%s: creating new socket', self) @@ -377,12 +377,12 @@ def connect(self): log.error('Connect attempt to %s returned error %s.' ' Disconnecting.', self, ret) errstr = errno.errorcode.get(ret, 'UNKNOWN') - self.close(Errors.ConnectionError('{} {}'.format(ret, errstr))) + self.close(Errors.KafkaConnectionError('{} {}'.format(ret, errstr))) # Connection timed out elif time.time() > request_timeout + self.last_attempt: log.error('Connection attempt to %s timed out', self) - self.close(Errors.ConnectionError('timeout')) + self.close(Errors.KafkaConnectionError('timeout')) # Needs retry else: @@ -459,7 +459,7 @@ def _try_handshake(self): pass except (SSLZeroReturnError, ConnectionError, SSLEOFError): log.warning('SSL connection closed by server during handshake.') - self.close(Errors.ConnectionError('SSL connection closed by server during handshake')) + self.close(Errors.KafkaConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user return False @@ -484,7 +484,7 @@ def _try_authenticate(self): return False elif self._sasl_auth_future.failed(): ex = self._sasl_auth_future.exception - if not isinstance(ex, Errors.ConnectionError): + if not isinstance(ex, Errors.KafkaConnectionError): raise ex # pylint: disable-msg=raising-bad-type return self._sasl_auth_future.succeeded() @@ -554,8 +554,8 @@ def _try_authenticate_plain(self, future): data = self._recv_bytes_blocking(4) except ConnectionError as e: - log.exception("%s: Error receiving reply from server", self) - error = Errors.ConnectionError("%s: %s" % (self, e)) + log.exception("%s: Error receiving reply from server", self) + error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) return future.failure(error) @@ -617,7 +617,7 @@ def _try_authenticate_gssapi(self, future): except ConnectionError as e: log.exception("%s: Error receiving reply from server", self) - error = Errors.ConnectionError("%s: %s" % (self, e)) + error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) return future.failure(error) except Exception as e: @@ -697,7 +697,7 @@ def close(self, error=None): Arguments: error (Exception, optional): pending in-flight-requests will be failed with this exception. - Default: kafka.errors.ConnectionError. + Default: kafka.errors.KafkaConnectionError. 
""" if self.state is ConnectionStates.DISCONNECTED: if error is not None: @@ -729,7 +729,7 @@ def send(self, request): if self.connecting(): return future.failure(Errors.NodeNotReadyError(str(self))) elif not self.connected(): - return future.failure(Errors.ConnectionError(str(self))) + return future.failure(Errors.KafkaConnectionError(str(self))) elif not self.can_send_more(): return future.failure(Errors.TooManyInFlightRequests(str(self))) return self._send(request) @@ -749,7 +749,7 @@ def _send(self, request): self._sensors.bytes_sent.record(total_bytes) except ConnectionError as e: log.exception("Error sending %s to %s", request, self) - error = Errors.ConnectionError("%s: %s" % (self, e)) + error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) return future.failure(error) log.debug('%s Request %d: %s', self, correlation_id, request) @@ -777,7 +777,7 @@ def recv(self): # If requests are pending, we should close the socket and # fail all the pending request futures if self.in_flight_requests: - self.close(Errors.ConnectionError('Socket not connected during recv with in-flight-requests')) + self.close(Errors.KafkaConnectionError('Socket not connected during recv with in-flight-requests')) return () elif not self.in_flight_requests: @@ -817,7 +817,7 @@ def _recv(self): # without an exception raised if not data: log.error('%s: socket disconnected', self) - self.close(error=Errors.ConnectionError('socket disconnected')) + self.close(error=Errors.KafkaConnectionError('socket disconnected')) return [] else: recvd.append(data) @@ -829,7 +829,7 @@ def _recv(self): break log.exception('%s: Error receiving network data' ' closing socket', self) - self.close(error=Errors.ConnectionError(e)) + self.close(error=Errors.KafkaConnectionError(e)) return [] except BlockingIOError: if six.PY3: diff --git a/kafka/errors.py b/kafka/errors.py index f7510b8c1..93a9f405e 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -451,7 +451,7 @@ def __init__(self, payload, *args): self.payload = payload -class ConnectionError(KafkaError): +class KafkaConnectionError(KafkaError): retriable = True invalid_metadata = True @@ -521,13 +521,13 @@ def check_error(response): RETRY_BACKOFF_ERROR_TYPES = ( KafkaUnavailableError, LeaderNotAvailableError, - ConnectionError, FailedPayloadsError + KafkaConnectionError, FailedPayloadsError ) RETRY_REFRESH_ERROR_TYPES = ( NotLeaderForPartitionError, UnknownTopicOrPartitionError, - LeaderNotAvailableError, ConnectionError + LeaderNotAvailableError, KafkaConnectionError ) diff --git a/kafka/producer/base.py b/kafka/producer/base.py index e8d6c3d27..c9dd6c3a1 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -372,7 +372,6 @@ def send_messages(self, topic, partition, *msg): Raises: FailedPayloadsError: low-level connection error, can be caused by networking failures, or a malformed request. - ConnectionError: KafkaUnavailableError: all known brokers are down when attempting to refresh metadata. 
LeaderNotAvailableError: topic or partition is initializing or diff --git a/test/test_client.py b/test/test_client.py index 4135af0c2..77b2a7462 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -8,7 +8,7 @@ from kafka import SimpleClient from kafka.errors import ( KafkaUnavailableError, LeaderNotAvailableError, KafkaTimeoutError, - UnknownTopicOrPartitionError, ConnectionError, FailedPayloadsError) + UnknownTopicOrPartitionError, FailedPayloadsError) from kafka.future import Future from kafka.protocol import KafkaProtocol, create_message from kafka.protocol.metadata import MetadataResponse diff --git a/test/test_conn.py b/test/test_conn.py index 12a32efb2..fbdeeb9e7 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -99,7 +99,7 @@ def test_send_disconnected(conn): conn.state = ConnectionStates.DISCONNECTED f = conn.send('foobar') assert f.failed() is True - assert isinstance(f.exception, Errors.ConnectionError) + assert isinstance(f.exception, Errors.KafkaConnectionError) def test_send_connecting(conn): @@ -162,7 +162,7 @@ def test_send_error(_socket, conn): _socket.send.side_effect = socket.error f = conn.send(req) assert f.failed() is True - assert isinstance(f.exception, Errors.ConnectionError) + assert isinstance(f.exception, Errors.KafkaConnectionError) assert _socket.close.call_count == 1 assert conn.state is ConnectionStates.DISCONNECTED diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index 797e1c8ea..ad7dcb98b 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -4,7 +4,7 @@ from kafka import SimpleClient, SimpleConsumer, KeyedProducer from kafka.errors import ( - FailedPayloadsError, ConnectionError, RequestTimedOutError, + FailedPayloadsError, KafkaConnectionError, RequestTimedOutError, NotLeaderForPartitionError) from kafka.producer.base import Producer from kafka.structs import TopicPartition @@ -79,7 +79,7 @@ def test_switch_leader(self): producer.send_messages(topic, partition, b'success') log.debug("success!") recovered = True - except (FailedPayloadsError, ConnectionError, RequestTimedOutError, + except (FailedPayloadsError, KafkaConnectionError, RequestTimedOutError, NotLeaderForPartitionError): log.debug("caught exception sending message -- will retry") continue @@ -167,7 +167,7 @@ def test_switch_leader_keyed_producer(self): producer.send_messages(topic, key, msg) if producer.partitioners[topic].partition(key) == 0: recovered = True - except (FailedPayloadsError, ConnectionError, RequestTimedOutError, + except (FailedPayloadsError, KafkaConnectionError, RequestTimedOutError, NotLeaderForPartitionError): log.debug("caught exception sending message -- will retry") continue From 8b84fa9b3fc72a8e6c3446cd157828787334d5eb Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 22 May 2018 11:33:36 -0700 Subject: [PATCH 211/291] Document methods that return None If a valid broker in the cluster has no partitions, it will return None rather than an empty set. Similarly updated a few other methods. --- kafka/cluster.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kafka/cluster.py b/kafka/cluster.py index f3c48550f..8078eb7cf 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -96,6 +96,7 @@ def available_partitions_for_topic(self, topic): Returns: set: {partition (int), ...} + None if topic not found. 
""" if topic not in self._partitions: return None @@ -119,6 +120,7 @@ def partitions_for_broker(self, broker_id): Returns: set: {TopicPartition, ...} + None if the broker either has no partitions or does not exist. """ return self._broker_partitions.get(broker_id) @@ -130,6 +132,7 @@ def coordinator_for_group(self, group): Returns: int: node_id for group coordinator + None if the group does not exist. """ return self._groups.get(group) From dfc18d356463d4589dfcd9663404dd80e8540dda Mon Sep 17 00:00:00 2001 From: Berkodev Date: Thu, 29 Mar 2018 22:39:04 +0300 Subject: [PATCH 212/291] Adds add_callback/add_errback example to docs (#1441) --- docs/usage.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 22fe20d5c..1cf1aa414 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -91,6 +91,18 @@ KafkaProducer for _ in range(100): producer.send('my-topic', b'msg') + def on_send_success(record_metadata): + print(record_metadata.topic) + print(record_metadata.partition) + print(record_metadata.offset) + + def on_send_error(excp): + log.error('I am an errback', exc_info=excp) + # handle exception + + # produce asynchronously with callbacks + producer.send('my-topic', b'raw_bytes').add_callback(on_send_success).add_errback(on_send_error) + # block until all async messages are sent producer.flush() From 9c41d686bb1e215f91ab9947f631cb46beb2c6d1 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 08:21:44 -0700 Subject: [PATCH 213/291] Fix KafkaConsumer docstring for request_timeout_ms default (#1459) --- kafka/client_async.py | 4 ++-- kafka/conn.py | 4 ++-- kafka/consumer/group.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 9b1c1c497..be896cc83 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -78,7 +78,7 @@ class KafkaClient(object): resulting in a random range between 20% below and 20% above the computed value. Default: 1000. request_timeout_ms (int): Client request timeout in milliseconds. - Default: 40000. + Default: 30000. retry_backoff_ms (int): Milliseconds to backoff when retrying on errors. Default: 100. max_in_flight_requests_per_connection (int): Requests are pipelined @@ -145,7 +145,7 @@ class KafkaClient(object): DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', 'client_id': 'kafka-python-' + __version__, - 'request_timeout_ms': 40000, + 'request_timeout_ms': 30000, 'connections_max_idle_ms': 9 * 60 * 1000, 'reconnect_backoff_ms': 50, 'reconnect_backoff_max_ms': 1000, diff --git a/kafka/conn.py b/kafka/conn.py index 4f0085c61..a2d5ee6cc 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -116,7 +116,7 @@ class BrokerConnection(object): resulting in a random range between 20% below and 20% above the computed value. Default: 1000. request_timeout_ms (int): Client request timeout in milliseconds. - Default: 40000. + Default: 30000. max_in_flight_requests_per_connection (int): Requests are pipelined to kafka brokers up to this number of maximum requests per broker connection. Default: 5. 
@@ -181,7 +181,7 @@ class BrokerConnection(object): DEFAULT_CONFIG = { 'client_id': 'kafka-python-' + __version__, 'node_id': 0, - 'request_timeout_ms': 40000, + 'request_timeout_ms': 30000, 'reconnect_backoff_ms': 50, 'reconnect_backoff_max_ms': 1000, 'max_in_flight_requests_per_connection': 5, diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 0d9e95248..9abf15e9b 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -84,7 +84,7 @@ class KafkaConsumer(six.Iterator): happens, the consumer can get stuck trying to fetch a large message on a certain partition. Default: 1048576. request_timeout_ms (int): Client request timeout in milliseconds. - Default: 40000. + Default: 305000. retry_backoff_ms (int): Milliseconds to backoff when retrying on errors. Default: 100. reconnect_backoff_ms (int): The amount of time in milliseconds to From f23e1662e93b3cff9a1092dd3679e2ccc708161e Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Tue, 31 Jul 2018 18:30:54 -0700 Subject: [PATCH 214/291] Change missing ConnectionError to KafkaConnectionError --- kafka/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index 8225aa784..d0c6f8abc 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -406,7 +406,7 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: conn = self._get_conn(host, broker.port, afi, broker.nodeId) - except ConnectionError: + except KafkaConnectionError: failed_payloads(payloads) else: From 3d631bb60895099385310d6abe83df712410e94a Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Wed, 18 Apr 2018 15:10:33 -0700 Subject: [PATCH 215/291] Skip flakey SimpleProducer test --- test/test_producer_integration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index 3a25de2cf..1b6df11d7 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -291,6 +291,7 @@ def test_batched_simple_producer__triggers_by_message(self): producer.stop() def test_batched_simple_producer__triggers_by_time(self): + self.skipTest("Flakey test -- should be refactored or removed") partitions = self.client.get_partition_ids_for_topic(self.topic) start_offsets = [self.current_offset(self.topic, p) for p in partitions] From 6612bfb95b27f2e0d7eb772d587e5e8023577189 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 22 Mar 2018 18:10:32 -0700 Subject: [PATCH 216/291] Fix skipped integration tests if KAFKA_VERSION unset (#1453) --- test/test_producer_integration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index 1b6df11d7..6533cfabb 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -19,6 +19,7 @@ from test.fixtures import ZookeeperFixture, KafkaFixture from test.testutil import KafkaIntegrationTestCase, kafka_versions, current_offset + # TODO: This duplicates a TestKafkaProducerIntegration method temporarily # while the migration to pytest is in progress def assert_produce_request(client, topic, messages, initial_offset, message_ct, @@ -34,6 +35,7 @@ def assert_produce_request(client, topic, messages, initial_offset, message_ct, assert current_offset(client, topic, partition) == initial_offset + message_ct + def assert_produce_response(resp, initial_offset): """Verify that a produce response is well-formed """ @@ -41,6 +43,7 @@ def assert_produce_response(resp, initial_offset): 
assert resp[0].error == 0 assert resp[0].offset == initial_offset + @pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") def test_produce_many_simple(simple_client, topic): """Test multiple produces using the SimpleClient From 33c0aa70a8116da5dde568e07bf6a423e4e6f8dd Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 10 May 2018 16:29:30 -0700 Subject: [PATCH 217/291] Stop using deprecated log.warn() --- kafka/consumer/fetcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index ea7d5d8a1..6ec1b71ed 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -610,7 +610,7 @@ def _handle_offset_response(self, future, response): future.failure(error_type(partition)) return elif error_type is Errors.UnknownTopicOrPartitionError: - log.warn("Received unknown topic or partition error in ListOffset " + log.warning("Received unknown topic or partition error in ListOffset " "request for partition %s. The topic/partition " + "may not exist or the user may not have Describe access " "to it.", partition) @@ -821,10 +821,10 @@ def _parse_fetched_data(self, completed_fetch): raise Errors.OffsetOutOfRangeError({tp: fetch_offset}) elif error_type is Errors.TopicAuthorizationFailedError: - log.warn("Not authorized to read from topic %s.", tp.topic) + log.warning("Not authorized to read from topic %s.", tp.topic) raise Errors.TopicAuthorizationFailedError(set(tp.topic)) elif error_type is Errors.UnknownError: - log.warn("Unknown error fetching data for topic-partition %s", tp) + log.warning("Unknown error fetching data for topic-partition %s", tp) else: raise error_type('Unexpected error while fetching data') From ff21b0308ebecb6014898b573345757474776c04 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 22 Mar 2018 15:10:40 -0700 Subject: [PATCH 218/291] Heartbeat thread start / close --- kafka/coordinator/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 14b8c3f4a..7deeaf05d 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -909,11 +909,10 @@ def close(self): def run(self): try: + log.debug('Heartbeat thread started') while not self.closed: self._run_once() - log.debug('Heartbeat thread closed') - except ReferenceError: log.debug('Heartbeat thread closed due to coordinator gc') @@ -922,6 +921,9 @@ def run(self): self.coordinator.group_id, e) self.failed = e + finally: + log.debug('Heartbeat thread closed') + def _run_once(self): with self.coordinator._lock: if not self.enabled: From 7a7b4b9dfa68fa1f1e165f8a60ad15a550120256 Mon Sep 17 00:00:00 2001 From: Taras Date: Mon, 19 Mar 2018 00:09:29 +0200 Subject: [PATCH 219/291] Fix MemoryRecord bugs re error handling and add test coverage (#1448) --- kafka/record/__init__.py | 4 +- kafka/record/default_records.py | 2 +- kafka/record/memory_records.py | 8 ++-- test/record/test_records.py | 69 ++++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/kafka/record/__init__.py b/kafka/record/__init__.py index cbd70d93a..93936df48 100644 --- a/kafka/record/__init__.py +++ b/kafka/record/__init__.py @@ -1,3 +1,3 @@ -from kafka.record.memory_records import MemoryRecords +from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder -__all__ = ["MemoryRecords"] +__all__ = ["MemoryRecords", "MemoryRecordsBuilder"] diff --git a/kafka/record/default_records.py b/kafka/record/default_records.py 
index 2bbd47e9c..840868a01 100644 --- a/kafka/record/default_records.py +++ b/kafka/record/default_records.py @@ -237,7 +237,7 @@ def _read_msg( # validate whether we have read all header bytes in the current record if pos - start_pos != length: - CorruptRecordException( + raise CorruptRecordException( "Invalid record size: expected to read {} bytes in record " "payload, but instead read {}".format(length, pos - start_pos)) self._pos = pos diff --git a/kafka/record/memory_records.py b/kafka/record/memory_records.py index cb1cc01b4..f67c4fe3a 100644 --- a/kafka/record/memory_records.py +++ b/kafka/record/memory_records.py @@ -18,6 +18,7 @@ # # So we can iterate over batches just by knowing offsets of Length. Magic is # used to construct the correct class for Batch itself. +from __future__ import division import struct @@ -131,15 +132,14 @@ def __init__(self, magic, compression_type, batch_size): def append(self, timestamp, key, value, headers=[]): """ Append a message to the buffer. - Returns: - (int, int): checksum and bytes written + Returns: RecordMetadata or None if unable to append """ if self._closed: - return None, 0 + return None offset = self._next_offset metadata = self._builder.append(offset, timestamp, key, value, headers) - # Return of 0 size means there's no space to add a new message + # Return of None means there's no space to add a new message if metadata is None: return None diff --git a/test/record/test_records.py b/test/record/test_records.py index 7306bbc52..224989f38 100644 --- a/test/record/test_records.py +++ b/test/record/test_records.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals import pytest -from kafka.record import MemoryRecords +from kafka.record import MemoryRecords, MemoryRecordsBuilder from kafka.errors import CorruptRecordException # This is real live data from Kafka 11 broker @@ -152,3 +154,68 @@ def test_memory_records_corrupt(): ) with pytest.raises(CorruptRecordException): records.next_batch() + + +@pytest.mark.parametrize("compression_type", [0, 1, 2, 3]) +@pytest.mark.parametrize("magic", [0, 1, 2]) +def test_memory_records_builder(magic, compression_type): + builder = MemoryRecordsBuilder( + magic=magic, compression_type=compression_type, batch_size=1024 * 10) + base_size = builder.size_in_bytes() # V2 has a header before + + msg_sizes = [] + for offset in range(10): + metadata = builder.append( + timestamp=10000 + offset, key=b"test", value=b"Super") + msg_sizes.append(metadata.size) + assert metadata.offset == offset + if magic > 0: + assert metadata.timestamp == 10000 + offset + else: + assert metadata.timestamp == -1 + assert builder.next_offset() == offset + 1 + + # Error appends should not leave junk behind, like null bytes or something + with pytest.raises(TypeError): + builder.append( + timestamp=None, key="test", value="Super") # Not bytes, but str + + assert not builder.is_full() + size_before_close = builder.size_in_bytes() + assert size_before_close == sum(msg_sizes) + base_size + + # Size should remain the same after closing. 
No traling bytes + builder.close() + assert builder.compression_rate() > 0 + expected_size = size_before_close * builder.compression_rate() + assert builder.is_full() + assert builder.size_in_bytes() == expected_size + buffer = builder.buffer() + assert len(buffer) == expected_size + + # We can close second time, as in retry + builder.close() + assert builder.size_in_bytes() == expected_size + assert builder.buffer() == buffer + + # Can't append after close + meta = builder.append(timestamp=None, key=b"test", value=b"Super") + assert meta is None + + +@pytest.mark.parametrize("compression_type", [0, 1, 2, 3]) +@pytest.mark.parametrize("magic", [0, 1, 2]) +def test_memory_records_builder_full(magic, compression_type): + builder = MemoryRecordsBuilder( + magic=magic, compression_type=compression_type, batch_size=1024 * 10) + + # 1 message should always be appended + metadata = builder.append( + key=None, timestamp=None, value=b"M" * 10240) + assert metadata is not None + assert builder.is_full() + + metadata = builder.append( + key=None, timestamp=None, value=b"M") + assert metadata is None + assert builder.next_offset() == 1 From 3843b9830b144881a97d533980639a1aa8c69980 Mon Sep 17 00:00:00 2001 From: Taras Date: Sun, 18 Mar 2018 15:56:47 +0200 Subject: [PATCH 220/291] Add codec validators to record parser and builder for all formats (#1447) --- Makefile | 8 ++++--- kafka/record/default_records.py | 22 +++++++++++++---- kafka/record/legacy_records.py | 18 ++++++++++++-- requirements-dev.txt | 2 +- test/conftest.py | 1 + test/record/test_default_records.py | 37 ++++++++++++++++++++++++++++- test/record/test_legacy_records.py | 31 ++++++++++++++++++++++++ test/test_consumer_integration.py | 26 ++++++++++++++++++-- test/testutil.py | 7 ++++-- 9 files changed, 137 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 7faf34004..7e7536467 100644 --- a/Makefile +++ b/Makefile @@ -43,11 +43,13 @@ test27: build-integration # Test using py.test directly if you want to use local python. Useful for other # platforms that require manual installation for C libraries, ie. Windows. 
test-local: build-integration - py.test --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF kafka test + KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ + --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF $(FLAGS) kafka test cov-local: build-integration - py.test --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka \ - --cov-config=.covrc --cov-report html kafka test + KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ + --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka \ + --cov-config=.covrc --cov-report html $(FLAGS) kafka test @echo "open file://`pwd`/htmlcov/index.html" # Check the readme for syntax errors, which can lead to invalid formatting on diff --git a/kafka/record/default_records.py b/kafka/record/default_records.py index 840868a01..955e3ee2a 100644 --- a/kafka/record/default_records.py +++ b/kafka/record/default_records.py @@ -54,17 +54,18 @@ # * Timestamp Type (3) # * Compression Type (0-2) -import io import struct import time from kafka.record.abc import ABCRecord, ABCRecordBatch, ABCRecordBatchBuilder -from kafka.record.util import decode_varint, encode_varint, calc_crc32c, size_of_varint - -from kafka.errors import CorruptRecordException +from kafka.record.util import ( + decode_varint, encode_varint, calc_crc32c, size_of_varint +) +from kafka.errors import CorruptRecordException, UnsupportedCodecError from kafka.codec import ( gzip_encode, snappy_encode, lz4_encode, gzip_decode, snappy_decode, lz4_decode ) +import kafka.codec as codecs class DefaultRecordBase(object): @@ -101,6 +102,17 @@ class DefaultRecordBase(object): LOG_APPEND_TIME = 1 CREATE_TIME = 0 + def _assert_has_codec(self, compression_type): + if compression_type == self.CODEC_GZIP: + checker, name = codecs.has_gzip, "gzip" + elif compression_type == self.CODEC_SNAPPY: + checker, name = codecs.has_snappy, "snappy" + elif compression_type == self.CODEC_LZ4: + checker, name = codecs.has_lz4, "lz4" + if not checker(): + raise UnsupportedCodecError( + "Libraries for {} compression codec not found".format(name)) + class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch): @@ -156,6 +168,7 @@ def _maybe_uncompress(self): if not self._decompressed: compression_type = self.compression_type if compression_type != self.CODEC_NONE: + self._assert_has_codec(compression_type) data = memoryview(self._buffer)[self._pos:] if compression_type == self.CODEC_GZIP: uncompressed = gzip_decode(data) @@ -481,6 +494,7 @@ def write_header(self, use_compression_type=True): def _maybe_compress(self): if self._compression_type != self.CODEC_NONE: + self._assert_has_codec(self._compression_type) header_size = self.HEADER_STRUCT.size data = bytes(self._buffer[header_size:]) if self._compression_type == self.CODEC_GZIP: diff --git a/kafka/record/legacy_records.py b/kafka/record/legacy_records.py index 036e6c45c..1bdba8152 100644 --- a/kafka/record/legacy_records.py +++ b/kafka/record/legacy_records.py @@ -49,9 +49,10 @@ from kafka.codec import ( gzip_encode, snappy_encode, lz4_encode, lz4_encode_old_kafka, - gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka + gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka, ) -from kafka.errors import CorruptRecordException +import kafka.codec as codecs +from kafka.errors import CorruptRecordException, UnsupportedCodecError class LegacyRecordBase(object): @@ -112,6 +113,17 @@ class LegacyRecordBase(object): NO_TIMESTAMP = -1 + def _assert_has_codec(self, compression_type): + if 
compression_type == self.CODEC_GZIP: + checker, name = codecs.has_gzip, "gzip" + elif compression_type == self.CODEC_SNAPPY: + checker, name = codecs.has_snappy, "snappy" + elif compression_type == self.CODEC_LZ4: + checker, name = codecs.has_lz4, "lz4" + if not checker(): + raise UnsupportedCodecError( + "Libraries for {} compression codec not found".format(name)) + class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase): @@ -166,6 +178,7 @@ def _decompress(self, key_offset): data = self._buffer[pos:pos + value_size] compression_type = self.compression_type + self._assert_has_codec(compression_type) if compression_type == self.CODEC_GZIP: uncompressed = gzip_decode(data) elif compression_type == self.CODEC_SNAPPY: @@ -419,6 +432,7 @@ def _encode_msg(self, start_pos, offset, timestamp, key, value, def _maybe_compress(self): if self._compression_type: + self._assert_has_codec(self._compression_type) data = bytes(self._buffer) if self._compression_type == self.CODEC_GZIP: compressed = gzip_encode(data) diff --git a/requirements-dev.txt b/requirements-dev.txt index 88153e01f..b98b58ab9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,6 @@ flake8==3.4.1 pytest==3.4.0 pytest-cov==2.5.1 -pytest-catchlog==1.2.2 docker-py==1.10.6 coveralls==1.2.0 Sphinx==1.6.4 @@ -13,3 +12,4 @@ pylint==1.8.2 pytest-pylint==0.7.1 pytest-mock==1.6.3 sphinx-rtd-theme==0.2.4 +crc32c==1.2 diff --git a/test/conftest.py b/test/conftest.py index ad45f2c0d..dbc2378d9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -72,6 +72,7 @@ def kafka_consumer_factory(kafka_broker, topic, request): def factory(**kafka_consumer_params): params = {} if kafka_consumer_params is None else kafka_consumer_params.copy() params.setdefault('client_id', 'consumer_%s' % (request.node.name,)) + params.setdefault('auto_offset_reset', 'earliest') _consumer[0] = next(kafka_broker.get_consumers(cnt=1, topics=[topic], **params)) return _consumer[0] diff --git a/test/record/test_default_records.py b/test/record/test_default_records.py index 193703e40..6e2f5e8ac 100644 --- a/test/record/test_default_records.py +++ b/test/record/test_default_records.py @@ -1,9 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import pytest +from mock import patch +import kafka.codec from kafka.record.default_records import ( DefaultRecordBatch, DefaultRecordBatchBuilder ) +from kafka.errors import UnsupportedCodecError @pytest.mark.parametrize("compression_type", [ @@ -17,7 +20,7 @@ def test_read_write_serde_v2(compression_type): magic=2, compression_type=compression_type, is_transactional=1, producer_id=123456, producer_epoch=123, base_sequence=9999, batch_size=999999) - headers = [] # [("header1", b"aaa"), ("header2", b"bbb")] + headers = [("header1", b"aaa"), ("header2", b"bbb")] for offset in range(10): builder.append( offset, timestamp=9999999, key=b"test", value=b"Super", @@ -167,3 +170,35 @@ def test_default_batch_size_limit(): 2, timestamp=None, key=None, value=b"M" * 700, headers=[]) assert meta is None assert len(builder.build()) < 1000 + + +@pytest.mark.parametrize("compression_type,name,checker_name", [ + (DefaultRecordBatch.CODEC_GZIP, "gzip", "has_gzip"), + (DefaultRecordBatch.CODEC_SNAPPY, "snappy", "has_snappy"), + (DefaultRecordBatch.CODEC_LZ4, "lz4", "has_lz4") +]) +@pytest.mark.parametrize("magic", [0, 1]) +def test_unavailable_codec(magic, compression_type, name, checker_name): + builder = DefaultRecordBatchBuilder( + magic=2, compression_type=compression_type, is_transactional=0, + 
producer_id=-1, producer_epoch=-1, base_sequence=-1, + batch_size=1024) + builder.append(0, timestamp=None, key=None, value=b"M" * 2000, headers=[]) + correct_buffer = builder.build() + + with patch.object(kafka.codec, checker_name) as mocked: + mocked.return_value = False + # Check that builder raises error + builder = DefaultRecordBatchBuilder( + magic=2, compression_type=compression_type, is_transactional=0, + producer_id=-1, producer_epoch=-1, base_sequence=-1, + batch_size=1024) + error_msg = "Libraries for {} compression codec not found".format(name) + with pytest.raises(UnsupportedCodecError, match=error_msg): + builder.append(0, timestamp=None, key=None, value=b"M", headers=[]) + builder.build() + + # Check that reader raises same error + batch = DefaultRecordBatch(bytes(correct_buffer)) + with pytest.raises(UnsupportedCodecError, match=error_msg): + list(batch) diff --git a/test/record/test_legacy_records.py b/test/record/test_legacy_records.py index ffe8a35f8..23b863605 100644 --- a/test/record/test_legacy_records.py +++ b/test/record/test_legacy_records.py @@ -1,8 +1,11 @@ from __future__ import unicode_literals import pytest +from mock import patch from kafka.record.legacy_records import ( LegacyRecordBatch, LegacyRecordBatchBuilder ) +import kafka.codec +from kafka.errors import UnsupportedCodecError @pytest.mark.parametrize("magic", [0, 1]) @@ -164,3 +167,31 @@ def test_legacy_batch_size_limit(magic): meta = builder.append(2, timestamp=None, key=None, value=b"M" * 700) assert meta is None assert len(builder.build()) < 1000 + + +@pytest.mark.parametrize("compression_type,name,checker_name", [ + (LegacyRecordBatch.CODEC_GZIP, "gzip", "has_gzip"), + (LegacyRecordBatch.CODEC_SNAPPY, "snappy", "has_snappy"), + (LegacyRecordBatch.CODEC_LZ4, "lz4", "has_lz4") +]) +@pytest.mark.parametrize("magic", [0, 1]) +def test_unavailable_codec(magic, compression_type, name, checker_name): + builder = LegacyRecordBatchBuilder( + magic=magic, compression_type=compression_type, batch_size=1024) + builder.append(0, timestamp=None, key=None, value=b"M") + correct_buffer = builder.build() + + with patch.object(kafka.codec, checker_name) as mocked: + mocked.return_value = False + # Check that builder raises error + builder = LegacyRecordBatchBuilder( + magic=magic, compression_type=compression_type, batch_size=1024) + error_msg = "Libraries for {} compression codec not found".format(name) + with pytest.raises(UnsupportedCodecError, match=error_msg): + builder.append(0, timestamp=None, key=None, value=b"M") + builder.build() + + # Check that reader raises same error + batch = LegacyRecordBatch(bytes(correct_buffer), magic) + with pytest.raises(UnsupportedCodecError, match=error_msg): + list(batch) diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index c00f54cca..78ed06b4c 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -1,6 +1,9 @@ import logging import os import time +from mock import patch +import pytest +import kafka.codec import pytest from six.moves import xrange @@ -13,8 +16,8 @@ ) from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES from kafka.errors import ( - ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, ConsumerTimeout, UnsupportedVersionError, - KafkaTimeoutError + ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, ConsumerTimeout, + UnsupportedVersionError, KafkaTimeoutError, UnsupportedCodecError ) from kafka.structs import ( ProduceRequestPayload, TopicPartition, OffsetAndTimestamp @@ -27,6 +30,7 @@ 
send_messages ) + @pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") def test_kafka_consumer(simple_client, topic, kafka_consumer_factory): """Test KafkaConsumer @@ -50,6 +54,24 @@ def test_kafka_consumer(simple_client, topic, kafka_consumer_factory): kafka_consumer.close() +@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +def test_kafka_consumer_unsupported_encoding( + topic, kafka_producer_factory, kafka_consumer_factory): + # Send a compressed message + producer = kafka_producer_factory(compression_type="gzip") + fut = producer.send(topic, b"simple message" * 200) + fut.get(timeout=5) + producer.close() + + # Consume, but with the related compression codec not available + with patch.object(kafka.codec, "has_gzip") as mocked: + mocked.return_value = False + consumer = kafka_consumer_factory(auto_offset_reset='earliest') + error_msg = "Libraries for gzip compression codec not found" + with pytest.raises(UnsupportedCodecError, match=error_msg): + consumer.poll(timeout_ms=2000) + + class TestConsumerIntegration(KafkaIntegrationTestCase): maxDiff = None diff --git a/test/testutil.py b/test/testutil.py index 1a75d264b..7d26a9e0f 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -1,10 +1,12 @@ +from __future__ import absolute_import + +import functools import operator import os import socket import time import uuid -import decorator import pytest from . import unittest @@ -46,6 +48,7 @@ def construct_lambda(s): validators = map(construct_lambda, versions) def real_kafka_versions(func): + @functools.wraps(func) def wrapper(func, *args, **kwargs): version = kafka_version() @@ -57,7 +60,7 @@ def wrapper(func, *args, **kwargs): pytest.skip("unsupported kafka version") return func(*args, **kwargs) - return decorator.decorator(wrapper, func) + return wrapper return real_kafka_versions From 284983c7a44e84109b35ce0ddde3f96002dd17fe Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 10 Mar 2018 19:55:44 -0500 Subject: [PATCH 221/291] Bump version for development --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 626f52b92..d89910b43 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.2.post3' +__version__ = '1.4.3.dev' From c726adc082da24923cb8ac6e74c92c107ce1b1ff Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 22 Mar 2018 18:10:32 -0700 Subject: [PATCH 222/291] Fix skipped integration tests if KAFKA_VERSION unset (#1453) From ae3c67284a3ba07d5e92c456b93351ab24f4118d Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 05:56:11 -0700 Subject: [PATCH 223/291] Check for immediate failure when looking up coordinator in heartbeat thread (#1457) --- kafka/coordinator/base.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 7deeaf05d..2a4f37037 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -940,21 +940,17 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - # - # Release coordinator lock during client poll to avoid deadlocks - # if/when connection errback needs coordinator lock - self.coordinator._client.poll(timeout_ms=0) - - if self.coordinator.coordinator_unknown(): - future = self.coordinator.lookup_coordinator() - if not future.is_done or future.failed(): - # the 
immediate future check ensures that we backoff - # properly in the case that no brokers are available - # to connect to (and the future is automatically failed). - with self.coordinator._lock: + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + self.coordinator._client.poll(timeout_ms=0) + + if self.coordinator.coordinator_unknown(): + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) elif self.coordinator.heartbeat.session_timeout_expired(): From 3f2aa818c901284098844f3082975481c210a595 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 05:58:55 -0700 Subject: [PATCH 224/291] Change SimpleProducer to use async_send (async is reserved in py37) (#1454) From 6c1b3a6e351bd807d6b46883333ff9c8b973d88e Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Fri, 23 Mar 2018 08:21:44 -0700 Subject: [PATCH 225/291] Fix KafkaConsumer docstring for request_timeout_ms default (#1459) From 46782b44cef05a0d4827b01eacb0af244aceaf43 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 22 Mar 2018 14:42:49 -0700 Subject: [PATCH 226/291] Change levels for some heartbeat thread logging --- kafka/coordinator/base.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index 2a4f37037..e7283c52c 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -953,25 +953,24 @@ def _run_once(self): # to connect to (and the future is automatically failed). self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - elif self.coordinator.heartbeat.session_timeout_expired(): - # the session timeout has expired without seeing a - # successful heartbeat, so we should probably make sure - # the coordinator is still healthy. - log.warning('Heartbeat session expired, marking coordinator dead') - self.coordinator.coordinator_dead('Heartbeat session expired') - - elif self.coordinator.heartbeat.poll_timeout_expired(): - # the poll timeout has expired, which means that the - # foreground thread has stalled in between calls to - # poll(), so we explicitly leave the group. - log.warning('Heartbeat poll expired, leaving group') - self.coordinator.maybe_leave_group() - - elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.log(0, 'Not ready to heartbeat, waiting') - with self.coordinator._lock: + elif self.coordinator.heartbeat.session_timeout_expired(): + # the session timeout has expired without seeing a + # successful heartbeat, so we should probably make sure + # the coordinator is still healthy. + log.warning('Heartbeat session expired, marking coordinator dead') + self.coordinator.coordinator_dead('Heartbeat session expired') + + elif self.coordinator.heartbeat.poll_timeout_expired(): + # the poll timeout has expired, which means that the + # foreground thread has stalled in between calls to + # poll(), so we explicitly leave the group. 
+ log.warning('Heartbeat poll expired, leaving group') + self.coordinator.maybe_leave_group() + + elif not self.coordinator.heartbeat.should_heartbeat(): + # poll again after waiting for the retry backoff in case + # the heartbeat failed or the coordinator disconnected + log.log(0, 'Not ready to heartbeat, waiting') self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) else: From a34e0464d2ad17adab23520e915616d1fd67198e Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 22 Mar 2018 15:10:40 -0700 Subject: [PATCH 227/291] Heartbeat thread start / close From 61395a23ba5040342ba204c1723265c5c18cad1b Mon Sep 17 00:00:00 2001 From: Berkodev Date: Thu, 29 Mar 2018 22:39:04 +0300 Subject: [PATCH 228/291] Adds add_callback/add_errback example to docs (#1441) From 5285c2cabe93b646558e1bc8489620d3f1f8459b Mon Sep 17 00:00:00 2001 From: Stephen SORRIAUX Date: Thu, 5 Apr 2018 22:44:54 +0200 Subject: [PATCH 229/291] Fix CreatePartitionsRequest_v0 (#1469) From e650572ceccba816cb416e6482a6a4bc7cf2504e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Rosa=20Furtado?= Date: Sun, 15 Apr 2018 22:30:21 -0300 Subject: [PATCH 230/291] Improve BrokerConnection initialization (#1475) From 03ef0661aed9987ed15e83a6427efd1c124c9475 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Wed, 18 Apr 2018 08:29:19 -0700 Subject: [PATCH 231/291] Always acquire client lock before coordinator lock to avoid deadlocks (#1464) --- kafka/coordinator/base.py | 63 +++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index e7283c52c..7deeaf05d 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -940,37 +940,42 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - self.coordinator._client.poll(timeout_ms=0) - - if self.coordinator.coordinator_unknown(): - future = self.coordinator.lookup_coordinator() - if not future.is_done or future.failed(): - # the immediate future check ensures that we backoff - # properly in the case that no brokers are available - # to connect to (and the future is automatically failed). + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + # + # Release coordinator lock during client poll to avoid deadlocks + # if/when connection errback needs coordinator lock + self.coordinator._client.poll(timeout_ms=0) + + if self.coordinator.coordinator_unknown(): + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). + with self.coordinator._lock: self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - elif self.coordinator.heartbeat.session_timeout_expired(): - # the session timeout has expired without seeing a - # successful heartbeat, so we should probably make sure - # the coordinator is still healthy. 
- log.warning('Heartbeat session expired, marking coordinator dead') - self.coordinator.coordinator_dead('Heartbeat session expired') - - elif self.coordinator.heartbeat.poll_timeout_expired(): - # the poll timeout has expired, which means that the - # foreground thread has stalled in between calls to - # poll(), so we explicitly leave the group. - log.warning('Heartbeat poll expired, leaving group') - self.coordinator.maybe_leave_group() - - elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.log(0, 'Not ready to heartbeat, waiting') + elif self.coordinator.heartbeat.session_timeout_expired(): + # the session timeout has expired without seeing a + # successful heartbeat, so we should probably make sure + # the coordinator is still healthy. + log.warning('Heartbeat session expired, marking coordinator dead') + self.coordinator.coordinator_dead('Heartbeat session expired') + + elif self.coordinator.heartbeat.poll_timeout_expired(): + # the poll timeout has expired, which means that the + # foreground thread has stalled in between calls to + # poll(), so we explicitly leave the group. + log.warning('Heartbeat poll expired, leaving group') + self.coordinator.maybe_leave_group() + + elif not self.coordinator.heartbeat.should_heartbeat(): + # poll again after waiting for the retry backoff in case + # the heartbeat failed or the coordinator disconnected + log.log(0, 'Not ready to heartbeat, waiting') + with self.coordinator._lock: self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) else: From e27ae26f920ab3ca0f06bc2bb93e6b1faaea7e94 Mon Sep 17 00:00:00 2001 From: Taras Date: Mon, 19 Mar 2018 00:09:29 +0200 Subject: [PATCH 232/291] Fix MemoryRecord bugs re error handling and add test coverage (#1448) From f9ee8b27143f1b67b8b2d0e004249f3e9e7f8064 Mon Sep 17 00:00:00 2001 From: Taras Date: Sun, 18 Mar 2018 15:56:47 +0200 Subject: [PATCH 233/291] Add codec validators to record parser and builder for all formats (#1447) From eef82563a5b000eb630bc8f848f7941e3e180c42 Mon Sep 17 00:00:00 2001 From: Stephen SORRIAUX Date: Wed, 18 Apr 2018 23:55:49 +0200 Subject: [PATCH 234/291] Added AlterConfigs and DescribeConfigs apis (#1472) From ffedd107162e0d2946677d004c486c43b0f9e1e7 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Wed, 18 Apr 2018 15:10:33 -0700 Subject: [PATCH 235/291] Skip flakey SimpleProducer test From fec83a96a2e3bde5e4f5160225ef52c793bc6f44 Mon Sep 17 00:00:00 2001 From: "Michael P. Nitowski" Date: Wed, 18 Apr 2018 20:31:14 -0400 Subject: [PATCH 236/291] Force lz4 to disable Kafka-unsupported block linking when encoding (#1476) From 3cac1dec2b1b11dacd6980ba40491b76616798d6 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 24 Apr 2018 13:02:39 -0700 Subject: [PATCH 237/291] Minor doc capitalization cleanup --- kafka/client_async.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index be896cc83..a9704fafd 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -99,26 +99,26 @@ class KafkaClient(object): brokers or partitions. Default: 300000 security_protocol (str): Protocol used to communicate with brokers. Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. - ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping + ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping socket connections. 
If provided, all other ssl_* configurations will be ignored. Default: None. - ssl_check_hostname (bool): flag to configure whether ssl handshake - should verify that the certificate matches the brokers hostname. - default: true. - ssl_cafile (str): optional filename of ca file to use in certificate - veriication. default: none. - ssl_certfile (str): optional filename of file in pem format containing - the client certificate, as well as any ca certificates needed to - establish the certificate's authenticity. default: none. - ssl_keyfile (str): optional filename containing the client private key. - default: none. - ssl_password (str): optional password to be used when loading the - certificate chain. default: none. - ssl_crlfile (str): optional filename containing the CRL to check for + ssl_check_hostname (bool): Flag to configure whether SSL handshake + should verify that the certificate matches the broker's hostname. + Default: True. + ssl_cafile (str): Optional filename of CA file to use in certificate + veriication. Default: None. + ssl_certfile (str): Optional filename of file in PEM format containing + the client certificate, as well as any CA certificates needed to + establish the certificate's authenticity. Default: None. + ssl_keyfile (str): Optional filename containing the client private key. + Default: None. + ssl_password (str): Optional password to be used when loading the + certificate chain. Default: None. + ssl_crlfile (str): Optional filename containing the CRL to check for certificate expiration. By default, no CRL check is done. When providing a file, only the leaf certificate will be checked against this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. - default: none. + Default: None. api_version (tuple): Specify which Kafka API version to use. If set to None, KafkaClient will attempt to infer the broker version by probing various APIs. Example: (0, 10, 2). Default: None From ed28ca309d979ceef8f46f858e85ee6217f943a8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 10 May 2018 16:29:30 -0700 Subject: [PATCH 238/291] Stop using deprecated log.warn() From 22bb8b7f26799c05e11dcaa54a4c8eecd77392ca Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Thu, 10 May 2018 16:12:19 -0700 Subject: [PATCH 239/291] Stop shadowing `ConnectionError` In Python3, `ConnectionError` is a native exception. So rename our custom one to `KafkaConnectionError` to prevent accidentally shadowing the native one. Note that there are still valid uses of `ConnectionError` in this code. They already expect a native Python3 `ConnectionError`, and also already handle the Python2 compatibility issues. From 60cbdbd255c884cdb2dab48151142cb56850f9ee Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 22 May 2018 11:33:36 -0700 Subject: [PATCH 240/291] Document methods that return None If a valid broker in the cluster has no partitions, it will return None rather than an empty set. Similarly updated a few other methods. 
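For illustration, a minimal caller-side sketch of the None-returning behaviour described above. partitions_for_topic() is assumed here as one representative lookup (the commit message does not list the exact methods it documents), and the broker address and topic name are placeholders:

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')

    # Metadata lookups of this kind return None (meaning "unknown") rather than
    # an empty set when there is nothing to report, so guard before iterating.
    partitions = consumer.partitions_for_topic('example-topic')
    if partitions is None:
        partitions = set()

    for partition in sorted(partitions):
        print(partition)

    consumer.close()

Relatedly, per the rename noted above, callers that previously caught kafka.errors.ConnectionError should catch KafkaConnectionError instead, so the Python 3 builtin ConnectionError is no longer shadowed.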
From af3c2bd2dda55e7dad0550bed551f9ac9c1d4732 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 08:43:25 -0700 Subject: [PATCH 241/291] Ignore MetadataResponses with empty broker list (#1506) From 6959910ca84a04d43b8fa9754aeb87f617aed038 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 08:48:41 -0700 Subject: [PATCH 242/291] Improve connection handling when bootstrap list is invalid (#1507) * only perform single dns lookup for connect_blocking() * fix blocking timeout in check_version() From 6f31d93fdcc14dd92819df742d9c5cc48ea6229b Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 11:54:33 -0700 Subject: [PATCH 243/291] Retain but deprecate kafka.errors.ConnectionError for compatibility --- kafka/errors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kafka/errors.py b/kafka/errors.py index 93a9f405e..47d228e48 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -456,6 +456,10 @@ class KafkaConnectionError(KafkaError): invalid_metadata = True +class ConnectionError(KafkaConnectionError): + """Deprecated""" + + class BufferUnderflowError(KafkaError): pass From 5dd9ea64361b4ead5f21fbdd1b7a662b87a126dd Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 26 May 2018 12:19:16 -0700 Subject: [PATCH 244/291] Release 1.4.3 --- CHANGES.md | 38 ++++++++++++++++++++++++++++++++++++++ docs/changelog.rst | 45 +++++++++++++++++++++++++++++++++++++++++++++ kafka/version.py | 2 +- 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 11d6ac71d..288ae9095 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,41 @@ +# 1.4.3 (May 26, 2018) + +Compatibility +* Fix for python 3.7 support: remove 'async' keyword from SimpleProducer (dpkp #1454) + +Client +* Improve BrokerConnection initialization time (romulorosa #1475) +* Ignore MetadataResponses with empty broker list (dpkp #1506) +* Improve connection handling when bootstrap list is invalid (dpkp #1507) + +Consumer +* Check for immediate failure when looking up coordinator in heartbeat thread (dpkp #1457) + +Core / Protocol +* Always acquire client lock before coordinator lock to avoid deadlocks (dpkp #1464) +* Added AlterConfigs and DescribeConfigs apis (StephenSorriaux #1472) +* Fix CreatePartitionsRequest_v0 (StephenSorriaux #1469) +* Add codec validators to record parser and builder for all formats (tvoinarovskyi #1447) +* Fix MemoryRecord bugs re error handling and add test coverage (tvoinarovskyi #1448) +* Force lz4 to disable Kafka-unsupported block linking when encoding (mnito #1476) +* Stop shadowing `ConnectionError` (jeffwidman #1492) + +Documentation +* Document methods that return None (jeffwidman #1504) +* Minor doc capitalization cleanup (jeffwidman) +* Adds add_callback/add_errback example to docs (Berkodev #1441) +* Fix KafkaConsumer docstring for request_timeout_ms default (dpkp #1459) + +Test Infrastructure +* Skip flakey SimpleProducer test (dpkp) +* Fix skipped integration tests if KAFKA_VERSION unset (dpkp #1453) + +Logging / Error Messages +* Stop using deprecated log.warn() (jeffwidman) +* Change levels for some heartbeat thread logging (dpkp #1456) +* Log Heartbeat thread start / close for debugging (dpkp) + + # 1.4.2 (Mar 10, 2018) Bugfixes diff --git a/docs/changelog.rst b/docs/changelog.rst index a817492b9..15c307d4b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -29,6 +29,51 @@ Fix locale-gen not found 1.3.3.post1 (Apr 17, 2017) ########################## Resolve merge conflicts while merging changes for version 1.3.3 
+======= +1.4.3 (May 26, 2018) +#################### + +Compatibility +------------- +* Fix for python 3.7 support: remove 'async' keyword from SimpleProducer (dpkp #1454) + +Client +------ +* Improve BrokerConnection initialization time (romulorosa #1475) +* Ignore MetadataResponses with empty broker list (dpkp #1506) +* Improve connection handling when bootstrap list is invalid (dpkp #1507) + +Consumer +-------- +* Check for immediate failure when looking up coordinator in heartbeat thread (dpkp #1457) + +Core / Protocol +--------------- +* Always acquire client lock before coordinator lock to avoid deadlocks (dpkp #1464) +* Added AlterConfigs and DescribeConfigs apis (StephenSorriaux #1472) +* Fix CreatePartitionsRequest_v0 (StephenSorriaux #1469) +* Add codec validators to record parser and builder for all formats (tvoinarovskyi #1447) +* Fix MemoryRecord bugs re error handling and add test coverage (tvoinarovskyi #1448) +* Force lz4 to disable Kafka-unsupported block linking when encoding (mnito #1476) +* Stop shadowing `ConnectionError` (jeffwidman #1492) + +Documentation +------------- +* Document methods that return None (jeffwidman #1504) +* Minor doc capitalization cleanup (jeffwidman) +* Adds add_callback/add_errback example to docs (Berkodev #1441) +* Fix KafkaConsumer docstring for request_timeout_ms default (dpkp #1459) + +Test Infrastructure +------------------- +* Skip flakey SimpleProducer test (dpkp) +* Fix skipped integration tests if KAFKA_VERSION unset (dpkp #1453) + +Logging / Error Messages +------------------------ +* Stop using deprecated log.warn() (jeffwidman) +* Change levels for some heartbeat thread logging (dpkp #1456) +* Log Heartbeat thread start / close for debugging (dpkp) 1.4.2 (Mar 10, 2018) #################### diff --git a/kafka/version.py b/kafka/version.py index d89910b43..4e7c72a59 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3.dev' +__version__ = '1.4.3' From 113290897130664bc25b1438d3ecf5b411a5f139 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 1 Aug 2018 15:06:23 -0700 Subject: [PATCH 245/291] Remove unused kafka versions --- .travis.yml | 4 +--- build_integration.sh | 2 +- servers/{0.10.2.1 => 0.10.2.2}/resources/kafka.properties | 0 servers/{0.10.2.1 => 0.10.2.2}/resources/log4j.properties | 0 servers/{0.10.2.1 => 0.10.2.2}/resources/zookeeper.properties | 0 5 files changed, 2 insertions(+), 4 deletions(-) rename servers/{0.10.2.1 => 0.10.2.2}/resources/kafka.properties (100%) rename servers/{0.10.2.1 => 0.10.2.2}/resources/log4j.properties (100%) rename servers/{0.10.2.1 => 0.10.2.2}/resources/zookeeper.properties (100%) diff --git a/.travis.yml b/.travis.yml index 21cbd2b7d..7f389db4a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,7 @@ python: - pypy env: - - KAFKA_VERSION=0.9.0.1 - - KAFKA_VERSION=0.10.1.1 - - KAFKA_VERSION=0.10.2.1 + - KAFKA_VERSION=0.10.2.2 - KAFKA_VERSION=0.11.0.2 - KAFKA_VERSION=1.0.1 diff --git a/build_integration.sh b/build_integration.sh index b686fffb5..68cb8bf2a 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,6 +1,6 @@ #!/bin/bash -: ${ALL_RELEASES:="0.8.2.2 0.9.0.1 0.10.1.1 0.10.2.1 0.11.0.2 1.0.1"} +: ${ALL_RELEASES:="0.10.2.2 0.11.0.2 1.0.1"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} diff --git a/servers/0.10.2.1/resources/kafka.properties b/servers/0.10.2.2/resources/kafka.properties similarity index 100% rename from 
servers/0.10.2.1/resources/kafka.properties rename to servers/0.10.2.2/resources/kafka.properties diff --git a/servers/0.10.2.1/resources/log4j.properties b/servers/0.10.2.2/resources/log4j.properties similarity index 100% rename from servers/0.10.2.1/resources/log4j.properties rename to servers/0.10.2.2/resources/log4j.properties diff --git a/servers/0.10.2.1/resources/zookeeper.properties b/servers/0.10.2.2/resources/zookeeper.properties similarity index 100% rename from servers/0.10.2.1/resources/zookeeper.properties rename to servers/0.10.2.2/resources/zookeeper.properties From ee5e112caa9f91a12afbc785aa7e5f22a41ab92e Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 11:29:35 -0700 Subject: [PATCH 246/291] Upgrade to kafka-python 1.4.3.post1 --- CHANGES.md | 4 ++++ docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 288ae9095..2474543ca 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +# 1.4.3.post1 (Aug 2, 2018) +1. Upgrade to upstream kafka-python 1.4.3 +2. Remove support of 0.8, 0.9, 0.10.1.1, 0.10.2.1 and added support of kafka 0.10.2.2 testing + # 1.4.3 (May 26, 2018) Compatibility diff --git a/docs/changelog.rst b/docs/changelog.rst index 15c307d4b..40e6a6153 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,9 @@ Changelog ========= +1.4.3.post1 (Aug 2, 2018) +########################## +* Upgrade to kafka-python 1.4.3 +* Change testing environment for supported kafka version testing 1.3.3.post7 (Jan 29, 2018) ########################## diff --git a/kafka/version.py b/kafka/version.py index 4e7c72a59..1e584229f 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3' +__version__ = '1.4.3.post1' From fc5b4b4d3570a5fd78230c0d697094b5d2c49357 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 11:48:03 -0700 Subject: [PATCH 247/291] Remove unused kafka versions --- servers/0.10.0.0/resources/kafka.properties | 142 ------------------ servers/0.10.0.0/resources/log4j.properties | 25 --- .../0.10.0.0/resources/zookeeper.properties | 21 --- servers/0.10.0.1/resources/kafka.properties | 142 ------------------ servers/0.10.0.1/resources/log4j.properties | 25 --- .../0.10.0.1/resources/zookeeper.properties | 21 --- servers/0.10.1.1/resources/kafka.properties | 142 ------------------ servers/0.10.1.1/resources/log4j.properties | 25 --- .../0.10.1.1/resources/zookeeper.properties | 21 --- servers/0.11.0.0/resources/kafka.properties | 142 ------------------ servers/0.11.0.0/resources/log4j.properties | 25 --- .../0.11.0.0/resources/zookeeper.properties | 21 --- servers/0.11.0.1/resources/kafka.properties | 142 ------------------ servers/0.11.0.1/resources/log4j.properties | 25 --- .../0.11.0.1/resources/zookeeper.properties | 21 --- servers/0.8.0/resources/kafka.properties | 67 --------- servers/0.8.0/resources/log4j.properties | 25 --- servers/0.8.0/resources/zookeeper.properties | 19 --- servers/0.8.1.1/resources/kafka.properties | 124 --------------- servers/0.8.1.1/resources/log4j.properties | 25 --- .../0.8.1.1/resources/zookeeper.properties | 21 --- servers/0.8.1/resources/kafka.properties | 67 --------- servers/0.8.1/resources/log4j.properties | 25 --- servers/0.8.1/resources/zookeeper.properties | 19 --- servers/0.8.2.0/resources/kafka.properties | 124 --------------- servers/0.8.2.0/resources/log4j.properties | 25 --- .../0.8.2.0/resources/zookeeper.properties | 21 --- 
servers/0.8.2.1/resources/kafka.properties | 124 --------------- servers/0.8.2.1/resources/log4j.properties | 25 --- .../0.8.2.1/resources/zookeeper.properties | 21 --- servers/0.8.2.2/resources/kafka.properties | 124 --------------- servers/0.8.2.2/resources/log4j.properties | 25 --- .../0.8.2.2/resources/zookeeper.properties | 21 --- servers/0.9.0.0/resources/kafka.properties | 141 ----------------- servers/0.9.0.0/resources/log4j.properties | 25 --- .../0.9.0.0/resources/zookeeper.properties | 21 --- servers/0.9.0.1/resources/kafka.properties | 142 ------------------ servers/0.9.0.1/resources/log4j.properties | 25 --- .../0.9.0.1/resources/zookeeper.properties | 21 --- servers/1.0.0/resources/kafka.properties | 142 ------------------ servers/1.0.0/resources/log4j.properties | 25 --- servers/1.0.0/resources/zookeeper.properties | 21 --- 42 files changed, 2405 deletions(-) delete mode 100644 servers/0.10.0.0/resources/kafka.properties delete mode 100644 servers/0.10.0.0/resources/log4j.properties delete mode 100644 servers/0.10.0.0/resources/zookeeper.properties delete mode 100644 servers/0.10.0.1/resources/kafka.properties delete mode 100644 servers/0.10.0.1/resources/log4j.properties delete mode 100644 servers/0.10.0.1/resources/zookeeper.properties delete mode 100644 servers/0.10.1.1/resources/kafka.properties delete mode 100644 servers/0.10.1.1/resources/log4j.properties delete mode 100644 servers/0.10.1.1/resources/zookeeper.properties delete mode 100644 servers/0.11.0.0/resources/kafka.properties delete mode 100644 servers/0.11.0.0/resources/log4j.properties delete mode 100644 servers/0.11.0.0/resources/zookeeper.properties delete mode 100644 servers/0.11.0.1/resources/kafka.properties delete mode 100644 servers/0.11.0.1/resources/log4j.properties delete mode 100644 servers/0.11.0.1/resources/zookeeper.properties delete mode 100644 servers/0.8.0/resources/kafka.properties delete mode 100644 servers/0.8.0/resources/log4j.properties delete mode 100644 servers/0.8.0/resources/zookeeper.properties delete mode 100644 servers/0.8.1.1/resources/kafka.properties delete mode 100644 servers/0.8.1.1/resources/log4j.properties delete mode 100644 servers/0.8.1.1/resources/zookeeper.properties delete mode 100644 servers/0.8.1/resources/kafka.properties delete mode 100644 servers/0.8.1/resources/log4j.properties delete mode 100644 servers/0.8.1/resources/zookeeper.properties delete mode 100644 servers/0.8.2.0/resources/kafka.properties delete mode 100644 servers/0.8.2.0/resources/log4j.properties delete mode 100644 servers/0.8.2.0/resources/zookeeper.properties delete mode 100644 servers/0.8.2.1/resources/kafka.properties delete mode 100644 servers/0.8.2.1/resources/log4j.properties delete mode 100644 servers/0.8.2.1/resources/zookeeper.properties delete mode 100644 servers/0.8.2.2/resources/kafka.properties delete mode 100644 servers/0.8.2.2/resources/log4j.properties delete mode 100644 servers/0.8.2.2/resources/zookeeper.properties delete mode 100644 servers/0.9.0.0/resources/kafka.properties delete mode 100644 servers/0.9.0.0/resources/log4j.properties delete mode 100644 servers/0.9.0.0/resources/zookeeper.properties delete mode 100644 servers/0.9.0.1/resources/kafka.properties delete mode 100644 servers/0.9.0.1/resources/log4j.properties delete mode 100644 servers/0.9.0.1/resources/zookeeper.properties delete mode 100644 servers/1.0.0/resources/kafka.properties delete mode 100644 servers/1.0.0/resources/log4j.properties delete mode 100644 servers/1.0.0/resources/zookeeper.properties diff --git 
a/servers/0.10.0.0/resources/kafka.properties b/servers/0.10.0.0/resources/kafka.properties deleted file mode 100644 index 7a19a1187..000000000 --- a/servers/0.10.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. 
Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.0.0/resources/log4j.properties b/servers/0.10.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.0.0/resources/zookeeper.properties b/servers/0.10.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.10.0.1/resources/kafka.properties b/servers/0.10.0.1/resources/kafka.properties deleted file mode 100644 index 7a19a1187..000000000 --- a/servers/0.10.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.0.1/resources/log4j.properties b/servers/0.10.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.0.1/resources/zookeeper.properties b/servers/0.10.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.10.1.1/resources/kafka.properties b/servers/0.10.1.1/resources/kafka.properties deleted file mode 100644 index 7a19a1187..000000000 --- a/servers/0.10.1.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. 
If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. 
-log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.1.1/resources/log4j.properties b/servers/0.10.1.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.1.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.1.1/resources/zookeeper.properties b/servers/0.10.1.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.1.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.11.0.0/resources/kafka.properties b/servers/0.11.0.0/resources/kafka.properties deleted file mode 100644 index f08855ce6..000000000 --- a/servers/0.11.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.0/resources/log4j.properties b/servers/0.11.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.0/resources/zookeeper.properties b/servers/0.11.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.11.0.1/resources/kafka.properties b/servers/0.11.0.1/resources/kafka.properties deleted file mode 100644 index f08855ce6..000000000 --- a/servers/0.11.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.1/resources/log4j.properties b/servers/0.11.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.1/resources/zookeeper.properties b/servers/0.11.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.0/resources/kafka.properties b/servers/0.8.0/resources/kafka.properties deleted file mode 100644 index b9f5c498f..000000000 --- a/servers/0.8.0/resources/kafka.properties +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -############################# Server Basics ############################# - -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -port={port} -host.name={host} - -num.network.threads=2 -num.io.threads=2 - -socket.send.buffer.bytes=1048576 -socket.receive.buffer.bytes=1048576 -socket.request.max.bytes=104857600 - -############################# Log Basics ############################# - -log.dirs={tmp_dir}/data -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -log.flush.interval.messages=10000 -log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -log.retention.hours=168 -log.segment.bytes=536870912 -log.cleanup.interval.mins=1 - -############################# Zookeeper ############################# - -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 - -kafka.metrics.polling.interval.secs=5 -kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter -kafka.csv.metrics.dir={tmp_dir} -kafka.csv.metrics.reporter.enabled=false - -log.cleanup.policy=delete diff --git a/servers/0.8.0/resources/log4j.properties b/servers/0.8.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.0/resources/zookeeper.properties b/servers/0.8.0/resources/zookeeper.properties deleted file mode 100644 index 68e1ef986..000000000 --- a/servers/0.8.0/resources/zookeeper.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -dataDir={tmp_dir} -clientPortAddress={host} -clientPort={port} -maxClientCnxns=0 diff --git a/servers/0.8.1.1/resources/kafka.properties b/servers/0.8.1.1/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.1.1/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). 
-# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.1.1/resources/log4j.properties b/servers/0.8.1.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.1.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.1.1/resources/zookeeper.properties b/servers/0.8.1.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.1.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.1/resources/kafka.properties b/servers/0.8.1/resources/kafka.properties deleted file mode 100644 index 76b0cb4ac..000000000 --- a/servers/0.8.1/resources/kafka.properties +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -############################# Server Basics ############################# - -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -port={port} -host.name={host} - -num.network.threads=2 -num.io.threads=2 - -socket.send.buffer.bytes=1048576 -socket.receive.buffer.bytes=1048576 -socket.request.max.bytes=104857600 - -############################# Log Basics ############################# - -log.dirs={tmp_dir}/data -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -log.flush.interval.messages=10000 -log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -log.retention.hours=168 -log.segment.bytes=536870912 -log.retention.check.interval.ms=60000 -log.cleanup.interval.mins=1 -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.1/resources/log4j.properties b/servers/0.8.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.1/resources/zookeeper.properties b/servers/0.8.1/resources/zookeeper.properties deleted file mode 100644 index 68e1ef986..000000000 --- a/servers/0.8.1/resources/zookeeper.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -dataDir={tmp_dir} -clientPortAddress={host} -clientPort={port} -maxClientCnxns=0 diff --git a/servers/0.8.2.0/resources/kafka.properties b/servers/0.8.2.0/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.0/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. 
If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. 
-log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.0/resources/log4j.properties b/servers/0.8.2.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.0/resources/zookeeper.properties b/servers/0.8.2.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.2.1/resources/kafka.properties b/servers/0.8.2.1/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.1/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.1/resources/log4j.properties b/servers/0.8.2.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.1/resources/zookeeper.properties b/servers/0.8.2.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.2.2/resources/kafka.properties b/servers/0.8.2.2/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.2/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. 
Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.2/resources/log4j.properties b/servers/0.8.2.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.2/resources/zookeeper.properties b/servers/0.8.2.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.9.0.0/resources/kafka.properties b/servers/0.9.0.0/resources/kafka.properties deleted file mode 100644 index b70a0dae3..000000000 --- a/servers/0.9.0.0/resources/kafka.properties +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.9.0.0/resources/log4j.properties b/servers/0.9.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.9.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.9.0.0/resources/zookeeper.properties b/servers/0.9.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.9.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.9.0.1/resources/kafka.properties b/servers/0.9.0.1/resources/kafka.properties deleted file mode 100644 index 7a19a1187..000000000 --- a/servers/0.9.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.9.0.1/resources/log4j.properties b/servers/0.9.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.9.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.9.0.1/resources/zookeeper.properties b/servers/0.9.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.9.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.0.0/resources/kafka.properties b/servers/1.0.0/resources/kafka.properties deleted file mode 100644 index f08855ce6..000000000 --- a/servers/1.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). 
This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.0/resources/log4j.properties b/servers/1.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/1.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/1.0.0/resources/zookeeper.properties b/servers/1.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/1.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 From 22371e65d60ba4ce5e54e23c1c38c750cd470068 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 14:53:27 -0700 Subject: [PATCH 248/291] Remove unused kafka versions --- .travis.yml | 1 - run_itest.sh | 8 +- servers/0.11.0.2/resources/kafka.properties | 142 ------------------ servers/0.11.0.2/resources/log4j.properties | 25 --- .../0.11.0.2/resources/zookeeper.properties | 21 --- 5 files changed, 1 insertion(+), 196 deletions(-) delete mode 100644 servers/0.11.0.2/resources/kafka.properties delete mode 100644 servers/0.11.0.2/resources/log4j.properties delete mode 100644 servers/0.11.0.2/resources/zookeeper.properties diff --git a/.travis.yml b/.travis.yml index 7f389db4a..c56a869b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ python: env: - KAFKA_VERSION=0.10.2.2 - - KAFKA_VERSION=0.11.0.2 - KAFKA_VERSION=1.0.1 sudo: false diff --git a/run_itest.sh b/run_itest.sh index 012604a50..164656d61 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -1,12 +1,6 @@ #!/bin/bash -e -export KAFKA_VERSION='0.10.1.1' -./build_integration.sh -tox -e py27 -tox -e py35 -tox -e pypy - -export KAFKA_VERSION='0.11.0.2' +export KAFKA_VERSION='0.10.2.2' ./build_integration.sh tox -e py27 tox -e py35 diff --git a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties deleted file mode 100644 index f08855ce6..000000000 --- a/servers/0.11.0.2/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). 
This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/log4j.properties b/servers/0.11.0.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.2/resources/zookeeper.properties b/servers/0.11.0.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 From af297e2d4574310d7d4260a23aa3eb24cd53ce45 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 15:32:56 -0700 Subject: [PATCH 249/291] Change version to 1.0.1 for integration testing --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7e7536467..c61165eb2 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ docs: FLAGS= -KAFKA_VERSION=0.11.0.2 +KAFKA_VERSION=1.0.1 SCALA_VERSION=2.12 setup: From 28e20a8ebc040fb749c1138d4b426c1da52dbe28 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 15:37:34 -0700 Subject: [PATCH 250/291] Add back 0.11.0.2 --- .travis.yml | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c56a869b9..7f389db4a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ python: env: - KAFKA_VERSION=0.10.2.2 + - KAFKA_VERSION=0.11.0.2 - KAFKA_VERSION=1.0.1 sudo: false diff --git a/Makefile b/Makefile index c61165eb2..7e7536467 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ docs: FLAGS= -KAFKA_VERSION=1.0.1 +KAFKA_VERSION=0.11.0.2 SCALA_VERSION=2.12 setup: From 64776f25b77741ff0b6cef7c2c69cf85bd5a94e5 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 16:15:22 -0700 Subject: [PATCH 251/291] Add 0.11.0.2 servers file --- servers/0.11.0.2/resources/kafka.properties | 142 ++++++++++++++++++ servers/0.11.0.2/resources/log4j.properties | 25 +++ .../0.11.0.2/resources/zookeeper.properties | 21 +++ 3 files changed, 188 insertions(+) create mode 100644 servers/0.11.0.2/resources/kafka.properties create mode 100644 servers/0.11.0.2/resources/log4j.properties create mode 100644 servers/0.11.0.2/resources/zookeeper.properties diff --git 
a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties new file mode 100644 index 000000000..7a19a1187 --- /dev/null +++ b/servers/0.11.0.2/resources/kafka.properties @@ -0,0 +1,142 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/server.truststore.jks +ssl.truststore.password=foobar + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. 
Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=2 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/log4j.properties b/servers/0.11.0.2/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/0.11.0.2/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.2/resources/zookeeper.properties b/servers/0.11.0.2/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/0.11.0.2/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 From 2cc73f453b7f6c7cecc43f4688e9075904a62761 Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Thu, 2 Aug 2018 16:24:56 -0700 Subject: [PATCH 252/291] Remove unused versions of kafka for integration testing --- CHANGES.md | 3 +++ docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 2474543ca..05a30bc77 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# 1.4.3.post2 (Aug 2, 2018) +* Remove unused versions of kafka for integration testing + # 1.4.3.post1 (Aug 2, 2018) 1. Upgrade to upstream kafka-python 1.4.3 2. 
Remove support of 0.8, 0.9, 0.10.1.1, 0.10.2.1 and added support of kafka 0.10.2.2 testing diff --git a/docs/changelog.rst b/docs/changelog.rst index 40e6a6153..2d5718514 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,9 @@ Changelog ========= +1.4.3.post2 (Aug 2, 2018) +########################## +* Remove unused versions of kafka for integration testing + 1.4.3.post1 (Aug 2, 2018) ########################## * Upgrade to kafka-python 1.4.3 diff --git a/kafka/version.py b/kafka/version.py index 1e584229f..c7f68e0f1 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3.post1' +__version__ = '1.4.3.post2' From 852e197a9bfe76b664ccbf9f900c2bfa8629d743 Mon Sep 17 00:00:00 2001 From: Dong Weicheng Date: Mon, 30 Jul 2018 15:17:22 -0700 Subject: [PATCH 253/291] Change prospectus. --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 9dcdb8eaa..c86fc7b8b 100644 --- a/setup.py +++ b/setup.py @@ -39,9 +39,9 @@ def run(cls): tests_require=test_require, cmdclass={"test": Tox}, packages=find_packages(exclude=['test']), - author="Dana Powers", - author_email="dana.powers@gmail.com", - url="https://github.com/dpkp/kafka-python", + author="Distsys Streaming", + author_email="distsys-streaming@yelp.com", + url="https://github.com/Yelp/kafka-python", license="Apache License 2.0", description="Pure Python client for Apache Kafka", long_description=README, From 9fa3a227d800a6d219000a252918689b17669f3b Mon Sep 17 00:00:00 2001 From: Dong Weicheng Date: Fri, 3 Aug 2018 16:38:30 -0700 Subject: [PATCH 254/291] Bump version to 1.4.3.post3 --- CHANGES.md | 3 +++ docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 05a30bc77..3e7b1bec5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# 1.4.3.post3 (Aug 3, 2018) +* Change prospectus + # 1.4.3.post2 (Aug 2, 2018) * Remove unused versions of kafka for integration testing diff --git a/docs/changelog.rst b/docs/changelog.rst index 2d5718514..4377518f6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,9 @@ Changelog ========= +1.4.3.post3 (Aug 3, 2018) +########################## +* Change prospectus + 1.4.3.post2 (Aug 2, 2018) ########################## * Remove unused versions of kafka for integration testing diff --git a/kafka/version.py b/kafka/version.py index c7f68e0f1..16cdadda8 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3.post2' +__version__ = '1.4.3.post3' From ec8cda2638c88d579e6040f6fa65b66da1594563 Mon Sep 17 00:00:00 2001 From: Yongzhou Gu Date: Tue, 7 Aug 2018 11:36:39 -0700 Subject: [PATCH 255/291] Add Usage of CreatePartition in adminClinet --- kafka/admin_client.py | 61 ++++++++++++++++++++++++++- test/test_admin_client.py | 32 +++++++++++++- test/test_admin_client_integration.py | 24 ++++++++++- 3 files changed, 114 insertions(+), 3 deletions(-) diff --git a/kafka/admin_client.py b/kafka/admin_client.py index 30147bc1e..84802102e 100644 --- a/kafka/admin_client.py +++ b/kafka/admin_client.py @@ -1,8 +1,24 @@ +import collections import time from .errors import NodeNotReadyError -from .protocol.admin import CreateTopicsRequest, DeleteTopicsRequest +from .protocol.admin import CreateTopicsRequest, DeleteTopicsRequest, CreatePartitionsRequest from .protocol.metadata import MetadataRequest + +"""TopicPartition + +Fields: + name (string): name of topic + count (int): the new partition count + 
broker_ids_matrix: list(list(brokerids)) + the sizes of inner lists are the replica factor of current topic + the size of outer list is the increased partition num of current topic +""" +TopicPartition = collections.namedtuple( + 'TopicPartition', + ['name', 'count', 'broker_ids_matrix'] +) + def convert_new_topic_request_format(new_topic): return ( new_topic.name, @@ -18,6 +34,15 @@ def convert_new_topic_request_format(new_topic): ], ) +def convert_topic_partitions_requst_format(topic_partition): + return ( + topic_partition.name, + ( + topic_partition.count, + topic_partition.broker_ids_matrix + ) + ) + class NewTopic(object): """ A class for new topic creation @@ -67,6 +92,7 @@ def __init__(self, client): self.metadata_request = MetadataRequest[1]([]) self.topic_request = CreateTopicsRequest[0] self.delete_topics_request = DeleteTopicsRequest[0] + self.create_partitions_request = CreatePartitionsRequest[0] def _send_controller_request(self): response = self._send( @@ -86,6 +112,39 @@ def _send_request(self, request): else: return self._send(controller_id, request) + def create_partitions( + self, + topic_partitions, + timeout, + validate_only, + ): + """ Create partitions on topics + + Arguments: + topic_partitions (list of TopicPartition): A list containing + infos on increasing partitions + timeout (int): timeout in seconds + validate_only (Boolean): If true then validate the + request without actually increasing the number of + partitions + + Returns: + CreatePartitionsResponse: response from the broker + + Raises: + NodeNotReadyError: if controller is not ready + """ + + request = self.create_partitions_request( + topic_partitions = [ + convert_topic_partitions_requst_format(topic_partition) + for topic_partition in topic_partitions + ], + timeout=timeout, + validate_only = validate_only, + ) + + return self._send_request(request) def create_topics( self, diff --git a/test/test_admin_client.py b/test/test_admin_client.py index ccbd16dd4..2c48e09b8 100644 --- a/test/test_admin_client.py +++ b/test/test_admin_client.py @@ -3,9 +3,10 @@ from kafka.client_async import KafkaClient from kafka.errors import BrokerNotAvailableError from kafka.protocol.metadata import MetadataResponse -from kafka.protocol.admin import CreateTopicsResponse, DeleteTopicsResponse +from kafka.protocol.admin import CreateTopicsResponse, DeleteTopicsResponse, CreatePartitionsResponse from kafka.admin_client import AdminClient from kafka.admin_client import NewTopic +from kafka.admin_client import TopicPartition from kafka.structs import BrokerMetadata from kafka.future import Future @@ -32,6 +33,10 @@ def metadata_response(controller_id): def mock_new_topics(): return [NewTopic('topic',1,1)] +@pytest.fixture +def mock_topic_partitions(): + return [TopicPartition('topic', 5, 4*[[1,2,3]]) ] + @pytest.fixture def topic_response(): return CreateTopicsResponse[1]([( @@ -44,6 +49,13 @@ def delete_response(): 'topic',7 )]) +@pytest.fixture +def partition_response(): + return CreatePartitionsResponse[0]( + 100, + [('topic', 7, 'timeout_exception')] + ) + class TestTopicAdmin(): def test_send_controller_request( @@ -99,3 +111,21 @@ def test_delete_topics( admin = AdminClient(mock_kafka_client) response = admin.delete_topics(mock_new_topics, 0) assert response == delete_response + + def test_create_partitions( + self, + mock_topic_partitions, + mock_least_loaded_node, + partition_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, 
partition_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + admin = AdminClient(mock_kafka_client) + response = admin.create_partitions(mock_topic_partitions, 0, False) + assert response == partition_response diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py index 938f476d2..d8b808a05 100644 --- a/test/test_admin_client_integration.py +++ b/test/test_admin_client_integration.py @@ -1,7 +1,7 @@ import os import time import unittest -from kafka.admin_client import AdminClient, NewTopic +from kafka.admin_client import AdminClient, NewTopic, TopicPartition from kafka.protocol.metadata import MetadataRequest from test.fixtures import ZookeeperFixture, KafkaFixture from test.testutil import KafkaIntegrationTestCase, kafka_versions @@ -48,3 +48,25 @@ def test_create_delete_topics(self): response[0].topic_error_codes[0][1] == 0 or response[0].topic_error_codes[0][1] == 7 ) + + @kafka_versions('>=1.0.0') + def test_create_partitions(self): + admin = AdminClient(self.client_async) + topic = NewTopic( + name='topic', + num_partitions=1, + replication_factor=1, + ) + metadata_request = MetadataRequest[1]() + admin.create_topics(topics=[topic], timeout=1) + + time.sleep(1) # allows the topic to be created + + topic_partition = TopicPartition('topic', 2, [[0]]) + + + response = admin.create_partitions([topic_partition], timeout=1, validate_only=False) + + self.assertTrue( + response[0].topic_error_codes[0][1] == 0 + ) From 3ee535114557a9c37fd5ac5d8ac2fd8c6ae7c9c2 Mon Sep 17 00:00:00 2001 From: Yongzhou Gu Date: Tue, 7 Aug 2018 14:00:38 -0700 Subject: [PATCH 256/291] change name and add comments --- kafka/admin_client.py | 33 ++++++++++++++++++++------- test/test_admin_client.py | 4 ++-- test/test_admin_client_integration.py | 8 +++---- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/kafka/admin_client.py b/kafka/admin_client.py index 84802102e..2f70b0f85 100644 --- a/kafka/admin_client.py +++ b/kafka/admin_client.py @@ -5,7 +5,7 @@ from .protocol.metadata import MetadataRequest -"""TopicPartition +"""NewPartitionsInfo Fields: name (string): name of topic @@ -14,8 +14,8 @@ the sizes of inner lists are the replica factor of current topic the size of outer list is the increased partition num of current topic """ -TopicPartition = collections.namedtuple( - 'TopicPartition', +NewPartitionsInfo = collections.namedtuple( + 'NewPartitionsInfo', ['name', 'count', 'broker_ids_matrix'] ) @@ -114,15 +114,32 @@ def _send_request(self, request): def create_partitions( self, - topic_partitions, + new_partitions_infos, timeout, validate_only, ): """ Create partitions on topics Arguments: - topic_partitions (list of TopicPartition): A list containing - infos on increasing partitions + new_partitions_infos (list of NewPartitionsInfo): A list containing + infos on increasing partitions with following format + [ + NewPartitionsInfo( + 'name': String, + 'count': Int, + 'broker_ids_matrix': + [ + [id1, id2, id3], + [id1, id3, id4], + ... + ] + ), + ... + ] + especially, broker_ids_matrix is a matrix of broker ids. 
The row size is + the number of newly added partitions and the col size is the replication + factor of the topic + timeout (int): timeout in seconds validate_only (Boolean): If true then validate the request without actually increasing the number of @@ -137,8 +154,8 @@ def create_partitions( request = self.create_partitions_request( topic_partitions = [ - convert_topic_partitions_requst_format(topic_partition) - for topic_partition in topic_partitions + convert_topic_partitions_requst_format(new_partitions_info) + for new_partitions_info in new_partitions_infos ], timeout=timeout, validate_only = validate_only, diff --git a/test/test_admin_client.py b/test/test_admin_client.py index 2c48e09b8..aacf93581 100644 --- a/test/test_admin_client.py +++ b/test/test_admin_client.py @@ -6,7 +6,7 @@ from kafka.protocol.admin import CreateTopicsResponse, DeleteTopicsResponse, CreatePartitionsResponse from kafka.admin_client import AdminClient from kafka.admin_client import NewTopic -from kafka.admin_client import TopicPartition +from kafka.admin_client import NewPartitionsInfo from kafka.structs import BrokerMetadata from kafka.future import Future @@ -35,7 +35,7 @@ def mock_new_topics(): @pytest.fixture def mock_topic_partitions(): - return [TopicPartition('topic', 5, 4*[[1,2,3]]) ] + return [NewPartitionsInfo('topic', 5, 4*[[1,2,3]]) ] @pytest.fixture def topic_response(): diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py index d8b808a05..8f7c05c32 100644 --- a/test/test_admin_client_integration.py +++ b/test/test_admin_client_integration.py @@ -1,7 +1,7 @@ import os import time import unittest -from kafka.admin_client import AdminClient, NewTopic, TopicPartition +from kafka.admin_client import AdminClient, NewTopic, NewPartitionsInfo from kafka.protocol.metadata import MetadataRequest from test.fixtures import ZookeeperFixture, KafkaFixture from test.testutil import KafkaIntegrationTestCase, kafka_versions @@ -62,10 +62,8 @@ def test_create_partitions(self): time.sleep(1) # allows the topic to be created - topic_partition = TopicPartition('topic', 2, [[0]]) - - - response = admin.create_partitions([topic_partition], timeout=1, validate_only=False) + new_partitions_info = NewPartitionsInfo('topic', 2, [[0]]) + response = admin.create_partitions([new_partitions_info], timeout=1, validate_only=False) self.assertTrue( response[0].topic_error_codes[0][1] == 0 From e75b03e1b7c9862a343aa74b248d3ab7c81710ee Mon Sep 17 00:00:00 2001 From: Manpreet Singh Date: Wed, 8 Aug 2018 12:23:38 -0700 Subject: [PATCH 257/291] Bump version to 1.4.3.post4 --- CHANGES.md | 3 +++ docs/changelog.rst | 4 ++++ kafka/version.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 3e7b1bec5..fc74d89b8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# 1.4.3.post4 (Aug 8, 2019) +* Add usage of CreatePartition protocol in adminClient + # 1.4.3.post3 (Aug 3, 2018) * Change prospectus diff --git a/docs/changelog.rst b/docs/changelog.rst index 4377518f6..cdfb894af 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,9 @@ Changelog ========= +1.4.3.post4 (Aug 8, 2018) +########################## +* Add usage of CreatePartition protocol in adminClient + 1.4.3.post3 (Aug 3, 2018) ########################## * Change prospectus diff --git a/kafka/version.py b/kafka/version.py index 16cdadda8..eabf5fe82 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3.post3' +__version__ = '1.4.3.post4' From 
452fe81fece79f0887ee2e0b07d9e8398129a0ce Mon Sep 17 00:00:00 2001 From: Dong Weicheng Date: Thu, 6 Sep 2018 17:24:03 -0700 Subject: [PATCH 258/291] Fix failing build when logging warning --- kafka/conn.py | 2 +- run_itest.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kafka/conn.py b/kafka/conn.py index a2d5ee6cc..0a882ba05 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -54,7 +54,7 @@ SSLZeroReturnError = ssl.SSLZeroReturnError except AttributeError: # support older ssl libraries - log.warning('Old SSL module detected.' + log.debug('Old SSL module detected.' ' SSL error handling may not operate cleanly.' ' Consider upgrading to Python 3.3 or 2.7.9') SSLEOFError = ssl.SSLError diff --git a/run_itest.sh b/run_itest.sh index 164656d61..446be4308 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -6,6 +6,12 @@ tox -e py27 tox -e py35 tox -e pypy +export KAFKA_VERSION='0.11.0.2' +./build_integration.sh +tox -e py27 +tox -e py35 +tox -e pypy + export KAFKA_VERSION='1.0.1' ./build_integration.sh tox -e py27 From 8c7b7aeeef955ef6ac34636c57fa453c88ed2e7d Mon Sep 17 00:00:00 2001 From: Dong Weicheng Date: Fri, 7 Sep 2018 13:59:11 -0700 Subject: [PATCH 259/291] Remove tests for kafka version 0.11.0.2 --- .travis.yml | 1 - Makefile | 2 +- build_integration.sh | 9 +- run_itest.sh | 6 - servers/0.11.0.2/resources/kafka.properties | 142 ------------------ servers/0.11.0.2/resources/log4j.properties | 25 --- .../0.11.0.2/resources/zookeeper.properties | 21 --- test/fixtures.py | 4 +- 8 files changed, 5 insertions(+), 205 deletions(-) delete mode 100644 servers/0.11.0.2/resources/kafka.properties delete mode 100644 servers/0.11.0.2/resources/log4j.properties delete mode 100644 servers/0.11.0.2/resources/zookeeper.properties diff --git a/.travis.yml b/.travis.yml index 7f389db4a..c56a869b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ python: env: - KAFKA_VERSION=0.10.2.2 - - KAFKA_VERSION=0.11.0.2 - KAFKA_VERSION=1.0.1 sudo: false diff --git a/Makefile b/Makefile index 7e7536467..c61165eb2 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ docs: FLAGS= -KAFKA_VERSION=0.11.0.2 +KAFKA_VERSION=1.0.1 SCALA_VERSION=2.12 setup: diff --git a/build_integration.sh b/build_integration.sh index 68cb8bf2a..b353ea3d8 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,6 +1,6 @@ #!/bin/bash -: ${ALL_RELEASES:="0.10.2.2 0.11.0.2 1.0.1"} +: ${ALL_RELEASES:="0.10.2.2 1.0.1"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} @@ -33,12 +33,7 @@ pushd servers echo "-------------------------------------" echo "Checking kafka binaries for ${kafka}" echo - # kafka 0.8.0 is only available w/ scala 2.8.0 - if [ "$kafka" == "0.8.0" ]; then - KAFKA_ARTIFACT="kafka_2.8.0-${kafka}.tar.gz" - else - KAFKA_ARTIFACT="kafka_${SCALA_VERSION}-${kafka}.tgz" - fi + KAFKA_ARTIFACT="kafka_${SCALA_VERSION}-${kafka}.tgz" if [ ! 
-f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then if [ -f "${KAFKA_ARTIFACT}" ]; then echo "Using cached artifact: ${KAFKA_ARTIFACT}" diff --git a/run_itest.sh b/run_itest.sh index 446be4308..164656d61 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -6,12 +6,6 @@ tox -e py27 tox -e py35 tox -e pypy -export KAFKA_VERSION='0.11.0.2' -./build_integration.sh -tox -e py27 -tox -e py35 -tox -e pypy - export KAFKA_VERSION='1.0.1' ./build_integration.sh tox -e py27 diff --git a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties deleted file mode 100644 index 7a19a1187..000000000 --- a/servers/0.11.0.2/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/log4j.properties b/servers/0.11.0.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.2/resources/zookeeper.properties b/servers/0.11.0.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/test/fixtures.py b/test/fixtures.py index 493a664a5..148a04b5e 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -42,8 +42,8 @@ def get_open_port(): return port class Fixture(object): - kafka_version = os.environ.get('KAFKA_VERSION', '0.11.0.2') - scala_version = os.environ.get("SCALA_VERSION", '2.8.0') + kafka_version = os.environ.get('KAFKA_VERSION', '1.0.1') + scala_version = os.environ.get("SCALA_VERSION", '2.11') project_root = os.environ.get('PROJECT_ROOT', os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) kafka_root = os.environ.get("KAFKA_ROOT", From e0ffd65fab7b6e3b4b1708bbdf43b89f5a1be1a7 Mon Sep 17 00:00:00 2001 From: Dong Weicheng Date: Mon, 10 Sep 2018 15:46:10 -0700 Subject: [PATCH 260/291] Bump version to 1.4.3.post5 --- CHANGES.md | 6 +++++- docs/changelog.rst | 5 +++++ kafka/version.py | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index fc74d89b8..6f8701aaf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,8 @@ -# 1.4.3.post4 (Aug 8, 2019) +# 1.4.3.post5 (Sep 10, 2018) +* Remove tests for kafka version 0.11.0.2 +* Fix failing build when logging warning + +# 1.4.3.post4 (Aug 8, 2018) * Add usage of CreatePartition protocol in adminClient # 1.4.3.post3 (Aug 3, 2018) diff --git a/docs/changelog.rst b/docs/changelog.rst index cdfb894af..d1f617bb3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,10 @@ Changelog ========= +1.4.3.post5 (Sep 10, 2018) +########################## +* Remove tests for kafka version 0.11.0.2 +* Fix failing build when logging warning + 1.4.3.post4 (Aug 8, 2018) ########################## * Add usage of CreatePartition protocol in adminClient diff --git a/kafka/version.py b/kafka/version.py index eabf5fe82..61350fc0b 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.3.post4' +__version__ = '1.4.3.post5' From cf19cdc212aec6f93f5780bafabd935e23234eb9 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Wed, 9 Jan 2019 14:14:46 -0800 Subject: [PATCH 261/291] Use Popen.communicate() instead of Popen.wait() --- test/fixtures.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/fixtures.py b/test/fixtures.py index 1c83cd31b..1097e6d1c 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -295,11 +295,12 @@ def _create_zk_chroot(self): "kafka-python") env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() - if proc.wait() != 0 or proc.returncode != 0: + if proc.returncode != 0: self.out("Failed to create Zookeeper chroot node") - self.out(proc.stdout.read()) - self.out(proc.stderr.read()) + self.out(stdout) + self.out(stderr) raise RuntimeError("Failed to create Zookeeper chroot node") self.out("Kafka chroot created in Zookeeper!") @@ -458,13 +459,12 @@ def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ args.append('--if-not-exists') env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - ret = proc.wait() - if ret != 0 or proc.returncode != 0: - output = proc.stdout.read() - if not 'kafka.common.TopicExistsException' in output: + stdout, stderr = proc.communicate() + if 
proc.returncode != 0: + if not 'kafka.common.TopicExistsException' in stdout: self.out("Failed to create topic %s" % (topic_name,)) - self.out(output) - self.out(proc.stderr.read()) + self.out(stdout) + self.out(stderr) raise RuntimeError("Failed to create topic %s" % (topic_name,)) def create_topics(self, topic_names, num_partitions=None, replication_factor=None): From c475c0a8d5d763b3dcad55d89598df4934f933ae Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Wed, 9 Jan 2019 14:16:19 -0800 Subject: [PATCH 262/291] Only test with Kafka version 1.1.0 --- .travis.yml | 3 +-- Makefile | 2 +- build_integration.sh | 2 +- run_itest.sh | 8 +------- test/fixtures.py | 2 +- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index c56a869b9..58d4c164f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,7 @@ python: - pypy env: - - KAFKA_VERSION=0.10.2.2 - - KAFKA_VERSION=1.0.1 + - KAFKA_VERSION=1.1.0 sudo: false diff --git a/Makefile b/Makefile index c61165eb2..44a85f2d4 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ docs: FLAGS= -KAFKA_VERSION=1.0.1 +KAFKA_VERSION=1.1.0 SCALA_VERSION=2.12 setup: diff --git a/build_integration.sh b/build_integration.sh index b353ea3d8..4e26e1e3d 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,6 +1,6 @@ #!/bin/bash -: ${ALL_RELEASES:="0.10.2.2 1.0.1"} +: ${ALL_RELEASES:="1.1.0"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} diff --git a/run_itest.sh b/run_itest.sh index 164656d61..150c95989 100644 --- a/run_itest.sh +++ b/run_itest.sh @@ -1,12 +1,6 @@ #!/bin/bash -e -export KAFKA_VERSION='0.10.2.2' -./build_integration.sh -tox -e py27 -tox -e py35 -tox -e pypy - -export KAFKA_VERSION='1.0.1' +export KAFKA_VERSION='1.1.0' ./build_integration.sh tox -e py27 tox -e py35 diff --git a/test/fixtures.py b/test/fixtures.py index 1097e6d1c..ed0d2cf33 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -42,7 +42,7 @@ def get_open_port(): return port class Fixture(object): - kafka_version = os.environ.get('KAFKA_VERSION', '1.0.1') + kafka_version = os.environ.get('KAFKA_VERSION', '1.1.0') scala_version = os.environ.get("SCALA_VERSION", '2.11') project_root = os.environ.get('PROJECT_ROOT', os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) From fba056c944948a4f9cb06138b889f457db964b26 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Wed, 9 Jan 2019 15:01:10 -0800 Subject: [PATCH 263/291] Use internal pypi, fix requirements, remove py26 support --- .travis.yml | 2 +- Dockerfile | 1 + Makefile | 6 ++++++ requirements-dev.txt | 16 +++++++++------- tox.ini | 22 +++++----------------- 5 files changed, 22 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 58d4c164f..81898de87 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ install: - pip install . 
script: - - tox -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` + - tox -i https://pypi.python.org/simple -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` after_success: - coveralls diff --git a/Dockerfile b/Dockerfile index 43e0e4304..f9538c48f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,6 +44,7 @@ COPY CHANGES.md /work COPY MANIFEST.in /work COPY run_itest.sh /work COPY run_utest.sh /work +COPY requirements-dev.txt /work RUN chmod +x /work/run_itest.sh RUN chmod +x /work/run_utest.sh diff --git a/Makefile b/Makefile index 44a85f2d4..e09bb992f 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,11 @@ .DELETE_ON_ERROR: +ifeq ($(findstring .yelpcorp.com,$(shell hostname -f)), .yelpcorp.com) + export PIP_INDEX_URL ?= https://pypi.yelpcorp.com/simple +else + export PIP_INDEX_URL ?= https://pypi.python.org/simple +endif + all: test itest test: diff --git a/requirements-dev.txt b/requirements-dev.txt index 9228bd4bc..7cce56d83 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,15 +1,17 @@ -flake8==3.4.1 -pytest==3.9.3 +flake8==3.6.0 +pytest==3.6.3 pytest-cov==2.6.0 docker-py==1.10.6 -coveralls==1.5.1 -Sphinx==1.6.4 -lz4==2.1.2 -xxhash==1.3.0 +Sphinx==1.7.9 +lz4==1.1.0 +xxhash==1.0.1 python-snappy==0.5.3 -tox==3.5.3 +tox==3.1.2 pylint==1.8.2 pytest-pylint==0.11.0 pytest-mock==1.10.0 sphinx-rtd-theme==0.2.4 crc32c==1.4 +mock==2.0.0 +decorator==4.3.0 +tox-pip-extensions==1.2.1 diff --git a/tox.ini b/tox.ini index b9b02f381..4a6312cb4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,9 @@ [tox] envlist = py{26,27,34,35,36,py}, docs +# The Makefile and .travis.yml override the index server to the public one when +# running outside of Yelp. +indexserver = + default = https://pypi.yelpcorp.com/simple tox_pip_extensions_ext_pip_custom_platform = true tox_pip_extensions_ext_venv_update = true @@ -11,29 +15,13 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = - pytest==3.6.3 - pytest-cov==2.5.1 - py{27,34,35,36,py}: pylint==1.8.2 - py{27,34,35,36,py}: pytest-pylint==0.11.0 - pytest-mock==1.10.0 - mock==2.0.0 - python-snappy==0.5.2 - lz4==1.1.0 - xxhash==1.0.1 - crc32c==1.4 - py26: unittest2==1.1.0 - decorator==4.3.0 - tox-pip-extensions==1.2.1 + -rrequirements-dev.txt commands = py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} setenv = PROJECT_ROOT = {toxinidir} passenv = KAFKA_VERSION -[testenv:py26] -# pylint doesn't support python2.6 -commands = py.test {posargs:--cov=kafka --cov-config=.covrc} - [testenv:pypy] # pylint is super slow on pypy... 
commands = py.test {posargs:--cov=kafka --cov-config=.covrc} From 1c93aac314982f88fad1936e4dececb3f55eaeb8 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Thu, 10 Jan 2019 15:00:16 -0800 Subject: [PATCH 264/291] Changelog for 1.4.4post1 --- CHANGES.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 71dbc3c01..945545e2e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +# 1.4.4.post1 (Jan 10, 2019) + +* Added proc.communicate() patch that got merged upstream to ensure tests don't deadlock +* Only run tests for KAFKA_VERSION 1.1.0 (faster builds) +* Use internal pypi when available +* Remove py26 support (faster builds) +* Use requirements-dev.txt instead of pinning requirements in tox.ini + # 1.4.4 (Nov 20, 2018) Bugfixes From 22a4a8a94e483adcda08ad5914395772c6fb671b Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Thu, 10 Jan 2019 15:02:48 -0800 Subject: [PATCH 265/291] Bump version to 1.4.4.post1 --- CHANGES.md | 2 +- docs/changelog.rst | 8 ++++++++ kafka/version.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 945545e2e..cadefd166 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,7 @@ * Only run tests for KAFKA_VERSION 1.1.0 (faster builds) * Use internal pypi when available * Remove py26 support (faster builds) -* Use requirements-dev.txt instead of pinning requirements in tox.ini +* Use requirements-dev.txt instead of pinning requirements in tox.ini # 1.4.4 (Nov 20, 2018) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7c8491daa..dd362779d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,13 @@ Changelog ========= +1.4.4.post1 (Jan 10, 2019) +########################## +* Added proc.communicate() patch that got merged upstream to ensure tests don't deadlock +* Only run tests for KAFKA_VERSION 1.1.0 (faster builds) +* Use internal pypi when available +* Remove py26 support (faster builds) +* Use requirements-dev.txt instead of pinning requirements in tox.ini + 1.4.3.post5 (Sep 10, 2018) ########################## * Remove tests for kafka version 0.11.0.2 diff --git a/kafka/version.py b/kafka/version.py index 9e0feee72..c057aacd0 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.4' +__version__ = '1.4.4.post1' From 65c02b7d20d395c4ba3fb09c7afd070c95bd0604 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Wed, 5 Jun 2019 15:37:23 -0700 Subject: [PATCH 266/291] Use openjdk instead of oracle java, fix travis build --- .travis.yml | 2 +- Dockerfile | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 31abc616c..3ec0726c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,7 +32,7 @@ install: - pip install . 
script: - - tox -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy2.7-6.0" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` + - tox -i https://pypi.python.org/simple -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy2.7-6.0" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` after_success: - coveralls diff --git a/Dockerfile b/Dockerfile index f9538c48f..be0337ace 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,16 @@ FROM ubuntu:xenial ENV DEBIAN_FRONTEND=noninteractive -RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" >> /etc/apt/sources.list RUN echo "deb http://ppa.launchpad.net/fkrull/deadsnakes/ubuntu precise main" >> /etc/apt/sources.list -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5BB92C09DB82666C C2518248EEA14886 -RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5BB92C09DB82666C RUN apt-get update && apt-get install -y python2.7-dev \ python3.5-dev \ python-pkg-resources \ python-setuptools \ python-virtualenv \ libsnappy-dev \ - oracle-java8-installer \ + locales \ + openjdk-8-jdk \ wget\ g++ \ ca-certificates \ @@ -25,7 +24,7 @@ RUN ln -s $PWD/pypy2-v5.8.0-linux64/bin/pypy /usr/local/bin/pypy RUN /usr/sbin/locale-gen en_US.UTF-8 ENV LANG en_US.UTF-8 -ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle" +ENV JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-amd64" ENV PATH="$PATH:$JAVA_HOME/bin" COPY servers /work/servers From 1c24d0ad0ccf08dd8877fb18d151b013d0134ed3 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Wed, 22 May 2019 23:54:14 -0700 Subject: [PATCH 267/291] Make partitions_for_topic a read-through cache (#1781) If the cluster metadata object has no info about the topic, then issue a blocking metadata call to fetch it. --- kafka/consumer/group.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index b3e182c5d..7f0e061a1 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -552,11 +552,9 @@ def committed(self, partition): committed = None return committed - def topics(self): - """Get all topics the user is authorized to view. - - Returns: - set: topics + def _fetch_all_topit_metadata(self): + """A blocking call that fetches topic metadata for all topics in the + cluster that the user is authorized to view. """ cluster = self._client.cluster if self._client._metadata_refresh_in_progress and self._client._topics: @@ -567,10 +565,24 @@ def topics(self): future = cluster.request_update() self._client.poll(future=future) cluster.need_all_topic_metadata = stash - return cluster.topics() + + def topics(self): + """Get all topics the user is authorized to view. + This will always issue a remote call to the cluster to fetch the latest + information. + + Returns: + set: topics + """ + self._fetch_all_topic_metadata() + return self._client.cluster.topics() def partitions_for_topic(self, topic): - """Get metadata about the partitions for a given topic. + """This method first checks the local metadata cache for information + about the topic. If the topic is not found (either because the topic + does not exist, the user is not authorized to view the topic, or the + metadata cache is not populated), then it will issue a metadata update + call to the cluster. Arguments: topic (str): Topic to check. 
@@ -578,7 +590,12 @@ def partitions_for_topic(self, topic): Returns: set: Partition ids """ - return self._client.cluster.partitions_for_topic(topic) + cluster = self._client.cluster + partitions = cluster.partitions_for_topic(topic) + if partitions is None: + self._fetch_all_topic_metadata() + partitions = cluster.partitions_for_topic(topic) + return partitions def poll(self, timeout_ms=0, max_records=None): """Fetch data from assigned topics / partitions. From d60b66627956d6b247b6fb3b2ca7a0ed6f59d1cb Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Thu, 23 May 2019 09:07:11 -0700 Subject: [PATCH 268/291] Fix typo in _fetch_all_topic_metadata function (#1809) --- kafka/consumer/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 7f0e061a1..3195b1b4a 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -552,7 +552,7 @@ def committed(self, partition): committed = None return committed - def _fetch_all_topit_metadata(self): + def _fetch_all_topic_metadata(self): """A blocking call that fetches topic metadata for all topics in the cluster that the user is authorized to view. """ From faa6a2a0c226ad3403d0d545631bcef201c2d46f Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Fri, 31 May 2019 19:05:41 -0700 Subject: [PATCH 269/291] Sanity test for consumer.topics() and consumer.partitions_for_topic() --- test/test_consumer_group.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index d7aaa8896..ec2685765 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -29,6 +29,15 @@ def test_consumer(kafka_broker, topic, version): assert consumer._client._conns[node_id].state is ConnectionStates.CONNECTED consumer.close() +@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +def test_consumer_topics(kafka_broker, topic, version): + consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) + # Necessary to drive the IO + consumer.poll(500) + consumer_topics = consumer.topics() + assert topic in consumer_topics + assert len(consumer.partitions_for_topic(topic)) > 0 + consumer.close() @pytest.mark.skipif(version() < (0, 9), reason='Unsupported Kafka Version') @pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") From a907306e4862f61476823cb1558378705b4a7c89 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Tue, 27 Aug 2019 13:46:08 -0700 Subject: [PATCH 270/291] Bump to 1.4.6.post2 --- CHANGES.md | 3 +++ kafka/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 5a8b5f0e7..b7f76cd27 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# 1.4.6.post2 (Aug 27, 2019) +* Cherrypick change from upstream to make blocking calls for Kafka metadata if we don't have any + # 1.4.6.post1 (Jun 4, 2019) This release merges in changes from 1.4.5 and 1.4.6 upstream. 
The only key differences are we focus on py35 still instead of py36/py37, and diff --git a/kafka/version.py b/kafka/version.py index eb2bbae29..484a880e2 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.6.post1' +__version__ = '1.4.6.post2' From 970d4ccd9a9e8d37e7d57a91c70044f462abb1c5 Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Thu, 31 Oct 2019 16:37:11 -0700 Subject: [PATCH 271/291] KAFKA-24599: merge upstream v1.4.7 into mirrored master and fix merge conflicts and tests --- .travis.yml | 7 +- CHANGES.md | 62 +- README.rst | 4 +- build_integration.sh | 4 +- docs/changelog.rst | 63 ++ docs/compatibility.rst | 10 +- docs/index.rst | 4 +- kafka/admin/__init__.py | 6 +- kafka/admin/acl_resource.py | 212 ++++++ kafka/admin/client.py | 653 +++++++++++++---- kafka/admin_client.py | 11 +- kafka/client_async.py | 60 +- kafka/cluster.py | 40 +- kafka/conn.py | 377 ++++++---- kafka/consumer/fetcher.py | 41 +- kafka/consumer/group.py | 91 ++- kafka/consumer/subscription_state.py | 2 +- kafka/coordinator/assignors/range.py | 6 +- kafka/coordinator/base.py | 90 ++- kafka/coordinator/consumer.py | 8 +- kafka/errors.py | 6 + kafka/producer/kafka.py | 7 +- kafka/producer/sender.py | 5 +- kafka/version.py | 2 +- requirements-dev.txt | 4 +- servers/0.11.0.0/resources/kafka.properties | 145 ++++ servers/0.11.0.1/resources/kafka.properties | 145 ++++ servers/0.11.0.2/resources/kafka.properties | 145 ++++ servers/0.11.0.3/resources/kafka.properties | 145 ++++ servers/0.11.0.3/resources/log4j.properties | 25 + .../0.11.0.3/resources/zookeeper.properties | 21 + servers/1.0.0/resources/kafka.properties | 145 ++++ servers/1.0.1/resources/kafka.properties | 145 ++++ servers/1.0.2/resources/kafka.properties | 145 ++++ servers/1.1.0/resources/kafka.properties | 3 + servers/1.1.1/resources/kafka.properties | 3 + servers/2.0.0/resources/kafka.properties | 145 ++++ servers/2.0.1/resources/kafka.properties | 145 ++++ servers/2.1.0/resources/kafka.properties | 145 ++++ servers/2.1.0/resources/log4j.properties | 25 + servers/2.1.0/resources/zookeeper.properties | 21 + servers/2.1.1/resources/kafka.properties | 145 ++++ servers/2.1.1/resources/log4j.properties | 25 + servers/2.1.1/resources/zookeeper.properties | 21 + servers/2.2.0/resources/kafka.properties | 145 ++++ servers/2.2.0/resources/log4j.properties | 25 + servers/2.2.0/resources/zookeeper.properties | 21 + servers/2.2.1/resources/kafka.properties | 145 ++++ servers/2.2.1/resources/log4j.properties | 25 + servers/2.2.1/resources/zookeeper.properties | 21 + servers/2.3.0/resources/kafka.properties | 145 ++++ servers/2.3.0/resources/log4j.properties | 25 + servers/2.3.0/resources/zookeeper.properties | 21 + test/conftest.py | 49 +- test/fixtures.py | 22 +- test/test_admin.py | 31 + test/test_admin_client_integration.py | 24 +- test/test_admin_integration.py | 122 ++++ test/test_assignors.py | 3 +- test/test_client_async.py | 24 +- test/test_client_integration.py | 8 +- test/test_codec.py | 4 +- test/test_conn.py | 11 +- test/test_consumer_group.py | 25 +- test/test_consumer_integration.py | 662 +++++++----------- test/test_failover_integration.py | 4 +- test/test_fetcher.py | 6 +- test/test_producer.py | 10 +- test/test_producer_integration.py | 12 +- test/test_protocol.py | 1 - test/testutil.py | 76 +- tox.ini | 1 + 72 files changed, 4223 insertions(+), 959 deletions(-) create mode 100644 kafka/admin/acl_resource.py create mode 100644 servers/0.11.0.0/resources/kafka.properties create mode 100644 
servers/0.11.0.1/resources/kafka.properties create mode 100644 servers/0.11.0.2/resources/kafka.properties create mode 100644 servers/0.11.0.3/resources/kafka.properties create mode 100644 servers/0.11.0.3/resources/log4j.properties create mode 100644 servers/0.11.0.3/resources/zookeeper.properties create mode 100644 servers/1.0.0/resources/kafka.properties create mode 100644 servers/1.0.1/resources/kafka.properties create mode 100644 servers/1.0.2/resources/kafka.properties create mode 100644 servers/2.0.0/resources/kafka.properties create mode 100644 servers/2.0.1/resources/kafka.properties create mode 100644 servers/2.1.0/resources/kafka.properties create mode 100644 servers/2.1.0/resources/log4j.properties create mode 100644 servers/2.1.0/resources/zookeeper.properties create mode 100644 servers/2.1.1/resources/kafka.properties create mode 100644 servers/2.1.1/resources/log4j.properties create mode 100644 servers/2.1.1/resources/zookeeper.properties create mode 100644 servers/2.2.0/resources/kafka.properties create mode 100644 servers/2.2.0/resources/log4j.properties create mode 100644 servers/2.2.0/resources/zookeeper.properties create mode 100644 servers/2.2.1/resources/kafka.properties create mode 100644 servers/2.2.1/resources/log4j.properties create mode 100644 servers/2.2.1/resources/zookeeper.properties create mode 100644 servers/2.3.0/resources/kafka.properties create mode 100644 servers/2.3.0/resources/log4j.properties create mode 100644 servers/2.3.0/resources/zookeeper.properties create mode 100644 test/test_admin_integration.py diff --git a/.travis.yml b/.travis.yml index 3ec0726c7..8b59fd369 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,12 @@ python: - pypy2.7-6.0 env: - - KAFKA_VERSION=1.1.0 + - KAFKA_VERSION=0.8.2.2 + - KAFKA_VERSION=0.9.0.1 + - KAFKA_VERSION=0.10.2.2 + - KAFKA_VERSION=0.11.0.3 + - KAFKA_VERSION=1.1.1 + - KAFKA_VERSION=2.3.0 addons: apt: diff --git a/CHANGES.md b/CHANGES.md index b7f76cd27..05fac9acc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,59 @@ +# 1.4.7 (Sep 30, 2019) + +This is a minor release focused on KafkaConsumer performance, Admin Client +improvements, and Client concurrency. The KafkaConsumer iterator implementation +has been greatly simplified so that it just wraps consumer.poll(). The prior +implementation will remain available for a few more releases using the optional +KafkaConsumer config: `legacy_iterator=True` . This is expected to improve +consumer throughput substantially and help reduce heartbeat failures / group +rebalancing. 
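
(Illustrative sketch only, not part of the patch; the bootstrap address and topic name below are placeholders.) With the simplified implementation, iterating a consumer behaves roughly like wrapping consumer.poll() in a loop:

    from kafka import KafkaConsumer

    # Placeholder connection settings; per the note above, passing
    # legacy_iterator=True keeps the previous iterator behavior.
    consumer = KafkaConsumer('example-topic', bootstrap_servers='localhost:9092')

    # Iterating the consumer...
    for message in consumer:
        print(message.topic, message.partition, message.offset, message.value)

    # ...is now roughly equivalent to driving poll() yourself:
    # while True:
    #     for tp, records in consumer.poll(timeout_ms=1000).items():
    #         for message in records:
    #             print(message.topic, message.partition, message.offset, message.value)
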
+ +Client +* Send socket data via non-blocking IO with send buffer (dpkp / PR #1912) +* Rely on socket selector to detect completed connection attempts (dpkp / PR #1909) +* Improve connection lock handling; always use context manager (melor,dpkp / PR #1895) +* Reduce client poll timeout when there are no in-flight requests (dpkp / PR #1823) + +KafkaConsumer +* Do not use wakeup when sending fetch requests from consumer (dpkp / PR #1911) +* Wrap `consumer.poll()` for KafkaConsumer iteration (dpkp / PR #1902) +* Allow the coordinator to auto-commit on old brokers (justecorruptio / PR #1832) +* Reduce internal client poll timeout for (legacy) consumer iterator interface (dpkp / PR #1824) +* Use dedicated connection for group coordinator (dpkp / PR #1822) +* Change coordinator lock acquisition order (dpkp / PR #1821) +* Make `partitions_for_topic` a read-through cache (Baisang / PR #1781,#1809) +* Fix consumer hanging indefinitely on topic deletion while rebalancing (commanderdishwasher / PR #1782) + +Miscellaneous Bugfixes / Improvements +* Fix crc32c avilability on non-intel architectures (ossdev07 / PR #1904) +* Load system default SSL CAs if `ssl_cafile` is not provided (iAnomaly / PR #1883) +* Catch py3 TimeoutError in BrokerConnection send/recv (dpkp / PR #1820) +* Added a function to determine if bootstrap is successfully connected (Wayde2014 / PR #1876) + +Admin Client +* Add ACL api support to KafkaAdminClient (ulrikjohansson / PR #1833) +* Add `sasl_kerberos_domain_name` config to KafkaAdminClient (jeffwidman / PR #1852) +* Update `security_protocol` config documentation for KafkaAdminClient (cardy31 / PR #1849) +* Break FindCoordinator into request/response methods in KafkaAdminClient (jeffwidman / PR #1871) +* Break consumer operations into request / response methods in KafkaAdminClient (jeffwidman / PR #1845) +* Parallelize calls to `_send_request_to_node()` in KafkaAdminClient (davidheitman / PR #1807) + +Test Infrastructure / Documentation / Maintenance +* Add Kafka 2.3.0 to test matrix and compatibility docs (dpkp / PR #1915) +* Convert remaining `KafkaConsumer` tests to `pytest` (jeffwidman / PR #1886) +* Bump integration tests to 0.10.2.2 and 0.11.0.3 (jeffwidman / #1890) +* Cleanup handling of `KAFKA_VERSION` env var in tests (jeffwidman / PR #1887) +* Minor test cleanup (jeffwidman / PR #1885) +* Use `socket.SOCK_STREAM` in test assertions (iv-m / PR #1879) +* Sanity test for `consumer.topics()` and `consumer.partitions_for_topic()` (Baisang / PR #1829) +* Cleanup seconds conversion in client poll timeout calculation (jeffwidman / PR #1825) +* Remove unused imports (jeffwidman / PR #1808) +* Cleanup python nits in RangePartitionAssignor (jeffwidman / PR #1805) +* Update links to kafka consumer config docs (jeffwidman) +* Fix minor documentation typos (carsonip / PR #1865) +* Remove unused/weird comment line (jeffwidman / PR #1813) +* Update docs for `api_version_auto_timeout_ms` (jeffwidman / PR #1812) + # 1.4.6.post2 (Aug 27, 2019) * Cherrypick change from upstream to make blocking calls for Kafka metadata if we don't have any @@ -11,9 +67,7 @@ we only build for versions 0.10.2.2 and 1.1.0 and 1.1.1 This is a patch release primarily focused on bugs related to concurrency, SSL connections and testing, and SASL authentication: - Client Concurrency Issues (Race Conditions / Deadlocks) - * Fix race condition in `protocol.send_bytes` (isamaru / PR #1752) * Do not call `state_change_callback` with lock (dpkp / PR #1775) * Additional BrokerConnection locks to synchronize 
protocol/IFR state (dpkp / PR #1768) @@ -22,12 +76,10 @@ Client Concurrency Issues (Race Conditions / Deadlocks) * Hold lock during `client.check_version` (dpkp / PR #1771) Producer Wakeup / TimeoutError - * Dont wakeup during `maybe_refresh_metadata` -- it is only called by poll() (dpkp / PR #1769) * Dont do client wakeup when sending from sender thread (dpkp / PR #1761) SSL - Python3.7 Support / Bootstrap Hostname Verification / Testing - * Wrap SSL sockets after connecting for python3.7 compatibility (dpkp / PR #1754) * Allow configuration of SSL Ciphers (dpkp / PR #1755) * Maintain shadow cluster metadata for bootstrapping (dpkp / PR #1753) @@ -36,13 +88,11 @@ SSL - Python3.7 Support / Bootstrap Hostname Verification / Testing * Reset reconnect backoff on SSL connection (dpkp / PR #1777) SASL - OAuthBearer support / api version bugfix - * Fix 0.8.2 protocol quick detection / fix SASL version check (dpkp / PR #1763) * Update sasl configuration docstrings to include supported mechanisms (dpkp) * Support SASL OAuthBearer Authentication (pt2pham / PR #1750) Miscellaneous Bugfixes - * Dont force metadata refresh when closing unneeded bootstrap connections (dpkp / PR #1773) * Fix possible AttributeError during conn._close_socket (dpkp / PR #1776) * Return connection state explicitly after close in connect() (dpkp / PR #1778) diff --git a/README.rst b/README.rst index 9469adea0..40cd55cbc 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Kafka Python client ------------------------ -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -150,7 +150,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a KafkaClient.check_version() method that probes a kafka broker and attempts to identify which version it is running -(0.8.0 to 1.1+). +(0.8.0 to 2.3+). Low-level ********* diff --git a/build_integration.sh b/build_integration.sh index 4e26e1e3d..44ee481fb 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,12 +1,12 @@ #!/bin/bash -: ${ALL_RELEASES:="1.1.0"} +: ${ALL_RELEASES:="0.8.2.2 0.9.0.1 0.10.1.1 0.10.2.2 0.11.0.3 1.0.2 1.1.0 1.1.1 2.0.1"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} # On travis CI, empty KAFKA_VERSION means skip integration tests -# so we don't try to get binaries +# so we don't try to get binaries # Otherwise it means test all official releases, so we get all of them! if [ -z "$KAFKA_VERSION" -a -z "$TRAVIS" ]; then KAFKA_VERSION=$ALL_RELEASES diff --git a/docs/changelog.rst b/docs/changelog.rst index ec92ca12e..514c1d599 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -105,6 +105,69 @@ Logging / Error Messages * Log Heartbeat thread start / close for debugging (dpkp) +1.4.7 (Sep 30, 2019) +#################### + +This is a minor release focused on KafkaConsumer performance, Admin Client +improvements, and Client concurrency. The KafkaConsumer iterator implementation +has been greatly simplified so that it just wraps consumer.poll(). 
The prior +implementation will remain available for a few more releases using the optional +KafkaConsumer config: `legacy_iterator=True` . This is expected to improve +consumer throughput substantially and help reduce heartbeat failures / group +rebalancing. + +Client +------ +* Send socket data via non-blocking IO with send buffer (dpkp / PR #1912) +* Rely on socket selector to detect completed connection attempts (dpkp / PR #1909) +* Improve connection lock handling; always use context manager (melor,dpkp / PR #1895) +* Reduce client poll timeout when there are no in-flight requests (dpkp / PR #1823) + +KafkaConsumer +------------- +* Do not use wakeup when sending fetch requests from consumer (dpkp / PR #1911) +* Wrap `consumer.poll()` for KafkaConsumer iteration (dpkp / PR #1902) +* Allow the coordinator to auto-commit on old brokers (justecorruptio / PR #1832) +* Reduce internal client poll timeout for (legacy) consumer iterator interface (dpkp / PR #1824) +* Use dedicated connection for group coordinator (dpkp / PR #1822) +* Change coordinator lock acquisition order (dpkp / PR #1821) +* Make `partitions_for_topic` a read-through cache (Baisang / PR #1781,#1809) +* Fix consumer hanging indefinitely on topic deletion while rebalancing (commanderdishwasher / PR #1782) + +Miscellaneous Bugfixes / Improvements +------------------------------------- +* Fix crc32c avilability on non-intel architectures (ossdev07 / PR #1904) +* Load system default SSL CAs if `ssl_cafile` is not provided (iAnomaly / PR #1883) +* Catch py3 TimeoutError in BrokerConnection send/recv (dpkp / PR #1820) +* Added a function to determine if bootstrap is successfully connected (Wayde2014 / PR #1876) + +Admin Client +------------ +* Add ACL api support to KafkaAdminClient (ulrikjohansson / PR #1833) +* Add `sasl_kerberos_domain_name` config to KafkaAdminClient (jeffwidman / PR #1852) +* Update `security_protocol` config documentation for KafkaAdminClient (cardy31 / PR #1849) +* Break FindCoordinator into request/response methods in KafkaAdminClient (jeffwidman / PR #1871) +* Break consumer operations into request / response methods in KafkaAdminClient (jeffwidman / PR #1845) +* Parallelize calls to `_send_request_to_node()` in KafkaAdminClient (davidheitman / PR #1807) + +Test Infrastructure / Documentation / Maintenance +------------------------------------------------- +* Add Kafka 2.3.0 to test matrix and compatibility docs (dpkp / PR #1915) +* Convert remaining `KafkaConsumer` tests to `pytest` (jeffwidman / PR #1886) +* Bump integration tests to 0.10.2.2 and 0.11.0.3 (jeffwidman / #1890) +* Cleanup handling of `KAFKA_VERSION` env var in tests (jeffwidman / PR #1887) +* Minor test cleanup (jeffwidman / PR #1885) +* Use `socket.SOCK_STREAM` in test assertions (iv-m / PR #1879) +* Sanity test for `consumer.topics()` and `consumer.partitions_for_topic()` (Baisang / PR #1829) +* Cleanup seconds conversion in client poll timeout calculation (jeffwidman / PR #1825) +* Remove unused imports (jeffwidman / PR #1808) +* Cleanup python nits in RangePartitionAssignor (jeffwidman / PR #1805) +* Update links to kafka consumer config docs (jeffwidman) +* Fix minor documentation typos (carsonip / PR #1865) +* Remove unused/weird comment line (jeffwidman / PR #1813) +* Update docs for `api_version_auto_timeout_ms` (jeffwidman / PR #1812) + + 1.4.6 (Apr 2, 2019) ################### diff --git a/docs/compatibility.rst b/docs/compatibility.rst index fc9e7cc70..9ab877f3a 100644 --- a/docs/compatibility.rst +++ b/docs/compatibility.rst 
@@ -1,16 +1,20 @@ Compatibility ------------- -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python -kafka-python is compatible with (and tested against) broker versions 1.1 +kafka-python is compatible with (and tested against) broker versions 2.3 through 0.8.0 . kafka-python is not compatible with the 0.8.2-beta release. Because the kafka server protocol is backwards compatible, kafka-python is -expected to work with newer broker releases as well (2.0+). +expected to work with newer broker releases as well. + +Although kafka-python is tested and expected to work on recent broker versions, +not all features are supported. Specifically, authentication codecs, and +transactional producer/consumer support are not fully implemented. PRs welcome! kafka-python is tested on python 2.7, 3.4, 3.7, and pypy2.7. diff --git a/docs/index.rst b/docs/index.rst index 0b5b53f0f..6fa9a0c98 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ kafka-python ############ -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -136,7 +136,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a :meth:`~kafka.KafkaClient.check_version()` method that probes a kafka broker and -attempts to identify which version it is running (0.8.0 to 1.1+). +attempts to identify which version it is running (0.8.0 to 2.3+). 
Low-level diff --git a/kafka/admin/__init__.py b/kafka/admin/__init__.py index a300301c6..c240fc6d0 100644 --- a/kafka/admin/__init__.py +++ b/kafka/admin/__init__.py @@ -2,9 +2,13 @@ from kafka.admin.config_resource import ConfigResource, ConfigResourceType from kafka.admin.client import KafkaAdminClient +from kafka.admin.acl_resource import (ACL, ACLFilter, ResourcePattern, ResourcePatternFilter, ACLOperation, + ResourceType, ACLPermissionType, ACLResourcePatternType) from kafka.admin.new_topic import NewTopic from kafka.admin.new_partitions import NewPartitions __all__ = [ - 'ConfigResource', 'ConfigResourceType', 'KafkaAdminClient', 'NewTopic', 'NewPartitions' + 'ConfigResource', 'ConfigResourceType', 'KafkaAdminClient', 'NewTopic', 'NewPartitions', 'ACL', 'ACLFilter', + 'ResourcePattern', 'ResourcePatternFilter', 'ACLOperation', 'ResourceType', 'ACLPermissionType', + 'ACLResourcePatternType' ] diff --git a/kafka/admin/acl_resource.py b/kafka/admin/acl_resource.py new file mode 100644 index 000000000..7a012d2fa --- /dev/null +++ b/kafka/admin/acl_resource.py @@ -0,0 +1,212 @@ +from __future__ import absolute_import +from kafka.errors import IllegalArgumentError + +# enum in stdlib as of py3.4 +try: + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + + +class ResourceType(IntEnum): + """Type of kafka resource to set ACL for + + The ANY value is only valid in a filter context + """ + + UNKNOWN = 0, + ANY = 1, + CLUSTER = 4, + DELEGATION_TOKEN = 6, + GROUP = 3, + TOPIC = 2, + TRANSACTIONAL_ID = 5 + + +class ACLOperation(IntEnum): + """Type of operation + + The ANY value is only valid in a filter context + """ + + ANY = 1, + ALL = 2, + READ = 3, + WRITE = 4, + CREATE = 5, + DELETE = 6, + ALTER = 7, + DESCRIBE = 8, + CLUSTER_ACTION = 9, + DESCRIBE_CONFIGS = 10, + ALTER_CONFIGS = 11, + IDEMPOTENT_WRITE = 12 + + +class ACLPermissionType(IntEnum): + """An enumerated type of permissions + + The ANY value is only valid in a filter context + """ + + ANY = 1, + DENY = 2, + ALLOW = 3 + + +class ACLResourcePatternType(IntEnum): + """An enumerated type of resource patterns + + More details on the pattern types and how they work + can be found in KIP-290 (Support for prefixed ACLs) + https://cwiki.apache.org/confluence/display/KAFKA/KIP-290%3A+Support+for+Prefixed+ACLs + """ + + ANY = 1, + MATCH = 2, + LITERAL = 3, + PREFIXED = 4 + + +class ACLFilter(object): + """Represents a filter to use with describing and deleting ACLs + + The difference between this class and the ACL class is mainly that + we allow using ANY with the operation, permission, and resource type objects + to fetch ALCs matching any of the properties. 
+
+    To make a filter matching any principal, set principal to None
+    """
+
+    def __init__(
+        self,
+        principal,
+        host,
+        operation,
+        permission_type,
+        resource_pattern
+    ):
+        self.principal = principal
+        self.host = host
+        self.operation = operation
+        self.permission_type = permission_type
+        self.resource_pattern = resource_pattern
+
+        self.validate()
+
+    def validate(self):
+        if not isinstance(self.operation, ACLOperation):
+            raise IllegalArgumentError("operation must be an ACLOperation object, and cannot be ANY")
+        if not isinstance(self.permission_type, ACLPermissionType):
+            raise IllegalArgumentError("permission_type must be an ACLPermissionType object, and cannot be ANY")
+        if not isinstance(self.resource_pattern, ResourcePatternFilter):
+            raise IllegalArgumentError("resource_pattern must be a ResourcePatternFilter object")
+
+    def __repr__(self):
+        return "<ACL principal={principal}, resource={resource}, operation={operation}, type={type}, host={host}>".format(
+            principal=self.principal,
+            host=self.host,
+            operation=self.operation.name,
+            type=self.permission_type.name,
+            resource=self.resource_pattern
+        )
+
+
+class ACL(ACLFilter):
+    """Represents a concrete ACL for a specific ResourcePattern
+
+    In kafka an ACL is a 4-tuple of (principal, host, operation, permission_type)
+    that limits who can do what on a specific resource (or since KIP-290 a resource pattern)
+
+    Terminology:
+        Principal -> This is the identifier for the user. Depending on the authorization method used (SSL, SASL etc)
+            the principal will look different. See http://kafka.apache.org/documentation/#security_authz for details.
+            The principal must be on the format "User:<name>" or kafka will treat it as invalid. It's possible to use
+            other principal types than "User" if using a custom authorizer for the cluster.
+        Host -> This must currently be an IP address. It cannot be a range, and it cannot be a domain name.
+            It can be set to "*", which is special cased in kafka to mean "any host"
+        Operation -> Which client operation this ACL refers to. Has different meaning depending
+            on the resource type the ACL refers to. See https://docs.confluent.io/current/kafka/authorization.html#acl-format
+            for a list of which combinations of resource/operation that unlocks which kafka APIs
+        Permission Type: Whether this ACL is allowing or denying access
+        Resource Pattern -> This is a representation of the resource or resource pattern that the ACL
+            refers to. See the ResourcePattern class for details.
+ + """ + + def __init__( + self, + principal, + host, + operation, + permission_type, + resource_pattern + ): + super(ACL, self).__init__(principal, host, operation, permission_type, resource_pattern) + self.validate() + + def validate(self): + if self.operation == ACLOperation.ANY: + raise IllegalArgumentError("operation cannot be ANY") + if self.permission_type == ACLPermissionType.ANY: + raise IllegalArgumentError("permission_type cannot be ANY") + if not isinstance(self.resource_pattern, ResourcePattern): + raise IllegalArgumentError("resource_pattern must be a ResourcePattern object") + + +class ResourcePatternFilter(object): + def __init__( + self, + resource_type, + resource_name, + pattern_type + ): + self.resource_type = resource_type + self.resource_name = resource_name + self.pattern_type = pattern_type + + self.validate() + + def validate(self): + if not isinstance(self.resource_type, ResourceType): + raise IllegalArgumentError("resource_type must be a ResourceType object") + if not isinstance(self.pattern_type, ACLResourcePatternType): + raise IllegalArgumentError("pattern_type must be an ACLResourcePatternType object") + + def __repr__(self): + return "".format( + self.resource_type.name, + self.resource_name, + self.pattern_type.name + ) + + +class ResourcePattern(ResourcePatternFilter): + """A resource pattern to apply the ACL to + + Resource patterns are used to be able to specify which resources an ACL + describes in a more flexible way than just pointing to a literal topic name for example. + Since KIP-290 (kafka 2.0) it's possible to set an ACL for a prefixed resource name, which + can cut down considerably on the number of ACLs needed when the number of topics and + consumer groups start to grow. + The default pattern_type is LITERAL, and it describes a specific resource. 
This is also how + ACLs worked before the introduction of prefixed ACLs + """ + + def __init__( + self, + resource_type, + resource_name, + pattern_type=ACLResourcePatternType.LITERAL + ): + super(ResourcePattern, self).__init__(resource_type, resource_name, pattern_type) + self.validate() + + def validate(self): + if self.resource_type == ResourceType.ANY: + raise IllegalArgumentError("resource_type cannot be ANY") + if self.pattern_type in [ACLResourcePatternType.ANY, ACLResourcePatternType.MATCH]: + raise IllegalArgumentError( + "pattern_type cannot be {} on a concrete ResourcePattern".format(self.pattern_type.name) + ) \ No newline at end of file diff --git a/kafka/admin/client.py b/kafka/admin/client.py index e4219e930..df85f442b 100644 --- a/kafka/admin/client.py +++ b/kafka/admin/client.py @@ -11,14 +11,16 @@ import kafka.errors as Errors from kafka.errors import ( IncompatibleBrokerVersion, KafkaConfigurationError, NotControllerError, - UnrecognizedBrokerVersion) + UnrecognizedBrokerVersion, IllegalArgumentError) from kafka.metrics import MetricConfig, Metrics from kafka.protocol.admin import ( CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, - ListGroupsRequest, DescribeGroupsRequest) + ListGroupsRequest, DescribeGroupsRequest, DescribeAclsRequest, CreateAclsRequest, DeleteAclsRequest) from kafka.protocol.commit import GroupCoordinatorRequest, OffsetFetchRequest from kafka.protocol.metadata import MetadataRequest from kafka.structs import TopicPartition, OffsetAndMetadata +from kafka.admin.acl_resource import ACLOperation, ACLPermissionType, ACLFilter, ACL, ResourcePattern, ResourceType, \ + ACLResourcePatternType from kafka.version import __version__ @@ -91,7 +93,8 @@ class KafkaAdminClient(object): partition leadership changes to proactively discover any new brokers or partitions. Default: 300000 security_protocol (str): Protocol used to communicate with brokers. - Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + Valid values are: PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL. + Default: PLAINTEXT. ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping socket connections. If provided, all other ssl_* configurations will be ignored. Default: None. @@ -133,6 +136,8 @@ class KafkaAdminClient(object): Required if sasl_mechanism is PLAIN. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). Default: None @@ -168,6 +173,7 @@ class KafkaAdminClient(object): 'sasl_plain_username': None, 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, # metrics configs @@ -226,14 +232,20 @@ def _matching_api_version(self, operation): :param operation: A list of protocol operation versions from kafka.protocol. :return: The max matching version number between client and broker. 
""" - version = min(len(operation) - 1, - self._client.get_api_versions()[operation[0].API_KEY][1]) - if version < self._client.get_api_versions()[operation[0].API_KEY][0]: + broker_api_versions = self._client.get_api_versions() + api_key = operation[0].API_KEY + if broker_api_versions is None or api_key not in broker_api_versions: + raise IncompatibleBrokerVersion( + "Kafka broker does not support the '{}' Kafka protocol." + .format(operation[0].__name__)) + min_version, max_version = broker_api_versions[api_key] + version = min(len(operation) - 1, max_version) + if version < min_version: # max library version is less than min broker version. Currently, # no Kafka versions specify a min msg version. Maybe in the future? raise IncompatibleBrokerVersion( "No version of the '{}' Kafka protocol is supported by both the client and broker." - .format(operation.__name__)) + .format(operation[0].__name__)) return version def _validate_timeout(self, timeout_ms): @@ -249,7 +261,11 @@ def _refresh_controller_id(self): version = self._matching_api_version(MetadataRequest) if 1 <= version <= 6: request = MetadataRequest[version]() - response = self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + + response = future.value controller_id = response.controller_id # verify the controller is new enough to support our requests controller_version = self._client.check_version(controller_id) @@ -263,7 +279,49 @@ def _refresh_controller_id(self): "Kafka Admin interface cannot determine the controller using MetadataRequest_v{}." .format(version)) - def _find_group_coordinator_id(self, group_id): + def _find_coordinator_id_send_request(self, group_id): + """Send a FindCoordinatorRequest to a broker. + + :param group_id: The consumer group ID. This is typically the group + name as a string. + :return: A message future + """ + # TODO add support for dynamically picking version of + # GroupCoordinatorRequest which was renamed to FindCoordinatorRequest. + # When I experimented with this, the coordinator value returned in + # GroupCoordinatorResponse_v1 didn't match the value returned by + # GroupCoordinatorResponse_v0 and I couldn't figure out why. + version = 0 + # version = self._matching_api_version(GroupCoordinatorRequest) + if version <= 0: + request = GroupCoordinatorRequest[version](group_id) + else: + raise NotImplementedError( + "Support for GroupCoordinatorRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(self._client.least_loaded_node(), request) + + def _find_coordinator_id_process_response(self, response): + """Process a FindCoordinatorResponse. + + :param response: a FindCoordinatorResponse. + :return: The node_id of the broker that is the coordinator. + """ + if response.API_VERSION <= 0: + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + # Note: When error_type.retriable, Java will retry... see + # KafkaAdminClient's handleFindCoordinatorError method + raise error_type( + "FindCoordinatorRequest failed with response '{}'." + .format(response)) + else: + raise NotImplementedError( + "Support for FindCoordinatorRequest_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return response.coordinator_id + + def _find_coordinator_id(self, group_id): """Find the broker node_id of the coordinator of the given group. 
Sends a FindCoordinatorRequest message to the cluster. Will block until @@ -275,52 +333,26 @@ def _find_group_coordinator_id(self, group_id): :return: The node_id of the broker that is the coordinator. """ # Note: Java may change how this is implemented in KAFKA-6791. - # - # TODO add support for dynamically picking version of - # GroupCoordinatorRequest which was renamed to FindCoordinatorRequest. - # When I experimented with this, GroupCoordinatorResponse_v1 didn't - # match GroupCoordinatorResponse_v0 and I couldn't figure out why. - gc_request = GroupCoordinatorRequest[0](group_id) - gc_response = self._send_request_to_node(self._client.least_loaded_node(), gc_request) - # use the extra error checking in add_group_coordinator() rather than - # immediately returning the group coordinator. - success = self._client.cluster.add_group_coordinator(group_id, gc_response) - if not success: - error_type = Errors.for_code(gc_response.error_code) - assert error_type is not Errors.NoError - # Note: When error_type.retriable, Java will retry... see - # KafkaAdminClient's handleFindCoordinatorError method - raise error_type( - "Could not identify group coordinator for group_id '{}' from response '{}'." - .format(group_id, gc_response)) - group_coordinator = self._client.cluster.coordinator_for_group(group_id) - # will be None if the coordinator was never populated, which should never happen here - assert group_coordinator is not None - # will be -1 if add_group_coordinator() failed... but by this point the - # error should have been raised. - assert group_coordinator != -1 - return group_coordinator + future = self._find_coordinator_id_send_request(group_id) + self._wait_for_futures([future]) + response = future.value + return self._find_coordinator_id_process_response(response) def _send_request_to_node(self, node_id, request): """Send a Kafka protocol message to a specific broker. - Will block until the message result is received. + Returns a future that may be polled for status and results. :param node_id: The broker id to which to send the message. :param request: The message to send. - :return: The Kafka protocol response for the message. + :return: A future object that may be polled for status and results. :exception: The exception if the message could not be sent. """ while not self._client.ready(node_id): # poll until the connection to broker is ready, otherwise send() # will fail with NodeNotReadyError self._client.poll() - future = self._client.send(node_id, request) - self._client.poll(future=future) - if future.succeeded(): - return future.value - else: - raise future.exception # pylint: disable-msg=raising-bad-type + return self._client.send(node_id, request) def _send_request_to_controller(self, request): """Send a Kafka protocol message to the cluster controller. @@ -333,7 +365,11 @@ def _send_request_to_controller(self, request): tries = 2 # in case our cached self._controller_id is outdated while tries: tries -= 1 - response = self._send_request_to_node(self._controller_id, request) + future = self._send_request_to_node(self._controller_id, request) + + self._wait_for_futures([future]) + + response = future.value # In Java, the error fieldname is inconsistent: # - CreateTopicsResponse / CreatePartitionsResponse uses topic_errors # - DeleteTopicsResponse uses topic_error_codes @@ -341,7 +377,7 @@ def _send_request_to_controller(self, request): # one of these attributes and that they always unpack into # (topic, error_code) tuples. 
topic_error_tuples = (response.topic_errors if hasattr(response, 'topic_errors') - else response.topic_error_codes) + else response.topic_error_codes) # Also small py2/py3 compatibility -- py3 can ignore extra values # during unpack via: for x, y, *rest in list_of_values. py2 cannot. # So for now we have to map across the list and explicitly drop any @@ -442,14 +478,269 @@ def delete_topics(self, topics, timeout_ms=None): # describe cluster functionality is in ClusterMetadata # Note: if implemented here, send the request to the least_loaded_node() - # describe_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + @staticmethod + def _convert_describe_acls_response_to_acls(describe_response): + version = describe_response.API_VERSION + + error = Errors.for_code(describe_response.error_code) + acl_list = [] + for resources in describe_response.resources: + if version == 0: + resource_type, resource_name, acls = resources + resource_pattern_type = ACLResourcePatternType.LITERAL.value + elif version <= 1: + resource_type, resource_name, resource_pattern_type, acls = resources + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + for acl in acls: + principal, host, operation, permission_type = acl + conv_acl = ACL( + principal=principal, + host=host, + operation=ACLOperation(operation), + permission_type=ACLPermissionType(permission_type), + resource_pattern=ResourcePattern( + ResourceType(resource_type), + resource_name, + ACLResourcePatternType(resource_pattern_type) + ) + ) + acl_list.append(conv_acl) + + return (acl_list, error,) + + def describe_acls(self, acl_filter): + """Describe a set of ACLs + + Used to return a set of ACLs matching the supplied ACLFilter. + The cluster must be configured with an authorizer for this to work, or + you will get a SecurityDisabledError + + :param acl_filter: an ACLFilter object + :return: tuple of a list of matching ACL objects and a KafkaError (NoError if successful) + """ - # create_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + version = self._matching_api_version(DescribeAclsRequest) + if version == 0: + request = DescribeAclsRequest[version]( + resource_type=acl_filter.resource_pattern.resource_type, + resource_name=acl_filter.resource_pattern.resource_name, + principal=acl_filter.principal, + host=acl_filter.host, + operation=acl_filter.operation, + permission_type=acl_filter.permission_type + ) + elif version <= 1: + request = DescribeAclsRequest[version]( + resource_type=acl_filter.resource_pattern.resource_type, + resource_name=acl_filter.resource_pattern.resource_name, + resource_pattern_type_filter=acl_filter.resource_pattern.pattern_type, + principal=acl_filter.principal, + host=acl_filter.host, + operation=acl_filter.operation, + permission_type=acl_filter.permission_type - # delete_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + ) + else: + raise NotImplementedError( + "Support for DescribeAcls v{} has not yet been added to KafkaAdmin." + .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + # optionally we could retry if error_type.retriable + raise error_type( + "Request '{}' failed with response '{}'." 
+ .format(request, response)) + + return self._convert_describe_acls_response_to_acls(response) + + @staticmethod + def _convert_create_acls_resource_request_v0(acl): + + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_create_acls_resource_request_v1(acl): + + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.resource_pattern.pattern_type, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_create_acls_response_to_acls(acls, create_response): + version = create_response.API_VERSION + + creations_error = [] + creations_success = [] + for i, creations in enumerate(create_response.creation_responses): + if version <= 1: + error_code, error_message = creations + acl = acls[i] + error = Errors.for_code(error_code) + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + + if error is Errors.NoError: + creations_success.append(acl) + else: + creations_error.append((acl, error,)) + + return {"succeeded": creations_success, "failed": creations_error} + + def create_acls(self, acls): + """Create a list of ACLs + + This endpoint only accepts a list of concrete ACL objects, no ACLFilters. + Throws TopicAlreadyExistsError if topic is already present. + + :param acls: a list of ACL objects + :return: dict of successes and failures + """ + + for acl in acls: + if not isinstance(acl, ACL): + raise IllegalArgumentError("acls must contain ACL objects") + + version = self._matching_api_version(CreateAclsRequest) + if version == 0: + request = CreateAclsRequest[version]( + creations=[self._convert_create_acls_resource_request_v0(acl) for acl in acls] + ) + elif version <= 1: + request = CreateAclsRequest[version]( + creations=[self._convert_create_acls_resource_request_v1(acl) for acl in acls] + ) + else: + raise NotImplementedError( + "Support for CreateAcls v{} has not yet been added to KafkaAdmin." 
+ .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + + return self._convert_create_acls_response_to_acls(acls, response) + + @staticmethod + def _convert_delete_acls_resource_request_v0(acl): + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_delete_acls_resource_request_v1(acl): + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.resource_pattern.pattern_type, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_delete_acls_response_to_matching_acls(acl_filters, delete_response): + version = delete_response.API_VERSION + filter_result_list = [] + for i, filter_responses in enumerate(delete_response.filter_responses): + filter_error_code, filter_error_message, matching_acls = filter_responses + filter_error = Errors.for_code(filter_error_code) + acl_result_list = [] + for acl in matching_acls: + if version == 0: + error_code, error_message, resource_type, resource_name, principal, host, operation, permission_type = acl + resource_pattern_type = ACLResourcePatternType.LITERAL.value + elif version == 1: + error_code, error_message, resource_type, resource_name, resource_pattern_type, principal, host, operation, permission_type = acl + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + acl_error = Errors.for_code(error_code) + conv_acl = ACL( + principal=principal, + host=host, + operation=ACLOperation(operation), + permission_type=ACLPermissionType(permission_type), + resource_pattern=ResourcePattern( + ResourceType(resource_type), + resource_name, + ACLResourcePatternType(resource_pattern_type) + ) + ) + acl_result_list.append((conv_acl, acl_error,)) + filter_result_list.append((acl_filters[i], acl_result_list, filter_error,)) + return filter_result_list + + def delete_acls(self, acl_filters): + """Delete a set of ACLs + + Deletes all ACLs matching the list of input ACLFilter + + :param acl_filters: a list of ACLFilter + :return: a list of 3-tuples corresponding to the list of input filters. + The tuples hold (the input ACLFilter, list of affected ACLs, KafkaError instance) + """ + + for acl in acl_filters: + if not isinstance(acl, ACLFilter): + raise IllegalArgumentError("acl_filters must contain ACLFilter type objects") + + version = self._matching_api_version(DeleteAclsRequest) + + if version == 0: + request = DeleteAclsRequest[version]( + filters=[self._convert_delete_acls_resource_request_v0(acl) for acl in acl_filters] + ) + elif version <= 1: + request = DeleteAclsRequest[version]( + filters=[self._convert_delete_acls_resource_request_v1(acl) for acl in acl_filters] + ) + else: + raise NotImplementedError( + "Support for DeleteAcls v{} has not yet been added to KafkaAdmin." 
+ .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + return self._convert_delete_acls_response_to_matching_acls(acl_filters, response) @staticmethod def _convert_describe_config_resource_request(config_resource): @@ -490,7 +781,11 @@ def describe_configs(self, config_resources, include_synonyms=False): raise NotImplementedError( "Support for DescribeConfigs v{} has not yet been added to KafkaAdminClient." .format(version)) - return self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + response = future.value + return response @staticmethod def _convert_alter_config_resource_request(config_resource): @@ -529,7 +824,11 @@ def alter_configs(self, config_resources): # // a single request that may be sent to any broker. # # So this is currently broken as it always sends to the least_loaded_node() - return self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + response = future.value + return response # alter replica logs dir protocol not yet implemented # Note: have to lookup the broker with the replica assignment and send the request to that broker @@ -586,6 +885,54 @@ def create_partitions(self, topic_partitions, timeout_ms=None, validate_only=Fal # describe delegation_token protocol not yet implemented # Note: send the request to the least_loaded_node() + def _describe_consumer_groups_send_request(self, group_id, group_coordinator_id): + """Send a DescribeGroupsRequest to the group's coordinator. + + :param group_id: The group name as a string + :param group_coordinator_id: The node_id of the groups' coordinator + broker. + :return: A message future. + """ + version = self._matching_api_version(DescribeGroupsRequest) + if version <= 1: + # Note: KAFKA-6788 A potential optimization is to group the + # request per coordinator and send one request with a list of + # all consumer groups. Java still hasn't implemented this + # because the error checking is hard to get right when some + # groups error and others don't. + request = DescribeGroupsRequest[version](groups=(group_id,)) + else: + raise NotImplementedError( + "Support for DescribeGroupsRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(group_coordinator_id, request) + + def _describe_consumer_groups_process_response(self, response): + """Process a DescribeGroupsResponse into a group description.""" + if response.API_VERSION <= 1: + assert len(response.groups) == 1 + # TODO need to implement converting the response tuple into + # a more accessible interface like a namedtuple and then stop + # hardcoding tuple indices here. Several Java examples, + # including KafkaAdminClient.java + group_description = response.groups[0] + error_code = group_description[0] + error_type = Errors.for_code(error_code) + # Java has the note: KAFKA-6789, we can retry based on the error code + if error_type is not Errors.NoError: + raise error_type( + "DescribeGroupsResponse failed with response '{}'." + .format(response)) + # TODO Java checks the group protocol type, and if consumer + # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes + # the members' partition assignments... 
that hasn't yet been + # implemented here so just return the raw struct results + else: + raise NotImplementedError( + "Support for DescribeGroupsResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return group_description + def describe_consumer_groups(self, group_ids, group_coordinator_id=None): """Describe a set of consumer groups. @@ -605,44 +952,53 @@ def describe_consumer_groups(self, group_ids, group_coordinator_id=None): partition assignments. """ group_descriptions = [] - version = self._matching_api_version(DescribeGroupsRequest) + futures = [] for group_id in group_ids: if group_coordinator_id is not None: this_groups_coordinator_id = group_coordinator_id else: - this_groups_coordinator_id = self._find_group_coordinator_id(group_id) - if version <= 1: - # Note: KAFKA-6788 A potential optimization is to group the - # request per coordinator and send one request with a list of - # all consumer groups. Java still hasn't implemented this - # because the error checking is hard to get right when some - # groups error and others don't. - request = DescribeGroupsRequest[version](groups=(group_id,)) - response = self._send_request_to_node(this_groups_coordinator_id, request) - assert len(response.groups) == 1 - # TODO need to implement converting the response tuple into - # a more accessible interface like a namedtuple and then stop - # hardcoding tuple indices here. Several Java examples, - # including KafkaAdminClient.java - group_description = response.groups[0] - error_code = group_description[0] - error_type = Errors.for_code(error_code) - # Java has the note: KAFKA-6789, we can retry based on the error code - if error_type is not Errors.NoError: - raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) - # TODO Java checks the group protocol type, and if consumer - # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes - # the members' partition assignments... that hasn't yet been - # implemented here so just return the raw struct results - group_descriptions.append(group_description) - else: - raise NotImplementedError( - "Support for DescribeGroups v{} has not yet been added to KafkaAdminClient." - .format(version)) + this_groups_coordinator_id = self._find_coordinator_id(group_id) + f = self._describe_consumer_groups_send_request(group_id, this_groups_coordinator_id) + futures.append(f) + + self._wait_for_futures(futures) + + for future in futures: + response = future.value + group_description = self._describe_consumer_groups_process_response(response) + group_descriptions.append(group_description) + return group_descriptions + def _list_consumer_groups_send_request(self, broker_id): + """Send a ListGroupsRequest to a broker. + + :param broker_id: The broker's node_id. + :return: A message future + """ + version = self._matching_api_version(ListGroupsRequest) + if version <= 2: + request = ListGroupsRequest[version]() + else: + raise NotImplementedError( + "Support for ListGroupsRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(broker_id, request) + + def _list_consumer_groups_process_response(self, response): + """Process a ListGroupsResponse into a list of groups.""" + if response.API_VERSION <= 2: + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + raise error_type( + "ListGroupsRequest failed with response '{}'." 
+ .format(response)) + else: + raise NotImplementedError( + "Support for ListGroupsResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return response.groups + def list_consumer_groups(self, broker_ids=None): """List all consumer groups known to the cluster. @@ -675,52 +1031,22 @@ def list_consumer_groups(self, broker_ids=None): consumer_groups = set() if broker_ids is None: broker_ids = [broker.nodeId for broker in self._client.cluster.brokers()] - version = self._matching_api_version(ListGroupsRequest) - if version <= 2: - request = ListGroupsRequest[version]() - for broker_id in broker_ids: - response = self._send_request_to_node(broker_id, request) - error_type = Errors.for_code(response.error_code) - if error_type is not Errors.NoError: - raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) - consumer_groups.update(response.groups) - else: - raise NotImplementedError( - "Support for ListGroups v{} has not yet been added to KafkaAdminClient." - .format(version)) + futures = [self._list_consumer_groups_send_request(b) for b in broker_ids] + self._wait_for_futures(futures) + for f in futures: + response = f.value + consumer_groups.update(self._list_consumer_groups_process_response(response)) return list(consumer_groups) - def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, - partitions=None): - """Fetch Consumer Group Offsets. - - Note: - This does not verify that the group_id or partitions actually exist - in the cluster. - - As soon as any error is encountered, it is immediately raised. + def _list_consumer_group_offsets_send_request(self, group_id, + group_coordinator_id, partitions=None): + """Send an OffsetFetchRequest to a broker. :param group_id: The consumer group id name for which to fetch offsets. :param group_coordinator_id: The node_id of the group's coordinator - broker. If set to None, will query the cluster to find the group - coordinator. Explicitly specifying this can be useful to prevent - that extra network round trip if you already know the group - coordinator. Default: None. - :param partitions: A list of TopicPartitions for which to fetch - offsets. On brokers >= 0.10.2, this can be set to None to fetch all - known offsets for the consumer group. Default: None. - :return dictionary: A dictionary with TopicPartition keys and - OffsetAndMetada values. Partitions that are not specified and for - which the group_id does not have a recorded offset are omitted. An - offset value of `-1` indicates the group_id has no offset for that - TopicPartition. A `-1` can only happen for partitions that are - explicitly specified. + broker. + :return: A message future """ - group_offsets_listing = {} - if group_coordinator_id is None: - group_coordinator_id = self._find_group_coordinator_id(group_id) version = self._matching_api_version(OffsetFetchRequest) if version <= 3: if partitions is None: @@ -738,29 +1064,88 @@ def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, topics_partitions_dict[topic].add(partition) topics_partitions = list(six.iteritems(topics_partitions_dict)) request = OffsetFetchRequest[version](group_id, topics_partitions) - response = self._send_request_to_node(group_coordinator_id, request) - if version > 1: # OffsetFetchResponse_v1 lacks a top-level error_code + else: + raise NotImplementedError( + "Support for OffsetFetchRequest_v{} has not yet been added to KafkaAdminClient." 
+ .format(version)) + return self._send_request_to_node(group_coordinator_id, request) + + def _list_consumer_group_offsets_process_response(self, response): + """Process an OffsetFetchResponse. + + :param response: an OffsetFetchResponse. + :return: A dictionary composed of TopicPartition keys and + OffsetAndMetada values. + """ + if response.API_VERSION <= 3: + + # OffsetFetchResponse_v1 lacks a top-level error_code + if response.API_VERSION > 1: error_type = Errors.for_code(response.error_code) if error_type is not Errors.NoError: # optionally we could retry if error_type.retriable raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) + "OffsetFetchResponse failed with response '{}'." + .format(response)) + # transform response into a dictionary with TopicPartition keys and # OffsetAndMetada values--this is what the Java AdminClient returns + offsets = {} for topic, partitions in response.topics: for partition, offset, metadata, error_code in partitions: error_type = Errors.for_code(error_code) if error_type is not Errors.NoError: raise error_type( - "Unable to fetch offsets for group_id {}, topic {}, partition {}" - .format(group_id, topic, partition)) - group_offsets_listing[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata) + "Unable to fetch consumer group offsets for topic {}, partition {}" + .format(topic, partition)) + offsets[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata) else: raise NotImplementedError( - "Support for OffsetFetch v{} has not yet been added to KafkaAdminClient." - .format(version)) - return group_offsets_listing + "Support for OffsetFetchResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return offsets + + def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, + partitions=None): + """Fetch Consumer Offsets for a single consumer group. + + Note: + This does not verify that the group_id or partitions actually exist + in the cluster. + + As soon as any error is encountered, it is immediately raised. + + :param group_id: The consumer group id name for which to fetch offsets. + :param group_coordinator_id: The node_id of the group's coordinator + broker. If set to None, will query the cluster to find the group + coordinator. Explicitly specifying this can be useful to prevent + that extra network round trip if you already know the group + coordinator. Default: None. + :param partitions: A list of TopicPartitions for which to fetch + offsets. On brokers >= 0.10.2, this can be set to None to fetch all + known offsets for the consumer group. Default: None. + :return dictionary: A dictionary with TopicPartition keys and + OffsetAndMetada values. Partitions that are not specified and for + which the group_id does not have a recorded offset are omitted. An + offset value of `-1` indicates the group_id has no offset for that + TopicPartition. A `-1` can only happen for partitions that are + explicitly specified. + """ + if group_coordinator_id is None: + group_coordinator_id = self._find_coordinator_id(group_id) + future = self._list_consumer_group_offsets_send_request( + group_id, group_coordinator_id, partitions) + self._wait_for_futures([future]) + response = future.value + return self._list_consumer_group_offsets_process_response(response) # delete groups protocol not yet implemented # Note: send the request to the group's coordinator. 
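    # A minimal usage sketch of the ACL and offset APIs added above. The
    # bootstrap address, principal, topic, and group names are placeholders
    # chosen for illustration, not values taken from this patch:
    #
    #   from kafka.admin import (
    #       KafkaAdminClient, ACL, ACLOperation, ACLPermissionType,
    #       ResourcePattern, ResourceType)
    #
    #   admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
    #
    #   # Allow User:alice to read a specific topic (LITERAL pattern by default)
    #   acl = ACL(principal='User:alice', host='*',
    #             operation=ACLOperation.READ,
    #             permission_type=ACLPermissionType.ALLOW,
    #             resource_pattern=ResourcePattern(ResourceType.TOPIC, 'my-topic'))
    #   result = admin.create_acls([acl])   # dict with 'succeeded' / 'failed' lists
    #
    #   # Fetch committed offsets for a consumer group
    #   offsets = admin.list_consumer_group_offsets('my-group')
    #   for tp, meta in offsets.items():
    #       print(tp.topic, tp.partition, meta.offset)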
+ + def _wait_for_futures(self, futures): + while not all(future.succeeded() for future in futures): + for future in futures: + self._client.poll(future=future) + + if future.failed(): + raise future.exception # pylint: disable-msg=raising-bad-type diff --git a/kafka/admin_client.py b/kafka/admin_client.py index 2f70b0f85..cb2bb86ff 100644 --- a/kafka/admin_client.py +++ b/kafka/admin_client.py @@ -107,11 +107,12 @@ def _send(self, node, request): def _send_request(self, request): controller_id = self._send_controller_request() - if not self.client.ready(controller_id): - raise NodeNotReadyError(controller_id) - else: - return self._send(controller_id, request) - + while not self.client.ready(controller_id): + # poll until the connection to broker is ready, otherwise send() + # will fail with NodeNotReadyError + self.client.poll() + return self._send(controller_id, request) + def create_partitions( self, new_partitions_infos, diff --git a/kafka/client_async.py b/kafka/client_async.py index 77efac869..3ec4eadc2 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -207,6 +207,7 @@ def __init__(self, **configs): self._conns = Dict() # object to support weakrefs self._api_versions = None self._connecting = set() + self._sending = set() self._refresh_on_disconnects = True self._last_bootstrap = 0 self._bootstrap_fails = 0 @@ -267,9 +268,9 @@ def _conn_state_change(self, node_id, sock, conn): if node_id not in self._connecting: self._connecting.add(node_id) try: - self._selector.register(sock, selectors.EVENT_WRITE) + self._selector.register(sock, selectors.EVENT_WRITE, conn) except KeyError: - self._selector.modify(sock, selectors.EVENT_WRITE) + self._selector.modify(sock, selectors.EVENT_WRITE, conn) if self.cluster.is_bootstrap(node_id): self._last_bootstrap = time.time() @@ -532,6 +533,7 @@ def send(self, node_id, request, wakeup=True): # we will need to call send_pending_requests() # to trigger network I/O future = conn.send(request, blocking=False) + self._sending.add(conn) # Wakeup signal is useful in case another thread is # blocked waiting for incoming network traffic while holding @@ -588,11 +590,16 @@ def poll(self, timeout_ms=None, future=None): metadata_timeout_ms, idle_connection_timeout_ms, self.config['request_timeout_ms']) - timeout = max(0, timeout / 1000) # avoid negative timeouts + # if there are no requests in flight, do not block longer than the retry backoff + if self.in_flight_request_count() == 0: + timeout = min(timeout, self.config['retry_backoff_ms']) + timeout = max(0, timeout) # avoid negative timeouts - self._poll(timeout) + self._poll(timeout / 1000) - responses.extend(self._fire_pending_completed_requests()) + # called without the lock to avoid deadlock potential + # if handlers need to acquire locks + responses.extend(self._fire_pending_completed_requests()) # If all we had was a timeout (future is None) - only do one poll # If we do have a future, we keep looping until it is done @@ -601,14 +608,23 @@ def poll(self, timeout_ms=None, future=None): return responses + def _register_send_sockets(self): + while self._sending: + conn = self._sending.pop() + try: + key = self._selector.get_key(conn._sock) + events = key.events | selectors.EVENT_WRITE + self._selector.modify(key.fileobj, events, key.data) + except KeyError: + self._selector.register(conn._sock, selectors.EVENT_WRITE, conn) + def _poll(self, timeout): # This needs to be locked, but since it is only called from within the # locked section of poll(), there is no additional lock acquisition 
here processed = set() # Send pending requests first, before polling for responses - for conn in six.itervalues(self._conns): - conn.send_pending_requests() + self._register_send_sockets() start_select = time.time() ready = self._selector.select(timeout) @@ -620,7 +636,25 @@ def _poll(self, timeout): if key.fileobj is self._wake_r: self._clear_wake_fd() continue - elif not (events & selectors.EVENT_READ): + + # Send pending requests if socket is ready to write + if events & selectors.EVENT_WRITE: + conn = key.data + if conn.connecting(): + conn.connect() + else: + if conn.send_pending_requests_v2(): + # If send is complete, we dont need to track write readiness + # for this socket anymore + if key.events ^ selectors.EVENT_WRITE: + self._selector.modify( + key.fileobj, + key.events ^ selectors.EVENT_WRITE, + key.data) + else: + self._selector.unregister(key.fileobj) + + if not (events & selectors.EVENT_READ): continue conn = key.data processed.add(conn) @@ -917,6 +951,16 @@ def _maybe_close_oldest_connection(self): log.info('Closing idle connection %s, last active %d ms ago', conn_id, idle_ms) self.close(node_id=conn_id) + def bootstrap_connected(self): + """Return True if a bootstrap node is connected""" + for node_id in self._conns: + if not self.cluster.is_bootstrap(node_id): + continue + if self._conns[node_id].connected(): + return True + else: + return False + # OrderedDict requires python2.7+ try: diff --git a/kafka/cluster.py b/kafka/cluster.py index 28b71c9d1..19137de62 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -9,7 +9,7 @@ from kafka.vendor import six from kafka import errors as Errors -from kafka.conn import collect_hosts, dns_lookup +from kafka.conn import collect_hosts from kafka.future import Future from kafka.structs import BrokerMetadata, PartitionMetadata, TopicPartition @@ -65,6 +65,7 @@ def __init__(self, **configs): self.config[key] = configs[key] self._bootstrap_brokers = self._generate_bootstrap_brokers() + self._coordinator_brokers = {} def _generate_bootstrap_brokers(self): # collect_hosts does not perform DNS, so we should be fine to re-use @@ -96,7 +97,11 @@ def broker_metadata(self, broker_id): Returns: BrokerMetadata or None if not found """ - return self._brokers.get(broker_id) or self._bootstrap_brokers.get(broker_id) + return ( + self._brokers.get(broker_id) or + self._bootstrap_brokers.get(broker_id) or + self._coordinator_brokers.get(broker_id) + ) def partitions_for_topic(self, topic): """Return set of all partitions for topic (whether available or not) @@ -189,7 +194,7 @@ def request_update(self): with self._lock: self._need_update = True if not self._future or self._future.is_done: - self._future = Future() + self._future = Future() return self._future def topics(self, exclude_internal_topics=True): @@ -341,41 +346,28 @@ def add_group_coordinator(self, group, response): response (GroupCoordinatorResponse): broker response Returns: - bool: True if metadata is updated, False on error + string: coordinator node_id if metadata is updated, None on error """ log.debug("Updating coordinator for %s: %s", group, response) error_type = Errors.for_code(response.error_code) if error_type is not Errors.NoError: log.error("GroupCoordinatorResponse error: %s", error_type) self._groups[group] = -1 - return False + return - node_id = response.coordinator_id + # Use a coordinator-specific node id so that group requests + # get a dedicated connection + node_id = 'coordinator-{}'.format(response.coordinator_id) coordinator = BrokerMetadata( - 
response.coordinator_id, + node_id, response.host, response.port, None) - # Assume that group coordinators are just brokers - # (this is true now, but could diverge in future) - if node_id not in self._brokers: - self._brokers[node_id] = coordinator - - # If this happens, either brokers have moved without - # changing IDs, or our assumption above is wrong - else: - node = self._brokers[node_id] - if coordinator.host != node.host or coordinator.port != node.port: - log.error("GroupCoordinator metadata conflicts with existing" - " broker metadata. Coordinator: %s, Broker: %s", - coordinator, node) - self._groups[group] = node_id - return False - log.info("Group coordinator for %s is %s", group, coordinator) + self._coordinator_brokers[node_id] = coordinator self._groups[group] = node_id - return True + return node_id def with_partitions(self, partitions_to_add): """Returns a copy of cluster metadata with partitions added""" diff --git a/kafka/conn.py b/kafka/conn.py index 19ba7044c..a06de4910 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -36,6 +36,7 @@ if six.PY2: ConnectionError = socket.error + TimeoutError = socket.error BlockingIOError = Exception log = logging.getLogger(__name__) @@ -288,6 +289,7 @@ def __init__(self, host, port, afi, **configs): self.state = ConnectionStates.DISCONNECTED self._reset_reconnect_backoff() self._sock = None + self._send_buffer = b'' self._ssl_context = None if self.config['ssl_context'] is not None: self._ssl_context = self.config['ssl_context'] @@ -463,6 +465,9 @@ def _wrap_ssl(self): log.info('%s: Loading SSL CA from %s', self, self.config['ssl_cafile']) self._ssl_context.load_verify_locations(self.config['ssl_cafile']) self._ssl_context.verify_mode = ssl.CERT_REQUIRED + else: + log.info('%s: Loading system default SSL CAs from %s', self, ssl.get_default_verify_paths()) + self._ssl_context.load_default_certs() if self.config['ssl_certfile'] and self.config['ssl_keyfile']: log.info('%s: Loading SSL Cert from %s', self, self.config['ssl_certfile']) log.info('%s: Loading SSL Key from %s', self, self.config['ssl_keyfile']) @@ -498,7 +503,7 @@ def _try_handshake(self): # old ssl in python2.6 will swallow all SSLErrors here... 
except (SSLWantReadError, SSLWantWriteError): pass - except (SSLZeroReturnError, ConnectionError, SSLEOFError): + except (SSLZeroReturnError, ConnectionError, TimeoutError, SSLEOFError): log.warning('SSL connection closed by server during handshake.') self.close(Errors.KafkaConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user @@ -553,6 +558,32 @@ def _handle_sasl_handshake_response(self, future, response): 'kafka-python does not support SASL mechanism %s' % self.config['sasl_mechanism'])) + def _send_bytes(self, data): + """Send some data via non-blocking IO + + Note: this method is not synchronized internally; you should + always hold the _lock before calling + + Returns: number of bytes + Raises: socket exception + """ + total_sent = 0 + while total_sent < len(data): + try: + sent_bytes = self._sock.send(data[total_sent:]) + total_sent += sent_bytes + except (SSLWantReadError, SSLWantWriteError): + break + except (ConnectionError, TimeoutError) as e: + if six.PY2 and e.errno == errno.EWOULDBLOCK: + break + raise + except BlockingIOError: + if six.PY3: + break + raise + return total_sent + def _send_bytes_blocking(self, data): self._sock.settimeout(self.config['request_timeout_ms'] / 1000) total_sent = 0 @@ -589,21 +620,30 @@ def _try_authenticate_plain(self, future): self.config['sasl_plain_username'], self.config['sasl_plain_password']]).encode('utf-8')) size = Int32.encode(len(msg)) - try: - with self._lock: - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - self._send_bytes_blocking(size + msg) - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + self._send_bytes_blocking(size + msg) - except ConnectionError as e: - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) + # The server will send a zero sized message (that is Int32(0)) on success. 
+ # The connection is closed on failure + data = self._recv_bytes_blocking(4) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) if data != b'\x00\x00\x00\x00': error = Errors.AuthenticationFailedError('Unrecognized response during authentication') @@ -621,61 +661,67 @@ def _try_authenticate_gssapi(self, future): ).canonicalize(gssapi.MechType.kerberos) log.debug('%s: GSSAPI name: %s', self, gssapi_name) - self._lock.acquire() - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - # Establish security context and negotiate protection level - # For reference RFC 2222, section 7.2.1 - try: - # Exchange tokens until authentication either succeeds or fails - client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') - received_token = None - while not client_ctx.complete: - # calculate an output token from kafka token (or None if first iteration) - output_token = client_ctx.step(received_token) - - # pass output token to kafka, or send empty response if the security - # context is complete (output token is None in that case) - if output_token is None: - self._send_bytes_blocking(Int32.encode(0)) - else: - msg = output_token + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + # Establish security context and negotiate protection level + # For reference RFC 2222, section 7.2.1 + try: + # Exchange tokens until authentication either succeeds or fails + client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') + received_token = None + while not client_ctx.complete: + # calculate an output token from kafka token (or None if first iteration) + output_token = client_ctx.step(received_token) + + # pass output token to kafka, or send empty response if the security + # context is complete (output token is None in that case) + if output_token is None: + self._send_bytes_blocking(Int32.encode(0)) + else: + msg = output_token + size = Int32.encode(len(msg)) + self._send_bytes_blocking(size + msg) + + # The server will send a token back. Processing of this token either + # establishes a security context, or it needs further token exchange. + # The gssapi will be able to identify the needed next step. + # The connection is closed on failure. + header = self._recv_bytes_blocking(4) + (token_size,) = struct.unpack('>i', header) + received_token = self._recv_bytes_blocking(token_size) + + # Process the security layer negotiation token, sent by the server + # once the security context is established. + + # unwraps message containing supported protection levels and msg size + msg = client_ctx.unwrap(received_token).message + # Kafka currently doesn't support integrity or confidentiality security layers, so we + # simply set QoP to 'auth' only (first octet). We reuse the max message size proposed + # by the server + msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] + # add authorization identity to the response, GSS-wrap and send it + msg = client_ctx.wrap(msg + auth_id.encode(), False).message size = Int32.encode(len(msg)) self._send_bytes_blocking(size + msg) - # The server will send a token back. Processing of this token either - # establishes a security context, or it needs further token exchange. 
- # The gssapi will be able to identify the needed next step. - # The connection is closed on failure. - header = self._recv_bytes_blocking(4) - (token_size,) = struct.unpack('>i', header) - received_token = self._recv_bytes_blocking(token_size) - - # Process the security layer negotiation token, sent by the server - # once the security context is established. - - # unwraps message containing supported protection levels and msg size - msg = client_ctx.unwrap(received_token).message - # Kafka currently doesn't support integrity or confidentiality security layers, so we - # simply set QoP to 'auth' only (first octet). We reuse the max message size proposed - # by the server - msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] - # add authorization identity to the response, GSS-wrap and send it - msg = client_ctx.wrap(msg + auth_id.encode(), False).message - size = Int32.encode(len(msg)) - self._send_bytes_blocking(size + msg) - - except ConnectionError as e: - self._lock.release() - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) - except Exception as e: - self._lock.release() - return future.failure(e) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + except Exception as e: + err = e + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) - self._lock.release() log.info('%s: Authenticated as %s via GSSAPI', self, gssapi_name) return future.success(True) @@ -684,25 +730,31 @@ def _try_authenticate_oauth(self, future): msg = bytes(self._build_oauth_client_request().encode("utf-8")) size = Int32.encode(len(msg)) - self._lock.acquire() - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - try: - # Send SASL OAuthBearer request with OAuth token - self._send_bytes_blocking(size + msg) - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + # Send SASL OAuthBearer request with OAuth token + self._send_bytes_blocking(size + msg) + + # The server will send a zero sized message (that is Int32(0)) on success. + # The connection is closed on failure + data = self._recv_bytes_blocking(4) - except ConnectionError as e: - self._lock.release() - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True - self._lock.release() + if err is not None: + if close: + self.close(error=err) + return future.failure(err) if data != b'\x00\x00\x00\x00': error = Errors.AuthenticationFailedError('Unrecognized response during authentication') @@ -744,16 +796,16 @@ def connection_delay(self): """ Return the number of milliseconds to wait, based on the connection state, before attempting to send data. When disconnected, this respects - the reconnect backoff time. 
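The last step of the GSSAPI exchange above unwraps a short server message whose first octet is a bitmask of offered protection levels and whose remaining octets carry the maximum message size; the client masks the QoP down to authentication-only and GSS-wraps the reply. A standalone sketch of just that byte handling (constant and helper names are illustrative, following RFC 4752):

    import struct

    SASL_QOP_AUTH = 1  # authentication only: no integrity or confidentiality layer

    def qop_auth_reply(server_unwrapped, authzid=b''):
        """Keep the server's max-size bytes, but advertise auth-only QoP."""
        offered = server_unwrapped[0]
        if not isinstance(offered, int):      # Python 2 returns a 1-char str
            offered = ord(offered)
        body = struct.pack('>B', SASL_QOP_AUTH & offered) + server_unwrapped[1:]
        return body + authzid                 # caller GSS-wraps and length-prefixes this

    # server offers auth+integrity with a 4096-byte max; we answer auth-only
    assert qop_auth_reply(b'\x03\x00\x10\x00') == b'\x01\x00\x10\x00'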
When connecting, returns 0 to allow - non-blocking connect to finish. When connected, returns a very large - number to handle slow/stalled connections. + the reconnect backoff time. When connecting or connected, returns a very + large number to handle slow/stalled connections. """ time_waited = time.time() - (self.last_attempt or 0) if self.state is ConnectionStates.DISCONNECTED: return max(self._reconnect_backoff - time_waited, 0) * 1000 - elif self.connecting(): - return 0 else: + # When connecting or connected, we should be able to delay + # indefinitely since other events (connection or data acked) will + # cause a wakeup once data can be sent. return float('inf') def connected(self): @@ -814,6 +866,7 @@ def close(self, error=None): self._protocol = KafkaProtocol( client_id=self.config['client_id'], api_version=self.config['api_version']) + self._send_buffer = b'' if error is None: error = Errors.Cancelled(str(self)) ifrs = list(self.in_flight_requests.items()) @@ -853,6 +906,9 @@ def _send(self, request, blocking=True): future = Future() with self._lock: if not self._can_send_recv(): + # In this case, since we created the future above, + # we know there are no callbacks/errbacks that could fire w/ + # lock. So failing + returning inline should be safe return future.failure(Errors.NodeNotReadyError(str(self))) correlation_id = self._protocol.send_request(request) @@ -873,24 +929,60 @@ def _send(self, request, blocking=True): return future def send_pending_requests(self): - """Can block on network if request is larger than send_buffer_bytes""" + """Attempts to send pending requests messages via blocking IO + If all requests have been sent, return True + Otherwise, if the socket is blocked and there are more bytes to send, + return False. + """ try: with self._lock: if not self._can_send_recv(): - return Errors.NodeNotReadyError(str(self)) - # In the future we might manage an internal write buffer - # and send bytes asynchronously. For now, just block - # sending each request payload + return False data = self._protocol.send_bytes() total_bytes = self._send_bytes_blocking(data) + if self._sensors: self._sensors.bytes_sent.record(total_bytes) - return total_bytes - except ConnectionError as e: + return True + + except (ConnectionError, TimeoutError) as e: log.exception("Error sending request data to %s", self) error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) - return error + return False + + def send_pending_requests_v2(self): + """Attempts to send pending requests messages via non-blocking IO + If all requests have been sent, return True + Otherwise, if the socket is blocked and there are more bytes to send, + return False. 
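The revised connection_delay() above returns the remaining reconnect backoff (in milliseconds) when disconnected and an effectively infinite delay otherwise, relying on other wakeups once the connection can actually take data. A small standalone restatement of that rule (function and argument names are illustrative, not the connection API):

    import time

    def connection_delay_ms(state, last_attempt, reconnect_backoff_s):
        """disconnected -> remaining backoff in ms; connecting/connected -> inf."""
        if state == 'disconnected':
            waited = time.time() - (last_attempt or 0)
            return max(reconnect_backoff_s - waited, 0) * 1000
        return float('inf')

    # e.g. with a 50ms backoff and roughly 20ms already waited, ~30ms remain
    delay = connection_delay_ms('disconnected', time.time() - 0.02, 0.05)
    assert 0 <= delay <= 50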
+ """ + try: + with self._lock: + if not self._can_send_recv(): + return False + + # _protocol.send_bytes returns encoded requests to send + # we send them via _send_bytes() + # and hold leftover bytes in _send_buffer + if not self._send_buffer: + self._send_buffer = self._protocol.send_bytes() + + total_bytes = 0 + if self._send_buffer: + total_bytes = self._send_bytes(self._send_buffer) + self._send_buffer = self._send_buffer[total_bytes:] + + if self._sensors: + self._sensors.bytes_sent.record(total_bytes) + # Return True iff send buffer is empty + return len(self._send_buffer) == 0 + + except (ConnectionError, TimeoutError, Exception) as e: + log.exception("Error sending request data to %s", self) + error = Errors.KafkaConnectionError("%s: %s" % (self, e)) + self.close(error=error) + return False def can_send_more(self): """Return True unless there are max_in_flight_requests_per_connection.""" @@ -911,7 +1003,7 @@ def recv(self): self.config['request_timeout_ms'])) return () - # augment respones w/ correlation_id, future, and timestamp + # augment responses w/ correlation_id, future, and timestamp for i, (correlation_id, response) in enumerate(responses): try: with self._lock: @@ -931,56 +1023,57 @@ def recv(self): def _recv(self): """Take all available bytes from socket, return list of any responses from parser""" recvd = [] - self._lock.acquire() - if not self._can_send_recv(): - log.warning('%s cannot recv: socket not connected', self) - self._lock.release() - return () - - while len(recvd) < self.config['sock_chunk_buffer_count']: - try: - data = self._sock.recv(self.config['sock_chunk_bytes']) - # We expect socket.recv to raise an exception if there are no - # bytes available to read from the socket in non-blocking mode. - # but if the socket is disconnected, we will get empty data - # without an exception raised - if not data: - log.error('%s: socket disconnected', self) - self._lock.release() - self.close(error=Errors.KafkaConnectionError('socket disconnected')) - return [] - else: - recvd.append(data) + err = None + with self._lock: + if not self._can_send_recv(): + log.warning('%s cannot recv: socket not connected', self) + return () - except SSLWantReadError: - break - except ConnectionError as e: - if six.PY2 and e.errno == errno.EWOULDBLOCK: + while len(recvd) < self.config['sock_chunk_buffer_count']: + try: + data = self._sock.recv(self.config['sock_chunk_bytes']) + # We expect socket.recv to raise an exception if there are no + # bytes available to read from the socket in non-blocking mode. 
+ # but if the socket is disconnected, we will get empty data + # without an exception raised + if not data: + log.error('%s: socket disconnected', self) + err = Errors.KafkaConnectionError('socket disconnected') + break + else: + recvd.append(data) + + except (SSLWantReadError, SSLWantWriteError): break - log.exception('%s: Error receiving network data' - ' closing socket', self) - self._lock.release() - self.close(error=Errors.KafkaConnectionError(e)) - return [] - except BlockingIOError: - if six.PY3: + except (ConnectionError, TimeoutError) as e: + if six.PY2 and e.errno == errno.EWOULDBLOCK: + break + log.exception('%s: Error receiving network data' + ' closing socket', self) + err = Errors.KafkaConnectionError(e) break - self._lock.release() - raise - - recvd_data = b''.join(recvd) - if self._sensors: - self._sensors.bytes_received.record(len(recvd_data)) - - try: - responses = self._protocol.receive_bytes(recvd_data) - except Errors.KafkaProtocolError as e: - self._lock.release() - self.close(e) - return [] - else: - self._lock.release() - return responses + except BlockingIOError: + if six.PY3: + break + # For PY2 this is a catchall and should be re-raised + raise + + # Only process bytes if there was no connection exception + if err is None: + recvd_data = b''.join(recvd) + if self._sensors: + self._sensors.bytes_received.record(len(recvd_data)) + + # We need to keep the lock through protocol receipt + # so that we ensure that the processed byte order is the + # same as the received byte order + try: + return self._protocol.receive_bytes(recvd_data) + except Errors.KafkaProtocolError as e: + err = e + + self.close(error=err) + return () def requests_timed_out(self): with self._lock: diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 36e269f19..5434c36a2 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -123,7 +123,7 @@ def send_fetches(self): for node_id, request in six.iteritems(self._create_fetch_requests()): if self._client.ready(node_id): log.debug("Sending FetchRequest to node %s", node_id) - future = self._client.send(node_id, request) + future = self._client.send(node_id, request, wakeup=False) future.add_callback(self._handle_fetch_response, request, time.time()) future.add_errback(log.error, 'Fetch to node %s failed: %s', node_id) futures.append(future) @@ -235,14 +235,16 @@ def _reset_offset(self, partition): log.debug("Resetting offset for partition %s to %s offset.", partition, strategy) offsets = self._retrieve_offsets({partition: timestamp}) - if partition not in offsets: - raise NoOffsetForPartitionError(partition) - offset = offsets[partition][0] - # we might lose the assignment while fetching the offset, - # so check it is still active - if self._subscriptions.is_assigned(partition): - self._subscriptions.seek(partition, offset) + if partition in offsets: + offset = offsets[partition][0] + + # we might lose the assignment while fetching the offset, + # so check it is still active + if self._subscriptions.is_assigned(partition): + self._subscriptions.seek(partition, offset) + else: + log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,)) def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): """Fetch offset for each partition passed in ``timestamps`` map. 
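The reworked _recv loop above reads fixed-size chunks until the socket would block, the chunk-count cap is hit, or an empty read signals disconnection, and only then hands the concatenated bytes to the protocol parser. A minimal standalone version of that read loop (plain non-blocking socket, illustrative names, no SSL handling):

    def recv_available(sock, chunk_bytes=4096, max_chunks=1000):
        """Gather whatever is currently readable without blocking."""
        chunks = []
        while len(chunks) < max_chunks:
            try:
                data = sock.recv(chunk_bytes)
            except BlockingIOError:
                break                       # nothing more to read right now
            if not data:                    # empty read: the peer closed the socket
                raise ConnectionError('socket disconnected')
            chunks.append(data)
        return b''.join(chunks)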
@@ -266,7 +268,11 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): start_time = time.time() remaining_ms = timeout_ms + timestamps = copy.copy(timestamps) while remaining_ms > 0: + if not timestamps: + return {} + future = self._send_offset_requests(timestamps) self._client.poll(future=future, timeout_ms=remaining_ms) @@ -283,6 +289,15 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): if future.exception.invalid_metadata: refresh_future = self._client.cluster.request_update() self._client.poll(future=refresh_future, timeout_ms=remaining_ms) + + # Issue #1780 + # Recheck partition existance after after a successful metadata refresh + if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata): + log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existance") + unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata + if self._client.cluster.leader_for_partition(unknown_partition) is None: + log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, )) + timestamps.pop(unknown_partition) else: time.sleep(self.config['retry_backoff_ms'] / 1000.0) @@ -292,7 +307,7 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): raise Errors.KafkaTimeoutError( "Failed to get offsets by timestamps in %s ms" % (timeout_ms,)) - def fetched_records(self, max_records=None): + def fetched_records(self, max_records=None, update_offsets=True): """Returns previously fetched records and updates consumed offsets. Arguments: @@ -330,10 +345,11 @@ def fetched_records(self, max_records=None): else: records_remaining -= self._append(drained, self._next_partition_records, - records_remaining) + records_remaining, + update_offsets) return dict(drained), bool(self._completed_fetches) - def _append(self, drained, part, max_records): + def _append(self, drained, part, max_records, update_offsets): if not part: return 0 @@ -366,7 +382,8 @@ def _append(self, drained, part, max_records): for record in part_records: drained[tp].append(record) - self._subscriptions.assignment[tp].position = next_offset + if update_offsets: + self._subscriptions.assignment[tp].position = next_offset return len(part_records) else: diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 3195b1b4a..15c2905d5 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -209,7 +209,7 @@ class KafkaConsumer(six.Iterator): Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker - api version. Only applies if api_version set to 'auto' + api version. Only applies if api_version set to None. connections_max_idle_ms: Close idle connections after the number of milliseconds specified by this config. 
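_retrieve_offsets above retries against a millisecond deadline, works on a copy of the timestamps map so entries can be pruned (e.g. after the stale-metadata recheck for issue #1780), and stops early once nothing is left to look up. A hedged sketch of that deadline-bounded retry shape (the ``attempt`` callable is a stand-in for the offset request round-trip):

    import time

    from kafka.errors import KafkaTimeoutError

    def retry_with_deadline(attempt, timeout_ms, retry_backoff_ms=100):
        """Call ``attempt()`` until it returns a non-None result or the deadline passes."""
        start = time.time()
        remaining_ms = timeout_ms
        while remaining_ms > 0:
            result = attempt()
            if result is not None:
                return result
            time.sleep(retry_backoff_ms / 1000.0)
            remaining_ms = timeout_ms - (time.time() - start) * 1000
        raise KafkaTimeoutError(
            "Failed to get offsets by timestamps in %s ms" % (timeout_ms,))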
The broker closes idle connections after connections.max.idle.ms, so this avoids hitting @@ -245,7 +245,7 @@ class KafkaConsumer(six.Iterator): Note: Configuration parameters are described in more detail at - https://kafka.apache.org/documentation/#newconsumerconfigs + https://kafka.apache.org/documentation/#consumerconfigs """ DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', @@ -302,7 +302,8 @@ class KafkaConsumer(six.Iterator): 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'legacy_iterator': False, # enable to revert to < 1.4.7 iterator } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 @@ -390,6 +391,10 @@ def __init__(self, *topics, **configs): self._subscription.subscribe(topics=topics) self._client.set_topics(topics) + def bootstrap_connected(self): + """Return True if the bootstrap is connected.""" + return self._client.bootstrap_connected() + def assign(self, partitions): """Manually assign a list of TopicPartitions to this consumer. @@ -597,7 +602,7 @@ def partitions_for_topic(self, topic): partitions = cluster.partitions_for_topic(topic) return partitions - def poll(self, timeout_ms=0, max_records=None): + def poll(self, timeout_ms=0, max_records=None, update_offsets=True): """Fetch data from assigned topics / partitions. Records are fetched and returned in batches by topic-partition. @@ -621,6 +626,12 @@ def poll(self, timeout_ms=0, max_records=None): dict: Topic to list of records since the last fetch for the subscribed list of topics and partitions. """ + # Note: update_offsets is an internal-use only argument. It is used to + # support the python iterator interface, and which wraps consumer.poll() + # and requires that the partition offsets tracked by the fetcher are not + # updated until the iterator returns each record to the user. As such, + # the argument is not documented and should not be relied on by library + # users to not break in the future. assert timeout_ms >= 0, 'Timeout must not be negative' if max_records is None: max_records = self.config['max_poll_records'] @@ -631,7 +642,7 @@ def poll(self, timeout_ms=0, max_records=None): start = time.time() remaining = timeout_ms while True: - records = self._poll_once(remaining, max_records) + records = self._poll_once(remaining, max_records, update_offsets=update_offsets) if records: return records @@ -641,7 +652,7 @@ def poll(self, timeout_ms=0, max_records=None): if remaining <= 0: return {} - def _poll_once(self, timeout_ms, max_records): + def _poll_once(self, timeout_ms, max_records, update_offsets=True): """Do one round of polling. In addition to checking for new data, this does any needed heart-beating, auto-commits, and offset updates. @@ -660,18 +671,22 @@ def _poll_once(self, timeout_ms, max_records): # If data is available already, e.g. from a previous network client # poll() call to commit, then just return it immediately - records, partial = self._fetcher.fetched_records(max_records) + records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) if records: # Before returning the fetched records, we can send off the # next round of fetches and avoid block waiting for their # responses to enable pipelining while the user is handling the # fetched records. 
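poll() above returns whatever has been fetched so far, batched by topic-partition, within the given timeout. A typical, purely illustrative consumption loop against a locally running broker and an existing topic might look like:

    from kafka import KafkaConsumer

    # assumes a broker at localhost:9092 and a topic named 'my-topic'
    consumer = KafkaConsumer('my-topic',
                             bootstrap_servers='localhost:9092',
                             group_id='example-group',
                             auto_offset_reset='earliest')

    batch = consumer.poll(timeout_ms=1000, max_records=100)
    for tp, records in batch.items():        # {TopicPartition: [ConsumerRecord, ...]}
        for record in records:
            print(tp.topic, tp.partition, record.offset, record.value)
    consumer.close()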
if not partial: - self._fetcher.send_fetches() + futures = self._fetcher.send_fetches() + if len(futures): + self._client.poll(timeout_ms=0) return records # Send any new fetches (won't resend pending fetches) - self._fetcher.send_fetches() + futures = self._fetcher.send_fetches() + if len(futures): + self._client.poll(timeout_ms=0) timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000) self._client.poll(timeout_ms=timeout_ms) @@ -680,7 +695,7 @@ def _poll_once(self, timeout_ms, max_records): if self._coordinator.need_rejoin(): return {} - records, _ = self._fetcher.fetched_records(max_records) + records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) return records def position(self, partition): @@ -743,6 +758,9 @@ def pause(self, *partitions): for partition in partitions: log.debug("Pausing partition %s", partition) self._subscription.pause(partition) + # Because the iterator checks is_fetchable() on each iteration + # we expect pauses to get handled automatically and therefore + # we do not need to reset the full iterator (forcing a full refetch) def paused(self): """Get the partitions that were previously paused using @@ -790,6 +808,8 @@ def seek(self, partition, offset): assert partition in self._subscription.assigned_partitions(), 'Unassigned partition' log.debug("Seeking to offset %s for partition %s", offset, partition) self._subscription.assignment[partition].seek(offset) + if not self.config['legacy_iterator']: + self._iterator = None def seek_to_beginning(self, *partitions): """Seek to the oldest available offset for partitions. @@ -814,6 +834,8 @@ def seek_to_beginning(self, *partitions): for tp in partitions: log.debug("Seeking to beginning of partition %s", tp) self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST) + if not self.config['legacy_iterator']: + self._iterator = None def seek_to_end(self, *partitions): """Seek to the most recent available offset for partitions. @@ -838,6 +860,8 @@ def seek_to_end(self, *partitions): for tp in partitions: log.debug("Seeking to end of partition %s", tp) self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST) + if not self.config['legacy_iterator']: + self._iterator = None def subscribe(self, topics=(), pattern=None, listener=None): """Subscribe to a list of topics, or a topic regex pattern. @@ -913,12 +937,14 @@ def unsubscribe(self): self._client.cluster.need_all_topic_metadata = False self._client.set_topics([]) log.debug("Unsubscribed all topics or patterns and assigned partitions") + if not self.config['legacy_iterator']: + self._iterator = None def metrics(self, raw=False): """Get metrics on consumer performance. This is ported from the Java Consumer, for details see: - https://kafka.apache.org/documentation/#new_consumer_monitoring + https://kafka.apache.org/documentation/#consumer_monitoring Warning: This is an unstable interface. It may change in future @@ -1075,6 +1101,25 @@ def _update_fetch_positions(self, partitions): # Then, do any offset lookups in case some positions are not known self._fetcher.update_fetch_positions(partitions) + def _message_generator_v2(self): + timeout_ms = 1000 * (self._consumer_timeout - time.time()) + record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False) + for tp, records in six.iteritems(record_map): + # Generators are stateful, and it is possible that the tp / records + # here may become stale during iteration -- i.e., we seek to a + # different offset, pause consumption, or lose assignment. 
+ for record in records: + # is_fetchable(tp) should handle assignment changes and offset + # resets; for all other changes (e.g., seeks) we'll rely on the + # outer function destroying the existing iterator/generator + # via self._iterator = None + if not self._subscription.is_fetchable(tp): + log.debug("Not returning fetched records for partition %s" + " since it is no longer fetchable", tp) + break + self._subscription.assignment[tp].position = record.offset + 1 + yield record + def _message_generator(self): assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment' while time.time() < self._consumer_timeout: @@ -1086,9 +1131,7 @@ def _message_generator(self): partitions = self._subscription.missing_fetch_positions() self._update_fetch_positions(partitions) - poll_ms = 1000 * (self._consumer_timeout - time.time()) - if not self._fetcher.in_flight_fetches(): - poll_ms = min(poll_ms, self.config['reconnect_backoff_ms']) + poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms']) self._client.poll(timeout_ms=poll_ms) # after the long poll, we should check whether the group needs to rebalance @@ -1129,6 +1172,26 @@ def __iter__(self): # pylint: disable=non-iterator-returned return self def __next__(self): + # Now that the heartbeat thread runs in the background + # there should be no reason to maintain a separate iterator + # but we'll keep it available for a few releases just in case + if self.config['legacy_iterator']: + return self.next_v1() + else: + return self.next_v2() + + def next_v2(self): + self._set_consumer_timeout() + while time.time() < self._consumer_timeout: + if not self._iterator: + self._iterator = self._message_generator_v2() + try: + return next(self._iterator) + except StopIteration: + self._iterator = None + raise StopIteration() + + def next_v1(self): if not self._iterator: self._iterator = self._message_generator() diff --git a/kafka/consumer/subscription_state.py b/kafka/consumer/subscription_state.py index ef501661a..76a6c5022 100644 --- a/kafka/consumer/subscription_state.py +++ b/kafka/consumer/subscription_state.py @@ -148,7 +148,7 @@ def change_subscription(self, topics): topics (list of str): topics for subscription Raises: - IllegalStateErrror: if assign_from_user has been used already + IllegalStateError: if assign_from_user has been used already TypeError: if a topic is None or a non-str ValueError: if a topic is an empty string or - a topic name is '.' or '..' 
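The new-style iterator above (_message_generator_v2 / next_v2) is a thin wrapper over poll(): it pulls a batch, yields records one at a time, and advances the tracked position only as each record is actually handed to the caller. A simplified standalone sketch of the wrap-poll-in-a-generator idea (no fetchability checks or seek handling):

    def iter_records(consumer, poll_timeout_ms=1000):
        """Yield ConsumerRecords one at a time by repeatedly polling."""
        while True:
            batch = consumer.poll(timeout_ms=poll_timeout_ms)
            for records in batch.values():
                for record in records:
                    yield record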
or diff --git a/kafka/coordinator/assignors/range.py b/kafka/coordinator/assignors/range.py index c232d9e41..299e39c48 100644 --- a/kafka/coordinator/assignors/range.py +++ b/kafka/coordinator/assignors/range.py @@ -46,20 +46,18 @@ def assign(cls, cluster, member_metadata): if partitions is None: log.warning('No partition metadata for topic %s', topic) continue - partitions = sorted(list(partitions)) - partitions_for_topic = len(partitions) + partitions = sorted(partitions) consumers_for_topic.sort() partitions_per_consumer = len(partitions) // len(consumers_for_topic) consumers_with_extra = len(partitions) % len(consumers_for_topic) - for i in range(len(consumers_for_topic)): + for i, member in enumerate(consumers_for_topic): start = partitions_per_consumer * i start += min(i, consumers_with_extra) length = partitions_per_consumer if not i + 1 > consumers_with_extra: length += 1 - member = consumers_for_topic[i] assignment[member][topic] = partitions[start:start+length] protocol_assignment = {} diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index e538fda33..700c31ff6 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -243,7 +243,7 @@ def ensure_coordinator_ready(self): """Block until the coordinator for this group is known (and we have an active connection -- java client uses unsent queue). """ - with self._client._lock, self._lock: + with self._lock: while self.coordinator_unknown(): # Prior to 0.8.2 there was no group coordinator @@ -273,7 +273,7 @@ def _reset_find_coordinator_future(self, result): self._find_coordinator_future = None def lookup_coordinator(self): - with self._client._lock, self._lock: + with self._lock: if self._find_coordinator_future is not None: return self._find_coordinator_future @@ -321,10 +321,14 @@ def poll_heartbeat(self): self.heartbeat.poll() def time_to_next_heartbeat(self): + """Returns seconds (float) remaining before next heartbeat should be sent + + Note: Returns infinite if group is not joined + """ with self._lock: # if we have not joined the group, we don't need to send heartbeats if self.state is MemberState.UNJOINED: - return sys.maxsize + return float('inf') return self.heartbeat.time_to_next_heartbeat() def _handle_join_success(self, member_assignment_bytes): @@ -342,7 +346,7 @@ def _handle_join_failure(self, _): def ensure_active_group(self): """Ensure that the group is active (i.e. joined and synced)""" - with self._client._lock, self._lock: + with self._lock: if self._heartbeat_thread is None: self._start_heartbeat_thread() @@ -500,7 +504,7 @@ def _handle_join_group_response(self, future, send_time, response): log.debug("Received successful JoinGroup response for group %s: %s", self.group_id, response) self.sensors.join_latency.record((time.time() - send_time) * 1000) - with self._client._lock, self._lock: + with self._lock: if self.state is not MemberState.REBALANCING: # if the consumer was woken up before a rebalance completes, # we may have already left the group. 
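The range assignor above hands each consumer a contiguous block of the sorted partitions: len(partitions) // len(consumers) apiece, with the first len(partitions) % len(consumers) consumers taking one extra. A standalone restatement of that arithmetic with a worked case (names illustrative):

    def range_assign(partitions, consumers):
        """Contiguous range assignment; early consumers absorb the remainder."""
        partitions = sorted(partitions)
        consumers = sorted(consumers)
        per, extra = divmod(len(partitions), len(consumers))
        assignment = {}
        for i, member in enumerate(consumers):
            start = per * i + min(i, extra)
            length = per + (1 if i < extra else 0)
            assignment[member] = partitions[start:start + length]
        return assignment

    # 7 partitions over 3 consumers: only the first consumer gets an extra one
    assert range_assign(range(7), ['c1', 'c2', 'c3']) == {
        'c1': [0, 1, 2], 'c2': [3, 4], 'c3': [5, 6]}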
In this case, we do @@ -675,15 +679,15 @@ def _handle_group_coordinator_response(self, future, response): error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - with self._client._lock, self._lock: - ok = self._client.cluster.add_group_coordinator(self.group_id, response) - if not ok: + with self._lock: + coordinator_id = self._client.cluster.add_group_coordinator(self.group_id, response) + if not coordinator_id: # This could happen if coordinator metadata is different # than broker metadata future.failure(Errors.IllegalStateError()) return - self.coordinator_id = response.coordinator_id + self.coordinator_id = coordinator_id log.info("Discovered coordinator %s for group %s", self.coordinator_id, self.group_id) self._client.maybe_connect(self.coordinator_id) @@ -757,7 +761,7 @@ def close(self): def maybe_leave_group(self): """Leave the current group and reset local generation/memberId.""" - with self._client._lock, self._lock: + with self._lock: if (not self.coordinator_unknown() and self.state is not MemberState.UNJOINED and self._generation is not Generation.NO_GENERATION): @@ -955,46 +959,40 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - # - # Release coordinator lock during client poll to avoid deadlocks - # if/when connection errback needs coordinator lock - self.coordinator._client.poll(timeout_ms=0) - - if self.coordinator.coordinator_unknown(): - future = self.coordinator.lookup_coordinator() - if not future.is_done or future.failed(): - # the immediate future check ensures that we backoff - # properly in the case that no brokers are available - # to connect to (and the future is automatically failed). - with self.coordinator._lock: + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + self.coordinator._client.poll(timeout_ms=0) + + if self.coordinator.coordinator_unknown(): + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - elif self.coordinator.heartbeat.session_timeout_expired(): - # the session timeout has expired without seeing a - # successful heartbeat, so we should probably make sure - # the coordinator is still healthy. - log.warning('Heartbeat session expired, marking coordinator dead') - self.coordinator.coordinator_dead('Heartbeat session expired') - - elif self.coordinator.heartbeat.poll_timeout_expired(): - # the poll timeout has expired, which means that the - # foreground thread has stalled in between calls to - # poll(), so we explicitly leave the group. - log.warning('Heartbeat poll expired, leaving group') - self.coordinator.maybe_leave_group() - - elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.log(0, 'Not ready to heartbeat, waiting') - with self.coordinator._lock: + elif self.coordinator.heartbeat.session_timeout_expired(): + # the session timeout has expired without seeing a + # successful heartbeat, so we should probably make sure + # the coordinator is still healthy. 
+ log.warning('Heartbeat session expired, marking coordinator dead') + self.coordinator.coordinator_dead('Heartbeat session expired') + + elif self.coordinator.heartbeat.poll_timeout_expired(): + # the poll timeout has expired, which means that the + # foreground thread has stalled in between calls to + # poll(), so we explicitly leave the group. + log.warning('Heartbeat poll expired, leaving group') + self.coordinator.maybe_leave_group() + + elif not self.coordinator.heartbeat.should_heartbeat(): + # poll again after waiting for the retry backoff in case + # the heartbeat failed or the coordinator disconnected + log.log(0, 'Not ready to heartbeat, waiting') self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - else: - with self.coordinator._client._lock, self.coordinator._lock: + else: self.coordinator.heartbeat.sent_heartbeat() future = self.coordinator._send_heartbeat_request() future.add_callback(self._handle_heartbeat_success) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index b575664b2..9b7a3cddd 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -225,7 +225,11 @@ def _on_join_complete(self, generation, member_id, protocol, self._subscription.needs_fetch_committed_offsets = True # update partition assignment - self._subscription.assign_from_subscribed(assignment.partitions()) + try: + self._subscription.assign_from_subscribed(assignment.partitions()) + except ValueError as e: + log.warning("%s. Probably due to a deleted topic. Requesting Re-join" % e) + self.request_rejoin() # give the assignor a chance to update internal state # based on the received assignment @@ -256,7 +260,7 @@ def poll(self): ensures that the consumer has joined the group. This also handles periodic offset commits if they are enabled. """ - if self.group_id is None or self.config['api_version'] < (0, 8, 2): + if self.group_id is None: return self._invoke_completed_offset_commit_callbacks() diff --git a/kafka/errors.py b/kafka/errors.py index a3609ba38..6150712c6 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -443,6 +443,12 @@ class PolicyViolationError(BrokerResponseError): description = 'Request parameters do not satisfy the configured policy.' +class SecurityDisabledError(BrokerResponseError): + errno = 54 + message = 'SECURITY_DISABLED' + description = 'Security features are disabled.' + + class KafkaUnavailableError(KafkaError): pass diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 2a306e0c1..3ff1a0913 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -255,7 +255,7 @@ class KafkaProducer(object): various APIs. Example: (0, 10, 2). Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker - api version. Only applies if api_version set to 'auto' + api version. Only applies if api_version set to None. metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. 
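bootstrap_connected() is added in this series to both KafkaConsumer and KafkaProducer (above) as a simple delegation to the underlying client/sender. An illustrative producer snippet exercising it (assumes a broker at localhost:9092 and a topic 'my-topic'):

    from kafka import KafkaProducer

    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    producer.send('my-topic', b'hello, kafka')   # returns a future; fire-and-forget here
    producer.flush()                             # block until queued messages are sent
    print('bootstrap connection still alive:', producer.bootstrap_connected())
    producer.close()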
Default: [] @@ -412,6 +412,10 @@ def __init__(self, **configs): atexit.register(self._cleanup) log.debug("Kafka producer started") + def bootstrap_connected(self): + """Return True if the bootstrap is connected.""" + return self._sender.bootstrap_connected() + def _cleanup_factory(self): """Build a cleanup clojure that doesn't increase our ref count""" _self = weakref.proxy(self) @@ -464,7 +468,6 @@ def close(self, timeout=None): assert timeout >= 0 log.info("Closing the Kafka producer with %s secs timeout.", timeout) - #first_exception = AtomicReference() # this will keep track of the first encountered exception invoked_from_callback = bool(threading.current_thread() is self._sender) if timeout > 0: if invoked_from_callback: diff --git a/kafka/producer/sender.py b/kafka/producer/sender.py index 064fee410..705b58f9a 100644 --- a/kafka/producer/sender.py +++ b/kafka/producer/sender.py @@ -157,7 +157,7 @@ def run_once(self): # difference between now and its linger expiry time; otherwise the # select time will be the time difference between now and the # metadata expiry time - self._client.poll(poll_timeout_ms) + self._client.poll(timeout_ms=poll_timeout_ms) def initiate_close(self): """Start closing the sender (won't complete until all data is sent).""" @@ -315,6 +315,9 @@ def wakeup(self): """Wake up the selector associated with this send thread.""" self._client.wakeup() + def bootstrap_connected(self): + return self._client.bootstrap_connected() + class SenderMetrics(object): diff --git a/kafka/version.py b/kafka/version.py index 484a880e2..1be3a88de 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.6.post2' +__version__ = '1.4.7' diff --git a/requirements-dev.txt b/requirements-dev.txt index 607ff7f56..c39294d4f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -14,5 +14,5 @@ sphinx-rtd-theme==0.2.4 mock==2.0.0 decorator==4.3.0 tox-pip-extensions==1.2.1 -crc32c==1.4 -py==1.6.0 +crc32c==1.7 +py==1.8.0 \ No newline at end of file diff --git a/servers/0.11.0.0/resources/kafka.properties b/servers/0.11.0.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/0.11.0.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.1/resources/kafka.properties b/servers/0.11.0.1/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/0.11.0.1/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/0.11.0.2/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.3/resources/kafka.properties b/servers/0.11.0.3/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/0.11.0.3/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.3/resources/log4j.properties b/servers/0.11.0.3/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/0.11.0.3/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
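The {broker_id}, {transport}, {host}, {port}, {ssl_dir}, {tmp_dir}, {partitions}, {replicas}, {zk_host}, {zk_port} and {zk_chroot} fields in these kafka.properties resources are Python str.format placeholders, so a test harness can render a per-broker config with a single format() call. A minimal sketch, assuming hypothetical output paths and parameter values:

    def render_template(template_path, output_path, **params):
        """Render a servers/<version>/resources template by substituting its {named} placeholders."""
        with open(template_path) as f:
            template = f.read()
        with open(output_path, 'w') as f:
            f.write(template.format(**params))

    # Hypothetical values for a single PLAINTEXT test broker:
    render_template(
        'servers/0.11.0.3/resources/kafka.properties',
        '/tmp/kafka-fixture/kafka.properties',
        broker_id=0, transport='PLAINTEXT', host='localhost', port=9092,
        ssl_dir='/tmp/kafka-fixture/ssl', tmp_dir='/tmp/kafka-fixture',
        partitions=2, replicas=1,
        zk_host='localhost', zk_port=2181, zk_chroot='kafka-python-test',
    )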
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.3/resources/zookeeper.properties b/servers/0.11.0.3/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/0.11.0.3/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/servers/1.0.0/resources/kafka.properties b/servers/1.0.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/1.0.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.1/resources/kafka.properties b/servers/1.0.1/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/1.0.1/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
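When {transport} is SSL, the ssl.keystore.location and ssl.truststore.location settings above point the broker at JKS stores under {ssl_dir}. kafka-python clients use PEM files rather than JKS, so a test talking to such a broker also needs the CA certificate and client key pair exported in PEM form; the paths, port and topic below are hypothetical:

    from kafka import KafkaConsumer

    # Hypothetical PEM files exported from the fixture's {ssl_dir} JKS stores.
    consumer = KafkaConsumer(
        'my-test-topic',
        bootstrap_servers='localhost:9093',
        security_protocol='SSL',
        ssl_cafile='/tmp/kafka-fixture/ssl/ca-cert.pem',
        ssl_certfile='/tmp/kafka-fixture/ssl/client-cert.pem',
        ssl_keyfile='/tmp/kafka-fixture/ssl/client-key.pem',
        ssl_check_hostname=False,  # test certificates are typically self-signed for localhost
    )
    for message in consumer:
        print(message.topic, message.partition, message.offset, message.value)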
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.2/resources/kafka.properties b/servers/1.0.2/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/1.0.2/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/1.1.0/resources/kafka.properties b/servers/1.1.0/resources/kafka.properties index 28668db95..630dbc5fa 100644 --- a/servers/1.1.0/resources/kafka.properties +++ b/servers/1.1.0/resources/kafka.properties @@ -30,6 +30,9 @@ ssl.key.password=foobar ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks ssl.truststore.password=foobar +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + # The port the socket server listens on #port=9092 diff --git a/servers/1.1.1/resources/kafka.properties b/servers/1.1.1/resources/kafka.properties index 64f94d528..fe6a89f4a 100644 --- a/servers/1.1.1/resources/kafka.properties +++ b/servers/1.1.1/resources/kafka.properties @@ -30,6 +30,9 @@ ssl.key.password=foobar ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks ssl.truststore.password=foobar +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + # List of enabled mechanisms, can be more than one sasl.enabled.mechanisms=PLAIN sasl.mechanism.inter.broker.protocol=PLAIN diff --git a/servers/2.0.0/resources/kafka.properties b/servers/2.0.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.0.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). 
This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.0.1/resources/kafka.properties b/servers/2.0.1/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.0.1/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
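The 1.1.1 kafka.properties hunk above also lists sasl.enabled.mechanisms=PLAIN, so a fixture started with a SASL transport can be exercised through kafka-python's SASL options. A minimal sketch; the port, topic and credentials are hypothetical and would have to match the JAAS configuration the broker was started with:

    from kafka import KafkaProducer

    producer = KafkaProducer(
        bootstrap_servers='localhost:9094',
        security_protocol='SASL_PLAINTEXT',
        sasl_mechanism='PLAIN',
        sasl_plain_username='alice',         # hypothetical test user
        sasl_plain_password='alice-secret',  # hypothetical test password
    )
    producer.send('my-test-topic', b'hello from a SASL client')
    producer.flush()
    producer.close()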
+# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. 
The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.0/resources/kafka.properties b/servers/2.1.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.1.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.0/resources/log4j.properties b/servers/2.1.0/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.1.0/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
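The templates lower group.min.session.timeout.ms to 1000 and shrink the offsets topic so that consumer-group tests rebalance and commit quickly. A client can then opt into an equally short session timeout; the topic and group names below are hypothetical:

    from kafka import KafkaConsumer

    # A 1 s session timeout is only accepted because the fixture sets group.min.session.timeout.ms=1000.
    consumer = KafkaConsumer(
        'my-test-topic',
        group_id='my-test-group',
        bootstrap_servers='localhost:9092',
        session_timeout_ms=1000,
        heartbeat_interval_ms=300,
        enable_auto_commit=True,
        auto_commit_interval_ms=500,
        consumer_timeout_ms=5000,  # stop iterating once the topic goes idle in a test
    )
    for message in consumer:
        print(message.offset, message.value)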
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.1.0/resources/zookeeper.properties b/servers/2.1.0/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.1.0/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/servers/2.1.1/resources/kafka.properties b/servers/2.1.1/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.1.1/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.1/resources/log4j.properties b/servers/2.1.1/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.1.1/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
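num.partitions={partitions} and default.replication.factor={replicas} only set the defaults applied to auto-created topics, so a test that needs known counts can create its topic explicitly. A minimal sketch using kafka-python's admin client; the topic name and the counts mirroring the template parameters are hypothetical:

    from kafka.admin import KafkaAdminClient, NewTopic

    admin = KafkaAdminClient(bootstrap_servers='localhost:9092', client_id='fixture-admin')

    # Mirror the fixture's {partitions}/{replicas} template values explicitly.
    admin.create_topics([
        NewTopic(name='my-test-topic', num_partitions=2, replication_factor=1),
    ])
    admin.close()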
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.1.1/resources/zookeeper.properties b/servers/2.1.1/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.1.1/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/servers/2.2.0/resources/kafka.properties b/servers/2.2.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.2.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.2.0/resources/log4j.properties b/servers/2.2.0/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.2.0/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
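
The kafka.properties above deliberately lowers group.min.session.timeout.ms to 1000 and shortens the replica and ZooKeeper timeouts so integration tests can use aggressive client settings and detect dead brokers quickly. A hedged sketch of a consumer that relies on those overrides; the topic name and bootstrap address are placeholders:

    from kafka import KafkaConsumer

    # Only sensible against a test broker whose group.min.session.timeout.ms has
    # been lowered as above; a default broker would reject this session timeout.
    consumer = KafkaConsumer(
        'test-topic',                        # hypothetical topic
        bootstrap_servers='localhost:9092',  # placeholder address
        group_id='fast-rebalance-test',
        session_timeout_ms=1000,
        heartbeat_interval_ms=300,
    )
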
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.2.0/resources/zookeeper.properties b/servers/2.2.0/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.2.0/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/servers/2.2.1/resources/kafka.properties b/servers/2.2.1/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.2.1/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
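
When the {transport} placeholder above is rendered as SSL, the broker uses the keystore and truststore generated by the test harness (password "foobar"), and clients must be configured to trust that CA. A sketch of such a client, assuming the relevant certificates have been exported to PEM files; all paths are placeholders:

    from kafka import KafkaProducer

    producer = KafkaProducer(
        bootstrap_servers='localhost:9093',   # placeholder SSL listener
        security_protocol='SSL',
        ssl_check_hostname=False,             # test certs use throwaway hostnames
        ssl_cafile='/tmp/ssl/ca-cert',        # CA that signed the broker cert
        ssl_certfile='/tmp/ssl/client-cert',  # optional client certificate
        ssl_keyfile='/tmp/ssl/client-key',
    )
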
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.2.1/resources/log4j.properties b/servers/2.2.1/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.2.1/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.2.1/resources/zookeeper.properties b/servers/2.2.1/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.2.1/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/servers/2.3.0/resources/kafka.properties b/servers/2.3.0/resources/kafka.properties new file mode 100644 index 000000000..630dbc5fa --- /dev/null +++ b/servers/2.3.0/resources/kafka.properties @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. 
+ +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.3.0/resources/log4j.properties b/servers/2.3.0/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.3.0/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
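
The test changes later in this patch replace the old kafka_versions decorator and the fixtures.version() helper with pytest.mark.skipif guards built on env_kafka_version(). Conceptually that helper just turns the KAFKA_VERSION environment variable into a comparable tuple; a minimal sketch (the real implementation lives in test.testutil):

    import os

    def env_kafka_version():
        # Return KAFKA_VERSION as a tuple like (0, 10, 1), or () if unset.
        if 'KAFKA_VERSION' not in os.environ:
            return ()
        return tuple(map(int, os.environ['KAFKA_VERSION'].split('.')))

    # Typical gate in a test module:
    # @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason='Unsupported Kafka Version')
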
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.3.0/resources/zookeeper.properties b/servers/2.3.0/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.3.0/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/test/conftest.py b/test/conftest.py index ffaae033b..267ac6aa9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,14 +1,11 @@ from __future__ import absolute_import -import pytest - -from test.fixtures import KafkaFixture, ZookeeperFixture, random_string, version as kafka_version +import uuid +import pytest -@pytest.fixture(scope="module") -def version(): - """Return the Kafka version set in the OS environment""" - return kafka_version() +from test.testutil import env_kafka_version, random_string +from test.fixtures import KafkaFixture, ZookeeperFixture @pytest.fixture(scope="module") def zookeeper(): @@ -17,15 +14,17 @@ def zookeeper(): yield zk_instance zk_instance.close() + @pytest.fixture(scope="module") def kafka_broker(kafka_broker_factory): """Return a Kafka broker fixture""" return kafka_broker_factory()[0] + @pytest.fixture(scope="module") -def kafka_broker_factory(version, zookeeper): +def kafka_broker_factory(zookeeper): """Return a Kafka broker fixture factory""" - assert version, 'KAFKA_VERSION must be specified to run integration tests' + assert env_kafka_version(), 'KAFKA_VERSION must be specified to run integration tests' _brokers = [] def factory(**broker_params): @@ -42,6 +41,7 @@ def factory(**broker_params): for broker in _brokers: broker.close() + @pytest.fixture def simple_client(kafka_broker, request, topic): """Return a SimpleClient fixture""" @@ -50,6 +50,7 @@ def simple_client(kafka_broker, request, topic): yield client client.close() + @pytest.fixture def kafka_client(kafka_broker, request): """Return a KafkaClient fixture""" @@ -57,11 +58,13 @@ def kafka_client(kafka_broker, request): yield client client.close() + @pytest.fixture def kafka_consumer(kafka_consumer_factory): """Return a KafkaConsumer fixture""" 
return kafka_consumer_factory() + @pytest.fixture def kafka_consumer_factory(kafka_broker, topic, request): """Return a KafkaConsumer factory fixture""" @@ -79,11 +82,13 @@ def factory(**kafka_consumer_params): if _consumer[0]: _consumer[0].close() + @pytest.fixture def kafka_producer(kafka_producer_factory): """Return a KafkaProducer fixture""" yield kafka_producer_factory() + @pytest.fixture def kafka_producer_factory(kafka_broker, request): """Return a KafkaProduce factory fixture""" @@ -100,6 +105,7 @@ def factory(**kafka_producer_params): if _producer[0]: _producer[0].close() + @pytest.fixture def topic(kafka_broker, request): """Return a topic fixture""" @@ -107,6 +113,7 @@ def topic(kafka_broker, request): kafka_broker.create_topics([topic_name]) return topic_name + @pytest.fixture def conn(mocker): """Return a connection mocker fixture""" @@ -132,3 +139,27 @@ def _set_conn_state(state): conn.connected = lambda: conn.state is ConnectionStates.CONNECTED conn.disconnected = lambda: conn.state is ConnectionStates.DISCONNECTED return conn + + +@pytest.fixture() +def send_messages(topic, kafka_producer, request): + """A factory that returns a send_messages function with a pre-populated + topic topic / producer.""" + + def _send_messages(number_range, partition=0, topic=topic, producer=kafka_producer, request=request): + """ + messages is typically `range(0,100)` + partition is an int + """ + messages_and_futures = [] # [(message, produce_future),] + for i in number_range: + # request.node.name provides the test name (including parametrized values) + encoded_msg = '{}-{}-{}'.format(i, request.node.name, uuid.uuid4()).encode('utf-8') + future = kafka_producer.send(topic, value=encoded_msg, partition=partition) + messages_and_futures.append((encoded_msg, future)) + kafka_producer.flush() + for (msg, f) in messages_and_futures: + assert f.succeeded() + return [msg for (msg, f) in messages_and_futures] + + return _send_messages diff --git a/test/fixtures.py b/test/fixtures.py index dd7d815de..35aade6bd 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -4,9 +4,7 @@ import logging import os import os.path -import random import socket -import string import subprocess import time import uuid @@ -19,20 +17,11 @@ from kafka.client_async import KafkaClient from kafka.protocol.admin import CreateTopicsRequest from kafka.protocol.metadata import MetadataRequest +from test.testutil import env_kafka_version, random_string from test.service import ExternalService, SpawnedService log = logging.getLogger(__name__) -def random_string(length): - return "".join(random.choice(string.ascii_letters) for i in range(length)) - -def version_str_to_list(version_str): - return tuple(map(int, version_str.split('.'))) # e.g., (0, 8, 1, 1) - -def version(): - if 'KAFKA_VERSION' not in os.environ: - return () - return version_str_to_list(os.environ['KAFKA_VERSION']) def get_open_port(): sock = socket.socket() @@ -41,6 +30,7 @@ def get_open_port(): sock.close() return port + def gen_ssl_resources(directory): os.system(""" cd {0} @@ -70,6 +60,7 @@ def gen_ssl_resources(directory): -file cert-signed -storepass foobar -noprompt """.format(directory)) + class Fixture(object): kafka_version = os.environ.get('KAFKA_VERSION', '1.1.0') scala_version = os.environ.get("SCALA_VERSION", '2.11') @@ -154,6 +145,7 @@ def render_template(cls, source_file, target_file, binding): def dump_logs(self): self.child.dump_logs() + class ZookeeperFixture(Fixture): @classmethod def instance(cls): @@ -465,7 +457,7 @@ def _create_topic(self, 
topic_name, num_partitions, replication_factor, timeout_ num_partitions == self.partitions and \ replication_factor == self.replicas: self._send_request(MetadataRequest[0]([topic_name])) - elif version() >= (0, 10, 1, 0): + elif env_kafka_version() >= (0, 10, 1, 0): request = CreateTopicsRequest[0]([(topic_name, num_partitions, replication_factor, [], [])], timeout_ms) result = self._send_request(request, timeout=timeout_ms) @@ -485,13 +477,13 @@ def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ '--replication-factor', self.replicas \ if replication_factor is None \ else replication_factor) - if version() >= (0, 10): + if env_kafka_version() >= (0, 10): args.append('--if-not-exists') env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() if proc.returncode != 0: - if not 'kafka.common.TopicExistsException' in stdout: + if 'kafka.common.TopicExistsException' not in stdout: self.out("Failed to create topic %s" % (topic_name,)) self.out(stdout) self.out(stderr) diff --git a/test/test_admin.py b/test/test_admin.py index 300d5bced..279f85abf 100644 --- a/test/test_admin.py +++ b/test/test_admin.py @@ -26,6 +26,37 @@ def test_new_partitions(): assert good_partitions.new_assignments == [[1, 2, 3]] +def test_acl_resource(): + good_acl = kafka.admin.ACL( + "User:bar", + "*", + kafka.admin.ACLOperation.ALL, + kafka.admin.ACLPermissionType.ALLOW, + kafka.admin.ResourcePattern( + kafka.admin.ResourceType.TOPIC, + "foo", + kafka.admin.ACLResourcePatternType.LITERAL + ) + ) + + assert(good_acl.resource_pattern.resource_type == kafka.admin.ResourceType.TOPIC) + assert(good_acl.operation == kafka.admin.ACLOperation.ALL) + assert(good_acl.permission_type == kafka.admin.ACLPermissionType.ALLOW) + assert(good_acl.resource_pattern.pattern_type == kafka.admin.ACLResourcePatternType.LITERAL) + + with pytest.raises(IllegalArgumentError): + kafka.admin.ACL( + "User:bar", + "*", + kafka.admin.ACLOperation.ANY, + kafka.admin.ACLPermissionType.ANY, + kafka.admin.ResourcePattern( + kafka.admin.ResourceType.TOPIC, + "foo", + kafka.admin.ACLResourcePatternType.LITERAL + ) + ) + def test_new_topic(): with pytest.raises(IllegalArgumentError): bad_topic = kafka.admin.NewTopic('foo', -1, -1) diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py index 8f7c05c32..a19cfdda9 100644 --- a/test/test_admin_client_integration.py +++ b/test/test_admin_client_integration.py @@ -1,10 +1,13 @@ import os import time import unittest +import pytest from kafka.admin_client import AdminClient, NewTopic, NewPartitionsInfo from kafka.protocol.metadata import MetadataRequest from test.fixtures import ZookeeperFixture, KafkaFixture -from test.testutil import KafkaIntegrationTestCase, kafka_versions +from test.testutil import KafkaIntegrationTestCase, env_kafka_version + +KAFKA_ADMIN_TIMEOUT_SECONDS = 5 class TestKafkaAdminClientIntegration(KafkaIntegrationTestCase): @@ -24,7 +27,7 @@ def tearDownClass(cls): cls.server.close() cls.zk.close() - @kafka_versions('>=0.10.1') + @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason='Unsupported Kafka Version') def test_create_delete_topics(self): admin = AdminClient(self.client_async) topic = NewTopic( @@ -33,23 +36,23 @@ def test_create_delete_topics(self): replication_factor=1, ) metadata_request = MetadataRequest[1]() - response = admin.create_topics(topics=[topic], timeout=1) + response = admin.create_topics(topics=[topic], 
timeout=KAFKA_ADMIN_TIMEOUT_SECONDS) # Error code 7 means that RequestTimedOut but we can safely assume # that topic is created or will be created eventually. # see this https://cwiki.apache.org/confluence/display/KAFKA/ # KIP-4+-+Command+line+and+centralized+administrative+operations self.assertTrue( - response[0].topic_error_codes[0][1] == 0 or - response[0].topic_error_codes[0][1] == 7 + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 ) time.sleep(1) # allows the topic to be created delete_response = admin.delete_topics(['topic'], timeout=1) self.assertTrue( - response[0].topic_error_codes[0][1] == 0 or - response[0].topic_error_codes[0][1] == 7 + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 ) - @kafka_versions('>=1.0.0') + @pytest.mark.skipif(env_kafka_version() < (1, 0, 0), reason='Unsupported Kafka Version') def test_create_partitions(self): admin = AdminClient(self.client_async) topic = NewTopic( @@ -58,7 +61,7 @@ def test_create_partitions(self): replication_factor=1, ) metadata_request = MetadataRequest[1]() - admin.create_topics(topics=[topic], timeout=1) + admin.create_topics(topics=[topic], timeout=KAFKA_ADMIN_TIMEOUT_SECONDS) time.sleep(1) # allows the topic to be created @@ -66,5 +69,6 @@ def test_create_partitions(self): response = admin.create_partitions([new_partitions_info], timeout=1, validate_only=False) self.assertTrue( - response[0].topic_error_codes[0][1] == 0 + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 ) diff --git a/test/test_admin_integration.py b/test/test_admin_integration.py new file mode 100644 index 000000000..2672faa0c --- /dev/null +++ b/test/test_admin_integration.py @@ -0,0 +1,122 @@ +import pytest +import os + +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, env_kafka_version, current_offset + +from kafka.errors import NoError +from kafka.admin import KafkaAdminClient, ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, ResourceType, ACL + +# This test suite passes for me locally, but fails on travis +# Needs investigation +DISABLED = True + +# TODO: Convert to pytest / fixtures +# Note that ACL features require broker 0.11, but other admin apis may work on +# earlier broker versions +class TestAdminClientIntegration(KafkaIntegrationTestCase): + @classmethod + def setUpClass(cls): # noqa + if env_kafka_version() < (0, 11) or DISABLED: + return + + cls.zk = ZookeeperFixture.instance() + cls.server = KafkaFixture.instance(0, cls.zk) + + @classmethod + def tearDownClass(cls): # noqa + if env_kafka_version() < (0, 11) or DISABLED: + return + + cls.server.close() + cls.zk.close() + + def setUp(self): + if env_kafka_version() < (0, 11) or DISABLED: + self.skipTest('Admin ACL Integration test requires KAFKA_VERSION >= 0.11') + super(TestAdminClientIntegration, self).setUp() + + def tearDown(self): + if env_kafka_version() < (0, 11) or DISABLED: + return + super(TestAdminClientIntegration, self).tearDown() + + def test_create_describe_delete_acls(self): + """Tests that we can add, list and remove ACLs + """ + + # Setup + brokers = '%s:%d' % (self.server.host, self.server.port) + admin_client = KafkaAdminClient( + bootstrap_servers=brokers + ) + + # Check that we don't have any ACLs in the cluster + acls, error = admin_client.describe_acls( + ACLFilter( + principal=None, + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + 
resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ) + + self.assertIs(error, NoError) + self.assertEqual(0, len(acls)) + + # Try to add an ACL + acl = ACL( + principal="User:test", + host="*", + operation=ACLOperation.READ, + permission_type=ACLPermissionType.ALLOW, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + result = admin_client.create_acls([acl]) + + self.assertFalse(len(result["failed"])) + self.assertEqual(len(result["succeeded"]), 1) + + # Check that we can list the ACL we created + acl_filter = ACLFilter( + principal=None, + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + acls, error = admin_client.describe_acls(acl_filter) + + self.assertIs(error, NoError) + self.assertEqual(1, len(acls)) + + # Remove the ACL + delete_results = admin_client.delete_acls( + [ + ACLFilter( + principal="User:test", + host="*", + operation=ACLOperation.READ, + permission_type=ACLPermissionType.ALLOW, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ] + ) + + self.assertEqual(1, len(delete_results)) + self.assertEqual(1, len(delete_results[0][1])) # Check number of affected ACLs + + + # Make sure the ACL does not exist in the cluster anymore + acls, error = admin_client.describe_acls( + ACLFilter( + principal="*", + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ) + self.assertIs(error, NoError) + self.assertEqual(0, len(acls)) diff --git a/test/test_assignors.py b/test/test_assignors.py index e2a1d4fdd..0821caf83 100644 --- a/test/test_assignors.py +++ b/test/test_assignors.py @@ -5,8 +5,7 @@ from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor -from kafka.coordinator.protocol import ( - ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment) +from kafka.coordinator.protocol import ConsumerProtocolMemberAssignment @pytest.fixture diff --git a/test/test_client_async.py b/test/test_client_async.py index 2132c8e4c..74da66a36 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -17,25 +17,23 @@ from kafka.conn import ConnectionStates import kafka.errors as Errors from kafka.future import Future -from kafka.protocol.metadata import MetadataResponse, MetadataRequest +from kafka.protocol.metadata import MetadataRequest from kafka.protocol.produce import ProduceRequest from kafka.structs import BrokerMetadata @pytest.fixture def cli(mocker, conn): - mocker.patch('kafka.cluster.dns_lookup', - return_value=[(socket.AF_INET, None, None, None, ('localhost', 9092))]) client = KafkaClient(api_version=(0, 9)) + mocker.patch.object(client, '_selector') client.poll(future=client.cluster.request_update()) return client def test_bootstrap(mocker, conn): conn.state = ConnectionStates.CONNECTED - mocker.patch('kafka.cluster.dns_lookup', - return_value=[(socket.AF_INET, None, None, None, ('localhost', 9092))]) cli = KafkaClient(api_version=(0, 9)) + mocker.patch.object(cli, '_selector') future = cli.cluster.request_update() cli.poll(future=future) @@ -90,7 +88,7 @@ def test_maybe_connect(cli, conn): def test_conn_state_change(mocker, cli, conn): - sel = mocker.patch.object(cli, '_selector') + sel = cli._selector node_id = 0 cli._conns[node_id] = conn @@ -98,7 +96,7 @@ def test_conn_state_change(mocker, cli, conn): sock = conn._sock 
cli._conn_state_change(node_id, sock, conn) assert node_id in cli._connecting - sel.register.assert_called_with(sock, selectors.EVENT_WRITE) + sel.register.assert_called_with(sock, selectors.EVENT_WRITE, conn) conn.state = ConnectionStates.CONNECTED cli._conn_state_change(node_id, sock, conn) @@ -233,6 +231,8 @@ def test_send(cli, conn): def test_poll(mocker): metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata') _poll = mocker.patch.object(KafkaClient, '_poll') + ifrs = mocker.patch.object(KafkaClient, 'in_flight_request_count') + ifrs.return_value = 1 cli = KafkaClient(api_version=(0, 9)) # metadata timeout wins @@ -249,6 +249,11 @@ def test_poll(mocker): cli.poll() _poll.assert_called_with(cli.config['request_timeout_ms'] / 1000.0) + # If no in-flight-requests, drop timeout to retry_backoff_ms + ifrs.return_value = 0 + cli.poll() + _poll.assert_called_with(cli.config['retry_backoff_ms'] / 1000.0) + def test__poll(): pass @@ -304,12 +309,14 @@ def client(mocker): def test_maybe_refresh_metadata_ttl(mocker, client): client.cluster.ttl.return_value = 1234 + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) client.poll(timeout_ms=12345678) client._poll.assert_called_with(1.234) def test_maybe_refresh_metadata_backoff(mocker, client): + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) now = time.time() t = mocker.patch('time.time') t.return_value = now @@ -320,6 +327,7 @@ def test_maybe_refresh_metadata_backoff(mocker, client): def test_maybe_refresh_metadata_in_progress(mocker, client): client._metadata_refresh_in_progress = True + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) client.poll(timeout_ms=12345678) client._poll.assert_called_with(9999.999) # request_timeout_ms @@ -328,6 +336,7 @@ def test_maybe_refresh_metadata_in_progress(mocker, client): def test_maybe_refresh_metadata_update(mocker, client): mocker.patch.object(client, 'least_loaded_node', return_value='foobar') mocker.patch.object(client, '_can_send_request', return_value=True) + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) send = mocker.patch.object(client, 'send') client.poll(timeout_ms=12345678) @@ -342,6 +351,7 @@ def test_maybe_refresh_metadata_cant_send(mocker, client): mocker.patch.object(client, '_can_connect', return_value=True) mocker.patch.object(client, '_maybe_connect', return_value=True) mocker.patch.object(client, 'maybe_connect', return_value=True) + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) now = time.time() t = mocker.patch('time.time') diff --git a/test/test_client_integration.py b/test/test_client_integration.py index 4d204eb86..cceb97b00 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -1,5 +1,7 @@ import os +import pytest + from kafka.errors import KafkaTimeoutError from kafka.protocol import create_message from kafka.structs import ( @@ -7,7 +9,7 @@ ProduceRequestPayload) from test.fixtures import ZookeeperFixture, KafkaFixture -from test.testutil import KafkaIntegrationTestCase, kafka_versions +from test.testutil import KafkaIntegrationTestCase, env_kafka_version class TestKafkaClientIntegration(KafkaIntegrationTestCase): @@ -80,6 +82,7 @@ def test_send_produce_request_maintains_request_response_order(self): # Offset Tests # #################### + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_commit_fetch_offsets(self): req = OffsetCommitRequestPayload(self.topic, 
0, 42, 'metadata') (resp,) = self.client.send_offset_commit_request('group', [req]) @@ -91,7 +94,8 @@ def test_commit_fetch_offsets(self): self.assertEqual(resp.offset, 42) self.assertEqual(resp.metadata, '') # Metadata isn't stored for now - @kafka_versions('>=0.9.0.0') + + @pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_commit_fetch_offsets_dual(self): req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') (resp,) = self.client.send_offset_commit_request_kafka('group', [req]) diff --git a/test/test_codec.py b/test/test_codec.py index 0fefe6faa..9eff888fe 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -7,14 +7,14 @@ from kafka.vendor.six.moves import range from kafka.codec import ( - has_snappy, has_gzip, has_lz4, + has_snappy, has_lz4, gzip_encode, gzip_decode, snappy_encode, snappy_decode, lz4_encode, lz4_decode, lz4_encode_old_kafka, lz4_decode_old_kafka, ) -from test.fixtures import random_string +from test.testutil import random_string def test_gzip(): diff --git a/test/test_conn.py b/test/test_conn.py index 5da5effcf..966f7b34d 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -3,7 +3,6 @@ from errno import EALREADY, EINPROGRESS, EISCONN, ECONNRESET import socket -import time import mock import pytest @@ -86,7 +85,7 @@ def test_connection_delay(conn): conn.last_attempt = 1000 assert conn.connection_delay() == conn.config['reconnect_backoff_ms'] conn.state = ConnectionStates.CONNECTING - assert conn.connection_delay() == 0 + assert conn.connection_delay() == float('inf') conn.state = ConnectionStates.CONNECTED assert conn.connection_delay() == float('inf') @@ -275,7 +274,7 @@ def test_lookup_on_connect(): ] with mock.patch("socket.getaddrinfo", return_value=mock_return1) as m: conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi1 assert conn._sock_addr == sockaddr1 conn.close() @@ -289,7 +288,7 @@ def test_lookup_on_connect(): with mock.patch("socket.getaddrinfo", return_value=mock_return2) as m: conn.last_attempt = 0 conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi2 assert conn._sock_addr == sockaddr2 conn.close() @@ -304,7 +303,7 @@ def test_relookup_on_failure(): with mock.patch("socket.getaddrinfo", return_value=mock_return1) as m: last_attempt = conn.last_attempt conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn.disconnected() assert conn.last_attempt > last_attempt @@ -317,7 +316,7 @@ def test_relookup_on_failure(): with mock.patch("socket.getaddrinfo", return_value=mock_return2) as m: conn.last_attempt = 0 conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi2 assert conn._sock_addr == sockaddr2 conn.close() diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index ec2685765..58dc7ebf9 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -8,18 +8,18 @@ from kafka.conn import ConnectionStates from kafka.consumer.group import KafkaConsumer -from kafka.coordinator.base import MemberState, Generation +from kafka.coordinator.base import MemberState from kafka.structs import TopicPartition -from test.fixtures import random_string, version +from 
test.testutil import env_kafka_version, random_string def get_connect_str(kafka_broker): return kafka_broker.host + ':' + str(kafka_broker.port) -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") -def test_consumer(kafka_broker, topic, version): +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_consumer(kafka_broker, topic): # The `topic` fixture is included because # 0.8.2 brokers need a topic to function well consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) @@ -29,18 +29,18 @@ def test_consumer(kafka_broker, topic, version): assert consumer._client._conns[node_id].state is ConnectionStates.CONNECTED consumer.close() -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") -def test_consumer_topics(kafka_broker, topic, version): + +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_consumer_topics(kafka_broker, topic): consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) # Necessary to drive the IO consumer.poll(500) - consumer_topics = consumer.topics() - assert topic in consumer_topics + assert topic in consumer.topics() assert len(consumer.partitions_for_topic(topic)) > 0 consumer.close() -@pytest.mark.skipif(version() < (0, 9), reason='Unsupported Kafka Version') -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") + +@pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_group(kafka_broker, topic): num_partitions = 4 connect_str = get_connect_str(kafka_broker) @@ -130,7 +130,7 @@ def consumer_thread(i): threads[c] = None -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_paused(kafka_broker, topic): consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) topics = [TopicPartition(topic, 1)] @@ -149,8 +149,7 @@ def test_paused(kafka_broker, topic): consumer.close() -@pytest.mark.skipif(version() < (0, 9), reason='Unsupported Kafka Version') -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_heartbeat_thread(kafka_broker, topic): group_id = 'test-group-' + random_string(6) consumer = KafkaConsumer(topic, diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index d2210077a..2319d94eb 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -1,56 +1,51 @@ import logging import os import time -from mock import patch -import pytest -import kafka.codec +from mock import patch import pytest from kafka.vendor.six.moves import range -from kafka.vendor import six from . 
import unittest from kafka import ( KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer, SimpleConsumer, create_message, create_gzip_message, KafkaProducer ) +import kafka.codec from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES from kafka.errors import ( ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, UnsupportedVersionError, KafkaTimeoutError, UnsupportedCodecError, ConsumerTimeout ) +from kafka.protocol.message import PartialMessage from kafka.structs import ( ProduceRequestPayload, TopicPartition, OffsetAndTimestamp ) -from test.fixtures import ZookeeperFixture, KafkaFixture, random_string, version -from test.testutil import KafkaIntegrationTestCase, kafka_versions, Timer, send_messages - +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") -def test_kafka_consumer(simple_client, topic, kafka_consumer_factory): - """Test KafkaConsumer - """ - kafka_consumer = kafka_consumer_factory(auto_offset_reset='earliest') - - send_messages(simple_client, topic, 0, range(0, 100)) - send_messages(simple_client, topic, 1, range(100, 200)) +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_kafka_consumer(kafka_consumer_factory, send_messages): + """Test KafkaConsumer""" + consumer = kafka_consumer_factory(auto_offset_reset='earliest') + send_messages(range(0, 100), partition=0) + send_messages(range(0, 100), partition=1) cnt = 0 - messages = {0: set(), 1: set()} - for message in kafka_consumer: + messages = {0: [], 1: []} + for message in consumer: logging.debug("Consumed message %s", repr(message)) cnt += 1 - messages[message.partition].add(message.offset) + messages[message.partition].append(message) if cnt >= 200: break - assert len(messages[0]) == 100 - assert len(messages[1]) == 100 - kafka_consumer.close() + assert_message_count(messages[0], 100) + assert_message_count(messages[1], 100) -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_kafka_consumer_unsupported_encoding( topic, kafka_producer_factory, kafka_consumer_factory): # Send a compressed message @@ -207,7 +202,7 @@ def test_simple_consumer_no_reset(self): with self.assertRaises(OffsetOutOfRangeError): consumer.get_message() - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_simple_consumer_load_initial_offsets(self): self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -255,6 +250,8 @@ def test_simple_consumer__seek(self): consumer.stop() + @pytest.mark.skipif(env_kafka_version() >= (2, 0), + reason="SimpleConsumer blocking does not handle PartialMessage change in kafka 2.0+") def test_simple_consumer_blocking(self): consumer = self.consumer() @@ -384,7 +381,7 @@ def test_multi_proc_pending(self): consumer.stop() @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky') - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_multi_process_consumer_load_initial_offsets(self): self.send_messages(0, range(0, 10)) self.send_messages(1, range(10, 20)) @@ -420,7 +417,8 @@ def test_large_messages(self): consumer = self.consumer(max_buffer_size=60000) expected_messages = set(small_messages + large_messages) - actual_messages = set([ 
x.message.value for x in consumer ]) + actual_messages = set([x.message.value for x in consumer + if not isinstance(x.message, PartialMessage)]) self.assertEqual(expected_messages, actual_messages) consumer.stop() @@ -455,7 +453,7 @@ def test_huge_messages(self): big_consumer.stop() - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_offset_behavior__resuming_behavior(self): self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -487,7 +485,7 @@ def test_offset_behavior__resuming_behavior(self): consumer2.stop() @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky') - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_multi_process_offset_behavior__resuming_behavior(self): self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -544,390 +542,230 @@ def test_fetch_buffer_size(self): messages = [ message for message in consumer ] self.assertEqual(len(messages), 2) - def test_kafka_consumer__blocking(self): - TIMEOUT_MS = 500 - consumer = self.kafka_consumer(auto_offset_reset='earliest', - enable_auto_commit=False, - consumer_timeout_ms=TIMEOUT_MS) - - # Manual assignment avoids overhead of consumer group mgmt - consumer.unsubscribe() - consumer.assign([TopicPartition(self.topic, 0)]) - # Ask for 5 messages, nothing in queue, block 500ms - with Timer() as t: - with self.assertRaises(StopIteration): - msg = next(consumer) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - - self.send_messages(0, range(0, 10)) - - # Ask for 5 messages, 10 in queue. Get 5 back, no blocking - messages = set() - with Timer() as t: - for i in range(5): +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_kafka_consumer__blocking(kafka_consumer_factory, topic, send_messages): + TIMEOUT_MS = 500 + consumer = kafka_consumer_factory(auto_offset_reset='earliest', + enable_auto_commit=False, + consumer_timeout_ms=TIMEOUT_MS) + + # Manual assignment avoids overhead of consumer group mgmt + consumer.unsubscribe() + consumer.assign([TopicPartition(topic, 0)]) + + # Ask for 5 messages, nothing in queue, block 500ms + with Timer() as t: + with pytest.raises(StopIteration): + msg = next(consumer) + assert t.interval >= (TIMEOUT_MS / 1000.0) + + send_messages(range(0, 10)) + + # Ask for 5 messages, 10 in queue. 
Get 5 back, no blocking + messages = [] + with Timer() as t: + for i in range(5): + msg = next(consumer) + messages.append(msg) + assert_message_count(messages, 5) + assert t.interval < (TIMEOUT_MS / 1000.0) + + # Ask for 10 messages, get 5 back, block 500ms + messages = [] + with Timer() as t: + with pytest.raises(StopIteration): + for i in range(10): msg = next(consumer) - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertLess(t.interval, TIMEOUT_MS / 1000.0 ) - - # Ask for 10 messages, get 5 back, block 500ms - messages = set() - with Timer() as t: - with self.assertRaises(StopIteration): - for i in range(10): - msg = next(consumer) - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - consumer.close() - - @kafka_versions('>=0.8.1') - def test_kafka_consumer__offset_commit_resume(self): - GROUP_ID = random_string(10) - - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer1 = self.kafka_consumer( - group_id=GROUP_ID, - enable_auto_commit=True, - auto_commit_interval_ms=100, - auto_offset_reset='earliest', - ) - - # Grab the first 180 messages - output_msgs1 = [] - for _ in range(180): - m = next(consumer1) - output_msgs1.append(m) - self.assert_message_count(output_msgs1, 180) - consumer1.close() - - # The total offset across both partitions should be at 180 - consumer2 = self.kafka_consumer( - group_id=GROUP_ID, - enable_auto_commit=True, - auto_commit_interval_ms=100, - auto_offset_reset='earliest', - ) - - # 181-200 - output_msgs2 = [] - for _ in range(20): - m = next(consumer2) - output_msgs2.append(m) - self.assert_message_count(output_msgs2, 20) - self.assertEqual(len(set(output_msgs1) | set(output_msgs2)), 200) - consumer2.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_max_bytes_simple(self): - self.send_messages(0, range(100, 200)) - self.send_messages(1, range(200, 300)) - - # Start a consumer - consumer = self.kafka_consumer( - auto_offset_reset='earliest', fetch_max_bytes=300) - seen_partitions = set([]) - for i in range(10): - poll_res = consumer.poll(timeout_ms=100) - for partition, msgs in six.iteritems(poll_res): - for msg in msgs: - seen_partitions.add(partition) - - # Check that we fetched at least 1 message from both partitions - self.assertEqual( - seen_partitions, set([ - TopicPartition(self.topic, 0), TopicPartition(self.topic, 1)])) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_max_bytes_one_msg(self): - # We send to only 1 partition so we don't have parallel requests to 2 - # nodes for data. - self.send_messages(0, range(100, 200)) - - # Start a consumer. FetchResponse_v3 should always include at least 1 - # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time - # But 0.11.0.0 returns 1 MessageSet at a time when the messages are - # stored in the new v2 format by the broker. - # - # DP Note: This is a strange test. The consumer shouldn't care - # how many messages are included in a FetchResponse, as long as it is - # non-zero. I would not mind if we deleted this test. It caused - # a minor headache when testing 0.11.0.0. 
- group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5) - consumer = self.kafka_consumer( - group_id=group, - auto_offset_reset='earliest', - consumer_timeout_ms=5000, - fetch_max_bytes=1) - - fetched_msgs = [next(consumer) for i in range(10)] - self.assertEqual(len(fetched_msgs), 10) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_for_time(self): - late_time = int(time.time()) * 1000 - middle_time = late_time - 1000 - early_time = late_time - 2000 - tp = TopicPartition(self.topic, 0) - - timeout = 10 - kafka_producer = self.kafka_producer() - early_msg = kafka_producer.send( - self.topic, partition=0, value=b"first", - timestamp_ms=early_time).get(timeout) - late_msg = kafka_producer.send( - self.topic, partition=0, value=b"last", - timestamp_ms=late_time).get(timeout) - - consumer = self.kafka_consumer() - offsets = consumer.offsets_for_times({tp: early_time}) - self.assertEqual(len(offsets), 1) - self.assertEqual(offsets[tp].offset, early_msg.offset) - self.assertEqual(offsets[tp].timestamp, early_time) - - offsets = consumer.offsets_for_times({tp: middle_time}) - self.assertEqual(offsets[tp].offset, late_msg.offset) - self.assertEqual(offsets[tp].timestamp, late_time) - - offsets = consumer.offsets_for_times({tp: late_time}) - self.assertEqual(offsets[tp].offset, late_msg.offset) - self.assertEqual(offsets[tp].timestamp, late_time) - - offsets = consumer.offsets_for_times({}) - self.assertEqual(offsets, {}) - - # Out of bound timestamps check - - offsets = consumer.offsets_for_times({tp: 0}) - self.assertEqual(offsets[tp].offset, early_msg.offset) - self.assertEqual(offsets[tp].timestamp, early_time) - - offsets = consumer.offsets_for_times({tp: 9999999999999}) - self.assertEqual(offsets[tp], None) - - # Beginning/End offsets - - offsets = consumer.beginning_offsets([tp]) - self.assertEqual(offsets, { - tp: early_msg.offset, - }) - offsets = consumer.end_offsets([tp]) - self.assertEqual(offsets, { - tp: late_msg.offset + 1 - }) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_search_many_partitions(self): - tp0 = TopicPartition(self.topic, 0) - tp1 = TopicPartition(self.topic, 1) - - kafka_producer = self.kafka_producer() - send_time = int(time.time() * 1000) - timeout = 10 - p0msg = kafka_producer.send( - self.topic, partition=0, value=b"XXX", - timestamp_ms=send_time).get(timeout) - p1msg = kafka_producer.send( - self.topic, partition=1, value=b"XXX", - timestamp_ms=send_time).get(timeout) - - consumer = self.kafka_consumer() - offsets = consumer.offsets_for_times({ - tp0: send_time, - tp1: send_time - }) - - self.assertEqual(offsets, { - tp0: OffsetAndTimestamp(p0msg.offset, send_time), - tp1: OffsetAndTimestamp(p1msg.offset, send_time) - }) - - offsets = consumer.beginning_offsets([tp0, tp1]) - self.assertEqual(offsets, { - tp0: p0msg.offset, - tp1: p1msg.offset - }) - - offsets = consumer.end_offsets([tp0, tp1]) - self.assertEqual(offsets, { - tp0: p0msg.offset + 1, - tp1: p1msg.offset + 1 - }) - consumer.close() - - @kafka_versions('<0.10.1') - def test_kafka_consumer_offsets_for_time_old(self): - consumer = self.kafka_consumer() - tp = TopicPartition(self.topic, 0) - - with self.assertRaises(UnsupportedVersionError): - consumer.offsets_for_times({tp: int(time.time())}) - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_for_times_errors(self): - consumer = self.kafka_consumer(fetch_max_wait_ms=200, - request_timeout_ms=500) - tp = TopicPartition(self.topic, 0) - bad_tp = 
TopicPartition(self.topic, 100) - - with self.assertRaises(ValueError): - consumer.offsets_for_times({tp: -1}) - - with self.assertRaises(KafkaTimeoutError): - consumer.offsets_for_times({bad_tp: 0}) - - def old_kafka_consumer(self, **configs): - brokers = '%s:%d' % (self.server.host, self.server.port) - consumer = OldKafkaConsumer(self.topic, - bootstrap_servers=brokers, - **configs) - return consumer - - def test_old_kafka_consumer(self): - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer = self.old_kafka_consumer(auto_offset_reset='smallest', - consumer_timeout_ms=5000) - n = 0 - messages = {0: set(), 1: set()} - logging.debug("kafka consumer offsets: %s" % consumer.offsets()) - for m in consumer: - logging.debug("Consumed message %s" % repr(m)) - n += 1 - messages[m.partition].add(m.offset) - if n == 200: - break - - self.assertEqual(len(messages[0]), 100) - self.assertEqual(len(messages[1]), 100) - - def test_old_kafka_consumer__blocking(self): - TIMEOUT_MS = 500 - consumer = self.old_kafka_consumer( - auto_offset_reset='smallest', - consumer_timeout_ms=TIMEOUT_MS, - ) - - # Ask for 5 messages, nothing in queue, block 500ms - with Timer() as t: - with self.assertRaises(ConsumerTimeout): - msg = consumer.next() - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) - - self.send_messages(0, range(0, 10)) - - # Ask for 5 messages, 10 in queue. Get 5 back, no blocking - messages = set() - with Timer() as t: - for i in range(5): - msg = consumer.next() - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertLess(t.interval, TIMEOUT_MS / 1000.0) - - # Ask for 10 messages, get 5 back, block 500ms - messages = set() - with Timer() as t: - with self.assertRaises(ConsumerTimeout): - for i in range(10): - msg = consumer.next() - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0) - - @kafka_versions('=0.8.1') - def test_old_kafka_consumer__offset_commit_resume(self): - GROUP_ID = random_string(10).encode('utf-8') - - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer1 = self.old_kafka_consumer( - group_id=GROUP_ID, - auto_commit_enable=True, - auto_commit_interval_ms=None, - auto_commit_interval_messages=20, - auto_offset_reset='smallest', - ) - - # Grab the first 195 messages - output_msgs1 = [] - for _ in range(195): - m = consumer1.next() - output_msgs1.append(m) - consumer1.task_done(m) - self.assert_message_count(output_msgs1, 195) - - # The total offset across both partitions should be at 180 - consumer2 = self.old_kafka_consumer( - group_id=GROUP_ID, - auto_commit_enable=True, - auto_commit_interval_ms=None, - auto_commit_interval_messages=20, - consumer_timeout_ms=100, - auto_offset_reset='smallest', - ) - - # 181-200 - output_msgs2 = [] - with self.assertRaises(ConsumerTimeout): - while True: - m = consumer2.next() - output_msgs2.append(m) - self.assert_message_count(output_msgs2, 20) - self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) - - @kafka_versions("=0.9.0.0") - def test_old_kafka_consumer__offset_commit_resume_dual(self): - GROUP_ID = random_string(10).encode('utf-8') - - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer1 = self.old_kafka_consumer( - group_id=GROUP_ID, - auto_commit_enable=True, - auto_commit_interval_ms=None, - 
auto_commit_interval_messages=20, - auto_offset_reset='smallest', - offset_storage='kafka', - ) - - # Grab the first 195 messages - output_msgs1 = [] - for _ in range(195): - m = consumer1.next() - output_msgs1.append(m) - consumer1.task_done(m) - self.assert_message_count(output_msgs1, 195) - - # The total offset across both partitions should be at 180 - consumer2 = self.old_kafka_consumer( - group_id=GROUP_ID, - auto_commit_enable=True, - auto_commit_interval_ms=None, - auto_commit_interval_messages=20, - consumer_timeout_ms=100, - auto_offset_reset='smallest', - offset_storage='dual', - ) - - # 181-200 - output_msgs2 = [] - with self.assertRaises(ConsumerTimeout): - while True: - m = consumer2.next() - output_msgs2.append(m) - self.assert_message_count(output_msgs2, 20) - self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15) + messages.append(msg) + assert_message_count(messages, 5) + assert t.interval >= (TIMEOUT_MS / 1000.0) + + +@pytest.mark.skipif(env_kafka_version() < (0, 8, 1), reason="Requires KAFKA_VERSION >= 0.8.1") +def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages): + GROUP_ID = random_string(10) + + send_messages(range(0, 100), partition=0) + send_messages(range(100, 200), partition=1) + + # Start a consumer and grab the first 180 messages + consumer1 = kafka_consumer_factory( + group_id=GROUP_ID, + enable_auto_commit=True, + auto_commit_interval_ms=100, + auto_offset_reset='earliest', + ) + output_msgs1 = [] + for _ in range(180): + m = next(consumer1) + output_msgs1.append(m) + assert_message_count(output_msgs1, 180) + + # Normally we let the pytest fixture `kafka_consumer_factory` handle + # closing as part of its teardown. Here we manually call close() to force + # auto-commit to occur before the second consumer starts. That way the + # second consumer only consumes previously unconsumed messages. + consumer1.close() + + # Start a second consumer to grab 181-200 + consumer2 = kafka_consumer_factory( + group_id=GROUP_ID, + enable_auto_commit=True, + auto_commit_interval_ms=100, + auto_offset_reset='earliest', + ) + output_msgs2 = [] + for _ in range(20): + m = next(consumer2) + output_msgs2.append(m) + assert_message_count(output_msgs2, 20) + + # Verify the second consumer wasn't reconsuming messages that the first + # consumer already saw + assert_message_count(output_msgs1 + output_msgs2, 200) + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_max_bytes_simple(kafka_consumer_factory, topic, send_messages): + send_messages(range(100, 200), partition=0) + send_messages(range(200, 300), partition=1) + + # Start a consumer + consumer = kafka_consumer_factory( + auto_offset_reset='earliest', fetch_max_bytes=300) + seen_partitions = set() + for i in range(90): + poll_res = consumer.poll(timeout_ms=100) + for partition, msgs in poll_res.items(): + for msg in msgs: + seen_partitions.add(partition) + + # Check that we fetched at least 1 message from both partitions + assert seen_partitions == {TopicPartition(topic, 0), TopicPartition(topic, 1)} + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_max_bytes_one_msg(kafka_consumer_factory, send_messages): + # We send to only 1 partition so we don't have parallel requests to 2 + # nodes for data. + send_messages(range(100, 200)) + + # Start a consumer. 
FetchResponse_v3 should always include at least 1 + # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time + # But 0.11.0.0 returns 1 MessageSet at a time when the messages are + # stored in the new v2 format by the broker. + # + # DP Note: This is a strange test. The consumer shouldn't care + # how many messages are included in a FetchResponse, as long as it is + # non-zero. I would not mind if we deleted this test. It caused + # a minor headache when testing 0.11.0.0. + group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5) + consumer = kafka_consumer_factory( + group_id=group, + auto_offset_reset='earliest', + consumer_timeout_ms=5000, + fetch_max_bytes=1) + + fetched_msgs = [next(consumer) for i in range(10)] + assert_message_count(fetched_msgs, 10) + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_offsets_for_time(topic, kafka_consumer, kafka_producer): + late_time = int(time.time()) * 1000 + middle_time = late_time - 1000 + early_time = late_time - 2000 + tp = TopicPartition(topic, 0) + + timeout = 10 + early_msg = kafka_producer.send( + topic, partition=0, value=b"first", + timestamp_ms=early_time).get(timeout) + late_msg = kafka_producer.send( + topic, partition=0, value=b"last", + timestamp_ms=late_time).get(timeout) + + consumer = kafka_consumer + offsets = consumer.offsets_for_times({tp: early_time}) + assert len(offsets) == 1 + assert offsets[tp].offset == early_msg.offset + assert offsets[tp].timestamp == early_time + + offsets = consumer.offsets_for_times({tp: middle_time}) + assert offsets[tp].offset == late_msg.offset + assert offsets[tp].timestamp == late_time + + offsets = consumer.offsets_for_times({tp: late_time}) + assert offsets[tp].offset == late_msg.offset + assert offsets[tp].timestamp == late_time + + offsets = consumer.offsets_for_times({}) + assert offsets == {} + + # Out of bound timestamps check + + offsets = consumer.offsets_for_times({tp: 0}) + assert offsets[tp].offset == early_msg.offset + assert offsets[tp].timestamp == early_time + + offsets = consumer.offsets_for_times({tp: 9999999999999}) + assert offsets[tp] is None + + # Beginning/End offsets + + offsets = consumer.beginning_offsets([tp]) + assert offsets == {tp: early_msg.offset} + offsets = consumer.end_offsets([tp]) + assert offsets == {tp: late_msg.offset + 1} + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_producer, topic): + tp0 = TopicPartition(topic, 0) + tp1 = TopicPartition(topic, 1) + + send_time = int(time.time() * 1000) + timeout = 10 + p0msg = kafka_producer.send( + topic, partition=0, value=b"XXX", + timestamp_ms=send_time).get(timeout) + p1msg = kafka_producer.send( + topic, partition=1, value=b"XXX", + timestamp_ms=send_time).get(timeout) + + consumer = kafka_consumer + offsets = consumer.offsets_for_times({ + tp0: send_time, + tp1: send_time + }) + + assert offsets == { + tp0: OffsetAndTimestamp(p0msg.offset, send_time), + tp1: OffsetAndTimestamp(p1msg.offset, send_time) + } + + offsets = consumer.beginning_offsets([tp0, tp1]) + assert offsets == { + tp0: p0msg.offset, + tp1: p1msg.offset + } + + offsets = consumer.end_offsets([tp0, tp1]) + assert offsets == { + tp0: p0msg.offset + 1, + tp1: p1msg.offset + 1 + } + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def 
test_kafka_consumer_offsets_for_times_errors(kafka_consumer_factory, topic): + consumer = kafka_consumer_factory(fetch_max_wait_ms=200, + request_timeout_ms=500) + tp = TopicPartition(topic, 0) + bad_tp = TopicPartition(topic, 100) + + with pytest.raises(ValueError): + consumer.offsets_for_times({tp: -1}) + + assert consumer.offsets_for_times({bad_tp: 0}) == {bad_tp: None} diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index 48021a443..ad7dcb98b 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -9,8 +9,8 @@ from kafka.producer.base import Producer from kafka.structs import TopicPartition -from test.fixtures import ZookeeperFixture, KafkaFixture, random_string -from test.testutil import KafkaIntegrationTestCase +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, random_string log = logging.getLogger(__name__) diff --git a/test/test_fetcher.py b/test/test_fetcher.py index e37a70db5..b61a0f026 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -81,7 +81,7 @@ def test_send_fetches(fetcher, topic, mocker): ret = fetcher.send_fetches() for node, request in enumerate(fetch_requests): - fetcher._client.send.assert_any_call(node, request) + fetcher._client.send.assert_any_call(node, request, wakeup=False) assert len(ret) == len(fetch_requests) @@ -138,10 +138,6 @@ def test__reset_offset(fetcher, mocker): fetcher._subscriptions.need_offset_reset(tp) mocked = mocker.patch.object(fetcher, '_retrieve_offsets') - mocked.return_value = {} - with pytest.raises(NoOffsetForPartitionError): - fetcher._reset_offset(tp) - mocked.return_value = {tp: (1001, None)} fetcher._reset_offset(tp) assert not fetcher._subscriptions.assignment[tp].awaiting_reset diff --git a/test/test_producer.py b/test/test_producer.py index 60b19bfb9..9605adf58 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -7,7 +7,7 @@ from kafka import KafkaConsumer, KafkaProducer, TopicPartition from kafka.producer.buffer import SimpleBufferPool -from test.fixtures import random_string, version +from test.testutil import env_kafka_version, random_string def test_buffer_pool(): @@ -22,13 +22,13 @@ def test_buffer_pool(): assert buf2.read() == b'' -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) def test_end_to_end(kafka_broker, compression): if compression == 'lz4': # LZ4 requires 0.8.2 - if version() < (0, 8, 2): + if env_kafka_version() < (0, 8, 2): return # python-lz4 crashes on older versions of pypy elif platform.python_implementation() == 'PyPy': @@ -80,7 +80,7 @@ def test_kafka_producer_gc_cleanup(): assert threading.active_count() == threads -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) def test_kafka_producer_proper_record_metadata(kafka_broker, compression): connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) @@ -91,7 +91,7 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): magic = producer._max_usable_produce_magic() # record headers are supported in 0.11.0 - if version() < (0, 11, 0): + if env_kafka_version() < (0, 11, 0): headers = None else: headers = [("Header Key", b"Header Value")] diff --git 
a/test/test_producer_integration.py b/test/test_producer_integration.py index 7109886f1..8f32cf870 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -13,10 +13,11 @@ from kafka.codec import has_snappy from kafka.errors import UnknownTopicOrPartitionError, LeaderNotAvailableError from kafka.producer.base import Producer +from kafka.protocol.message import PartialMessage from kafka.structs import FetchRequestPayload, ProduceRequestPayload -from test.fixtures import ZookeeperFixture, KafkaFixture, version -from test.testutil import KafkaIntegrationTestCase, kafka_versions, current_offset +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, env_kafka_version, current_offset # TODO: This duplicates a TestKafkaProducerIntegration method temporarily @@ -43,7 +44,7 @@ def assert_produce_response(resp, initial_offset): assert resp[0].offset == initial_offset -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_produce_many_simple(simple_client, topic): """Test multiple produces using the SimpleClient """ @@ -353,7 +354,7 @@ def test_batched_simple_producer__triggers_by_time(self): # KeyedProducer Tests # ############################ - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_keyedproducer_null_payload(self): partitions = self.client.get_partition_ids_for_topic(self.topic) start_offsets = [self.current_offset(self.topic, p) for p in partitions] @@ -521,7 +522,8 @@ def assert_fetch_offset(self, partition, start_offset, expected_messages): self.assertEqual(resp.error, 0) self.assertEqual(resp.partition, partition) - messages = [ x.message.value for x in resp.messages ] + messages = [ x.message.value for x in resp.messages + if not isinstance(x.message, PartialMessage) ] self.assertEqual(messages, expected_messages) self.assertEqual(resp.highwaterMark, start_offset+len(expected_messages)) diff --git a/test/test_protocol.py b/test/test_protocol.py index 7abcefb46..e295174d4 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -3,7 +3,6 @@ import struct import pytest -from kafka.vendor import six from kafka.protocol.api import RequestHeader from kafka.protocol.commit import GroupCoordinatorRequest diff --git a/test/testutil.py b/test/testutil.py index 1614815ed..8c682ac02 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -1,9 +1,9 @@ from __future__ import absolute_import -import functools -import operator import os import socket +import random +import string import time import uuid @@ -12,58 +12,27 @@ from kafka import SimpleClient, create_message from kafka.client_async import KafkaClient -from kafka.errors import LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError, \ - NotLeaderForPartitionError, UnknownTopicOrPartitionError, \ - FailedPayloadsError +from kafka.errors import ( + LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError, + NotLeaderForPartitionError, UnknownTopicOrPartitionError, + FailedPayloadsError +) from kafka.structs import OffsetRequestPayload, ProduceRequestPayload -from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order +#from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order -def kafka_versions(*versions): +def random_string(length): + return 
"".join(random.choice(string.ascii_letters) for i in range(length)) - def construct_lambda(s): - if s[0].isdigit(): - op_str = '=' - v_str = s - elif s[1].isdigit(): - op_str = s[0] # ! < > = - v_str = s[1:] - elif s[2].isdigit(): - op_str = s[0:2] # >= <= - v_str = s[2:] - else: - raise ValueError('Unrecognized kafka version / operator: %s' % (s,)) - - op_map = { - '=': operator.eq, - '!': operator.ne, - '>': operator.gt, - '<': operator.lt, - '>=': operator.ge, - '<=': operator.le - } - op = op_map[op_str] - version = version_str_to_list(v_str) - return lambda a: op(a, version) - - validators = map(construct_lambda, versions) - - def real_kafka_versions(func): - @functools.wraps(func) - def wrapper(func, *args, **kwargs): - version = kafka_version() - - if not version: - pytest.skip("no kafka version set in KAFKA_VERSION env var") - for f in validators: - if not f(version): - pytest.skip("unsupported kafka version") +def env_kafka_version(): + """Return the Kafka version set in the OS environment as a tuple. - return func(*args, **kwargs) - return wrapper - - return real_kafka_versions + Example: '0.8.1.1' --> (0, 8, 1, 1) + """ + if 'KAFKA_VERSION' not in os.environ: + return () + return tuple(map(int, os.environ['KAFKA_VERSION'].split('.'))) def get_open_port(): sock = socket.socket() @@ -107,6 +76,17 @@ def current_offset(client, topic, partition, kafka_broker=None): return offsets.offsets[0] +def assert_message_count(messages, num_messages): + """Check that we received the expected number of messages with no duplicates.""" + # Make sure we got them all + assert len(messages) == num_messages + # Make sure there are no duplicates + # Note: Currently duplicates are identified only using key/value. Other attributes like topic, partition, headers, + # timestamp, etc are ignored... this could be changed if necessary, but will be more tolerant of dupes. 
+ unique_messages = {(m.key, m.value) for m in messages} + assert len(unique_messages) == num_messages + + class KafkaIntegrationTestCase(unittest.TestCase): create_client = True topic = None diff --git a/tox.ini b/tox.ini index db97a834d..8741b4dcb 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,7 @@ deps = commands = py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} setenv = + CRC32C_SW_MODE = auto PROJECT_ROOT = {toxinidir} passenv = KAFKA_VERSION From 537c1aa0f7b7c27075dd3449e1a1b4a7f92a5f2e Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Fri, 1 Nov 2019 15:03:51 -0700 Subject: [PATCH 272/291] KAFKA-24599: adjust travis.yml such that we only test for kafka versions which are relevant internally --- .travis.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8b59fd369..3ec0726c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,12 +10,7 @@ python: - pypy2.7-6.0 env: - - KAFKA_VERSION=0.8.2.2 - - KAFKA_VERSION=0.9.0.1 - - KAFKA_VERSION=0.10.2.2 - - KAFKA_VERSION=0.11.0.3 - - KAFKA_VERSION=1.1.1 - - KAFKA_VERSION=2.3.0 + - KAFKA_VERSION=1.1.0 addons: apt: From 31a0d2c52f7d149cdb1b852b5e2d3e0898fd653c Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Fri, 1 Nov 2019 15:05:04 -0700 Subject: [PATCH 273/291] KAFKA-24599: remove irrelevant server resources --- servers/0.11.0.0/resources/kafka.properties | 145 ------------------ servers/0.11.0.1/resources/kafka.properties | 145 ------------------ servers/0.11.0.2/resources/kafka.properties | 145 ------------------ servers/0.11.0.3/resources/kafka.properties | 145 ------------------ servers/0.11.0.3/resources/log4j.properties | 25 --- .../0.11.0.3/resources/zookeeper.properties | 21 --- servers/1.0.0/resources/kafka.properties | 145 ------------------ servers/1.0.1/resources/kafka.properties | 145 ------------------ servers/1.0.2/resources/kafka.properties | 145 ------------------ servers/2.0.0/resources/kafka.properties | 145 ------------------ servers/2.0.1/resources/kafka.properties | 145 ------------------ servers/2.1.0/resources/kafka.properties | 145 ------------------ servers/2.1.0/resources/log4j.properties | 25 --- servers/2.1.0/resources/zookeeper.properties | 21 --- servers/2.1.1/resources/kafka.properties | 145 ------------------ servers/2.1.1/resources/log4j.properties | 25 --- servers/2.1.1/resources/zookeeper.properties | 21 --- servers/2.2.0/resources/kafka.properties | 145 ------------------ servers/2.2.0/resources/log4j.properties | 25 --- servers/2.2.0/resources/zookeeper.properties | 21 --- servers/2.2.1/resources/kafka.properties | 145 ------------------ servers/2.2.1/resources/log4j.properties | 25 --- servers/2.2.1/resources/zookeeper.properties | 21 --- servers/2.3.0/resources/kafka.properties | 145 ------------------ servers/2.3.0/resources/log4j.properties | 25 --- servers/2.3.0/resources/zookeeper.properties | 21 --- 26 files changed, 2306 deletions(-) delete mode 100644 servers/0.11.0.0/resources/kafka.properties delete mode 100644 servers/0.11.0.1/resources/kafka.properties delete mode 100644 servers/0.11.0.2/resources/kafka.properties delete mode 100644 servers/0.11.0.3/resources/kafka.properties delete mode 100644 servers/0.11.0.3/resources/log4j.properties delete mode 100644 servers/0.11.0.3/resources/zookeeper.properties delete mode 100644 servers/1.0.0/resources/kafka.properties delete mode 100644 servers/1.0.1/resources/kafka.properties delete mode 100644 
servers/1.0.2/resources/kafka.properties delete mode 100644 servers/2.0.0/resources/kafka.properties delete mode 100644 servers/2.0.1/resources/kafka.properties delete mode 100644 servers/2.1.0/resources/kafka.properties delete mode 100644 servers/2.1.0/resources/log4j.properties delete mode 100644 servers/2.1.0/resources/zookeeper.properties delete mode 100644 servers/2.1.1/resources/kafka.properties delete mode 100644 servers/2.1.1/resources/log4j.properties delete mode 100644 servers/2.1.1/resources/zookeeper.properties delete mode 100644 servers/2.2.0/resources/kafka.properties delete mode 100644 servers/2.2.0/resources/log4j.properties delete mode 100644 servers/2.2.0/resources/zookeeper.properties delete mode 100644 servers/2.2.1/resources/kafka.properties delete mode 100644 servers/2.2.1/resources/log4j.properties delete mode 100644 servers/2.2.1/resources/zookeeper.properties delete mode 100644 servers/2.3.0/resources/kafka.properties delete mode 100644 servers/2.3.0/resources/log4j.properties delete mode 100644 servers/2.3.0/resources/zookeeper.properties diff --git a/servers/0.11.0.0/resources/kafka.properties b/servers/0.11.0.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/0.11.0.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.1/resources/kafka.properties b/servers/0.11.0.1/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/0.11.0.1/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/0.11.0.2/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.3/resources/kafka.properties b/servers/0.11.0.3/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/0.11.0.3/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.3/resources/log4j.properties b/servers/0.11.0.3/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.3/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.3/resources/zookeeper.properties b/servers/0.11.0.3/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.3/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.0.0/resources/kafka.properties b/servers/1.0.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/1.0.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.1/resources/kafka.properties b/servers/1.0.1/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/1.0.1/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.2/resources/kafka.properties b/servers/1.0.2/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/1.0.2/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.0.0/resources/kafka.properties b/servers/2.0.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.0.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.0.1/resources/kafka.properties b/servers/2.0.1/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.0.1/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.0/resources/kafka.properties b/servers/2.1.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.1.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.0/resources/log4j.properties b/servers/2.1.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.1.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.1.0/resources/zookeeper.properties b/servers/2.1.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.1.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/2.1.1/resources/kafka.properties b/servers/2.1.1/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.1.1/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 
-# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.1.1/resources/log4j.properties b/servers/2.1.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.1.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.1.1/resources/zookeeper.properties b/servers/2.1.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.1.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/2.2.0/resources/kafka.properties b/servers/2.2.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.2.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.2.0/resources/log4j.properties b/servers/2.2.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.2.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.2.0/resources/zookeeper.properties b/servers/2.2.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.2.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/2.2.1/resources/kafka.properties b/servers/2.2.1/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.2.1/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.2.1/resources/log4j.properties b/servers/2.2.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.2.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.2.1/resources/zookeeper.properties b/servers/2.2.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.2.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/2.3.0/resources/kafka.properties b/servers/2.3.0/resources/kafka.properties deleted file mode 100644 index 630dbc5fa..000000000 --- a/servers/2.3.0/resources/kafka.properties +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer -allow.everyone.if.no.acl.found=true - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.3.0/resources/log4j.properties b/servers/2.3.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.3.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.3.0/resources/zookeeper.properties b/servers/2.3.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.3.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 From c24a5e26339be997c59f183335a04c318e1e1425 Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Fri, 1 Nov 2019 16:14:08 -0700 Subject: [PATCH 274/291] KAFKA-24599: revert deletion of test_kafka_consumer_offsets_for_time_old since it won't get run for our version anyway; increase timeout on assert_message_count to avoid test failures --- test/test_consumer_integration.py | 9 +++++++++ test/test_failover_integration.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index 2319d94eb..fbd6235fb 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -758,6 +758,15 @@ def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_pro } +@pytest.mark.skipif(env_kafka_version() >= (0, 10, 1), reason="Requires KAFKA_VERSION < 0.10.1") +def test_kafka_consumer_offsets_for_time_old(kafka_consumer, topic): + consumer = kafka_consumer + tp = TopicPartition(topic, 0) + + with pytest.raises(UnsupportedVersionError): + consumer.offsets_for_times({tp: int(time.time())}) + + @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_offsets_for_times_errors(kafka_consumer_factory, topic): consumer = kafka_consumer_factory(fetch_max_wait_ms=200, diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index ad7dcb98b..314288dc1 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -208,7 +208,7 @@ def _kill_leader(self, topic, partition): broker.close() return broker - def assert_message_count(self, topic, check_count, timeout=10, + def assert_message_count(self, topic, check_count, timeout=25, partitions=None, 
at_least=False): hosts = ','.join(['%s:%d' % (broker.host, broker.port) for broker in self.brokers]) From 595417fe5a7cfe872298c957858d1c28b36d2bb9 Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Fri, 1 Nov 2019 16:20:09 -0700 Subject: [PATCH 275/291] KAFKA-24599: update list of all builds in build_integration shell script --- build_integration.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_integration.sh b/build_integration.sh index 44ee481fb..c9b3ea075 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,6 +1,6 @@ #!/bin/bash -: ${ALL_RELEASES:="0.8.2.2 0.9.0.1 0.10.1.1 0.10.2.2 0.11.0.3 1.0.2 1.1.0 1.1.1 2.0.1"} +: ${ALL_RELEASES:="1.1.0"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} From 47adeb4cc6748180f4d1c6e228f6c768f1c74b49 Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Fri, 1 Nov 2019 17:05:36 -0700 Subject: [PATCH 276/291] KAFKA-24599: don't execute tests for python 3.4 (deprecated) --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3ec0726c7..b934cec8b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ dist: xenial python: - 2.7 - - 3.4 - 3.5 - 3.6 - pypy2.7-6.0 From ea2975069cef3496981ff05eda47b463cee46d44 Mon Sep 17 00:00:00 2001 From: Lennart Rudolph Date: Mon, 4 Nov 2019 13:32:32 -0800 Subject: [PATCH 277/291] KAFKA-24599: bump version to 1.4.7.post1 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 1be3a88de..52762af2e 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.7' +__version__ = '1.4.7.post1' From cccc968335ae0d28e5d2f932f516d43e64c7ade8 Mon Sep 17 00:00:00 2001 From: Brian Sang Date: Fri, 3 Apr 2020 13:41:20 -0700 Subject: [PATCH 278/291] Upper bound pin on setuptools since we're still py2 --- requirements-dev.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index c39294d4f..c46b48691 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,9 +10,10 @@ tox==3.1.2 pylint==1.8.2 pytest-pylint==0.11.0 pytest-mock==1.10.0 +setuptools<45.0.0 sphinx-rtd-theme==0.2.4 mock==2.0.0 decorator==4.3.0 tox-pip-extensions==1.2.1 crc32c==1.7 -py==1.8.0 \ No newline at end of file +py==1.8.0 From 9d055a820001566da47ea866414f1013594e047c Mon Sep 17 00:00:00 2001 From: Landon Sterk Date: Thu, 14 May 2020 18:25:30 -0700 Subject: [PATCH 279/291] Pull in upstream updates to the kafka protocol --- kafka/protocol/admin.py | 461 +++++++++++++++++++++++++++++--------- kafka/protocol/api.py | 32 ++- kafka/protocol/fetch.py | 182 ++++++++++++++- kafka/protocol/offset.py | 89 +++++++- kafka/protocol/produce.py | 78 ++++++- kafka/protocol/struct.py | 14 +- 6 files changed, 730 insertions(+), 126 deletions(-) diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index 1e1c0ad6a..af88ea473 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Schema, String +from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String class ApiVersionResponse_v0(Response): @@ -29,6 +29,12 @@ class ApiVersionResponse_v1(Response): ) +class ApiVersionResponse_v2(Response): + API_KEY = 18 + 
API_VERSION = 2 + SCHEMA = ApiVersionResponse_v1.SCHEMA + + class ApiVersionRequest_v0(Request): API_KEY = 18 API_VERSION = 0 @@ -43,8 +49,19 @@ class ApiVersionRequest_v1(Request): SCHEMA = ApiVersionRequest_v0.SCHEMA -ApiVersionRequest = [ApiVersionRequest_v0, ApiVersionRequest_v1] -ApiVersionResponse = [ApiVersionResponse_v0, ApiVersionResponse_v1] +class ApiVersionRequest_v2(Request): + API_KEY = 18 + API_VERSION = 2 + RESPONSE_TYPE = ApiVersionResponse_v1 + SCHEMA = ApiVersionRequest_v0.SCHEMA + + +ApiVersionRequest = [ + ApiVersionRequest_v0, ApiVersionRequest_v1, ApiVersionRequest_v2, +] +ApiVersionResponse = [ + ApiVersionResponse_v0, ApiVersionResponse_v1, ApiVersionResponse_v2, +] class CreateTopicsResponse_v0(Response): @@ -79,6 +96,11 @@ class CreateTopicsResponse_v2(Response): ('error_message', String('utf-8')))) ) +class CreateTopicsResponse_v3(Response): + API_KEY = 19 + API_VERSION = 3 + SCHEMA = CreateTopicsResponse_v2.SCHEMA + class CreateTopicsRequest_v0(Request): API_KEY = 19 @@ -126,11 +148,20 @@ class CreateTopicsRequest_v2(Request): SCHEMA = CreateTopicsRequest_v1.SCHEMA +class CreateTopicsRequest_v3(Request): + API_KEY = 19 + API_VERSION = 3 + RESPONSE_TYPE = CreateTopicsResponse_v3 + SCHEMA = CreateTopicsRequest_v1.SCHEMA + + CreateTopicsRequest = [ - CreateTopicsRequest_v0, CreateTopicsRequest_v1, CreateTopicsRequest_v2 + CreateTopicsRequest_v0, CreateTopicsRequest_v1, + CreateTopicsRequest_v2, CreateTopicsRequest_v3, ] CreateTopicsResponse = [ - CreateTopicsResponse_v0, CreateTopicsResponse_v1, CreateTopicsResponse_v2 + CreateTopicsResponse_v0, CreateTopicsResponse_v1, + CreateTopicsResponse_v2, CreateTopicsResponse_v3, ] @@ -155,6 +186,18 @@ class DeleteTopicsResponse_v1(Response): ) +class DeleteTopicsResponse_v2(Response): + API_KEY = 20 + API_VERSION = 2 + SCHEMA = DeleteTopicsResponse_v1.SCHEMA + + +class DeleteTopicsResponse_v3(Response): + API_KEY = 20 + API_VERSION = 3 + SCHEMA = DeleteTopicsResponse_v1.SCHEMA + + class DeleteTopicsRequest_v0(Request): API_KEY = 20 API_VERSION = 0 @@ -172,8 +215,28 @@ class DeleteTopicsRequest_v1(Request): SCHEMA = DeleteTopicsRequest_v0.SCHEMA -DeleteTopicsRequest = [DeleteTopicsRequest_v0, DeleteTopicsRequest_v1] -DeleteTopicsResponse = [DeleteTopicsResponse_v0, DeleteTopicsResponse_v1] +class DeleteTopicsRequest_v2(Request): + API_KEY = 20 + API_VERSION = 2 + RESPONSE_TYPE = DeleteTopicsResponse_v2 + SCHEMA = DeleteTopicsRequest_v0.SCHEMA + + +class DeleteTopicsRequest_v3(Request): + API_KEY = 20 + API_VERSION = 3 + RESPONSE_TYPE = DeleteTopicsResponse_v3 + SCHEMA = DeleteTopicsRequest_v0.SCHEMA + + +DeleteTopicsRequest = [ + DeleteTopicsRequest_v0, DeleteTopicsRequest_v1, + DeleteTopicsRequest_v2, DeleteTopicsRequest_v3, +] +DeleteTopicsResponse = [ + DeleteTopicsResponse_v0, DeleteTopicsResponse_v1, + DeleteTopicsResponse_v2, DeleteTopicsResponse_v3, +] class ListGroupsResponse_v0(Response): @@ -198,6 +261,11 @@ class ListGroupsResponse_v1(Response): ('protocol_type', String('utf-8')))) ) +class ListGroupsResponse_v2(Response): + API_KEY = 16 + API_VERSION = 2 + SCHEMA = ListGroupsResponse_v1.SCHEMA + class ListGroupsRequest_v0(Request): API_KEY = 16 @@ -212,9 +280,21 @@ class ListGroupsRequest_v1(Request): RESPONSE_TYPE = ListGroupsResponse_v1 SCHEMA = ListGroupsRequest_v0.SCHEMA +class ListGroupsRequest_v2(Request): + API_KEY = 16 + API_VERSION = 1 + RESPONSE_TYPE = ListGroupsResponse_v2 + SCHEMA = ListGroupsRequest_v0.SCHEMA -ListGroupsRequest = [ListGroupsRequest_v0, ListGroupsRequest_v1] -ListGroupsResponse = 
[ListGroupsResponse_v0, ListGroupsResponse_v1] + +ListGroupsRequest = [ + ListGroupsRequest_v0, ListGroupsRequest_v1, + ListGroupsRequest_v2, +] +ListGroupsResponse = [ + ListGroupsResponse_v0, ListGroupsResponse_v1, + ListGroupsResponse_v2, +] class DescribeGroupsResponse_v0(Response): @@ -256,6 +336,33 @@ class DescribeGroupsResponse_v1(Response): ) +class DescribeGroupsResponse_v2(Response): + API_KEY = 15 + API_VERSION = 2 + SCHEMA = DescribeGroupsResponse_v1.SCHEMA + + +class DescribeGroupsResponse_v3(Response): + API_KEY = 15 + API_VERSION = 3 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('groups', Array( + ('error_code', Int16), + ('group', String('utf-8')), + ('state', String('utf-8')), + ('protocol_type', String('utf-8')), + ('protocol', String('utf-8')), + ('members', Array( + ('member_id', String('utf-8')), + ('client_id', String('utf-8')), + ('client_host', String('utf-8')), + ('member_metadata', Bytes), + ('member_assignment', Bytes)))), + ('authorized_operations', Int32)) + ) + + class DescribeGroupsRequest_v0(Request): API_KEY = 15 API_VERSION = 0 @@ -272,8 +379,31 @@ class DescribeGroupsRequest_v1(Request): SCHEMA = DescribeGroupsRequest_v0.SCHEMA -DescribeGroupsRequest = [DescribeGroupsRequest_v0, DescribeGroupsRequest_v1] -DescribeGroupsResponse = [DescribeGroupsResponse_v0, DescribeGroupsResponse_v1] +class DescribeGroupsRequest_v2(Request): + API_KEY = 15 + API_VERSION = 2 + RESPONSE_TYPE = DescribeGroupsResponse_v2 + SCHEMA = DescribeGroupsRequest_v0.SCHEMA + + +class DescribeGroupsRequest_v3(Request): + API_KEY = 15 + API_VERSION = 3 + RESPONSE_TYPE = DescribeGroupsResponse_v2 + SCHEMA = Schema( + ('groups', Array(String('utf-8'))), + ('include_authorized_operations', Boolean) + ) + + +DescribeGroupsRequest = [ + DescribeGroupsRequest_v0, DescribeGroupsRequest_v1, + DescribeGroupsRequest_v2, DescribeGroupsRequest_v3, +] +DescribeGroupsResponse = [ + DescribeGroupsResponse_v0, DescribeGroupsResponse_v1, + DescribeGroupsResponse_v2, DescribeGroupsResponse_v3, +] class SaslHandShakeResponse_v0(Response): @@ -310,101 +440,6 @@ class SaslHandShakeRequest_v1(Request): SaslHandShakeRequest = [SaslHandShakeRequest_v0, SaslHandShakeRequest_v1] SaslHandShakeResponse = [SaslHandShakeResponse_v0, SaslHandShakeResponse_v1] -class AlterConfigsResponse_v0(Response): - API_KEY = 33 - API_VERSION = 0 - SCHEMA = Schema( - ('throttle_time_ms', Int32), - ('resources', Array( - ('error_code', Int16), - ('error_message', String('utf-8')), - ('resource_type', Int8), - ('resource_name', String('utf-8')))) - ) - -class AlterConfigsRequest_v0(Request): - API_KEY = 33 - API_VERSION = 0 - RESPONSE_TYPE = AlterConfigsResponse_v0 - SCHEMA = Schema( - ('resources', Array( - ('resource_type', Int8), - ('resource_name', String('utf-8')), - ('config_entries', Array( - ('config_name', String('utf-8')), - ('config_value', String('utf-8')))))), - ('validate_only', Boolean) - ) - -AlterConfigsRequest = [AlterConfigsRequest_v0] -AlterConfigsResponse = [AlterConfigsResponse_v0] - - -class DescribeConfigsResponse_v0(Response): - API_KEY = 32 - API_VERSION = 0 - SCHEMA = Schema( - ('throttle_time_ms', Int32), - ('resources', Array( - ('error_code', Int16), - ('error_message', String('utf-8')), - ('resource_type', Int8), - ('resource_name', String('utf-8')), - ('config_entries', Array( - ('config_names', String('utf-8')), - ('config_value', String('utf-8')), - ('read_only', Boolean), - ('is_default', Boolean), - ('is_sensitive', Boolean))))) - ) - -class DescribeConfigsResponse_v1(Response): - API_KEY = 
32 - API_VERSION = 1 - SCHEMA = Schema( - ('throttle_time_ms', Int32), - ('resources', Array( - ('error_code', Int16), - ('error_message', String('utf-8')), - ('resource_type', Int8), - ('resource_name', String('utf-8')), - ('config_entries', Array( - ('config_names', String('utf-8')), - ('config_value', String('utf-8')), - ('read_only', Boolean), - ('is_default', Boolean), - ('is_sensitive', Boolean), - ('config_synonyms', Array( - ('config_name', String('utf-8')), - ('config_value', String('utf-8')), - ('config_source', Int8))))))) - ) - -class DescribeConfigsRequest_v0(Request): - API_KEY = 32 - API_VERSION = 0 - RESPONSE_TYPE = DescribeConfigsResponse_v0 - SCHEMA = Schema( - ('resources', Array( - ('resource_type', Int8), - ('resource_name', String('utf-8')), - ('config_names', Array(String('utf-8'))))) - ) - -class DescribeConfigsRequest_v1(Request): - API_KEY = 32 - API_VERSION = 1 - RESPONSE_TYPE = DescribeConfigsResponse_v1 - SCHEMA = Schema( - ('resources', Array( - ('resource_type', Int8), - ('resource_name', String('utf-8')), - ('config_names', Array(String('utf-8'))))), - ('include_synonyms', Boolean) - ) - -DescribeConfigsRequest = [DescribeConfigsRequest_v0, DescribeConfigsRequest_v1] -DescribeConfigsResponse = [DescribeConfigsResponse_v0, DescribeConfigsResponse_v1] class DescribeAclsResponse_v0(Response): API_KEY = 29 @@ -442,6 +477,13 @@ class DescribeAclsResponse_v1(Response): ('permission_type', Int8))))) ) + +class DescribeAclsResponse_v2(Response): + API_KEY = 29 + API_VERSION = 2 + SCHEMA = DescribeAclsResponse_v1.SCHEMA + + class DescribeAclsRequest_v0(Request): API_KEY = 29 API_VERSION = 0 @@ -455,6 +497,7 @@ class DescribeAclsRequest_v0(Request): ('permission_type', Int8) ) + class DescribeAclsRequest_v1(Request): API_KEY = 29 API_VERSION = 1 @@ -469,6 +512,17 @@ class DescribeAclsRequest_v1(Request): ('permission_type', Int8) ) + +class DescribeAclsRequest_v2(Request): + """ + Enable flexible version + """ + API_KEY = 29 + API_VERSION = 2 + RESPONSE_TYPE = DescribeAclsResponse_v2 + SCHEMA = DescribeAclsRequest_v1.SCHEMA + + DescribeAclsRequest = [DescribeAclsRequest_v0, DescribeAclsRequest_v1] DescribeAclsResponse = [DescribeAclsResponse_v0, DescribeAclsResponse_v1] @@ -590,8 +644,153 @@ class DeleteAclsRequest_v1(Request): DeleteAclsRequest = [DeleteAclsRequest_v0, DeleteAclsRequest_v1] DeleteAclsResponse = [DeleteAclsResponse_v0, DeleteAclsResponse_v1] +class AlterConfigsResponse_v0(Response): + API_KEY = 33 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')))) + ) + + +class AlterConfigsResponse_v1(Response): + API_KEY = 33 + API_VERSION = 1 + SCHEMA = AlterConfigsResponse_v0.SCHEMA + + +class AlterConfigsRequest_v0(Request): + API_KEY = 33 + API_VERSION = 0 + RESPONSE_TYPE = AlterConfigsResponse_v0 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')))))), + ('validate_only', Boolean) + ) + +class AlterConfigsRequest_v1(Request): + API_KEY = 33 + API_VERSION = 1 + RESPONSE_TYPE = AlterConfigsResponse_v1 + SCHEMA = AlterConfigsRequest_v0.SCHEMA + +AlterConfigsRequest = [AlterConfigsRequest_v0, AlterConfigsRequest_v1] +AlterConfigsResponse = [AlterConfigsResponse_v0, AlterConfigsRequest_v1] + + +class DescribeConfigsResponse_v0(Response): + API_KEY 
= 32 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('is_default', Boolean), + ('is_sensitive', Boolean))))) + ) + +class DescribeConfigsResponse_v1(Response): + API_KEY = 32 + API_VERSION = 1 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('is_default', Boolean), + ('is_sensitive', Boolean), + ('config_synonyms', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')), + ('config_source', Int8))))))) + ) + +class DescribeConfigsResponse_v2(Response): + API_KEY = 32 + API_VERSION = 2 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('config_source', Int8), + ('is_sensitive', Boolean), + ('config_synonyms', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')), + ('config_source', Int8))))))) + ) + +class DescribeConfigsRequest_v0(Request): + API_KEY = 32 + API_VERSION = 0 + RESPONSE_TYPE = DescribeConfigsResponse_v0 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_names', Array(String('utf-8'))))) + ) -class SaslAuthenticateResponse_v0(Request): +class DescribeConfigsRequest_v1(Request): + API_KEY = 32 + API_VERSION = 1 + RESPONSE_TYPE = DescribeConfigsResponse_v1 + SCHEMA = Schema( + ('resources', Array( + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_names', Array(String('utf-8'))))), + ('include_synonyms', Boolean) + ) + + +class DescribeConfigsRequest_v2(Request): + API_KEY = 32 + API_VERSION = 2 + RESPONSE_TYPE = DescribeConfigsResponse_v2 + SCHEMA = DescribeConfigsRequest_v1.SCHEMA + + +DescribeConfigsRequest = [ + DescribeConfigsRequest_v0, DescribeConfigsRequest_v1, + DescribeConfigsRequest_v2, +] +DescribeConfigsResponse = [ + DescribeConfigsResponse_v0, DescribeConfigsResponse_v1, + DescribeConfigsResponse_v2, +] + + +class SaslAuthenticateResponse_v0(Response): API_KEY = 36 API_VERSION = 0 SCHEMA = Schema( @@ -601,6 +800,17 @@ class SaslAuthenticateResponse_v0(Request): ) +class SaslAuthenticateResponse_v1(Response): + API_KEY = 36 + API_VERSION = 1 + SCHEMA = Schema( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('sasl_auth_bytes', Bytes), + ('session_lifetime_ms', Int64) + ) + + class SaslAuthenticateRequest_v0(Request): API_KEY = 36 API_VERSION = 0 @@ -610,8 +820,19 @@ class SaslAuthenticateRequest_v0(Request): ) -SaslAuthenticateRequest = [SaslAuthenticateRequest_v0] -SaslAuthenticateResponse = [SaslAuthenticateResponse_v0] +class SaslAuthenticateRequest_v1(Request): + API_KEY = 36 + API_VERSION = 1 + RESPONSE_TYPE = SaslAuthenticateResponse_v1 + SCHEMA = SaslAuthenticateRequest_v0.SCHEMA + + +SaslAuthenticateRequest = [ + SaslAuthenticateRequest_v0, 
SaslAuthenticateRequest_v1, +] +SaslAuthenticateResponse = [ + SaslAuthenticateResponse_v0, SaslAuthenticateResponse_v1, +] class CreatePartitionsResponse_v0(Response): @@ -626,6 +847,12 @@ class CreatePartitionsResponse_v0(Response): ) +class CreatePartitionsResponse_v1(Response): + API_KEY = 37 + API_VERSION = 1 + SCHEMA = CreatePartitionsResponse_v0.SCHEMA + + class CreatePartitionsRequest_v0(Request): API_KEY = 37 API_VERSION = 0 @@ -641,5 +868,17 @@ class CreatePartitionsRequest_v0(Request): ) -CreatePartitionsRequest = [CreatePartitionsRequest_v0] -CreatePartitionsResponse = [CreatePartitionsResponse_v0] +class CreatePartitionsRequest_v1(Request): + API_KEY = 37 + API_VERSION = 1 + SCHEMA = CreatePartitionsRequest_v0.SCHEMA + RESPONSE_TYPE = CreatePartitionsResponse_v1 + + +CreatePartitionsRequest = [ + CreatePartitionsRequest_v0, CreatePartitionsRequest_v1, +] +CreatePartitionsResponse = [ + CreatePartitionsResponse_v0, CreatePartitionsResponse_v1, +] + diff --git a/kafka/protocol/api.py b/kafka/protocol/api.py index efaf63ea2..64276fc17 100644 --- a/kafka/protocol/api.py +++ b/kafka/protocol/api.py @@ -3,7 +3,7 @@ import abc from kafka.protocol.struct import Struct -from kafka.protocol.types import Int16, Int32, String, Schema +from kafka.protocol.types import Int16, Int32, String, Schema, Array class RequestHeader(Struct): @@ -47,6 +47,9 @@ def expect_response(self): """Override this method if an api request does not always generate a response""" return True + def to_object(self): + return _to_object(self.SCHEMA, self) + class Response(Struct): __metaclass__ = abc.ABCMeta @@ -65,3 +68,30 @@ def API_VERSION(self): def SCHEMA(self): """An instance of Schema() representing the response structure""" pass + + def to_object(self): + return _to_object(self.SCHEMA, self) + + +def _to_object(schema, data): + obj = {} + for idx, (name, _type) in enumerate(zip(schema.names, schema.fields)): + if isinstance(data, Struct): + val = data.get_item(name) + else: + val = data[idx] + + if isinstance(_type, Schema): + obj[name] = _to_object(_type, val) + elif isinstance(_type, Array): + if isinstance(_type.array_of, (Array, Schema)): + obj[name] = [ + _to_object(_type.array_of, x) + for x in val + ] + else: + obj[name] = val + else: + obj[name] = val + + return obj diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index dd3f648cf..f367848ce 100644 --- a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -94,6 +94,72 @@ class FetchResponse_v6(Response): SCHEMA = FetchResponse_v5.SCHEMA +class FetchResponse_v7(Response): + """ + Add error_code and session_id to response + """ + API_KEY = 1 + API_VERSION = 7 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('session_id', Int32), + ('topics', Array( + ('topics', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('highwater_offset', Int64), + ('last_stable_offset', Int64), + ('log_start_offset', Int64), + ('aborted_transactions', Array( + ('producer_id', Int64), + ('first_offset', Int64))), + ('message_set', Bytes))))) + ) + + +class FetchResponse_v8(Response): + API_KEY = 1 + API_VERSION = 8 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v9(Response): + API_KEY = 1 + API_VERSION = 9 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v10(Response): + API_KEY = 1 + API_VERSION = 10 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v11(Response): + API_KEY = 1 + API_VERSION = 11 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + 
('error_code', Int16), + ('session_id', Int32), + ('topics', Array( + ('topics', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('highwater_offset', Int64), + ('last_stable_offset', Int64), + ('log_start_offset', Int64), + ('aborted_transactions', Array( + ('producer_id', Int64), + ('first_offset', Int64))), + ('preferred_read_replica', Int32), + ('message_set', Bytes))))) + ) + + class FetchRequest_v0(Request): API_KEY = 1 API_VERSION = 0 @@ -196,13 +262,125 @@ class FetchRequest_v6(Request): SCHEMA = FetchRequest_v5.SCHEMA +class FetchRequest_v7(Request): + """ + Add incremental fetch requests + """ + API_KEY = 1 + API_VERSION = 7 + RESPONSE_TYPE = FetchResponse_v7 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)) + )), + ) + + +class FetchRequest_v8(Request): + """ + bump used to indicate that on quota violation brokers send out responses before throttling. + """ + API_KEY = 1 + API_VERSION = 8 + RESPONSE_TYPE = FetchResponse_v8 + SCHEMA = FetchRequest_v7.SCHEMA + + +class FetchRequest_v9(Request): + """ + adds the current leader epoch (see KIP-320) + """ + API_KEY = 1 + API_VERSION = 9 + RESPONSE_TYPE = FetchResponse_v9 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)), + )), + ) + + +class FetchRequest_v10(Request): + """ + bumped up to indicate ZStandard capability. 
(see KIP-110) + """ + API_KEY = 1 + API_VERSION = 10 + RESPONSE_TYPE = FetchResponse_v10 + SCHEMA = FetchRequest_v9.SCHEMA + + +class FetchRequest_v11(Request): + """ + added rack ID to support read from followers (KIP-392) + """ + API_KEY = 1 + API_VERSION = 11 + RESPONSE_TYPE = FetchResponse_v11 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)) + )), + ('rack_id', String('utf-8')), + ) + + FetchRequest = [ FetchRequest_v0, FetchRequest_v1, FetchRequest_v2, FetchRequest_v3, FetchRequest_v4, FetchRequest_v5, - FetchRequest_v6 + FetchRequest_v6, FetchRequest_v7, FetchRequest_v8, + FetchRequest_v9, FetchRequest_v10, FetchRequest_v11, ] FetchResponse = [ FetchResponse_v0, FetchResponse_v1, FetchResponse_v2, FetchResponse_v3, FetchResponse_v4, FetchResponse_v5, - FetchResponse_v6 + FetchResponse_v6, FetchResponse_v7, FetchResponse_v8, + FetchResponse_v9, FetchResponse_v10, FetchResponse_v11, ] diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 3c254de40..1ed382b0d 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -53,6 +53,43 @@ class OffsetResponse_v2(Response): ) +class OffsetResponse_v3(Response): + """ + on quota violation, brokers send out responses before throttling + """ + API_KEY = 2 + API_VERSION = 3 + SCHEMA = OffsetResponse_v2.SCHEMA + + +class OffsetResponse_v4(Response): + """ + Add leader_epoch to response + """ + API_KEY = 2 + API_VERSION = 4 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('timestamp', Int64), + ('offset', Int64), + ('leader_epoch', Int32))))) + ) + + +class OffsetResponse_v5(Response): + """ + adds a new error code, OFFSET_NOT_AVAILABLE + """ + API_KEY = 2 + API_VERSION = 5 + SCHEMA = OffsetResponse_v4.SCHEMA + + class OffsetRequest_v0(Request): API_KEY = 2 API_VERSION = 0 @@ -105,5 +142,53 @@ class OffsetRequest_v2(Request): } -OffsetRequest = [OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2] -OffsetResponse = [OffsetResponse_v0, OffsetResponse_v1, OffsetResponse_v2] +class OffsetRequest_v3(Request): + API_KEY = 2 + API_VERSION = 3 + RESPONSE_TYPE = OffsetResponse_v3 + SCHEMA = OffsetRequest_v2.SCHEMA + DEFAULTS = { + 'replica_id': -1 + } + + +class OffsetRequest_v4(Request): + """ + Add current_leader_epoch to request + """ + API_KEY = 2 + API_VERSION = 4 + RESPONSE_TYPE = OffsetResponse_v4 + SCHEMA = Schema( + ('replica_id', Int32), + ('isolation_level', Int8), # <- added isolation_level + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int64), + ('timestamp', Int64))))) + ) + DEFAULTS = { + 'replica_id': -1 + } + + +class OffsetRequest_v5(Request): + API_KEY = 2 + API_VERSION = 5 + RESPONSE_TYPE = OffsetResponse_v5 + SCHEMA = OffsetRequest_v4.SCHEMA + DEFAULTS = { + 'replica_id': -1 + } + + +OffsetRequest = [ + OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2, + OffsetRequest_v3, OffsetRequest_v4, OffsetRequest_v5, +] +OffsetResponse = [ + OffsetResponse_v0, 
OffsetResponse_v1, OffsetResponse_v2, + OffsetResponse_v3, OffsetResponse_v4, OffsetResponse_v5, +] diff --git a/kafka/protocol/produce.py b/kafka/protocol/produce.py index f2df1f51a..9b3f6bf55 100644 --- a/kafka/protocol/produce.py +++ b/kafka/protocol/produce.py @@ -78,6 +78,50 @@ class ProduceResponse_v5(Response): ) +class ProduceResponse_v6(Response): + """ + The version number is bumped to indicate that on quota violation brokers send out responses before throttling. + """ + API_KEY = 0 + API_VERSION = 6 + SCHEMA = ProduceResponse_v5.SCHEMA + + +class ProduceResponse_v7(Response): + """ + V7 bumped up to indicate ZStandard capability. (see KIP-110) + """ + API_KEY = 0 + API_VERSION = 7 + SCHEMA = ProduceResponse_v6.SCHEMA + + +class ProduceResponse_v8(Response): + """ + V8 bumped up to add two new fields record_errors offset list and error_message + (See KIP-467) + """ + API_KEY = 0 + API_VERSION = 8 + SCHEMA = Schema( + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('offset', Int64), + ('timestamp', Int64), + ('log_start_offset', Int64)), + ('record_errors', (Array( + ('batch_index', Int32), + ('batch_index_error_message', String('utf-8')) + ))), + ('error_message', String('utf-8')) + ))), + ('throttle_time_ms', Int32) + ) + + class ProduceRequest(Request): API_KEY = 0 @@ -148,11 +192,41 @@ class ProduceRequest_v5(ProduceRequest): SCHEMA = ProduceRequest_v4.SCHEMA +class ProduceRequest_v6(ProduceRequest): + """ + The version number is bumped to indicate that on quota violation brokers send out responses before throttling. + """ + API_VERSION = 6 + RESPONSE_TYPE = ProduceResponse_v6 + SCHEMA = ProduceRequest_v5.SCHEMA + + +class ProduceRequest_v7(ProduceRequest): + """ + V7 bumped up to indicate ZStandard capability. 
(see KIP-110) + """ + API_VERSION = 7 + RESPONSE_TYPE = ProduceResponse_v7 + SCHEMA = ProduceRequest_v6.SCHEMA + + +class ProduceRequest_v8(ProduceRequest): + """ + V8 bumped up to add two new fields record_errors offset list and error_message to PartitionResponse + (See KIP-467) + """ + API_VERSION = 8 + RESPONSE_TYPE = ProduceResponse_v8 + SCHEMA = ProduceRequest_v7.SCHEMA + + ProduceRequest = [ ProduceRequest_v0, ProduceRequest_v1, ProduceRequest_v2, - ProduceRequest_v3, ProduceRequest_v4, ProduceRequest_v5 + ProduceRequest_v3, ProduceRequest_v4, ProduceRequest_v5, + ProduceRequest_v6, ProduceRequest_v7, ProduceRequest_v8, ] ProduceResponse = [ ProduceResponse_v0, ProduceResponse_v1, ProduceResponse_v2, - ProduceResponse_v3, ProduceResponse_v4, ProduceResponse_v5 + ProduceResponse_v3, ProduceResponse_v4, ProduceResponse_v5, + ProduceResponse_v6, ProduceResponse_v7, ProduceResponse_v8, ] diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index 676de1ba4..e9da6e6c1 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -30,6 +30,7 @@ def __init__(self, *args, **kwargs): # causes instances to "leak" to garbage self.encode = WeakMethod(self._encode_self) + @classmethod def encode(cls, item): # pylint: disable=E0202 bits = [] @@ -48,6 +49,11 @@ def decode(cls, data): data = BytesIO(data) return cls(*[field.decode(data) for field in cls.SCHEMA.fields]) + def get_item(self, name): + if name not in self.SCHEMA.names: + raise KeyError("%s is not in the schema" % name) + return self.__dict__[name] + def __repr__(self): key_vals = [] for name, field in zip(self.SCHEMA.names, self.SCHEMA.fields): @@ -64,11 +70,3 @@ def __eq__(self, other): if self.__dict__[attr] != other.__dict__[attr]: return False return True - -""" -class MetaStruct(type): - def __new__(cls, clsname, bases, dct): - nt = namedtuple(clsname, [name for (name, _) in dct['SCHEMA']]) - bases = tuple([Struct, nt] + list(bases)) - return super(MetaStruct, cls).__new__(cls, clsname, bases, dct) -""" From c852e2a4ffc4906650c253f0743ef1d50ccd8a04 Mon Sep 17 00:00:00 2001 From: Landon Sterk Date: Mon, 8 Jun 2020 09:55:01 -0700 Subject: [PATCH 280/291] Release v1.4.7.post2: protocol upgrades from gabriel-tincu, TylerLubeck --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 52762af2e..013f2b967 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.7.post1' +__version__ = '1.4.7.post2' From 4f4c205ff0cb54e4c0405f4bffa6ba8601a1f44f Mon Sep 17 00:00:00 2001 From: Jamie Hewland Date: Mon, 14 Aug 2023 14:05:09 +0100 Subject: [PATCH 281/291] Skip control batches (dpkp/kafka-python#2361) --- kafka/consumer/fetcher.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 5434c36a2..99d11f274 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -464,6 +464,13 @@ def _unpack_message_set(self, tp, records): except AttributeError: pass + # Control messages are used to enable transactions in Kafka and are generated by the + # broker. Clients should not return control batches (ie. those with this bit set) to + # applications. 
(since 0.11.0.0) + if getattr(batch, "is_control_batch", False): + batch = records.next_batch() + continue + for record in batch: key_size = len(record.key) if record.key is not None else -1 value_size = len(record.value) if record.value is not None else -1 From 4a320f93941453300bb35711b09e77b08530e14b Mon Sep 17 00:00:00 2001 From: Jamie Hewland Date: Mon, 14 Aug 2023 14:05:54 +0100 Subject: [PATCH 282/291] Bump version --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index 013f2b967..c6af931a1 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.7.post2' +__version__ = '1.4.7.post3' From c4ee77636436ad51d88cfe3f8e7f28cd764a775b Mon Sep 17 00:00:00 2001 From: Georgios Kousouris Date: Fri, 18 Aug 2023 15:53:32 +0100 Subject: [PATCH 283/291] Fix control batch bug --- kafka/consumer/fetcher.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 99d11f274..cb355b20f 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -821,11 +821,12 @@ def _parse_fetched_data(self, completed_fetch): " offset %d to buffered record list", tp, position) unpacked = list(self._unpack_message_set(tp, records)) - parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) - last_offset = unpacked[-1].offset - self._sensors.records_fetch_lag.record(highwater - last_offset) - num_bytes = records.valid_bytes() - records_count = len(unpacked) + if unpacked: + parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) + last_offset = unpacked[-1].offset + self._sensors.records_fetch_lag.record(highwater - last_offset) + num_bytes = records.valid_bytes() + records_count = len(unpacked) elif records.size_in_bytes() > 0: # we did not read a single message from a non-empty # buffer because that message's size is larger than From 84f985b21bdb891913e1a896d82465c99cbb73c6 Mon Sep 17 00:00:00 2001 From: Georgios Kousouris Date: Mon, 21 Aug 2023 11:25:01 +0100 Subject: [PATCH 284/291] Bump version to post4 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index c6af931a1..ccf7bda8c 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.7.post3' +__version__ = '1.4.7.post4' From af6e1034643db4c1e980cd4bef491c2789490c61 Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Tue, 31 Oct 2023 20:03:30 -0700 Subject: [PATCH 285/291] MSK IAM Authentication implementation --- kafka/conn.py | 50 ++++++++++++- kafka/msk.py | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 kafka/msk.py diff --git a/kafka/conn.py b/kafka/conn.py index a06de4910..d352c84a9 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -25,6 +25,7 @@ import kafka.errors as Errors from kafka.future import Future from kafka.metrics.stats import Avg, Count, Max, Rate +from kafka.msk import AwsMskIamClient from kafka.oauth.abstract import AbstractTokenProvider from kafka.protocol.admin import SaslHandShakeRequest from kafka.protocol.commit import OffsetFetchRequest @@ -81,6 +82,12 @@ class SSLWantWriteError(Exception): gssapi = None GSSError = None +# needed for AWS_MSK_IAM authentication: +try: + from botocore.session import Session as BotoSession +except ImportError: + # no botocore available, will disable AWS_MSK_IAM mechanism + BotoSession = None AFI_NAMES = { 
socket.AF_UNSPEC: "unspecified", @@ -224,7 +231,7 @@ class BrokerConnection(object): 'sasl_oauth_token_provider': None } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') - SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER') + SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER', 'AWS_MSK_IAM') def __init__(self, host, port, afi, **configs): self.host = host @@ -269,6 +276,11 @@ def __init__(self, host, port, afi, **configs): token_provider = self.config['sasl_oauth_token_provider'] assert token_provider is not None, 'sasl_oauth_token_provider required for OAUTHBEARER sasl' assert callable(getattr(token_provider, "token", None)), 'sasl_oauth_token_provider must implement method #token()' + + if self.config['sasl_mechanism'] == 'AWS_MSK_IAM': + assert BotoSession is not None, 'AWS_MSK_IAM requires the "botocore" package' + assert self.config['security_protocol'] == 'SASL_SSL', 'AWS_MSK_IAM requires SASL_SSL' + # This is not a general lock / this class is not generally thread-safe yet # However, to avoid pushing responsibility for maintaining # per-connection locks to the upstream client, we will use this lock to @@ -552,6 +564,8 @@ def _handle_sasl_handshake_response(self, future, response): return self._try_authenticate_gssapi(future) elif self.config['sasl_mechanism'] == 'OAUTHBEARER': return self._try_authenticate_oauth(future) + elif self.config['sasl_mechanism'] == 'AWS_MSK_IAM': + return self._try_authenticate_aws_msk_iam(future) else: return future.failure( Errors.UnsupportedSaslMechanismError( @@ -652,6 +666,40 @@ def _try_authenticate_plain(self, future): log.info('%s: Authenticated as %s via PLAIN', self, self.config['sasl_plain_username']) return future.success(True) + def _try_authenticate_aws_msk_iam(self, future): + session = BotoSession() + client = AwsMskIamClient( + host=self.host, + boto_session=session, + ) + + msg = client.first_message() + size = Int32.encode(len(msg)) + + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + self._send_bytes_blocking(size + msg) + data = self._recv_bytes_blocking(4) + data = self._recv_bytes_blocking(struct.unpack('4B', data)[-1]) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) + + log.info('%s: Authenticated via AWS_MSK_IAM %s', self, data.decode('utf-8')) + return future.success(True) + def _try_authenticate_gssapi(self, future): kerberos_damin_name = self.config['sasl_kerberos_domain_name'] or self.host auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name diff --git a/kafka/msk.py b/kafka/msk.py new file mode 100644 index 000000000..3351c1409 --- /dev/null +++ b/kafka/msk.py @@ -0,0 +1,197 @@ +import datetime +import hashlib +import hmac +import json +import string + +from kafka.vendor.six.moves import urllib + + +class AwsMskIamClient: + UNRESERVED_CHARS = string.ascii_letters + string.digits + '-._~' + + def __init__(self, host, boto_session): + """ + Arguments: + host (str): The hostname of the broker. 
+ boto_session (botocore.BotoSession): the boto session + """ + self.algorithm = 'AWS4-HMAC-SHA256' + self.expires = '900' + self.hashfunc = hashlib.sha256 + self.headers = [ + ('host', host) + ] + self.version = '2020_10_22' + + self.service = 'kafka-cluster' + self.action = '{}:Connect'.format(self.service) + + now = datetime.datetime.utcnow() + self.datestamp = now.strftime('%Y%m%d') + self.timestamp = now.strftime('%Y%m%dT%H%M%SZ') + + self.host = host + self.boto_session = boto_session + + @property + def access_key(self): + return self.boto_session.get_credentials().access_key + + @property + def secret_key(self): + return self.boto_session.get_credentials().secret_key + + @property + def token(self): + return self.boto_session.get_credentials().token + + @property + def region(self): + # TODO: This logic is not perfect and should be revisited + for host in self.host.split(','): + if 'amazonaws.com' in host: + return host.split('.')[-3] + return 'us-west-2' + + @property + def _credential(self): + return '{0.access_key}/{0._scope}'.format(self) + + @property + def _scope(self): + return '{0.datestamp}/{0.region}/{0.service}/aws4_request'.format(self) + + @property + def _signed_headers(self): + """ + Returns (str): + An alphabetically sorted, semicolon-delimited list of lowercase + request header names. + """ + return ';'.join(sorted(k.lower() for k, _ in self.headers)) + + @property + def _canonical_headers(self): + """ + Returns (str): + A newline-delimited list of header names and values. + Header names are lowercased. + """ + return '\n'.join(map(':'.join, self.headers)) + '\n' + + @property + def _canonical_request(self): + """ + Returns (str): + An AWS Signature Version 4 canonical request in the format: + <Method>\n + <Path>\n + <CanonicalQueryString>\n + <CanonicalHeaders>\n + <SignedHeaders>\n + <HashedPayload> + """ + # The hashed_payload is always an empty string for MSK. + hashed_payload = self.hashfunc(b'').hexdigest() + return '\n'.join(( + 'GET', + '/', + self._canonical_querystring, + self._canonical_headers, + self._signed_headers, + hashed_payload, + )) + + @property + def _canonical_querystring(self): + """ + Returns (str): + A '&'-separated list of URI-encoded key/value pairs. + """ + params = [] + params.append(('Action', self.action)) + params.append(('X-Amz-Algorithm', self.algorithm)) + params.append(('X-Amz-Credential', self._credential)) + params.append(('X-Amz-Date', self.timestamp)) + params.append(('X-Amz-Expires', self.expires)) + if self.token: + params.append(('X-Amz-Security-Token', self.token)) + params.append(('X-Amz-SignedHeaders', self._signed_headers)) + + return '&'.join(self._uriencode(k) + '=' + self._uriencode(v) for k, v in params) + + @property + def _signing_key(self): + """ + Returns (bytes): + An AWS Signature V4 signing key generated from the secret_key, date, + region, service, and request type. + """ + key = self._hmac(('AWS4' + self.secret_key).encode('utf-8'), self.datestamp) + key = self._hmac(key, self.region) + key = self._hmac(key, self.service) + key = self._hmac(key, 'aws4_request') + return key + + @property + def _signing_str(self): + """ + Returns (str): + A string used to sign the AWS Signature V4 payload in the format: + <Algorithm>\n + <Timestamp>\n + <Scope>\n + <CanonicalRequestHash> + """ + canonical_request_hash = self.hashfunc(self._canonical_request.encode('utf-8')).hexdigest() + return '\n'.join((self.algorithm, self.timestamp, self._scope, canonical_request_hash)) + + def _uriencode(self, msg): + """ + Arguments: + msg (str): A string to URI-encode. 
+ + Returns (str): + The URI-encoded version of the provided msg, following the encoding + rules specified: https://github.com/aws/aws-msk-iam-auth#uriencode + """ + return urllib.parse.quote(msg, safe=self.UNRESERVED_CHARS) + + def _hmac(self, key, msg): + """ + Arguments: + key (bytes): A key to use for the HMAC digest. + msg (str): A value to include in the HMAC digest. + Returns (bytes): + An HMAC digest of the given key and msg. + """ + return hmac.new(key, msg.encode('utf-8'), digestmod=self.hashfunc).digest() + + def first_message(self): + """ + Returns (bytes): + An encoded JSON authentication payload that can be sent to the + broker. + """ + signature = hmac.new( + self._signing_key, + self._signing_str.encode('utf-8'), + digestmod=self.hashfunc, + ).hexdigest() + msg = { + 'version': self.version, + 'host': self.host, + 'user-agent': 'kafka-python', + 'action': self.action, + 'x-amz-algorithm': self.algorithm, + 'x-amz-credential': self._credential, + 'x-amz-date': self.timestamp, + 'x-amz-signedheaders': self._signed_headers, + 'x-amz-expires': self.expires, + 'x-amz-signature': signature, + } + if self.token: + msg['x-amz-security-token'] = self.token + + return json.dumps(msg, separators=(',', ':')).encode('utf-8') From fc6b277eb90a2ba8f0820beefc08cf8b0296c781 Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Thu, 2 Nov 2023 19:00:30 -0700 Subject: [PATCH 286/291] Support arbitrary kafka configuration in SimpleClient to support things like authentication --- kafka/client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index b65019f0b..c66c52bed 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -76,7 +76,7 @@ class SimpleClient(object): # socket timeout. def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, - correlation_id=0, metrics=None): + correlation_id=0, metrics=None, **kwargs): # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout @@ -90,6 +90,10 @@ def __init__(self, hosts, client_id=CLIENT_ID, self.topics_to_brokers = {} # TopicPartition -> BrokerMetadata self.topic_partitions = {} # topic -> partition -> leader + # Support arbitrary kwargs to be provided as config to BrokerConnection + # This will allow advanced features like Authentication to work + self.config = kwargs + self.load_metadata_for_topics() # bootstrap with all metadata ################## @@ -108,6 +112,7 @@ def _get_conn(self, host, port, afi, node_id='bootstrap'): metrics=self._metrics_registry, metric_group_prefix='simple-client', node_id=node_id, + **self.config, ) conn = self._conns[host_key] From 950045041fa3e6f534c140b01b32270e0d85c1b0 Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Mon, 13 Nov 2023 18:04:00 -0800 Subject: [PATCH 287/291] Add tests from upstream PR --- test/test_msk.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 test/test_msk.py diff --git a/test/test_msk.py b/test/test_msk.py new file mode 100644 index 000000000..d4620e39e --- /dev/null +++ b/test/test_msk.py @@ -0,0 +1,67 @@ +import datetime +import json + +from kafka.msk import AwsMskIamClient + +try: + from unittest import mock +except ImportError: + import mock + + +def client_factory(token=None): + now = datetime.datetime.utcfromtimestamp(1629321911) + with mock.patch('kafka.msk.datetime') as mock_dt: + mock_dt.datetime.utcnow = mock.Mock(return_value=now) + return AwsMskIamClient( + host='localhost', + 
access_key='XXXXXXXXXXXXXXXXXXXX', + secret_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', + region='us-east-1', + token=token, + ) + + +def test_aws_msk_iam_client_permanent_credentials(): + client = client_factory(token=None) + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': '0fa42ae3d5693777942a7a4028b564f0b372bafa2f71c1a19ad60680e6cb994b', + } + assert actual == expected + + +def test_aws_msk_iam_client_temporary_credentials(): + client = client_factory(token='XXXXX') + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': 'b0619c50b7ecb4a7f6f92bd5f733770df5710e97b25146f97015c0b1db783b05', + 'x-amz-security-token': 'XXXXX', + } + assert actual == expected From c12044802d456d31a38546e517a56f25e5c5e168 Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Tue, 14 Nov 2023 16:45:02 -0800 Subject: [PATCH 288/291] Make tests less verbose and mock botocore --- test/test_msk.py | 67 ++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/test/test_msk.py b/test/test_msk.py index d4620e39e..72b9737c4 100644 --- a/test/test_msk.py +++ b/test/test_msk.py @@ -1,5 +1,9 @@ import datetime import json +import sys + +import pytest +from unittest import TestCase from kafka.msk import AwsMskIamClient @@ -9,59 +13,38 @@ import mock -def client_factory(token=None): - now = datetime.datetime.utcfromtimestamp(1629321911) - with mock.patch('kafka.msk.datetime') as mock_dt: - mock_dt.datetime.utcnow = mock.Mock(return_value=now) - return AwsMskIamClient( - host='localhost', - access_key='XXXXXXXXXXXXXXXXXXXX', - secret_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', - region='us-east-1', - token=token, - ) - +@pytest.fixture(params=[{'session_token': 'session_token', 'host': 'localhost'}, {'session_token': None, 'host': 'localhost.us-east-1.amazonaws.com'}]) +def msk_client(request): + # To avoid a package dependency on the optional botocore library, we mock the module out + sys.modules['botocore.session'] = mock.MagicMock() + from botocore.session import Session # pylint: disable=import-error -def test_aws_msk_iam_client_permanent_credentials(): - client = client_factory(token=None) - msg = client.first_message() - assert msg - assert isinstance(msg, bytes) - actual = json.loads(msg) - - expected = { - 'version': '2020_10_22', - 'host': 'localhost', - 'user-agent': 'kafka-python', - 'action': 'kafka-cluster:Connect', - 'x-amz-algorithm': 'AWS4-HMAC-SHA256', - 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', - 'x-amz-date': '20210818T212511Z', - 'x-amz-signedheaders': 'host', - 'x-amz-expires': '900', - 'x-amz-signature': 
'0fa42ae3d5693777942a7a4028b564f0b372bafa2f71c1a19ad60680e6cb994b', - } - assert actual == expected + session = Session() + session.get_credentials = mock.MagicMock(return_value=mock.MagicMock(id='the_actual_credentials', access_key='akia', secret_key='secret', token=request.param['session_token'])) + yield AwsMskIamClient( + host=request.param["host"], + boto_session = session, + ) -def test_aws_msk_iam_client_temporary_credentials(): - client = client_factory(token='XXXXX') - msg = client.first_message() +def test_aws_msk_iam(msk_client): + msg = msk_client.first_message() assert msg assert isinstance(msg, bytes) - actual = json.loads(msg) + actual = json.loads(msg.decode('utf-8')) expected = { 'version': '2020_10_22', - 'host': 'localhost', + 'host': msk_client.host, 'user-agent': 'kafka-python', 'action': 'kafka-cluster:Connect', 'x-amz-algorithm': 'AWS4-HMAC-SHA256', - 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', - 'x-amz-date': '20210818T212511Z', + 'x-amz-credential': '{}/{}/{}/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d'), 'us-west-2' if msk_client.host == 'localhost' else 'us-east-1'), + 'x-amz-date': mock.ANY, 'x-amz-signedheaders': 'host', 'x-amz-expires': '900', - 'x-amz-signature': 'b0619c50b7ecb4a7f6f92bd5f733770df5710e97b25146f97015c0b1db783b05', - 'x-amz-security-token': 'XXXXX', + 'x-amz-signature': mock.ANY, } - assert actual == expected + if msk_client.token: + expected['x-amz-security-token'] = msk_client.token + TestCase().assertEqual(actual, expected) From 5ab4bbfc16a7fdc5b5cdbd50eafe7bcdaaff8986 Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Wed, 15 Nov 2023 19:00:58 -0800 Subject: [PATCH 289/291] Update region resolution logic and refactor / add more tests --- kafka/msk.py | 19 ++++++++-- test/test_msk.py | 93 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 99 insertions(+), 13 deletions(-) diff --git a/kafka/msk.py b/kafka/msk.py index 3351c1409..a6d8e772d 100644 --- a/kafka/msk.py +++ b/kafka/msk.py @@ -34,6 +34,11 @@ def __init__(self, host, boto_session): self.host = host self.boto_session = boto_session + # This will raise if the region can't be determined + # Do this during init instead of waiting for failures downstream + if self.region: + pass + @property def access_key(self): return self.boto_session.get_credentials().access_key @@ -48,11 +53,21 @@ def token(self): @property def region(self): - # TODO: This logic is not perfect and should be revisited + # Try to get the region information from the broker hostname for host in self.host.split(','): if 'amazonaws.com' in host: return host.split('.')[-3] - return 'us-west-2' + + # If the region can't be determined from hostname, try the boto session + # This will only have a value if: + # - `AWS_DEFAULT_REGION` environment variable is set + # - `~/.aws/config` region variable is set + region = self.boto_session.get_config_variable('region') + if region: + return region + + # Otherwise give up + raise Exception('Could not determine region from broker host(s) or aws configuration') @property def _credential(self): diff --git a/test/test_msk.py b/test/test_msk.py index 72b9737c4..0e5c414de 100644 --- a/test/test_msk.py +++ b/test/test_msk.py @@ -13,21 +13,92 @@ import mock -@pytest.fixture(params=[{'session_token': 'session_token', 'host': 'localhost'}, {'session_token': None, 'host': 'localhost.us-east-1.amazonaws.com'}]) -def msk_client(request): +@pytest.fixture +def boto_session(): # To 
avoid a package dependency on the optional botocore library, we mock the module out sys.modules['botocore.session'] = mock.MagicMock() from botocore.session import Session # pylint: disable=import-error - session = Session() - session.get_credentials = mock.MagicMock(return_value=mock.MagicMock(id='the_actual_credentials', access_key='akia', secret_key='secret', token=request.param['session_token'])) - yield AwsMskIamClient( - host=request.param["host"], - boto_session = session, + boto_session = Session() + boto_session.get_credentials = mock.MagicMock(return_value=mock.MagicMock(id='the_actual_credentials', access_key='akia', secret_key='secret', token=None)) + yield boto_session + + +def test_aws_msk_iam_region_from_config(boto_session): + # Region determined by configuration + boto_session.get_config_variable = mock.MagicMock(return_value='us-west-2') + msk_client = AwsMskIamClient( + host='localhost', + boto_session = boto_session, ) + msg = msk_client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg.decode('utf-8')) + + expected = { + 'version': '2020_10_22', + 'host': msk_client.host, + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': '{}/{}/us-west-2/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), + 'x-amz-date': mock.ANY, + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': mock.ANY, + } + TestCase().assertEqual(actual, expected) -def test_aws_msk_iam(msk_client): +def test_aws_msk_iam_region_from_hostname(boto_session): + # Region determined by hostname + msk_client = AwsMskIamClient( + host='localhost.us-east-1.amazonaws.com', + boto_session = boto_session, + ) + msg = msk_client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg.decode('utf-8')) + + expected = { + 'version': '2020_10_22', + 'host': msk_client.host, + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': '{}/{}/us-east-1/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), + 'x-amz-date': mock.ANY, + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': mock.ANY, + } + TestCase().assertEqual(actual, expected) + + +def test_aws_msk_iam_no_region(boto_session): + # No region from config + boto_session.get_config_variable = mock.MagicMock(return_value=None) + + with TestCase().assertRaises(Exception) as e: + # No region from hostname + msk_client = AwsMskIamClient( + host='localhost', + boto_session = boto_session, + ) + assert 'Could not determine region from broker host(s) or aws configuration' == str(e.exception) + + +@pytest.mark.parametrize('session_token', [(None), ('the_token')]) +def test_aws_msk_iam_permanent_and_temporary_credentials(session_token, request): + boto_session = request.getfixturevalue('boto_session') + if session_token: + boto_session.get_credentials.return_value.token = session_token + msk_client = AwsMskIamClient( + host='localhost.us-east-1.amazonaws.com', + boto_session = boto_session, + ) msg = msk_client.first_message() assert msg assert isinstance(msg, bytes) @@ -39,12 +110,12 @@ def test_aws_msk_iam(msk_client): 'user-agent': 'kafka-python', 'action': 'kafka-cluster:Connect', 'x-amz-algorithm': 'AWS4-HMAC-SHA256', - 'x-amz-credential': 
'{}/{}/{}/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d'), 'us-west-2' if msk_client.host == 'localhost' else 'us-east-1'), + 'x-amz-credential': '{}/{}/us-east-1/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), 'x-amz-date': mock.ANY, 'x-amz-signedheaders': 'host', 'x-amz-expires': '900', 'x-amz-signature': mock.ANY, } - if msk_client.token: - expected['x-amz-security-token'] = msk_client.token + if session_token: + expected['x-amz-security-token'] = session_token TestCase().assertEqual(actual, expected) From beaee8602c97d7961853785676af6150a9fa7dab Mon Sep 17 00:00:00 2001 From: Daniel Popescu Date: Wed, 15 Nov 2023 19:04:42 -0800 Subject: [PATCH 290/291] Use different exception type --- kafka/msk.py | 3 ++- test/test_msk.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kafka/msk.py b/kafka/msk.py index a6d8e772d..e058c286e 100644 --- a/kafka/msk.py +++ b/kafka/msk.py @@ -4,6 +4,7 @@ import json import string +from kafka.errors import IllegalArgumentError from kafka.vendor.six.moves import urllib @@ -67,7 +68,7 @@ def region(self): return region # Otherwise give up - raise Exception('Could not determine region from broker host(s) or aws configuration') + raise IllegalArgumentError('Could not determine region from broker host(s) or aws configuration') @property def _credential(self): diff --git a/test/test_msk.py b/test/test_msk.py index 0e5c414de..69855fbb9 100644 --- a/test/test_msk.py +++ b/test/test_msk.py @@ -5,6 +5,7 @@ import pytest from unittest import TestCase +from kafka.errors import IllegalArgumentError from kafka.msk import AwsMskIamClient try: @@ -81,13 +82,13 @@ def test_aws_msk_iam_no_region(boto_session): # No region from config boto_session.get_config_variable = mock.MagicMock(return_value=None) - with TestCase().assertRaises(Exception) as e: + with TestCase().assertRaises(IllegalArgumentError) as e: # No region from hostname msk_client = AwsMskIamClient( host='localhost', boto_session = boto_session, ) - assert 'Could not determine region from broker host(s) or aws configuration' == str(e.exception) + assert 'IllegalArgumentError: Could not determine region from broker host(s) or aws configuration' == str(e.exception) @pytest.mark.parametrize('session_token', [(None), ('the_token')]) From c062282be24dbaef619eb7fcd0c1ac9296b8ac0e Mon Sep 17 00:00:00 2001 From: dpopes Date: Wed, 29 Nov 2023 08:49:39 -0800 Subject: [PATCH 291/291] Bump version to post5 --- kafka/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/version.py b/kafka/version.py index ccf7bda8c..e47fd1c67 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.7.post4' +__version__ = '1.4.7.post5'
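Taken together, patches 285-291 let kafka-python clients authenticate to Amazon MSK brokers with the new AWS_MSK_IAM SASL mechanism over SASL_SSL. A minimal usage sketch follows; the bootstrap endpoint, port, and topic name are placeholders, and it assumes botocore is installed with AWS credentials available through its normal lookup chain (environment variables, ~/.aws/credentials, or an instance/task role).

import ssl

from kafka import KafkaConsumer

# AWS_MSK_IAM requires SASL_SSL; credentials are resolved by botocore at
# connection time. The endpoint and topic below are placeholders.
consumer = KafkaConsumer(
    'my-topic',
    bootstrap_servers='b-1.mycluster.example.kafka.us-east-1.amazonaws.com:9098',
    security_protocol='SASL_SSL',
    sasl_mechanism='AWS_MSK_IAM',
    ssl_context=ssl.create_default_context(),
)

for message in consumer:
    print(message.topic, message.partition, message.offset, message.value)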
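Patch 286 forwards arbitrary keyword arguments from SimpleClient through to BrokerConnection, so the legacy client can reuse the same authentication settings. A short sketch under the same assumptions (placeholder host; botocore installed):

from kafka import SimpleClient

# Extra kwargs are passed straight through to BrokerConnection, which is what
# allows SASL/SSL settings on the legacy client. The host is a placeholder.
client = SimpleClient(
    hosts='b-1.mycluster.example.kafka.us-east-1.amazonaws.com:9098',
    security_protocol='SASL_SSL',
    sasl_mechanism='AWS_MSK_IAM',
)
topics = client.topic_partitions  # populated by the metadata bootstrap in __init__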