diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..766836180 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +**/__pycache__ +**/*.pyc diff --git a/.travis.yml b/.travis.yml index cdb93396d..b934cec8b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,16 +4,12 @@ dist: xenial python: - 2.7 - - 3.4 - - 3.7 + - 3.5 + - 3.6 - pypy2.7-6.0 env: - - KAFKA_VERSION=0.8.2.2 - - KAFKA_VERSION=0.9.0.1 - - KAFKA_VERSION=0.10.2.1 - - KAFKA_VERSION=0.11.0.2 - - KAFKA_VERSION=1.1.1 + - KAFKA_VERSION=1.1.0 addons: apt: @@ -35,7 +31,7 @@ install: - pip install . script: - - tox -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy2.7-6.0" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` + - tox -i https://pypi.python.org/simple -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy2.7-6.0" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` after_success: - coveralls diff --git a/CHANGES.md b/CHANGES.md index 2e3918eda..05fac9acc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,11 +1,73 @@ +# 1.4.7 (Sep 30, 2019) + +This is a minor release focused on KafkaConsumer performance, Admin Client +improvements, and Client concurrency. The KafkaConsumer iterator implementation +has been greatly simplified so that it just wraps consumer.poll(). The prior +implementation will remain available for a few more releases using the optional +KafkaConsumer config: `legacy_iterator=True` . This is expected to improve +consumer throughput substantially and help reduce heartbeat failures / group +rebalancing. + +Client +* Send socket data via non-blocking IO with send buffer (dpkp / PR #1912) +* Rely on socket selector to detect completed connection attempts (dpkp / PR #1909) +* Improve connection lock handling; always use context manager (melor,dpkp / PR #1895) +* Reduce client poll timeout when there are no in-flight requests (dpkp / PR #1823) + +KafkaConsumer +* Do not use wakeup when sending fetch requests from consumer (dpkp / PR #1911) +* Wrap `consumer.poll()` for KafkaConsumer iteration (dpkp / PR #1902) +* Allow the coordinator to auto-commit on old brokers (justecorruptio / PR #1832) +* Reduce internal client poll timeout for (legacy) consumer iterator interface (dpkp / PR #1824) +* Use dedicated connection for group coordinator (dpkp / PR #1822) +* Change coordinator lock acquisition order (dpkp / PR #1821) +* Make `partitions_for_topic` a read-through cache (Baisang / PR #1781,#1809) +* Fix consumer hanging indefinitely on topic deletion while rebalancing (commanderdishwasher / PR #1782) + +Miscellaneous Bugfixes / Improvements +* Fix crc32c avilability on non-intel architectures (ossdev07 / PR #1904) +* Load system default SSL CAs if `ssl_cafile` is not provided (iAnomaly / PR #1883) +* Catch py3 TimeoutError in BrokerConnection send/recv (dpkp / PR #1820) +* Added a function to determine if bootstrap is successfully connected (Wayde2014 / PR #1876) + +Admin Client +* Add ACL api support to KafkaAdminClient (ulrikjohansson / PR #1833) +* Add `sasl_kerberos_domain_name` config to KafkaAdminClient (jeffwidman / PR #1852) +* Update `security_protocol` config documentation for KafkaAdminClient (cardy31 / PR #1849) +* Break FindCoordinator into request/response methods in KafkaAdminClient (jeffwidman / PR #1871) +* Break consumer operations into request / response methods in KafkaAdminClient (jeffwidman / PR #1845) +* Parallelize calls to `_send_request_to_node()` in KafkaAdminClient (davidheitman / PR #1807) + +Test Infrastructure / Documentation / Maintenance +* Add Kafka 2.3.0 to 
test matrix and compatibility docs (dpkp / PR #1915) +* Convert remaining `KafkaConsumer` tests to `pytest` (jeffwidman / PR #1886) +* Bump integration tests to 0.10.2.2 and 0.11.0.3 (jeffwidman / #1890) +* Cleanup handling of `KAFKA_VERSION` env var in tests (jeffwidman / PR #1887) +* Minor test cleanup (jeffwidman / PR #1885) +* Use `socket.SOCK_STREAM` in test assertions (iv-m / PR #1879) +* Sanity test for `consumer.topics()` and `consumer.partitions_for_topic()` (Baisang / PR #1829) +* Cleanup seconds conversion in client poll timeout calculation (jeffwidman / PR #1825) +* Remove unused imports (jeffwidman / PR #1808) +* Cleanup python nits in RangePartitionAssignor (jeffwidman / PR #1805) +* Update links to kafka consumer config docs (jeffwidman) +* Fix minor documentation typos (carsonip / PR #1865) +* Remove unused/weird comment line (jeffwidman / PR #1813) +* Update docs for `api_version_auto_timeout_ms` (jeffwidman / PR #1812) + +# 1.4.6.post2 (Aug 27, 2019) +* Cherrypick change from upstream to make blocking calls for Kafka metadata if we don't have any + +# 1.4.6.post1 (Jun 4, 2019) +This release merges in changes from 1.4.5 and 1.4.6 upstream. +The only key differences are we focus on py35 still instead of py36/py37, and +we only build for versions 0.10.2.2 and 1.1.0 and 1.1.1 + # 1.4.6 (Apr 2, 2019) This is a patch release primarily focused on bugs related to concurrency, SSL connections and testing, and SASL authentication: - Client Concurrency Issues (Race Conditions / Deadlocks) - * Fix race condition in `protocol.send_bytes` (isamaru / PR #1752) * Do not call `state_change_callback` with lock (dpkp / PR #1775) * Additional BrokerConnection locks to synchronize protocol/IFR state (dpkp / PR #1768) @@ -14,12 +76,10 @@ Client Concurrency Issues (Race Conditions / Deadlocks) * Hold lock during `client.check_version` (dpkp / PR #1771) Producer Wakeup / TimeoutError - * Dont wakeup during `maybe_refresh_metadata` -- it is only called by poll() (dpkp / PR #1769) * Dont do client wakeup when sending from sender thread (dpkp / PR #1761) SSL - Python3.7 Support / Bootstrap Hostname Verification / Testing - * Wrap SSL sockets after connecting for python3.7 compatibility (dpkp / PR #1754) * Allow configuration of SSL Ciphers (dpkp / PR #1755) * Maintain shadow cluster metadata for bootstrapping (dpkp / PR #1753) @@ -28,13 +88,11 @@ SSL - Python3.7 Support / Bootstrap Hostname Verification / Testing * Reset reconnect backoff on SSL connection (dpkp / PR #1777) SASL - OAuthBearer support / api version bugfix - * Fix 0.8.2 protocol quick detection / fix SASL version check (dpkp / PR #1763) * Update sasl configuration docstrings to include supported mechanisms (dpkp) * Support SASL OAuthBearer Authentication (pt2pham / PR #1750) Miscellaneous Bugfixes - * Dont force metadata refresh when closing unneeded bootstrap connections (dpkp / PR #1773) * Fix possible AttributeError during conn._close_socket (dpkp / PR #1776) * Return connection state explicitly after close in connect() (dpkp / PR #1778) @@ -92,6 +150,13 @@ Compatibility * Remove unused import from kafka/producer/record_accumulator.py (jeffwidman / PR #1705) * Fix SSL connection testing in Python 3.7 (seanthegeek, silentben / PR #1669) +# 1.4.4.post1 (Jan 10, 2019) + +* Added proc.communicate() patch that got merged upstream to ensure tests don't deadlock +* Only run tests for KAFKA_VERSION 1.1.0 (faster builds) +* Use internal pypi when available +* Remove py26 support (faster builds) +* Use requirements-dev.txt instead of 
pinning requirements in tox.ini # 1.4.4 (Nov 20, 2018) @@ -158,6 +223,22 @@ Compatibility * Vendor `six` consistently (jeffwidman #1605) * Prevent `pylint` import errors on `six.moves` (jeffwidman #1609) +# 1.4.3.post5 (Sep 10, 2018) +* Remove tests for kafka version 0.11.0.2 +* Fix failing build when logging warning + +# 1.4.3.post4 (Aug 8, 2018) +* Add usage of CreatePartition protocol in adminClient + +# 1.4.3.post3 (Aug 3, 2018) +* Change prospectus + +# 1.4.3.post2 (Aug 2, 2018) +* Remove unused versions of kafka for integration testing + +# 1.4.3.post1 (Aug 2, 2018) +1. Upgrade to upstream kafka-python 1.4.3 +2. Remove support of 0.8, 0.9, 0.10.1.1, 0.10.2.1 and added support of kafka 0.10.2.2 testing # 1.4.3 (May 26, 2018) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..be0337ace --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +FROM ubuntu:xenial +ENV DEBIAN_FRONTEND=noninteractive + +RUN echo "deb http://ppa.launchpad.net/fkrull/deadsnakes/ubuntu precise main" >> /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5BB92C09DB82666C +RUN apt-get update && apt-get install -y python2.7-dev \ + python3.5-dev \ + python-pkg-resources \ + python-setuptools \ + python-virtualenv \ + libsnappy-dev \ + locales \ + openjdk-8-jdk \ + wget\ + g++ \ + ca-certificates \ + python-pip \ + python-tox + +# python-lz4 requires minium pypy version 5.8.0 +RUN wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 +RUN tar xf pypy2-v5.8.0-linux64.tar.bz2 +RUN ln -s $PWD/pypy2-v5.8.0-linux64/bin/pypy /usr/local/bin/pypy + +RUN /usr/sbin/locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-amd64" +ENV PATH="$PATH:$JAVA_HOME/bin" + +COPY servers /work/servers +COPY kafka /work/kafka +COPY test /work/test +COPY .covrc /work +COPY pylint.rc /work +COPY README.rst /work +COPY build_integration.sh /work +COPY setup.cfg /work +COPY setup.py /work +COPY tox.ini /work +COPY LICENSE /work +COPY AUTHORS.md /work +COPY CHANGES.md /work +COPY MANIFEST.in /work +COPY run_itest.sh /work +COPY run_utest.sh /work +COPY requirements-dev.txt /work +RUN chmod +x /work/run_itest.sh +RUN chmod +x /work/run_utest.sh + +WORKDIR /work diff --git a/Makefile b/Makefile index b4dcbffc9..935b7d2ad 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,34 @@ -# Some simple testing tasks (sorry, UNIX only). +.DELETE_ON_ERROR: + +ifeq ($(findstring .yelpcorp.com,$(shell hostname -f)), .yelpcorp.com) + export PIP_INDEX_URL ?= https://pypi.yelpcorp.com/simple +else + export PIP_INDEX_URL ?= https://pypi.python.org/simple +endif + +all: test itest + +test: + tox -e py27 + tox -e py35 + tox -e py36 + +unit_test_docker: + docker build -t kafka_python_test . + docker run kafka_python_test /work/run_utest.sh + +itest: + docker build -t kafka_python_test . +# travis build passes because ipv6 is disabled there +# it passes the integration test locally if we disable ipv6 here + docker run --sysctl net.ipv6.conf.all.disable_ipv6=1 kafka_python_test /work/run_itest.sh + +docs: + tox -e docs + FLAGS= -KAFKA_VERSION=0.11.0.2 +KAFKA_VERSION=1.1.0 SCALA_VERSION=2.12 setup: @@ -14,8 +41,8 @@ servers/$(KAFKA_VERSION)/kafka-bin: build-integration: servers/$(KAFKA_VERSION)/kafka-bin # Test and produce coverage using tox. 
This is the same as is run on Travis -test37: build-integration - KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py37 -- $(FLAGS) +test35: build-integration + KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py35 -- $(FLAGS) test27: build-integration KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py27 -- $(FLAGS) @@ -51,9 +78,13 @@ clean: rm -rf docs/_build/ rm -rf cover rm -rf dist + rm -rf kafka-python.egg-info/ .tox/ + find . -name '*.pyc' -delete + find . -name '__pycache__' -delete + docker rmi -f kafka_python_test doc: make -C docs html @echo "open file://`pwd`/docs/_build/html/index.html" -.PHONY: all test37 test27 test-local cov-local clean doc +.PHONY: all test test35 test27 test-local cov-local clean doc diff --git a/README.rst b/README.rst index 9469adea0..40cd55cbc 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Kafka Python client ------------------------ -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -150,7 +150,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a KafkaClient.check_version() method that probes a kafka broker and attempts to identify which version it is running -(0.8.0 to 1.1+). +(0.8.0 to 2.3+). Low-level ********* diff --git a/build_integration.sh b/build_integration.sh index c6df0b26b..c9b3ea075 100755 --- a/build_integration.sh +++ b/build_integration.sh @@ -1,12 +1,12 @@ #!/bin/bash -: ${ALL_RELEASES:="0.8.2.2 0.9.0.1 0.10.1.1 0.10.2.1 0.11.0.2 1.0.2 1.1.1 2.0.1"} +: ${ALL_RELEASES:="1.1.0"} : ${SCALA_VERSION:=2.11} : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} # On travis CI, empty KAFKA_VERSION means skip integration tests -# so we don't try to get binaries +# so we don't try to get binaries # Otherwise it means test all official releases, so we get all of them! if [ -z "$KAFKA_VERSION" -a -z "$TRAVIS" ]; then KAFKA_VERSION=$ALL_RELEASES @@ -33,12 +33,7 @@ pushd servers echo "-------------------------------------" echo "Checking kafka binaries for ${kafka}" echo - # kafka 0.8.0 is only available w/ scala 2.8.0 - if [ "$kafka" == "0.8.0" ]; then - KAFKA_ARTIFACT="kafka_2.8.0-${kafka}.tar.gz" - else - KAFKA_ARTIFACT="kafka_${SCALA_VERSION}-${kafka}.tgz" - fi + KAFKA_ARTIFACT="kafka_${SCALA_VERSION}-${kafka}.tgz" if [ ! 
-f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then if [ -f "${KAFKA_ARTIFACT}" ]; then echo "Using cached artifact: ${KAFKA_ARTIFACT}" diff --git a/docs/changelog.rst b/docs/changelog.rst index ab36b1ec8..514c1d599 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,171 @@ Changelog ========= +1.4.4.post1 (Jan 10, 2019) +########################## +* Added proc.communicate() patch that got merged upstream to ensure tests don't deadlock +* Only run tests for KAFKA_VERSION 1.1.0 (faster builds) +* Use internal pypi when available +* Remove py26 support (faster builds) +* Use requirements-dev.txt instead of pinning requirements in tox.ini + +1.4.3.post5 (Sep 10, 2018) +########################## +* Remove tests for kafka version 0.11.0.2 +* Fix failing build when logging warning + +1.4.3.post4 (Aug 8, 2018) +########################## +* Add usage of CreatePartition protocol in adminClient + +1.4.3.post3 (Aug 3, 2018) +########################## +* Change prospectus + +1.4.3.post2 (Aug 2, 2018) +########################## +* Remove unused versions of kafka for integration testing + +1.4.3.post1 (Aug 2, 2018) +########################## +* Upgrade to kafka-python 1.4.3 +* Change testing environment for supported kafka version testing + +1.3.3.post7 (Jan 29, 2018) +########################## +* Add kafka admin client + +1.3.3.post6 (Oct 5, 2017) +########################## +* Revert fix for rebalance not getting triggered (poll) + +1.3.3.post5 (Oct 2, 2017) +########################## +* Fix for rebalance not getting triggered (poll) + +1.3.3.post4 (Sept 20, 2017) +########################## +* Remove coveralls dependency + +1.3.3.post3 (Sept 19, 2017) +########################## +* Initialize metadata_snapshot correctly to avoid group rejoin after fixed metadata update +* Fix seek logic used with poll method + +1.3.3.post2 (Apr 24, 2017) +########################## +Fix locale-gen not found + +1.3.3.post1 (Apr 17, 2017) +########################## +Resolve merge conflicts while merging changes for version 1.3.3 +======= +1.4.3 (May 26, 2018) +#################### + +Compatibility +------------- +* Fix for python 3.7 support: remove 'async' keyword from SimpleProducer (dpkp #1454) + +Client +------ +* Improve BrokerConnection initialization time (romulorosa #1475) +* Ignore MetadataResponses with empty broker list (dpkp #1506) +* Improve connection handling when bootstrap list is invalid (dpkp #1507) + +Consumer +-------- +* Check for immediate failure when looking up coordinator in heartbeat thread (dpkp #1457) + +Core / Protocol +--------------- +* Always acquire client lock before coordinator lock to avoid deadlocks (dpkp #1464) +* Added AlterConfigs and DescribeConfigs apis (StephenSorriaux #1472) +* Fix CreatePartitionsRequest_v0 (StephenSorriaux #1469) +* Add codec validators to record parser and builder for all formats (tvoinarovskyi #1447) +* Fix MemoryRecord bugs re error handling and add test coverage (tvoinarovskyi #1448) +* Force lz4 to disable Kafka-unsupported block linking when encoding (mnito #1476) +* Stop shadowing `ConnectionError` (jeffwidman #1492) + +Documentation +------------- +* Document methods that return None (jeffwidman #1504) +* Minor doc capitalization cleanup (jeffwidman) +* Adds add_callback/add_errback example to docs (Berkodev #1441) +* Fix KafkaConsumer docstring for request_timeout_ms default (dpkp #1459) + +Test Infrastructure +------------------- +* Skip flakey SimpleProducer test (dpkp) +* Fix skipped integration tests if 
KAFKA_VERSION unset (dpkp #1453) + +Logging / Error Messages +------------------------ +* Stop using deprecated log.warn() (jeffwidman) +* Change levels for some heartbeat thread logging (dpkp #1456) +* Log Heartbeat thread start / close for debugging (dpkp) + + +1.4.7 (Sep 30, 2019) +#################### + +This is a minor release focused on KafkaConsumer performance, Admin Client +improvements, and Client concurrency. The KafkaConsumer iterator implementation +has been greatly simplified so that it just wraps consumer.poll(). The prior +implementation will remain available for a few more releases using the optional +KafkaConsumer config: `legacy_iterator=True` . This is expected to improve +consumer throughput substantially and help reduce heartbeat failures / group +rebalancing. + +Client +------ +* Send socket data via non-blocking IO with send buffer (dpkp / PR #1912) +* Rely on socket selector to detect completed connection attempts (dpkp / PR #1909) +* Improve connection lock handling; always use context manager (melor,dpkp / PR #1895) +* Reduce client poll timeout when there are no in-flight requests (dpkp / PR #1823) + +KafkaConsumer +------------- +* Do not use wakeup when sending fetch requests from consumer (dpkp / PR #1911) +* Wrap `consumer.poll()` for KafkaConsumer iteration (dpkp / PR #1902) +* Allow the coordinator to auto-commit on old brokers (justecorruptio / PR #1832) +* Reduce internal client poll timeout for (legacy) consumer iterator interface (dpkp / PR #1824) +* Use dedicated connection for group coordinator (dpkp / PR #1822) +* Change coordinator lock acquisition order (dpkp / PR #1821) +* Make `partitions_for_topic` a read-through cache (Baisang / PR #1781,#1809) +* Fix consumer hanging indefinitely on topic deletion while rebalancing (commanderdishwasher / PR #1782) + +Miscellaneous Bugfixes / Improvements +------------------------------------- +* Fix crc32c avilability on non-intel architectures (ossdev07 / PR #1904) +* Load system default SSL CAs if `ssl_cafile` is not provided (iAnomaly / PR #1883) +* Catch py3 TimeoutError in BrokerConnection send/recv (dpkp / PR #1820) +* Added a function to determine if bootstrap is successfully connected (Wayde2014 / PR #1876) + +Admin Client +------------ +* Add ACL api support to KafkaAdminClient (ulrikjohansson / PR #1833) +* Add `sasl_kerberos_domain_name` config to KafkaAdminClient (jeffwidman / PR #1852) +* Update `security_protocol` config documentation for KafkaAdminClient (cardy31 / PR #1849) +* Break FindCoordinator into request/response methods in KafkaAdminClient (jeffwidman / PR #1871) +* Break consumer operations into request / response methods in KafkaAdminClient (jeffwidman / PR #1845) +* Parallelize calls to `_send_request_to_node()` in KafkaAdminClient (davidheitman / PR #1807) + +Test Infrastructure / Documentation / Maintenance +------------------------------------------------- +* Add Kafka 2.3.0 to test matrix and compatibility docs (dpkp / PR #1915) +* Convert remaining `KafkaConsumer` tests to `pytest` (jeffwidman / PR #1886) +* Bump integration tests to 0.10.2.2 and 0.11.0.3 (jeffwidman / #1890) +* Cleanup handling of `KAFKA_VERSION` env var in tests (jeffwidman / PR #1887) +* Minor test cleanup (jeffwidman / PR #1885) +* Use `socket.SOCK_STREAM` in test assertions (iv-m / PR #1879) +* Sanity test for `consumer.topics()` and `consumer.partitions_for_topic()` (Baisang / PR #1829) +* Cleanup seconds conversion in client poll timeout calculation (jeffwidman / PR #1825) +* Remove unused imports 
(jeffwidman / PR #1808) +* Cleanup python nits in RangePartitionAssignor (jeffwidman / PR #1805) +* Update links to kafka consumer config docs (jeffwidman) +* Fix minor documentation typos (carsonip / PR #1865) +* Remove unused/weird comment line (jeffwidman / PR #1813) +* Update docs for `api_version_auto_timeout_ms` (jeffwidman / PR #1812) 1.4.6 (Apr 2, 2019) diff --git a/docs/compatibility.rst b/docs/compatibility.rst index fc9e7cc70..9ab877f3a 100644 --- a/docs/compatibility.rst +++ b/docs/compatibility.rst @@ -1,16 +1,20 @@ Compatibility ------------- -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python -kafka-python is compatible with (and tested against) broker versions 1.1 +kafka-python is compatible with (and tested against) broker versions 2.3 through 0.8.0 . kafka-python is not compatible with the 0.8.2-beta release. Because the kafka server protocol is backwards compatible, kafka-python is -expected to work with newer broker releases as well (2.0+). +expected to work with newer broker releases as well. + +Although kafka-python is tested and expected to work on recent broker versions, +not all features are supported. Specifically, authentication codecs, and +transactional producer/consumer support are not fully implemented. PRs welcome! kafka-python is tested on python 2.7, 3.4, 3.7, and pypy2.7. diff --git a/docs/index.rst b/docs/index.rst index 0b5b53f0f..6fa9a0c98 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ kafka-python ############ -.. image:: https://img.shields.io/badge/kafka-1.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-2.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -136,7 +136,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a :meth:`~kafka.KafkaClient.check_version()` method that probes a kafka broker and -attempts to identify which version it is running (0.8.0 to 1.1+). +attempts to identify which version it is running (0.8.0 to 2.3+). 
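A minimal sketch of that version probe (the bootstrap address is a placeholder and the exact tuple returned depends on the broker):

    from kafka import KafkaClient

    client = KafkaClient(bootstrap_servers='localhost:9092')
    # Infers the broker release from the API versions it advertises.
    print(client.check_version())
    client.close()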
Low-level diff --git a/kafka/__init__.py b/kafka/__init__.py index cafa04363..173459f86 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -20,6 +20,7 @@ def emit(self, record): from kafka.admin import KafkaAdminClient from kafka.consumer import KafkaConsumer +from kafka.consumer.kafka import KafkaConsumer as OldKafkaConsumer from kafka.consumer.subscription_state import ConsumerRebalanceListener from kafka.producer import KafkaProducer from kafka.conn import BrokerConnection diff --git a/kafka/admin/__init__.py b/kafka/admin/__init__.py index a300301c6..c240fc6d0 100644 --- a/kafka/admin/__init__.py +++ b/kafka/admin/__init__.py @@ -2,9 +2,13 @@ from kafka.admin.config_resource import ConfigResource, ConfigResourceType from kafka.admin.client import KafkaAdminClient +from kafka.admin.acl_resource import (ACL, ACLFilter, ResourcePattern, ResourcePatternFilter, ACLOperation, + ResourceType, ACLPermissionType, ACLResourcePatternType) from kafka.admin.new_topic import NewTopic from kafka.admin.new_partitions import NewPartitions __all__ = [ - 'ConfigResource', 'ConfigResourceType', 'KafkaAdminClient', 'NewTopic', 'NewPartitions' + 'ConfigResource', 'ConfigResourceType', 'KafkaAdminClient', 'NewTopic', 'NewPartitions', 'ACL', 'ACLFilter', + 'ResourcePattern', 'ResourcePatternFilter', 'ACLOperation', 'ResourceType', 'ACLPermissionType', + 'ACLResourcePatternType' ] diff --git a/kafka/admin/acl_resource.py b/kafka/admin/acl_resource.py new file mode 100644 index 000000000..7a012d2fa --- /dev/null +++ b/kafka/admin/acl_resource.py @@ -0,0 +1,212 @@ +from __future__ import absolute_import +from kafka.errors import IllegalArgumentError + +# enum in stdlib as of py3.4 +try: + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + + +class ResourceType(IntEnum): + """Type of kafka resource to set ACL for + + The ANY value is only valid in a filter context + """ + + UNKNOWN = 0, + ANY = 1, + CLUSTER = 4, + DELEGATION_TOKEN = 6, + GROUP = 3, + TOPIC = 2, + TRANSACTIONAL_ID = 5 + + +class ACLOperation(IntEnum): + """Type of operation + + The ANY value is only valid in a filter context + """ + + ANY = 1, + ALL = 2, + READ = 3, + WRITE = 4, + CREATE = 5, + DELETE = 6, + ALTER = 7, + DESCRIBE = 8, + CLUSTER_ACTION = 9, + DESCRIBE_CONFIGS = 10, + ALTER_CONFIGS = 11, + IDEMPOTENT_WRITE = 12 + + +class ACLPermissionType(IntEnum): + """An enumerated type of permissions + + The ANY value is only valid in a filter context + """ + + ANY = 1, + DENY = 2, + ALLOW = 3 + + +class ACLResourcePatternType(IntEnum): + """An enumerated type of resource patterns + + More details on the pattern types and how they work + can be found in KIP-290 (Support for prefixed ACLs) + https://cwiki.apache.org/confluence/display/KAFKA/KIP-290%3A+Support+for+Prefixed+ACLs + """ + + ANY = 1, + MATCH = 2, + LITERAL = 3, + PREFIXED = 4 + + +class ACLFilter(object): + """Represents a filter to use with describing and deleting ACLs + + The difference between this class and the ACL class is mainly that + we allow using ANY with the operation, permission, and resource type objects + to fetch ALCs matching any of the properties. 
+ + To make a filter matching any principal, set principal to None + """ + + def __init__( + self, + principal, + host, + operation, + permission_type, + resource_pattern + ): + self.principal = principal + self.host = host + self.operation = operation + self.permission_type = permission_type + self.resource_pattern = resource_pattern + + self.validate() + + def validate(self): + if not isinstance(self.operation, ACLOperation): + raise IllegalArgumentError("operation must be an ACLOperation object, and cannot be ANY") + if not isinstance(self.permission_type, ACLPermissionType): + raise IllegalArgumentError("permission_type must be an ACLPermissionType object, and cannot be ANY") + if not isinstance(self.resource_pattern, ResourcePatternFilter): + raise IllegalArgumentError("resource_pattern must be a ResourcePatternFilter object") + + def __repr__(self): + return "<ACL principal={principal}, resource={resource}, operation={operation}, type={type}, host={host}>".format( + principal=self.principal, + host=self.host, + operation=self.operation.name, + type=self.permission_type.name, + resource=self.resource_pattern + ) + + +class ACL(ACLFilter): + """Represents a concrete ACL for a specific ResourcePattern + + In kafka an ACL is a 4-tuple of (principal, host, operation, permission_type) + that limits who can do what on a specific resource (or since KIP-290 a resource pattern) + + Terminology: + Principal -> This is the identifier for the user. Depending on the authorization method used (SSL, SASL etc) + the principal will look different. See http://kafka.apache.org/documentation/#security_authz for details. + The principal must be on the format "User:<name>" or kafka will treat it as invalid. It's possible to use + other principal types than "User" if using a custom authorizer for the cluster. + Host -> This must currently be an IP address. It cannot be a range, and it cannot be a domain name. + It can be set to "*", which is special cased in kafka to mean "any host" + Operation -> Which client operation this ACL refers to. Has different meaning depending + on the resource type the ACL refers to. See https://docs.confluent.io/current/kafka/authorization.html#acl-format + for a list of which combinations of resource/operation that unlocks which kafka APIs + Permission Type: Whether this ACL is allowing or denying access + Resource Pattern -> This is a representation of the resource or resource pattern that the ACL + refers to. See the ResourcePattern class for details. 
+ + """ + + def __init__( + self, + principal, + host, + operation, + permission_type, + resource_pattern + ): + super(ACL, self).__init__(principal, host, operation, permission_type, resource_pattern) + self.validate() + + def validate(self): + if self.operation == ACLOperation.ANY: + raise IllegalArgumentError("operation cannot be ANY") + if self.permission_type == ACLPermissionType.ANY: + raise IllegalArgumentError("permission_type cannot be ANY") + if not isinstance(self.resource_pattern, ResourcePattern): + raise IllegalArgumentError("resource_pattern must be a ResourcePattern object") + + +class ResourcePatternFilter(object): + def __init__( + self, + resource_type, + resource_name, + pattern_type + ): + self.resource_type = resource_type + self.resource_name = resource_name + self.pattern_type = pattern_type + + self.validate() + + def validate(self): + if not isinstance(self.resource_type, ResourceType): + raise IllegalArgumentError("resource_type must be a ResourceType object") + if not isinstance(self.pattern_type, ACLResourcePatternType): + raise IllegalArgumentError("pattern_type must be an ACLResourcePatternType object") + + def __repr__(self): + return "<ResourcePattern type={}, name={}, pattern={}>".format( + self.resource_type.name, + self.resource_name, + self.pattern_type.name + ) + + +class ResourcePattern(ResourcePatternFilter): + """A resource pattern to apply the ACL to + + Resource patterns are used to be able to specify which resources an ACL + describes in a more flexible way than just pointing to a literal topic name for example. + Since KIP-290 (kafka 2.0) it's possible to set an ACL for a prefixed resource name, which + can cut down considerably on the number of ACLs needed when the number of topics and + consumer groups start to grow. + The default pattern_type is LITERAL, and it describes a specific resource. 
This is also how + ACLs worked before the introduction of prefixed ACLs + """ + + def __init__( + self, + resource_type, + resource_name, + pattern_type=ACLResourcePatternType.LITERAL + ): + super(ResourcePattern, self).__init__(resource_type, resource_name, pattern_type) + self.validate() + + def validate(self): + if self.resource_type == ResourceType.ANY: + raise IllegalArgumentError("resource_type cannot be ANY") + if self.pattern_type in [ACLResourcePatternType.ANY, ACLResourcePatternType.MATCH]: + raise IllegalArgumentError( + "pattern_type cannot be {} on a concrete ResourcePattern".format(self.pattern_type.name) + ) \ No newline at end of file diff --git a/kafka/admin/client.py b/kafka/admin/client.py index e4219e930..df85f442b 100644 --- a/kafka/admin/client.py +++ b/kafka/admin/client.py @@ -11,14 +11,16 @@ import kafka.errors as Errors from kafka.errors import ( IncompatibleBrokerVersion, KafkaConfigurationError, NotControllerError, - UnrecognizedBrokerVersion) + UnrecognizedBrokerVersion, IllegalArgumentError) from kafka.metrics import MetricConfig, Metrics from kafka.protocol.admin import ( CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, - ListGroupsRequest, DescribeGroupsRequest) + ListGroupsRequest, DescribeGroupsRequest, DescribeAclsRequest, CreateAclsRequest, DeleteAclsRequest) from kafka.protocol.commit import GroupCoordinatorRequest, OffsetFetchRequest from kafka.protocol.metadata import MetadataRequest from kafka.structs import TopicPartition, OffsetAndMetadata +from kafka.admin.acl_resource import ACLOperation, ACLPermissionType, ACLFilter, ACL, ResourcePattern, ResourceType, \ + ACLResourcePatternType from kafka.version import __version__ @@ -91,7 +93,8 @@ class KafkaAdminClient(object): partition leadership changes to proactively discover any new brokers or partitions. Default: 300000 security_protocol (str): Protocol used to communicate with brokers. - Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + Valid values are: PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL. + Default: PLAINTEXT. ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping socket connections. If provided, all other ssl_* configurations will be ignored. Default: None. @@ -133,6 +136,8 @@ class KafkaAdminClient(object): Required if sasl_mechanism is PLAIN. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). Default: None @@ -168,6 +173,7 @@ class KafkaAdminClient(object): 'sasl_plain_username': None, 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, # metrics configs @@ -226,14 +232,20 @@ def _matching_api_version(self, operation): :param operation: A list of protocol operation versions from kafka.protocol. :return: The max matching version number between client and broker. 
""" - version = min(len(operation) - 1, - self._client.get_api_versions()[operation[0].API_KEY][1]) - if version < self._client.get_api_versions()[operation[0].API_KEY][0]: + broker_api_versions = self._client.get_api_versions() + api_key = operation[0].API_KEY + if broker_api_versions is None or api_key not in broker_api_versions: + raise IncompatibleBrokerVersion( + "Kafka broker does not support the '{}' Kafka protocol." + .format(operation[0].__name__)) + min_version, max_version = broker_api_versions[api_key] + version = min(len(operation) - 1, max_version) + if version < min_version: # max library version is less than min broker version. Currently, # no Kafka versions specify a min msg version. Maybe in the future? raise IncompatibleBrokerVersion( "No version of the '{}' Kafka protocol is supported by both the client and broker." - .format(operation.__name__)) + .format(operation[0].__name__)) return version def _validate_timeout(self, timeout_ms): @@ -249,7 +261,11 @@ def _refresh_controller_id(self): version = self._matching_api_version(MetadataRequest) if 1 <= version <= 6: request = MetadataRequest[version]() - response = self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + + response = future.value controller_id = response.controller_id # verify the controller is new enough to support our requests controller_version = self._client.check_version(controller_id) @@ -263,7 +279,49 @@ def _refresh_controller_id(self): "Kafka Admin interface cannot determine the controller using MetadataRequest_v{}." .format(version)) - def _find_group_coordinator_id(self, group_id): + def _find_coordinator_id_send_request(self, group_id): + """Send a FindCoordinatorRequest to a broker. + + :param group_id: The consumer group ID. This is typically the group + name as a string. + :return: A message future + """ + # TODO add support for dynamically picking version of + # GroupCoordinatorRequest which was renamed to FindCoordinatorRequest. + # When I experimented with this, the coordinator value returned in + # GroupCoordinatorResponse_v1 didn't match the value returned by + # GroupCoordinatorResponse_v0 and I couldn't figure out why. + version = 0 + # version = self._matching_api_version(GroupCoordinatorRequest) + if version <= 0: + request = GroupCoordinatorRequest[version](group_id) + else: + raise NotImplementedError( + "Support for GroupCoordinatorRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(self._client.least_loaded_node(), request) + + def _find_coordinator_id_process_response(self, response): + """Process a FindCoordinatorResponse. + + :param response: a FindCoordinatorResponse. + :return: The node_id of the broker that is the coordinator. + """ + if response.API_VERSION <= 0: + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + # Note: When error_type.retriable, Java will retry... see + # KafkaAdminClient's handleFindCoordinatorError method + raise error_type( + "FindCoordinatorRequest failed with response '{}'." + .format(response)) + else: + raise NotImplementedError( + "Support for FindCoordinatorRequest_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return response.coordinator_id + + def _find_coordinator_id(self, group_id): """Find the broker node_id of the coordinator of the given group. 
Sends a FindCoordinatorRequest message to the cluster. Will block until @@ -275,52 +333,26 @@ def _find_group_coordinator_id(self, group_id): :return: The node_id of the broker that is the coordinator. """ # Note: Java may change how this is implemented in KAFKA-6791. - # - # TODO add support for dynamically picking version of - # GroupCoordinatorRequest which was renamed to FindCoordinatorRequest. - # When I experimented with this, GroupCoordinatorResponse_v1 didn't - # match GroupCoordinatorResponse_v0 and I couldn't figure out why. - gc_request = GroupCoordinatorRequest[0](group_id) - gc_response = self._send_request_to_node(self._client.least_loaded_node(), gc_request) - # use the extra error checking in add_group_coordinator() rather than - # immediately returning the group coordinator. - success = self._client.cluster.add_group_coordinator(group_id, gc_response) - if not success: - error_type = Errors.for_code(gc_response.error_code) - assert error_type is not Errors.NoError - # Note: When error_type.retriable, Java will retry... see - # KafkaAdminClient's handleFindCoordinatorError method - raise error_type( - "Could not identify group coordinator for group_id '{}' from response '{}'." - .format(group_id, gc_response)) - group_coordinator = self._client.cluster.coordinator_for_group(group_id) - # will be None if the coordinator was never populated, which should never happen here - assert group_coordinator is not None - # will be -1 if add_group_coordinator() failed... but by this point the - # error should have been raised. - assert group_coordinator != -1 - return group_coordinator + future = self._find_coordinator_id_send_request(group_id) + self._wait_for_futures([future]) + response = future.value + return self._find_coordinator_id_process_response(response) def _send_request_to_node(self, node_id, request): """Send a Kafka protocol message to a specific broker. - Will block until the message result is received. + Returns a future that may be polled for status and results. :param node_id: The broker id to which to send the message. :param request: The message to send. - :return: The Kafka protocol response for the message. + :return: A future object that may be polled for status and results. :exception: The exception if the message could not be sent. """ while not self._client.ready(node_id): # poll until the connection to broker is ready, otherwise send() # will fail with NodeNotReadyError self._client.poll() - future = self._client.send(node_id, request) - self._client.poll(future=future) - if future.succeeded(): - return future.value - else: - raise future.exception # pylint: disable-msg=raising-bad-type + return self._client.send(node_id, request) def _send_request_to_controller(self, request): """Send a Kafka protocol message to the cluster controller. @@ -333,7 +365,11 @@ def _send_request_to_controller(self, request): tries = 2 # in case our cached self._controller_id is outdated while tries: tries -= 1 - response = self._send_request_to_node(self._controller_id, request) + future = self._send_request_to_node(self._controller_id, request) + + self._wait_for_futures([future]) + + response = future.value # In Java, the error fieldname is inconsistent: # - CreateTopicsResponse / CreatePartitionsResponse uses topic_errors # - DeleteTopicsResponse uses topic_error_codes @@ -341,7 +377,7 @@ def _send_request_to_controller(self, request): # one of these attributes and that they always unpack into # (topic, error_code) tuples. 
topic_error_tuples = (response.topic_errors if hasattr(response, 'topic_errors') - else response.topic_error_codes) + else response.topic_error_codes) # Also small py2/py3 compatibility -- py3 can ignore extra values # during unpack via: for x, y, *rest in list_of_values. py2 cannot. # So for now we have to map across the list and explicitly drop any @@ -442,14 +478,269 @@ def delete_topics(self, topics, timeout_ms=None): # describe cluster functionality is in ClusterMetadata # Note: if implemented here, send the request to the least_loaded_node() - # describe_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + @staticmethod + def _convert_describe_acls_response_to_acls(describe_response): + version = describe_response.API_VERSION + + error = Errors.for_code(describe_response.error_code) + acl_list = [] + for resources in describe_response.resources: + if version == 0: + resource_type, resource_name, acls = resources + resource_pattern_type = ACLResourcePatternType.LITERAL.value + elif version <= 1: + resource_type, resource_name, resource_pattern_type, acls = resources + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + for acl in acls: + principal, host, operation, permission_type = acl + conv_acl = ACL( + principal=principal, + host=host, + operation=ACLOperation(operation), + permission_type=ACLPermissionType(permission_type), + resource_pattern=ResourcePattern( + ResourceType(resource_type), + resource_name, + ACLResourcePatternType(resource_pattern_type) + ) + ) + acl_list.append(conv_acl) + + return (acl_list, error,) + + def describe_acls(self, acl_filter): + """Describe a set of ACLs + + Used to return a set of ACLs matching the supplied ACLFilter. + The cluster must be configured with an authorizer for this to work, or + you will get a SecurityDisabledError + + :param acl_filter: an ACLFilter object + :return: tuple of a list of matching ACL objects and a KafkaError (NoError if successful) + """ - # create_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + version = self._matching_api_version(DescribeAclsRequest) + if version == 0: + request = DescribeAclsRequest[version]( + resource_type=acl_filter.resource_pattern.resource_type, + resource_name=acl_filter.resource_pattern.resource_name, + principal=acl_filter.principal, + host=acl_filter.host, + operation=acl_filter.operation, + permission_type=acl_filter.permission_type + ) + elif version <= 1: + request = DescribeAclsRequest[version]( + resource_type=acl_filter.resource_pattern.resource_type, + resource_name=acl_filter.resource_pattern.resource_name, + resource_pattern_type_filter=acl_filter.resource_pattern.pattern_type, + principal=acl_filter.principal, + host=acl_filter.host, + operation=acl_filter.operation, + permission_type=acl_filter.permission_type - # delete_acls protocol not yet implemented - # Note: send the request to the least_loaded_node() + ) + else: + raise NotImplementedError( + "Support for DescribeAcls v{} has not yet been added to KafkaAdmin." + .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + # optionally we could retry if error_type.retriable + raise error_type( + "Request '{}' failed with response '{}'." 
+ .format(request, response)) + + return self._convert_describe_acls_response_to_acls(response) + + @staticmethod + def _convert_create_acls_resource_request_v0(acl): + + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_create_acls_resource_request_v1(acl): + + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.resource_pattern.pattern_type, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_create_acls_response_to_acls(acls, create_response): + version = create_response.API_VERSION + + creations_error = [] + creations_success = [] + for i, creations in enumerate(create_response.creation_responses): + if version <= 1: + error_code, error_message = creations + acl = acls[i] + error = Errors.for_code(error_code) + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + + if error is Errors.NoError: + creations_success.append(acl) + else: + creations_error.append((acl, error,)) + + return {"succeeded": creations_success, "failed": creations_error} + + def create_acls(self, acls): + """Create a list of ACLs + + This endpoint only accepts a list of concrete ACL objects, no ACLFilters. + Throws TopicAlreadyExistsError if topic is already present. + + :param acls: a list of ACL objects + :return: dict of successes and failures + """ + + for acl in acls: + if not isinstance(acl, ACL): + raise IllegalArgumentError("acls must contain ACL objects") + + version = self._matching_api_version(CreateAclsRequest) + if version == 0: + request = CreateAclsRequest[version]( + creations=[self._convert_create_acls_resource_request_v0(acl) for acl in acls] + ) + elif version <= 1: + request = CreateAclsRequest[version]( + creations=[self._convert_create_acls_resource_request_v1(acl) for acl in acls] + ) + else: + raise NotImplementedError( + "Support for CreateAcls v{} has not yet been added to KafkaAdmin." 
+ .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + + return self._convert_create_acls_response_to_acls(acls, response) + + @staticmethod + def _convert_delete_acls_resource_request_v0(acl): + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_delete_acls_resource_request_v1(acl): + return ( + acl.resource_pattern.resource_type, + acl.resource_pattern.resource_name, + acl.resource_pattern.pattern_type, + acl.principal, + acl.host, + acl.operation, + acl.permission_type + ) + + @staticmethod + def _convert_delete_acls_response_to_matching_acls(acl_filters, delete_response): + version = delete_response.API_VERSION + filter_result_list = [] + for i, filter_responses in enumerate(delete_response.filter_responses): + filter_error_code, filter_error_message, matching_acls = filter_responses + filter_error = Errors.for_code(filter_error_code) + acl_result_list = [] + for acl in matching_acls: + if version == 0: + error_code, error_message, resource_type, resource_name, principal, host, operation, permission_type = acl + resource_pattern_type = ACLResourcePatternType.LITERAL.value + elif version == 1: + error_code, error_message, resource_type, resource_name, resource_pattern_type, principal, host, operation, permission_type = acl + else: + raise NotImplementedError( + "Support for DescribeAcls Response v{} has not yet been added to KafkaAdmin." + .format(version) + ) + acl_error = Errors.for_code(error_code) + conv_acl = ACL( + principal=principal, + host=host, + operation=ACLOperation(operation), + permission_type=ACLPermissionType(permission_type), + resource_pattern=ResourcePattern( + ResourceType(resource_type), + resource_name, + ACLResourcePatternType(resource_pattern_type) + ) + ) + acl_result_list.append((conv_acl, acl_error,)) + filter_result_list.append((acl_filters[i], acl_result_list, filter_error,)) + return filter_result_list + + def delete_acls(self, acl_filters): + """Delete a set of ACLs + + Deletes all ACLs matching the list of input ACLFilter + + :param acl_filters: a list of ACLFilter + :return: a list of 3-tuples corresponding to the list of input filters. + The tuples hold (the input ACLFilter, list of affected ACLs, KafkaError instance) + """ + + for acl in acl_filters: + if not isinstance(acl, ACLFilter): + raise IllegalArgumentError("acl_filters must contain ACLFilter type objects") + + version = self._matching_api_version(DeleteAclsRequest) + + if version == 0: + request = DeleteAclsRequest[version]( + filters=[self._convert_delete_acls_resource_request_v0(acl) for acl in acl_filters] + ) + elif version <= 1: + request = DeleteAclsRequest[version]( + filters=[self._convert_delete_acls_resource_request_v1(acl) for acl in acl_filters] + ) + else: + raise NotImplementedError( + "Support for DeleteAcls v{} has not yet been added to KafkaAdmin." 
+ .format(version) + ) + + future = self._send_request_to_node(self._client.least_loaded_node(), request) + self._wait_for_futures([future]) + response = future.value + + return self._convert_delete_acls_response_to_matching_acls(acl_filters, response) @staticmethod def _convert_describe_config_resource_request(config_resource): @@ -490,7 +781,11 @@ def describe_configs(self, config_resources, include_synonyms=False): raise NotImplementedError( "Support for DescribeConfigs v{} has not yet been added to KafkaAdminClient." .format(version)) - return self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + response = future.value + return response @staticmethod def _convert_alter_config_resource_request(config_resource): @@ -529,7 +824,11 @@ def alter_configs(self, config_resources): # // a single request that may be sent to any broker. # # So this is currently broken as it always sends to the least_loaded_node() - return self._send_request_to_node(self._client.least_loaded_node(), request) + future = self._send_request_to_node(self._client.least_loaded_node(), request) + + self._wait_for_futures([future]) + response = future.value + return response # alter replica logs dir protocol not yet implemented # Note: have to lookup the broker with the replica assignment and send the request to that broker @@ -586,6 +885,54 @@ def create_partitions(self, topic_partitions, timeout_ms=None, validate_only=Fal # describe delegation_token protocol not yet implemented # Note: send the request to the least_loaded_node() + def _describe_consumer_groups_send_request(self, group_id, group_coordinator_id): + """Send a DescribeGroupsRequest to the group's coordinator. + + :param group_id: The group name as a string + :param group_coordinator_id: The node_id of the groups' coordinator + broker. + :return: A message future. + """ + version = self._matching_api_version(DescribeGroupsRequest) + if version <= 1: + # Note: KAFKA-6788 A potential optimization is to group the + # request per coordinator and send one request with a list of + # all consumer groups. Java still hasn't implemented this + # because the error checking is hard to get right when some + # groups error and others don't. + request = DescribeGroupsRequest[version](groups=(group_id,)) + else: + raise NotImplementedError( + "Support for DescribeGroupsRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(group_coordinator_id, request) + + def _describe_consumer_groups_process_response(self, response): + """Process a DescribeGroupsResponse into a group description.""" + if response.API_VERSION <= 1: + assert len(response.groups) == 1 + # TODO need to implement converting the response tuple into + # a more accessible interface like a namedtuple and then stop + # hardcoding tuple indices here. Several Java examples, + # including KafkaAdminClient.java + group_description = response.groups[0] + error_code = group_description[0] + error_type = Errors.for_code(error_code) + # Java has the note: KAFKA-6789, we can retry based on the error code + if error_type is not Errors.NoError: + raise error_type( + "DescribeGroupsResponse failed with response '{}'." + .format(response)) + # TODO Java checks the group protocol type, and if consumer + # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes + # the members' partition assignments... 
that hasn't yet been + # implemented here so just return the raw struct results + else: + raise NotImplementedError( + "Support for DescribeGroupsResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return group_description + def describe_consumer_groups(self, group_ids, group_coordinator_id=None): """Describe a set of consumer groups. @@ -605,44 +952,53 @@ def describe_consumer_groups(self, group_ids, group_coordinator_id=None): partition assignments. """ group_descriptions = [] - version = self._matching_api_version(DescribeGroupsRequest) + futures = [] for group_id in group_ids: if group_coordinator_id is not None: this_groups_coordinator_id = group_coordinator_id else: - this_groups_coordinator_id = self._find_group_coordinator_id(group_id) - if version <= 1: - # Note: KAFKA-6788 A potential optimization is to group the - # request per coordinator and send one request with a list of - # all consumer groups. Java still hasn't implemented this - # because the error checking is hard to get right when some - # groups error and others don't. - request = DescribeGroupsRequest[version](groups=(group_id,)) - response = self._send_request_to_node(this_groups_coordinator_id, request) - assert len(response.groups) == 1 - # TODO need to implement converting the response tuple into - # a more accessible interface like a namedtuple and then stop - # hardcoding tuple indices here. Several Java examples, - # including KafkaAdminClient.java - group_description = response.groups[0] - error_code = group_description[0] - error_type = Errors.for_code(error_code) - # Java has the note: KAFKA-6789, we can retry based on the error code - if error_type is not Errors.NoError: - raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) - # TODO Java checks the group protocol type, and if consumer - # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes - # the members' partition assignments... that hasn't yet been - # implemented here so just return the raw struct results - group_descriptions.append(group_description) - else: - raise NotImplementedError( - "Support for DescribeGroups v{} has not yet been added to KafkaAdminClient." - .format(version)) + this_groups_coordinator_id = self._find_coordinator_id(group_id) + f = self._describe_consumer_groups_send_request(group_id, this_groups_coordinator_id) + futures.append(f) + + self._wait_for_futures(futures) + + for future in futures: + response = future.value + group_description = self._describe_consumer_groups_process_response(response) + group_descriptions.append(group_description) + return group_descriptions + def _list_consumer_groups_send_request(self, broker_id): + """Send a ListGroupsRequest to a broker. + + :param broker_id: The broker's node_id. + :return: A message future + """ + version = self._matching_api_version(ListGroupsRequest) + if version <= 2: + request = ListGroupsRequest[version]() + else: + raise NotImplementedError( + "Support for ListGroupsRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(broker_id, request) + + def _list_consumer_groups_process_response(self, response): + """Process a ListGroupsResponse into a list of groups.""" + if response.API_VERSION <= 2: + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + raise error_type( + "ListGroupsRequest failed with response '{}'." 
+ .format(response)) + else: + raise NotImplementedError( + "Support for ListGroupsResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return response.groups + def list_consumer_groups(self, broker_ids=None): """List all consumer groups known to the cluster. @@ -675,52 +1031,22 @@ def list_consumer_groups(self, broker_ids=None): consumer_groups = set() if broker_ids is None: broker_ids = [broker.nodeId for broker in self._client.cluster.brokers()] - version = self._matching_api_version(ListGroupsRequest) - if version <= 2: - request = ListGroupsRequest[version]() - for broker_id in broker_ids: - response = self._send_request_to_node(broker_id, request) - error_type = Errors.for_code(response.error_code) - if error_type is not Errors.NoError: - raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) - consumer_groups.update(response.groups) - else: - raise NotImplementedError( - "Support for ListGroups v{} has not yet been added to KafkaAdminClient." - .format(version)) + futures = [self._list_consumer_groups_send_request(b) for b in broker_ids] + self._wait_for_futures(futures) + for f in futures: + response = f.value + consumer_groups.update(self._list_consumer_groups_process_response(response)) return list(consumer_groups) - def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, - partitions=None): - """Fetch Consumer Group Offsets. - - Note: - This does not verify that the group_id or partitions actually exist - in the cluster. - - As soon as any error is encountered, it is immediately raised. + def _list_consumer_group_offsets_send_request(self, group_id, + group_coordinator_id, partitions=None): + """Send an OffsetFetchRequest to a broker. :param group_id: The consumer group id name for which to fetch offsets. :param group_coordinator_id: The node_id of the group's coordinator - broker. If set to None, will query the cluster to find the group - coordinator. Explicitly specifying this can be useful to prevent - that extra network round trip if you already know the group - coordinator. Default: None. - :param partitions: A list of TopicPartitions for which to fetch - offsets. On brokers >= 0.10.2, this can be set to None to fetch all - known offsets for the consumer group. Default: None. - :return dictionary: A dictionary with TopicPartition keys and - OffsetAndMetada values. Partitions that are not specified and for - which the group_id does not have a recorded offset are omitted. An - offset value of `-1` indicates the group_id has no offset for that - TopicPartition. A `-1` can only happen for partitions that are - explicitly specified. + broker. + :return: A message future """ - group_offsets_listing = {} - if group_coordinator_id is None: - group_coordinator_id = self._find_group_coordinator_id(group_id) version = self._matching_api_version(OffsetFetchRequest) if version <= 3: if partitions is None: @@ -738,29 +1064,88 @@ def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, topics_partitions_dict[topic].add(partition) topics_partitions = list(six.iteritems(topics_partitions_dict)) request = OffsetFetchRequest[version](group_id, topics_partitions) - response = self._send_request_to_node(group_coordinator_id, request) - if version > 1: # OffsetFetchResponse_v1 lacks a top-level error_code + else: + raise NotImplementedError( + "Support for OffsetFetchRequest_v{} has not yet been added to KafkaAdminClient." 
+ .format(version)) + return self._send_request_to_node(group_coordinator_id, request) + + def _list_consumer_group_offsets_process_response(self, response): + """Process an OffsetFetchResponse. + + :param response: an OffsetFetchResponse. + :return: A dictionary composed of TopicPartition keys and + OffsetAndMetada values. + """ + if response.API_VERSION <= 3: + + # OffsetFetchResponse_v1 lacks a top-level error_code + if response.API_VERSION > 1: error_type = Errors.for_code(response.error_code) if error_type is not Errors.NoError: # optionally we could retry if error_type.retriable raise error_type( - "Request '{}' failed with response '{}'." - .format(request, response)) + "OffsetFetchResponse failed with response '{}'." + .format(response)) + # transform response into a dictionary with TopicPartition keys and # OffsetAndMetada values--this is what the Java AdminClient returns + offsets = {} for topic, partitions in response.topics: for partition, offset, metadata, error_code in partitions: error_type = Errors.for_code(error_code) if error_type is not Errors.NoError: raise error_type( - "Unable to fetch offsets for group_id {}, topic {}, partition {}" - .format(group_id, topic, partition)) - group_offsets_listing[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata) + "Unable to fetch consumer group offsets for topic {}, partition {}" + .format(topic, partition)) + offsets[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata) else: raise NotImplementedError( - "Support for OffsetFetch v{} has not yet been added to KafkaAdminClient." - .format(version)) - return group_offsets_listing + "Support for OffsetFetchResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + return offsets + + def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, + partitions=None): + """Fetch Consumer Offsets for a single consumer group. + + Note: + This does not verify that the group_id or partitions actually exist + in the cluster. + + As soon as any error is encountered, it is immediately raised. + + :param group_id: The consumer group id name for which to fetch offsets. + :param group_coordinator_id: The node_id of the group's coordinator + broker. If set to None, will query the cluster to find the group + coordinator. Explicitly specifying this can be useful to prevent + that extra network round trip if you already know the group + coordinator. Default: None. + :param partitions: A list of TopicPartitions for which to fetch + offsets. On brokers >= 0.10.2, this can be set to None to fetch all + known offsets for the consumer group. Default: None. + :return dictionary: A dictionary with TopicPartition keys and + OffsetAndMetada values. Partitions that are not specified and for + which the group_id does not have a recorded offset are omitted. An + offset value of `-1` indicates the group_id has no offset for that + TopicPartition. A `-1` can only happen for partitions that are + explicitly specified. + """ + if group_coordinator_id is None: + group_coordinator_id = self._find_coordinator_id(group_id) + future = self._list_consumer_group_offsets_send_request( + group_id, group_coordinator_id, partitions) + self._wait_for_futures([future]) + response = future.value + return self._list_consumer_group_offsets_process_response(response) # delete groups protocol not yet implemented # Note: send the request to the group's coordinator. 
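    # --- Illustrative aside (not part of the patch) ----------------------------
    # A minimal sketch of how the request/response split above is consumed from
    # user code; the broker address and group name below are hypothetical.
    from kafka.admin import KafkaAdminClient

    admin = KafkaAdminClient(bootstrap_servers='localhost:9092')

    # Resolves the group coordinator, sends an OffsetFetchRequest via
    # _list_consumer_group_offsets_send_request(), waits on the returned future
    # (see _wait_for_futures below), then decodes it with
    # _list_consumer_group_offsets_process_response().
    offsets = admin.list_consumer_group_offsets('my-group')
    for tp, offset_meta in offsets.items():
        print(tp.topic, tp.partition, offset_meta.offset)

    # ListGroups is sent to every broker; DescribeGroups is sent to each
    # group's coordinator, one request per group, and waited on in parallel.
    groups = admin.list_consumer_groups()
    descriptions = admin.describe_consumer_groups(['my-group'])
    # --- end of illustrative aside ----------------------------------------------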
+ + def _wait_for_futures(self, futures): + while not all(future.succeeded() for future in futures): + for future in futures: + self._client.poll(future=future) + + if future.failed(): + raise future.exception # pylint: disable-msg=raising-bad-type diff --git a/kafka/admin_client.py b/kafka/admin_client.py new file mode 100644 index 000000000..cb2bb86ff --- /dev/null +++ b/kafka/admin_client.py @@ -0,0 +1,208 @@ +import collections +import time +from .errors import NodeNotReadyError +from .protocol.admin import CreateTopicsRequest, DeleteTopicsRequest, CreatePartitionsRequest +from .protocol.metadata import MetadataRequest + + +"""NewPartitionsInfo + +Fields: + name (string): name of topic + count (int): the new partition count + broker_ids_matrix: list(list(brokerids)) + the sizes of inner lists are the replica factor of current topic + the size of outer list is the increased partition num of current topic +""" +NewPartitionsInfo = collections.namedtuple( + 'NewPartitionsInfo', + ['name', 'count', 'broker_ids_matrix'] +) + +def convert_new_topic_request_format(new_topic): + return ( + new_topic.name, + new_topic.num_partitions, + new_topic.replication_factor, + [ + (partition_id,replicas) + for partition_id, replicas in new_topic.replica_assignments.items() + ], + [ + (config_key, config_value) + for config_key, config_value in new_topic.configs.items() + ], + ) + +def convert_topic_partitions_requst_format(topic_partition): + return ( + topic_partition.name, + ( + topic_partition.count, + topic_partition.broker_ids_matrix + ) + ) + +class NewTopic(object): + """ A class for new topic creation + + Arguments: + name (string): name of the topic + num_partitions (int): number of partitions + or -1 if replica_assignment has been specified + replication_factor (int): replication factor or -1 if + replica assignment is specified + replica_assignment (dict of int: [int]): A mapping containing + partition id and replicas to assign to it. + topic_configs (dict of str: str): A mapping of config key + and value for the topic. 
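    Example (illustrative sketch only, not part of the patch; assumes a
    bootstrapped ``kafka.client_async.KafkaClient`` instance named ``client``):

        topic = NewTopic(
            name='example-topic',
            num_partitions=6,
            replication_factor=3,
        )
        admin = AdminClient(client)
        response = admin.create_topics([topic], timeout=30)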
+ """ + + def __init__( + self, + name, + num_partitions, + replication_factor, + replica_assignments=None, + configs=None, + ): + self.name = name + self.configs = configs or {} + self.num_partitions = num_partitions + self.replication_factor = replication_factor + self.replica_assignments = replica_assignments or {} + + def __str__(self): + return ":{}, :{}, :{}" \ + ":{}, :{}".format( + self.name, + self.num_partitions, + self.replication_factor, + self.replica_assignments, + self.configs, + ) + +class AdminClient(object): + """ + An api to send CreateTopic requests + + """ + def __init__(self, client): + self.client = client + self.metadata_request = MetadataRequest[1]([]) + self.topic_request = CreateTopicsRequest[0] + self.delete_topics_request = DeleteTopicsRequest[0] + self.create_partitions_request = CreatePartitionsRequest[0] + + def _send_controller_request(self): + response = self._send( + self.client.least_loaded_node(), + self.metadata_request, + ) + return response[0].controller_id + + def _send(self, node, request): + future = self.client.send(node, request) + return self.client.poll(future=future) + + def _send_request(self, request): + controller_id = self._send_controller_request() + while not self.client.ready(controller_id): + # poll until the connection to broker is ready, otherwise send() + # will fail with NodeNotReadyError + self.client.poll() + return self._send(controller_id, request) + + def create_partitions( + self, + new_partitions_infos, + timeout, + validate_only, + ): + """ Create partitions on topics + + Arguments: + new_partitions_infos (list of NewPartitionsInfo): A list containing + infos on increasing partitions with following format + [ + NewPartitionsInfo( + 'name': String, + 'count': Int, + 'broker_ids_matrix': + [ + [id1, id2, id3], + [id1, id3, id4], + ... + ] + ), + ... + ] + especially, broker_ids_matrix is a matrix of broker ids. 
The row size is + the number of newly added partitions and the col size is the replication + factor of the topic + + timeout (int): timeout in seconds + validate_only (Boolean): If true then validate the + request without actually increasing the number of + partitions + + Returns: + CreatePartitionsResponse: response from the broker + + Raises: + NodeNotReadyError: if controller is not ready + """ + + request = self.create_partitions_request( + topic_partitions = [ + convert_topic_partitions_requst_format(new_partitions_info) + for new_partitions_info in new_partitions_infos + ], + timeout=timeout, + validate_only = validate_only, + ) + + return self._send_request(request) + + def create_topics( + self, + topics, + timeout, + ): + """ Create topics on the cluster + + Arguments: + topics (list of NewTopic): A list containing new + topics to be created + timeout (int): timeout in seconds + + Returns: + CreateTopicResponse: response from the broker + + Raises: + NodeNotReadyError: if controller is not ready + """ + request = self.topic_request( + create_topic_requests=[ + convert_new_topic_request_format(topic) + for topic in topics + ], + timeout=timeout, + ) + return self._send_request(request) + + def delete_topics(self, topics, timeout): + """ Deletes topics on the cluster + + Arguments: + topics (list of topic names): Topics to delete + timeout (int): The requested timeout for this operation + Raises: + NodeNotReadyError: if controller is not ready + """ + + request = self.delete_topics_request( + topics=topics, + timeout=timeout, + ) + return self._send_request(request) diff --git a/kafka/client.py b/kafka/client.py index 148cae0d8..c66c52bed 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -8,14 +8,24 @@ import time import select +# selectors in stdlib as of py3.4 +try: + import selectors # pylint: disable=import-error +except ImportError: + # vendored backport module + from .vendor import selectors34 as selectors + from kafka.vendor import six import kafka.errors from kafka.errors import (UnknownError, KafkaConnectionError, FailedPayloadsError, KafkaTimeoutError, KafkaUnavailableError, LeaderNotAvailableError, UnknownTopicOrPartitionError, - NotLeaderForPartitionError, ReplicaNotAvailableError) + NotLeaderForPartitionError, ReplicaNotAvailableError, + GroupCoordinatorNotAvailableError, GroupLoadInProgressError) from kafka.structs import TopicPartition, BrokerMetadata +from kafka.metrics.metrics import Metrics +from kafka.metrics.stats.avg import Avg from kafka.conn import ( collect_hosts, BrokerConnection, @@ -28,9 +38,33 @@ from kafka.client_async import KafkaClient +# If the __consumer_offsets topic is missing, the first consumer coordinator +# request will fail and it will trigger the creation of the topic; for this +# reason, we will retry few times until the creation is completed. +CONSUMER_OFFSET_TOPIC_CREATION_RETRIES = 20 +CONSUMER_OFFSET_RETRY_INTERVAL_SEC = 0.5 + + log = logging.getLogger(__name__) +def time_metric(metric_name): + def decorator(fn): + @functools.wraps(fn) + def wrapper(self, *args, **kwargs): + start_time = time.time() + ret = fn(self, *args, **kwargs) + + self.metrics.record( + metric_name, + (time.time() - start_time) * 1000, + ) + + return ret + return wrapper + return decorator + + # Legacy KafkaClient interface -- will be deprecated soon class SimpleClient(object): @@ -42,32 +76,43 @@ class SimpleClient(object): # socket timeout. 
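# --- Illustrative aside (not part of the patch) ----------------------------
# How the @time_metric decorator defined above surfaces per-request latency:
# each decorated send_*_request method records its wall-clock duration (in ms)
# into a 'simple-client' sensor named after the request type. The broker
# address below is hypothetical.
from kafka import SimpleClient
from kafka.metrics.metrics import Metrics

registry = Metrics()
client = SimpleClient('localhost:9092', metrics=registry)
client.send_metadata_request()  # timed via @time_metric('metadata')

for metric_name, metric in registry.metrics.items():
    if metric_name.group == 'simple-client':
        print(metric_name.name, metric_name.tags, metric.value())
# --- end of illustrative aside ----------------------------------------------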
def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS, - correlation_id=0): + correlation_id=0, metrics=None, **kwargs): # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout - self.hosts = collect_hosts(hosts) + self.hosts = [host + ('bootstrap',) for host in collect_hosts(hosts)] self.correlation_id = correlation_id + self._metrics_registry = metrics + self.metrics = SimpleClientMetrics(metrics if metrics else Metrics()) self._conns = {} self.brokers = {} # broker_id -> BrokerMetadata self.topics_to_brokers = {} # TopicPartition -> BrokerMetadata self.topic_partitions = {} # topic -> partition -> leader + # Support arbitrary kwargs to be provided as config to BrokerConnection + # This will allow advanced features like Authentication to work + self.config = kwargs + self.load_metadata_for_topics() # bootstrap with all metadata ################## # Private API # ################## - def _get_conn(self, host, port, afi): + def _get_conn(self, host, port, afi, node_id='bootstrap'): """Get or create a connection to a broker using host and port""" host_key = (host, port) if host_key not in self._conns: + self._conns[host_key] = BrokerConnection( host, port, afi, request_timeout_ms=self.timeout * 1000, - client_id=self.client_id + client_id=self.client_id, + metrics=self._metrics_registry, + metric_group_prefix='simple-client', + node_id=node_id, + **self.config, ) conn = self._conns[host_key] @@ -145,17 +190,17 @@ def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn): brokers. Keep trying until you succeed. """ hosts = set() - for broker in self.brokers.values(): + for node_id, broker in self.brokers.items(): host, port, afi = get_ip_port_afi(broker.host) - hosts.add((host, broker.port, afi)) + hosts.add((host, broker.port, afi, node_id)) hosts.update(self.hosts) hosts = list(hosts) random.shuffle(hosts) - for (host, port, afi) in hosts: + for (host, port, afi, node_id) in hosts: try: - conn = self._get_conn(host, port, afi) + conn = self._get_conn(host, port, afi, node_id) except KafkaConnectionError: log.warning("Skipping unconnected connection: %s:%s (AFI %s)", host, port, afi) @@ -163,7 +208,8 @@ def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn): request = encoder_fn(payloads=payloads) future = conn.send(request) - # Block + # Block, also waste CPU cycle here, but broker unaware requests + # shouldn't be very frequent. 
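            # (Illustrative note, not part of the patch): conn.send() returns a
            # Future; the loop below drains conn.recv() and resolves that future
            # with the decoded response. On failure the outer loop falls back to
            # the next candidate host.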
while not future.is_done: for r, f in conn.recv(): f.success(r) @@ -229,11 +275,11 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): def failed_payloads(payloads): for payload in payloads: topic_partition = (str(payload.topic), payload.partition) - responses[(topic_partition)] = FailedPayloadsError(payload) + responses[topic_partition] = FailedPayloadsError(payload) + + futures_by_connection = {} + selector = selectors.DefaultSelector() - # For each BrokerConnection keep the real socket so that we can use - # a select to perform unblocking I/O - connections_by_future = {} for broker, broker_payloads in six.iteritems(payloads_by_broker): if broker is None: failed_payloads(broker_payloads) @@ -241,16 +287,20 @@ def failed_payloads(payloads): host, port, afi = get_ip_port_afi(broker.host) try: - conn = self._get_conn(host, broker.port, afi) + conn = self._get_conn(host, broker.port, afi, broker.nodeId) except KafkaConnectionError: refresh_metadata = True failed_payloads(broker_payloads) continue request = encoder_fn(payloads=broker_payloads) + if request.expect_response(): + selector.register(conn._sock, selectors.EVENT_READ, conn) future = conn.send(request) if future.failed(): + log.error("Request failed: %s", future.exception) + selector.unregister(conn._sock) refresh_metadata = True failed_payloads(broker_payloads) continue @@ -261,42 +311,50 @@ def failed_payloads(payloads): responses[topic_partition] = None continue - connections_by_future[future] = (conn, broker) + futures_by_connection[conn] = (future, broker) - conn = None - while connections_by_future: - futures = list(connections_by_future.keys()) + timeout = self.timeout + while futures_by_connection: + start_time = time.time() - # block until a socket is ready to be read - sockets = [ - conn._sock - for future, (conn, _) in six.iteritems(connections_by_future) - if not future.is_done and conn._sock is not None] - if sockets: - read_socks, _, _ = select.select(sockets, [], []) + ready = selector.select(timeout) - for future in futures: + for key, _ in ready: - if not future.is_done: - conn, _ = connections_by_future[future] + conn = key.data + future, _ = futures_by_connection[conn] + while not future.is_done: for r, f in conn.recv(): f.success(r) - continue - _, broker = connections_by_future.pop(future) + _, broker = futures_by_connection.pop(conn) + if future.failed(): + log.error("Request failed: %s", future.exception) refresh_metadata = True failed_payloads(payloads_by_broker[broker]) else: + _resps = [] for payload_response in decoder_fn(future.value): topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = payload_response + _resps.append(payload_response) + log.debug('Response %s', _resps) + + timeout -= time.time() - start_time + if timeout < 0: + log.error("%s requests timed out.", len(futures_by_connection)) + for _, broker in six.itervalues(futures_by_connection): + failed_payloads(payloads_by_broker[broker]) + refresh_metadata = True + break if refresh_metadata: self.reset_all_metadata() + selector.close() # Return responses in the same order as provided return [responses[tp] for tp in original_ordering] @@ -330,65 +388,54 @@ def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn): # so we need to keep this so we can rebuild order before returning original_ordering = [(p.topic, p.partition) for p in payloads] - broker = self._get_coordinator_for_group(group) + retries = 0 + broker = None + while not broker: + try: + 
broker = self._get_coordinator_for_group(group) + except (GroupCoordinatorNotAvailableError, GroupLoadInProgressError) as e: + if retries == CONSUMER_OFFSET_TOPIC_CREATION_RETRIES: + raise e + time.sleep(CONSUMER_OFFSET_RETRY_INTERVAL_SEC) + retries += 1 # Send the list of request payloads and collect the responses and # errors responses = {} - request_id = self._next_id() - log.debug('Request %s to %s: %s', request_id, broker, payloads) - request = encoder_fn(client_id=self.client_id, - correlation_id=request_id, payloads=payloads) - - # Send the request, recv the response - try: - host, port, afi = get_ip_port_afi(broker.host) - conn = self._get_conn(host, broker.port, afi) - except KafkaConnectionError as e: - log.warning('KafkaConnectionError attempting to send request %s ' - 'to server %s: %s', request_id, broker, e) + def failed_payloads(payloads): for payload in payloads: - topic_partition = (payload.topic, payload.partition) + topic_partition = (str(payload.topic), payload.partition) responses[topic_partition] = FailedPayloadsError(payload) - # No exception, try to get response - else: - - future = conn.send(request_id, request) - while not future.is_done: - for r, f in conn.recv(): - f.success(r) + host, port, afi = get_ip_port_afi(broker.host) + try: + conn = self._get_conn(host, broker.port, afi, broker.nodeId) + except KafkaConnectionError: + failed_payloads(payloads) + else: + request = encoder_fn(payloads=payloads) # decoder_fn=None signal that the server is expected to not # send a response. This probably only applies to # ProduceRequest w/ acks = 0 - if decoder_fn is None: - log.debug('Request %s does not expect a response ' - '(skipping conn.recv)', request_id) - for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = None - return [] + future = conn.send(request) + while not future.is_done: + for r, f in conn.recv(): + f.success(r) if future.failed(): - log.warning('Error attempting to receive a ' - 'response to request %s from server %s: %s', - request_id, broker, future.exception) + failed_payloads(payloads) - for payload in payloads: - topic_partition = (payload.topic, payload.partition) - responses[topic_partition] = FailedPayloadsError(payload) + elif not request.expect_response(): + failed_payloads(payloads) else: - response = future.value - _resps = [] + response = future.value for payload_response in decoder_fn(response): - topic_partition = (payload_response.topic, + topic_partition = (str(payload_response.topic), payload_response.partition) responses[topic_partition] = payload_response - _resps.append(payload_response) - log.debug('Response %s: %s', request_id, _resps) # Return responses in the same order as provided return [responses[tp] for tp in original_ordering] @@ -429,8 +476,17 @@ def copy(self): """ _conns = self._conns self._conns = {} + _metrics_registry = self._metrics_registry + self._metrics_registry = None + _metrics = self.metrics + self.metrics = None + c = copy.deepcopy(self) self._conns = _conns + self.metrics = _metrics + self._metrics_registry = _metrics_registry + c.metrics = _metrics + c._metrics_registry = _metrics_registry return c def reinit(self): @@ -575,13 +631,14 @@ def load_metadata_for_topics(self, *topics, **kwargs): if leader in self.brokers: self.topics_to_brokers[topic_part] = self.brokers[leader] - # If Unknown Broker, fake BrokerMetadata so we don't lose the id + # If Unknown Broker, fake BrokerMetadata so we dont lose the id # (not sure how this could happen. 
server could be in bad state) else: self.topics_to_brokers[topic_part] = BrokerMetadata( leader, None, None, None ) + @time_metric('metadata') def send_metadata_request(self, payloads=(), fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_metadata_request @@ -589,6 +646,7 @@ def send_metadata_request(self, payloads=(), fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) + @time_metric('consumer_metadata') def send_consumer_metadata_request(self, payloads=(), fail_on_error=True, callback=None): encoder = KafkaProtocol.encode_consumer_metadata_request @@ -596,6 +654,7 @@ def send_consumer_metadata_request(self, payloads=(), fail_on_error=True, return self._send_broker_unaware_request(payloads, encoder, decoder) + @time_metric('produce') def send_produce_request(self, payloads=(), acks=1, timeout=1000, fail_on_error=True, callback=None): """ @@ -646,6 +705,7 @@ def send_produce_request(self, payloads=(), acks=1, timeout=1000, if resp is not None and (not fail_on_error or not self._raise_on_response_error(resp))] + @time_metric('fetch') def send_fetch_request(self, payloads=(), fail_on_error=True, callback=None, max_wait_time=100, min_bytes=4096): """ @@ -666,6 +726,7 @@ def send_fetch_request(self, payloads=(), fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset') def send_offset_request(self, payloads=(), fail_on_error=True, callback=None): resps = self._send_broker_aware_request( @@ -676,8 +737,9 @@ def send_offset_request(self, payloads=(), fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_list') def send_list_offset_request(self, payloads=(), fail_on_error=True, - callback=None): + callback=None): resps = self._send_broker_aware_request( payloads, KafkaProtocol.encode_list_offset_request, @@ -686,16 +748,33 @@ def send_list_offset_request(self, payloads=(), fail_on_error=True, return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_commit') def send_offset_commit_request(self, group, payloads=(), fail_on_error=True, callback=None): - encoder = functools.partial(KafkaProtocol.encode_offset_commit_request, - group=group) + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request, + group=group, + ) decoder = KafkaProtocol.decode_offset_commit_response resps = self._send_broker_aware_request(payloads, encoder, decoder) return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_commit_kafka') + def send_offset_commit_request_kafka(self, group, payloads=(), + fail_on_error=True, callback=None): + encoder = functools.partial( + KafkaProtocol.encode_offset_commit_request_kafka, + group=group, + ) + decoder = KafkaProtocol.decode_offset_commit_response + resps = self._send_consumer_aware_request(group, payloads, encoder, decoder) + + return [resp if not callback else callback(resp) for resp in resps + if not fail_on_error or not self._raise_on_response_error(resp)] + + @time_metric('offset_fetch') def send_offset_fetch_request(self, group, payloads=(), fail_on_error=True, callback=None): @@ -707,6 +786,7 @@ def send_offset_fetch_request(self, group, payloads=(), return [resp if not callback 
else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + @time_metric('offset_fetch_kafka') def send_offset_fetch_request_kafka(self, group, payloads=(), fail_on_error=True, callback=None): @@ -717,3 +797,31 @@ def send_offset_fetch_request_kafka(self, group, payloads=(), return [resp if not callback else callback(resp) for resp in resps if not fail_on_error or not self._raise_on_response_error(resp)] + + +class SimpleClientMetrics(object): + + def __init__(self, metrics): + self.metrics = metrics + self.group_name = 'simple-client' + self.request_timers = {} + + def record(self, request_name, value): + # Note: there is a possible race condition here when using async simple + # producer. A metric can be added twice to the same sensor and reported + # twice. This case should be extremely rare and shouldn't be too bad for + # metrics. + timer = self.request_timers.get(request_name) + if not timer: + timer = self.metrics.sensor(request_name.replace('_', '-')) + timer.add( + self.metrics.metric_name( + 'request-time-avg', + self.group_name, + "Time latency for request {}".format(request_name), + {'request-type': request_name.replace('_', '-')}, + ), + Avg(), + ) + self.request_timers[request_name] = timer + timer.record(value) diff --git a/kafka/client_async.py b/kafka/client_async.py index 77efac869..3ec4eadc2 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -207,6 +207,7 @@ def __init__(self, **configs): self._conns = Dict() # object to support weakrefs self._api_versions = None self._connecting = set() + self._sending = set() self._refresh_on_disconnects = True self._last_bootstrap = 0 self._bootstrap_fails = 0 @@ -267,9 +268,9 @@ def _conn_state_change(self, node_id, sock, conn): if node_id not in self._connecting: self._connecting.add(node_id) try: - self._selector.register(sock, selectors.EVENT_WRITE) + self._selector.register(sock, selectors.EVENT_WRITE, conn) except KeyError: - self._selector.modify(sock, selectors.EVENT_WRITE) + self._selector.modify(sock, selectors.EVENT_WRITE, conn) if self.cluster.is_bootstrap(node_id): self._last_bootstrap = time.time() @@ -532,6 +533,7 @@ def send(self, node_id, request, wakeup=True): # we will need to call send_pending_requests() # to trigger network I/O future = conn.send(request, blocking=False) + self._sending.add(conn) # Wakeup signal is useful in case another thread is # blocked waiting for incoming network traffic while holding @@ -588,11 +590,16 @@ def poll(self, timeout_ms=None, future=None): metadata_timeout_ms, idle_connection_timeout_ms, self.config['request_timeout_ms']) - timeout = max(0, timeout / 1000) # avoid negative timeouts + # if there are no requests in flight, do not block longer than the retry backoff + if self.in_flight_request_count() == 0: + timeout = min(timeout, self.config['retry_backoff_ms']) + timeout = max(0, timeout) # avoid negative timeouts - self._poll(timeout) + self._poll(timeout / 1000) - responses.extend(self._fire_pending_completed_requests()) + # called without the lock to avoid deadlock potential + # if handlers need to acquire locks + responses.extend(self._fire_pending_completed_requests()) # If all we had was a timeout (future is None) - only do one poll # If we do have a future, we keep looping until it is done @@ -601,14 +608,23 @@ def poll(self, timeout_ms=None, future=None): return responses + def _register_send_sockets(self): + while self._sending: + conn = self._sending.pop() + try: + key = self._selector.get_key(conn._sock) + 
events = key.events | selectors.EVENT_WRITE + self._selector.modify(key.fileobj, events, key.data) + except KeyError: + self._selector.register(conn._sock, selectors.EVENT_WRITE, conn) + def _poll(self, timeout): # This needs to be locked, but since it is only called from within the # locked section of poll(), there is no additional lock acquisition here processed = set() # Send pending requests first, before polling for responses - for conn in six.itervalues(self._conns): - conn.send_pending_requests() + self._register_send_sockets() start_select = time.time() ready = self._selector.select(timeout) @@ -620,7 +636,25 @@ def _poll(self, timeout): if key.fileobj is self._wake_r: self._clear_wake_fd() continue - elif not (events & selectors.EVENT_READ): + + # Send pending requests if socket is ready to write + if events & selectors.EVENT_WRITE: + conn = key.data + if conn.connecting(): + conn.connect() + else: + if conn.send_pending_requests_v2(): + # If send is complete, we dont need to track write readiness + # for this socket anymore + if key.events ^ selectors.EVENT_WRITE: + self._selector.modify( + key.fileobj, + key.events ^ selectors.EVENT_WRITE, + key.data) + else: + self._selector.unregister(key.fileobj) + + if not (events & selectors.EVENT_READ): continue conn = key.data processed.add(conn) @@ -917,6 +951,16 @@ def _maybe_close_oldest_connection(self): log.info('Closing idle connection %s, last active %d ms ago', conn_id, idle_ms) self.close(node_id=conn_id) + def bootstrap_connected(self): + """Return True if a bootstrap node is connected""" + for node_id in self._conns: + if not self.cluster.is_bootstrap(node_id): + continue + if self._conns[node_id].connected(): + return True + else: + return False + # OrderedDict requires python2.7+ try: diff --git a/kafka/cluster.py b/kafka/cluster.py index 28b71c9d1..19137de62 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -9,7 +9,7 @@ from kafka.vendor import six from kafka import errors as Errors -from kafka.conn import collect_hosts, dns_lookup +from kafka.conn import collect_hosts from kafka.future import Future from kafka.structs import BrokerMetadata, PartitionMetadata, TopicPartition @@ -65,6 +65,7 @@ def __init__(self, **configs): self.config[key] = configs[key] self._bootstrap_brokers = self._generate_bootstrap_brokers() + self._coordinator_brokers = {} def _generate_bootstrap_brokers(self): # collect_hosts does not perform DNS, so we should be fine to re-use @@ -96,7 +97,11 @@ def broker_metadata(self, broker_id): Returns: BrokerMetadata or None if not found """ - return self._brokers.get(broker_id) or self._bootstrap_brokers.get(broker_id) + return ( + self._brokers.get(broker_id) or + self._bootstrap_brokers.get(broker_id) or + self._coordinator_brokers.get(broker_id) + ) def partitions_for_topic(self, topic): """Return set of all partitions for topic (whether available or not) @@ -189,7 +194,7 @@ def request_update(self): with self._lock: self._need_update = True if not self._future or self._future.is_done: - self._future = Future() + self._future = Future() return self._future def topics(self, exclude_internal_topics=True): @@ -341,41 +346,28 @@ def add_group_coordinator(self, group, response): response (GroupCoordinatorResponse): broker response Returns: - bool: True if metadata is updated, False on error + string: coordinator node_id if metadata is updated, None on error """ log.debug("Updating coordinator for %s: %s", group, response) error_type = Errors.for_code(response.error_code) if error_type is not 
Errors.NoError: log.error("GroupCoordinatorResponse error: %s", error_type) self._groups[group] = -1 - return False + return - node_id = response.coordinator_id + # Use a coordinator-specific node id so that group requests + # get a dedicated connection + node_id = 'coordinator-{}'.format(response.coordinator_id) coordinator = BrokerMetadata( - response.coordinator_id, + node_id, response.host, response.port, None) - # Assume that group coordinators are just brokers - # (this is true now, but could diverge in future) - if node_id not in self._brokers: - self._brokers[node_id] = coordinator - - # If this happens, either brokers have moved without - # changing IDs, or our assumption above is wrong - else: - node = self._brokers[node_id] - if coordinator.host != node.host or coordinator.port != node.port: - log.error("GroupCoordinator metadata conflicts with existing" - " broker metadata. Coordinator: %s, Broker: %s", - coordinator, node) - self._groups[group] = node_id - return False - log.info("Group coordinator for %s is %s", group, coordinator) + self._coordinator_brokers[node_id] = coordinator self._groups[group] = node_id - return True + return node_id def with_partitions(self, partitions_to_add): """Returns a copy of cluster metadata with partitions added""" diff --git a/kafka/conn.py b/kafka/conn.py index 044d2d5d6..d352c84a9 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -25,6 +25,7 @@ import kafka.errors as Errors from kafka.future import Future from kafka.metrics.stats import Avg, Count, Max, Rate +from kafka.msk import AwsMskIamClient from kafka.oauth.abstract import AbstractTokenProvider from kafka.protocol.admin import SaslHandShakeRequest from kafka.protocol.commit import OffsetFetchRequest @@ -36,6 +37,7 @@ if six.PY2: ConnectionError = socket.error + TimeoutError = socket.error BlockingIOError = Exception log = logging.getLogger(__name__) @@ -56,7 +58,7 @@ SSLZeroReturnError = ssl.SSLZeroReturnError except AttributeError: # support older ssl libraries - log.warning('Old SSL module detected.' + log.debug('Old SSL module detected.' ' SSL error handling may not operate cleanly.' 
' Consider upgrading to Python 3.3 or 2.7.9') SSLEOFError = ssl.SSLError @@ -80,6 +82,12 @@ class SSLWantWriteError(Exception): gssapi = None GSSError = None +# needed for AWS_MSK_IAM authentication: +try: + from botocore.session import Session as BotoSession +except ImportError: + # no botocore available, will disable AWS_MSK_IAM mechanism + BotoSession = None AFI_NAMES = { socket.AF_UNSPEC: "unspecified", @@ -223,7 +231,7 @@ class BrokerConnection(object): 'sasl_oauth_token_provider': None } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') - SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER') + SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER', 'AWS_MSK_IAM') def __init__(self, host, port, afi, **configs): self.host = host @@ -268,6 +276,11 @@ def __init__(self, host, port, afi, **configs): token_provider = self.config['sasl_oauth_token_provider'] assert token_provider is not None, 'sasl_oauth_token_provider required for OAUTHBEARER sasl' assert callable(getattr(token_provider, "token", None)), 'sasl_oauth_token_provider must implement method #token()' + + if self.config['sasl_mechanism'] == 'AWS_MSK_IAM': + assert BotoSession is not None, 'AWS_MSK_IAM requires the "botocore" package' + assert self.config['security_protocol'] == 'SASL_SSL', 'AWS_MSK_IAM requires SASL_SSL' + # This is not a general lock / this class is not generally thread-safe yet # However, to avoid pushing responsibility for maintaining # per-connection locks to the upstream client, we will use this lock to @@ -288,6 +301,7 @@ def __init__(self, host, port, afi, **configs): self.state = ConnectionStates.DISCONNECTED self._reset_reconnect_backoff() self._sock = None + self._send_buffer = b'' self._ssl_context = None if self.config['ssl_context'] is not None: self._ssl_context = self.config['ssl_context'] @@ -463,6 +477,9 @@ def _wrap_ssl(self): log.info('%s: Loading SSL CA from %s', self, self.config['ssl_cafile']) self._ssl_context.load_verify_locations(self.config['ssl_cafile']) self._ssl_context.verify_mode = ssl.CERT_REQUIRED + else: + log.info('%s: Loading system default SSL CAs from %s', self, ssl.get_default_verify_paths()) + self._ssl_context.load_default_certs() if self.config['ssl_certfile'] and self.config['ssl_keyfile']: log.info('%s: Loading SSL Cert from %s', self, self.config['ssl_certfile']) log.info('%s: Loading SSL Key from %s', self, self.config['ssl_keyfile']) @@ -498,7 +515,7 @@ def _try_handshake(self): # old ssl in python2.6 will swallow all SSLErrors here... 
except (SSLWantReadError, SSLWantWriteError): pass - except (SSLZeroReturnError, ConnectionError, SSLEOFError): + except (SSLZeroReturnError, ConnectionError, TimeoutError, SSLEOFError): log.warning('SSL connection closed by server during handshake.') self.close(Errors.KafkaConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user @@ -547,12 +564,40 @@ def _handle_sasl_handshake_response(self, future, response): return self._try_authenticate_gssapi(future) elif self.config['sasl_mechanism'] == 'OAUTHBEARER': return self._try_authenticate_oauth(future) + elif self.config['sasl_mechanism'] == 'AWS_MSK_IAM': + return self._try_authenticate_aws_msk_iam(future) else: return future.failure( Errors.UnsupportedSaslMechanismError( 'kafka-python does not support SASL mechanism %s' % self.config['sasl_mechanism'])) + def _send_bytes(self, data): + """Send some data via non-blocking IO + + Note: this method is not synchronized internally; you should + always hold the _lock before calling + + Returns: number of bytes + Raises: socket exception + """ + total_sent = 0 + while total_sent < len(data): + try: + sent_bytes = self._sock.send(data[total_sent:]) + total_sent += sent_bytes + except (SSLWantReadError, SSLWantWriteError): + break + except (ConnectionError, TimeoutError) as e: + if six.PY2 and e.errno == errno.EWOULDBLOCK: + break + raise + except BlockingIOError: + if six.PY3: + break + raise + return total_sent + def _send_bytes_blocking(self, data): self._sock.settimeout(self.config['request_timeout_ms'] / 1000) total_sent = 0 @@ -589,21 +634,30 @@ def _try_authenticate_plain(self, future): self.config['sasl_plain_username'], self.config['sasl_plain_password']]).encode('utf-8')) size = Int32.encode(len(msg)) - try: - with self._lock: - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - self._send_bytes_blocking(size + msg) - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + self._send_bytes_blocking(size + msg) + + # The server will send a zero sized message (that is Int32(0)) on success. 
+ # The connection is closed on failure + data = self._recv_bytes_blocking(4) - except ConnectionError as e: - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) if data != b'\x00\x00\x00\x00': error = Errors.AuthenticationFailedError('Unrecognized response during authentication') @@ -612,6 +666,40 @@ def _try_authenticate_plain(self, future): log.info('%s: Authenticated as %s via PLAIN', self, self.config['sasl_plain_username']) return future.success(True) + def _try_authenticate_aws_msk_iam(self, future): + session = BotoSession() + client = AwsMskIamClient( + host=self.host, + boto_session=session, + ) + + msg = client.first_message() + size = Int32.encode(len(msg)) + + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + self._send_bytes_blocking(size + msg) + data = self._recv_bytes_blocking(4) + data = self._recv_bytes_blocking(struct.unpack('4B', data)[-1]) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) + + log.info('%s: Authenticated via AWS_MSK_IAM %s', self, data.decode('utf-8')) + return future.success(True) + def _try_authenticate_gssapi(self, future): kerberos_damin_name = self.config['sasl_kerberos_domain_name'] or self.host auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name @@ -621,61 +709,67 @@ def _try_authenticate_gssapi(self, future): ).canonicalize(gssapi.MechType.kerberos) log.debug('%s: GSSAPI name: %s', self, gssapi_name) - self._lock.acquire() - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - # Establish security context and negotiate protection level - # For reference RFC 2222, section 7.2.1 - try: - # Exchange tokens until authentication either succeeds or fails - client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') - received_token = None - while not client_ctx.complete: - # calculate an output token from kafka token (or None if first iteration) - output_token = client_ctx.step(received_token) - - # pass output token to kafka, or send empty response if the security - # context is complete (output token is None in that case) - if output_token is None: - self._send_bytes_blocking(Int32.encode(0)) - else: - msg = output_token + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + # Establish security context and negotiate protection level + # For reference RFC 2222, section 7.2.1 + try: + # Exchange tokens until authentication either succeeds or fails + client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') + received_token = None + while not client_ctx.complete: + # calculate an output token from kafka token (or None if first iteration) + output_token = client_ctx.step(received_token) + + # pass output token to kafka, or send empty response if the 
security + # context is complete (output token is None in that case) + if output_token is None: + self._send_bytes_blocking(Int32.encode(0)) + else: + msg = output_token + size = Int32.encode(len(msg)) + self._send_bytes_blocking(size + msg) + + # The server will send a token back. Processing of this token either + # establishes a security context, or it needs further token exchange. + # The gssapi will be able to identify the needed next step. + # The connection is closed on failure. + header = self._recv_bytes_blocking(4) + (token_size,) = struct.unpack('>i', header) + received_token = self._recv_bytes_blocking(token_size) + + # Process the security layer negotiation token, sent by the server + # once the security context is established. + + # unwraps message containing supported protection levels and msg size + msg = client_ctx.unwrap(received_token).message + # Kafka currently doesn't support integrity or confidentiality security layers, so we + # simply set QoP to 'auth' only (first octet). We reuse the max message size proposed + # by the server + msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] + # add authorization identity to the response, GSS-wrap and send it + msg = client_ctx.wrap(msg + auth_id.encode(), False).message size = Int32.encode(len(msg)) self._send_bytes_blocking(size + msg) - # The server will send a token back. Processing of this token either - # establishes a security context, or it needs further token exchange. - # The gssapi will be able to identify the needed next step. - # The connection is closed on failure. - header = self._recv_bytes_blocking(4) - (token_size,) = struct.unpack('>i', header) - received_token = self._recv_bytes_blocking(token_size) - - # Process the security layer negotiation token, sent by the server - # once the security context is established. - - # unwraps message containing supported protection levels and msg size - msg = client_ctx.unwrap(received_token).message - # Kafka currently doesn't support integrity or confidentiality security layers, so we - # simply set QoP to 'auth' only (first octet). 
We reuse the max message size proposed - # by the server - msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] - # add authorization identity to the response, GSS-wrap and send it - msg = client_ctx.wrap(msg + auth_id.encode(), False).message - size = Int32.encode(len(msg)) - self._send_bytes_blocking(size + msg) - - except ConnectionError as e: - self._lock.release() - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) - except Exception as e: - self._lock.release() - return future.failure(e) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True + except Exception as e: + err = e + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) - self._lock.release() log.info('%s: Authenticated as %s via GSSAPI', self, gssapi_name) return future.success(True) @@ -684,25 +778,31 @@ def _try_authenticate_oauth(self, future): msg = bytes(self._build_oauth_client_request().encode("utf-8")) size = Int32.encode(len(msg)) - self._lock.acquire() - if not self._can_send_recv(): - return future.failure(Errors.NodeNotReadyError(str(self))) - try: - # Send SASL OAuthBearer request with OAuth token - self._send_bytes_blocking(size + msg) - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + # Send SASL OAuthBearer request with OAuth token + self._send_bytes_blocking(size + msg) - except ConnectionError as e: - self._lock.release() - log.exception("%s: Error receiving reply from server", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) - self.close(error=error) - return future.failure(error) + # The server will send a zero sized message (that is Int32(0)) on success. + # The connection is closed on failure + data = self._recv_bytes_blocking(4) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + close = True - self._lock.release() + if err is not None: + if close: + self.close(error=err) + return future.failure(err) if data != b'\x00\x00\x00\x00': error = Errors.AuthenticationFailedError('Unrecognized response during authentication') @@ -744,16 +844,16 @@ def connection_delay(self): """ Return the number of milliseconds to wait, based on the connection state, before attempting to send data. When disconnected, this respects - the reconnect backoff time. When connecting, returns 0 to allow - non-blocking connect to finish. When connected, returns a very large - number to handle slow/stalled connections. + the reconnect backoff time. When connecting or connected, returns a very + large number to handle slow/stalled connections. """ time_waited = time.time() - (self.last_attempt or 0) if self.state is ConnectionStates.DISCONNECTED: return max(self._reconnect_backoff - time_waited, 0) * 1000 - elif self.connecting(): - return 0 else: + # When connecting or connected, we should be able to delay + # indefinitely since other events (connection or data acked) will + # cause a wakeup once data can be sent. 
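            # (Illustrative note, not part of the patch): returning infinity here
            # means the poll loop never wakes up solely on this connection's
            # behalf; reconnect backoff only applies in the DISCONNECTED branch
            # above.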
return float('inf') def connected(self): @@ -814,6 +914,7 @@ def close(self, error=None): self._protocol = KafkaProtocol( client_id=self.config['client_id'], api_version=self.config['api_version']) + self._send_buffer = b'' if error is None: error = Errors.Cancelled(str(self)) ifrs = list(self.in_flight_requests.items()) @@ -853,6 +954,9 @@ def _send(self, request, blocking=True): future = Future() with self._lock: if not self._can_send_recv(): + # In this case, since we created the future above, + # we know there are no callbacks/errbacks that could fire w/ + # lock. So failing + returning inline should be safe return future.failure(Errors.NodeNotReadyError(str(self))) correlation_id = self._protocol.send_request(request) @@ -873,24 +977,60 @@ def _send(self, request, blocking=True): return future def send_pending_requests(self): - """Can block on network if request is larger than send_buffer_bytes""" + """Attempts to send pending requests messages via blocking IO + If all requests have been sent, return True + Otherwise, if the socket is blocked and there are more bytes to send, + return False. + """ try: with self._lock: if not self._can_send_recv(): - return Errors.NodeNotReadyError(str(self)) - # In the future we might manage an internal write buffer - # and send bytes asynchronously. For now, just block - # sending each request payload + return False data = self._protocol.send_bytes() total_bytes = self._send_bytes_blocking(data) + if self._sensors: self._sensors.bytes_sent.record(total_bytes) - return total_bytes - except ConnectionError as e: + return True + + except (ConnectionError, TimeoutError) as e: log.exception("Error sending request data to %s", self) error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) - return error + return False + + def send_pending_requests_v2(self): + """Attempts to send pending requests messages via non-blocking IO + If all requests have been sent, return True + Otherwise, if the socket is blocked and there are more bytes to send, + return False. 
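        (Illustrative note: any bytes the socket does not accept are kept in
        self._send_buffer and retried on the next call, so the caller should
        keep the socket registered for EVENT_WRITE until this returns True.)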
+ """ + try: + with self._lock: + if not self._can_send_recv(): + return False + + # _protocol.send_bytes returns encoded requests to send + # we send them via _send_bytes() + # and hold leftover bytes in _send_buffer + if not self._send_buffer: + self._send_buffer = self._protocol.send_bytes() + + total_bytes = 0 + if self._send_buffer: + total_bytes = self._send_bytes(self._send_buffer) + self._send_buffer = self._send_buffer[total_bytes:] + + if self._sensors: + self._sensors.bytes_sent.record(total_bytes) + # Return True iff send buffer is empty + return len(self._send_buffer) == 0 + + except (ConnectionError, TimeoutError, Exception) as e: + log.exception("Error sending request data to %s", self) + error = Errors.KafkaConnectionError("%s: %s" % (self, e)) + self.close(error=error) + return False def can_send_more(self): """Return True unless there are max_in_flight_requests_per_connection.""" @@ -911,7 +1051,7 @@ def recv(self): self.config['request_timeout_ms'])) return () - # augment respones w/ correlation_id, future, and timestamp + # augment responses w/ correlation_id, future, and timestamp for i, (correlation_id, response) in enumerate(responses): try: with self._lock: @@ -931,56 +1071,57 @@ def recv(self): def _recv(self): """Take all available bytes from socket, return list of any responses from parser""" recvd = [] - self._lock.acquire() - if not self._can_send_recv(): - log.warning('%s cannot recv: socket not connected', self) - self._lock.release() - return () - - while len(recvd) < self.config['sock_chunk_buffer_count']: - try: - data = self._sock.recv(self.config['sock_chunk_bytes']) - # We expect socket.recv to raise an exception if there are no - # bytes available to read from the socket in non-blocking mode. - # but if the socket is disconnected, we will get empty data - # without an exception raised - if not data: - log.error('%s: socket disconnected', self) - self._lock.release() - self.close(error=Errors.KafkaConnectionError('socket disconnected')) - return [] - else: - recvd.append(data) + err = None + with self._lock: + if not self._can_send_recv(): + log.warning('%s cannot recv: socket not connected', self) + return () - except SSLWantReadError: - break - except ConnectionError as e: - if six.PY2 and e.errno == errno.EWOULDBLOCK: + while len(recvd) < self.config['sock_chunk_buffer_count']: + try: + data = self._sock.recv(self.config['sock_chunk_bytes']) + # We expect socket.recv to raise an exception if there are no + # bytes available to read from the socket in non-blocking mode. 
+ # but if the socket is disconnected, we will get empty data + # without an exception raised + if not data: + log.error('%s: socket disconnected', self) + err = Errors.KafkaConnectionError('socket disconnected') + break + else: + recvd.append(data) + + except (SSLWantReadError, SSLWantWriteError): break - log.exception('%s: Error receiving network data' - ' closing socket', self) - self._lock.release() - self.close(error=Errors.KafkaConnectionError(e)) - return [] - except BlockingIOError: - if six.PY3: + except (ConnectionError, TimeoutError) as e: + if six.PY2 and e.errno == errno.EWOULDBLOCK: + break + log.exception('%s: Error receiving network data' + ' closing socket', self) + err = Errors.KafkaConnectionError(e) break - self._lock.release() - raise - - recvd_data = b''.join(recvd) - if self._sensors: - self._sensors.bytes_received.record(len(recvd_data)) - - try: - responses = self._protocol.receive_bytes(recvd_data) - except Errors.KafkaProtocolError as e: - self._lock.release() - self.close(e) - return [] - else: - self._lock.release() - return responses + except BlockingIOError: + if six.PY3: + break + # For PY2 this is a catchall and should be re-raised + raise + + # Only process bytes if there was no connection exception + if err is None: + recvd_data = b''.join(recvd) + if self._sensors: + self._sensors.bytes_received.record(len(recvd_data)) + + # We need to keep the lock through protocol receipt + # so that we ensure that the processed byte order is the + # same as the received byte order + try: + return self._protocol.receive_bytes(recvd_data) + except Errors.KafkaProtocolError as e: + err = e + + self.close(error=err) + return () def requests_timed_out(self): with self._lock: diff --git a/kafka/consumer/base.py b/kafka/consumer/base.py index a77ce7ea0..9eb695fe5 100644 --- a/kafka/consumer/base.py +++ b/kafka/consumer/base.py @@ -43,7 +43,8 @@ class Consumer(object): """ def __init__(self, client, group, topic, partitions=None, auto_commit=True, auto_commit_every_n=AUTO_COMMIT_MSG_COUNT, - auto_commit_every_t=AUTO_COMMIT_INTERVAL): + auto_commit_every_t=AUTO_COMMIT_INTERVAL, + offset_storage='zookeeper'): warnings.warn('deprecated -- this class will be removed in a future' ' release. 
Use KafkaConsumer instead.', @@ -66,6 +67,7 @@ def __init__(self, client, group, topic, partitions=None, auto_commit=True, self.auto_commit = auto_commit self.auto_commit_every_n = auto_commit_every_n self.auto_commit_every_t = auto_commit_every_t + self.offset_storage = offset_storage # Set up the auto-commit timer if auto_commit is True and auto_commit_every_t is not None: @@ -101,11 +103,19 @@ def fetch_last_known_offsets(self, partitions=None): if partitions is None: partitions = self.client.get_partition_ids_for_topic(self.topic) - responses = self.client.send_offset_fetch_request( - self.group, - [OffsetFetchRequestPayload(self.topic, p) for p in partitions], - fail_on_error=False - ) + responses = [] + if self.offset_storage in ['zookeeper', 'dual']: + responses += self.client.send_offset_fetch_request( + self.group, + [OffsetFetchRequestPayload(self.topic, p) for p in partitions], + fail_on_error=False + ) + if self.offset_storage in ['kafka', 'dual']: + responses += self.client.send_offset_fetch_request_kafka( + self.group, + [OffsetFetchRequestPayload(self.topic, p) for p in partitions], + fail_on_error=False + ) for resp in responses: try: @@ -115,14 +125,15 @@ def fetch_last_known_offsets(self, partitions=None): except UnknownTopicOrPartitionError: pass + prev = self.offsets.get(resp.partition, 0) # -1 offset signals no commit is currently stored if resp.offset == -1: - self.offsets[resp.partition] = 0 + self.offsets[resp.partition] = prev # Otherwise we committed the stored offset # and need to fetch the next one else: - self.offsets[resp.partition] = resp.offset + self.offsets[resp.partition] = max(prev, resp.offset) def commit(self, partitions=None): """Commit stored offsets to Kafka via OffsetCommitRequest (v0) @@ -161,7 +172,10 @@ def commit(self, partitions=None): offset, None)) try: - self.client.send_offset_commit_request(self.group, reqs) + if self.offset_storage in ['zookeeper', 'dual']: + self.client.send_offset_commit_request(self.group, reqs) + if self.offset_storage in ['kafka', 'dual']: + self.client.send_offset_commit_request_kafka(self.group, reqs) except KafkaError as e: log.error('%s saving offsets: %s', e.__class__.__name__, e) return False diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 36e269f19..cb355b20f 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -123,7 +123,7 @@ def send_fetches(self): for node_id, request in six.iteritems(self._create_fetch_requests()): if self._client.ready(node_id): log.debug("Sending FetchRequest to node %s", node_id) - future = self._client.send(node_id, request) + future = self._client.send(node_id, request, wakeup=False) future.add_callback(self._handle_fetch_response, request, time.time()) future.add_errback(log.error, 'Fetch to node %s failed: %s', node_id) futures.append(future) @@ -235,14 +235,16 @@ def _reset_offset(self, partition): log.debug("Resetting offset for partition %s to %s offset.", partition, strategy) offsets = self._retrieve_offsets({partition: timestamp}) - if partition not in offsets: - raise NoOffsetForPartitionError(partition) - offset = offsets[partition][0] - # we might lose the assignment while fetching the offset, - # so check it is still active - if self._subscriptions.is_assigned(partition): - self._subscriptions.seek(partition, offset) + if partition in offsets: + offset = offsets[partition][0] + + # we might lose the assignment while fetching the offset, + # so check it is still active + if self._subscriptions.is_assigned(partition): + 
self._subscriptions.seek(partition, offset) + else: + log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,)) def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): """Fetch offset for each partition passed in ``timestamps`` map. @@ -266,7 +268,11 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): start_time = time.time() remaining_ms = timeout_ms + timestamps = copy.copy(timestamps) while remaining_ms > 0: + if not timestamps: + return {} + future = self._send_offset_requests(timestamps) self._client.poll(future=future, timeout_ms=remaining_ms) @@ -283,6 +289,15 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): if future.exception.invalid_metadata: refresh_future = self._client.cluster.request_update() self._client.poll(future=refresh_future, timeout_ms=remaining_ms) + + # Issue #1780 + # Recheck partition existance after after a successful metadata refresh + if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata): + log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existance") + unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata + if self._client.cluster.leader_for_partition(unknown_partition) is None: + log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, )) + timestamps.pop(unknown_partition) else: time.sleep(self.config['retry_backoff_ms'] / 1000.0) @@ -292,7 +307,7 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): raise Errors.KafkaTimeoutError( "Failed to get offsets by timestamps in %s ms" % (timeout_ms,)) - def fetched_records(self, max_records=None): + def fetched_records(self, max_records=None, update_offsets=True): """Returns previously fetched records and updates consumed offsets. Arguments: @@ -330,10 +345,11 @@ def fetched_records(self, max_records=None): else: records_remaining -= self._append(drained, self._next_partition_records, - records_remaining) + records_remaining, + update_offsets) return dict(drained), bool(self._completed_fetches) - def _append(self, drained, part, max_records): + def _append(self, drained, part, max_records, update_offsets): if not part: return 0 @@ -366,7 +382,8 @@ def _append(self, drained, part, max_records): for record in part_records: drained[tp].append(record) - self._subscriptions.assignment[tp].position = next_offset + if update_offsets: + self._subscriptions.assignment[tp].position = next_offset return len(part_records) else: @@ -447,6 +464,13 @@ def _unpack_message_set(self, tp, records): except AttributeError: pass + # Control messages are used to enable transactions in Kafka and are generated by the + # broker. Clients should not return control batches (ie. those with this bit set) to + # applications. 
(since 0.11.0.0) + if getattr(batch, "is_control_batch", False): + batch = records.next_batch() + continue + for record in batch: key_size = len(record.key) if record.key is not None else -1 value_size = len(record.value) if record.value is not None else -1 @@ -797,11 +821,12 @@ def _parse_fetched_data(self, completed_fetch): " offset %d to buffered record list", tp, position) unpacked = list(self._unpack_message_set(tp, records)) - parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) - last_offset = unpacked[-1].offset - self._sensors.records_fetch_lag.record(highwater - last_offset) - num_bytes = records.valid_bytes() - records_count = len(unpacked) + if unpacked: + parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) + last_offset = unpacked[-1].offset + self._sensors.records_fetch_lag.record(highwater - last_offset) + num_bytes = records.valid_bytes() + records_count = len(unpacked) elif records.size_in_bytes() > 0: # we did not read a single message from a non-empty # buffer because that message's size is larger than diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index b3e182c5d..15c2905d5 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -209,7 +209,7 @@ class KafkaConsumer(six.Iterator): Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker - api version. Only applies if api_version set to 'auto' + api version. Only applies if api_version set to None. connections_max_idle_ms: Close idle connections after the number of milliseconds specified by this config. The broker closes idle connections after connections.max.idle.ms, so this avoids hitting @@ -245,7 +245,7 @@ class KafkaConsumer(six.Iterator): Note: Configuration parameters are described in more detail at - https://kafka.apache.org/documentation/#newconsumerconfigs + https://kafka.apache.org/documentation/#consumerconfigs """ DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', @@ -302,7 +302,8 @@ class KafkaConsumer(six.Iterator): 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'legacy_iterator': False, # enable to revert to < 1.4.7 iterator } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 @@ -390,6 +391,10 @@ def __init__(self, *topics, **configs): self._subscription.subscribe(topics=topics) self._client.set_topics(topics) + def bootstrap_connected(self): + """Return True if the bootstrap is connected.""" + return self._client.bootstrap_connected() + def assign(self, partitions): """Manually assign a list of TopicPartitions to this consumer. @@ -552,11 +557,9 @@ def committed(self, partition): committed = None return committed - def topics(self): - """Get all topics the user is authorized to view. - - Returns: - set: topics + def _fetch_all_topic_metadata(self): + """A blocking call that fetches topic metadata for all topics in the + cluster that the user is authorized to view. """ cluster = self._client.cluster if self._client._metadata_refresh_in_progress and self._client._topics: @@ -567,10 +570,24 @@ def topics(self): future = cluster.request_update() self._client.poll(future=future) cluster.need_all_topic_metadata = stash - return cluster.topics() + + def topics(self): + """Get all topics the user is authorized to view. + This will always issue a remote call to the cluster to fetch the latest + information. 
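As a usage note on the metadata helpers reworked in this hunk: `topics()` always triggers a remote metadata fetch, while `partitions_for_topic()` becomes a read-through cache. A minimal sketch, assuming a reachable broker at localhost:9092 and an existing topic named my-topic (both placeholders):

```python
from kafka import KafkaConsumer

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')

# Always issues a remote metadata call, so the result reflects the latest
# set of topics this client is authorized to view.
all_topics = consumer.topics()

# Checks the local metadata cache first and only requests a metadata update
# if the topic is not found locally.
partitions = consumer.partitions_for_topic('my-topic')

print(sorted(all_topics), partitions)
consumer.close()
```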
+ + Returns: + set: topics + """ + self._fetch_all_topic_metadata() + return self._client.cluster.topics() def partitions_for_topic(self, topic): - """Get metadata about the partitions for a given topic. + """This method first checks the local metadata cache for information + about the topic. If the topic is not found (either because the topic + does not exist, the user is not authorized to view the topic, or the + metadata cache is not populated), then it will issue a metadata update + call to the cluster. Arguments: topic (str): Topic to check. @@ -578,9 +595,14 @@ def partitions_for_topic(self, topic): Returns: set: Partition ids """ - return self._client.cluster.partitions_for_topic(topic) + cluster = self._client.cluster + partitions = cluster.partitions_for_topic(topic) + if partitions is None: + self._fetch_all_topic_metadata() + partitions = cluster.partitions_for_topic(topic) + return partitions - def poll(self, timeout_ms=0, max_records=None): + def poll(self, timeout_ms=0, max_records=None, update_offsets=True): """Fetch data from assigned topics / partitions. Records are fetched and returned in batches by topic-partition. @@ -604,6 +626,12 @@ def poll(self, timeout_ms=0, max_records=None): dict: Topic to list of records since the last fetch for the subscribed list of topics and partitions. """ + # Note: update_offsets is an internal-use only argument. It is used to + # support the python iterator interface, and which wraps consumer.poll() + # and requires that the partition offsets tracked by the fetcher are not + # updated until the iterator returns each record to the user. As such, + # the argument is not documented and should not be relied on by library + # users to not break in the future. assert timeout_ms >= 0, 'Timeout must not be negative' if max_records is None: max_records = self.config['max_poll_records'] @@ -614,7 +642,7 @@ def poll(self, timeout_ms=0, max_records=None): start = time.time() remaining = timeout_ms while True: - records = self._poll_once(remaining, max_records) + records = self._poll_once(remaining, max_records, update_offsets=update_offsets) if records: return records @@ -624,7 +652,7 @@ def poll(self, timeout_ms=0, max_records=None): if remaining <= 0: return {} - def _poll_once(self, timeout_ms, max_records): + def _poll_once(self, timeout_ms, max_records, update_offsets=True): """Do one round of polling. In addition to checking for new data, this does any needed heart-beating, auto-commits, and offset updates. @@ -643,18 +671,22 @@ def _poll_once(self, timeout_ms, max_records): # If data is available already, e.g. from a previous network client # poll() call to commit, then just return it immediately - records, partial = self._fetcher.fetched_records(max_records) + records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) if records: # Before returning the fetched records, we can send off the # next round of fetches and avoid block waiting for their # responses to enable pipelining while the user is handling the # fetched records. 
if not partial: - self._fetcher.send_fetches() + futures = self._fetcher.send_fetches() + if len(futures): + self._client.poll(timeout_ms=0) return records # Send any new fetches (won't resend pending fetches) - self._fetcher.send_fetches() + futures = self._fetcher.send_fetches() + if len(futures): + self._client.poll(timeout_ms=0) timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000) self._client.poll(timeout_ms=timeout_ms) @@ -663,7 +695,7 @@ def _poll_once(self, timeout_ms, max_records): if self._coordinator.need_rejoin(): return {} - records, _ = self._fetcher.fetched_records(max_records) + records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) return records def position(self, partition): @@ -726,6 +758,9 @@ def pause(self, *partitions): for partition in partitions: log.debug("Pausing partition %s", partition) self._subscription.pause(partition) + # Because the iterator checks is_fetchable() on each iteration + # we expect pauses to get handled automatically and therefore + # we do not need to reset the full iterator (forcing a full refetch) def paused(self): """Get the partitions that were previously paused using @@ -773,6 +808,8 @@ def seek(self, partition, offset): assert partition in self._subscription.assigned_partitions(), 'Unassigned partition' log.debug("Seeking to offset %s for partition %s", offset, partition) self._subscription.assignment[partition].seek(offset) + if not self.config['legacy_iterator']: + self._iterator = None def seek_to_beginning(self, *partitions): """Seek to the oldest available offset for partitions. @@ -797,6 +834,8 @@ def seek_to_beginning(self, *partitions): for tp in partitions: log.debug("Seeking to beginning of partition %s", tp) self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST) + if not self.config['legacy_iterator']: + self._iterator = None def seek_to_end(self, *partitions): """Seek to the most recent available offset for partitions. @@ -821,6 +860,8 @@ def seek_to_end(self, *partitions): for tp in partitions: log.debug("Seeking to end of partition %s", tp) self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST) + if not self.config['legacy_iterator']: + self._iterator = None def subscribe(self, topics=(), pattern=None, listener=None): """Subscribe to a list of topics, or a topic regex pattern. @@ -896,12 +937,14 @@ def unsubscribe(self): self._client.cluster.need_all_topic_metadata = False self._client.set_topics([]) log.debug("Unsubscribed all topics or patterns and assigned partitions") + if not self.config['legacy_iterator']: + self._iterator = None def metrics(self, raw=False): """Get metrics on consumer performance. This is ported from the Java Consumer, for details see: - https://kafka.apache.org/documentation/#new_consumer_monitoring + https://kafka.apache.org/documentation/#consumer_monitoring Warning: This is an unstable interface. It may change in future @@ -1058,6 +1101,25 @@ def _update_fetch_positions(self, partitions): # Then, do any offset lookups in case some positions are not known self._fetcher.update_fetch_positions(partitions) + def _message_generator_v2(self): + timeout_ms = 1000 * (self._consumer_timeout - time.time()) + record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False) + for tp, records in six.iteritems(record_map): + # Generators are stateful, and it is possible that the tp / records + # here may become stale during iteration -- i.e., we seek to a + # different offset, pause consumption, or lose assignment. 
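Because the new iterator path is just a thin wrapper around `poll()` (called with `update_offsets=False` so positions only advance as each record is handed to the caller), plain iteration and explicit polling are interchangeable. A hedged sketch of both styles; the broker address, group, and topic are placeholders, and the commented `legacy_iterator=True` line shows how to opt back into the previous iterator:

```python
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',                          # placeholder topic
    bootstrap_servers='localhost:9092',  # placeholder broker
    group_id='my-group',                 # placeholder group
    # legacy_iterator=True,              # revert to the pre-existing iterator implementation
    consumer_timeout_ms=10000,           # stop iterating after 10s without records
)

# Style 1: plain iteration, which now delegates to consumer.poll() internally.
for message in consumer:
    print(message.topic, message.partition, message.offset, message.value)

# Style 2: explicit polling; records are returned batched by TopicPartition.
batch = consumer.poll(timeout_ms=1000)
for tp, messages in batch.items():
    for message in messages:
        print(tp, message.offset)
```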
+ for record in records: + # is_fetchable(tp) should handle assignment changes and offset + # resets; for all other changes (e.g., seeks) we'll rely on the + # outer function destroying the existing iterator/generator + # via self._iterator = None + if not self._subscription.is_fetchable(tp): + log.debug("Not returning fetched records for partition %s" + " since it is no longer fetchable", tp) + break + self._subscription.assignment[tp].position = record.offset + 1 + yield record + def _message_generator(self): assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment' while time.time() < self._consumer_timeout: @@ -1069,9 +1131,7 @@ def _message_generator(self): partitions = self._subscription.missing_fetch_positions() self._update_fetch_positions(partitions) - poll_ms = 1000 * (self._consumer_timeout - time.time()) - if not self._fetcher.in_flight_fetches(): - poll_ms = min(poll_ms, self.config['reconnect_backoff_ms']) + poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms']) self._client.poll(timeout_ms=poll_ms) # after the long poll, we should check whether the group needs to rebalance @@ -1112,6 +1172,26 @@ def __iter__(self): # pylint: disable=non-iterator-returned return self def __next__(self): + # Now that the heartbeat thread runs in the background + # there should be no reason to maintain a separate iterator + # but we'll keep it available for a few releases just in case + if self.config['legacy_iterator']: + return self.next_v1() + else: + return self.next_v2() + + def next_v2(self): + self._set_consumer_timeout() + while time.time() < self._consumer_timeout: + if not self._iterator: + self._iterator = self._message_generator_v2() + try: + return next(self._iterator) + except StopIteration: + self._iterator = None + raise StopIteration() + + def next_v1(self): if not self._iterator: self._iterator = self._message_generator() diff --git a/kafka/consumer/kafka.py b/kafka/consumer/kafka.py new file mode 100644 index 000000000..e16227f19 --- /dev/null +++ b/kafka/consumer/kafka.py @@ -0,0 +1,838 @@ +from __future__ import absolute_import + +from collections import namedtuple +from copy import deepcopy +import logging +import random +import sys +import time + +import six + +from kafka.client import SimpleClient +from kafka.common import ( + check_error, NotLeaderForPartitionError, UnknownTopicOrPartitionError, + OffsetOutOfRangeError, RequestTimedOutError, KafkaMessage, ConsumerTimeout, + FailedPayloadsError, KafkaUnavailableError, KafkaConfigurationError +) +from kafka.metrics.metrics import Metrics +from kafka.metrics.stats.rate import Rate +from kafka.protocol.message import PartialMessage +from kafka.structs import ( + FetchRequestPayload, OffsetCommitRequestPayload, OffsetFetchRequestPayload, + OffsetRequestPayload +) + +logger = logging.getLogger(__name__) + +OffsetsStruct = namedtuple("OffsetsStruct", ["fetch", "highwater", "commit", "task_done"]) + +DEFAULT_CONSUMER_CONFIG = { + 'client_id': __name__, + 'group_id': None, + 'bootstrap_servers': [], + 'socket_timeout_ms': 30 * 1000, + 'fetch_message_max_bytes': 1024 * 1024, + 'auto_offset_reset': 'largest', + 'fetch_min_bytes': 1, + 'fetch_wait_max_ms': 100, + 'refresh_leader_backoff_ms': 200, + 'deserializer_class': lambda msg: msg, + 'auto_commit_enable': False, + 'auto_commit_interval_ms': 60 * 1000, + 'auto_commit_interval_messages': None, + 'consumer_timeout_ms': -1, + 'metrics_reporter': None, + 'offset_storage': 'zookeeper', 
+ + # Currently unused + 'socket_receive_buffer_bytes': 64 * 1024, + 'num_consumer_fetchers': 1, + 'default_fetcher_backoff_ms': 1000, + 'queued_max_message_chunks': 10, + 'rebalance_max_retries': 4, + 'rebalance_backoff_ms': 2000, +} + +DEPRECATED_CONFIG_KEYS = { + 'metadata_broker_list': 'bootstrap_servers', +} + + +class KafkaConsumer(object): + """A simpler kafka consumer""" + DEFAULT_CONFIG = deepcopy(DEFAULT_CONSUMER_CONFIG) + + def __init__(self, *topics, **configs): + self.configure(**configs) + self.set_topic_partitions(*topics) + + def configure(self, **configs): + """Configure the consumer instance + + Configuration settings can be passed to constructor, + otherwise defaults will be used: + + Keyword Arguments: + bootstrap_servers (list): List of initial broker nodes the consumer + should contact to bootstrap initial cluster metadata. This does + not have to be the full node list. It just needs to have at + least one broker that will respond to a Metadata API Request. + client_id (str): a unique name for this client. Defaults to + 'kafka.consumer.kafka'. + group_id (str): the name of the consumer group to join, + Offsets are fetched / committed to this group name. + fetch_message_max_bytes (int, optional): Maximum bytes for each + topic/partition fetch request. Defaults to 1024*1024. + fetch_min_bytes (int, optional): Minimum amount of data the server + should return for a fetch request, otherwise wait up to + fetch_wait_max_ms for more data to accumulate. Defaults to 1. + fetch_wait_max_ms (int, optional): Maximum time for the server to + block waiting for fetch_min_bytes messages to accumulate. + Defaults to 100. + refresh_leader_backoff_ms (int, optional): Milliseconds to backoff + when refreshing metadata on errors (subject to random jitter). + Defaults to 200. + socket_timeout_ms (int, optional): TCP socket timeout in + milliseconds. Defaults to 30*1000. + auto_offset_reset (str, optional): A policy for resetting offsets on + OffsetOutOfRange errors. 'smallest' will move to the oldest + available message, 'largest' will move to the most recent. Any + ofther value will raise the exception. Defaults to 'largest'. + deserializer_class (callable, optional): Any callable that takes a + raw message value and returns a deserialized value. Defaults to + lambda msg: msg. + auto_commit_enable (bool, optional): Enabling auto-commit will cause + the KafkaConsumer to periodically commit offsets without an + explicit call to commit(). Defaults to False. + auto_commit_interval_ms (int, optional): If auto_commit_enabled, + the milliseconds between automatic offset commits. Defaults to + 60 * 1000. + auto_commit_interval_messages (int, optional): If + auto_commit_enabled, a number of messages consumed between + automatic offset commits. Defaults to None (disabled). + consumer_timeout_ms (int, optional): number of millisecond to throw + a timeout exception to the consumer if no message is available + for consumption. Defaults to -1 (dont throw exception). 
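A hedged configuration sketch for this legacy consumer, imported from the module path introduced above and using only options documented in this docstring plus the new `offset_storage` setting; the broker list, group, and topic are placeholders:

```python
from kafka.common import ConsumerTimeout
from kafka.consumer.kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',                            # placeholder topic (must exist in broker metadata)
    bootstrap_servers=['localhost:9092'],  # placeholder broker list
    group_id='my-group',                   # placeholder group
    auto_commit_enable=True,
    auto_commit_interval_ms=30 * 1000,
    auto_offset_reset='smallest',
    consumer_timeout_ms=5000,              # raise ConsumerTimeout after 5s without messages
    offset_storage='dual',                 # fetch/commit offsets via both zookeeper and kafka
)

try:
    for message in consumer:
        print(message.topic, message.partition, message.offset, message.value)
except ConsumerTimeout:
    pass  # no message arrived within consumer_timeout_ms
```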
+ + Configuration parameters are described in more detail at + http://kafka.apache.org/documentation.html#highlevelconsumerapi + """ + configs = self._deprecate_configs(**configs) + self._config = {} + for key in self.DEFAULT_CONFIG: + self._config[key] = configs.pop(key, self.DEFAULT_CONFIG[key]) + + if configs: + raise KafkaConfigurationError('Unknown configuration key(s): ' + + str(list(configs.keys()))) + + if self._config['auto_commit_enable']: + if not self._config['group_id']: + raise KafkaConfigurationError( + 'KafkaConsumer configured to auto-commit ' + 'without required consumer group (group_id)' + ) + + # Check auto-commit configuration + if self._config['auto_commit_enable']: + logger.info("Configuring consumer to auto-commit offsets") + self._reset_auto_commit() + + if not self._config['bootstrap_servers']: + raise KafkaConfigurationError( + 'bootstrap_servers required to configure KafkaConsumer' + ) + + reporters = [self._config['metrics_reporter']()] if \ + self._config['metrics_reporter'] else [] + metrics = Metrics(reporters=reporters) + self.metrics = KafkaConsumerMetrics(metrics) + + self._client = SimpleClient( + self._config['bootstrap_servers'], + client_id=self._config['client_id'], + timeout=(self._config['socket_timeout_ms'] / 1000.0), + metrics=metrics, + ) + + def set_topic_partitions(self, *topics): + """ + Set the topic/partitions to consume + Optionally specify offsets to start from + + Accepts types: + + * str (utf-8): topic name (will consume all available partitions) + * tuple: (topic, partition) + * dict: + - { topic: partition } + - { topic: [partition list] } + - { topic: (partition tuple,) } + + Optionally, offsets can be specified directly: + + * tuple: (topic, partition, offset) + * dict: { (topic, partition): offset, ... } + + Example: + + .. code:: python + + kafka = KafkaConsumer() + + # Consume topic1-all; topic2-partition2; topic3-partition0 + kafka.set_topic_partitions("topic1", ("topic2", 2), {"topic3": 0}) + + # Consume topic1-0 starting at offset 12, and topic2-1 at offset 45 + # using tuples -- + kafka.set_topic_partitions(("topic1", 0, 12), ("topic2", 1, 45)) + + # using dict -- + kafka.set_topic_partitions({ ("topic1", 0): 12, ("topic2", 1): 45 }) + + """ + self._topics = [] + self._client.load_metadata_for_topics() + + # Setup offsets + self._offsets = OffsetsStruct(fetch=dict(), + commit=dict(), + highwater=dict(), + task_done=dict()) + + # Handle different topic types + for arg in topics: + + # Topic name str -- all partitions + if isinstance(arg, (six.string_types, six.binary_type)): + topic = arg + + for partition in self._client.get_partition_ids_for_topic(topic): + self._consume_topic_partition(topic, partition) + + # (topic, partition [, offset]) tuple + elif isinstance(arg, tuple): + topic = arg[0] + partition = arg[1] + self._consume_topic_partition(topic, partition) + if len(arg) == 3: + offset = arg[2] + self._offsets.fetch[(topic, partition)] = offset + + # { topic: partitions, ... } dict + elif isinstance(arg, dict): + for key, value in six.iteritems(arg): + + # key can be string (a topic) + if isinstance(key, (six.string_types, six.binary_type)): + topic = key + + # topic: partition + if isinstance(value, int): + self._consume_topic_partition(topic, value) + + # topic: [ partition1, partition2, ... 
] + elif isinstance(value, (list, tuple)): + for partition in value: + self._consume_topic_partition(topic, partition) + else: + raise KafkaConfigurationError( + 'Unknown topic type ' + '(dict key must be int or list/tuple of ints)' + ) + + # (topic, partition): offset + elif isinstance(key, tuple): + topic = key[0] + partition = key[1] + self._consume_topic_partition(topic, partition) + self._offsets.fetch[(topic, partition)] = value + + else: + raise KafkaConfigurationError('Unknown topic type (%s)' % type(arg)) + + # If we have a consumer group, try to fetch stored offsets + if self._config['group_id']: + self._get_commit_offsets() + + # Update missing fetch/commit offsets + for topic_partition in self._topics: + + # Commit offsets default is None + if topic_partition not in self._offsets.commit: + self._offsets.commit[topic_partition] = None + + # Skip if we already have a fetch offset from user args + if topic_partition not in self._offsets.fetch: + + # Fetch offsets default is (1) commit + if self._offsets.commit[topic_partition] is not None: + self._offsets.fetch[topic_partition] = self._offsets.commit[topic_partition] + + # or (2) auto reset + else: + self._offsets.fetch[topic_partition] = self._reset_partition_offset(topic_partition) + + # highwater marks (received from server on fetch response) + # and task_done (set locally by user) + # should always get initialized to None + self._reset_highwater_offsets() + self._reset_task_done_offsets() + + # Reset message iterator in case we were in the middle of one + self._reset_message_iterator() + + def close(self): + """Close this consumer's underlying client.""" + self._client.close() + + def next(self): + """Return the next available message + + Blocks indefinitely unless consumer_timeout_ms > 0 + + Returns: + a single KafkaMessage from the message iterator + + Raises: + ConsumerTimeout after consumer_timeout_ms and no message + + Note: + This is also the method called internally during iteration + + """ + self._set_consumer_timeout_start() + while True: + + try: + return six.next(self._get_message_iterator()) + + # Handle batch completion + except StopIteration: + self._reset_message_iterator() + + self._check_consumer_timeout() + + def fetch_messages(self): + """Sends FetchRequests for all topic/partitions set for consumption + + Returns: + Generator that yields KafkaMessage structs + after deserializing with the configured `deserializer_class` + + Note: + Refreshes metadata on errors, and resets fetch offset on + OffsetOutOfRange, per the configured `auto_offset_reset` policy + + See Also: + Key KafkaConsumer configuration parameters: + * `fetch_message_max_bytes` + * `fetch_max_wait_ms` + * `fetch_min_bytes` + * `deserializer_class` + * `auto_offset_reset` + + """ + + max_bytes = self._config['fetch_message_max_bytes'] + max_wait_time = self._config['fetch_wait_max_ms'] + min_bytes = self._config['fetch_min_bytes'] + + if not self._topics: + raise KafkaConfigurationError('No topics or partitions configured') + + if not self._offsets.fetch: + raise KafkaConfigurationError( + 'No fetch offsets found when calling fetch_messages' + ) + + fetches = [FetchRequestPayload(topic, partition, + self._offsets.fetch[(topic, partition)], + max_bytes) + for (topic, partition) in self._topics] + + # send_fetch_request will batch topic/partition requests by leader + responses = self._client.send_fetch_request( + fetches, + max_wait_time=max_wait_time, + min_bytes=min_bytes, + fail_on_error=False + ) + + for resp in responses: + + if 
isinstance(resp, FailedPayloadsError): + self.metrics.record('failed-payloads', 1) + + logger.warning('FailedPayloadsError attempting to fetch data') + self._refresh_metadata_on_error() + continue + + topic = resp.topic + partition = resp.partition + try: + check_error(resp) + except OffsetOutOfRangeError: + self.metrics.record('offset-out-of-range', 1) + + logger.warning('OffsetOutOfRange: topic %s, partition %d, ' + 'offset %d (Highwatermark: %d)', + topic, partition, + self._offsets.fetch[(topic, partition)], + resp.highwaterMark) + # Reset offset + self._offsets.fetch[(topic, partition)] = ( + self._reset_partition_offset((topic, partition)) + ) + continue + + except NotLeaderForPartitionError: + self.metrics.record('not-leader-for-partition', 1) + + logger.warning("NotLeaderForPartitionError for %s - %d. " + "Metadata may be out of date", + topic, partition) + self._refresh_metadata_on_error() + continue + + except RequestTimedOutError: + self.metrics.record('request-timed-out', 1) + + logger.warning("RequestTimedOutError for %s - %d", + topic, partition) + continue + + # Track server highwater mark + self._offsets.highwater[(topic, partition)] = resp.highwaterMark + + # Check for partial message and remove + if resp.messages and isinstance(resp.messages[-1].message, PartialMessage): + resp.messages.pop() + + # Yield each message + # Kafka-python could raise an exception during iteration + # we are not catching -- user will need to address + for (offset, message) in resp.messages: + # deserializer_class could raise an exception here + val = self._config['deserializer_class'](message.value) + msg = KafkaMessage(topic, partition, offset, message.key, val) + + # in some cases the server will return earlier messages + # than we requested. skip them per kafka spec + if offset < self._offsets.fetch[(topic, partition)]: + logger.debug('message offset less than fetched offset ' + 'skipping: %s', msg) + continue + # Only increment fetch offset + # if we safely got the message and deserialized + self._offsets.fetch[(topic, partition)] = offset + 1 + + # Then yield to user + yield msg + + def get_partition_offsets(self, topic, partition, request_time_ms, max_num_offsets): + """Request available fetch offsets for a single topic/partition + + Keyword Arguments: + topic (str): topic for offset request + partition (int): partition for offset request + request_time_ms (int): Used to ask for all messages before a + certain time (ms). There are two special values. + Specify -1 to receive the latest offset (i.e. the offset of the + next coming message) and -2 to receive the earliest available + offset. Note that because offsets are pulled in descending + order, asking for the earliest offset will always return you a + single element. + max_num_offsets (int): Maximum offsets to include in the OffsetResponse + + Returns: + a list of offsets in the OffsetResponse submitted for the provided + topic / partition. See: + https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI + """ + reqs = [OffsetRequestPayload(topic, partition, request_time_ms, max_num_offsets)] + + (resp,) = self._client.send_offset_request(reqs) + + check_error(resp) + + # Just for sanity.. + # probably unnecessary + assert resp.topic == topic + assert resp.partition == partition + + return resp.offsets + + def offsets(self, group=None): + """Get internal consumer offset values + + Keyword Arguments: + group: Either "fetch", "commit", "task_done", or "highwater". 
+ If no group specified, returns all groups. + + Returns: + A copy of internal offsets struct + """ + if not group: + return { + 'fetch': self.offsets('fetch'), + 'commit': self.offsets('commit'), + 'task_done': self.offsets('task_done'), + 'highwater': self.offsets('highwater') + } + else: + return dict(deepcopy(getattr(self._offsets, group))) + + def task_done(self, message): + """Mark a fetched message as consumed. + + Offsets for messages marked as "task_done" will be stored back + to the kafka cluster for this consumer group on commit() + + Arguments: + message (KafkaMessage): the message to mark as complete + + Returns: + True, unless the topic-partition for this message has not + been configured for the consumer. In normal operation, this + should not happen. But see github issue 364. + """ + topic_partition = (message.topic, message.partition) + if topic_partition not in self._topics: + logger.warning('Unrecognized topic/partition in task_done message: ' + '{0}:{1}'.format(*topic_partition)) + return False + + offset = message.offset + + # Warn on non-contiguous offsets + prev_done = self._offsets.task_done[topic_partition] + if prev_done is not None and offset != (prev_done + 1): + logger.warning('Marking task_done on a non-continuous offset: %d != %d + 1', + offset, prev_done) + + # Warn on smaller offsets than previous commit + # "commit" offsets are actually the offset of the next message to fetch. + prev_commit = self._offsets.commit[topic_partition] + if prev_commit is not None and ((offset + 1) <= prev_commit): + logger.warning('Marking task_done on a previously committed offset?: %d (+1) <= %d', + offset, prev_commit) + + self._offsets.task_done[topic_partition] = offset + + # Check for auto-commit + if self._does_auto_commit_messages(): + self._incr_auto_commit_message_count() + + if self._should_auto_commit(): + self.commit() + + return True + + def commit(self): + """Store consumed message offsets (marked via task_done()) + to kafka cluster for this consumer_group. 
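To make the `task_done()` / `commit()` contract concrete, a small at-least-once sketch; the broker, group, and topic are placeholders and `handle()` is a hypothetical application callback. Committing after every message keeps the example short, whereas real code would typically commit periodically or rely on the auto-commit triggers:

```python
from kafka.consumer.kafka import KafkaConsumer


def handle(message):
    # Hypothetical application handler.
    print(message.topic, message.partition, message.offset)


consumer = KafkaConsumer(
    'my-topic',                            # placeholder topic
    bootstrap_servers=['localhost:9092'],  # placeholder broker list
    group_id='my-group',                   # placeholder group (required for commit)
    auto_commit_enable=False,              # commit explicitly below
)

for message in consumer:
    handle(message)
    # Mark the offset as processed; commit() stores task_done offsets (+1)
    # back to the cluster for this consumer group.
    consumer.task_done(message)
    consumer.commit()
```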
+ + Returns: + True on success, or False if no offsets were found for commit + + Note: + this functionality requires server version >=0.8.1.1 + https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI + """ + if not self._config['group_id']: + logger.warning('Cannot commit without a group_id!') + raise KafkaConfigurationError( + 'Attempted to commit offsets ' + 'without a configured consumer group (group_id)' + ) + + # API supports storing metadata with each commit + # but for now it is unused + metadata = b'' + + offsets = self._offsets.task_done + commits = [] + for topic_partition, task_done_offset in six.iteritems(offsets): + + # Skip if None + if task_done_offset is None: + continue + + # Commit offsets as the next offset to fetch + # which is consistent with the Java Client + # task_done is marked by messages consumed, + # so add one to mark the next message for fetching + commit_offset = (task_done_offset + 1) + + # Skip if no change from previous committed + if commit_offset == self._offsets.commit[topic_partition]: + continue + + commits.append( + OffsetCommitRequestPayload(topic_partition[0], topic_partition[1], + commit_offset, metadata) + ) + + if commits: + logger.info('committing consumer offsets to group %s', self._config['group_id']) + + resps = [] + if self._config['offset_storage'] in ['zookeeper', 'dual']: + resps += self._client.send_offset_commit_request( + self._config['group_id'], commits, + fail_on_error=False, + ) + if self._config['offset_storage'] in ['kafka', 'dual']: + resps += self._client.send_offset_commit_request_kafka( + self._config['group_id'], commits, + fail_on_error=False, + ) + + for r in resps: + check_error(r) + topic_partition = (r.topic, r.partition) + task_done = self._offsets.task_done[topic_partition] + self._offsets.commit[topic_partition] = (task_done + 1) + + if self._config['auto_commit_enable']: + self._reset_auto_commit() + + return True + + else: + logger.info('No new offsets found to commit in group %s', self._config['group_id']) + return False + + # + # Topic/partition management private methods + # + + def _consume_topic_partition(self, topic, partition): + topic = topic + if not isinstance(partition, int): + raise KafkaConfigurationError('Unknown partition type (%s) ' + '-- expected int' % type(partition)) + + if topic not in self._client.topic_partitions: + raise UnknownTopicOrPartitionError("Topic %s not found in broker metadata" % topic) + if partition not in self._client.get_partition_ids_for_topic(topic): + raise UnknownTopicOrPartitionError("Partition %d not found in Topic %s " + "in broker metadata" % (partition, topic)) + logger.info("Configuring consumer to fetch topic '%s', partition %d", topic, partition) + self._topics.append((topic, partition)) + + def _refresh_metadata_on_error(self): + refresh_ms = self._config['refresh_leader_backoff_ms'] + jitter_pct = 0.20 + sleep_ms = random.randint( + int((1.0 - 0.5 * jitter_pct) * refresh_ms), + int((1.0 + 0.5 * jitter_pct) * refresh_ms) + ) + while True: + logger.info("Sleeping for refresh_leader_backoff_ms: %d", sleep_ms) + time.sleep(sleep_ms / 1000.0) + try: + self._client.load_metadata_for_topics() + except KafkaUnavailableError: + logger.warning("Unable to refresh topic metadata... 
cluster unavailable") + self._check_consumer_timeout() + else: + logger.info("Topic metadata refreshed") + return + + # + # Offset-managment private methods + # + + def _get_commit_offsets(self): + logger.info("Consumer fetching stored offsets") + for topic_partition in self._topics: + resps = [] + if self._config['offset_storage'] in ('zookeeper', 'dual'): + resps += self._client.send_offset_fetch_request( + self._config['group_id'], + [OffsetFetchRequestPayload(topic_partition[0], topic_partition[1])], + fail_on_error=False) + if self._config['offset_storage'] in ('kafka', 'dual'): + resps += self._client.send_offset_fetch_request_kafka( + self._config['group_id'], + [OffsetFetchRequestPayload(topic_partition[0], topic_partition[1])], + fail_on_error=False) + try: + for r in resps: + check_error(r) + # API spec says server wont set an error here + # but 0.8.1.1 does actually... + except UnknownTopicOrPartitionError: + pass + + # -1 offset signals no commit is currently stored + max_offset = max(r.offset for r in resps) + if max_offset == -1: + self._offsets.commit[topic_partition] = None + + # Otherwise we committed the stored offset + # and need to fetch the next one + else: + self._offsets.commit[topic_partition] = max_offset + + def _reset_highwater_offsets(self): + for topic_partition in self._topics: + self._offsets.highwater[topic_partition] = None + + def _reset_task_done_offsets(self): + for topic_partition in self._topics: + self._offsets.task_done[topic_partition] = None + + def _reset_partition_offset(self, topic_partition): + (topic, partition) = topic_partition + LATEST = -1 + EARLIEST = -2 + + request_time_ms = None + if self._config['auto_offset_reset'] == 'largest': + request_time_ms = LATEST + elif self._config['auto_offset_reset'] == 'smallest': + request_time_ms = EARLIEST + else: + + # Let's raise an reasonable exception type if user calls + # outside of an exception context + if sys.exc_info() == (None, None, None): + raise OffsetOutOfRangeError('Cannot reset partition offsets without a ' + 'valid auto_offset_reset setting ' + '(largest|smallest)') + + # Otherwise we should re-raise the upstream exception + # b/c it typically includes additional data about + # the request that triggered it, and we do not want to drop that + raise # pylint: disable-msg=E0704 + + (offset, ) = self.get_partition_offsets(topic, partition, + request_time_ms, max_num_offsets=1) + return offset + + # + # Consumer Timeout private methods + # + + def _set_consumer_timeout_start(self): + self._consumer_timeout = False + if self._config['consumer_timeout_ms'] >= 0: + self._consumer_timeout = time.time() + (self._config['consumer_timeout_ms'] / 1000.0) + + def _check_consumer_timeout(self): + if self._consumer_timeout and time.time() > self._consumer_timeout: + raise ConsumerTimeout('Consumer timed out after %d ms' % + self._config['consumer_timeout_ms']) + + # + # Autocommit private methods + # + + def _should_auto_commit(self): + if self._does_auto_commit_ms(): + if time.time() >= self._next_commit_time: + return True + + if self._does_auto_commit_messages(): + if self._uncommitted_message_count >= self._config['auto_commit_interval_messages']: + return True + + return False + + def _reset_auto_commit(self): + self._uncommitted_message_count = 0 + self._next_commit_time = None + if self._does_auto_commit_ms(): + self._next_commit_time = time.time() + (self._config['auto_commit_interval_ms'] / 1000.0) + + def _incr_auto_commit_message_count(self, n=1): + self._uncommitted_message_count += n + + 
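The reset helper above relies on the Offset API's special request times, -1 for the latest offset and -2 for the earliest, which `get_partition_offsets()` exposes directly; a hedged sketch querying both for partition 0 of a placeholder topic against a placeholder broker:

```python
from kafka.consumer.kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',                            # placeholder topic
    bootstrap_servers=['localhost:9092'],  # placeholder broker list
)

# -1 returns the latest offset (the offset the next message will receive);
# -2 returns the earliest available offset. Each call yields a single element here.
(latest,) = consumer.get_partition_offsets('my-topic', 0, -1, max_num_offsets=1)
(earliest,) = consumer.get_partition_offsets('my-topic', 0, -2, max_num_offsets=1)
print(earliest, latest)
```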
def _does_auto_commit_ms(self): + if not self._config['auto_commit_enable']: + return False + + conf = self._config['auto_commit_interval_ms'] + if conf is not None and conf > 0: + return True + return False + + def _does_auto_commit_messages(self): + if not self._config['auto_commit_enable']: + return False + + conf = self._config['auto_commit_interval_messages'] + if conf is not None and conf > 0: + return True + return False + + # + # Message iterator private methods + # + + def __iter__(self): + return self + + def __next__(self): + return self.next() + + def _get_message_iterator(self): + # Fetch a new batch if needed + if self._msg_iter is None: + self._msg_iter = self.fetch_messages() + + return self._msg_iter + + def _reset_message_iterator(self): + self._msg_iter = None + + # + # python private methods + # + + def __repr__(self): + return '<{0} topics=({1})>'.format( + self.__class__.__name__, + '|'.join(["%s-%d" % topic_partition + for topic_partition in self._topics]) + ) + + # + # other private methods + # + + def _deprecate_configs(self, **configs): + for old, new in six.iteritems(DEPRECATED_CONFIG_KEYS): + if old in configs: + logger.warning('Deprecated Kafka Consumer configuration: %s. ' + 'Please use %s instead.', old, new) + old_value = configs.pop(old) + if new not in configs: + configs[new] = old_value + return configs + + +class KafkaConsumerMetrics(object): + + def __init__(self, metrics): + self.metrics = metrics + self.group_name = 'legacy-kafka-consumer' + self.sensors = {} + + def record(self, sensor_name, value): + sensor = self.sensors.get(sensor_name) + if not sensor: + sensor = self.metrics.sensor(sensor_name) + sensor.add( + self.metrics.metric_name( + sensor_name + '-rate', + self.group_name, + "Rate of {}".format(sensor_name), + ), + Rate(), + ) + self.sensors[sensor_name] = sensor + sensor.record(value) diff --git a/kafka/consumer/simple.py b/kafka/consumer/simple.py index a6a64a58f..17e8906ca 100644 --- a/kafka/consumer/simple.py +++ b/kafka/consumer/simple.py @@ -105,6 +105,10 @@ class SimpleConsumer(Consumer): OffsetOutOfRangeError. Valid values are largest and smallest. Otherwise, do not reset the offsets and raise OffsetOutOfRangeError. + offset_storage: default zookeeper. Specifies that offset storage that + will be used to fetch and commit the offsets. Valid values are + 'zookeeper', 'kafka', or 'dual'. + Auto commit details: If both auto_commit_every_n and auto_commit_every_t are set, they will reset one another when one is triggered. These triggers simply call the @@ -118,7 +122,8 @@ def __init__(self, client, group, topic, auto_commit=True, partitions=None, buffer_size=FETCH_BUFFER_SIZE_BYTES, max_buffer_size=MAX_FETCH_BUFFER_SIZE_BYTES, iter_timeout=None, - auto_offset_reset='largest'): + auto_offset_reset='largest', + offset_storage='zookeeper'): warnings.warn('deprecated - this class will be removed in a future' ' release. 
Use KafkaConsumer instead.', DeprecationWarning) @@ -127,7 +132,8 @@ def __init__(self, client, group, topic, auto_commit=True, partitions=None, partitions=partitions, auto_commit=auto_commit, auto_commit_every_n=auto_commit_every_n, - auto_commit_every_t=auto_commit_every_t) + auto_commit_every_t=auto_commit_every_t, + offset_storage=offset_storage) if max_buffer_size is not None and buffer_size > max_buffer_size: raise ValueError('buffer_size (%d) is greater than ' diff --git a/kafka/consumer/subscription_state.py b/kafka/consumer/subscription_state.py index ef501661a..76a6c5022 100644 --- a/kafka/consumer/subscription_state.py +++ b/kafka/consumer/subscription_state.py @@ -148,7 +148,7 @@ def change_subscription(self, topics): topics (list of str): topics for subscription Raises: - IllegalStateErrror: if assign_from_user has been used already + IllegalStateError: if assign_from_user has been used already TypeError: if a topic is None or a non-str ValueError: if a topic is an empty string or - a topic name is '.' or '..' or diff --git a/kafka/coordinator/assignors/range.py b/kafka/coordinator/assignors/range.py index c232d9e41..299e39c48 100644 --- a/kafka/coordinator/assignors/range.py +++ b/kafka/coordinator/assignors/range.py @@ -46,20 +46,18 @@ def assign(cls, cluster, member_metadata): if partitions is None: log.warning('No partition metadata for topic %s', topic) continue - partitions = sorted(list(partitions)) - partitions_for_topic = len(partitions) + partitions = sorted(partitions) consumers_for_topic.sort() partitions_per_consumer = len(partitions) // len(consumers_for_topic) consumers_with_extra = len(partitions) % len(consumers_for_topic) - for i in range(len(consumers_for_topic)): + for i, member in enumerate(consumers_for_topic): start = partitions_per_consumer * i start += min(i, consumers_with_extra) length = partitions_per_consumer if not i + 1 > consumers_with_extra: length += 1 - member = consumers_for_topic[i] assignment[member][topic] = partitions[start:start+length] protocol_assignment = {} diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index e538fda33..700c31ff6 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -243,7 +243,7 @@ def ensure_coordinator_ready(self): """Block until the coordinator for this group is known (and we have an active connection -- java client uses unsent queue). """ - with self._client._lock, self._lock: + with self._lock: while self.coordinator_unknown(): # Prior to 0.8.2 there was no group coordinator @@ -273,7 +273,7 @@ def _reset_find_coordinator_future(self, result): self._find_coordinator_future = None def lookup_coordinator(self): - with self._client._lock, self._lock: + with self._lock: if self._find_coordinator_future is not None: return self._find_coordinator_future @@ -321,10 +321,14 @@ def poll_heartbeat(self): self.heartbeat.poll() def time_to_next_heartbeat(self): + """Returns seconds (float) remaining before next heartbeat should be sent + + Note: Returns infinite if group is not joined + """ with self._lock: # if we have not joined the group, we don't need to send heartbeats if self.state is MemberState.UNJOINED: - return sys.maxsize + return float('inf') return self.heartbeat.time_to_next_heartbeat() def _handle_join_success(self, member_assignment_bytes): @@ -342,7 +346,7 @@ def _handle_join_failure(self, _): def ensure_active_group(self): """Ensure that the group is active (i.e. 
joined and synced)""" - with self._client._lock, self._lock: + with self._lock: if self._heartbeat_thread is None: self._start_heartbeat_thread() @@ -500,7 +504,7 @@ def _handle_join_group_response(self, future, send_time, response): log.debug("Received successful JoinGroup response for group %s: %s", self.group_id, response) self.sensors.join_latency.record((time.time() - send_time) * 1000) - with self._client._lock, self._lock: + with self._lock: if self.state is not MemberState.REBALANCING: # if the consumer was woken up before a rebalance completes, # we may have already left the group. In this case, we do @@ -675,15 +679,15 @@ def _handle_group_coordinator_response(self, future, response): error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - with self._client._lock, self._lock: - ok = self._client.cluster.add_group_coordinator(self.group_id, response) - if not ok: + with self._lock: + coordinator_id = self._client.cluster.add_group_coordinator(self.group_id, response) + if not coordinator_id: # This could happen if coordinator metadata is different # than broker metadata future.failure(Errors.IllegalStateError()) return - self.coordinator_id = response.coordinator_id + self.coordinator_id = coordinator_id log.info("Discovered coordinator %s for group %s", self.coordinator_id, self.group_id) self._client.maybe_connect(self.coordinator_id) @@ -757,7 +761,7 @@ def close(self): def maybe_leave_group(self): """Leave the current group and reset local generation/memberId.""" - with self._client._lock, self._lock: + with self._lock: if (not self.coordinator_unknown() and self.state is not MemberState.UNJOINED and self._generation is not Generation.NO_GENERATION): @@ -955,46 +959,40 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - # - # Release coordinator lock during client poll to avoid deadlocks - # if/when connection errback needs coordinator lock - self.coordinator._client.poll(timeout_ms=0) - - if self.coordinator.coordinator_unknown(): - future = self.coordinator.lookup_coordinator() - if not future.is_done or future.failed(): - # the immediate future check ensures that we backoff - # properly in the case that no brokers are available - # to connect to (and the future is automatically failed). - with self.coordinator._lock: + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + self.coordinator._client.poll(timeout_ms=0) + + if self.coordinator.coordinator_unknown(): + future = self.coordinator.lookup_coordinator() + if not future.is_done or future.failed(): + # the immediate future check ensures that we backoff + # properly in the case that no brokers are available + # to connect to (and the future is automatically failed). self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - elif self.coordinator.heartbeat.session_timeout_expired(): - # the session timeout has expired without seeing a - # successful heartbeat, so we should probably make sure - # the coordinator is still healthy. 
- log.warning('Heartbeat session expired, marking coordinator dead') - self.coordinator.coordinator_dead('Heartbeat session expired') - - elif self.coordinator.heartbeat.poll_timeout_expired(): - # the poll timeout has expired, which means that the - # foreground thread has stalled in between calls to - # poll(), so we explicitly leave the group. - log.warning('Heartbeat poll expired, leaving group') - self.coordinator.maybe_leave_group() - - elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.log(0, 'Not ready to heartbeat, waiting') - with self.coordinator._lock: + elif self.coordinator.heartbeat.session_timeout_expired(): + # the session timeout has expired without seeing a + # successful heartbeat, so we should probably make sure + # the coordinator is still healthy. + log.warning('Heartbeat session expired, marking coordinator dead') + self.coordinator.coordinator_dead('Heartbeat session expired') + + elif self.coordinator.heartbeat.poll_timeout_expired(): + # the poll timeout has expired, which means that the + # foreground thread has stalled in between calls to + # poll(), so we explicitly leave the group. + log.warning('Heartbeat poll expired, leaving group') + self.coordinator.maybe_leave_group() + + elif not self.coordinator.heartbeat.should_heartbeat(): + # poll again after waiting for the retry backoff in case + # the heartbeat failed or the coordinator disconnected + log.log(0, 'Not ready to heartbeat, waiting') self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) - else: - with self.coordinator._client._lock, self.coordinator._lock: + else: self.coordinator.heartbeat.sent_heartbeat() future = self.coordinator._send_heartbeat_request() future.add_callback(self._handle_heartbeat_success) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index b575664b2..9b7a3cddd 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -225,7 +225,11 @@ def _on_join_complete(self, generation, member_id, protocol, self._subscription.needs_fetch_committed_offsets = True # update partition assignment - self._subscription.assign_from_subscribed(assignment.partitions()) + try: + self._subscription.assign_from_subscribed(assignment.partitions()) + except ValueError as e: + log.warning("%s. Probably due to a deleted topic. Requesting Re-join" % e) + self.request_rejoin() # give the assignor a chance to update internal state # based on the received assignment @@ -256,7 +260,7 @@ def poll(self): ensures that the consumer has joined the group. This also handles periodic offset commits if they are enabled. """ - if self.group_id is None or self.config['api_version'] < (0, 8, 2): + if self.group_id is None: return self._invoke_completed_offset_commit_callbacks() diff --git a/kafka/errors.py b/kafka/errors.py index f13f97853..6150712c6 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -443,6 +443,12 @@ class PolicyViolationError(BrokerResponseError): description = 'Request parameters do not satisfy the configured policy.' +class SecurityDisabledError(BrokerResponseError): + errno = 54 + message = 'SECURITY_DISABLED' + description = 'Security features are disabled.' 
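Because broker error classes are registered by `errno`, the new `SecurityDisabledError` should be resolvable through the usual code lookup; a quick sketch, assuming the standard `for_code` helper in `kafka.errors`:

```python
import kafka.errors as Errors

# Broker error code 54 now maps to the class added above.
err_cls = Errors.for_code(54)
assert err_cls is Errors.SecurityDisabledError
print(err_cls.message, '-', err_cls.description)
```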
+ + class KafkaUnavailableError(KafkaError): pass @@ -494,6 +500,10 @@ class KafkaConfigurationError(KafkaError): pass +class ConsumerTimeout(KafkaError): + pass + + class QuotaViolationError(KafkaError): pass diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 0b98fe1e4..abf30ea76 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -81,3 +81,6 @@ def configure(self, configs): def close(self): pass + + def record(self, sensor_name, metric, value, timestamp, config): + pass diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index 2c53488ff..9bb0f43ed 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -140,11 +140,15 @@ def sensor(self, name, config=None, if sensor: return sensor + with self._lock: sensor = self.get_sensor(name) if not sensor: - sensor = Sensor(self, name, parents, config or self.config, - inactive_sensor_expiration_time_seconds) + sensor = Sensor( + self, name, parents, config or self.config, + inactive_sensor_expiration_time_seconds, + self._reporters + ) self._sensors[name] = sensor if parents: for parent in parents: @@ -257,5 +261,8 @@ def close(self): """Close this metrics repository.""" for reporter in self._reporters: reporter.close() - self._metrics.clear() + + def init(self): + for reporter in self._reporters: + reporter.init() diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index d8bd12b3b..3c0ba334b 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -55,3 +55,16 @@ def configure(self, configs): def close(self): """Called when the metrics repository is closed.""" raise NotImplementedError + + @abc.abstractmethod + def record(self, sensor_name, metric, value, timestamp, config): + """ + Called to record and emit metrics + + Arguments: + sensor_name: name of the sensor + metric: KafkaMetric object of the metric to be recorded + value(float): value to be recorded + timestamp: the time the value was recorded at + config: sensor config + """ diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 571723f97..1b69c010f 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -16,7 +16,7 @@ class Sensor(object): of metrics about request sizes such as the average or max. """ def __init__(self, registry, name, parents, config, - inactive_sensor_expiration_time_seconds): + inactive_sensor_expiration_time_seconds, reporters): if not name: raise ValueError('name must be non-empty') self._lock = threading.RLock() @@ -30,6 +30,7 @@ def __init__(self, registry, name, parents, config, inactive_sensor_expiration_time_seconds * 1000) self._last_record_time = time.time() * 1000 self._check_forest(set()) + self.reporters = reporters def _check_forest(self, sensors): """Validate that this sensor doesn't end up referencing itself.""" @@ -69,6 +70,18 @@ def record(self, value=1.0, time_ms=None): self._last_record_time = time_ms with self._lock: # XXX high volume, might be performance issue # increment all the stats + for metric in self._metrics: + # Some metrics are not stats and they don't have any measurable + # we cannot report them. 
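With this hook wired into `Sensor.record()`, a reporter can observe every recorded value as it happens. A hedged sketch of a minimal reporter implementing the new abstract `record()` method; the class name and print-based output are illustrative only:

```python
from kafka.metrics.metrics_reporter import AbstractMetricsReporter


class PrintingReporter(AbstractMetricsReporter):
    """Illustrative reporter that prints each recorded metric value."""

    def init(self, metrics):
        pass

    def metric_change(self, metric):
        pass

    def metric_removal(self, metric):
        pass

    def configure(self, configs):
        pass

    def close(self):
        pass

    def record(self, sensor_name, metric, value, timestamp, config):
        # Invoked from Sensor.record() for every metric that has a measurable.
        print(sensor_name, metric.metric_name, value, timestamp)
```

The legacy consumer's `metrics_reporter` config accepts such a class: it is instantiated and passed to `Metrics(reporters=[...])` during `configure()`.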
+ if hasattr(metric, 'measurable'): + for reporter in self.reporters: + reporter.record( + self._name, + metric, + value, + time_ms, + self._config, + ) for stat in self._stats: stat.record(self._config, value, time_ms) self._check_quotas(time_ms) diff --git a/kafka/msk.py b/kafka/msk.py new file mode 100644 index 000000000..e058c286e --- /dev/null +++ b/kafka/msk.py @@ -0,0 +1,213 @@ +import datetime +import hashlib +import hmac +import json +import string + +from kafka.errors import IllegalArgumentError +from kafka.vendor.six.moves import urllib + + +class AwsMskIamClient: + UNRESERVED_CHARS = string.ascii_letters + string.digits + '-._~' + + def __init__(self, host, boto_session): + """ + Arguments: + host (str): The hostname of the broker. + boto_session (botocore.BotoSession) the boto session + """ + self.algorithm = 'AWS4-HMAC-SHA256' + self.expires = '900' + self.hashfunc = hashlib.sha256 + self.headers = [ + ('host', host) + ] + self.version = '2020_10_22' + + self.service = 'kafka-cluster' + self.action = '{}:Connect'.format(self.service) + + now = datetime.datetime.utcnow() + self.datestamp = now.strftime('%Y%m%d') + self.timestamp = now.strftime('%Y%m%dT%H%M%SZ') + + self.host = host + self.boto_session = boto_session + + # This will raise if the region can't be determined + # Do this during init instead of waiting for failures downstream + if self.region: + pass + + @property + def access_key(self): + return self.boto_session.get_credentials().access_key + + @property + def secret_key(self): + return self.boto_session.get_credentials().secret_key + + @property + def token(self): + return self.boto_session.get_credentials().token + + @property + def region(self): + # Try to get the region information from the broker hostname + for host in self.host.split(','): + if 'amazonaws.com' in host: + return host.split('.')[-3] + + # If the region can't be determined from hostname, try the boto session + # This will only have a value if: + # - `AWS_DEFAULT_REGION` environment variable is set + # - `~/.aws/config` region variable is set + region = self.boto_session.get_config_variable('region') + if region: + return region + + # Otherwise give up + raise IllegalArgumentError('Could not determine region from broker host(s) or aws configuration') + + @property + def _credential(self): + return '{0.access_key}/{0._scope}'.format(self) + + @property + def _scope(self): + return '{0.datestamp}/{0.region}/{0.service}/aws4_request'.format(self) + + @property + def _signed_headers(self): + """ + Returns (str): + An alphabetically sorted, semicolon-delimited list of lowercase + request header names. + """ + return ';'.join(sorted(k.lower() for k, _ in self.headers)) + + @property + def _canonical_headers(self): + """ + Returns (str): + A newline-delited list of header names and values. + Header names are lowercased. + """ + return '\n'.join(map(':'.join, self.headers)) + '\n' + + @property + def _canonical_request(self): + """ + Returns (str): + An AWS Signature Version 4 canonical request in the format: + \n + \n + \n + \n + \n + + """ + # The hashed_payload is always an empty string for MSK. + hashed_payload = self.hashfunc(b'').hexdigest() + return '\n'.join(( + 'GET', + '/', + self._canonical_querystring, + self._canonical_headers, + self._signed_headers, + hashed_payload, + )) + + @property + def _canonical_querystring(self): + """ + Returns (str): + A '&'-separated list of URI-encoded key/value pairs. 
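For orientation, the client can be exercised on its own to produce the signed SASL payload. A hedged sketch, assuming botocore is installed, the session can resolve AWS credentials, and the broker hostname below is a placeholder (the region is parsed from it):

```python
from botocore.session import Session

from kafka.msk import AwsMskIamClient

client = AwsMskIamClient(
    host='b-1.mycluster.abc123.kafka.us-east-1.amazonaws.com',  # placeholder MSK broker host
    boto_session=Session(),
)

# first_message() returns the compact JSON payload, including the SigV4
# signature, that is sent to the broker as the initial SASL client message.
payload = client.first_message()
print(payload)
```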
+ """ + params = [] + params.append(('Action', self.action)) + params.append(('X-Amz-Algorithm', self.algorithm)) + params.append(('X-Amz-Credential', self._credential)) + params.append(('X-Amz-Date', self.timestamp)) + params.append(('X-Amz-Expires', self.expires)) + if self.token: + params.append(('X-Amz-Security-Token', self.token)) + params.append(('X-Amz-SignedHeaders', self._signed_headers)) + + return '&'.join(self._uriencode(k) + '=' + self._uriencode(v) for k, v in params) + + @property + def _signing_key(self): + """ + Returns (bytes): + An AWS Signature V4 signing key generated from the secret_key, date, + region, service, and request type. + """ + key = self._hmac(('AWS4' + self.secret_key).encode('utf-8'), self.datestamp) + key = self._hmac(key, self.region) + key = self._hmac(key, self.service) + key = self._hmac(key, 'aws4_request') + return key + + @property + def _signing_str(self): + """ + Returns (str): + A string used to sign the AWS Signature V4 payload in the format: + \n + \n + \n + + """ + canonical_request_hash = self.hashfunc(self._canonical_request.encode('utf-8')).hexdigest() + return '\n'.join((self.algorithm, self.timestamp, self._scope, canonical_request_hash)) + + def _uriencode(self, msg): + """ + Arguments: + msg (str): A string to URI-encode. + + Returns (str): + The URI-encoded version of the provided msg, following the encoding + rules specified: https://github.com/aws/aws-msk-iam-auth#uriencode + """ + return urllib.parse.quote(msg, safe=self.UNRESERVED_CHARS) + + def _hmac(self, key, msg): + """ + Arguments: + key (bytes): A key to use for the HMAC digest. + msg (str): A value to include in the HMAC digest. + Returns (bytes): + An HMAC digest of the given key and msg. + """ + return hmac.new(key, msg.encode('utf-8'), digestmod=self.hashfunc).digest() + + def first_message(self): + """ + Returns (bytes): + An encoded JSON authentication payload that can be sent to the + broker. + """ + signature = hmac.new( + self._signing_key, + self._signing_str.encode('utf-8'), + digestmod=self.hashfunc, + ).hexdigest() + msg = { + 'version': self.version, + 'host': self.host, + 'user-agent': 'kafka-python', + 'action': self.action, + 'x-amz-algorithm': self.algorithm, + 'x-amz-credential': self._credential, + 'x-amz-date': self.timestamp, + 'x-amz-signedheaders': self._signed_headers, + 'x-amz-expires': self.expires, + 'x-amz-signature': signature, + } + if self.token: + msg['x-amz-security-token'] = self.token + + return json.dumps(msg, separators=(',', ':')).encode('utf-8') diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 2a306e0c1..3ff1a0913 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -255,7 +255,7 @@ class KafkaProducer(object): various APIs. Example: (0, 10, 2). Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker - api version. Only applies if api_version set to 'auto' + api version. Only applies if api_version set to None. metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. 
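The AwsMskIamClient added in kafka/msk.py is driven by a botocore session. A rough usage sketch follows; the broker host is a placeholder, valid AWS credentials and a resolvable region are assumed, and the surrounding SASL wiring is not shown here:

    import botocore.session

    from kafka.msk import AwsMskIamClient

    session = botocore.session.Session()  # picks up credentials/region from the environment
    client = AwsMskIamClient(
        host='b-1.mycluster.abc123.kafka.us-east-1.amazonaws.com',  # placeholder
        boto_session=session,
    )
    payload = client.first_message()  # compact JSON bytes, ready to hand to the broker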
Default: [] @@ -412,6 +412,10 @@ def __init__(self, **configs): atexit.register(self._cleanup) log.debug("Kafka producer started") + def bootstrap_connected(self): + """Return True if the bootstrap is connected.""" + return self._sender.bootstrap_connected() + def _cleanup_factory(self): """Build a cleanup clojure that doesn't increase our ref count""" _self = weakref.proxy(self) @@ -464,7 +468,6 @@ def close(self, timeout=None): assert timeout >= 0 log.info("Closing the Kafka producer with %s secs timeout.", timeout) - #first_exception = AtomicReference() # this will keep track of the first encountered exception invoked_from_callback = bool(threading.current_thread() is self._sender) if timeout > 0: if invoked_from_callback: diff --git a/kafka/producer/sender.py b/kafka/producer/sender.py index 064fee410..705b58f9a 100644 --- a/kafka/producer/sender.py +++ b/kafka/producer/sender.py @@ -157,7 +157,7 @@ def run_once(self): # difference between now and its linger expiry time; otherwise the # select time will be the time difference between now and the # metadata expiry time - self._client.poll(poll_timeout_ms) + self._client.poll(timeout_ms=poll_timeout_ms) def initiate_close(self): """Start closing the sender (won't complete until all data is sent).""" @@ -315,6 +315,9 @@ def wakeup(self): """Wake up the selector associated with this send thread.""" self._client.wakeup() + def bootstrap_connected(self): + return self._client.bootstrap_connected() + class SenderMetrics(object): diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index e6efad784..af88ea473 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Schema, String +from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String class ApiVersionResponse_v0(Response): @@ -29,6 +29,12 @@ class ApiVersionResponse_v1(Response): ) +class ApiVersionResponse_v2(Response): + API_KEY = 18 + API_VERSION = 2 + SCHEMA = ApiVersionResponse_v1.SCHEMA + + class ApiVersionRequest_v0(Request): API_KEY = 18 API_VERSION = 0 @@ -43,8 +49,19 @@ class ApiVersionRequest_v1(Request): SCHEMA = ApiVersionRequest_v0.SCHEMA -ApiVersionRequest = [ApiVersionRequest_v0, ApiVersionRequest_v1] -ApiVersionResponse = [ApiVersionResponse_v0, ApiVersionResponse_v1] +class ApiVersionRequest_v2(Request): + API_KEY = 18 + API_VERSION = 2 + RESPONSE_TYPE = ApiVersionResponse_v1 + SCHEMA = ApiVersionRequest_v0.SCHEMA + + +ApiVersionRequest = [ + ApiVersionRequest_v0, ApiVersionRequest_v1, ApiVersionRequest_v2, +] +ApiVersionResponse = [ + ApiVersionResponse_v0, ApiVersionResponse_v1, ApiVersionResponse_v2, +] class CreateTopicsResponse_v0(Response): @@ -79,6 +96,11 @@ class CreateTopicsResponse_v2(Response): ('error_message', String('utf-8')))) ) +class CreateTopicsResponse_v3(Response): + API_KEY = 19 + API_VERSION = 3 + SCHEMA = CreateTopicsResponse_v2.SCHEMA + class CreateTopicsRequest_v0(Request): API_KEY = 19 @@ -126,11 +148,20 @@ class CreateTopicsRequest_v2(Request): SCHEMA = CreateTopicsRequest_v1.SCHEMA +class CreateTopicsRequest_v3(Request): + API_KEY = 19 + API_VERSION = 3 + RESPONSE_TYPE = CreateTopicsResponse_v3 + SCHEMA = CreateTopicsRequest_v1.SCHEMA + + CreateTopicsRequest = [ - CreateTopicsRequest_v0, CreateTopicsRequest_v1, CreateTopicsRequest_v2 + CreateTopicsRequest_v0, CreateTopicsRequest_v1, + CreateTopicsRequest_v2, 
CreateTopicsRequest_v3, ] CreateTopicsResponse = [ - CreateTopicsResponse_v0, CreateTopicsResponse_v1, CreateTopicsResponse_v2 + CreateTopicsResponse_v0, CreateTopicsResponse_v1, + CreateTopicsResponse_v2, CreateTopicsResponse_v3, ] @@ -155,6 +186,18 @@ class DeleteTopicsResponse_v1(Response): ) +class DeleteTopicsResponse_v2(Response): + API_KEY = 20 + API_VERSION = 2 + SCHEMA = DeleteTopicsResponse_v1.SCHEMA + + +class DeleteTopicsResponse_v3(Response): + API_KEY = 20 + API_VERSION = 3 + SCHEMA = DeleteTopicsResponse_v1.SCHEMA + + class DeleteTopicsRequest_v0(Request): API_KEY = 20 API_VERSION = 0 @@ -172,8 +215,28 @@ class DeleteTopicsRequest_v1(Request): SCHEMA = DeleteTopicsRequest_v0.SCHEMA -DeleteTopicsRequest = [DeleteTopicsRequest_v0, DeleteTopicsRequest_v1] -DeleteTopicsResponse = [DeleteTopicsResponse_v0, DeleteTopicsResponse_v1] +class DeleteTopicsRequest_v2(Request): + API_KEY = 20 + API_VERSION = 2 + RESPONSE_TYPE = DeleteTopicsResponse_v2 + SCHEMA = DeleteTopicsRequest_v0.SCHEMA + + +class DeleteTopicsRequest_v3(Request): + API_KEY = 20 + API_VERSION = 3 + RESPONSE_TYPE = DeleteTopicsResponse_v3 + SCHEMA = DeleteTopicsRequest_v0.SCHEMA + + +DeleteTopicsRequest = [ + DeleteTopicsRequest_v0, DeleteTopicsRequest_v1, + DeleteTopicsRequest_v2, DeleteTopicsRequest_v3, +] +DeleteTopicsResponse = [ + DeleteTopicsResponse_v0, DeleteTopicsResponse_v1, + DeleteTopicsResponse_v2, DeleteTopicsResponse_v3, +] class ListGroupsResponse_v0(Response): @@ -198,6 +261,11 @@ class ListGroupsResponse_v1(Response): ('protocol_type', String('utf-8')))) ) +class ListGroupsResponse_v2(Response): + API_KEY = 16 + API_VERSION = 2 + SCHEMA = ListGroupsResponse_v1.SCHEMA + class ListGroupsRequest_v0(Request): API_KEY = 16 @@ -212,9 +280,21 @@ class ListGroupsRequest_v1(Request): RESPONSE_TYPE = ListGroupsResponse_v1 SCHEMA = ListGroupsRequest_v0.SCHEMA +class ListGroupsRequest_v2(Request): + API_KEY = 16 + API_VERSION = 1 + RESPONSE_TYPE = ListGroupsResponse_v2 + SCHEMA = ListGroupsRequest_v0.SCHEMA + -ListGroupsRequest = [ListGroupsRequest_v0, ListGroupsRequest_v1] -ListGroupsResponse = [ListGroupsResponse_v0, ListGroupsResponse_v1] +ListGroupsRequest = [ + ListGroupsRequest_v0, ListGroupsRequest_v1, + ListGroupsRequest_v2, +] +ListGroupsResponse = [ + ListGroupsResponse_v0, ListGroupsResponse_v1, + ListGroupsResponse_v2, +] class DescribeGroupsResponse_v0(Response): @@ -256,6 +336,33 @@ class DescribeGroupsResponse_v1(Response): ) +class DescribeGroupsResponse_v2(Response): + API_KEY = 15 + API_VERSION = 2 + SCHEMA = DescribeGroupsResponse_v1.SCHEMA + + +class DescribeGroupsResponse_v3(Response): + API_KEY = 15 + API_VERSION = 3 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('groups', Array( + ('error_code', Int16), + ('group', String('utf-8')), + ('state', String('utf-8')), + ('protocol_type', String('utf-8')), + ('protocol', String('utf-8')), + ('members', Array( + ('member_id', String('utf-8')), + ('client_id', String('utf-8')), + ('client_host', String('utf-8')), + ('member_metadata', Bytes), + ('member_assignment', Bytes)))), + ('authorized_operations', Int32)) + ) + + class DescribeGroupsRequest_v0(Request): API_KEY = 15 API_VERSION = 0 @@ -272,8 +379,31 @@ class DescribeGroupsRequest_v1(Request): SCHEMA = DescribeGroupsRequest_v0.SCHEMA -DescribeGroupsRequest = [DescribeGroupsRequest_v0, DescribeGroupsRequest_v1] -DescribeGroupsResponse = [DescribeGroupsResponse_v0, DescribeGroupsResponse_v1] +class DescribeGroupsRequest_v2(Request): + API_KEY = 15 + API_VERSION = 2 + RESPONSE_TYPE = 
DescribeGroupsResponse_v2 + SCHEMA = DescribeGroupsRequest_v0.SCHEMA + + +class DescribeGroupsRequest_v3(Request): + API_KEY = 15 + API_VERSION = 3 + RESPONSE_TYPE = DescribeGroupsResponse_v2 + SCHEMA = Schema( + ('groups', Array(String('utf-8'))), + ('include_authorized_operations', Boolean) + ) + + +DescribeGroupsRequest = [ + DescribeGroupsRequest_v0, DescribeGroupsRequest_v1, + DescribeGroupsRequest_v2, DescribeGroupsRequest_v3, +] +DescribeGroupsResponse = [ + DescribeGroupsResponse_v0, DescribeGroupsResponse_v1, + DescribeGroupsResponse_v2, DescribeGroupsResponse_v3, +] class SaslHandShakeResponse_v0(Response): @@ -347,6 +477,13 @@ class DescribeAclsResponse_v1(Response): ('permission_type', Int8))))) ) + +class DescribeAclsResponse_v2(Response): + API_KEY = 29 + API_VERSION = 2 + SCHEMA = DescribeAclsResponse_v1.SCHEMA + + class DescribeAclsRequest_v0(Request): API_KEY = 29 API_VERSION = 0 @@ -360,6 +497,7 @@ class DescribeAclsRequest_v0(Request): ('permission_type', Int8) ) + class DescribeAclsRequest_v1(Request): API_KEY = 29 API_VERSION = 1 @@ -374,6 +512,17 @@ class DescribeAclsRequest_v1(Request): ('permission_type', Int8) ) + +class DescribeAclsRequest_v2(Request): + """ + Enable flexible version + """ + API_KEY = 29 + API_VERSION = 2 + RESPONSE_TYPE = DescribeAclsResponse_v2 + SCHEMA = DescribeAclsRequest_v1.SCHEMA + + DescribeAclsRequest = [DescribeAclsRequest_v0, DescribeAclsRequest_v1] DescribeAclsResponse = [DescribeAclsResponse_v0, DescribeAclsResponse_v1] @@ -507,6 +656,13 @@ class AlterConfigsResponse_v0(Response): ('resource_name', String('utf-8')))) ) + +class AlterConfigsResponse_v1(Response): + API_KEY = 33 + API_VERSION = 1 + SCHEMA = AlterConfigsResponse_v0.SCHEMA + + class AlterConfigsRequest_v0(Request): API_KEY = 33 API_VERSION = 0 @@ -521,8 +677,14 @@ class AlterConfigsRequest_v0(Request): ('validate_only', Boolean) ) -AlterConfigsRequest = [AlterConfigsRequest_v0] -AlterConfigsResponse = [AlterConfigsResponse_v0] +class AlterConfigsRequest_v1(Request): + API_KEY = 33 + API_VERSION = 1 + RESPONSE_TYPE = AlterConfigsResponse_v1 + SCHEMA = AlterConfigsRequest_v0.SCHEMA + +AlterConfigsRequest = [AlterConfigsRequest_v0, AlterConfigsRequest_v1] +AlterConfigsResponse = [AlterConfigsResponse_v0, AlterConfigsRequest_v1] class DescribeConfigsResponse_v0(Response): @@ -565,6 +727,28 @@ class DescribeConfigsResponse_v1(Response): ('config_source', Int8))))))) ) +class DescribeConfigsResponse_v2(Response): + API_KEY = 32 + API_VERSION = 2 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('resources', Array( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('resource_type', Int8), + ('resource_name', String('utf-8')), + ('config_entries', Array( + ('config_names', String('utf-8')), + ('config_value', String('utf-8')), + ('read_only', Boolean), + ('config_source', Int8), + ('is_sensitive', Boolean), + ('config_synonyms', Array( + ('config_name', String('utf-8')), + ('config_value', String('utf-8')), + ('config_source', Int8))))))) + ) + class DescribeConfigsRequest_v0(Request): API_KEY = 32 API_VERSION = 0 @@ -588,10 +772,25 @@ class DescribeConfigsRequest_v1(Request): ('include_synonyms', Boolean) ) -DescribeConfigsRequest = [DescribeConfigsRequest_v0, DescribeConfigsRequest_v1] -DescribeConfigsResponse = [DescribeConfigsResponse_v0, DescribeConfigsResponse_v1] -class SaslAuthenticateResponse_v0(Request): +class DescribeConfigsRequest_v2(Request): + API_KEY = 32 + API_VERSION = 2 + RESPONSE_TYPE = DescribeConfigsResponse_v2 + SCHEMA = 
DescribeConfigsRequest_v1.SCHEMA + + +DescribeConfigsRequest = [ + DescribeConfigsRequest_v0, DescribeConfigsRequest_v1, + DescribeConfigsRequest_v2, +] +DescribeConfigsResponse = [ + DescribeConfigsResponse_v0, DescribeConfigsResponse_v1, + DescribeConfigsResponse_v2, +] + + +class SaslAuthenticateResponse_v0(Response): API_KEY = 36 API_VERSION = 0 SCHEMA = Schema( @@ -601,6 +800,17 @@ class SaslAuthenticateResponse_v0(Request): ) +class SaslAuthenticateResponse_v1(Response): + API_KEY = 36 + API_VERSION = 1 + SCHEMA = Schema( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('sasl_auth_bytes', Bytes), + ('session_lifetime_ms', Int64) + ) + + class SaslAuthenticateRequest_v0(Request): API_KEY = 36 API_VERSION = 0 @@ -610,8 +820,19 @@ class SaslAuthenticateRequest_v0(Request): ) -SaslAuthenticateRequest = [SaslAuthenticateRequest_v0] -SaslAuthenticateResponse = [SaslAuthenticateResponse_v0] +class SaslAuthenticateRequest_v1(Request): + API_KEY = 36 + API_VERSION = 1 + RESPONSE_TYPE = SaslAuthenticateResponse_v1 + SCHEMA = SaslAuthenticateRequest_v0.SCHEMA + + +SaslAuthenticateRequest = [ + SaslAuthenticateRequest_v0, SaslAuthenticateRequest_v1, +] +SaslAuthenticateResponse = [ + SaslAuthenticateResponse_v0, SaslAuthenticateResponse_v1, +] class CreatePartitionsResponse_v0(Response): @@ -626,6 +847,12 @@ class CreatePartitionsResponse_v0(Response): ) +class CreatePartitionsResponse_v1(Response): + API_KEY = 37 + API_VERSION = 1 + SCHEMA = CreatePartitionsResponse_v0.SCHEMA + + class CreatePartitionsRequest_v0(Request): API_KEY = 37 API_VERSION = 0 @@ -641,5 +868,17 @@ class CreatePartitionsRequest_v0(Request): ) -CreatePartitionsRequest = [CreatePartitionsRequest_v0] -CreatePartitionsResponse = [CreatePartitionsResponse_v0] +class CreatePartitionsRequest_v1(Request): + API_KEY = 37 + API_VERSION = 1 + SCHEMA = CreatePartitionsRequest_v0.SCHEMA + RESPONSE_TYPE = CreatePartitionsResponse_v1 + + +CreatePartitionsRequest = [ + CreatePartitionsRequest_v0, CreatePartitionsRequest_v1, +] +CreatePartitionsResponse = [ + CreatePartitionsResponse_v0, CreatePartitionsResponse_v1, +] + diff --git a/kafka/protocol/api.py b/kafka/protocol/api.py index efaf63ea2..64276fc17 100644 --- a/kafka/protocol/api.py +++ b/kafka/protocol/api.py @@ -3,7 +3,7 @@ import abc from kafka.protocol.struct import Struct -from kafka.protocol.types import Int16, Int32, String, Schema +from kafka.protocol.types import Int16, Int32, String, Schema, Array class RequestHeader(Struct): @@ -47,6 +47,9 @@ def expect_response(self): """Override this method if an api request does not always generate a response""" return True + def to_object(self): + return _to_object(self.SCHEMA, self) + class Response(Struct): __metaclass__ = abc.ABCMeta @@ -65,3 +68,30 @@ def API_VERSION(self): def SCHEMA(self): """An instance of Schema() representing the response structure""" pass + + def to_object(self): + return _to_object(self.SCHEMA, self) + + +def _to_object(schema, data): + obj = {} + for idx, (name, _type) in enumerate(zip(schema.names, schema.fields)): + if isinstance(data, Struct): + val = data.get_item(name) + else: + val = data[idx] + + if isinstance(_type, Schema): + obj[name] = _to_object(_type, val) + elif isinstance(_type, Array): + if isinstance(_type.array_of, (Array, Schema)): + obj[name] = [ + _to_object(_type.array_of, x) + for x in val + ] + else: + obj[name] = val + else: + obj[name] = val + + return obj diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index dd3f648cf..f367848ce 100644 --- 
a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -94,6 +94,72 @@ class FetchResponse_v6(Response): SCHEMA = FetchResponse_v5.SCHEMA +class FetchResponse_v7(Response): + """ + Add error_code and session_id to response + """ + API_KEY = 1 + API_VERSION = 7 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('session_id', Int32), + ('topics', Array( + ('topics', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('highwater_offset', Int64), + ('last_stable_offset', Int64), + ('log_start_offset', Int64), + ('aborted_transactions', Array( + ('producer_id', Int64), + ('first_offset', Int64))), + ('message_set', Bytes))))) + ) + + +class FetchResponse_v8(Response): + API_KEY = 1 + API_VERSION = 8 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v9(Response): + API_KEY = 1 + API_VERSION = 9 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v10(Response): + API_KEY = 1 + API_VERSION = 10 + SCHEMA = FetchResponse_v7.SCHEMA + + +class FetchResponse_v11(Response): + API_KEY = 1 + API_VERSION = 11 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('session_id', Int32), + ('topics', Array( + ('topics', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('highwater_offset', Int64), + ('last_stable_offset', Int64), + ('log_start_offset', Int64), + ('aborted_transactions', Array( + ('producer_id', Int64), + ('first_offset', Int64))), + ('preferred_read_replica', Int32), + ('message_set', Bytes))))) + ) + + class FetchRequest_v0(Request): API_KEY = 1 API_VERSION = 0 @@ -196,13 +262,125 @@ class FetchRequest_v6(Request): SCHEMA = FetchRequest_v5.SCHEMA +class FetchRequest_v7(Request): + """ + Add incremental fetch requests + """ + API_KEY = 1 + API_VERSION = 7 + RESPONSE_TYPE = FetchResponse_v7 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)) + )), + ) + + +class FetchRequest_v8(Request): + """ + bump used to indicate that on quota violation brokers send out responses before throttling. + """ + API_KEY = 1 + API_VERSION = 8 + RESPONSE_TYPE = FetchResponse_v8 + SCHEMA = FetchRequest_v7.SCHEMA + + +class FetchRequest_v9(Request): + """ + adds the current leader epoch (see KIP-320) + """ + API_KEY = 1 + API_VERSION = 9 + RESPONSE_TYPE = FetchResponse_v9 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)), + )), + ) + + +class FetchRequest_v10(Request): + """ + bumped up to indicate ZStandard capability. 
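Stepping back to the to_object() helper added to kafka/protocol/api.py above: it walks a struct's SCHEMA and turns the decoded object into plain dicts and lists, recursing into nested schemas and arrays. A rough illustration using ApiVersionResponse_v0 with made-up values:

    from kafka.protocol.admin import ApiVersionResponse_v0

    resp = ApiVersionResponse_v0(
        error_code=0,
        api_versions=[(18, 0, 2), (0, 0, 8)],  # (api_key, min_version, max_version)
    )
    resp.to_object()
    # roughly: {'error_code': 0,
    #           'api_versions': [{'api_key': 18, 'min_version': 0, 'max_version': 2},
    #                            {'api_key': 0, 'min_version': 0, 'max_version': 8}]}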
(see KIP-110) + """ + API_KEY = 1 + API_VERSION = 10 + RESPONSE_TYPE = FetchResponse_v10 + SCHEMA = FetchRequest_v9.SCHEMA + + +class FetchRequest_v11(Request): + """ + added rack ID to support read from followers (KIP-392) + """ + API_KEY = 1 + API_VERSION = 11 + RESPONSE_TYPE = FetchResponse_v11 + SCHEMA = Schema( + ('replica_id', Int32), + ('max_wait_time', Int32), + ('min_bytes', Int32), + ('max_bytes', Int32), + ('isolation_level', Int8), + ('session_id', Int32), + ('session_epoch', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int32), + ('fetch_offset', Int64), + ('log_start_offset', Int64), + ('max_bytes', Int32))))), + ('forgotten_topics_data', Array( + ('topic', String), + ('partitions', Array(Int32)) + )), + ('rack_id', String('utf-8')), + ) + + FetchRequest = [ FetchRequest_v0, FetchRequest_v1, FetchRequest_v2, FetchRequest_v3, FetchRequest_v4, FetchRequest_v5, - FetchRequest_v6 + FetchRequest_v6, FetchRequest_v7, FetchRequest_v8, + FetchRequest_v9, FetchRequest_v10, FetchRequest_v11, ] FetchResponse = [ FetchResponse_v0, FetchResponse_v1, FetchResponse_v2, FetchResponse_v3, FetchResponse_v4, FetchResponse_v5, - FetchResponse_v6 + FetchResponse_v6, FetchResponse_v7, FetchResponse_v8, + FetchResponse_v9, FetchResponse_v10, FetchResponse_v11, ] diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index 2e8f5bc17..9961293a8 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -15,6 +15,7 @@ from kafka.codec import gzip_encode, snappy_encode from kafka.errors import ProtocolError, UnsupportedCodecError +from kafka.structs import ConsumerMetadataResponse from kafka.util import ( crc32, read_short_string, relative_unpack, write_int_string, group_by_topic_and_partition) @@ -301,36 +302,31 @@ def decode_metadata_response(cls, response): return response @classmethod - def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads): + def encode_consumer_metadata_request(cls, payloads): """ - Encode a ConsumerMetadataRequest + Encode a GroupCoordinatorRequest. Note that ConsumerMetadataRequest is + renamed to GroupCoordinatorRequest in 0.9+. Interface is unchanged Arguments: - client_id: string - correlation_id: int payloads: string (consumer group) """ - message = [] - message.append(cls._encode_message_header(client_id, correlation_id, - KafkaProtocol.CONSUMER_METADATA_KEY)) - message.append(struct.pack('>h%ds' % len(payloads), len(payloads), payloads)) - - msg = b''.join(message) - return write_int_string(msg) + return kafka.protocol.commit.GroupCoordinatorRequest[0](payloads) @classmethod - def decode_consumer_metadata_response(cls, data): + def decode_consumer_metadata_response(cls, response): """ - Decode bytes to a kafka.structs.ConsumerMetadataResponse + Decode GroupCoordinatorResponse. 
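The v7+ fetch schemas above introduce session_id / session_epoch for incremental fetch sessions (KIP-227). The consumer's fetcher normally builds these, but a direct construction sketch with placeholder topic, partition, and offsets looks like:

    from kafka.protocol.fetch import FetchRequest_v7

    request = FetchRequest_v7(
        replica_id=-1,             # -1 = regular consumer
        max_wait_time=500,
        min_bytes=1,
        max_bytes=52428800,
        isolation_level=0,         # 0 = READ_UNCOMMITTED
        session_id=0,              # 0 with epoch 0: no incremental session yet
        session_epoch=0,
        topics=[
            ('my-topic', [
                # (partition, fetch_offset, log_start_offset, max_bytes)
                (0, 123, 0, 1048576),
            ]),
        ],
        forgotten_topics_data=[],
    )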
Note that ConsumerMetadataResponse is + renamed to GroupCoordinatorResponse in 0.9+ Arguments: - data: bytes to decode + response: response to decode """ - ((correlation_id, error, nodeId), cur) = relative_unpack('>ihi', data, 0) - (host, cur) = read_short_string(data, cur) - ((port,), cur) = relative_unpack('>i', data, cur) - - return kafka.structs.ConsumerMetadataResponse(error, nodeId, host, port) + return ConsumerMetadataResponse( + response.error_code, + response.coordinator_id, + response.host, + response.port, + ) @classmethod def encode_offset_commit_request(cls, group, payloads): @@ -352,6 +348,28 @@ def encode_offset_commit_request(cls, group, payloads): for partition, payload in six.iteritems(topic_payloads)]) for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) + @classmethod + def encode_offset_commit_request_kafka(cls, group, payloads): + """ + Encode an OffsetCommitRequest struct + Arguments: + group: string, the consumer group you are committing offsets for + payloads: list of OffsetCommitRequestPayload + """ + return kafka.protocol.commit.OffsetCommitRequest[2]( + consumer_group=group, + consumer_group_generation_id=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_GENERATION_ID, + consumer_id='', + retention_time=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_RETENTION_TIME, + topics=[( + topic, + [( + partition, + payload.offset, + payload.metadata) + for partition, payload in six.iteritems(topic_payloads)]) + for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))]) + @classmethod def decode_offset_commit_response(cls, response): """ diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 3c254de40..1ed382b0d 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -53,6 +53,43 @@ class OffsetResponse_v2(Response): ) +class OffsetResponse_v3(Response): + """ + on quota violation, brokers send out responses before throttling + """ + API_KEY = 2 + API_VERSION = 3 + SCHEMA = OffsetResponse_v2.SCHEMA + + +class OffsetResponse_v4(Response): + """ + Add leader_epoch to response + """ + API_KEY = 2 + API_VERSION = 4 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('timestamp', Int64), + ('offset', Int64), + ('leader_epoch', Int32))))) + ) + + +class OffsetResponse_v5(Response): + """ + adds a new error code, OFFSET_NOT_AVAILABLE + """ + API_KEY = 2 + API_VERSION = 5 + SCHEMA = OffsetResponse_v4.SCHEMA + + class OffsetRequest_v0(Request): API_KEY = 2 API_VERSION = 0 @@ -105,5 +142,53 @@ class OffsetRequest_v2(Request): } -OffsetRequest = [OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2] -OffsetResponse = [OffsetResponse_v0, OffsetResponse_v1, OffsetResponse_v2] +class OffsetRequest_v3(Request): + API_KEY = 2 + API_VERSION = 3 + RESPONSE_TYPE = OffsetResponse_v3 + SCHEMA = OffsetRequest_v2.SCHEMA + DEFAULTS = { + 'replica_id': -1 + } + + +class OffsetRequest_v4(Request): + """ + Add current_leader_epoch to request + """ + API_KEY = 2 + API_VERSION = 4 + RESPONSE_TYPE = OffsetResponse_v4 + SCHEMA = Schema( + ('replica_id', Int32), + ('isolation_level', Int8), # <- added isolation_level + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('current_leader_epoch', Int64), + ('timestamp', Int64))))) + ) + DEFAULTS = { + 'replica_id': -1 + } + + +class OffsetRequest_v5(Request): + API_KEY = 2 + API_VERSION = 5 + 
RESPONSE_TYPE = OffsetResponse_v5 + SCHEMA = OffsetRequest_v4.SCHEMA + DEFAULTS = { + 'replica_id': -1 + } + + +OffsetRequest = [ + OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2, + OffsetRequest_v3, OffsetRequest_v4, OffsetRequest_v5, +] +OffsetResponse = [ + OffsetResponse_v0, OffsetResponse_v1, OffsetResponse_v2, + OffsetResponse_v3, OffsetResponse_v4, OffsetResponse_v5, +] diff --git a/kafka/protocol/produce.py b/kafka/protocol/produce.py index f4032b311..9b3f6bf55 100644 --- a/kafka/protocol/produce.py +++ b/kafka/protocol/produce.py @@ -78,6 +78,50 @@ class ProduceResponse_v5(Response): ) +class ProduceResponse_v6(Response): + """ + The version number is bumped to indicate that on quota violation brokers send out responses before throttling. + """ + API_KEY = 0 + API_VERSION = 6 + SCHEMA = ProduceResponse_v5.SCHEMA + + +class ProduceResponse_v7(Response): + """ + V7 bumped up to indicate ZStandard capability. (see KIP-110) + """ + API_KEY = 0 + API_VERSION = 7 + SCHEMA = ProduceResponse_v6.SCHEMA + + +class ProduceResponse_v8(Response): + """ + V8 bumped up to add two new fields record_errors offset list and error_message + (See KIP-467) + """ + API_KEY = 0 + API_VERSION = 8 + SCHEMA = Schema( + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16), + ('offset', Int64), + ('timestamp', Int64), + ('log_start_offset', Int64)), + ('record_errors', (Array( + ('batch_index', Int32), + ('batch_index_error_message', String('utf-8')) + ))), + ('error_message', String('utf-8')) + ))), + ('throttle_time_ms', Int32) + ) + + class ProduceRequest(Request): API_KEY = 0 @@ -106,6 +150,7 @@ class ProduceRequest_v1(ProduceRequest): RESPONSE_TYPE = ProduceResponse_v1 SCHEMA = ProduceRequest_v0.SCHEMA + class ProduceRequest_v2(ProduceRequest): API_VERSION = 2 RESPONSE_TYPE = ProduceResponse_v2 @@ -147,11 +192,41 @@ class ProduceRequest_v5(ProduceRequest): SCHEMA = ProduceRequest_v4.SCHEMA +class ProduceRequest_v6(ProduceRequest): + """ + The version number is bumped to indicate that on quota violation brokers send out responses before throttling. + """ + API_VERSION = 6 + RESPONSE_TYPE = ProduceResponse_v6 + SCHEMA = ProduceRequest_v5.SCHEMA + + +class ProduceRequest_v7(ProduceRequest): + """ + V7 bumped up to indicate ZStandard capability. 
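Similarly, the ListOffsets v4/v5 requests added above take an isolation_level and a per-partition current_leader_epoch. A construction sketch with placeholder values (-1 for the leader epoch means unknown; timestamp -1 asks for the latest offset):

    from kafka.protocol.offset import OffsetRequest_v4

    request = OffsetRequest_v4(
        replica_id=-1,
        isolation_level=1,  # 1 = READ_COMMITTED
        topics=[
            # (partition, current_leader_epoch, timestamp)
            ('my-topic', [(0, -1, -1)]),
        ],
    )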
(see KIP-110) + """ + API_VERSION = 7 + RESPONSE_TYPE = ProduceResponse_v7 + SCHEMA = ProduceRequest_v6.SCHEMA + + +class ProduceRequest_v8(ProduceRequest): + """ + V8 bumped up to add two new fields record_errors offset list and error_message to PartitionResponse + (See KIP-467) + """ + API_VERSION = 8 + RESPONSE_TYPE = ProduceResponse_v8 + SCHEMA = ProduceRequest_v7.SCHEMA + + ProduceRequest = [ ProduceRequest_v0, ProduceRequest_v1, ProduceRequest_v2, - ProduceRequest_v3, ProduceRequest_v4, ProduceRequest_v5 + ProduceRequest_v3, ProduceRequest_v4, ProduceRequest_v5, + ProduceRequest_v6, ProduceRequest_v7, ProduceRequest_v8, ] ProduceResponse = [ ProduceResponse_v0, ProduceResponse_v1, ProduceResponse_v2, - ProduceResponse_v3, ProduceResponse_v4, ProduceResponse_v5 + ProduceResponse_v3, ProduceResponse_v4, ProduceResponse_v5, + ProduceResponse_v6, ProduceResponse_v7, ProduceResponse_v8, ] diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index 676de1ba4..e9da6e6c1 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -30,6 +30,7 @@ def __init__(self, *args, **kwargs): # causes instances to "leak" to garbage self.encode = WeakMethod(self._encode_self) + @classmethod def encode(cls, item): # pylint: disable=E0202 bits = [] @@ -48,6 +49,11 @@ def decode(cls, data): data = BytesIO(data) return cls(*[field.decode(data) for field in cls.SCHEMA.fields]) + def get_item(self, name): + if name not in self.SCHEMA.names: + raise KeyError("%s is not in the schema" % name) + return self.__dict__[name] + def __repr__(self): key_vals = [] for name, field in zip(self.SCHEMA.names, self.SCHEMA.fields): @@ -64,11 +70,3 @@ def __eq__(self, other): if self.__dict__[attr] != other.__dict__[attr]: return False return True - -""" -class MetaStruct(type): - def __new__(cls, clsname, bases, dct): - nt = namedtuple(clsname, [name for (name, _) in dct['SCHEMA']]) - bases = tuple([Struct, nt] + list(bases)) - return super(MetaStruct, cls).__new__(cls, clsname, bases, dct) -""" diff --git a/kafka/version.py b/kafka/version.py index adf1ed520..e47fd1c67 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '1.4.6' +__version__ = '1.4.7.post5' diff --git a/requirements-dev.txt b/requirements-dev.txt index 218fb63f3..c46b48691 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,16 +1,19 @@ -flake8==3.4.1 -pytest==3.10.0 +flake8==3.6.0 +pytest==3.6.3 pytest-cov==2.6.0 docker-py==1.10.6 -coveralls==1.5.1 -Sphinx==1.6.4 -lz4==2.1.2 -xxhash==1.3.0 +Sphinx==1.7.9 +lz4==1.1.0 +xxhash==1.0.1 python-snappy==0.5.3 -tox==3.5.3 -pylint==1.9.3 -pytest-pylint==0.12.3 +tox==3.1.2 +pylint==1.8.2 +pytest-pylint==0.11.0 pytest-mock==1.10.0 +setuptools<45.0.0 sphinx-rtd-theme==0.2.4 -crc32c==1.5 +mock==2.0.0 +decorator==4.3.0 +tox-pip-extensions==1.2.1 +crc32c==1.7 py==1.8.0 diff --git a/run_itest.sh b/run_itest.sh new file mode 100644 index 000000000..150c95989 --- /dev/null +++ b/run_itest.sh @@ -0,0 +1,7 @@ +#!/bin/bash -e + +export KAFKA_VERSION='1.1.0' +./build_integration.sh +tox -e py27 +tox -e py35 +tox -e pypy diff --git a/run_utest.sh b/run_utest.sh new file mode 100644 index 000000000..3922ab3a0 --- /dev/null +++ b/run_utest.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e + +tox -e py27 +tox -e py35 +tox -e pypy diff --git a/servers/0.10.0.1/resources/kafka.properties b/servers/0.10.0.1/resources/kafka.properties deleted file mode 100644 index 7d8e2b1f0..000000000 --- a/servers/0.10.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the 
Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. 
Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.0.1/resources/log4j.properties b/servers/0.10.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.0.1/resources/zookeeper.properties b/servers/0.10.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.10.1.1/resources/kafka.properties b/servers/0.10.1.1/resources/kafka.properties deleted file mode 100644 index 7d8e2b1f0..000000000 --- a/servers/0.10.1.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.1.1/resources/log4j.properties b/servers/0.10.1.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.1.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.1.1/resources/zookeeper.properties b/servers/0.10.1.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.1.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.10.2.1/resources/kafka.properties b/servers/0.10.2.1/resources/kafka.properties deleted file mode 100644 index 7d8e2b1f0..000000000 --- a/servers/0.10.2.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. 
If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. 
-log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.10.2.1/resources/log4j.properties b/servers/0.10.2.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.10.2.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.10.2.1/resources/zookeeper.properties b/servers/0.10.2.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.10.2.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.10.0.0/resources/kafka.properties b/servers/0.10.2.2/resources/kafka.properties similarity index 100% rename from servers/0.10.0.0/resources/kafka.properties rename to servers/0.10.2.2/resources/kafka.properties diff --git a/servers/0.10.0.0/resources/log4j.properties b/servers/0.10.2.2/resources/log4j.properties similarity index 100% rename from servers/0.10.0.0/resources/log4j.properties rename to servers/0.10.2.2/resources/log4j.properties diff --git a/servers/0.10.0.0/resources/zookeeper.properties b/servers/0.10.2.2/resources/zookeeper.properties similarity index 100% rename from servers/0.10.0.0/resources/zookeeper.properties rename to servers/0.10.2.2/resources/zookeeper.properties diff --git a/servers/0.11.0.0/resources/kafka.properties b/servers/0.11.0.0/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/0.11.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. 
If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.0/resources/log4j.properties b/servers/0.11.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.0/resources/zookeeper.properties b/servers/0.11.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.11.0.1/resources/kafka.properties b/servers/0.11.0.1/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/0.11.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.1/resources/log4j.properties b/servers/0.11.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.1/resources/zookeeper.properties b/servers/0.11.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.11.0.2/resources/kafka.properties b/servers/0.11.0.2/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/0.11.0.2/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 
-# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.11.0.2/resources/log4j.properties b/servers/0.11.0.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.11.0.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.11.0.2/resources/zookeeper.properties b/servers/0.11.0.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.11.0.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.0/resources/kafka.properties b/servers/0.8.0/resources/kafka.properties deleted file mode 100644 index b9f5c498f..000000000 --- a/servers/0.8.0/resources/kafka.properties +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
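The kafka.properties, log4j.properties and zookeeper.properties resources touched in this diff are not literal configs but templates: fields such as `{broker_id}`, `{host}`, `{port}`, `{transport}`, `{ssl_dir}`, `{tmp_dir}`, `{partitions}`, `{replicas}`, `{zk_host}`, `{zk_port}` and `{zk_chroot}` are placeholders that the test harness fills in when it launches a broker. As a minimal sketch only, assuming plain `str.format` substitution and using made-up paths (this is not the project's actual fixture code), rendering one of these templates could look like:

```python
# Minimal sketch: render a templated server properties file for a test broker.
# The template path, output path and parameter values are assumptions for the
# example; the real fixture code in this repository may differ.
def render_properties(template_path, output_path, **params):
    with open(template_path) as f:
        template = f.read()
    with open(output_path, "w") as f:
        f.write(template.format(**params))  # fills {broker_id}, {host}, {port}, ...

render_properties(
    "servers/1.1.0/resources/kafka.properties",   # hypothetical template location
    "/tmp/kafka-fixture/kafka.properties",        # hypothetical rendered output
    broker_id=0, host="127.0.0.1", port=9092,
    transport="PLAINTEXT", ssl_dir="/tmp/kafka-fixture/ssl",
    tmp_dir="/tmp/kafka-fixture", partitions=2, replicas=1,
    zk_host="127.0.0.1", zk_port=2181, zk_chroot="kafka-python",
)
```

The parameter names above mirror the placeholders visible in the removed 0.11.0.x templates; any given version's template may use only a subset of them.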
- -############################# Server Basics ############################# - -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -port={port} -host.name={host} - -num.network.threads=2 -num.io.threads=2 - -socket.send.buffer.bytes=1048576 -socket.receive.buffer.bytes=1048576 -socket.request.max.bytes=104857600 - -############################# Log Basics ############################# - -log.dirs={tmp_dir}/data -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -log.flush.interval.messages=10000 -log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -log.retention.hours=168 -log.segment.bytes=536870912 -log.cleanup.interval.mins=1 - -############################# Zookeeper ############################# - -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 - -kafka.metrics.polling.interval.secs=5 -kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter -kafka.csv.metrics.dir={tmp_dir} -kafka.csv.metrics.reporter.enabled=false - -log.cleanup.policy=delete diff --git a/servers/0.8.0/resources/log4j.properties b/servers/0.8.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.0/resources/zookeeper.properties b/servers/0.8.0/resources/zookeeper.properties deleted file mode 100644 index 68e1ef986..000000000 --- a/servers/0.8.0/resources/zookeeper.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -dataDir={tmp_dir} -clientPortAddress={host} -clientPort={port} -maxClientCnxns=0 diff --git a/servers/0.8.1.1/resources/kafka.properties b/servers/0.8.1.1/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.1.1/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.1.1/resources/log4j.properties b/servers/0.8.1.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.1.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.1.1/resources/zookeeper.properties b/servers/0.8.1.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.1.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.1/resources/kafka.properties b/servers/0.8.1/resources/kafka.properties deleted file mode 100644 index 76b0cb4ac..000000000 --- a/servers/0.8.1/resources/kafka.properties +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
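Once a broker has been started from a rendered config like the ones above, a test exercises it through the normal client API against the same host/port pair. A brief, illustrative sketch (hypothetical topic name and address, not taken from this change):

```python
from kafka import KafkaConsumer, KafkaProducer

bootstrap = "127.0.0.1:9092"  # assumed to match the rendered {host}:{port}

producer = KafkaProducer(bootstrap_servers=bootstrap)
producer.send("fixture-smoke-test", b"hello")
producer.flush()

consumer = KafkaConsumer(
    "fixture-smoke-test",
    bootstrap_servers=bootstrap,
    auto_offset_reset="earliest",
    consumer_timeout_ms=5000,   # give up after 5s so the test cannot hang
)
messages = [msg.value for msg in consumer]
assert b"hello" in messages
```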
- -############################# Server Basics ############################# - -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -port={port} -host.name={host} - -num.network.threads=2 -num.io.threads=2 - -socket.send.buffer.bytes=1048576 -socket.receive.buffer.bytes=1048576 -socket.request.max.bytes=104857600 - -############################# Log Basics ############################# - -log.dirs={tmp_dir}/data -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -log.flush.interval.messages=10000 -log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -log.retention.hours=168 -log.segment.bytes=536870912 -log.retention.check.interval.ms=60000 -log.cleanup.interval.mins=1 -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.1/resources/log4j.properties b/servers/0.8.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.1/resources/zookeeper.properties b/servers/0.8.1/resources/zookeeper.properties deleted file mode 100644 index 68e1ef986..000000000 --- a/servers/0.8.1/resources/zookeeper.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -dataDir={tmp_dir} -clientPortAddress={host} -clientPort={port} -maxClientCnxns=0 diff --git a/servers/0.8.2.0/resources/kafka.properties b/servers/0.8.2.0/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.0/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.0/resources/log4j.properties b/servers/0.8.2.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.0/resources/zookeeper.properties b/servers/0.8.2.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.2.1/resources/kafka.properties b/servers/0.8.2.1/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.1/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
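Note that the removed kafka.properties templates all carry deliberately aggressive, test-only overrides (for example offsets.commit.timeout.ms=500, offsets.topic.num.partitions=2, group.min.session.timeout.ms=1000 and zookeeper.session.timeout.ms=500) so that offset-topic setup and group rebalances settle quickly in CI. A small standard-library sketch for reading a rendered file back and checking such overrides (the path is hypothetical; this helper is illustrative and not part of the change):

```python
def load_properties(path):
    """Parse a Java-style .properties file into a dict, skipping comments and blanks."""
    props = {}
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            key, _, value = line.partition("=")
            props[key.strip()] = value.strip()
    return props

# Hypothetical rendered path; the assertions mirror values in the templates above.
props = load_properties("/tmp/kafka-fixture/kafka.properties")
assert props["offsets.commit.timeout.ms"] == "500"
assert props["group.min.session.timeout.ms"] == "1000"
```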
-# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. 
Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.1/resources/log4j.properties b/servers/0.8.2.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.1/resources/zookeeper.properties b/servers/0.8.2.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.8.2.2/resources/kafka.properties b/servers/0.8.2.2/resources/kafka.properties deleted file mode 100644 index 685aed15e..000000000 --- a/servers/0.8.2.2/resources/kafka.properties +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -# The port the socket server listens on -port={port} - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -host.name={host} - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=2 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.8.2.2/resources/log4j.properties b/servers/0.8.2.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.8.2.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.8.2.2/resources/zookeeper.properties b/servers/0.8.2.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.8.2.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.9.0.0/resources/kafka.properties b/servers/0.9.0.0/resources/kafka.properties deleted file mode 100644 index b4c4088db..000000000 --- a/servers/0.9.0.0/resources/kafka.properties +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. 
-# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=60000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.9.0.0/resources/log4j.properties b/servers/0.9.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.9.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.9.0.0/resources/zookeeper.properties b/servers/0.9.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.9.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/0.9.0.1/resources/kafka.properties b/servers/0.9.0.1/resources/kafka.properties deleted file mode 100644 index 7d8e2b1f0..000000000 --- a/servers/0.9.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. 
If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. 
-log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=2 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/0.9.0.1/resources/log4j.properties b/servers/0.9.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/0.9.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/0.9.0.1/resources/zookeeper.properties b/servers/0.9.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/0.9.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.0.0/resources/kafka.properties b/servers/1.0.0/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/1.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. 
-#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 
-log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.0/resources/log4j.properties b/servers/1.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/1.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/1.0.0/resources/zookeeper.properties b/servers/1.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/1.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.0.1/resources/kafka.properties b/servers/1.0.1/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/1.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. 
-num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. 
-zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.1/resources/log4j.properties b/servers/1.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/1.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/1.0.1/resources/zookeeper.properties b/servers/1.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/1.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.0.2/resources/kafka.properties b/servers/1.0.2/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/1.0.2/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 
-# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/1.0.2/resources/log4j.properties b/servers/1.0.2/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/1.0.2/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/1.0.2/resources/zookeeper.properties b/servers/1.0.2/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/1.0.2/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/1.1.0/resources/kafka.properties b/servers/1.1.0/resources/kafka.properties index 28668db95..630dbc5fa 100644 --- a/servers/1.1.0/resources/kafka.properties +++ b/servers/1.1.0/resources/kafka.properties @@ -30,6 +30,9 @@ ssl.key.password=foobar ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks ssl.truststore.password=foobar +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + # The port the socket server listens on #port=9092 diff --git a/servers/1.1.1/resources/kafka.properties b/servers/1.1.1/resources/kafka.properties index 64f94d528..fe6a89f4a 100644 --- a/servers/1.1.1/resources/kafka.properties +++ b/servers/1.1.1/resources/kafka.properties @@ -30,6 +30,9 @@ ssl.key.password=foobar ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks ssl.truststore.password=foobar +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + # List of enabled mechanisms, can be more than one sasl.enabled.mechanisms=PLAIN sasl.mechanism.inter.broker.protocol=PLAIN diff --git a/servers/2.0.0/resources/kafka.properties b/servers/2.0.0/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/2.0.0/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. -broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 
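The kafka.properties files in this diff are templates rather than literal broker configs: {broker_id}, {transport}, {host}, {port}, {ssl_dir}, {tmp_dir}, {partitions}, {replicas}, {zk_host}, {zk_port} and {zk_chroot} are filled in when a broker fixture starts. A minimal sketch of that substitution, assuming plain str.format semantics (the actual helper is Fixture.render_template in test/fixtures.py, whose body is not part of this diff):

    # Sketch: render a templated kafka.properties for one test broker.
    # The binding keys mirror the placeholders visible in the template above.
    def render_broker_properties(source_file, target_file, binding):
        with open(source_file) as f:
            template = f.read()
        with open(target_file, 'w') as f:
            f.write(template.format(**binding))

    render_broker_properties(
        'servers/1.1.0/resources/kafka.properties',
        '/tmp/kafka-fixture-0/kafka.properties',
        dict(broker_id=0, transport='PLAINTEXT', host='127.0.0.1', port=9092,
             ssl_dir='/tmp/kafka-fixture-0/ssl', tmp_dir='/tmp/kafka-fixture-0',
             partitions=2, replicas=1,
             zk_host='127.0.0.1', zk_port=2181, zk_chroot='kafka-python'),
    )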
-# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. - -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.0.0/resources/log4j.properties b/servers/2.0.0/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.0.0/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.0.0/resources/zookeeper.properties b/servers/2.0.0/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.0.0/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/servers/2.0.1/resources/kafka.properties b/servers/2.0.1/resources/kafka.properties deleted file mode 100644 index 28668db95..000000000 --- a/servers/2.0.1/resources/kafka.properties +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -############################# Server Basics ############################# - -# The id of the broker. This must be set to a unique integer for each broker. 
-broker.id={broker_id} - -############################# Socket Server Settings ############################# - -listeners={transport}://{host}:{port} -security.inter.broker.protocol={transport} - -ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks -ssl.keystore.password=foobar -ssl.key.password=foobar -ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks -ssl.truststore.password=foobar - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= - -# The number of threads handling network requests -num.network.threads=3 - -# The number of threads doing disk I/O -num.io.threads=8 - -# The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=102400 - -# The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=102400 - -# The maximum size of a request that the socket server will accept (protection against OOM) -socket.request.max.bytes=104857600 - - -############################# Log Basics ############################# - -# A comma seperated list of directories under which to store log files -log.dirs={tmp_dir}/data - -# The default number of log partitions per topic. More partitions allow greater -# parallelism for consumption, but this will also result in more files across -# the brokers. -num.partitions={partitions} -default.replication.factor={replicas} - -## Short Replica Lag -- Drops failed brokers out of ISR -replica.lag.time.max.ms=1000 -replica.socket.timeout.ms=1000 - -############################# Log Flush Policy ############################# - -# Messages are immediately written to the filesystem but by default we only fsync() to sync -# the OS cache lazily. The following configurations control the flush of data to disk. -# There are a few important trade-offs here: -# 1. Durability: Unflushed data may be lost if you are not using replication. -# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. -# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. -# The settings below allow one to configure the flush policy to flush data after a period of time or -# every N messages (or both). This can be done globally and overridden on a per-topic basis. - -# The number of messages to accept before forcing a flush of data to disk -#log.flush.interval.messages=10000 - -# The maximum amount of time a message can sit in a log before we force a flush -#log.flush.interval.ms=1000 - -############################# Log Retention Policy ############################# - -# The following configurations control the disposal of log segments. The policy can -# be set to delete segments after a period of time, or after a given size has accumulated. -# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens -# from the end of the log. 
- -# The minimum age of a log file to be eligible for deletion -log.retention.hours=168 - -# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining -# segments don't drop below log.retention.bytes. -#log.retention.bytes=1073741824 - -# The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=1073741824 - -# The interval at which log segments are checked to see if they can be deleted according -# to the retention policies -log.retention.check.interval.ms=300000 - -# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. -# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. -log.cleaner.enable=false - -# tune down offset topics to reduce setup time in tests -offsets.commit.timeout.ms=500 -offsets.topic.num.partitions=2 -offsets.topic.replication.factor=1 - -# Allow shorter session timeouts for tests -group.min.session.timeout.ms=1000 - - -############################# Zookeeper ############################# - -# Zookeeper connection string (see zookeeper docs for details). -# This is a comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". -# You can also append an optional chroot string to the urls to specify the -# root directory for all kafka znodes. -zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} - -# Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=30000 -# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly -zookeeper.session.timeout.ms=500 diff --git a/servers/2.0.1/resources/log4j.properties b/servers/2.0.1/resources/log4j.properties deleted file mode 100644 index b0b76aa79..000000000 --- a/servers/2.0.1/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
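The 1.1.0 and 1.1.1 broker templates kept by this diff gain authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer with allow.everyone.if.no.acl.found=true, so the new ACL admin tests further down can create and list ACLs without locking the rest of the suite out of the cluster. A condensed sketch of the round trip those tests exercise, mirroring test_admin_integration.py (the bootstrap address is a placeholder):

    from kafka.admin import (
        KafkaAdminClient, ACL, ACLFilter, ACLOperation, ACLPermissionType,
        ResourcePattern, ResourceType,
    )

    admin = KafkaAdminClient(bootstrap_servers='localhost:9092')

    # Grant User:test read access on the topic named "topic"
    acl = ACL(
        principal='User:test',
        host='*',
        operation=ACLOperation.READ,
        permission_type=ACLPermissionType.ALLOW,
        resource_pattern=ResourcePattern(ResourceType.TOPIC, 'topic'),
    )
    result = admin.create_acls([acl])
    assert len(result['failed']) == 0

    # List matching ACLs, then remove the one we just added
    acls, error = admin.describe_acls(ACLFilter(
        principal=None, host='*',
        operation=ACLOperation.ANY, permission_type=ACLPermissionType.ANY,
        resource_pattern=ResourcePattern(ResourceType.TOPIC, 'topic'),
    ))
    admin.delete_acls([ACLFilter(
        principal='User:test', host='*',
        operation=ACLOperation.READ, permission_type=ACLPermissionType.ALLOW,
        resource_pattern=ResourcePattern(ResourceType.TOPIC, 'topic'),
    )])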
- -log4j.rootLogger=INFO, stdout, logfile - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.logfile=org.apache.log4j.FileAppender -log4j.appender.logfile.File=${kafka.logs.dir}/server.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.0.1/resources/zookeeper.properties b/servers/2.0.1/resources/zookeeper.properties deleted file mode 100644 index e3fd09742..000000000 --- a/servers/2.0.1/resources/zookeeper.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir={tmp_dir} -# the port at which the clients will connect -clientPort={port} -clientPortAddress={host} -# disable the per-ip limit on the number of connections since this is a non-production config -maxClientCnxns=0 diff --git a/setup.py b/setup.py index 779adb92b..f611c3cfc 100644 --- a/setup.py +++ b/setup.py @@ -39,9 +39,9 @@ def run(cls): tests_require=test_require, cmdclass={"test": Tox}, packages=find_packages(exclude=['test']), - author="Dana Powers", - author_email="dana.powers@gmail.com", - url="https://github.com/dpkp/kafka-python", + author="Distsys Streaming", + author_email="distsys-streaming@yelp.com", + url="https://github.com/Yelp/kafka-python", license="Apache License 2.0", description="Pure Python client for Apache Kafka", long_description=README, diff --git a/test/conftest.py b/test/conftest.py index ffaae033b..267ac6aa9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,14 +1,11 @@ from __future__ import absolute_import -import pytest - -from test.fixtures import KafkaFixture, ZookeeperFixture, random_string, version as kafka_version +import uuid +import pytest -@pytest.fixture(scope="module") -def version(): - """Return the Kafka version set in the OS environment""" - return kafka_version() +from test.testutil import env_kafka_version, random_string +from test.fixtures import KafkaFixture, ZookeeperFixture @pytest.fixture(scope="module") def zookeeper(): @@ -17,15 +14,17 @@ def zookeeper(): yield zk_instance zk_instance.close() + @pytest.fixture(scope="module") def kafka_broker(kafka_broker_factory): """Return a Kafka broker fixture""" return kafka_broker_factory()[0] + @pytest.fixture(scope="module") -def kafka_broker_factory(version, zookeeper): +def kafka_broker_factory(zookeeper): """Return a Kafka broker fixture factory""" - assert version, 'KAFKA_VERSION must be specified to run integration tests' + assert env_kafka_version(), 'KAFKA_VERSION must be specified to run integration tests' _brokers = [] def factory(**broker_params): @@ 
-42,6 +41,7 @@ def factory(**broker_params): for broker in _brokers: broker.close() + @pytest.fixture def simple_client(kafka_broker, request, topic): """Return a SimpleClient fixture""" @@ -50,6 +50,7 @@ def simple_client(kafka_broker, request, topic): yield client client.close() + @pytest.fixture def kafka_client(kafka_broker, request): """Return a KafkaClient fixture""" @@ -57,11 +58,13 @@ def kafka_client(kafka_broker, request): yield client client.close() + @pytest.fixture def kafka_consumer(kafka_consumer_factory): """Return a KafkaConsumer fixture""" return kafka_consumer_factory() + @pytest.fixture def kafka_consumer_factory(kafka_broker, topic, request): """Return a KafkaConsumer factory fixture""" @@ -79,11 +82,13 @@ def factory(**kafka_consumer_params): if _consumer[0]: _consumer[0].close() + @pytest.fixture def kafka_producer(kafka_producer_factory): """Return a KafkaProducer fixture""" yield kafka_producer_factory() + @pytest.fixture def kafka_producer_factory(kafka_broker, request): """Return a KafkaProduce factory fixture""" @@ -100,6 +105,7 @@ def factory(**kafka_producer_params): if _producer[0]: _producer[0].close() + @pytest.fixture def topic(kafka_broker, request): """Return a topic fixture""" @@ -107,6 +113,7 @@ def topic(kafka_broker, request): kafka_broker.create_topics([topic_name]) return topic_name + @pytest.fixture def conn(mocker): """Return a connection mocker fixture""" @@ -132,3 +139,27 @@ def _set_conn_state(state): conn.connected = lambda: conn.state is ConnectionStates.CONNECTED conn.disconnected = lambda: conn.state is ConnectionStates.DISCONNECTED return conn + + +@pytest.fixture() +def send_messages(topic, kafka_producer, request): + """A factory that returns a send_messages function with a pre-populated + topic topic / producer.""" + + def _send_messages(number_range, partition=0, topic=topic, producer=kafka_producer, request=request): + """ + messages is typically `range(0,100)` + partition is an int + """ + messages_and_futures = [] # [(message, produce_future),] + for i in number_range: + # request.node.name provides the test name (including parametrized values) + encoded_msg = '{}-{}-{}'.format(i, request.node.name, uuid.uuid4()).encode('utf-8') + future = kafka_producer.send(topic, value=encoded_msg, partition=partition) + messages_and_futures.append((encoded_msg, future)) + kafka_producer.flush() + for (msg, f) in messages_and_futures: + assert f.succeeded() + return [msg for (msg, f) in messages_and_futures] + + return _send_messages diff --git a/test/fixtures.py b/test/fixtures.py index d4e8e435c..35aade6bd 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -4,9 +4,7 @@ import logging import os import os.path -import random import socket -import string import subprocess import time import uuid @@ -19,20 +17,11 @@ from kafka.client_async import KafkaClient from kafka.protocol.admin import CreateTopicsRequest from kafka.protocol.metadata import MetadataRequest +from test.testutil import env_kafka_version, random_string from test.service import ExternalService, SpawnedService log = logging.getLogger(__name__) -def random_string(length): - return "".join(random.choice(string.ascii_letters) for i in range(length)) - -def version_str_to_list(version_str): - return tuple(map(int, version_str.split('.'))) # e.g., (0, 8, 1, 1) - -def version(): - if 'KAFKA_VERSION' not in os.environ: - return () - return version_str_to_list(os.environ['KAFKA_VERSION']) def get_open_port(): sock = socket.socket() @@ -41,6 +30,7 @@ def get_open_port(): sock.close() 
return port + def gen_ssl_resources(directory): os.system(""" cd {0} @@ -70,9 +60,10 @@ def gen_ssl_resources(directory): -file cert-signed -storepass foobar -noprompt """.format(directory)) + class Fixture(object): - kafka_version = os.environ.get('KAFKA_VERSION', '0.11.0.2') - scala_version = os.environ.get("SCALA_VERSION", '2.8.0') + kafka_version = os.environ.get('KAFKA_VERSION', '1.1.0') + scala_version = os.environ.get("SCALA_VERSION", '2.11') project_root = os.environ.get('PROJECT_ROOT', os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) kafka_root = os.environ.get("KAFKA_ROOT", @@ -154,6 +145,7 @@ def render_template(cls, source_file, target_file, binding): def dump_logs(self): self.child.dump_logs() + class ZookeeperFixture(Fixture): @classmethod def instance(cls): @@ -324,7 +316,6 @@ def _create_zk_chroot(self): "kafka-python") env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = proc.communicate() if proc.returncode != 0: @@ -466,7 +457,7 @@ def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ num_partitions == self.partitions and \ replication_factor == self.replicas: self._send_request(MetadataRequest[0]([topic_name])) - elif version() >= (0, 10, 1, 0): + elif env_kafka_version() >= (0, 10, 1, 0): request = CreateTopicsRequest[0]([(topic_name, num_partitions, replication_factor, [], [])], timeout_ms) result = self._send_request(request, timeout=timeout_ms) @@ -486,13 +477,13 @@ def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ '--replication-factor', self.replicas \ if replication_factor is None \ else replication_factor) - if version() >= (0, 10): + if env_kafka_version() >= (0, 10): args.append('--if-not-exists') env = self.kafka_run_class_env() proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() if proc.returncode != 0: - if not 'kafka.common.TopicExistsException' in stdout: + if 'kafka.common.TopicExistsException' not in stdout: self.out("Failed to create topic %s" % (topic_name,)) self.out(stdout) self.out(stderr) diff --git a/test/test_admin.py b/test/test_admin.py index 300d5bced..279f85abf 100644 --- a/test/test_admin.py +++ b/test/test_admin.py @@ -26,6 +26,37 @@ def test_new_partitions(): assert good_partitions.new_assignments == [[1, 2, 3]] +def test_acl_resource(): + good_acl = kafka.admin.ACL( + "User:bar", + "*", + kafka.admin.ACLOperation.ALL, + kafka.admin.ACLPermissionType.ALLOW, + kafka.admin.ResourcePattern( + kafka.admin.ResourceType.TOPIC, + "foo", + kafka.admin.ACLResourcePatternType.LITERAL + ) + ) + + assert(good_acl.resource_pattern.resource_type == kafka.admin.ResourceType.TOPIC) + assert(good_acl.operation == kafka.admin.ACLOperation.ALL) + assert(good_acl.permission_type == kafka.admin.ACLPermissionType.ALLOW) + assert(good_acl.resource_pattern.pattern_type == kafka.admin.ACLResourcePatternType.LITERAL) + + with pytest.raises(IllegalArgumentError): + kafka.admin.ACL( + "User:bar", + "*", + kafka.admin.ACLOperation.ANY, + kafka.admin.ACLPermissionType.ANY, + kafka.admin.ResourcePattern( + kafka.admin.ResourceType.TOPIC, + "foo", + kafka.admin.ACLResourcePatternType.LITERAL + ) + ) + def test_new_topic(): with pytest.raises(IllegalArgumentError): bad_topic = kafka.admin.NewTopic('foo', -1, -1) diff --git a/test/test_admin_client.py b/test/test_admin_client.py new file mode 100644 index 000000000..aacf93581 --- /dev/null +++ 
b/test/test_admin_client.py @@ -0,0 +1,131 @@ +import mock +import pytest +from kafka.client_async import KafkaClient +from kafka.errors import BrokerNotAvailableError +from kafka.protocol.metadata import MetadataResponse +from kafka.protocol.admin import CreateTopicsResponse, DeleteTopicsResponse, CreatePartitionsResponse +from kafka.admin_client import AdminClient +from kafka.admin_client import NewTopic +from kafka.admin_client import NewPartitionsInfo +from kafka.structs import BrokerMetadata +from kafka.future import Future + +@pytest.fixture +def bootstrap_brokers(): + return 'fake-broker:9092' + +@pytest.fixture +def controller_id(): + return 100 + +@pytest.fixture +def mock_least_loaded_node(): + return 2 + +@pytest.fixture +def metadata_response(controller_id): + return [MetadataResponse[1]( + [(1,'host',80,'rack')], controller_id, + [(37,'topic',False,[(7,1,2,[1,2,3],[1,2,3])])] + )] + +@pytest.fixture +def mock_new_topics(): + return [NewTopic('topic',1,1)] + +@pytest.fixture +def mock_topic_partitions(): + return [NewPartitionsInfo('topic', 5, 4*[[1,2,3]]) ] + +@pytest.fixture +def topic_response(): + return CreateTopicsResponse[1]([( + 'topic',7,'timeout_exception' + )]) + +@pytest.fixture +def delete_response(): + return DeleteTopicsResponse[0]([( + 'topic',7 + )]) + +@pytest.fixture +def partition_response(): + return CreatePartitionsResponse[0]( + 100, + [('topic', 7, 'timeout_exception')] + ) + +class TestTopicAdmin(): + + def test_send_controller_request( + self, + mock_least_loaded_node, + controller_id, + bootstrap_brokers, + metadata_response + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll.return_value = metadata_response + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + mock_kafka_client.connected.return_value = True + admin = AdminClient(mock_kafka_client) + assert admin._send_controller_request() == controller_id + + def test_create_topics( + self, + mock_new_topics, + mock_least_loaded_node, + bootstrap_brokers, + topic_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, topic_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + admin = AdminClient(mock_kafka_client) + response = admin.create_topics(mock_new_topics, 0) + assert response == topic_response + + def test_delete_topics( + self, + mock_new_topics, + mock_least_loaded_node, + bootstrap_brokers, + delete_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, delete_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = Future() + admin = AdminClient(mock_kafka_client) + response = admin.delete_topics(mock_new_topics, 0) + assert response == delete_response + + def test_create_partitions( + self, + mock_topic_partitions, + mock_least_loaded_node, + partition_response, + metadata_response, + ): + mock_kafka_client = mock.Mock() + mock_kafka_client.poll = \ + mock.Mock(side_effect=[metadata_response, partition_response]) + mock_kafka_client.ready.return_value = True + mock_kafka_client.least_loaded_node.return_value = \ + mock_least_loaded_node + mock_kafka_client.send.return_value = 
Future() + admin = AdminClient(mock_kafka_client) + response = admin.create_partitions(mock_topic_partitions, 0, False) + assert response == partition_response diff --git a/test/test_admin_client_integration.py b/test/test_admin_client_integration.py new file mode 100644 index 000000000..a19cfdda9 --- /dev/null +++ b/test/test_admin_client_integration.py @@ -0,0 +1,74 @@ +import os +import time +import unittest +import pytest +from kafka.admin_client import AdminClient, NewTopic, NewPartitionsInfo +from kafka.protocol.metadata import MetadataRequest +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, env_kafka_version + +KAFKA_ADMIN_TIMEOUT_SECONDS = 5 + +class TestKafkaAdminClientIntegration(KafkaIntegrationTestCase): + + @classmethod + def setUpClass(cls): + if not os.environ.get('KAFKA_VERSION'): + return + + cls.zk = ZookeeperFixture.instance() + cls.server = KafkaFixture.instance(0, cls.zk) + + @classmethod + def tearDownClass(cls): + if not os.environ.get('KAFKA_VERSION'): + return + + cls.server.close() + cls.zk.close() + + @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason='Unsupported Kafka Version') + def test_create_delete_topics(self): + admin = AdminClient(self.client_async) + topic = NewTopic( + name='topic', + num_partitions=1, + replication_factor=1, + ) + metadata_request = MetadataRequest[1]() + response = admin.create_topics(topics=[topic], timeout=KAFKA_ADMIN_TIMEOUT_SECONDS) + # Error code 7 means that RequestTimedOut but we can safely assume + # that topic is created or will be created eventually. + # see this https://cwiki.apache.org/confluence/display/KAFKA/ + # KIP-4+-+Command+line+and+centralized+administrative+operations + self.assertTrue( + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 + ) + time.sleep(1) # allows the topic to be created + delete_response = admin.delete_topics(['topic'], timeout=1) + self.assertTrue( + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 + ) + + @pytest.mark.skipif(env_kafka_version() < (1, 0, 0), reason='Unsupported Kafka Version') + def test_create_partitions(self): + admin = AdminClient(self.client_async) + topic = NewTopic( + name='topic', + num_partitions=1, + replication_factor=1, + ) + metadata_request = MetadataRequest[1]() + admin.create_topics(topics=[topic], timeout=KAFKA_ADMIN_TIMEOUT_SECONDS) + + time.sleep(1) # allows the topic to be created + + new_partitions_info = NewPartitionsInfo('topic', 2, [[0]]) + response = admin.create_partitions([new_partitions_info], timeout=1, validate_only=False) + + self.assertTrue( + response[0].topic_errors[0][1] == 0 or + response[0].topic_errors[0][1] == 7 + ) diff --git a/test/test_admin_integration.py b/test/test_admin_integration.py new file mode 100644 index 000000000..2672faa0c --- /dev/null +++ b/test/test_admin_integration.py @@ -0,0 +1,122 @@ +import pytest +import os + +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, env_kafka_version, current_offset + +from kafka.errors import NoError +from kafka.admin import KafkaAdminClient, ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, ResourceType, ACL + +# This test suite passes for me locally, but fails on travis +# Needs investigation +DISABLED = True + +# TODO: Convert to pytest / fixtures +# Note that ACL features require broker 0.11, but other admin apis may work on +# earlier broker versions +class 
TestAdminClientIntegration(KafkaIntegrationTestCase): + @classmethod + def setUpClass(cls): # noqa + if env_kafka_version() < (0, 11) or DISABLED: + return + + cls.zk = ZookeeperFixture.instance() + cls.server = KafkaFixture.instance(0, cls.zk) + + @classmethod + def tearDownClass(cls): # noqa + if env_kafka_version() < (0, 11) or DISABLED: + return + + cls.server.close() + cls.zk.close() + + def setUp(self): + if env_kafka_version() < (0, 11) or DISABLED: + self.skipTest('Admin ACL Integration test requires KAFKA_VERSION >= 0.11') + super(TestAdminClientIntegration, self).setUp() + + def tearDown(self): + if env_kafka_version() < (0, 11) or DISABLED: + return + super(TestAdminClientIntegration, self).tearDown() + + def test_create_describe_delete_acls(self): + """Tests that we can add, list and remove ACLs + """ + + # Setup + brokers = '%s:%d' % (self.server.host, self.server.port) + admin_client = KafkaAdminClient( + bootstrap_servers=brokers + ) + + # Check that we don't have any ACLs in the cluster + acls, error = admin_client.describe_acls( + ACLFilter( + principal=None, + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ) + + self.assertIs(error, NoError) + self.assertEqual(0, len(acls)) + + # Try to add an ACL + acl = ACL( + principal="User:test", + host="*", + operation=ACLOperation.READ, + permission_type=ACLPermissionType.ALLOW, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + result = admin_client.create_acls([acl]) + + self.assertFalse(len(result["failed"])) + self.assertEqual(len(result["succeeded"]), 1) + + # Check that we can list the ACL we created + acl_filter = ACLFilter( + principal=None, + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + acls, error = admin_client.describe_acls(acl_filter) + + self.assertIs(error, NoError) + self.assertEqual(1, len(acls)) + + # Remove the ACL + delete_results = admin_client.delete_acls( + [ + ACLFilter( + principal="User:test", + host="*", + operation=ACLOperation.READ, + permission_type=ACLPermissionType.ALLOW, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ] + ) + + self.assertEqual(1, len(delete_results)) + self.assertEqual(1, len(delete_results[0][1])) # Check number of affected ACLs + + + # Make sure the ACL does not exist in the cluster anymore + acls, error = admin_client.describe_acls( + ACLFilter( + principal="*", + host="*", + operation=ACLOperation.ANY, + permission_type=ACLPermissionType.ANY, + resource_pattern=ResourcePattern(ResourceType.TOPIC, "topic") + ) + ) + self.assertIs(error, NoError) + self.assertEqual(0, len(acls)) diff --git a/test/test_assignors.py b/test/test_assignors.py index e2a1d4fdd..0821caf83 100644 --- a/test/test_assignors.py +++ b/test/test_assignors.py @@ -5,8 +5,7 @@ from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor -from kafka.coordinator.protocol import ( - ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment) +from kafka.coordinator.protocol import ConsumerProtocolMemberAssignment @pytest.fixture diff --git a/test/test_client.py b/test/test_client.py index 1c689789b..e8152626e 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -37,27 +37,33 @@ def test_init_with_list(self): client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092', 
'kafka03:9092']) self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) def test_init_with_csv(self): with patch.object(SimpleClient, 'load_metadata_for_topics'): client = SimpleClient(hosts='kafka01:9092,kafka02:9092,kafka03:9092') self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) def test_init_with_unicode_csv(self): with patch.object(SimpleClient, 'load_metadata_for_topics'): client = SimpleClient(hosts=u'kafka01:9092,kafka02:9092,kafka03:9092') self.assertEqual( - sorted([('kafka01', 9092, socket.AF_UNSPEC), ('kafka02', 9092, socket.AF_UNSPEC), - ('kafka03', 9092, socket.AF_UNSPEC)]), - sorted(client.hosts)) + sorted([('kafka01', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka02', 9092, socket.AF_UNSPEC, 'bootstrap'), + ('kafka03', 9092, socket.AF_UNSPEC, 'bootstrap')]), + sorted(client.hosts), + ) @patch.object(SimpleClient, '_get_conn') @patch.object(SimpleClient, 'load_metadata_for_topics') @@ -69,7 +75,7 @@ def test_send_broker_unaware_request_fail(self, load_metadata, conn): for val in mocked_conns.values(): mock_conn(val, success=False) - def mock_get_conn(host, port, afi): + def mock_get_conn(host, port, afi, node_id='bootstrap'): return mocked_conns[(host, port)] conn.side_effect = mock_get_conn @@ -97,7 +103,7 @@ def test_send_broker_unaware_request(self): mocked_conns[('kafka02', 9092)].send.return_value = future mocked_conns[('kafka02', 9092)].recv.return_value = [('valid response', future)] - def mock_get_conn(host, port, afi): + def mock_get_conn(host, port, afi, node_id='bootstrap'): return mocked_conns[(host, port)] # patch to avoid making requests before we want it diff --git a/test/test_client_async.py b/test/test_client_async.py index 2132c8e4c..74da66a36 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -17,25 +17,23 @@ from kafka.conn import ConnectionStates import kafka.errors as Errors from kafka.future import Future -from kafka.protocol.metadata import MetadataResponse, MetadataRequest +from kafka.protocol.metadata import MetadataRequest from kafka.protocol.produce import ProduceRequest from kafka.structs import BrokerMetadata @pytest.fixture def cli(mocker, conn): - mocker.patch('kafka.cluster.dns_lookup', - return_value=[(socket.AF_INET, None, None, None, ('localhost', 9092))]) client = KafkaClient(api_version=(0, 9)) + mocker.patch.object(client, '_selector') client.poll(future=client.cluster.request_update()) return client def test_bootstrap(mocker, conn): conn.state = ConnectionStates.CONNECTED - mocker.patch('kafka.cluster.dns_lookup', - return_value=[(socket.AF_INET, None, None, None, ('localhost', 9092))]) cli = KafkaClient(api_version=(0, 9)) + mocker.patch.object(cli, '_selector') future = cli.cluster.request_update() cli.poll(future=future) @@ -90,7 +88,7 @@ def test_maybe_connect(cli, conn): def test_conn_state_change(mocker, cli, conn): - sel = mocker.patch.object(cli, '_selector') + sel = cli._selector node_id = 0 
cli._conns[node_id] = conn @@ -98,7 +96,7 @@ def test_conn_state_change(mocker, cli, conn): sock = conn._sock cli._conn_state_change(node_id, sock, conn) assert node_id in cli._connecting - sel.register.assert_called_with(sock, selectors.EVENT_WRITE) + sel.register.assert_called_with(sock, selectors.EVENT_WRITE, conn) conn.state = ConnectionStates.CONNECTED cli._conn_state_change(node_id, sock, conn) @@ -233,6 +231,8 @@ def test_send(cli, conn): def test_poll(mocker): metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata') _poll = mocker.patch.object(KafkaClient, '_poll') + ifrs = mocker.patch.object(KafkaClient, 'in_flight_request_count') + ifrs.return_value = 1 cli = KafkaClient(api_version=(0, 9)) # metadata timeout wins @@ -249,6 +249,11 @@ def test_poll(mocker): cli.poll() _poll.assert_called_with(cli.config['request_timeout_ms'] / 1000.0) + # If no in-flight-requests, drop timeout to retry_backoff_ms + ifrs.return_value = 0 + cli.poll() + _poll.assert_called_with(cli.config['retry_backoff_ms'] / 1000.0) + def test__poll(): pass @@ -304,12 +309,14 @@ def client(mocker): def test_maybe_refresh_metadata_ttl(mocker, client): client.cluster.ttl.return_value = 1234 + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) client.poll(timeout_ms=12345678) client._poll.assert_called_with(1.234) def test_maybe_refresh_metadata_backoff(mocker, client): + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) now = time.time() t = mocker.patch('time.time') t.return_value = now @@ -320,6 +327,7 @@ def test_maybe_refresh_metadata_backoff(mocker, client): def test_maybe_refresh_metadata_in_progress(mocker, client): client._metadata_refresh_in_progress = True + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) client.poll(timeout_ms=12345678) client._poll.assert_called_with(9999.999) # request_timeout_ms @@ -328,6 +336,7 @@ def test_maybe_refresh_metadata_in_progress(mocker, client): def test_maybe_refresh_metadata_update(mocker, client): mocker.patch.object(client, 'least_loaded_node', return_value='foobar') mocker.patch.object(client, '_can_send_request', return_value=True) + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) send = mocker.patch.object(client, 'send') client.poll(timeout_ms=12345678) @@ -342,6 +351,7 @@ def test_maybe_refresh_metadata_cant_send(mocker, client): mocker.patch.object(client, '_can_connect', return_value=True) mocker.patch.object(client, '_maybe_connect', return_value=True) mocker.patch.object(client, 'maybe_connect', return_value=True) + mocker.patch.object(KafkaClient, 'in_flight_request_count', return_value=1) now = time.time() t = mocker.patch('time.time') diff --git a/test/test_client_integration.py b/test/test_client_integration.py index df0faef69..cceb97b00 100644 --- a/test/test_client_integration.py +++ b/test/test_client_integration.py @@ -1,5 +1,7 @@ import os +import pytest + from kafka.errors import KafkaTimeoutError from kafka.protocol import create_message from kafka.structs import ( @@ -7,7 +9,7 @@ ProduceRequestPayload) from test.fixtures import ZookeeperFixture, KafkaFixture -from test.testutil import KafkaIntegrationTestCase, kafka_versions +from test.testutil import KafkaIntegrationTestCase, env_kafka_version class TestKafkaClientIntegration(KafkaIntegrationTestCase): @@ -80,7 +82,7 @@ def test_send_produce_request_maintains_request_response_order(self): # Offset Tests # #################### - @kafka_versions('>=0.8.1') + 
@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_commit_fetch_offsets(self): req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') (resp,) = self.client.send_offset_commit_request('group', [req]) @@ -91,3 +93,16 @@ def test_commit_fetch_offsets(self): self.assertEqual(resp.error, 0) self.assertEqual(resp.offset, 42) self.assertEqual(resp.metadata, '') # Metadata isn't stored for now + + + @pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') + def test_commit_fetch_offsets_dual(self): + req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') + (resp,) = self.client.send_offset_commit_request_kafka('group', [req]) + self.assertEqual(resp.error, 0) + + (resp,) = self.client.send_offset_fetch_request_kafka('group', [req]) + self.assertEqual(resp.error, 0) + self.assertEqual(resp.offset, 42) + # Metadata is stored in kafka + self.assertEqual(resp.metadata, 'metadata') diff --git a/test/test_codec.py b/test/test_codec.py index 0fefe6faa..9eff888fe 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -7,14 +7,14 @@ from kafka.vendor.six.moves import range from kafka.codec import ( - has_snappy, has_gzip, has_lz4, + has_snappy, has_lz4, gzip_encode, gzip_decode, snappy_encode, snappy_decode, lz4_encode, lz4_decode, lz4_encode_old_kafka, lz4_decode_old_kafka, ) -from test.fixtures import random_string +from test.testutil import random_string def test_gzip(): diff --git a/test/test_conn.py b/test/test_conn.py index 5da5effcf..966f7b34d 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -3,7 +3,6 @@ from errno import EALREADY, EINPROGRESS, EISCONN, ECONNRESET import socket -import time import mock import pytest @@ -86,7 +85,7 @@ def test_connection_delay(conn): conn.last_attempt = 1000 assert conn.connection_delay() == conn.config['reconnect_backoff_ms'] conn.state = ConnectionStates.CONNECTING - assert conn.connection_delay() == 0 + assert conn.connection_delay() == float('inf') conn.state = ConnectionStates.CONNECTED assert conn.connection_delay() == float('inf') @@ -275,7 +274,7 @@ def test_lookup_on_connect(): ] with mock.patch("socket.getaddrinfo", return_value=mock_return1) as m: conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi1 assert conn._sock_addr == sockaddr1 conn.close() @@ -289,7 +288,7 @@ def test_lookup_on_connect(): with mock.patch("socket.getaddrinfo", return_value=mock_return2) as m: conn.last_attempt = 0 conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi2 assert conn._sock_addr == sockaddr2 conn.close() @@ -304,7 +303,7 @@ def test_relookup_on_failure(): with mock.patch("socket.getaddrinfo", return_value=mock_return1) as m: last_attempt = conn.last_attempt conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn.disconnected() assert conn.last_attempt > last_attempt @@ -317,7 +316,7 @@ def test_relookup_on_failure(): with mock.patch("socket.getaddrinfo", return_value=mock_return2) as m: conn.last_attempt = 0 conn.connect() - m.assert_called_once_with(hostname, port, 0, 1) + m.assert_called_once_with(hostname, port, 0, socket.SOCK_STREAM) assert conn._sock_afi == afi2 assert conn._sock_addr == sockaddr2 conn.close() diff --git a/test/test_consumer.py b/test/test_consumer.py index 
edcc2d8c7..1c03ca70d 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -4,7 +4,7 @@ from . import unittest import pytest -from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer +from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer from kafka.errors import ( FailedPayloadsError, KafkaConfigurationError, NotLeaderForPartitionError, UnknownTopicOrPartitionError) @@ -12,7 +12,7 @@ FetchResponsePayload, OffsetAndMessage, OffsetFetchResponsePayload) -class TestKafkaConsumer: +class TestKafkaConsumer(unittest.TestCase): def test_session_timeout_larger_than_request_timeout_raises(self): with pytest.raises(KafkaConfigurationError): KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(0, 9), group_id='foo', session_timeout_ms=50000, request_timeout_ms=40000) @@ -33,6 +33,9 @@ def test_subscription_copy(self): sub.add('fizz') assert consumer.subscription() == set(['foo']) + def test_broker_list_required(self): + with self.assertRaises(KafkaConfigurationError): + OldKafkaConsumer() class TestMultiProcessConsumer(unittest.TestCase): @unittest.skipIf(sys.platform.startswith('win'), 'test mocking fails on windows') diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index d7aaa8896..58dc7ebf9 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -8,18 +8,18 @@ from kafka.conn import ConnectionStates from kafka.consumer.group import KafkaConsumer -from kafka.coordinator.base import MemberState, Generation +from kafka.coordinator.base import MemberState from kafka.structs import TopicPartition -from test.fixtures import random_string, version +from test.testutil import env_kafka_version, random_string def get_connect_str(kafka_broker): return kafka_broker.host + ':' + str(kafka_broker.port) -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") -def test_consumer(kafka_broker, topic, version): +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_consumer(kafka_broker, topic): # The `topic` fixture is included because # 0.8.2 brokers need a topic to function well consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) @@ -30,8 +30,17 @@ def test_consumer(kafka_broker, topic, version): consumer.close() -@pytest.mark.skipif(version() < (0, 9), reason='Unsupported Kafka Version') -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_consumer_topics(kafka_broker, topic): + consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) + # Necessary to drive the IO + consumer.poll(500) + assert topic in consumer.topics() + assert len(consumer.partitions_for_topic(topic)) > 0 + consumer.close() + + +@pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_group(kafka_broker, topic): num_partitions = 4 connect_str = get_connect_str(kafka_broker) @@ -121,7 +130,7 @@ def consumer_thread(i): threads[c] = None -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_paused(kafka_broker, topic): consumer = KafkaConsumer(bootstrap_servers=get_connect_str(kafka_broker)) topics = [TopicPartition(topic, 1)] @@ -140,8 +149,7 @@ def test_paused(kafka_broker, topic): consumer.close() -@pytest.mark.skipif(version() < (0, 9), reason='Unsupported Kafka Version') -@pytest.mark.skipif(not version(), reason="No 
KAFKA_VERSION set") +@pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_heartbeat_thread(kafka_broker, topic): group_id = 'test-group-' + random_string(6) consumer = KafkaConsumer(topic, diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index fdffd05a7..fbd6235fb 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -1,60 +1,51 @@ import logging import os import time -from mock import patch -import pytest -import kafka.codec +from mock import patch import pytest from kafka.vendor.six.moves import range -from kafka.vendor import six from . import unittest from kafka import ( - KafkaConsumer, MultiProcessConsumer, SimpleConsumer, create_message, + KafkaConsumer, MultiProcessConsumer, OldKafkaConsumer, SimpleConsumer, create_message, create_gzip_message, KafkaProducer ) +import kafka.codec from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES from kafka.errors import ( ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, UnsupportedVersionError, - KafkaTimeoutError, UnsupportedCodecError + KafkaTimeoutError, UnsupportedCodecError, ConsumerTimeout ) +from kafka.protocol.message import PartialMessage from kafka.structs import ( ProduceRequestPayload, TopicPartition, OffsetAndTimestamp ) -from test.fixtures import ZookeeperFixture, KafkaFixture, random_string, version -from test.testutil import KafkaIntegrationTestCase, kafka_versions, Timer +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") -def test_kafka_consumer(kafka_producer, topic, kafka_consumer_factory): +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_kafka_consumer(kafka_consumer_factory, send_messages): """Test KafkaConsumer""" - kafka_consumer = kafka_consumer_factory(auto_offset_reset='earliest') - - # TODO replace this with a `send_messages()` pytest fixture - # as we will likely need this elsewhere - for i in range(0, 100): - kafka_producer.send(topic, partition=0, value=str(i).encode()) - for i in range(100, 200): - kafka_producer.send(topic, partition=1, value=str(i).encode()) - kafka_producer.flush() - + consumer = kafka_consumer_factory(auto_offset_reset='earliest') + send_messages(range(0, 100), partition=0) + send_messages(range(0, 100), partition=1) cnt = 0 - messages = {0: set(), 1: set()} - for message in kafka_consumer: + messages = {0: [], 1: []} + for message in consumer: logging.debug("Consumed message %s", repr(message)) cnt += 1 - messages[message.partition].add(message.offset) + messages[message.partition].append(message) if cnt >= 200: break - assert len(messages[0]) == 100 - assert len(messages[1]) == 100 - kafka_consumer.close() + assert_message_count(messages[0], 100) + assert_message_count(messages[1], 100) -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_kafka_consumer_unsupported_encoding( topic, kafka_producer_factory, kafka_consumer_factory): # Send a compressed message @@ -211,7 +202,7 @@ def test_simple_consumer_no_reset(self): with self.assertRaises(OffsetOutOfRangeError): consumer.get_message() - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_simple_consumer_load_initial_offsets(self): 
self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -259,6 +250,8 @@ def test_simple_consumer__seek(self): consumer.stop() + @pytest.mark.skipif(env_kafka_version() >= (2, 0), + reason="SimpleConsumer blocking does not handle PartialMessage change in kafka 2.0+") def test_simple_consumer_blocking(self): consumer = self.consumer() @@ -388,7 +381,7 @@ def test_multi_proc_pending(self): consumer.stop() @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky') - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_multi_process_consumer_load_initial_offsets(self): self.send_messages(0, range(0, 10)) self.send_messages(1, range(10, 20)) @@ -424,7 +417,8 @@ def test_large_messages(self): consumer = self.consumer(max_buffer_size=60000) expected_messages = set(small_messages + large_messages) - actual_messages = set([ x.message.value for x in consumer ]) + actual_messages = set([x.message.value for x in consumer + if not isinstance(x.message, PartialMessage)]) self.assertEqual(expected_messages, actual_messages) consumer.stop() @@ -459,7 +453,7 @@ def test_huge_messages(self): big_consumer.stop() - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_offset_behavior__resuming_behavior(self): self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -491,7 +485,7 @@ def test_offset_behavior__resuming_behavior(self): consumer2.stop() @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky') - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_multi_process_offset_behavior__resuming_behavior(self): self.send_messages(0, range(0, 100)) self.send_messages(1, range(100, 200)) @@ -548,241 +542,239 @@ def test_fetch_buffer_size(self): messages = [ message for message in consumer ] self.assertEqual(len(messages), 2) - def test_kafka_consumer__blocking(self): - TIMEOUT_MS = 500 - consumer = self.kafka_consumer(auto_offset_reset='earliest', - enable_auto_commit=False, - consumer_timeout_ms=TIMEOUT_MS) - - # Manual assignment avoids overhead of consumer group mgmt - consumer.unsubscribe() - consumer.assign([TopicPartition(self.topic, 0)]) - # Ask for 5 messages, nothing in queue, block 500ms - with Timer() as t: - with self.assertRaises(StopIteration): - msg = next(consumer) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - - self.send_messages(0, range(0, 10)) - - # Ask for 5 messages, 10 in queue. Get 5 back, no blocking - messages = set() - with Timer() as t: - for i in range(5): +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +def test_kafka_consumer__blocking(kafka_consumer_factory, topic, send_messages): + TIMEOUT_MS = 500 + consumer = kafka_consumer_factory(auto_offset_reset='earliest', + enable_auto_commit=False, + consumer_timeout_ms=TIMEOUT_MS) + + # Manual assignment avoids overhead of consumer group mgmt + consumer.unsubscribe() + consumer.assign([TopicPartition(topic, 0)]) + + # Ask for 5 messages, nothing in queue, block 500ms + with Timer() as t: + with pytest.raises(StopIteration): + msg = next(consumer) + assert t.interval >= (TIMEOUT_MS / 1000.0) + + send_messages(range(0, 10)) + + # Ask for 5 messages, 10 in queue. 
Get 5 back, no blocking + messages = [] + with Timer() as t: + for i in range(5): + msg = next(consumer) + messages.append(msg) + assert_message_count(messages, 5) + assert t.interval < (TIMEOUT_MS / 1000.0) + + # Ask for 10 messages, get 5 back, block 500ms + messages = [] + with Timer() as t: + with pytest.raises(StopIteration): + for i in range(10): msg = next(consumer) - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertLess(t.interval, TIMEOUT_MS / 1000.0 ) - - # Ask for 10 messages, get 5 back, block 500ms - messages = set() - with Timer() as t: - with self.assertRaises(StopIteration): - for i in range(10): - msg = next(consumer) - messages.add((msg.partition, msg.offset)) - self.assertEqual(len(messages), 5) - self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 ) - consumer.close() - - @kafka_versions('>=0.8.1') - def test_kafka_consumer__offset_commit_resume(self): - GROUP_ID = random_string(10) - - self.send_messages(0, range(0, 100)) - self.send_messages(1, range(100, 200)) - - # Start a consumer - consumer1 = self.kafka_consumer( - group_id=GROUP_ID, - enable_auto_commit=True, - auto_commit_interval_ms=100, - auto_offset_reset='earliest', - ) - - # Grab the first 180 messages - output_msgs1 = [] - for _ in range(180): - m = next(consumer1) - output_msgs1.append(m) - self.assert_message_count(output_msgs1, 180) - consumer1.close() - - # The total offset across both partitions should be at 180 - consumer2 = self.kafka_consumer( - group_id=GROUP_ID, - enable_auto_commit=True, - auto_commit_interval_ms=100, - auto_offset_reset='earliest', - ) - - # 181-200 - output_msgs2 = [] - for _ in range(20): - m = next(consumer2) - output_msgs2.append(m) - self.assert_message_count(output_msgs2, 20) - self.assertEqual(len(set(output_msgs1) | set(output_msgs2)), 200) - consumer2.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_max_bytes_simple(self): - self.send_messages(0, range(100, 200)) - self.send_messages(1, range(200, 300)) - - # Start a consumer - consumer = self.kafka_consumer( - auto_offset_reset='earliest', fetch_max_bytes=300) - seen_partitions = set([]) - for i in range(10): - poll_res = consumer.poll(timeout_ms=100) - for partition, msgs in six.iteritems(poll_res): - for msg in msgs: - seen_partitions.add(partition) - - # Check that we fetched at least 1 message from both partitions - self.assertEqual( - seen_partitions, set([ - TopicPartition(self.topic, 0), TopicPartition(self.topic, 1)])) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_max_bytes_one_msg(self): - # We send to only 1 partition so we don't have parallel requests to 2 - # nodes for data. - self.send_messages(0, range(100, 200)) - - # Start a consumer. FetchResponse_v3 should always include at least 1 - # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time - # But 0.11.0.0 returns 1 MessageSet at a time when the messages are - # stored in the new v2 format by the broker. - # - # DP Note: This is a strange test. The consumer shouldn't care - # how many messages are included in a FetchResponse, as long as it is - # non-zero. I would not mind if we deleted this test. It caused - # a minor headache when testing 0.11.0.0. 
- group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5) - consumer = self.kafka_consumer( - group_id=group, - auto_offset_reset='earliest', - consumer_timeout_ms=5000, - fetch_max_bytes=1) - - fetched_msgs = [next(consumer) for i in range(10)] - self.assertEqual(len(fetched_msgs), 10) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_for_time(self): - late_time = int(time.time()) * 1000 - middle_time = late_time - 1000 - early_time = late_time - 2000 - tp = TopicPartition(self.topic, 0) - - timeout = 10 - kafka_producer = self.kafka_producer() - early_msg = kafka_producer.send( - self.topic, partition=0, value=b"first", - timestamp_ms=early_time).get(timeout) - late_msg = kafka_producer.send( - self.topic, partition=0, value=b"last", - timestamp_ms=late_time).get(timeout) - - consumer = self.kafka_consumer() - offsets = consumer.offsets_for_times({tp: early_time}) - self.assertEqual(len(offsets), 1) - self.assertEqual(offsets[tp].offset, early_msg.offset) - self.assertEqual(offsets[tp].timestamp, early_time) - - offsets = consumer.offsets_for_times({tp: middle_time}) - self.assertEqual(offsets[tp].offset, late_msg.offset) - self.assertEqual(offsets[tp].timestamp, late_time) - - offsets = consumer.offsets_for_times({tp: late_time}) - self.assertEqual(offsets[tp].offset, late_msg.offset) - self.assertEqual(offsets[tp].timestamp, late_time) - - offsets = consumer.offsets_for_times({}) - self.assertEqual(offsets, {}) - - # Out of bound timestamps check - - offsets = consumer.offsets_for_times({tp: 0}) - self.assertEqual(offsets[tp].offset, early_msg.offset) - self.assertEqual(offsets[tp].timestamp, early_time) - - offsets = consumer.offsets_for_times({tp: 9999999999999}) - self.assertEqual(offsets[tp], None) - - # Beginning/End offsets - - offsets = consumer.beginning_offsets([tp]) - self.assertEqual(offsets, { - tp: early_msg.offset, - }) - offsets = consumer.end_offsets([tp]) - self.assertEqual(offsets, { - tp: late_msg.offset + 1 - }) - consumer.close() - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_search_many_partitions(self): - tp0 = TopicPartition(self.topic, 0) - tp1 = TopicPartition(self.topic, 1) - - kafka_producer = self.kafka_producer() - send_time = int(time.time() * 1000) - timeout = 10 - p0msg = kafka_producer.send( - self.topic, partition=0, value=b"XXX", - timestamp_ms=send_time).get(timeout) - p1msg = kafka_producer.send( - self.topic, partition=1, value=b"XXX", - timestamp_ms=send_time).get(timeout) - - consumer = self.kafka_consumer() - offsets = consumer.offsets_for_times({ - tp0: send_time, - tp1: send_time - }) - - self.assertEqual(offsets, { - tp0: OffsetAndTimestamp(p0msg.offset, send_time), - tp1: OffsetAndTimestamp(p1msg.offset, send_time) - }) - - offsets = consumer.beginning_offsets([tp0, tp1]) - self.assertEqual(offsets, { - tp0: p0msg.offset, - tp1: p1msg.offset - }) - - offsets = consumer.end_offsets([tp0, tp1]) - self.assertEqual(offsets, { - tp0: p0msg.offset + 1, - tp1: p1msg.offset + 1 - }) - consumer.close() - - @kafka_versions('<0.10.1') - def test_kafka_consumer_offsets_for_time_old(self): - consumer = self.kafka_consumer() - tp = TopicPartition(self.topic, 0) - - with self.assertRaises(UnsupportedVersionError): - consumer.offsets_for_times({tp: int(time.time())}) - - @kafka_versions('>=0.10.1') - def test_kafka_consumer_offsets_for_times_errors(self): - consumer = self.kafka_consumer(fetch_max_wait_ms=200, - request_timeout_ms=500) - tp = TopicPartition(self.topic, 0) - bad_tp = 
TopicPartition(self.topic, 100) - - with self.assertRaises(ValueError): - consumer.offsets_for_times({tp: -1}) - - with self.assertRaises(KafkaTimeoutError): - consumer.offsets_for_times({bad_tp: 0}) + messages.append(msg) + assert_message_count(messages, 5) + assert t.interval >= (TIMEOUT_MS / 1000.0) + + +@pytest.mark.skipif(env_kafka_version() < (0, 8, 1), reason="Requires KAFKA_VERSION >= 0.8.1") +def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages): + GROUP_ID = random_string(10) + + send_messages(range(0, 100), partition=0) + send_messages(range(100, 200), partition=1) + + # Start a consumer and grab the first 180 messages + consumer1 = kafka_consumer_factory( + group_id=GROUP_ID, + enable_auto_commit=True, + auto_commit_interval_ms=100, + auto_offset_reset='earliest', + ) + output_msgs1 = [] + for _ in range(180): + m = next(consumer1) + output_msgs1.append(m) + assert_message_count(output_msgs1, 180) + + # Normally we let the pytest fixture `kafka_consumer_factory` handle + # closing as part of its teardown. Here we manually call close() to force + # auto-commit to occur before the second consumer starts. That way the + # second consumer only consumes previously unconsumed messages. + consumer1.close() + + # Start a second consumer to grab 181-200 + consumer2 = kafka_consumer_factory( + group_id=GROUP_ID, + enable_auto_commit=True, + auto_commit_interval_ms=100, + auto_offset_reset='earliest', + ) + output_msgs2 = [] + for _ in range(20): + m = next(consumer2) + output_msgs2.append(m) + assert_message_count(output_msgs2, 20) + + # Verify the second consumer wasn't reconsuming messages that the first + # consumer already saw + assert_message_count(output_msgs1 + output_msgs2, 200) + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_max_bytes_simple(kafka_consumer_factory, topic, send_messages): + send_messages(range(100, 200), partition=0) + send_messages(range(200, 300), partition=1) + + # Start a consumer + consumer = kafka_consumer_factory( + auto_offset_reset='earliest', fetch_max_bytes=300) + seen_partitions = set() + for i in range(90): + poll_res = consumer.poll(timeout_ms=100) + for partition, msgs in poll_res.items(): + for msg in msgs: + seen_partitions.add(partition) + + # Check that we fetched at least 1 message from both partitions + assert seen_partitions == {TopicPartition(topic, 0), TopicPartition(topic, 1)} + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_max_bytes_one_msg(kafka_consumer_factory, send_messages): + # We send to only 1 partition so we don't have parallel requests to 2 + # nodes for data. + send_messages(range(100, 200)) + + # Start a consumer. FetchResponse_v3 should always include at least 1 + # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time + # But 0.11.0.0 returns 1 MessageSet at a time when the messages are + # stored in the new v2 format by the broker. + # + # DP Note: This is a strange test. The consumer shouldn't care + # how many messages are included in a FetchResponse, as long as it is + # non-zero. I would not mind if we deleted this test. It caused + # a minor headache when testing 0.11.0.0. 
+ group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5) + consumer = kafka_consumer_factory( + group_id=group, + auto_offset_reset='earliest', + consumer_timeout_ms=5000, + fetch_max_bytes=1) + + fetched_msgs = [next(consumer) for i in range(10)] + assert_message_count(fetched_msgs, 10) + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_offsets_for_time(topic, kafka_consumer, kafka_producer): + late_time = int(time.time()) * 1000 + middle_time = late_time - 1000 + early_time = late_time - 2000 + tp = TopicPartition(topic, 0) + + timeout = 10 + early_msg = kafka_producer.send( + topic, partition=0, value=b"first", + timestamp_ms=early_time).get(timeout) + late_msg = kafka_producer.send( + topic, partition=0, value=b"last", + timestamp_ms=late_time).get(timeout) + + consumer = kafka_consumer + offsets = consumer.offsets_for_times({tp: early_time}) + assert len(offsets) == 1 + assert offsets[tp].offset == early_msg.offset + assert offsets[tp].timestamp == early_time + + offsets = consumer.offsets_for_times({tp: middle_time}) + assert offsets[tp].offset == late_msg.offset + assert offsets[tp].timestamp == late_time + + offsets = consumer.offsets_for_times({tp: late_time}) + assert offsets[tp].offset == late_msg.offset + assert offsets[tp].timestamp == late_time + + offsets = consumer.offsets_for_times({}) + assert offsets == {} + + # Out of bound timestamps check + + offsets = consumer.offsets_for_times({tp: 0}) + assert offsets[tp].offset == early_msg.offset + assert offsets[tp].timestamp == early_time + + offsets = consumer.offsets_for_times({tp: 9999999999999}) + assert offsets[tp] is None + + # Beginning/End offsets + + offsets = consumer.beginning_offsets([tp]) + assert offsets == {tp: early_msg.offset} + offsets = consumer.end_offsets([tp]) + assert offsets == {tp: late_msg.offset + 1} + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_producer, topic): + tp0 = TopicPartition(topic, 0) + tp1 = TopicPartition(topic, 1) + + send_time = int(time.time() * 1000) + timeout = 10 + p0msg = kafka_producer.send( + topic, partition=0, value=b"XXX", + timestamp_ms=send_time).get(timeout) + p1msg = kafka_producer.send( + topic, partition=1, value=b"XXX", + timestamp_ms=send_time).get(timeout) + + consumer = kafka_consumer + offsets = consumer.offsets_for_times({ + tp0: send_time, + tp1: send_time + }) + + assert offsets == { + tp0: OffsetAndTimestamp(p0msg.offset, send_time), + tp1: OffsetAndTimestamp(p1msg.offset, send_time) + } + + offsets = consumer.beginning_offsets([tp0, tp1]) + assert offsets == { + tp0: p0msg.offset, + tp1: p1msg.offset + } + + offsets = consumer.end_offsets([tp0, tp1]) + assert offsets == { + tp0: p0msg.offset + 1, + tp1: p1msg.offset + 1 + } + + +@pytest.mark.skipif(env_kafka_version() >= (0, 10, 1), reason="Requires KAFKA_VERSION < 0.10.1") +def test_kafka_consumer_offsets_for_time_old(kafka_consumer, topic): + consumer = kafka_consumer + tp = TopicPartition(topic, 0) + + with pytest.raises(UnsupportedVersionError): + consumer.offsets_for_times({tp: int(time.time())}) + + +@pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") +def test_kafka_consumer_offsets_for_times_errors(kafka_consumer_factory, topic): + consumer = kafka_consumer_factory(fetch_max_wait_ms=200, + request_timeout_ms=500) + tp = 
TopicPartition(topic, 0) + bad_tp = TopicPartition(topic, 100) + + with pytest.raises(ValueError): + consumer.offsets_for_times({tp: -1}) + + assert consumer.offsets_for_times({bad_tp: 0}) == {bad_tp: None} diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index 48021a443..314288dc1 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -9,8 +9,8 @@ from kafka.producer.base import Producer from kafka.structs import TopicPartition -from test.fixtures import ZookeeperFixture, KafkaFixture, random_string -from test.testutil import KafkaIntegrationTestCase +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, random_string log = logging.getLogger(__name__) @@ -208,7 +208,7 @@ def _kill_leader(self, topic, partition): broker.close() return broker - def assert_message_count(self, topic, check_count, timeout=10, + def assert_message_count(self, topic, check_count, timeout=25, partitions=None, at_least=False): hosts = ','.join(['%s:%d' % (broker.host, broker.port) for broker in self.brokers]) diff --git a/test/test_fetcher.py b/test/test_fetcher.py index e37a70db5..b61a0f026 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -81,7 +81,7 @@ def test_send_fetches(fetcher, topic, mocker): ret = fetcher.send_fetches() for node, request in enumerate(fetch_requests): - fetcher._client.send.assert_any_call(node, request) + fetcher._client.send.assert_any_call(node, request, wakeup=False) assert len(ret) == len(fetch_requests) @@ -138,10 +138,6 @@ def test__reset_offset(fetcher, mocker): fetcher._subscriptions.need_offset_reset(tp) mocked = mocker.patch.object(fetcher, '_retrieve_offsets') - mocked.return_value = {} - with pytest.raises(NoOffsetForPartitionError): - fetcher._reset_offset(tp) - mocked.return_value = {tp: (1001, None)} fetcher._reset_offset(tp) assert not fetcher._subscriptions.assignment[tp].awaiting_reset diff --git a/test/test_msk.py b/test/test_msk.py new file mode 100644 index 000000000..69855fbb9 --- /dev/null +++ b/test/test_msk.py @@ -0,0 +1,122 @@ +import datetime +import json +import sys + +import pytest +from unittest import TestCase + +from kafka.errors import IllegalArgumentError +from kafka.msk import AwsMskIamClient + +try: + from unittest import mock +except ImportError: + import mock + + +@pytest.fixture +def boto_session(): + # To avoid a package dependency on the optional botocore library, we mock the module out + sys.modules['botocore.session'] = mock.MagicMock() + from botocore.session import Session # pylint: disable=import-error + + boto_session = Session() + boto_session.get_credentials = mock.MagicMock(return_value=mock.MagicMock(id='the_actual_credentials', access_key='akia', secret_key='secret', token=None)) + yield boto_session + + +def test_aws_msk_iam_region_from_config(boto_session): + # Region determined by configuration + boto_session.get_config_variable = mock.MagicMock(return_value='us-west-2') + msk_client = AwsMskIamClient( + host='localhost', + boto_session = boto_session, + ) + msg = msk_client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg.decode('utf-8')) + + expected = { + 'version': '2020_10_22', + 'host': msk_client.host, + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': '{}/{}/us-west-2/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), 
+ 'x-amz-date': mock.ANY, + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': mock.ANY, + } + TestCase().assertEqual(actual, expected) + + +def test_aws_msk_iam_region_from_hostname(boto_session): + # Region determined by hostname + msk_client = AwsMskIamClient( + host='localhost.us-east-1.amazonaws.com', + boto_session = boto_session, + ) + msg = msk_client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg.decode('utf-8')) + + expected = { + 'version': '2020_10_22', + 'host': msk_client.host, + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': '{}/{}/us-east-1/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), + 'x-amz-date': mock.ANY, + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': mock.ANY, + } + TestCase().assertEqual(actual, expected) + + +def test_aws_msk_iam_no_region(boto_session): + # No region from config + boto_session.get_config_variable = mock.MagicMock(return_value=None) + + with TestCase().assertRaises(IllegalArgumentError) as e: + # No region from hostname + msk_client = AwsMskIamClient( + host='localhost', + boto_session = boto_session, + ) + assert 'IllegalArgumentError: Could not determine region from broker host(s) or aws configuration' == str(e.exception) + + +@pytest.mark.parametrize('session_token', [(None), ('the_token')]) +def test_aws_msk_iam_permanent_and_temporary_credentials(session_token, request): + boto_session = request.getfixturevalue('boto_session') + if session_token: + boto_session.get_credentials.return_value.token = session_token + msk_client = AwsMskIamClient( + host='localhost.us-east-1.amazonaws.com', + boto_session = boto_session, + ) + msg = msk_client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg.decode('utf-8')) + + expected = { + 'version': '2020_10_22', + 'host': msk_client.host, + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': '{}/{}/us-east-1/kafka-cluster/aws4_request'.format(msk_client.access_key, datetime.datetime.utcnow().strftime('%Y%m%d')), + 'x-amz-date': mock.ANY, + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': mock.ANY, + } + if session_token: + expected['x-amz-security-token'] = session_token + TestCase().assertEqual(actual, expected) diff --git a/test/test_producer.py b/test/test_producer.py index 60b19bfb9..9605adf58 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -7,7 +7,7 @@ from kafka import KafkaConsumer, KafkaProducer, TopicPartition from kafka.producer.buffer import SimpleBufferPool -from test.fixtures import random_string, version +from test.testutil import env_kafka_version, random_string def test_buffer_pool(): @@ -22,13 +22,13 @@ def test_buffer_pool(): assert buf2.read() == b'' -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) def test_end_to_end(kafka_broker, compression): if compression == 'lz4': # LZ4 requires 0.8.2 - if version() < (0, 8, 2): + if env_kafka_version() < (0, 8, 2): return # python-lz4 crashes on older versions of pypy elif platform.python_implementation() == 'PyPy': @@ -80,7 +80,7 @@ def test_kafka_producer_gc_cleanup(): assert 
threading.active_count() == threads -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) def test_kafka_producer_proper_record_metadata(kafka_broker, compression): connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) @@ -91,7 +91,7 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): magic = producer._max_usable_produce_magic() # record headers are supported in 0.11.0 - if version() < (0, 11, 0): + if env_kafka_version() < (0, 11, 0): headers = None else: headers = [("Header Key", b"Header Value")] diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index 7109886f1..8f32cf870 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -13,10 +13,11 @@ from kafka.codec import has_snappy from kafka.errors import UnknownTopicOrPartitionError, LeaderNotAvailableError from kafka.producer.base import Producer +from kafka.protocol.message import PartialMessage from kafka.structs import FetchRequestPayload, ProduceRequestPayload -from test.fixtures import ZookeeperFixture, KafkaFixture, version -from test.testutil import KafkaIntegrationTestCase, kafka_versions, current_offset +from test.fixtures import ZookeeperFixture, KafkaFixture +from test.testutil import KafkaIntegrationTestCase, env_kafka_version, current_offset # TODO: This duplicates a TestKafkaProducerIntegration method temporarily @@ -43,7 +44,7 @@ def assert_produce_response(resp, initial_offset): assert resp[0].offset == initial_offset -@pytest.mark.skipif(not version(), reason="No KAFKA_VERSION set") +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_produce_many_simple(simple_client, topic): """Test multiple produces using the SimpleClient """ @@ -353,7 +354,7 @@ def test_batched_simple_producer__triggers_by_time(self): # KeyedProducer Tests # ############################ - @kafka_versions('>=0.8.1') + @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_keyedproducer_null_payload(self): partitions = self.client.get_partition_ids_for_topic(self.topic) start_offsets = [self.current_offset(self.topic, p) for p in partitions] @@ -521,7 +522,8 @@ def assert_fetch_offset(self, partition, start_offset, expected_messages): self.assertEqual(resp.error, 0) self.assertEqual(resp.partition, partition) - messages = [ x.message.value for x in resp.messages ] + messages = [ x.message.value for x in resp.messages + if not isinstance(x.message, PartialMessage) ] self.assertEqual(messages, expected_messages) self.assertEqual(resp.highwaterMark, start_offset+len(expected_messages)) diff --git a/test/test_protocol.py b/test/test_protocol.py index 7abcefb46..e295174d4 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -3,7 +3,6 @@ import struct import pytest -from kafka.vendor import six from kafka.protocol.api import RequestHeader from kafka.protocol.commit import GroupCoordinatorRequest diff --git a/test/test_protocol_legacy.py b/test/test_protocol_legacy.py index 1341af003..d8716289e 100644 --- a/test/test_protocol_legacy.py +++ b/test/test_protocol_legacy.py @@ -581,6 +581,7 @@ def test_decode_metadata_response(self): self.assertEqual(decoded, (node_brokers, topic_partitions)) ''' + @unittest.skip('needs updating for new protocol classes') def test_encode_consumer_metadata_request(self): expected = 
b"".join([ struct.pack(">i", 17), # Total length of the request @@ -595,6 +596,7 @@ def test_encode_consumer_metadata_request(self): self.assertEqual(encoded, expected) + @unittest.skip('needs updating for new protocol classes') def test_decode_consumer_metadata_response(self): encoded = b"".join([ struct.pack(">i", 42), # Correlation ID diff --git a/test/testutil.py b/test/testutil.py index a8227cfb6..8c682ac02 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -1,70 +1,65 @@ from __future__ import absolute_import -import functools -import operator import os +import socket +import random +import string import time import uuid import pytest from . import unittest -from kafka import SimpleClient +from kafka import SimpleClient, create_message +from kafka.client_async import KafkaClient from kafka.errors import ( LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError, NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError ) -from kafka.structs import OffsetRequestPayload -from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order - - -def kafka_versions(*versions): - - def construct_lambda(s): - if s[0].isdigit(): - op_str = '=' - v_str = s - elif s[1].isdigit(): - op_str = s[0] # ! < > = - v_str = s[1:] - elif s[2].isdigit(): - op_str = s[0:2] # >= <= - v_str = s[2:] - else: - raise ValueError('Unrecognized kafka version / operator: %s' % (s,)) +from kafka.structs import OffsetRequestPayload, ProduceRequestPayload +#from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order - op_map = { - '=': operator.eq, - '!': operator.ne, - '>': operator.gt, - '<': operator.lt, - '>=': operator.ge, - '<=': operator.le - } - op = op_map[op_str] - version = version_str_to_list(v_str) - return lambda a: op(a, version) - validators = map(construct_lambda, versions) +def random_string(length): + return "".join(random.choice(string.ascii_letters) for i in range(length)) - def real_kafka_versions(func): - @functools.wraps(func) - def wrapper(func, *args, **kwargs): - version = kafka_version() - if not version: - pytest.skip("no kafka version set in KAFKA_VERSION env var") +def env_kafka_version(): + """Return the Kafka version set in the OS environment as a tuple. 
- for f in validators: - if not f(version): - pytest.skip("unsupported kafka version") + Example: '0.8.1.1' --> (0, 8, 1, 1) + """ + if 'KAFKA_VERSION' not in os.environ: + return () + return tuple(map(int, os.environ['KAFKA_VERSION'].split('.'))) + +def get_open_port(): + sock = socket.socket() + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + return port + +_MESSAGES = {} +def msg(message): + """Format, encode and deduplicate a message + """ + global _MESSAGES #pylint: disable=global-statement + if message not in _MESSAGES: + _MESSAGES[message] = '%s-%s' % (message, str(uuid.uuid4())) - return func(*args, **kwargs) - return wrapper + return _MESSAGES[message].encode('utf-8') - return real_kafka_versions +def send_messages(client, topic, partition, messages): + """Send messages to a topic's partition + """ + messages = [create_message(msg(str(m))) for m in messages] + produce = ProduceRequestPayload(topic, partition, messages=messages) + resp, = client.send_produce_request([produce]) + assert resp.error == 0 + return [x.value for x in messages] def current_offset(client, topic, partition, kafka_broker=None): """Get the current offset of a topic's partition @@ -81,6 +76,17 @@ def current_offset(client, topic, partition, kafka_broker=None): return offsets.offsets[0] +def assert_message_count(messages, num_messages): + """Check that we received the expected number of messages with no duplicates.""" + # Make sure we got them all + assert len(messages) == num_messages + # Make sure there are no duplicates + # Note: Currently duplicates are identified only using key/value. Other attributes like topic, partition, headers, + # timestamp, etc are ignored... this could be changed if necessary, but will be more tolerant of dupes. + unique_messages = {(m.key, m.value) for m in messages} + assert len(unique_messages) == num_messages + + class KafkaIntegrationTestCase(unittest.TestCase): create_client = True topic = None @@ -98,6 +104,7 @@ def setUp(self): if self.create_client: self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port)) + self.client_async = KafkaClient(bootstrap_servers='%s:%d' % (self.server.host, self.server.port)) timeout = time.time() + 30 while time.time() < timeout: diff --git a/tox.ini b/tox.ini index 48a143eea..8741b4dcb 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,11 @@ [tox] envlist = py{26,27,34,35,36,37,py}, docs +# The Makefile and .travis.yml override the index server to the public one when +# running outside of Yelp. +indexserver = + default = https://pypi.yelpcorp.com/simple +tox_pip_extensions_ext_pip_custom_platform = true +tox_pip_extensions_ext_venv_update = true [pytest] testpaths = kafka test @@ -9,27 +15,14 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s [testenv] deps = - pytest<4.0 - pytest-cov - py{27,34,35,36,37,py}: pylint - py{27,34,35,36,37,py}: pytest-pylint - pytest-mock - mock - python-snappy - lz4 - xxhash - crc32c - py26: unittest2 + -rrequirements-dev.txt commands = py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} setenv = + CRC32C_SW_MODE = auto PROJECT_ROOT = {toxinidir} passenv = KAFKA_VERSION -[testenv:py26] -# pylint doesn't support python2.6 -commands = py.test {posargs:--cov=kafka --cov-config=.covrc} - [testenv:pypy] # pylint is super slow on pypy... commands = py.test {posargs:--cov=kafka --cov-config=.covrc}
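
Reviewer note on the new AWS MSK IAM coverage: test/test_msk.py above mocks out botocore entirely, so the intended real-world shape of the client is easy to miss when reading the patch. The sketch below is an illustration only, under the assumption that botocore is installed and that AwsMskIamClient behaves with a real botocore.session.Session as it does with the mocked session in the tests; the broker hostname is hypothetical, and the production call site inside kafka-python's connection code is not part of this diff.

import json

from botocore.session import Session

from kafka.msk import AwsMskIamClient

# Hypothetical broker host; the ".us-east-1.amazonaws.com" suffix lets the
# client derive the region from the hostname, as exercised in
# test_aws_msk_iam_region_from_hostname above.
msk_client = AwsMskIamClient(
    host='b-1.mycluster.abc123.kafka.us-east-1.amazonaws.com',
    boto_session=Session(),  # credentials resolved via the normal AWS chain
)

# first_message() returns the signed SASL payload as UTF-8 encoded JSON bytes.
payload = json.loads(msk_client.first_message().decode('utf-8'))
assert payload['action'] == 'kafka-cluster:Connect'
assert payload['x-amz-credential'].endswith('/us-east-1/kafka-cluster/aws4_request')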
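
Reviewer note on the converted integration tests: the hunks above replace the old @kafka_versions decorator and unittest-style assertions with pytest skip markers driven by env_kafka_version() and the new assert_message_count() helper from test/testutil.py. A minimal sketch of the resulting test shape follows; it assumes the kafka_consumer_factory, topic, and send_messages pytest fixtures are supplied by the project's conftest, which is not shown in this diff.

import pytest

from kafka.structs import TopicPartition
from test.testutil import assert_message_count, env_kafka_version


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_consume_single_partition(kafka_consumer_factory, topic, send_messages):
    # Produce 50 uniquely-valued messages to partition 0 via the fixture.
    send_messages(range(0, 50), partition=0)

    consumer = kafka_consumer_factory(auto_offset_reset='earliest',
                                      consumer_timeout_ms=5000)
    # Manual assignment avoids consumer group coordination overhead,
    # mirroring test_kafka_consumer__blocking above.
    consumer.unsubscribe()
    consumer.assign([TopicPartition(topic, 0)])

    # Drain the partition and check we saw 50 messages with no duplicates.
    messages = [next(consumer) for _ in range(50)]
    assert_message_count(messages, 50)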