From f5cfb31d0192e95ed9fea4f1b80227902a904442 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 27 Sep 2018 11:20:39 -0700 Subject: [PATCH 01/53] add a gitignore file --- .gitignore | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c73837 --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json \ No newline at end of file From d5d21e774ef19c6ab173bf9adb75c5fd4b531836 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 27 Sep 2018 11:21:12 -0700 Subject: [PATCH 02/53] move files into pybase module, use setuptools rather than distutils --- __init__.py => pybase/__init__.py | 0 client.py => pybase/client.py | 0 exceptions.py => pybase/exceptions.py | 0 filters.py => pybase/filters.py | 0 {helpers => pybase/helpers}/README.md | 0 {helpers => pybase/helpers}/__init__.py | 0 {helpers => pybase/helpers}/varint.py | 0 {pb => pybase/pb}/Cell.proto | 0 {pb => pybase/pb}/Cell_pb2.py | 0 {pb => pybase/pb}/Client.proto | 0 {pb => pybase/pb}/Client_pb2.py | 0 {pb => pybase/pb}/ClusterId.proto | 0 {pb => pybase/pb}/ClusterId_pb2.py | 0 {pb => pybase/pb}/ClusterStatus.proto | 0 {pb => pybase/pb}/ClusterStatus_pb2.py | 0 {pb => pybase/pb}/Comparator.proto | 0 {pb => pybase/pb}/Comparator_pb2.py | 0 {pb => pybase/pb}/ErrorHandling.proto | 0 {pb => pybase/pb}/ErrorHandling_pb2.py | 0 {pb => pybase/pb}/FS.proto | 0 {pb => pybase/pb}/FS_pb2.py | 0 {pb => pybase/pb}/Filter.proto | 0 {pb => pybase/pb}/Filter_pb2.py | 0 {pb => pybase/pb}/HBase.proto | 0 {pb => pybase/pb}/HBase_pb2.py | 0 {pb => pybase/pb}/Master.proto | 0 {pb => pybase/pb}/Master_pb2.py | 0 {pb => pybase/pb}/MultiRowMutation.proto | 0 {pb => pybase/pb}/MultiRowMutation_pb2.py | 0 {pb => pybase/pb}/Quota.proto | 0 {pb => pybase/pb}/Quota_pb2.py | 0 {pb => pybase/pb}/README.txt | 0 {pb => pybase/pb}/RPC.proto | 0 {pb => pybase/pb}/RPC_pb2.py | 0 {pb => pybase/pb}/Tracing.proto | 0 {pb => pybase/pb}/Tracing_pb2.py | 0 {pb => pybase/pb}/ZooKeeper.proto | 0 {pb => pybase/pb}/ZooKeeper_pb2.py | 0 {pb => pybase/pb}/__init__.py | 0 {region => pybase/region}/__init__.py | 0 {region => pybase/region}/client.py | 0 {region => pybase/region}/region.py | 0 {request => pybase/request}/__init__.py | 0 {request => pybase/request}/request.py | 0 {zk => pybase/zk}/__init__.py | 0 {zk => pybase/zk}/client.py | 0 setup.py | 8 +++----- 47 files changed, 3 insertions(+), 5 deletions(-) rename __init__.py => pybase/__init__.py (100%) rename client.py => pybase/client.py (100%) rename exceptions.py => pybase/exceptions.py (100%) rename filters.py => pybase/filters.py (100%) rename {helpers => pybase/helpers}/README.md (100%) rename {helpers => pybase/helpers}/__init__.py (100%) rename {helpers => pybase/helpers}/varint.py (100%) rename {pb => pybase/pb}/Cell.proto (100%) rename {pb => pybase/pb}/Cell_pb2.py (100%) rename {pb => pybase/pb}/Client.proto (100%) rename {pb => pybase/pb}/Client_pb2.py (100%) rename {pb => pybase/pb}/ClusterId.proto (100%) rename {pb => pybase/pb}/ClusterId_pb2.py (100%) rename {pb => pybase/pb}/ClusterStatus.proto (100%) rename {pb => pybase/pb}/ClusterStatus_pb2.py (100%) rename {pb => pybase/pb}/Comparator.proto (100%) rename {pb => pybase/pb}/Comparator_pb2.py (100%) rename {pb => pybase/pb}/ErrorHandling.proto (100%) rename {pb => pybase/pb}/ErrorHandling_pb2.py (100%) rename {pb => pybase/pb}/FS.proto (100%) rename {pb => pybase/pb}/FS_pb2.py (100%) rename {pb => pybase/pb}/Filter.proto (100%) rename {pb => pybase/pb}/Filter_pb2.py (100%) rename {pb => pybase/pb}/HBase.proto (100%) rename {pb => pybase/pb}/HBase_pb2.py (100%) rename {pb => pybase/pb}/Master.proto (100%) rename {pb => pybase/pb}/Master_pb2.py (100%) rename {pb => pybase/pb}/MultiRowMutation.proto (100%) rename {pb => pybase/pb}/MultiRowMutation_pb2.py (100%) rename {pb => pybase/pb}/Quota.proto (100%) rename {pb => pybase/pb}/Quota_pb2.py (100%) rename {pb => pybase/pb}/README.txt (100%) rename {pb => pybase/pb}/RPC.proto (100%) rename {pb => pybase/pb}/RPC_pb2.py (100%) rename {pb => pybase/pb}/Tracing.proto (100%) rename {pb => pybase/pb}/Tracing_pb2.py (100%) rename {pb => pybase/pb}/ZooKeeper.proto (100%) rename {pb => pybase/pb}/ZooKeeper_pb2.py (100%) rename {pb => pybase/pb}/__init__.py (100%) rename {region => pybase/region}/__init__.py (100%) rename {region => pybase/region}/client.py (100%) rename {region => pybase/region}/region.py (100%) rename {request => pybase/request}/__init__.py (100%) rename {request => pybase/request}/request.py (100%) rename {zk => pybase/zk}/__init__.py (100%) rename {zk => pybase/zk}/client.py (100%) diff --git a/__init__.py b/pybase/__init__.py similarity index 100% rename from __init__.py rename to pybase/__init__.py diff --git a/client.py b/pybase/client.py similarity index 100% rename from client.py rename to pybase/client.py diff --git a/exceptions.py b/pybase/exceptions.py similarity index 100% rename from exceptions.py rename to pybase/exceptions.py diff --git a/filters.py b/pybase/filters.py similarity index 100% rename from filters.py rename to pybase/filters.py diff --git a/helpers/README.md b/pybase/helpers/README.md similarity index 100% rename from helpers/README.md rename to pybase/helpers/README.md diff --git a/helpers/__init__.py b/pybase/helpers/__init__.py similarity index 100% rename from helpers/__init__.py rename to pybase/helpers/__init__.py diff --git a/helpers/varint.py b/pybase/helpers/varint.py similarity index 100% rename from helpers/varint.py rename to pybase/helpers/varint.py diff --git a/pb/Cell.proto b/pybase/pb/Cell.proto similarity index 100% rename from pb/Cell.proto rename to pybase/pb/Cell.proto diff --git a/pb/Cell_pb2.py b/pybase/pb/Cell_pb2.py similarity index 100% rename from pb/Cell_pb2.py rename to pybase/pb/Cell_pb2.py diff --git a/pb/Client.proto b/pybase/pb/Client.proto similarity index 100% rename from pb/Client.proto rename to pybase/pb/Client.proto diff --git a/pb/Client_pb2.py b/pybase/pb/Client_pb2.py similarity index 100% rename from pb/Client_pb2.py rename to pybase/pb/Client_pb2.py diff --git a/pb/ClusterId.proto b/pybase/pb/ClusterId.proto similarity index 100% rename from pb/ClusterId.proto rename to pybase/pb/ClusterId.proto diff --git a/pb/ClusterId_pb2.py b/pybase/pb/ClusterId_pb2.py similarity index 100% rename from pb/ClusterId_pb2.py rename to pybase/pb/ClusterId_pb2.py diff --git a/pb/ClusterStatus.proto b/pybase/pb/ClusterStatus.proto similarity index 100% rename from pb/ClusterStatus.proto rename to pybase/pb/ClusterStatus.proto diff --git a/pb/ClusterStatus_pb2.py b/pybase/pb/ClusterStatus_pb2.py similarity index 100% rename from pb/ClusterStatus_pb2.py rename to pybase/pb/ClusterStatus_pb2.py diff --git a/pb/Comparator.proto b/pybase/pb/Comparator.proto similarity index 100% rename from pb/Comparator.proto rename to pybase/pb/Comparator.proto diff --git a/pb/Comparator_pb2.py b/pybase/pb/Comparator_pb2.py similarity index 100% rename from pb/Comparator_pb2.py rename to pybase/pb/Comparator_pb2.py diff --git a/pb/ErrorHandling.proto b/pybase/pb/ErrorHandling.proto similarity index 100% rename from pb/ErrorHandling.proto rename to pybase/pb/ErrorHandling.proto diff --git a/pb/ErrorHandling_pb2.py b/pybase/pb/ErrorHandling_pb2.py similarity index 100% rename from pb/ErrorHandling_pb2.py rename to pybase/pb/ErrorHandling_pb2.py diff --git a/pb/FS.proto b/pybase/pb/FS.proto similarity index 100% rename from pb/FS.proto rename to pybase/pb/FS.proto diff --git a/pb/FS_pb2.py b/pybase/pb/FS_pb2.py similarity index 100% rename from pb/FS_pb2.py rename to pybase/pb/FS_pb2.py diff --git a/pb/Filter.proto b/pybase/pb/Filter.proto similarity index 100% rename from pb/Filter.proto rename to pybase/pb/Filter.proto diff --git a/pb/Filter_pb2.py b/pybase/pb/Filter_pb2.py similarity index 100% rename from pb/Filter_pb2.py rename to pybase/pb/Filter_pb2.py diff --git a/pb/HBase.proto b/pybase/pb/HBase.proto similarity index 100% rename from pb/HBase.proto rename to pybase/pb/HBase.proto diff --git a/pb/HBase_pb2.py b/pybase/pb/HBase_pb2.py similarity index 100% rename from pb/HBase_pb2.py rename to pybase/pb/HBase_pb2.py diff --git a/pb/Master.proto b/pybase/pb/Master.proto similarity index 100% rename from pb/Master.proto rename to pybase/pb/Master.proto diff --git a/pb/Master_pb2.py b/pybase/pb/Master_pb2.py similarity index 100% rename from pb/Master_pb2.py rename to pybase/pb/Master_pb2.py diff --git a/pb/MultiRowMutation.proto b/pybase/pb/MultiRowMutation.proto similarity index 100% rename from pb/MultiRowMutation.proto rename to pybase/pb/MultiRowMutation.proto diff --git a/pb/MultiRowMutation_pb2.py b/pybase/pb/MultiRowMutation_pb2.py similarity index 100% rename from pb/MultiRowMutation_pb2.py rename to pybase/pb/MultiRowMutation_pb2.py diff --git a/pb/Quota.proto b/pybase/pb/Quota.proto similarity index 100% rename from pb/Quota.proto rename to pybase/pb/Quota.proto diff --git a/pb/Quota_pb2.py b/pybase/pb/Quota_pb2.py similarity index 100% rename from pb/Quota_pb2.py rename to pybase/pb/Quota_pb2.py diff --git a/pb/README.txt b/pybase/pb/README.txt similarity index 100% rename from pb/README.txt rename to pybase/pb/README.txt diff --git a/pb/RPC.proto b/pybase/pb/RPC.proto similarity index 100% rename from pb/RPC.proto rename to pybase/pb/RPC.proto diff --git a/pb/RPC_pb2.py b/pybase/pb/RPC_pb2.py similarity index 100% rename from pb/RPC_pb2.py rename to pybase/pb/RPC_pb2.py diff --git a/pb/Tracing.proto b/pybase/pb/Tracing.proto similarity index 100% rename from pb/Tracing.proto rename to pybase/pb/Tracing.proto diff --git a/pb/Tracing_pb2.py b/pybase/pb/Tracing_pb2.py similarity index 100% rename from pb/Tracing_pb2.py rename to pybase/pb/Tracing_pb2.py diff --git a/pb/ZooKeeper.proto b/pybase/pb/ZooKeeper.proto similarity index 100% rename from pb/ZooKeeper.proto rename to pybase/pb/ZooKeeper.proto diff --git a/pb/ZooKeeper_pb2.py b/pybase/pb/ZooKeeper_pb2.py similarity index 100% rename from pb/ZooKeeper_pb2.py rename to pybase/pb/ZooKeeper_pb2.py diff --git a/pb/__init__.py b/pybase/pb/__init__.py similarity index 100% rename from pb/__init__.py rename to pybase/pb/__init__.py diff --git a/region/__init__.py b/pybase/region/__init__.py similarity index 100% rename from region/__init__.py rename to pybase/region/__init__.py diff --git a/region/client.py b/pybase/region/client.py similarity index 100% rename from region/client.py rename to pybase/region/client.py diff --git a/region/region.py b/pybase/region/region.py similarity index 100% rename from region/region.py rename to pybase/region/region.py diff --git a/request/__init__.py b/pybase/request/__init__.py similarity index 100% rename from request/__init__.py rename to pybase/request/__init__.py diff --git a/request/request.py b/pybase/request/request.py similarity index 100% rename from request/request.py rename to pybase/request/request.py diff --git a/zk/__init__.py b/pybase/zk/__init__.py similarity index 100% rename from zk/__init__.py rename to pybase/zk/__init__.py diff --git a/zk/client.py b/pybase/zk/client.py similarity index 100% rename from zk/client.py rename to pybase/zk/client.py diff --git a/setup.py b/setup.py index 7b304cc..813964d 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from distutils.core import setup +from setuptools import setup, find_packages setup(name='pybase', version='0.1', @@ -7,8 +7,6 @@ author='Sam Curley', author_email='CurleySamuel@gmail.com', license='Apache License 2.0', - packages=['pybase', 'pybase.zk', 'pybase.pb', 'pybase.request', - 'pybase.region', 'pybase.helpers', 'pybase.tests'], - package_dir={'pybase': '.'}, - install_requires=["intervaltree","kazoo","six", "zope.interface", "protobuf"], + packages=find_packages('.', exclude=['tests']), + install_requires=["intervaltree", "kazoo", "six", "zope.interface", "protobuf"], zip_safe=False) From c49eba659e054eed4fa34e8043aec71139a4bad5 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 27 Sep 2018 11:34:09 -0700 Subject: [PATCH 03/53] sort imports --- pybase/__init__.py | 3 +-- pybase/client.py | 23 ++++++++++++++--------- pybase/exceptions.py | 11 ++++++----- pybase/filters.py | 10 ++++++---- pybase/helpers/varint.py | 2 +- pybase/pb/Cell_pb2.py | 10 +++++++--- pybase/pb/Client_pb2.py | 19 ++++++++++++------- pybase/pb/ClusterId_pb2.py | 8 ++++++-- pybase/pb/ClusterStatus_pb2.py | 15 ++++++++++----- pybase/pb/Comparator_pb2.py | 8 ++++++-- pybase/pb/ErrorHandling_pb2.py | 8 ++++++-- pybase/pb/FS_pb2.py | 8 ++++++-- pybase/pb/Filter_pb2.py | 13 +++++++++---- pybase/pb/HBase_pb2.py | 13 +++++++++---- pybase/pb/Master_pb2.py | 19 ++++++++++++------- pybase/pb/MultiRowMutation_pb2.py | 11 ++++++++--- pybase/pb/Quota_pb2.py | 13 +++++++++---- pybase/pb/RPC_pb2.py | 13 +++++++++---- pybase/pb/Tracing_pb2.py | 8 ++++++-- pybase/pb/ZooKeeper_pb2.py | 13 +++++++++---- pybase/region/client.py | 23 ++++++++++++----------- pybase/region/region.py | 4 +++- pybase/request/request.py | 7 ++++--- pybase/zk/client.py | 17 +++++++++++------ setup.cfg | 8 ++++++++ setup.py | 2 +- tests/test_integration.py | 6 ++++-- tests/test_integration_availability.py | 13 ++++++++----- 28 files changed, 203 insertions(+), 105 deletions(-) create mode 100644 setup.cfg diff --git a/pybase/__init__.py b/pybase/__init__.py index 38d745b..48bd80a 100644 --- a/pybase/__init__.py +++ b/pybase/__init__.py @@ -1,2 +1 @@ -from client import NewClient -assert NewClient # silence pyflakes +from .client import NewClient # noqa diff --git a/pybase/client.py b/pybase/client.py index 35a0770..6c7dfe8 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -13,23 +13,29 @@ See the License for the specific language governing permissions and limitations under the License. """ -import zk.client as zk -import region.client as region -from region.region import region_from_cell -from request import request +from __future__ import absolute_import, print_function + import logging import logging.config -from intervaltree import IntervalTree -from threading import Lock -from time import sleep from itertools import chain +from threading import Lock + +import pybase.region.client as region +import pybase.zk.client as zk +from intervaltree import IntervalTree + from filters import _to_filter -from exceptions import * + +from .exceptions import (MasterServerException, NoSuchTableException, + PyBaseException, RegionException, RegionServerException) +from .region.region import region_from_cell +from .request import request # Using a tiered logger such that all submodules propagate through to this # logger. Changing the logging level here should affect all other modules. logger = logging.getLogger('pybase') + class MainClient: def __init__(self, zkquorum, pool_size): @@ -484,4 +490,3 @@ def NewClient(zkquorum, socket_pool_size=1): # Create the master client. a._recreate_master_client() return a - diff --git a/pybase/exceptions.py b/pybase/exceptions.py index 6c022f8..8e177da 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import, print_function + import logging -from time import sleep -from threading import Condition, Lock, RLock, Semaphore -from time import time from collections import defaultdict -logger = logging.getLogger('pybase.' + __name__) -logger.setLevel(logging.DEBUG) +from threading import Lock, Semaphore +from time import sleep, time + +logger = logging.getLogger(__name__) # All PyBase exceptions inherit from me. Assumes unrecoverable. diff --git a/pybase/filters.py b/pybase/filters.py index ccdc0f5..69a9680 100644 --- a/pybase/filters.py +++ b/pybase/filters.py @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import, print_function + import traceback -import pb.Filter_pb2 as pbFilter -import pb.Comparator_pb2 as pbComparator -from pb.HBase_pb2 import BytesBytesPair as pbBytesBytesPair + +from .pb import Comparator_pb2 as pbComparator +from .pb import Filter_pb2 as pbFilter +from .pb.HBase_pb2 import BytesBytesPair as pbBytesBytesPair # You're brave to venture into this file. @@ -495,4 +498,3 @@ def _to_row_range(rr): return new except Exception: raise ValueError("Malformed RowRange provided") - diff --git a/pybase/helpers/varint.py b/pybase/helpers/varint.py index 13b8a58..786af93 100644 --- a/pybase/helpers/varint.py +++ b/pybase/helpers/varint.py @@ -27,6 +27,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import absolute_import, print_function class NotEnoughDataExcption(Exception): @@ -187,4 +188,3 @@ def EncodeSignedVarint(write, value): encodeVarint = _VarintEncoder() encodeSignedVarint = _SignedVarintEncoder() - diff --git a/pybase/pb/Cell_pb2.py b/pybase/pb/Cell_pb2.py index a64f8ae..6de3f38 100644 --- a/pybase/pb/Cell_pb2.py +++ b/pybase/pb/Cell_pb2.py @@ -1,14 +1,18 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Cell.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 +from google.protobuf.internal import enum_type_wrapper + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/Client_pb2.py b/pybase/pb/Client_pb2.py index df9092e..0db478e 100644 --- a/pybase/pb/Client_pb2.py +++ b/pybase/pb/Client_pb2.py @@ -1,23 +1,28 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Client.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 +from google.protobuf.internal import enum_type_wrapper + +import Cell_pb2 +import Comparator_pb2 +import Filter_pb2 +import HBase_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import HBase_pb2 -import Filter_pb2 -import Cell_pb2 -import Comparator_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/ClusterId_pb2.py b/pybase/pb/ClusterId_pb2.py index 2b9517e..20e17d2 100644 --- a/pybase/pb/ClusterId_pb2.py +++ b/pybase/pb/ClusterId_pb2.py @@ -1,13 +1,17 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ClusterId.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/ClusterStatus_pb2.py b/pybase/pb/ClusterStatus_pb2.py index 6d00ad4..24f0806 100644 --- a/pybase/pb/ClusterStatus_pb2.py +++ b/pybase/pb/ClusterStatus_pb2.py @@ -1,21 +1,26 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ClusterStatus.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +import ClusterId_pb2 +import FS_pb2 +import HBase_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import HBase_pb2 -import ClusterId_pb2 -import FS_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/Comparator_pb2.py b/pybase/pb/Comparator_pb2.py index cfcaa1e..b519d7e 100644 --- a/pybase/pb/Comparator_pb2.py +++ b/pybase/pb/Comparator_pb2.py @@ -1,13 +1,17 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Comparator.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/ErrorHandling_pb2.py b/pybase/pb/ErrorHandling_pb2.py index d6e8443..6d9779a 100644 --- a/pybase/pb/ErrorHandling_pb2.py +++ b/pybase/pb/ErrorHandling_pb2.py @@ -1,13 +1,17 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ErrorHandling.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/FS_pb2.py b/pybase/pb/FS_pb2.py index 72a61d7..5972995 100644 --- a/pybase/pb/FS_pb2.py +++ b/pybase/pb/FS_pb2.py @@ -1,13 +1,17 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: FS.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/Filter_pb2.py b/pybase/pb/Filter_pb2.py index 2700503..2e0f75e 100644 --- a/pybase/pb/Filter_pb2.py +++ b/pybase/pb/Filter_pb2.py @@ -1,20 +1,25 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Filter.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +import Comparator_pb2 +import HBase_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import HBase_pb2 -import Comparator_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/HBase_pb2.py b/pybase/pb/HBase_pb2.py index 035ba7c..140983f 100644 --- a/pybase/pb/HBase_pb2.py +++ b/pybase/pb/HBase_pb2.py @@ -1,20 +1,25 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: HBase.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 +from google.protobuf.internal import enum_type_wrapper + +import Cell_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import Cell_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/Master_pb2.py b/pybase/pb/Master_pb2.py index adc1157..ea87777 100644 --- a/pybase/pb/Master_pb2.py +++ b/pybase/pb/Master_pb2.py @@ -1,24 +1,29 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Master.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - -import HBase_pb2 import Client_pb2 import ClusterStatus_pb2 import ErrorHandling_pb2 +import HBase_pb2 import Quota_pb2 +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + DESCRIPTOR = _descriptor.FileDescriptor( name='Master.proto', diff --git a/pybase/pb/MultiRowMutation_pb2.py b/pybase/pb/MultiRowMutation_pb2.py index ed3f4c5..616c955 100644 --- a/pybase/pb/MultiRowMutation_pb2.py +++ b/pybase/pb/MultiRowMutation_pb2.py @@ -1,19 +1,24 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: MultiRowMutation.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +import Client_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import Client_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/Quota_pb2.py b/pybase/pb/Quota_pb2.py index 6ca78d5..e9dacbe 100644 --- a/pybase/pb/Quota_pb2.py +++ b/pybase/pb/Quota_pb2.py @@ -1,20 +1,25 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Quota.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 +from google.protobuf.internal import enum_type_wrapper + +import HBase_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import HBase_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/RPC_pb2.py b/pybase/pb/RPC_pb2.py index 612f68d..2f80447 100644 --- a/pybase/pb/RPC_pb2.py +++ b/pybase/pb/RPC_pb2.py @@ -1,20 +1,25 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: RPC.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +import HBase_pb2 +import Tracing_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import Tracing_pb2 -import HBase_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/pb/Tracing_pb2.py b/pybase/pb/Tracing_pb2.py index 90f7442..a1eb33a 100644 --- a/pybase/pb/Tracing_pb2.py +++ b/pybase/pb/Tracing_pb2.py @@ -1,13 +1,17 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: Tracing.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() diff --git a/pybase/pb/ZooKeeper_pb2.py b/pybase/pb/ZooKeeper_pb2.py index 78762d9..52283d4 100644 --- a/pybase/pb/ZooKeeper_pb2.py +++ b/pybase/pb/ZooKeeper_pb2.py @@ -1,20 +1,25 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ZooKeeper.proto +from __future__ import absolute_import, print_function + import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) + from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pb2 from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 + +import ClusterStatus_pb2 +import HBase_pb2 + +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -import HBase_pb2 -import ClusterStatus_pb2 DESCRIPTOR = _descriptor.FileDescriptor( diff --git a/pybase/region/client.py b/pybase/region/client.py index 12d1c70..157206e 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -13,21 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import, print_function + +import logging import socket +from cStringIO import StringIO from struct import pack, unpack -from ..pb.RPC_pb2 import ConnectionHeader, RequestHeader, ResponseHeader -from ..pb.Client_pb2 import GetResponse, MutateResponse, ScanResponse +from threading import Condition, Lock + +from ..exceptions import (NoSuchColumnFamilyException, NotServingRegionException, PyBaseException, + RegionMovedException, RegionOpeningException, RegionServerException) from ..helpers import varint -from threading import Lock, Condition -import logging -from time import sleep -from cStringIO import StringIO -from ..exceptions import * +from ..pb.Client_pb2 import GetResponse, MutateResponse, ScanResponse +from ..pb.RPC_pb2 import ConnectionHeader, RequestHeader, ResponseHeader -logger = logging.getLogger('pybase.' + __name__) -logger.setLevel(logging.DEBUG) +logger = logging.getLogger(__name__) # socket.setdefaulttimeout interfers with gevent. -#socket.setdefaulttimeout(2) +# socket.setdefaulttimeout(2) # Used to encode and decode varints in a format protobuf expects. encoder = varint.encodeVarint @@ -288,4 +290,3 @@ def _to_varint(val): temp = [] encoder(temp.append, val) return "".join(temp) - diff --git a/pybase/region/region.py b/pybase/region/region.py index e1c01f6..a0b68d6 100644 --- a/pybase/region/region.py +++ b/pybase/region/region.py @@ -13,7 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import, print_function + from struct import unpack + from ..pb.HBase_pb2 import RegionInfo as pbRegionInfo @@ -50,4 +53,3 @@ def region_from_cell(cell): start_key = region_info.start_key stop_key = region_info.end_key return Region(table, region_name, start_key, stop_key) - diff --git a/pybase/request/request.py b/pybase/request/request.py index 4602e8d..0809f48 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -1,6 +1,8 @@ -from ..pb.Client_pb2 import GetRequest, MutateRequest, ScanRequest, Column, MutationProto -from ..filters import _to_filter +from __future__ import absolute_import, print_function + from ..exceptions import MalformedFamilies, MalformedValues +from ..filters import _to_filter +from ..pb.Client_pb2 import Column, GetRequest, MutateRequest, MutationProto, ScanRequest # Table + Family used when requesting meta information from the # MetaRegionServer @@ -164,4 +166,3 @@ def values_to_column_values(val, delete=False): return col_vals except Exception: raise MalformedValues() - diff --git a/pybase/zk/client.py b/pybase/zk/client.py index 5c396d8..6677230 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -13,14 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. """ +from __future__ import absolute_import, print_function + +import logging +from struct import unpack +from time import sleep + from kazoo.client import KazooClient -from kazoo.handlers.threading import KazooTimeoutError from kazoo.exceptions import NoNodeError +from kazoo.handlers.threading import KazooTimeoutError + +from ..exceptions import (ZookeeperConnectionException, + ZookeeperResponseException, ZookeeperZNodeException) from ..pb.ZooKeeper_pb2 import MetaRegionServer -from ..exceptions import * -from struct import unpack -from time import sleep -import logging + logger = logging.getLogger('pybase.' + __name__) logger.setLevel(logging.DEBUG) @@ -86,4 +92,3 @@ def LocateMaster(zkquorum, establish_connection_timeout=5, missing_znode_retries logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) return meta.server.host_name, meta.server.port - diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..5943bac --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +max-line-length = 100 +ignore = E123,E133,E226,E241,E242,T003 + +[isort] +line_length=100 +indent=' ' +balanced_wrapping=True diff --git a/setup.py b/setup.py index 813964d..5208f84 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup(name='pybase', version='0.1', diff --git a/tests/test_integration.py b/tests/test_integration.py index 3aea68a..f2b304d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,9 +1,11 @@ +from __future__ import absolute_import, print_function + import unittest -import pybase from collections import defaultdict from time import sleep -from pybase.exceptions import * +import pybase +from pybase.exceptions import * # Please note that all below unit tests require the existence of a table # to play with. Table must contain two column families specified below as well. diff --git a/tests/test_integration_availability.py b/tests/test_integration_availability.py index 8c97dff..8a93a98 100644 --- a/tests/test_integration_availability.py +++ b/tests/test_integration_availability.py @@ -1,9 +1,12 @@ +from __future__ import absolute_import, print_function + +import os +import subprocess import unittest -import pybase from collections import defaultdict + +import pybase from pybase.exceptions import * -import subprocess -import os # Please note that all below unit tests require the existence of a table # to play with. Table must contain two column families specified below as well. @@ -109,14 +112,14 @@ def hbase_shell(cmd): def start_region_servers(server_ids): - print "" + print("") a = [os.environ['HBASE_HOME'] + "/bin/local-regionservers.sh", "start", ' '.join(server_ids)] subprocess.call(a) def stop_region_servers(server_ids): - print "" + print("") a = [os.environ['HBASE_HOME'] + "/bin/local-regionservers.sh", "stop", ' '.join(server_ids)] subprocess.call(a) From 1b190ad10ded8f04a4c15e37970037566fff5ee6 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 27 Sep 2018 11:54:37 -0700 Subject: [PATCH 04/53] flake8 everything --- pybase/client.py | 23 ++++++---- pybase/exceptions.py | 17 ++++--- pybase/filters.py | 19 ++++---- pybase/region/client.py | 14 ++++-- pybase/region/region.py | 3 +- pybase/request/request.py | 2 +- pybase/zk/client.py | 11 +++-- setup.cfg | 1 + tests/test_integration.py | 63 ++++++++------------------ tests/test_integration_availability.py | 7 ++- 10 files changed, 76 insertions(+), 84 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 6c7dfe8..9779372 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -212,13 +212,15 @@ def scan(self, table, start_key='', stop_key=None, families={}, filters=None): # or merged so this recursive call may be scanning multiple regions or only half # of one region). result_set._append_response(self.scan( - table, start_key=previous_stop_key, stop_key=cur_region.stop_key, families=families, filters=filters)) + table, start_key=previous_stop_key, stop_key=cur_region.stop_key, + families=families, filters=filters)) # We continue here because we don't want to append the # first_response results to the result_set. When we did the # recursive scan it rescanned whatever the first_response # initially contained. Appending both will produce duplicates. previous_stop_key = cur_region.stop_key - if previous_stop_key == '' or (stop_key is not None and previous_stop_key > stop_key): + if previous_stop_key == '' or \ + (stop_key is not None and previous_stop_key > stop_key): break continue # Both calls succeeded! Append the results to the result_set. @@ -233,7 +235,8 @@ def scan(self, table, start_key='', stop_key=None, families={}, filters=None): break return result_set - def _scan_hit_region_once(self, previous_stop_key, table, start_key, stop_key, families, filters): + def _scan_hit_region_once(self, previous_stop_key, table, start_key, stop_key, families, + filters): try: # Lookup the next region to scan by searching for the # previous_stop_key (region keys are inclusive on the start and @@ -241,10 +244,11 @@ def _scan_hit_region_once(self, previous_stop_key, table, start_key, stop_key, f cur_region = self._find_hosting_region( table, previous_stop_key) except PyBaseException as e: - # This means that either Master is down or something's funky with the META region. Try handling it - # and recursively perform the same call again. + # This means that either Master is down or something's funky with the META region. + # Try handling it and recursively perform the same call again. e._handle_exception(self) - return self._scan_hit_region_once(previous_stop_key, table, start_key, stop_key, families, filters) + return self._scan_hit_region_once(previous_stop_key, table, start_key, stop_key, + families, filters) # Create the scan request object. The last two values are 'Close' and # 'Scanner_ID' respectively. rq = request.scan_request( @@ -256,7 +260,8 @@ def _scan_hit_region_once(self, previous_stop_key, table, start_key, stop_key, f # Uh oh. Probably a region/region server issue. Handle it and try # again. e._handle_exception(self, dest_region=cur_region) - return self._scan_hit_region_once(previous_stop_key, table, start_key, stop_key, families, filters) + return self._scan_hit_region_once(previous_stop_key, table, start_key, stop_key, + families, filters) return response, cur_region def _scan_region_while_more_results(self, cur_region, response): @@ -279,7 +284,7 @@ def _scan_region_while_more_results(self, cur_region, response): # Now close the scanner. rq = request.scan_request( cur_region, None, None, None, None, True, scanner_id) - _ = cur_region.region_client._send_request(rq) + cur_region.region_client._send_request(rq) # Close it and return the results! return response_set @@ -474,7 +479,7 @@ def _append_response(self, rsp): try: self.cells.extend([result.cell for result in rsp.results]) self.stale = self.stale or rsp.stale - except AttributeError as e: + except AttributeError: # This is a single result object we're merging instead. self.cells.extend(rsp.cells) self.stale = self.stale or rsp.stale diff --git a/pybase/exceptions.py b/pybase/exceptions.py index 8e177da..acf93a2 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -17,6 +17,7 @@ import logging from collections import defaultdict +from functools import reduce from threading import Lock, Semaphore from time import sleep, time @@ -85,7 +86,8 @@ def _handle_exception(self, main_client, **kwargs): if loc in main_client.reverse_client_cache: # We're the first in and it's our job to kill the client. # Purge it. - logger.warn("Region server %s:%s refusing connections. Purging cache, sleeping, retrying.", + logger.warn("Region server %s:%s refusing connections. Purging cache, " + "sleeping, retrying.", self.region_client.host, self.region_client.port) main_client._purge_client(self.region_client) # Sleep for an arbitrary amount of time. If this returns @@ -116,9 +118,11 @@ def _handle_exception(self, main_client, **kwargs): if _let_one_through(self, None): try: # Makes sure someone else hasn't already fixed the issue. - if main_client.master_client is None or (self.host == main_client.master_client.host and self.port == main_client.master_client.port): - logger.warn( - "Encountered an exception with the Master server. Sleeping then reestablishing.") + if main_client.master_client is None or \ + (self.host == main_client.master_client.host and + self.port == main_client.master_client.port): + logger.warn("Encountered an exception with the Master server. " + "Sleeping then reestablishing.") if not _dynamic_sleep(self, None): raise self main_client._recreate_master_client() @@ -269,8 +273,9 @@ def _let_all_through(exception, data): # We want to sleep more and more with every exception retry. -def sleep_formula(x): return (x / 1.5)**2 -# [0.0, 0.44, 1.77, 4.0, 7.11, 11.11, 16.0, 21.77, 28.44, 36.0] +def sleep_formula(x): + # [0.0, 0.44, 1.77, 4.0, 7.11, 11.11, 16.0, 21.77, 28.44, 36.0] + return (x / 1.5)**2 _exception_count = defaultdict(lambda: (0, time())) _max_retries = 7 diff --git a/pybase/filters.py b/pybase/filters.py index 69a9680..8ea878f 100644 --- a/pybase/filters.py +++ b/pybase/filters.py @@ -55,12 +55,7 @@ def __init__(self, operator, *arg): self.name = filter_path + "FilterList" self.operator = operator self.filters = [] - try: - for incoming_filter in arg: - self.filters.append(_to_filter(incoming_filter)) - except TypeError: - # They passed a single filter and not a sequence of filters. - self.filters.append(_to_filter(filters)) + self.add_filters(*arg) def add_filters(self, *arg): for new_filter in arg: @@ -247,7 +242,8 @@ def __init__(self, single_column_value_filter): class SkipColumnValueFilter: - def __init__(self, compare_op, comparator, column_family, column_qualifier, filter_if_missing, latest_version_only): + def __init__(self, compare_op, comparator, column_family, column_qualifier, filter_if_missing, + latest_version_only): self.filter_type = pbFilter.SkipColumnValueFilter self.name = filter_path + "SkipColumnValueFilter" self.compare_op = compare_op @@ -330,11 +326,14 @@ def _to_filter(orig_filter): ft.serialized_filter = _to_pb_filter(orig_filter).SerializeToString() return ft + def _to_pb_filter(orig_filter): try: ft2 = orig_filter.filter_type() - members = [attr for attr in dir(orig_filter) if not callable( - attr) and not attr.startswith("__") and attr not in ["name", "filter_type", "add_filters"]] + members = [ + attr for attr in dir(orig_filter) + if not callable(attr) and not attr.startswith("__") and + attr not in ["name", "filter_type", "add_filters"]] for member in members: try: val = getattr(orig_filter, member) @@ -354,7 +353,6 @@ def _to_pb_filter(orig_filter): raise ValueError("Malformed Filter provided, %s %s" % (ex, traceback.format_exc())) - class ByteArrayComparable: def __init__(self, value): @@ -460,6 +458,7 @@ def _to_comparator(orig_cmp): except Exception as ex: raise ValueError("Malformed Comparator provided %s %s" % (ex, traceback.format_exc())) + class BytesBytesPair: def __init__(self, first, second): diff --git a/pybase/region/client.py b/pybase/region/client.py index 157206e..37bd575 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -75,7 +75,8 @@ def __init__(self, host, port): # Receive an RPC with incorrect call_id? # 1. Acquire lock # 2. Place raw data into missed_rpcs with key call_id - # 3. Notify all other threads to wake up (nothing will happen until you release the lock) + # 3. Notify all other threads to wake up (nothing will happen until you release the + # lock) # 4. WHILE: Your call_id is not in the dictionary # 4.5 Call wait() on the conditional and get comfy. # 5. Pop your data out @@ -127,8 +128,8 @@ def _send_request(self, rq): pool_id = my_id % self.pool_size try: with self.write_lock_pool[pool_id]: - logger.debug( - 'Sending %s RPC to %s:%s on pool port %s', rq.type, self.host, self.port, pool_id) + logger.debug('Sending %s RPC to %s:%s on pool port %s', + rq.type, self.host, self.port, pool_id) self.sock_pool[pool_id].send(to_send) except socket.error: # RegionServer dead? @@ -183,13 +184,16 @@ def _receive_rpc(self, call_id, rq, data=None): elif header.exception.exception_class_name != u'': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name - if exception_class == 'org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException' or exception_class == "java.io.IOException": + if exception_class in \ + {'org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException', + "java.io.IOException"}: raise NoSuchColumnFamilyException() elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionMovedException': raise RegionMovedException() elif exception_class == 'org.apache.hadoop.hbase.NotServingRegionException': raise NotServingRegionException() - elif exception_class == 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': + elif exception_class == \ + 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': raise RegionServerException(region_client=self) elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionOpeningException': raise RegionOpeningException() diff --git a/pybase/region/region.py b/pybase/region/region.py index a0b68d6..31baee6 100644 --- a/pybase/region/region.py +++ b/pybase/region/region.py @@ -45,7 +45,8 @@ def region_from_cell(cell): if magic != 1346524486: # Either it's a corrupt message or an unsupported region info version. raise RuntimeError( - "HBase returned an invalid response (are you running a version of HBase supporting Protobufs?)") + "HBase returned an invalid response (are you running a version of HBase supporting " + "Protobufs?)") region_info = pbRegionInfo() region_info.ParseFromString(cell.value[4:-4]) table = region_info.table_name.qualifier diff --git a/pybase/request/request.py b/pybase/request/request.py index 0809f48..2f05827 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -12,7 +12,7 @@ class Request: - def __init__(self, type, pb): + def __init__(self, type, pb): # noqa: B002 self.type = type self.pb = pb diff --git a/pybase/zk/client.py b/pybase/zk/client.py index 6677230..af48df2 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -53,10 +53,11 @@ def LocateMaster(zkquorum, establish_connection_timeout=5, missing_znode_retries if missing_znode_retries == 0: raise ZookeeperZNodeException( "ZooKeeper does not contain meta-region-server node.") - logger.warn( - "ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. (%s retries remaining)", missing_znode_retries) + logger.warn("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " + "(%s retries remaining)", missing_znode_retries) sleep(2.0) - return LocateMeta(zkquorum, establish_connection_timeout=establish_connection_timeout, missing_znode_retries=missing_znode_retries - 1, zk=zk) + return LocateMaster(zkquorum, establish_connection_timeout=establish_connection_timeout, + missing_znode_retries=missing_znode_retries - 1, zk=zk) # We don't need to maintain a connection to ZK. If we need it again we'll # recreate the connection. A possible future implementation can subscribe # to ZK and listen for when RegionServers go down, then pre-emptively @@ -84,8 +85,8 @@ def LocateMaster(zkquorum, establish_connection_timeout=5, missing_znode_retries magic = unpack(">I", rsp[meta_length + 5:meta_length + 9])[0] if magic != 1346524486: # 4 bytes: PBUF - raise ZookeeperResponseException( - "ZooKeeper returned an invalid response (are you running a version of HBase supporting Protobufs?)") + raise ZookeeperResponseException("ZooKeeper returned an invalid response (are you running " + "a version of HBase supporting Protobufs?)") rsp = rsp[meta_length + 9:] meta = MetaRegionServer() meta.ParseFromString(rsp) diff --git a/setup.cfg b/setup.cfg index 5943bac..b4d9225 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,7 @@ [flake8] max-line-length = 100 ignore = E123,E133,E226,E241,E242,T003 +exclude = pybase/pb,build,dist [isort] line_length=100 diff --git a/tests/test_integration.py b/tests/test_integration.py index f2b304d..0bd5d8d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -5,7 +5,8 @@ from time import sleep import pybase -from pybase.exceptions import * +from pybase.exceptions import (MalformedValues, NoSuchColumnFamilyException, + NoSuchTableException, ZookeeperException) # Please note that all below unit tests require the existence of a table # to play with. Table must contain two column families specified below as well. @@ -24,11 +25,8 @@ def test_new_client_good(self): self.assertIsNotNone(c.master_client.host) def test_new_client_bad(self): - try: - c = pybase.NewClient("badzkquorum") - self.assertEqual(1, 0) - except ZookeeperException: - pass + with self.assertRaises(ZookeeperException): + pybase.NewClient("badzkquorum") def test_client_close(self): c = pybase.NewClient(zkquorum) @@ -82,11 +80,8 @@ def test_get_specific_cell(self): self.assertNotIn(cf2, resd.keys()) def test_get_bad_table(self): - try: - res = self.c.get("asdasdasd", "plsfail") - self.assertEqual(1, 0) - except NoSuchTableException: - pass + with self.assertRaises(NoSuchTableException): + self.c.get("asdasdasd", "plsfail") def test_get_bad_row(self): res = self.c.get(table, "plsfail") @@ -114,11 +109,8 @@ def test_get_with_filter(self): def test_get_with_bad_filter(self): ft = "badfilter" - try: - res = self.c.get(table, self.row_prefix, filters=ft) - self.assertEqual(1, 0) - except ValueError: - pass + with self.assertRaises(ValueError): + self.c.get(table, self.row_prefix, filters=ft) class TestPut(unittest.TestCase): @@ -216,17 +208,14 @@ def test_scan_simple(self): def test_scan_with_range(self): rsp = self.c.scan( - table, start_key=self.row_prefix + "0", stop_key=self.row_prefix + "50", filters=self.pFilter) + table, start_key=self.row_prefix + "0", stop_key=self.row_prefix + "50", + filters=self.pFilter) # It's not 100 because rows are compared lexicographically. self.assertEqual(len(rsp.flatten_cells()), 92) def test_scan_with_bad_range(self): - try: - rsp = self.c.scan( - table, start_key="hmm", stop_key=24, filters=self.pFilter) - self.assertEqual(1, 0) - except TypeError: - pass + with self.assertRaises(TypeError): + self.c.scan(table, start_key="hmm", stop_key=24, filters=self.pFilter) def test_scan_with_families(self): fam = {cf1: ["oberyn"]} @@ -235,11 +224,8 @@ def test_scan_with_families(self): def test_scan_with_bad_column_family(self): fam = {"hodor": ["stillhodor"]} - try: - rsp = self.c.scan(table, filters=self.pFilter, families=fam) - self.assertEqual(1, 0) - except NoSuchColumnFamilyException: - pass + with self.assertRaises(NoSuchColumnFamilyException): + self.c.scan(table, filters=self.pFilter, families=fam) def test_scan_with_bad_column_qualifier(self): fam = {cf1: ["badqual"], cf2: ["one"]} @@ -316,11 +302,8 @@ def test_delete_bad_column_family(self): "i am hodor": "" } } - try: - rsp = self.c.delete(table, self.row_prefix + "2", value) - self.assertEqual(0, 1) - except NoSuchColumnFamilyException: - pass + with self.assertRaises(NoSuchColumnFamilyException): + self.c.delete(table, self.row_prefix + "2", value) def test_delete_bad_column_qualifier(self): value = { @@ -398,11 +381,8 @@ def test_append_bad_column_family(self): "oberyn": "is the", } } - try: - rsp = self.c.append(table, self.row_prefix + "3", values) - self.assertEqual(1, 0) - except NoSuchColumnFamilyException: - pass + with self.assertRaises(NoSuchColumnFamilyException): + self.c.append(table, self.row_prefix + "3", values) def test_append_bad_column_qualifier(self): values = { @@ -531,11 +511,8 @@ def test_increment_bad_column_family(self): } # TODO: Throwing RuntimeError: java.io.IOException when it should be throwing # column family exception. - try: - rsp = self.c.increment(table, self.row_prefix + "2", new_values) - self.assertEqual(1, 0) - except NoSuchColumnFamilyException: - pass + with self.assertRaises(NoSuchColumnFamilyException): + self.c.increment(table, self.row_prefix + "2", new_values) def test_increment_new_column_qualifier(self): new_values = { diff --git a/tests/test_integration_availability.py b/tests/test_integration_availability.py index 8a93a98..c2b62dd 100644 --- a/tests/test_integration_availability.py +++ b/tests/test_integration_availability.py @@ -3,10 +3,9 @@ import os import subprocess import unittest -from collections import defaultdict import pybase -from pybase.exceptions import * +from pybase.exceptions import ZookeeperException # Please note that all below unit tests require the existence of a table # to play with. Table must contain two column families specified below as well. @@ -16,6 +15,7 @@ cf1 = "cf1" cf2 = "cf2" + class TestAvailability(unittest.TestCase): @classmethod @@ -101,13 +101,12 @@ def test_region_server_musical_chairs(self): pass - # Currently no admin functionality. Have to go through the hbase shell to # do things like moving regions, rebalancing, etc. def hbase_shell(cmd): echo = subprocess.Popen( ('echo', '"' + cmd + ';exit"'), stdout=subprocess.PIPE) - output = subprocess.check_output(('hbase', 'shell'), stdin=echo.stdout) + subprocess.check_output(('hbase', 'shell'), stdin=echo.stdout) echo.wait() From 34fb2005525290a868aa56188d3e9c4ca6acf272 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 27 Sep 2018 12:29:19 -0700 Subject: [PATCH 05/53] fix imports, python3 compat --- pybase/client.py | 3 +-- pybase/helpers/__init__.py | 6 +++--- pybase/pb/Client_pb2.py | 8 ++++---- pybase/pb/ClusterStatus_pb2.py | 6 +++--- pybase/pb/Filter_pb2.py | 4 ++-- pybase/pb/HBase_pb2.py | 2 +- pybase/pb/Master_pb2.py | 10 +++++----- pybase/pb/MultiRowMutation_pb2.py | 2 +- pybase/pb/Quota_pb2.py | 2 +- pybase/pb/RPC_pb2.py | 4 ++-- pybase/pb/ZooKeeper_pb2.py | 4 ++-- pybase/region/client.py | 5 ++++- 12 files changed, 29 insertions(+), 27 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 9779372..8aff419 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -24,10 +24,9 @@ import pybase.zk.client as zk from intervaltree import IntervalTree -from filters import _to_filter - from .exceptions import (MasterServerException, NoSuchTableException, PyBaseException, RegionException, RegionServerException) +from .filters import _to_filter from .region.region import region_from_cell from .request import request diff --git a/pybase/helpers/__init__.py b/pybase/helpers/__init__.py index 4db2fe6..c36125f 100644 --- a/pybase/helpers/__init__.py +++ b/pybase/helpers/__init__.py @@ -1,3 +1,3 @@ -import varint -# Silence pyflakes -assert varint +from __future__ import absolute_import, print_function + +from . import varint # noqa diff --git a/pybase/pb/Client_pb2.py b/pybase/pb/Client_pb2.py index 0db478e..e59ca92 100644 --- a/pybase/pb/Client_pb2.py +++ b/pybase/pb/Client_pb2.py @@ -12,10 +12,10 @@ from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import enum_type_wrapper -import Cell_pb2 -import Comparator_pb2 -import Filter_pb2 -import HBase_pb2 +from . import Cell_pb2 +from . import Comparator_pb2 +from . import Filter_pb2 +from . import HBase_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/ClusterStatus_pb2.py b/pybase/pb/ClusterStatus_pb2.py index 24f0806..303fb8c 100644 --- a/pybase/pb/ClusterStatus_pb2.py +++ b/pybase/pb/ClusterStatus_pb2.py @@ -11,9 +11,9 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import ClusterId_pb2 -import FS_pb2 -import HBase_pb2 +from . import ClusterId_pb2 +from . import FS_pb2 +from . import HBase_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/Filter_pb2.py b/pybase/pb/Filter_pb2.py index 2e0f75e..5090652 100644 --- a/pybase/pb/Filter_pb2.py +++ b/pybase/pb/Filter_pb2.py @@ -11,8 +11,8 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import Comparator_pb2 -import HBase_pb2 +from . import Comparator_pb2 +from . import HBase_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/HBase_pb2.py b/pybase/pb/HBase_pb2.py index 140983f..a7acdad 100644 --- a/pybase/pb/HBase_pb2.py +++ b/pybase/pb/HBase_pb2.py @@ -12,7 +12,7 @@ from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import enum_type_wrapper -import Cell_pb2 +from . import Cell_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/Master_pb2.py b/pybase/pb/Master_pb2.py index ea87777..b9af77f 100644 --- a/pybase/pb/Master_pb2.py +++ b/pybase/pb/Master_pb2.py @@ -11,11 +11,11 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import Client_pb2 -import ClusterStatus_pb2 -import ErrorHandling_pb2 -import HBase_pb2 -import Quota_pb2 +from . import Client_pb2 +from . import ClusterStatus_pb2 +from . import ErrorHandling_pb2 +from . import HBase_pb2 +from . import Quota_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/MultiRowMutation_pb2.py b/pybase/pb/MultiRowMutation_pb2.py index 616c955..c3bc07c 100644 --- a/pybase/pb/MultiRowMutation_pb2.py +++ b/pybase/pb/MultiRowMutation_pb2.py @@ -11,7 +11,7 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import Client_pb2 +from . import Client_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/Quota_pb2.py b/pybase/pb/Quota_pb2.py index e9dacbe..fc45a25 100644 --- a/pybase/pb/Quota_pb2.py +++ b/pybase/pb/Quota_pb2.py @@ -12,7 +12,7 @@ from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import enum_type_wrapper -import HBase_pb2 +from . import HBase_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/RPC_pb2.py b/pybase/pb/RPC_pb2.py index 2f80447..7562452 100644 --- a/pybase/pb/RPC_pb2.py +++ b/pybase/pb/RPC_pb2.py @@ -11,8 +11,8 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import HBase_pb2 -import Tracing_pb2 +from . import HBase_pb2 +from . import Tracing_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/pb/ZooKeeper_pb2.py b/pybase/pb/ZooKeeper_pb2.py index 52283d4..6a19f6b 100644 --- a/pybase/pb/ZooKeeper_pb2.py +++ b/pybase/pb/ZooKeeper_pb2.py @@ -11,8 +11,8 @@ from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -import ClusterStatus_pb2 -import HBase_pb2 +from . import ClusterStatus_pb2 +from . import HBase_pb2 _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) # @@protoc_insertion_point(imports) diff --git a/pybase/region/client.py b/pybase/region/client.py index 37bd575..5136567 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -17,7 +17,10 @@ import logging import socket -from cStringIO import StringIO +try: + from io import StringIO +except ImportError: + from cStringIO import StringIO from struct import pack, unpack from threading import Condition, Lock From 57e06f978ce105788606f092a08240c254dfc8aa Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 11 Oct 2018 17:01:20 -0700 Subject: [PATCH 06/53] bump to 0.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5208f84..7d00b2e 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.1', + version='0.2', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From c49c2ac7bfb1372191942016c264bcac615ee780 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 11 Oct 2018 17:34:05 -0700 Subject: [PATCH 07/53] add classifiers --- setup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup.py b/setup.py index 7d00b2e..ced8a30 100644 --- a/setup.py +++ b/setup.py @@ -9,4 +9,13 @@ license='Apache License 2.0', packages=find_packages('.', exclude=['tests']), install_requires=["intervaltree", "kazoo", "six", "zope.interface", "protobuf"], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + ], zip_safe=False) From 387057e0658b86dc8edc51a5c8b42831dc788885 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 11 Oct 2018 17:40:52 -0700 Subject: [PATCH 08/53] add bdist_wheel config --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index b4d9225..78bbac2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,3 +7,6 @@ exclude = pybase/pb,build,dist line_length=100 indent=' ' balanced_wrapping=True + +[bdist_wheel] +universal=1 From ade95d0562a13e902768ebae9b95078dc922579e Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Fri, 12 Oct 2018 08:19:49 -0700 Subject: [PATCH 09/53] use BytesIO, not StringIO --- pybase/region/client.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 5136567..e6813f8 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -17,10 +17,7 @@ import logging import socket -try: - from io import StringIO -except ImportError: - from cStringIO import StringIO +from io import BytesIO from struct import pack, unpack from threading import Condition, Lock @@ -235,7 +232,7 @@ def _bad_call_id(self, my_id, my_request, msg_id, data): # received. If a socket is closed (RegionServer died) then raise an # exception that goes all the way back to the main client def _recv_n(self, sock, n): - partial_str = StringIO() + partial_str = BytesIO() partial_len = 0 while partial_len < n: packet = sock.recv(n - partial_len) From 91a3c02410b3ea43a9192e13821a1eb1d85c3058 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Fri, 12 Oct 2018 09:19:05 -0700 Subject: [PATCH 10/53] convert everything to byte string/unicode literals --- pybase/client.py | 16 ++++++++-------- pybase/exceptions.py | 2 +- pybase/filters.py | 2 +- pybase/helpers/varint.py | 2 +- pybase/region/client.py | 6 +++--- pybase/region/region.py | 2 +- pybase/request/request.py | 22 +++++++++++----------- pybase/zk/client.py | 4 ++-- setup.py | 2 ++ tests/test_integration.py | 2 +- tests/test_integration_availability.py | 2 +- 11 files changed, 32 insertions(+), 30 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 8aff419..8a8b572 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import logging import logging.config @@ -62,15 +62,15 @@ def __init__(self, zkquorum, pool_size): def _add_to_region_cache(self, new_region): stop_key = new_region.stop_key - if stop_key == '': + if stop_key == b'': # This is hacky but our interval tree requires hard interval stops. # So what's the largest char out there? chr(255) -> '\xff'. If # you're using '\xff' as a prefix for your rows then this'll cause # a cache miss on every request. - stop_key = '\xff' + stop_key = b'\xff' # Keys are formatted like: 'tablename,key' - start_key = new_region.table + ',' + new_region.start_key - stop_key = new_region.table + ',' + stop_key + start_key = new_region.table + b',' + new_region.start_key + stop_key = new_region.table + b',' + stop_key # Only let one person touch the cache at once. with self._cache_lock: @@ -106,7 +106,7 @@ def _get_from_region_cache(self, table, key): def _delete_from_region_cache(self, table, start_key): # Don't acquire the lock because the calling function should have done # so already - self.region_cache.remove_overlap(table + "," + start_key) + self.region_cache.remove_overlap(table + b"," + start_key) """ HERE LAY REQUESTS @@ -218,7 +218,7 @@ def scan(self, table, start_key='', stop_key=None, families={}, filters=None): # recursive scan it rescanned whatever the first_response # initially contained. Appending both will produce duplicates. previous_stop_key = cur_region.stop_key - if previous_stop_key == '' or \ + if previous_stop_key == b'' or \ (stop_key is not None and previous_stop_key > stop_key): break continue @@ -426,7 +426,7 @@ def _purge_region(self, reg): pass def _construct_meta_key(self, table, key): - return table + "," + key + ",:" + return table + b"," + key + b",:" def close(self): logger.info("Main client received close request.") diff --git a/pybase/exceptions.py b/pybase/exceptions.py index acf93a2..b56415f 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import logging from collections import defaultdict diff --git a/pybase/filters.py b/pybase/filters.py index 8ea878f..a9f3dcc 100644 --- a/pybase/filters.py +++ b/pybase/filters.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import traceback diff --git a/pybase/helpers/varint.py b/pybase/helpers/varint.py index 786af93..13b515b 100644 --- a/pybase/helpers/varint.py +++ b/pybase/helpers/varint.py @@ -27,7 +27,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals class NotEnoughDataExcption(Exception): diff --git a/pybase/region/client.py b/pybase/region/client.py index e6813f8..a4dbe52 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import logging import socket @@ -181,7 +181,7 @@ def _receive_rpc(self, call_id, rq, data=None): # call_ids don't match? Looks like a different thread nabbed our # response. return self._bad_call_id(call_id, rq, header.call_id, full_data) - elif header.exception.exception_class_name != u'': + elif header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name if exception_class in \ @@ -293,4 +293,4 @@ def _send_hello(sock): def _to_varint(val): temp = [] encoder(temp.append, val) - return "".join(temp) + return b"".join(temp) diff --git a/pybase/region/region.py b/pybase/region/region.py index 31baee6..b97ceda 100644 --- a/pybase/region/region.py +++ b/pybase/region/region.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals from struct import unpack diff --git a/pybase/request/request.py b/pybase/request/request.py index 2f05827..fffe198 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals from ..exceptions import MalformedFamilies, MalformedValues from ..filters import _to_filter @@ -6,8 +6,8 @@ # Table + Family used when requesting meta information from the # MetaRegionServer -metaTableName = "hbase:meta,,1" -metaInfoFamily = {"info": []} +metaTableName = b"hbase:meta,,1" +metaInfoFamily = {b"info": []} class Request: @@ -24,7 +24,7 @@ def master_request(meta_key): rq.get.closest_row_before = True rq.region.type = 1 rq.region.value = metaTableName - return Request("Get", rq) + return Request(b"Get", rq) def get_request(region, key, families, filters): @@ -36,7 +36,7 @@ def get_request(region, key, families, filters): rq.region.value = region.region_name if pbFilter is not None: rq.get.filter.CopyFrom(pbFilter) - return Request("Get", rq) + return Request(b"Get", rq) def put_request(region, key, values): @@ -46,7 +46,7 @@ def put_request(region, key, values): rq.mutation.row = key rq.mutation.mutate_type = 2 rq.mutation.column_value.extend(values_to_column_values(values)) - return Request("Mutate", rq) + return Request(b"Mutate", rq) def delete_request(region, key, values): @@ -57,7 +57,7 @@ def delete_request(region, key, values): rq.mutation.mutate_type = 3 rq.mutation.column_value.extend( values_to_column_values(values, delete=True)) - return Request("Mutate", rq) + return Request(b"Mutate", rq) def append_request(region, key, values): @@ -67,7 +67,7 @@ def append_request(region, key, values): rq.mutation.row = key rq.mutation.mutate_type = 0 rq.mutation.column_value.extend(values_to_column_values(values)) - return Request("Mutate", rq) + return Request(b"Mutate", rq) def increment_request(region, key, values): @@ -77,7 +77,7 @@ def increment_request(region, key, values): rq.mutation.row = key rq.mutation.mutate_type = 1 rq.mutation.column_value.extend(values_to_column_values(values)) - return Request("Mutate", rq) + return Request(b"Mutate", rq) def scan_request(region, start_key, stop_key, families, filters, close, scanner_id): @@ -89,14 +89,14 @@ def scan_request(region, start_key, stop_key, families, filters, close, scanner_ rq.close_scanner = close if scanner_id is not None: rq.scanner_id = int(scanner_id) - return Request("Scan", rq) + return Request(b"Scan", rq) rq.scan.column.extend(families_to_columns(families)) rq.scan.start_row = start_key if stop_key is not None: rq.scan.stop_row = stop_key if filters is not None: rq.scan.filter.CopyFrom(filters) - return Request("Scan", rq) + return Request(b"Scan", rq) # Converts a dictionary specifying ColumnFamilys -> Qualifiers into the Column pb type. diff --git a/pybase/zk/client.py b/pybase/zk/client.py index af48df2..6cf0afc 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import logging from struct import unpack @@ -72,7 +72,7 @@ def LocateMaster(zkquorum, establish_connection_timeout=5, missing_znode_retries # The first byte must be \xff and the next four bytes are a little-endian # uint32 containing the length of the meta. first_byte, meta_length = unpack(">cI", rsp[:5]) - if first_byte != '\xff': + if first_byte != b'\xff': # Malformed response raise ZookeeperResponseException( "ZooKeeper returned an invalid response") diff --git a/setup.py b/setup.py index ced8a30..b0bef3b 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + from setuptools import find_packages, setup setup(name='pybase', diff --git a/tests/test_integration.py b/tests/test_integration.py index 0bd5d8d..2053494 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import unittest from collections import defaultdict diff --git a/tests/test_integration_availability.py b/tests/test_integration_availability.py index c2b62dd..679cdc0 100644 --- a/tests/test_integration_availability.py +++ b/tests/test_integration_availability.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import, print_function, unicode_literals import os import subprocess From cad6bba3819eb07f4d2514192f2f51b67e19dd69 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 1 Nov 2018 16:15:34 -0700 Subject: [PATCH 11/53] make all classes subclass object --- pybase/client.py | 4 +-- pybase/filters.py | 76 +++++++++++++++++++-------------------- pybase/region/client.py | 2 +- pybase/region/region.py | 2 +- pybase/request/request.py | 2 +- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 8a8b572..0024809 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -35,7 +35,7 @@ logger = logging.getLogger('pybase') -class MainClient: +class MainClient(object): def __init__(self, zkquorum, pool_size): # Location of the ZooKeeper quorum (csv) @@ -441,7 +441,7 @@ def close(self): self.reverse_client_cache = {} -class Result: +class Result(object): # Called like Result(my_response), takes all the wanted data from # my_response and puts it into our own result structure. diff --git a/pybase/filters.py b/pybase/filters.py index a9f3dcc..e9b0da5 100644 --- a/pybase/filters.py +++ b/pybase/filters.py @@ -48,7 +48,7 @@ # A FilterList is also a Filter. But it's also a list of Filters with an # operator. This allows you to build up complicated boolean expressions by # chaining FilterLists. -class FilterList: +class FilterList(object): def __init__(self, operator, *arg): self.filter_type = pbFilter.FilterList @@ -62,7 +62,7 @@ def add_filters(self, *arg): self.filters.append(_to_filter(new_filter)) -class ColumnCountGetFilter: +class ColumnCountGetFilter(object): def __init__(self, limit): self.filter_type = pbFilter.ColumnCountGetFilter @@ -70,7 +70,7 @@ def __init__(self, limit): self.limit = limit -class ColumnPaginationFilter: +class ColumnPaginationFilter(object): def __init__(self, limit, offset, column_offset): self.filter_type = pbFilter.ColumnPaginationFilter @@ -80,7 +80,7 @@ def __init__(self, limit, offset, column_offset): self.column_offset = column_offset -class ColumnPrefixFilter: +class ColumnPrefixFilter(object): def __init__(self, prefix): self.filter_type = pbFilter.ColumnPrefixFilter @@ -88,7 +88,7 @@ def __init__(self, prefix): self.prefix = prefix -class ColumnRangeFilter: +class ColumnRangeFilter(object): def __init__(self, min_column, min_column_inclusive, max_column, max_column_inclusive): self.filter_type = pbFilter.ColumnRangeFilter @@ -99,7 +99,7 @@ def __init__(self, min_column, min_column_inclusive, max_column, max_column_incl self.max_column_inclusive = max_column_inclusive -class CompareFilter: +class CompareFilter(object): def __init__(self, compare_op, comparator): self.filter_type = pbFilter.CompareFilter @@ -108,7 +108,7 @@ def __init__(self, compare_op, comparator): self.comparator = _to_comparator(comparator) -class DependentColumnFilter: +class DependentColumnFilter(object): def __init__(self, compare_filter, column_family, column_qualifier, drop_dependent_column): self.filter_type = pbFilter.DependentColumnFilter @@ -119,7 +119,7 @@ def __init__(self, compare_filter, column_family, column_qualifier, drop_depende self.drop_dependent_column = drop_dependent_column -class FamilyFilter: +class FamilyFilter(object): def __init__(self, compare_filter): self.filter_type = pbFilter.FamilyFilter @@ -127,7 +127,7 @@ def __init__(self, compare_filter): self.compare_filter = _to_filter(compare_filter) -class FilterWrapper: +class FilterWrapper(object): def __init__(self, new_filter): self.filter_type = pbFilter.FilterWrapper @@ -135,14 +135,14 @@ def __init__(self, new_filter): self.filter = _to_filter(new_filter) -class FirstKeyOnlyFilter: +class FirstKeyOnlyFilter(object): def __init__(self): self.filter_type = pbFilter.FirstKeyOnlyFilter self.name = filter_path + "FirstKeyOnlyFilter" -class FirstKeyValueMatchingQualifiersFilter: +class FirstKeyValueMatchingQualifiersFilter(object): def __init__(self, qualifiers): self.filter_type = pbFilter.FirstKeyValueMatchingQualifiersFilter @@ -150,7 +150,7 @@ def __init__(self, qualifiers): self.qualifiers = qualifiers -class FuzzyRowFilter: +class FuzzyRowFilter(object): def __init__(self, fuzzy_keys_data): self.filter_type = pbFilter.FuzzyRowFilter @@ -164,7 +164,7 @@ def __init__(self, fuzzy_keys_data): self.fuzzy_keys_data.append(_to_bytes_bytes_pair(fuzzy_keys_data)) -class InclusiveStopFilter: +class InclusiveStopFilter(object): def __init__(self, stop_row_key): self.filter_type = pbFilter.InclusiveStopFilter @@ -172,7 +172,7 @@ def __init__(self, stop_row_key): self.stop_row_key = stop_row_key -class KeyOnlyFilter: +class KeyOnlyFilter(object): def __init__(self, len_as_val): self.filter_type = pbFilter.KeyOnlyFilter @@ -180,7 +180,7 @@ def __init__(self, len_as_val): self.len_as_val = len_as_val -class MultipleColumnPrefixFilter: +class MultipleColumnPrefixFilter(object): def __init__(self, sorted_prefixes): self.filter_type = pbFilter.MultipleColumnPrefixFilter @@ -191,7 +191,7 @@ def __init__(self, sorted_prefixes): self.sorted_prefixes = [sorted_prefixes] -class PageFilter: +class PageFilter(object): def __init__(self, page_size): self.filter_type = pbFilter.PageFilter @@ -199,7 +199,7 @@ def __init__(self, page_size): self.page_size = page_size -class PrefixFilter: +class PrefixFilter(object): def __init__(self, prefix): self.filter_type = pbFilter.PrefixFilter @@ -207,7 +207,7 @@ def __init__(self, prefix): self.prefix = prefix -class QualifierFilter: +class QualifierFilter(object): def __init__(self, compare_filter): self.filter_type = pbFilter.QualifierFilter @@ -215,7 +215,7 @@ def __init__(self, compare_filter): self.compare_filter = _to_pb_filter(compare_filter) -class RandomRowFilter: +class RandomRowFilter(object): def __init__(self, chance): self.filter_type = pbFilter.RandomRowFilter @@ -223,7 +223,7 @@ def __init__(self, chance): self.chance = chance -class RowFilter: +class RowFilter(object): def __init__(self, compare_filter): self.filter_type = pbFilter.RowFilter @@ -231,7 +231,7 @@ def __init__(self, compare_filter): self.compare_filter = _to_filter(compare_filter) -class SkipColumnValueExcludeFilter: +class SkipColumnValueExcludeFilter(object): def __init__(self, single_column_value_filter): self.filter_type = pbFilter.SkipColumnValueExcludeFilter @@ -240,7 +240,7 @@ def __init__(self, single_column_value_filter): single_column_value_filter) -class SkipColumnValueFilter: +class SkipColumnValueFilter(object): def __init__(self, compare_op, comparator, column_family, column_qualifier, filter_if_missing, latest_version_only): @@ -254,7 +254,7 @@ def __init__(self, compare_op, comparator, column_family, column_qualifier, filt self.latest_version_only = latest_version_only -class SkipFilter: +class SkipFilter(object): def __init__(self, orig_filter): self.filter_type = pbFilter.SkipFilter @@ -262,7 +262,7 @@ def __init__(self, orig_filter): self.filter = orig_filter -class TimestampsFilter: +class TimestampsFilter(object): def __init__(self, timestamps): self.filter_type = pbFilter.TimestampsFilter @@ -273,7 +273,7 @@ def __init__(self, timestamps): self.timestamps = [timestamps] -class ValueFilter: +class ValueFilter(object): def __init__(self, compare_filter): self.filter_type = pbFilter.ValueFilter @@ -281,7 +281,7 @@ def __init__(self, compare_filter): self.compare_filter = _to_filter(compare_filter) -class WhileMatchFilter: +class WhileMatchFilter(object): def __init__(self, origFilter): self.filter_type = pbFilter.WhileMatchFilter @@ -289,14 +289,14 @@ def __init__(self, origFilter): self.filter = _to_filter(origFilter) -class FilterAllFilter: +class FilterAllFilter(object): def __init__(self): self.filter_type = pbFilter.FilterAllFilter self.name = filter_path + "FilterAllFilter" -class MultiRowRangeFilter: +class MultiRowRangeFilter(object): def __init__(self, row_range_list): self.filter_type = pbFilter.MultiRowRangeFilter @@ -353,7 +353,7 @@ def _to_pb_filter(orig_filter): raise ValueError("Malformed Filter provided, %s %s" % (ex, traceback.format_exc())) -class ByteArrayComparable: +class ByteArrayComparable(object): def __init__(self, value): self.comparable_type = pbComparator.ByteArrayComparable @@ -376,7 +376,7 @@ def _to_comparable(orig_cmp): raise ValueError("Malformed Comparable provided %s %s" % (ex, traceback.format_exc())) -class BinaryComparator: +class BinaryComparator(object): def __init__(self, comparable): self.comparator_type = pbComparator.BinaryComparator @@ -384,7 +384,7 @@ def __init__(self, comparable): self.comparable = _to_comparable(comparable) -class LongComparator: +class LongComparator(object): def __init__(self, comparable): self.comparator_type = pbComparator.LongComparator @@ -392,7 +392,7 @@ def __init__(self, comparable): self.comparable = _to_comparable(comparable) -class BinaryPrefixComparator: +class BinaryPrefixComparator(object): def __init__(self, comparable): self.comparator_type = pbComparator.BinaryPrefixComparator @@ -400,7 +400,7 @@ def __init__(self, comparable): self.comparable = _to_comparable(comparable) -class BitComparator: +class BitComparator(object): def __init__(self, comparable, bitwise_op): self.comparator_type = pbComparator.BitComparator @@ -409,14 +409,14 @@ def __init__(self, comparable, bitwise_op): self.bitwise_op = bitwise_op -class NullComparator: +class NullComparator(object): def __init__(self): self.comparator_type = pbComparator.NullComparator self.name = comparator_path + "NullComparator" -class RegexStringComparator: +class RegexStringComparator(object): def __init__(self, pattern, pattern_flags, charset, engine): self.comparator_type = pbComparator.RegexStringComparator @@ -427,7 +427,7 @@ def __init__(self, pattern, pattern_flags, charset, engine): self.engine = engine -class StringComparator: +class StringComparator(object): def __init__(self, substr): self.comparator_type = pbComparator.BinaryPrefixComparator @@ -459,7 +459,7 @@ def _to_comparator(orig_cmp): raise ValueError("Malformed Comparator provided %s %s" % (ex, traceback.format_exc())) -class BytesBytesPair: +class BytesBytesPair(object): def __init__(self, first, second): self.first = first @@ -476,7 +476,7 @@ def _to_bytes_bytes_pair(bbp): raise ValueError("Malformed BytesBytesPair provided") -class RowRange: +class RowRange(object): def __init__(self, start_row, start_row_inclusive, stop_row, stop_row_inclusive): self.filter_type = pbFilter.RowRange diff --git a/pybase/region/client.py b/pybase/region/client.py index a4dbe52..9d93ea0 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -46,7 +46,7 @@ # This Client is created once per RegionServer. Handles all communication # to and from this specific RegionServer. -class Client: +class Client(object): # Variables are as follows: # - Host: The hostname of the RegionServer # - Port: The port of the RegionServer diff --git a/pybase/region/region.py b/pybase/region/region.py index b97ceda..5479b02 100644 --- a/pybase/region/region.py +++ b/pybase/region/region.py @@ -20,7 +20,7 @@ from ..pb.HBase_pb2 import RegionInfo as pbRegionInfo -class Region: +class Region(object): def __init__(self, table, name, start, stop): self.table = table diff --git a/pybase/request/request.py b/pybase/request/request.py index fffe198..68bc1b5 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -10,7 +10,7 @@ metaInfoFamily = {b"info": []} -class Request: +class Request(object): def __init__(self, type, pb): # noqa: B002 self.type = type From d1fa3f54d77d0d5dffcf41a8c1acfdcd6c741e6a Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 1 Nov 2018 16:15:50 -0700 Subject: [PATCH 12/53] tidy up loggers --- pybase/client.py | 5 +---- pybase/region/client.py | 2 -- pybase/zk/client.py | 3 +-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 0024809..39bb717 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -16,7 +16,6 @@ from __future__ import absolute_import, print_function, unicode_literals import logging -import logging.config from itertools import chain from threading import Lock @@ -30,9 +29,7 @@ from .region.region import region_from_cell from .request import request -# Using a tiered logger such that all submodules propagate through to this -# logger. Changing the logging level here should affect all other modules. -logger = logging.getLogger('pybase') +logger = logging.getLogger(__name__) class MainClient(object): diff --git a/pybase/region/client.py b/pybase/region/client.py index 9d93ea0..4acbf16 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -28,8 +28,6 @@ from ..pb.RPC_pb2 import ConnectionHeader, RequestHeader, ResponseHeader logger = logging.getLogger(__name__) -# socket.setdefaulttimeout interfers with gevent. -# socket.setdefaulttimeout(2) # Used to encode and decode varints in a format protobuf expects. encoder = varint.encodeVarint diff --git a/pybase/zk/client.py b/pybase/zk/client.py index 6cf0afc..af6742d 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -27,8 +27,7 @@ ZookeeperResponseException, ZookeeperZNodeException) from ..pb.ZooKeeper_pb2 import MetaRegionServer -logger = logging.getLogger('pybase.' + __name__) -logger.setLevel(logging.DEBUG) +logger = logging.getLogger(__name__) znode = "/hbase" From f886b278a0d7bb5ec410b4f7a9e5d5ce495f8875 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 1 Nov 2018 16:16:53 -0700 Subject: [PATCH 13/53] v0.2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b0bef3b..076715a 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.2', + version='0.2.1', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From 8d258b091885cd542a96380b5bf3d734bea6ecd7 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Tue, 18 Dec 2018 11:22:24 -0800 Subject: [PATCH 14/53] update intervaltree version --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 076715a..b234a17 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,15 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.2.1', + version='0.3.0', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', author_email='CurleySamuel@gmail.com', license='Apache License 2.0', packages=find_packages('.', exclude=['tests']), - install_requires=["intervaltree", "kazoo", "six", "zope.interface", "protobuf"], + install_requires=["intervaltree >= 3.0, < 4.0", + "kazoo", "six", "zope.interface", "protobuf"], classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 2", From 5279c603503bacd1601d3a6e5d7fca3f4160e414 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Thu, 21 Feb 2019 11:53:02 -0800 Subject: [PATCH 15/53] use bytes --- pybase/client.py | 2 +- pybase/region/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 39bb717..3bf4136 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -227,7 +227,7 @@ def scan(self, table, start_key='', stop_key=None, families={}, filters=None): previous_stop_key = cur_region.stop_key # Stopping criteria. This region is either the end ('') or the end of this region is # beyond the specific stop_key. - if previous_stop_key == '' or (stop_key is not None and previous_stop_key > stop_key): + if previous_stop_key == b'' or (stop_key is not None and previous_stop_key > stop_key): break return result_set diff --git a/pybase/region/client.py b/pybase/region/client.py index 4acbf16..7576d34 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -282,7 +282,7 @@ def _send_hello(sock): # 1. "HBas\x00\x50". Magic prefix that HBase requires. # 2. Little-endian uint32 indicating length of serialized ConnectionHeader # 3. Serialized ConnectionHeader - message = "HBas\x00\x50" + pack(">I", len(serialized)) + serialized + message = b"HBas\x00\x50" + pack(">I", len(serialized)) + serialized sock.send(message) From d0b9cd28e3b20bb9d7ebf986dfef858ca25407be Mon Sep 17 00:00:00 2001 From: Arnab Bhadury Date: Fri, 22 Feb 2019 03:24:31 +0000 Subject: [PATCH 16/53] a lot of bytes/unicode mangling and updating protobufs decoder --- pybase/client.py | 13 +++-- pybase/helpers/varint.py | 111 ++++++++++++++++++-------------------- pybase/region/client.py | 8 +-- pybase/request/request.py | 4 ++ 4 files changed, 69 insertions(+), 67 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 3bf4136..f2f8e07 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -16,6 +16,7 @@ from __future__ import absolute_import, print_function, unicode_literals import logging +from builtins import str from itertools import chain from threading import Lock @@ -341,15 +342,15 @@ def _create_new_region(self, response, table): # We get ~4 cells back each holding different information. We only care # about two of them. for cell in cells: - if cell.qualifier == "regioninfo": + if cell.qualifier == b"regioninfo": # Take the regioninfo information and parse it into our own # Region representation. new_region = region_from_cell(cell) - elif cell.qualifier == "server": + elif cell.qualifier == b"server": # Grab the host, port of the Region Server that this region is # hosted on. server_loc = cell.value - host, port = cell.value.split(':') + host, port = cell.value.split(b':') else: continue # Do we have an existing client for this region server already? @@ -423,7 +424,11 @@ def _purge_region(self, reg): pass def _construct_meta_key(self, table, key): - return table + b"," + key + b",:" + if isinstance(table, str): + table = table.encode('utf8') + if isinstance(key, str): + key = key.encode('utf8') + return b"%b,%b,:" % (table, key) def close(self): logger.info("Main client received close request.") diff --git a/pybase/helpers/varint.py b/pybase/helpers/varint.py index 13b515b..57b1b82 100644 --- a/pybase/helpers/varint.py +++ b/pybase/helpers/varint.py @@ -29,77 +29,70 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import absolute_import, print_function, unicode_literals +import six class NotEnoughDataExcption(Exception): pass -def _VarintDecoder(mask): - """Return an encoder for a basic varint value (does not include tag). - - Decoded values will be bitwise-anded with the given mask before being - returned, e.g. to limit them to 32 bits. The returned decoder does not - take the usual "end" parameter -- the caller is expected to do bounds checking - after the fact (often the caller can defer such checking until later). The - decoder returns a (value, new_pos) pair. - """ - - local_ord = ord - - def DecodeVarint(buffer, pos): - result = 0 - shift = 0 - while 1: - if pos > len(buffer) - 1: - raise NotEnoughDataExcption("Not enough data to decode varint") - b = local_ord(buffer[pos]) - result |= ((b & 0x7f) << shift) - pos += 1 - if not (b & 0x80): - result &= mask - return (result, pos) - shift += 7 - if shift >= 64: - raise ValueError('Too many bytes when decoding varint.') - - return DecodeVarint - - -def _SignedVarintDecoder(mask): - """Like _VarintDecoder() but decodes signed values.""" - - local_ord = ord - - def DecodeVarint(buffer, pos): - result = 0 - shift = 0 - while 1: - if pos > len(buffer) - 1: - raise NotEnoughDataExcption("Not enough data to decode varint") - b = local_ord(buffer[pos]) - result |= ((b & 0x7f) << shift) - pos += 1 - if not (b & 0x80): - if result > 0x7fffffffffffffff: - result -= (1 << 64) - result |= ~mask - else: - result &= mask - return (result, pos) - shift += 7 - if shift >= 64: - raise ValueError('Too many bytes when decoding varint.') - - return DecodeVarint +def _VarintDecoder(mask, result_type=int): + """Return an encoder for a basic varint value (does not include tag). + Decoded values will be bitwise-anded with the given mask before being + returned, e.g. to limit them to 32 bits. The returned decoder does not + take the usual "end" parameter -- the caller is expected to do bounds checking + after the fact (often the caller can defer such checking until later). The + decoder returns a (value, new_pos) pair. + """ + + def DecodeVarint(buffer, pos): + result = 0 + shift = 0 + while 1: + b = six.indexbytes(buffer, pos) + result |= ((b & 0x7f) << shift) + pos += 1 + if not (b & 0x80): + result &= mask + result = result_type(result) + return (result, pos) + shift += 7 + if shift >= 64: + raise ValueError('Too many bytes when decoding varint.') + return DecodeVarint + + +def _SignedVarintDecoder(bits, result_type=int): + """Like _VarintDecoder() but decodes signed values.""" + + signbit = 1 << (bits - 1) + mask = (1 << bits) - 1 + + def DecodeVarint(buffer, pos): + result = 0 + shift = 0 + while 1: + b = six.indexbytes(buffer, pos) + result |= ((b & 0x7f) << shift) + pos += 1 + if not (b & 0x80): + result &= mask + result = (result ^ signbit) - signbit + result = result_type(result) + return (result, pos) + shift += 7 + if shift >= 64: + raise ValueError('Too many bytes when decoding varint.') + return DecodeVarint + decodeVarint = _VarintDecoder((1 << 64) - 1) -decodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1) +decodeSignedVarint = _SignedVarintDecoder(64, int) # Use these versions for values which must be limited to 32 bits. decodeVarint32 = _VarintDecoder((1 << 32) - 1) -decodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1) +decodeSignedVarint32 = _SignedVarintDecoder(32, int) def varintSize(value): diff --git a/pybase/region/client.py b/pybase/region/client.py index 7576d34..975d385 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -36,9 +36,9 @@ # We need to know how to interpret an incoming proto.Message. This maps # the request_type to the response_type. response_types = { - "Get": GetResponse, - "Mutate": MutateResponse, - "Scan": ScanResponse + b"Get": GetResponse, + b"Mutate": MutateResponse, + b"Scan": ScanResponse } @@ -291,4 +291,4 @@ def _send_hello(sock): def _to_varint(val): temp = [] encoder(temp.append, val) - return b"".join(temp) + return "".join(temp).encode('utf8') diff --git a/pybase/request/request.py b/pybase/request/request.py index 68bc1b5..3c54348 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, print_function, unicode_literals +from builtins import str + from ..exceptions import MalformedFamilies, MalformedValues from ..filters import _to_filter from ..pb.Client_pb2 import Column, GetRequest, MutateRequest, MutationProto, ScanRequest @@ -18,6 +20,8 @@ def __init__(self, type, pb): # noqa: B002 def master_request(meta_key): + if isinstance(meta_key, str): + meta_key = meta_key.encode('utf8') rq = GetRequest() rq.get.row = meta_key rq.get.column.extend(families_to_columns(metaInfoFamily)) From dc1731e8aad6ea91d7fc652862ba79b2f164e5ba Mon Sep 17 00:00:00 2001 From: Arnab Bhadury Date: Fri, 22 Feb 2019 03:50:27 +0000 Subject: [PATCH 17/53] commit new protobuf encoders and lint --- pybase/helpers/varint.py | 114 ++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/pybase/helpers/varint.py b/pybase/helpers/varint.py index 57b1b82..4711a2e 100644 --- a/pybase/helpers/varint.py +++ b/pybase/helpers/varint.py @@ -31,63 +31,60 @@ import six + class NotEnoughDataExcption(Exception): pass def _VarintDecoder(mask, result_type=int): - """Return an encoder for a basic varint value (does not include tag). - Decoded values will be bitwise-anded with the given mask before being - returned, e.g. to limit them to 32 bits. The returned decoder does not - take the usual "end" parameter -- the caller is expected to do bounds checking - after the fact (often the caller can defer such checking until later). The - decoder returns a (value, new_pos) pair. - """ - - def DecodeVarint(buffer, pos): - result = 0 - shift = 0 - while 1: - b = six.indexbytes(buffer, pos) - result |= ((b & 0x7f) << shift) - pos += 1 - if not (b & 0x80): - result &= mask - result = result_type(result) - return (result, pos) - shift += 7 - if shift >= 64: - raise ValueError('Too many bytes when decoding varint.') - return DecodeVarint + """Return an encoder for a basic varint value (does not include tag). + Decoded values will be bitwise-anded with the given mask before being + returned, e.g. to limit them to 32 bits. The returned decoder does not + take the usual "end" parameter -- the caller is expected to do bounds checking + after the fact (often the caller can defer such checking until later). The + decoder returns a (value, new_pos) pair. + """ + def DecodeVarint(buffer, pos): + result = 0 + shift = 0 + while 1: + b = six.indexbytes(buffer, pos) + result |= ((b & 0x7f) << shift) + pos += 1 + if not (b & 0x80): + result &= mask + result = result_type(result) + return (result, pos) + shift += 7 + if shift >= 64: + raise ValueError('Too many bytes when decoding varint.') + return DecodeVarint def _SignedVarintDecoder(bits, result_type=int): - """Like _VarintDecoder() but decodes signed values.""" - - signbit = 1 << (bits - 1) - mask = (1 << bits) - 1 - - def DecodeVarint(buffer, pos): - result = 0 - shift = 0 - while 1: - b = six.indexbytes(buffer, pos) - result |= ((b & 0x7f) << shift) - pos += 1 - if not (b & 0x80): - result &= mask - result = (result ^ signbit) - signbit - result = result_type(result) - return (result, pos) - shift += 7 - if shift >= 64: - raise ValueError('Too many bytes when decoding varint.') - return DecodeVarint - + """Like _VarintDecoder() but decodes signed values.""" + signbit = 1 << (bits - 1) + mask = (1 << bits) - 1 + + def DecodeVarint(buffer, pos): + result = 0 + shift = 0 + while 1: + b = six.indexbytes(buffer, pos) + result |= ((b & 0x7f) << shift) + pos += 1 + if not (b & 0x80): + result &= mask + result = (result ^ signbit) - signbit + result = result_type(result) + return (result, pos) + shift += 7 + if shift >= 64: + raise ValueError('Too many bytes when decoding varint.') + return DecodeVarint decodeVarint = _VarintDecoder((1 << 64) - 1) - decodeSignedVarint = _SignedVarintDecoder(64, int) # Use these versions for values which must be limited to 32 bits. @@ -144,38 +141,31 @@ def signedVarintSize(value): def _VarintEncoder(): - """Return an encoder for a basic varint value.""" - - local_chr = chr - - def EncodeVarint(write, value): + """Return an encoder for a basic varint value (does not include tag).""" + def EncodeVarint(write, value, unused_deterministic=None): bits = value & 0x7f value >>= 7 while value: - write(local_chr(0x80 | bits)) + write(six.int2byte(0x80|bits)) bits = value & 0x7f value >>= 7 - return write(local_chr(bits)) - + return write(six.int2byte(bits)) return EncodeVarint def _SignedVarintEncoder(): - """Return an encoder for a basic signed varint value.""" - - local_chr = chr - - def EncodeSignedVarint(write, value): + """Return an encoder for a basic signed varint value (does not include + tag).""" + def EncodeSignedVarint(write, value, unused_deterministic=None): if value < 0: value += (1 << 64) bits = value & 0x7f value >>= 7 while value: - write(local_chr(0x80 | bits)) + write(six.int2byte(0x80|bits)) bits = value & 0x7f value >>= 7 - return write(local_chr(bits)) - + return write(six.int2byte(bits)) return EncodeSignedVarint From e93a649c4070c81557dbb789737dbad6344c83a1 Mon Sep 17 00:00:00 2001 From: Arnab Bhadury Date: Fri, 22 Feb 2019 06:06:20 +0000 Subject: [PATCH 18/53] address code review --- pybase/region/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 975d385..87cf31a 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -291,4 +291,4 @@ def _send_hello(sock): def _to_varint(val): temp = [] encoder(temp.append, val) - return "".join(temp).encode('utf8') + return b"".join(temp) From 0c411e8c91c7bcbd5d879ba69f28887ff61a3401 Mon Sep 17 00:00:00 2001 From: Arnab Bhadury Date: Fri, 22 Feb 2019 18:02:42 +0000 Subject: [PATCH 19/53] increment version to 0.3.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b234a17..e6c01d1 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.3.0', + version='0.3.1', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From bfcb14ca6c4152d3e45d9b13f5b13e8e5fe2d2c6 Mon Sep 17 00:00:00 2001 From: arnie0426 Date: Mon, 25 Feb 2019 14:34:10 -0800 Subject: [PATCH 20/53] python 2 compatibility -- %b doesnt exist in python2 --- pybase/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/client.py b/pybase/client.py index f2f8e07..1b96450 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -428,7 +428,7 @@ def _construct_meta_key(self, table, key): table = table.encode('utf8') if isinstance(key, str): key = key.encode('utf8') - return b"%b,%b,:" % (table, key) + return table + b',' + key + b',:' def close(self): logger.info("Main client received close request.") From 02b7412450048991e18250cca217a4be7e44e70f Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Mon, 25 Feb 2019 14:36:17 -0800 Subject: [PATCH 21/53] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e6c01d1..1993bd4 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.3.1', + version='0.3.2', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From 9ccb943c8c467dd361f1eb6b85dbbe239d89e19e Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Tue, 26 Feb 2019 14:27:55 -0800 Subject: [PATCH 22/53] py2/py3 compat for exception re-raising --- pybase/exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pybase/exceptions.py b/pybase/exceptions.py index b56415f..9d569aa 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -32,7 +32,7 @@ class PyBaseException(Exception): # unrecoverable and thus the _handle method # just reraises the exception. def _handle_exception(self, main_client, **kwargs): - raise self.__class__(self.message) + raise self.__class__(str(self)) # Parent of any exceptions involving Zookeeper @@ -137,7 +137,7 @@ def __init__(self, host, port): self.port = port def _handle_exception(self, main_client, **kwargs): - raise self.__class__(self.message) + raise self.__class__(str(self)) # All region exceptions inherit from me. From 679c44ef57c3c3fdbeaf99e18a83cb85350767c8 Mon Sep 17 00:00:00 2001 From: Simon Hewitt Date: Wed, 27 Feb 2019 09:33:36 -0800 Subject: [PATCH 23/53] fix byte/str encoding in exception classes --- pybase/client.py | 2 +- pybase/exceptions.py | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 1b96450..ac0ae4e 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -410,7 +410,7 @@ def _purge_client(self, region_client): for reg in region_client.regions: self._delete_from_region_cache(reg.table, reg.start_key) self.reverse_client_cache.pop( - region_client.host + ":" + region_client.port, None) + region_client.host + b":" + region_client.port, None) region_client.close() def _purge_region(self, reg): diff --git a/pybase/exceptions.py b/pybase/exceptions.py index 9d569aa..5108d88 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -21,6 +21,8 @@ from threading import Lock, Semaphore from time import sleep, time +from builtins import str + logger = logging.getLogger(__name__) @@ -64,15 +66,15 @@ class ZookeeperResponseException(ZookeeperException): class RegionServerException(PyBaseException): def __init__(self, host=None, port=None, region_client=None): - self.host = host - self.port = port + self.host = host.encode('utf8') if isinstance(host, str) else host + self.port = port.encode('utf8') if isinstance(port, str) else port self.region_client = region_client def _handle_exception(self, main_client, **kwargs): # region_client not set? Then host/port must have been. Fetch the # client given the host, port if self.region_client is None: - concat = self.host + ":" + self.port + concat = self.host + b":" + self.port self.region_client = main_client.reverse_client_cache.get( concat, None) # Let one greenlet through per region_client (returns True otherwise @@ -82,7 +84,7 @@ def _handle_exception(self, main_client, **kwargs): if self.region_client is not None: # We need to make sure that a different thread hasn't already # reestablished to this region. - loc = self.region_client.host + ":" + self.region_client.port + loc = self.region_client.host + b":" + self.region_client.port if loc in main_client.reverse_client_cache: # We're the first in and it's our job to kill the client. # Purge it. @@ -92,7 +94,7 @@ def _handle_exception(self, main_client, **kwargs): main_client._purge_client(self.region_client) # Sleep for an arbitrary amount of time. If this returns # False then we've hit our max retry threshold. Die. - key = self.region_client.host + ':' + self.region_client.port + key = self.region_client.host + b":" + self.region_client.port if not _dynamic_sleep(self, key): raise self finally: @@ -110,8 +112,8 @@ class RegionServerStoppedException(RegionServerException): class MasterServerException(PyBaseException): def __init__(self, host, port): - self.host = host - self.port = port + self.host = host.encode('utf8') if isinstance(host, str) else host + self.port = port.encode('utf8') if isinstance(port, str) else port def _handle_exception(self, main_client, **kwargs): # Let one greenlet through. Others block and eventually return False. @@ -132,10 +134,6 @@ def _handle_exception(self, main_client, **kwargs): # Master gave us funky data. Unrecoverable. class MasterMalformedResponseException(MasterServerException): - def __init__(self, host, port): - self.host = host - self.port = port - def _handle_exception(self, main_client, **kwargs): raise self.__class__(str(self)) From 09537afd4efb8e61cadb8111f14295d47d08f407 Mon Sep 17 00:00:00 2001 From: Mike Cora Date: Tue, 16 Jun 2020 16:07:27 -0700 Subject: [PATCH 24/53] maybe patch deadlock that's plaguing flavour --- pybase/region/client.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 87cf31a..c1064e7 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -17,6 +17,7 @@ import logging import socket +from contextlib import contextmanager from io import BytesIO from struct import pack, unpack from threading import Condition, Lock @@ -42,6 +43,14 @@ } +@contextmanager +def acquire_timeout(lock, timeout): + result = lock.acquire(timeout=timeout) + yield result + if result: + lock.release() + + # This Client is created once per RegionServer. Handles all communication # to and from this specific RegionServer. class Client(object): @@ -102,7 +111,7 @@ def __init__(self, host, port): # 4. A varint representing the length of the serialized RPC. # 5. The serialized RPC. # - def _send_request(self, rq): + def _send_request(self, rq, lock_timeout=30): with self.call_lock: my_id = self.call_id self.call_id += 1 @@ -125,10 +134,16 @@ def _send_request(self, rq): pool_id = my_id % self.pool_size try: - with self.write_lock_pool[pool_id]: - logger.debug('Sending %s RPC to %s:%s on pool port %s', - rq.type, self.host, self.port, pool_id) - self.sock_pool[pool_id].send(to_send) + # todo: quick hack to patch a deadlock happening here. Needs revisiting. + with acquire_timeout(self.write_lock_pool[pool_id], lock_timeout) as acquired: + if acquired: + logger.debug('Sending %s RPC to %s:%s on pool port %s', + rq.type, self.host, self.port, pool_id) + self.sock_pool[pool_id].send(to_send) + else: + logger.warning('Lock timeout %s RPC to %s:%s on pool port %s' % + (rq.type, self.host, self.port, pool_id)) + raise RegionServerException(region_client=self) except socket.error: # RegionServer dead? raise RegionServerException(region_client=self) From 1b8ca5cb0524ca86caecac66d08fe5d7a4fe5dce Mon Sep 17 00:00:00 2001 From: Mike Cora Date: Tue, 16 Jun 2020 16:11:59 -0700 Subject: [PATCH 25/53] increment version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1993bd4..4235244 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.3.2', + version='0.3.3', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From a773ed170be81258696b6df02fae3bc6135eaaab Mon Sep 17 00:00:00 2001 From: Mike Cora Date: Wed, 17 Jun 2020 08:14:47 -0700 Subject: [PATCH 26/53] more lock timeouts --- pybase/region/client.py | 73 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index c1064e7..eea7b00 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -111,10 +111,14 @@ def __init__(self, host, port): # 4. A varint representing the length of the serialized RPC. # 5. The serialized RPC. # - def _send_request(self, rq, lock_timeout=30): - with self.call_lock: - my_id = self.call_id - self.call_id += 1 + def _send_request(self, rq, lock_timeout=10): + with acquire_timeout(self.call_lock, lock_timeout) as acquired: + if acquired: + my_id = self.call_id + self.call_id += 1 + else: + logger.warning('Lock timeout %s RPC to %s:%s' % (rq.type, self.host, self.port)) + raise RegionServerException(region_client=self) serialized_rpc = rq.pb.SerializeToString() header = RequestHeader() header.call_id = my_id @@ -141,7 +145,7 @@ def _send_request(self, rq, lock_timeout=30): rq.type, self.host, self.port, pool_id) self.sock_pool[pool_id].send(to_send) else: - logger.warning('Lock timeout %s RPC to %s:%s on pool port %s' % + logger.warning('Lock timeout sending %s RPC to %s:%s on pool port %s' % (rq.type, self.host, self.port, pool_id)) raise RegionServerException(region_client=self) except socket.error: @@ -161,7 +165,7 @@ def _send_request(self, rq, lock_timeout=30): # 4. A varint representing the length of the serialized ResponseMessage. # 5. The ResponseMessage. # - def _receive_rpc(self, call_id, rq, data=None): + def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): # If the field data is populated that means we should process from that # instead of the socket. full_data = data @@ -169,18 +173,23 @@ def _receive_rpc(self, call_id, rq, data=None): pool_id = call_id % self.pool_size # Total message length is going to be the first four bytes # (little-endian uint32) - with self.read_lock_pool[pool_id]: - try: - msg_length = self._recv_n(self.sock_pool[pool_id], 4) - if msg_length is None: - raise - msg_length = unpack(">I", msg_length)[0] - # The message is then going to be however many bytes the first four - # bytes specified. We don't want to overread or underread as that'll - # cause havoc. - full_data = self._recv_n( - self.sock_pool[pool_id], msg_length) - except socket.error: + with acquire_timeout(self.read_lock_pool[pool_id], lock_timeout) as acquired: + if acquired: + try: + msg_length = self._recv_n(self.sock_pool[pool_id], 4) + if msg_length is None: + raise + msg_length = unpack(">I", msg_length)[0] + # The message is then going to be however many bytes the first four + # bytes specified. We don't want to overread or underread as that'll + # cause havoc. + full_data = self._recv_n( + self.sock_pool[pool_id], msg_length) + except socket.error: + raise RegionServerException(region_client=self) + else: + logger.warning('Lock timeout receive %s RPC to %s:%s on pool port %s' % + (rq.type, self.host, self.port, pool_id)) raise RegionServerException(region_client=self) # Pass in the full data as well as your current position to the # decoder. It'll then return two variables: @@ -227,18 +236,22 @@ def _receive_rpc(self, call_id, rq, data=None): # 4.5 Call wait() on the conditional and get comfy. # 5. Pop your data out # 6. Release the lock - def _bad_call_id(self, my_id, my_request, msg_id, data): - with self.missed_rpcs_lock: - logger.debug( - "Received invalid RPC ID. Got: %s, Expected: %s.", msg_id, my_id) - self.missed_rpcs[msg_id] = data - self.missed_rpcs_condition.notifyAll() - while my_id not in self.missed_rpcs: - if self.shutting_down: - raise RegionServerException(region_client=self) - self.missed_rpcs_condition.wait() - new_data = self.missed_rpcs.pop(my_id) - logger.debug("Another thread found my RPC! RPC ID: %s", my_id) + def _bad_call_id(self, my_id, my_request, msg_id, data, lock_timeout=10): + with acquire_timeout(self.missed_rpcs_lock, lock_timeout) as acquired: + if acquired: + logger.debug( + "Received invalid RPC ID. Got: %s, Expected: %s.", msg_id, my_id) + self.missed_rpcs[msg_id] = data + self.missed_rpcs_condition.notifyAll() + while my_id not in self.missed_rpcs: + if self.shutting_down: + raise RegionServerException(region_client=self) + self.missed_rpcs_condition.wait() + new_data = self.missed_rpcs.pop(my_id) + logger.debug("Another thread found my RPC! RPC ID: %s", my_id) + else: + logger.warning('Lock timeout bad_call to %s:%s' % (self.host, self.port)) + raise RegionServerException(region_client=self) return self._receive_rpc(my_id, my_request, data=new_data) # Receives exactly n bytes from the socket. Will block until n bytes are From 42a48d70f3089eca7bb511deeb76f1f5398e6a9a Mon Sep 17 00:00:00 2001 From: Mike Cora Date: Mon, 16 Nov 2020 18:56:05 -0800 Subject: [PATCH 27/53] this got stuck too --- pybase/region/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index eea7b00..23c5637 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -246,7 +246,7 @@ def _bad_call_id(self, my_id, my_request, msg_id, data, lock_timeout=10): while my_id not in self.missed_rpcs: if self.shutting_down: raise RegionServerException(region_client=self) - self.missed_rpcs_condition.wait() + self.missed_rpcs_condition.wait(lock_timeout) new_data = self.missed_rpcs.pop(my_id) logger.debug("Another thread found my RPC! RPC ID: %s", my_id) else: From c8ee9c7d9cc0181fe6d6b615432e274c4455e80b Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Thu, 19 Aug 2021 17:26:48 -0300 Subject: [PATCH 28/53] Fix reference to undeclared var --- pybase/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pybase/client.py b/pybase/client.py index ac0ae4e..c39bc95 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -339,6 +339,7 @@ def _create_new_region(self, response, table): # table doesn't exist! if len(cells) == 0: raise NoSuchTableException("Table does not exist.") + server_loc = None # We get ~4 cells back each holding different information. We only care # about two of them. for cell in cells: @@ -354,7 +355,7 @@ def _create_new_region(self, response, table): else: continue # Do we have an existing client for this region server already? - if server_loc in self.reverse_client_cache: + if server_loc and server_loc in self.reverse_client_cache: # If so, grab it! new_region.region_client = self.reverse_client_cache[server_loc] else: From eacdbabdb6d1aa79c4baf886da74fcccfa01347e Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Fri, 20 Aug 2021 17:01:10 -0300 Subject: [PATCH 29/53] Think I got everything --- pybase/client.py | 18 +++++++++++------- pybase/exceptions.py | 27 +++++++++++++++++++++++++-- pybase/region/client.py | 16 ++++++++++------ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index c39bc95..976c8bc 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -35,7 +35,7 @@ class MainClient(object): - def __init__(self, zkquorum, pool_size): + def __init__(self, zkquorum, pool_size, secondary=False): # Location of the ZooKeeper quorum (csv) self.zkquorum = zkquorum # Connection pool size per region server (and master!) @@ -53,6 +53,10 @@ def __init__(self, zkquorum, pool_size): # Mutex used so only one thread can request meta information from # the master at a time. self._master_lookup_lock = Lock() + # Capture if this client is being used for secondary operations + # We don't really care if it fails, best effort only. + self.secondary = secondary + """ HERE LAY CACHE OPERATIONS @@ -326,9 +330,9 @@ def _discover_region(self, table, key): except (AttributeError, RegionServerException, RegionException): if self.master_client is None: # I don't know why this can happen but it does. - raise MasterServerException(None, None) + raise MasterServerException(None, None, secondary=self.secondary) raise MasterServerException( - self.master_client.host, self.master_client.port) + self.master_client.host, self.master_client.port, secondary=self.secondary) # Master gave us a response. We need to run and parse the response, # then do all necessary work for entering it into our structures. return self._create_new_region(response, table) @@ -360,11 +364,11 @@ def _create_new_region(self, response, table): new_region.region_client = self.reverse_client_cache[server_loc] else: # Otherwise we need to create a new region client instance. - new_client = region.NewClient(host, port, self.pool_size) + new_client = region.NewClient(host, port, self.pool_size, secondary=self.secondary) if new_client is None: # Welp. We can't connect to the server that the Master # supplied. Raise an exception. - raise RegionServerException(host=host, port=port) + raise RegionServerException(host=host, port=port, secondary=self.secondary) logger.info("Created new Client for RegionServer %s", server_loc) # Add it to the host,port -> instance of region client map. self.reverse_client_cache[server_loc] = new_client @@ -385,11 +389,11 @@ def _recreate_master_client(self): try: # Try creating a new client instance and setting it as the new # master_client. - self.master_client = region.NewClient(ip, port, self.pool_size) + self.master_client = region.NewClient(ip, port, self.pool_size, secondary=self.secondary) except RegionServerException: # We can't connect to the address that ZK supplied. Raise an # exception. - raise MasterServerException(ip, port) + raise MasterServerException(ip, port, secondary=self.secondary) """ HERE LAY THE MISCELLANEOUS diff --git a/pybase/exceptions.py b/pybase/exceptions.py index 5108d88..30169b9 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -65,10 +65,11 @@ class ZookeeperResponseException(ZookeeperException): # Means an RS is dead or unreachable. class RegionServerException(PyBaseException): - def __init__(self, host=None, port=None, region_client=None): + def __init__(self, host=None, port=None, region_client=None, secondary=False): self.host = host.encode('utf8') if isinstance(host, str) else host self.port = port.encode('utf8') if isinstance(port, str) else port self.region_client = region_client + self.secondary = secondary def _handle_exception(self, main_client, **kwargs): # region_client not set? Then host/port must have been. Fetch the @@ -77,6 +78,11 @@ def _handle_exception(self, main_client, **kwargs): concat = self.host + b":" + self.port self.region_client = main_client.reverse_client_cache.get( concat, None) + + # we don't care about secondaries, move on + if (self.region_client and self.region_client.secondary) or self.secondary: + _let_all_through(self, self.region_client) + # Let one greenlet through per region_client (returns True otherwise # blocks and eventually returns False) if _let_one_through(self, self.region_client): @@ -111,11 +117,16 @@ class RegionServerStoppedException(RegionServerException): # All Master exceptions inherit from me class MasterServerException(PyBaseException): - def __init__(self, host, port): + def __init__(self, host, port, secondary=False): self.host = host.encode('utf8') if isinstance(host, str) else host self.port = port.encode('utf8') if isinstance(port, str) else port + self.secondary = secondary def _handle_exception(self, main_client, **kwargs): + # we don't care about secondaries, move on + if self.secondary: + _let_all_through(self, None) + # Let one greenlet through. Others block and eventually return False. if _let_one_through(self, None): try: @@ -141,9 +152,17 @@ def _handle_exception(self, main_client, **kwargs): # All region exceptions inherit from me. class RegionException(PyBaseException): + def __init__(self, region_client=None): + self.region_client = region_client + def _handle_exception(self, main_client, **kwargs): if "dest_region" in kwargs: rg_n = kwargs["dest_region"].region_name + + # we don't care about secondaries, move on + if self.region_client and self.region_client.secondary: + _let_all_through(self, rg_n) + if _let_one_through(self, rg_n): try: main_client._purge_region(kwargs["dest_region"]) @@ -168,6 +187,10 @@ class NotServingRegionException(RegionException): class RegionOpeningException(RegionException): def _handle_exception(self, main_client, **kwargs): + # we don't care about secondaries, move on + if self.region_client and self.region_client.secondary: + raise self + if "dest_region" in kwargs: rg_n = kwargs["dest_region"].region_name # There's nothing to handle here. We just need to give the region diff --git a/pybase/region/client.py b/pybase/region/client.py index 23c5637..1bb8f36 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -61,7 +61,7 @@ class Client(object): # - call_id: A monotonically increasing int used as a sequence number for rpcs. This way # we can match incoming responses with the rpc that made the request. - def __init__(self, host, port): + def __init__(self, host, port, secondary): self.host = host self.port = port self.pool_size = 0 @@ -100,6 +100,10 @@ def __init__(self, host, port): # amount of meta lookups). self.regions = [] + # Capture if this client is being used for secondary operations + # We don't really care if it fails, best effort only. + self.secondary = secondary + # Sends an RPC over the wire then calls _receive_rpc and returns the # response RPC. # @@ -211,14 +215,14 @@ def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): "java.io.IOException"}: raise NoSuchColumnFamilyException() elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionMovedException': - raise RegionMovedException() + raise RegionMovedException(region_client=self) elif exception_class == 'org.apache.hadoop.hbase.NotServingRegionException': - raise NotServingRegionException() + raise NotServingRegionException(region_client=self) elif exception_class == \ 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': raise RegionServerException(region_client=self) elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionOpeningException': - raise RegionOpeningException() + raise RegionOpeningException(region_client=self) else: raise PyBaseException( exception_class + ". Remote traceback:\n%s" % header.exception.stack_trace) @@ -282,8 +286,8 @@ def close(self): # Creates a new RegionServer client. Creates the socket, initializes the # connection and returns an instance of Client. -def NewClient(host, port, pool_size): - c = Client(host, port) +def NewClient(host, port, pool_size, secondary=False): + c = Client(host, port, secondary) try: c.pool_size = pool_size for x in range(pool_size): From 70706b33da2a0fd104a16ddbc83832604a1e3d47 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Mon, 23 Aug 2021 16:28:53 -0300 Subject: [PATCH 30/53] Missed one --- pybase/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 976c8bc..d8f144e 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -495,9 +495,9 @@ def _append_response(self, rsp): # location of ZooKeeper this function will ask ZK for the location of the # meta table and create the region client responsible for future meta # lookups (masterclient). Returns an instance of MainClient -def NewClient(zkquorum, socket_pool_size=1): +def NewClient(zkquorum, socket_pool_size=1, secondary=False): # Create the main client. - a = MainClient(zkquorum, socket_pool_size) + a = MainClient(zkquorum, socket_pool_size, secondary) # Create the master client. a._recreate_master_client() return a From 2508c89ab1fb93a7f3f4e91515d50d3068e37ad9 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Mon, 23 Aug 2021 16:29:48 -0300 Subject: [PATCH 31/53] Remove optional main client param --- pybase/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/client.py b/pybase/client.py index d8f144e..8a07147 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -35,7 +35,7 @@ class MainClient(object): - def __init__(self, zkquorum, pool_size, secondary=False): + def __init__(self, zkquorum, pool_size, secondary): # Location of the ZooKeeper quorum (csv) self.zkquorum = zkquorum # Connection pool size per region server (and master!) From 448d388bd8f1e6e8e00e861e53b372ed2f75a0a9 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Wed, 1 Sep 2021 14:15:36 -0300 Subject: [PATCH 32/53] Fix locking --- pybase/region/client.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 1bb8f36..941ae65 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -46,9 +46,11 @@ @contextmanager def acquire_timeout(lock, timeout): result = lock.acquire(timeout=timeout) - yield result - if result: - lock.release() + try: + yield result + finally: + if result: + lock.release() # This Client is created once per RegionServer. Handles all communication From 2a1f3f9f0833440126cfc90345ea0ce1bad0831c Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Sat, 16 Oct 2021 11:11:09 -0300 Subject: [PATCH 33/53] Add zk watch for master connection --- pybase/client.py | 19 +++++------- pybase/zk/client.py | 76 +++++++++++++++++++++------------------------ 2 files changed, 43 insertions(+), 52 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 8a07147..f95cdbf 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -57,6 +57,9 @@ def __init__(self, zkquorum, pool_size, secondary): # We don't really care if it fails, best effort only. self.secondary = secondary + self.zk_client = zk.connect(zkquorum) + ip, port = zk.get_master_info(self.zk_client, self.update_master_client) + self.update_master_client(ip, port) """ HERE LAY CACHE OPERATIONS @@ -380,19 +383,15 @@ def _create_new_region(self, response, table): logger.info("Successfully discovered new region %s", new_region) return new_region - def _recreate_master_client(self): - if self.master_client is not None: - # yep, still no idea why self.master_client can be set to None. + def update_master_client(self, ip, port): + if self.master_client: self.master_client.close() - # Ask ZooKeeper for the location of the Master. - ip, port = zk.LocateMaster(self.zkquorum) + try: - # Try creating a new client instance and setting it as the new - # master_client. + # Try creating a new client instance and setting it as the new master_client. self.master_client = region.NewClient(ip, port, self.pool_size, secondary=self.secondary) + logger.info("Updated master client to %s:%s", ip, port) except RegionServerException: - # We can't connect to the address that ZK supplied. Raise an - # exception. raise MasterServerException(ip, port, secondary=self.secondary) """ @@ -498,6 +497,4 @@ def _append_response(self, rsp): def NewClient(zkquorum, socket_pool_size=1, secondary=False): # Create the main client. a = MainClient(zkquorum, socket_pool_size, secondary) - # Create the master client. - a._recreate_master_client() return a diff --git a/pybase/zk/client.py b/pybase/zk/client.py index af6742d..2478635 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -32,63 +32,57 @@ znode = "/hbase" -# LocateMeta takes a string representing the location of the ZooKeeper -# quorum. It then asks ZK for the location of the MetaRegionServer, -# returning a tuple containing (host_name, port). -def LocateMaster(zkquorum, establish_connection_timeout=5, missing_znode_retries=5, zk=None): - - if zk is None: - # Using Kazoo for interfacing with ZK - zk = KazooClient(hosts=zkquorum) +def connect(zkquorum, establish_connection_timeout=5): + zk = KazooClient(hosts=zkquorum) + try: zk.start(timeout=establish_connection_timeout) except KazooTimeoutError: - raise ZookeeperConnectionException( - "Cannot connect to ZooKeeper at {}".format(zkquorum)) - # MetaRegionServer information is located at /hbase/meta-region-server - try: - rsp, znodestat = zk.get(znode + "/meta-region-server") - except NoNodeError: - if missing_znode_retries == 0: - raise ZookeeperZNodeException( - "ZooKeeper does not contain meta-region-server node.") - logger.warn("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " - "(%s retries remaining)", missing_znode_retries) - sleep(2.0) - return LocateMaster(zkquorum, establish_connection_timeout=establish_connection_timeout, - missing_znode_retries=missing_znode_retries - 1, zk=zk) - # We don't need to maintain a connection to ZK. If we need it again we'll - # recreate the connection. A possible future implementation can subscribe - # to ZK and listen for when RegionServers go down, then pre-emptively - # reestablish those regions instead of waiting for a failed rpc to come - # back. Only issue is that if too many clients subscribe ZK may become - # overloaded. - zk.stop() - if len(rsp) == 0: + raise ZookeeperConnectionException("Cannot connect to ZooKeeper at {}".format(zkquorum)) + + return zk + + +def _parse_master_info(resp): + if len(resp) == 0: # Empty response is bad. - raise ZookeeperResponseException( - "ZooKeeper returned an empty response") + raise ZookeeperResponseException("ZooKeeper returned an empty response") # The first byte must be \xff and the next four bytes are a little-endian # uint32 containing the length of the meta. - first_byte, meta_length = unpack(">cI", rsp[:5]) + first_byte, meta_length = unpack(">cI", resp[:5]) if first_byte != b'\xff': # Malformed response - raise ZookeeperResponseException( - "ZooKeeper returned an invalid response") + raise ZookeeperResponseException("ZooKeeper returned an invalid response") if meta_length < 1 or meta_length > 65000: # Is this really an error? - raise ZookeeperResponseException( - "ZooKeeper returned too much meta information") + raise ZookeeperResponseException("ZooKeeper returned too much meta information") # ZNode data in HBase are serialized protobufs with a four byte magic # 'PBUF' prefix. - magic = unpack(">I", rsp[meta_length + 5:meta_length + 9])[0] + magic = unpack(">I", resp[meta_length + 5:meta_length + 9])[0] if magic != 1346524486: # 4 bytes: PBUF raise ZookeeperResponseException("ZooKeeper returned an invalid response (are you running " "a version of HBase supporting Protobufs?)") - rsp = rsp[meta_length + 9:] + rsp = resp[meta_length + 9:] meta = MetaRegionServer() meta.ParseFromString(rsp) - logger.info('Discovered Master at %s:%s', - meta.server.host_name, meta.server.port) + logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) return meta.server.host_name, meta.server.port + + +def get_master_info(zk, watch_fn, missing_znode_retries=5): + def _wrapped_watch(resp, stat): + watch_fn(*_parse_master_info(resp)) + + try: + resp, _ = zk.get(znode + "/meta-region-server", watch=_wrapped_watch) + + return _parse_master_info(resp) + except NoNodeError: + if missing_znode_retries == 0: + raise ZookeeperZNodeException( + "ZooKeeper does not contain meta-region-server node.") + logger.warn("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " + "(%s retries remaining)", missing_znode_retries) + sleep(2.0) + return get_master_info(zk, missing_znode_retries - 1) From 36a6d63ac7f0bd5262fe5daa4dbfd5a0cef13d88 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Sat, 16 Oct 2021 11:30:37 -0300 Subject: [PATCH 34/53] Switch to using datawatch recipe for watch re-registry --- pybase/client.py | 8 ++++++-- pybase/zk/client.py | 13 +++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index f95cdbf..7b0ccbe 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -58,8 +58,12 @@ def __init__(self, zkquorum, pool_size, secondary): self.secondary = secondary self.zk_client = zk.connect(zkquorum) - ip, port = zk.get_master_info(self.zk_client, self.update_master_client) - self.update_master_client(ip, port) + self.update_master_client(*zk.get_master_info(self.zk_client)) + + # register a callback handler when master znode data changes + @self.zk_client.DataWatch(zk.master_znode) + def _update_master_info(data, stat): + self.update_master_client(*zk.parse_master_info(data)) """ HERE LAY CACHE OPERATIONS diff --git a/pybase/zk/client.py b/pybase/zk/client.py index 2478635..ccc7ba0 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -30,6 +30,7 @@ logger = logging.getLogger(__name__) znode = "/hbase" +master_znode = znode + "/meta-region-server" def connect(zkquorum, establish_connection_timeout=5): @@ -43,7 +44,7 @@ def connect(zkquorum, establish_connection_timeout=5): return zk -def _parse_master_info(resp): +def parse_master_info(resp): if len(resp) == 0: # Empty response is bad. raise ZookeeperResponseException("ZooKeeper returned an empty response") @@ -70,14 +71,10 @@ def _parse_master_info(resp): return meta.server.host_name, meta.server.port -def get_master_info(zk, watch_fn, missing_znode_retries=5): - def _wrapped_watch(resp, stat): - watch_fn(*_parse_master_info(resp)) - +def get_master_info(zk, missing_znode_retries=5): try: - resp, _ = zk.get(znode + "/meta-region-server", watch=_wrapped_watch) - - return _parse_master_info(resp) + resp, _ = zk.get(master_znode) + return parse_master_info(resp) except NoNodeError: if missing_znode_retries == 0: raise ZookeeperZNodeException( From b22d0344487227b302378c9b4dc45ee063de966b Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Tue, 19 Oct 2021 08:45:42 -0300 Subject: [PATCH 35/53] Handle delete event & add conditional wait --- pybase/client.py | 15 ++++++++++----- pybase/zk/client.py | 3 ++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 7b0ccbe..f874ce9 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -18,14 +18,14 @@ import logging from builtins import str from itertools import chain -from threading import Lock +from threading import Condition, Lock import pybase.region.client as region import pybase.zk.client as zk from intervaltree import IntervalTree from .exceptions import (MasterServerException, NoSuchTableException, - PyBaseException, RegionException, RegionServerException) + PyBaseException, RegionException, RegionServerException, ZookeeperException) from .filters import _to_filter from .region.region import region_from_cell from .request import request @@ -58,12 +58,17 @@ def __init__(self, zkquorum, pool_size, secondary): self.secondary = secondary self.zk_client = zk.connect(zkquorum) - self.update_master_client(*zk.get_master_info(self.zk_client)) # register a callback handler when master znode data changes @self.zk_client.DataWatch(zk.master_znode) def _update_master_info(data, stat): - self.update_master_client(*zk.parse_master_info(data)) + if data: + self.update_master_client(*zk.parse_master_info(data)) + + wait_for_master = Condition() + wait_for_master.acquire() + if wait_for_master.wait_for(lambda: self.master_client is not None, 10.0): + raise ZookeeperException("Timed out waiting for master server watch to fire") """ HERE LAY CACHE OPERATIONS @@ -501,4 +506,4 @@ def _append_response(self, rsp): def NewClient(zkquorum, socket_pool_size=1, secondary=False): # Create the main client. a = MainClient(zkquorum, socket_pool_size, secondary) - return a + return a \ No newline at end of file diff --git a/pybase/zk/client.py b/pybase/zk/client.py index ccc7ba0..dbb2f6e 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -67,7 +67,8 @@ def parse_master_info(resp): rsp = resp[meta_length + 9:] meta = MetaRegionServer() meta.ParseFromString(rsp) - logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) + print('Discovered Master at %s:%s' % (meta.server.host_name, meta.server.port)) + #logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) return meta.server.host_name, meta.server.port From 9df2d0238a1e59810886c606ff0f1528492fb789 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Tue, 19 Oct 2021 08:56:23 -0300 Subject: [PATCH 36/53] PR feedback --- pybase/client.py | 3 ++- pybase/zk/client.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index f874ce9..11e3052 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -63,7 +63,8 @@ def __init__(self, zkquorum, pool_size, secondary): @self.zk_client.DataWatch(zk.master_znode) def _update_master_info(data, stat): if data: - self.update_master_client(*zk.parse_master_info(data)) + with self._master_lookup_lock: + self.update_master_client(*zk.parse_master_info(data)) wait_for_master = Condition() wait_for_master.acquire() diff --git a/pybase/zk/client.py b/pybase/zk/client.py index dbb2f6e..31a0296 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -67,8 +67,7 @@ def parse_master_info(resp): rsp = resp[meta_length + 9:] meta = MetaRegionServer() meta.ParseFromString(rsp) - print('Discovered Master at %s:%s' % (meta.server.host_name, meta.server.port)) - #logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) + logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) return meta.server.host_name, meta.server.port @@ -80,7 +79,7 @@ def get_master_info(zk, missing_znode_retries=5): if missing_znode_retries == 0: raise ZookeeperZNodeException( "ZooKeeper does not contain meta-region-server node.") - logger.warn("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " - "(%s retries remaining)", missing_znode_retries) + logger.warning("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " + "(%s retries remaining)", missing_znode_retries) sleep(2.0) return get_master_info(zk, missing_znode_retries - 1) From a37a25f674b6def7911f1290f54a631cbe3149bd Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Tue, 19 Oct 2021 13:28:53 -0300 Subject: [PATCH 37/53] Make py2 compatible plus remove some dead code --- pybase/client.py | 23 +++++++++++++++++------ pybase/zk/client.py | 16 +--------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 11e3052..c7e9a56 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -59,16 +59,29 @@ def __init__(self, zkquorum, pool_size, secondary): self.zk_client = zk.connect(zkquorum) + wait_for_master = Condition() + wait_for_master.acquire() + # register a callback handler when master znode data changes @self.zk_client.DataWatch(zk.master_znode) def _update_master_info(data, stat): + initial = self.master_client is None if data: with self._master_lookup_lock: self.update_master_client(*zk.parse_master_info(data)) - wait_for_master = Condition() - wait_for_master.acquire() - if wait_for_master.wait_for(lambda: self.master_client is not None, 10.0): + if initial: + wait_for_master.acquire() + wait_for_master.notify_all() + wait_for_master.release() + + wait_time = 0.0 + # wait up to 10s + while self.master_client is None and wait_time < 10.0: + wait_for_master.wait(1.0) + wait_time += 1.0 + + if self.master_client is None: raise ZookeeperException("Timed out waiting for master server watch to fire") """ @@ -505,6 +518,4 @@ def _append_response(self, rsp): # meta table and create the region client responsible for future meta # lookups (masterclient). Returns an instance of MainClient def NewClient(zkquorum, socket_pool_size=1, secondary=False): - # Create the main client. - a = MainClient(zkquorum, socket_pool_size, secondary) - return a \ No newline at end of file + return MainClient(zkquorum, socket_pool_size, secondary) \ No newline at end of file diff --git a/pybase/zk/client.py b/pybase/zk/client.py index 31a0296..7b2c347 100644 --- a/pybase/zk/client.py +++ b/pybase/zk/client.py @@ -68,18 +68,4 @@ def parse_master_info(resp): meta = MetaRegionServer() meta.ParseFromString(rsp) logger.info('Discovered Master at %s:%s', meta.server.host_name, meta.server.port) - return meta.server.host_name, meta.server.port - - -def get_master_info(zk, missing_znode_retries=5): - try: - resp, _ = zk.get(master_znode) - return parse_master_info(resp) - except NoNodeError: - if missing_znode_retries == 0: - raise ZookeeperZNodeException( - "ZooKeeper does not contain meta-region-server node.") - logger.warning("ZooKeeper does not contain meta-region-server node. Retrying in 2 seconds. " - "(%s retries remaining)", missing_znode_retries) - sleep(2.0) - return get_master_info(zk, missing_znode_retries - 1) + return meta.server.host_name, meta.server.port \ No newline at end of file From 9cdd49f578fa1be7b3fef390ee36bf1478f03c08 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Tue, 16 Aug 2022 15:58:55 -0300 Subject: [PATCH 38/53] Fix bytes string host --- pybase/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pybase/client.py b/pybase/client.py index c7e9a56..4ab6a8a 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -382,6 +382,7 @@ def _create_new_region(self, response, table): # hosted on. server_loc = cell.value host, port = cell.value.split(b':') + host = host.decode("utf-8") else: continue # Do we have an existing client for this region server already? From 643371a2bc43d462e00c99bc9f72748b2f8020bb Mon Sep 17 00:00:00 2001 From: Charlotte Iwasaki Date: Thu, 6 Oct 2022 11:45:02 -0700 Subject: [PATCH 39/53] Use native str for region_client exceptions (#13) * use native str for region_client exceptions * minor cleanup * try enforcing unicode elsewhere * test log * use unicode host * remove test log, fix last flake8 warning --- pybase/client.py | 23 ++++++++++++++--------- pybase/exceptions.py | 25 +++++++++++++++---------- pybase/region/client.py | 21 ++++++++++----------- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 4ab6a8a..3dfa029 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -24,8 +24,14 @@ import pybase.zk.client as zk from intervaltree import IntervalTree -from .exceptions import (MasterServerException, NoSuchTableException, - PyBaseException, RegionException, RegionServerException, ZookeeperException) +from .exceptions import ( + MasterServerException, + NoSuchTableException, + PyBaseException, + RegionException, + RegionServerException, + ZookeeperException +) from .filters import _to_filter from .region.region import region_from_cell from .request import request @@ -59,7 +65,7 @@ def __init__(self, zkquorum, pool_size, secondary): self.zk_client = zk.connect(zkquorum) - wait_for_master = Condition() + wait_for_master = Condition() wait_for_master.acquire() # register a callback handler when master znode data changes @@ -82,7 +88,7 @@ def _update_master_info(data, stat): wait_time += 1.0 if self.master_client is None: - raise ZookeeperException("Timed out waiting for master server watch to fire") + raise ZookeeperException("Timed out waiting for master server watch to fire") """ HERE LAY CACHE OPERATIONS @@ -339,8 +345,7 @@ def _find_hosting_region(self, table, key): dest_region = self._get_from_region_cache(table, key) if dest_region is None: # Nope, still not in the cache. - logger.debug( - 'Region cache miss! Table: %s, Key: %s', table, key) + logger.debug('Region cache miss! Table: %s, Key: %s', table, key) # Ask master for region information. dest_region = self._discover_region(table, key) return dest_region @@ -382,7 +387,6 @@ def _create_new_region(self, response, table): # hosted on. server_loc = cell.value host, port = cell.value.split(b':') - host = host.decode("utf-8") else: continue # Do we have an existing client for this region server already? @@ -413,7 +417,8 @@ def update_master_client(self, ip, port): try: # Try creating a new client instance and setting it as the new master_client. - self.master_client = region.NewClient(ip, port, self.pool_size, secondary=self.secondary) + self.master_client = region.NewClient( + ip, port, self.pool_size, secondary=self.secondary) logger.info("Updated master client to %s:%s", ip, port) except RegionServerException: raise MasterServerException(ip, port, secondary=self.secondary) @@ -519,4 +524,4 @@ def _append_response(self, rsp): # meta table and create the region client responsible for future meta # lookups (masterclient). Returns an instance of MainClient def NewClient(zkquorum, socket_pool_size=1, secondary=False): - return MainClient(zkquorum, socket_pool_size, secondary) \ No newline at end of file + return MainClient(zkquorum, socket_pool_size, secondary) diff --git a/pybase/exceptions.py b/pybase/exceptions.py index 30169b9..43d0188 100644 --- a/pybase/exceptions.py +++ b/pybase/exceptions.py @@ -79,7 +79,7 @@ def _handle_exception(self, main_client, **kwargs): self.region_client = main_client.reverse_client_cache.get( concat, None) - # we don't care about secondaries, move on + # we don't care about secondaries, move on if (self.region_client and self.region_client.secondary) or self.secondary: _let_all_through(self, self.region_client) @@ -90,17 +90,19 @@ def _handle_exception(self, main_client, **kwargs): if self.region_client is not None: # We need to make sure that a different thread hasn't already # reestablished to this region. - loc = self.region_client.host + b":" + self.region_client.port + loc = self.region_client.host + ":" + self.region_client.port if loc in main_client.reverse_client_cache: # We're the first in and it's our job to kill the client. # Purge it. - logger.warn("Region server %s:%s refusing connections. Purging cache, " - "sleeping, retrying.", - self.region_client.host, self.region_client.port) + logger.warning( + "Region server %s:%s refusing connections. " + "Purging cache, sleeping, retrying.", + self.region_client.host, self.region_client.port + ) main_client._purge_client(self.region_client) # Sleep for an arbitrary amount of time. If this returns # False then we've hit our max retry threshold. Die. - key = self.region_client.host + b":" + self.region_client.port + key = self.region_client.host + ":" + self.region_client.port if not _dynamic_sleep(self, key): raise self finally: @@ -132,10 +134,12 @@ def _handle_exception(self, main_client, **kwargs): try: # Makes sure someone else hasn't already fixed the issue. if main_client.master_client is None or \ - (self.host == main_client.master_client.host and - self.port == main_client.master_client.port): - logger.warn("Encountered an exception with the Master server. " - "Sleeping then reestablishing.") + (self.host == main_client.master_client.host + and self.port == main_client.master_client.port): + logger.warning( + "Encountered an exception with the Master server. " + "Sleeping then reestablishing." + ) if not _dynamic_sleep(self, None): raise self main_client._recreate_master_client() @@ -298,6 +302,7 @@ def sleep_formula(x): # [0.0, 0.44, 1.77, 4.0, 7.11, 11.11, 16.0, 21.77, 28.44, 36.0] return (x / 1.5)**2 + _exception_count = defaultdict(lambda: (0, time())) _max_retries = 7 _max_sleep = sleep_formula(_max_retries) diff --git a/pybase/region/client.py b/pybase/region/client.py index 941ae65..bfad8e4 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -64,8 +64,8 @@ class Client(object): # we can match incoming responses with the rpc that made the request. def __init__(self, host, port, secondary): - self.host = host - self.port = port + self.host = host.decode('utf8') if isinstance(host, bytes) else host + self.port = port.decode('utf8') if isinstance(port, bytes) else port self.pool_size = 0 # We support connection pools so have lists of sockets and read/write # mutexes on them. @@ -123,7 +123,7 @@ def _send_request(self, rq, lock_timeout=10): my_id = self.call_id self.call_id += 1 else: - logger.warning('Lock timeout %s RPC to %s:%s' % (rq.type, self.host, self.port)) + logger.warning('Lock timeout %s RPC to %s:%s', rq.type, self.host, self.port) raise RegionServerException(region_client=self) serialized_rpc = rq.pb.SerializeToString() header = RequestHeader() @@ -151,8 +151,8 @@ def _send_request(self, rq, lock_timeout=10): rq.type, self.host, self.port, pool_id) self.sock_pool[pool_id].send(to_send) else: - logger.warning('Lock timeout sending %s RPC to %s:%s on pool port %s' % - (rq.type, self.host, self.port, pool_id)) + logger.warning('Lock timeout sending %s RPC to %s:%s on pool port %s', + rq.type, self.host, self.port, pool_id) raise RegionServerException(region_client=self) except socket.error: # RegionServer dead? @@ -194,8 +194,8 @@ def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): except socket.error: raise RegionServerException(region_client=self) else: - logger.warning('Lock timeout receive %s RPC to %s:%s on pool port %s' % - (rq.type, self.host, self.port, pool_id)) + logger.warning('Lock timeout receive %s RPC to %s:%s on pool port %s', + rq.type, self.host, self.port, pool_id) raise RegionServerException(region_client=self) # Pass in the full data as well as your current position to the # decoder. It'll then return two variables: @@ -245,8 +245,7 @@ def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): def _bad_call_id(self, my_id, my_request, msg_id, data, lock_timeout=10): with acquire_timeout(self.missed_rpcs_lock, lock_timeout) as acquired: if acquired: - logger.debug( - "Received invalid RPC ID. Got: %s, Expected: %s.", msg_id, my_id) + logger.debug("Received invalid RPC ID. Got: %s, Expected: %s.", msg_id, my_id) self.missed_rpcs[msg_id] = data self.missed_rpcs_condition.notifyAll() while my_id not in self.missed_rpcs: @@ -256,7 +255,7 @@ def _bad_call_id(self, my_id, my_request, msg_id, data, lock_timeout=10): new_data = self.missed_rpcs.pop(my_id) logger.debug("Another thread found my RPC! RPC ID: %s", my_id) else: - logger.warning('Lock timeout bad_call to %s:%s' % (self.host, self.port)) + logger.warning('Lock timeout bad_call to %s:%s', self.host, self.port) raise RegionServerException(region_client=self) return self._receive_rpc(my_id, my_request, data=new_data) @@ -294,7 +293,7 @@ def NewClient(host, port, pool_size, secondary=False): c.pool_size = pool_size for x in range(pool_size): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.connect((host, int(port))) + s.connect((c.host, int(port))) _send_hello(s) s.settimeout(2) c.sock_pool.append(s) From 8f39ac025f0efa466820ac296f9774ecd64b9afd Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Fri, 19 May 2023 16:23:33 -0700 Subject: [PATCH 40/53] using a ThreadPoolExecutor with matching number of works to socket pool size --- pybase/region/client.py | 100 ++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 66 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index bfad8e4..cb1651c 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -17,10 +17,11 @@ import logging import socket +from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager from io import BytesIO from struct import pack, unpack -from threading import Condition, Lock +from threading import current_thread, Condition, Lock from ..exceptions import (NoSuchColumnFamilyException, NotServingRegionException, PyBaseException, RegionMovedException, RegionOpeningException, RegionServerException) @@ -67,11 +68,10 @@ def __init__(self, host, port, secondary): self.host = host.decode('utf8') if isinstance(host, bytes) else host self.port = port.decode('utf8') if isinstance(port, bytes) else port self.pool_size = 0 - # We support connection pools so have lists of sockets and read/write - # mutexes on them. + + self.thread_pool = None self.sock_pool = [] - self.write_lock_pool = [] - self.read_lock_pool = [] + # Why yes, we do have a mutex protecting a single variable. self.call_lock = Lock() self.call_id = 0 @@ -142,7 +142,6 @@ def _send_request(self, rq, lock_timeout=10): to_send = pack(">IB", total_length - 4, len(serialized_header)) to_send += serialized_header + rpc_length_bytes + serialized_rpc - pool_id = my_id % self.pool_size try: # todo: quick hack to patch a deadlock happening here. Needs revisiting. with acquire_timeout(self.write_lock_pool[pool_id], lock_timeout) as acquired: @@ -158,10 +157,10 @@ def _send_request(self, rq, lock_timeout=10): # RegionServer dead? raise RegionServerException(region_client=self) # Message is sent! Now go listen for the results. - return self._receive_rpc(my_id, rq) + future = self.thread_pool.submit(Client.send_and_receive_rpc, [self, my_id, rq, to_send]) + return future.result() - # Called after sending an RPC, listens for the response and builds the - # correct pbResponse object. + # Sending an RPC, listens for the response and builds the correct pbResponse object. # # The raw bytes we receive are composed (in order) - # @@ -171,32 +170,30 @@ def _send_request(self, rq, lock_timeout=10): # 4. A varint representing the length of the serialized ResponseMessage. # 5. The ResponseMessage. # - def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): + @staticmethod + def send_and_receive_rpc(client, call_id, rq, to_send): + thread_name = current_thread().name + sp = thread_name.split("_") # i.e. splitting "ThreadPoolExecutor-1_0" + pool_id = int(sp[1]) # thread number is now responsible for only using its matching socket + + client.sock_pool[pool_id].send(to_send) + # If the field data is populated that means we should process from that # instead of the socket. - full_data = data - if data is None: - pool_id = call_id % self.pool_size - # Total message length is going to be the first four bytes - # (little-endian uint32) - with acquire_timeout(self.read_lock_pool[pool_id], lock_timeout) as acquired: - if acquired: - try: - msg_length = self._recv_n(self.sock_pool[pool_id], 4) - if msg_length is None: - raise - msg_length = unpack(">I", msg_length)[0] - # The message is then going to be however many bytes the first four - # bytes specified. We don't want to overread or underread as that'll - # cause havoc. - full_data = self._recv_n( - self.sock_pool[pool_id], msg_length) - except socket.error: - raise RegionServerException(region_client=self) - else: - logger.warning('Lock timeout receive %s RPC to %s:%s on pool port %s', - rq.type, self.host, self.port, pool_id) - raise RegionServerException(region_client=self) + full_data = None + # Total message length is going to be the first four bytes + # (little-endian uint32) + try: + msg_length = Client._recv_n(self.sock_pool[pool_id], 4) + if msg_length is None: + raise + msg_length = unpack(">I", msg_length)[0] + # The message is then going to be however many bytes the first four + # bytes specified. We don't want to overread or underread as that'll + # cause havoc. + full_data = Client._recv_n(self.sock_pool[pool_id], msg_length) + except socket.error: + raise RegionServerException(region_client=self) # Pass in the full data as well as your current position to the # decoder. It'll then return two variables: # - next_pos: The number of bytes of data specified by the varint @@ -205,11 +202,7 @@ def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): header = ResponseHeader() header.ParseFromString(full_data[pos: pos + next_pos]) pos += next_pos - if header.call_id != call_id: - # call_ids don't match? Looks like a different thread nabbed our - # response. - return self._bad_call_id(call_id, rq, header.call_id, full_data) - elif header.exception.exception_class_name != '': + if header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name if exception_class in \ @@ -234,35 +227,11 @@ def _receive_rpc(self, call_id, rq, data=None, lock_timeout=10): # The rpc is fully built! return rpc - # Receive an RPC with incorrect call_id? - # 1. Acquire lock - # 2. Place raw data into missed_rpcs with key call_id - # 3. Notify all other threads to wake up (nothing will happen until you release the lock) - # 4. WHILE: Your call_id is not in the dictionary - # 4.5 Call wait() on the conditional and get comfy. - # 5. Pop your data out - # 6. Release the lock - def _bad_call_id(self, my_id, my_request, msg_id, data, lock_timeout=10): - with acquire_timeout(self.missed_rpcs_lock, lock_timeout) as acquired: - if acquired: - logger.debug("Received invalid RPC ID. Got: %s, Expected: %s.", msg_id, my_id) - self.missed_rpcs[msg_id] = data - self.missed_rpcs_condition.notifyAll() - while my_id not in self.missed_rpcs: - if self.shutting_down: - raise RegionServerException(region_client=self) - self.missed_rpcs_condition.wait(lock_timeout) - new_data = self.missed_rpcs.pop(my_id) - logger.debug("Another thread found my RPC! RPC ID: %s", my_id) - else: - logger.warning('Lock timeout bad_call to %s:%s', self.host, self.port) - raise RegionServerException(region_client=self) - return self._receive_rpc(my_id, my_request, data=new_data) - # Receives exactly n bytes from the socket. Will block until n bytes are # received. If a socket is closed (RegionServer died) then raise an # exception that goes all the way back to the main client - def _recv_n(self, sock, n): + @staticmethod + def _recv_n(sock, n): partial_str = BytesIO() partial_len = 0 while partial_len < n: @@ -291,14 +260,13 @@ def NewClient(host, port, pool_size, secondary=False): c = Client(host, port, secondary) try: c.pool_size = pool_size + c.thread_pool = ThreadPoolExecutor(pool_size) for x in range(pool_size): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((c.host, int(port))) _send_hello(s) s.settimeout(2) c.sock_pool.append(s) - c.read_lock_pool.append(Lock()) - c.write_lock_pool.append(Lock()) except (socket.error, socket.timeout): return None return c From 9ea538fb9e0d4cb72f7b7bc60338e3b201b915f0 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Fri, 19 May 2023 18:09:41 -0700 Subject: [PATCH 41/53] removing more unneeded code --- pybase/region/client.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index cb1651c..b19090a 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -142,21 +142,7 @@ def _send_request(self, rq, lock_timeout=10): to_send = pack(">IB", total_length - 4, len(serialized_header)) to_send += serialized_header + rpc_length_bytes + serialized_rpc - try: - # todo: quick hack to patch a deadlock happening here. Needs revisiting. - with acquire_timeout(self.write_lock_pool[pool_id], lock_timeout) as acquired: - if acquired: - logger.debug('Sending %s RPC to %s:%s on pool port %s', - rq.type, self.host, self.port, pool_id) - self.sock_pool[pool_id].send(to_send) - else: - logger.warning('Lock timeout sending %s RPC to %s:%s on pool port %s', - rq.type, self.host, self.port, pool_id) - raise RegionServerException(region_client=self) - except socket.error: - # RegionServer dead? - raise RegionServerException(region_client=self) - # Message is sent! Now go listen for the results. + # send and receive the request future = self.thread_pool.submit(Client.send_and_receive_rpc, [self, my_id, rq, to_send]) return future.result() From 2776bd2a071cb26570bfb5910f4c89aa8443b2c1 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Sat, 20 May 2023 06:13:17 -0700 Subject: [PATCH 42/53] send in the try block... where it belongs --- .gitignore | 4 +++- pybase/region/client.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 6c73837..4890598 100644 --- a/.gitignore +++ b/.gitignore @@ -108,4 +108,6 @@ venv.bak/ # mypy .mypy_cache/ .dmypy.json -dmypy.json \ No newline at end of file +dmypy.json + +.idea \ No newline at end of file diff --git a/pybase/region/client.py b/pybase/region/client.py index b19090a..c4435fe 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -162,14 +162,14 @@ def send_and_receive_rpc(client, call_id, rq, to_send): sp = thread_name.split("_") # i.e. splitting "ThreadPoolExecutor-1_0" pool_id = int(sp[1]) # thread number is now responsible for only using its matching socket - client.sock_pool[pool_id].send(to_send) - # If the field data is populated that means we should process from that # instead of the socket. full_data = None # Total message length is going to be the first four bytes # (little-endian uint32) try: + client.sock_pool[pool_id].send(to_send) + msg_length = Client._recv_n(self.sock_pool[pool_id], 4) if msg_length is None: raise From 56bf755f0c320acd0994e1af85b2ee1159420896 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Wed, 24 May 2023 09:45:31 -0700 Subject: [PATCH 43/53] switching send_and_receive_rpc to not be static --- pybase/region/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index c4435fe..c484e40 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -143,7 +143,7 @@ def _send_request(self, rq, lock_timeout=10): to_send += serialized_header + rpc_length_bytes + serialized_rpc # send and receive the request - future = self.thread_pool.submit(Client.send_and_receive_rpc, [self, my_id, rq, to_send]) + future = self.thread_pool.submit(self.send_and_receive_rpc, rq, to_send) return future.result() # Sending an RPC, listens for the response and builds the correct pbResponse object. @@ -156,8 +156,8 @@ def _send_request(self, rq, lock_timeout=10): # 4. A varint representing the length of the serialized ResponseMessage. # 5. The ResponseMessage. # - @staticmethod - def send_and_receive_rpc(client, call_id, rq, to_send): + # @staticmethod + def send_and_receive_rpc(self, rq, to_send): thread_name = current_thread().name sp = thread_name.split("_") # i.e. splitting "ThreadPoolExecutor-1_0" pool_id = int(sp[1]) # thread number is now responsible for only using its matching socket @@ -168,7 +168,7 @@ def send_and_receive_rpc(client, call_id, rq, to_send): # Total message length is going to be the first four bytes # (little-endian uint32) try: - client.sock_pool[pool_id].send(to_send) + self.sock_pool[pool_id].send(to_send) msg_length = Client._recv_n(self.sock_pool[pool_id], 4) if msg_length is None: From 99af621c732ad7a6537ff4a13b3352fafbf33883 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Wed, 24 May 2023 12:20:22 -0700 Subject: [PATCH 44/53] receive_rpc will call itself if the call_id was not the same as the send --- pybase/region/client.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index c484e40..fd34242 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -143,8 +143,8 @@ def _send_request(self, rq, lock_timeout=10): to_send += serialized_header + rpc_length_bytes + serialized_rpc # send and receive the request - future = self.thread_pool.submit(self.send_and_receive_rpc, rq, to_send) - return future.result() + future = self.thread_pool.submit(self.send_and_receive_rpc, my_id, rq, to_send) + return future.result(timeout=60) # Sending an RPC, listens for the response and builds the correct pbResponse object. # @@ -156,20 +156,26 @@ def _send_request(self, rq, lock_timeout=10): # 4. A varint representing the length of the serialized ResponseMessage. # 5. The ResponseMessage. # - # @staticmethod - def send_and_receive_rpc(self, rq, to_send): + def send_and_receive_rpc(self, call_id, rq, to_send): thread_name = current_thread().name sp = thread_name.split("_") # i.e. splitting "ThreadPoolExecutor-1_0" pool_id = int(sp[1]) # thread number is now responsible for only using its matching socket + try: + self.sock_pool[pool_id].send(to_send) + except socket.error: + raise RegionServerException(region_client=self) + + return self.receive_rpc(pool_id=pool_id, call_id=call_id, rq=rq) + + + def receive_rpc(self, pool_id, call_id, rq, data=None): # If the field data is populated that means we should process from that # instead of the socket. - full_data = None + full_data = data # Total message length is going to be the first four bytes # (little-endian uint32) try: - self.sock_pool[pool_id].send(to_send) - msg_length = Client._recv_n(self.sock_pool[pool_id], 4) if msg_length is None: raise @@ -188,7 +194,11 @@ def send_and_receive_rpc(self, rq, to_send): header = ResponseHeader() header.ParseFromString(full_data[pos: pos + next_pos]) pos += next_pos - if header.exception.exception_class_name != '': + if header.call_id != call_id: + # Receive an RPC with incorrect call_id, so call receive again to receive the next + # data on the socket. Most likely, this means that + return self.receive_rpc(pool_id, call_id, rq) + elif header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name if exception_class in \ @@ -213,6 +223,7 @@ def send_and_receive_rpc(self, rq, to_send): # The rpc is fully built! return rpc + # Receives exactly n bytes from the socket. Will block until n bytes are # received. If a socket is closed (RegionServer died) then raise an # exception that goes all the way back to the main client From 76538728fc3113f74338962e379589bacdd3b6f7 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Wed, 24 May 2023 12:30:23 -0700 Subject: [PATCH 45/53] comment cleanup --- pybase/region/client.py | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index fd34242..190d2b6 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -75,24 +75,7 @@ def __init__(self, host, port, secondary): # Why yes, we do have a mutex protecting a single variable. self.call_lock = Lock() self.call_id = 0 - # This dictionary and associated sync primitives are for when _receive_rpc - # receives an RPC that isn't theirs. If a thread gets one that isn't - # theirs it means there's another thread who also just sent an RPC. The - # other thread will also get the wrong call_id. So how do we make them - # switch RPCs? - # - # Receive an RPC with incorrect call_id? - # 1. Acquire lock - # 2. Place raw data into missed_rpcs with key call_id - # 3. Notify all other threads to wake up (nothing will happen until you release the - # lock) - # 4. WHILE: Your call_id is not in the dictionary - # 4.5 Call wait() on the conditional and get comfy. - # 5. Pop your data out - # 6. Release the lock - self.missed_rpcs = {} - self.missed_rpcs_lock = Lock() - self.missed_rpcs_condition = Condition(self.missed_rpcs_lock) + # Set to true when .close is called - this allows threads/greenlets # stuck in _bad_call_id to escape into the error handling code. self.shutting_down = False @@ -167,12 +150,10 @@ def send_and_receive_rpc(self, call_id, rq, to_send): return self.receive_rpc(pool_id=pool_id, call_id=call_id, rq=rq) - - def receive_rpc(self, pool_id, call_id, rq, data=None): - + def receive_rpc(self, pool_id, call_id, rq): # If the field data is populated that means we should process from that # instead of the socket. - full_data = data + full_data = None # Total message length is going to be the first four bytes # (little-endian uint32) try: @@ -196,7 +177,7 @@ def receive_rpc(self, pool_id, call_id, rq, data=None): pos += next_pos if header.call_id != call_id: # Receive an RPC with incorrect call_id, so call receive again to receive the next - # data on the socket. Most likely, this means that + # data on the socket. Likely, this means that that some caller abandoned their request return self.receive_rpc(pool_id, call_id, rq) elif header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. @@ -223,7 +204,6 @@ def receive_rpc(self, pool_id, call_id, rq, data=None): # The rpc is fully built! return rpc - # Receives exactly n bytes from the socket. Will block until n bytes are # received. If a socket is closed (RegionServer died) then raise an # exception that goes all the way back to the main client @@ -246,9 +226,6 @@ def close(self): sock.close() # We could still have greenlets waiting in the bad_call_id pools! Wake # them up so they can fail to error handling as well. - self.missed_rpcs_condition.acquire() - self.missed_rpcs_condition.notifyAll() - self.missed_rpcs_condition.release() # Creates a new RegionServer client. Creates the socket, initializes the From 2497c65cd5ea5bd7d40a906dc13eccd2b3645300 Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Wed, 24 May 2023 13:47:49 -0700 Subject: [PATCH 46/53] adding call_timeout parameter to Client constructor --- pybase/region/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 190d2b6..c9c19d8 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -64,12 +64,13 @@ class Client(object): # - call_id: A monotonically increasing int used as a sequence number for rpcs. This way # we can match incoming responses with the rpc that made the request. - def __init__(self, host, port, secondary): + def __init__(self, host, port, secondary, call_timeout=60): self.host = host.decode('utf8') if isinstance(host, bytes) else host self.port = port.decode('utf8') if isinstance(port, bytes) else port self.pool_size = 0 self.thread_pool = None + self.thread_pool_timeout = call_timeout self.sock_pool = [] # Why yes, we do have a mutex protecting a single variable. @@ -127,7 +128,7 @@ def _send_request(self, rq, lock_timeout=10): # send and receive the request future = self.thread_pool.submit(self.send_and_receive_rpc, my_id, rq, to_send) - return future.result(timeout=60) + return future.result(timeout=self.thread_pool_timeout) # Sending an RPC, listens for the response and builds the correct pbResponse object. # @@ -230,8 +231,8 @@ def close(self): # Creates a new RegionServer client. Creates the socket, initializes the # connection and returns an instance of Client. -def NewClient(host, port, pool_size, secondary=False): - c = Client(host, port, secondary) +def NewClient(host, port, pool_size, secondary=False, call_timeout=60): + c = Client(host, port, secondary, call_timeout) try: c.pool_size = pool_size c.thread_pool = ThreadPoolExecutor(pool_size) From 6886130bd28422aaff1b76b24ed1324cd2c95e7d Mon Sep 17 00:00:00 2001 From: Zachary Pitts Date: Wed, 24 May 2023 13:50:38 -0700 Subject: [PATCH 47/53] updating version to 4.0.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4235244..01bf638 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='0.3.3', + version='4.0.0', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From 983e0115fd299c592d5674dadaa5f864da642d55 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Mon, 12 Feb 2024 14:59:13 -0400 Subject: [PATCH 48/53] Add support for `get_many` --- pybase/client.py | 41 +++++++++++++++++++++++++++++++++++++++ pybase/region/client.py | 40 +++++++++++++++++++++----------------- pybase/request/request.py | 19 +++++++++++++++++- 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index 3dfa029..214f64b 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -14,6 +14,7 @@ limitations under the License. """ from __future__ import absolute_import, print_function, unicode_literals +from collections import defaultdict import logging from builtins import str @@ -181,6 +182,46 @@ def get(self, table, key, families={}, filters=None): e._handle_exception(self, dest_region=dest_region) # Everything should be dandy now. Repeat the request! return self.get(table, key, families=families, filters=filters) + + def get_many(self, table, keys, families=None): + """ + get row or specified cell with optional filter for all provided keys + :param table: hbase table + :param key: list of row key + :param families: (optional) specifies columns to get, + e.g., {"columnFamily1":["col1","col2"], "colFamily2": "col3"} + :return: tuple of (list of responses with cells, list of exceptions that occurred) + """ + try: + if len(keys) == 0: + return [] + # need a region client to originate the request + client = self._find_hosting_region(table, keys[0]).region_client + + grouped_by_region = defaultdict(list) + for key in keys: + dest_region = self._find_hosting_region(table, key) + grouped_by_region[dest_region].append(key) + + rq = request.multi_get(grouped_by_region, families) + response = client._send_request(rq) + results = [] + errors = [] + for ra_result in response.regionActionResult: + if ra_result.exception.name != "": + errors.append(client._parse_exception(ra_result.exception.name, + ra_result.exception.value)) + else: + for res_or_err in ra_result.regionActionResult: + if res_or_err.exception.name != "": + errors.append(client._parse_exception(res_or_err.exception.name, + res_or_err.exception.value)) + else: + results.append(Result(res_or_err.result)) + return results, errors + except PyBaseException as e: + e._handle_exception(self, dest_region=dest_region) + return self.get_many(table, key, families=families) def put(self, table, key, values): return self._mutate(table, key, values, request.put_request) diff --git a/pybase/region/client.py b/pybase/region/client.py index c9c19d8..4276442 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -26,7 +26,7 @@ from ..exceptions import (NoSuchColumnFamilyException, NotServingRegionException, PyBaseException, RegionMovedException, RegionOpeningException, RegionServerException) from ..helpers import varint -from ..pb.Client_pb2 import GetResponse, MutateResponse, ScanResponse +from ..pb.Client_pb2 import GetResponse, MutateResponse, ScanResponse, MultiResponse from ..pb.RPC_pb2 import ConnectionHeader, RequestHeader, ResponseHeader logger = logging.getLogger(__name__) @@ -40,7 +40,8 @@ response_types = { b"Get": GetResponse, b"Mutate": MutateResponse, - b"Scan": ScanResponse + b"Scan": ScanResponse, + b"Multi": MultiResponse } @@ -150,6 +151,23 @@ def send_and_receive_rpc(self, call_id, rq, to_send): raise RegionServerException(region_client=self) return self.receive_rpc(pool_id=pool_id, call_id=call_id, rq=rq) + + def _parse_exception(exception_class, stack_trace, region_client): + if exception_class in ('org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException', + "java.io.IOException"): + return NoSuchColumnFamilyException() + elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionMovedException': + return RegionMovedException(region_client=region_client) + elif exception_class == 'org.apache.hadoop.hbase.NotServingRegionException': + return NotServingRegionException(region_client=region_client) + elif exception_class == \ + 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': + return RegionServerException(region_client=region_client) + elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionOpeningException': + return RegionOpeningException(region_client=region_client) + else: + return PyBaseException( + exception_class + ". Remote traceback:\n%s" % stack_trace) def receive_rpc(self, pool_id, call_id, rq): # If the field data is populated that means we should process from that @@ -183,22 +201,8 @@ def receive_rpc(self, pool_id, call_id, rq): elif header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name - if exception_class in \ - {'org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException', - "java.io.IOException"}: - raise NoSuchColumnFamilyException() - elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionMovedException': - raise RegionMovedException(region_client=self) - elif exception_class == 'org.apache.hadoop.hbase.NotServingRegionException': - raise NotServingRegionException(region_client=self) - elif exception_class == \ - 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': - raise RegionServerException(region_client=self) - elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionOpeningException': - raise RegionOpeningException(region_client=self) - else: - raise PyBaseException( - exception_class + ". Remote traceback:\n%s" % header.exception.stack_trace) + if err := self._parse_exception(exception_class, header.exception.stack_trace, self): + raise err next_pos, pos = decoder(full_data, pos) rpc = response_types[rq.type]() rpc.ParseFromString(full_data[pos: pos + next_pos]) diff --git a/pybase/request/request.py b/pybase/request/request.py index 3c54348..8b8a886 100644 --- a/pybase/request/request.py +++ b/pybase/request/request.py @@ -4,7 +4,7 @@ from ..exceptions import MalformedFamilies, MalformedValues from ..filters import _to_filter -from ..pb.Client_pb2 import Column, GetRequest, MutateRequest, MutationProto, ScanRequest +from ..pb.Client_pb2 import Action, Column, GetRequest, MultiRequest, MutateRequest, MutationProto, RegionAction, ScanRequest # Table + Family used when requesting meta information from the # MetaRegionServer @@ -42,6 +42,23 @@ def get_request(region, key, families, filters): rq.get.filter.CopyFrom(pbFilter) return Request(b"Get", rq) +def region_action(region, keys, families): + ra = RegionAction() + ra.region.type = 1 + ra.region.value = region.region_name + ra.atomic = False + for key in keys: + action = Action() + action.get.row = key + action.get.column.extend(families_to_columns(families)) + ra.action.append(action) + return ra + +def multi_get(regions_and_keys, families): + rq = MultiRequest() + rq.regionAction.extend([region_action(region, keys, families) + for region, keys in regions_and_keys.items()]) + return Request(b"Multi", rq) def put_request(region, key, values): rq = MutateRequest() From 40486fe2dd59292e76de48f2c2c7f9fe1bc12bce Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Tue, 13 Feb 2024 11:11:02 -0400 Subject: [PATCH 49/53] bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 01bf638..a903560 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup setup(name='pybase', - version='4.0.0', + version='4.1.0', description='Native python client to hbase 1.0+', url='https://github.com/CurleySamuel/PyBase', author='Sam Curley', From dce584f2303036488ab0be8d9cdc840b10ccee24 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Thu, 15 Feb 2024 11:05:51 -0400 Subject: [PATCH 50/53] Fix typo --- pybase/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/client.py b/pybase/client.py index 214f64b..043dca9 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -212,7 +212,7 @@ def get_many(self, table, keys, families=None): errors.append(client._parse_exception(ra_result.exception.name, ra_result.exception.value)) else: - for res_or_err in ra_result.regionActionResult: + for res_or_err in ra_result.resultOrException: if res_or_err.exception.name != "": errors.append(client._parse_exception(res_or_err.exception.name, res_or_err.exception.value)) From a059db73a15d4189c1eead832df0fb7e6b953bb2 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Thu, 15 Feb 2024 11:33:30 -0400 Subject: [PATCH 51/53] Pass entire result into Result --- pybase/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybase/client.py b/pybase/client.py index 043dca9..e49b564 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -217,7 +217,7 @@ def get_many(self, table, keys, families=None): errors.append(client._parse_exception(res_or_err.exception.name, res_or_err.exception.value)) else: - results.append(Result(res_or_err.result)) + results.append(Result(res_or_err)) return results, errors except PyBaseException as e: e._handle_exception(self, dest_region=dest_region) From cea2dc60a0f403bec840006612bd5b02af5f7d62 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Thu, 15 Feb 2024 11:48:56 -0400 Subject: [PATCH 52/53] Fix parse_exception --- pybase/region/client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pybase/region/client.py b/pybase/region/client.py index 4276442..c66c7c4 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -152,19 +152,19 @@ def send_and_receive_rpc(self, call_id, rq, to_send): return self.receive_rpc(pool_id=pool_id, call_id=call_id, rq=rq) - def _parse_exception(exception_class, stack_trace, region_client): + def _parse_exception(self, exception_class, stack_trace): if exception_class in ('org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException', "java.io.IOException"): return NoSuchColumnFamilyException() elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionMovedException': - return RegionMovedException(region_client=region_client) + return RegionMovedException(region_client=self) elif exception_class == 'org.apache.hadoop.hbase.NotServingRegionException': - return NotServingRegionException(region_client=region_client) + return NotServingRegionException(region_client=self) elif exception_class == \ 'org.apache.hadoop.hbase.regionserver.RegionServerStoppedException': - return RegionServerException(region_client=region_client) + return RegionServerException(region_client=self) elif exception_class == 'org.apache.hadoop.hbase.exceptions.RegionOpeningException': - return RegionOpeningException(region_client=region_client) + return RegionOpeningException(region_client=self) else: return PyBaseException( exception_class + ". Remote traceback:\n%s" % stack_trace) @@ -201,7 +201,7 @@ def receive_rpc(self, pool_id, call_id, rq): elif header.exception.exception_class_name != '': # If we're in here it means a remote exception has happened. exception_class = header.exception.exception_class_name - if err := self._parse_exception(exception_class, header.exception.stack_trace, self): + if err := self._parse_exception(exception_class, header.exception.stack_trace): raise err next_pos, pos = decoder(full_data, pos) rpc = response_types[rq.type]() From f747c2e025084d7b46a4e2fdb3679570f75798e8 Mon Sep 17 00:00:00 2001 From: Ian Bishop Date: Fri, 16 Feb 2024 12:06:00 -0400 Subject: [PATCH 53/53] Use threadpool sanely, order reqs correctly --- pybase/client.py | 86 +++++++++++++++++++++++++---------------- pybase/region/client.py | 8 ++-- 2 files changed, 57 insertions(+), 37 deletions(-) diff --git a/pybase/client.py b/pybase/client.py index e49b564..1551f9b 100644 --- a/pybase/client.py +++ b/pybase/client.py @@ -18,6 +18,7 @@ import logging from builtins import str +from concurrent.futures import as_completed from itertools import chain from threading import Condition, Lock @@ -42,7 +43,7 @@ class MainClient(object): - def __init__(self, zkquorum, pool_size, secondary): + def __init__(self, zkquorum, pool_size, secondary, call_timeout=60): # Location of the ZooKeeper quorum (csv) self.zkquorum = zkquorum # Connection pool size per region server (and master!) @@ -63,6 +64,8 @@ def __init__(self, zkquorum, pool_size, secondary): # Capture if this client is being used for secondary operations # We don't really care if it fails, best effort only. self.secondary = secondary + # How long to wait before a call times out + self.call_timeout = call_timeout self.zk_client = zk.connect(zkquorum) @@ -192,36 +195,49 @@ def get_many(self, table, keys, families=None): e.g., {"columnFamily1":["col1","col2"], "colFamily2": "col3"} :return: tuple of (list of responses with cells, list of exceptions that occurred) """ + if len(keys) == 0: + return [] + + grouped_by_server = defaultdict(lambda: defaultdict(list)) + for key in keys: + dest_region = self._find_hosting_region(table, key) + # we must call each region server, which can server many key ranges + grouped_by_server[dest_region.region_client.host][dest_region].append(key) + + results = [] + errors = [] + tasks = [] + for grouped_by_region in grouped_by_server.values(): + try: + dest_region = next(iter(grouped_by_region.keys())) + client = dest_region.region_client + rq = request.multi_get(grouped_by_region, families) + tasks.append(client._send_request(rq, _async=True)) + except PyBaseException as e: + e._handle_exception(self, dest_region=dest_region) + errors.append(e) + try: - if len(keys) == 0: - return [] - # need a region client to originate the request - client = self._find_hosting_region(table, keys[0]).region_client - - grouped_by_region = defaultdict(list) - for key in keys: - dest_region = self._find_hosting_region(table, key) - grouped_by_region[dest_region].append(key) - - rq = request.multi_get(grouped_by_region, families) - response = client._send_request(rq) - results = [] - errors = [] - for ra_result in response.regionActionResult: - if ra_result.exception.name != "": - errors.append(client._parse_exception(ra_result.exception.name, - ra_result.exception.value)) - else: - for res_or_err in ra_result.resultOrException: - if res_or_err.exception.name != "": - errors.append(client._parse_exception(res_or_err.exception.name, - res_or_err.exception.value)) + for f in as_completed(tasks, timeout=self.call_timeout * len(grouped_by_server)): + try: + response = f.result() + for ra_result in response.regionActionResult: + if ra_result.exception.name != "": + errors.append(client._parse_exception(ra_result.exception.name, + ra_result.exception.value)) else: - results.append(Result(res_or_err)) - return results, errors - except PyBaseException as e: - e._handle_exception(self, dest_region=dest_region) - return self.get_many(table, key, families=families) + for res_or_err in ra_result.resultOrException: + if res_or_err.exception.name != "": + errors.append(client._parse_exception(res_or_err.exception.name, + res_or_err.exception.value)) + else: + results.append(Result(res_or_err)) + except PyBaseException as e: + e._handle_exception(self, dest_region=dest_region) + errors.append(e) + except TimeoutError: + errors.append(e) + return results, errors def put(self, table, key, values): return self._mutate(table, key, values, request.put_request) @@ -436,7 +452,8 @@ def _create_new_region(self, response, table): new_region.region_client = self.reverse_client_cache[server_loc] else: # Otherwise we need to create a new region client instance. - new_client = region.NewClient(host, port, self.pool_size, secondary=self.secondary) + new_client = region.NewClient(host, port, self.pool_size, + secondary=self.secondary, call_timeout=self.call_timeout) if new_client is None: # Welp. We can't connect to the server that the Master # supplied. Raise an exception. @@ -458,8 +475,9 @@ def update_master_client(self, ip, port): try: # Try creating a new client instance and setting it as the new master_client. - self.master_client = region.NewClient( - ip, port, self.pool_size, secondary=self.secondary) + self.master_client = region.NewClient(ip, port, self.pool_size, + secondary=self.secondary, + call_timeout=self.call_timeout) logger.info("Updated master client to %s:%s", ip, port) except RegionServerException: raise MasterServerException(ip, port, secondary=self.secondary) @@ -564,5 +582,5 @@ def _append_response(self, rsp): # location of ZooKeeper this function will ask ZK for the location of the # meta table and create the region client responsible for future meta # lookups (masterclient). Returns an instance of MainClient -def NewClient(zkquorum, socket_pool_size=1, secondary=False): - return MainClient(zkquorum, socket_pool_size, secondary) +def NewClient(zkquorum, socket_pool_size=1, secondary=False, call_timeout=60): + return MainClient(zkquorum, socket_pool_size, secondary, call_timeout=call_timeout) diff --git a/pybase/region/client.py b/pybase/region/client.py index c66c7c4..608a96f 100644 --- a/pybase/region/client.py +++ b/pybase/region/client.py @@ -102,7 +102,7 @@ def __init__(self, host, port, secondary, call_timeout=60): # 4. A varint representing the length of the serialized RPC. # 5. The serialized RPC. # - def _send_request(self, rq, lock_timeout=10): + def _send_request(self, rq, lock_timeout=10, _async=False): with acquire_timeout(self.call_lock, lock_timeout) as acquired: if acquired: my_id = self.call_id @@ -129,6 +129,8 @@ def _send_request(self, rq, lock_timeout=10): # send and receive the request future = self.thread_pool.submit(self.send_and_receive_rpc, my_id, rq, to_send) + if _async: + return future return future.result(timeout=self.thread_pool_timeout) # Sending an RPC, listens for the response and builds the correct pbResponse object. @@ -184,7 +186,7 @@ def receive_rpc(self, pool_id, call_id, rq): # bytes specified. We don't want to overread or underread as that'll # cause havoc. full_data = Client._recv_n(self.sock_pool[pool_id], msg_length) - except socket.error: + except socket.error as e: raise RegionServerException(region_client=self) # Pass in the full data as well as your current position to the # decoder. It'll then return two variables: @@ -244,7 +246,7 @@ def NewClient(host, port, pool_size, secondary=False, call_timeout=60): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((c.host, int(port))) _send_hello(s) - s.settimeout(2) + s.settimeout(call_timeout) c.sock_pool.append(s) except (socket.error, socket.timeout): return None