From 1d2021f7d79a317d20d1c610a7b5bf846150909e Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 24 Aug 2022 15:37:43 -0500 Subject: [PATCH 01/12] Add accelerator --- pymysql/connections.py | 83 +- pymysql/cursors.py | 60 +- setup.py | 7 +- src/accel.c | 2106 ++++++++++++++++++++++++++++++++++++++++ src/accel.c.orig | 1935 ++++++++++++++++++++++++++++++++++++ 5 files changed, 4181 insertions(+), 10 deletions(-) create mode 100644 src/accel.c create mode 100644 src/accel.c.orig diff --git a/pymysql/connections.py b/pymysql/connections.py index 94ea545fc..88cdf2b3d 100644 --- a/pymysql/connections.py +++ b/pymysql/connections.py @@ -3,6 +3,7 @@ # Error codes: # https://dev.mysql.com/doc/refman/5.5/en/error-handling.html import errno +import functools import os import socket import struct @@ -10,12 +11,26 @@ import traceback import warnings +try: + import _pymysqlsv +except ImportError: + _pymysqlsv = None + warnings.warn('Accelerator extension could not be loaded; ' + 'running in pure Python mode.', RuntimeWarning) + from . import _auth from .charset import charset_by_name, charset_by_id from .constants import CLIENT, COMMAND, CR, ER, FIELD_TYPE, SERVER_STATUS from . import converters -from .cursors import Cursor +from .cursors import ( + Cursor, + SSCursor, + DictCursor, + SSDictCursor, + SSCursorSV, + SSDictCursorSV, +) from .optionfile import Parser from .protocol import ( dump_packet, @@ -58,6 +73,7 @@ FIELD_TYPE.GEOMETRY, } +UNSET = "unset" DEFAULT_CHARSET = "utf8mb4" @@ -151,6 +167,15 @@ class Connection: :param named_pipe: Not supported. :param db: **DEPRECATED** Alias for database. :param passwd: **DEPRECATED** Alias for password. + :param output_type: Type of result to return: tuples, namedtuples, dicts, numpy or pandas. + :param parse_json: Parse JSON values into Python objects? + :param invalid_date_value: Value to use in place of an invalid date. By default, a string + containing the invalid content is returned.
+ :param invalid_time_value: Value to use in place of an invalid time. By default, a string + containing the invalid content is returned. + :param invalid_datetime_value: Value to use in place of an invalid datetime. By default, + a string containing the invalid content is returned. + :param pure_python: Should we ignore the C extension even if it's available? See `Connection `_ in the specification. @@ -198,6 +223,12 @@ def __init__( ssl_key=None, ssl_verify_cert=None, ssl_verify_identity=None, + output_type='tuples', + parse_json=False, + invalid_date_value=UNSET, + invalid_time_value=UNSET, + invalid_datetime_value=UNSET, + pure_python=False, compress=None, # not supported named_pipe=None, # not supported passwd=None, # deprecated @@ -315,7 +346,24 @@ def _config(key, arg): self.client_flag = client_flag + self.pure_python = pure_python + self.unbuffered = False + self.output_type = output_type self.cursorclass = cursorclass + self.resultclass = MySQLResult + + # The C extension handles these types internally. + if _pymysqlsv is not None and not self.pure_python: + self.resultclass = MySQLResultSV + if self.cursorclass is SSCursor: + self.cursorclass = SSCursorSV + self.unbuffered = True + elif self.cursorclass is DictCursor: + self.output_type = 'dicts' + elif self.cursorclass is SSDictCursor: + self.cursorclass = SSDictCursorSV + self.unbuffered = True + self.output_type = 'dicts' self._result = None self._affected_rows = 0 @@ -327,6 +375,11 @@ def _config(key, arg): if conv is None: conv = converters.conversions + self.invalid_date_value = invalid_date_value + self.invalid_time_value = invalid_time_value + self.invalid_datetime_value = invalid_datetime_value + self.parse_json = parse_json + # Need for MySQLdb compatibility. 
self.encoders = {k: v for (k, v) in conv.items() if type(k) is not int} self.decoders = {k: v for (k, v) in conv.items() if type(k) is int} @@ -338,7 +391,7 @@ def _config(key, arg): self.server_public_key = server_public_key self._connect_attrs = { - "_client_name": "pymysql", + "_client_name": "pymysqlsv", "_pid": str(os.getpid()), "_client_version": VERSION_STRING, } @@ -484,7 +537,7 @@ def rollback(self): def show_warnings(self): """Send the "SHOW WARNINGS" SQL command.""" self._execute_command(COMMAND.COM_QUERY, "SHOW WARNINGS") - result = MySQLResult(self) + result = self.resultclass(self) result.read() return result.rows @@ -546,11 +599,11 @@ def query(self, sql, unbuffered=False): if isinstance(sql, str): sql = sql.encode(self.encoding, "surrogateescape") self._execute_command(COMMAND.COM_QUERY, sql) - self._affected_rows = self._read_query_result(unbuffered=unbuffered) + self._affected_rows = self._read_query_result(unbuffered=unbuffered or self.unbuffered) return self._affected_rows def next_result(self, unbuffered=False): - self._affected_rows = self._read_query_result(unbuffered=unbuffered) + self._affected_rows = self._read_query_result(unbuffered=unbuffered or self.unbuffered) return self._affected_rows def affected_rows(self): @@ -766,16 +819,16 @@ def _write_bytes(self, data): def _read_query_result(self, unbuffered=False): self._result = None - if unbuffered: + if unbuffered or self.unbuffered: try: - result = MySQLResult(self) + result = self.resultclass(self) result.init_unbuffered_query() except: result.unbuffered_active = False result.connection = None raise else: - result = MySQLResult(self) + result = self.resultclass(self) result.read() self._result = result if result.server_status is not None: @@ -1342,6 +1395,20 @@ def _get_descriptions(self): assert eof_packet.is_eof_packet(), "Protocol error, expecting EOF" self.description = tuple(description) +class MySQLResultSV(MySQLResult): + def __init__(self, connection): + 
MySQLResult.__init__(self, connection) + self.options = {k: v for k, v in dict( + default_converters=converters.decoders, + output_type=connection.output_type, + parse_json=connection.parse_json, + invalid_date_value=connection.invalid_date_value, + invalid_time_value=connection.invalid_time_value, + invalid_datetime_value=connection.invalid_datetime_value, + unbuffered=connection.unbuffered, + ).items() if v is not UNSET} + self._read_rowdata_packet = functools.partial(_pymysqlsv.read_rowdata_packet, self) + self._read_rowdata_packet_unbuffered = functools.partial(_pymysqlsv.read_rowdata_packet, self) class LoadLocalFile: def __init__(self, filename, connection): diff --git a/pymysql/cursors.py b/pymysql/cursors.py index 2b5ccca90..73023755c 100644 --- a/pymysql/cursors.py +++ b/pymysql/cursors.py @@ -440,6 +440,10 @@ def read_next(self): def fetchone(self): """Fetch next row.""" self._check_executed() + return self._unchecked_fetchone() + + def _unchecked_fetchone(self): + """Fetch next row.""" row = self.read_next() if row is None: return None @@ -460,7 +464,8 @@ def fetchall_unbuffered(self): however, it doesn't make sense to return everything in a list, as that would use ridiculous memory for large result sets. 
""" - return iter(self.fetchone, None) + self._check_executed() + return iter(self._unchecked_fetchone, None) def __iter__(self): return self.fetchall_unbuffered() @@ -505,6 +510,59 @@ def scroll(self, value, mode="relative"): else: raise err.ProgrammingError("unknown scroll mode %s" % mode) +class SSCursorSV(SSCursor): + """An unbuffered cursor for use with PyMySQLsv.""" + + def _unchecked_fetchone(self): + """Fetch next row.""" + row = self._result._read_rowdata_packet_unbuffered() + if row is None: + return None + self.rownumber += 1 + return row + + def fetchmany(self, size=None): + """Fetch many.""" + self._check_executed() + if size is None: + size = self.arraysize + + rows = [] + for i in range(size): + row = self._result._read_rowdata_packet_unbuffered() + if row is None: + break + rows.append(row) + self.rownumber += 1 + return rows + + def scroll(self, value, mode="relative"): + self._check_executed() + + if mode == "relative": + if value < 0: + raise err.NotSupportedError( + "Backwards scrolling not supported by this cursor" + ) + + for _ in range(value): + self._result._read_rowdata_packet_unbuffered() + self.rownumber += value + elif mode == "absolute": + if value < self.rownumber: + raise err.NotSupportedError( + "Backwards scrolling not supported by this cursor" + ) + + end = value - self.rownumber + for _ in range(end): + self._result._read_rowdata_packet_unbuffered() + self.rownumber = value + else: + raise err.ProgrammingError("unknown scroll mode %s" % mode) + +class SSDictCursorSV(SSCursorSV): + """An unbuffered cursor for use with PyMySQLsv, which returns results as a dictionary""" class SSDictCursor(DictCursorMixin, SSCursor): """An unbuffered cursor, which returns results as a dictionary""" diff --git a/setup.py b/setup.py index 7cdc692fb..be0ff86c4 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from setuptools import setup, find_packages +from setuptools import setup, find_packages, Extension version = "1.0.2" @@ 
-35,5 +35,10 @@ "License :: OSI Approved :: MIT License", "Topic :: Database", ], + ext_modules=[ + Extension( + '_pymysqlsv', ['src/accel.c'], py_limited_api=True, + ), + ], keywords="MySQL", ) diff --git a/src/accel.c b/src/accel.c new file mode 100644 index 000000000..208bb442d --- /dev/null +++ b/src/accel.c @@ -0,0 +1,2106 @@ + +#include +#include +#include +#include +#include + +#define MYSQL_ACCEL_OUT_TUPLES 0 +#define MYSQL_ACCEL_OUT_NAMEDTUPLES 1 +#define MYSQL_ACCEL_OUT_DICTS 2 +#define MYSQL_ACCEL_OUT_NUMPY 3 +#define MYSQL_ACCEL_OUT_PANDAS 4 + +#define MYSQL_FLAG_NOT_NULL 1 +#define MYSQL_FLAG_PRI_KEY 2 +#define MYSQL_FLAG_UNIQUE_KEY 4 +#define MYSQL_FLAG_MULTIPLE_KEY 8 +#define MYSQL_FLAG_BLOB 16 +#define MYSQL_FLAG_UNSIGNED 32 +#define MYSQL_FLAG_ZEROFILL 64 +#define MYSQL_FLAG_BINARY 128 +#define MYSQL_FLAG_ENUM 256 +#define MYSQL_FLAG_AUTO_INCREMENT 512 +#define MYSQL_FLAG_TIMESTAMP 1024 +#define MYSQL_FLAG_SET 2048 +#define MYSQL_FLAG_PART_KEY 16384 +#define MYSQL_FLAG_GROUP 32768 /* 1 << 15; flag masks must be single bits */ +#define MYSQL_FLAG_UNIQUE 65536 + +#define MYSQL_TYPE_DECIMAL 0 +#define MYSQL_TYPE_TINY 1 +#define MYSQL_TYPE_SHORT 2 +#define MYSQL_TYPE_LONG 3 +#define MYSQL_TYPE_FLOAT 4 +#define MYSQL_TYPE_DOUBLE 5 +#define MYSQL_TYPE_NULL 6 +#define MYSQL_TYPE_TIMESTAMP 7 +#define MYSQL_TYPE_LONGLONG 8 +#define MYSQL_TYPE_INT24 9 +#define MYSQL_TYPE_DATE 10 +#define MYSQL_TYPE_TIME 11 +#define MYSQL_TYPE_DATETIME 12 +#define MYSQL_TYPE_YEAR 13 +#define MYSQL_TYPE_NEWDATE 14 +#define MYSQL_TYPE_VARCHAR 15 +#define MYSQL_TYPE_BIT 16 +#define MYSQL_TYPE_JSON 245 +#define MYSQL_TYPE_NEWDECIMAL 246 +#define MYSQL_TYPE_ENUM 247 +#define MYSQL_TYPE_SET 248 +#define MYSQL_TYPE_TINY_BLOB 249 +#define MYSQL_TYPE_MEDIUM_BLOB 250 +#define MYSQL_TYPE_LONG_BLOB 251 +#define MYSQL_TYPE_BLOB 252 +#define MYSQL_TYPE_VAR_STRING 253 +#define MYSQL_TYPE_STRING 254 +#define MYSQL_TYPE_GEOMETRY 255 + +#define MYSQL_TYPE_CHAR MYSQL_TYPE_TINY +#define MYSQL_TYPE_INTERVAL MYSQL_TYPE_ENUM + +#define
MYSQL_COLUMN_NULL 251 +#define MYSQL_COLUMN_UNSIGNED_CHAR 251 +#define MYSQL_COLUMN_UNSIGNED_SHORT 252 +#define MYSQL_COLUMN_UNSIGNED_INT24 253 +#define MYSQL_COLUMN_UNSIGNED_INT64 254 + +#define MYSQL_SERVER_MORE_RESULTS_EXISTS 8 + +// 2**24 - 1 +#define MYSQL_MAX_PACKET_LEN 16777215 + +#define EPOCH_TO_DAYS 719528 +#define SECONDS_PER_DAY (24 * 60 * 60) + +#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIMEDELTA 0 +#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIME 1 +#define MYSQL_ACCEL_OPTION_JSON_TYPE_STRING 0 +#define MYSQL_ACCEL_OPTION_JSON_TYPE_OBJ 1 +#define MYSQL_ACCEL_OPTION_BIT_TYPE_BYTES 0 +#define MYSQL_ACCEL_OPTION_BIT_TYPE_INT 1 + +#define CHR2INT1(x) ((x)[0] - '0') +#define CHR2INT2(x) ((((x)[0] - '0') * 10) + ((x)[1] - '0')) +#define CHR2INT3(x) ((((x)[0] - '0') * 1e2) + (((x)[1] - '0') * 10) + ((x)[2] - '0')) +#define CHR2INT4(x) ((((x)[0] - '0') * 1e3) + (((x)[1] - '0') * 1e2) + (((x)[2] - '0') * 10) + ((x)[3] - '0')) +#define CHR2INT6(x) ((((x)[0] - '0') * 1e5) + (((x)[1] - '0') * 1e4) + (((x)[2] - '0') * 1e3) + (((x)[3] - '0') * 1e2) + (((x)[4] - '0') * 10) + (((x)[5] - '0'))) + +#define CHECK_DATE_STR(s, s_l) \ + ((s_l) == 10 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + (s)[1] >= '0' && (s)[1] <= '9' && \ + (s)[2] >= '0' && (s)[2] <= '9' && \ + (s)[3] >= '0' && (s)[3] <= '9' && \ + (s)[4] == '-' && \ + (((s)[5] == '1' && ((s)[6] >= '0' && (s)[6] <= '2')) || \ + ((s)[5] == '0' && ((s)[6] >= '1' && (s)[6] <= '9'))) && \ + (s)[7] == '-' && \ + ((((s)[8] >= '0' && (s)[8] <= '2') && ((s)[9] >= '0' && (s)[9] <= '9')) || \ + ((s)[8] == '3' && ((s)[9] >= '0' && (s)[9] <= '1'))) && \ + !((s)[0] == '0' && (s)[1] == '0' && (s)[2] == '0' && (s)[3] == '0') && \ + !((s)[5] == '0' && (s)[6] == '0') && \ + !((s)[8] == '0' && (s)[9] == '0')) + +#define CHECK_TIME_STR(s, s_l) \ + ((s_l) == 8 && \ + ((((s)[0] >= '0' && (s)[0] <= '1') && ((s)[1] >= '0' && (s)[1] <= '9')) || \ + ((s)[0] == '2' && ((s)[1] >= '0' && (s)[1] <= '3'))) && \ + (s)[2] == ':' && \ + (((s)[3] >= '0' &&
(s)[3] <= '5') && ((s)[4] >= '0' && (s)[4] <= '9')) && \ + (s)[5] == ':' && \ + (((s)[6] >= '0' && (s)[6] <= '5') && ((s)[7] >= '0' && (s)[7] <= '9'))) + +#define CHECK_MICROSECONDS_STR(s, s_l) \ + ((s_l) == 7 && \ + (s)[0] == '.' && \ + (s)[1] >= '0' && (s)[1] <= '9' && \ + (s)[2] >= '0' && (s)[2] <= '9' && \ + (s)[3] >= '0' && (s)[3] <= '9' && \ + (s)[4] >= '0' && (s)[4] <= '9' && \ + (s)[5] >= '0' && (s)[5] <= '9' && \ + (s)[6] >= '0' && (s)[6] <= '9') + +#define CHECK_MILLISECONDS_STR(s, s_l) \ + ((s_l) == 4 && \ + (s)[0] == '.' && \ + (s)[1] >= '0' && (s)[1] <= '9' && \ + (s)[2] >= '0' && (s)[2] <= '9' && \ + (s)[3] >= '0' && (s)[3] <= '9') + +#define CHECK_MICRO_TIME_STR(s, s_l) \ + ((s_l) == 15 && CHECK_TIME_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7)) + +#define CHECK_MILLI_TIME_STR(s, s_l) \ + ((s_l) == 12 && CHECK_TIME_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4)) + +#define CHECK_DATETIME_STR(s, s_l) \ + ((s_l) == 19 && \ + CHECK_DATE_STR(s, 10) && \ + ((s)[10] == ' ' || (s)[10] == 'T') && \ + CHECK_TIME_STR((s)+11, 8)) + +#define CHECK_MICRO_DATETIME_STR(s, s_l) \ + ((s_l) == 26 && \ + CHECK_DATE_STR(s, 10) && \ + ((s)[10] == ' ' || (s)[10] == 'T') && \ + CHECK_MICRO_TIME_STR((s)+11, 15)) + +#define CHECK_MILLI_DATETIME_STR(s, s_l) \ + ((s_l) == 23 && \ + CHECK_DATE_STR(s, 10) && \ + ((s)[10] == ' ' || (s)[10] == 'T') && \ + CHECK_MILLI_TIME_STR((s)+11, 12)) + +#define CHECK_ANY_DATETIME_STR(s, s_l) \ + (((s_l) == 19 && CHECK_DATETIME_STR(s, s_l)) || \ + ((s_l) == 23 && CHECK_MILLI_DATETIME_STR(s, s_l)) || \ + ((s_l) == 26 && CHECK_MICRO_DATETIME_STR(s, s_l))) + +#define DATETIME_SIZE (19) +#define DATETIME_MILLI_SIZE (23) +#define DATETIME_MICRO_SIZE (26) + +#define IS_DATETIME_MILLI(s, s_l) ((s_l) == 23) +#define IS_DATETIME_MICRO(s, s_l) ((s_l) == 26) + +#define CHECK_ANY_TIME_STR(s, s_l) \ + (((s_l) == 8 && CHECK_TIME_STR(s, s_l)) || \ + ((s_l) == 12 && CHECK_MILLI_TIME_STR(s, s_l)) || \ + ((s_l) == 15 && CHECK_MICRO_TIME_STR(s, s_l))) + +#define
TIME_SIZE (8) +#define TIME_MILLI_SIZE (12) +#define TIME_MICRO_SIZE (15) + +#define IS_TIME_MILLI(s, s_l) ((s_l) == 12) +#define IS_TIME_MICRO(s, s_l) ((s_l) == 15) + +#define CHECK_TIMEDELTA1_STR(s, s_l) \ + ((s_l) == 7 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + (s)[1] == ':' && \ + (s)[2] >= '0' && (s)[2] <= '5' && \ + (s)[3] >= '0' && (s)[3] <= '9' && \ + (s)[4] == ':' && \ + (s)[5] >= '0' && (s)[5] <= '5' && \ + (s)[6] >= '0' && (s)[6] <= '9') + +#define CHECK_TIMEDELTA1_MILLI_STR(s, s_l) \ + ((s_l) == 11 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MILLISECONDS_STR((s)+7, 4)) + +#define CHECK_TIMEDELTA1_MICRO_STR(s, s_l) \ + ((s_l) == 14 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MICROSECONDS_STR((s)+7, 7)) + +#define CHECK_TIMEDELTA2_STR(s, s_l) \ + ((s_l) == 8 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + CHECK_TIMEDELTA1_STR((s)+1, 7)) + +#define CHECK_TIMEDELTA2_MILLI_STR(s, s_l) \ + ((s_l) == 12 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4)) + +#define CHECK_TIMEDELTA2_MICRO_STR(s, s_l) \ + ((s_l) == 15 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7)) + +#define CHECK_TIMEDELTA3_STR(s, s_l) \ + ((s_l) == 9 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + (s)[1] >= '0' && (s)[1] <= '9' && \ + CHECK_TIMEDELTA1_STR((s)+2, 7)) + +#define CHECK_TIMEDELTA3_MILLI_STR(s, s_l) \ + ((s_l) == 13 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MILLISECONDS_STR((s)+9, 4)) + +#define CHECK_TIMEDELTA3_MICRO_STR(s, s_l) \ + ((s_l) == 16 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MICROSECONDS_STR((s)+9, 7)) + +// +// 0:00:00 / 0:00:00.000 / 0:00:00.000000 +// 00:00:00 / 00:00:00.000 / 00:00:00.000000 +// 000:00:00 / 000:00:00.000 / 000:00:00.000000 +// +#define CHECK_ANY_TIMEDELTA_STR(s, s_l) \ + (((s_l) > 0 && (s)[0] == '-') ? 
\ + (-1 * (_CHECK_ANY_TIMEDELTA_STR((s)+1, (s_l)-1))) : \ + (_CHECK_ANY_TIMEDELTA_STR((s), (s_l)))) + +#define _CHECK_ANY_TIMEDELTA_STR(s, s_l) \ + (CHECK_TIMEDELTA1_STR(s, s_l) || \ + CHECK_TIMEDELTA2_STR(s, s_l) || \ + CHECK_TIMEDELTA3_STR(s, s_l) || \ + CHECK_TIMEDELTA1_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA2_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA3_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA1_MICRO_STR(s, s_l) || \ + CHECK_TIMEDELTA2_MICRO_STR(s, s_l) || \ + CHECK_TIMEDELTA3_MICRO_STR(s, s_l)) + +#define TIMEDELTA1_SIZE (7) +#define TIMEDELTA2_SIZE (8) +#define TIMEDELTA3_SIZE (9) +#define TIMEDELTA1_MILLI_SIZE (11) +#define TIMEDELTA2_MILLI_SIZE (12) +#define TIMEDELTA3_MILLI_SIZE (13) +#define TIMEDELTA1_MICRO_SIZE (14) +#define TIMEDELTA2_MICRO_SIZE (15) +#define TIMEDELTA3_MICRO_SIZE (16) + +#define IS_TIMEDELTA1(s, s_l) ((s_l) == 7 || (s_l) == 11 || (s_l) == 14) +#define IS_TIMEDELTA2(s, s_l) ((s_l) == 8 || (s_l) == 12 || (s_l) == 15) +#define IS_TIMEDELTA3(s, s_l) ((s_l) == 9 || (s_l) == 13 || (s_l) == 16) + +#define IS_TIMEDELTA_MILLI(s, s_l) ((s_l) == 11 || (s_l) == 12 || (s_l) == 13) +#define IS_TIMEDELTA_MICRO(s, s_l) ((s_l) == 14 || (s_l) == 15 || (s_l) == 16) + +typedef struct { + int output_type; + int parse_json; + PyObject *invalid_date_value; + PyObject *invalid_time_value; + PyObject *invalid_datetime_value; +} MySQLAccelOptions; + +static inline int IMAX(int a, int b) { return((a) > (b) ? a : b); } +static inline int IMIN(int a, int b) { return((a) < (b) ? 
a : b); } + +static void read_options(MySQLAccelOptions *options, PyObject *dict); +static unsigned long long compute_row_size(unsigned long *type_codes, unsigned long *flags, unsigned long *scales, unsigned long n_cols); + +// +// Array +// + +typedef struct { + PyObject_HEAD + PyObject *array_interface; +} ArrayObject; + +static void Array_dealloc(ArrayObject *self) { + if (self->array_interface) { + PyObject *data = PyDict_GetItemString(self->array_interface, "data"); + if (data) { + PyObject *buffer = PyTuple_GetItem(data, 0); + if (buffer) { + free((char*)PyLong_AsUnsignedLongLong(buffer)); + } + } + } + Py_XDECREF(self->array_interface); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject *Array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { + ArrayObject *self = (ArrayObject*)type->tp_alloc(type, 0); + if (self != NULL) { + self->array_interface = Py_None; + Py_INCREF(Py_None); + } + return (PyObject*)self; +} + +static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"array_interface", NULL}; + PyObject *array_interface = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &array_interface)) { + return -1; + } + + if (array_interface) { + PyObject *tmp = self->array_interface; + Py_INCREF(array_interface); + self->array_interface = array_interface; + Py_DECREF(tmp); + } + + return 0; +} + +static PyObject *Array_get__array_interface__(ArrayObject *self, void *closure) { + Py_INCREF(self->array_interface); + return self->array_interface; +} + +static PyGetSetDef Array_getsetters[] = { + {"__array_interface__", (getter)Array_get__array_interface__, + (setter)NULL, "array interface", NULL}, + {NULL} +}; + +static PyTypeObject ArrayType = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_pymysqlsv.Array", + .tp_doc = PyDoc_STR("Array manager"), + .tp_basicsize = sizeof(ArrayObject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_new =
Array_new, + .tp_init = (initproc)Array_init, + .tp_dealloc = (destructor)Array_dealloc, + .tp_getset = Array_getsetters, +}; + +// +// End Array +// + +// +// State +// + +typedef struct { + PyObject_HEAD + PyObject *py_conn; // Database connection + PyObject *py_fields; // List of table fields + PyObject *py_decimal_mod; // decimal module + PyObject *py_decimal; // decimal.Decimal + PyObject *py_json_mod; // json module + PyObject *py_json_loads; // json.loads + PyObject *py_rows; // Output object + PyObject *py_rfile; // Socket file I/O + PyObject *py_read; // File I/O read method + PyObject *py_sock; // Socket + PyObject *py_read_timeout; // Socket read timeout value + PyObject *py_settimeout; // Socket settimeout method + PyObject **py_converters; // List of converter functions + PyObject **py_names; // Column names + PyObject *py_default_converters; // Dict of default converters + PyTypeObject *namedtuple; // Generated namedtuple type + PyObject **py_encodings; // Encoding for each column as Python string + const char **encodings; // Encoding for each column + unsigned long long n_cols; + unsigned long long n_rows; + unsigned long *type_codes; // Type code for each column + unsigned long *flags; // Column flags + unsigned long *scales; // Column scales + unsigned long *offsets; // Column offsets in buffer + unsigned long long next_seq_id; + MySQLAccelOptions options; + unsigned long long df_buffer_row_size; + unsigned long long df_buffer_n_rows; + char *df_cursor; + char *df_buffer; + PyStructSequence_Desc namedtuple_desc; + int unbuffered; +} StateObject; + +#define DESTROY(x) do { if (x) { free(x); (x) = NULL; } } while (0) + +static void State_clear_fields(StateObject *self) { + if (!self) return; + DESTROY(self->type_codes); + DESTROY(self->flags); + DESTROY(self->scales); + DESTROY(self->encodings); + DESTROY(self->offsets); + DESTROY(self->namedtuple_desc.fields); + if (self->py_converters) { + for (unsigned long i = 0; i < self->n_cols; i++) { + 
Py_XDECREF(self->py_converters[i]); + self->py_converters[i] = NULL; + } + DESTROY(self->py_converters); + } + if (self->py_names) { + for (unsigned long i = 0; i < self->n_cols; i++) { + Py_XDECREF(self->py_names[i]); + self->py_names[i] = NULL; + } + DESTROY(self->py_names); + } + if (self->py_encodings) { + for (unsigned long i = 0; i < self->n_cols; i++) { + Py_XDECREF(self->py_encodings[i]); + self->py_encodings[i] = NULL; + } + DESTROY(self->py_encodings); + } + Py_CLEAR(self->namedtuple); + Py_CLEAR(self->py_rows); + Py_CLEAR(self->py_read); + Py_CLEAR(self->py_rfile); + Py_CLEAR(self->py_read_timeout); + Py_CLEAR(self->py_settimeout); + Py_CLEAR(self->py_sock); + Py_CLEAR(self->py_conn); + Py_CLEAR(self->py_default_converters); + Py_CLEAR(self->py_fields); + Py_CLEAR(self->py_decimal); + Py_CLEAR(self->py_decimal_mod); + Py_CLEAR(self->py_json_loads); + Py_CLEAR(self->py_json_mod); +} + +static void State_dealloc(StateObject *self) { + State_clear_fields(self); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject *State_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { + StateObject *self = (StateObject*)type->tp_alloc(type, 0); + return (PyObject*)self; +} + +static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { + int rc = 0; + PyObject *py_res = NULL; + PyObject *py_converters = NULL; + PyObject *py_options = NULL; + + if (!PyArg_ParseTuple(args, "O", &py_res)) { + return -1; + } + + py_options = PyObject_GetAttrString(py_res, "options"); + if (!py_options) { + Py_INCREF(Py_None); + py_options = Py_None; + } + + if (PyDict_Check(py_options)) { + self->py_default_converters = PyDict_GetItemString(py_options, "default_converters"); + if (self->py_default_converters && !PyDict_Check(self->py_default_converters)) { + self->py_default_converters = NULL; + } + Py_XINCREF(self->py_default_converters); + PyObject *py_unbuffered = PyDict_GetItemString(py_options, "unbuffered"); + if (py_unbuffered && 
PyObject_IsTrue(py_unbuffered)) { + self->unbuffered = 1; + } + } + + if (self->unbuffered) { + PyObject *unbuffered_active = PyObject_GetAttrString(py_res, "unbuffered_active"); + if (!unbuffered_active || PyObject_IsTrue(unbuffered_active) != 1) { /* missing attribute, falsy value, or truth-test error */ + Py_XDECREF(unbuffered_active); + goto error; + } + Py_XDECREF(unbuffered_active); + } + + // Import decimal module. + self->py_decimal_mod = PyImport_ImportModule("decimal"); + if (!self->py_decimal_mod) goto error; + self->py_decimal = PyObject_GetAttrString(self->py_decimal_mod, "Decimal"); + if (!self->py_decimal) goto error; + + // Import json module. + self->py_json_mod = PyImport_ImportModule("json"); + if (!self->py_json_mod) goto error; + self->py_json_loads = PyObject_GetAttrString(self->py_json_mod, "loads"); + if (!self->py_json_loads) goto error; + + // Retrieve type codes for each column. + PyObject *py_field_count = PyObject_GetAttrString(py_res, "field_count"); + if (!py_field_count) goto error; + self->n_cols = PyLong_AsUnsignedLong(py_field_count); + Py_XDECREF(py_field_count); + + py_converters = PyObject_GetAttrString(py_res, "converters"); + if (!py_converters) goto error; + + self->py_converters = calloc(self->n_cols, sizeof(PyObject*)); + if (!self->py_converters) goto error; + + self->type_codes = calloc(self->n_cols, sizeof(unsigned long)); + if (!self->type_codes) goto error; + + self->flags = calloc(self->n_cols, sizeof(unsigned long)); + if (!self->flags) goto error; + + self->scales = calloc(self->n_cols, sizeof(unsigned long)); + if (!self->scales) goto error; + + self->encodings = calloc(self->n_cols, sizeof(char*)); + if (!self->encodings) goto error; + + self->py_encodings = calloc(self->n_cols, sizeof(PyObject*)); + if (!self->py_encodings) goto error; + + self->py_names = calloc(self->n_cols, sizeof(PyObject*)); + if (!self->py_names) goto error; + + self->py_fields = PyObject_GetAttrString(py_res, "fields"); + if (!self->py_fields) goto error; + + for (unsigned long i = 0; i < self->n_cols; i++) { + // Get type
codes. + PyObject *py_field = PyList_GetItem(self->py_fields, i); + if (!py_field) goto error; + + PyObject *py_flags = PyObject_GetAttrString(py_field, "flags"); + if (!py_flags) goto error; + self->flags[i] = PyLong_AsUnsignedLong(py_flags); + Py_XDECREF(py_flags); + + PyObject *py_scale = PyObject_GetAttrString(py_field, "scale"); + if (!py_scale) goto error; + self->scales[i] = PyLong_AsUnsignedLong(py_scale); + Py_XDECREF(py_scale); + + PyObject *py_field_type = PyObject_GetAttrString(py_field, "type_code"); + if (!py_field_type) goto error; + self->type_codes[i] = PyLong_AsUnsignedLong(py_field_type); + PyObject *py_default_converter = (self->py_default_converters) ? + PyDict_GetItem(self->py_default_converters, py_field_type) : NULL; + Py_XDECREF(py_field_type); + + // Get field name. + PyObject *py_field_name = PyObject_GetAttrString(py_field, "name"); + if (!py_field_name) goto error; + self->py_names[i] = py_field_name; + + // Get field encodings (NULL means binary) and default converters. + PyObject *py_tmp = PyList_GetItem(py_converters, i); + if (!py_tmp) goto error; + PyObject *py_encoding = PyTuple_GetItem(py_tmp, 0); + if (!py_encoding) goto error; + PyObject *py_converter = PyTuple_GetItem(py_tmp, 1); + if (!py_converter) goto error; + + self->py_encodings[i] = (py_encoding == Py_None) ? NULL : py_encoding; + Py_XINCREF(self->py_encodings[i]); + + self->encodings[i] = (py_encoding == Py_None) ? + NULL : PyUnicode_AsUTF8AndSize(py_encoding, NULL); + + self->py_converters[i] = (py_converter == Py_None || py_converter == py_default_converter) ? + NULL : py_converter; + Py_XINCREF(self->py_converters[i]); + } + + // Loop over all data packets. + self->py_conn = PyObject_GetAttrString(py_res, "connection"); + if (!self->py_conn) goto error; + + // Cache socket timeout and read methods. 
+ self->py_sock = PyObject_GetAttrString(self->py_conn, "_sock"); + if (!self->py_sock) goto error; + self->py_settimeout = PyObject_GetAttrString(self->py_sock, "settimeout"); + if (!self->py_settimeout) goto error; + self->py_read_timeout = PyObject_GetAttrString(self->py_conn, "_read_timeout"); + if (!self->py_read_timeout) goto error; + + self->py_rfile = PyObject_GetAttrString(self->py_conn, "_rfile"); + if (!self->py_rfile) goto error; + self->py_read = PyObject_GetAttrString(self->py_rfile, "read"); + if (!self->py_read) goto error; + + PyObject *py_next_seq_id = PyObject_GetAttrString(self->py_conn, "_next_seq_id"); + if (!py_next_seq_id) goto error; + self->next_seq_id = PyLong_AsUnsignedLongLong(py_next_seq_id); + Py_XDECREF(py_next_seq_id); + + if (PyDict_Check(py_options)) { + read_options(&self->options, py_options); + } + + switch (self->options.output_type) { + case MYSQL_ACCEL_OUT_PANDAS: + case MYSQL_ACCEL_OUT_NUMPY: + // Setup dataframe buffer. + self->df_buffer_n_rows = (self->unbuffered) ? 
1 : 500; + self->df_buffer_row_size = compute_row_size(self->type_codes, self->flags, self->scales, self->n_cols); + self->df_buffer = malloc(self->df_buffer_row_size * self->df_buffer_n_rows); + if (!self->df_buffer) goto error; + self->df_cursor = self->df_buffer; + break; + + case MYSQL_ACCEL_OUT_NAMEDTUPLES: + self->namedtuple_desc.name = "Row"; + self->namedtuple_desc.doc = "Row of data values"; + self->namedtuple_desc.n_in_sequence = self->n_cols; + self->namedtuple_desc.fields = calloc(self->n_cols + 1, sizeof(PyStructSequence_Field)); + if (!self->namedtuple_desc.fields) goto error; + for (unsigned long long i = 0; i < self->n_cols; i++) { + self->namedtuple_desc.fields[i].name = PyUnicode_AsUTF8AndSize(self->py_names[i], NULL); + self->namedtuple_desc.fields[i].doc = NULL; + } + self->namedtuple = PyStructSequence_NewType(&self->namedtuple_desc); + if (!self->namedtuple) goto error; + + // Fall through + + default: + self->py_rows = PyList_New(0); + if (!self->py_rows) goto error; + + // Unbuffered results always have exactly 1 row. 
+ if (self->unbuffered) { + PyList_Append(self->py_rows, Py_None); + } + + PyObject_SetAttrString(py_res, "rows", self->py_rows); + } + +exit: + Py_XDECREF(py_converters); + Py_XDECREF(py_options); + return rc; + +error: + State_clear_fields(self); + rc = -1; + goto exit; +} + +static PyTypeObject StateType = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_pymysqlsv.State", + .tp_doc = PyDoc_STR("Rowdata state manager"), + .tp_basicsize = sizeof(StateObject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_new = State_new, + .tp_init = (initproc)State_init, + .tp_dealloc = (destructor)State_dealloc, +}; + +// +// End State +// + +static void read_options(MySQLAccelOptions *options, PyObject *dict) { + if (!options || !dict) return; + + PyObject *key = NULL; + PyObject *value = NULL; + Py_ssize_t pos = 0; + + while (PyDict_Next(dict, &pos, &key, &value)) { + if (PyUnicode_CompareWithASCIIString(key, "output_type") == 0) { + if (PyUnicode_CompareWithASCIIString(value, "dict") == 0 || + PyUnicode_CompareWithASCIIString(value, "dicts") == 0 ) { + options->output_type = MYSQL_ACCEL_OUT_DICTS; + } + else if (PyUnicode_CompareWithASCIIString(value, "namedtuple") == 0 || + PyUnicode_CompareWithASCIIString(value, "namedtuples") == 0) { + options->output_type = MYSQL_ACCEL_OUT_NAMEDTUPLES; + } + else if (PyUnicode_CompareWithASCIIString(value, "numpy") == 0) { + options->output_type = MYSQL_ACCEL_OUT_NUMPY; + } + else if (PyUnicode_CompareWithASCIIString(value, "pandas") == 0) { + options->output_type = MYSQL_ACCEL_OUT_PANDAS; + } + else { + options->output_type = MYSQL_ACCEL_OUT_TUPLES; + } + } else if (PyUnicode_CompareWithASCIIString(key, "parse_json") == 0) { + options->parse_json = PyObject_IsTrue(value); + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_date_value") == 0) { + options->invalid_date_value = value; + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_time_value") == 0) { + options->invalid_time_value = 
value; + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_datetime_value") == 0) { + options->invalid_datetime_value = value; + } + } +} + +// mysql, for whatever reason, treats 0 as an actual year, but not +// a leap year +// +inline int is_leap_year(int year) +{ + return (year % 4) == 0 && year != 0 && ((year % 100) != 0 || (year % 400) == 0); +} + +inline int days_in_previous_months(int month, int year) +{ + static const int previous_days[13] = + { + -31, + 0, + 31, + 31 + 28, + 31 + 28 + 31, + 31 + 28 + 31 + 30, + 31 + 28 + 31 + 30 + 31, + 31 + 28 + 31 + 30 + 31 + 30, + 31 + 28 + 31 + 30 + 31 + 30 + 31, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, + }; + return previous_days[month] + (month > 2 && is_leap_year(year)); +} + +// NOTE: year 0 does not actually exist, but mysql pretends it does (and is NOT +// a leap year) +// +inline int leap_years_before(int year) +{ + return (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400; +} + +inline int days_in_previous_years(int year) +{ + return 365 * year + leap_years_before(year); +} + +static int64_t to_days(int year, int month, int day) { + return days_in_previous_years(year) + days_in_previous_months(month, year) + day; +} + +static void raise_exception( + PyObject *self, + char *err_type, + unsigned long long err_code, + char *err_str +) { + PyObject *py_exc = NULL; + PyObject *py_val = NULL; + + py_exc = PyObject_GetAttrString(self, err_type); + if (!py_exc) goto error; + + py_val = Py_BuildValue("(Ks)", err_code, err_str); + if (!py_val) goto error; + + PyErr_SetObject(py_exc, py_val); + +exit: + if (py_exc) { Py_DECREF(py_exc); } + if (py_val) { Py_DECREF(py_val); } + return; + +error: + goto exit; +} + +static int is_error_packet(char *buff_bytes) { + return buff_bytes && *(uint8_t*)buff_bytes == 0xFF; +} + +static void force_close(PyObject *self) { + 
// Close the connection's socket (best effort) and set `_sock` and `_rfile`
// to None on `self`.
//
// Errors raised while closing are discarded. An exception that was already
// pending when this function was called is preserved, so callers can close
// the connection in the middle of error handling without losing the error.
static void force_close(PyObject *self) {
    PyObject *py_type = NULL;
    PyObject *py_value = NULL;
    PyObject *py_tb = NULL;
    PyObject *py_sock = NULL;
    PyObject *py_ret = NULL;

    PyErr_Fetch(&py_type, &py_value, &py_tb);

    py_sock = PyObject_GetAttrString(self, "_sock");
    if (py_sock) {
        if (py_sock != Py_None) {
            // CallMethod returns a new reference that must be released.
            py_ret = PyObject_CallMethod(py_sock, "close", NULL);
            Py_XDECREF(py_ret);
        }
        Py_DECREF(py_sock);
    }
    PyErr_Clear();

    if (PyObject_SetAttrString(self, "_sock", Py_None) < 0) PyErr_Clear();
    if (PyObject_SetAttrString(self, "_rfile", Py_None) < 0) PyErr_Clear();

    PyErr_Restore(py_type, py_value, py_tb);
}

// Read exactly `num_bytes` bytes by calling `py_read(num_bytes)`.
//
// If `py_read_timeout` is not None, `py_settimeout(py_read_timeout)` is called
// first. A read interrupted by a signal (OSError with errno 4, EINTR) is
// retried. Any other OSError, or a short read, closes the connection and
// raises `self.OperationalError`. Other exceptions close the connection and
// propagate unchanged.
//
// Returns a new reference to a bytes object, or NULL with an exception set.
static PyObject *read_bytes(
    PyObject *self,
    unsigned long long num_bytes,
    PyObject *py_read,
    PyObject *py_settimeout,
    PyObject *py_read_timeout
) {
    PyObject *py_num_bytes = NULL;
    PyObject *py_data = NULL;

    if (py_read_timeout != Py_None) {
        PyObject *py_ret = PyObject_CallFunctionObjArgs(py_settimeout, py_read_timeout, NULL);
        if (!py_ret) goto error;
        Py_DECREF(py_ret);
    }

    py_num_bytes = PyLong_FromUnsignedLongLong(num_bytes);
    if (!py_num_bytes) goto error;

    while (1) {
        py_data = PyObject_CallFunctionObjArgs(py_read, py_num_bytes, NULL);
        if (py_data) break;

        // IOError is an alias of OSError on Python 3.
        if (PyErr_ExceptionMatches(PyExc_OSError)) {
            PyObject *py_type = NULL;
            PyObject *py_value = NULL;
            PyObject *py_tb = NULL;
            long err = 0;

            // errno lives on the exception instance; PyErr_Occurred() only
            // yields the type, so fetch and normalize to get the instance.
            PyErr_Fetch(&py_type, &py_value, &py_tb);
            PyErr_NormalizeException(&py_type, &py_value, &py_tb);
            if (py_value) {
                PyObject *py_errno = PyObject_GetAttrString(py_value, "errno");
                if (py_errno && PyLong_Check(py_errno)) {
                    err = PyLong_AsLong(py_errno);
                }
                Py_XDECREF(py_errno);
                PyErr_Clear();
            }
            Py_XDECREF(py_type);
            Py_XDECREF(py_value);
            Py_XDECREF(py_tb);

            if (err == 4 /* errno.EINTR */) {
                continue;
            }

            force_close(self);
            raise_exception(self, "OperationalError", 0,
                            "Lost connection to MySQL server during query");
            goto error;
        }

        // Don't convert unknown exception to MySQLError.
        force_close(self);
        goto error;
    }

    if (!PyBytes_Check(py_data)) {
        force_close(self);
        PyErr_SetString(PyExc_TypeError, "socket read did not return bytes");
        goto error;
    }

    if ((unsigned long long)PyBytes_GET_SIZE(py_data) < num_bytes) {
        force_close(self);
        raise_exception(self, "OperationalError", 0,
                        "Lost connection to MySQL server during query");
        goto error;
    }

exit:
    Py_XDECREF(py_num_bytes);
    return py_data;

error:
    Py_CLEAR(py_data);
    goto exit;
}
+ force_close(self); + goto error; + } + } + + if (py_data) { + break; + } + } + + if (PyBytes_GET_SIZE(py_data) < (long int)num_bytes) { + force_close(self); + raise_exception(self, "OperationalError", 0, + "Lost connection to MySQL server during query"); + goto error; + } + +exit: + if (py_num_bytes) { Py_DECREF(py_num_bytes); } + return py_data; + +error: + if (py_data) { Py_DECREF(py_data); py_data = NULL; } + goto exit; +} + +static PyObject *read_packet( + PyObject *self, + PyObject *py_read, + PyObject *py_settimeout, + PyObject *py_read_timeout, + unsigned long long *next_seq_id +) { + PyObject *py_buff = PyByteArray_FromStringAndSize(NULL, 0); + PyObject *py_new_buff = NULL; + PyObject *py_packet_header = NULL; + PyObject *py_bytes_to_read = NULL; + PyObject *py_recv_data = NULL; + unsigned long long bytes_to_read = 0; + char *buff = NULL; + uint64_t btrl = 0; + uint8_t btrh = 0; + uint8_t packet_number = 0; + + while (1) { + py_packet_header = read_bytes(self, 4, py_read, py_settimeout, py_read_timeout); + if (!py_packet_header) goto error; + + buff = PyBytes_AsString(py_packet_header); + + btrl = *(uint16_t*)buff; + btrh = *(uint8_t*)(buff+2); + packet_number = *(uint8_t*)(buff+3); + bytes_to_read = btrl + (btrh << 16); + + Py_DECREF(py_packet_header); py_packet_header = NULL; + + if (packet_number != *next_seq_id) { + force_close(self); + if (packet_number == 0) { + raise_exception(self, "OperationalError", 0, + "Lost connection to MySQL server during query"); + + goto error; + } + raise_exception(self, "InternalError", 0, + "Packet sequence number wrong"); + goto error; + } + + *next_seq_id = (*next_seq_id + 1) % 256; + + py_recv_data = read_bytes(self, bytes_to_read, py_read, py_settimeout, py_read_timeout); + if (!py_recv_data) goto error; + + py_new_buff = PyByteArray_Concat(py_buff, py_recv_data); + if (!py_new_buff) goto error; + + Py_DECREF(py_buff); py_buff = py_new_buff; py_new_buff = NULL; + Py_DECREF(py_recv_data); py_recv_data = NULL; + + if 
(bytes_to_read == 0xFFFFFF) { + continue; + } + + if (bytes_to_read < MYSQL_MAX_PACKET_LEN) { + break; + } + } + + if (is_error_packet(PyByteArray_AsString(py_buff))) { + PyObject *py_result = PyObject_GetAttrString(self, "_result"); + if (py_result && py_result != Py_None) { + PyObject *py_unbuffered_active = PyObject_GetAttrString(py_result, "unbuffered_active"); + if (py_unbuffered_active == Py_True) { + PyObject_SetAttrString(py_result, "unbuffered_active", Py_False); + Py_DECREF(py_result); + } + Py_XDECREF(py_unbuffered_active); + } + Py_XDECREF(py_result); + PyObject_CallMethod(self, "_raise_mysql_exception", "O", py_buff, NULL); + } + +exit: + if (py_new_buff) { Py_DECREF(py_new_buff); } + if (py_bytes_to_read) { Py_DECREF(py_bytes_to_read); } + if (py_recv_data) { Py_DECREF(py_recv_data); } + if (py_packet_header) { Py_DECREF(py_packet_header); } + return py_buff; + +error: + if (py_buff) { Py_DECREF(py_buff); py_buff = NULL; } + goto exit; +} + +static int is_eof_packet(char *data, unsigned long long data_l) { + return data && (uint8_t)*(uint8_t*)data == 0xFE && data_l < 9; +} + +static int check_packet_is_eof( + char **data, + unsigned long long *data_l, + unsigned long long *warning_count, + int *has_next +) { + uint16_t server_status = 0; + if (!data || !data_l) { + return 0; + if (has_next) *has_next = 0; + if (warning_count) *warning_count = 0; + } + if (!is_eof_packet(*data, *data_l)) { + return 0; + } + *data += 1; *data_l -= 1; + if (warning_count) *warning_count = **(uint16_t**)data; + *data += 2; *data_l -= 2; + server_status = **(uint16_t**)data; + *data += 2; *data_l -= 2; + if (has_next) *has_next = server_status & MYSQL_SERVER_MORE_RESULTS_EXISTS; + return 1; +} + +static unsigned long long read_length_encoded_integer( + char **data, + unsigned long long *data_l, + int *is_null +) { + if (is_null) *is_null = 0; + + if (!data || !data_l || *data_l == 0) { + if (is_null) *is_null = 1; + return 0; + } + + uint8_t c = **(uint8_t**)data; + *data 
+= 1; *data_l -= 1; + + if (c == MYSQL_COLUMN_NULL) { + if (is_null) *is_null = 1; + return 0; + } + + if (c < MYSQL_COLUMN_UNSIGNED_CHAR) { + return c; + } + + if (c == MYSQL_COLUMN_UNSIGNED_SHORT) { + if (*data_l < 2) { + if (is_null) *is_null = 1; + return 0; + } + uint16_t out = **(uint16_t**)data; + *data += 2; *data_l -= 2; + return out; + } + + if (c == MYSQL_COLUMN_UNSIGNED_INT24) { + if (*data_l < 3) { + if (is_null) *is_null = 1; + return 0; + } + uint16_t low = **(uint8_t**)data; + *data += 1; *data_l -= 1; + uint16_t high = **(uint16_t**)data; + *data += 2; *data_l -= 2; + return low + (high << 16); + } + + if (c == MYSQL_COLUMN_UNSIGNED_INT64) { + if (*data_l < 8) { + if (is_null) *is_null = 1; + return 0; + } + uint64_t out = **(uint64_t**)data; + *data += 8; *data_l -= 8; + return out; + } + + if (is_null) *is_null = 1; + return 0; +} + +static void read_length_coded_string( + char **data, + unsigned long long *data_l, + char **out, + unsigned long long *out_l, + int *is_null +) { + if (is_null) *is_null = 0; + + if (!data || !data_l || !out || !out_l) { + if (is_null) *is_null = 1; + return; + } + + unsigned long long length = read_length_encoded_integer(data, data_l, is_null); + + if (is_null && *is_null) { + return; + } + + length = (length > *data_l) ? 
*data_l : length; + + *out = *data; + *out_l = length; + + *data += length; + *data_l -= length; + + return; +} + +static PyObject *build_array( + unsigned long long n_rows, + unsigned long n_cols, + PyObject **names, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + char *buffer, + unsigned long long row_size, + MySQLAccelOptions *options +) { + PyObject *py_out = NULL; + PyObject *py_shape = NULL; + PyObject *py_typestr = NULL; + PyObject *py_descr = NULL; + PyObject *py_descr_item = NULL; + PyObject *py_type = NULL; + PyObject *py_data = NULL; + PyObject *py_args = NULL; + PyObject *py_kwds = NULL; + PyObject *py_numpy_mod = NULL; + PyObject *py_numpy_array = NULL; + PyObject *py_array = NULL; + + py_numpy_mod = PyImport_ImportModule("numpy"); + if (!py_numpy_mod) goto error; + py_numpy_array = PyObject_GetAttrString(py_numpy_mod, "array"); + if (!py_numpy_array) goto error; + + py_out = PyDict_New(); + if (!py_out) goto error; + + py_shape = PyTuple_New(1); + if (!py_shape) goto error; + PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(n_rows)); + PyDict_SetItemString(py_out, "shape", py_shape); + Py_DECREF(py_shape); + + py_typestr = PyUnicode_FromFormat("|V%llu", row_size); + if (!py_typestr) goto error; + PyDict_SetItemString(py_out, "typestr", py_typestr); + Py_DECREF(py_typestr); + + py_descr = PyList_New(n_cols); + if (!py_descr) goto error; + PyDict_SetItemString(py_out, "descr", py_descr); + Py_DECREF(py_descr); + + for (unsigned long i = 0; i < n_cols; i++) { + py_descr_item = PyTuple_New(2); + if (!py_descr_item) goto error; + + PyList_SetItem(py_descr, i, py_descr_item); + + PyTuple_SetItem(py_descr_item, 0, names[i]); + // Caller already uses the borrowed reference. 
+ Py_INCREF(names[i]); + + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + py_type = PyUnicode_FromString("|O"); + break; + + case MYSQL_TYPE_TINY: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + py_type = PyUnicode_FromString("output_type == MYSQL_ACCEL_OUT_PANDAS) { + PyObject *out2 = NULL; + PyObject *py_pandas_mod = PyImport_ImportModule("pandas"); + if (!py_pandas_mod) goto error; + out2 = PyObject_CallMethod(py_pandas_mod, "DataFrame", "O", py_out); + Py_DECREF(py_pandas_mod); + if (!out2) goto error; + Py_DECREF(py_out); + py_out = out2; + } + +exit: + Py_XDECREF(py_args); + Py_XDECREF(py_kwds); + Py_XDECREF(py_numpy_array); + Py_XDECREF(py_numpy_mod); + return py_out; + +error: + Py_CLEAR(py_out); + goto exit; +} + +static unsigned long long compute_row_size( + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + unsigned long n_cols +) { + unsigned long long row_size = 0; + + for (unsigned long i = 0; i < n_cols; i++) { + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + row_size += sizeof(PyObject*); + break; + + case MYSQL_TYPE_TINY: + row_size += sizeof(int8_t); + break; + + case MYSQL_TYPE_SHORT: + row_size += sizeof(int16_t); + break; + + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + row_size += sizeof(int32_t); + break; + + case MYSQL_TYPE_LONGLONG: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_FLOAT: + row_size += sizeof(float); + break; + + case MYSQL_TYPE_DOUBLE: + row_size += sizeof(double); + break; + + case MYSQL_TYPE_NULL: + row_size += sizeof(PyObject*); + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_TIME: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_YEAR: + row_size += sizeof(int16_t); + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case 
MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + row_size += sizeof(PyObject*); + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + return 0; + } + } + + return row_size; +} + +static void read_dataframe_row_from_packet( + unsigned long n_cols, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + PyObject **names, + const char **encodings, + PyObject **converters, + PyObject *py_decimal, + PyObject *py_json_loads, + char *data, + unsigned long long data_l, + MySQLAccelOptions *options, + char *buffer +) { + char *out = NULL; + unsigned long long out_l = 0; + int is_null = 0; + PyObject *item = NULL; + PyObject *str = NULL; + char *end = NULL; + char *loc = buffer; + + int sign = 1; + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + int microsecond = 0; + + float float_nan = nanf(""); + double double_nan = nan(""); + + if (!buffer) goto error; + + for (unsigned long i = 0; i < n_cols; i++) { + + read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); + end = &out[out_l]; + + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + if (is_null) { + *(PyObject**)loc = Py_None; + Py_INCREF(Py_None); + } else { + str = NULL; + str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!str) goto error; + + item = PyObject_CallFunctionObjArgs(py_decimal, str, NULL); + Py_DECREF(str); str = NULL; + if (!item) goto error; + + *(PyObject**)loc = item; + } + loc += sizeof(PyObject*); + break; + + case MYSQL_TYPE_TINY: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint8_t*)loc = (is_null) ? 0 : (uint8_t)strtoul(out, &end, 10); + } else { + *(int8_t*)loc = (is_null) ? 
INT8_MIN : (int8_t)strtol(out, &end, 10); + } + loc += sizeof(int8_t); + break; + + case MYSQL_TYPE_SHORT: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint16_t*)loc = (is_null) ? 0 : (uint16_t)strtoul(out, &end, 10); + } else { + *(int16_t*)loc = (is_null) ? INT16_MIN : (int16_t)strtol(out, &end, 10); + } + loc += sizeof(int16_t); + break; + + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint32_t*)loc = (is_null) ? 0 : (uint32_t)strtoul(out, &end, 10); + } else { + *(int32_t*)loc = (is_null) ? INT32_MIN : (int32_t)strtol(out, &end, 10); + } + loc += sizeof(int32_t); + break; + + case MYSQL_TYPE_LONGLONG: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint64_t*)loc = (is_null) ? 0 : (uint64_t)strtoul(out, &end, 10); + } else { + *(int64_t*)loc = (is_null) ? INT64_MIN : (int64_t)strtol(out, &end, 10); + } + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_FLOAT: + if (is_null) { + *(float*)loc = (float)float_nan; + } else { + *(float*)loc = (float)strtod(out, &end); + } + loc += sizeof(float); + break; + + case MYSQL_TYPE_DOUBLE: + if (is_null) { + *(double*)loc = (double)double_nan; + } else { + *(double*)loc = (double)strtod(out, &end); + } + loc += sizeof(double); + break; + + case MYSQL_TYPE_NULL: + *(PyObject**)loc = Py_None; + loc += sizeof(PyObject*); + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + // TODO: Should use numpy's NaT + if (!CHECK_ANY_DATETIME_STR(out, out_l)) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + *(int64_t*)loc = (int64_t)(((to_days(year, month, day) - EPOCH_TO_DAYS) + * SECONDS_PER_DAY + hour * 3600 + minute * 60 + second) + * 1e9 + microsecond * 1e3); + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + if (!CHECK_DATE_STR(out, out_l)) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + *(int64_t*)loc = (int64_t)((to_days(year, month, day) - EPOCH_TO_DAYS) + * SECONDS_PER_DAY * 1e9); + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_TIME: + sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); + if (!sign) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } else if (sign < 0) { + out += 1; out_l -= 1; + } + if (IS_TIMEDELTA1(out, out_l)) { + hour = CHR2INT1(out); out += 2; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA2(out, out_l)) { + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA3(out, out_l)) { + hour = CHR2INT3(out); out += 4; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + } + *(int64_t*)loc = (int64_t)((hour * 3600 + minute * 60 + second) + * 1e9 + microsecond * 1e3) * sign; + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_YEAR: + if (out_l == 0) { + *(uint16_t*)loc = 0; + loc += sizeof(uint16_t); + break; + } + end = &out[out_l]; + *(uint16_t*)loc = (uint16_t)strtoul(out, &end, 10); + loc += sizeof(uint16_t); + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + if (encodings[i] == NULL) { + item = PyBytes_FromStringAndSize(out, out_l); + if (!item) goto error; + break; + } + + item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!item) goto error; + + // Parse JSON string. + if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + str = item; + item = PyObject_CallFunctionObjArgs(py_json_loads, str, NULL); + Py_DECREF(str); str = NULL; + if (!item) goto error; + } + + *(PyObject**)loc = item; + loc += sizeof(PyObject*); + + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + goto error; + } + } + +exit: + return; + +error: + goto exit; +} + +static PyObject *read_obj_row_from_packet( + unsigned long n_cols, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + PyObject **names, + const char **encodings, + PyObject **converters, + PyObject *py_decimal, + PyObject *py_json_loads, + PyTypeObject *namedtuple, + char *data, + unsigned long long data_l, + MySQLAccelOptions *options +) { + char *out = NULL; + char *orig_out = NULL; + unsigned long long out_l = 0; + unsigned long long orig_out_l = 0; + int is_null = 0; + PyObject *py_result = NULL; + PyObject *py_item = NULL; + PyObject *py_str = NULL; + char *end = NULL; + + int sign = 
1; + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + int microsecond = 0; + + switch (options->output_type) { + case MYSQL_ACCEL_OUT_NAMEDTUPLES: { + if (!namedtuple) goto error; + py_result = PyStructSequence_New(namedtuple); + break; + } + case MYSQL_ACCEL_OUT_DICTS: + py_result = PyDict_New(); + break; + default: + py_result = PyTuple_New(n_cols); + } + + for (unsigned long i = 0; i < n_cols; i++) { + + read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); + end = &out[out_l]; + + orig_out = out; + orig_out_l = out_l; + + py_item = Py_None; + + // Don't convert if it's a NULL. + if (!is_null) { + + // If a converter was passed in, use it. + if (converters[i]) { + py_str = NULL; + if (encodings[i] == NULL) { + py_str = PyBytes_FromStringAndSize(out, out_l); + if (!py_str) goto error; + } else { + py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_str) goto error; + } + py_item = PyObject_CallFunctionObjArgs(converters[i], py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + } + + // If no converter was passed in, do the default processing. 
+ else { + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_str) goto error; + + py_item = PyObject_CallFunctionObjArgs(py_decimal, py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + break; + + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + py_item = PyLong_FromUnsignedLongLong(strtoul(out, &end, 10)); + } else { + py_item = PyLong_FromLongLong(strtol(out, &end, 10)); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + py_item = PyFloat_FromDouble(strtod(out, &end)); + if (!py_item) goto error; + break; + + case MYSQL_TYPE_NULL: + py_item = Py_None; + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + if (!CHECK_ANY_DATETIME_STR(out, out_l)) { + if (options && options->invalid_datetime_value) { + py_item = options->invalid_datetime_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + py_item = PyDateTime_FromDateAndTime(year, month, day, + hour, minute, second, microsecond); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + if (!CHECK_DATE_STR(out, out_l)) { + if (options && options->invalid_date_value) { + py_item = options->invalid_date_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + py_item = PyDate_FromDate(year, month, day); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_TIME: + sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); + if (!sign) { + if (options && options->invalid_time_value) { + py_item = options->invalid_time_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } else if (sign < 0) { + out += 1; out_l -= 1; + } + if (IS_TIMEDELTA1(out, out_l)) { + hour = CHR2INT1(out); out += 2; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA2(out, out_l)) { + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA3(out, out_l)) { + hour = CHR2INT3(out); out += 4; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + py_item = PyDelta_FromDSU(0, sign * hour * 60 * 60 + + sign * minute * 60 + + sign * second, + sign * microsecond); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_YEAR: + if (out_l == 0) { + goto error; + break; + } + end = &out[out_l]; + year = strtoul(out, &end, 10); + py_item = PyLong_FromLong(year); + if (!py_item) goto error; + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + if (encodings[i] == NULL) { + py_item = PyBytes_FromStringAndSize(out, out_l); + if (!py_item) goto error; + break; + } + + py_item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_item) goto error; + + // Parse JSON string. 
+ if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + py_str = py_item; + py_item = PyObject_CallFunctionObjArgs(py_json_loads, py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + } + + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + goto error; + } + } + } + + if (py_item == Py_None) { + Py_INCREF(Py_None); + } + + switch (options->output_type) { + case MYSQL_ACCEL_OUT_NAMEDTUPLES: + PyStructSequence_SET_ITEM(py_result, i, py_item); + break; + case MYSQL_ACCEL_OUT_DICTS: + PyDict_SetItem(py_result, names[i], py_item); + Py_INCREF(names[i]); + Py_DECREF(py_item); + break; + default: + PyTuple_SET_ITEM(py_result, i, py_item); + } + } + +exit: + return py_result; + +error: + Py_XDECREF(py_result); + goto exit; +} + +static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *kwargs) { + StateObject *py_state = NULL; + PyObject *py_res = NULL; + PyObject *py_out = NULL; + int is_eof = 0; + + // Parse function args. + if (!PyArg_ParseTuple(args, "O", &py_res)) { + goto error; + } + + // Get the rowdata state. 
+ py_state = (StateObject*)PyObject_GetAttrString(py_res, "_state"); + if (!py_state) { + PyErr_Clear(); + int rc = 0; + PyObject *py_args = PyTuple_New(1); + if (!py_args) goto error; + PyTuple_SET_ITEM(py_args, 0, py_res); Py_INCREF(py_res); + + py_state = (StateObject*)State_new(&StateType, py_args, NULL); + if (!py_state) goto error; + rc = State_init((StateObject*)py_state, py_args, NULL); + Py_XDECREF(py_args); + if (rc != 0) goto error; + + PyObject_SetAttrString(py_res, "_state", (PyObject*)py_state); + Py_DECREF(py_state); + } + + while (1) { + PyObject *py_buff = read_packet(py_state->py_conn, py_state->py_read, + py_state->py_settimeout, py_state->py_read_timeout, + &py_state->next_seq_id); + if (!py_buff) goto error; + + PyObject *py_row = NULL; + char *data = PyByteArray_AsString(py_buff); + unsigned long long data_l = PyByteArray_GET_SIZE(py_buff); + unsigned long long warning_count = 0; + int has_next = 0; + + if (check_packet_is_eof(&data, &data_l, &warning_count, &has_next)) { + is_eof = 1; + + PyObject *py_long = NULL; + + py_long = PyLong_FromUnsignedLongLong(warning_count); + PyObject_SetAttrString(py_res, "warning_count", py_long); + Py_XDECREF(py_long); + + py_long = PyLong_FromLong(has_next); + PyObject_SetAttrString(py_res, "has_next", py_long); + Py_XDECREF(py_long); + + PyObject_SetAttrString(py_res, "connection", Py_None); + + // Hold a reference until the end of this function. 
+ Py_INCREF(py_state); + PyObject_DelAttrString(py_res, "_state"); + + Py_XDECREF(py_buff); + + if (py_state->unbuffered) { + PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); + PyObject_SetAttrString(py_res, "rows", Py_None); + goto exit; + } + + break; + } + + py_state->n_rows += 1; + + switch (py_state->options.output_type) { + case MYSQL_ACCEL_OUT_PANDAS: + case MYSQL_ACCEL_OUT_NUMPY: + if (py_state->n_rows >= py_state->df_buffer_n_rows) { + py_state->df_buffer_n_rows *= 1.7; + py_state->df_buffer = realloc(py_state->df_buffer, + py_state->df_buffer_row_size * + py_state->df_buffer_n_rows); + py_state->df_cursor = py_state->df_buffer + + py_state->df_buffer_row_size * py_state->n_rows; + } + read_dataframe_row_from_packet(py_state->n_cols, py_state->type_codes, + py_state->flags, py_state->scales, py_state->py_names, + py_state->encodings, py_state->py_converters, + py_state->py_decimal, py_state->py_json_loads, + data, data_l, &py_state->options, py_state->df_cursor); + py_state->df_cursor += py_state->df_buffer_row_size; + break; + + default: + py_row = read_obj_row_from_packet(py_state->n_cols, py_state->type_codes, + py_state->flags, py_state->scales, + py_state->py_names, py_state->encodings, + py_state->py_converters, + py_state->py_decimal, + py_state->py_json_loads, + py_state->namedtuple, + data, data_l, &py_state->options); + if (!py_row) { Py_XDECREF(py_buff); goto error; } + + if (py_state->unbuffered) { + PyList_SetItem(py_state->py_rows, 0, py_row); + } else { + PyList_Append(py_state->py_rows, py_row); + Py_XDECREF(py_row); + } + } + + Py_DECREF(py_buff); + + if (py_state->unbuffered) break; + } + + switch (py_state->options.output_type) { + case MYSQL_ACCEL_OUT_PANDAS: + case MYSQL_ACCEL_OUT_NUMPY: + py_state->df_buffer = realloc(py_state->df_buffer, + py_state->df_buffer_row_size * py_state->n_rows); + py_state->py_rows = build_array(py_state->n_rows, py_state->n_cols, + py_state->py_names, py_state->type_codes, + py_state->flags, 
py_state->scales, + py_state->df_buffer, + py_state->df_buffer_row_size, &py_state->options); + PyObject_SetAttrString(py_res, "rows", py_state->py_rows); + Py_DECREF(py_state->py_rows); + } + + PyObject *py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); + if (!py_next_seq_id) goto error; + PyObject_SetAttrString(py_state->py_conn, "_next_seq_id", py_next_seq_id); + Py_XDECREF(py_next_seq_id); + +exit: + py_out = NULL; + + if (py_state->unbuffered) { + if (is_eof) { + Py_INCREF(Py_None); + py_out = Py_None; + PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); + PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); + Py_DECREF(py_n_rows); + } + else { + switch (py_state->options.output_type) { + case MYSQL_ACCEL_OUT_PANDAS: + case MYSQL_ACCEL_OUT_NUMPY: + // TODO: reshape? + py_out = py_state->py_rows; + Py_INCREF(py_out); + break; + default: + py_out = PyList_GetItem(py_state->py_rows, 0); + Py_INCREF(py_out); + } + } + } + else { + switch (py_state->options.output_type) { + case MYSQL_ACCEL_OUT_PANDAS: + case MYSQL_ACCEL_OUT_NUMPY: + py_out = py_state->py_rows; + Py_INCREF(py_out); + break; + default: + py_out = py_state->py_rows; + Py_INCREF(py_out); + } + PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); + PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); + Py_DECREF(py_n_rows); + } + + if (is_eof) { + Py_DECREF(py_state); + } + + return py_out; + +error: + goto exit; +} + +static PyMethodDef PyMySQLAccelMethods[] = { + {"read_rowdata_packet", (PyCFunction)read_rowdata_packet, METH_VARARGS | METH_KEYWORDS, "MySQL row data packet reader"}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef _pymysqlsvmodule = { + PyModuleDef_HEAD_INIT, + "_pymysqlsv", + "PyMySQL row data packet reader accelerator", + -1, + PyMySQLAccelMethods +}; + +PyMODINIT_FUNC PyInit__pymysqlsv(void) { + PyDateTime_IMPORT; + if (PyType_Ready(&ArrayType) < 0) { + return NULL; + } + if (PyType_Ready(&StateType) < 0) { + return NULL; + } 
+ return PyModule_Create(&_pymysqlsvmodule); +} diff --git a/src/accel.c.orig b/src/accel.c.orig new file mode 100644 index 000000000..f3dfd79f9 --- /dev/null +++ b/src/accel.c.orig @@ -0,0 +1,1935 @@ + +#include +#include +#include +#include +#include + +#define MYSQL_ACCEL_OUT_TUPLES 0 +#define MYSQL_ACCEL_OUT_NAMEDTUPLES 1 +#define MYSQL_ACCEL_OUT_DICTS 2 +#define MYSQL_ACCEL_OUT_NUMPY 3 +#define MYSQL_ACCEL_OUT_DATAFRAME 4 + +#define MYSQL_FLAG_NOT_NULL 1 +#define MYSQL_FLAG_PRI_KEY 2 +#define MYSQL_FLAG_UNIQUE_KEY 4 +#define MYSQL_FLAG_MULTIPLE_KEY 8 +#define MYSQL_FLAG_BLOB 16 +#define MYSQL_FLAG_UNSIGNED 32 +#define MYSQL_FLAG_ZEROFILL 64 +#define MYSQL_FLAG_BINARY 128 +#define MYSQL_FLAG_ENUM 256 +#define MYSQL_FLAG_AUTO_INCREMENT 512 +#define MYSQL_FLAG_TIMESTAMP 1024 +#define MYSQL_FLAG_SET 2048 +#define MYSQL_FLAG_PART_KEY 16384 +#define MYSQL_FLAG_GROUP 32767 +#define MYSQL_FLAG_UNIQUE 65536 + +#define MYSQL_TYPE_DECIMAL 0 +#define MYSQL_TYPE_TINY 1 +#define MYSQL_TYPE_SHORT 2 +#define MYSQL_TYPE_LONG 3 +#define MYSQL_TYPE_FLOAT 4 +#define MYSQL_TYPE_DOUBLE 5 +#define MYSQL_TYPE_NULL 6 +#define MYSQL_TYPE_TIMESTAMP 7 +#define MYSQL_TYPE_LONGLONG 8 +#define MYSQL_TYPE_INT24 9 +#define MYSQL_TYPE_DATE 10 +#define MYSQL_TYPE_TIME 11 +#define MYSQL_TYPE_DATETIME 12 +#define MYSQL_TYPE_YEAR 13 +#define MYSQL_TYPE_NEWDATE 14 +#define MYSQL_TYPE_VARCHAR 15 +#define MYSQL_TYPE_BIT 16 +#define MYSQL_TYPE_JSON 245 +#define MYSQL_TYPE_NEWDECIMAL 246 +#define MYSQL_TYPE_ENUM 247 +#define MYSQL_TYPE_SET 248 +#define MYSQL_TYPE_TINY_BLOB 249 +#define MYSQL_TYPE_MEDIUM_BLOB 250 +#define MYSQL_TYPE_LONG_BLOB 251 +#define MYSQL_TYPE_BLOB 252 +#define MYSQL_TYPE_VAR_STRING 253 +#define MYSQL_TYPE_STRING 254 +#define MYSQL_TYPE_GEOMETRY 255 + +#define MYSQL_TYPE_CHAR MYSQL_TYPE_TINY +#define MYSQL_TYPE_INTERVAL MYSQL_TYPE_ENUM + +#define MYSQL_COLUMN_NULL 251 +#define MYSQL_COLUMN_UNSIGNED_CHAR 251 +#define MYSQL_COLUMN_UNSIGNED_SHORT 252 +#define 
MYSQL_COLUMN_UNSIGNED_INT24 253
+#define MYSQL_COLUMN_UNSIGNED_INT64 254
+
+#define MYSQL_SERVER_MORE_RESULTS_EXISTS 8
+
+// 2**24 - 1
+#define MYSQL_MAX_PACKET_LEN 16777215
+
+// EPOCH_TO_DAYS is the value to_days(1970, 1, 1) evaluates to; subtracting
+// it rebases a to_days() day count onto the Unix epoch.
+#define EPOCH_TO_DAYS 719528
+#define SECONDS_PER_DAY (24 * 60 * 60)
+
+#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIMEDELTA 0
+#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIME 1
+#define MYSQL_ACCEL_OPTION_JSON_TYPE_STRING 0
+#define MYSQL_ACCEL_OPTION_JSON_TYPE_OBJ 1
+#define MYSQL_ACCEL_OPTION_BIT_TYPE_BYTES 0
+#define MYSQL_ACCEL_OPTION_BIT_TYPE_INT 1
+
+// CHR2INTn(x): numeric value of the first n ASCII digits at x. No
+// validation is done here; callers run the CHECK_* macros first.
+// The 3/4/6-digit forms use floating-point multipliers, so their result
+// has type double.
+//
+// CHR2INT1 must read x[0]: it is used for the single hour digit of an
+// "H:MM:SS" value, where x[1] is the ':' separator.
+#define CHR2INT1(x) ((x)[0] - '0')
+#define CHR2INT2(x) ((((x)[0] - '0') * 10) + ((x)[1] - '0'))
+#define CHR2INT3(x) ((((x)[0] - '0') * 1e2) + (((x)[1] - '0') * 10) + ((x)[2] - '0'))
+#define CHR2INT4(x) ((((x)[0] - '0') * 1e3) + (((x)[1] - '0') * 1e2) + (((x)[2] - '0') * 10) + ((x)[3] - '0'))
+#define CHR2INT6(x) ((((x)[0] - '0') * 1e5) + (((x)[1] - '0') * 1e4) + (((x)[2] - '0') * 1e3) + (((x)[3] - '0') * 1e2) + (((x)[4] - '0') * 10) + (((x)[5] - '0')))
+
+// True when s (length s_l) looks like "YYYY-MM-DD" with a month of 01-12,
+// a day of 01-31 and a year other than 0000. Day-of-month is not checked
+// against the month length.
+#define CHECK_DATE_STR(s, s_l) \
+    ((s_l) == 10 && \
+     (s)[0] >= '0' && (s)[0] <= '9' && \
+     (s)[1] >= '0' && (s)[1] <= '9' && \
+     (s)[2] >= '0' && (s)[2] <= '9' && \
+     (s)[3] >= '0' && (s)[3] <= '9' && \
+     (s)[4] == '-' && \
+     (((s)[5] == '1' && ((s)[6] >= '0' && (s)[6] <= '2')) || \
+      ((s)[5] == '0' && ((s)[6] >= '1' && (s)[6] <= '9'))) && \
+     (s)[7] == '-' && \
+     ((((s)[8] >= '0' && (s)[8] <= '2') && ((s)[9] >= '0' && (s)[9] <= '9')) || \
+      ((s)[8] == '3' && ((s)[9] >= '0' && (s)[9] <= '1'))) && \
+     !((s)[0] == '0' && (s)[1] == '0' && (s)[2] == '0' && (s)[3] == '0') && \
+     !((s)[5] == '0' && (s)[6] == '0') && \
+     !((s)[8] == '0' && (s)[9] == '0'))
+
+// True when s (length s_l) looks like "HH:MM:SS" with HH in 00-23 and
+// MM / SS in 00-59.
+#define CHECK_TIME_STR(s, s_l) \
+    ((s_l) == 8 && \
+     ((((s)[0] >= '0' && (s)[0] <= '1') && ((s)[1] >= '0' && (s)[1] <= '9')) || \
+      ((s)[0] == '2' && ((s)[1] >= '0' && (s)[1] <= '3'))) && \
+     (s)[2] == ':' && \
+     (((s)[3] >= '0' && (s)[3] <= '5') && ((s)[4] >= '0' && (s)[4] <= '9')) && \
+     (s)[5] == ':' && \
+     (((s)[6] >= '0' && (s)[6] <= '5') &&
((s)[7] >= '0' && (s)[7] <= '9')))
+
+// Fractional-second suffix: '.' followed by exactly six digits.
+#define CHECK_MICROSECONDS_STR(s, s_l) \
+    ((s_l) == 7 && \
+     (s)[0] == '.' && \
+     (s)[1] >= '0' && (s)[1] <= '9' && \
+     (s)[2] >= '0' && (s)[2] <= '9' && \
+     (s)[3] >= '0' && (s)[3] <= '9' && \
+     (s)[4] >= '0' && (s)[4] <= '9' && \
+     (s)[5] >= '0' && (s)[5] <= '9' && \
+     (s)[6] >= '0' && (s)[6] <= '9')
+
+// Fractional-second suffix: '.' followed by exactly three digits.
+#define CHECK_MILLISECONDS_STR(s, s_l) \
+    ((s_l) == 4 && \
+     (s)[0] == '.' && \
+     (s)[1] >= '0' && (s)[1] <= '9' && \
+     (s)[2] >= '0' && (s)[2] <= '9' && \
+     (s)[3] >= '0' && (s)[3] <= '9')
+
+// "HH:MM:SS.ffffff" (15 chars) and "HH:MM:SS.fff" (12 chars).
+#define CHECK_MICRO_TIME_STR(s, s_l) \
+    ((s_l) == 15 && CHECK_TIME_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7))
+
+#define CHECK_MILLI_TIME_STR(s, s_l) \
+    ((s_l) == 12 && CHECK_TIME_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4))
+
+// Date and time joined by a single ' ' or 'T'; 19 / 26 / 23 characters
+// for whole-second, microsecond and millisecond precision respectively.
+#define CHECK_DATETIME_STR(s, s_l) \
+    ((s_l) == 19 && \
+     CHECK_DATE_STR(s, 10) && \
+     ((s)[10] == ' ' || (s)[10] == 'T') && \
+     CHECK_TIME_STR((s)+11, 8))
+
+#define CHECK_MICRO_DATETIME_STR(s, s_l) \
+    ((s_l) == 26 && \
+     CHECK_DATE_STR(s, 10) && \
+     ((s)[10] == ' ' || (s)[10] == 'T') && \
+     CHECK_MICRO_TIME_STR((s)+11, 15))
+
+// The time part here is 12 characters ("HH:MM:SS.fff"), so it has to be
+// validated with the millisecond checker. The microsecond checker demands
+// a length of 15 and would reject every millisecond datetime.
+#define CHECK_MILLI_DATETIME_STR(s, s_l) \
+    ((s_l) == 23 && \
+     CHECK_DATE_STR(s, 10) && \
+     ((s)[10] == ' ' || (s)[10] == 'T') && \
+     CHECK_MILLI_TIME_STR((s)+11, 12))
+
+// Accept any of the three datetime precisions, dispatching on length.
+#define CHECK_ANY_DATETIME_STR(s, s_l) \
+    (((s_l) == 19 && CHECK_DATETIME_STR(s, s_l)) || \
+     ((s_l) == 23 && CHECK_MILLI_DATETIME_STR(s, s_l)) || \
+     ((s_l) == 26 && CHECK_MICRO_DATETIME_STR(s, s_l)))
+
+#define DATETIME_SIZE (19)
+#define DATETIME_MILLI_SIZE (23)
+#define DATETIME_MICRO_SIZE (26)
+
+// Precision tests based on length only; s is unused.
+#define IS_DATETIME_MILLI(s, s_l) ((s_l) == 23)
+#define IS_DATETIME_MICRO(s, s_l) ((s_l) == 26)
+
+// Accept any of the three time-of-day precisions, dispatching on length.
+#define CHECK_ANY_TIME_STR(s, s_l) \
+    (((s_l) == 8 && CHECK_TIME_STR(s, s_l)) || \
+     ((s_l) == 12 && CHECK_MILLI_TIME_STR(s, s_l)) || \
+     ((s_l) == 15 && CHECK_MICRO_TIME_STR(s, s_l)))
+
+#define TIME_SIZE (8)
+#define TIME_MILLI_SIZE (12)
+#define TIME_MICRO_SIZE (15)
+
+#define IS_TIME_MILLI(s, s_l) ((s_l) ==
12) +#define IS_TIME_MICRO(s, s_l) ((s_l) == 15) + +#define CHECK_TIMEDELTA1_STR(s, s_l) \ + ((s_l) == 7 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + (s)[1] == ':' && \ + (s)[2] >= '0' && (s)[2] <= '5' && \ + (s)[3] >= '0' && (s)[3] <= '9' && \ + (s)[4] == ':' && \ + (s)[5] >= '0' && (s)[5] <= '5' && \ + (s)[6] >= '0' && (s)[6] <= '9') + +#define CHECK_TIMEDELTA1_MILLI_STR(s, s_l) \ + ((s_l) == 11 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MILLISECONDS_STR((s)+7, 4)) + +#define CHECK_TIMEDELTA1_MICRO_STR(s, s_l) \ + ((s_l) == 14 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MICROSECONDS_STR((s)+7, 7)) + +#define CHECK_TIMEDELTA2_STR(s, s_l) \ + ((s_l) == 8 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + CHECK_TIMEDELTA1_STR((s)+1, 7)) + +#define CHECK_TIMEDELTA2_MILLI_STR(s, s_l) \ + ((s_l) == 12 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4)) + +#define CHECK_TIMEDELTA2_MICRO_STR(s, s_l) \ + ((s_l) == 15 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7)) + +#define CHECK_TIMEDELTA3_STR(s, s_l) \ + ((s_l) == 9 && \ + (s)[0] >= '0' && (s)[0] <= '9' && \ + (s)[1] >= '0' && (s)[1] <= '9' && \ + CHECK_TIMEDELTA1_STR((s)+2, 7)) + +#define CHECK_TIMEDELTA3_MILLI_STR(s, s_l) \ + ((s_l) == 13 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MILLISECONDS_STR((s)+9, 4)) + +#define CHECK_TIMEDELTA3_MICRO_STR(s, s_l) \ + ((s_l) == 16 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MICROSECONDS_STR((s)+9, 7)) + +// +// 0:00:00 / 0:00:00.000 / 0:00:00.000000 +// 00:00:00 / 00:00:00.000 / 00:00:00.000000 +// 000:00:00 / 000:00:00.000 / 000:00:00.000000 +// +#define CHECK_ANY_TIMEDELTA_STR(s, s_l) \ + (((s_l) > 0 && (s)[0] == '-') ? 
\ + (-1 * (_CHECK_ANY_TIMEDELTA_STR((s)+1, (s_l)-1))) : \ + (_CHECK_ANY_TIMEDELTA_STR((s), (s_l)))) + +#define _CHECK_ANY_TIMEDELTA_STR(s, s_l) \ + (CHECK_TIMEDELTA1_STR(s, s_l) || \ + CHECK_TIMEDELTA2_STR(s, s_l) || \ + CHECK_TIMEDELTA3_STR(s, s_l) || \ + CHECK_TIMEDELTA1_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA2_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA3_MILLI_STR(s, s_l) || \ + CHECK_TIMEDELTA1_MICRO_STR(s, s_l) || \ + CHECK_TIMEDELTA2_MICRO_STR(s, s_l) || \ + CHECK_TIMEDELTA3_MICRO_STR(s, s_l)) + +#define TIMEDELTA1_SIZE (7) +#define TIMEDELTA2_SIZE (8) +#define TIMEDELTA3_SIZE (9) +#define TIMEDELTA1_MILLI_SIZE (11) +#define TIMEDELTA2_MILLI_SIZE (12) +#define TIMEDELTA3_MILLI_SIZE (13) +#define TIMEDELTA1_MICRO_SIZE (14) +#define TIMEDELTA2_MICRO_SIZE (15) +#define TIMEDELTA3_MICRO_SIZE (16) + +#define IS_TIMEDELTA1(s, s_l) ((s_l) == 7 || (s_l) == 11 || (s_l) == 14) +#define IS_TIMEDELTA2(s, s_l) ((s_l) == 8 || (s_l) == 12 || (s_l) == 15) +#define IS_TIMEDELTA3(s, s_l) ((s_l) == 9 || (s_l) == 13 || (s_l) == 16) + +#define IS_TIMEDELTA_MILLI(s, s_l) ((s_l) == 11 || (s_l) == 12 || (s_l) == 13) +#define IS_TIMEDELTA_MICRO(s, s_l) ((s_l) == 14 || (s_l) == 15 || (s_l) == 16) + +typedef struct { + int output_type; + int parse_json; + PyObject *invalid_date_value; + PyObject *invalid_time_value; + PyObject *invalid_datetime_value; +} MySQLAccelOptions; + +inline int IMAX(int a, int b) { return((a) > (b) ? a : b); } +inline int IMIN(int a, int b) { return((a) < (b) ? 
a : b); } + +typedef struct { + PyObject_HEAD + PyObject *array_interface; +} ArrayObject; + +static void Array_dealloc(ArrayObject *self) { + if (self->array_interface) { + PyObject *data = PyDict_GetItemString(self->array_interface, "data"); + if (data) { + PyObject *buffer = PyTuple_GetItem(data, 0); + if (buffer) { + free((char*)PyLong_AsUnsignedLongLong(buffer)); + } + } + } + Py_XDECREF(self->array_interface); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject *Array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { + ArrayObject *self = (ArrayObject*)type->tp_alloc(type, 0); + if (self != NULL) { + self->array_interface = Py_None; + Py_INCREF(Py_None); + } + return (PyObject*)self; +} + +static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"array_interface", NULL}; + PyObject *array_interface = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &array_interface)) { + return -1; + } + + if (array_interface) { + PyObject *tmp = self->array_interface; + Py_INCREF(array_interface); + self->array_interface = array_interface; + Py_DECREF(tmp); + } + + return 0; +} + +static PyObject *Array_get__array_interface__(ArrayObject *self, void *closure) { + Py_INCREF(self->array_interface); + return self->array_interface; +} + +static PyGetSetDef Array_getsetters[] = { + {"__array_interface__", (getter)Array_get__array_interface__, + (setter)NULL, "array interface", NULL}, + {NULL} +}; + +static PyTypeObject ArrayType = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_pymysqlsv.Array", + .tp_doc = PyDoc_STR("Array manager"), + .tp_basicsize = sizeof(ArrayObject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_new = Array_new, + .tp_init = (initproc)Array_init, + .tp_dealloc = (destructor)Array_dealloc, + .tp_getset = Array_getsetters, +}; + +static void read_options(MySQLAccelOptions *options, PyObject *dict) { + if (!options || !dict) return; + + 
PyObject *key = NULL; + PyObject *value = NULL; + Py_ssize_t pos = 0; + + while (PyDict_Next(dict, &pos, &key, &value)) { + if (PyUnicode_CompareWithASCIIString(key, "output_type") == 0) { + if (PyUnicode_CompareWithASCIIString(value, "dict") == 0 || + PyUnicode_CompareWithASCIIString(value, "dicts") == 0 ) { + options->output_type = MYSQL_ACCEL_OUT_DICTS; + } + else if (PyUnicode_CompareWithASCIIString(value, "namedtuple") == 0 || + PyUnicode_CompareWithASCIIString(value, "namedtuples") == 0) { + options->output_type = MYSQL_ACCEL_OUT_NAMEDTUPLES; + } + else if (PyUnicode_CompareWithASCIIString(value, "numpy") == 0) { + options->output_type = MYSQL_ACCEL_OUT_NUMPY; + } + else if (PyUnicode_CompareWithASCIIString(value, "dataframe") == 0) { + options->output_type = MYSQL_ACCEL_OUT_DATAFRAME; + } + else { + options->output_type = MYSQL_ACCEL_OUT_TUPLES; + } + } else if (PyUnicode_CompareWithASCIIString(key, "parse_json") == 0) { + options->parse_json = PyObject_IsTrue(value); + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_date_value") == 0) { + options->invalid_date_value = value; + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_time_value") == 0) { + options->invalid_time_value = value; + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_datetime_value") == 0) { + options->invalid_datetime_value = value; + } + } +} + +// mysql, for whatever reason, treats 0 as an actual year, but not +// a leap year +// +inline int is_leap_year(int year) +{ + return (year % 4) == 0 && year != 0 && ((year % 100) != 0 || (year % 400) == 0); +} + +inline int days_in_previous_months(int month, int year) +{ + static const int previous_days[13] = + { + -31, + 0, + 31, + 31 + 28, + 31 + 28 + 31, + 31 + 28 + 31 + 30, + 31 + 28 + 31 + 30 + 31, + 31 + 28 + 31 + 30 + 31 + 30, + 31 + 28 + 31 + 30 + 31 + 30 + 31, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + 31 + 28 + 31 
+ 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, + }; + return previous_days[month] + (month > 2 && is_leap_year(year)); +} + +// NOTE: year 0 does not actually exist, but mysql pretends it does (and is NOT +// a leap year) +// +inline int leap_years_before(int year) +{ + return (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400; +} + +inline int days_in_previous_years(int year) +{ + return 365 * year + leap_years_before(year); +} + +static int64_t to_days(int year, int month, int day) { + return days_in_previous_years(year) + days_in_previous_months(month, year) + day; +} + +static void raise_exception( + PyObject *self, + char *err_type, + unsigned long long err_code, + char *err_str +) { + PyObject *py_exc = NULL; + PyObject *py_val = NULL; + + py_exc = PyObject_GetAttrString(self, err_type); + if (!py_exc) goto error; + + py_val = Py_BuildValue("(Ks)", err_code, err_str); + if (!py_val) goto error; + + PyErr_SetObject(py_exc, py_val); + +exit: + if (py_exc) { Py_DECREF(py_exc); } + if (py_val) { Py_DECREF(py_val); } + return; + +error: + goto exit; +} + +static int is_error_packet(char *buff_bytes) { + return buff_bytes && *(uint8_t*)buff_bytes == 0xFF; +} + +static void force_close(PyObject *self) { + PyObject *py_sock = NULL; + + py_sock = PyObject_GetAttrString(self, "_sock"); + if (!py_sock) goto error; + + PyObject_CallMethod(py_sock, "close", NULL); + PyErr_Clear(); + + PyObject_SetAttrString(self, "_sock", Py_None); + PyObject_SetAttrString(self, "_rfile", Py_None); + +exit: + if (py_sock) { Py_DECREF(py_sock); } + return; + +error: + goto exit; +} + +static PyObject *read_bytes( + PyObject *self, + unsigned long long num_bytes, + PyObject *py_read, + PyObject *py_settimeout, + PyObject *py_read_timeout +) { + PyObject *py_num_bytes = NULL; + PyObject *py_data = NULL; + PyObject *py_exc = NULL; + + if (py_read_timeout != Py_None) { + Py_XDECREF(PyObject_CallFunctionObjArgs(py_settimeout, py_read_timeout, NULL)); + if (PyErr_Occurred()) goto error; + } + + 
py_num_bytes = PyLong_FromUnsignedLongLong(num_bytes); + if (!py_num_bytes) goto error; + + while (1) { + py_data = PyObject_CallFunctionObjArgs(py_read, py_num_bytes, NULL); + + if ((py_exc = PyErr_Occurred())) { + if (PyErr_ExceptionMatches(PyExc_IOError) || PyErr_ExceptionMatches(PyExc_OSError)) { + PyObject *py_errno = PyObject_GetAttrString(py_exc, "errno"); + if (!py_errno) goto error; + unsigned long long err = PyLong_AsUnsignedLongLong(py_errno); + Py_DECREF(py_errno); + if (err == 4 /* errno.EINTER */) { + continue; + } + force_close(self); + raise_exception(self, "OperationalError", 0, + "Lost connection to MySQL server during query"); + goto error; + } + else if (PyErr_ExceptionMatches(PyExc_BaseException)) { + // Don't convert unknown exception to MySQLError. + force_close(self); + goto error; + } + } + + if (py_data) { + break; + } + } + + if (PyBytes_GET_SIZE(py_data) < (long int)num_bytes) { + force_close(self); + raise_exception(self, "OperationalError", 0, + "Lost connection to MySQL server during query"); + goto error; + } + +exit: + if (py_num_bytes) { Py_DECREF(py_num_bytes); } + return py_data; + +error: + if (py_data) { Py_DECREF(py_data); py_data = NULL; } + goto exit; +} + +static PyObject *read_packet( + PyObject *self, + PyObject *py_read, + PyObject *py_settimeout, + PyObject *py_read_timeout, + unsigned long long *next_seq_id +) { + PyObject *py_buff = PyByteArray_FromStringAndSize(NULL, 0); + PyObject *py_new_buff = NULL; + PyObject *py_packet_header = NULL; + PyObject *py_bytes_to_read = NULL; + PyObject *py_recv_data = NULL; + unsigned long long bytes_to_read = 0; + char *buff = NULL; + uint64_t btrl = 0; + uint8_t btrh = 0; + uint8_t packet_number = 0; + + while (1) { + py_packet_header = read_bytes(self, 4, py_read, py_settimeout, py_read_timeout); + if (!py_packet_header) goto error; + + buff = PyBytes_AsString(py_packet_header); + + btrl = *(uint16_t*)buff; + btrh = *(uint8_t*)(buff+2); + packet_number = *(uint8_t*)(buff+3); + 
bytes_to_read = btrl + (btrh << 16); + + Py_DECREF(py_packet_header); py_packet_header = NULL; + + if (packet_number != *next_seq_id) { + force_close(self); + if (packet_number == 0) { + raise_exception(self, "OperationalError", 0, + "Lost connection to MySQL server during query"); + + goto error; + } + raise_exception(self, "InternalError", 0, + "Packet sequence number wrong"); + goto error; + } + + *next_seq_id = (*next_seq_id + 1) % 256; + + py_recv_data = read_bytes(self, bytes_to_read, py_read, py_settimeout, py_read_timeout); + if (!py_recv_data) goto error; + + py_new_buff = PyByteArray_Concat(py_buff, py_recv_data); + if (!py_new_buff) goto error; + + Py_DECREF(py_buff); py_buff = py_new_buff; py_new_buff = NULL; + Py_DECREF(py_recv_data); py_recv_data = NULL; + + if (bytes_to_read == 0xFFFFFF) { + continue; + } + + if (bytes_to_read < MYSQL_MAX_PACKET_LEN) { + break; + } + } + + if (is_error_packet(PyByteArray_AsString(py_buff))) { + PyObject *py_result = PyObject_GetAttrString(self, "_result"); + if (py_result && py_result != Py_None) { + PyObject *py_unbuffered_active = PyObject_GetAttrString(py_result, "unbuffered_active"); + if (py_unbuffered_active == Py_True) { + PyObject_SetAttrString(py_result, "unbuffered_active", Py_False); + Py_DECREF(py_result); py_result = NULL; + } + Py_XDECREF(py_unbuffered_active); py_unbuffered_active = NULL; + } + Py_XDECREF(py_result); py_result = NULL; + PyObject_CallMethod(self, "_raise_mysql_exception", "O", py_buff, NULL); + } + +exit: + if (py_new_buff) { Py_DECREF(py_new_buff); } + if (py_bytes_to_read) { Py_DECREF(py_bytes_to_read); } + if (py_recv_data) { Py_DECREF(py_recv_data); } + if (py_packet_header) { Py_DECREF(py_packet_header); } + return py_buff; + +error: + if (py_buff) { Py_DECREF(py_buff); py_buff = NULL; } + goto exit; +} + +static int is_eof_packet(char *data, unsigned long long data_l) { + return data && (uint8_t)*(uint8_t*)data == 0xFE && data_l < 9; +} + +static int check_packet_is_eof( + char 
**data,
+    unsigned long long *data_l,
+    unsigned long long *warning_count,
+    int *has_next
+) {
+    uint16_t server_status = 0;
+    if (!data || !data_l) {
+        // Clear the out-parameters *before* returning; placed after the
+        // return they were unreachable and callers saw stale values.
+        if (has_next) *has_next = 0;
+        if (warning_count) *warning_count = 0;
+        return 0;
+    }
+    if (!is_eof_packet(*data, *data_l)) {
+        return 0;
+    }
+    if (*data_l < 5) {
+        // Marker byte present but the warning-count / status fields are
+        // not: report EOF without reading past the buffer or letting the
+        // unsigned length wrap around.
+        if (warning_count) *warning_count = 0;
+        if (has_next) *has_next = 0;
+        return 1;
+    }
+    // Layout after the 0xFE marker: 2-byte warning count, 2-byte status.
+    *data += 1; *data_l -= 1;
+    if (warning_count) *warning_count = **(uint16_t**)data;
+    *data += 2; *data_l -= 2;
+    server_status = **(uint16_t**)data;
+    *data += 2; *data_l -= 2;
+    if (has_next) *has_next = server_status & MYSQL_SERVER_MORE_RESULTS_EXISTS;
+    return 1;
+}
+
+// Decode one length-encoded integer at *data and advance the cursor.
+//
+//   data / data_l  in-out cursor and remaining byte count
+//   is_null        optional; set to 1 for the NULL marker, for missing or
+//                  truncated input and for an unknown prefix byte,
+//                  otherwise 0
+//
+// A first byte below MYSQL_COLUMN_UNSIGNED_CHAR is the value itself; the
+// SHORT / INT24 / INT64 prefixes are followed by a 2-, 3- or 8-byte
+// integer. Returns the decoded value, or 0 whenever *is_null is set.
+static unsigned long long read_length_encoded_integer(
+    char **data,
+    unsigned long long *data_l,
+    int *is_null
+) {
+    if (is_null) *is_null = 0;
+
+    if (!data || !data_l || *data_l == 0) {
+        if (is_null) *is_null = 1;
+        return 0;
+    }
+
+    uint8_t c = **(uint8_t**)data;
+    *data += 1; *data_l -= 1;
+
+    if (c == MYSQL_COLUMN_NULL) {
+        if (is_null) *is_null = 1;
+        return 0;
+    }
+
+    if (c < MYSQL_COLUMN_UNSIGNED_CHAR) {
+        return c;
+    }
+
+    if (c == MYSQL_COLUMN_UNSIGNED_SHORT) {
+        if (*data_l < 2) {
+            if (is_null) *is_null = 1;
+            return 0;
+        }
+        uint16_t out = **(uint16_t**)data;
+        *data += 2; *data_l -= 2;
+        return out;
+    }
+
+    if (c == MYSQL_COLUMN_UNSIGNED_INT24) {
+        if (*data_l < 3) {
+            if (is_null) *is_null = 1;
+            return 0;
+        }
+        // Three little-endian bytes: b0 | b1 << 8 | b2 << 16. Assembled
+        // byte by byte; the previous code read 1 byte as the low part and
+        // shifted the following 2 bytes by 16, producing wrong lengths.
+        const uint8_t *p = *(uint8_t**)data;
+        unsigned long long out = (unsigned long long)p[0]
+                               | ((unsigned long long)p[1] << 8)
+                               | ((unsigned long long)p[2] << 16);
+        *data += 3; *data_l -= 3;
+        return out;
+    }
+
+    if (c == MYSQL_COLUMN_UNSIGNED_INT64) {
+        if (*data_l < 8) {
+            if (is_null) *is_null = 1;
+            return 0;
+        }
+        uint64_t out = **(uint64_t**)data;
+        *data += 8; *data_l -= 8;
+        return out;
+    }
+
+    if (is_null) *is_null = 1;
+    return 0;
+}
+
+// Read one length-prefixed string: *out is pointed into the packet buffer
+// (no copy is made) and *out_l receives its length. *is_null is set when
+// the value is NULL or an argument is missing; in that case *out and
+// *out_l are left untouched.
+static void read_length_coded_string(
+    char **data,
+    unsigned long long *data_l,
+    char **out,
+    unsigned long long *out_l,
+    int *is_null
+) {
+    if (is_null) *is_null = 0;
+
+    if (!data || !data_l || !out || !out_l) {
+        if (is_null) *is_null = 1;
+        return;
+    }
+
+    unsigned long
long length = read_length_encoded_integer(data, data_l, is_null); + + if (is_null && *is_null) { + return; + } + + length = (length > *data_l) ? *data_l : length; + + *out = *data; + *out_l = length; + + *data += length; + *data_l -= length; + + return; +} + +static PyObject *build_array( + unsigned long long n_rows, + unsigned long n_cols, + PyObject **names, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + char *buffer, + unsigned long long row_size, + MySQLAccelOptions *options +) { + PyObject *py_out = NULL; + PyObject *py_shape = NULL; + PyObject *py_typestr = NULL; + PyObject *py_descr = NULL; + PyObject *py_descr_item = NULL; + PyObject *py_type = NULL; + PyObject *py_data = NULL; + PyObject *py_args = NULL; + PyObject *py_kwds = NULL; + PyObject *py_numpy_mod = NULL; + PyObject *py_numpy_array = NULL; + PyObject *py_array = NULL; + + py_numpy_mod = PyImport_ImportModule("numpy"); + if (!py_numpy_mod) goto error; + py_numpy_array = PyObject_GetAttrString(py_numpy_mod, "array"); + if (!py_numpy_array) goto error; + + py_out = PyDict_New(); + if (!py_out) goto error; + + py_shape = PyTuple_New(1); + if (!py_shape) goto error; + PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(n_rows)); + PyDict_SetItemString(py_out, "shape", py_shape); + Py_DECREF(py_shape); + + py_typestr = PyUnicode_FromFormat("|V%llu", row_size); + if (!py_typestr) goto error; + PyDict_SetItemString(py_out, "typestr", py_typestr); + Py_DECREF(py_typestr); + + py_descr = PyList_New(n_cols); + if (!py_descr) goto error; + PyDict_SetItemString(py_out, "descr", py_descr); + Py_DECREF(py_descr); + + for (unsigned long i = 0; i < n_cols; i++) { + py_descr_item = PyTuple_New(2); + if (!py_descr_item) goto error; + + PyList_SetItem(py_descr, i, py_descr_item); + + PyTuple_SetItem(py_descr_item, 0, names[i]); + // Caller already uses the borrowed reference. 
+ Py_INCREF(names[i]); + + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + py_type = PyUnicode_FromString("|O"); + break; + + case MYSQL_TYPE_TINY: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + py_type = PyUnicode_FromString("output_type == MYSQL_ACCEL_OUT_DATAFRAME) { + PyObject *out2 = NULL; + PyObject *py_pandas_mod = PyImport_ImportModule("pandas"); + if (!py_pandas_mod) goto error; + out2 = PyObject_CallMethod(py_pandas_mod, "DataFrame", "O", py_out); + Py_DECREF(py_pandas_mod); + if (!out2) goto error; + Py_DECREF(py_out); + py_out = out2; + } + +exit: + if (py_args) { Py_DECREF(py_args); } + if (py_kwds) { Py_DECREF(py_args); } + if (py_numpy_array) { Py_DECREF(py_numpy_array); } + if (py_numpy_mod) { Py_DECREF(py_numpy_mod); } + return py_out; + +error: + if (py_out) { Py_DECREF(py_out); py_out = NULL; } + goto exit; +} + +static unsigned long long compute_row_size( + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + unsigned long n_cols +) { + unsigned long long row_size = 0; + + for (unsigned long i = 0; i < n_cols; i++) { + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + row_size += sizeof(PyObject*); + break; + + case MYSQL_TYPE_TINY: + row_size += sizeof(int8_t); + break; + + case MYSQL_TYPE_SHORT: + row_size += sizeof(int16_t); + break; + + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + row_size += sizeof(int32_t); + break; + + case MYSQL_TYPE_LONGLONG: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_FLOAT: + row_size += sizeof(float); + break; + + case MYSQL_TYPE_DOUBLE: + row_size += sizeof(double); + break; + + case MYSQL_TYPE_NULL: + row_size += sizeof(PyObject*); + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + row_size += sizeof(int64_t); + break; + + case MYSQL_TYPE_TIME: + row_size += sizeof(int64_t); + break; + + case 
MYSQL_TYPE_YEAR: + row_size += sizeof(int16_t); + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + row_size += sizeof(PyObject*); + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + return 0; + } + } + + return row_size; +} + +static void read_dataframe_row_from_packet( + unsigned long n_cols, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + PyObject **names, + const char **encodings, + PyObject **converters, + PyObject *py_decimal, + PyObject *py_json_loads, + char *data, + unsigned long long data_l, + MySQLAccelOptions *options, + char *buffer +) { + char *out = NULL; + unsigned long long out_l = 0; + int is_null = 0; + PyObject *item = NULL; + PyObject *str = NULL; + char *end = NULL; + char *loc = buffer; + + int sign = 1; + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + int microsecond = 0; + + float float_nan = nanf(""); + double double_nan = nan(""); + + if (!buffer) goto error; + + for (unsigned long i = 0; i < n_cols; i++) { + + read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); + end = &out[out_l]; + + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + if (is_null) { + *(PyObject**)loc = Py_None; + Py_INCREF(Py_None); + } else { + str = NULL; + str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!str) goto error; + + item = PyObject_CallFunctionObjArgs(py_decimal, str, NULL); + Py_DECREF(str); str = NULL; + if (!item) goto error; + + *(PyObject**)loc = item; + } + loc += sizeof(PyObject*); + break; + + case MYSQL_TYPE_TINY: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint8_t*)loc = 
(is_null) ? 0 : (uint8_t)strtoul(out, &end, 10); + } else { + *(int8_t*)loc = (is_null) ? INT8_MIN : (int8_t)strtol(out, &end, 10); + } + loc += sizeof(int8_t); + break; + + case MYSQL_TYPE_SHORT: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint16_t*)loc = (is_null) ? 0 : (uint16_t)strtoul(out, &end, 10); + } else { + *(int16_t*)loc = (is_null) ? INT16_MIN : (int16_t)strtol(out, &end, 10); + } + loc += sizeof(int16_t); + break; + + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_LONG: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint32_t*)loc = (is_null) ? 0 : (uint32_t)strtoul(out, &end, 10); + } else { + *(int32_t*)loc = (is_null) ? INT32_MIN : (int32_t)strtol(out, &end, 10); + } + loc += sizeof(int32_t); + break; + + case MYSQL_TYPE_LONGLONG: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + *(uint64_t*)loc = (is_null) ? 0 : (uint64_t)strtoul(out, &end, 10); + } else { + *(int64_t*)loc = (is_null) ? INT64_MIN : (int64_t)strtol(out, &end, 10); + } + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_FLOAT: + if (is_null) { + *(float*)loc = (float)float_nan; + } else { + *(float*)loc = (float)strtod(out, &end); + } + loc += sizeof(float); + break; + + case MYSQL_TYPE_DOUBLE: + if (is_null) { + *(double*)loc = (double)double_nan; + } else { + *(double*)loc = (double)strtod(out, &end); + } + loc += sizeof(double); + break; + + case MYSQL_TYPE_NULL: + *(PyObject**)loc = Py_None; + loc += sizeof(PyObject*); + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + // TODO: Should use numpy's NaT + if (!CHECK_ANY_DATETIME_STR(out, out_l)) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + *(int64_t*)loc = (int64_t)(((to_days(year, month, day) - EPOCH_TO_DAYS) + * SECONDS_PER_DAY + hour * 3600 + minute * 60 + second) + * 1e9 + microsecond * 1e3); + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + if (!CHECK_DATE_STR(out, out_l)) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + *(int64_t*)loc = (int64_t)((to_days(year, month, day) - EPOCH_TO_DAYS) + * SECONDS_PER_DAY * 1e9); + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_TIME: + sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); + if (!sign) { + *(int64_t*)loc = (int64_t)(INT64_MIN); + loc += sizeof(int64_t); + break; + } else if (sign < 0) { + out += 1; out_l -= 1; + } + if (IS_TIMEDELTA1(out, out_l)) { + hour = CHR2INT1(out); out += 2; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA2(out, out_l)) { + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA3(out, out_l)) { + hour = CHR2INT3(out); out += 4; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + } + *(int64_t*)loc = (int64_t)((hour * 3600 + minute * 60 + second) + * 1e9 + microsecond * 1e3) * sign; + loc += sizeof(int64_t); + break; + + case MYSQL_TYPE_YEAR: + if (out_l == 0) { + *(uint16_t*)loc = 0; + loc += sizeof(uint16_t); + break; + } + end = &out[out_l]; + *(uint16_t*)loc = (uint16_t)strtoul(out, &end, 10); + loc += sizeof(uint16_t); + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + if (encodings[i] == NULL) { + item = PyBytes_FromStringAndSize(out, out_l); + if (!item) goto error; + break; + } + + item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!item) goto error; + + // Parse JSON string. + if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + str = item; + item = PyObject_CallFunctionObjArgs(py_json_loads, str, NULL); + Py_DECREF(str); str = NULL; + if (!item) goto error; + } + + *(PyObject**)loc = item; + loc += sizeof(PyObject*); + + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + goto error; + } + } + +exit: + return; + +error: + goto exit; +} + +static PyObject *read_obj_row_from_packet( + unsigned long n_cols, + unsigned long *type_codes, + unsigned long *flags, + unsigned long *scales, + PyObject **names, + const char **encodings, + PyObject **converters, + PyObject *py_decimal, + PyObject *py_json_loads, + PyTypeObject *namedtuple, + char *data, + unsigned long long data_l, + MySQLAccelOptions *options +) { + char *out = NULL; + char *orig_out = NULL; + unsigned long long out_l = 0; + unsigned long long orig_out_l = 0; + int is_null = 0; + PyObject *py_result = NULL; + PyObject *py_item = NULL; + PyObject *py_str = NULL; + char *end = NULL; + + int sign = 
1; + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + int microsecond = 0; + + switch (options->output_type) { + case MYSQL_ACCEL_OUT_NAMEDTUPLES: { + if (!namedtuple) goto error; + py_result = PyStructSequence_New(namedtuple); + break; + } + case MYSQL_ACCEL_OUT_DICTS: + py_result = PyDict_New(); + break; + default: + py_result = PyTuple_New(n_cols); + } + + for (unsigned long i = 0; i < n_cols; i++) { + + read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); + end = &out[out_l]; + + orig_out = out; + orig_out_l = out_l; + + py_item = Py_None; + + // Don't convert if it's a NULL. + if (!is_null) { + + // If a converter was passed in, use it. + if (converters[i]) { + py_str = NULL; + if (encodings[i] == NULL) { + py_str = PyBytes_FromStringAndSize(out, out_l); + if (!py_str) goto error; + } else { + py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_str) goto error; + } + py_item = PyObject_CallFunctionObjArgs(converters[i], py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + } + + // If no converter was passed in, do the default processing. 
+ else { + switch (type_codes[i]) { + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_DECIMAL: + py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_str) goto error; + + py_item = PyObject_CallFunctionObjArgs(py_decimal, py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + break; + + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + if (flags[i] & MYSQL_FLAG_UNSIGNED) { + py_item = PyLong_FromUnsignedLongLong(strtoul(out, &end, 10)); + } else { + py_item = PyLong_FromLongLong(strtol(out, &end, 10)); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + py_item = PyFloat_FromDouble(strtod(out, &end)); + if (!py_item) goto error; + break; + + case MYSQL_TYPE_NULL: + py_item = Py_None; + break; + + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_TIMESTAMP: + if (!CHECK_ANY_DATETIME_STR(out, out_l)) { + if (options && options->invalid_datetime_value) { + py_item = options->invalid_datetime_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + py_item = PyDateTime_FromDateAndTime(year, month, day, + hour, minute, second, microsecond); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_DATE: + if (!CHECK_DATE_STR(out, out_l)) { + if (options && options->invalid_date_value) { + py_item = options->invalid_date_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } + year = CHR2INT4(out); out += 5; + month = CHR2INT2(out); out += 3; + day = CHR2INT2(out); out += 3; + py_item = PyDate_FromDate(year, month, day); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_TIME: + sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); + if (!sign) { + if (options && options->invalid_time_value) { + py_item = options->invalid_time_value; + Py_INCREF(py_item); + } else { + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + if (!py_item) goto error; + } + break; + } else if (sign < 0) { + out += 1; out_l -= 1; + } + if (IS_TIMEDELTA1(out, out_l)) { + hour = CHR2INT1(out); out += 2; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA2(out, out_l)) { + hour = CHR2INT2(out); out += 3; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; + } + else if (IS_TIMEDELTA3(out, out_l)) { + hour = CHR2INT3(out); out += 4; + minute = CHR2INT2(out); out += 3; + second = CHR2INT2(out); out += 3; + microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : + (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; + } + py_item = PyDelta_FromDSU(0, sign * hour * 60 * 60 + + sign * minute * 60 + + sign * second, + sign * microsecond); + if (!py_item) { + PyErr_Clear(); + py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); + } + if (!py_item) goto error; + break; + + case MYSQL_TYPE_YEAR: + if (out_l == 0) { + goto error; + break; + } + end = &out[out_l]; + year = strtoul(out, &end, 10); + py_item = PyLong_FromLong(year); + if (!py_item) goto error; + break; + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_JSON: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VARCHAR: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + if (encodings[i] == NULL) { + py_item = PyBytes_FromStringAndSize(out, out_l); + if (!py_item) goto error; + break; + } + + py_item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + if (!py_item) goto error; + + // Parse JSON string. 
+ if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + py_str = py_item; + py_item = PyObject_CallFunctionObjArgs(py_json_loads, py_str, NULL); + Py_DECREF(py_str); py_str = NULL; + if (!py_item) goto error; + } + + break; + + default: + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + goto error; + } + } + } + + if (py_item == Py_None) { + Py_INCREF(Py_None); + } + + switch (options->output_type) { + case MYSQL_ACCEL_OUT_NAMEDTUPLES: + PyStructSequence_SET_ITEM(py_result, i, py_item); + break; + case MYSQL_ACCEL_OUT_DICTS: + PyDict_SetItem(py_result, names[i], py_item); + Py_INCREF(names[i]); + Py_DECREF(py_item); + break; + default: + PyTuple_SET_ITEM(py_result, i, py_item); + } + } + +exit: + return py_result; + +error: + if (py_result) { Py_DECREF(py_result); py_result = NULL; } + goto exit; +} + +static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *kwargs) { + PyObject *py_next_seq_id = NULL; // Packet sequence number + PyObject *py_conn = NULL; // Database connection + PyObject *py_fields = NULL; // List of table fields + PyObject *py_buff = NULL; // Packet bytes + PyObject *py_row = NULL; // Output row + PyObject *py_res = NULL; // MySQLResult object + PyObject *py_converters = NULL; // res.converters + PyObject *py_decimal_mod = NULL; // decimal module + PyObject *py_decimal = NULL; // decimal.Decimal + PyObject *py_json_mod = NULL; // json module + PyObject *py_json_loads = NULL; // json.loads + PyObject *py_rows = NULL; // Output object + PyObject *py_rfile = NULL; // Socket file I/O + PyObject *py_read = NULL; // File I/O read method + PyObject *py_sock = NULL; // Socket + PyObject *py_read_timeout = NULL; // Socket read timeout value + PyObject *py_settimeout = NULL; // Socket settimeout method + PyObject **converters = NULL; // List of converter functions + PyObject **names = NULL; // Column names + PyObject *py_default_converters = NULL; // Dict of default converters + PyObject 
*py_options = NULL; // Reader options + PyTypeObject *namedtuple = NULL; // Generated namedtuple type + const char **encodings = NULL; // Encoding for each column + unsigned long long n_cols = 0; + unsigned long long n_rows = 0; + unsigned long *type_codes = NULL; // Type code for each column + unsigned long *flags = NULL; // Column flags + unsigned long *scales = NULL; // Column scales + unsigned long *offsets = NULL; // Column offsets in buffer + unsigned long long next_seq_id = 0; + char *keywords[] = {"result", "options", NULL}; + MySQLAccelOptions options = {0}; + unsigned long long df_buffer_row_size = 0; + unsigned long long df_buffer_n_rows = 0; + char *df_buffer = NULL; + char *orig_df_buffer = NULL; + PyStructSequence_Desc namedtuple_desc = {0}; + int unbuffered = 0; + + // Parse function args. + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", keywords, &py_res, &py_options)) { + goto error; + } + + if (py_options && PyDict_Check(py_options)) { + py_default_converters = PyDict_GetItemString(py_options, "default_converters"); + if (py_default_converters && !PyDict_Check(py_default_converters)) { + py_default_converters = NULL; + } + Py_XINCREF(py_default_converters); + PyObject *py_unbuffered = PyDict_GetItemString(py_options, "unbuffered"); + if (py_unbuffered && PyObject_IsTrue(py_unbuffered)) { + unbuffered = 1; + } + } + + if (unbuffered) { + PyObject *unbuffered_active = PyObject_GetAttrString(py_res, "unbuffered_active"); + if (!PyObject_IsTrue(unbuffered_active)) { + Py_XDECREF(py_default_converters); + Py_XDECREF(unbuffered_active); + Py_INCREF(Py_None); + return Py_None; + } + Py_DECREF(unbuffered_active); unbuffered_active = NULL; + } + + // Import decimal module. + py_decimal_mod = PyImport_ImportModule("decimal"); + if (!py_decimal_mod) goto error; + py_decimal = PyObject_GetAttrString(py_decimal_mod, "Decimal"); + if (!py_decimal) goto error; + + // Import json module. 
+ py_json_mod = PyImport_ImportModule("json"); + if (!py_json_mod) goto error; + py_json_loads = PyObject_GetAttrString(py_json_mod, "loads"); + if (!py_json_loads) goto error; + + // Retrieve type codes for each column. + PyObject *py_field_count = PyObject_GetAttrString(py_res, "field_count"); + if (!py_field_count) goto error; + n_cols = PyLong_AsUnsignedLong(py_field_count); + Py_DECREF(py_field_count); py_field_count = NULL; + + py_converters = PyObject_GetAttrString(py_res, "converters"); + if (!py_converters) goto error; + + type_codes = calloc(n_cols, sizeof(unsigned long)); + if (!type_codes) goto error; + + flags = calloc(n_cols, sizeof(unsigned long)); + if (!flags) goto error; + + scales = calloc(n_cols, sizeof(unsigned long)); + if (!scales) goto error; + + encodings = calloc(n_cols, sizeof(char*)); + if (!encodings) goto error; + + converters = calloc(n_cols, sizeof(PyObject*)); + if (!converters) goto error; + + names = calloc(n_cols, sizeof(PyObject*)); + if (!names) goto error; + + py_fields = PyObject_GetAttrString(py_res, "fields"); + if (!py_fields) goto error; + + for (unsigned long i = 0; i < n_cols; i++) { + // Get type codes. + PyObject *py_field = PyList_GetItem(py_fields, i); + if (!py_field) goto error; + + PyObject *py_flags = PyObject_GetAttrString(py_field, "flags"); + if (!py_flags) goto error; + flags[i] = PyLong_AsUnsignedLong(py_flags); + Py_DECREF(py_flags); py_flags = NULL; + + PyObject *py_scale = PyObject_GetAttrString(py_field, "scale"); + if (!py_scale) goto error; + scales[i] = PyLong_AsUnsignedLong(py_scale); + Py_DECREF(py_scale); py_scale = NULL; + + PyObject *py_field_type = PyObject_GetAttrString(py_field, "type_code"); + if (!py_field_type) goto error; + type_codes[i] = PyLong_AsUnsignedLong(py_field_type); + PyObject *py_default_converter = (py_default_converters) ? + PyDict_GetItem(py_default_converters, py_field_type) : NULL; + Py_DECREF(py_field_type); py_field_type = NULL; + + // Get field name. 
+ PyObject *py_field_name = PyObject_GetAttrString(py_field, "name"); + if (!py_field_name) goto error; + names[i] = py_field_name; + + // Get field encodings (NULL means binary) and default converters. + PyObject *py_tmp = PyList_GetItem(py_converters, i); + if (!py_tmp) goto error; + PyObject *py_encoding = PyTuple_GetItem(py_tmp, 0); + if (!py_encoding) goto error; + PyObject *py_converter = PyTuple_GetItem(py_tmp, 1); + if (!py_converter) goto error; + + encodings[i] = (py_encoding == Py_None) ? + NULL : PyUnicode_AsUTF8AndSize(py_encoding, NULL); + converters[i] = (py_converter == Py_None || py_converter == py_default_converter) ? + NULL : py_converter; + } + + // Loop over all data packets. + py_conn = PyObject_GetAttrString(py_res, "connection"); + if (!py_conn) goto error; + + // Cache socket timeout and read methods. + py_sock = PyObject_GetAttrString(py_conn, "_sock"); + if (!py_sock) goto error; + py_settimeout = PyObject_GetAttrString(py_sock, "settimeout"); + if (!py_settimeout) goto error; + py_read_timeout = PyObject_GetAttrString(py_conn, "_read_timeout"); + if (!py_read_timeout) goto error; + + py_rfile = PyObject_GetAttrString(py_conn, "_rfile"); + if (!py_rfile) goto error; + py_read = PyObject_GetAttrString(py_rfile, "read"); + if (!py_read) goto error; + + py_next_seq_id = PyObject_GetAttrString(py_conn, "_next_seq_id"); + if (!py_next_seq_id) goto error; + next_seq_id = PyLong_AsUnsignedLongLong(py_next_seq_id); + Py_DECREF(py_next_seq_id); py_next_seq_id = NULL; + + options.parse_json = 1; + if (py_options && py_options != Py_None) { + read_options(&options, py_options); + } + + switch (options.output_type) { + case MYSQL_ACCEL_OUT_DATAFRAME: + case MYSQL_ACCEL_OUT_NUMPY: + // Setup dataframe buffer. + df_buffer_n_rows = (unbuffered) ? 
1 : 500; + df_buffer_row_size = compute_row_size(type_codes, flags, scales, n_cols); + orig_df_buffer = malloc(df_buffer_row_size * df_buffer_n_rows); + if (!orig_df_buffer) goto error; + df_buffer = orig_df_buffer; + break; + case MYSQL_ACCEL_OUT_NAMEDTUPLES: + namedtuple_desc.name = "Row"; + namedtuple_desc.doc = "Row of data values"; + namedtuple_desc.n_in_sequence = n_cols; + namedtuple_desc.fields = calloc(n_cols + 1, sizeof(PyStructSequence_Field)); + if (!namedtuple_desc.fields) goto error; + for (unsigned long long i = 0; i < n_cols; i++) { + namedtuple_desc.fields[i].name = PyUnicode_AsUTF8AndSize(names[i], NULL); + namedtuple_desc.fields[i].doc = NULL; + } + namedtuple = PyStructSequence_NewType(&namedtuple_desc); + if (!namedtuple) goto error; + // Fall through + default: + py_rows = PyList_New(0); + if (!py_rows) goto error; + } + + py_buff = NULL; + while (1) { + py_buff = read_packet(py_conn, py_read, py_settimeout, py_read_timeout, &next_seq_id); + if (!py_buff) goto error; + + char *data = PyByteArray_AsString(py_buff); + unsigned long long data_l = PyByteArray_GET_SIZE(py_buff); + unsigned long long warning_count = 0; + int has_next = 0; + + if (check_packet_is_eof(&data, &data_l, &warning_count, &has_next)) { + PyObject_SetAttrString(py_res, "warning_count", PyLong_FromUnsignedLongLong(warning_count)); + PyObject_SetAttrString(py_res, "has_next", PyLong_FromLong(has_next)); + PyObject_SetAttrString(py_res, "connection", Py_None); + if (unbuffered) { + PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); + PyObject_SetAttrString(py_res, "rows", Py_None); + Py_DECREF(py_rows); Py_INCREF(Py_None); py_rows = Py_None; + goto exit; + } + break; + } + + n_rows += 1; + + switch (options.output_type) { + case MYSQL_ACCEL_OUT_DATAFRAME: + case MYSQL_ACCEL_OUT_NUMPY: + if (n_rows >= df_buffer_n_rows) { + df_buffer_n_rows *= 1.7; + orig_df_buffer = realloc(orig_df_buffer, + df_buffer_row_size * df_buffer_n_rows); + df_buffer = orig_df_buffer + 
df_buffer_row_size * n_rows; + } + read_dataframe_row_from_packet(n_cols, type_codes, flags, scales, names, encodings, + converters, py_decimal, py_json_loads, + data, data_l, &options, df_buffer); + df_buffer += df_buffer_row_size; + break; + default: + py_row = read_obj_row_from_packet(n_cols, type_codes, flags, scales, names, encodings, + converters, py_decimal, py_json_loads, + namedtuple, data, data_l, &options); + if (!py_row) goto error; + PyList_Append(py_rows, py_row); + Py_DECREF(py_row); py_row = NULL; + } + + Py_DECREF(py_buff); py_buff = NULL; + + if (unbuffered) break; + } + + switch (options.output_type) { + case MYSQL_ACCEL_OUT_DATAFRAME: + case MYSQL_ACCEL_OUT_NUMPY: + orig_df_buffer = realloc(orig_df_buffer, df_buffer_row_size * n_rows); + py_rows = build_array(n_rows, n_cols, names, type_codes, flags, scales, + orig_df_buffer, df_buffer_row_size, &options); + } + + py_next_seq_id = PyLong_FromUnsignedLongLong(next_seq_id); + if (!py_next_seq_id) goto error; + PyObject_SetAttrString(py_conn, "_next_seq_id", py_next_seq_id); + Py_DECREF(py_next_seq_id); py_next_seq_id = NULL; + +exit: + if (converters) free(converters); + if (type_codes) free(type_codes); + if (flags) free(flags); + if (scales) free(scales); + if (encodings) free(encodings); + if (names) { + for (unsigned long i = 0; i < 0; i++) { + if (names[i]) { + Py_DECREF(names[i]); + } + } + free(names); + } + if (offsets) free(offsets); + + if (py_buff) { Py_DECREF(py_buff); } + if (namedtuple_desc.fields) { free(namedtuple_desc.fields); }; + if (namedtuple) { Py_DECREF(namedtuple); } + if (py_read) { Py_DECREF(py_read); } + if (py_rfile) { Py_DECREF(py_rfile); } + if (py_read_timeout) { Py_DECREF(py_read_timeout); } + if (py_settimeout) { Py_DECREF(py_settimeout); } + if (py_sock) { Py_DECREF(py_sock); } + if (py_conn) { Py_DECREF(py_conn); } + if (py_converters) { Py_DECREF(py_converters); } + if (py_default_converters) { Py_DECREF(py_default_converters); } + if (py_fields) { 
Py_DECREF(py_fields); } + if (py_decimal) { Py_DECREF(py_decimal); } + if (py_decimal_mod) { Py_DECREF(py_decimal_mod); } + if (py_json_loads) { Py_DECREF(py_json_loads); } + if (py_json_mod) { Py_DECREF(py_json_mod); } + + if (py_rows && py_rows != Py_None) { + PyObject *out = NULL; + PyObject *py_tuple_rows = NULL; + PyObject *py_n_rows = PyLong_FromSsize_t(n_rows); + PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); + Py_DECREF(py_n_rows); + switch (options.output_type) { + case MYSQL_ACCEL_OUT_DATAFRAME: + case MYSQL_ACCEL_OUT_NUMPY: + PyObject_SetAttrString(py_res, "rows", py_rows); + if (unbuffered) { + // TODO: reshape? + return py_rows; + } + break; + default: + py_tuple_rows = PyList_AsTuple(py_rows); + PyObject_SetAttrString(py_res, "rows", py_tuple_rows); + Py_DECREF(py_rows); py_rows = NULL; + if (unbuffered) { + out = PyTuple_GetItem(py_tuple_rows, 0); + Py_DECREF(py_tuple_rows); + Py_XINCREF(out); + return out; + } + Py_DECREF(py_tuple_rows); + } + return Py_None; + } + else if (py_rows && py_rows == Py_None) { + return Py_None; + } + + return NULL; + +error: + if (orig_df_buffer) { free(orig_df_buffer); } + if (py_rows) { Py_DECREF(py_rows); py_rows = NULL; } + goto exit; +} + +static PyMethodDef PyMySQLAccelMethods[] = { + {"read_rowdata_packet", (PyCFunction)read_rowdata_packet, METH_VARARGS | METH_KEYWORDS, "MySQL row data packet reader"}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef _pymysqlsvmodule = { + PyModuleDef_HEAD_INIT, + "_pymysqlsv", + "PyMySQL row data packet reader accelerator", + -1, + PyMySQLAccelMethods +}; + +PyMODINIT_FUNC PyInit__pymysqlsv(void) { + PyDateTime_IMPORT; + if (PyType_Ready(&ArrayType) < 0) { + return NULL; + } + return PyModule_Create(&_pymysqlsvmodule); +} From 655e85872009b52b189032ce34ed23735ac3cbb7 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 24 Aug 2022 15:38:02 -0500 Subject: [PATCH 02/12] Remove extra file --- src/accel.c.orig | 1935 
---------------------------------------------- 1 file changed, 1935 deletions(-) delete mode 100644 src/accel.c.orig diff --git a/src/accel.c.orig b/src/accel.c.orig deleted file mode 100644 index f3dfd79f9..000000000 --- a/src/accel.c.orig +++ /dev/null @@ -1,1935 +0,0 @@ - -#include -#include -#include -#include -#include - -#define MYSQL_ACCEL_OUT_TUPLES 0 -#define MYSQL_ACCEL_OUT_NAMEDTUPLES 1 -#define MYSQL_ACCEL_OUT_DICTS 2 -#define MYSQL_ACCEL_OUT_NUMPY 3 -#define MYSQL_ACCEL_OUT_DATAFRAME 4 - -#define MYSQL_FLAG_NOT_NULL 1 -#define MYSQL_FLAG_PRI_KEY 2 -#define MYSQL_FLAG_UNIQUE_KEY 4 -#define MYSQL_FLAG_MULTIPLE_KEY 8 -#define MYSQL_FLAG_BLOB 16 -#define MYSQL_FLAG_UNSIGNED 32 -#define MYSQL_FLAG_ZEROFILL 64 -#define MYSQL_FLAG_BINARY 128 -#define MYSQL_FLAG_ENUM 256 -#define MYSQL_FLAG_AUTO_INCREMENT 512 -#define MYSQL_FLAG_TIMESTAMP 1024 -#define MYSQL_FLAG_SET 2048 -#define MYSQL_FLAG_PART_KEY 16384 -#define MYSQL_FLAG_GROUP 32767 -#define MYSQL_FLAG_UNIQUE 65536 - -#define MYSQL_TYPE_DECIMAL 0 -#define MYSQL_TYPE_TINY 1 -#define MYSQL_TYPE_SHORT 2 -#define MYSQL_TYPE_LONG 3 -#define MYSQL_TYPE_FLOAT 4 -#define MYSQL_TYPE_DOUBLE 5 -#define MYSQL_TYPE_NULL 6 -#define MYSQL_TYPE_TIMESTAMP 7 -#define MYSQL_TYPE_LONGLONG 8 -#define MYSQL_TYPE_INT24 9 -#define MYSQL_TYPE_DATE 10 -#define MYSQL_TYPE_TIME 11 -#define MYSQL_TYPE_DATETIME 12 -#define MYSQL_TYPE_YEAR 13 -#define MYSQL_TYPE_NEWDATE 14 -#define MYSQL_TYPE_VARCHAR 15 -#define MYSQL_TYPE_BIT 16 -#define MYSQL_TYPE_JSON 245 -#define MYSQL_TYPE_NEWDECIMAL 246 -#define MYSQL_TYPE_ENUM 247 -#define MYSQL_TYPE_SET 248 -#define MYSQL_TYPE_TINY_BLOB 249 -#define MYSQL_TYPE_MEDIUM_BLOB 250 -#define MYSQL_TYPE_LONG_BLOB 251 -#define MYSQL_TYPE_BLOB 252 -#define MYSQL_TYPE_VAR_STRING 253 -#define MYSQL_TYPE_STRING 254 -#define MYSQL_TYPE_GEOMETRY 255 - -#define MYSQL_TYPE_CHAR MYSQL_TYPE_TINY -#define MYSQL_TYPE_INTERVAL MYSQL_TYPE_ENUM - -#define MYSQL_COLUMN_NULL 251 -#define MYSQL_COLUMN_UNSIGNED_CHAR 251 
-#define MYSQL_COLUMN_UNSIGNED_SHORT 252 -#define MYSQL_COLUMN_UNSIGNED_INT24 253 -#define MYSQL_COLUMN_UNSIGNED_INT64 254 - -#define MYSQL_SERVER_MORE_RESULTS_EXISTS 8 - -// 2**24 - 1 -#define MYSQL_MAX_PACKET_LEN 16777215 - -#define EPOCH_TO_DAYS 719528 -#define SECONDS_PER_DAY (24 * 60 * 60) - -#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIMEDELTA 0 -#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIME 1 -#define MYSQL_ACCEL_OPTION_JSON_TYPE_STRING 0 -#define MYSQL_ACCEL_OPTION_JSON_TYPE_OBJ 1 -#define MYSQL_ACCEL_OPTION_BIT_TYPE_BYTES 0 -#define MYSQL_ACCEL_OPTION_BIT_TYPE_INT 1 - -#define CHR2INT1(x) ((x)[1] - '0') -#define CHR2INT2(x) ((((x)[0] - '0') * 10) + ((x)[1] - '0')) -#define CHR2INT3(x) ((((x)[0] - '0') * 1e2) + (((x)[1] - '0') * 10) + ((x)[2] - '0')) -#define CHR2INT4(x) ((((x)[0] - '0') * 1e3) + (((x)[1] - '0') * 1e2) + (((x)[2] - '0') * 10) + ((x)[3] - '0')) -#define CHR2INT6(x) ((((x)[0] - '0') * 1e5) + (((x)[1] - '0') * 1e4) + (((x)[2] - '0') * 1e3) + (((x)[3] - '0') * 1e2) + (((x)[4] - '0') * 10) + (((x)[5] - '0'))) - -#define CHECK_DATE_STR(s, s_l) \ - ((s_l) == 10 && \ - (s)[0] >= '0' && (s)[0] <= '9' && \ - (s)[1] >= '0' && (s)[1] <= '9' && \ - (s)[2] >= '0' && (s)[2] <= '9' && \ - (s)[3] >= '0' && (s)[3] <= '9' && \ - (s)[4] == '-' && \ - (((s)[5] == '1' && ((s)[6] >= '0' && (s)[6] <= '2')) || \ - ((s)[5] == '0' && ((s)[6] >= '1' && (s)[6] <= '9'))) && \ - (s)[7] == '-' && \ - ((((s)[8] >= '0' && (s)[8] <= '2') && ((s)[9] >= '0' && (s)[9] <= '9')) || \ - ((s)[8] == '3' && ((s)[9] >= '0' && (s)[9] <= '1'))) && \ - !((s)[0] == '0' && (s)[1] == '0' && (s)[2] == '0' && (s)[3] == '0') && \ - !((s)[5] == '0' && (s)[6] == '0') && \ - !((s)[8] == '0' && (s)[9] == '0')) - -#define CHECK_TIME_STR(s, s_l) \ - ((s_l) == 8 && \ - ((((s)[0] >= '0' && (s)[0] <= '1') && ((s)[1] >= '0' && (s)[1] <= '9')) || \ - ((s)[0] == '2' && ((s)[1] >= '0' && (s)[1] <= '3'))) && \ - (s)[2] == ':' && \ - (((s)[3] >= '0' && (s)[3] <= '5') && ((s)[4] >= '0' && (s)[4] <= '9')) && \ - (s)[5] 
== ':' && \ - (((s)[6] >= '0' && (s)[6] <= '5') && ((s)[7] >= '0' && (s)[7] <= '9'))) - -#define CHECK_MICROSECONDS_STR(s, s_l) \ - ((s_l) == 7 && \ - (s)[0] == '.' && \ - (s)[1] >= '0' && (s)[1] <= '9' && \ - (s)[2] >= '0' && (s)[2] <= '9' && \ - (s)[3] >= '0' && (s)[3] <= '9' && \ - (s)[4] >= '0' && (s)[4] <= '9' && \ - (s)[5] >= '0' && (s)[5] <= '9' && \ - (s)[6] >= '0' && (s)[6] <= '9') - -#define CHECK_MILLISECONDS_STR(s, s_l) \ - ((s_l) == 4 && \ - (s)[0] == '.' && \ - (s)[1] >= '0' && (s)[1] <= '9' && \ - (s)[2] >= '0' && (s)[2] <= '9' && \ - (s)[3] >= '0' && (s)[3] <= '9') - -#define CHECK_MICRO_TIME_STR(s, s_l) \ - ((s_l) == 15 && CHECK_TIME_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7)) - -#define CHECK_MILLI_TIME_STR(s, s_l) \ - ((s_l) == 12 && CHECK_TIME_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4)) - -#define CHECK_DATETIME_STR(s, s_l) \ - ((s_l) == 19 && \ - CHECK_DATE_STR(s, 10) && \ - ((s)[10] == ' ' || (s)[10] == 'T') && \ - CHECK_TIME_STR((s)+11, 8)) - -#define CHECK_MICRO_DATETIME_STR(s, s_l) \ - ((s_l) == 26 && \ - CHECK_DATE_STR(s, 10) && \ - ((s)[10] == ' ' || (s)[10] == 'T') && \ - CHECK_MICRO_TIME_STR((s)+11, 15)) - -#define CHECK_MILLI_DATETIME_STR(s, s_l) \ - ((s_l) == 23 && \ - CHECK_DATE_STR(s, 10) && \ - ((s)[10] == ' ' || (s)[10] == 'T') && \ - CHECK_MICRO_TIME_STR((s)+11, 12)) - -#define CHECK_ANY_DATETIME_STR(s, s_l) \ - (((s_l) == 19 && CHECK_DATETIME_STR(s, s_l)) || \ - ((s_l) == 23 && CHECK_MILLI_DATETIME_STR(s, s_l)) || \ - ((s_l) == 26 && CHECK_MICRO_DATETIME_STR(s, s_l))) - -#define DATETIME_SIZE (19) -#define DATETIME_MILLI_SIZE (23) -#define DATETIME_MICRO_SIZE (26) - -#define IS_DATETIME_MILLI(s, s_l) ((s_l) == 23) -#define IS_DATETIME_MICRO(s, s_l) ((s_l) == 26) - -#define CHECK_ANY_TIME_STR(s, s_l) \ - (((s_l) == 8 && CHECK_TIME_STR(s, s_l)) || \ - ((s_l) == 12 && CHECK_MILLI_TIME_STR(s, s_l)) || \ - ((s_l) == 15 && CHECK_MICRO_TIME_STR(s, s_l))) - -#define TIME_SIZE (8) -#define TIME_MILLI_SIZE (12) -#define 
TIME_MICRO_SIZE (15) - -#define IS_TIME_MILLI(s, s_l) ((s_l) == 12) -#define IS_TIME_MICRO(s, s_l) ((s_l) == 15) - -#define CHECK_TIMEDELTA1_STR(s, s_l) \ - ((s_l) == 7 && \ - (s)[0] >= '0' && (s)[0] <= '9' && \ - (s)[1] == ':' && \ - (s)[2] >= '0' && (s)[2] <= '5' && \ - (s)[3] >= '0' && (s)[3] <= '9' && \ - (s)[4] == ':' && \ - (s)[5] >= '0' && (s)[5] <= '5' && \ - (s)[6] >= '0' && (s)[6] <= '9') - -#define CHECK_TIMEDELTA1_MILLI_STR(s, s_l) \ - ((s_l) == 11 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MILLISECONDS_STR((s)+7, 4)) - -#define CHECK_TIMEDELTA1_MICRO_STR(s, s_l) \ - ((s_l) == 14 && CHECK_TIMEDELTA1_STR(s, 7) && CHECK_MICROSECONDS_STR((s)+7, 7)) - -#define CHECK_TIMEDELTA2_STR(s, s_l) \ - ((s_l) == 8 && \ - (s)[0] >= '0' && (s)[0] <= '9' && \ - CHECK_TIMEDELTA1_STR((s)+1, 7)) - -#define CHECK_TIMEDELTA2_MILLI_STR(s, s_l) \ - ((s_l) == 12 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MILLISECONDS_STR((s)+8, 4)) - -#define CHECK_TIMEDELTA2_MICRO_STR(s, s_l) \ - ((s_l) == 15 && CHECK_TIMEDELTA2_STR(s, 8) && CHECK_MICROSECONDS_STR((s)+8, 7)) - -#define CHECK_TIMEDELTA3_STR(s, s_l) \ - ((s_l) == 9 && \ - (s)[0] >= '0' && (s)[0] <= '9' && \ - (s)[1] >= '0' && (s)[1] <= '9' && \ - CHECK_TIMEDELTA1_STR((s)+2, 7)) - -#define CHECK_TIMEDELTA3_MILLI_STR(s, s_l) \ - ((s_l) == 13 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MILLISECONDS_STR((s)+9, 4)) - -#define CHECK_TIMEDELTA3_MICRO_STR(s, s_l) \ - ((s_l) == 16 && CHECK_TIMEDELTA3_STR(s, 9) && CHECK_MICROSECONDS_STR((s)+9, 7)) - -// -// 0:00:00 / 0:00:00.000 / 0:00:00.000000 -// 00:00:00 / 00:00:00.000 / 00:00:00.000000 -// 000:00:00 / 000:00:00.000 / 000:00:00.000000 -// -#define CHECK_ANY_TIMEDELTA_STR(s, s_l) \ - (((s_l) > 0 && (s)[0] == '-') ? 
\ - (-1 * (_CHECK_ANY_TIMEDELTA_STR((s)+1, (s_l)-1))) : \ - (_CHECK_ANY_TIMEDELTA_STR((s), (s_l)))) - -#define _CHECK_ANY_TIMEDELTA_STR(s, s_l) \ - (CHECK_TIMEDELTA1_STR(s, s_l) || \ - CHECK_TIMEDELTA2_STR(s, s_l) || \ - CHECK_TIMEDELTA3_STR(s, s_l) || \ - CHECK_TIMEDELTA1_MILLI_STR(s, s_l) || \ - CHECK_TIMEDELTA2_MILLI_STR(s, s_l) || \ - CHECK_TIMEDELTA3_MILLI_STR(s, s_l) || \ - CHECK_TIMEDELTA1_MICRO_STR(s, s_l) || \ - CHECK_TIMEDELTA2_MICRO_STR(s, s_l) || \ - CHECK_TIMEDELTA3_MICRO_STR(s, s_l)) - -#define TIMEDELTA1_SIZE (7) -#define TIMEDELTA2_SIZE (8) -#define TIMEDELTA3_SIZE (9) -#define TIMEDELTA1_MILLI_SIZE (11) -#define TIMEDELTA2_MILLI_SIZE (12) -#define TIMEDELTA3_MILLI_SIZE (13) -#define TIMEDELTA1_MICRO_SIZE (14) -#define TIMEDELTA2_MICRO_SIZE (15) -#define TIMEDELTA3_MICRO_SIZE (16) - -#define IS_TIMEDELTA1(s, s_l) ((s_l) == 7 || (s_l) == 11 || (s_l) == 14) -#define IS_TIMEDELTA2(s, s_l) ((s_l) == 8 || (s_l) == 12 || (s_l) == 15) -#define IS_TIMEDELTA3(s, s_l) ((s_l) == 9 || (s_l) == 13 || (s_l) == 16) - -#define IS_TIMEDELTA_MILLI(s, s_l) ((s_l) == 11 || (s_l) == 12 || (s_l) == 13) -#define IS_TIMEDELTA_MICRO(s, s_l) ((s_l) == 14 || (s_l) == 15 || (s_l) == 16) - -typedef struct { - int output_type; - int parse_json; - PyObject *invalid_date_value; - PyObject *invalid_time_value; - PyObject *invalid_datetime_value; -} MySQLAccelOptions; - -inline int IMAX(int a, int b) { return((a) > (b) ? a : b); } -inline int IMIN(int a, int b) { return((a) < (b) ? 
a : b); } - -typedef struct { - PyObject_HEAD - PyObject *array_interface; -} ArrayObject; - -static void Array_dealloc(ArrayObject *self) { - if (self->array_interface) { - PyObject *data = PyDict_GetItemString(self->array_interface, "data"); - if (data) { - PyObject *buffer = PyTuple_GetItem(data, 0); - if (buffer) { - free((char*)PyLong_AsUnsignedLongLong(buffer)); - } - } - } - Py_XDECREF(self->array_interface); - Py_TYPE(self)->tp_free((PyObject*)self); -} - -static PyObject *Array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - ArrayObject *self = (ArrayObject*)type->tp_alloc(type, 0); - if (self != NULL) { - self->array_interface = Py_None; - Py_INCREF(Py_None); - } - return (PyObject*)self; -} - -static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"array_interface", NULL}; - PyObject *array_interface = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &array_interface)) { - return -1; - } - - if (array_interface) { - PyObject *tmp = self->array_interface; - Py_INCREF(array_interface); - self->array_interface = array_interface; - Py_DECREF(tmp); - } - - return 0; -} - -static PyObject *Array_get__array_interface__(ArrayObject *self, void *closure) { - Py_INCREF(self->array_interface); - return self->array_interface; -} - -static PyGetSetDef Array_getsetters[] = { - {"__array_interface__", (getter)Array_get__array_interface__, - (setter)NULL, "array interface", NULL}, - {NULL} -}; - -static PyTypeObject ArrayType = { - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "_pymysqlsv.Array", - .tp_doc = PyDoc_STR("Array manager"), - .tp_basicsize = sizeof(ArrayObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .tp_new = Array_new, - .tp_init = (initproc)Array_init, - .tp_dealloc = (destructor)Array_dealloc, - .tp_getset = Array_getsetters, -}; - -static void read_options(MySQLAccelOptions *options, PyObject *dict) { - if (!options || !dict) return; - - 
PyObject *key = NULL; - PyObject *value = NULL; - Py_ssize_t pos = 0; - - while (PyDict_Next(dict, &pos, &key, &value)) { - if (PyUnicode_CompareWithASCIIString(key, "output_type") == 0) { - if (PyUnicode_CompareWithASCIIString(value, "dict") == 0 || - PyUnicode_CompareWithASCIIString(value, "dicts") == 0 ) { - options->output_type = MYSQL_ACCEL_OUT_DICTS; - } - else if (PyUnicode_CompareWithASCIIString(value, "namedtuple") == 0 || - PyUnicode_CompareWithASCIIString(value, "namedtuples") == 0) { - options->output_type = MYSQL_ACCEL_OUT_NAMEDTUPLES; - } - else if (PyUnicode_CompareWithASCIIString(value, "numpy") == 0) { - options->output_type = MYSQL_ACCEL_OUT_NUMPY; - } - else if (PyUnicode_CompareWithASCIIString(value, "dataframe") == 0) { - options->output_type = MYSQL_ACCEL_OUT_DATAFRAME; - } - else { - options->output_type = MYSQL_ACCEL_OUT_TUPLES; - } - } else if (PyUnicode_CompareWithASCIIString(key, "parse_json") == 0) { - options->parse_json = PyObject_IsTrue(value); - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_date_value") == 0) { - options->invalid_date_value = value; - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_time_value") == 0) { - options->invalid_time_value = value; - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_datetime_value") == 0) { - options->invalid_datetime_value = value; - } - } -} - -// mysql, for whatever reason, treats 0 as an actual year, but not -// a leap year -// -inline int is_leap_year(int year) -{ - return (year % 4) == 0 && year != 0 && ((year % 100) != 0 || (year % 400) == 0); -} - -inline int days_in_previous_months(int month, int year) -{ - static const int previous_days[13] = - { - -31, - 0, - 31, - 31 + 28, - 31 + 28 + 31, - 31 + 28 + 31 + 30, - 31 + 28 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, - 31 + 28 + 31 
+ 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, - }; - return previous_days[month] + (month > 2 && is_leap_year(year)); -} - -// NOTE: year 0 does not actually exist, but mysql pretends it does (and is NOT -// a leap year) -// -inline int leap_years_before(int year) -{ - return (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400; -} - -inline int days_in_previous_years(int year) -{ - return 365 * year + leap_years_before(year); -} - -static int64_t to_days(int year, int month, int day) { - return days_in_previous_years(year) + days_in_previous_months(month, year) + day; -} - -static void raise_exception( - PyObject *self, - char *err_type, - unsigned long long err_code, - char *err_str -) { - PyObject *py_exc = NULL; - PyObject *py_val = NULL; - - py_exc = PyObject_GetAttrString(self, err_type); - if (!py_exc) goto error; - - py_val = Py_BuildValue("(Ks)", err_code, err_str); - if (!py_val) goto error; - - PyErr_SetObject(py_exc, py_val); - -exit: - if (py_exc) { Py_DECREF(py_exc); } - if (py_val) { Py_DECREF(py_val); } - return; - -error: - goto exit; -} - -static int is_error_packet(char *buff_bytes) { - return buff_bytes && *(uint8_t*)buff_bytes == 0xFF; -} - -static void force_close(PyObject *self) { - PyObject *py_sock = NULL; - - py_sock = PyObject_GetAttrString(self, "_sock"); - if (!py_sock) goto error; - - PyObject_CallMethod(py_sock, "close", NULL); - PyErr_Clear(); - - PyObject_SetAttrString(self, "_sock", Py_None); - PyObject_SetAttrString(self, "_rfile", Py_None); - -exit: - if (py_sock) { Py_DECREF(py_sock); } - return; - -error: - goto exit; -} - -static PyObject *read_bytes( - PyObject *self, - unsigned long long num_bytes, - PyObject *py_read, - PyObject *py_settimeout, - PyObject *py_read_timeout -) { - PyObject *py_num_bytes = NULL; - PyObject *py_data = NULL; - PyObject *py_exc = NULL; - - if (py_read_timeout != Py_None) { - Py_XDECREF(PyObject_CallFunctionObjArgs(py_settimeout, py_read_timeout, NULL)); - if (PyErr_Occurred()) goto error; - } - - 
py_num_bytes = PyLong_FromUnsignedLongLong(num_bytes); - if (!py_num_bytes) goto error; - - while (1) { - py_data = PyObject_CallFunctionObjArgs(py_read, py_num_bytes, NULL); - - if ((py_exc = PyErr_Occurred())) { - if (PyErr_ExceptionMatches(PyExc_IOError) || PyErr_ExceptionMatches(PyExc_OSError)) { - PyObject *py_errno = PyObject_GetAttrString(py_exc, "errno"); - if (!py_errno) goto error; - unsigned long long err = PyLong_AsUnsignedLongLong(py_errno); - Py_DECREF(py_errno); - if (err == 4 /* errno.EINTER */) { - continue; - } - force_close(self); - raise_exception(self, "OperationalError", 0, - "Lost connection to MySQL server during query"); - goto error; - } - else if (PyErr_ExceptionMatches(PyExc_BaseException)) { - // Don't convert unknown exception to MySQLError. - force_close(self); - goto error; - } - } - - if (py_data) { - break; - } - } - - if (PyBytes_GET_SIZE(py_data) < (long int)num_bytes) { - force_close(self); - raise_exception(self, "OperationalError", 0, - "Lost connection to MySQL server during query"); - goto error; - } - -exit: - if (py_num_bytes) { Py_DECREF(py_num_bytes); } - return py_data; - -error: - if (py_data) { Py_DECREF(py_data); py_data = NULL; } - goto exit; -} - -static PyObject *read_packet( - PyObject *self, - PyObject *py_read, - PyObject *py_settimeout, - PyObject *py_read_timeout, - unsigned long long *next_seq_id -) { - PyObject *py_buff = PyByteArray_FromStringAndSize(NULL, 0); - PyObject *py_new_buff = NULL; - PyObject *py_packet_header = NULL; - PyObject *py_bytes_to_read = NULL; - PyObject *py_recv_data = NULL; - unsigned long long bytes_to_read = 0; - char *buff = NULL; - uint64_t btrl = 0; - uint8_t btrh = 0; - uint8_t packet_number = 0; - - while (1) { - py_packet_header = read_bytes(self, 4, py_read, py_settimeout, py_read_timeout); - if (!py_packet_header) goto error; - - buff = PyBytes_AsString(py_packet_header); - - btrl = *(uint16_t*)buff; - btrh = *(uint8_t*)(buff+2); - packet_number = *(uint8_t*)(buff+3); - 
bytes_to_read = btrl + (btrh << 16); - - Py_DECREF(py_packet_header); py_packet_header = NULL; - - if (packet_number != *next_seq_id) { - force_close(self); - if (packet_number == 0) { - raise_exception(self, "OperationalError", 0, - "Lost connection to MySQL server during query"); - - goto error; - } - raise_exception(self, "InternalError", 0, - "Packet sequence number wrong"); - goto error; - } - - *next_seq_id = (*next_seq_id + 1) % 256; - - py_recv_data = read_bytes(self, bytes_to_read, py_read, py_settimeout, py_read_timeout); - if (!py_recv_data) goto error; - - py_new_buff = PyByteArray_Concat(py_buff, py_recv_data); - if (!py_new_buff) goto error; - - Py_DECREF(py_buff); py_buff = py_new_buff; py_new_buff = NULL; - Py_DECREF(py_recv_data); py_recv_data = NULL; - - if (bytes_to_read == 0xFFFFFF) { - continue; - } - - if (bytes_to_read < MYSQL_MAX_PACKET_LEN) { - break; - } - } - - if (is_error_packet(PyByteArray_AsString(py_buff))) { - PyObject *py_result = PyObject_GetAttrString(self, "_result"); - if (py_result && py_result != Py_None) { - PyObject *py_unbuffered_active = PyObject_GetAttrString(py_result, "unbuffered_active"); - if (py_unbuffered_active == Py_True) { - PyObject_SetAttrString(py_result, "unbuffered_active", Py_False); - Py_DECREF(py_result); py_result = NULL; - } - Py_XDECREF(py_unbuffered_active); py_unbuffered_active = NULL; - } - Py_XDECREF(py_result); py_result = NULL; - PyObject_CallMethod(self, "_raise_mysql_exception", "O", py_buff, NULL); - } - -exit: - if (py_new_buff) { Py_DECREF(py_new_buff); } - if (py_bytes_to_read) { Py_DECREF(py_bytes_to_read); } - if (py_recv_data) { Py_DECREF(py_recv_data); } - if (py_packet_header) { Py_DECREF(py_packet_header); } - return py_buff; - -error: - if (py_buff) { Py_DECREF(py_buff); py_buff = NULL; } - goto exit; -} - -static int is_eof_packet(char *data, unsigned long long data_l) { - return data && (uint8_t)*(uint8_t*)data == 0xFE && data_l < 9; -} - -static int check_packet_is_eof( - char 
**data, - unsigned long long *data_l, - unsigned long long *warning_count, - int *has_next -) { - uint16_t server_status = 0; - if (!data || !data_l) { - return 0; - if (has_next) *has_next = 0; - if (warning_count) *warning_count = 0; - } - if (!is_eof_packet(*data, *data_l)) { - return 0; - } - *data += 1; *data_l -= 1; - if (warning_count) *warning_count = **(uint16_t**)data; - *data += 2; *data_l -= 2; - server_status = **(uint16_t**)data; - *data += 2; *data_l -= 2; - if (has_next) *has_next = server_status & MYSQL_SERVER_MORE_RESULTS_EXISTS; - return 1; -} - -static unsigned long long read_length_encoded_integer( - char **data, - unsigned long long *data_l, - int *is_null -) { - if (is_null) *is_null = 0; - - if (!data || !data_l || *data_l == 0) { - if (is_null) *is_null = 1; - return 0; - } - - uint8_t c = **(uint8_t**)data; - *data += 1; *data_l -= 1; - - if (c == MYSQL_COLUMN_NULL) { - if (is_null) *is_null = 1; - return 0; - } - - if (c < MYSQL_COLUMN_UNSIGNED_CHAR) { - return c; - } - - if (c == MYSQL_COLUMN_UNSIGNED_SHORT) { - if (*data_l < 2) { - if (is_null) *is_null = 1; - return 0; - } - uint16_t out = **(uint16_t**)data; - *data += 2; *data_l -= 2; - return out; - } - - if (c == MYSQL_COLUMN_UNSIGNED_INT24) { - if (*data_l < 3) { - if (is_null) *is_null = 1; - return 0; - } - uint16_t low = **(uint8_t**)data; - *data += 1; *data_l -= 1; - uint16_t high = **(uint16_t**)data; - *data += 2; *data_l -= 2; - return low + (high << 16); - } - - if (c == MYSQL_COLUMN_UNSIGNED_INT64) { - if (*data_l < 8) { - if (is_null) *is_null = 1; - return 0; - } - uint64_t out = **(uint64_t**)data; - *data += 8; *data_l -= 8; - return out; - } - - if (is_null) *is_null = 1; - return 0; -} - -static void read_length_coded_string( - char **data, - unsigned long long *data_l, - char **out, - unsigned long long *out_l, - int *is_null -) { - if (is_null) *is_null = 0; - - if (!data || !data_l || !out || !out_l) { - if (is_null) *is_null = 1; - return; - } - - unsigned long 
long length = read_length_encoded_integer(data, data_l, is_null); - - if (is_null && *is_null) { - return; - } - - length = (length > *data_l) ? *data_l : length; - - *out = *data; - *out_l = length; - - *data += length; - *data_l -= length; - - return; -} - -static PyObject *build_array( - unsigned long long n_rows, - unsigned long n_cols, - PyObject **names, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - char *buffer, - unsigned long long row_size, - MySQLAccelOptions *options -) { - PyObject *py_out = NULL; - PyObject *py_shape = NULL; - PyObject *py_typestr = NULL; - PyObject *py_descr = NULL; - PyObject *py_descr_item = NULL; - PyObject *py_type = NULL; - PyObject *py_data = NULL; - PyObject *py_args = NULL; - PyObject *py_kwds = NULL; - PyObject *py_numpy_mod = NULL; - PyObject *py_numpy_array = NULL; - PyObject *py_array = NULL; - - py_numpy_mod = PyImport_ImportModule("numpy"); - if (!py_numpy_mod) goto error; - py_numpy_array = PyObject_GetAttrString(py_numpy_mod, "array"); - if (!py_numpy_array) goto error; - - py_out = PyDict_New(); - if (!py_out) goto error; - - py_shape = PyTuple_New(1); - if (!py_shape) goto error; - PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(n_rows)); - PyDict_SetItemString(py_out, "shape", py_shape); - Py_DECREF(py_shape); - - py_typestr = PyUnicode_FromFormat("|V%llu", row_size); - if (!py_typestr) goto error; - PyDict_SetItemString(py_out, "typestr", py_typestr); - Py_DECREF(py_typestr); - - py_descr = PyList_New(n_cols); - if (!py_descr) goto error; - PyDict_SetItemString(py_out, "descr", py_descr); - Py_DECREF(py_descr); - - for (unsigned long i = 0; i < n_cols; i++) { - py_descr_item = PyTuple_New(2); - if (!py_descr_item) goto error; - - PyList_SetItem(py_descr, i, py_descr_item); - - PyTuple_SetItem(py_descr_item, 0, names[i]); - // Caller already uses the borrowed reference. 
- Py_INCREF(names[i]); - - switch (type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - py_type = PyUnicode_FromString("|O"); - break; - - case MYSQL_TYPE_TINY: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - py_type = PyUnicode_FromString("output_type == MYSQL_ACCEL_OUT_DATAFRAME) { - PyObject *out2 = NULL; - PyObject *py_pandas_mod = PyImport_ImportModule("pandas"); - if (!py_pandas_mod) goto error; - out2 = PyObject_CallMethod(py_pandas_mod, "DataFrame", "O", py_out); - Py_DECREF(py_pandas_mod); - if (!out2) goto error; - Py_DECREF(py_out); - py_out = out2; - } - -exit: - if (py_args) { Py_DECREF(py_args); } - if (py_kwds) { Py_DECREF(py_args); } - if (py_numpy_array) { Py_DECREF(py_numpy_array); } - if (py_numpy_mod) { Py_DECREF(py_numpy_mod); } - return py_out; - -error: - if (py_out) { Py_DECREF(py_out); py_out = NULL; } - goto exit; -} - -static unsigned long long compute_row_size( - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - unsigned long n_cols -) { - unsigned long long row_size = 0; - - for (unsigned long i = 0; i < n_cols; i++) { - switch (type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - row_size += sizeof(PyObject*); - break; - - case MYSQL_TYPE_TINY: - row_size += sizeof(int8_t); - break; - - case MYSQL_TYPE_SHORT: - row_size += sizeof(int16_t); - break; - - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_LONG: - row_size += sizeof(int32_t); - break; - - case MYSQL_TYPE_LONGLONG: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_FLOAT: - row_size += sizeof(float); - break; - - case MYSQL_TYPE_DOUBLE: - row_size += sizeof(double); - break; - - case MYSQL_TYPE_NULL: - row_size += sizeof(PyObject*); - break; - - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_DATE: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_TIME: - row_size += sizeof(int64_t); - break; - - case 
MYSQL_TYPE_YEAR: - row_size += sizeof(int16_t); - break; - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_JSON: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_STRING: - row_size += sizeof(PyObject*); - break; - - default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); - return 0; - } - } - - return row_size; -} - -static void read_dataframe_row_from_packet( - unsigned long n_cols, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - PyObject **names, - const char **encodings, - PyObject **converters, - PyObject *py_decimal, - PyObject *py_json_loads, - char *data, - unsigned long long data_l, - MySQLAccelOptions *options, - char *buffer -) { - char *out = NULL; - unsigned long long out_l = 0; - int is_null = 0; - PyObject *item = NULL; - PyObject *str = NULL; - char *end = NULL; - char *loc = buffer; - - int sign = 1; - int year = 0; - int month = 0; - int day = 0; - int hour = 0; - int minute = 0; - int second = 0; - int microsecond = 0; - - float float_nan = nanf(""); - double double_nan = nan(""); - - if (!buffer) goto error; - - for (unsigned long i = 0; i < n_cols; i++) { - - read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); - end = &out[out_l]; - - switch (type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - if (is_null) { - *(PyObject**)loc = Py_None; - Py_INCREF(Py_None); - } else { - str = NULL; - str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); - if (!str) goto error; - - item = PyObject_CallFunctionObjArgs(py_decimal, str, NULL); - Py_DECREF(str); str = NULL; - if (!item) goto error; - - *(PyObject**)loc = item; - } - loc += sizeof(PyObject*); - break; - - case MYSQL_TYPE_TINY: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint8_t*)loc = 
(is_null) ? 0 : (uint8_t)strtoul(out, &end, 10); - } else { - *(int8_t*)loc = (is_null) ? INT8_MIN : (int8_t)strtol(out, &end, 10); - } - loc += sizeof(int8_t); - break; - - case MYSQL_TYPE_SHORT: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint16_t*)loc = (is_null) ? 0 : (uint16_t)strtoul(out, &end, 10); - } else { - *(int16_t*)loc = (is_null) ? INT16_MIN : (int16_t)strtol(out, &end, 10); - } - loc += sizeof(int16_t); - break; - - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_LONG: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint32_t*)loc = (is_null) ? 0 : (uint32_t)strtoul(out, &end, 10); - } else { - *(int32_t*)loc = (is_null) ? INT32_MIN : (int32_t)strtol(out, &end, 10); - } - loc += sizeof(int32_t); - break; - - case MYSQL_TYPE_LONGLONG: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint64_t*)loc = (is_null) ? 0 : (uint64_t)strtoul(out, &end, 10); - } else { - *(int64_t*)loc = (is_null) ? INT64_MIN : (int64_t)strtol(out, &end, 10); - } - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_FLOAT: - if (is_null) { - *(float*)loc = (float)float_nan; - } else { - *(float*)loc = (float)strtod(out, &end); - } - loc += sizeof(float); - break; - - case MYSQL_TYPE_DOUBLE: - if (is_null) { - *(double*)loc = (double)double_nan; - } else { - *(double*)loc = (double)strtod(out, &end); - } - loc += sizeof(double); - break; - - case MYSQL_TYPE_NULL: - *(PyObject**)loc = Py_None; - loc += sizeof(PyObject*); - break; - - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - // TODO: Should use numpy's NaT - if (!CHECK_ANY_DATETIME_STR(out, out_l)) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - *(int64_t*)loc = (int64_t)(((to_days(year, month, day) - EPOCH_TO_DAYS) - * SECONDS_PER_DAY + hour * 3600 + minute * 60 + second) - * 1e9 + microsecond * 1e3); - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_DATE: - if (!CHECK_DATE_STR(out, out_l)) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - *(int64_t*)loc = (int64_t)((to_days(year, month, day) - EPOCH_TO_DAYS) - * SECONDS_PER_DAY * 1e9); - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_TIME: - sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); - if (!sign) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } else if (sign < 0) { - out += 1; out_l -= 1; - } - if (IS_TIMEDELTA1(out, out_l)) { - hour = CHR2INT1(out); out += 2; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA2(out, out_l)) { - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA3(out, out_l)) { - hour = CHR2INT3(out); out += 4; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - } - *(int64_t*)loc = (int64_t)((hour * 3600 + minute * 60 + second) - * 1e9 + microsecond * 1e3) * sign; - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_YEAR: - if (out_l == 0) { - *(uint16_t*)loc = 0; - loc += sizeof(uint16_t); - break; - } - end = &out[out_l]; - *(uint16_t*)loc = (uint16_t)strtoul(out, &end, 10); - loc += sizeof(uint16_t); - break; - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_JSON: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_STRING: - if (encodings[i] == NULL) { - item = PyBytes_FromStringAndSize(out, out_l); - if (!item) goto error; - break; - } - - item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); - if (!item) goto error; - - // Parse JSON string. - if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { - str = item; - item = PyObject_CallFunctionObjArgs(py_json_loads, str, NULL); - Py_DECREF(str); str = NULL; - if (!item) goto error; - } - - *(PyObject**)loc = item; - loc += sizeof(PyObject*); - - break; - - default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); - goto error; - } - } - -exit: - return; - -error: - goto exit; -} - -static PyObject *read_obj_row_from_packet( - unsigned long n_cols, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - PyObject **names, - const char **encodings, - PyObject **converters, - PyObject *py_decimal, - PyObject *py_json_loads, - PyTypeObject *namedtuple, - char *data, - unsigned long long data_l, - MySQLAccelOptions *options -) { - char *out = NULL; - char *orig_out = NULL; - unsigned long long out_l = 0; - unsigned long long orig_out_l = 0; - int is_null = 0; - PyObject *py_result = NULL; - PyObject *py_item = NULL; - PyObject *py_str = NULL; - char *end = NULL; - - int sign = 
1; - int year = 0; - int month = 0; - int day = 0; - int hour = 0; - int minute = 0; - int second = 0; - int microsecond = 0; - - switch (options->output_type) { - case MYSQL_ACCEL_OUT_NAMEDTUPLES: { - if (!namedtuple) goto error; - py_result = PyStructSequence_New(namedtuple); - break; - } - case MYSQL_ACCEL_OUT_DICTS: - py_result = PyDict_New(); - break; - default: - py_result = PyTuple_New(n_cols); - } - - for (unsigned long i = 0; i < n_cols; i++) { - - read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); - end = &out[out_l]; - - orig_out = out; - orig_out_l = out_l; - - py_item = Py_None; - - // Don't convert if it's a NULL. - if (!is_null) { - - // If a converter was passed in, use it. - if (converters[i]) { - py_str = NULL; - if (encodings[i] == NULL) { - py_str = PyBytes_FromStringAndSize(out, out_l); - if (!py_str) goto error; - } else { - py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); - if (!py_str) goto error; - } - py_item = PyObject_CallFunctionObjArgs(converters[i], py_str, NULL); - Py_DECREF(py_str); py_str = NULL; - if (!py_item) goto error; - } - - // If no converter was passed in, do the default processing. 
- else { - switch (type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); - if (!py_str) goto error; - - py_item = PyObject_CallFunctionObjArgs(py_decimal, py_str, NULL); - Py_DECREF(py_str); py_str = NULL; - if (!py_item) goto error; - break; - - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_INT24: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { - py_item = PyLong_FromUnsignedLongLong(strtoul(out, &end, 10)); - } else { - py_item = PyLong_FromLongLong(strtol(out, &end, 10)); - } - if (!py_item) goto error; - break; - - case MYSQL_TYPE_FLOAT: - case MYSQL_TYPE_DOUBLE: - py_item = PyFloat_FromDouble(strtod(out, &end)); - if (!py_item) goto error; - break; - - case MYSQL_TYPE_NULL: - py_item = Py_None; - break; - - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - if (!CHECK_ANY_DATETIME_STR(out, out_l)) { - if (options && options->invalid_datetime_value) { - py_item = options->invalid_datetime_value; - Py_INCREF(py_item); - } else { - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - if (!py_item) goto error; - } - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - py_item = PyDateTime_FromDateAndTime(year, month, day, - hour, minute, second, microsecond); - if (!py_item) { - PyErr_Clear(); - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - } - if (!py_item) goto error; - break; - - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_DATE: - if (!CHECK_DATE_STR(out, out_l)) { - if (options && options->invalid_date_value) { - py_item = options->invalid_date_value; - Py_INCREF(py_item); - } else { - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - if (!py_item) goto error; - } - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - py_item = PyDate_FromDate(year, month, day); - if (!py_item) { - PyErr_Clear(); - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - } - if (!py_item) goto error; - break; - - case MYSQL_TYPE_TIME: - sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); - if (!sign) { - if (options && options->invalid_time_value) { - py_item = options->invalid_time_value; - Py_INCREF(py_item); - } else { - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - if (!py_item) goto error; - } - break; - } else if (sign < 0) { - out += 1; out_l -= 1; - } - if (IS_TIMEDELTA1(out, out_l)) { - hour = CHR2INT1(out); out += 2; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA2(out, out_l)) { - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA3(out, out_l)) { - hour = CHR2INT3(out); out += 4; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - py_item = PyDelta_FromDSU(0, sign * hour * 60 * 60 + - sign * minute * 60 + - sign * second, - sign * microsecond); - if (!py_item) { - PyErr_Clear(); - py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); - } - if (!py_item) goto error; - break; - - case MYSQL_TYPE_YEAR: - if (out_l == 0) { - goto error; - break; - } - end = &out[out_l]; - year = strtoul(out, &end, 10); - py_item = PyLong_FromLong(year); - if (!py_item) goto error; - break; - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_JSON: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_STRING: - if (encodings[i] == NULL) { - py_item = PyBytes_FromStringAndSize(out, out_l); - if (!py_item) goto error; - break; - } - - py_item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); - if (!py_item) goto error; - - // Parse JSON string. 
- if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { - py_str = py_item; - py_item = PyObject_CallFunctionObjArgs(py_json_loads, py_str, NULL); - Py_DECREF(py_str); py_str = NULL; - if (!py_item) goto error; - } - - break; - - default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); - goto error; - } - } - } - - if (py_item == Py_None) { - Py_INCREF(Py_None); - } - - switch (options->output_type) { - case MYSQL_ACCEL_OUT_NAMEDTUPLES: - PyStructSequence_SET_ITEM(py_result, i, py_item); - break; - case MYSQL_ACCEL_OUT_DICTS: - PyDict_SetItem(py_result, names[i], py_item); - Py_INCREF(names[i]); - Py_DECREF(py_item); - break; - default: - PyTuple_SET_ITEM(py_result, i, py_item); - } - } - -exit: - return py_result; - -error: - if (py_result) { Py_DECREF(py_result); py_result = NULL; } - goto exit; -} - -static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *kwargs) { - PyObject *py_next_seq_id = NULL; // Packet sequence number - PyObject *py_conn = NULL; // Database connection - PyObject *py_fields = NULL; // List of table fields - PyObject *py_buff = NULL; // Packet bytes - PyObject *py_row = NULL; // Output row - PyObject *py_res = NULL; // MySQLResult object - PyObject *py_converters = NULL; // res.converters - PyObject *py_decimal_mod = NULL; // decimal module - PyObject *py_decimal = NULL; // decimal.Decimal - PyObject *py_json_mod = NULL; // json module - PyObject *py_json_loads = NULL; // json.loads - PyObject *py_rows = NULL; // Output object - PyObject *py_rfile = NULL; // Socket file I/O - PyObject *py_read = NULL; // File I/O read method - PyObject *py_sock = NULL; // Socket - PyObject *py_read_timeout = NULL; // Socket read timeout value - PyObject *py_settimeout = NULL; // Socket settimeout method - PyObject **converters = NULL; // List of converter functions - PyObject **names = NULL; // Column names - PyObject *py_default_converters = NULL; // Dict of default converters - PyObject 
*py_options = NULL; // Reader options - PyTypeObject *namedtuple = NULL; // Generated namedtuple type - const char **encodings = NULL; // Encoding for each column - unsigned long long n_cols = 0; - unsigned long long n_rows = 0; - unsigned long *type_codes = NULL; // Type code for each column - unsigned long *flags = NULL; // Column flags - unsigned long *scales = NULL; // Column scales - unsigned long *offsets = NULL; // Column offsets in buffer - unsigned long long next_seq_id = 0; - char *keywords[] = {"result", "options", NULL}; - MySQLAccelOptions options = {0}; - unsigned long long df_buffer_row_size = 0; - unsigned long long df_buffer_n_rows = 0; - char *df_buffer = NULL; - char *orig_df_buffer = NULL; - PyStructSequence_Desc namedtuple_desc = {0}; - int unbuffered = 0; - - // Parse function args. - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", keywords, &py_res, &py_options)) { - goto error; - } - - if (py_options && PyDict_Check(py_options)) { - py_default_converters = PyDict_GetItemString(py_options, "default_converters"); - if (py_default_converters && !PyDict_Check(py_default_converters)) { - py_default_converters = NULL; - } - Py_XINCREF(py_default_converters); - PyObject *py_unbuffered = PyDict_GetItemString(py_options, "unbuffered"); - if (py_unbuffered && PyObject_IsTrue(py_unbuffered)) { - unbuffered = 1; - } - } - - if (unbuffered) { - PyObject *unbuffered_active = PyObject_GetAttrString(py_res, "unbuffered_active"); - if (!PyObject_IsTrue(unbuffered_active)) { - Py_XDECREF(py_default_converters); - Py_XDECREF(unbuffered_active); - Py_INCREF(Py_None); - return Py_None; - } - Py_DECREF(unbuffered_active); unbuffered_active = NULL; - } - - // Import decimal module. - py_decimal_mod = PyImport_ImportModule("decimal"); - if (!py_decimal_mod) goto error; - py_decimal = PyObject_GetAttrString(py_decimal_mod, "Decimal"); - if (!py_decimal) goto error; - - // Import json module. 
- py_json_mod = PyImport_ImportModule("json"); - if (!py_json_mod) goto error; - py_json_loads = PyObject_GetAttrString(py_json_mod, "loads"); - if (!py_json_loads) goto error; - - // Retrieve type codes for each column. - PyObject *py_field_count = PyObject_GetAttrString(py_res, "field_count"); - if (!py_field_count) goto error; - n_cols = PyLong_AsUnsignedLong(py_field_count); - Py_DECREF(py_field_count); py_field_count = NULL; - - py_converters = PyObject_GetAttrString(py_res, "converters"); - if (!py_converters) goto error; - - type_codes = calloc(n_cols, sizeof(unsigned long)); - if (!type_codes) goto error; - - flags = calloc(n_cols, sizeof(unsigned long)); - if (!flags) goto error; - - scales = calloc(n_cols, sizeof(unsigned long)); - if (!scales) goto error; - - encodings = calloc(n_cols, sizeof(char*)); - if (!encodings) goto error; - - converters = calloc(n_cols, sizeof(PyObject*)); - if (!converters) goto error; - - names = calloc(n_cols, sizeof(PyObject*)); - if (!names) goto error; - - py_fields = PyObject_GetAttrString(py_res, "fields"); - if (!py_fields) goto error; - - for (unsigned long i = 0; i < n_cols; i++) { - // Get type codes. - PyObject *py_field = PyList_GetItem(py_fields, i); - if (!py_field) goto error; - - PyObject *py_flags = PyObject_GetAttrString(py_field, "flags"); - if (!py_flags) goto error; - flags[i] = PyLong_AsUnsignedLong(py_flags); - Py_DECREF(py_flags); py_flags = NULL; - - PyObject *py_scale = PyObject_GetAttrString(py_field, "scale"); - if (!py_scale) goto error; - scales[i] = PyLong_AsUnsignedLong(py_scale); - Py_DECREF(py_scale); py_scale = NULL; - - PyObject *py_field_type = PyObject_GetAttrString(py_field, "type_code"); - if (!py_field_type) goto error; - type_codes[i] = PyLong_AsUnsignedLong(py_field_type); - PyObject *py_default_converter = (py_default_converters) ? - PyDict_GetItem(py_default_converters, py_field_type) : NULL; - Py_DECREF(py_field_type); py_field_type = NULL; - - // Get field name. 
- PyObject *py_field_name = PyObject_GetAttrString(py_field, "name"); - if (!py_field_name) goto error; - names[i] = py_field_name; - - // Get field encodings (NULL means binary) and default converters. - PyObject *py_tmp = PyList_GetItem(py_converters, i); - if (!py_tmp) goto error; - PyObject *py_encoding = PyTuple_GetItem(py_tmp, 0); - if (!py_encoding) goto error; - PyObject *py_converter = PyTuple_GetItem(py_tmp, 1); - if (!py_converter) goto error; - - encodings[i] = (py_encoding == Py_None) ? - NULL : PyUnicode_AsUTF8AndSize(py_encoding, NULL); - converters[i] = (py_converter == Py_None || py_converter == py_default_converter) ? - NULL : py_converter; - } - - // Loop over all data packets. - py_conn = PyObject_GetAttrString(py_res, "connection"); - if (!py_conn) goto error; - - // Cache socket timeout and read methods. - py_sock = PyObject_GetAttrString(py_conn, "_sock"); - if (!py_sock) goto error; - py_settimeout = PyObject_GetAttrString(py_sock, "settimeout"); - if (!py_settimeout) goto error; - py_read_timeout = PyObject_GetAttrString(py_conn, "_read_timeout"); - if (!py_read_timeout) goto error; - - py_rfile = PyObject_GetAttrString(py_conn, "_rfile"); - if (!py_rfile) goto error; - py_read = PyObject_GetAttrString(py_rfile, "read"); - if (!py_read) goto error; - - py_next_seq_id = PyObject_GetAttrString(py_conn, "_next_seq_id"); - if (!py_next_seq_id) goto error; - next_seq_id = PyLong_AsUnsignedLongLong(py_next_seq_id); - Py_DECREF(py_next_seq_id); py_next_seq_id = NULL; - - options.parse_json = 1; - if (py_options && py_options != Py_None) { - read_options(&options, py_options); - } - - switch (options.output_type) { - case MYSQL_ACCEL_OUT_DATAFRAME: - case MYSQL_ACCEL_OUT_NUMPY: - // Setup dataframe buffer. - df_buffer_n_rows = (unbuffered) ? 
1 : 500; - df_buffer_row_size = compute_row_size(type_codes, flags, scales, n_cols); - orig_df_buffer = malloc(df_buffer_row_size * df_buffer_n_rows); - if (!orig_df_buffer) goto error; - df_buffer = orig_df_buffer; - break; - case MYSQL_ACCEL_OUT_NAMEDTUPLES: - namedtuple_desc.name = "Row"; - namedtuple_desc.doc = "Row of data values"; - namedtuple_desc.n_in_sequence = n_cols; - namedtuple_desc.fields = calloc(n_cols + 1, sizeof(PyStructSequence_Field)); - if (!namedtuple_desc.fields) goto error; - for (unsigned long long i = 0; i < n_cols; i++) { - namedtuple_desc.fields[i].name = PyUnicode_AsUTF8AndSize(names[i], NULL); - namedtuple_desc.fields[i].doc = NULL; - } - namedtuple = PyStructSequence_NewType(&namedtuple_desc); - if (!namedtuple) goto error; - // Fall through - default: - py_rows = PyList_New(0); - if (!py_rows) goto error; - } - - py_buff = NULL; - while (1) { - py_buff = read_packet(py_conn, py_read, py_settimeout, py_read_timeout, &next_seq_id); - if (!py_buff) goto error; - - char *data = PyByteArray_AsString(py_buff); - unsigned long long data_l = PyByteArray_GET_SIZE(py_buff); - unsigned long long warning_count = 0; - int has_next = 0; - - if (check_packet_is_eof(&data, &data_l, &warning_count, &has_next)) { - PyObject_SetAttrString(py_res, "warning_count", PyLong_FromUnsignedLongLong(warning_count)); - PyObject_SetAttrString(py_res, "has_next", PyLong_FromLong(has_next)); - PyObject_SetAttrString(py_res, "connection", Py_None); - if (unbuffered) { - PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); - PyObject_SetAttrString(py_res, "rows", Py_None); - Py_DECREF(py_rows); Py_INCREF(Py_None); py_rows = Py_None; - goto exit; - } - break; - } - - n_rows += 1; - - switch (options.output_type) { - case MYSQL_ACCEL_OUT_DATAFRAME: - case MYSQL_ACCEL_OUT_NUMPY: - if (n_rows >= df_buffer_n_rows) { - df_buffer_n_rows *= 1.7; - orig_df_buffer = realloc(orig_df_buffer, - df_buffer_row_size * df_buffer_n_rows); - df_buffer = orig_df_buffer + 
df_buffer_row_size * n_rows; - } - read_dataframe_row_from_packet(n_cols, type_codes, flags, scales, names, encodings, - converters, py_decimal, py_json_loads, - data, data_l, &options, df_buffer); - df_buffer += df_buffer_row_size; - break; - default: - py_row = read_obj_row_from_packet(n_cols, type_codes, flags, scales, names, encodings, - converters, py_decimal, py_json_loads, - namedtuple, data, data_l, &options); - if (!py_row) goto error; - PyList_Append(py_rows, py_row); - Py_DECREF(py_row); py_row = NULL; - } - - Py_DECREF(py_buff); py_buff = NULL; - - if (unbuffered) break; - } - - switch (options.output_type) { - case MYSQL_ACCEL_OUT_DATAFRAME: - case MYSQL_ACCEL_OUT_NUMPY: - orig_df_buffer = realloc(orig_df_buffer, df_buffer_row_size * n_rows); - py_rows = build_array(n_rows, n_cols, names, type_codes, flags, scales, - orig_df_buffer, df_buffer_row_size, &options); - } - - py_next_seq_id = PyLong_FromUnsignedLongLong(next_seq_id); - if (!py_next_seq_id) goto error; - PyObject_SetAttrString(py_conn, "_next_seq_id", py_next_seq_id); - Py_DECREF(py_next_seq_id); py_next_seq_id = NULL; - -exit: - if (converters) free(converters); - if (type_codes) free(type_codes); - if (flags) free(flags); - if (scales) free(scales); - if (encodings) free(encodings); - if (names) { - for (unsigned long i = 0; i < 0; i++) { - if (names[i]) { - Py_DECREF(names[i]); - } - } - free(names); - } - if (offsets) free(offsets); - - if (py_buff) { Py_DECREF(py_buff); } - if (namedtuple_desc.fields) { free(namedtuple_desc.fields); }; - if (namedtuple) { Py_DECREF(namedtuple); } - if (py_read) { Py_DECREF(py_read); } - if (py_rfile) { Py_DECREF(py_rfile); } - if (py_read_timeout) { Py_DECREF(py_read_timeout); } - if (py_settimeout) { Py_DECREF(py_settimeout); } - if (py_sock) { Py_DECREF(py_sock); } - if (py_conn) { Py_DECREF(py_conn); } - if (py_converters) { Py_DECREF(py_converters); } - if (py_default_converters) { Py_DECREF(py_default_converters); } - if (py_fields) { 
Py_DECREF(py_fields); } - if (py_decimal) { Py_DECREF(py_decimal); } - if (py_decimal_mod) { Py_DECREF(py_decimal_mod); } - if (py_json_loads) { Py_DECREF(py_json_loads); } - if (py_json_mod) { Py_DECREF(py_json_mod); } - - if (py_rows && py_rows != Py_None) { - PyObject *out = NULL; - PyObject *py_tuple_rows = NULL; - PyObject *py_n_rows = PyLong_FromSsize_t(n_rows); - PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); - Py_DECREF(py_n_rows); - switch (options.output_type) { - case MYSQL_ACCEL_OUT_DATAFRAME: - case MYSQL_ACCEL_OUT_NUMPY: - PyObject_SetAttrString(py_res, "rows", py_rows); - if (unbuffered) { - // TODO: reshape? - return py_rows; - } - break; - default: - py_tuple_rows = PyList_AsTuple(py_rows); - PyObject_SetAttrString(py_res, "rows", py_tuple_rows); - Py_DECREF(py_rows); py_rows = NULL; - if (unbuffered) { - out = PyTuple_GetItem(py_tuple_rows, 0); - Py_DECREF(py_tuple_rows); - Py_XINCREF(out); - return out; - } - Py_DECREF(py_tuple_rows); - } - return Py_None; - } - else if (py_rows && py_rows == Py_None) { - return Py_None; - } - - return NULL; - -error: - if (orig_df_buffer) { free(orig_df_buffer); } - if (py_rows) { Py_DECREF(py_rows); py_rows = NULL; } - goto exit; -} - -static PyMethodDef PyMySQLAccelMethods[] = { - {"read_rowdata_packet", (PyCFunction)read_rowdata_packet, METH_VARARGS | METH_KEYWORDS, "MySQL row data packet reader"}, - {NULL, NULL, 0, NULL} -}; - -static struct PyModuleDef _pymysqlsvmodule = { - PyModuleDef_HEAD_INIT, - "_pymysqlsv", - "PyMySQL row data packet reader accelerator", - -1, - PyMySQLAccelMethods -}; - -PyMODINIT_FUNC PyInit__pymysqlsv(void) { - PyDateTime_IMPORT; - if (PyType_Ready(&ArrayType) < 0) { - return NULL; - } - return PyModule_Create(&_pymysqlsvmodule); -} From 37b003dc4a55f13a374d14f0c05bc8239a43bebf Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Fri, 26 Aug 2022 09:03:18 -0500 Subject: [PATCH 03/12] Continuing unbuffered refactoring --- src/accel.c | 539 
+++++++++++++++++++++++++++------------------------- 1 file changed, 280 insertions(+), 259 deletions(-) diff --git a/src/accel.c b/src/accel.c index 208bb442d..954b9d247 100644 --- a/src/accel.c +++ b/src/accel.c @@ -264,9 +264,6 @@ typedef struct { inline int IMAX(int a, int b) { return((a) > (b) ? a : b); } inline int IMIN(int a, int b) { return((a) < (b) ? a : b); } -static void read_options(MySQLAccelOptions *options, PyObject *dict); -static unsigned long long compute_row_size(unsigned long *type_codes, unsigned long *flags, unsigned long *scales, unsigned long n_cols); - // // Array // @@ -277,7 +274,7 @@ typedef struct { } ArrayObject; static void Array_dealloc(ArrayObject *self) { - if (self->array_interface) { + if (self->array_interface && PyDict_Check(self->array_interface)) { PyObject *data = PyDict_GetItemString(self->array_interface, "data"); if (data) { PyObject *buffer = PyTuple_GetItem(data, 0); @@ -286,13 +283,13 @@ static void Array_dealloc(ArrayObject *self) { } } } - Py_XDECREF(self->array_interface); + Py_CLEAR(self->array_interface); Py_TYPE(self)->tp_free((PyObject*)self); } static PyObject *Array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { ArrayObject *self = (ArrayObject*)type->tp_alloc(type, 0); - if (self != NULL) { + if (self) { self->array_interface = Py_None; Py_INCREF(Py_None); } @@ -366,6 +363,13 @@ typedef struct { PyObject **py_converters; // List of converter functions PyObject **py_names; // Column names PyObject *py_default_converters; // Dict of default converters + PyObject *py_numpy_mod; // Numpy module + PyObject *py_numpy_array; // numpy.array + PyObject *py_pandas_mod; // pandas module + PyObject *py_pandas_dataframe; // pandas.DataFrame + PyObject *py_array_def; // numpy array definition + PyObject *py_array_args; // Positional args for numpy.array + PyObject *py_array_kwds; // Keyword args for numpy.array PyTypeObject *namedtuple; // Generated namedtuple type PyObject **py_encodings; // Encoding for each 
column as Python string const char **encodings; // Encoding for each column @@ -385,51 +389,61 @@ typedef struct { int unbuffered; } StateObject; +static void read_options(MySQLAccelOptions *options, PyObject *dict); +static unsigned long long compute_row_size(StateObject *py_state); +static void build_array_interface(StateObject *py_state); + #define DESTROY(x) do { if (x) { free(x); (x) = NULL; } } while (0) static void State_clear_fields(StateObject *self) { if (!self) return; - DESTROY(self->type_codes); - DESTROY(self->flags); + self->df_cursor = NULL; + self->df_buffer = NULL; + DESTROY(self->namedtuple_desc.fields); + DESTROY(self->offsets); DESTROY(self->scales); + DESTROY(self->flags); + DESTROY(self->type_codes); DESTROY(self->encodings); - DESTROY(self->offsets); - DESTROY(self->namedtuple_desc.fields); if (self->py_converters) { for (unsigned long i = 0; i < self->n_cols; i++) { - Py_XDECREF(self->py_converters[i]); - self->py_converters[i] = NULL; + Py_CLEAR(self->py_converters[i]); } DESTROY(self->py_converters); } if (self->py_names) { for (unsigned long i = 0; i < self->n_cols; i++) { - Py_XDECREF(self->py_names[i]); - self->py_names[i] = NULL; + Py_CLEAR(self->py_names[i]); } DESTROY(self->py_names); } if (self->py_encodings) { for (unsigned long i = 0; i < self->n_cols; i++) { - Py_XDECREF(self->py_encodings[i]); - self->py_encodings[i] = NULL; + Py_CLEAR(self->py_encodings[i]); } DESTROY(self->py_encodings); } + Py_CLEAR(self->py_array_def); + Py_CLEAR(self->py_numpy_mod); + Py_CLEAR(self->py_numpy_array); + Py_CLEAR(self->py_pandas_mod); + Py_CLEAR(self->py_pandas_dataframe); + Py_CLEAR(self->py_array_args); + Py_CLEAR(self->py_array_kwds); Py_CLEAR(self->namedtuple); - Py_CLEAR(self->py_rows); - Py_CLEAR(self->py_read); - Py_CLEAR(self->py_rfile); - Py_CLEAR(self->py_read_timeout); + Py_CLEAR(self->py_default_converters); Py_CLEAR(self->py_settimeout); + Py_CLEAR(self->py_read_timeout); Py_CLEAR(self->py_sock); - Py_CLEAR(self->py_conn); - 
Py_CLEAR(self->py_default_converters); - Py_CLEAR(self->py_fields); - Py_CLEAR(self->py_decimal); - Py_CLEAR(self->py_decimal_mod); + Py_CLEAR(self->py_read); + Py_CLEAR(self->py_rfile); + Py_CLEAR(self->py_rows); Py_CLEAR(self->py_json_loads); Py_CLEAR(self->py_json_mod); + Py_CLEAR(self->py_decimal); + Py_CLEAR(self->py_decimal_mod); + Py_CLEAR(self->py_fields); + Py_CLEAR(self->py_conn); } static void State_dealloc(StateObject *self) { @@ -472,7 +486,7 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { if (self->unbuffered) { PyObject *unbuffered_active = PyObject_GetAttrString(py_res, "unbuffered_active"); - if (!PyObject_IsTrue(unbuffered_active)) { + if (!unbuffered_active || !PyObject_IsTrue(unbuffered_active)) { Py_XDECREF(unbuffered_active); goto error; } @@ -562,11 +576,13 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->py_encodings[i] = (py_encoding == Py_None) ? NULL : py_encoding; Py_XINCREF(self->py_encodings[i]); - self->encodings[i] = (py_encoding == Py_None) ? + self->encodings[i] = (!py_encoding || py_encoding == Py_None) ? NULL : PyUnicode_AsUTF8AndSize(py_encoding, NULL); - self->py_converters[i] = (py_converter == Py_None || py_converter == py_default_converter) ? - NULL : py_converter; + self->py_converters[i] = (!py_converter + || py_converter == Py_None + || py_converter == py_default_converter) ? + NULL : py_converter; Py_XINCREF(self->py_converters[i]); } @@ -592,16 +608,34 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->next_seq_id = PyLong_AsUnsignedLongLong(py_next_seq_id); Py_XDECREF(py_next_seq_id); - if (PyDict_Check(py_options)) { + if (py_options && PyDict_Check(py_options)) { read_options(&self->options, py_options); } switch (self->options.output_type) { case MYSQL_ACCEL_OUT_PANDAS: + // Import pandas module. 
+ self->py_pandas_mod = PyImport_ImportModule("pandas"); + if (!self->py_pandas_mod) goto error; + self->py_pandas_dataframe = PyObject_GetAttrString(self->py_pandas_mod, "DataFrame"); + if (!self->py_pandas_dataframe) goto error; + + // Fall through + case MYSQL_ACCEL_OUT_NUMPY: + // Import numpy module. + self->py_numpy_mod = PyImport_ImportModule("numpy"); + if (!self->py_numpy_mod) goto error; + self->py_numpy_array = PyObject_GetAttrString(self->py_numpy_mod, "array"); + if (!self->py_numpy_array) goto error; + + // Build array interface arguments. + build_array_interface(self); + if (!self->py_array_def || !self->py_array_args || !self->py_array_kwds) goto error; + // Setup dataframe buffer. self->df_buffer_n_rows = (self->unbuffered) ? 1 : 500; - self->df_buffer_row_size = compute_row_size(self->type_codes, self->flags, self->scales, self->n_cols); + self->df_buffer_row_size = compute_row_size(self); self->df_buffer = malloc(self->df_buffer_row_size * self->df_buffer_n_rows); if (!self->df_buffer) goto error; self->df_cursor = self->df_buffer; @@ -613,7 +647,7 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->namedtuple_desc.n_in_sequence = self->n_cols; self->namedtuple_desc.fields = calloc(self->n_cols + 1, sizeof(PyStructSequence_Field)); if (!self->namedtuple_desc.fields) goto error; - for (unsigned long long i = 0; i < self->n_cols; i++) { + for (unsigned long i = 0; i < self->n_cols; i++) { self->namedtuple_desc.fields[i].name = PyUnicode_AsUTF8AndSize(self->py_names[i], NULL); self->namedtuple_desc.fields[i].doc = NULL; } @@ -775,39 +809,34 @@ static int is_error_packet(char *buff_bytes) { return buff_bytes && *(uint8_t*)buff_bytes == 0xFF; } -static void force_close(PyObject *self) { +static void force_close(PyObject *py_conn) { PyObject *py_sock = NULL; - py_sock = PyObject_GetAttrString(self, "_sock"); + py_sock = PyObject_GetAttrString(py_conn, "_sock"); if (!py_sock) goto error; - PyObject_CallMethod(py_sock, 
"close", NULL); + Py_XDECREF(PyObject_CallMethod(py_sock, "close", NULL)); PyErr_Clear(); - PyObject_SetAttrString(self, "_sock", Py_None); - PyObject_SetAttrString(self, "_rfile", Py_None); + PyObject_SetAttrString(py_conn, "_sock", Py_None); + PyObject_SetAttrString(py_conn, "_rfile", Py_None); exit: - if (py_sock) { Py_DECREF(py_sock); } + Py_XDECREF(py_sock); return; error: goto exit; } -static PyObject *read_bytes( - PyObject *self, - unsigned long long num_bytes, - PyObject *py_read, - PyObject *py_settimeout, - PyObject *py_read_timeout -) { +static PyObject *read_bytes(StateObject *py_state, unsigned long long num_bytes) { PyObject *py_num_bytes = NULL; PyObject *py_data = NULL; PyObject *py_exc = NULL; - if (py_read_timeout != Py_None) { - Py_XDECREF(PyObject_CallFunctionObjArgs(py_settimeout, py_read_timeout, NULL)); + if (py_state->py_read_timeout && py_state->py_read_timeout != Py_None) { + Py_XDECREF(PyObject_CallFunctionObjArgs(py_state->py_settimeout, + py_state->py_read_timeout, NULL)); if (PyErr_Occurred()) goto error; } @@ -815,25 +844,28 @@ static PyObject *read_bytes( if (!py_num_bytes) goto error; while (1) { - py_data = PyObject_CallFunctionObjArgs(py_read, py_num_bytes, NULL); + py_data = PyObject_CallFunctionObjArgs(py_state->py_read, py_num_bytes, NULL); if ((py_exc = PyErr_Occurred())) { if (PyErr_ExceptionMatches(PyExc_IOError) || PyErr_ExceptionMatches(PyExc_OSError)) { PyObject *py_errno = PyObject_GetAttrString(py_exc, "errno"); if (!py_errno) goto error; + unsigned long long err = PyLong_AsUnsignedLongLong(py_errno); Py_DECREF(py_errno); + if (err == 4 /* errno.EINTER */) { continue; } - force_close(self); - raise_exception(self, "OperationalError", 0, + + force_close(py_state->py_conn); + raise_exception(py_state->py_conn, "OperationalError", 0, "Lost connection to MySQL server during query"); goto error; } else if (PyErr_ExceptionMatches(PyExc_BaseException)) { // Don't convert unknown exception to MySQLError. 
- force_close(self); + force_close(py_state->py_conn); goto error; } } @@ -844,29 +876,23 @@ static PyObject *read_bytes( } if (PyBytes_GET_SIZE(py_data) < (long int)num_bytes) { - force_close(self); - raise_exception(self, "OperationalError", 0, + force_close(py_state->py_conn); + raise_exception(py_state->py_conn, "OperationalError", 0, "Lost connection to MySQL server during query"); goto error; } exit: - if (py_num_bytes) { Py_DECREF(py_num_bytes); } + Py_XDECREF(py_num_bytes); return py_data; error: - if (py_data) { Py_DECREF(py_data); py_data = NULL; } + Py_CLEAR(py_data); goto exit; } -static PyObject *read_packet( - PyObject *self, - PyObject *py_read, - PyObject *py_settimeout, - PyObject *py_read_timeout, - unsigned long long *next_seq_id -) { - PyObject *py_buff = PyByteArray_FromStringAndSize(NULL, 0); +static PyObject *read_packet(StateObject *py_state) { + PyObject *py_buff = NULL; PyObject *py_new_buff = NULL; PyObject *py_packet_header = NULL; PyObject *py_bytes_to_read = NULL; @@ -877,8 +903,11 @@ static PyObject *read_packet( uint8_t btrh = 0; uint8_t packet_number = 0; + py_buff = PyByteArray_FromStringAndSize(NULL, 0); + if (!py_buff) goto error; + while (1) { - py_packet_header = read_bytes(self, 4, py_read, py_settimeout, py_read_timeout); + py_packet_header = read_bytes(py_state, 4); if (!py_packet_header) goto error; buff = PyBytes_AsString(py_packet_header); @@ -888,31 +917,33 @@ static PyObject *read_packet( packet_number = *(uint8_t*)(buff+3); bytes_to_read = btrl + (btrh << 16); - Py_DECREF(py_packet_header); py_packet_header = NULL; + Py_CLEAR(py_packet_header); - if (packet_number != *next_seq_id) { - force_close(self); + if (packet_number != py_state->next_seq_id) { + force_close(py_state->py_conn); if (packet_number == 0) { - raise_exception(self, "OperationalError", 0, + raise_exception(py_state->py_conn, "OperationalError", 0, "Lost connection to MySQL server during query"); goto error; } - raise_exception(self, "InternalError", 0, 
+ raise_exception(py_state->py_conn, "InternalError", 0, "Packet sequence number wrong"); goto error; } - *next_seq_id = (*next_seq_id + 1) % 256; + py_state->next_seq_id = (py_state->next_seq_id + 1) % 256; - py_recv_data = read_bytes(self, bytes_to_read, py_read, py_settimeout, py_read_timeout); + py_recv_data = read_bytes(py_state, bytes_to_read); if (!py_recv_data) goto error; py_new_buff = PyByteArray_Concat(py_buff, py_recv_data); + Py_CLEAR(py_recv_data); + Py_CLEAR(py_buff); if (!py_new_buff) goto error; - Py_DECREF(py_buff); py_buff = py_new_buff; py_new_buff = NULL; - Py_DECREF(py_recv_data); py_recv_data = NULL; + py_buff = py_new_buff; + py_new_buff = NULL; if (bytes_to_read == 0xFFFFFF) { continue; @@ -924,28 +955,28 @@ static PyObject *read_packet( } if (is_error_packet(PyByteArray_AsString(py_buff))) { - PyObject *py_result = PyObject_GetAttrString(self, "_result"); + PyObject *py_result = PyObject_GetAttrString(py_state->py_conn, "_result"); if (py_result && py_result != Py_None) { PyObject *py_unbuffered_active = PyObject_GetAttrString(py_result, "unbuffered_active"); if (py_unbuffered_active == Py_True) { PyObject_SetAttrString(py_result, "unbuffered_active", Py_False); - Py_DECREF(py_result); } Py_XDECREF(py_unbuffered_active); } Py_XDECREF(py_result); - PyObject_CallMethod(self, "_raise_mysql_exception", "O", py_buff, NULL); + Py_XDECREF(PyObject_CallMethod(py_state->py_conn, "_raise_mysql_exception", + "O", py_buff, NULL)); } exit: - if (py_new_buff) { Py_DECREF(py_new_buff); } - if (py_bytes_to_read) { Py_DECREF(py_bytes_to_read); } - if (py_recv_data) { Py_DECREF(py_recv_data); } - if (py_packet_header) { Py_DECREF(py_packet_header); } + Py_XDECREF(py_new_buff); + Py_XDECREF(py_bytes_to_read); + Py_XDECREF(py_recv_data); + Py_XDECREF(py_packet_header); return py_buff; error: - if (py_buff) { Py_DECREF(py_buff); py_buff = NULL; } + Py_CLEAR(py_buff); goto exit; } @@ -1068,72 +1099,52 @@ static void read_length_coded_string( return; } -static 
PyObject *build_array( - unsigned long long n_rows, - unsigned long n_cols, - PyObject **names, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - char *buffer, - unsigned long long row_size, - MySQLAccelOptions *options -) { +static void build_array_interface(StateObject *py_state) { PyObject *py_out = NULL; PyObject *py_shape = NULL; PyObject *py_typestr = NULL; PyObject *py_descr = NULL; PyObject *py_descr_item = NULL; PyObject *py_type = NULL; - PyObject *py_data = NULL; - PyObject *py_args = NULL; - PyObject *py_kwds = NULL; - PyObject *py_numpy_mod = NULL; - PyObject *py_numpy_array = NULL; PyObject *py_array = NULL; - py_numpy_mod = PyImport_ImportModule("numpy"); - if (!py_numpy_mod) goto error; - py_numpy_array = PyObject_GetAttrString(py_numpy_mod, "array"); - if (!py_numpy_array) goto error; - py_out = PyDict_New(); if (!py_out) goto error; py_shape = PyTuple_New(1); if (!py_shape) goto error; - PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(n_rows)); + // Populated in build_array. + PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(1)); PyDict_SetItemString(py_out, "shape", py_shape); Py_DECREF(py_shape); - py_typestr = PyUnicode_FromFormat("|V%llu", row_size); + py_typestr = PyUnicode_FromFormat("|V%llu", py_state->df_buffer_row_size); if (!py_typestr) goto error; PyDict_SetItemString(py_out, "typestr", py_typestr); Py_DECREF(py_typestr); - py_descr = PyList_New(n_cols); + py_descr = PyList_New(py_state->n_cols); if (!py_descr) goto error; PyDict_SetItemString(py_out, "descr", py_descr); Py_DECREF(py_descr); - for (unsigned long i = 0; i < n_cols; i++) { + for (unsigned long i = 0; i < py_state->n_cols; i++) { py_descr_item = PyTuple_New(2); if (!py_descr_item) goto error; PyList_SetItem(py_descr, i, py_descr_item); - PyTuple_SetItem(py_descr_item, 0, names[i]); - // Caller already uses the borrowed reference. 
- Py_INCREF(names[i]); + PyTuple_SetItem(py_descr_item, 0, py_state->py_names[i]); + Py_INCREF(py_state->py_names[i]); - switch (type_codes[i]) { + switch (py_state->type_codes[i]) { case MYSQL_TYPE_NEWDECIMAL: case MYSQL_TYPE_DECIMAL: py_type = PyUnicode_FromString("|O"); break; case MYSQL_TYPE_TINY: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { py_type = PyUnicode_FromString("type_codes[i], NULL); goto error; } @@ -1222,39 +1234,68 @@ static PyObject *build_array( py_descr_item = NULL; } + py_state->py_array_def = py_out; + + py_state->py_array_args = PyTuple_New(1); + if (!py_state->py_array_args) goto error; + PyTuple_SetItem(py_state->py_array_args, 0, py_array); + py_state->py_array_kwds = PyDict_New(); + PyDict_SetItemString(py_state->py_array_kwds, "copy", Py_False); + +exit: + return; + +error: + Py_CLEAR(py_state->py_array_def); + Py_CLEAR(py_state->py_array_args); + Py_CLEAR(py_state->py_array_kwds); + goto exit; +} + +PyObject *build_array(StateObject *py_state) { + PyObject *py_out = NULL; + PyObject *py_data = NULL; + PyObject *py_array = NULL; + PyObject *py_array_def = NULL; + PyObject *py_args = NULL; + py_data = PyTuple_New(2); if (!py_data) goto error; - PyTuple_SetItem(py_data, 0, PyLong_FromUnsignedLongLong((unsigned long long)buffer)); + PyTuple_SetItem(py_data, 0, PyLong_FromUnsignedLongLong((unsigned long long)py_state->df_buffer)); PyTuple_SetItem(py_data, 1, Py_False); - Py_INCREF(Py_True); - PyDict_SetItemString(py_out, "data", py_data); - Py_DECREF(py_data); py_data = NULL; + Py_INCREF(Py_False); + + py_array_def = PyDict_Copy(py_state->py_array_def); + if (!py_array_def) goto error; + + if (!py_state->unbuffered) { + PyObject *py_shape = PyTuple_New(1); + if (!py_shape) goto error; + 
PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(py_state->n_rows)); + PyDict_SetItemString(py_array_def, "shape", py_shape); + } + + PyDict_SetItemString(py_array_def, "data", py_data); + Py_CLEAR(py_data); py_args = PyTuple_New(1); if (!py_args) goto error; - PyTuple_SetItem(py_args, 0, py_out); + PyTuple_SetItem(py_args, 0, py_array_def); + py_array_def = NULL; py_array = Array_new(&ArrayType, py_args, NULL); if (!py_array) goto error; Array_init((ArrayObject*)py_array, py_args, NULL); - Py_DECREF(py_args); py_args = NULL; - py_args = PyTuple_New(1); - if (!py_args) goto error; - PyTuple_SetItem(py_args, 0, py_array); - py_kwds = PyDict_New(); - PyDict_SetItemString(py_kwds, "copy", Py_False); - py_out = PyObject_Call(py_numpy_array, py_args, py_kwds); + PyTuple_SetItem(py_state->py_array_args, 0, py_array); + + py_out = PyObject_Call(py_state->py_numpy_array, py_state->py_array_args, + py_state->py_array_kwds); if (!py_out) goto error; - Py_DECREF(py_args); py_args = NULL; - Py_DECREF(py_kwds); py_kwds = NULL; - if (options->output_type == MYSQL_ACCEL_OUT_PANDAS) { + if (py_state->options.output_type == MYSQL_ACCEL_OUT_PANDAS) { PyObject *out2 = NULL; - PyObject *py_pandas_mod = PyImport_ImportModule("pandas"); - if (!py_pandas_mod) goto error; - out2 = PyObject_CallMethod(py_pandas_mod, "DataFrame", "O", py_out); - Py_DECREF(py_pandas_mod); + out2 = PyObject_CallFunctionObjArgs(py_state->py_pandas_mod, py_out, NULL); if (!out2) goto error; Py_DECREF(py_out); py_out = out2; @@ -1262,9 +1303,6 @@ static PyObject *build_array( exit: Py_XDECREF(py_args); - Py_XDECREF(py_kwds); - Py_XDECREF(py_numpy_array); - Py_XDECREF(py_numpy_mod); return py_out; error: @@ -1272,16 +1310,11 @@ static PyObject *build_array( goto exit; } -static unsigned long long compute_row_size( - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - unsigned long n_cols -) { +static unsigned long long compute_row_size(StateObject *py_state) { unsigned long long 
row_size = 0; - for (unsigned long i = 0; i < n_cols; i++) { - switch (type_codes[i]) { + for (unsigned long i = 0; i < py_state->n_cols; i++) { + switch (py_state->type_codes[i]) { case MYSQL_TYPE_NEWDECIMAL: case MYSQL_TYPE_DECIMAL: row_size += sizeof(PyObject*); @@ -1350,7 +1383,8 @@ static unsigned long long compute_row_size( break; default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", + py_state->type_codes[i], NULL); return 0; } } @@ -1359,19 +1393,9 @@ static unsigned long long compute_row_size( } static void read_dataframe_row_from_packet( - unsigned long n_cols, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - PyObject **names, - const char **encodings, - PyObject **converters, - PyObject *py_decimal, - PyObject *py_json_loads, + StateObject *py_state, char *data, - unsigned long long data_l, - MySQLAccelOptions *options, - char *buffer + unsigned long long data_l ) { char *out = NULL; unsigned long long out_l = 0; @@ -1379,7 +1403,7 @@ static void read_dataframe_row_from_packet( PyObject *item = NULL; PyObject *str = NULL; char *end = NULL; - char *loc = buffer; + char *loc = py_state->df_cursor; int sign = 1; int year = 0; @@ -1393,14 +1417,14 @@ static void read_dataframe_row_from_packet( float float_nan = nanf(""); double double_nan = nan(""); - if (!buffer) goto error; + if (!py_state->df_cursor) goto error; - for (unsigned long i = 0; i < n_cols; i++) { + for (unsigned long i = 0; i < py_state->n_cols; i++) { read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); end = &out[out_l]; - switch (type_codes[i]) { + switch (py_state->type_codes[i]) { case MYSQL_TYPE_NEWDECIMAL: case MYSQL_TYPE_DECIMAL: if (is_null) { @@ -1408,10 +1432,10 @@ static void read_dataframe_row_from_packet( Py_INCREF(Py_None); } else { str = NULL; - str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + str = PyUnicode_Decode(out, out_l, 
py_state->encodings[i], "strict"); if (!str) goto error; - item = PyObject_CallFunctionObjArgs(py_decimal, str, NULL); + item = PyObject_CallFunctionObjArgs(py_state->py_decimal, str, NULL); Py_DECREF(str); str = NULL; if (!item) goto error; @@ -1421,7 +1445,7 @@ static void read_dataframe_row_from_packet( break; case MYSQL_TYPE_TINY: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { *(uint8_t*)loc = (is_null) ? 0 : (uint8_t)strtoul(out, &end, 10); } else { *(int8_t*)loc = (is_null) ? INT8_MIN : (int8_t)strtol(out, &end, 10); @@ -1430,7 +1454,7 @@ static void read_dataframe_row_from_packet( break; case MYSQL_TYPE_SHORT: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { *(uint16_t*)loc = (is_null) ? 0 : (uint16_t)strtoul(out, &end, 10); } else { *(int16_t*)loc = (is_null) ? INT16_MIN : (int16_t)strtol(out, &end, 10); @@ -1440,7 +1464,7 @@ static void read_dataframe_row_from_packet( case MYSQL_TYPE_INT24: case MYSQL_TYPE_LONG: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { *(uint32_t*)loc = (is_null) ? 0 : (uint32_t)strtoul(out, &end, 10); } else { *(int32_t*)loc = (is_null) ? INT32_MIN : (int32_t)strtol(out, &end, 10); @@ -1449,7 +1473,7 @@ static void read_dataframe_row_from_packet( break; case MYSQL_TYPE_LONGLONG: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { *(uint64_t*)loc = (is_null) ? 0 : (uint64_t)strtoul(out, &end, 10); } else { *(int64_t*)loc = (is_null) ? 
INT64_MIN : (int64_t)strtol(out, &end, 10); @@ -1575,19 +1599,19 @@ static void read_dataframe_row_from_packet( case MYSQL_TYPE_VARCHAR: case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_STRING: - if (encodings[i] == NULL) { + if (py_state->encodings[i] == NULL) { item = PyBytes_FromStringAndSize(out, out_l); if (!item) goto error; break; } - item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + item = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); if (!item) goto error; // Parse JSON string. - if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + if (py_state->type_codes[i] == MYSQL_TYPE_JSON && py_state->options.parse_json) { str = item; - item = PyObject_CallFunctionObjArgs(py_json_loads, str, NULL); + item = PyObject_CallFunctionObjArgs(py_state->py_json_loads, str, NULL); Py_DECREF(str); str = NULL; if (!item) goto error; } @@ -1598,7 +1622,8 @@ static void read_dataframe_row_from_packet( break; default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", + py_state->type_codes[i], NULL); goto error; } } @@ -1611,19 +1636,9 @@ static void read_dataframe_row_from_packet( } static PyObject *read_obj_row_from_packet( - unsigned long n_cols, - unsigned long *type_codes, - unsigned long *flags, - unsigned long *scales, - PyObject **names, - const char **encodings, - PyObject **converters, - PyObject *py_decimal, - PyObject *py_json_loads, - PyTypeObject *namedtuple, + StateObject *py_state, char *data, - unsigned long long data_l, - MySQLAccelOptions *options + unsigned long long data_l ) { char *out = NULL; char *orig_out = NULL; @@ -1644,20 +1659,20 @@ static PyObject *read_obj_row_from_packet( int second = 0; int microsecond = 0; - switch (options->output_type) { + switch (py_state->options.output_type) { case MYSQL_ACCEL_OUT_NAMEDTUPLES: { - if (!namedtuple) goto error; - py_result = PyStructSequence_New(namedtuple); + if (!py_state->namedtuple) 
goto error; + py_result = PyStructSequence_New(py_state->namedtuple); break; } case MYSQL_ACCEL_OUT_DICTS: py_result = PyDict_New(); break; default: - py_result = PyTuple_New(n_cols); + py_result = PyTuple_New(py_state->n_cols); } - for (unsigned long i = 0; i < n_cols; i++) { + for (unsigned long i = 0; i < py_state->n_cols; i++) { read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); end = &out[out_l]; @@ -1671,30 +1686,30 @@ static PyObject *read_obj_row_from_packet( if (!is_null) { // If a converter was passed in, use it. - if (converters[i]) { + if (py_state->py_converters[i]) { py_str = NULL; - if (encodings[i] == NULL) { + if (py_state->encodings[i] == NULL) { py_str = PyBytes_FromStringAndSize(out, out_l); if (!py_str) goto error; } else { - py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); if (!py_str) goto error; } - py_item = PyObject_CallFunctionObjArgs(converters[i], py_str, NULL); - Py_DECREF(py_str); py_str = NULL; + py_item = PyObject_CallFunctionObjArgs(py_state->py_converters[i], py_str, NULL); + Py_CLEAR(py_str); if (!py_item) goto error; } // If no converter was passed in, do the default processing. 
else { - switch (type_codes[i]) { + switch (py_state->type_codes[i]) { case MYSQL_TYPE_NEWDECIMAL: case MYSQL_TYPE_DECIMAL: - py_str = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); if (!py_str) goto error; - py_item = PyObject_CallFunctionObjArgs(py_decimal, py_str, NULL); - Py_DECREF(py_str); py_str = NULL; + py_item = PyObject_CallFunctionObjArgs(py_state->py_decimal, py_str, NULL); + Py_CLEAR(py_str); if (!py_item) goto error; break; @@ -1703,7 +1718,7 @@ static PyObject *read_obj_row_from_packet( case MYSQL_TYPE_LONG: case MYSQL_TYPE_LONGLONG: case MYSQL_TYPE_INT24: - if (flags[i] & MYSQL_FLAG_UNSIGNED) { + if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { py_item = PyLong_FromUnsignedLongLong(strtoul(out, &end, 10)); } else { py_item = PyLong_FromLongLong(strtol(out, &end, 10)); @@ -1724,8 +1739,8 @@ static PyObject *read_obj_row_from_packet( case MYSQL_TYPE_DATETIME: case MYSQL_TYPE_TIMESTAMP: if (!CHECK_ANY_DATETIME_STR(out, out_l)) { - if (options && options->invalid_datetime_value) { - py_item = options->invalid_datetime_value; + if (py_state->options.invalid_datetime_value) { + py_item = py_state->options.invalid_datetime_value; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); @@ -1753,8 +1768,8 @@ static PyObject *read_obj_row_from_packet( case MYSQL_TYPE_NEWDATE: case MYSQL_TYPE_DATE: if (!CHECK_DATE_STR(out, out_l)) { - if (options && options->invalid_date_value) { - py_item = options->invalid_date_value; + if (py_state->options.invalid_date_value) { + py_item = py_state->options.invalid_date_value; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); @@ -1776,8 +1791,8 @@ static PyObject *read_obj_row_from_packet( case MYSQL_TYPE_TIME: sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); if (!sign) { - if (options && options->invalid_time_value) { - py_item = options->invalid_time_value; + 
if (py_state->options.invalid_time_value) { + py_item = py_state->options.invalid_time_value; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); @@ -1842,27 +1857,28 @@ static PyObject *read_obj_row_from_packet( case MYSQL_TYPE_VARCHAR: case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_STRING: - if (encodings[i] == NULL) { + if (!py_state->encodings[i]) { py_item = PyBytes_FromStringAndSize(out, out_l); if (!py_item) goto error; - break; + break; } - py_item = PyUnicode_Decode(out, out_l, encodings[i], "strict"); + py_item = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); if (!py_item) goto error; // Parse JSON string. - if (type_codes[i] == MYSQL_TYPE_JSON && options->parse_json) { + if (py_state->type_codes[i] == MYSQL_TYPE_JSON && py_state->options.parse_json) { py_str = py_item; - py_item = PyObject_CallFunctionObjArgs(py_json_loads, py_str, NULL); - Py_DECREF(py_str); py_str = NULL; + py_item = PyObject_CallFunctionObjArgs(py_state->py_json_loads, py_str, NULL); + Py_CLEAR(py_str); if (!py_item) goto error; } break; default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", type_codes[i], NULL); + PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", + py_state->type_codes[i], NULL); goto error; } } @@ -1872,13 +1888,13 @@ static PyObject *read_obj_row_from_packet( Py_INCREF(Py_None); } - switch (options->output_type) { + switch (py_state->options.output_type) { case MYSQL_ACCEL_OUT_NAMEDTUPLES: PyStructSequence_SET_ITEM(py_result, i, py_item); break; case MYSQL_ACCEL_OUT_DICTS: - PyDict_SetItem(py_result, names[i], py_item); - Py_INCREF(names[i]); + PyDict_SetItem(py_result, py_state->py_names[i], py_item); + Py_INCREF(py_state->py_names[i]); Py_DECREF(py_item); break; default: @@ -1890,7 +1906,7 @@ static PyObject *read_obj_row_from_packet( return py_result; error: - Py_XDECREF(py_result); + Py_CLEAR(py_result); goto exit; } @@ -1898,6 +1914,7 @@ static PyObject *read_rowdata_packet(PyObject 
*self, PyObject *args, PyObject *k StateObject *py_state = NULL; PyObject *py_res = NULL; PyObject *py_out = NULL; + PyObject *py_next_seq_id = NULL; int is_eof = 0; // Parse function args. @@ -1908,26 +1925,27 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k // Get the rowdata state. py_state = (StateObject*)PyObject_GetAttrString(py_res, "_state"); if (!py_state) { - PyErr_Clear(); int rc = 0; + + PyErr_Clear(); + PyObject *py_args = PyTuple_New(1); if (!py_args) goto error; - PyTuple_SET_ITEM(py_args, 0, py_res); Py_INCREF(py_res); + PyTuple_SET_ITEM(py_args, 0, py_res); + Py_INCREF(py_res); py_state = (StateObject*)State_new(&StateType, py_args, NULL); - if (!py_state) goto error; + if (!py_state) { Py_DECREF(py_args); goto error; } rc = State_init((StateObject*)py_state, py_args, NULL); - Py_XDECREF(py_args); - if (rc != 0) goto error; + Py_DECREF(py_args); + if (rc != 0) { Py_DECREF(py_state); goto error; } PyObject_SetAttrString(py_res, "_state", (PyObject*)py_state); Py_DECREF(py_state); } while (1) { - PyObject *py_buff = read_packet(py_state->py_conn, py_state->py_read, - py_state->py_settimeout, py_state->py_read_timeout, - &py_state->next_seq_id); + PyObject *py_buff = read_packet(py_state); if (!py_buff) goto error; PyObject *py_row = NULL; @@ -1942,12 +1960,14 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k PyObject *py_long = NULL; py_long = PyLong_FromUnsignedLongLong(warning_count); + if (!py_long) goto error; PyObject_SetAttrString(py_res, "warning_count", py_long); - Py_XDECREF(py_long); + Py_DECREF(py_long); py_long = PyLong_FromLong(has_next); + if (!py_long) goto error; PyObject_SetAttrString(py_res, "has_next", py_long); - Py_XDECREF(py_long); + Py_DECREF(py_long); PyObject_SetAttrString(py_res, "connection", Py_None); @@ -1955,7 +1975,7 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k Py_INCREF(py_state); PyObject_DelAttrString(py_res, 
"_state"); - Py_XDECREF(py_buff); + Py_DECREF(py_buff); if (py_state->unbuffered) { PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); @@ -1971,7 +1991,7 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k switch (py_state->options.output_type) { case MYSQL_ACCEL_OUT_PANDAS: case MYSQL_ACCEL_OUT_NUMPY: - if (py_state->n_rows >= py_state->df_buffer_n_rows) { + if (!py_state->unbuffered && py_state->n_rows >= py_state->df_buffer_n_rows) { py_state->df_buffer_n_rows *= 1.7; py_state->df_buffer = realloc(py_state->df_buffer, py_state->df_buffer_row_size * @@ -1979,30 +1999,19 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k py_state->df_cursor = py_state->df_buffer + py_state->df_buffer_row_size * py_state->n_rows; } - read_dataframe_row_from_packet(py_state->n_cols, py_state->type_codes, - py_state->flags, py_state->scales, py_state->py_names, - py_state->encodings, py_state->py_converters, - py_state->py_decimal, py_state->py_json_loads, - data, data_l, &py_state->options, py_state->df_cursor); + read_dataframe_row_from_packet(py_state, data, data_l); py_state->df_cursor += py_state->df_buffer_row_size; break; default: - py_row = read_obj_row_from_packet(py_state->n_cols, py_state->type_codes, - py_state->flags, py_state->scales, - py_state->py_names, py_state->encodings, - py_state->py_converters, - py_state->py_decimal, - py_state->py_json_loads, - py_state->namedtuple, - data, data_l, &py_state->options); - if (!py_row) { Py_XDECREF(py_buff); goto error; } + py_row = read_obj_row_from_packet(py_state, data, data_l); + if (!py_row) { Py_DECREF(py_buff); goto error; } if (py_state->unbuffered) { PyList_SetItem(py_state->py_rows, 0, py_row); } else { PyList_Append(py_state->py_rows, py_row); - Py_XDECREF(py_row); + Py_DECREF(py_row); } } @@ -2014,34 +2023,46 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k switch (py_state->options.output_type) { case 
MYSQL_ACCEL_OUT_PANDAS: case MYSQL_ACCEL_OUT_NUMPY: - py_state->df_buffer = realloc(py_state->df_buffer, - py_state->df_buffer_row_size * py_state->n_rows); - py_state->py_rows = build_array(py_state->n_rows, py_state->n_cols, - py_state->py_names, py_state->type_codes, - py_state->flags, py_state->scales, - py_state->df_buffer, - py_state->df_buffer_row_size, &py_state->options); + // Resize the buffer down. + if (!py_state->unbuffered) { + py_state->df_buffer = realloc(py_state->df_buffer, + py_state->df_buffer_row_size * py_state->n_rows); + py_state->df_cursor = py_state->df_buffer + + py_state->df_buffer_row_size * py_state->n_rows; + } + + py_state->py_rows = build_array(py_state); + PyObject_SetAttrString(py_res, "rows", py_state->py_rows); - Py_DECREF(py_state->py_rows); } - PyObject *py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); +exit: + + py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); if (!py_next_seq_id) goto error; PyObject_SetAttrString(py_state->py_conn, "_next_seq_id", py_next_seq_id); - Py_XDECREF(py_next_seq_id); + Py_DECREF(py_next_seq_id); -exit: py_out = NULL; if (py_state->unbuffered) { if (is_eof) { + if (py_state->df_buffer) { + free(py_state->df_buffer); + py_state->df_buffer = NULL; + py_state->df_cursor = NULL; + } Py_INCREF(Py_None); py_out = Py_None; PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); - PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); - Py_DECREF(py_n_rows); + PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); + Py_XDECREF(py_n_rows); } else { + // Unbuffered needs a new row every time. 
+ py_state->df_buffer = malloc(py_state->df_buffer_row_size); + py_state->df_cursor = py_state->df_buffer; + switch (py_state->options.output_type) { case MYSQL_ACCEL_OUT_PANDAS: case MYSQL_ACCEL_OUT_NUMPY: @@ -2067,8 +2088,8 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k Py_INCREF(py_out); } PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); - PyObject_SetAttrString(py_res, "affected_rows", py_n_rows); - Py_DECREF(py_n_rows); + PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); + Py_XDECREF(py_n_rows); } if (is_eof) { From 622396fc0dda85b6943d42987b3a9a47ce4d5eb6 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Fri, 26 Aug 2022 10:26:26 -0500 Subject: [PATCH 04/12] Small cleanup of df buffer and output object --- src/accel.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/accel.c b/src/accel.c index 954b9d247..27e68f600 100644 --- a/src/accel.c +++ b/src/accel.c @@ -2059,13 +2059,13 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k Py_XDECREF(py_n_rows); } else { - // Unbuffered needs a new row every time. - py_state->df_buffer = malloc(py_state->df_buffer_row_size); - py_state->df_cursor = py_state->df_buffer; - switch (py_state->options.output_type) { case MYSQL_ACCEL_OUT_PANDAS: case MYSQL_ACCEL_OUT_NUMPY: + // Unbuffered needs a new row every time. + py_state->df_buffer = malloc(py_state->df_buffer_row_size); + py_state->df_cursor = py_state->df_buffer; + // TODO: reshape? 
py_out = py_state->py_rows; Py_INCREF(py_out); @@ -2077,16 +2077,8 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k } } else { - switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_PANDAS: - case MYSQL_ACCEL_OUT_NUMPY: - py_out = py_state->py_rows; - Py_INCREF(py_out); - break; - default: - py_out = py_state->py_rows; - Py_INCREF(py_out); - } + py_out = py_state->py_rows; + Py_INCREF(py_out); PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); Py_XDECREF(py_n_rows); From 5d6ff1318adfd1c9dddfd4f176423f802b131463 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 30 Aug 2022 14:37:29 -0500 Subject: [PATCH 05/12] Fix issues with numpy / dataframe memory --- pymysql/cursors.py | 25 ++++++++- src/accel.c | 131 ++++++++++++++++++++++++--------------------- 2 files changed, 93 insertions(+), 63 deletions(-) diff --git a/pymysql/cursors.py b/pymysql/cursors.py index 73023755c..0dfcdcdfe 100644 --- a/pymysql/cursors.py +++ b/pymysql/cursors.py @@ -278,6 +278,10 @@ def callproc(self, procname, args=()): def fetchone(self): """Fetch the next row.""" self._check_executed() + return self._unchecked_fetchone() + + def _unchecked_fetchone(self): + """Fetch the next row.""" if self._rows is None or self.rownumber >= len(self._rows): return None result = self._rows[self.rownumber] @@ -346,7 +350,16 @@ def _do_get_result(self): self._rows = result.rows def __iter__(self): - return iter(self.fetchone, None) + self._check_executed() + _unchecked_fetchone = self._unchecked_fetchone + def fetchall_unbuffered_gen(): + while True: + out =_unchecked_fetchone() + if out is not None: + yield out + else: + break + return fetchall_unbuffered_gen() Warning = err.Warning Error = err.Error @@ -465,7 +478,15 @@ def fetchall_unbuffered(self): would use ridiculous memory for large result sets. 
""" self._check_executed() - return iter(self._unchecked_fetchone, None) + _unchecked_fetchone = self._unchecked_fetchone + def fetchall_unbuffered_gen(): + while True: + out =_unchecked_fetchone() + if out is not None: + yield out + else: + break + return fetchall_unbuffered_gen() def __iter__(self): return self.fetchall_unbuffered() diff --git a/src/accel.c b/src/accel.c index 27e68f600..fbfbe1976 100644 --- a/src/accel.c +++ b/src/accel.c @@ -5,11 +5,11 @@ #include #include -#define MYSQL_ACCEL_OUT_TUPLES 0 -#define MYSQL_ACCEL_OUT_NAMEDTUPLES 1 -#define MYSQL_ACCEL_OUT_DICTS 2 -#define MYSQL_ACCEL_OUT_NUMPY 3 -#define MYSQL_ACCEL_OUT_PANDAS 4 +#define MYSQLSV_OUT_TUPLES 0 +#define MYSQLSV_OUT_NAMEDTUPLES 1 +#define MYSQLSV_OUT_DICTS 2 +#define MYSQLSV_OUT_NUMPY 3 +#define MYSQLSV_OUT_PANDAS 4 #define MYSQL_FLAG_NOT_NULL 1 #define MYSQL_FLAG_PRI_KEY 2 @@ -73,12 +73,12 @@ #define EPOCH_TO_DAYS 719528 #define SECONDS_PER_DAY (24 * 60 * 60) -#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIMEDELTA 0 -#define MYSQL_ACCEL_OPTION_TIME_TYPE_TIME 1 -#define MYSQL_ACCEL_OPTION_JSON_TYPE_STRING 0 -#define MYSQL_ACCEL_OPTION_JSON_TYPE_OBJ 1 -#define MYSQL_ACCEL_OPTION_BIT_TYPE_BYTES 0 -#define MYSQL_ACCEL_OPTION_BIT_TYPE_INT 1 +#define MYSQLSV_OPTION_TIME_TYPE_TIMEDELTA 0 +#define MYSQLSV_OPTION_TIME_TYPE_TIME 1 +#define MYSQLSV_OPTION_JSON_TYPE_STRING 0 +#define MYSQLSV_OPTION_JSON_TYPE_OBJ 1 +#define MYSQLSV_OPTION_BIT_TYPE_BYTES 0 +#define MYSQLSV_OPTION_BIT_TYPE_INT 1 #define CHR2INT1(x) ((x)[1] - '0') #define CHR2INT2(x) ((((x)[0] - '0') * 10) + ((x)[1] - '0')) @@ -274,15 +274,19 @@ typedef struct { } ArrayObject; static void Array_dealloc(ArrayObject *self) { + // Numpy arrays take ownership of our memory. This happens because we are + // using the Python API to create our array, rather than the numpy C API. 
+#if 0 if (self->array_interface && PyDict_Check(self->array_interface)) { PyObject *data = PyDict_GetItemString(self->array_interface, "data"); if (data) { PyObject *buffer = PyTuple_GetItem(data, 0); if (buffer) { - free((char*)PyLong_AsUnsignedLongLong(buffer)); + free((char*)PyLong_AsVoidPtr(buffer)); } } } +#endif Py_CLEAR(self->array_interface); Py_TYPE(self)->tp_free((PyObject*)self); } @@ -392,6 +396,7 @@ typedef struct { static void read_options(MySQLAccelOptions *options, PyObject *dict); static unsigned long long compute_row_size(StateObject *py_state); static void build_array_interface(StateObject *py_state); +static PyObject *build_array(StateObject *py_state); #define DESTROY(x) do { if (x) { free(x); (x) = NULL; } } while (0) @@ -613,7 +618,7 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { } switch (self->options.output_type) { - case MYSQL_ACCEL_OUT_PANDAS: + case MYSQLSV_OUT_PANDAS: // Import pandas module. self->py_pandas_mod = PyImport_ImportModule("pandas"); if (!self->py_pandas_mod) goto error; @@ -622,7 +627,7 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { // Fall through - case MYSQL_ACCEL_OUT_NUMPY: + case MYSQLSV_OUT_NUMPY: // Import numpy module. self->py_numpy_mod = PyImport_ImportModule("numpy"); if (!self->py_numpy_mod) goto error; @@ -636,12 +641,18 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { // Setup dataframe buffer. self->df_buffer_n_rows = (self->unbuffered) ? 1 : 500; self->df_buffer_row_size = compute_row_size(self); - self->df_buffer = malloc(self->df_buffer_row_size * self->df_buffer_n_rows); + self->df_buffer = calloc(self->df_buffer_row_size, self->df_buffer_n_rows); if (!self->df_buffer) goto error; self->df_cursor = self->df_buffer; + + // Construct the array to use for every fetch (it's reused each time). 
+ self->py_rows = build_array(self); + if (!self->py_rows) goto error; + + PyObject_SetAttrString(py_res, "rows", self->py_rows); break; - case MYSQL_ACCEL_OUT_NAMEDTUPLES: + case MYSQLSV_OUT_NAMEDTUPLES: self->namedtuple_desc.name = "Row"; self->namedtuple_desc.doc = "Row of data values"; self->namedtuple_desc.n_in_sequence = self->n_cols; @@ -706,20 +717,20 @@ static void read_options(MySQLAccelOptions *options, PyObject *dict) { if (PyUnicode_CompareWithASCIIString(key, "output_type") == 0) { if (PyUnicode_CompareWithASCIIString(value, "dict") == 0 || PyUnicode_CompareWithASCIIString(value, "dicts") == 0 ) { - options->output_type = MYSQL_ACCEL_OUT_DICTS; + options->output_type = MYSQLSV_OUT_DICTS; } else if (PyUnicode_CompareWithASCIIString(value, "namedtuple") == 0 || PyUnicode_CompareWithASCIIString(value, "namedtuples") == 0) { - options->output_type = MYSQL_ACCEL_OUT_NAMEDTUPLES; + options->output_type = MYSQLSV_OUT_NAMEDTUPLES; } else if (PyUnicode_CompareWithASCIIString(value, "numpy") == 0) { - options->output_type = MYSQL_ACCEL_OUT_NUMPY; + options->output_type = MYSQLSV_OUT_NUMPY; } else if (PyUnicode_CompareWithASCIIString(value, "pandas") == 0) { - options->output_type = MYSQL_ACCEL_OUT_PANDAS; + options->output_type = MYSQLSV_OUT_PANDAS; } else { - options->output_type = MYSQL_ACCEL_OUT_TUPLES; + options->output_type = MYSQLSV_OUT_TUPLES; } } else if (PyUnicode_CompareWithASCIIString(key, "parse_json") == 0) { options->parse_json = PyObject_IsTrue(value); @@ -736,12 +747,12 @@ static void read_options(MySQLAccelOptions *options, PyObject *dict) { // mysql, for whatever reason, treats 0 as an actual year, but not // a leap year // -inline int is_leap_year(int year) +static int is_leap_year(int year) { return (year % 4) == 0 && year != 0 && ((year % 100) != 0 || (year % 400) == 0); } -inline int days_in_previous_months(int month, int year) +static int days_in_previous_months(int month, int year) { static const int previous_days[13] = { @@ -765,12 
+776,12 @@ inline int days_in_previous_months(int month, int year) // NOTE: year 0 does not actually exist, but mysql pretends it does (and is NOT // a leap year) // -inline int leap_years_before(int year) +static int leap_years_before(int year) { return (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400; } -inline int days_in_previous_years(int year) +static int days_in_previous_years(int year) { return 365 * year + leap_years_before(year); } @@ -1252,7 +1263,7 @@ static void build_array_interface(StateObject *py_state) { goto exit; } -PyObject *build_array(StateObject *py_state) { +static PyObject *build_array(StateObject *py_state) { PyObject *py_out = NULL; PyObject *py_data = NULL; PyObject *py_array = NULL; @@ -1262,7 +1273,7 @@ PyObject *build_array(StateObject *py_state) { py_data = PyTuple_New(2); if (!py_data) goto error; - PyTuple_SetItem(py_data, 0, PyLong_FromUnsignedLongLong((unsigned long long)py_state->df_buffer)); + PyTuple_SetItem(py_data, 0, PyLong_FromVoidPtr(py_state->df_buffer)); PyTuple_SetItem(py_data, 1, Py_False); Py_INCREF(Py_False); @@ -1293,12 +1304,11 @@ PyObject *build_array(StateObject *py_state) { py_state->py_array_kwds); if (!py_out) goto error; - if (py_state->options.output_type == MYSQL_ACCEL_OUT_PANDAS) { - PyObject *out2 = NULL; - out2 = PyObject_CallFunctionObjArgs(py_state->py_pandas_mod, py_out, NULL); - if (!out2) goto error; - Py_DECREF(py_out); - py_out = out2; + if (py_state->options.output_type == MYSQLSV_OUT_PANDAS) { + PyObject *py_tmp = py_out; + py_out = PyObject_CallFunctionObjArgs(py_state->py_pandas_dataframe, py_out, NULL); + if (!py_out) goto error; + Py_DECREF(py_tmp); } exit: @@ -1660,12 +1670,12 @@ static PyObject *read_obj_row_from_packet( int microsecond = 0; switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_NAMEDTUPLES: { + case MYSQLSV_OUT_NAMEDTUPLES: { if (!py_state->namedtuple) goto error; py_result = PyStructSequence_New(py_state->namedtuple); break; } - case MYSQL_ACCEL_OUT_DICTS: 
+ case MYSQLSV_OUT_DICTS: py_result = PyDict_New(); break; default: @@ -1889,10 +1899,10 @@ static PyObject *read_obj_row_from_packet( } switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_NAMEDTUPLES: + case MYSQLSV_OUT_NAMEDTUPLES: PyStructSequence_SET_ITEM(py_result, i, py_item); break; - case MYSQL_ACCEL_OUT_DICTS: + case MYSQLSV_OUT_DICTS: PyDict_SetItem(py_result, py_state->py_names[i], py_item); Py_INCREF(py_state->py_names[i]); Py_DECREF(py_item); @@ -1938,11 +1948,12 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k if (!py_state) { Py_DECREF(py_args); goto error; } rc = State_init((StateObject*)py_state, py_args, NULL); Py_DECREF(py_args); - if (rc != 0) { Py_DECREF(py_state); goto error; } + if (rc != 0) { Py_CLEAR(py_state); goto error; } PyObject_SetAttrString(py_res, "_state", (PyObject*)py_state); - Py_DECREF(py_state); } + // We are depending on the res._state variable to hold a ref count. + Py_DECREF(py_state); while (1) { PyObject *py_buff = read_packet(py_state); @@ -1989,8 +2000,8 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k py_state->n_rows += 1; switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_PANDAS: - case MYSQL_ACCEL_OUT_NUMPY: + case MYSQLSV_OUT_PANDAS: + case MYSQLSV_OUT_NUMPY: if (!py_state->unbuffered && py_state->n_rows >= py_state->df_buffer_n_rows) { py_state->df_buffer_n_rows *= 1.7; py_state->df_buffer = realloc(py_state->df_buffer, @@ -2000,7 +2011,11 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k py_state->df_buffer_row_size * py_state->n_rows; } read_dataframe_row_from_packet(py_state, data, data_l); - py_state->df_cursor += py_state->df_buffer_row_size; + if (py_state->unbuffered) { + py_state->df_cursor = py_state->df_buffer; + } else { + py_state->df_cursor += py_state->df_buffer_row_size; + } break; default: @@ -2021,23 +2036,26 @@ static PyObject *read_rowdata_packet(PyObject *self, 
PyObject *args, PyObject *k } switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_PANDAS: - case MYSQL_ACCEL_OUT_NUMPY: - // Resize the buffer down. + case MYSQLSV_OUT_PANDAS: + case MYSQLSV_OUT_NUMPY: + // Resize the buffer down to be only the required amount needed. if (!py_state->unbuffered) { py_state->df_buffer = realloc(py_state->df_buffer, py_state->df_buffer_row_size * py_state->n_rows); py_state->df_cursor = py_state->df_buffer + py_state->df_buffer_row_size * py_state->n_rows; + PyObject *py_tmp = py_state->py_rows; + py_state->py_rows = build_array(py_state); + if (!py_state->py_rows) goto error; + Py_DECREF(py_tmp); + PyObject_SetAttrString(py_res, "rows", py_state->py_rows); } - - py_state->py_rows = build_array(py_state); - - PyObject_SetAttrString(py_res, "rows", py_state->py_rows); } exit: + if (!py_state) return NULL; + py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); if (!py_next_seq_id) goto error; PyObject_SetAttrString(py_state->py_conn, "_next_seq_id", py_next_seq_id); @@ -2047,11 +2065,6 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k if (py_state->unbuffered) { if (is_eof) { - if (py_state->df_buffer) { - free(py_state->df_buffer); - py_state->df_buffer = NULL; - py_state->df_cursor = NULL; - } Py_INCREF(Py_None); py_out = Py_None; PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); @@ -2060,13 +2073,9 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k } else { switch (py_state->options.output_type) { - case MYSQL_ACCEL_OUT_PANDAS: - case MYSQL_ACCEL_OUT_NUMPY: - // Unbuffered needs a new row every time. - py_state->df_buffer = malloc(py_state->df_buffer_row_size); - py_state->df_cursor = py_state->df_buffer; - - // TODO: reshape? + case MYSQLSV_OUT_PANDAS: + case MYSQLSV_OUT_NUMPY: + // TODO: Return single row for fetchone. 
py_out = py_state->py_rows; Py_INCREF(py_out); break; From 582758940c2eebe2463f61e3019ded865f646a7c Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 30 Aug 2022 15:13:45 -0500 Subject: [PATCH 06/12] Doc cleanup and new license --- LICENSE | 220 +++++++++++++++++++++++++++++++++++++++---- README.md | 67 +++++++++++++ README.rst | 141 --------------------------- licenses/PyMySQL.txt | 19 ++++ setup.py | 3 +- 5 files changed, 289 insertions(+), 161 deletions(-) create mode 100644 README.md delete mode 100644 README.rst create mode 100644 licenses/PyMySQL.txt diff --git a/LICENSE b/LICENSE index 86b18e10b..ded2d59fa 100644 --- a/LICENSE +++ b/LICENSE @@ -1,19 +1,201 @@ -Copyright (c) 2010, 2013 PyMySQL contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 SingleStore + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 000000000..a0330b051 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# PyMySQLsv + +This project contains classes based on the PyMySQL database client which have +been accelerated using a C extension. Why would you want to do such a thing? Isn't +the whole point of PyMySQL to be a pure Python database client? Well, yes. +However, the only C-based alternatives either have restrictive licenses, have +dependencies that a system administrator must install, or both. +A MySQL client that is fast, easy to install for a non-admin user, and has a +permissive license is something that hasn't existed. + +## What does the 'sv' stand for? + +This tag is borrowed from the Lamborghini Aventador SuperVeloce. SuperVeloce is +Italian for "super velocity". + +## What about CyMySQL? 
+ +CyMySQL is a great idea, but doesn't give the performance we were looking for. +It increases the performance of PyMySQL by about 10-15%, which still leaves it at +the second slowest client. It is also based on a PyMySQL codebase from years ago, +so it does not contain any recent bug fixes or features of that project. + +## Install + +This package installs just like any other Python package. Since it includes a C +extension it does require a C compiler if there isn't a pre-compiled version for your +architecture. +``` +python3 setup.py install +``` + +## How to use it + +This package contains a Python DB-API compliant interface. So connections are made +the same way as any other DB-API connection. + +``` +import pymysql as sv + +with sv.connect(...) as conn: + with conn.cursor() as cur: + ... +``` + +## License + +This library is licensed under the [Apache 2.0 License](https://raw.githubusercontent.com/singlestore-labs/PyMySQLsv/main/LICENSE?token=GHSAT0AAAAAABMGV6QPNR6N23BVICDYK5LAYTVK5EA). + +## Resources + +* [PyMySQL](https://pymysql.readthedocs.io/en/latest/) +* [SingleStore](https://singlestore.com) +* [Python](https://python.org) + +## User agreement + +SINGLESTORE, INC. 
("SINGLESTORE") AGREES TO GRANT YOU AND YOUR COMPANY ACCESS TO THIS OPEN SOURCE SOFTWARE CONNECTOR ONLY IF (A) YOU AND YOUR COMPANY REPRESENT AND WARRANT THAT YOU, ON BEHALF OF YOUR COMPANY, HAVE THE AUTHORITY TO LEGALLY BIND YOUR COMPANY AND (B) YOU, ON BEHALF OF YOUR COMPANY ACCEPT AND AGREE TO BE BOUND BY ALL OF THE OPEN SOURCE TERMS AND CONDITIONS APPLICABLE TO THIS OPEN SOURCE CONNECTOR AS SET FORTH BELOW (THIS “AGREEMENT”), WHICH SHALL BE DEFINITIVELY EVIDENCED BY ANY ONE OF THE FOLLOWING MEANS: YOU, ON BEHALF OF YOUR COMPANY, CLICKING THE “DOWNLOAD, “ACCEPTANCE” OR “CONTINUE” BUTTON, AS APPLICABLE OR COMPANY’S INSTALLATION, ACCESS OR USE OF THE OPEN SOURCE CONNECTOR AND SHALL BE EFFECTIVE ON THE EARLIER OF THE DATE ON WHICH THE DOWNLOAD, ACCESS, COPY OR INSTALL OF THE CONNECTOR OR USE ANY SERVICES (INCLUDING ANY UPDATES OR UPGRADES) PROVIDED BY SINGLESTORE. +BETA SOFTWARE CONNECTOR + +Customer Understands and agrees that it is being granted access to pre-release or “beta” versions of SingleStore’s open source software connector (“Beta Software Connector”) for the limited purposes of non-production testing and evaluation of such Beta Software Connector. Customer acknowledges that SingleStore shall have no obligation to release a generally available version of such Beta Software Connector or to provide support or warranty for such versions of the Beta Software Connector for any production or non-evaluation use. + +NOTWITHSTANDING ANYTHING TO THE CONTRARY IN ANY DOCUMENTATION, AGREEMENT OR IN ANY ORDER DOCUMENT, SINGLESTORE WILL HAVE NO WARRANTY, INDEMNITY, SUPPORT, OR SERVICE LEVEL, OBLIGATIONS WITH +RESPECT TO THIS BETA SOFTWARE CONNECTOR (INCLUDING TOOLS AND UTILITIES). + +APPLICABLE OPEN SOURCE LICENSE: Apache 2.0 + +IF YOU OR YOUR COMPANY DO NOT AGREE TO THESE TERMS AND CONDITIONS, DO NOT CHECK THE ACCEPTANCE BOX, AND DO NOT DOWNLOAD, ACCESS, COPY, INSTALL OR USE THE SOFTWARE OR THE SERVICES. 
diff --git a/README.rst b/README.rst deleted file mode 100644 index 318e94604..000000000 --- a/README.rst +++ /dev/null @@ -1,141 +0,0 @@ -.. image:: https://readthedocs.org/projects/pymysql/badge/?version=latest - :target: https://pymysql.readthedocs.io/ - :alt: Documentation Status - -.. image:: https://coveralls.io/repos/PyMySQL/PyMySQL/badge.svg?branch=main&service=github - :target: https://coveralls.io/github/PyMySQL/PyMySQL?branch=main - -.. image:: https://img.shields.io/lgtm/grade/python/g/PyMySQL/PyMySQL.svg?logo=lgtm&logoWidth=18 - :target: https://lgtm.com/projects/g/PyMySQL/PyMySQL/context:python - - -PyMySQL -======= - -.. contents:: Table of Contents - :local: - -This package contains a pure-Python MySQL client library, based on `PEP 249`_. - -.. _`PEP 249`: https://www.python.org/dev/peps/pep-0249/ - - -Requirements -------------- - -* Python -- one of the following: - - - CPython_ : 3.7 and newer - - PyPy_ : Latest 3.x version - -* MySQL Server -- one of the following: - - - MySQL_ >= 5.7 - - MariaDB_ >= 10.3 - -.. _CPython: https://www.python.org/ -.. _PyPy: https://pypy.org/ -.. _MySQL: https://www.mysql.com/ -.. _MariaDB: https://mariadb.org/ - - -Installation ------------- - -Package is uploaded on `PyPI `_. - -You can install it with pip:: - - $ python3 -m pip install PyMySQL - -To use "sha256_password" or "caching_sha2_password" for authenticate, -you need to install additional dependency:: - - $ python3 -m pip install PyMySQL[rsa] - -To use MariaDB's "ed25519" authentication method, you need to install -additional dependency:: - - $ python3 -m pip install PyMySQL[ed25519] - - -Documentation -------------- - -Documentation is available online: https://pymysql.readthedocs.io/ - -For support, please refer to the `StackOverflow -`_. - - -Example -------- - -The following examples make use of a simple table - -.. 
code:: sql - - CREATE TABLE `users` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `email` varchar(255) COLLATE utf8_bin NOT NULL, - `password` varchar(255) COLLATE utf8_bin NOT NULL, - PRIMARY KEY (`id`) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin - AUTO_INCREMENT=1 ; - - -.. code:: python - - import pymysql.cursors - - # Connect to the database - connection = pymysql.connect(host='localhost', - user='user', - password='passwd', - database='db', - cursorclass=pymysql.cursors.DictCursor) - - with connection: - with connection.cursor() as cursor: - # Create a new record - sql = "INSERT INTO `users` (`email`, `password`) VALUES (%s, %s)" - cursor.execute(sql, ('webmaster@python.org', 'very-secret')) - - # connection is not autocommit by default. So you must commit to save - # your changes. - connection.commit() - - with connection.cursor() as cursor: - # Read a single record - sql = "SELECT `id`, `password` FROM `users` WHERE `email`=%s" - cursor.execute(sql, ('webmaster@python.org',)) - result = cursor.fetchone() - print(result) - - -This example will print: - -.. code:: python - - {'password': 'very-secret', 'id': 1} - - -Resources ---------- - -* DB-API 2.0: https://www.python.org/dev/peps/pep-0249/ - -* MySQL Reference Manuals: https://dev.mysql.com/doc/ - -* MySQL client/server protocol: - https://dev.mysql.com/doc/internals/en/client-server-protocol.html - -* "Connector" channel in MySQL Community Slack: - https://lefred.be/mysql-community-on-slack/ - -* PyMySQL mailing list: https://groups.google.com/forum/#!forum/pymysql-users - -License -------- - -PyMySQL is released under the MIT License. See LICENSE for more information. 
diff --git a/licenses/PyMySQL.txt b/licenses/PyMySQL.txt new file mode 100644 index 000000000..86b18e10b --- /dev/null +++ b/licenses/PyMySQL.txt @@ -0,0 +1,19 @@ +Copyright (c) 2010, 2013 PyMySQL contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/setup.py b/setup.py index be0ff86c4..ff33324b8 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ version = "1.0.2" -with open("./README.rst", encoding="utf-8") as f: +with open("./README.md", encoding="utf-8") as f: readme = f.read() setup( @@ -15,6 +15,7 @@ }, description="Pure Python MySQL Driver", long_description=readme, + long_description_content_type='text/markdown', packages=find_packages(exclude=["tests*", "pymysql.tests*"]), python_requires=">=3.7", extras_require={ From 76d676704734812ad8c8be117da687c85f73615d Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 30 Aug 2022 15:42:54 -0500 Subject: [PATCH 07/12] Add mysql exception to connection --- pymysql/connections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymysql/connections.py b/pymysql/connections.py index 88cdf2b3d..2289e09ef 100644 --- a/pymysql/connections.py +++ b/pymysql/connections.py @@ -411,6 +411,9 @@ def __exit__(self, *exc_info): del exc_info self.close() + def _raise_mysql_exception(self, data): + err.raise_mysql_exception(data) + def _create_ssl_ctx(self, sslp): if isinstance(sslp, ssl.SSLContext): return sslp From 7b22cd601e0db2ff7944fe940546393915ce28c9 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 1 Sep 2022 08:34:26 -0500 Subject: [PATCH 08/12] More performance improvements --- pymysql/cursors.py | 22 ++--- src/accel.c | 211 ++++++++++++++++++++++++++------------------- 2 files changed, 132 insertions(+), 101 deletions(-) diff --git a/pymysql/cursors.py b/pymysql/cursors.py index 0dfcdcdfe..ff1d29bdd 100644 --- a/pymysql/cursors.py +++ b/pymysql/cursors.py @@ -536,7 +536,7 @@ class SSCursorSV(SSCursor): def _unchecked_fetchone(self): """Fetch next row.""" - row = self._result._read_rowdata_packet_unbuffered() + row = self._result._read_rowdata_packet_unbuffered(1) if row is None: return None self.rownumber += 1 @@ -547,15 +547,11 @@ def fetchmany(self, size=None): self._check_executed() if size is None: size = self.arraysize - - rows = [] - for 
i in range(size): - row = self._result._read_rowdata_packet_unbuffered() - if row is None: - break - rows.append(row) - self.rownumber += 1 - return rows + out = self._result._read_rowdata_packet_unbuffered(size) + if out is None: + return [] + self.rownumber += len(out) + return out def scroll(self, value, mode="relative"): self._check_executed() @@ -566,8 +562,7 @@ def scroll(self, value, mode="relative"): "Backwards scrolling not supported by this cursor" ) - for _ in range(value): - self._result._read_rowdata_packet_unbuffered() + self._result._read_rowdata_packet_unbuffered(value) self.rownumber += value elif mode == "absolute": if value < self.rownumber: @@ -576,8 +571,7 @@ def scroll(self, value, mode="relative"): ) end = value - self.rownumber - for _ in range(end): - self._result._read_rowdata_packet_unbuffered() + self._result._read_rowdata_packet_unbuffered(end) self.rownumber = value else: raise err.ProgrammingError("unknown scroll mode %s" % mode) diff --git a/src/accel.c b/src/accel.c index fbfbe1976..8f20cdef3 100644 --- a/src/accel.c +++ b/src/accel.c @@ -377,20 +377,22 @@ typedef struct { PyTypeObject *namedtuple; // Generated namedtuple type PyObject **py_encodings; // Encoding for each column as Python string const char **encodings; // Encoding for each column - unsigned long long n_cols; - unsigned long long n_rows; + unsigned long long n_cols; // Total number of columns + unsigned long long n_rows; // Total number of rows read + unsigned long long n_rows_in_batch; // Number of rows in current batch (fetchmany size) unsigned long *type_codes; // Type code for each column unsigned long *flags; // Column flags unsigned long *scales; // Column scales unsigned long *offsets; // Column offsets in buffer - unsigned long long next_seq_id; - MySQLAccelOptions options; - unsigned long long df_buffer_row_size; - unsigned long long df_buffer_n_rows; - char *df_cursor; - char *df_buffer; + unsigned long long next_seq_id; // MySQL packet sequence number + 
MySQLAccelOptions options; // Packet reader options + unsigned long long df_buffer_row_size; // Size of each df buffer row in bytes + unsigned long long df_buffer_n_rows; // Total number of rows in current df buffer + char *df_cursor; // Current position to write to in df buffer + char *df_buffer; // Head of df buffer PyStructSequence_Desc namedtuple_desc; - int unbuffered; + int unbuffered; // Are we running in unbuffered mode? + int is_eof; // Have we hit the eof packet yet? } StateObject; static void read_options(MySQLAccelOptions *options, PyObject *dict); @@ -466,8 +468,9 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { PyObject *py_res = NULL; PyObject *py_converters = NULL; PyObject *py_options = NULL; + unsigned long long requested_n_rows = 0; - if (!PyArg_ParseTuple(args, "O", &py_res)) { + if (!PyArg_ParseTuple(args, "OK", &py_res, &requested_n_rows)) { return -1; } @@ -639,14 +642,24 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { if (!self->py_array_def || !self->py_array_args || !self->py_array_kwds) goto error; // Setup dataframe buffer. - self->df_buffer_n_rows = (self->unbuffered) ? 1 : 500; self->df_buffer_row_size = compute_row_size(self); + if (requested_n_rows) { + self->df_buffer_n_rows = requested_n_rows; + } else if (self->unbuffered) { + self->df_buffer_n_rows = 1; + } else if (self->df_buffer_row_size > 10e6) { + self->df_buffer_n_rows = 1; + } else { + self->df_buffer_n_rows = 10e6 / self->df_buffer_row_size; + } self->df_buffer = calloc(self->df_buffer_row_size, self->df_buffer_n_rows); if (!self->df_buffer) goto error; self->df_cursor = self->df_buffer; // Construct the array to use for every fetch (it's reused each time). 
+ self->n_rows_in_batch = self->df_buffer_n_rows; self->py_rows = build_array(self); + self->n_rows_in_batch = 0; if (!self->py_rows) goto error; PyObject_SetAttrString(py_res, "rows", self->py_rows); @@ -671,11 +684,6 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->py_rows = PyList_New(0); if (!self->py_rows) goto error; - // Unbuffered results always have exactly 1 row. - if (self->unbuffered) { - PyList_Append(self->py_rows, Py_None); - } - PyObject_SetAttrString(py_res, "rows", self->py_rows); } @@ -690,6 +698,31 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { goto exit; } +static int State_reset_batch(StateObject *self, PyObject *py_res) { + int rc = 0; + PyObject *py_tmp = NULL; + + self->n_rows_in_batch = 0; + + switch (self->options.output_type) { + case MYSQLSV_OUT_PANDAS: + case MYSQLSV_OUT_NUMPY: + break; + default: + py_tmp = self->py_rows; + self->py_rows = PyList_New(0); + Py_XDECREF(py_tmp); + if (!self->py_rows) { rc = -1; goto error; } + rc = PyObject_SetAttrString(py_res, "rows", self->py_rows); + } + +exit: + return rc; + +error: + goto exit; +} + static PyTypeObject StateType = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_pymysqlsv.State", @@ -1112,7 +1145,6 @@ static void read_length_coded_string( static void build_array_interface(StateObject *py_state) { PyObject *py_out = NULL; - PyObject *py_shape = NULL; PyObject *py_typestr = NULL; PyObject *py_descr = NULL; PyObject *py_descr_item = NULL; @@ -1122,13 +1154,6 @@ static void build_array_interface(StateObject *py_state) { py_out = PyDict_New(); if (!py_out) goto error; - py_shape = PyTuple_New(1); - if (!py_shape) goto error; - // Populated in build_array. 
- PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(1)); - PyDict_SetItemString(py_out, "shape", py_shape); - Py_DECREF(py_shape); - py_typestr = PyUnicode_FromFormat("|V%llu", py_state->df_buffer_row_size); if (!py_typestr) goto error; PyDict_SetItemString(py_out, "typestr", py_typestr); @@ -1280,12 +1305,11 @@ static PyObject *build_array(StateObject *py_state) { py_array_def = PyDict_Copy(py_state->py_array_def); if (!py_array_def) goto error; - if (!py_state->unbuffered) { - PyObject *py_shape = PyTuple_New(1); - if (!py_shape) goto error; - PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(py_state->n_rows)); - PyDict_SetItemString(py_array_def, "shape", py_shape); - } + PyObject *py_shape = PyTuple_New(1); + if (!py_shape) goto error; + PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(py_state->n_rows_in_batch)); + PyDict_SetItemString(py_array_def, "shape", py_shape); + Py_CLEAR(py_shape); PyDict_SetItemString(py_array_def, "data", py_data); Py_CLEAR(py_data); @@ -1449,6 +1473,9 @@ static void read_dataframe_row_from_packet( Py_DECREF(str); str = NULL; if (!item) goto error; + // Free previous value if we are reusing a buffer. + Py_XDECREF(*(PyObject**)loc); + *(PyObject**)loc = item; } loc += sizeof(PyObject*); @@ -1626,6 +1653,9 @@ static void read_dataframe_row_from_packet( if (!item) goto error; } + // Free previous value if we are reusing a buffer. + Py_XDECREF(*(PyObject**)loc); + *(PyObject**)loc = item; loc += sizeof(PyObject*); @@ -1921,28 +1951,35 @@ static PyObject *read_obj_row_from_packet( } static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *kwargs) { + int rc = 0; StateObject *py_state = NULL; PyObject *py_res = NULL; PyObject *py_out = NULL; PyObject *py_next_seq_id = NULL; - int is_eof = 0; + PyObject *py_zero = PyLong_FromUnsignedLong(0); + unsigned long long requested_n_rows = 0; + unsigned long long row_idx = 0; + char *keywords[] = {"result", "size", NULL}; // Parse function args. 
- if (!PyArg_ParseTuple(args, "O", &py_res)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|K", keywords, &py_res, &requested_n_rows)) { goto error; } // Get the rowdata state. py_state = (StateObject*)PyObject_GetAttrString(py_res, "_state"); if (!py_state) { - int rc = 0; - PyErr_Clear(); - PyObject *py_args = PyTuple_New(1); + PyObject *py_requested_n_rows = PyLong_FromUnsignedLongLong(requested_n_rows); + if (!py_requested_n_rows) goto error; + + PyObject *py_args = PyTuple_New(2); if (!py_args) goto error; PyTuple_SET_ITEM(py_args, 0, py_res); + PyTuple_SET_ITEM(py_args, 1, py_requested_n_rows); Py_INCREF(py_res); + Py_INCREF(py_requested_n_rows); py_state = (StateObject*)State_new(&StateType, py_args, NULL); if (!py_state) { Py_DECREF(py_args); goto error; } @@ -1952,10 +1989,19 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k PyObject_SetAttrString(py_res, "_state", (PyObject*)py_state); } - // We are depending on the res._state variable to hold a ref count. - Py_DECREF(py_state); + else if (requested_n_rows > 0) { + State_reset_batch(py_state, py_res); + } - while (1) { + if (requested_n_rows == 0) { + requested_n_rows = UINTMAX_MAX; + } + + if (py_state->is_eof) { + goto exit; + } + + while (row_idx < requested_n_rows) { PyObject *py_buff = read_packet(py_state); if (!py_buff) goto error; @@ -1966,94 +2012,80 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k int has_next = 0; if (check_packet_is_eof(&data, &data_l, &warning_count, &has_next)) { - is_eof = 1; + Py_CLEAR(py_buff); + + py_state->is_eof = 1; PyObject *py_long = NULL; py_long = PyLong_FromUnsignedLongLong(warning_count); - if (!py_long) goto error; - PyObject_SetAttrString(py_res, "warning_count", py_long); - Py_DECREF(py_long); + PyObject_SetAttrString(py_res, "warning_count", py_long ? 
py_long : 0); + Py_CLEAR(py_long); py_long = PyLong_FromLong(has_next); - if (!py_long) goto error; - PyObject_SetAttrString(py_res, "has_next", py_long); - Py_DECREF(py_long); + PyObject_SetAttrString(py_res, "has_next", py_long ? py_long : 0); + Py_CLEAR(py_long); PyObject_SetAttrString(py_res, "connection", Py_None); - - // Hold a reference until the end of this function. - Py_INCREF(py_state); - PyObject_DelAttrString(py_res, "_state"); - - Py_DECREF(py_buff); - - if (py_state->unbuffered) { - PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); - PyObject_SetAttrString(py_res, "rows", Py_None); - goto exit; - } + PyObject_SetAttrString(py_res, "unbuffered_active", Py_False); break; } - py_state->n_rows += 1; + py_state->n_rows++; + py_state->n_rows_in_batch++; switch (py_state->options.output_type) { case MYSQLSV_OUT_PANDAS: case MYSQLSV_OUT_NUMPY: + // Add to df_buffer size as needed. if (!py_state->unbuffered && py_state->n_rows >= py_state->df_buffer_n_rows) { py_state->df_buffer_n_rows *= 1.7; py_state->df_buffer = realloc(py_state->df_buffer, py_state->df_buffer_row_size * py_state->df_buffer_n_rows); + if (!py_state->df_buffer) { Py_CLEAR(py_buff); goto error; } py_state->df_cursor = py_state->df_buffer + py_state->df_buffer_row_size * py_state->n_rows; } read_dataframe_row_from_packet(py_state, data, data_l); - if (py_state->unbuffered) { - py_state->df_cursor = py_state->df_buffer; - } else { - py_state->df_cursor += py_state->df_buffer_row_size; - } + py_state->df_cursor += py_state->df_buffer_row_size; break; default: py_row = read_obj_row_from_packet(py_state, data, data_l); - if (!py_row) { Py_DECREF(py_buff); goto error; } + if (!py_row) { Py_CLEAR(py_buff); goto error; } - if (py_state->unbuffered) { - PyList_SetItem(py_state->py_rows, 0, py_row); - } else { - PyList_Append(py_state->py_rows, py_row); - Py_DECREF(py_row); - } + rc = PyList_Append(py_state->py_rows, py_row); + if (rc != 0) { Py_CLEAR(py_buff); goto error; } + 
Py_DECREF(py_row); } - Py_DECREF(py_buff); + row_idx++; - if (py_state->unbuffered) break; + Py_CLEAR(py_buff); } switch (py_state->options.output_type) { case MYSQLSV_OUT_PANDAS: case MYSQLSV_OUT_NUMPY: // Resize the buffer down to be only the required amount needed. - if (!py_state->unbuffered) { + if (py_state->n_rows_in_batch > row_idx) { py_state->df_buffer = realloc(py_state->df_buffer, - py_state->df_buffer_row_size * py_state->n_rows); + py_state->df_buffer_row_size * py_state->n_rows_in_batch); + if (!py_state->df_buffer) goto error; py_state->df_cursor = py_state->df_buffer + - py_state->df_buffer_row_size * py_state->n_rows; + py_state->df_buffer_row_size * py_state->n_rows_in_batch; PyObject *py_tmp = py_state->py_rows; py_state->py_rows = build_array(py_state); - if (!py_state->py_rows) goto error; Py_DECREF(py_tmp); - PyObject_SetAttrString(py_res, "rows", py_state->py_rows); + if (!py_state->py_rows) goto error; + rc = PyObject_SetAttrString(py_res, "rows", py_state->py_rows); + if (rc != 0) goto error; } } exit: - if (!py_state) return NULL; py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); @@ -2064,25 +2096,28 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k py_out = NULL; if (py_state->unbuffered) { - if (is_eof) { + if (py_state->is_eof && row_idx == 0) { Py_INCREF(Py_None); py_out = Py_None; + PyObject_SetAttrString(py_res, "rows", Py_None); PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); Py_XDECREF(py_n_rows); + PyObject_DelAttrString(py_res, "_state"); + Py_CLEAR(py_state); } else { switch (py_state->options.output_type) { case MYSQLSV_OUT_PANDAS: case MYSQLSV_OUT_NUMPY: - // TODO: Return single row for fetchone. - py_out = py_state->py_rows; - Py_INCREF(py_out); + py_out = (requested_n_rows == 1) ? 
+ PyObject_GetItem(py_state->py_rows, py_zero) : py_state->py_rows; break; default: - py_out = PyList_GetItem(py_state->py_rows, 0); - Py_INCREF(py_out); + py_out = (requested_n_rows == 1) ? + PyList_GetItem(py_state->py_rows, 0) : py_state->py_rows; } + Py_XINCREF(py_out); } } else { @@ -2091,11 +2126,13 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); Py_XDECREF(py_n_rows); + if (py_state->is_eof) { + PyObject_DelAttrString(py_res, "_state"); + Py_CLEAR(py_state); + } } - if (is_eof) { - Py_DECREF(py_state); - } + Py_XDECREF(py_zero); return py_out; From ea0ae59966cfb0a435629db3c7be4d07e02d94d7 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 1 Sep 2022 12:41:14 -0500 Subject: [PATCH 09/12] Remove numpy support for now; clean up API --- README.md | 24 ++ pymysql/connections.py | 26 +- src/accel.c | 846 ++--------------------------------------- 3 files changed, 76 insertions(+), 820 deletions(-) diff --git a/README.md b/README.md index a0330b051..56bd3b9e5 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,30 @@ It increases the perforance of PyMySQL about 10-15%, which still leaves it at the second slowest client. It is also based on a PyMySQL codebase from years ago, so it does not contain any recent bug fixes or features of that project. +## How fast can it be? + +If it's still mostly based on PyMySQL, how fast can it really be? Here's one +benchmark. We uploaded +[this data file](http://studiotutorials.s3.amazonaws.com/eCommerce/2019-Dec.csv) +into a SingleStoreDB table six times to get a total of around 21 million rows of +data including one datetime column, one float column, and 8 character columns. +We then used PyMySQL, MySQLdb, and PyMySQLsv to fetch the entire table with `fetchone`, +`fetchmany(20)`, and `fetchall` using both buffered and unbuffered cursors. 
+Here are the results. + +| | PyMySQL | MySQLdb | PyMySQLsv | +|--------------------------|---------|---------|-----------| +| Buffered fetchone | 224.8s | 50.6s | 19.9s | +| Buffered fetchmany(20) | 217.63s | 50.3s | 15.5s | +| Buffered fetchall | 217.9s | 49.6s | 14.8s | +| Unbuffered fetchone | 230.5s | 48.3s | 25.3s | +| Unbuffered fetchmany(20) | 224.0s | 35.0s | 14.6s | +| Unbuffered fetchall | 232.4s | 37.7s | 29.2s | + +As you can see the gains are quite significant for this test case. Even MySQLdb, +which is based on the MySQL libraries takes twice as long in all but one +of the categories. + ## Install This package installs just like any other Python package. Since it includes a C diff --git a/pymysql/connections.py b/pymysql/connections.py index ca9d7ead5..09bd3aeee 100644 --- a/pymysql/connections.py +++ b/pymysql/connections.py @@ -167,7 +167,6 @@ class Connection: :param named_pipe: Not supported. :param db: **DEPRECATED** Alias for database. :param passwd: **DEPRECATED** Alias for password. - :param output_type: Type of result to return: tuples, namedtuples, dicts, numpy or pandas. :param parse_json: Parse JSON values into Python objects? :param invalid_date_value: Value to use in place of an invalid date. By default, a string containing the invalid content is returned. 
@@ -223,7 +222,6 @@ def __init__( ssl_key=None, ssl_verify_cert=None, ssl_verify_identity=None, - output_type='tuples', parse_json=False, invalid_date_value=UNSET, invalid_time_value=UNSET, @@ -347,8 +345,7 @@ def _config(key, arg): self.client_flag = client_flag self.pure_python = pure_python - self.unbuffered = False - self.output_type = output_type + self.output_type = 'tuples' self.cursorclass = cursorclass self.resultclass = MySQLResult @@ -357,12 +354,10 @@ def _config(key, arg): self.resultclass = MySQLResultSV if self.cursorclass is SSCursor: self.cursorclass = SSCursorSV - self.unbuffered = True elif self.cursorclass is DictCursor: self.output_type = 'dicts' elif self.cursorclass is SSDictCursor: self.cursorclass = SSDictCursorSV - self.unbuffered = True self.output_type = 'dicts' self._result = None @@ -602,11 +597,11 @@ def query(self, sql, unbuffered=False): if isinstance(sql, str): sql = sql.encode(self.encoding, "surrogateescape") self._execute_command(COMMAND.COM_QUERY, sql) - self._affected_rows = self._read_query_result(unbuffered=unbuffered or self.unbuffered) + self._affected_rows = self._read_query_result(unbuffered=unbuffered) return self._affected_rows def next_result(self, unbuffered=False): - self._affected_rows = self._read_query_result(unbuffered=unbuffered or self.unbuffered) + self._affected_rows = self._read_query_result(unbuffered=unbuffered) return self._affected_rows def affected_rows(self): @@ -822,10 +817,9 @@ def _write_bytes(self, data): def _read_query_result(self, unbuffered=False): self._result = None - if unbuffered or self.unbuffered: + if unbuffered: try: - result = self.resultclass(self) - result.init_unbuffered_query() + result = self.resultclass(self, unbuffered=unbuffered) except: result.unbuffered_active = False result.connection = None @@ -1188,7 +1182,7 @@ def get_server_info(self): class MySQLResult: - def __init__(self, connection): + def __init__(self, connection, unbuffered=False): """ :type connection: 
Connection """ @@ -1203,6 +1197,8 @@ def __init__(self, connection): self.rows = None self.has_next = None self.unbuffered_active = False + if unbuffered: + self.init_unbuffered_query() def __del__(self): if self.unbuffered_active: @@ -1399,8 +1395,8 @@ def _get_descriptions(self): self.description = tuple(description) class MySQLResultSV(MySQLResult): - def __init__(self, connection): - MySQLResult.__init__(self, connection) + def __init__(self, connection, unbuffered=False): + MySQLResult.__init__(self, connection, unbuffered=unbuffered) self.options = {k: v for k, v in dict( default_converters=converters.decoders, output_type=connection.output_type, @@ -1408,7 +1404,7 @@ def __init__(self, connection): invalid_date_value=connection.invalid_date_value, invalid_time_value=connection.invalid_time_value, invalid_datetime_value=connection.invalid_datetime_value, - unbuffered=connection.unbuffered, + unbuffered=unbuffered, ).items() if v is not UNSET} self._read_rowdata_packet = functools.partial(_pymysqlsv.read_rowdata_packet, self) self._read_rowdata_packet_unbuffered = functools.partial(_pymysqlsv.read_rowdata_packet, self) diff --git a/src/accel.c b/src/accel.c index 8f20cdef3..842a3a17e 100644 --- a/src/accel.c +++ b/src/accel.c @@ -8,8 +8,6 @@ #define MYSQLSV_OUT_TUPLES 0 #define MYSQLSV_OUT_NAMEDTUPLES 1 #define MYSQLSV_OUT_DICTS 2 -#define MYSQLSV_OUT_NUMPY 3 -#define MYSQLSV_OUT_PANDAS 4 #define MYSQL_FLAG_NOT_NULL 1 #define MYSQL_FLAG_PRI_KEY 2 @@ -70,9 +68,6 @@ // 2**24 - 1 #define MYSQL_MAX_PACKET_LEN 16777215 -#define EPOCH_TO_DAYS 719528 -#define SECONDS_PER_DAY (24 * 60 * 60) - #define MYSQLSV_OPTION_TIME_TYPE_TIMEDELTA 0 #define MYSQLSV_OPTION_TIME_TYPE_TIME 1 #define MYSQLSV_OPTION_JSON_TYPE_STRING 0 @@ -264,88 +259,6 @@ typedef struct { inline int IMAX(int a, int b) { return((a) > (b) ? a : b); } inline int IMIN(int a, int b) { return((a) < (b) ? 
a : b); } -// -// Array -// - -typedef struct { - PyObject_HEAD - PyObject *array_interface; -} ArrayObject; - -static void Array_dealloc(ArrayObject *self) { - // Numpy arrays take ownership of our memory. This happens because we are - // using the Python API to create our array, rather than the numpy C API. -#if 0 - if (self->array_interface && PyDict_Check(self->array_interface)) { - PyObject *data = PyDict_GetItemString(self->array_interface, "data"); - if (data) { - PyObject *buffer = PyTuple_GetItem(data, 0); - if (buffer) { - free((char*)PyLong_AsVoidPtr(buffer)); - } - } - } -#endif - Py_CLEAR(self->array_interface); - Py_TYPE(self)->tp_free((PyObject*)self); -} - -static PyObject *Array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - ArrayObject *self = (ArrayObject*)type->tp_alloc(type, 0); - if (self) { - self->array_interface = Py_None; - Py_INCREF(Py_None); - } - return (PyObject*)self; -} - -static int Array_init(ArrayObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"array_interface", NULL}; - PyObject *array_interface = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &array_interface)) { - return -1; - } - - if (array_interface) { - PyObject *tmp = self->array_interface; - Py_INCREF(array_interface); - self->array_interface = array_interface; - Py_DECREF(tmp); - } - - return 0; -} - -static PyObject *Array_get__array_interface__(ArrayObject *self, void *closure) { - Py_INCREF(self->array_interface); - return self->array_interface; -} - -static PyGetSetDef Array_getsetters[] = { - {"__array_interface__", (getter)Array_get__array_interface__, - (setter)NULL, "array interface", NULL}, - {NULL} -}; - -static PyTypeObject ArrayType = { - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "_pymysqlsv.Array", - .tp_doc = PyDoc_STR("Array manager"), - .tp_basicsize = sizeof(ArrayObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .tp_new = Array_new, - .tp_init = 
(initproc)Array_init, - .tp_dealloc = (destructor)Array_dealloc, - .tp_getset = Array_getsetters, -}; - -// -// End Array -// - // // State // @@ -367,13 +280,6 @@ typedef struct { PyObject **py_converters; // List of converter functions PyObject **py_names; // Column names PyObject *py_default_converters; // Dict of default converters - PyObject *py_numpy_mod; // Numpy module - PyObject *py_numpy_array; // numpy.array - PyObject *py_pandas_mod; // pandas module - PyObject *py_pandas_dataframe; // pandas.DataFrame - PyObject *py_array_def; // numpy array definition - PyObject *py_array_args; // Positional args for numpy.array - PyObject *py_array_kwds; // Keyword args for numpy.array PyTypeObject *namedtuple; // Generated namedtuple type PyObject **py_encodings; // Encoding for each column as Python string const char **encodings; // Encoding for each column @@ -386,26 +292,21 @@ typedef struct { unsigned long *offsets; // Column offsets in buffer unsigned long long next_seq_id; // MySQL packet sequence number MySQLAccelOptions options; // Packet reader options - unsigned long long df_buffer_row_size; // Size of each df buffer row in bytes - unsigned long long df_buffer_n_rows; // Total number of rows in current df buffer - char *df_cursor; // Current position to write to in df buffer - char *df_buffer; // Head of df buffer PyStructSequence_Desc namedtuple_desc; int unbuffered; // Are we running in unbuffered mode? int is_eof; // Have we hit the eof packet yet? 
+ struct { + PyObject *_next_seq_id; + PyObject *rows; + } py_str; } StateObject; static void read_options(MySQLAccelOptions *options, PyObject *dict); -static unsigned long long compute_row_size(StateObject *py_state); -static void build_array_interface(StateObject *py_state); -static PyObject *build_array(StateObject *py_state); #define DESTROY(x) do { if (x) { free(x); (x) = NULL; } } while (0) static void State_clear_fields(StateObject *self) { if (!self) return; - self->df_cursor = NULL; - self->df_buffer = NULL; DESTROY(self->namedtuple_desc.fields); DESTROY(self->offsets); DESTROY(self->scales); @@ -430,13 +331,6 @@ static void State_clear_fields(StateObject *self) { } DESTROY(self->py_encodings); } - Py_CLEAR(self->py_array_def); - Py_CLEAR(self->py_numpy_mod); - Py_CLEAR(self->py_numpy_array); - Py_CLEAR(self->py_pandas_mod); - Py_CLEAR(self->py_pandas_dataframe); - Py_CLEAR(self->py_array_args); - Py_CLEAR(self->py_array_kwds); Py_CLEAR(self->namedtuple); Py_CLEAR(self->py_default_converters); Py_CLEAR(self->py_settimeout); @@ -451,6 +345,9 @@ static void State_clear_fields(StateObject *self) { Py_CLEAR(self->py_decimal_mod); Py_CLEAR(self->py_fields); Py_CLEAR(self->py_conn); + + Py_CLEAR(self->py_str._next_seq_id); + Py_CLEAR(self->py_str.rows); } static void State_dealloc(StateObject *self) { @@ -501,6 +398,12 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { Py_XDECREF(unbuffered_active); } + // Create reused strings. + self->py_str._next_seq_id = PyUnicode_FromString("_next_seq_id"); + if (!self->py_str._next_seq_id) goto error; + self->py_str.rows = PyUnicode_FromString("rows"); + if (!self->py_str.rows) goto error; + // Import decimal module. 
self->py_decimal_mod = PyImport_ImportModule("decimal"); if (!self->py_decimal_mod) goto error; @@ -611,7 +514,7 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->py_read = PyObject_GetAttrString(self->py_rfile, "read"); if (!self->py_read) goto error; - PyObject *py_next_seq_id = PyObject_GetAttrString(self->py_conn, "_next_seq_id"); + PyObject *py_next_seq_id = PyObject_GetAttr(self->py_conn, self->py_str._next_seq_id); if (!py_next_seq_id) goto error; self->next_seq_id = PyLong_AsUnsignedLongLong(py_next_seq_id); Py_XDECREF(py_next_seq_id); @@ -621,50 +524,6 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { } switch (self->options.output_type) { - case MYSQLSV_OUT_PANDAS: - // Import pandas module. - self->py_pandas_mod = PyImport_ImportModule("pandas"); - if (!self->py_pandas_mod) goto error; - self->py_pandas_dataframe = PyObject_GetAttrString(self->py_pandas_mod, "DataFrame"); - if (!self->py_pandas_dataframe) goto error; - - // Fall through - - case MYSQLSV_OUT_NUMPY: - // Import numpy module. - self->py_numpy_mod = PyImport_ImportModule("numpy"); - if (!self->py_numpy_mod) goto error; - self->py_numpy_array = PyObject_GetAttrString(self->py_numpy_mod, "array"); - if (!self->py_numpy_array) goto error; - - // Build array interface arguments. - build_array_interface(self); - if (!self->py_array_def || !self->py_array_args || !self->py_array_kwds) goto error; - - // Setup dataframe buffer. 
- self->df_buffer_row_size = compute_row_size(self); - if (requested_n_rows) { - self->df_buffer_n_rows = requested_n_rows; - } else if (self->unbuffered) { - self->df_buffer_n_rows = 1; - } else if (self->df_buffer_row_size > 10e6) { - self->df_buffer_n_rows = 1; - } else { - self->df_buffer_n_rows = 10e6 / self->df_buffer_row_size; - } - self->df_buffer = calloc(self->df_buffer_row_size, self->df_buffer_n_rows); - if (!self->df_buffer) goto error; - self->df_cursor = self->df_buffer; - - // Construct the array to use for every fetch (it's reused each time). - self->n_rows_in_batch = self->df_buffer_n_rows; - self->py_rows = build_array(self); - self->n_rows_in_batch = 0; - if (!self->py_rows) goto error; - - PyObject_SetAttrString(py_res, "rows", self->py_rows); - break; - case MYSQLSV_OUT_NAMEDTUPLES: self->namedtuple_desc.name = "Row"; self->namedtuple_desc.doc = "Row of data values"; @@ -681,10 +540,16 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { // Fall through default: - self->py_rows = PyList_New(0); + // For fetchone, reuse the same list every time. 
+ if (requested_n_rows == 1) { + self->py_rows = PyList_New(1); + PyList_SetItem(self->py_rows, 0, Py_None); + } else { + self->py_rows = PyList_New(0); + } if (!self->py_rows) goto error; - PyObject_SetAttrString(py_res, "rows", self->py_rows); + PyObject_SetAttr(py_res, self->py_str.rows, self->py_rows); } exit: @@ -698,22 +563,22 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { goto exit; } -static int State_reset_batch(StateObject *self, PyObject *py_res) { +static int State_reset_batch( + StateObject *self, + PyObject *py_res, + unsigned long long requested_n_rows +) { int rc = 0; PyObject *py_tmp = NULL; self->n_rows_in_batch = 0; - switch (self->options.output_type) { - case MYSQLSV_OUT_PANDAS: - case MYSQLSV_OUT_NUMPY: - break; - default: + if (requested_n_rows != 1) { py_tmp = self->py_rows; self->py_rows = PyList_New(0); Py_XDECREF(py_tmp); if (!self->py_rows) { rc = -1; goto error; } - rc = PyObject_SetAttrString(py_res, "rows", self->py_rows); + rc = PyObject_SetAttr(py_res, self->py_str.rows, self->py_rows); } exit: @@ -756,12 +621,6 @@ static void read_options(MySQLAccelOptions *options, PyObject *dict) { PyUnicode_CompareWithASCIIString(value, "namedtuples") == 0) { options->output_type = MYSQLSV_OUT_NAMEDTUPLES; } - else if (PyUnicode_CompareWithASCIIString(value, "numpy") == 0) { - options->output_type = MYSQLSV_OUT_NUMPY; - } - else if (PyUnicode_CompareWithASCIIString(value, "pandas") == 0) { - options->output_type = MYSQLSV_OUT_PANDAS; - } else { options->output_type = MYSQLSV_OUT_TUPLES; } @@ -777,52 +636,6 @@ static void read_options(MySQLAccelOptions *options, PyObject *dict) { } } -// mysql, for whatever reason, treats 0 as an actual year, but not -// a leap year -// -static int is_leap_year(int year) -{ - return (year % 4) == 0 && year != 0 && ((year % 100) != 0 || (year % 400) == 0); -} - -static int days_in_previous_months(int month, int year) -{ - static const int previous_days[13] = - { - -31, - 0, - 31, - 
31 + 28, - 31 + 28 + 31, - 31 + 28 + 31 + 30, - 31 + 28 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, - }; - return previous_days[month] + (month > 2 && is_leap_year(year)); -} - -// NOTE: year 0 does not actually exist, but mysql pretends it does (and is NOT -// a leap year) -// -static int leap_years_before(int year) -{ - return (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400; -} - -static int days_in_previous_years(int year) -{ - return 365 * year + leap_years_before(year); -} - -static int64_t to_days(int year, int month, int day) { - return days_in_previous_years(year) + days_in_previous_months(month, year) + day; -} - static void raise_exception( PyObject *self, char *err_type, @@ -1143,539 +956,7 @@ static void read_length_coded_string( return; } -static void build_array_interface(StateObject *py_state) { - PyObject *py_out = NULL; - PyObject *py_typestr = NULL; - PyObject *py_descr = NULL; - PyObject *py_descr_item = NULL; - PyObject *py_type = NULL; - PyObject *py_array = NULL; - - py_out = PyDict_New(); - if (!py_out) goto error; - - py_typestr = PyUnicode_FromFormat("|V%llu", py_state->df_buffer_row_size); - if (!py_typestr) goto error; - PyDict_SetItemString(py_out, "typestr", py_typestr); - Py_DECREF(py_typestr); - - py_descr = PyList_New(py_state->n_cols); - if (!py_descr) goto error; - PyDict_SetItemString(py_out, "descr", py_descr); - Py_DECREF(py_descr); - - for (unsigned long i = 0; i < py_state->n_cols; i++) { - py_descr_item = PyTuple_New(2); - if (!py_descr_item) goto error; - - PyList_SetItem(py_descr, i, py_descr_item); - - PyTuple_SetItem(py_descr_item, 0, py_state->py_names[i]); - Py_INCREF(py_state->py_names[i]); - - switch (py_state->type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: 
- py_type = PyUnicode_FromString("|O"); - break; - - case MYSQL_TYPE_TINY: - if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { - py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { - py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { - py_type = PyUnicode_FromString("flags[i] & MYSQL_FLAG_UNSIGNED) { - py_type = PyUnicode_FromString("type_codes[i], NULL); - goto error; - } - - if (!py_type) goto error; - - PyTuple_SetItem(py_descr_item, 1, py_type); - - py_descr_item = NULL; - } - - py_state->py_array_def = py_out; - - py_state->py_array_args = PyTuple_New(1); - if (!py_state->py_array_args) goto error; - PyTuple_SetItem(py_state->py_array_args, 0, py_array); - py_state->py_array_kwds = PyDict_New(); - PyDict_SetItemString(py_state->py_array_kwds, "copy", Py_False); - -exit: - return; - -error: - Py_CLEAR(py_state->py_array_def); - Py_CLEAR(py_state->py_array_args); - Py_CLEAR(py_state->py_array_kwds); - goto exit; -} - -static PyObject *build_array(StateObject *py_state) { - PyObject *py_out = NULL; - PyObject *py_data = NULL; - PyObject *py_array = NULL; - PyObject *py_array_def = NULL; - PyObject *py_args = NULL; - - py_data = PyTuple_New(2); - if (!py_data) goto error; - - PyTuple_SetItem(py_data, 0, PyLong_FromVoidPtr(py_state->df_buffer)); - PyTuple_SetItem(py_data, 1, Py_False); - Py_INCREF(Py_False); - - py_array_def = PyDict_Copy(py_state->py_array_def); - if (!py_array_def) goto error; - - PyObject *py_shape = PyTuple_New(1); - if (!py_shape) goto error; - PyTuple_SetItem(py_shape, 0, PyLong_FromUnsignedLongLong(py_state->n_rows_in_batch)); - PyDict_SetItemString(py_array_def, "shape", py_shape); - Py_CLEAR(py_shape); - - PyDict_SetItemString(py_array_def, "data", py_data); - Py_CLEAR(py_data); - - py_args = PyTuple_New(1); - if (!py_args) goto error; - PyTuple_SetItem(py_args, 0, py_array_def); - py_array_def = NULL; - py_array = Array_new(&ArrayType, py_args, NULL); - if (!py_array) goto error; - 
Array_init((ArrayObject*)py_array, py_args, NULL); - - PyTuple_SetItem(py_state->py_array_args, 0, py_array); - - py_out = PyObject_Call(py_state->py_numpy_array, py_state->py_array_args, - py_state->py_array_kwds); - if (!py_out) goto error; - - if (py_state->options.output_type == MYSQLSV_OUT_PANDAS) { - PyObject *py_tmp = py_out; - py_out = PyObject_CallFunctionObjArgs(py_state->py_pandas_dataframe, py_out, NULL); - if (!py_out) goto error; - Py_DECREF(py_tmp); - } - -exit: - Py_XDECREF(py_args); - return py_out; - -error: - Py_CLEAR(py_out); - goto exit; -} - -static unsigned long long compute_row_size(StateObject *py_state) { - unsigned long long row_size = 0; - - for (unsigned long i = 0; i < py_state->n_cols; i++) { - switch (py_state->type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - row_size += sizeof(PyObject*); - break; - - case MYSQL_TYPE_TINY: - row_size += sizeof(int8_t); - break; - - case MYSQL_TYPE_SHORT: - row_size += sizeof(int16_t); - break; - - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_LONG: - row_size += sizeof(int32_t); - break; - - case MYSQL_TYPE_LONGLONG: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_FLOAT: - row_size += sizeof(float); - break; - - case MYSQL_TYPE_DOUBLE: - row_size += sizeof(double); - break; - - case MYSQL_TYPE_NULL: - row_size += sizeof(PyObject*); - break; - - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_DATE: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_TIME: - row_size += sizeof(int64_t); - break; - - case MYSQL_TYPE_YEAR: - row_size += sizeof(int16_t); - break; - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_JSON: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_VAR_STRING: - case 
MYSQL_TYPE_STRING: - row_size += sizeof(PyObject*); - break; - - default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", - py_state->type_codes[i], NULL); - return 0; - } - } - - return row_size; -} - -static void read_dataframe_row_from_packet( - StateObject *py_state, - char *data, - unsigned long long data_l -) { - char *out = NULL; - unsigned long long out_l = 0; - int is_null = 0; - PyObject *item = NULL; - PyObject *str = NULL; - char *end = NULL; - char *loc = py_state->df_cursor; - - int sign = 1; - int year = 0; - int month = 0; - int day = 0; - int hour = 0; - int minute = 0; - int second = 0; - int microsecond = 0; - - float float_nan = nanf(""); - double double_nan = nan(""); - - if (!py_state->df_cursor) goto error; - - for (unsigned long i = 0; i < py_state->n_cols; i++) { - - read_length_coded_string(&data, &data_l, &out, &out_l, &is_null); - end = &out[out_l]; - - switch (py_state->type_codes[i]) { - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_DECIMAL: - if (is_null) { - *(PyObject**)loc = Py_None; - Py_INCREF(Py_None); - } else { - str = NULL; - str = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); - if (!str) goto error; - - item = PyObject_CallFunctionObjArgs(py_state->py_decimal, str, NULL); - Py_DECREF(str); str = NULL; - if (!item) goto error; - - // Free previous value if we are reusing a buffer. - Py_XDECREF(*(PyObject**)loc); - - *(PyObject**)loc = item; - } - loc += sizeof(PyObject*); - break; - - case MYSQL_TYPE_TINY: - if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint8_t*)loc = (is_null) ? 0 : (uint8_t)strtoul(out, &end, 10); - } else { - *(int8_t*)loc = (is_null) ? INT8_MIN : (int8_t)strtol(out, &end, 10); - } - loc += sizeof(int8_t); - break; - - case MYSQL_TYPE_SHORT: - if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint16_t*)loc = (is_null) ? 0 : (uint16_t)strtoul(out, &end, 10); - } else { - *(int16_t*)loc = (is_null) ? 
INT16_MIN : (int16_t)strtol(out, &end, 10); - } - loc += sizeof(int16_t); - break; - - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_LONG: - if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint32_t*)loc = (is_null) ? 0 : (uint32_t)strtoul(out, &end, 10); - } else { - *(int32_t*)loc = (is_null) ? INT32_MIN : (int32_t)strtol(out, &end, 10); - } - loc += sizeof(int32_t); - break; - - case MYSQL_TYPE_LONGLONG: - if (py_state->flags[i] & MYSQL_FLAG_UNSIGNED) { - *(uint64_t*)loc = (is_null) ? 0 : (uint64_t)strtoul(out, &end, 10); - } else { - *(int64_t*)loc = (is_null) ? INT64_MIN : (int64_t)strtol(out, &end, 10); - } - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_FLOAT: - if (is_null) { - *(float*)loc = (float)float_nan; - } else { - *(float*)loc = (float)strtod(out, &end); - } - loc += sizeof(float); - break; - - case MYSQL_TYPE_DOUBLE: - if (is_null) { - *(double*)loc = (double)double_nan; - } else { - *(double*)loc = (double)strtod(out, &end); - } - loc += sizeof(double); - break; - - case MYSQL_TYPE_NULL: - *(PyObject**)loc = Py_None; - loc += sizeof(PyObject*); - break; - - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - // TODO: Should use numpy's NaT - if (!CHECK_ANY_DATETIME_STR(out, out_l)) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_DATETIME_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_DATETIME_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - *(int64_t*)loc = (int64_t)(((to_days(year, month, day) - EPOCH_TO_DAYS) - * SECONDS_PER_DAY + hour * 3600 + minute * 60 + second) - * 1e9 + microsecond * 1e3); - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_DATE: - if (!CHECK_DATE_STR(out, out_l)) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } - year = CHR2INT4(out); out += 5; - month = CHR2INT2(out); out += 3; - day = CHR2INT2(out); out += 3; - *(int64_t*)loc = (int64_t)((to_days(year, month, day) - EPOCH_TO_DAYS) - * SECONDS_PER_DAY * 1e9); - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_TIME: - sign = CHECK_ANY_TIMEDELTA_STR(out, out_l); - if (!sign) { - *(int64_t*)loc = (int64_t)(INT64_MIN); - loc += sizeof(int64_t); - break; - } else if (sign < 0) { - out += 1; out_l -= 1; - } - if (IS_TIMEDELTA1(out, out_l)) { - hour = CHR2INT1(out); out += 2; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA2(out, out_l)) { - hour = CHR2INT2(out); out += 3; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? CHR2INT3(out) * 1e3 : 0; - } - else if (IS_TIMEDELTA3(out, out_l)) { - hour = CHR2INT3(out); out += 4; - minute = CHR2INT2(out); out += 3; - second = CHR2INT2(out); out += 3; - microsecond = (IS_TIMEDELTA_MICRO(out, out_l)) ? CHR2INT6(out) : - (IS_TIMEDELTA_MILLI(out, out_l)) ? 
CHR2INT3(out) * 1e3 : 0; - } - *(int64_t*)loc = (int64_t)((hour * 3600 + minute * 60 + second) - * 1e9 + microsecond * 1e3) * sign; - loc += sizeof(int64_t); - break; - - case MYSQL_TYPE_YEAR: - if (out_l == 0) { - *(uint16_t*)loc = 0; - loc += sizeof(uint16_t); - break; - } - end = &out[out_l]; - *(uint16_t*)loc = (uint16_t)strtoul(out, &end, 10); - loc += sizeof(uint16_t); - break; - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_JSON: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_STRING: - if (py_state->encodings[i] == NULL) { - item = PyBytes_FromStringAndSize(out, out_l); - if (!item) goto error; - break; - } - - item = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict"); - if (!item) goto error; - - // Parse JSON string. - if (py_state->type_codes[i] == MYSQL_TYPE_JSON && py_state->options.parse_json) { - str = item; - item = PyObject_CallFunctionObjArgs(py_state->py_json_loads, str, NULL); - Py_DECREF(str); str = NULL; - if (!item) goto error; - } - - // Free previous value if we are reusing a buffer. 
- Py_XDECREF(*(PyObject**)loc); - - *(PyObject**)loc = item; - loc += sizeof(PyObject*); - - break; - - default: - PyErr_Format(PyExc_TypeError, "Unknown type code: %ld", - py_state->type_codes[i], NULL); - goto error; - } - } - -exit: - return; - -error: - goto exit; -} - -static PyObject *read_obj_row_from_packet( +static PyObject *read_row_from_packet( StateObject *py_state, char *data, unsigned long long data_l @@ -1990,7 +1271,7 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k PyObject_SetAttrString(py_res, "_state", (PyObject*)py_state); } else if (requested_n_rows > 0) { - State_reset_batch(py_state, py_res); + State_reset_batch(py_state, py_res, requested_n_rows); } if (requested_n_rows == 0) { @@ -2035,62 +1316,28 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k py_state->n_rows++; py_state->n_rows_in_batch++; - switch (py_state->options.output_type) { - case MYSQLSV_OUT_PANDAS: - case MYSQLSV_OUT_NUMPY: - // Add to df_buffer size as needed. 
- if (!py_state->unbuffered && py_state->n_rows >= py_state->df_buffer_n_rows) { - py_state->df_buffer_n_rows *= 1.7; - py_state->df_buffer = realloc(py_state->df_buffer, - py_state->df_buffer_row_size * - py_state->df_buffer_n_rows); - if (!py_state->df_buffer) { Py_CLEAR(py_buff); goto error; } - py_state->df_cursor = py_state->df_buffer + - py_state->df_buffer_row_size * py_state->n_rows; - } - read_dataframe_row_from_packet(py_state, data, data_l); - py_state->df_cursor += py_state->df_buffer_row_size; - break; - - default: - py_row = read_obj_row_from_packet(py_state, data, data_l); - if (!py_row) { Py_CLEAR(py_buff); goto error; } + py_row = read_row_from_packet(py_state, data, data_l); + if (!py_row) { Py_CLEAR(py_buff); goto error; } + if (requested_n_rows == 1) { + rc = PyList_SetItem(py_state->py_rows, 0, py_row); + } else { rc = PyList_Append(py_state->py_rows, py_row); - if (rc != 0) { Py_CLEAR(py_buff); goto error; } Py_DECREF(py_row); } + if (rc != 0) { Py_CLEAR(py_buff); goto error; } row_idx++; Py_CLEAR(py_buff); } - switch (py_state->options.output_type) { - case MYSQLSV_OUT_PANDAS: - case MYSQLSV_OUT_NUMPY: - // Resize the buffer down to be only the required amount needed. 
- if (py_state->n_rows_in_batch > row_idx) { - py_state->df_buffer = realloc(py_state->df_buffer, - py_state->df_buffer_row_size * py_state->n_rows_in_batch); - if (!py_state->df_buffer) goto error; - py_state->df_cursor = py_state->df_buffer + - py_state->df_buffer_row_size * py_state->n_rows_in_batch; - PyObject *py_tmp = py_state->py_rows; - py_state->py_rows = build_array(py_state); - Py_DECREF(py_tmp); - if (!py_state->py_rows) goto error; - rc = PyObject_SetAttrString(py_res, "rows", py_state->py_rows); - if (rc != 0) goto error; - } - } - exit: if (!py_state) return NULL; py_next_seq_id = PyLong_FromUnsignedLongLong(py_state->next_seq_id); if (!py_next_seq_id) goto error; - PyObject_SetAttrString(py_state->py_conn, "_next_seq_id", py_next_seq_id); + PyObject_SetAttr(py_state->py_conn, py_state->py_str._next_seq_id, py_next_seq_id); Py_DECREF(py_next_seq_id); py_out = NULL; @@ -2099,7 +1346,7 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k if (py_state->is_eof && row_idx == 0) { Py_INCREF(Py_None); py_out = Py_None; - PyObject_SetAttrString(py_res, "rows", Py_None); + PyObject_SetAttr(py_res, py_state->py_str.rows, Py_None); PyObject *py_n_rows = PyLong_FromSsize_t(py_state->n_rows); PyObject_SetAttrString(py_res, "affected_rows", (py_n_rows) ? py_n_rows : Py_None); Py_XDECREF(py_n_rows); @@ -2107,16 +1354,8 @@ static PyObject *read_rowdata_packet(PyObject *self, PyObject *args, PyObject *k Py_CLEAR(py_state); } else { - switch (py_state->options.output_type) { - case MYSQLSV_OUT_PANDAS: - case MYSQLSV_OUT_NUMPY: - py_out = (requested_n_rows == 1) ? - PyObject_GetItem(py_state->py_rows, py_zero) : py_state->py_rows; - break; - default: - py_out = (requested_n_rows == 1) ? - PyList_GetItem(py_state->py_rows, 0) : py_state->py_rows; - } + py_out = (requested_n_rows == 1) ? 
+ PyList_GetItem(py_state->py_rows, 0) : py_state->py_rows; Py_XINCREF(py_out); } } @@ -2155,9 +1394,6 @@ static struct PyModuleDef _pymysqlsvmodule = { PyMODINIT_FUNC PyInit__pymysqlsv(void) { PyDateTime_IMPORT; - if (PyType_Ready(&ArrayType) < 0) { - return NULL; - } if (PyType_Ready(&StateType) < 0) { return NULL; } From 8e0fdd98a94b3bef31be59304d46e6fc7684f1f3 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 1 Sep 2022 12:45:19 -0500 Subject: [PATCH 10/12] Fix formatting --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 56bd3b9e5..4fa302fe4 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,12 @@ Here are the results. | | PyMySQL | MySQLdb | PyMySQLsv | |--------------------------|---------|---------|-----------| -| Buffered fetchone | 224.8s | 50.6s | 19.9s | -| Buffered fetchmany(20) | 217.63s | 50.3s | 15.5s | -| Buffered fetchall | 217.9s | 49.6s | 14.8s | -| Unbuffered fetchone | 230.5s | 48.3s | 25.3s | -| Unbuffered fetchmany(20) | 224.0s | 35.0s | 14.6s | -| Unbuffered fetchall | 232.4s | 37.7s | 29.2s | +| Buffered fetchone | 224.8s | 50.6s | 19.9s | +| Buffered fetchmany(20) | 217.6s | 50.3s | 15.5s | +| Buffered fetchall | 217.9s | 49.6s | 14.8s | +| Unbuffered fetchone | 230.5s | 48.3s | 25.3s | +| Unbuffered fetchmany(20) | 224.0s | 35.0s | 14.6s | +| Unbuffered fetchall | 232.4s | 37.7s | 29.2s | As you can see the gains are quite significant for this test case. 
Even MySQLdb, which is based on the MySQL libraries takes twice as long in all but one From e134133c31186d12dc124616c81bfaed0024cabf Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Thu, 1 Sep 2022 13:08:31 -0500 Subject: [PATCH 11/12] Make invalid value replacements more generic --- pymysql/connections.py | 26 ++++++++++---------------- src/accel.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/pymysql/connections.py b/pymysql/connections.py index 09bd3aeee..54adfbb2a 100644 --- a/pymysql/connections.py +++ b/pymysql/connections.py @@ -168,13 +168,13 @@ class Connection: :param db: **DEPRECATED** Alias for database. :param passwd: **DEPRECATED** Alias for password. :param parse_json: Parse JSON values into Python objects? - :param invalid_date_value: Value to use in place of an invalid date. By default, a string - containing the invalid content is returned. - :param invalid_time_value: Value to use in place of an invalid time. By default, a string - containing the invalid content is returned. - :param invalid_datetime_value: Value to use in place of an invalid datetime. By default, - a string containing the invalid content is returned. + :param invalid_values: Dictionary of values to use in place of invalid values + found during conversion of data. The default is to return the byte content + containing the invalid value. The keys are the integers associated with + the column type. :param pure_python: Should we ignore the C extension even if it's available? + This can be given explicitly using True or False, or if the value is None, + the C extension will be loaded if it is available. See `Connection `_ in the specification. 
@@ -223,10 +223,8 @@ def __init__( ssl_verify_cert=None, ssl_verify_identity=None, parse_json=False, - invalid_date_value=UNSET, - invalid_time_value=UNSET, - invalid_datetime_value=UNSET, - pure_python=False, + invalid_values=None, + pure_python=None, compress=None, # not supported named_pipe=None, # not supported passwd=None, # deprecated @@ -370,10 +368,8 @@ def _config(key, arg): if conv is None: conv = converters.conversions - self.invalid_date_value = invalid_date_value - self.invalid_time_value = invalid_time_value - self.invalid_datetime_value = invalid_datetime_value self.parse_json = parse_json + self.invalid_values = (invalid_values or {}).copy() # Need for MySQLdb compatibility. self.encoders = {k: v for (k, v) in conv.items() if type(k) is not int} @@ -1401,9 +1397,7 @@ def __init__(self, connection, unbuffered=False): default_converters=converters.decoders, output_type=connection.output_type, parse_json=connection.parse_json, - invalid_date_value=connection.invalid_date_value, - invalid_time_value=connection.invalid_time_value, - invalid_datetime_value=connection.invalid_datetime_value, + invalid_values=connection.invalid_values, unbuffered=unbuffered, ).items() if v is not UNSET} self._read_rowdata_packet = functools.partial(_pymysqlsv.read_rowdata_packet, self) diff --git a/src/accel.c b/src/accel.c index 842a3a17e..883ed48db 100644 --- a/src/accel.c +++ b/src/accel.c @@ -251,9 +251,7 @@ typedef struct { int output_type; int parse_json; - PyObject *invalid_date_value; - PyObject *invalid_time_value; - PyObject *invalid_datetime_value; + PyObject *invalid_values; } MySQLAccelOptions; inline int IMAX(int a, int b) { return((a) > (b) ? 
a : b); } @@ -282,6 +280,7 @@ typedef struct { PyObject *py_default_converters; // Dict of default converters PyTypeObject *namedtuple; // Generated namedtuple type PyObject **py_encodings; // Encoding for each column as Python string + PyObject **py_invalid_values; // Values to use when invalid data exists in a cell const char **encodings; // Encoding for each column unsigned long long n_cols; // Total number of columns unsigned long long n_rows; // Total number of rows read @@ -331,6 +330,12 @@ static void State_clear_fields(StateObject *self) { } DESTROY(self->py_encodings); } + if (self->py_invalid_values) { + for (unsigned long i = 0; i < self->n_cols; i++) { + Py_CLEAR(self->py_invalid_values[i]); + } + DESTROY(self->py_invalid_values); + } Py_CLEAR(self->namedtuple); Py_CLEAR(self->py_default_converters); Py_CLEAR(self->py_settimeout); @@ -443,6 +448,9 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->py_encodings = calloc(self->n_cols, sizeof(char*)); if (!self->py_encodings) goto error; + self->py_invalid_values = calloc(self->n_cols, sizeof(char*)); + if (!self->py_invalid_values) goto error; + self->py_names = calloc(self->n_cols, sizeof(PyObject*)); if (!self->py_names) goto error; @@ -469,6 +477,8 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->type_codes[i] = PyLong_AsUnsignedLong(py_field_type); PyObject *py_default_converter = (self->py_default_converters) ? PyDict_GetItem(self->py_default_converters, py_field_type) : NULL; + PyObject *py_invalid_value = (self->options.invalid_values) ? + PyDict_GetItem(self->options.invalid_values, py_field_type) : NULL; Py_XDECREF(py_field_type); // Get field name. @@ -490,6 +500,10 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) { self->encodings[i] = (!py_encoding || py_encoding == Py_None) ? 
NULL : PyUnicode_AsUTF8AndSize(py_encoding, NULL); + self->py_invalid_values[i] = (!py_invalid_value || py_invalid_value == Py_None) ? + NULL : py_converter; + Py_XINCREF(self->py_invalid_values[i]); + self->py_converters[i] = (!py_converter || py_converter == Py_None || py_converter == py_default_converter) ? @@ -626,12 +640,10 @@ static void read_options(MySQLAccelOptions *options, PyObject *dict) { } } else if (PyUnicode_CompareWithASCIIString(key, "parse_json") == 0) { options->parse_json = PyObject_IsTrue(value); - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_date_value") == 0) { - options->invalid_date_value = value; - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_time_value") == 0) { - options->invalid_time_value = value; - } else if (PyUnicode_CompareWithASCIIString(key, "invalid_datetime_value") == 0) { - options->invalid_datetime_value = value; + } else if (PyUnicode_CompareWithASCIIString(key, "invalid_values") == 0) { + if (PyDict_Check(value)) { + options->invalid_values = value; + } } } } @@ -1060,8 +1072,8 @@ static PyObject *read_row_from_packet( case MYSQL_TYPE_DATETIME: case MYSQL_TYPE_TIMESTAMP: if (!CHECK_ANY_DATETIME_STR(out, out_l)) { - if (py_state->options.invalid_datetime_value) { - py_item = py_state->options.invalid_datetime_value; + if (py_state->py_invalid_values[i]) { + py_item = py_state->py_invalid_values[i]; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); @@ -1089,8 +1101,8 @@ static PyObject *read_row_from_packet( case MYSQL_TYPE_NEWDATE: case MYSQL_TYPE_DATE: if (!CHECK_DATE_STR(out, out_l)) { - if (py_state->options.invalid_date_value) { - py_item = py_state->options.invalid_date_value; + if (py_state->py_invalid_values[i]) { + py_item = py_state->py_invalid_values[i]; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); @@ -1112,8 +1124,8 @@ static PyObject *read_row_from_packet( case MYSQL_TYPE_TIME: sign = 
CHECK_ANY_TIMEDELTA_STR(out, out_l); if (!sign) { - if (py_state->options.invalid_time_value) { - py_item = py_state->options.invalid_time_value; + if (py_state->py_invalid_values[i]) { + py_item = py_state->py_invalid_values[i]; Py_INCREF(py_item); } else { py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict"); From 14c993c00d4a3d98318637886694476fdb1df3a7 Mon Sep 17 00:00:00 2001 From: Kevin D Smith Date: Mon, 9 Jan 2023 13:42:58 -0600 Subject: [PATCH 12/12] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 4fa302fe4..9ff8b4367 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +> **Warning** +> All work on the accelerated MySQL client has been moved to the +> [SingleStoreDB Python client](https://github.com/singlestore-labs/singlestoredb-python). + # PyMySQLsv This project contains classes based on the PyMySQL database client which have