diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 92e656b..9493eb4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Change log ================================================================================ +0.6.4 - 31.10.2020 +-------------------------------------------------------------------------------- + +**updated** + +#. `#102 `_: skip columns from + imported excel sheet. + 0.6.3 - 12.10.2020 -------------------------------------------------------------------------------- diff --git a/README.rst b/README.rst index 2f9b838..4dabd67 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ pyexcel-io - Let you focus on data, instead of file formats .. image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png :target: https://www.patreon.com/chfw -.. image:: https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg +.. image:: https://raw.githubusercontent.com/pyexcel/pyexcel-mobans/master/images/awesome-badge.svg :target: https://awesome-python.com/#specific-formats-processing .. image:: https://travis-ci.org/pyexcel/pyexcel-io.svg?branch=master @@ -21,7 +21,7 @@ pyexcel-io - Let you focus on data, instead of file formats :target: https://anaconda.org/conda-forge/pyexcel-io .. image:: https://pepy.tech/badge/pyexcel-io/month - :target: https://pepy.tech/project/pyexcel-io/month + :target: https://pepy.tech/project/pyexcel-io .. image:: https://anaconda.org/conda-forge/pyexcel-io/badges/downloads.svg :target: https://anaconda.org/conda-forge/pyexcel-io @@ -60,6 +60,8 @@ Known constraints Fonts, colors and charts are not supported. +Reading password protected xls, xlsx and ods files is not supported either. + Introduction ================================================================================ @@ -74,44 +76,47 @@ sqlalchemy supported databases. Its supported file formats are extended to cover .. 
table:: A list of file formats supported by external plugins - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-io`_ >=v0.6.0 csv, csvz [#f1]_, tsv, 3.6+ - tsvz [#f2]_ - `pyexcel-io`_ <=0.5.20 same as above 2.6, 2.7, 3.3, - 3.4, 3.5, 3.6 - pypy - `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-io`_ csv, csvz [#f1]_, tsv, + tsvz [#f2]_ + `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, xlsm(read only) `xlwt`_ - `pyexcel-xlsx`_ xlsx `openpyxl`_ same as above - `pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4 - lxml 3.5, 3.6 - `pyexcel-ods`_ ods `odfpy`_ same as above - ======================== ======================= ================= ================== + `pyexcel-xlsx`_ xlsx `openpyxl`_ + `pyexcel-ods3`_ ods `pyexcel-ezodf`_, + lxml + `pyexcel-ods`_ ods `odfpy`_ + ======================== ======================= ================= .. table:: Dedicated file reader and writers - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3 - `pyexcel-xlsxr`_ xlsx(read only) lxml same as above - `pyexcel-xlsbr`_ xlsx(read only) pyxlsb same as above - `pyexcel-odsr`_ read only for ods, fods lxml same as above - `pyexcel-odsw`_ write only for ods loxun same as above - `pyexcel-htmlr`_ html(read only) lxml,html5lib same as above - `pyexcel-pdfr`_ pdf(read only) pdftables Python 2 only. 
- ======================== ======================= ================= ================== + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ + `pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_ + `pyexcel-xlsxr`_ xlsx(read only) lxml + `pyexcel-xlsbr`_ xlsb(read only) pyxlsb + `pyexcel-odsr`_ read only for ods, fods lxml + `pyexcel-odsw`_ write only for ods loxun + `pyexcel-htmlr`_ html(read only) lxml,html5lib + `pyexcel-pdfr`_ pdf(read only) camelot + ======================== ======================= ================= Plugin shopping guide ------------------------ +Since 2020, all pyexcel-io plugins have dropped the support for python versions +lower than 3.6. If you need to use an older python version, please use pyexcel-io +and its plugins at versions lower than 0.6.0. + + Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of -xml files +xml files -The dedicated readers for excel files can stream read +The dedicated readers for excel files can stream read In order to manage the list of plugins installed, you need to use pip to add or remove @@ -133,6 +138,7 @@ You need to append get_array(..., library='pyexcel-odsr'). .. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr .. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw +.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw .. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr .. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr .. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr @@ -143,6 +149,7 @@ You need to append get_array(..., library='pyexcel-odsr'). .. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter .. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf .. _odfpy: https://github.com/eea/odfpy +.. 
_libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html .. rubric:: Footnotes diff --git a/changelog.yml b/changelog.yml index 47f4053..2716ee5 100644 --- a/changelog.yml +++ b/changelog.yml @@ -1,6 +1,12 @@ name: pyexcel-io organisation: pyexcel releases: +- changes: + - action: updated + details: + - "`#102`: skip columns from imported excel sheet." + version: 0.6.4 + date: 31.10.2020 - changes: - action: fixed details: diff --git a/docs/source/conf.py b/docs/source/conf.py index a2fc48f..de27cdb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,11 +24,11 @@ project = 'pyexcel-io' copyright = '2015-2020 Onni Software Ltd.' -author = 'chfw' +author = 'C.W.' # The short X.Y version -version = '0.6.3' +version = '0.6.4' # The full version, including alpha/beta/rc tags -release = '0.6.3' +release = '0.6.4' # -- General configuration --------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 95be968..00332e4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,7 +6,7 @@ `pyexcel-io` - Let you focus on data, instead of file formats ================================================================================ -:Author: chfw +:Author: C.W. :Source code: http://github.com/pyexcel/pyexcel-io.git :Issues: http://github.com/pyexcel/pyexcel-io/issues :License: New BSD License @@ -64,44 +64,47 @@ For individual excel file formats, please install them as you wish: .. 
table:: A list of file formats supported by external plugins - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-io`_ >=v0.6.0 csv, csvz [#f1]_, tsv, 3.6+ - tsvz [#f2]_ - `pyexcel-io`_ <=0.5.20 same as above 2.6, 2.7, 3.3, - 3.4, 3.5, 3.6 - pypy - `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-io`_ csv, csvz [#f1]_, tsv, + tsvz [#f2]_ + `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, xlsm(read only) `xlwt`_ - `pyexcel-xlsx`_ xlsx `openpyxl`_ same as above - `pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4 - lxml 3.5, 3.6 - `pyexcel-ods`_ ods `odfpy`_ same as above - ======================== ======================= ================= ================== + `pyexcel-xlsx`_ xlsx `openpyxl`_ + `pyexcel-ods3`_ ods `pyexcel-ezodf`_, + lxml + `pyexcel-ods`_ ods `odfpy`_ + ======================== ======================= ================= .. table:: Dedicated file reader and writers - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3 - `pyexcel-xlsxr`_ xlsx(read only) lxml same as above - `pyexcel-xlsbr`_ xlsx(read only) pyxlsb same as above - `pyexcel-odsr`_ read only for ods, fods lxml same as above - `pyexcel-odsw`_ write only for ods loxun same as above - `pyexcel-htmlr`_ html(read only) lxml,html5lib same as above - `pyexcel-pdfr`_ pdf(read only) pdftables Python 2 only. 
- ======================== ======================= ================= ================== + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ + `pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_ + `pyexcel-xlsxr`_ xlsx(read only) lxml + `pyexcel-xlsbr`_ xlsb(read only) pyxlsb + `pyexcel-odsr`_ read only for ods, fods lxml + `pyexcel-odsw`_ write only for ods loxun + `pyexcel-htmlr`_ html(read only) lxml,html5lib + `pyexcel-pdfr`_ pdf(read only) camelot + ======================== ======================= ================= Plugin shopping guide ------------------------ +Since 2020, all pyexcel-io plugins have dropped the support for python versions +lower than 3.6. If you need to use an older python version, please use pyexcel-io +and its plugins at versions lower than 0.6.0. + + Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of -xml files +xml files -The dedicated readers for excel files can stream read +The dedicated readers for excel files can stream read In order to manage the list of plugins installed, you need to use pip to add or remove @@ -123,6 +126,7 @@ You need to append get_array(..., library='pyexcel-odsr'). .. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr .. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw +.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw .. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr .. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr .. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr @@ -133,6 +137,7 @@ You need to append get_array(..., library='pyexcel-odsr'). .. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter .. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf .. _odfpy: https://github.com/eea/odfpy +.. 
_libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html .. rubric:: Footnotes diff --git a/lint.sh b/lint.sh index 891aa63..d31eeaa 100644 --- a/lint.sh +++ b/lint.sh @@ -1,2 +1,2 @@ pip install flake8 -flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs +flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs \ No newline at end of file diff --git a/pyexcel-io.yml b/pyexcel-io.yml index 8b43aff..fbf944e 100644 --- a/pyexcel-io.yml +++ b/pyexcel-io.yml @@ -2,9 +2,9 @@ overrides: "pyexcel.yaml" project: "pyexcel-io" name: pyexcel-io nick_name: io -version: 0.6.3 -current_version: 0.6.3 -release: 0.6.3 +version: 0.6.4 +current_version: 0.6.4 +release: 0.6.4 copyright_year: 2015-2020 moban_command: false is_on_conda: true diff --git a/pyexcel_io/database/common.py b/pyexcel_io/database/common.py index 1f60ebd..f074609 100644 --- a/pyexcel_io/database/common.py +++ b/pyexcel_io/database/common.py @@ -38,61 +38,66 @@ def __init__(self): def __init__(self, model): DjangoModelExportAdapter.__init__(self, model) - self.__column_names = self.InOutParameter() - self.__column_name_mapping_dict = self.InOutParameter() - self.__row_initializer = self.InOutParameter() + self._column_names = self.InOutParameter() + self._column_name_mapping_dict = self.InOutParameter() + self._row_initializer = self.InOutParameter() self._process_parameters() @property def row_initializer(self): """ contructor for a database table entry """ - return self.__row_initializer.output + return self._row_initializer.output @property def column_names(self): """ the desginated database column names """ - return self.__column_names.output + return self._column_names.output @property def column_name_mapping_dict(self): """ if not the same, a mapping dictionary is looked up""" - return self.__column_name_mapping_dict.output + return self._column_name_mapping_dict.output @row_initializer.setter def 
row_initializer(self, a_function): """ set the contructor """ - self.__row_initializer.input = a_function + self._row_initializer.input = a_function self._process_parameters() @column_names.setter def column_names(self, column_names): """ set the column names """ - self.__column_names.input = column_names + self._column_names.input = column_names self._process_parameters() @column_name_mapping_dict.setter def column_name_mapping_dict(self, mapping_dict): """ set the mapping dict """ - self.__column_name_mapping_dict.input = mapping_dict + self._column_name_mapping_dict.input = mapping_dict self._process_parameters() def _process_parameters(self): - if self.__row_initializer.input is None: - self.__row_initializer.output = None + if self._row_initializer.input is None: + self._row_initializer.output = None else: - self.__row_initializer.output = self.__row_initializer.input - if isinstance(self.__column_name_mapping_dict.input, list): - self.__column_names.output = self.__column_name_mapping_dict.input - self.__column_name_mapping_dict.output = None - elif isinstance(self.__column_name_mapping_dict.input, dict): - if self.__column_names.input: - self.__column_names.output = [ - self.__column_name_mapping_dict.input[name] - for name in self.__column_names.input - ] - self.__column_name_mapping_dict.output = None - if self.__column_names.output is None: - self.__column_names.output = self.__column_names.input + self._row_initializer.output = self._row_initializer.input + if isinstance(self._column_name_mapping_dict.input, list): + self._column_names.output = self._column_name_mapping_dict.input + self._column_name_mapping_dict.output = None + elif isinstance(self._column_name_mapping_dict.input, dict): + + if self._column_names.input: + self._column_names.output = [] + indices = [] + for index, name in enumerate(self._column_names.input): + if name in self._column_name_mapping_dict.input: + self._column_names.output.append( + self._column_name_mapping_dict.input[name] 
+ ) + indices.append(index) + self._column_name_mapping_dict.output = indices + if self._column_names.output is None: + self._column_names.output = self._column_names.input class DjangoModelExporter(object): @@ -110,15 +115,15 @@ class DjangoModelImporter(object): """ public interface for django model import """ def __init__(self): - self.__adapters = {} + self._adapters = {} def append(self, import_adapter): """ store model parameter for more than one model """ - self.__adapters[import_adapter.get_name()] = import_adapter + self._adapters[import_adapter.get_name()] = import_adapter def get(self, name): """ get a parameter out """ - return self.__adapters.get(name, None) + return self._adapters.get(name, None) class SQLTableExportAdapter(DjangoModelExportAdapter): diff --git a/pyexcel_io/database/importers/django.py b/pyexcel_io/database/importers/django.py index 9d7307e..49ee14c 100644 --- a/pyexcel_io/database/importers/django.py +++ b/pyexcel_io/database/importers/django.py @@ -20,39 +20,43 @@ class DjangoModelWriter(ISheetWriter): """ import data into a django model """ def __init__(self, importer, adapter, batch_size=None, bulk_save=True): - self.__batch_size = batch_size - self.__model = adapter.model - self.__column_names = adapter.column_names - self.__mapdict = adapter.column_name_mapping_dict - self.__initializer = adapter.row_initializer - self.__objs = [] - self.__bulk_save = bulk_save + self.batch_size = batch_size + self.model = adapter.model + self.column_names = adapter.column_names + self.mapdict = adapter.column_name_mapping_dict + self.initializer = adapter.row_initializer + self.objs = [] + self.bulk_save = bulk_save + self.adapter = adapter def write_row(self, array): if is_empty_array(array): print(constants.MESSAGE_EMPTY_ARRAY) else: new_array = swap_empty_string_for_none(array) + if self.mapdict: + another_new_array = [] + for index, element in enumerate(new_array): + if index in self.mapdict: + another_new_array.append(element) + new_array 
= another_new_array model_to_be_created = new_array - if self.__initializer is not None: - model_to_be_created = self.__initializer(new_array) + if self.initializer is not None: + model_to_be_created = self.initializer(new_array) if model_to_be_created: - self.__objs.append( - self.__model( - **dict(zip(self.__column_names, model_to_be_created)) - ) - ) + row = dict(zip(self.column_names, model_to_be_created)) + self.objs.append(self.model(**row)) # else # skip the row def close(self): - if self.__bulk_save: - self.__model.objects.bulk_create( - self.__objs, batch_size=self.__batch_size + if self.bulk_save: + self.model.objects.bulk_create( + self.objs, batch_size=self.batch_size ) else: - for an_object in self.__objs: + for an_object in self.objs: an_object.save() @@ -60,15 +64,15 @@ class DjangoBookWriter(IWriter): """ write data into django models """ def __init__(self, exporter, _, **keywords): - self.__importer = exporter + self.importer = exporter self._keywords = keywords def create_sheet(self, sheet_name): sheet_writer = None - model = self.__importer.get(sheet_name) + model = self.importer.get(sheet_name) if model: sheet_writer = DjangoModelWriter( - self.__importer, + self.importer, model, batch_size=self._keywords.get("batch_size", None), bulk_save=self._keywords.get("bulk_save", True), diff --git a/pyexcel_io/database/importers/sqlalchemy.py b/pyexcel_io/database/importers/sqlalchemy.py index 5c5cd27..cf2705a 100644 --- a/pyexcel_io/database/importers/sqlalchemy.py +++ b/pyexcel_io/database/importers/sqlalchemy.py @@ -45,7 +45,14 @@ def write_row(self, array): print(new_array) def _write_row(self, array): - row = dict(zip(self.adapter.column_names, array)) + new_array = array + if self.adapter.column_name_mapping_dict: + another_new_array = [] + for index, element in enumerate(new_array): + if index in self.adapter.column_name_mapping_dict: + another_new_array.append(element) + new_array = another_new_array + row = dict(zip(self.adapter.column_names, 
new_array)) obj = None if self.adapter.row_initializer: # allow initinalizer to return None @@ -54,11 +61,7 @@ def _write_row(self, array): if obj is None: obj = self.adapter.table() for name in self.adapter.column_names: - if self.adapter.column_name_mapping_dict is not None: - key = self.adapter.column_name_mapping_dict[name] - else: - key = name - setattr(obj, key, row[name]) + setattr(obj, name, row[name]) self.importer.session.add(obj) if self.__auto_commit and self.__bulk_size != float("inf"): self.__count += 1 diff --git a/rnd_requirements.txt b/rnd_requirements.txt index e69de29..8b13789 100644 --- a/rnd_requirements.txt +++ b/rnd_requirements.txt @@ -0,0 +1 @@ + diff --git a/setup.py b/setup.py index b5e4bea..267d6a2 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ locale.setlocale(locale.LC_ALL, "en_US.UTF-8") NAME = "pyexcel-io" -AUTHOR = "chfw" -VERSION = "0.6.3" +AUTHOR = "C.W." +VERSION = "0.6.4" EMAIL = "info@pyexcel.org" LICENSE = "New BSD" DESCRIPTION = ( @@ -40,7 +40,7 @@ "format and to/from databases" ) URL = "https://github.com/pyexcel/pyexcel-io" -DOWNLOAD_URL = "%s/archive/0.6.3.tar.gz" % URL +DOWNLOAD_URL = "%s/archive/0.6.4.tar.gz" % URL FILES = ["README.rst", "CHANGELOG.rst"] KEYWORDS = [ "python", @@ -85,13 +85,14 @@ } # You do not need to read beyond this line PUBLISH_COMMAND = "{0} setup.py sdist bdist_wheel upload -r pypi".format(sys.executable) -GS_COMMAND = ("gs pyexcel-io v0.6.3 " + - "Find 0.6.3 in changelog for more details") +HERE = os.path.abspath(os.path.dirname(__file__)) + +GS_COMMAND = ("gease pyexcel-io v0.6.4 " + + "Find 0.6.4 in changelog for more details") NO_GS_MESSAGE = ("Automatic github release is disabled. " + "Please install gease to enable it.") UPLOAD_FAILED_MSG = ( 'Upload failed. please run "%s" yourself.' 
% PUBLISH_COMMAND) -HERE = os.path.abspath(os.path.dirname(__file__)) class PublishCommand(Command): @@ -137,7 +138,6 @@ def run(self): "publish": PublishCommand }) - def has_gease(): """ test if github release command is installed diff --git a/tests/test_django_book.py b/tests/test_django_book.py index 5db3e7c..5ccdd01 100644 --- a/tests/test_django_book.py +++ b/tests/test_django_book.py @@ -158,10 +158,13 @@ def wrapper(row): writer = DjangoModelWriter(None, adapter) writer.write_array(self.data[1:]) writer.close() - assert model.objects.objs == [ - {"Y": 2, "X": 2, "Z": 3}, - {"Y": 5, "X": 5, "Z": 6}, - ] + eq_( + model.objects.objs, + [ + {"Y": 2, "X": 2, "Z": 3}, + {"Y": 5, "X": 5, "Z": 6}, + ], + ) def test_sheet_save_to_django_model_skip_me(self): model = FakeDjangoModel() @@ -178,7 +181,7 @@ def wrapper(row): writer = DjangoModelWriter(None, adapter) writer.write_array(self.data[1:]) writer.close() - assert model.objects.objs == [{"Y": 2, "X": 1, "Z": 3}] + eq_(model.objects.objs, [{"Y": 2, "X": 1, "Z": 3}]) def test_load_sheet_from_django_model(self): model = FakeDjangoModel() @@ -241,6 +244,18 @@ def test_mapping_dict(self): writer.close() eq_(model.objects.objs, self.result) + def test_jumping_columns(self): + data2 = [["D", "A", "B", "C"], [1, 1, 2, 3], [10, 4, 5, 6]] + mapdict = {"C": "Z", "A": "X", "B": "Y"} + model = FakeDjangoModel() + adapter = DjangoModelImportAdapter(model) + adapter.column_names = data2[0] + adapter.column_name_mapping_dict = mapdict + writer = DjangoModelWriter(None, adapter) + writer.write_array(data2[1:]) + writer.close() + eq_(model.objects.objs, self.result) + def test_empty_model(self): model = FakeDjangoModel() reader = DjangoModelReader(model)