From b850c2ab27e8051c60ad520d7ce90e4dfc18f300 Mon Sep 17 00:00:00 2001 From: chfw Date: Sun, 10 Nov 2019 21:36:12 +0000 Subject: [PATCH 01/10] :sparkles: use inherited mobanfile. https://github.com/moremoban/moban/pull/348 --- .github/FUNDING.yml | 4 + .github/PULL_REQUEST_TEMPLATE.md | 10 + .gitignore | 431 +++++++++++++++++- .../{README.rst => custom_README.rst.jj2} | 0 .moban.d/{setup.py => custom_setup.py.jj2} | 0 ...ements.txt => custom_requirements.txt.jj2} | 0 .moban.yml | 13 +- .travis.yml | 56 ++- CHANGELOG.rst | 23 +- LICENSE | 28 +- MANIFEST.in | 2 + README.rst | 82 ++-- changelog.yml | 31 ++ docs/source/conf.py | 97 ++-- docs/source/index.rst | 1 - lint.sh | 2 + setup.py | 132 +++--- test.bat | 2 +- test.sh | 3 +- 19 files changed, 726 insertions(+), 191 deletions(-) create mode 100644 .github/FUNDING.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md rename .moban.d/{README.rst => custom_README.rst.jj2} (100%) rename .moban.d/{setup.py => custom_setup.py.jj2} (100%) rename .moban.d/tests/{requirements.txt => custom_requirements.txt.jj2} (100%) create mode 100644 changelog.yml create mode 100644 lint.sh diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..0faea60 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,4 @@ +# These are supported funding model platforms + +github: chfw +patreon: chfw diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..7b632ce --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,10 @@ +With your PR, here is a check list: + +- [ ] Has Test cases written +- [ ] Has all code lines tested +- [ ] Passes all Travis CI builds +- [ ] Has fair amount of documentation if your change is complex +- [ ] run 'make format' so as to confirm the pyexcel organisation's coding style +- [ ] Please update CHANGELOG.rst +- [ ] Please add yourself to CONTRIBUTORS.rst +- [ ] Agree on NEW BSD License for your contribution diff --git a/.gitignore b/.gitignore index f32c742..88bc3f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ -# April 2016 -# reference: https://github.com/github/gitignore/blob/master/Python.gitignore +# moban hashes +.moban.hashes + +# Extra rules from https://github.com/github/gitignore/ +# Python rules # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -10,7 +13,6 @@ __pycache__/ # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -22,9 +24,13 @@ lib64/ parts/ sdist/ var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -39,13 +45,15 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml -*,cover +*.cover .hypothesis/ +.pytest_cache/ # Translations *.mo @@ -54,6 +62,7 @@ coverage.xml # Django stuff: *.log local_settings.py +db.sqlite3 # Flask stuff: instance/ @@ -68,32 +77,430 @@ docs/_build/ # PyBuilder target/ -# IPython Notebook +# Jupyter Notebook .ipynb_checkpoints +# IPython +profile_default/ +ipython_config.py + # pyenv .python-version +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# install all needed dependencies. +#Pipfile.lock + # celery beat schedule file celerybeat-schedule -# dotenv -.env +# SageMath parsed files +*.sage.py -# virtualenv +# Environments +.env +.venv +env/ venv/ ENV/ +env.bak/ +venv.bak/ # Spyder project settings .spyderproject +.spyproject # Rope project settings .ropeproject -# emacs +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# VirtualEnv rules +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + +# Linux rules *~ -# moban hashes -.moban.hashes -.DS_store +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +# Windows rules +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# macOS rules +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Emacs rules +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +# Vim rules +# Swap +[._]*.s[a-v][a-z] +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +# JetBrains rules +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/modules.xml +# .idea/*.iml +# .idea/modules + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# SublimeText rules +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +# KDevelop4 rules +*.kdev4 +.kdev4/ + +# Kate rules +# Swap Files # +.*.kate-swp +.swp.* + +# TextMate rules +*.tmproj +*.tmproject +tmtags + +# VisualStudioCode rules +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# Xcode rules +# Xcode +# +# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +## User settings +xcuserdata/ + +## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) +*.xcscmblueprint +*.xccheckout + +## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) +build/ +DerivedData/ +*.moved-aside +*.pbxuser +!default.pbxuser +*.mode1v3 +!default.mode1v3 +*.mode2v3 +!default.mode2v3 +*.perspectivev3 +!default.perspectivev3 + +# Eclipse rules +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# PyDev specific (Python IDE for Eclipse) +*.pydevproject + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# Java annotation processor (APT) +.factorypath + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Code Recommenders +.recommenders/ + +# Annotation Processing +.apt_generated/ + +# Scala IDE specific (Scala & Java development for Eclipse) +.cache-main +.scala_dependencies +.worksheet + +# TortoiseGit rules +# Project-level settings +/.tgitconfig +# Tags rules +# Ignore tags created by etags, ctags, gtags (GNU global) and cscope +TAGS +.TAGS +!TAGS/ +tags +.tags +!tags/ +gtags.files +GTAGS +GRTAGS +GPATH +GSYMS +cscope.files +cscope.out +cscope.in.out +cscope.po.out diff --git a/.moban.d/README.rst b/.moban.d/custom_README.rst.jj2 similarity index 100% rename from .moban.d/README.rst rename to .moban.d/custom_README.rst.jj2 diff --git a/.moban.d/setup.py b/.moban.d/custom_setup.py.jj2 similarity index 100% rename from .moban.d/setup.py rename to .moban.d/custom_setup.py.jj2 diff --git a/.moban.d/tests/requirements.txt b/.moban.d/tests/custom_requirements.txt.jj2 similarity index 100% rename from .moban.d/tests/requirements.txt rename to .moban.d/tests/custom_requirements.txt.jj2 diff --git a/.moban.yml b/.moban.yml index 652d537..d4ffc0b 100644 --- a/.moban.yml +++ b/.moban.yml @@ -1,17 +1,12 @@ +overrides: "git://github.com/pyexcel/pyexcel-mobans!/mobanfile.yaml" configuration: - configuration_dir: "commons/config" - template_dir: - - "commons/templates" - - "setupmobans/templates" - - ".moban.d" configuration: pyexcel-htmlr.yml targets: - - setup.py: setup.py + - README.rst: custom_README.rst.jj2 + - setup.py: custom_setup.py.jj2 - requirements.txt: requirements.txt - MANIFEST.in: MANIFEST.in.jj2 - - "tests/requirements.txt": "tests/requirements.txt" - - test.sh: test.script.jj2 - - test.bat: test.script.jj2 + - "tests/requirements.txt": "tests/custom_requirements.txt.jj2" - .travis.yml: travis.yml.jj2 - .gitignore: gitignore.jj2 - "docs/source/conf.py": "docs/source/conf.py.jj2" diff --git a/.travis.yml b/.travis.yml index ce8dcac..ba5cbb8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,66 @@ sudo: false +dist: xenial language: python notifications: email: false python: - - pypy-5.3.1 + - &pypy2 pypy2.7-6.0 + - &pypy3 pypy3.5-6.0 + - 3.8-dev + - 3.7 - 3.6 - 3.5 - - 3.4 - - 3.3 - 2.7 + +stages: + - lint + - moban + - test + +.disable_global: &disable_global + addons: false + cache: false + env: {} + python: false + before_install: false + install: false + before_script: false + script: false + after_success: false + after_failure: false + before_deploy: false + deploy: false + +.lint: &lint + <<: *disable_global + git: + submodules: false + python: 3.6 + stage: lint + script: make lint + +.moban: &moban + <<: *disable_global + python: 3.6 + stage: moban + install: pip install moban>=0.0.4 + script: + - moban + - git diff --exit-code + +jobs: + include: + - *moban + - *lint + +stage: test + before_install: - - if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then pip install flake8==2.6.2; fi - if [[ -f min_requirements.txt && "$MINREQ" -eq 1 ]]; then mv min_requirements.txt requirements.txt ; fi - - test ! -f rnd_requirements.txt || pip install --no-deps -r rnd_requirements.txt + - test ! -f rnd_requirements.txt || + pip install --no-deps -r rnd_requirements.txt - test ! -f rnd_requirements.txt || pip install -r rnd_requirements.txt ; - pip install -r tests/requirements.txt script: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c74b712..dad9cd6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,12 +1,12 @@ Change log -=========== +================================================================================ 0.5.2 - 23.10.2017 -------------------------------------------------------------------------------- -updated -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#. pyexcel `#105 `_, remove gease +**updated** + +#. pyexcel `pyexcel#105 `_, remove gease from setup_requires, introduced by 0.5.1. #. remove python2.6 test support #. update its dependecy on pyexcel-io to 0.5.3 @@ -14,29 +14,24 @@ updated 0.5.1 - 20.10.2017 -------------------------------------------------------------------------------- -added -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +**added** -#. `#103 `_, include LICENSE file +#. `pyexcel#103 `_, include LICENSE file in MANIFEST.in, meaning LICENSE file will appear in the released tar ball. 0.5.0 - 30.08.2017 -------------------------------------------------------------------------------- -Updated -******************************************************************************** +**Updated** #. put dependency on pyexcel-io 0.5.0, which uses cStringIO instead of StringIO. Hence, there will be performance boost in handling files in memory. #. version jumped because it will be easy to see pyexcel-htmlr depends on pyexcel-io v0.5.0 -Relocated -******************************************************************************** +**Relocated** #. type detection code is being relocated into pyexcel-io 0.0.1 - 26-07-2017 ---------------------------- - -Initial release +-------------------------------------------------------------------------------- diff --git a/LICENSE b/LICENSE index a633d8d..f6469b2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2017 by Onni Software Ltd. and its contributors +Copyright (c) 2015-2019 by Onni Software Ltd. and its contributors All rights reserved. Redistribution and use in source and binary forms of the software as well @@ -13,13 +13,9 @@ that the following conditions are met: and/or other materials provided with the distribution. * Neither the name of 'pyexcel-htmlr' nor the names of the contributors - may not be used to endorse or promote products derived from this software + may be used to endorse or promote products derived from this software without specific prior written permission. -Please also note that this library contains a few functions and test fixtures -from messytables which is under MIT license and please see their license -at the end. - THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -32,23 +28,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Copyright (c) 2012-2017 The Open Knowledge Foundation Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in index c2e4b1b..b1bf562 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include README.rst include LICENSE include CHANGELOG.rst +recursive-include tests * +recursive-include docs * diff --git a/README.rst b/README.rst index e821c47..8b7a9cd 100644 --- a/README.rst +++ b/README.rst @@ -3,17 +3,28 @@ pyexcel-htmlr - Let you focus on data, instead of html format ================================================================================ .. image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png - :target: https://www.patreon.com/pyexcel + :target: https://www.patreon.com/chfw -.. image:: https://api.travis-ci.org/pyexcel/pyexcel-htmlr.svg?branch=master +.. image:: https://travis-ci.org/pyexcel/pyexcel-htmlr.svg?branch=master :target: http://travis-ci.org/pyexcel/pyexcel-htmlr .. image:: https://codecov.io/gh/pyexcel/pyexcel-htmlr/branch/master/graph/badge.svg :target: https://codecov.io/gh/pyexcel/pyexcel-htmlr +.. image:: https://badge.fury.io/py/pyexcel-htmlr.svg + :target: https://pypi.org/project/pyexcel-htmlr + + +.. image:: https://pepy.tech/badge/pyexcel-htmlr/month + :target: https://pepy.tech/project/pyexcel-htmlr/month + + .. image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg :target: https://gitter.im/pyexcel/Lobby +.. image:: https://readthedocs.org/projects/pyexcel-htmlr/badge/?version=latest + :target: http://pyexcel-htmlr.readthedocs.org/en/latest/ + Known constraints ================== @@ -23,7 +34,8 @@ Fonts, colors and charts are not supported. Installation ================================================================================ -You can install it via pip: + +You can install pyexcel-htmlr via pip: .. code-block:: bash @@ -42,13 +54,16 @@ Support the project ================================================================================ If your company has embedded pyexcel and its components into a revenue generating -product, please `support me on patreon `_ to -maintain the project and develop it further. +product, please support me on `github `_, `patreon `_ +or `bounty source `_ to maintain +the project and develop it further. -If you are an individual, you are welcome to support me too on patreon and for however long -you feel like to. As a patreon, you will receive +If you are an individual, you are welcome to support me too and for however long +you feel like. As my backer, you will receive `early access to pyexcel related contents `_. +And your issues will get prioritized if you would like to become my patreon as `pyexcel pro user`. + With your financial support, I will be able to invest a little bit more time in coding, documentation and writing interesting posts. @@ -64,7 +79,6 @@ As a standalone library >>> import os >>> import sys - >>> import pyexcel as pe >>> if sys.version_info[0] < 3: ... from StringIO import StringIO ... else: @@ -74,12 +88,7 @@ As a standalone library ... from ordereddict import OrderedDict ... else: ... from collections import OrderedDict - >>> - >>> data = OrderedDict() # from collections import OrderedDict - >>> data.update({"Sheet 1": [[1, 2, 3], [4, 5, 6]]}) - >>> data.update({"Sheet 2": [["row 1", "row 2", "row 3"]]}) - >>> book = pe.get_book(bookdict=data) - >>> book.save_as("your_file.html") + Read from an html file ******************************************************************************** @@ -92,7 +101,7 @@ Here's the sample code: >>> data = get_data("your_file.html") >>> import json >>> print(json.dumps(data)) - {"Table 1": [[1, 2, 3], [4, 5, 6]], "Table 2": [["row 1", "row 2", "row 3"]]} + {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [["row 1", "row 2", "row 3"]]} @@ -107,10 +116,9 @@ Continue from previous example: >>> # This is just an illustration >>> # In reality, you might deal with html file upload >>> # where you will read from requests.FILES['YOUR_HTML_FILE'] - >>> data = get_data(book.stream.html) + >>> data = get_data(io) >>> print(json.dumps(data)) - {"Table 1": [[1, 2, 3], [4, 5, 6]], "Table 2": [["row 1", "row 2", "row 3"]]} - + {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [[7, 8, 9], [10, 11, 12]]} Pagination feature @@ -133,7 +141,7 @@ Let's assume the following file is a huge html file: >>> sheetx = { ... "huge": huge_data ... } - >>> pe.save_as(bookdict=sheetx, dest_file_name="huge_file.html") + >>> save_data("huge_file.html", sheetx) And let's pretend to read partial data: @@ -141,7 +149,7 @@ And let's pretend to read partial data: >>> partial_data = get_data("huge_file.html", start_row=2, row_limit=3) >>> print(json.dumps(partial_data)) - {"Table 1": [[3, 23, 33], [4, 24, 34], [5, 25, 35]]} + {"huge": [[3, 23, 33], [4, 24, 34], [5, 25, 35]]} And you could as well do the same for columns: @@ -149,7 +157,7 @@ And you could as well do the same for columns: >>> partial_data = get_data("huge_file.html", start_column=1, column_limit=2) >>> print(json.dumps(partial_data)) - {"Table 1": [[21, 31], [22, 32], [23, 33], [24, 34], [25, 35], [26, 36]]} + {"huge": [[21, 31], [22, 32], [23, 33], [24, 34], [25, 35], [26, 36]]} Obvious, you could do both at the same time: @@ -159,7 +167,7 @@ Obvious, you could do both at the same time: ... start_row=2, row_limit=3, ... start_column=1, column_limit=2) >>> print(json.dumps(partial_data)) - {"Table 1": [[23, 33], [24, 34], [25, 35]]} + {"huge": [[23, 33], [24, 34], [25, 35]]} .. testcode:: :hide: @@ -185,13 +193,13 @@ Here is the sample code: >>> import pyexcel as pe >>> sheet = pe.get_book(file_name="your_file.html") >>> sheet - Table 1: + Sheet 1: +---+---+---+ | 1 | 2 | 3 | +---+---+---+ | 4 | 5 | 6 | +---+---+---+ - Table 2: + Sheet 2: +-------+-------+-------+ | row 1 | row 2 | row 3 | +-------+-------+-------+ @@ -209,19 +217,19 @@ You got to wrap the binary content with stream to get html working: >>> # This is just an illustration >>> # In reality, you might deal with html file upload >>> # where you will read from requests.FILES['YOUR_HTML_FILE'] - >>> htmlfile = "your_file.html" - >>> with open(htmlfile, "r") as f: + >>> htmlfile = "another_file.html" + >>> with open(htmlfile, "rb") as f: ... content = f.read() ... r = pe.get_book(file_type="html", file_content=content) ... print(r) ... - Table 1: + Sheet 1: +---+---+---+ | 1 | 2 | 3 | +---+---+---+ | 4 | 5 | 6 | +---+---+---+ - Table 2: + Sheet 2: +-------+-------+-------+ | row 1 | row 2 | row 3 | +-------+-------+-------+ @@ -258,9 +266,9 @@ and update CHANGELOG.rst. .. note:: As to rnd_requirements.txt, usually, it is created when a dependent - library is not released. Once the dependecy is installed - (will be released), the future - version of the dependency in the requirements.txt will be valid. + library is not released. Once the dependecy is installed + (will be released), the future + version of the dependency in the requirements.txt will be valid. How to test your contribution @@ -282,7 +290,6 @@ How to update test environment and update documentation Additional steps are required: #. pip install moban -#. git clone https://github.com/pyexcel/pyexcel-commons.git commons #. make your changes in `.moban.d` directory, then issue command `moban` What is pyexcel-commons @@ -295,19 +302,10 @@ What is .moban.d `.moban.d` stores the specific meta data for the library. -Acceptance criteria -------------------- - -#. Has Test cases written -#. Has all code lines tested -#. Passes all Travis CI builds -#. Has fair amount of documentation if your change is complex -#. Agree on NEW BSD License for your contribution - - .. testcode:: :hide: >>> import os >>> os.unlink("your_file.html") + >>> os.unlink("another_file.html") diff --git a/changelog.yml b/changelog.yml new file mode 100644 index 0000000..712cfac --- /dev/null +++ b/changelog.yml @@ -0,0 +1,31 @@ +releases: +- changes: + - action: updated + details: + - pyexcel `pyexcel#105`, remove gease from setup_requires, introduced by 0.5.1. + - remove python2.6 test support + - update its dependecy on pyexcel-io to 0.5.3 + date: 23.10.2017 + version: 0.5.2 +- changes: + - action: added + details: + - '`pyexcel#103`, include LICENSE file in MANIFEST.in, meaning LICENSE file will + appear in the released tar ball.' + date: 20.10.2017 + version: 0.5.1 +- changes: + - action: Updated + details: + - put dependency on pyexcel-io 0.5.0, which uses cStringIO instead of StringIO. Hence, + there will be performance boost in handling files in memory. + - version jumped because it will be easy to see pyexcel-htmlr depends on pyexcel-io + v0.5.0 + - action: Relocated + details: + - type detection code is being relocated into pyexcel-io + date: 30.08.2017 + version: 0.5.0 +- changes: [] + date: 26-07-2017 + version: 0.0.1 diff --git a/docs/source/conf.py b/docs/source/conf.py index 3eb72a1..53488f6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3,41 +3,76 @@ 'read tables in html file as excel data' + '' ) -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.viewcode', -] +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -intersphinx_mapping = { - 'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None), -} -spelling_word_list_filename = 'spelling_wordlist.txt' -templates_path = ['_templates'] -source_suffix = '.rst' -master_doc = 'index' +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) -project = u'pyexcel-htmlr' -copyright = u'2015-2017 Onni Software Ltd.' +# -- Project information ----------------------------------------------------- + +project = 'pyexcel-htmlr' +copyright = '2015-2019 Onni Software Ltd.' +author = 'C.W.' +# The short X.Y version version = '0.5.2' +# The full version, including alpha/beta/rc tags release = '0.5.2' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode',] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] -pygments_style = 'sphinx' -html_theme = 'default' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -htmlhelp_basename = 'pyexcel-htmlrdoc' -latex_elements = {} -latex_documents = [ - ('index', 'pyexcel-htmlr.tex', - 'pyexcel-htmlr Documentation', - 'Onni Software Ltd.', 'manual'), -] -man_pages = [ - ('index', 'pyexcel-htmlr', - 'pyexcel-htmlr Documentation', - [u'Onni Software Ltd.'], 1) -] + +# -- Extension configuration ------------------------------------------------- +# -- Options for intersphinx extension --------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/3/': None} +# TODO: html_theme not configurable upstream +html_theme = 'default' + +# TODO: DESCRIPTION not configurable upstream texinfo_documents = [ ('index', 'pyexcel-htmlr', 'pyexcel-htmlr Documentation', @@ -45,3 +80,7 @@ DESCRIPTION, 'Miscellaneous'), ] +intersphinx_mapping.update({ + 'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None), +}) +master_doc = "index" diff --git a/docs/source/index.rst b/docs/source/index.rst index 2238699..0703582 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -35,4 +35,3 @@ or clone it and install it: $ git clone https://github.com/pyexcel/pyexcel-htmlr.git $ cd pyexcel-htmlr $ python setup.py install - diff --git a/lint.sh b/lint.sh new file mode 100644 index 0000000..976f745 --- /dev/null +++ b/lint.sh @@ -0,0 +1,2 @@ +pip install flake8 +flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long \ No newline at end of file diff --git a/setup.py b/setup.py index df371d8..13c8207 100644 --- a/setup.py +++ b/setup.py @@ -1,74 +1,97 @@ -# Template by setupmobans +#!/usr/bin/env python3 + +# Template by pypi-mobans +import codecs +import locale import os +import platform import sys -import codecs from shutil import rmtree -from setuptools import setup, find_packages, Command + +from setuptools import Command, find_packages, setup + PY2 = sys.version_info[0] == 2 PY26 = PY2 and sys.version_info[1] < 7 - -NAME = 'pyexcel-htmlr' -AUTHOR = 'C.W.' -VERSION = '0.5.2' -EMAIL = 'wangc_2011@hotmail.com' -LICENSE = 'New BSD' +PY33 = sys.version_info < (3, 4) + +# Work around mbcs bug in distutils. +# http://bugs.python.org/issue10945 +# This work around is only if a project supports Python < 3.4 + +# Work around for locale not being set +try: + lc = locale.getlocale() + pf = platform.system() + if pf != "Windows" and lc == (None, None): + locale.setlocale(locale.LC_ALL, "C.UTF-8") +except (ValueError, UnicodeError, locale.Error): + locale.setlocale(locale.LC_ALL, "en_US.UTF-8") + +NAME = "pyexcel-htmlr" +AUTHOR = "C.W." +VERSION = "0.5.2" +EMAIL = "info@pyexcel.org" +LICENSE = "New BSD" DESCRIPTION = ( - 'read tables in html file as excel data' + - '' + "read tables in html file as excel data" ) -URL = 'https://github.com/pyexcel/pyexcel-htmlr' -DOWNLOAD_URL = '%s/archive/0.5.2.tar.gz' % URL -FILES = ['README.rst', 'CHANGELOG.rst'] +URL = "https://github.com/pyexcel/pyexcel-htmlr" +DOWNLOAD_URL = "%s/archive/0.5.2.tar.gz" % URL +FILES = ["README.rst", "CHANGELOG.rst"] KEYWORDS = [ - 'python' + "python", ] CLASSIFIERS = [ - 'Topic :: Office/Business', - 'Topic :: Utilities', - 'Topic :: Software Development :: Libraries', - 'Programming Language :: Python', - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + "Topic :: Software Development :: Libraries", + "Programming Language :: Python", + "Intended Audience :: Developers", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + + "Programming Language :: Python :: 3.7", + + "Programming Language :: Python :: 3.8", + 'License :: OSI Approved :: BSD License', ] INSTALL_REQUIRES = [ - 'html5lib', - 'lxml', - 'pyexcel-io>=0.5.3', + "html5lib", + "lxml", + "pyexcel-io>=0.5.3", ] +SETUP_COMMANDS = {} -PACKAGES = find_packages(exclude=['ez_setup', 'examples', 'tests']) +PACKAGES = find_packages(exclude=["ez_setup", "examples", "tests"]) EXTRAS_REQUIRE = { } # You do not need to read beyond this line -PUBLISH_COMMAND = '{0} setup.py sdist bdist_wheel upload -r pypi'.format( - sys.executable) -GS_COMMAND = ('gs pyexcel-htmlr v0.5.2 ' + +PUBLISH_COMMAND = "{0} setup.py sdist bdist_wheel upload -r pypi".format(sys.executable) +GS_COMMAND = ("gs pyexcel-htmlr v0.5.2 " + "Find 0.5.2 in changelog for more details") -NO_GS_MESSAGE = ('Automatic github release is disabled. ' + - 'Please install gease to enable it.') -UPLOAD_FAILED_MSG = ('Upload failed. please run "%s" yourself.') +NO_GS_MESSAGE = ("Automatic github release is disabled. " + + "Please install gease to enable it.") +UPLOAD_FAILED_MSG = ( + 'Upload failed. please run "%s" yourself.' % PUBLISH_COMMAND) HERE = os.path.abspath(os.path.dirname(__file__)) class PublishCommand(Command): """Support setup.py upload.""" - description = 'Build and publish the package on github and pypi' + description = "Build and publish the package on github and pypi" user_options = [] @staticmethod def status(s): """Prints things in bold.""" - print('\033[1m{0}\033[0m'.format(s)) + print("\033[1m{0}\033[0m".format(s)) def initialize_options(self): pass @@ -78,12 +101,14 @@ def finalize_options(self): def run(self): try: - self.status('Removing previous builds...') - rmtree(os.path.join(HERE, 'dist')) + self.status("Removing previous builds...") + rmtree(os.path.join(HERE, "dist")) + rmtree(os.path.join(HERE, "build")) + rmtree(os.path.join(HERE, "pyexcel_htmlr.egg-info")) except OSError: pass - self.status('Building Source and Wheel (universal) distribution...') + self.status("Building Source and Wheel (universal) distribution...") run_status = True if has_gease(): run_status = os.system(GS_COMMAND) == 0 @@ -91,11 +116,16 @@ def run(self): self.status(NO_GS_MESSAGE) if run_status: if os.system(PUBLISH_COMMAND) != 0: - self.status(UPLOAD_FAILED_MSG % PUBLISH_COMMAND) + self.status(UPLOAD_FAILED_MSG) sys.exit() +SETUP_COMMANDS.update({ + "publish": PublishCommand +}) + + def has_gease(): """ test if github release command is installed @@ -120,7 +150,8 @@ def read_files(*files): def read(afile): """Read a file into setup""" - with codecs.open(afile, 'r', 'utf-8') as opened_file: + the_relative_file = os.path.join(HERE, afile) + with codecs.open(the_relative_file, "r", "utf-8") as opened_file: content = filter_out_test_code(opened_file) content = "".join(list(content)) return content @@ -129,11 +160,11 @@ def read(afile): def filter_out_test_code(file_handle): found_test_code = False for line in file_handle.readlines(): - if line.startswith('.. testcode:'): + if line.startswith(".. testcode:"): found_test_code = True continue if found_test_code is True: - if line.startswith(' '): + if line.startswith(" "): continue else: empty_line = line.strip() @@ -143,15 +174,16 @@ def filter_out_test_code(file_handle): found_test_code = False yield line else: - for keyword in ['|version|', '|today|']: + for keyword in ["|version|", "|today|"]: if keyword in line: break else: yield line -if __name__ == '__main__': +if __name__ == "__main__": setup( + test_suite="tests", name=NAME, author=AUTHOR, version=VERSION, @@ -163,13 +195,11 @@ def filter_out_test_code(file_handle): license=LICENSE, keywords=KEYWORDS, extras_require=EXTRAS_REQUIRE, - tests_require=['nose'], + tests_require=["nose"], install_requires=INSTALL_REQUIRES, packages=PACKAGES, include_package_data=True, zip_safe=False, classifiers=CLASSIFIERS, - cmdclass={ - 'publish': PublishCommand, - } + cmdclass=SETUP_COMMANDS ) diff --git a/test.bat b/test.bat index dd8f9e9..03328a0 100644 --- a/test.bat +++ b/test.bat @@ -1,2 +1,2 @@ pip freeze -nosetests --with-coverage --cover-package pyexcel_htmlr --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_htmlr && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long +nosetests --with-coverage --cover-package pyexcel_htmlr --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_htmlr diff --git a/test.sh b/test.sh index dd8f9e9..f8c0776 100644 --- a/test.sh +++ b/test.sh @@ -1,2 +1,3 @@ +#/bin/bash pip freeze -nosetests --with-coverage --cover-package pyexcel_htmlr --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_htmlr && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long +nosetests --with-coverage --cover-package pyexcel_htmlr --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_htmlr From 2855e37b89d7043168e7d0d0705d8cd21da570c1 Mon Sep 17 00:00:00 2001 From: chfw Date: Mon, 8 Jun 2020 22:49:19 +0100 Subject: [PATCH 02/10] :handshake: synchronize the organisational meta data --- .github/PULL_REQUEST_TEMPLATE.md | 2 ++ .gitignore | 46 ++++++++++++++++++++++--- .travis.yml | 23 ++++--------- README.rst | 58 +++++++++++++++++++------------- setup.py | 17 +++++----- 5 files changed, 93 insertions(+), 53 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7b632ce..d5a2c03 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,6 +2,8 @@ With your PR, here is a check list: - [ ] Has Test cases written - [ ] Has all code lines tested +- [ ] Has `make format` been run? +- [ ] Has `moban` been run? - [ ] Passes all Travis CI builds - [ ] Has fair amount of documentation if your change is complex - [ ] run 'make format' so as to confirm the pyexcel organisation's coding style diff --git a/.gitignore b/.gitignore index 88bc3f6..a9ca840 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ parts/ sdist/ var/ wheels/ -pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg @@ -52,8 +51,10 @@ htmlcov/ nosetests.xml coverage.xml *.cover +*.py,cover .hypothesis/ .pytest_cache/ +cover/ # Translations *.mo @@ -63,6 +64,7 @@ coverage.xml *.log local_settings.py db.sqlite3 +db.sqlite3-journal # Flask stuff: instance/ @@ -75,6 +77,7 @@ instance/ docs/_build/ # PyBuilder +.pybuilder/ target/ # Jupyter Notebook @@ -85,17 +88,23 @@ profile_default/ ipython_config.py # pyenv -.python-version +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock -# celery beat schedule file +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff celerybeat-schedule +celerybeat.pid # SageMath parsed files *.sage.py @@ -127,6 +136,12 @@ dmypy.json # Pyre type checker .pyre/ +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + # VirtualEnv rules # Virtualenv # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ @@ -159,6 +174,7 @@ pip-selfcheck.json # Windows rules # Windows thumbnail cache files Thumbs.db +Thumbs.db:encryptable ehthumbs.db ehthumbs_vista.db @@ -264,6 +280,7 @@ flycheck_*.el # Vim rules # Swap [._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files [._]*.sw[a-p] [._]s[a-rt-v][a-z] [._]ss[a-gi-z] @@ -271,6 +288,7 @@ flycheck_*.el # Session Session.vim +Sessionx.vim # Temporary .netrwhist @@ -281,7 +299,7 @@ tags [._]*.un~ # JetBrains rules -# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff @@ -311,9 +329,14 @@ tags # When using Gradle or Maven with auto-import, you should exclude module files, # since they will be recreated, and may cause churn. Uncomment if using # auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml # .idea/modules.xml # .idea/*.iml # .idea/modules +# *.iml +# *.ipr # CMake cmake-build-*/ @@ -363,6 +386,7 @@ fabric.properties # SFTP configuration file sftp-config.json +sftp-config-alt*.json # Package control specific files Package Control.last-run @@ -400,6 +424,10 @@ tmtags !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ # Xcode rules # Xcode @@ -426,6 +454,9 @@ DerivedData/ *.perspectivev3 !default.perspectivev3 +## Gcc Patch +/*.gcno + # Eclipse rules .metadata bin/ @@ -477,12 +508,17 @@ local.properties # Annotation Processing .apt_generated/ +.apt_generated_test/ # Scala IDE specific (Scala & Java development for Eclipse) .cache-main .scala_dependencies .worksheet +# Uncomment this line if you wish to ignore the project description file. +# Typically, this file would be tracked if it contains build/dependency configurations: +#.project + # TortoiseGit rules # Project-level settings /.tgitconfig diff --git a/.travis.yml b/.travis.yml index ba5cbb8..009cae9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ notifications: python: - &pypy2 pypy2.7-6.0 - &pypy3 pypy3.5-6.0 - - 3.8-dev + - 3.8 - 3.7 - 3.6 - 3.5 @@ -17,33 +17,22 @@ stages: - moban - test -.disable_global: &disable_global - addons: false - cache: false - env: {} - python: false - before_install: false - install: false - before_script: false - script: false - after_success: false - after_failure: false - before_deploy: false - deploy: false .lint: &lint - <<: *disable_global git: submodules: false python: 3.6 + env: + - MINREQ=0 stage: lint script: make lint .moban: &moban - <<: *disable_global python: 3.6 + env: + - MINREQ=0 stage: moban - install: pip install moban>=0.0.4 + install: pip install moban>=0.0.4 gitfs2 pypifs script: - moban - git diff --exit-code diff --git a/README.rst b/README.rst index 8b7a9cd..6443384 100644 --- a/README.rst +++ b/README.rst @@ -5,6 +5,9 @@ pyexcel-htmlr - Let you focus on data, instead of html format .. image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png :target: https://www.patreon.com/chfw +.. image:: https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg + :target: https://awesome-python.com/#specific-formats-processing + .. image:: https://travis-ci.org/pyexcel/pyexcel-htmlr.svg?branch=master :target: http://travis-ci.org/pyexcel/pyexcel-htmlr @@ -26,6 +29,24 @@ pyexcel-htmlr - Let you focus on data, instead of html format :target: http://pyexcel-htmlr.readthedocs.org/en/latest/ +Support the project +================================================================================ + +If your company has embedded pyexcel and its components into a revenue generating +product, please support me on github, `patreon `_ +or `bounty source `_ to maintain +the project and develop it further. + +If you are an individual, you are welcome to support me too and for however long +you feel like. As my backer, you will receive +`early access to pyexcel related contents `_. + +And your issues will get prioritized if you would like to become my patreon as `pyexcel pro user`. + +With your financial support, I will be able to invest +a little bit more time in coding, documentation and writing interesting posts. + + Known constraints ================== @@ -50,24 +71,6 @@ or clone it and install it: $ cd pyexcel-htmlr $ python setup.py install -Support the project -================================================================================ - -If your company has embedded pyexcel and its components into a revenue generating -product, please support me on `github `_, `patreon `_ -or `bounty source `_ to maintain -the project and develop it further. - -If you are an individual, you are welcome to support me too and for however long -you feel like. As my backer, you will receive -`early access to pyexcel related contents `_. - -And your issues will get prioritized if you would like to become my patreon as `pyexcel pro user`. - -With your financial support, I will be able to invest -a little bit more time in coding, documentation and writing interesting posts. - - Usage ================================================================================ @@ -284,18 +287,27 @@ On Windows systems, please issue this command:: > test.bat -How to update test environment and update documentation + +Before you commit +------------------------------ + +Please run:: + + $ make format + +so as to beautify your code otherwise travis-ci may fail your unit test. + + +And make sure you would have run moban command --------------------------------------------------------- Additional steps are required: #. pip install moban #. make your changes in `.moban.d` directory, then issue command `moban` +#. moban -What is pyexcel-commons ---------------------------------- - -Many information that are shared across pyexcel projects, such as: this developer guide, license info, etc. are stored in `pyexcel-commons` project. +otherwise travis-ci may also fail your unit test. What is .moban.d --------------------------------- diff --git a/setup.py b/setup.py index 13c8207..28af69a 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,17 @@ #!/usr/bin/env python3 -# Template by pypi-mobans +""" +Template by pypi-mobans +""" + +import os +import sys import codecs import locale -import os import platform -import sys from shutil import rmtree -from setuptools import Command, find_packages, setup +from setuptools import Command, setup, find_packages PY2 = sys.version_info[0] == 2 PY26 = PY2 and sys.version_info[1] < 7 @@ -52,14 +55,13 @@ "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", 'License :: OSI Approved :: BSD License', ] + INSTALL_REQUIRES = [ "html5lib", "lxml", @@ -67,8 +69,7 @@ ] SETUP_COMMANDS = {} - -PACKAGES = find_packages(exclude=["ez_setup", "examples", "tests"]) +PACKAGES = find_packages(exclude=["ez_setup", "examples", "tests", "tests.*"]) EXTRAS_REQUIRE = { } # You do not need to read beyond this line From 8cd5f37b1a1e5681ad84a7fc039d04709c6d3c87 Mon Sep 17 00:00:00 2001 From: jaska Date: Fri, 2 Oct 2020 22:50:00 +0100 Subject: [PATCH 03/10] New style reader (#1) * :tada: new style io * :books: update readme * :handshake: update project meta data * :lipstick: update coding style * :books: update change log and version. :fire: remove builds lower than python 3.6 * :fire: remove unwanted matrix --- .github/PULL_REQUEST_TEMPLATE.md | 9 +- .github/workflows/moban-update.yml | 29 +++++++ .github/workflows/pythonpublish.yml | 26 ++++++ .gitignore | 4 + .isort.cfg | 10 +++ .moban.d/custom_travis.yml.jj2 | 8 ++ .moban.yml | 3 +- .travis.yml | 6 +- CHANGELOG.rst | 7 ++ CONTRIBUTORS.rst | 6 ++ MANIFEST.in | 1 + Makefile | 16 ++-- README.rst | 33 ++++---- changelog.yml | 6 ++ docs/source/conf.py | 4 +- docs/source/index.rst | 3 +- lint.sh | 2 +- pyexcel-htmlr.yml | 7 +- pyexcel_htmlr/__init__.py | 29 +++++-- pyexcel_htmlr/_version.py | 4 +- pyexcel_htmlr/htmlr.py | 123 ++++++++++++++-------------- setup.py | 4 +- tests/requirements.txt | 8 +- tests/test_htmlr.py | 31 +++---- 24 files changed, 251 insertions(+), 128 deletions(-) create mode 100644 .github/workflows/moban-update.yml create mode 100644 .github/workflows/pythonpublish.yml create mode 100644 .isort.cfg create mode 100644 .moban.d/custom_travis.yml.jj2 create mode 100644 CONTRIBUTORS.rst diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d5a2c03..6017f21 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,12 +1,9 @@ With your PR, here is a check list: -- [ ] Has Test cases written -- [ ] Has all code lines tested +- [ ] Has test cases written? +- [ ] Has all code lines tested? - [ ] Has `make format` been run? -- [ ] Has `moban` been run? +- [ ] Please update CHANGELOG.yml(not CHANGELOG.rst) - [ ] Passes all Travis CI builds - [ ] Has fair amount of documentation if your change is complex -- [ ] run 'make format' so as to confirm the pyexcel organisation's coding style -- [ ] Please update CHANGELOG.rst -- [ ] Please add yourself to CONTRIBUTORS.rst - [ ] Agree on NEW BSD License for your contribution diff --git a/.github/workflows/moban-update.yml b/.github/workflows/moban-update.yml new file mode 100644 index 0000000..706fd82 --- /dev/null +++ b/.github/workflows/moban-update.yml @@ -0,0 +1,29 @@ +on: [push] + +jobs: + run_moban: + runs-on: ubuntu-latest + name: synchronize templates via moban + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + - name: check changes + run: | + pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible + moban + git status + git diff --exit-code + - name: Auto-commit + if: failure() + uses: docker://cdssnc/auto-commit-github-action + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + args: >- + This is an auto-commit, updating project meta data, + such as changelog.rst, contributors.rst diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml new file mode 100644 index 0000000..9e7ec42 --- /dev/null +++ b/.github/workflows/pythonpublish.yml @@ -0,0 +1,26 @@ +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.gitignore b/.gitignore index a9ca840..e8b12f9 100644 --- a/.gitignore +++ b/.gitignore @@ -540,3 +540,7 @@ cscope.files cscope.out cscope.in.out cscope.po.out + + +# remove moban hash dictionary +.moban.hashes diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..3cd6be2 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,10 @@ +[settings] +line_length=79 +known_first_party=pyexcel_io,html5lib +known_third_party=mock,nose,pyexcel +indent=' ' +multi_line_output=3 +length_sort=1 +default_section=FIRSTPARTY +no_lines_before=LOCALFOLDER +sections=FUTURE,STDLIB,FIRSTPARTY,THIRDPARTY,LOCALFOLDER diff --git a/.moban.d/custom_travis.yml.jj2 b/.moban.d/custom_travis.yml.jj2 new file mode 100644 index 0000000..ecac24e --- /dev/null +++ b/.moban.d/custom_travis.yml.jj2 @@ -0,0 +1,8 @@ +{% extends "travis.yml.jj2" %} + +{%block custom_python_versions%} +python: + - 3.8 + - 3.7 + - 3.6 +{%endblock%} diff --git a/.moban.yml b/.moban.yml index d4ffc0b..6872bcf 100644 --- a/.moban.yml +++ b/.moban.yml @@ -2,12 +2,11 @@ overrides: "git://github.com/pyexcel/pyexcel-mobans!/mobanfile.yaml" configuration: configuration: pyexcel-htmlr.yml targets: - - README.rst: custom_README.rst.jj2 - setup.py: custom_setup.py.jj2 - requirements.txt: requirements.txt - MANIFEST.in: MANIFEST.in.jj2 - "tests/requirements.txt": "tests/custom_requirements.txt.jj2" - - .travis.yml: travis.yml.jj2 + - .travis.yml: custom_travis.yml.jj2 - .gitignore: gitignore.jj2 - "docs/source/conf.py": "docs/source/conf.py.jj2" - "docs/source/index.rst": "index.rst.jj2" \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 009cae9..b457dd1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,13 +4,9 @@ language: python notifications: email: false python: - - &pypy2 pypy2.7-6.0 - - &pypy3 pypy3.5-6.0 - 3.8 - 3.7 - 3.6 - - 3.5 - - 2.7 stages: - lint @@ -32,7 +28,7 @@ stages: env: - MINREQ=0 stage: moban - install: pip install moban>=0.0.4 gitfs2 pypifs + install: pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible script: - moban - git diff --exit-code diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dad9cd6..3eb4e17 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Change log ================================================================================ +0.6.0 - tbd +-------------------------------------------------------------------------------- + +**Updated** + +#. New style xlsx plugins, promoted by pyexcel-io v0.6.0. + 0.5.2 - 23.10.2017 -------------------------------------------------------------------------------- diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst new file mode 100644 index 0000000..423f980 --- /dev/null +++ b/CONTRIBUTORS.rst @@ -0,0 +1,6 @@ + + +No contributors yet +======================= + +* Your github link will be listed here after your PR is merged diff --git a/MANIFEST.in b/MANIFEST.in index b1bf562..e86ae54 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include README.rst include LICENSE include CHANGELOG.rst +include CONTRIBUTORS.rst recursive-include tests * recursive-include docs * diff --git a/Makefile b/Makefile index 03fb3be..3e0ee51 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,16 @@ all: test -test: +test: lint bash test.sh -document: - bash document.sh +install_test: + pip install -r tests/requirements.txt -spelling: - sphinx-build -b spelling docs/source/ docs/build/spelling +lint: + bash lint.sh + +format: + bash format.sh + +git-diff-check: + git diff --exit-code diff --git a/README.rst b/README.rst index 6443384..a8099f7 100644 --- a/README.rst +++ b/README.rst @@ -91,6 +91,9 @@ As a standalone library ... from ordereddict import OrderedDict ... else: ... from collections import OrderedDict + >>> import pyexcel as pe + >>> book_data = {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [["row 1", "row 2", "row 3"]]} + >>> pe.save_book_as(bookdict=book_data, dest_file_name="your_file.html") Read from an html file @@ -104,7 +107,7 @@ Here's the sample code: >>> data = get_data("your_file.html") >>> import json >>> print(json.dumps(data)) - {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [["row 1", "row 2", "row 3"]]} + {"Table 1": [[1, 2, 3], [4, 5, 6]], "Table 2": [["row 1", "row 2", "row 3"]]} @@ -119,10 +122,11 @@ Continue from previous example: >>> # This is just an illustration >>> # In reality, you might deal with html file upload >>> # where you will read from requests.FILES['YOUR_HTML_FILE'] - >>> data = get_data(io) + >>> with open('your_file.html', 'r') as html_file: + ... io = StringIO(html_file.read().encode()) + ... data = get_data(io) >>> print(json.dumps(data)) - {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [[7, 8, 9], [10, 11, 12]]} - + {"Table 1": [[1, 2, 3], [4, 5, 6]], "Table 2": [["row 1", "row 2", "row 3"]]} Pagination feature ******************************************************************************** @@ -142,9 +146,9 @@ Let's assume the following file is a huge html file: ... [6, 26, 36] ... ] >>> sheetx = { - ... "huge": huge_data + ... "Table 1": huge_data ... } - >>> save_data("huge_file.html", sheetx) + >>> pe.save_book_as(dest_file_name="huge_file.html", bookdict=sheetx) And let's pretend to read partial data: @@ -152,7 +156,7 @@ And let's pretend to read partial data: >>> partial_data = get_data("huge_file.html", start_row=2, row_limit=3) >>> print(json.dumps(partial_data)) - {"huge": [[3, 23, 33], [4, 24, 34], [5, 25, 35]]} + {"Table 1": [[3, 23, 33], [4, 24, 34], [5, 25, 35]]} And you could as well do the same for columns: @@ -160,7 +164,7 @@ And you could as well do the same for columns: >>> partial_data = get_data("huge_file.html", start_column=1, column_limit=2) >>> print(json.dumps(partial_data)) - {"huge": [[21, 31], [22, 32], [23, 33], [24, 34], [25, 35], [26, 36]]} + {"Table 1": [[21, 31], [22, 32], [23, 33], [24, 34], [25, 35], [26, 36]]} Obvious, you could do both at the same time: @@ -170,7 +174,7 @@ Obvious, you could do both at the same time: ... start_row=2, row_limit=3, ... start_column=1, column_limit=2) >>> print(json.dumps(partial_data)) - {"huge": [[23, 33], [24, 34], [25, 35]]} + {"Table 1": [[23, 33], [24, 34], [25, 35]]} .. testcode:: :hide: @@ -196,13 +200,13 @@ Here is the sample code: >>> import pyexcel as pe >>> sheet = pe.get_book(file_name="your_file.html") >>> sheet - Sheet 1: + Table 1: +---+---+---+ | 1 | 2 | 3 | +---+---+---+ | 4 | 5 | 6 | +---+---+---+ - Sheet 2: + Table 2: +-------+-------+-------+ | row 1 | row 2 | row 3 | +-------+-------+-------+ @@ -220,19 +224,19 @@ You got to wrap the binary content with stream to get html working: >>> # This is just an illustration >>> # In reality, you might deal with html file upload >>> # where you will read from requests.FILES['YOUR_HTML_FILE'] - >>> htmlfile = "another_file.html" + >>> htmlfile = "your_file.html" >>> with open(htmlfile, "rb") as f: ... content = f.read() ... r = pe.get_book(file_type="html", file_content=content) ... print(r) ... - Sheet 1: + Table 1: +---+---+---+ | 1 | 2 | 3 | +---+---+---+ | 4 | 5 | 6 | +---+---+---+ - Sheet 2: + Table 2: +-------+-------+-------+ | row 1 | row 2 | row 3 | +-------+-------+-------+ @@ -320,4 +324,3 @@ What is .moban.d >>> import os >>> os.unlink("your_file.html") - >>> os.unlink("another_file.html") diff --git a/changelog.yml b/changelog.yml index 712cfac..e41fe63 100644 --- a/changelog.yml +++ b/changelog.yml @@ -1,4 +1,10 @@ releases: +- changes: + - action: Updated + details: + - 'New style xlsx plugins, promoted by pyexcel-io v0.6.0.' + date: tbd + version: 0.6.0 - changes: - action: updated details: diff --git a/docs/source/conf.py b/docs/source/conf.py index 53488f6..897a222 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,9 +23,9 @@ project = 'pyexcel-htmlr' copyright = '2015-2019 Onni Software Ltd.' -author = 'C.W.' +author = 'chfw' # The short X.Y version -version = '0.5.2' +version = '0.6.0' # The full version, including alpha/beta/rc tags release = '0.5.2' diff --git a/docs/source/index.rst b/docs/source/index.rst index 0703582..b2c1408 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,10 +1,11 @@ `pyexcel-htmlr` - Let you focus on data, instead of file formats ================================================================================ -:Author: C.W. +:Author: chfw :Source code: http://github.com/pyexcel/pyexcel-htmlr.git :Issues: http://github.com/pyexcel/pyexcel-htmlr/issues :License: New BSD License +:Development: |release| :Released: |version| :Generated: |today| diff --git a/lint.sh b/lint.sh index 976f745..891aa63 100644 --- a/lint.sh +++ b/lint.sh @@ -1,2 +1,2 @@ pip install flake8 -flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long \ No newline at end of file +flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs diff --git a/pyexcel-htmlr.yml b/pyexcel-htmlr.yml index ff7d8c1..be0ca9b 100644 --- a/pyexcel-htmlr.yml +++ b/pyexcel-htmlr.yml @@ -1,12 +1,15 @@ overrides: "pyexcel.yaml" name: "pyexcel-htmlr" nick_name: "htmlr" -version: "0.5.2" -current_version: "0.5.2" +version: "0.6.0" +current_version: "0.6.0" release: "0.5.2" file_type: 'html' dependencies: - html5lib - lxml - pyexcel-io>=0.5.3 +test_dependencies: + - pyexcel-text + - pyexcel description: "read tables in html file as excel data" \ No newline at end of file diff --git a/pyexcel_htmlr/__init__.py b/pyexcel_htmlr/__init__.py index 5b9bd3e..5bf9fd8 100644 --- a/pyexcel_htmlr/__init__.py +++ b/pyexcel_htmlr/__init__.py @@ -5,15 +5,28 @@ :copyright: (c) 2015-2017 by Onni Software Ltd & its contributors :license: New BSD License """ -from ._version import __version__, __author__ # flake8: noqa -from pyexcel_io.plugins import IOPluginInfoChain -from pyexcel_io.io import get_data as read_data, isstream +from pyexcel_io.io import get_data as read_data +from pyexcel_io.io import isstream +from pyexcel_io.plugins import IOPluginInfoChainV2 -__FILE_TYPE__ = 'html' -IOPluginInfoChain(__name__).add_a_reader( - relative_plugin_class_path='htmlr.HtmlPage', - file_types=[__FILE_TYPE__], - stream_type='text' +from ._version import __author__, __version__ # noqa + +__FILE_TYPE__ = "html" +IOPluginInfoChainV2(__name__).add_a_reader( + relative_plugin_class_path="htmlr.HtmlPageInContent", + locations=["content"], + file_types=[__FILE_TYPE__, "htm"], + stream_type="text", +).add_a_reader( + relative_plugin_class_path="htmlr.HtmlPageInStream", + locations=["memory"], + file_types=[__FILE_TYPE__, "htm"], + stream_type="text", +).add_a_reader( + relative_plugin_class_path="htmlr.HtmlPageInFile", + locations=["file"], + file_types=[__FILE_TYPE__, "htm"], + stream_type="text", ) diff --git a/pyexcel_htmlr/_version.py b/pyexcel_htmlr/_version.py index fd4fc5d..bfa14d8 100644 --- a/pyexcel_htmlr/_version.py +++ b/pyexcel_htmlr/_version.py @@ -1,2 +1,2 @@ -__version__ = '' -__author__ = '' +__version__ = "" +__author__ = "" diff --git a/pyexcel_htmlr/htmlr.py b/pyexcel_htmlr/htmlr.py index e9834fe..d41423f 100644 --- a/pyexcel_htmlr/htmlr.py +++ b/pyexcel_htmlr/htmlr.py @@ -3,27 +3,31 @@ ~~~~~~~~~~~~~~~~~~~ html table reader using messytables - :copyright: (c) 2015-2017 by Onni Software Ltd & its contributors + :copyright: (c) 2015-2020 by Onni Software Ltd & its contributors :license: New BSD License """ -import html5lib +import codecs import xml.etree.ElementTree as etree -from pyexcel_io.book import BookReader -from pyexcel_io.sheet import SheetReader, NamedContent -from pyexcel_io._compact import OrderedDict +import html5lib import pyexcel_io.service as service - +from pyexcel_io.sheet import NamedContent +from pyexcel_io.plugin_api.abstract_sheet import ISheet +from pyexcel_io.plugin_api.abstract_reader import IReader ALL_TABLE_COLUMNS = './/*[name()="td" or name()="th"]' -class HtmlTable(SheetReader): - def __init__(self, sheet, auto_detect_int=True, - auto_detect_float=True, - auto_detect_datetime=True, - **keywords): - SheetReader.__init__(self, sheet, **keywords) +class HtmlTable(ISheet): + def __init__( + self, + sheet, + auto_detect_int=True, + auto_detect_float=True, + auto_detect_datetime=True, + **keywords + ): + self._native_sheet = sheet self.__auto_detect_int = auto_detect_int self.__auto_detect_float = auto_detect_float self.__auto_detect_datetime = auto_detect_datetime @@ -35,7 +39,7 @@ def name(self): return self._native_sheet.name def row_iterator(self): - for element in self._native_sheet.payload.xpath('.//tr'): + for element in self._native_sheet.payload.xpath(".//tr"): if self.__xml_table in element.xpath("./ancestor::table[1]"): yield element @@ -48,28 +52,28 @@ def column_iterator(self, row): self.__column_span[index] -= 1 if self.__column_span[index] == 0: del self.__column_span[index] - yield '' + yield "" index += 1 cell_text = text_from_element(element) yield self.__convert_cell(cell_text) - row_span = get_attribute('colspan', element) - col_span = get_attribute('rowspan', element) + row_span = get_attribute("colspan", element) + col_span = get_attribute("rowspan", element) if row_span > 1: # generate '' due to colspan if col_span > 1: for offset in range(row_span): if offset > 0: # for next cell, give full col span - self.__column_span[index+offset] = col_span + self.__column_span[index + offset] = col_span else: # for current cell, give -1 because it has been # yielded - self.__column_span[index+offset] = col_span - 1 + self.__column_span[index + offset] = col_span - 1 else: # no col span found, so just repeat in the same row - for _ in range(row_span-1): - yield '' + for _ in range(row_span - 1): + yield "" index += 1 else: if col_span > 1: @@ -84,9 +88,8 @@ def __convert_cell(self, cell_text): if ret is None and self.__auto_detect_float: ret = service.detect_float_value(cell_text) shall_we_ignore_the_conversion = ( - (ret in [float('inf'), float('-inf')]) and - self.__ignore_infinity - ) + ret in [float("inf"), float("-inf")] + ) and self.__ignore_infinity if shall_we_ignore_the_conversion: ret = None if ret is None and self.__auto_detect_datetime: @@ -96,45 +99,43 @@ def __convert_cell(self, cell_text): return ret -class HtmlPage(BookReader): - def __init__(self): - BookReader.__init__(self) - self._file_handle = None +class HtmlPageInContent(IReader): + def __init__(self, file_content, file_type, **keywords): + self._keywords = keywords + self.content_array = list(HtmlPageInContent.parse_html(file_content)) - def open(self, file_name, **keywords): - BookReader.open(self, file_name, **keywords) - self._load_from_file() + def read_sheet(self, native_sheet_index): + native_sheet = self.content_array[native_sheet_index] + sheet = HtmlTable(native_sheet, **self._keywords) + return sheet - def open_stream(self, file_stream, **keywords): - BookReader.open_stream(self, file_stream, **keywords) - self._load_from_memory() + @staticmethod + def parse_html(content): + root = fromstring(content) + for index, table in enumerate(root.xpath("//table"), 1): + name = "Table %s" % index + yield NamedContent(name, table) - def read_all(self): - result = OrderedDict() - for sheet in self._native_book: - result.update(self.read_sheet(sheet)) - return result + def close(self): + pass - def read_sheet(self, native_sheet): - sheet = HtmlTable(native_sheet, **self._keywords) - return {sheet.name: sheet.to_array()} - def _load_from_file(self): - self._file_handle = open(self._file_name, 'r') - self._native_book = self._parse_html(self._file_handle) +class HtmlPageInStream(HtmlPageInContent): + def __init__(self, file_stream, file_type, **keywords): + file_stream.seek(0) + file_content = file_stream.read() + super().__init__(file_content, file_type, **keywords) - def _load_from_memory(self): - self._native_book = self._parse_html(self._file_stream) - def _parse_html(self, file_handler): - root = fromstring(file_handler.read()) - for index, table in enumerate(root.xpath('//table'), 1): - name = 'Table %s' % index - yield NamedContent(name, table) +class HtmlPageInFile(HtmlPageInContent): + def __init__(self, file_name, file_type, **keywords): + self.file_handle = codecs.open(file_name, "r") + file_content = self.file_handle.read() + super().__init__(file_content, file_type, **keywords) def close(self): - if self._file_handle: - self._file_handle.close() + if self.file_handle: + self.file_handle.close() def fromstring(s): @@ -147,21 +148,21 @@ def text_from_element(elem): builder = [] for x in elem.iter(): if is_invisible_text(x): - cell_str = x.tail or '' # handle None values. + cell_str = x.tail or "" # handle None values. else: - cell_str = (x.text or '') + (x.tail or '') - cell_str = cell_str.replace('\n', ' ').strip() - if x.tag == 'br' or x.tag == 'p': - cell_str = '\n' + cell_str + cell_str = (x.text or "") + (x.tail or "") + cell_str = cell_str.replace("\n", " ").strip() + if x.tag == "br" or x.tag == "p": + cell_str = "\n" + cell_str builder.append(cell_str) - return ''.join(builder).strip() + return "".join(builder).strip() def is_invisible_text(elem): flag = False if elem.tag == "span": - if 'style' in elem.attrib: - if 'display:none' in elem.attrib['style']: + if "style" in elem.attrib: + if "display:none" in elem.attrib["style"]: flag = True return flag diff --git a/setup.py b/setup.py index 28af69a..60434a4 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ locale.setlocale(locale.LC_ALL, "en_US.UTF-8") NAME = "pyexcel-htmlr" -AUTHOR = "C.W." -VERSION = "0.5.2" +AUTHOR = "chfw" +VERSION = "0.6.0" EMAIL = "info@pyexcel.org" LICENSE = "New BSD" DESCRIPTION = ( diff --git a/tests/requirements.txt b/tests/requirements.txt index d1af589..957b35f 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -3,5 +3,11 @@ mock;python_version<"3" codecov coverage flake8 -pyexcel +black +isort +collective.checkdocs +pygments +moban +moban_jinja2_github pyexcel-text +pyexcel diff --git a/tests/test_htmlr.py b/tests/test_htmlr.py index 289b8de..8968906 100644 --- a/tests/test_htmlr.py +++ b/tests/test_htmlr.py @@ -1,6 +1,7 @@ import os -from nose.tools import eq_ + import pyexcel as p +from nose.tools import eq_ def test_htmlr(): @@ -12,39 +13,39 @@ def test_htmlr(): def test_html_html(): - sheet = p.get_sheet(file_name=get_fixture('html.html')) + sheet = p.get_sheet(file_name=get_fixture("html.html")) eq_(sheet.number_of_rows(), 200) - eq_(sheet[0, 0], 'HDI Rank') - eq_(sheet[0, 1], 'Country') + eq_(sheet[0, 0], "HDI Rank") + eq_(sheet[0, 1], "Country") eq_(sheet[0, 4], 2010) def test_table_names(): - book = p.get_book(file_name=get_fixture('html.html')) - eq_(book[0].name, 'Table 1') - eq_(book[1].name, 'Table 2') - eq_(book[2].name, 'Table 3') + book = p.get_book(file_name=get_fixture("html.html")) + eq_(book[0].name, "Table 1") + eq_(book[1].name, "Table 2") + eq_(book[2].name, "Table 3") def test_invisible_text_html(): - sheet = p.get_sheet(file_name=get_fixture('invisible_text.html')) + sheet = p.get_sheet(file_name=get_fixture("invisible_text.html")) eq_(sheet.number_of_rows(), 4) - eq_(sheet[1, 5], '1 July 1879') + eq_(sheet[1, 5], "1 July 1879") def test_complex_html(): - book = p.get_book(file_name=get_fixture('complex.html')) + book = p.get_book(file_name=get_fixture("complex.html")) eq_(book[0].number_of_rows(), 1) - eq_(book[0][0, 0], 'headfootbody') + eq_(book[0][0, 0], "headfootbody") def test_span(): - sheet = p.get_sheet(file_name=get_fixture('rowcolspan.html')) + sheet = p.get_sheet(file_name=get_fixture("rowcolspan.html")) print(sheet) - eq_(sheet[0, 0], '05') + eq_(sheet[0, 0], "05") eq_(sheet[2, 0], 25) eq_(sheet[2, 4], 29) - eq_(sheet[3, 0], '') + eq_(sheet[3, 0], "") eq_(sheet[3, 1], 36) eq_(sheet[3, 4], 39) eq_(sheet[6, 1], 66) From 773064118eecb35f0972e5271053bc4cee138765 Mon Sep 17 00:00:00 2001 From: chfw Date: Tue, 6 Oct 2020 23:26:16 +0100 Subject: [PATCH 04/10] :hammer: code refactoring --- pyexcel_htmlr/htmlr.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyexcel_htmlr/htmlr.py b/pyexcel_htmlr/htmlr.py index d41423f..8856c02 100644 --- a/pyexcel_htmlr/htmlr.py +++ b/pyexcel_htmlr/htmlr.py @@ -11,9 +11,7 @@ import html5lib import pyexcel_io.service as service -from pyexcel_io.sheet import NamedContent -from pyexcel_io.plugin_api.abstract_sheet import ISheet -from pyexcel_io.plugin_api.abstract_reader import IReader +from pyexcel_io.plugin_api import ISheet, IReader, NamedContent ALL_TABLE_COLUMNS = './/*[name()="td" or name()="th"]' @@ -25,7 +23,6 @@ def __init__( auto_detect_int=True, auto_detect_float=True, auto_detect_datetime=True, - **keywords ): self._native_sheet = sheet self.__auto_detect_int = auto_detect_int From 562816dc2be67fd86a92f6c1b125f58ec6b8e154 Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 22:44:08 +0100 Subject: [PATCH 05/10] :egg: :ferris_wheel: release 0.6.0 --- CHANGELOG.rst | 11 ++++++----- LICENSE | 4 ++-- changelog.yml | 6 ++++-- docs/source/conf.py | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3eb4e17..09c625e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,15 +6,15 @@ Change log **Updated** -#. New style xlsx plugins, promoted by pyexcel-io v0.6.0. +#. New style xlsx plugins, promoted by pyexcel-io v0.6.2 0.5.2 - 23.10.2017 -------------------------------------------------------------------------------- **updated** -#. pyexcel `pyexcel#105 `_, remove gease - from setup_requires, introduced by 0.5.1. +#. pyexcel `pyexcel#105 `_, + remove gease from setup_requires, introduced by 0.5.1. #. remove python2.6 test support #. update its dependecy on pyexcel-io to 0.5.3 @@ -23,8 +23,9 @@ Change log **added** -#. `pyexcel#103 `_, include LICENSE file - in MANIFEST.in, meaning LICENSE file will appear in the released tar ball. +#. `pyexcel#103 `_, include + LICENSE file in MANIFEST.in, meaning LICENSE file will appear in the released + tar ball. 0.5.0 - 30.08.2017 -------------------------------------------------------------------------------- diff --git a/LICENSE b/LICENSE index f6469b2..13a27b4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015-2019 by Onni Software Ltd. and its contributors +Copyright (c) 2015-2020 by Onni Software Ltd. and its contributors All rights reserved. Redistribution and use in source and binary forms of the software as well @@ -13,7 +13,7 @@ that the following conditions are met: and/or other materials provided with the distribution. * Neither the name of 'pyexcel-htmlr' nor the names of the contributors - may be used to endorse or promote products derived from this software + may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND diff --git a/changelog.yml b/changelog.yml index e41fe63..4ddd954 100644 --- a/changelog.yml +++ b/changelog.yml @@ -1,9 +1,11 @@ +name: pyexcel-htmlr +organisation: pyexcel releases: - changes: - action: Updated details: - - 'New style xlsx plugins, promoted by pyexcel-io v0.6.0.' - date: tbd + - 'New style xlsx plugins, promoted by pyexcel-io v0.6.2' + date: 9.10.2020 version: 0.6.0 - changes: - action: updated diff --git a/docs/source/conf.py b/docs/source/conf.py index 897a222..3e5eb3b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ # -- Project information ----------------------------------------------------- project = 'pyexcel-htmlr' -copyright = '2015-2019 Onni Software Ltd.' +copyright = '2015-2020 Onni Software Ltd.' author = 'chfw' # The short X.Y version version = '0.6.0' From b274c3e1c3f547db73675da4f72fb06bfdeb4bc8 Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 21:44:39 +0000 Subject: [PATCH 06/10] This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst --- CHANGELOG.rst | 2 +- format.sh | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 format.sh diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 09c625e..9d98f7d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Change log ================================================================================ -0.6.0 - tbd +0.6.0 - 9.10.2020 -------------------------------------------------------------------------------- **Updated** diff --git a/format.sh b/format.sh new file mode 100644 index 0000000..eeb82ba --- /dev/null +++ b/format.sh @@ -0,0 +1,3 @@ +isort $(find pyexcel_htmlr -name "*.py"|xargs echo) $(find tests -name "*.py"|xargs echo) +black -l 79 pyexcel_htmlr +black -l 79 tests From ada41fe3d68ebda8bd8e3cb4b21e9a139357586a Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 22:52:49 +0100 Subject: [PATCH 07/10] :egg: :ferris_wheel: release 0.6.0 --- CHANGELOG.rst | 2 +- docs/source/conf.py | 2 +- docs/source/index.rst | 1 - pyexcel-htmlr.yml | 8 +++++--- requirements.txt | 2 +- setup.py | 20 +++++++++++--------- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 09c625e..9d98f7d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Change log ================================================================================ -0.6.0 - tbd +0.6.0 - 9.10.2020 -------------------------------------------------------------------------------- **Updated** diff --git a/docs/source/conf.py b/docs/source/conf.py index 3e5eb3b..2347372 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -27,7 +27,7 @@ # The short X.Y version version = '0.6.0' # The full version, including alpha/beta/rc tags -release = '0.5.2' +release = '0.6.0' # -- General configuration --------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index b2c1408..62b7733 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,7 +5,6 @@ :Source code: http://github.com/pyexcel/pyexcel-htmlr.git :Issues: http://github.com/pyexcel/pyexcel-htmlr/issues :License: New BSD License -:Development: |release| :Released: |version| :Generated: |today| diff --git a/pyexcel-htmlr.yml b/pyexcel-htmlr.yml index be0ca9b..c1c0367 100644 --- a/pyexcel-htmlr.yml +++ b/pyexcel-htmlr.yml @@ -3,13 +3,15 @@ name: "pyexcel-htmlr" nick_name: "htmlr" version: "0.6.0" current_version: "0.6.0" -release: "0.5.2" +release: "0.6.0" file_type: 'html' dependencies: - html5lib - lxml - - pyexcel-io>=0.5.3 + - pyexcel-io>=0.6.2 test_dependencies: - pyexcel-text - pyexcel -description: "read tables in html file as excel data" \ No newline at end of file +description: "read tables in html file as excel data" +python_requires: ">=3.6" +min_python_version: "3.6" diff --git a/requirements.txt b/requirements.txt index 7abeab9..baa0798 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ html5lib lxml -pyexcel-io>=0.5.3 +pyexcel-io>=0.6.2 diff --git a/setup.py b/setup.py index 60434a4..4fd0481 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ "read tables in html file as excel data" ) URL = "https://github.com/pyexcel/pyexcel-htmlr" -DOWNLOAD_URL = "%s/archive/0.5.2.tar.gz" % URL +DOWNLOAD_URL = "%s/archive/0.6.0.tar.gz" % URL FILES = ["README.rst", "CHANGELOG.rst"] KEYWORDS = [ "python", @@ -49,11 +49,11 @@ "Topic :: Software Development :: Libraries", "Programming Language :: Python", "Intended Audience :: Developers", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", + + "Programming Language :: Python :: 3 :: Only", + + + "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -61,11 +61,12 @@ 'License :: OSI Approved :: BSD License', ] +PYTHON_REQUIRES = ">=3.6" INSTALL_REQUIRES = [ "html5lib", "lxml", - "pyexcel-io>=0.5.3", + "pyexcel-io>=0.6.2", ] SETUP_COMMANDS = {} @@ -74,8 +75,8 @@ } # You do not need to read beyond this line PUBLISH_COMMAND = "{0} setup.py sdist bdist_wheel upload -r pypi".format(sys.executable) -GS_COMMAND = ("gs pyexcel-htmlr v0.5.2 " + - "Find 0.5.2 in changelog for more details") +GS_COMMAND = ("gs pyexcel-htmlr v0.6.0 " + + "Find 0.6.0 in changelog for more details") NO_GS_MESSAGE = ("Automatic github release is disabled. " + "Please install gease to enable it.") UPLOAD_FAILED_MSG = ( @@ -195,6 +196,7 @@ def filter_out_test_code(file_handle): long_description=read_files(*FILES), license=LICENSE, keywords=KEYWORDS, + python_requires=PYTHON_REQUIRES, extras_require=EXTRAS_REQUIRE, tests_require=["nose"], install_requires=INSTALL_REQUIRES, From ca2b57696b15bb1b59972c76c39a557b56ac2a6f Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 23:09:05 +0100 Subject: [PATCH 08/10] :green_heart: disable moban job --- pyexcel-htmlr.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyexcel-htmlr.yml b/pyexcel-htmlr.yml index c1c0367..b400ff4 100644 --- a/pyexcel-htmlr.yml +++ b/pyexcel-htmlr.yml @@ -15,3 +15,4 @@ test_dependencies: description: "read tables in html file as excel data" python_requires: ">=3.6" min_python_version: "3.6" +moban_command: false From d0e65ca60a35213bba81d300ae2d6c47d31dca59 Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 22:09:39 +0000 Subject: [PATCH 09/10] This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst --- .travis.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index b457dd1..9cb4e91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,6 @@ python: stages: - lint - - moban - test @@ -23,16 +22,6 @@ stages: stage: lint script: make lint -.moban: &moban - python: 3.6 - env: - - MINREQ=0 - stage: moban - install: pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible - script: - - moban - - git diff --exit-code - jobs: include: - *moban From a1d6364fd83e56336e5963a3cd908192108ca462 Mon Sep 17 00:00:00 2001 From: chfw Date: Thu, 8 Oct 2020 23:17:48 +0100 Subject: [PATCH 10/10] :lipstick: update version automatically --- .moban.yml | 3 ++- pyexcel_htmlr/_version.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.moban.yml b/.moban.yml index 6872bcf..1bb3f98 100644 --- a/.moban.yml +++ b/.moban.yml @@ -9,4 +9,5 @@ targets: - .travis.yml: custom_travis.yml.jj2 - .gitignore: gitignore.jj2 - "docs/source/conf.py": "docs/source/conf.py.jj2" - - "docs/source/index.rst": "index.rst.jj2" \ No newline at end of file + - "docs/source/index.rst": "index.rst.jj2" + - 'pyexcel_htmlr/_version.py': '_version.py.jj2' \ No newline at end of file diff --git a/pyexcel_htmlr/_version.py b/pyexcel_htmlr/_version.py index bfa14d8..817e5a8 100644 --- a/pyexcel_htmlr/_version.py +++ b/pyexcel_htmlr/_version.py @@ -1,2 +1,2 @@ -__version__ = "" -__author__ = "" +__version__ = "0.6.0" +__author__ = "chfw"