Skip to content

Commit bfc9f66

Browse files
committed
Add data-loading libraries from ml-python
1 parent 99b4e50 commit bfc9f66

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

projects.yaml

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ categories:
4242
- category: "web-scraping"
4343
title: "Web Scraping & Crawling"
4444
subtitle: "Libraries for web scraping, crawling, downloading, and mining."
45+
- category: "data-loading"
46+
title: "Data Loading & Extraction"
47+
subtitle: "Libraries for loading, collecting, and extracting data from a variety of data sources and formats."
4548
- category: "monitoring"
4649
title: "Monitoring"
4750
- category: "web-ui"
@@ -2692,3 +2695,129 @@ projects:
26922695
- name: IP2Trace
26932696
github_id: ip2location/ip2trace-python
26942697
pypi_id: IP2Trace
2698+
# Data Loading & Extraction
2699+
- name: Datasets
2700+
pypi_id: datasets
2701+
github_id: huggingface/datasets
2702+
category: data-loading
2703+
- name: tabulator-py
2704+
github_id: frictionlessdata/tabulator-py
2705+
category: data-loading
2706+
conda_id: conda-forge/tabulator-py
2707+
pypi_id: tabulator
2708+
- name: messytables
2709+
github_id: okfn/messytables
2710+
category: data-loading
2711+
pypi_id: messytables
2712+
- name: datatest
2713+
github_id: shawnbrown/datatest
2714+
category: data-loading
2715+
pypi_id: datatest
2716+
- name: rows
2717+
github_id: turicas/rows
2718+
category: data-loading
2719+
pypi_id: rows
2720+
- name: deepdish
2721+
github_id: uchicago-cs/deepdish
2722+
category: data-loading
2723+
conda_id: conda-forge/deepdish
2724+
pypi_id: deepdish
2725+
- name: camelot
2726+
github_id: camelot-dev/camelot
2727+
category: data-loading
2728+
pypi_id: camelot-py
2729+
- name: excalibur
2730+
github_id: camelot-dev/excalibur
2731+
category: data-loading
2732+
pypi_id: excalibur-py
2733+
- name: xlwings
2734+
github_id: xlwings/xlwings
2735+
category: data-loading
2736+
conda_id: conda-forge/xlwings
2737+
pypi_id: xlwings
2738+
- name: csvs-to-sqlite
2739+
github_id: simonw/csvs-to-sqlite
2740+
category: data-loading
2741+
pypi_id: csvs-to-sqlite
2742+
- name: Tablib
2743+
pypi_id: tablib
2744+
github_id: jazzband/tablib
2745+
conda_id: conda-forge/tablib
2746+
category: data-loading
2747+
- name: python-magic
2748+
pypi_id: python-magic
2749+
github_id: ahupp/python-magic
2750+
conda_id: conda-forge/python-magic
2751+
category: data-loading
2752+
- name: SDV
2753+
pypi_id: sdv
2754+
github_id: sdv-dev/SDV
2755+
category: data-loading
2756+
- name: Intake
2757+
pypi_id: intake
2758+
github_id: intake/intake
2759+
conda_id: conda-forge/intake
2760+
category: data-loading
2761+
- name: csvkit
2762+
pypi_id: csvkit
2763+
github_id: wireservice/csvkit
2764+
conda_id: conda-forge/csvkit
2765+
category: data-loading
2766+
- name: snorkel
2767+
pypi_id: snorkel
2768+
github_id: snorkel-team/snorkel
2769+
conda_id: conda-forge/snorkel
2770+
category: data-loading
2771+
- name: pyexcel-xlsx
2772+
pypi_id: pyexcel-xlsx
2773+
github_id: pyexcel/pyexcel-xlsx
2774+
conda_id: conda-forge/pyexcel-xlsx
2775+
category: data-loading
2776+
- name: Faker
2777+
pypi_id: Faker
2778+
github_id: joke2k/faker
2779+
conda_id: conda-forge/faker
2780+
category: data-loading
2781+
- name: smart-open
2782+
pypi_id: smart-open
2783+
github_id: RaRe-Technologies/smart_open
2784+
category: data-loading
2785+
- name: pandas-datareader
2786+
pypi_id: pandas-datareader
2787+
github_id: pydata/pandas-datareader
2788+
conda_id: conda-forge/pandas-datareader
2789+
category: data-loading
2790+
- name: openpyxl
2791+
pypi_id: openpyxl
2792+
gitlab_id: "https://foss.heptapod.net/api/graphql::openpyxl/openpyxl"
2793+
conda_id: openpyxl
2794+
dockerhub_id: "openpyxl/openpyxl-ci"
2795+
docs_url: https://openpyxl.readthedocs.io/en/stable/
2796+
license: MIT
2797+
category: data-loading
2798+
- name: textract
2799+
pypi_id: textract
2800+
github_id: deanmalmgren/textract
2801+
conda_id: conda-forge/textract
2802+
category: data-loading
2803+
- name: PDFMiner
2804+
pypi_id: pdfminer
2805+
conda_id: conda-forge/pdfminer
2806+
github_id: euske/pdfminer
2807+
category: data-loading
2808+
- name: xmltodict
2809+
pypi_id: xmltodict
2810+
github_id: martinblech/xmltodict
2811+
conda_id: conda-forge/xmltodict
2812+
category: data-loading
2813+
- name: Singer
2814+
pypi_id: singer-python
2815+
github_id: singer-io/getting-started
2816+
description: "Standard for moving data between databases, web APIs, files, queues, and just about anything else you can think of."
2817+
license: AGPL-3.0
2818+
category: data-loading
2819+
- name: xlrd
2820+
pypi_id: xlrd
2821+
github_id: python-excel/xlrd
2822+
conda_id: conda-forge/xlrd
2823+
category: data-loading

0 commit comments

Comments
 (0)