@@ -42,6 +42,9 @@ categories:
42
42
- category : " web-scraping"
43
43
title : " Web Scraping & Crawling"
44
44
subtitle : " Libraries for web scraping, crawling, downloading, and mining."
45
+ - category : " data-loading"
46
+ title : " Data Loading & Extraction"
47
+ subtitle : " Libraries for loading, collecting, and extracting data from a variety of data sources and formats."
45
48
- category : " monitoring"
46
49
title : " Monitoring"
47
50
- category : " web-ui"
@@ -2692,3 +2695,129 @@ projects:
2692
2695
- name : IP2Trace
2693
2696
github_id : ip2location/ip2trace-python
2694
2697
pypi_id : IP2Trace
2698
+ # Data Loading & Extraction
2699
+ - name : Datasets
2700
+ pypi_id : datasets
2701
+ github_id : huggingface/datasets
2702
+ category : data-loading
2703
+ - name : tabulator-py
2704
+ github_id : frictionlessdata/tabulator-py
2705
+ category : data-loading
2706
+ conda_id : conda-forge/tabulator-py
2707
+ pypi_id : tabulator
2708
+ - name : messytables
2709
+ github_id : okfn/messytables
2710
+ category : data-loading
2711
+ pypi_id : messytables
2712
+ - name : datatest
2713
+ github_id : shawnbrown/datatest
2714
+ category : data-loading
2715
+ pypi_id : datatest
2716
+ - name : rows
2717
+ github_id : turicas/rows
2718
+ category : data-loading
2719
+ pypi_id : rows
2720
+ - name : deepdish
2721
+ github_id : uchicago-cs/deepdish
2722
+ category : data-loading
2723
+ conda_id : conda-forge/deepdish
2724
+ pypi_id : deepdish
2725
+ - name : camelot
2726
+ github_id : camelot-dev/camelot
2727
+ category : data-loading
2728
+ pypi_id : camelot-py
2729
+ - name : excalibur
2730
+ github_id : camelot-dev/excalibur
2731
+ category : data-loading
2732
+ pypi_id : excalibur-py
2733
+ - name : xlwings
2734
+ github_id : xlwings/xlwings
2735
+ category : data-loading
2736
+ conda_id : conda-forge/xlwings
2737
+ pypi_id : xlwings
2738
+ - name : csvs-to-sqlite
2739
+ github_id : simonw/csvs-to-sqlite
2740
+ category : data-loading
2741
+ pypi_id : csvs-to-sqlite
2742
+ - name : Tablib
2743
+ pypi_id : tablib
2744
+ github_id : jazzband/tablib
2745
+ conda_id : conda-forge/tablib
2746
+ category : data-loading
2747
+ - name : python-magic
2748
+ pypi_id : python-magic
2749
+ github_id : ahupp/python-magic
2750
+ conda_id : conda-forge/python-magic
2751
+ category : data-loading
2752
+ - name : SDV
2753
+ pypi_id : sdv
2754
+ github_id : sdv-dev/SDV
2755
+ category : data-loading
2756
+ - name : Intake
2757
+ pypi_id : intake
2758
+ github_id : intake/intake
2759
+ conda_id : conda-forge/intake
2760
+ category : data-loading
2761
+ - name : csvkit
2762
+ pypi_id : csvkit
2763
+ github_id : wireservice/csvkit
2764
+ conda_id : conda-forge/csvkit
2765
+ category : data-loading
2766
+ - name : snorkel
2767
+ pypi_id : snorkel
2768
+ github_id : snorkel-team/snorkel
2769
+ conda_id : conda-forge/snorkel
2770
+ category : data-loading
2771
+ - name : pyexcel-xlsx
2772
+ pypi_id : pyexcel-xlsx
2773
+ github_id : pyexcel/pyexcel-xlsx
2774
+ conda_id : conda-forge/pyexcel-xlsx
2775
+ category : data-loading
2776
+ - name : Faker
2777
+ pypi_id : Faker
2778
+ github_id : joke2k/faker
2779
+ conda_id : conda-forge/faker
2780
+ category : data-loading
2781
+ - name : smart-open
2782
+ pypi_id : smart-open
2783
+ github_id : RaRe-Technologies/smart_open
2784
+ category : data-loading
2785
+ - name : pandas-datareader
2786
+ pypi_id : pandas-datareader
2787
+ github_id : pydata/pandas-datareader
2788
+ conda_id : conda-forge/pandas-datareader
2789
+ category : data-loading
2790
+ - name : openpyxl
2791
+ pypi_id : openpyxl
2792
+ gitlab_id : " https://foss.heptapod.net/api/graphql::openpyxl/openpyxl"
2793
+ conda_id : openpyxl
2794
+ dockerhub_id : " openpyxl/openpyxl-ci"
2795
+ docs_url : https://openpyxl.readthedocs.io/en/stable/
2796
+ license : MIT
2797
+ category : data-loading
2798
+ - name : textract
2799
+ pypi_id : textract
2800
+ github_id : deanmalmgren/textract
2801
+ conda_id : conda-forge/textract
2802
+ category : data-loading
2803
+ - name : PDFMiner
2804
+ pypi_id : pdfminer
2805
+ conda_id : conda-forge/pdfminer
2806
+ github_id : euske/pdfminer
2807
+ category : data-loading
2808
+ - name : xmltodict
2809
+ pypi_id : xmltodict
2810
+ github_id : martinblech/xmltodict
2811
+ conda_id : conda-forge/xmltodict
2812
+ category : data-loading
2813
+ - name : Singer
2814
+ pypi_id : singer-python
2815
+ github_id : singer-io/getting-started
2816
+ description : " Standard for moving data between databases, web APIs, files, queues, and just about anything else you can think of."
2817
+ license : AGPL-3.0
2818
+ category : data-loading
2819
+ - name : xlrd
2820
+ pypi_id : xlrd
2821
+ github_id : python-excel/xlrd
2822
+ conda_id : conda-forge/xlrd
2823
+ category : data-loading
0 commit comments