pycoder1
diff --git a/‎general/pdf-table-extractor/1710.05006.pdf
5.09 MB b/‎general/pdf-table-extractor/1710.05006.pdf
5.09 MB
diff --git a/‎general/pdf-table-extractor/README.md
Lines changed: 4 additions & 6 deletions b/‎general/pdf-table-extractor/README.md
Lines changed: 4 additions & 6 deletions
diff --git a/‎general/pdf-table-extractor/pdf_table_extractor.py renamed to ‎general/pdf-table-extractor/pdf_table_extractor_camelot.py
Lines changed: 3 additions & 1 deletion b/‎general/pdf-table-extractor/pdf_table_extractor.py renamed to ‎general/pdf-table-extractor/pdf_table_extractor_camelot.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎general/pdf-table-extractor/pdf_table_extractor_tabula.py
Lines changed: 24 additions & 0 deletions b/‎general/pdf-table-extractor/pdf_table_extractor_tabula.py
Lines changed: 24 additions & 0 deletions
diff --git a/‎general/pdf-table-extractor/pdfs/1710.05006.pdf
5.09 MB b/‎general/pdf-table-extractor/pdfs/1710.05006.pdf
5.09 MB
diff --git a/‎general/pdf-table-extractor/pdfs/foo.pdf
82.2 KB b/‎general/pdf-table-extractor/pdfs/foo.pdf
82.2 KB
diff --git a/‎general/pdf-table-extractor/requirements.txt
Lines changed: 2 additions & 1 deletion b/‎general/pdf-table-extractor/requirements.txt
Lines changed: 2 additions & 1 deletion
@@ -1,8 +1,6 @@
 # [How to Extract PDF Tables in Python](https://www.thepythoncode.com/article/extract-pdf-tables-in-python-camelot)
 To run this:
-- You need to install required dependencies for the library [here](https://camelot-py.readthedocs.io/en/master/user/install-deps.html#install-deps).
-- `pip3 install -r requirements.txt`
-- Extract PDFs of the file `foo.pdf`:
-    ```
-    python pdf_table_extractor.py foo.pdf
-    ```
+- Install the dependencies required by the camelot library, as described [here](https://camelot-py.readthedocs.io/en/master/user/install-deps.html#install-deps).
+- Run `pip3 install -r requirements.txt`.
+- `pdf_table_extractor_camelot.py` uses the camelot library.
+- `pdf_table_extractor_tabula.py` uses the tabula-py library.
@@ -13,8 +13,10 @@
 # print the first table as Pandas DataFrame
 print(tables[0].df)
 
-# export individually
+# export individually as CSV
 tables[0].to_csv("foo.csv")
+# export individually as Excel (.xlsx extension)
+tables[0].to_excel("foo.xlsx")
 
 # or export all in a zip
 tables.export("foo.csv", f="csv", compress=True)
 
@@ -0,0 +1,24 @@
+import tabula
+import os
+# uncomment if you want to pass pdf file from command line arguments
+# import sys
+
+# Read the tables from every page of the PDF. To take the path from the
+# command line, uncomment `import sys` above and pass sys.argv[1] here instead.
+tables = tabula.read_pdf("1710.05006.pdf", pages="all")
+
+# save the extracted tables in a folder
+folder_name = "tables"
+# exist_ok=True creates the folder only when it is missing, avoiding a
+# separate isdir() check that could race with another process creating it
+os.makedirs(folder_name, exist_ok=True)
+# iterate over the extracted tables and export each one to its own Excel file
+for i, table in enumerate(tables, start=1):
+    table.to_excel(os.path.join(folder_name, f"table_{i}.xlsx"), index=False)
+
+# convert all tables of a PDF file into a single CSV file
+# supported output_format values are "csv", "json" or "tsv"
+tabula.convert_into("1710.05006.pdf", "output.csv", output_format="csv", pages="all")
+# convert all PDFs in a folder into CSV format
+# the `pdfs` folder must already exist in the current directory
+tabula.convert_into_by_batch("pdfs", output_format="csv", pages="all")
@@ -1 +1,2 @@
-camelot-py[cv]
+camelot-py[cv]
+tabula-py
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`		`-camelot-py[cv]`
	`1`	`+camelot-py[cv]`
	`2`	`+tabula-py`