From 6c7515ba637361b6d2fb89d0d4efbca6398c85b8 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 6 Nov 2024 04:34:19 -0500 Subject: [PATCH 1/5] update readme for asset skip feature --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ce6970..732e6ff 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ assets: The exporter can be installed via pip and run directly. #### Python Version -_Note: This application is tested and developed on Python version `3.12.X`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.12.X` version._ +_Note: This application is tested and developed on Python version `3.13.X`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.13.X` version._ #### Examples ```bash @@ -386,6 +386,8 @@ bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png **Note you may see old images in your exports. This is because, by default, Bookstack retains images/drawings that are uploaded even if no longer referenced on an active page. Admins can run `Cleanup Images` in the Maintenance Settings or via [CLI](https://www.bookstackapp.com/docs/admin/commands/#cleanup-unused-images) to remove them.** +If an API call to get an image or its metadata fails, the exporter will skip the image and log the error. If the `modify_markdown` option is used, the image links in the document will be left untouched, in their original form. All API calls are retried 3 times after the initial failure. + ### Attachments Attachments will be dumped in a separate directory, `attachments` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/attachments/{page}/{attachment_name}`. 
As shown earlier: @@ -401,6 +403,8 @@ bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample [Reference](https://demo.bookstackapp.com/api/docs#attachments-list) and excerpt from Bookstack API docs: > Get a listing of attachments visible to the user. The external property indicates whether the attachment is simple a link. A false value for the external property would indicate a file upload. +If an API call to get an attachment or its metadata fails, the exporter will skip the attachment and log the error. If the `modify_markdown` option is used, the attachment links in the document will be left untouched, in their original form. All API calls are retried 3 times after the initial failure. + ### Modify Markdown Files **To use this feature, `assets.export_images` should be set to `true` and/or `assets.export_attachments`** From 64a8717bac9d807411e529b46948e4f259f98c13 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 6 Nov 2024 04:59:35 -0500 Subject: [PATCH 2/5] create user provided output_path directory if not exists --- README.md | 2 +- bookstack_file_exporter/archiver/archiver.py | 9 +++++++++ bookstack_file_exporter/archiver/util.py | 5 +++++ bookstack_file_exporter/run.py | 3 +++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 732e6ff..76b2192 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ More descriptions can be found for each section below: | `credentials.token_secret` | `str` | `true` if `credentials`| If `credentials` section is given, this should be a valid tokenSecret | | `additional_headers` | `object` | `false` | Optional section where key/value for pairs can be specified to use in Bookstack http request headers. | `formats` | `list` | `true` | Which export formats to use for Bookstack page content. Valid options are: `["markdown", "html", "pdf", "plaintext"]`| -| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. 
User who runs the command should have access to read/write to this directory. If not provided, will use current run directory by default | +| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default.| | `assets` | `object` | `false` | Optional section to export additional assets from pages. | | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | | `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 6af2024..7d64110 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -35,6 +35,15 @@ def __init__(self, config: ConfigNode): self._page_archiver = PageArchiver(self.archive_dir, self.config) self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3} + def create_export_dir(self): + """create directory for archiving""" + if not self.config.user_inputs.output_path: + log.info("No output path specified, using current directory for archive") + return + log.info("Creating base directory for archive: %s", + self.config.user_inputs.output_path) + util.create_dir(self.config.user_inputs.output_path) + def get_bookstack_exports(self, page_nodes: Dict[int, Node]): """export all page content""" log.info("Exporting all bookstack page contents") diff --git 
a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index 0980fb6..e6bcb8f 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -7,6 +7,7 @@ from io import BytesIO import gzip import glob +from pathlib import Path from bookstack_file_exporter.common import util @@ -48,3 +49,7 @@ def scan_archives(base_dir: str, extension: str) -> str: """scan export directory for archives""" file_pattern = f"{base_dir}_*{extension}" return glob.glob(file_pattern) + +def create_dir(dir_path: str): + """create a directory if not exists""" + Path(dir_path).mkdir(parents=True, exist_ok=True) diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index f63a215..b69a35b 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -41,6 +41,9 @@ def exporter(args: argparse.Namespace): ## start archive ## archive: Archiver = Archiver(config) + # create export directory if not exists + archive.create_export_dir() + # get all page content for each page archive.get_bookstack_exports(page_nodes) From e93eb309ba6e9789333f19a2f45834743d5886a2 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 6 Nov 2024 05:38:26 -0500 Subject: [PATCH 3/5] make create output dir error catchable for docker usage --- bookstack_file_exporter/archiver/archiver.py | 9 ++++++++- bookstack_file_exporter/archiver/page_archiver.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 7d64110..706ed56 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -42,7 +42,14 @@ def create_export_dir(self): return log.info("Creating base directory for archive: %s", self.config.user_inputs.output_path) - util.create_dir(self.config.user_inputs.output_path) + # in docker, this may fail if the user id is not the same as the host + try: + 
util.create_dir(self.config.user_inputs.output_path) + except PermissionError as perm_err: + log.warning("Failed to create base directory: %s", perm_err) + log.warning("This usually occurs in docker environments, \ + attempting to skip this step") + return def get_bookstack_exports(self, page_nodes: Dict[int, Node]): """export all page content""" diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index 84ba201..fba5047 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -142,7 +142,7 @@ def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str, return {} # use a map for faster lookup failed_assets = {} - node_base_path = f"{self.archive_base_path}/{parent_path}/" + node_base_path = f"{self.archive_base_path}/{parent_path}" for asset_node in asset_nodes: try: asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url) From 12929e3dcdd23152f9cc7e5db42b8029da0dfa8c Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 6 Nov 2024 05:51:01 -0500 Subject: [PATCH 4/5] minor fix on logging and output description --- README.md | 2 +- bookstack_file_exporter/archiver/archiver.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 76b2192..a7487cf 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ More descriptions can be found for each section below: | `credentials.token_secret` | `str` | `true` if `credentials`| If `credentials` section is given, this should be a valid tokenSecret | | `additional_headers` | `object` | `false` | Optional section where key/value for pairs can be specified to use in Bookstack http request headers. | `formats` | `list` | `true` | Which export formats to use for Bookstack page content. 
Valid options are: `["markdown", "html", "pdf", "plaintext"]`| -| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default.| +| `output_path` | `str` | `false` | Optional (default: `cwd`). The directory (relative or full path) in which to place exports. The user who runs the command should have read/write access to this directory. The exporter will attempt to create this directory and any missing parent directories if they do not exist. If not provided, the current run directory is used by default. If using docker, this option can be omitted. | | `assets` | `object` | `false` | Optional section to export additional assets from pages. | | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | | `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. 
See [Backup Behavior](#backup-behavior) for more information on layout | diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 706ed56..096beba 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -47,8 +47,8 @@ def create_export_dir(self): util.create_dir(self.config.user_inputs.output_path) except PermissionError as perm_err: log.warning("Failed to create base directory: %s", perm_err) - log.warning("This usually occurs in docker environments, \ - attempting to skip this step") + log.warning("This usually occurs in docker environments" \ + "attempting to skip this step") return def get_bookstack_exports(self, page_nodes: Dict[int, Node]): From 6f0c843523822c8a7115ac5e4aa366d1d455d464 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 6 Nov 2024 06:00:30 -0500 Subject: [PATCH 5/5] fix minor spacing issue on log message --- bookstack_file_exporter/archiver/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 096beba..969ee14 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -47,7 +47,7 @@ def create_export_dir(self): util.create_dir(self.config.user_inputs.output_path) except PermissionError as perm_err: log.warning("Failed to create base directory: %s", perm_err) - log.warning("This usually occurs in docker environments" \ + log.warning("This usually occurs in docker environments " \ "attempting to skip this step") return