diff --git a/README.md b/README.md index 5ce6970..a7487cf 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ assets: The exporter can be installed via pip and run directly. #### Python Version -_Note: This application is tested and developed on Python version `3.12.X`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.12.X` version._ +_Note: This application is tested and developed on Python version `3.13.X`. The min required version is >= `3.8`, but it is recommended to install (or set up a venv with) a `3.13.X` version._ #### Examples ```bash @@ -248,7 +248,7 @@ More descriptions can be found for each section below: | `credentials.token_secret` | `str` | `true` if `credentials`| If `credentials` section is given, this should be a valid tokenSecret | | `additional_headers` | `object` | `false` | Optional section where key/value for pairs can be specified to use in Bookstack http request headers. | `formats` | `list` | `true` | Which export formats to use for Bookstack page content. Valid options are: `["markdown", "html", "pdf", "plaintext"]`| -| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. If not provided, will use current run directory by default | +| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. The exporter will attempt to create this directory and any missing parent directories. If not provided, will use current run directory by default. If using docker, this option can be omitted. | | `assets` | `object` | `false` | Optional section to export additional assets from pages. 
| | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | | `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | @@ -386,6 +386,8 @@ bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png **Note you may see old images in your exports. This is because, by default, Bookstack retains images/drawings that are uploaded even if no longer referenced on an active page. Admins can run `Cleanup Images` in the Maintenance Settings or via [CLI](https://www.bookstackapp.com/docs/admin/commands/#cleanup-unused-images) to remove them.** +If an API call to get an image or its metadata fails, the exporter will skip the image and log the error. If using the `modify_markdown` option, the image links in the document will be left untouched, in their original form. All API calls are retried 3 times after initial failure. + ### Attachments Attachments will be dumped in a separate directory, `attachments` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/attachments/{page}/{attachment_name}`. As shown earlier: @@ -401,6 +403,8 @@ bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample [Reference](https://demo.bookstackapp.com/api/docs#attachments-list) and excerpt from Bookstack API docs: > Get a listing of attachments visible to the user. The external property indicates whether the attachment is simple a link. A false value for the external property would indicate a file upload. +If an API call to get an attachment or its metadata fails, the exporter will skip the attachment and log the error. 
If using the `modify_markdown` option, the attachment links in the document will be left untouched, in their original form. All API calls are retried 3 times after initial failure. + ### Modify Markdown Files **To use this feature, `assets.export_images` should be set to `true` and/or `assets.export_attachments`** diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 6af2024..969ee14 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -35,6 +35,22 @@ def __init__(self, config: ConfigNode): self._page_archiver = PageArchiver(self.archive_dir, self.config) self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3} + def create_export_dir(self): + """create directory for archiving""" + if not self.config.user_inputs.output_path: + log.info("No output path specified, using current directory for archive") + return + log.info("Creating base directory for archive: %s", + self.config.user_inputs.output_path) + # in docker, this may fail if the user id is not the same as the host + try: + util.create_dir(self.config.user_inputs.output_path) + except PermissionError as perm_err: + log.warning("Failed to create base directory: %s", perm_err) + log.warning("This usually occurs in docker environments " \ + "attempting to skip this step") + return + def get_bookstack_exports(self, page_nodes: Dict[int, Node]): """export all page content""" log.info("Exporting all bookstack page contents") diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index 84ba201..fba5047 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -142,7 +142,7 @@ def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str, return {} # use a map for faster lookup failed_assets = {} - node_base_path = 
f"{self.archive_base_path}/{parent_path}/" + node_base_path = f"{self.archive_base_path}/{parent_path}" for asset_node in asset_nodes: try: asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url) diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index 0980fb6..e6bcb8f 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -7,6 +7,7 @@ from io import BytesIO import gzip import glob +from pathlib import Path from bookstack_file_exporter.common import util @@ -48,3 +49,7 @@ def scan_archives(base_dir: str, extension: str) -> str: """scan export directory for archives""" file_pattern = f"{base_dir}_*{extension}" return glob.glob(file_pattern) + +def create_dir(dir_path: str): + """create a directory if not exists""" + Path(dir_path).mkdir(parents=True, exist_ok=True) diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index f63a215..b69a35b 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -41,6 +41,9 @@ def exporter(args: argparse.Namespace): ## start archive ## archive: Archiver = Archiver(config) + # create export directory if not exists + archive.create_export_dir() + # get all page content for each page archive.get_bookstack_exports(page_nodes)