From 64c95a7d0b01a49165cf593d435de70b2431abeb Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 28 Nov 2023 02:07:14 -0500 Subject: [PATCH 1/3] rework archive directory structure for better image layout --- Makefile | 2 +- README.md | 71 +++++++++---------- bookstack_file_exporter/archiver/archiver.py | 7 +- .../archiver/page_archiver.py | 34 +++++---- bookstack_file_exporter/exporter/node.py | 5 ++ 5 files changed, 60 insertions(+), 59 deletions(-) diff --git a/Makefile b/Makefile index 5d9e5f0..998062a 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) # github actions will use this to create a tag -IMAGE_TAG=1.0.0 +IMAGE_TAG=1.0.1 DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump diff --git a/README.md b/README.md index 874f691..d82d7b4 100644 --- a/README.md +++ b/README.md @@ -287,38 +287,40 @@ Shelves --> Books --> Chapters --> Pages kafka (shelf) ---> controller (book) ---> settings (chapter) - ---> retention-settings (page) - ---> retention-settings.md - ---> retention-settings_meta.json - ---> compression (page) - ---> compression.html - ---> compression.pdf - ---> compression_meta.json - ---> optional-config (page) + ---> retention-settings.md (page) + ---> retention-settings_meta.json + ---> compression.html (page) + ---> compression.pdf + ---> compression_meta.json + ---> optional-config.md (page) ... - ---> main (page) + ---> main.md (page) ... ---> broker (book) - ---> settings (page) + ---> settings.md (page) ... - ---> deploy (page) + ---> deploy.md (page) ... kafka-apps (shelf) ---> schema-registry (book) - ---> protobuf (page) + ---> protobuf.md (page) ... - ---> settings (page) + ---> settings.md (page) ... 
## Example with image layout -unassigned (Used for books with no shelf) +# unassigned dir is used for books with no shelf +unassigned (shelf) ---> test (book) - ---> test_page (page) - ---> test_page.md - ---> test_page.pdf - ---> images (image_dir) + ---> images (image_dir) + ---> test_page (page directory) ---> img-001.png ---> img-002.png + ---> rec_page + ---> img-010.png + ---> img-020.png + ---> test_page.md (page) + ... ---> rec_page (page) ---> rec_page.md ---> rec_page.pdf @@ -330,20 +332,15 @@ Another example is shown below: # book = react # basics = page -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.md -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.html -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.pdf -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.txt -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics_meta.json -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/YKvimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/dwwimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/NzZimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/Mymimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.md -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.html -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.pdf -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.txt -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs_meta.json +bookstack_export_2023-11-28_06-24-25/programming/react/basics.md +bookstack_export_2023-11-28_06-24-25/programming/react/basics.pdf +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/YKvimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/dwwimage.png
+bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/NzZimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/next1.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png +bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.md +bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.pdf ``` Books without a shelf will be put in a shelve folder named `unassigned`. @@ -363,13 +360,13 @@ You may notice some directories (books) and/or files (pages) in the archive have ### Images ### General -Images will be dumped in a separate directory, `images` within the page directory it belongs to. As shown earlier: +Images will be dumped in a separate directory, `images` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/images/{page}/{image_name}`. As shown earlier: ``` -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/YKvimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/dwwimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/NzZimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/Mymimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/dwwimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/NzZimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/next1.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png ``` **Note you may see old images in your exports. This is because, by default, Bookstack retains images/drawings that are uploaded even if no longer referenced on an active page. 
Admins can run `Cleanup Images` in the Maintenance Settings or via [CLI](https://www.bookstackapp.com/docs/admin/commands/#cleanup-unused-images) to remove them.** diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 329343c..c6d01c5 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -47,7 +47,7 @@ def get_bookstack_exports(self, page_nodes: Dict[int, Node]): if page.id_ in all_image_meta: page_image_meta = all_image_meta[page.id_] self._get_page_files(page, page_image_meta) - self._get_page_images(page.file_path, page_image_meta) + self._get_page_images(page, page_image_meta) def _get_page_files(self, page_node: Node, image_meta: List[ImageNode]): """pull all bookstack pages into local files/tar""" @@ -60,12 +60,13 @@ def _get_page_image_map(self) -> Dict[int, ImageNode]: return {} return self._page_archiver.get_image_meta() - def _get_page_images(self, page_path: str, img_nodes: List[ImageNode]): + def _get_page_images(self, page_node: Node, img_nodes: List[ImageNode]): if not img_nodes: log.debug("page has no images to pull") return log.debug("Exporting bookstack page images") - self._page_archiver.archive_page_images(page_path, img_nodes) + self._page_archiver.archive_page_images(page_node.parent.file_path, + page_node.name, img_nodes) def create_archive(self): """create tgz archive""" diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index e159bbb..9c0b07a 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -43,15 +43,14 @@ def __init__(self, img_meta_data: Dict[str, Union[int, str]]): self.url: str = img_meta_data['url'] self.name: str = self._get_image_name() self._markdown_str = "" - self._image_relative_path: str = f"./{_IMAGE_DIR_NAME}/{self.name}" + self._relative_path_prefix: str = f"./{_IMAGE_DIR_NAME}" 
def _get_image_name(self) -> str: return self.url.split('/')[-1] - @property - def image_relative_path(self): + def get_image_relative_path(self, page_name: str) -> str: """return image path local to page directory""" - return self._image_relative_path + return f"{self._relative_path_prefix}/{page_name}/{self.name}" @property def markdown_str(self): @@ -92,7 +91,7 @@ def __init__(self, archive_dir: str, config: ConfigNode) -> None: self.export_formats = config.user_inputs.formats self.api_urls = config.urls self._headers = config.headers - # parent export directory, bookstack-, and .tgz extension + # full path, bookstack-, and .tgz extension self.archive_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tgz']}" # name of intermediate tar file before gzip self.tar_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tar']}" @@ -114,14 +113,14 @@ def archive_page(self, page: Node, self._archive_page(page, export_format, page_data, image_urls) if self.asset_config.export_meta: - self._archive_page_meta(page.name, page.file_path, page.meta) + self._archive_page_meta(page.file_path, page.meta) def _archive_page(self, page: Node, export_format: str, data: bytes, image_nodes: List[ImageNode] = None): page_file_name = f"{self.archive_base_path}/" \ - f"{page.file_path}/{page.name}{_FILE_EXTENSION_MAP[export_format]}" + f"{page.file_path}{_FILE_EXTENSION_MAP[export_format]}" if self.modify_md and export_format == _MARKDOWN_STR_CHECK and image_nodes: - data = self._update_image_links(data, image_nodes) + data = self._update_image_links(page.name, data, image_nodes) self.write_data(page_file_name, data) def _get_page_data(self, page_id: int, export_format: str): @@ -129,10 +128,8 @@ def _get_page_data(self, page_id: int, export_format: str): return archiver_util.get_byte_response(url=url, headers=self._headers, verify_ssl=self.verify_ssl) - def _archive_page_meta(self, page_name: str, page_path: str, - meta_data: Dict[str, Union[str, int]]): - meta_file_name = 
f"{self.archive_base_path}/{page_path}/" \ - f"{page_name}{_FILE_EXTENSION_MAP['meta']}" + def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int]]): + meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" bytes_meta = archiver_util.get_json_bytes(meta_data) self.write_data(file_path=meta_file_name, data=bytes_meta) @@ -145,14 +142,14 @@ def get_image_meta(self) -> Dict[int, List[ImageNode]]: img_meta_json = img_meta_response.json()['data'] return self._create_image_map(img_meta_json) - def archive_page_images(self, page_path: str, image_nodes: List[ImageNode]): + def archive_page_images(self, parent_path: str, page_name: str, + image_nodes: List[ImageNode]): """pull images locally into a directory based on page""" - # image_base_path = f"{self.archive_base_path}/{page_path}{_IMAGE_DIR_SUFFIX}" - image_base_path = f"{self.archive_base_path}/{page_path}/{_IMAGE_DIR_NAME}" + image_base_path = f"{self.archive_base_path}/{parent_path}/{_IMAGE_DIR_NAME}" for img_node in image_nodes: img_data: bytes = archiver_util.get_byte_response(img_node.url, self._headers, self.verify_ssl) - image_path = f"{image_base_path}/{img_node.name}" + image_path = f"{image_base_path}/{page_name}/{img_node.name}" self.write_data(image_path, img_data) def write_data(self, file_path: str, data: bytes): @@ -168,7 +165,8 @@ def gzip_archive(self): """provide the tar to gzip and the name of the gzip output file""" archiver_util.create_gzip(self.tar_file, self.archive_file) - def _update_image_links(self, page_data: bytes, image_nodes: List[ImageNode]) -> bytes: + def _update_image_links(self, page_name: str, page_data: bytes, + image_nodes: List[ImageNode]) -> bytes: """regex replace links to local created directories""" for img_node in image_nodes: img_meta_url = f"{self.api_urls['images']}/{img_node.id}" @@ -179,7 +177,7 @@ def _update_image_links(self, page_data: bytes, image_nodes: List[ImageNode]) -> continue # 1 - what to replace, 2 - 
replace with, 3 is the data to replace page_data = re.sub(img_node.markdown_str.encode(), - img_node.image_relative_path.encode(), page_data) + img_node.get_image_relative_path(page_name).encode(), page_data) return page_data @property diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index ab1714a..d63e17f 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -75,6 +75,11 @@ def children(self): """return all children of a book/chapter/shelf""" return self._children + @property + def parent(self): + """return parent of a book/chapter/page""" + return self._parent + @property def empty(self): """return True if page node lacks content""" From 0ddeb86c7655570ba8a0898b6eb156cf7d07fa76 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 28 Nov 2023 02:12:21 -0500 Subject: [PATCH 2/3] improve read me --- README.md | 61 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index d82d7b4..802ab12 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,37 @@ # bookstack-file-exporter Table of Contents -- [Background](#background) -- [Using This Application](#using-this-application) +- [bookstack-file-exporter](#bookstack-file-exporter) + - [Background](#background) + - [Features](#features) + - [Use Case](#use-case) + - [Using This Application](#using-this-application) - [Run via Pip](#run-via-pip) - - [Run via Docker](#run-via-docker) -- [Authentication](#authentication) -- [Configuration](#configuration) - - [Simple example](#just-run) - - [Full example](#full-example) - - [Options and descriptions](#options-and-descriptions) - - [Environment variables](#valid-environment-variables) -- [Backup Behavior](#backup-behavior) + - [Examples](#examples) + - [Options](#options) + - [Environment Variables](#environment-variables) + - [Python Version](#python-version) + - [Run Via Docker](#run-via-docker) 
+ - [Examples](#examples-1) + - [Environment Variables](#environment-variables-1) + - [Bind Mounts](#bind-mounts) + - [Authentication](#authentication) + - [Configuration](#configuration) + - [Just Run](#just-run) + - [Full Example](#full-example) + - [Options and Descriptions](#options-and-descriptions) + - [Valid Environment Variables](#valid-environment-variables) + - [Backup Behavior](#backup-behavior) + - [Export File](#export-file) + - [General](#general) - [Images](#images) + - [General](#general-1) - [Modify Markdown Files](#modify-markdown-files) -- [Object Storage](#object-storage) - - [Minio](#minio-backups) -- [Future Items](#future-items) + - [Object Storage](#object-storage) + - [Minio Backups](#minio-backups) + - [Authentication](#authentication-1) + - [Example](#example) + - [Configuration](#configuration-1) + - [Future Items](#future-items) ## Background _Features are actively being developed. See `Future Items` section for more details. Open an issue for a feature request._ @@ -122,6 +138,7 @@ Docker can be utilized to run the exporter. #### Examples ```bash +# --user flag to override the uid/gid for created files. Set this to your uid/gid docker run \ --user ${USER_ID}:${USER_GID} \ -v $(pwd)/config.yml:/export/config/config.yml:ro \ @@ -144,7 +161,7 @@ Tokens and other options can be specified, example: ```bash # '-e' flag for env vars -# --user flag to override the uid/gid for created files +# --user flag to override the uid/gid for created files. 
Set this to your uid/gid docker run \ -e LOG_LEVEL='debug' \ -e BOOKSTACK_TOKEN_ID='xyz' \ @@ -190,11 +207,11 @@ host: "https://bookstack.yourdomain.com" credentials: token_id: "" token_secret: "" -formats: +formats: # md only example - markdown -- html -- pdf -- plaintext +# - html +# - pdf +# - plaintext output_path: "bkps/" assets: export_images: false @@ -204,7 +221,7 @@ assets: ``` #### Full Example -Below is an example configuration that shows all possible options, +Below is an example configuration that shows example values for all possible options. ```yaml host: "https://bookstack.yourdomain.com" @@ -289,9 +306,11 @@ kafka (shelf) ---> settings (chapter) ---> retention-settings.md (page) ---> retention-settings_meta.json + ... ---> compression.html (page) ---> compression.pdf ---> compression_meta.json + ... ---> optional-config.md (page) ... ---> main.md (page) @@ -320,7 +339,7 @@ unassigned (shelf) ---> img-010.png ---> img-020.png ---> test_page.md (page) - ... + ... ---> rec_page (page) ---> rec_page.md ---> rec_page.pdf @@ -382,7 +401,7 @@ Page (parent) -> Images (Children) relationships are created and then each image [![pool-topology-1.png](https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ## after -[![pool-topology-1.png](./images/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) +[![pool-topology-1.png](./images/{page_name}/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ``` This allows the image to be found locally within the export files and allow your `markdown` docs to have all the images display properly like it would normally would. 
From 63134f3ea96fc22e8761ee4554a20da05dcb6485 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 28 Nov 2023 02:26:30 -0500 Subject: [PATCH 3/3] configure vscode markdown.extension for ToC generation --- .vscode/settings.json | 3 +++ README.md | 20 +------------------- 2 files changed, 4 insertions(+), 19 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..c4037f8 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "markdown.extension.toc.levels": "1..3" +} \ No newline at end of file diff --git a/README.md b/README.md index 802ab12..b204885 100644 --- a/README.md +++ b/README.md @@ -6,31 +6,15 @@ Table of Contents - [Use Case](#use-case) - [Using This Application](#using-this-application) - [Run via Pip](#run-via-pip) - - [Examples](#examples) - - [Options](#options) - - [Environment Variables](#environment-variables) - - [Python Version](#python-version) - [Run Via Docker](#run-via-docker) - - [Examples](#examples-1) - - [Environment Variables](#environment-variables-1) - - [Bind Mounts](#bind-mounts) - [Authentication](#authentication) - [Configuration](#configuration) - - [Just Run](#just-run) - - [Full Example](#full-example) - - [Options and Descriptions](#options-and-descriptions) - - [Valid Environment Variables](#valid-environment-variables) - [Backup Behavior](#backup-behavior) - - [Export File](#export-file) - [General](#general) - [Images](#images) - - [General](#general-1) - [Modify Markdown Files](#modify-markdown-files) - [Object Storage](#object-storage) - [Minio Backups](#minio-backups) - - [Authentication](#authentication-1) - - [Example](#example) - - [Configuration](#configuration-1) - [Future Items](#future-items) ## Background @@ -288,10 +272,9 @@ General ## Backup Behavior -### Export File +### General Backups are exported in `.tgz` format and generated based off timestamp. 
Export names will be in the format: `%Y-%m-%d_%H-%M-%S` (Year-Month-Day_Hour-Minute-Second). *Files are first pulled locally to create the tarball and then can be sent to object storage if needed*. Example file name: `bookstack_export_2023-09-22_07-19-54.tgz`. -### General The exporter can also do housekeeping duties and keep a configured number of archives and delete older ones. See `keep_last` property in the [Configuration](#options-and-descriptions) section. Object storage provider configurations include their own `keep_last` property for flexibility. For file names, `slug` names (from Bookstack API) are used, as such certain characters like `!`, `/` will be ignored and spaces replaced from page names/titles. @@ -378,7 +361,6 @@ You may notice some directories (books) and/or files (pages) in the archive have ### Images -### General Images will be dumped in a separate directory, `images` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/images/{page}/{image_name}`. As shown earlier: ```