From bbf1edffdcaf02c550afcc8c17315054152729d7 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 17 Jan 2024 02:09:31 -0500 Subject: [PATCH 01/13] add arm64 docker image support --- .github/workflows/on_pr_merged.yml | 11 ++++++++--- Makefile | 4 ++++ README.md | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml index 255b7ed..04552fb 100644 --- a/.github/workflows/on_pr_merged.yml +++ b/.github/workflows/on_pr_merged.yml @@ -30,10 +30,15 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build the Docker image + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + - name: Build and push the Docker image run: make docker_build_latest - - name: Push Docker image - run: make docker_push_latest + # - name: Push Docker image + # run: make docker_push_latest - name: Update Dockerhub Documentation uses: peter-evans/dockerhub-description@v3 with: diff --git a/Makefile b/Makefile index 08edc58..30db136 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,8 @@ download_testpypi: docker_build: docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --output "type=image,push=false" \ --build-arg BASE_IMAGE=${BASE_IMAGE} \ --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} \ --build-arg DOCKER_WORK_DIR=${DOCKER_WORK_DIR} \ @@ -39,6 +41,8 @@ docker_build: docker_build_latest: docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --output "type=image,push=true" \ --build-arg BASE_IMAGE=${BASE_IMAGE} \ --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} \ --build-arg DOCKER_WORK_DIR=${DOCKER_WORK_DIR} \ diff --git a/README.md b/README.md index ed5d612..b27d77e 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ python -m bookstack_file_exporter -c _Note: This application is tested and developed on Python version `3.12.X`. 
The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.12.X` version._ ### Run Via Docker -Docker can be utilized to run the exporter. +Docker images are provided for `linux/amd64` and `linux/arm64` variants only at the moment. If another variant is required, please request it via Github Issue. #### Examples ```bash From 97778f9b50b1bbcfa06799e67ceb0b505ffd066a Mon Sep 17 00:00:00 2001 From: pchang388 Date: Wed, 17 Jan 2024 02:12:53 -0500 Subject: [PATCH 02/13] add arm64 docker image support --- .github/workflows/on_pr_open.docker-build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 0f91b2e..73a9c1b 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,6 +11,11 @@ jobs: - uses: actions/checkout@v3 - name: Prepare Makefile run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.run_id}}/' Makefile + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 - name: Build the Docker image run: make docker_build pip_build: From 1e4f8703c2a05ba31bc84f1e85123b46e3c51432 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 22 Jan 2024 04:49:23 -0500 Subject: [PATCH 03/13] rework asset archiver to generic object inheritence format --- .dockerignore | 2 + README.md | 10 +- bookstack_file_exporter/archiver/archiver.py | 38 +--- .../archiver/asset_archiver.py | 166 ++++++++++++++++ .../archiver/page_archiver.py | 188 +++++++----------- .../config_helper/config_helper.py | 3 +- .../config_helper/models.py | 1 + bookstack_file_exporter/exporter/node.py | 33 ++- setup.cfg | 4 +- 9 files changed, 282 insertions(+), 163 deletions(-) create mode 100644 bookstack_file_exporter/archiver/asset_archiver.py diff --git a/.dockerignore b/.dockerignore index 21d444f..7c04879 100644 --- a/.dockerignore +++ 
b/.dockerignore @@ -162,6 +162,8 @@ cython_debug/ ## Local local/ +.vscode/ +.github/ ## test outputs bkps/ \ No newline at end of file diff --git a/README.md b/README.md index b27d77e..76241ad 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,9 @@ assets: ### Run via Pip The exporter can be installed via pip and run directly. +#### Python Version +_Note: This application is tested and developed on Python version `3.12.X`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.12.X` version._ + #### Examples ```bash python -m pip install bookstack-file-exporter @@ -114,10 +117,7 @@ export LOG_LEVEL=debug python -m bookstack_file_exporter -c ``` -#### Python Version -_Note: This application is tested and developed on Python version `3.12.X`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.12.X` version._ - -### Run Via Docker +### Run via Docker Docker images are provided for `linux/amd64` and `linux/arm64` variants only at the moment. If another variant is required, please request it via Github Issue. #### Examples @@ -239,7 +239,7 @@ More descriptions can be found for each section below: #### Valid Environment Variables General -- `LOG_LEVEL`: default: `info``. Provide a valid log level: info, debug, warning, error. +- `LOG_LEVEL`: default: `info`. Provide a valid log level: info, debug, warning, error. 
[Bookstack Credentials](#authentication) - `BOOKSTACK_TOKEN_ID` diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index c6d01c5..6af2024 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -5,7 +5,7 @@ from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.archiver import util -from bookstack_file_exporter.archiver.page_archiver import PageArchiver, ImageNode +from bookstack_file_exporter.archiver.page_archiver import PageArchiver from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver from bookstack_file_exporter.config_helper.remote import StorageProviderConfig from bookstack_file_exporter.config_helper.config_helper import ConfigNode @@ -17,7 +17,7 @@ # pylint: disable=too-many-instance-attributes class Archiver: """ - Archiver pulls all the necessary files from upstream + Archiver helps handle archive duties: pulls all the necessary files from upstream and then pushes them to the specified backup location(s) Args: @@ -32,41 +32,15 @@ def __init__(self, config: ConfigNode): # for convenience self.base_dir = config.base_dir_name self.archive_dir = self._generate_root_folder(self.base_dir) - self._page_archiver = self._generate_page_archiver() + self._page_archiver = PageArchiver(self.archive_dir, self.config) self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3} - def get_bookstack_exports(self, page_nodes: Dict[int, Node]): """export all page content""" log.info("Exporting all bookstack page contents") # get images first if requested # this is because we may want to manipulate page data with modify_markdown flag - all_image_meta = self._get_page_image_map() - for _, page in page_nodes.items(): - page_image_meta = [] - if page.id_ in all_image_meta: - page_image_meta = all_image_meta[page.id_] - self._get_page_files(page, page_image_meta) - self._get_page_images(page, 
page_image_meta) - - def _get_page_files(self, page_node: Node, image_meta: List[ImageNode]): - """pull all bookstack pages into local files/tar""" - log.debug("Exporting bookstack page data") - self._page_archiver.archive_page(page_node, image_meta) - - def _get_page_image_map(self) -> Dict[int, ImageNode]: - if not self._page_archiver.export_images: - log.debug("skipping image export based on user input") - return {} - return self._page_archiver.get_image_meta() - - def _get_page_images(self, page_node: Node, img_nodes: List[ImageNode]): - if not img_nodes: - log.debug("page has no images to pull") - return - log.debug("Exporting bookstack page images") - self._page_archiver.archive_page_images(page_node.parent.file_path, - page_node.name, img_nodes) + self._page_archiver.archive_pages(page_nodes) def create_archive(self): """create tgz archive""" @@ -145,10 +119,6 @@ def _delete_files(self, file_list: List[str]): for file in file_list: util.remove_file(file) - def _generate_page_archiver(self)-> PageArchiver: - return PageArchiver(self.archive_dir, self.config) - - @staticmethod def _generate_root_folder(base_folder_name: str) -> str: """return base archive name""" diff --git a/bookstack_file_exporter/archiver/asset_archiver.py b/bookstack_file_exporter/archiver/asset_archiver.py new file mode 100644 index 0000000..ac8dd66 --- /dev/null +++ b/bookstack_file_exporter/archiver/asset_archiver.py @@ -0,0 +1,166 @@ +from typing import Union, List, Dict +# pylint: disable=import-error +from requests import Response +from re import sub as re_sub +import logging +import base64 + +from bookstack_file_exporter.common import util as common_util + +log = logging.getLogger(__name__) + +_IMAGE_DIR_NAME = "images" +_ATTACHMENT_DIR_NAME = "attachments" + + +class AssetNode: + def __init__(self, meta_data: Dict[str, int | str | bool]): + self.id: int = meta_data['id'] + self.page_id: int = meta_data['uploaded_to'] + # self.page_name: str = page_name + self.url: str = 
meta_data['url'] + self.name: str = self.url.split('/')[-1] + self._markdown_str = "" + self._relative_path_prefix: str = "" + + def get_relative_path(self, page_name: str) -> str: + """image path local to page directory""" + return f"{self._relative_path_prefix}/{page_name}/{self.name}" + + @property + def markdown_str(self): + """return markdown url str to replace""" + return self._markdown_str + + def set_markdown_content(self, asset_data: Dict[str, int | str | bool]) -> None: + self._markdown_str = self._get_md_url_str(asset_data) + + @staticmethod + def _get_md_url_str(asset_data: Dict[str, Union[int, str]]) -> str: + url_str = "" + if 'content' in asset_data: + if 'markdown' in asset_data['content']: + url_str = asset_data['content']['markdown'] + # check to see if empty before doing find + if not url_str: + return "" + # find the link between two parenthesis + # - markdown format + return url_str[url_str.find("(")+1:url_str.find(")")] + +class ImageNode(AssetNode): + def __init__(self, meta_data: Dict[str, Union[int, str]]): + super().__init__(meta_data) + log.debug(self.url) + self._relative_path_prefix = f"{_IMAGE_DIR_NAME}" + +class AttachmentNode(AssetNode): + def __init__(self, meta_data: Dict[str, Union[int, str, bool]], + base_url: str): + self.id: int = meta_data['id'] + self.page_id: int = meta_data['uploaded_to'] + self.url: str = f"{base_url}/{self.id}" + log.debug(self.url) + self.name = meta_data['name'] + self._markdown_str = "" + self._relative_path_prefix = f"{_ATTACHMENT_DIR_NAME}" + + @staticmethod + def _get_md_url_str(asset_data: Dict[str, int | str | dict]) -> str: + url_str = "" + if 'links' in asset_data: + if 'markdown' in asset_data['links']: + url_str = asset_data['links']['markdown'] + # check to see if empty before doing find + if not url_str: + return "" + # find the link between two parenthesis + # - markdown format + return url_str[url_str.find("(")+1:url_str.find(")")] + +class AssetArchiver: + def __init__(self, urls: 
Dict[str, str], headers: Dict[str, str], + verify_ssl: bool): + self.api_urls = urls + self.verify_ssl = verify_ssl + self._headers = headers + self._asset_map = { + 'images': self._create_image_map, + 'attachments': self._create_attachment_map + } + + def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNode]: + """Get image or attachment helpers for a page""" + asset_response: Response = common_util.http_get_request( + self.api_urls[asset_type], + self._headers, + self.verify_ssl) + asset_json = asset_response.json()['data'] + return self._asset_map[asset_type](asset_json) + + def get_asset_data(self, asset_type: str, + meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]: + """Get asset data based on type""" + data_url = f"{self.api_urls[asset_type]}/{meta_data.id}" + asset_data_response: Response = common_util.http_get_request( + data_url, + self._headers, + self.verify_ssl) + return asset_data_response.json() + + def get_asset_bytes(self, asset_type: str, url: str) -> bytes: + """Get raw asset data""" + asset_response: Response = common_util.http_get_request( + url, + self._headers, + self.verify_ssl) + match asset_type: + case "images": + asset_data = asset_response.content + case "attachments": + asset_data = self.decode_attachment_data(asset_response.json()['content']) + return asset_data + + def update_asset_links(self, asset_type, page_name: str, page_data: bytes, + asset_nodes: List[ImageNode | AttachmentNode]) -> bytes: + """update markdown links in page data""" + for asset_node in asset_nodes: + asset_data = self.get_asset_data(asset_type, asset_node) + asset_node.set_markdown_content(asset_data) + if not asset_node.markdown_str: + continue + page_data = re_sub(asset_node.markdown_str.encode(), + asset_node.get_relative_path(page_name).encode(), page_data) + return page_data + + @staticmethod + def _create_image_map(json_data: Dict[str, + List[Dict[str, str | int | bool | dict]]]) -> Dict[int, 
List[ImageNode]]: + image_page_map = {} + for img_meta in json_data: + img_node = ImageNode(img_meta) + if img_node.page_id in image_page_map: + image_page_map[img_node.page_id].append(img_node) + else: + image_page_map[img_node.page_id] = [img_node] + return image_page_map + + def _create_attachment_map(self, + json_data: Dict[str, List[Dict[str, str | int | bool | dict]]]) -> List[AssetNode]: + asset_nodes = {} + for asset_meta in json_data: + asset_node = None + if asset_meta['external']: + continue # skip external link, only get attachments + asset_node = AttachmentNode(asset_meta, self.api_urls['attachments']) + if asset_node.page_id in asset_nodes: + asset_nodes[asset_node.page_id].append(asset_node) + else: + asset_nodes[asset_node.page_id] = [asset_node] + return asset_nodes + + @staticmethod + def decode_attachment_data(b64encoded_data: str) -> bytes: + """decode base64 encoded data""" + asset_data = b64encoded_data.encode() + return base64.b64decode(asset_data) diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index 812da99..a284533 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -1,10 +1,8 @@ from typing import Union, List, Dict -import re -# pylint: disable=import-error -from requests import Response from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.archiver import util as archiver_util +from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode from bookstack_file_exporter.config_helper.config_helper import ConfigNode from bookstack_file_exporter.common import util as common_util @@ -24,61 +22,11 @@ "tgz": _TAR_GZ_SUFFIX } -_IMAGE_DIR_NAME = "images" -_MARKDOWN_STR_CHECK = "markdown" - -class ImageNode: - """ - ImageNode provides metadata and convenience for Bookstack images. 
- - Args: - :img_meta_data: = image meta data - - Returns: - :ImageNode: instance with attributes to help handle images. - """ - def __init__(self, img_meta_data: Dict[str, Union[int, str]]): - self.id: int = img_meta_data['id'] - self.page_id: int = img_meta_data['uploaded_to'] - self.url: str = img_meta_data['url'] - self.name: str = self._get_image_name() - self._markdown_str = "" - self._relative_path_prefix: str = f"./{_IMAGE_DIR_NAME}" - - def _get_image_name(self) -> str: - return self.url.split('/')[-1] - - def get_image_relative_path(self, page_name: str) -> str: - """return image path local to page directory""" - return f"{self._relative_path_prefix}/{page_name}/{self.name}" - - def set_markdown_content(self, img_details: Dict[str, Union[int, str]]): - """provide image metadata to set markdown properties""" - self._markdown_str = self._get_md_url_str(img_details) - - @property - def markdown_str(self): - """return markdown url str to replace""" - return self._markdown_str - - @staticmethod - def _get_md_url_str(img_data: Dict[str, Union[int, str]]) -> str: - url_str = "" - if 'content' in img_data: - if _MARKDOWN_STR_CHECK in img_data['content']: - url_str = img_data['content'][_MARKDOWN_STR_CHECK] - # check to see if empty before doing find - if not url_str: - return "" - # find the link between two parenthesis - # - markdown format - return url_str[url_str.find("(")+1:url_str.find(")")] - # pylint: disable=too-many-instance-attributes class PageArchiver: """ PageArchiver handles all data extraction and modifications - to Bookstack page contents including images. + to Bookstack page contents including assets like images or attachments. Args: :archive_dir: = directory where data will be put into. 
@@ -100,32 +48,51 @@ def __init__(self, archive_dir: str, config: ConfigNode) -> None: # name of the base folder to use within the tgz archive (internal tar layout) self.archive_base_path = archive_dir.split("/")[-1] self.modify_md: bool = self._check_md_modify() + self.asset_archiver = AssetArchiver(self.api_urls, self._headers, + self.verify_ssl) def _check_md_modify(self) -> bool: # check to ensure they have asset_config defined, could be None - if _MARKDOWN_STR_CHECK in self.export_formats: + if 'markdown' in self.export_formats: return self.asset_config.modify_markdown and self.export_images return False - - def archive_page(self, page: Node, - image_urls: List[str] = None): - """export page content""" - for export_format in self.export_formats: - page_data = self._get_page_data(page.id_, export_format) - self._archive_page(page, export_format, - page_data, image_urls) - if self.asset_config.export_meta: - self._archive_page_meta(page.file_path, page.meta) - - def _archive_page(self, page: Node, export_format: str, data: bytes, - image_nodes: List[ImageNode] = None): + + def archive_pages(self, page_nodes: Dict[int, Node]): + """export page contents and their images/attachments""" + # get assets first if requested + # this is because we may want to manipulate page data with modify_markdown flag + image_nodes = self._get_image_meta() + attachment_nodes = self._get_attachment_meta() + for _, page in page_nodes.items(): + page_images = [] + page_attachments = [] + if page.id_ in image_nodes: + page_images = image_nodes[page.id_] + if page.id_ in attachment_nodes: + page_attachments = attachment_nodes[page.id_] + for export_format in self.export_formats: + page_data = self._get_page_data(page.id_, export_format) + if page_images and export_format == 'markdown': + page_data = self._modify_markdown("images", page.name, + page_data, page_images) + if page_attachments and export_format == 'markdown': + page_data = self._modify_markdown("attachments", page.name, + 
page_data, page_attachments) + self._archive_page(page, export_format, + page_data) + self.archive_page_assets("images", page.parent.file_path, + page.name, page_images) + self.archive_page_assets("attachments", page.parent.file_path, + page.name, page_attachments) + if self.asset_config.export_meta: + self._archive_page_meta(page.file_path, page.meta) + + def _archive_page(self, page: Node, export_format: str, data: bytes): page_file_name = f"{self.archive_base_path}/" \ f"{page.file_path}{_FILE_EXTENSION_MAP[export_format]}" - if self.modify_md and export_format == _MARKDOWN_STR_CHECK and image_nodes: - data = self._update_image_links(page.name, data, image_nodes) self.write_data(page_file_name, data) - def _get_page_data(self, page_id: int, export_format: str): + def _get_page_data(self, page_id: int, export_format: str) -> bytes: url = f"{self.api_urls['pages']}/{page_id}/{_EXPORT_API_PATH}/{export_format}" return archiver_util.get_byte_response(url=url, headers=self._headers, verify_ssl=self.verify_ssl) @@ -134,25 +101,37 @@ def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" bytes_meta = archiver_util.get_json_bytes(meta_data) self.write_data(file_path=meta_file_name, data=bytes_meta) - - def get_image_meta(self) -> Dict[int, List[ImageNode]]: + + def _get_image_meta(self) -> Dict[int, List[ImageNode]]: """Get all image metadata into a {page_number: [image_url]} format""" - img_meta_response: Response = common_util.http_get_request( - self.api_urls['images'], - self._headers, - self.verify_ssl) - img_meta_json = img_meta_response.json()['data'] - return self._create_image_map(img_meta_json) - - def archive_page_images(self, parent_path: str, page_name: str, - image_nodes: List[ImageNode]): + if not self.asset_config.export_images: + return {} + return self.asset_archiver.get_asset_nodes('images') + + def _get_attachment_meta(self) -> Dict[int, 
List[AttachmentNode]]: + """Get all attachment metadata into a {page_number: [attachment_url]} format""" + if not self.asset_config.export_attachments: + return {} + return self.asset_archiver.get_asset_nodes('attachments') + + def _modify_markdown(self, asset_type: str, + page_name: str, page_data: bytes, + asset_nodes: List[ImageNode | AttachmentNode]) -> bytes: + if not self.modify_md: + return page_data + return self.asset_archiver.update_asset_links(asset_type, page_name, page_data, + asset_nodes) + + def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str, + asset_nodes: List[ImageNode | AttachmentNode]): """pull images locally into a directory based on page""" - image_base_path = f"{self.archive_base_path}/{parent_path}/{_IMAGE_DIR_NAME}" - for img_node in image_nodes: - img_data: bytes = archiver_util.get_byte_response(img_node.url, self._headers, - self.verify_ssl) - image_path = f"{image_base_path}/{page_name}/{img_node.name}" - self.write_data(image_path, img_data) + if not asset_nodes: + return + node_base_path = f"{self.archive_base_path}/{parent_path}/" + for asset_node in asset_nodes: + asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url) + asset_path = f"{node_base_path}/{asset_node.get_relative_path(page_name)}" + self.write_data(asset_path, asset_data) def write_data(self, file_path: str, data: bytes): """write data to a tar file @@ -167,21 +146,6 @@ def gzip_archive(self): """provide the tar to gzip and the name of the gzip output file""" archiver_util.create_gzip(self.tar_file, self.archive_file) - def _update_image_links(self, page_name: str, page_data: bytes, - image_nodes: List[ImageNode]) -> bytes: - """regex replace links to local created directories""" - for img_node in image_nodes: - img_meta_url = f"{self.api_urls['images']}/{img_node.id}" - img_details = common_util.http_get_request(img_meta_url, - self._headers, self.verify_ssl) - img_node.set_markdown_content(img_details.json()) - if not 
img_node.markdown_str: - continue - # 1 - what to replace, 2 - replace with, 3 is the data to replace - page_data = re.sub(img_node.markdown_str.encode(), - img_node.get_image_relative_path(page_name).encode(), page_data) - return page_data - @property def file_extension_map(self) -> Dict[str, str]: """file extension metadata""" @@ -195,20 +159,4 @@ def export_images(self) -> bool: @property def verify_ssl(self) -> bool: """return whether or not to verify ssl for http requests""" - return self.asset_config.verify_ssl - - # @staticmethod - # def _get_regex_expr(url: str) -> bytes: - # # regex_str = fr"\[\!\[^$|.*\]\({url}\)\]" - # return re.compile(regex_str.encode()) - - @staticmethod - def _create_image_map(json_data: List[Dict[str, Union[str,int]]]) -> Dict[int, List[ImageNode]]: - image_page_map = {} - for img_meta in json_data: - img_node = ImageNode(img_meta) - if img_node.page_id in image_page_map: - image_page_map[img_node.page_id].append(img_node) - else: - image_page_map[img_node.page_id] = [img_node] - return image_page_map + return self.asset_config.verify_ssl \ No newline at end of file diff --git a/bookstack_file_exporter/config_helper/config_helper.py b/bookstack_file_exporter/config_helper/config_helper.py index 46911b2..80b822f 100644 --- a/bookstack_file_exporter/config_helper/config_helper.py +++ b/bookstack_file_exporter/config_helper/config_helper.py @@ -19,7 +19,8 @@ "books": "api/books", "chapters": "api/chapters", "pages": "api/pages", - "images": "api/image-gallery" + "images": "api/image-gallery", + "attachments": "api/attachments" } _UNASSIGNED_BOOKS_DIR = "unassigned/" diff --git a/bookstack_file_exporter/config_helper/models.py b/bookstack_file_exporter/config_helper/models.py index 8b37b3d..e82e5da 100644 --- a/bookstack_file_exporter/config_helper/models.py +++ b/bookstack_file_exporter/config_helper/models.py @@ -23,6 +23,7 @@ class BookstackAccess(BaseModel): class Assets(BaseModel): """YAML schema for bookstack markdown 
asset(pages/images/attachments) configuration""" export_images: Optional[bool] = False + export_attachments: Optional[bool] = False modify_markdown: Optional[bool] = False export_meta: Optional[bool] = False verify_ssl: Optional[bool] = True diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index d63e17f..ef4a114 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -1,4 +1,6 @@ from typing import Dict, Union, List +import unicodedata +from re import sub as re_sub # shelves --> 'books' # books --> 'content' @@ -34,7 +36,8 @@ def __init__(self, meta: Dict[str, Union[str, int]], self._parent = parent self._path_prefix = path_prefix # for convenience/usage for exporter - self.name: str = self.meta['slug'] + # self.name: str = self.meta['slug'] + self.name = self.get_name(self.meta['slug'], self.meta['name']) self.id_: int = self.meta['id'] self._display_name = self.meta['name'] # children @@ -42,6 +45,14 @@ def __init__(self, meta: Dict[str, Union[str, int]], # if parent self._file_path = self._get_file_path() + def get_name(self, slug: str, name: str) -> str: + """return name of resource""" + if slug: + return slug + if name != _NULL_PAGE_NAME: + return self.slugify(name) + return "" + def _get_file_path(self) -> str: if self._parent: # page node @@ -86,3 +97,23 @@ def empty(self): if not self.name and self._display_name == _NULL_PAGE_NAME: return True return False + + @staticmethod + def slugify(value: str, allow_unicode=False): + """ + Taken from https://github.com/django/django/blob/master/django/utils/text.py + Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated + dashes to single dashes. Remove characters that aren't alphanumerics, + underscores, or hyphens. Convert to lowercase. Also strip leading and + trailing whitespace, dashes, and underscores. 
+ """ + if allow_unicode: + value = unicodedata.normalize("NFKC", value) + else: + value = ( + unicodedata.normalize("NFKD", value) + .encode("ascii", "ignore") + .decode("ascii") + ) + value = re_sub(r"[^\w\s-]", "", value.lower()) + return re_sub(r"[-\s]+", "-", value).strip("-_") \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index ef4786a..cb011ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,9 +18,9 @@ classifiers = python_requires = >=3.8 install_requires = Pyyaml >= 6.0.1 # https://pypi.org/project/PyYAML/ - Pydantic >= 2.4.0 # https://docs.pydantic.dev/latest/ + Pydantic >= 2.5.3 # https://docs.pydantic.dev/latest/ requests >= 2.31.0 # https://pypi.org/project/requests/ - minio >= 7.2.0 # https://pypi.org/project/minio/ + minio >= 7.2.3 # https://pypi.org/project/minio/ packages = find: [options.entry_points] From 3c7a61e721cc49ea53214d9411e67d16fa7ac8a2 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:14:12 -0500 Subject: [PATCH 04/13] new github action methodology and update readme with attachment changes --- .github/actions/docker/action.yml | 83 ++++++++++++ .github/actions/python/action.yml | 37 +++++ .github/actions/tests/action.yml | 18 +++ .github/{workflows => bkp}/always.pylint.yml | 0 .github/bkp/on_pr_merged.yml | 121 +++++++++++++++++ .../on_pr_open.docker-build.yml | 0 .github/workflows/on_pr_merged.yml | 127 +++--------------- .github/workflows/on_pr_open.yml | 18 +++ .github/workflows/on_release.yml | 65 +++++++++ Dockerfile | 5 + README.md | 80 ++++++++--- .../archiver/page_archiver.py | 2 +- examples/config.yml | 3 + examples/minio_config.yml | 3 + setup.cfg | 2 +- 15 files changed, 436 insertions(+), 128 deletions(-) create mode 100644 .github/actions/docker/action.yml create mode 100644 .github/actions/python/action.yml create mode 100644 .github/actions/tests/action.yml rename .github/{workflows => bkp}/always.pylint.yml (100%) create mode 100644 .github/bkp/on_pr_merged.yml rename .github/{workflows => 
bkp}/on_pr_open.docker-build.yml (100%) create mode 100644 .github/workflows/on_pr_open.yml create mode 100644 .github/workflows/on_release.yml diff --git a/.github/actions/docker/action.yml b/.github/actions/docker/action.yml new file mode 100644 index 0000000..ed9545c --- /dev/null +++ b/.github/actions/docker/action.yml @@ -0,0 +1,83 @@ +--- +name: Docker image +description: Creates a Docker image + +# note inputs have some weird behavior: https://github.com/actions/runner/issues/1483 +# for string type, don't wrap in JSON +# for boolean type, wrap in JSON + +inputs: + dockerhub_username: + description: Dockerhub username + type: string + required: false + default: none + dockerhub_token: + description: Dockerhub token + type: string + required: false + default: none + push: + description: Push Images to docker hub + type: boolean + required: false + default: true + latest: + description: Update latest tag + type: boolean + required: false + default: true + +runs: + using: composite + steps: + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: docker.io/${{ github.repository }} + flavor: | + latest=${{ fromJSON(inputs.latest) }} + # for some reason can't get this to show up from docker image labels + # placing here for now + labels: | + org.opencontainers.image.description=Page asset and content exporter for Bookstack + tags: | + ## add the event types that should be added as tags + ## on merge to master - update `main` tag for testing before release + type=ref,event=branch + ## on release - for use by users + type=semver,pattern={{major}}.{{minor}} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Dockerhub + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + username: ${{ inputs.dockerhub_username }} + password: ${{ inputs.dockerhub_token }} + + - name: Build Docker Image + if: 
github.event_name != 'pull_request' + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ fromJSON(inputs.push) }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Update Dockerhub Documentation + uses: peter-evans/dockerhub-description@v3 + if: ${{ (fromJSON(inputs.latest) == true) && (github.event_name != 'pull_request') }} + with: + username: ${{ inputs.dockerhub_username }} + password: ${{ inputs.dockerhub_token }} + repository: ${{ github.repository }} \ No newline at end of file diff --git a/.github/actions/python/action.yml b/.github/actions/python/action.yml new file mode 100644 index 0000000..db7c92b --- /dev/null +++ b/.github/actions/python/action.yml @@ -0,0 +1,37 @@ +--- +name: Deploy to PyPi +description: Deploys the python package to PyPi + +inputs: + pypi_api_token: + description: PyPi api token + type: string + required: true + +runs: + using: composite + steps: + - name: Get tag release without v + run: | + TAG=${{ github.ref_name }} + echo "VERSION=${TAG#v}" >> "$GITHUB_ENV" + echo "Tag without v is: ${VERSION}" + - name: Update Release Tag + run: sed -i "s/^version = [^ ]*/version = ${{ env.VERSION }}/" setup.cfg + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.12.1' + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build Python Package + run: | + python -m pip install --upgrade build + python -m build + - name: Publish to PyPi + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ inputs.pypi_api_token }} + skip-existing: true \ No newline at end of file diff --git a/.github/actions/tests/action.yml b/.github/actions/tests/action.yml new file mode 100644 index 0000000..ad5b369 --- /dev/null +++ b/.github/actions/tests/action.yml @@ -0,0 +1,18 @@ +--- +name: Test Python Package +description: Test and lint code + +runs: + 
using: composite + steps: + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.12.1' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') \ No newline at end of file diff --git a/.github/workflows/always.pylint.yml b/.github/bkp/always.pylint.yml similarity index 100% rename from .github/workflows/always.pylint.yml rename to .github/bkp/always.pylint.yml diff --git a/.github/bkp/on_pr_merged.yml b/.github/bkp/on_pr_merged.yml new file mode 100644 index 0000000..04552fb --- /dev/null +++ b/.github/bkp/on_pr_merged.yml @@ -0,0 +1,121 @@ +# needs: [tests] # require tests to pass before deploy runs + +name: Build and Push + +# on: +# push: +# # Pattern matched against refs/tags +# tags: +# - '**' # Push events to every tag including hierarchical tags like v1.0/beta + +on: + pull_request: + types: + - closed + branches: + - main + +# maybe trigger build/push on release tags? 
+# but this also works for my use case +jobs: + docker_deploy: + if: github.event.pull_request.merged + runs-on: ubuntu-latest + # specify this to target the correct env/secrets to use + environment: 'Dockerhub' + steps: + - uses: actions/checkout@v3 + - name: Login to Dockerhub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + - name: Build and push the Docker image + run: make docker_build_latest + # - name: Push Docker image + # run: make docker_push_latest + - name: Update Dockerhub Documentation + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: ${{ env.DOCKERHUB_REPO }} + pypi_deploy: + if: github.event.pull_request.merged + runs-on: ubuntu-latest + environment: 'PyPi' + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Set tag version + run: | + TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) + echo "VERSION=${TAG}" >> "$GITHUB_ENV" + echo "version from Makefile is: ${VERSION}" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Update Release tag + run: sed -i "s/^version = [^ ]*/version = ${{ env.VERSION }}/" setup.cfg + - name: Build package + run: make build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true + create_tag: + if: github.event.pull_request.merged + runs-on: ubuntu-latest + needs: + - docker_deploy + - pypi_deploy + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.merge_commit_sha }} + fetch-depth: 
'0' + - name: Set tag version + run: | + TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) + echo "VERSION=v${TAG}" >> "$GITHUB_ENV" + echo "version from Makefile is: ${VERSION}" + - name: Create tag + uses: anothrNick/github-tag-action@1.64.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # WITH_V: true + # PRERELEASE: true + CUSTOM_TAG: ${{ env.VERSION }} + create_release: + if: github.event.pull_request.merged + runs-on: ubuntu-latest + needs: + - create_tag + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + - name: Set tag version + run: | + TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) + echo "VERSION=v${TAG}" >> "$GITHUB_ENV" + echo "version from Makefile is: ${VERSION}" + - name: Generate release + uses: ncipollo/release-action@v1 + with: + tag: ${{ env.VERSION }} + generateReleaseNotes: true + skipIfReleaseExists: true + # docker image tag latest diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/bkp/on_pr_open.docker-build.yml similarity index 100% rename from .github/workflows/on_pr_open.docker-build.yml rename to .github/bkp/on_pr_open.docker-build.yml diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml index 04552fb..7c1c40d 100644 --- a/.github/workflows/on_pr_merged.yml +++ b/.github/workflows/on_pr_merged.yml @@ -1,121 +1,32 @@ # needs: [tests] # require tests to pass before deploy runs -name: Build and Push - -# on: -# push: -# # Pattern matched against refs/tags -# tags: -# - '**' # Push events to every tag including hierarchical tags like v1.0/beta +name: Create Current Main Image on: - pull_request: - types: - - closed + push: branches: - main -# maybe trigger build/push on release tags? 
-# but this also works for my use case jobs: - docker_deploy: - if: github.event.pull_request.merged - runs-on: ubuntu-latest - # specify this to target the correct env/secrets to use - environment: 'Dockerhub' - steps: - - uses: actions/checkout@v3 - - name: Login to Dockerhub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v3 - - name: Build and push the Docker image - run: make docker_build_latest - # - name: Push Docker image - # run: make docker_push_latest - - name: Update Dockerhub Documentation - uses: peter-evans/dockerhub-description@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - repository: ${{ env.DOCKERHUB_REPO }} - pypi_deploy: - if: github.event.pull_request.merged + test: runs-on: ubuntu-latest - environment: 'PyPi' steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Set tag version - run: | - TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) - echo "VERSION=${TAG}" >> "$GITHUB_ENV" - echo "version from Makefile is: ${VERSION}" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Update Release tag - run: sed -i "s/^version = [^ ]*/version = ${{ env.VERSION }}/" setup.cfg - - name: Build package - run: make build - - name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - skip-existing: true - create_tag: - if: github.event.pull_request.merged - runs-on: ubuntu-latest - needs: - - docker_deploy - - pypi_deploy - permissions: - contents: write - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ github.event.pull_request.merge_commit_sha }} - 
fetch-depth: '0' - - name: Set tag version - run: | - TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) - echo "VERSION=v${TAG}" >> "$GITHUB_ENV" - echo "version from Makefile is: ${VERSION}" - - name: Create tag - uses: anothrNick/github-tag-action@1.64.0 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # WITH_V: true - # PRERELEASE: true - CUSTOM_TAG: ${{ env.VERSION }} - create_release: - if: github.event.pull_request.merged + - name: Checkout + uses: actions/checkout@v4 + - name: Go Tests + uses: ./.github/actions/tests + # push to `main` image for testing/most up to date + docker-build: runs-on: ubuntu-latest - needs: - - create_tag - permissions: - contents: write + needs: test + environment: 'Dockerhub' + timeout-minutes: 10 steps: - - uses: actions/checkout@v3 - - name: Set tag version - run: | - TAG=$(cat Makefile | grep -E ^IMAGE_TAG=[0-9].[0-9].[0-9] | cut -d "=" -f2) - echo "VERSION=v${TAG}" >> "$GITHUB_ENV" - echo "version from Makefile is: ${VERSION}" - - name: Generate release - uses: ncipollo/release-action@v1 + - name: Checkout + uses: actions/checkout@v4 + - name: Run Docker Build + uses: ./.github/actions/docker with: - tag: ${{ env.VERSION }} - generateReleaseNotes: true - skipIfReleaseExists: true - # docker image tag latest + latest: false + dockerhub_username: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub_token: ${{ secrets.DOCKERHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/on_pr_open.yml b/.github/workflows/on_pr_open.yml new file mode 100644 index 0000000..2782bf9 --- /dev/null +++ b/.github/workflows/on_pr_open.yml @@ -0,0 +1,18 @@ +name: Test + +on: + pull_request: + branches: [ "main" ] + types: + - opened + - reopened + - synchronize + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Go Tests + uses: ./.github/actions/tests \ No newline at end of file diff --git a/.github/workflows/on_release.yml 
b/.github/workflows/on_release.yml new file mode 100644 index 0000000..ac259c4 --- /dev/null +++ b/.github/workflows/on_release.yml @@ -0,0 +1,65 @@ +--- +name: Create Official Release and Push Artifacts + +on: + push: + tags: + - v* + +permissions: + contents: write + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Tests + uses: ./.github/actions/tests + create_release: + runs-on: ubuntu-latest + needs: tests + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Create Release + uses: ncipollo/release-action@v1 + with: + tag: ${{ github.ref_name }} + generateReleaseNotes: true + # build and push docker image + release-docker: + runs-on: ubuntu-latest + needs: + - tests + - create_release + environment: 'Dockerhub' + timeout-minutes: 10 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Run Docker Build + uses: ./.github/actions/docker + with: + dockerhub_username: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub_token: ${{ secrets.DOCKERHUB_TOKEN }} + # + # adds binaries to release + # https://github.com/goreleaser/goreleaser-action + release-python: + runs-on: ubuntu-latest + needs: + - tests + - create_release + timeout-minutes: 20 + environment: 'PyPi' + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Deploy release to PyPi + uses: ./.github/actions/python + with: + pypi_api_token: "${{ secrets.PYPI_API_TOKEN }}" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 707f8a5..2c798d8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,11 @@ ARG BASE_IMAGE_TAG=3.12-slim-python FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} +LABEL \ + org.opencontainers.image.title="bookstack-file-exporter" \ + org.opencontainers.image.description="Page asset and content exporter for Bookstack" \ + org.opencontainers.image.source="https://github.com/homeylab/bookstack-file-exporter" + # Get security updates and clean up apt cache for smaller 
size RUN apt update -y && apt upgrade -y && \ apt install dumb-init && \ diff --git a/README.md b/README.md index 76241ad..410f3ad 100644 --- a/README.md +++ b/README.md @@ -6,19 +6,20 @@ Table of Contents - [Use Case](#use-case) - [Using This Application](#using-this-application) - [Run via Pip](#run-via-pip) - - [Run Via Docker](#run-via-docker) + - [Run via Docker](#run-via-docker) - [Authentication](#authentication) - [Configuration](#configuration) - [Backup Behavior](#backup-behavior) - [General](#general) - [Images](#images) + - [Attachments](#attachments) - [Modify Markdown Files](#modify-markdown-files) - [Object Storage](#object-storage) - [Minio Backups](#minio-backups) - [Future Items](#future-items) ## Background -_Features are actively being developed. See `Future Items` section for more details. Open an issue for a feature request._ +_If you encounter any issues, want to request an additional feature, or provide assistance, feel free to open a Github issue._ This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. 
@@ -29,14 +30,13 @@ What it does: - Discover and build relationships between Bookstack `Shelves/Books/Chapters/Pages` to create a relational parent-child layout - Export Bookstack pages and their content to a `.tgz` archive -- Additional content for pages like their images and metadata and can be exported -- The exporter can also [Modify Markdown Files](#modify-markdown-files) to replace image links with local exported image paths for a more portable backup +- Additional content for pages, like their images, attachments, and metadata, can also be exported +- The exporter can also [Modify Markdown Files](#modify-markdown-files) to replace image and/or attachment links with local exported paths for a more portable backup - YAML configuration file for repeatable and easy runs - Can be run via [Python](#run-via-pip) or [Docker](#run-via-docker) - Can push archives to remote object storage like [Minio](https://min.io/) - Basic housekeeping option (`keep_last`) to keep a tidy archive destination - Supported backup targets are: 1. local @@ -77,6 +77,7 @@ formats: # md only example output_path: "bkps/" assets: export_images: false + export_attachments: false modify_markdown: false export_meta: false verify_ssl: true @@ -92,6 +93,9 @@ _Note: This application is tested and developed on Python version `3.12.X`. The ```bash python -m pip install bookstack-file-exporter +# if you prefer a specific version, example: +python -m pip install bookstack-file-exporter==X.X.X + # using pip python -m bookstack_file_exporter -c @@ -120,6 +124,15 @@ python -m bookstack_file_exporter -c ### Run via Docker Docker images are provided for `linux/amd64` and `linux/arm64` variants only at the moment. If another variant is required, please request it via Github Issue. +#### Tags +Users will generally want to use the `latest` tag or a specific version tag. The `main` tag is also provided but is not guaranteed to be stable. 
+ +| tag | description | +| --- | ----------- | +| `latest` | Latest stable release and is updated with each new stable release. | +| `X.X.X` | Semantic versioned releases are also provided if preferred for stability or other reasons. | +| `main` | This tag reflects the `main` branch of this repository and may not be stable | + #### Examples ```bash # --user flag to override the uid/gid for created files. Set this to your uid/gid @@ -211,6 +224,7 @@ minio: output_path: "bkps/" assets: export_images: true + export_attachments: true modify_markdown: false export_meta: false verify_ssl: true @@ -231,6 +245,7 @@ More descriptions can be found for each section below: | `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. If not provided, will use current run directory by default | | `assets` | `object` | `false` | Optional section to export additional assets from pages. | | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | +| `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout | | `assets.modify_markdown` | `bool` | `false` | Optional (default: `false`), modify markdown files to replace image links with local exported image paths. This requires `assets.export_images` to be `true` in order to work. See [Modify Markdown Files](#modify-markdown-files) for more information. 
| `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export of metadata about the page in a json file | | `assets.verify_ssl` | `bool` | `false` | Optional (default: `true`), whether or not to check ssl certificates when requesting content from Bookstack host | @@ -256,8 +271,12 @@ Backups are exported in `.tgz` format and generated based off timestamp. Export The exporter can also do housekeeping duties and keep a configured number of archives and delete older ones. See `keep_last` property in the [Configuration](#options-and-descriptions) section. Object storage provider configurations include their own `keep_last` property for flexibility. -For file names, `slug` names (from Bookstack API) are used, as such certain characters like `!`, `/` will be ignored and spaces replaced from page names/titles. +#### File Naming +For file names, `slug` names (from Bookstack API) are used, as such certain characters like `!`, `/` will be ignored and spaces replaced from page names/titles. If your page has an empty `slug` value for some reason (draft that was never fully saved), the exporter will use page name with the `slugify` function from Django to generate a valid slug. Example: `My Page.bin Name!` will be converted to `my-page-bin-name`. + +You may also notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected and is because there were resources with the same name created in another shelve and bookstack adds a string at the end to ensure uniqueness. +#### Directory Layout All sub directories will be created as required during the export process. ``` Shelves --> Books --> Chapters --> Pages @@ -289,7 +308,7 @@ kafka-apps (shelf) ---> settings.md (page) ... 
-## Example with image layout +## Example with image and attachment layout # unassigned dir is used for books with no shelf unassigned (shelf) ---> test (book) @@ -300,12 +319,20 @@ unassigned (shelf) ---> rec-page ---> img-010.png ---> img-020.png + --> attachments (attachment_dir) + ---> test_page (page directory) + ---> something.config + ---> something_else.config + ---> rec-page + ---> test_output.log + ---> actual_output.log ---> test_page.md (page) ... ---> rec_page (page) ---> rec_page.md ---> rec_page.pdf ``` + Another example is shown below: ``` ## First example: @@ -320,13 +347,18 @@ bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/dwwimage.pn bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/NzZimage.png bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/next1.png bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png +bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample.config +bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample_output.log bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.md bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.pdf ``` Books without a shelf will be put in a shelve folder named `unassigned`. -Empty/New Pages will be ignored since they have not been modified yet from creation and are empty but also do not have a valid slug. Example: +#### Empty/New Pages +Empty/New Pages will be ignored since they have not been modified yet from creation and are empty but also do not have a valid slug. + +Example from Bookstack API: ``` { ... @@ -336,10 +368,7 @@ Empty/New Pages will be ignored since they have not been modified yet from creat } ``` -You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. 
This is expected and is because there were resources with the same name created in another shelve and bookstack adds a string at the end to ensure uniqueness. - ### Images - Images will be dumped in a separate directory, `images` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/images/{page}/{image_name}`. As shown earlier: ``` @@ -351,22 +380,37 @@ bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png **Note you may see old images in your exports. This is because, by default, Bookstack retains images/drawings that are uploaded even if no longer referenced on an active page. Admins can run `Cleanup Images` in the Maintenance Settings or via [CLI](https://www.bookstackapp.com/docs/admin/commands/#cleanup-unused-images) to remove them.** +### Attachments +Attachments will be dumped in a separate directory, `attachments` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/attachments/{page}/{attachment_name}`. As shown earlier: + +``` +bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample.config +bookstack_export_2023-11-28_06-24-25/programming/react/attachments/nextjs/sample_package.json +... +... +``` + +**Note attachments that are just external links are ignored. Only attachments that are shown as `external: False` will be exported.** + +[Reference](https://demo.bookstackapp.com/api/docs#attachments-list) and excerpt from Bookstack API docs: +> Get a listing of attachments visible to the user. The external property indicates whether the attachment is simple a link. A false value for the external property would indicate a file upload. 
+ ### Modify Markdown Files -**To use this feature, `assets.export_images` should be set to `true`** +**To use this feature, `assets.export_images` and/or `assets.export_attachments` should be set to `true`** -The configuration item, `assets.modify_markdown`, can be set to `true` to modify markdown files to replace image url links with local exported image paths. This feature allows for you to make your `markdown` exports much more portable. +The configuration item, `assets.modify_markdown`, can be set to `true` to modify markdown files to replace image and attachment url links with local exported paths. This feature allows you to make your `markdown` exports much more portable. -Page (parent) -> Images (Children) relationships are created and then each image url is replaced with its own respective local export path. Example: +Page (parent) -> Images (Children) relationships are created and then each image/attachment url is replaced with its own respective local export path. Example: ``` ## before [![pool-topology-1.png](https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ## after -[![pool-topology-1.png](./images/{page_name}/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) +[![pool-topology-1.png](images/{page_name}/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ``` -This allows the image to be found locally within the export files and allow your `markdown` docs to have all the images display properly like it would normally would. +This allows the image or attachment to be found locally within the export files and allows your `markdown` docs to have all the assets display properly like they normally would. 
-**Note: This will work properly if your pages are using the notation used by Bookstack for Markdown image links, example: ` [![image alt text](Bookstack Markdown image URL link)](anchor/url link)` The `(anchor/url link)` is optional.** +**Note: This will work properly if your pages are using the notation used by Bookstack for Markdown image links, example: ` [![image alt text](Bookstack Markdown image URL link)](anchor/url link)` The `(anchor/url link)` is optional. For attachments the format is: `[file](url link)`** ## Object Storage Optionally, target(s) can be specified to upload generated archives to a remote location. Supported object storage providers can be found below: @@ -408,7 +452,7 @@ minio: 1. ~~Be able to pull images locally and place in their respective page folders for a more complete file level backup.~~ 2. ~~Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack).~~ 3. ~~Be able to modify markdown links of images to local exported images in their respective page folders for a more complete file level backup.~~ -4. Be able to pull attachments locally and place in their respective page folders for a more complete file level backup. +4. ~~Be able to pull attachments locally and place in their respective page folders for a more complete file level backup.~~ 5. Export S3 and more options. 6. Filter shelves and books by name - for more targeted backups. Example: you only want to share a book about one topic with an external friend/user. 7. 
Be able to pull media/photos from 3rd party providers like `drawio` \ No newline at end of file diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index a284533..52e5d99 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -54,7 +54,7 @@ def __init__(self, archive_dir: str, config: ConfigNode) -> None: def _check_md_modify(self) -> bool: # check to ensure they have asset_config defined, could be None if 'markdown' in self.export_formats: - return self.asset_config.modify_markdown and self.export_images + return self.asset_config.modify_markdown and ( self.export_images or self.export_attachments) return False def archive_pages(self, page_nodes: Dict[int, Node]): diff --git a/examples/config.yml b/examples/config.yml index 2a12cb1..cca6602 100644 --- a/examples/config.yml +++ b/examples/config.yml @@ -29,6 +29,9 @@ assets: # optional export of all the images used in a page(s). # omit this or set to false if not needed export_images: false + # optional export of all the attachments used in a page(s). + # omit this or set to false if not needed + export_attachments: false # optional modify markdown files to replace image url links # with local exported image paths modify_markdown: false diff --git a/examples/minio_config.yml b/examples/minio_config.yml index 692cacd..3b36a08 100644 --- a/examples/minio_config.yml +++ b/examples/minio_config.yml @@ -58,6 +58,9 @@ assets: # optional export of all the images used in a page(s). # omit this or set to false if not needed export_images: false + # optional export of all the attachments used in a page(s). 
+ # omit this or set to false if not needed + export_attachments: false # optional modify markdown files to replace image url links # with local exported image paths modify_markdown: false diff --git a/setup.cfg b/setup.cfg index cb011ac..dffaf0c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = bookstack-file-exporter -# version will be replaced by IMAGE_TAG in Makefile +# version will be replaced by IMAGE_TAG via Github Actions version = 0.0.1 author = pchang388 # author_email = your@email.address From ba1161edf4a93baeadb163a56a6581c0c8dd1341 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:15:26 -0500 Subject: [PATCH 05/13] update readme for deployment --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 410f3ad..8bb6612 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ The main use case is to backup all docs in a relational directory-tree format to 2. Offline copy wanted. 3. Back up at a file level as an accessory or alternative to disk and volume backups. 4. Migrate all Bookstack page contents to Markdown documenting for simplicity. -5. Provide an easy way to do automated file backups locally, in docker, or kubernetes for Bookstack page contents. +5. Provide an easy way to do automated file backups locally, in docker, or [kubernetes](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack) for Bookstack page contents. ## Using This Application Ensure a valid configuration is provided when running this application. See [Configuration](#Configuration) section for more details. 
From 56e5cd92c08961d6b832c9c2e49a28743352cedf Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:16:11 -0500 Subject: [PATCH 06/13] update readme for deployment --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8bb6612..c52dde0 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ The main use case is to backup all docs in a relational directory-tree format to 2. Offline copy wanted. 3. Back up at a file level as an accessory or alternative to disk and volume backups. 4. Migrate all Bookstack page contents to Markdown documenting for simplicity. -5. Provide an easy way to do automated file backups locally, in docker, or [kubernetes](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack) for Bookstack page contents. +5. Provide an easy way to do automated file backups locally, in docker, or [kubernetes](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack#file-exporter-backup-your-pages) for Bookstack page contents. ## Using This Application Ensure a valid configuration is provided when running this application. See [Configuration](#Configuration) section for more details. 
From 714acad22ac519386280a94afe55da64c723a39d Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:47:16 -0500 Subject: [PATCH 07/13] pylint changes --- Makefile | 2 +- .../archiver/asset_archiver.py | 67 +++++++++++++++---- .../archiver/page_archiver.py | 23 ++++--- bookstack_file_exporter/exporter/exporter.py | 3 +- bookstack_file_exporter/exporter/node.py | 2 +- 5 files changed, 71 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 30db136..1877d5f 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) # github actions will use this to create a tag -IMAGE_TAG=1.0.2 +IMAGE_TAG=1.1.0 DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump diff --git a/bookstack_file_exporter/archiver/asset_archiver.py b/bookstack_file_exporter/archiver/asset_archiver.py index ac8dd66..71f2583 100644 --- a/bookstack_file_exporter/archiver/asset_archiver.py +++ b/bookstack_file_exporter/archiver/asset_archiver.py @@ -1,9 +1,9 @@ +import logging +import base64 from typing import Union, List, Dict +from re import sub as re_sub # pylint: disable=import-error from requests import Response -from re import sub as re_sub -import logging -import base64 from bookstack_file_exporter.common import util as common_util @@ -14,12 +14,20 @@ class AssetNode: + """ + Base class for other asset nodes. This class should not be used directly. 
+ + Args: + :meta_data: = asset meta data + + Returns: + AssetNode instance for use in other classes + """ def __init__(self, meta_data: Dict[str, int | str | bool]): self.id: int = meta_data['id'] self.page_id: int = meta_data['uploaded_to'] - # self.page_name: str = page_name - self.url: str = meta_data['url'] - self.name: str = self.url.split('/')[-1] + self.url: str = "" + self.name: str = "" self._markdown_str = "" self._relative_path_prefix: str = "" @@ -33,6 +41,7 @@ def markdown_str(self): return self._markdown_str def set_markdown_content(self, asset_data: Dict[str, int | str | bool]) -> None: + """set markdown url str to replace""" self._markdown_str = self._get_md_url_str(asset_data) @staticmethod @@ -49,20 +58,39 @@ def _get_md_url_str(asset_data: Dict[str, Union[int, str]]) -> str: return url_str[url_str.find("(")+1:url_str.find(")")] class ImageNode(AssetNode): + """ + ImageNode handles image meta data and markdown url replacement. + + Args: + :meta_data: = image meta data + + Returns: + ImageNode instance for use in archiving images for a page + """ def __init__(self, meta_data: Dict[str, Union[int, str]]): super().__init__(meta_data) - log.debug(self.url) + self.url: str = meta_data['url'] + self.name: str = self.url.split('/')[-1] + log.debug("Image node has generated url: %s", self.url) self._relative_path_prefix = f"{_IMAGE_DIR_NAME}" class AttachmentNode(AssetNode): + """ + AttachmentNode handles attachment meta data and markdown url replacement. 
+ + Args: + :meta_data: = attachment meta data + :base_url: = base url for attachment download + + Returns: + AttachmentNode instance for use in archiving attachments for a page + """ def __init__(self, meta_data: Dict[str, Union[int, str, bool]], base_url: str): - self.id: int = meta_data['id'] - self.page_id: int = meta_data['uploaded_to'] + super().__init__(meta_data) self.url: str = f"{base_url}/{self.id}" - log.debug(self.url) self.name = meta_data['name'] - self._markdown_str = "" + log.debug("Attachment node has generated url: %s", self.url) self._relative_path_prefix = f"{_ATTACHMENT_DIR_NAME}" @staticmethod @@ -79,6 +107,17 @@ def _get_md_url_str(asset_data: Dict[str, int | str | dict]) -> str: return url_str[url_str.find("(")+1:url_str.find(")")] class AssetArchiver: + """ + AssetArchiver handles image and attachment exports for a page. + + Args: + :urls: = api urls for images and attachments + :headers: = http headers for api requests + :verify_ssl: = verify ssl for api requests + + Returns: + AssetArchiver instance for use in archiving images and attachments for a page + """ def __init__(self, urls: Dict[str, str], headers: Dict[str, str], verify_ssl: bool): self.api_urls = urls @@ -118,7 +157,7 @@ def get_asset_bytes(self, asset_type: str, url: str) -> bytes: case "images": asset_data = asset_response.content case "attachments": - asset_data = self.decode_attachment_data(asset_response.json()['content']) + asset_data = self._decode_attachment_data(asset_response.json()['content']) return asset_data def update_asset_links(self, asset_type, page_name: str, page_data: bytes, @@ -158,9 +197,9 @@ def _create_attachment_map(self, else: asset_nodes[asset_node.page_id] = [asset_node] return asset_nodes - + @staticmethod - def decode_attachment_data(b64encoded_data: str) -> bytes: + def _decode_attachment_data(b64encoded_data: str) -> bytes: """decode base64 encoded data""" asset_data = b64encoded_data.encode() return base64.b64decode(asset_data) diff --git 
a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index 52e5d99..4398e27 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -4,7 +4,6 @@ from bookstack_file_exporter.archiver import util as archiver_util from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode from bookstack_file_exporter.config_helper.config_helper import ConfigNode -from bookstack_file_exporter.common import util as common_util _META_FILE_SUFFIX = "_meta.json" _TAR_SUFFIX = ".tar" @@ -54,9 +53,10 @@ def __init__(self, archive_dir: str, config: ConfigNode) -> None: def _check_md_modify(self) -> bool: # check to ensure they have asset_config defined, could be None if 'markdown' in self.export_formats: - return self.asset_config.modify_markdown and ( self.export_images or self.export_attachments) + return self.asset_config.modify_markdown and \ + ( self.export_images or self.export_attachments) return False - + def archive_pages(self, page_nodes: Dict[int, Node]): """export page contents and their images/attachments""" # get assets first if requested @@ -73,16 +73,16 @@ def archive_pages(self, page_nodes: Dict[int, Node]): for export_format in self.export_formats: page_data = self._get_page_data(page.id_, export_format) if page_images and export_format == 'markdown': - page_data = self._modify_markdown("images", page.name, + page_data = self._modify_markdown("images", page.name, page_data, page_images) if page_attachments and export_format == 'markdown': - page_data = self._modify_markdown("attachments", page.name, + page_data = self._modify_markdown("attachments", page.name, page_data, page_attachments) self._archive_page(page, export_format, page_data) - self.archive_page_assets("images", page.parent.file_path, + self.archive_page_assets("images", page.parent.file_path, page.name, page_images) - 
self.archive_page_assets("attachments", page.parent.file_path, + self.archive_page_assets("attachments", page.parent.file_path, page.name, page_attachments) if self.asset_config.export_meta: self._archive_page_meta(page.file_path, page.meta) @@ -101,7 +101,7 @@ def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" bytes_meta = archiver_util.get_json_bytes(meta_data) self.write_data(file_path=meta_file_name, data=bytes_meta) - + def _get_image_meta(self) -> Dict[int, List[ImageNode]]: """Get all image metadata into a {page_number: [image_url]} format""" if not self.asset_config.export_images: @@ -156,7 +156,12 @@ def export_images(self) -> bool: """return whether or not to export images""" return self.asset_config.export_images + @property + def export_attachments(self) -> bool: + """return whether or not to export attachments""" + return self.asset_config.export_attachments + @property def verify_ssl(self) -> bool: """return whether or not to verify ssl for http requests""" - return self.asset_config.verify_ssl \ No newline at end of file + return self.asset_config.verify_ssl diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py index 910c30c..52702f2 100644 --- a/bookstack_file_exporter/exporter/exporter.py +++ b/bookstack_file_exporter/exporter/exporter.py @@ -156,7 +156,8 @@ def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: # add `page` flag, we only want pages # filter out chapters for now # chapters can have their own children/pages - page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes, node_type="page") + page_nodes: Dict[int, Node] = self.get_child_nodes("pages", + book_nodes, node_type="page") ## chapters (if exists) # chapter nodes are treated a little differently # chapters are children under books diff --git a/bookstack_file_exporter/exporter/node.py 
b/bookstack_file_exporter/exporter/node.py index ef4a114..6fa5a86 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -116,4 +116,4 @@ def slugify(value: str, allow_unicode=False): .decode("ascii") ) value = re_sub(r"[^\w\s-]", "", value.lower()) - return re_sub(r"[-\s]+", "-", value).strip("-_") \ No newline at end of file + return re_sub(r"[-\s]+", "-", value).strip("-_") From d9f3e183bc7b73721cdef37dc87e639ac8a6a6e3 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:48:27 -0500 Subject: [PATCH 08/13] update readme --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index c52dde0..e118dcb 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,6 @@ docker run \ homeylab/bookstack-file-exporter:latest ``` - #### Environment Variables See [Valid Environment Variables](#valid-environment-variables) for more options. From 76461251cb938a39a5c0da249365737e4bcc5d1e Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:53:11 -0500 Subject: [PATCH 09/13] fix new github actions --- .github/actions/python/action.yml | 4 ++++ .github/actions/tests/action.yml | 2 ++ .github/workflows/on_release.yml | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/actions/python/action.yml b/.github/actions/python/action.yml index db7c92b..afef031 100644 --- a/.github/actions/python/action.yml +++ b/.github/actions/python/action.yml @@ -12,21 +12,25 @@ runs: using: composite steps: - name: Get tag release without v + shell: bash run: | TAG=${{ github.ref_name }} echo "VERSION=${TAG#v}" >> "$GITHUB_ENV" echo "Tag without v is: ${VERSION}" - name: Update Release Tag + shell: bash run: sed -i "s/^version = [^ ]*/version = ${{ env.VERSION }}/" setup.cfg - name: Set up Python uses: actions/setup-python@v3 with: python-version: '3.12.1' - name: Install Dependencies + shell: bash run: | python -m pip install --upgrade pip pip install build - name: Build Python 
Package + shell: bash run: | python -m pip install --upgrade build python -m build diff --git a/.github/actions/tests/action.yml b/.github/actions/tests/action.yml index ad5b369..3606371 100644 --- a/.github/actions/tests/action.yml +++ b/.github/actions/tests/action.yml @@ -10,9 +10,11 @@ runs: with: python-version: '3.12.1' - name: Install dependencies + shell: bash run: | python -m pip install --upgrade pip pip install pylint - name: Analysing the code with pylint + shell: bash run: | pylint $(git ls-files '*.py') \ No newline at end of file diff --git a/.github/workflows/on_release.yml b/.github/workflows/on_release.yml index ac259c4..b60bf66 100644 --- a/.github/workflows/on_release.yml +++ b/.github/workflows/on_release.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest needs: tests permissions: - contents: write + contents: write steps: - name: Checkout uses: actions/checkout@v4 From 0b9073edba341d1f54fec37491a6c0d24c0882ad Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 02:57:30 -0500 Subject: [PATCH 10/13] fix new github actions --- .github/workflows/on_pr_merged.yml | 2 +- .github/workflows/on_pr_open.yml | 2 +- .github/workflows/on_release.yml | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml index 7c1c40d..d270180 100644 --- a/.github/workflows/on_pr_merged.yml +++ b/.github/workflows/on_pr_merged.yml @@ -13,7 +13,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - name: Go Tests + - name: Python Tests uses: ./.github/actions/tests # push to `main` image for testing/most up to date docker-build: diff --git a/.github/workflows/on_pr_open.yml b/.github/workflows/on_pr_open.yml index 2782bf9..6a776c3 100644 --- a/.github/workflows/on_pr_open.yml +++ b/.github/workflows/on_pr_open.yml @@ -14,5 +14,5 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - name: Go Tests + - name: Python Tests uses: ./.github/actions/tests \ No 
newline at end of file diff --git a/.github/workflows/on_release.yml b/.github/workflows/on_release.yml index b60bf66..aa9aa72 100644 --- a/.github/workflows/on_release.yml +++ b/.github/workflows/on_release.yml @@ -46,9 +46,6 @@ jobs: with: dockerhub_username: ${{ secrets.DOCKERHUB_USERNAME }} dockerhub_token: ${{ secrets.DOCKERHUB_TOKEN }} - # - # adds binaries to release - # https://github.com/goreleaser/goreleaser-action release-python: runs-on: ubuntu-latest needs: From d794eaee6ab523ea252ea267803080a78f15072b Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 03:12:37 -0500 Subject: [PATCH 11/13] fix dockerfile image tag --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2c798d8..2374c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ ARG BASE_IMAGE=python -ARG BASE_IMAGE_TAG=3.12-slim-python +ARG BASE_IMAGE_TAG=3.12.1-slim-bookworm FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} From 39695493c7cd47981e724619ac97d058686262e8 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 03:30:24 -0500 Subject: [PATCH 12/13] fix dockerfile image args --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2374c23..c368f25 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,9 @@ RUN apt update -y && apt upgrade -y && \ # create docker user RUN useradd -M -s /usr/sbin/nologin -u 33333 exporter -ARG DOCKER_WORK_DIR -ARG DOCKER_CONFIG_DIR -ARG DOCKER_EXPORT_DIR +ARG DOCKER_WORK_DIR=/export +ARG DOCKER_CONFIG_DIR=/export/config +ARG DOCKER_EXPORT_DIR=/export/dump ENV DOCKER_CONFIG_DIR=${DOCKER_CONFIG_DIR} ENV DOCKER_EXPORT_DIR=${DOCKER_EXPORT_DIR} From 96912706dd9e3d96c719154f467dbdbf7c72a5d3 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Tue, 23 Jan 2024 04:50:54 -0500 Subject: [PATCH 13/13] fix release tag versioning for docker --- .github/actions/docker/action.yml | 3 ++- Makefile | 2 +- 2 files changed, 3 insertions(+), 2 
deletions(-) diff --git a/.github/actions/docker/action.yml b/.github/actions/docker/action.yml index ed9545c..3fa3194 100644 --- a/.github/actions/docker/action.yml +++ b/.github/actions/docker/action.yml @@ -47,7 +47,8 @@ runs: ## on merge to master - update `main` tag for testing before release type=ref,event=branch ## on release - for use by users - type=semver,pattern={{major}}.{{minor}} + ## version ; shorthand for {{major}}.{{minor}}.{{patch}} (can include pre-release) + type=semver,pattern={{ version }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 diff --git a/Makefile b/Makefile index 1877d5f..4eebcfd 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) # github actions will use this to create a tag -IMAGE_TAG=1.1.0 +IMAGE_TAG=main DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump