diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 82d6346..14b78b0 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,7 @@ "features": { "ghcr.io/devcontainers/features/python:1": { "installTools": true, - "version": "3.12.4" + "version": "3.13.0" } }, "customizations": { diff --git a/.github/actions/python/action.yml b/.github/actions/python/action.yml index 677c051..e101b14 100644 --- a/.github/actions/python/action.yml +++ b/.github/actions/python/action.yml @@ -23,7 +23,7 @@ runs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.12.4' + python-version: '3.13.0' - name: Install Dependencies shell: bash run: | diff --git a/.github/actions/tests/action.yml b/.github/actions/tests/action.yml index 3b7ed0c..97805a3 100644 --- a/.github/actions/tests/action.yml +++ b/.github/actions/tests/action.yml @@ -8,7 +8,7 @@ runs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.12.4' + python-version: '3.13.0' - name: Install dependencies shell: bash run: | diff --git a/Dockerfile b/Dockerfile index c493943..fd258fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ ARG BASE_IMAGE=python -ARG BASE_IMAGE_TAG=3.12.4-slim-bookworm +ARG BASE_IMAGE_TAG=3.13.0-slim-bookworm FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} diff --git a/Makefile b/Makefile index 93ae354..3eef2ba 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ ## DOCKER BUILD VARS BASE_IMAGE=python -BASE_IMAGE_TAG=3.12.4-slim-bookworm +BASE_IMAGE_TAG=3.13.0-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) # github actions will use this to create a tag @@ -19,6 +19,9 @@ build: python -m pip install --upgrade build python -m build +lint: + pylint bookstack_file_exporter + upload_testpypi: python -m pip install --upgrade twine python -m twine upload --repository testpypi dist/* @@ -27,6 +30,16 @@ upload_testpypi: download_testpypi: python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple bookstack-file-exporter +docker_build_simple: + docker build \ + --build-arg BASE_IMAGE=${BASE_IMAGE} \ + --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} \ + --build-arg DOCKER_WORK_DIR=${DOCKER_WORK_DIR} \ + --build-arg DOCKER_CONFIG_DIR=${DOCKER_CONFIG_DIR} \ + --build-arg DOCKER_EXPORT_DIR=${DOCKER_EXPORT_DIR} \ + -t ${IMAGE_NAME}:${IMAGE_TAG} \ + --no-cache . + docker_build: docker buildx build \ --platform linux/amd64,linux/arm64 \ diff --git a/bookstack_file_exporter/archiver/asset_archiver.py b/bookstack_file_exporter/archiver/asset_archiver.py index 71f2583..dbea2bf 100644 --- a/bookstack_file_exporter/archiver/asset_archiver.py +++ b/bookstack_file_exporter/archiver/asset_archiver.py @@ -24,7 +24,7 @@ class AssetNode: AssetNode instance for use in other classes """ def __init__(self, meta_data: Dict[str, int | str | bool]): - self.id: int = meta_data['id'] + self.id_: int = meta_data['id'] self.page_id: int = meta_data['uploaded_to'] self.url: str = "" self.name: str = "" @@ -88,7 +88,7 @@ class AttachmentNode(AssetNode): def __init__(self, meta_data: Dict[str, Union[int, str, bool]], base_url: str): super().__init__(meta_data) - self.url: str = f"{base_url}/{self.id}" + self.url: str = f"{base_url}/{self.id_}" self.name = meta_data['name'] log.debug("Attachment node has generated url: %s", self.url) self._relative_path_prefix = f"{_ATTACHMENT_DIR_NAME}" @@ -140,7 +140,7 @@ def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNo def get_asset_data(self, asset_type: str, meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]: """Get asset data based on type""" - data_url = f"{self.api_urls[asset_type]}/{meta_data.id}" + data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}" asset_data_response: Response = common_util.http_get_request( data_url, self._headers, @@ -164,6 +164,7 @@ def update_asset_links(self, asset_type, page_name: str, page_data: bytes, asset_nodes: List[ImageNode | AttachmentNode]) -> bytes: """update markdown links in page data""" for asset_node in asset_nodes: + # get metadata instead of raw data/bytes asset_data = self.get_asset_data(asset_type, asset_node) asset_node.set_markdown_content(asset_data) if not asset_node.markdown_str: diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index 4398e27..84ba201 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -1,10 +1,14 @@ from typing import Union, List, Dict - +import logging +# pylint: disable=import-error +from requests.exceptions import HTTPError from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.archiver import util as archiver_util from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode from bookstack_file_exporter.config_helper.config_helper import ConfigNode +log = logging.getLogger(__name__) + _META_FILE_SUFFIX = "_meta.json" _TAR_SUFFIX = ".tar" _TAR_GZ_SUFFIX = ".tgz" @@ -70,6 +74,19 @@ def archive_pages(self, page_nodes: Dict[int, Node]): page_images = image_nodes[page.id_] if page.id_ in attachment_nodes: page_attachments = attachment_nodes[page.id_] + failed_images = self.archive_page_assets("images", page.parent.file_path, + page.name, page_images) + failed_attach = self.archive_page_assets("attachments", page.parent.file_path, + page.name, page_attachments) + # exclude from page_images + # so it doesn't attempt to get modified in markdown file + if failed_images: + page_images = [img for img in page_images if img.id_ not in failed_images] + # exclude from page_attachments + # so it doesn't attempt to get modified in markdown file + if failed_attach: + page_attachments = [attach for attach in page_attachments + if attach.id_ not in failed_attach] for export_format in self.export_formats: page_data = self._get_page_data(page.id_, export_format) if page_images and export_format == 'markdown': @@ -80,10 +97,6 @@ def archive_pages(self, page_nodes: Dict[int, Node]): page_data, page_attachments) self._archive_page(page, export_format, page_data) - self.archive_page_assets("images", page.parent.file_path, - page.name, page_images) - self.archive_page_assets("attachments", page.parent.file_path, - page.name, page_attachments) if self.asset_config.export_meta: self._archive_page_meta(page.file_path, page.meta) @@ -123,15 +136,28 @@ def _modify_markdown(self, asset_type: str, asset_nodes) def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str, - asset_nodes: List[ImageNode | AttachmentNode]): + asset_nodes: List[ImageNode | AttachmentNode]) -> Dict[int, int]: """pull images locally into a directory based on page""" if not asset_nodes: - return + return {} + # use a map for faster lookup + failed_assets = {} node_base_path = f"{self.archive_base_path}/{parent_path}/" for asset_node in asset_nodes: - asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url) + try: + asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url) + except HTTPError: + # probably unnecessary, but just in case + if asset_node.id_ not in failed_assets: + failed_assets[asset_node.id_] = 0 + # a 404 or other error occurred + # skip this asset + log.error("Failed to get image or attachment data " \ + "for asset located at: %s - skipping", asset_node.url) + continue asset_path = f"{node_base_path}/{asset_node.get_relative_path(page_name)}" self.write_data(asset_path, asset_data) + return failed_assets def write_data(self, file_path: str, data: bytes): """write data to a tar file diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index 6fa5a86..5abe41d 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -38,6 +38,7 @@ def __init__(self, meta: Dict[str, Union[str, int]], # for convenience/usage for exporter # self.name: str = self.meta['slug'] self.name = self.get_name(self.meta['slug'], self.meta['name']) + # id() is a built-in function and should not be used as a variable name self.id_: int = self.meta['id'] self._display_name = self.meta['name'] # children diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index 2ad787c..f63a215 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -53,4 +53,5 @@ def exporter(args: argparse.Namespace): # clean up the .tgz archive since it is already uploaded archive.clean_up() + log.info("Created file archive: %s.tgz", archive.archive_dir) log.info("Completed run") diff --git a/setup.cfg b/setup.cfg index ef0c2bf..2cc3992 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,10 +17,10 @@ classifiers = [options] python_requires = >=3.8 install_requires = - Pyyaml >= 6.0.1 # https://pypi.org/project/PyYAML/ - Pydantic >= 2.8.2 # https://docs.pydantic.dev/latest/ + Pyyaml >= 6.0.2 # https://pypi.org/project/PyYAML/ + Pydantic >= 2.9.2 # https://docs.pydantic.dev/latest/ requests >= 2.32.3 # https://pypi.org/project/requests/ - minio >= 7.2.7 # https://pypi.org/project/minio/ + minio >= 7.2.10 # https://pypi.org/project/minio/ packages = find: [options.entry_points]