homeylab · pchang388 · Dec 19, 2023 · Dec 19, 2023
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -7,7 +7,7 @@
 	"features": {
 		"ghcr.io/devcontainers/features/python:1": {
 			"installTools": true,
-			"version": "3.12"
+			"version": "3.12.1"
 		}
 	},
 	"customizations": {

diff --git a/Makefile b/Makefile
@@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm
 IMAGE_NAME=homeylab/bookstack-file-exporter
 # keep this start sequence unique (IMAGE_TAG=)
 # github actions will use this to create a tag
-IMAGE_TAG=1.0.1
+IMAGE_TAG=1.0.2
 DOCKER_WORK_DIR=/export
 DOCKER_CONFIG_DIR=/export/config
 DOCKER_EXPORT_DIR=/export/dump

diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py
@@ -52,15 +52,15 @@ def get_image_relative_path(self, page_name: str) -> str:
         """return image path local to page directory"""
         return f"{self._relative_path_prefix}/{page_name}/{self.name}"
 
+    def set_markdown_content(self, img_details: Dict[str, Union[int, str]]):
+        """provide image metadata to set markdown properties"""
+        self._markdown_str = self._get_md_url_str(img_details)
+
     @property
     def markdown_str(self):
         """return markdown url str to replace"""
         return self._markdown_str
 
-    def set_markdown_content(self, img_details: Dict[str, Union[int, str]]):
-        """provide image metadata to set markdown properties"""
-        self._markdown_str = self._get_md_url_str(img_details)
-
     @staticmethod
     def _get_md_url_str(img_data: Dict[str, Union[int, str]]) -> str:
         url_str = ""
@@ -70,6 +70,8 @@ def _get_md_url_str(img_data: Dict[str, Union[int, str]]) -> str:
         # check to see if empty before doing find
         if not url_str:
             return ""
+        # extract the link between the first "(" and the first ")"
+        # - markdown format
         return url_str[url_str.find("(")+1:url_str.find(")")]
 
 # pylint: disable=too-many-instance-attributes

diff --git a/bookstack_file_exporter/common/util.py b/bookstack_file_exporter/common/util.py
@@ -26,6 +26,15 @@ def http_get_request(url: str, headers: Dict[str, str],
     except Exception as req_err:
         log.error("Failed to make request for %s", url)
         raise req_err
+    try:
+        # raise_for_status() raises an HTTPError on status codes 400-599
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        # this means either the retries on 50X responses in the `http_get_request`
+        # handler were exhausted, or it returned a 40X, which is not expected
+        log.error("Bookstack request failed with status code: %d on url: %s",
+                   response.status_code, url)
+        raise e
     return response
 
 def should_verify(url: str) -> str:

diff --git a/bookstack_file_exporter/config_helper/config_helper.py b/bookstack_file_exporter/config_helper/config_helper.py
@@ -135,6 +135,7 @@ def _generate_urls(self) -> Dict[str, str]:
             url_prefix = ""
         for key, value in _API_PATHS.items():
             urls[key] = f"{url_prefix}{self.user_inputs.host}/{value}"
+        log.debug("api urls: %s", urls)
         return urls
 
     def _set_base_dir(self, cmd_output_dir: str) -> str:

diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py
@@ -1,6 +1,9 @@
 from typing import Dict, List, Union
 import logging
 
+# pylint: disable=import-error
+from requests import Response
+
 from bookstack_file_exporter.exporter.node import Node
 from bookstack_file_exporter.common import util
 
@@ -35,7 +38,7 @@ def get_all_shelves(self) -> Dict[int, Node]:
 
     def _get_json_response(self, url: str) -> List[Dict[str, Union[str,int]]]:
         """get http response data in json format"""
-        response = util.http_get_request(url=url, headers=self.headers,
+        response: Response = util.http_get_request(url=url, headers=self.headers,
                                         verify_ssl=self.verify_ssl)
         return response.json()
 
@@ -77,42 +80,52 @@ def _get_chapters(self, base_url: str, all_chapters: List[int],
         return chapter_nodes
 
     def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node],
-                        filter_empty: bool = True) -> Dict[int, Node]:
+                        filter_empty: bool = True, node_type: str = "") -> Dict[int, Node]:
         """get child nodes from a book/chapter/shelf"""
         base_url = self.api_urls[resource_type]
-        return self._get_children(base_url, parent_nodes, filter_empty)
+        return self._get_children(base_url, parent_nodes, filter_empty, node_type)
 
     def _get_children(self, base_url: str, parent_nodes: Dict[int, Node],
-                       filter_empty: bool) -> Dict[int, Node]:
+                       filter_empty: bool, node_type: str = "") -> Dict[int, Node]:
         child_nodes = {}
         for _, parent in parent_nodes.items():
             if parent.children:
                 for child in parent.children:
+                    if node_type:
+                        # only used for Book Nodes to get children Page/Chapter Nodes
+                        # access key directly, don't create a Node if not needed
+                        # chapters and pages always have `type` from what I can tell
+                        if not child['type'] == node_type:
+                            log.debug("Book Node child of type: %s is not desired type: %s",
+                                       child['type'], node_type)
+                            continue
                     child_id = child['id']
                     child_url = f"{base_url}/{child_id}"
                     child_data = self._get_json_response(child_url)
                     child_node = Node(child_data, parent)
                     if filter_empty:
+                        # if it is not empty, add it
+                        # skip it if empty
                         if not child_node.empty:
                             child_nodes[child_id] = child_node
                     else:
                         child_nodes[child_id] = child_node
         return child_nodes
 
-    def get_unassigned_books(self, existing_resources: Dict[int, Node],
+    def get_unassigned_books(self, existing_books: Dict[int, Node],
                               path_prefix: str) -> Dict[int, Node]:
         """get books not under a shelf"""
-        base_url = self.api_urls["books"]
-        all_resources: List[int] = self._get_all_ids(base_url)
+        book_url = self.api_urls["books"]
+        all_books: List[int] = self._get_all_ids(book_url)
         unassigned = []
-        # get all existing ones and compare against current known resources
-        for resource_id in all_resources:
-            if resource_id not in existing_resources:
-                unassigned.append(resource_id)
+        # get all existing ones and compare against current known books
+        for book in all_books:
+            if book not in existing_books:
+                unassigned.append(book)
         if not unassigned:
             return {}
         # books with no shelf treated like a parent resource
-        return self._get_parents(base_url, unassigned, path_prefix)
+        return self._get_parents(book_url, unassigned, path_prefix)
 
     # convenience function
     def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> Dict[int, Node]:
@@ -140,7 +153,10 @@ def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]:
         ## pages
         page_nodes = {}
         if book_nodes:
-            page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes)
+            # add `page` flag, we only want pages
+            # filter out chapters for now
+            # chapters can have their own children/pages
+            page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes, node_type="page")
         ## chapters (if exists)
         # chapter nodes are treated a little differently
         # chapters are children under books