Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"features": {
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12"
"version": "3.12.1"
}
},
"customizations": {
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm
IMAGE_NAME=homeylab/bookstack-file-exporter
# keep this start sequence unique (IMAGE_TAG=)
# github actions will use this to create a tag
IMAGE_TAG=1.0.1
IMAGE_TAG=1.0.2
DOCKER_WORK_DIR=/export
DOCKER_CONFIG_DIR=/export/config
DOCKER_EXPORT_DIR=/export/dump
Expand Down
10 changes: 6 additions & 4 deletions bookstack_file_exporter/archiver/page_archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ def get_image_relative_path(self, page_name: str) -> str:
"""return image path local to page directory"""
return f"{self._relative_path_prefix}/{page_name}/{self.name}"

@property
def markdown_str(self):
    """return markdown url str to replace

    Read-only access to the value most recently stored by
    `set_markdown_content`.
    """
    return self._markdown_str

def set_markdown_content(self, img_details: Dict[str, Union[int, str]]):
    """provide image metadata to set markdown properties

    Args:
        img_details: image metadata, passed unchanged to `_get_md_url_str`.

    The string returned by `_get_md_url_str` is stored on the instance and
    exposed through the `markdown_str` property.
    """
    # this method is defined once, after the property; the earlier copy of
    # the same definition was the pre-move location in the diff
    self._markdown_str = self._get_md_url_str(img_details)

@staticmethod
def _get_md_url_str(img_data: Dict[str, Union[int, str]]) -> str:
url_str = ""
Expand All @@ -70,6 +70,8 @@ def _get_md_url_str(img_data: Dict[str, Union[int, str]]) -> str:
# check to see if empty before doing find
if not url_str:
return ""
# find the link between two parentheses
# - markdown format
return url_str[url_str.find("(")+1:url_str.find(")")]

# pylint: disable=too-many-instance-attributes
Expand Down
9 changes: 9 additions & 0 deletions bookstack_file_exporter/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ def http_get_request(url: str, headers: Dict[str, str],
except Exception as req_err:
log.error("Failed to make request for %s", url)
raise req_err
try:
#raise_for_status() throws an exception on codes 400-599
response.raise_for_status()
except requests.exceptions.HTTPError as e:
# this means it either exceeded 50X retries in `http_get_request` handler
# or it returned a 40X which is not expected
log.error("Bookstack request failed with status code: %d on url: %s",
response.status_code, url)
raise e
return response

def should_verify(url: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions bookstack_file_exporter/config_helper/config_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def _generate_urls(self) -> Dict[str, str]:
url_prefix = ""
for key, value in _API_PATHS.items():
urls[key] = f"{url_prefix}{self.user_inputs.host}/{value}"
log.debug("api urls: %s", urls)
return urls

def _set_base_dir(self, cmd_output_dir: str) -> str:
Expand Down
42 changes: 29 additions & 13 deletions bookstack_file_exporter/exporter/exporter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import Dict, List, Union
import logging

# pylint: disable=import-error
from requests import Response

from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.common import util

Expand Down Expand Up @@ -35,7 +38,7 @@ def get_all_shelves(self) -> Dict[int, Node]:

def _get_json_response(self, url: str) -> List[Dict[str, Union[str,int]]]:
    """get http response data in json format

    Args:
        url: address to request; sent with this instance's `headers` and
            `verify_ssl` settings.

    Returns:
        The decoded JSON body of the response.

    Nothing is caught here, so any exception raised by
    `util.http_get_request` or by JSON decoding propagates to the caller.
    """
    # single assignment: the un-annotated duplicate of this call was the
    # superseded line from the diff
    response: Response = util.http_get_request(url=url, headers=self.headers,
                                               verify_ssl=self.verify_ssl)
    return response.json()

Expand Down Expand Up @@ -77,42 +80,52 @@ def _get_chapters(self, base_url: str, all_chapters: List[int],
return chapter_nodes

def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node],
                    filter_empty: bool = True, node_type: str = "") -> Dict[int, Node]:
    """get child nodes from a book/chapter/shelf

    Args:
        resource_type: key into `self.api_urls` selecting the base url used
            to fetch each child.
        parent_nodes: parents whose `children` entries are looked up.
        filter_empty: when True, children whose Node reports `empty` are
            left out of the result.
        node_type: when non-empty, only children whose `type` equals this
            value are fetched; the default "" disables the check.

    Returns:
        Mapping of child id to the Node built for it.
    """
    base_url = self.api_urls[resource_type]
    return self._get_children(base_url, parent_nodes, filter_empty, node_type)

def _get_children(self, base_url: str, parent_nodes: Dict[int, Node],
                  filter_empty: bool, node_type: str = "") -> Dict[int, Node]:
    """fetch every child listed under the given parents and wrap it in a Node

    Args:
        base_url: url prefix; each child is requested at `{base_url}/{id}`.
        parent_nodes: parents to walk; only their values are used.
        filter_empty: when True, a child whose Node reports `empty` is
            dropped from the result.
        node_type: when non-empty, children whose `type` differs are skipped
            before any request is made for them.

    Returns:
        Mapping of child id to the Node built from its fetched data.
    """
    child_nodes = {}
    for parent in parent_nodes.values():
        # parents without children contribute nothing
        if not parent.children:
            continue
        for child in parent.children:
            # only used for Book Nodes to get children Page/Chapter Nodes
            # access key directly, don't create a Node if not needed
            # chapters and pages always have `type` from what I can tell
            if node_type and child['type'] != node_type:
                log.debug("Book Node child of type: %s is not desired type: %s",
                          child['type'], node_type)
                continue
            child_id = child['id']
            child_data = self._get_json_response(f"{base_url}/{child_id}")
            child_node = Node(child_data, parent)
            # skip it if empty
            if filter_empty and child_node.empty:
                continue
            child_nodes[child_id] = child_node
    return child_nodes

def get_unassigned_books(self, existing_books: Dict[int, Node],
                         path_prefix: str) -> Dict[int, Node]:
    """get books not under a shelf

    Args:
        existing_books: books already known, keyed by id; only membership
            of an id in this mapping is checked.
        path_prefix: passed through to `_get_parents` for the books found.

    Returns:
        An empty dict when every book id is already in `existing_books`,
        otherwise whatever `_get_parents` returns for the missing ids.
    """
    book_url = self.api_urls["books"]
    all_books: List[int] = self._get_all_ids(book_url)
    # get all existing ones and compare against current known books
    unassigned = [book for book in all_books if book not in existing_books]
    if not unassigned:
        return {}
    # books with no shelf treated like a parent resource
    return self._get_parents(book_url, unassigned, path_prefix)

# convenience function
def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> Dict[int, Node]:
Expand Down Expand Up @@ -140,7 +153,10 @@ def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]:
## pages
page_nodes = {}
if book_nodes:
page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes)
# add `page` flag, we only want pages
# filter out chapters for now
# chapters can have their own children/pages
page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes, node_type="page")
## chapters (if exists)
# chapter nodes are treated a little differently
# chapters are children under books
Expand Down