Skip to content

Commit 92af282

Browse files
committed
Attempt to add logic to skip an image or attachment export if its API call fails
1 parent b68a959 commit 92af282

File tree

4 files changed

+33
-11
lines changed

4 files changed

+33
-11
lines changed

bookstack_file_exporter/archiver/asset_archiver.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class AssetNode:
2424
AssetNode instance for use in other classes
2525
"""
2626
def __init__(self, meta_data: Dict[str, int | str | bool]):
27-
self.id: int = meta_data['id']
27+
self.id_: int = meta_data['id']
2828
self.page_id: int = meta_data['uploaded_to']
2929
self.url: str = ""
3030
self.name: str = ""
@@ -88,7 +88,7 @@ class AttachmentNode(AssetNode):
8888
def __init__(self, meta_data: Dict[str, Union[int, str, bool]],
8989
base_url: str):
9090
super().__init__(meta_data)
91-
self.url: str = f"{base_url}/{self.id}"
91+
self.url: str = f"{base_url}/{self.id_}"
9292
self.name = meta_data['name']
9393
log.debug("Attachment node has generated url: %s", self.url)
9494
self._relative_path_prefix = f"{_ATTACHMENT_DIR_NAME}"
@@ -140,7 +140,7 @@ def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNo
140140
def get_asset_data(self, asset_type: str,
141141
meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]:
142142
"""Get asset data based on type"""
143-
data_url = f"{self.api_urls[asset_type]}/{meta_data.id}"
143+
data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}"
144144
asset_data_response: Response = common_util.http_get_request(
145145
data_url,
146146
self._headers,
@@ -164,6 +164,7 @@ def update_asset_links(self, asset_type, page_name: str, page_data: bytes,
164164
asset_nodes: List[ImageNode | AttachmentNode]) -> bytes:
165165
"""update markdown links in page data"""
166166
for asset_node in asset_nodes:
167+
# get metadata instead of raw data/bytes
167168
asset_data = self.get_asset_data(asset_type, asset_node)
168169
asset_node.set_markdown_content(asset_data)
169170
if not asset_node.markdown_str:

bookstack_file_exporter/archiver/page_archiver.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from typing import Union, List, Dict
2-
2+
import logging
33
from bookstack_file_exporter.exporter.node import Node
44
from bookstack_file_exporter.archiver import util as archiver_util
55
from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode
66
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
77

8+
log = logging.getLogger(__name__)
9+
810
_META_FILE_SUFFIX = "_meta.json"
911
_TAR_SUFFIX = ".tar"
1012
_TAR_GZ_SUFFIX = ".tgz"
@@ -70,6 +72,16 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
7072
page_images = image_nodes[page.id_]
7173
if page.id_ in attachment_nodes:
7274
page_attachments = attachment_nodes[page.id_]
75+
failed_images = self.archive_page_assets("images", page.parent.file_path,
76+
page.name, page_images)
77+
failed_attach = self.archive_page_assets("attachments", page.parent.file_path,
78+
page.name, page_attachments)
79+
if failed_images:
80+
# exclude from page_images so it doesn't attempt to get modified in markdown file
81+
page_images = [img for img in page_images if img.id_ not in failed_images]
82+
if failed_attach:
83+
# exclude from page_attachments so it doesn't attempt to get modified in markdown file
84+
page_attachments = [attach for attach in page_attachments if attach.id_ not in failed_attach]
7385
for export_format in self.export_formats:
7486
page_data = self._get_page_data(page.id_, export_format)
7587
if page_images and export_format == 'markdown':
@@ -80,10 +92,6 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
8092
page_data, page_attachments)
8193
self._archive_page(page, export_format,
8294
page_data)
83-
self.archive_page_assets("images", page.parent.file_path,
84-
page.name, page_images)
85-
self.archive_page_assets("attachments", page.parent.file_path,
86-
page.name, page_attachments)
8795
if self.asset_config.export_meta:
8896
self._archive_page_meta(page.file_path, page.meta)
8997

@@ -123,15 +131,26 @@ def _modify_markdown(self, asset_type: str,
123131
asset_nodes)
124132

125133
def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str,
126-
asset_nodes: List[ImageNode | AttachmentNode]):
134+
asset_nodes: List[ImageNode | AttachmentNode]) -> Dict[int, int]:
127135
"""pull images locally into a directory based on page"""
128136
if not asset_nodes:
129-
return
137+
return {}
138+
# use a map for faster lookup
139+
failed_assets = {}
130140
node_base_path = f"{self.archive_base_path}/{parent_path}/"
131141
for asset_node in asset_nodes:
132-
asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url)
142+
try:
143+
asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url)
144+
except:
145+
# probably unnecessary, but just in case
146+
if asset_node.id_ not in failed_assets:
147+
failed_assets[asset_node.id_] = 0
148+
# a 404 or other error occurred, skip this asset, already logged in http request exception
149+
log.error(f"Failed to get image or attachment data for asset located at: {asset_node.url} - skipping")
150+
continue
133151
asset_path = f"{node_base_path}/{asset_node.get_relative_path(page_name)}"
134152
self.write_data(asset_path, asset_data)
153+
return failed_assets
135154

136155
def write_data(self, file_path: str, data: bytes):
137156
"""write data to a tar file

bookstack_file_exporter/exporter/node.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def __init__(self, meta: Dict[str, Union[str, int]],
3838
# for convenience/usage for exporter
3939
# self.name: str = self.meta['slug']
4040
self.name = self.get_name(self.meta['slug'], self.meta['name'])
41+
# id() is a built-in function and should not be used as a variable name
4142
self.id_: int = self.meta['id']
4243
self._display_name = self.meta['name']
4344
# children

bookstack_file_exporter/run.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,5 @@ def exporter(args: argparse.Namespace):
5353
# clean up the .tgz archive since it is already uploaded
5454
archive.clean_up()
5555

56+
log.info(f"Created file archive: {archive.archive_dir}.tgz")
5657
log.info("Completed run")

0 commit comments

Comments
 (0)