1
1
from typing import Union , List , Dict
2
-
2
+ import logging
3
3
from bookstack_file_exporter .exporter .node import Node
4
4
from bookstack_file_exporter .archiver import util as archiver_util
5
5
from bookstack_file_exporter .archiver .asset_archiver import AssetArchiver , ImageNode , AttachmentNode
6
6
from bookstack_file_exporter .config_helper .config_helper import ConfigNode
7
7
8
+ log = logging .getLogger (__name__ )
9
+
8
10
_META_FILE_SUFFIX = "_meta.json"
9
11
_TAR_SUFFIX = ".tar"
10
12
_TAR_GZ_SUFFIX = ".tgz"
@@ -70,6 +72,16 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
70
72
page_images = image_nodes [page .id_ ]
71
73
if page .id_ in attachment_nodes :
72
74
page_attachments = attachment_nodes [page .id_ ]
75
+ failed_images = self .archive_page_assets ("images" , page .parent .file_path ,
76
+ page .name , page_images )
77
+ failed_attach = self .archive_page_assets ("attachments" , page .parent .file_path ,
78
+ page .name , page_attachments )
79
+ if failed_images :
80
+ # exclude from page_images so it doesn't attempt to get modified in markdown file
81
+ page_images = [img for img in page_images if img .id_ not in failed_images ]
82
+ if failed_attach :
83
+ # exclude from page_attachments so it doesn't attempt to get modified in markdown file
84
+ page_attachments = [attach for attach in page_attachments if attach .id_ not in failed_attach ]
73
85
for export_format in self .export_formats :
74
86
page_data = self ._get_page_data (page .id_ , export_format )
75
87
if page_images and export_format == 'markdown' :
@@ -80,10 +92,6 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
80
92
page_data , page_attachments )
81
93
self ._archive_page (page , export_format ,
82
94
page_data )
83
- self .archive_page_assets ("images" , page .parent .file_path ,
84
- page .name , page_images )
85
- self .archive_page_assets ("attachments" , page .parent .file_path ,
86
- page .name , page_attachments )
87
95
if self .asset_config .export_meta :
88
96
self ._archive_page_meta (page .file_path , page .meta )
89
97
@@ -123,15 +131,26 @@ def _modify_markdown(self, asset_type: str,
123
131
asset_nodes )
124
132
125
133
def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str,
                        asset_nodes: List[ImageNode | AttachmentNode]) -> Dict[int, int]:
    """Fetch each asset of a page and write it into the archive.

    Assets that cannot be fetched are skipped instead of aborting the whole
    export; their ids are reported back so the caller can leave the
    corresponding links untouched.

    Args:
        asset_type: asset kind forwarded to the asset archiver; callers in
            this file pass "images" or "attachments".
        parent_path: path of the page's parent, joined under
            ``self.archive_base_path``.
        page_name: page name handed to each node's ``get_relative_path``.
        asset_nodes: assets to fetch; an empty or ``None`` value is a no-op.

    Returns:
        A dict keyed by the ``id_`` of every asset whose download raised.
        The values are always 0; a dict is used only for fast membership
        tests. Empty when nothing failed or there was nothing to fetch.
    """
    if not asset_nodes:
        return {}
    # use a map for faster lookup by the caller
    failed_assets: Dict[int, int] = {}
    node_base_path = f"{self.archive_base_path}/{parent_path}/"
    for asset_node in asset_nodes:
        try:
            asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still stop the export instead of being
            # recorded as a failed download.
            failed_assets[asset_node.id_] = 0
            # a 404 or other error occurred, skip this asset
            log.error(
                "Failed to get image or attachment data for asset located at: %s - skipping",
                asset_node.url,
            )
            continue
        asset_path = f"{node_base_path}/{asset_node.get_relative_path(page_name)}"
        self.write_data(asset_path, asset_data)
    return failed_assets
135
154
136
155
def write_data (self , file_path : str , data : bytes ):
137
156
"""write data to a tar file
0 commit comments