Http input feature #62

Merged · 6 commits · Mar 7, 2025

6 changes: 4 additions & 2 deletions bookstack_file_exporter/archiver/archiver.py
@@ -9,6 +9,7 @@
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
from bookstack_file_exporter.common.util import HttpHelper

log = logging.getLogger(__name__)

@@ -22,17 +23,18 @@ class Archiver:

Args:
:config: <ConfigNode> = Configuration with user inputs and general options.
:http_client: <HttpHelper> = http helper functions with config from user inputs

Returns:
Archiver instance with attributes that are accessible
for use for handling bookstack exports and remote uploads.
"""
def __init__(self, config: ConfigNode):
def __init__(self, config: ConfigNode, http_client: HttpHelper):
self.config = config
# for convenience
self.base_dir = config.base_dir_name
self.archive_dir = self._generate_root_folder(self.base_dir)
self._page_archiver = PageArchiver(self.archive_dir, self.config)
self._page_archiver = PageArchiver(self.archive_dir, self.config, http_client)
self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}

def create_export_dir(self):
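For context, a minimal sketch of how the reworked constructor could be wired up; `config` is assumed to be an already-built ConfigNode, and the exact call site is not part of this diff:

from bookstack_file_exporter.archiver.archiver import Archiver
from bookstack_file_exporter.common.util import HttpHelper

# A single HttpHelper built from the ConfigNode's headers and http settings is
# shared with the Archiver, which in turn hands it down to PageArchiver.
http_client = HttpHelper(config.headers, config.user_inputs.http_config)
archiver = Archiver(config, http_client)
archiver.create_export_dir()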
29 changes: 10 additions & 19 deletions bookstack_file_exporter/archiver/asset_archiver.py
@@ -5,7 +5,7 @@
# pylint: disable=import-error
from requests import Response

from bookstack_file_exporter.common import util as common_util
from bookstack_file_exporter.common.util import HttpHelper

log = logging.getLogger(__name__)

@@ -112,47 +112,38 @@ class AssetArchiver:

Args:
:urls: <Dict[str, str]> = api urls for images and attachments
:headers: <Dict[str, str]> = http headers for api requests
:verify_ssl: <bool> = verify ssl for api requests
:http_client: <HttpHelper> = http helper functions with config from user inputs

Returns:
AssetArchiver instance for use in archiving images and attachments for a page
"""
def __init__(self, urls: Dict[str, str], headers: Dict[str, str],
verify_ssl: bool):
def __init__(self, urls: Dict[str, str], http_client: HttpHelper):
self.api_urls = urls
self.verify_ssl = verify_ssl
self._headers = headers
self._asset_map = {
'images': self._create_image_map,
'attachments': self._create_attachment_map
}
self.http_client = http_client

def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNode]:
"""Get image or attachment helpers for a page"""
asset_response: Response = common_util.http_get_request(
self.api_urls[asset_type],
self._headers,
self.verify_ssl)
asset_response: Response = self.http_client.http_get_request(
self.api_urls[asset_type])
asset_json = asset_response.json()['data']
return self._asset_map[asset_type](asset_json)

def get_asset_data(self, asset_type: str,
meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]:
"""Get asset data based on type"""
data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}"
asset_data_response: Response = common_util.http_get_request(
data_url,
self._headers,
self.verify_ssl)
asset_data_response: Response = self.http_client.http_get_request(
data_url)
return asset_data_response.json()

def get_asset_bytes(self, asset_type: str, url: str) -> bytes:
"""Get raw asset data"""
asset_response: Response = common_util.http_get_request(
url,
self._headers,
self.verify_ssl)
asset_response: Response = self.http_client.http_get_request(
url)
match asset_type:
case "images":
asset_data = asset_response.content
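A rough illustration of the updated call flow; `http_client` stands in for an already-constructed HttpHelper, and the URL values are only examples of what ConfigNode.urls would supply:

from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver

# Headers and verify_ssl no longer ride along on every call; the helper owns them.
api_urls = {
    "images": "https://bookstack.example.com/api/image-gallery",
    "attachments": "https://bookstack.example.com/api/attachments",
}
asset_archiver = AssetArchiver(api_urls, http_client)
image_nodes = asset_archiver.get_asset_nodes("images")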
15 changes: 8 additions & 7 deletions bookstack_file_exporter/archiver/page_archiver.py
@@ -6,6 +6,7 @@
from bookstack_file_exporter.archiver import util as archiver_util
from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
from bookstack_file_exporter.common.util import HttpHelper

log = logging.getLogger(__name__)

@@ -33,26 +34,26 @@ class PageArchiver:

Args:
:archive_dir: <str> = directory where data will be put into.

:config: <ConfigNode> = Configuration with user inputs and general options.
:http_client: <HttpHelper> = http helper functions with config from user inputs

Returns:
:PageArchiver: instance with methods to help collect page content from a Bookstack instance.
"""
def __init__(self, archive_dir: str, config: ConfigNode) -> None:
def __init__(self, archive_dir: str, config: ConfigNode, http_client: HttpHelper) -> None:
self.asset_config = config.user_inputs.assets
self.export_formats = config.user_inputs.formats
self.api_urls = config.urls
self._headers = config.headers
# full path, bookstack-<timestamp>, and .tgz extension
self.archive_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tgz']}"
# name of intermediate tar file before gzip
self.tar_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tar']}"
# name of the base folder to use within the tgz archive (internal tar layout)
self.archive_base_path = archive_dir.split("/")[-1]
self.modify_md: bool = self._check_md_modify()
self.asset_archiver = AssetArchiver(self.api_urls, self._headers,
self.verify_ssl)
self.asset_archiver = AssetArchiver(self.api_urls,
http_client)
self.http_client = http_client

def _check_md_modify(self) -> bool:
# check to ensure they have asset_config defined, could be None
@@ -107,8 +108,8 @@ def _archive_page(self, page: Node, export_format: str, data: bytes):

def _get_page_data(self, page_id: int, export_format: str) -> bytes:
url = f"{self.api_urls['pages']}/{page_id}/{_EXPORT_API_PATH}/{export_format}"
return archiver_util.get_byte_response(url=url, headers=self._headers,
verify_ssl=self.verify_ssl)
return archiver_util.get_byte_response(url=url,
http_client=self.http_client)

def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int]]):
meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}"
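Correspondingly, a brief sketch of constructing PageArchiver with the shared helper; `archive_dir`, `config`, and `http_client` are assumed from the surrounding project:

from bookstack_file_exporter.archiver.page_archiver import PageArchiver

# The same HttpHelper is reused; PageArchiver forwards it to its AssetArchiver
# and to archiver_util.get_byte_response when pulling page exports.
page_archiver = PageArchiver(archive_dir, config, http_client)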
6 changes: 3 additions & 3 deletions bookstack_file_exporter/archiver/util.py
@@ -9,13 +9,13 @@
import glob
from pathlib import Path

from bookstack_file_exporter.common import util
from bookstack_file_exporter.common.util import HttpHelper

log = logging.getLogger(__name__)

def get_byte_response(url: str, headers: Dict[str, str], verify_ssl: bool) -> bytes:
def get_byte_response(url: str, http_client: HttpHelper) -> bytes:
"""get byte response from http request"""
response = util.http_get_request(url=url, headers=headers, verify_ssl=verify_ssl)
response = http_client.http_get_request(url=url)
return response.content

# append to a tar file instead of creating files locally and then tar'ing after
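A short sketch of the updated helper signature; the page-export URL here is only an example of the kind of endpoint it gets called with:

from bookstack_file_exporter.archiver.util import get_byte_response

# `http_client` carries the headers, verify_ssl, timeout, and retry settings,
# so the call site only needs the target URL.
pdf_bytes = get_byte_response(
    url="https://bookstack.example.com/api/pages/42/export/pdf",
    http_client=http_client,
)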
98 changes: 63 additions & 35 deletions bookstack_file_exporter/common/util.py
@@ -1,44 +1,72 @@
import logging
from typing import Dict
import urllib3
# pylint: disable=import-error
import requests
# pylint: disable=import-error
from requests.adapters import HTTPAdapter, Retry

from bookstack_file_exporter.config_helper.models import HttpConfig

log = logging.getLogger(__name__)

def http_get_request(url: str, headers: Dict[str, str],
verify_ssl: bool, timeout: int = 30) -> requests.Response:
"""make http requests and return response object"""
url_prefix = should_verify(url)
try:
with requests.Session() as session:
# {backoff factor} * (2 ** ({number of previous retries}))
# {raise_on_status} if status falls in status_forcelist range
# and retries have been exhausted.
# {status_force_list} 413, 429, 503 defaults are overwritten with additional ones
retries = Retry(total=5,
backoff_factor=0.5,
raise_on_status=True,
status_forcelist=[413, 429, 500, 502, 503, 504])
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
response = session.get(url, headers=headers, verify=verify_ssl, timeout=timeout)
except Exception as req_err:
log.error("Failed to make request for %s", url)
raise req_err
try:
#raise_for_status() throws an exception on codes 400-599
response.raise_for_status()
except requests.exceptions.HTTPError as e:
# this means it either exceeded 50X retries in `http_get_request` handler
# or it returned a 40X which is not expected
log.error("Bookstack request failed with status code: %d on url: %s",
response.status_code, url)
raise e
return response

def should_verify(url: str) -> str:
"""check if http or https"""
if url.startswith("https"):
return "https://"
return "http://"
# disable TLS warnings if using verify_ssl=false
urllib3.disable_warnings()

class HttpHelper:
"""
HttpHelper provides an http request helper with config stored and retries built in

Args:
:headers: <Dict[str, str]> = all headers to use for http requests
:config: <HttpConfig> = Configuration with user inputs for http requests

Returns:
:HttpHelper: instance with methods to help with http requests.
"""
def __init__(self, headers: Dict[str, str],
config: HttpConfig):
self.backoff_factor = config.backoff_factor
self.retry_codes = config.retry_codes
self.retry_count = config.retry_count
self.http_timeout = config.timeout
self.verify_ssl = config.verify_ssl
self._headers = headers

# more details on options: https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html
def http_get_request(self, url: str) -> requests.Response:
"""make http requests and return response object"""
url_prefix = self.should_verify(url)
try:
with requests.Session() as session:
# {backoff factor} * (2 ** ({number of previous retries}))
# {raise_on_status} if status falls in status_forcelist range
# and retries have been exhausted.
# {status_force_list} 413, 429, 503 defaults are overwritten with additional ones
retries = Retry(total=self.retry_count,
backoff_factor=self.backoff_factor,
raise_on_status=True,
status_forcelist=self.retry_codes)
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
response = session.get(url, headers=self._headers, verify=self.verify_ssl,
timeout=self.http_timeout)
except Exception as req_err:
log.error("Failed to make request for %s", url)
raise req_err
try:
#raise_for_status() throws an exception on codes 400-599
response.raise_for_status()
except requests.exceptions.HTTPError as e:
# this means it either exceeded 50X retries in `http_get_request` handler
# or it returned a 40X which is not expected
log.error("Bookstack request failed with status code: %d on url: %s",
response.status_code, url)
raise e
return response

@staticmethod
def should_verify(url: str) -> str:
"""check if http or https"""
if url.startswith("https"):
return "https://"
return "http://"
4 changes: 2 additions & 2 deletions bookstack_file_exporter/config_helper/config_helper.py
@@ -105,8 +105,8 @@ def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]:
def _generate_headers(self) -> Dict[str, str]:
headers = {}
# add additional_headers provided by user
if self.user_inputs.additional_headers:
for key, value in self.user_inputs.additional_headers.items():
if self.user_inputs.http_config.additional_headers:
for key, value in self.user_inputs.http_config.additional_headers.items():
headers[key] = value

# add default headers
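In other words, user-supplied headers now live under the http_config block; a rough equivalent of the merge done here (the default and auth headers added further down in _generate_headers are not shown in this hunk):

# `user_inputs` is a parsed UserInput model.
headers: dict = {}
if user_inputs.http_config.additional_headers:
    for key, value in user_inputs.http_config.additional_headers.items():
        headers[key] = value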
15 changes: 12 additions & 3 deletions bookstack_file_exporter/config_helper/models.py
@@ -26,17 +26,26 @@ class Assets(BaseModel):
export_attachments: Optional[bool] = False
modify_markdown: Optional[bool] = False
export_meta: Optional[bool] = False
verify_ssl: Optional[bool] = True
# verify_ssl: Optional[bool] = True

class HttpConfig(BaseModel):
"""YAML schema for user provided http settings"""
verify_ssl: Optional[bool] = False
timeout: Optional[int] = 30
backoff_factor: Optional[float] = 2.5
retry_codes: Optional[List[int]] = [413, 429, 500, 502, 503, 504]
retry_count: Optional[int] = 5
additional_headers: Optional[Dict[str, str]] = {}

# pylint: disable=too-few-public-methods
class UserInput(BaseModel):
"""YAML schema for user provided configuration file"""
host: str
additional_headers: Optional[Dict[str, str]] = None
credentials: Optional[BookstackAccess] = None
formats: List[Literal["markdown", "html", "pdf", "plaintext"]]
output_path: Optional[str] = None
assets: Optional[Assets] = Assets()
minio: Optional[ObjectStorageConfig] = None
keep_last: Optional[int] = None
run_interval: Optional[int] = 0
run_interval: Optional[int] = 0
http_config: Optional[HttpConfig] = HttpConfig()
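Since HttpConfig is populated from the user's YAML, an `http_config:` section would map onto the model roughly as below (values shown are the defaults; the User-Agent header is just an example of an additional header):

from bookstack_file_exporter.config_helper.models import HttpConfig

# Every key is optional and falls back to the defaults declared on the model.
http_config = HttpConfig(**{
    "verify_ssl": False,
    "timeout": 30,
    "backoff_factor": 2.5,
    "retry_codes": [413, 429, 500, 502, 503, 504],
    "retry_count": 5,
    "additional_headers": {"User-Agent": "bookstack-file-exporter"},
})
print(http_config.retry_count)  # 5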
10 changes: 4 additions & 6 deletions bookstack_file_exporter/exporter/exporter.py
@@ -5,7 +5,7 @@
from requests import Response

from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.common import util
from bookstack_file_exporter.common.util import HttpHelper

log = logging.getLogger(__name__)

@@ -19,10 +19,9 @@ class NodeExporter():
Returns:
NodeExporter instance to handle building shelve/book/chapter/page relations.
"""
def __init__(self, api_urls: Dict[str, str], headers: Dict[str,str], verify_ssl: bool):
def __init__(self, api_urls: Dict[str, str], http_client: HttpHelper):
self.api_urls = api_urls
self.headers = headers
self.verify_ssl = verify_ssl
self.http_client = http_client

def get_all_shelves(self) -> Dict[int, Node]:
"""
@@ -38,8 +37,7 @@ def get_all_shelves(self) -> Dict[int, Node]:

def _get_json_response(self, url: str) -> List[Dict[str, Union[str,int]]]:
"""get http response data in json format"""
response: Response = util.http_get_request(url=url, headers=self.headers,
verify_ssl=self.verify_ssl)
response: Response = self.http_client.http_get_request(url=url)
return response.json()

def _get_all_ids(self, url: str) -> List[int]:
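Finally, a matching sketch for the exporter side; `config.urls` and the shared `http_client` are assumed as in the earlier examples:

from bookstack_file_exporter.exporter.exporter import NodeExporter

# The exporter now receives the shared HttpHelper instead of raw headers/verify_ssl.
exporter = NodeExporter(config.urls, http_client)
shelves = exporter.get_all_shelves()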