
Commit 7af17bf

repair:
1. nodriver helper: user_data_dir now works on Win11, Win10, macOS, and Ubuntu
2. the Chrome install path can be specified via the BROWSER_EXECUTABLE_PATH environment variable
1 parent 4123e73 commit 7af17bf
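
The second fix lets users point the scraper at a non-default Chrome/Chromium build without code changes. A minimal sketch of setting the override before the helper starts the browser; the path shown is a placeholder, not part of the commit:

```python
import os

# Hypothetical location; point this at whatever Chrome/Chromium binary
# you want the scraper to drive instead of the auto-detected one.
os.environ['BROWSER_EXECUTABLE_PATH'] = '/usr/bin/google-chrome-stable'
```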

File tree

3 files changed, +15 -16 lines


core/async_logger.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -3,7 +3,10 @@
 import sys, traceback
 
 
-base_directory = os.path.join(".", os.getenv("PROJECT_DIR", "work_dir"))
+# Use the parent of this file's directory as the project root
+_current_file_dir = os.path.dirname(os.path.abspath(__file__))
+_project_root = os.path.dirname(_current_file_dir)  # back up to the project root
+base_directory = os.path.join(_project_root, os.getenv("PROJECT_DIR", "work_dir"))
 os.makedirs(base_directory, exist_ok=True)
 wis_logger = get_logger(base_directory, "wiseflow_info_scraper")
 
```

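The old path was relative to the current working directory, so the log directory moved around depending on where the process was launched. Anchoring on `__file__` pins it to the repository layout instead. A minimal sketch of the difference, with illustrative directory names:

```python
import os

# Old behavior: "./work_dir" resolves against os.getcwd(), which varies
# with how the process is launched (IDE, cron, systemd, shell).
cwd_relative = os.path.join(".", "work_dir")

# New behavior: resolve against this file's location, so the result is
# stable no matter what the working directory is.
_current_file_dir = os.path.dirname(os.path.abspath(__file__))
_project_root = os.path.dirname(_current_file_dir)
stable = os.path.join(_project_root, "work_dir")

print(os.path.abspath(cwd_relative))  # depends on cwd
print(stable)                         # always <project_root>/work_dir
```
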
core/wis/nodriver_helper.py

Lines changed: 6 additions & 10 deletions
```diff
@@ -1,7 +1,7 @@
 from __future__ import annotations
 import asyncio
 import json
-# from typing import Callable, Dict, Any, List, Union
+import os
 from typing import Optional
 import nodriver as uc
 from pathlib import Path
@@ -42,17 +42,13 @@ async def start(self):
         """Start the browser"""
         # set up the browser config
         config = {
-            'user_data_dir': str(self.browser_data),  # use a single browser data directory
+            'user_data_dir': self.browser_data,  # use a single browser data directory
             'headless': False,
-            'browser_args': [
-                '--lang=zh-CN',
-                # '--no-sandbox',
-                '--disable-translate',  # disable translation
-                '--no-first-run',  # skip the first-run wizard
-                '--no-default-browser-check'
-            ]
+            'lang': 'zh-CN',
         }
-
+        if os.environ.get('BROWSER_EXECUTABLE_PATH'):
+            config['browser_executable_path'] = os.environ.get('BROWSER_EXECUTABLE_PATH')
+
         self.browser = await uc.start(**config)
 
     async def open_page(self, url: str = None):
```

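Putting both changes together: the helper now builds a small config dict and only pins `browser_executable_path` when the environment variable is set, letting nodriver auto-detect an installed browser otherwise. A standalone sketch of the same pattern, assuming nodriver is installed; the helper name and demo URL are mine, while the kwargs mirror the diff above:

```python
import os
import nodriver as uc

async def start_browser(user_data_dir: str) -> uc.Browser:
    # Mirrors the config built in nodriver_helper.start() above.
    config = {
        'user_data_dir': user_data_dir,  # passed as-is; the commit dropped the str() cast
        'headless': False,
        'lang': 'zh-CN',
    }
    # Only override the binary when the user asked for it via the
    # environment; otherwise nodriver locates Chrome/Chromium itself.
    exe = os.environ.get('BROWSER_EXECUTABLE_PATH')
    if exe:
        config['browser_executable_path'] = exe
    return await uc.start(**config)

async def main():
    browser = await start_browser('./browser_data')
    page = await browser.get('https://example.com')
    print(await page.evaluate('document.title'))

if __name__ == '__main__':
    uc.loop().run_until_complete(main())
```
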
test/mc_fetching_test.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -6,9 +6,9 @@
 import sys
 from datetime import datetime
 
-root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
+root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'core')
 sys.path.append(root_path)
-from core.wis import KuaiShouCrawler, WeiboCrawler, WeiboSearchType, WEIBO_PLATFORM_NAME, KUAISHOU_PLATFORM_NAME
+from wis import KuaiShouCrawler, WeiboCrawler, WeiboSearchType, WEIBO_PLATFORM_NAME, KUAISHOU_PLATFORM_NAME
 
 
 save_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'webpage_samples')
@@ -28,7 +28,7 @@ async def main(keywords: list,
     except Exception as e:
         print(e)
         return
-    albums, posts = await crawler.posts_list(keywords=keywords, creator_ids=creator_ids, existings=existings, limit_hours=limit_hours, search_type=search_type)
+    albums, posts = await crawler.posts_list(keywords=keywords, creator_ids=creator_ids, existings=existings)
    print(albums)
     time_stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
     albums_json = {
@@ -46,10 +46,10 @@ async def main(keywords: list,
         print("\n--- No posts found in posts to select from ---")
         return
 
-    article, ref = await crawler.post_as_article(selected_post)
+    article, ref = await crawler.as_article(selected_post)
     print(article)
     print(ref)
-    creator_info = await crawler.creator_as_article(selected_post.get("user_id"))
+    creator_info = await crawler.as_creator(selected_post.get("user_id"))
     print(creator_info)
     article_json = {
         "article": article,
```

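For context, the import change in the test follows the usual sys.path pattern: putting core/ itself on the path means the test imports `wis` the same way modules inside core/ do. A minimal sketch, with names as in the diff:

```python
import os
import sys

# Append <repo>/core rather than <repo>, so `import wis` resolves the
# same way it does for code living inside core/ (no `core.` prefix).
core_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'core')
sys.path.append(core_path)

from wis import KuaiShouCrawler, WeiboCrawler  # now importable as top-level
```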