Spaces:
Paused
Paused
Upload 10 files
Browse files- browser/instance.py +20 -5
- browser/navigation.py +22 -18
- browser/ws_helper.py +141 -34
- utils/common.py +48 -1
- utils/logger.py +12 -5
browser/instance.py
CHANGED
|
@@ -7,8 +7,9 @@ from utils.cookie_manager import CookieManager
|
|
| 7 |
from browser.navigation import handle_successful_navigation, KeepAliveError
|
| 8 |
from browser.cookie_validator import CookieValidator
|
| 9 |
from camoufox.sync_api import Camoufox
|
|
|
|
| 10 |
from utils.paths import logs_dir
|
| 11 |
-
from utils.common import parse_headless_mode, ensure_dir
|
| 12 |
from utils.url_helper import extract_url_path, mask_url_for_logging, mask_path_for_logging
|
| 13 |
|
| 14 |
|
|
@@ -64,8 +65,13 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 64 |
# launch_options["block_images"] = True # 禁用图片加载
|
| 65 |
|
| 66 |
if proxy:
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
screenshot_dir = logs_dir()
|
| 71 |
ensure_dir(screenshot_dir)
|
|
@@ -152,6 +158,10 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 152 |
logger.info(f"已截取网络错误时的屏幕快照: {screenshot_path}")
|
| 153 |
except Exception as diag_e:
|
| 154 |
logger.error(f"在尝试进行网络错误诊断(截图)时发生额外错误: {diag_e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
return # 网络错误,终止
|
| 156 |
|
| 157 |
# --- 如果导航没有抛出异常,继续执行后续逻辑 ---
|
|
@@ -236,7 +246,12 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 236 |
retry_count = 0
|
| 237 |
return
|
| 238 |
|
| 239 |
-
except KeepAliveError as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
retry_count += 1
|
| 241 |
if retry_count > max_retries:
|
| 242 |
logger.error(f"重试次数已达上限 ({max_retries}),实例不再重启,退出")
|
|
@@ -244,7 +259,7 @@ def run_browser_instance(config, shutdown_event=None):
|
|
| 244 |
|
| 245 |
# 指数退避:3秒、6秒、12秒、24秒...最长60秒
|
| 246 |
delay = min(base_delay * (2 ** (retry_count - 1)), 60)
|
| 247 |
-
logger.error(f"浏览器实例出现错误 (重试 {retry_count}/{max_retries}),将在 {delay} 秒后重启浏览器实例: {
|
| 248 |
time.sleep(delay)
|
| 249 |
continue
|
| 250 |
except KeyboardInterrupt:
|
|
|
|
| 7 |
from browser.navigation import handle_successful_navigation, KeepAliveError
|
| 8 |
from browser.cookie_validator import CookieValidator
|
| 9 |
from camoufox.sync_api import Camoufox
|
| 10 |
+
from camoufox.exceptions import InvalidIP, InvalidProxy
|
| 11 |
from utils.paths import logs_dir
|
| 12 |
+
from utils.common import parse_headless_mode, ensure_dir, parse_proxy_config
|
| 13 |
from utils.url_helper import extract_url_path, mask_url_for_logging, mask_path_for_logging
|
| 14 |
|
| 15 |
|
|
|
|
| 65 |
# launch_options["block_images"] = True # 禁用图片加载
|
| 66 |
|
| 67 |
if proxy:
|
| 68 |
+
proxy_config = parse_proxy_config(proxy)
|
| 69 |
+
if not proxy_config:
|
| 70 |
+
logger.error("错误: 代理配置无效,无法启动浏览器实例")
|
| 71 |
+
return
|
| 72 |
+
logger.info(f"使用代理: {proxy_config.get('server', proxy)} 访问")
|
| 73 |
+
launch_options["proxy"] = proxy_config
|
| 74 |
+
launch_options["geoip"] = True
|
| 75 |
|
| 76 |
screenshot_dir = logs_dir()
|
| 77 |
ensure_dir(screenshot_dir)
|
|
|
|
| 158 |
logger.info(f"已截取网络错误时的屏幕快照: {screenshot_path}")
|
| 159 |
except Exception as diag_e:
|
| 160 |
logger.error(f"在尝试进行网络错误诊断(截图)时发生额外错误: {diag_e}")
|
| 161 |
+
|
| 162 |
+
error_message_lower = error_message.lower()
|
| 163 |
+
if "proxy" in error_message_lower or "ns_error_proxy" in error_message_lower or "err_proxy" in error_message_lower:
|
| 164 |
+
raise KeepAliveError(f"代理连接错误: {error_message}")
|
| 165 |
return # 网络错误,终止
|
| 166 |
|
| 167 |
# --- 如果导航没有抛出异常,继续执行后续逻辑 ---
|
|
|
|
| 246 |
retry_count = 0
|
| 247 |
return
|
| 248 |
|
| 249 |
+
except (InvalidProxy, InvalidIP, KeepAliveError) as e:
|
| 250 |
+
if isinstance(e, (InvalidProxy, InvalidIP)):
|
| 251 |
+
retry_reason = f"代理/GeoIP 处理失败: {e}"
|
| 252 |
+
logger.error(retry_reason)
|
| 253 |
+
else:
|
| 254 |
+
retry_reason = str(e)
|
| 255 |
retry_count += 1
|
| 256 |
if retry_count > max_retries:
|
| 257 |
logger.error(f"重试次数已达上限 ({max_retries}),实例不再重启,退出")
|
|
|
|
| 259 |
|
| 260 |
# 指数退避:3秒、6秒、12秒、24秒...最长60秒
|
| 261 |
delay = min(base_delay * (2 ** (retry_count - 1)), 60)
|
| 262 |
+
logger.error(f"浏览器实例出现错误 (重试 {retry_count}/{max_retries}),将在 {delay} 秒后重启浏览器实例: {retry_reason}")
|
| 263 |
time.sleep(delay)
|
| 264 |
continue
|
| 265 |
except KeyboardInterrupt:
|
browser/navigation.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
import time
|
| 2 |
import os
|
|
|
|
| 3 |
from playwright.sync_api import Page, expect
|
| 4 |
from utils.paths import logs_dir
|
| 5 |
from utils.common import ensure_dir
|
| 6 |
-
from browser.ws_helper import reconnect_ws, get_ws_status, dismiss_interaction_modal, click_in_iframe
|
| 7 |
|
| 8 |
class KeepAliveError(Exception):
|
| 9 |
pass
|
|
@@ -75,13 +76,16 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdow
|
|
| 75 |
if cookie_validator:
|
| 76 |
logger.info("Cookie验证器已创建,将定期验证Cookie有效性")
|
| 77 |
|
| 78 |
-
logger.info("实例将保持运行状态。每
|
| 79 |
|
| 80 |
# 等待页面加载和渲染
|
| 81 |
time.sleep(15)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
| 83 |
# 记录初始WS状态
|
| 84 |
-
last_ws_status = get_ws_status(page, logger)
|
| 85 |
logger.info(f"初始WS状态: {last_ws_status}")
|
| 86 |
|
| 87 |
# 添加Cookie验证计数器
|
|
@@ -95,28 +99,27 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdow
|
|
| 95 |
|
| 96 |
try:
|
| 97 |
# 检测并关闭interaction-modal遮罩层(如果出现)
|
| 98 |
-
dismiss_interaction_modal(page, logger)
|
| 99 |
|
| 100 |
# 在iframe内随机移动并点击保活
|
| 101 |
-
click_in_iframe(page, logger)
|
| 102 |
click_counter += 1
|
| 103 |
|
| 104 |
# 检查WS状态是否发生变化
|
| 105 |
-
current_ws_status = get_ws_status(page, logger)
|
| 106 |
if current_ws_status != last_ws_status:
|
| 107 |
logger.warning(f"WS状态变更: {last_ws_status} -> {current_ws_status}")
|
| 108 |
-
|
| 109 |
-
# 如果
|
| 110 |
-
if current_ws_status
|
| 111 |
-
logger.info("WS
|
| 112 |
-
reconnect_ws(page, logger)
|
| 113 |
-
current_ws_status = get_ws_status(page, logger)
|
| 114 |
-
logger.info(f"重连后WS状态: {current_ws_status}")
|
| 115 |
|
| 116 |
last_ws_status = current_ws_status
|
| 117 |
|
| 118 |
-
# 每
|
| 119 |
-
if cookie_validator and click_counter >=
|
| 120 |
is_valid = cookie_validator.validate_cookies_in_main_thread()
|
| 121 |
|
| 122 |
if not is_valid:
|
|
@@ -125,8 +128,9 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdow
|
|
| 125 |
|
| 126 |
click_counter = 0 # 重置计数器
|
| 127 |
|
| 128 |
-
# 使用可中断的睡眠,每秒检查一次关闭信号
|
| 129 |
-
|
|
|
|
| 130 |
if shutdown_event and shutdown_event.is_set():
|
| 131 |
logger.info("收到关闭信号,正在优雅退出保持活动循环...")
|
| 132 |
return
|
|
@@ -143,4 +147,4 @@ def handle_successful_navigation(page: Page, logger, cookie_file_config, shutdow
|
|
| 143 |
logger.info(f"已在保持活动循环出错时截屏: {screenshot_filename}")
|
| 144 |
except Exception as screenshot_e:
|
| 145 |
logger.error(f"在保持活动循环出错时截屏失败: {screenshot_e}")
|
| 146 |
-
raise KeepAliveError(f"在保持活动循环时出错: {e}")
|
|
|
|
| 1 |
import time
|
| 2 |
import os
|
| 3 |
+
import random
|
| 4 |
from playwright.sync_api import Page, expect
|
| 5 |
from utils.paths import logs_dir
|
| 6 |
from utils.common import ensure_dir
|
| 7 |
+
from browser.ws_helper import reconnect_ws, get_ws_status, dismiss_interaction_modal, click_in_iframe, PageLocators
|
| 8 |
|
| 9 |
class KeepAliveError(Exception):
|
| 10 |
pass
|
|
|
|
| 76 |
if cookie_validator:
|
| 77 |
logger.info("Cookie验证器已创建,将定期验证Cookie有效性")
|
| 78 |
|
| 79 |
+
logger.info("实例将保持运行状态。每8-15秒随机点击一次页面以保持活动")
|
| 80 |
|
| 81 |
# 等待页面加载和渲染
|
| 82 |
time.sleep(15)
|
| 83 |
|
| 84 |
+
# 创建 PageLocators 缓存对象,复用 locator 避免内存泄漏
|
| 85 |
+
locators = PageLocators(page)
|
| 86 |
+
|
| 87 |
# 记录初始WS状态
|
| 88 |
+
last_ws_status = get_ws_status(page, logger, locators)
|
| 89 |
logger.info(f"初始WS状态: {last_ws_status}")
|
| 90 |
|
| 91 |
# 添加Cookie验证计数器
|
|
|
|
| 99 |
|
| 100 |
try:
|
| 101 |
# 检测并关闭interaction-modal遮罩层(如果出现)
|
| 102 |
+
dismiss_interaction_modal(page, logger, locators)
|
| 103 |
|
| 104 |
# 在iframe内随机移动并点击保活
|
| 105 |
+
click_in_iframe(page, logger, locators)
|
| 106 |
click_counter += 1
|
| 107 |
|
| 108 |
# 检查WS状态是否发生变化
|
| 109 |
+
current_ws_status = get_ws_status(page, logger, locators)
|
| 110 |
if current_ws_status != last_ws_status:
|
| 111 |
logger.warning(f"WS状态变更: {last_ws_status} -> {current_ws_status}")
|
| 112 |
+
|
| 113 |
+
# 如果状态变成IDLE,尝试重连
|
| 114 |
+
if current_ws_status == "IDLE":
|
| 115 |
+
logger.info("WS状态为IDLE,尝试重连...")
|
| 116 |
+
reconnect_ws(page, logger, locators)
|
| 117 |
+
current_ws_status = get_ws_status(page, logger, locators)
|
|
|
|
| 118 |
|
| 119 |
last_ws_status = current_ws_status
|
| 120 |
|
| 121 |
+
# 每720次点击(约2小时)执行一次完整的Cookie验证
|
| 122 |
+
if cookie_validator and click_counter >= 720: # 720 * ~10秒 ≈ 7200秒 ≈ 2小时
|
| 123 |
is_valid = cookie_validator.validate_cookies_in_main_thread()
|
| 124 |
|
| 125 |
if not is_valid:
|
|
|
|
| 128 |
|
| 129 |
click_counter = 0 # 重置计数器
|
| 130 |
|
| 131 |
+
# 使用可中断的随机睡眠(8-15秒),每秒检查一次关闭信号
|
| 132 |
+
sleep_duration = random.randint(8, 15)
|
| 133 |
+
for _ in range(sleep_duration):
|
| 134 |
if shutdown_event and shutdown_event.is_set():
|
| 135 |
logger.info("收到关闭信号,正在优雅退出保持活动循环...")
|
| 136 |
return
|
|
|
|
| 147 |
logger.info(f"已在保持活动循环出错时截屏: {screenshot_filename}")
|
| 148 |
except Exception as screenshot_e:
|
| 149 |
logger.error(f"在保持活动循环出错时截屏失败: {screenshot_e}")
|
| 150 |
+
raise KeepAliveError(f"在保持活动循环时出错: {e}")
|
browser/ws_helper.py
CHANGED
|
@@ -3,6 +3,65 @@ import random
|
|
| 3 |
from playwright.sync_api import Page, FrameLocator
|
| 4 |
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def get_preview_frame(page: Page, logger=None) -> FrameLocator:
|
| 7 |
"""
|
| 8 |
获取预览iframe的FrameLocator。
|
|
@@ -17,23 +76,31 @@ def get_preview_frame(page: Page, logger=None) -> FrameLocator:
|
|
| 17 |
return None
|
| 18 |
|
| 19 |
|
| 20 |
-
def get_ws_status(page: Page, logger=None) -> str:
|
| 21 |
"""
|
| 22 |
获取页面中WS连接状态(在iframe内部)。
|
| 23 |
-
返回: CONNECTED, IDLE, CONNECTING 或 UNKNOWN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"""
|
| 25 |
try:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
# 在iframe内查找包含 "WS:" 的状态文本元素
|
| 31 |
-
# 根据截图,状态显示为 "WS: CONNECTED" 等格式
|
| 32 |
-
status_element = frame.locator('text=/WS:\\s*(CONNECTED|IDLE|CONNECTING)/i').first
|
| 33 |
if status_element.is_visible(timeout=3000):
|
| 34 |
text = status_element.text_content()
|
| 35 |
if text:
|
| 36 |
-
if "
|
|
|
|
|
|
|
| 37 |
return "CONNECTED"
|
| 38 |
elif "IDLE" in text.upper():
|
| 39 |
return "IDLE"
|
|
@@ -46,16 +113,24 @@ def get_ws_status(page: Page, logger=None) -> str:
|
|
| 46 |
return "UNKNOWN"
|
| 47 |
|
| 48 |
|
| 49 |
-
def click_disconnect(page: Page, logger=None) -> bool:
|
| 50 |
"""
|
| 51 |
点击Disconnect按钮断开WS连接(在iframe内部)。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
"""
|
| 53 |
try:
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
disconnect_btn = frame.locator('button:has-text("Disconnect")')
|
| 59 |
if disconnect_btn.count() > 0 and disconnect_btn.first.is_visible(timeout=3000):
|
| 60 |
disconnect_btn.first.click(timeout=5000)
|
| 61 |
if logger:
|
|
@@ -71,16 +146,24 @@ def click_disconnect(page: Page, logger=None) -> bool:
|
|
| 71 |
return False
|
| 72 |
|
| 73 |
|
| 74 |
-
def click_connect(page: Page, logger=None) -> bool:
|
| 75 |
"""
|
| 76 |
点击Connect按钮建立WS连接(在iframe内部)。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
"""
|
| 78 |
try:
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
connect_btn = frame.locator('button:has-text("Connect")')
|
| 84 |
if connect_btn.count() > 0 and connect_btn.first.is_visible(timeout=3000):
|
| 85 |
connect_btn.first.click(timeout=5000)
|
| 86 |
if logger:
|
|
@@ -96,72 +179,87 @@ def click_connect(page: Page, logger=None) -> bool:
|
|
| 96 |
return False
|
| 97 |
|
| 98 |
|
| 99 |
-
def wait_for_ws_connected(page: Page, logger=None, timeout: int = 30) -> bool:
|
| 100 |
"""
|
| 101 |
等待WS状态变为CONNECTED。
|
| 102 |
"""
|
| 103 |
start_time = time.time()
|
| 104 |
while time.time() - start_time < timeout:
|
| 105 |
-
status = get_ws_status(page, logger)
|
| 106 |
if status == "CONNECTED":
|
| 107 |
return True
|
| 108 |
time.sleep(1)
|
| 109 |
return False
|
| 110 |
|
| 111 |
|
| 112 |
-
def reconnect_ws(page: Page, logger=None) -> str:
|
| 113 |
"""
|
| 114 |
执行断开再连接的流程,并返回最终WS状态。
|
| 115 |
流程:关闭遮罩 -> Disconnect -> 等待IDLE -> Connect -> 等待CONNECTED -> 获取状态
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
"""
|
| 117 |
if logger:
|
| 118 |
logger.info("开始执行WS重连流程: Disconnect -> Connect")
|
| 119 |
|
| 120 |
# 先关闭 interaction-modal 遮罩层(如果存在)
|
| 121 |
-
dismiss_interaction_modal(page, logger)
|
| 122 |
|
| 123 |
# 先断开连接
|
| 124 |
-
click_disconnect(page, logger)
|
| 125 |
time.sleep(2)
|
| 126 |
|
| 127 |
# 检查是否变为IDLE
|
| 128 |
-
status = get_ws_status(page, logger)
|
| 129 |
if logger:
|
| 130 |
logger.info(f"断开后WS状态: {status}")
|
| 131 |
|
| 132 |
# 再连接
|
| 133 |
-
click_connect(page, logger)
|
| 134 |
time.sleep(2)
|
| 135 |
|
| 136 |
# 等待连接成功
|
| 137 |
-
if wait_for_ws_connected(page, logger, timeout=15):
|
| 138 |
-
status = get_ws_status(page, logger)
|
| 139 |
if logger:
|
| 140 |
logger.info(f"重连后WS状态: {status}")
|
| 141 |
return status
|
| 142 |
else:
|
| 143 |
-
status = get_ws_status(page, logger)
|
| 144 |
if logger:
|
| 145 |
logger.warning(f"WS重连超时,当前状态: {status}")
|
| 146 |
return status
|
| 147 |
|
| 148 |
|
| 149 |
-
def dismiss_interaction_modal(page: Page, logger=None) -> bool:
|
| 150 |
"""
|
| 151 |
检测并关闭 interaction-modal 遮罩层。
|
| 152 |
通过在 iframe 区域内模拟鼠标移动来触发遮罩层关闭。
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
返回: True 如果成功关闭遮罩,False 如果未找到遮罩或关闭失败
|
| 155 |
"""
|
| 156 |
try:
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
if modal.count() == 0 or not modal.first.is_visible(timeout=500):
|
| 159 |
return False
|
| 160 |
|
| 161 |
if logger:
|
| 162 |
logger.info("检测到 interaction-modal 遮罩层,尝试关闭...")
|
| 163 |
|
| 164 |
-
iframe = page.locator('iframe[title="Preview"]')
|
| 165 |
if iframe.count() > 0:
|
| 166 |
iframe_box = iframe.first.bounding_box()
|
| 167 |
if iframe_box:
|
|
@@ -193,15 +291,24 @@ def dismiss_interaction_modal(page: Page, logger=None) -> bool:
|
|
| 193 |
return False
|
| 194 |
|
| 195 |
|
| 196 |
-
def click_in_iframe(page: Page, logger=None) -> bool:
|
| 197 |
"""
|
| 198 |
在 iframe 内随机移动鼠标并点击一次,用于保活。
|
| 199 |
避开顶部(状态栏和按钮区域)和右侧区域。
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
返回: True 如果成功点击,False 如果失败
|
| 202 |
"""
|
| 203 |
try:
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
if iframe.count() == 0:
|
| 206 |
return False
|
| 207 |
|
|
|
|
| 3 |
from playwright.sync_api import Page, FrameLocator
|
| 4 |
|
| 5 |
|
| 6 |
+
class PageLocators:
    """
    Lazily built, per-page cache of the Locator objects used by the keep-alive loop.

    Each property creates its locator on first access and returns the same
    object on every later access, so callers reuse one locator per selector
    instead of building a new one on each iteration (the original author's
    note cites memory growth from repeated creation as the motivation).
    """

    def __init__(self, page: "Page"):
        # The page that all cached locators are created from.
        self.page = page
        # One slot per cached locator; None means "not created yet".
        self._modal = None
        self._iframe = None
        self._frame = None
        self._ws_status = None
        self._disconnect_btn = None
        self._connect_btn = None

    def _lazy(self, slot, build):
        """Return the value stored in ``slot``, calling ``build()`` once to fill it if it is None."""
        value = getattr(self, slot)
        if value is None:
            value = build()
            setattr(self, slot, value)
        return value

    @property
    def modal(self):
        """Locator for the interaction-modal overlay on the page."""
        return self._lazy(
            "_modal",
            lambda: self.page.locator('div.interaction-modal'),
        )

    @property
    def iframe(self):
        """Locator for the Preview iframe element itself."""
        return self._lazy(
            "_iframe",
            lambda: self.page.locator('iframe[title="Preview"]'),
        )

    @property
    def frame(self):
        """FrameLocator used to reach content inside the Preview iframe."""
        return self._lazy(
            "_frame",
            lambda: self.page.frame_locator('iframe[title="Preview"]'),
        )

    @property
    def ws_status(self):
        """First element inside the Preview frame whose text matches the "WS: <STATE>" pattern."""
        return self._lazy(
            "_ws_status",
            lambda: self.frame.locator(
                'text=/WS:\\s*(CONNECTED|IDLE|CONNECTING|RECONNECTING)/i'
            ).first,
        )

    @property
    def disconnect_btn(self):
        """Locator for the Disconnect button inside the Preview frame."""
        return self._lazy(
            "_disconnect_btn",
            lambda: self.frame.locator('button:has-text("Disconnect")'),
        )

    @property
    def connect_btn(self):
        """Locator for the Connect button inside the Preview frame."""
        return self._lazy(
            "_connect_btn",
            lambda: self.frame.locator('button:has-text("Connect")'),
        )
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def get_preview_frame(page: Page, logger=None) -> FrameLocator:
|
| 66 |
"""
|
| 67 |
获取预览iframe的FrameLocator。
|
|
|
|
| 76 |
return None
|
| 77 |
|
| 78 |
|
| 79 |
+
def get_ws_status(page: Page, logger=None, locators: PageLocators = None) -> str:
|
| 80 |
"""
|
| 81 |
获取页面中WS连接状态(在iframe内部)。
|
| 82 |
+
返回: CONNECTED, IDLE, CONNECTING, RECONNECTING 或 UNKNOWN
|
| 83 |
+
|
| 84 |
+
Args:
|
| 85 |
+
page: Playwright Page 对象
|
| 86 |
+
logger: 日志记录器
|
| 87 |
+
locators: 可选的 PageLocators 缓存对象,传入可避免重复创建 locator
|
| 88 |
"""
|
| 89 |
try:
|
| 90 |
+
if locators:
|
| 91 |
+
status_element = locators.ws_status
|
| 92 |
+
else:
|
| 93 |
+
frame = get_preview_frame(page, logger)
|
| 94 |
+
if not frame:
|
| 95 |
+
return "UNKNOWN"
|
| 96 |
+
status_element = frame.locator('text=/WS:\\s*(CONNECTED|IDLE|CONNECTING|RECONNECTING)/i').first
|
| 97 |
|
|
|
|
|
|
|
|
|
|
| 98 |
if status_element.is_visible(timeout=3000):
|
| 99 |
text = status_element.text_content()
|
| 100 |
if text:
|
| 101 |
+
if "RECONNECTING" in text.upper():
|
| 102 |
+
return "RECONNECTING"
|
| 103 |
+
elif "CONNECTED" in text.upper():
|
| 104 |
return "CONNECTED"
|
| 105 |
elif "IDLE" in text.upper():
|
| 106 |
return "IDLE"
|
|
|
|
| 113 |
return "UNKNOWN"
|
| 114 |
|
| 115 |
|
| 116 |
+
def click_disconnect(page: Page, logger=None, locators: PageLocators = None) -> bool:
|
| 117 |
"""
|
| 118 |
点击Disconnect按钮断开WS连接(在iframe内部)。
|
| 119 |
+
|
| 120 |
+
Args:
|
| 121 |
+
page: Playwright Page 对象
|
| 122 |
+
logger: 日志记录器
|
| 123 |
+
locators: 可选的 PageLocators 缓存对象
|
| 124 |
"""
|
| 125 |
try:
|
| 126 |
+
if locators:
|
| 127 |
+
disconnect_btn = locators.disconnect_btn
|
| 128 |
+
else:
|
| 129 |
+
frame = get_preview_frame(page, logger)
|
| 130 |
+
if not frame:
|
| 131 |
+
return False
|
| 132 |
+
disconnect_btn = frame.locator('button:has-text("Disconnect")')
|
| 133 |
|
|
|
|
| 134 |
if disconnect_btn.count() > 0 and disconnect_btn.first.is_visible(timeout=3000):
|
| 135 |
disconnect_btn.first.click(timeout=5000)
|
| 136 |
if logger:
|
|
|
|
| 146 |
return False
|
| 147 |
|
| 148 |
|
| 149 |
+
def click_connect(page: Page, logger=None, locators: PageLocators = None) -> bool:
|
| 150 |
"""
|
| 151 |
点击Connect按钮建立WS连接(在iframe内部)。
|
| 152 |
+
|
| 153 |
+
Args:
|
| 154 |
+
page: Playwright Page 对象
|
| 155 |
+
logger: 日志记录器
|
| 156 |
+
locators: 可选的 PageLocators 缓存对象
|
| 157 |
"""
|
| 158 |
try:
|
| 159 |
+
if locators:
|
| 160 |
+
connect_btn = locators.connect_btn
|
| 161 |
+
else:
|
| 162 |
+
frame = get_preview_frame(page, logger)
|
| 163 |
+
if not frame:
|
| 164 |
+
return False
|
| 165 |
+
connect_btn = frame.locator('button:has-text("Connect")')
|
| 166 |
|
|
|
|
| 167 |
if connect_btn.count() > 0 and connect_btn.first.is_visible(timeout=3000):
|
| 168 |
connect_btn.first.click(timeout=5000)
|
| 169 |
if logger:
|
|
|
|
| 179 |
return False
|
| 180 |
|
| 181 |
|
| 182 |
+
def wait_for_ws_connected(page: Page, logger=None, timeout: int = 30, locators: PageLocators = None) -> bool:
|
| 183 |
"""
|
| 184 |
等待WS状态变为CONNECTED。
|
| 185 |
"""
|
| 186 |
start_time = time.time()
|
| 187 |
while time.time() - start_time < timeout:
|
| 188 |
+
status = get_ws_status(page, logger, locators)
|
| 189 |
if status == "CONNECTED":
|
| 190 |
return True
|
| 191 |
time.sleep(1)
|
| 192 |
return False
|
| 193 |
|
| 194 |
|
| 195 |
+
def reconnect_ws(page: Page, logger=None, locators: PageLocators = None) -> str:
|
| 196 |
"""
|
| 197 |
执行断开再连接的流程,并返回最终WS状态。
|
| 198 |
流程:关闭遮罩 -> Disconnect -> 等待IDLE -> Connect -> 等待CONNECTED -> 获取状态
|
| 199 |
+
|
| 200 |
+
Args:
|
| 201 |
+
page: Playwright Page 对象
|
| 202 |
+
logger: 日志记录器
|
| 203 |
+
locators: 可选的 PageLocators 缓存对象
|
| 204 |
"""
|
| 205 |
if logger:
|
| 206 |
logger.info("开始执行WS重连流程: Disconnect -> Connect")
|
| 207 |
|
| 208 |
# 先关闭 interaction-modal 遮罩层(如果存在)
|
| 209 |
+
dismiss_interaction_modal(page, logger, locators)
|
| 210 |
|
| 211 |
# 先断开连接
|
| 212 |
+
click_disconnect(page, logger, locators)
|
| 213 |
time.sleep(2)
|
| 214 |
|
| 215 |
# 检查是否变为IDLE
|
| 216 |
+
status = get_ws_status(page, logger, locators)
|
| 217 |
if logger:
|
| 218 |
logger.info(f"断开后WS状态: {status}")
|
| 219 |
|
| 220 |
# 再连接
|
| 221 |
+
click_connect(page, logger, locators)
|
| 222 |
time.sleep(2)
|
| 223 |
|
| 224 |
# 等待连接成功
|
| 225 |
+
if wait_for_ws_connected(page, logger, timeout=15, locators=locators):
|
| 226 |
+
status = get_ws_status(page, logger, locators)
|
| 227 |
if logger:
|
| 228 |
logger.info(f"重连后WS状态: {status}")
|
| 229 |
return status
|
| 230 |
else:
|
| 231 |
+
status = get_ws_status(page, logger, locators)
|
| 232 |
if logger:
|
| 233 |
logger.warning(f"WS重连超时,当前状态: {status}")
|
| 234 |
return status
|
| 235 |
|
| 236 |
|
| 237 |
+
def dismiss_interaction_modal(page: Page, logger=None, locators: PageLocators = None) -> bool:
|
| 238 |
"""
|
| 239 |
检测并关闭 interaction-modal 遮罩层。
|
| 240 |
通过在 iframe 区域内模拟鼠标移动来触发遮罩层关闭。
|
| 241 |
|
| 242 |
+
Args:
|
| 243 |
+
page: Playwright Page 对象
|
| 244 |
+
logger: 日志记录器
|
| 245 |
+
locators: 可选的 PageLocators 缓存对象
|
| 246 |
+
|
| 247 |
返回: True 如果成功关闭遮罩,False 如果未找到遮罩或关闭失败
|
| 248 |
"""
|
| 249 |
try:
|
| 250 |
+
if locators:
|
| 251 |
+
modal = locators.modal
|
| 252 |
+
iframe = locators.iframe
|
| 253 |
+
else:
|
| 254 |
+
modal = page.locator('div.interaction-modal')
|
| 255 |
+
iframe = page.locator('iframe[title="Preview"]')
|
| 256 |
+
|
| 257 |
if modal.count() == 0 or not modal.first.is_visible(timeout=500):
|
| 258 |
return False
|
| 259 |
|
| 260 |
if logger:
|
| 261 |
logger.info("检测到 interaction-modal 遮罩层,尝试关闭...")
|
| 262 |
|
|
|
|
| 263 |
if iframe.count() > 0:
|
| 264 |
iframe_box = iframe.first.bounding_box()
|
| 265 |
if iframe_box:
|
|
|
|
| 291 |
return False
|
| 292 |
|
| 293 |
|
| 294 |
+
def click_in_iframe(page: Page, logger=None, locators: PageLocators = None) -> bool:
|
| 295 |
"""
|
| 296 |
在 iframe 内随机移动鼠标并点击一次,用于保活。
|
| 297 |
避开顶部(状态栏和按钮区域)和右侧区域。
|
| 298 |
|
| 299 |
+
Args:
|
| 300 |
+
page: Playwright Page 对象
|
| 301 |
+
logger: 日志记录器
|
| 302 |
+
locators: 可选的 PageLocators 缓存对象
|
| 303 |
+
|
| 304 |
返回: True 如果成功点击,False 如果失败
|
| 305 |
"""
|
| 306 |
try:
|
| 307 |
+
if locators:
|
| 308 |
+
iframe = locators.iframe
|
| 309 |
+
else:
|
| 310 |
+
iframe = page.locator('iframe[title="Preview"]')
|
| 311 |
+
|
| 312 |
if iframe.count() == 0:
|
| 313 |
return False
|
| 314 |
|
utils/common.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
from pathlib import Path
|
|
|
|
| 8 |
|
| 9 |
def clean_env_value(value):
|
| 10 |
"""
|
|
@@ -49,4 +50,50 @@ def ensure_dir(path):
|
|
| 49 |
"""
|
| 50 |
if isinstance(path, str):
|
| 51 |
path = Path(path)
|
| 52 |
-
os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
from pathlib import Path
|
| 8 |
+
from urllib.parse import urlsplit, unquote
|
| 9 |
|
| 10 |
def clean_env_value(value):
|
| 11 |
"""
|
|
|
|
| 50 |
"""
|
| 51 |
if isinstance(path, str):
|
| 52 |
path = Path(path)
|
| 53 |
+
os.makedirs(path, exist_ok=True)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def parse_proxy_config(proxy_value):
    """
    Parse a proxy string into the dict shape used for the browser "proxy" launch option.

    Accepted forms:
    - scheme://user:pass@host:port
    - scheme://host:port
    - host:port
    - user:pass@host:port (scheme defaults to http)

    Args:
        proxy_value: Proxy string; surrounding whitespace is ignored.

    Returns:
        dict or None: ``{"server": ...}`` plus ``"username"`` / ``"password"``
        when credentials are present. ``None`` when the value is empty or
        blank, or when it is malformed (non-numeric or out-of-range port,
        invalid bracketed IPv6 host). If no hostname can be extracted, the
        stripped input is passed through unchanged as ``{"server": value}``.
    """
    if not proxy_value:
        return None

    proxy_value = proxy_value.strip()
    if not proxy_value:
        return None

    has_scheme = "://" in proxy_value
    try:
        # A leading "//" makes urlsplit treat a scheme-less value as a netloc.
        parsed = urlsplit(proxy_value if has_scheme else f"//{proxy_value}")
    except ValueError:
        # urlsplit rejects malformed bracketed hosts; report as invalid
        # instead of letting the exception escape to the caller.
        return None

    host = parsed.hostname
    if not host:
        return {"server": proxy_value}

    try:
        # Accessing .port raises ValueError for non-numeric or out-of-range ports.
        port = parsed.port
    except ValueError:
        return None

    scheme = parsed.scheme if has_scheme else "http"

    # .hostname strips the brackets from IPv6 literals; restore them so the
    # rebuilt "host:port" stays unambiguous.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"

    server = f"{scheme}://{host}"
    if port:
        server += f":{port}"

    result = {"server": server}
    # Credentials may be percent-encoded in the URL form; decode them.
    if parsed.username:
        result["username"] = unquote(parsed.username)
    if parsed.password:
        result["password"] = unquote(parsed.password)
    return result
|
utils/logger.py
CHANGED
|
@@ -26,19 +26,26 @@ def setup_logging(log_file, prefix=None, level=logging.INFO):
|
|
| 26 |
配置日志记录器,使其输出到文件和控制台。
|
| 27 |
支持一个可选的前缀,用于标识日志来源。
|
| 28 |
|
| 29 |
-
每次调用
|
|
|
|
| 30 |
|
| 31 |
时间显示默认为 UTC+8 (北京时间),可通过环境变量 TZ_OFFSET 修改。
|
| 32 |
|
| 33 |
-
:param log_file: 日志文件的路径。
|
| 34 |
:param prefix: (可选) 要添加到每条日志消息开头的字符串前缀。
|
| 35 |
-
:param level: 日志级别。
|
| 36 |
"""
|
| 37 |
-
logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
logger.setLevel(level)
|
| 39 |
|
|
|
|
| 40 |
if logger.hasHandlers():
|
| 41 |
-
logger
|
| 42 |
|
| 43 |
base_format = '%(asctime)s - %(process)d - %(levelname)s - %(message)s'
|
| 44 |
|
|
|
|
| 26 |
配置日志记录器,使其输出到文件和控制台。
|
| 27 |
支持一个可选的前缀,用于标识日志来源。
|
| 28 |
|
| 29 |
+
每个 (进程ID, prefix) 组合对应一个独立的 logger,首次调用时初始化,
|
| 30 |
+
后续调用直接返回已有 logger,避免重复创建 handler 导致内存泄漏。
|
| 31 |
|
| 32 |
时间显示默认为 UTC+8 (北京时间),可通过环境变量 TZ_OFFSET 修改。
|
| 33 |
|
| 34 |
+
:param log_file: 日志文件的路径(仅首次调用生效)。
|
| 35 |
:param prefix: (可选) 要添加到每条日志消息开头的字符串前缀。
|
| 36 |
+
:param level: 日志级别(仅首次调用生效)。
|
| 37 |
"""
|
| 38 |
+
# 使用进程ID + 前缀作为 logger 名称,避免不同进程/实例的 logger 互相干扰
|
| 39 |
+
logger_name = f'camoufox.{os.getpid()}'
|
| 40 |
+
if prefix:
|
| 41 |
+
logger_name += f'.{prefix}'
|
| 42 |
+
|
| 43 |
+
logger = logging.getLogger(logger_name)
|
| 44 |
logger.setLevel(level)
|
| 45 |
|
| 46 |
+
# 如果该 logger 已有 handlers,说明已初始化过,直接返回
|
| 47 |
if logger.hasHandlers():
|
| 48 |
+
return logger
|
| 49 |
|
| 50 |
base_format = '%(asctime)s - %(process)d - %(levelname)s - %(message)s'
|
| 51 |
|