# NOTE(review): the lines that were here ("Spaces: / Running / Running") are
# hosting-UI status text captured during extraction, not source code; kept
# only as this comment so the file remains parseable.
import json
import os
import re
import time
from datetime import datetime, timezone
from io import BytesIO
from urllib.parse import urljoin
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Spacer, Paragraph
| # ===== Channel.io 設定 ===== | |
| GET_URL = "https://desk-api.channel.io/desk/channels/200605/groups/551316/messages" | |
| POST_URL = GET_URL | |
| PARAMS = { | |
| "sortOrder": "desc", | |
| "limit": 36, | |
| } | |
| X_ACCOUNT = os.getenv("dmsendertoken") | |
| if not X_ACCOUNT: | |
| raise RuntimeError("環境変数が設定されていません") | |
| HEADERS_GET = { | |
| "accept": "application/json", | |
| "x-account": X_ACCOUNT, | |
| } | |
| HEADERS_POST = { | |
| "accept": "application/json", | |
| "content-type": "application/json", | |
| "x-account": X_ACCOUNT, | |
| } | |
| # ===== Utils ===== | |
| def parse_updated_at(value): | |
| if isinstance(value, (int, float)): | |
| return datetime.fromtimestamp(value / 1000, tz=timezone.utc) | |
| return None | |
| def extract_url(text): | |
| urls = re.findall(r"https?://[^\s]+", text) | |
| return urls[0] if urls else None | |
| # ===== PDF作成 ===== | |
| def create_pdf_from_images(image_urls): | |
| buffer = BytesIO() | |
| doc = SimpleDocTemplate(buffer, pagesize=A4) | |
| elements = [] | |
| for url in image_urls: | |
| try: | |
| res = requests.get(url, timeout=30) | |
| img = Image.open(BytesIO(res.content)) | |
| img_buffer = BytesIO() | |
| img.convert("RGB").save(img_buffer, format="JPEG") | |
| img_buffer.seek(0) | |
| rl_img = RLImage(img_buffer, width=500, height=700) | |
| elements.append(rl_img) | |
| elements.append(Spacer(1, 20)) | |
| except Exception: | |
| continue | |
| doc.build(elements) | |
| buffer.seek(0) | |
| return buffer.read() | |
| def create_pdf_from_page(url): | |
| res = requests.get(url, timeout=30) | |
| soup = BeautifulSoup(res.text, "html.parser") | |
| text = soup.get_text() | |
| buffer = BytesIO() | |
| doc = SimpleDocTemplate(buffer, pagesize=A4) | |
| styles = getSampleStyleSheet() | |
| elements = [] | |
| for line in text.split("\n"): | |
| if line.strip(): | |
| elements.append(Paragraph(line, styles["Normal"])) | |
| elements.append(Spacer(1, 10)) | |
| doc.build(elements) | |
| buffer.seek(0) | |
| return buffer.read() | |
| # ===== mmnga専用 ===== | |
| def extract_mmnga_images(url): | |
| res = requests.get(url, timeout=30) | |
| soup = BeautifulSoup(res.text, "html.parser") | |
| post_div = soup.find("div", id="post") | |
| if not post_div: | |
| return [] | |
| imgs = post_div.find_all("img") | |
| return [img.get("src") for img in imgs if img.get("src")] | |
| # ===== Channel送信 ===== | |
| def send_text(text): | |
| payload = { | |
| "requestId": f"req-{int(time.time()*1000)}", | |
| "blocks": [{"type": "text", "value": text}], | |
| } | |
| requests.post(POST_URL, headers=HEADERS_POST, data=json.dumps(payload)) | |
| def upload_pdf(file_bytes): | |
| url = "https://media.channel.io/cht/v1/pri-file/200605/groups/551316/result.pdf" | |
| headers = { | |
| "x-account": X_ACCOUNT, | |
| "Content-Type": "application/pdf", | |
| "Content-Length": str(len(file_bytes)), | |
| } | |
| res = requests.post(url, headers=headers, data=file_bytes) | |
| return res.json() | |
| def send_file(file_json): | |
| payload = { | |
| "requestId": f"req-{int(time.time()*1000)}", | |
| "blocks": [{"type": "text", "value": "PDFです"}], | |
| "files": [file_json], | |
| } | |
| requests.post(POST_URL, headers=HEADERS_POST, data=json.dumps(payload)) | |
| # ===== Main ===== | |
| def main(): | |
| processed = set() | |
| while True: | |
| try: | |
| res = requests.get(GET_URL, headers=HEADERS_GET, params=PARAMS) | |
| messages = res.json().get("messages", []) | |
| latest = None | |
| latest_time = None | |
| for msg in messages: | |
| msg_id = msg.get("id") | |
| text = msg.get("plainText") | |
| updated = msg.get("updatedAt") | |
| if not text or msg_id in processed: | |
| continue | |
| t = parse_updated_at(updated) | |
| if not t: | |
| continue | |
| if latest_time is None or t > latest_time: | |
| latest = msg | |
| latest_time = t | |
| if not latest: | |
| time.sleep(10) | |
| continue | |
| url = extract_url(latest["plainText"]) | |
| if not url: | |
| time.sleep(10) | |
| continue | |
| send_text("PDF作成を開始します...") | |
| try: | |
| if "mmnga.com/magazine/" in url: | |
| send_text("漫画モードで処理中...") | |
| images = extract_mmnga_images(url) | |
| if not images: | |
| send_text("画像が見つかりませんでした") | |
| continue | |
| pdf_bytes = create_pdf_from_images(images) | |
| else: | |
| send_text("通常ページをPDF化します...") | |
| pdf_bytes = create_pdf_from_page(url) | |
| send_text("アップロード中...") | |
| file_json = upload_pdf(pdf_bytes) | |
| send_file(file_json) | |
| send_text("完了しました!") | |
| except Exception as e: | |
| send_text(f"エラー: {e}") | |
| processed.add(latest["id"]) | |
| except Exception as e: | |
| print("エラー:", e) | |
| time.sleep(15) | |
| if __name__ == "__main__": | |
| main() |