Spaces:

izuemon
/

any-env-code

Running

App Files Community

izuemon committed on Mar 17

Commit

e9d3b0a

verified ·

1 Parent(s): 4b2c40c

Update mmng.py

Browse files

Files changed (1) hide show

mmng.py +88 -164

mmng.py CHANGED Viewed

@@ -3,147 +3,116 @@ import re
 import time
 import json
 import requests
-import img2pdf
-import pdfkit
-from bs4 import BeautifulSoup
 from datetime import datetime, timezone
 from io import BytesIO
 # ===== Channel.io 設定 =====
-GROUP_ID = "551316"
-GET_URL = f"https://desk-api.channel.io/desk/channels/200605/groups/{GROUP_ID}/messages"
 POST_URL = GET_URL
 PARAMS = {
     "sortOrder": "desc",
     "limit": 36,
-    "logFolded": "false",
 }
 X_ACCOUNT = os.getenv("dmsendertoken")
 if not X_ACCOUNT:
-    raise RuntimeError("環境変数 dmsendertoken が設定されていません")
 HEADERS_GET = {
     "accept": "application/json",
-    "accept-language": "ja",
     "x-account": X_ACCOUNT,
 }
 HEADERS_POST = {
     "accept": "application/json",
-    "accept-language": "ja",
     "content-type": "application/json",
     "x-account": X_ACCOUNT,
 }
 # ===== Utils =====
 def parse_updated_at(value):
     if isinstance(value, (int, float)):
         return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
-    elif isinstance(value, str):
-        return datetime.fromisoformat(value.replace("Z", "+00:00"))
     return None
 def extract_url(text):
-    url_pattern = r"https?://[^\s]+"
-    m = re.search(url_pattern, text)
-    if m:
-        return m.group(0)
-    return None
-def is_mmnga_magazine(url):
-    return re.search(r"momon-ga\.com/magazine/", url)
 # ===== PDF作成 =====
 def create_pdf_from_images(image_urls):
-    image_bytes = []
     for url in image_urls:
-        r = requests.get(url, timeout=60)
-        r.raise_for_status()
-        image_bytes.append(r.content)
-    pdf_bytes = img2pdf.convert(image_bytes)
-    return pdf_bytes
-def create_fullpage_pdf(url):
-    pdf = pdfkit.from_url(url, False)
-    return pdf
-# ===== mmnga処理 =====
-def get_mmnga_images(url):
-    r = requests.get(url, timeout=60)
-    r.raise_for_status()
-    soup = BeautifulSoup(r.text, "html.parser")
-    post = soup.find("div", id="post")
-    if not post:
-        raise Exception("post divが見つかりません")
-    imgs = post.find_all("img")
-    image_urls = []
-    for img in imgs:
-        src = img.get("src")
-        if not src:
-            continue
-        if src.startswith("//"):
-            src = "https:" + src
-        if src.startswith("/"):
-            src = "https://momon-ga.com" + src
-        image_urls.append(src)
-    if not image_urls:
-        raise Exception("画像が見つかりません")
-    return image_urls
 # ===== Channel送信 =====
-def send_to_channel(text):
     payload = {
-        "requestId": f"desk-web-{int(time.time()*1000)}",
-        "blocks": [
-            {"type": "text", "value": text}
-        ],
     }
-    r = requests.post(
-        POST_URL,
-        headers=HEADERS_POST,
-        data=json.dumps(payload),
-        timeout=30
-    )
-    r.raise_for_status()
-def upload_file_to_channel(file_bytes):
-    upload_url = f"https://media.channel.io/cht/v1/pri-file/200605/groups/{GROUP_ID}/message/send_pdf_file.pdf"
     headers = {
         "x-account": X_ACCOUNT,
@@ -151,132 +120,87 @@ def upload_file_to_channel(file_bytes):
         "Content-Length": str(len(file_bytes)),
     }
-    r = requests.post(
-        upload_url,
-        headers=headers,
-        data=file_bytes,
-        timeout=300
-    )
-    r.raise_for_status()
-    return r.json()
-def send_pdf_message(file_json):
     payload = {
-        "requestId": f"desk-web-{int(time.time()*1000)}",
-        "blocks": [
-            {"type": "text", "value": "PDFプレビュー"}
-        ],
         "files": [file_json],
     }
-    r = requests.post(
-        POST_URL,
-        headers=HEADERS_POST,
-        data=json.dumps(payload),
-        timeout=30
-    )
-    r.raise_for_status()
 # ===== Main =====
 def main():
-    processed_messages = set()
     while True:
         try:
-            res = requests.get(
-                GET_URL,
-                headers=HEADERS_GET,
-                params=PARAMS,
-                timeout=30
-            )
-            res.raise_for_status()
             messages = res.json().get("messages", [])
-            latest_msg = None
             latest_time = None
             for msg in messages:
                 msg_id = msg.get("id")
                 text = msg.get("plainText")
-                updated_at = msg.get("updatedAt")
-                if not text:
-                    continue
-                if msg_id in processed_messages:
                     continue
-                t = parse_updated_at(updated_at)
                 if not t:
                     continue
                 if latest_time is None or t > latest_time:
                     latest_time = t
-                    latest_msg = msg
-            if not latest_msg:
                 time.sleep(10)
                 continue
-            url = extract_url(latest_msg["plainText"])
             if not url:
-                processed_messages.add(latest_msg["id"])
                 time.sleep(10)
                 continue
-            send_to_channel("PDF生成を開始します")
             try:
-                if is_mmnga_magazine(url):
-                    send_to_channel("mmnga漫画ページを検出しました")
-                    image_urls = get_mmnga_images(url)
-                    pdf_bytes = create_pdf_from_images(image_urls)
                 else:
-                    send_to_channel("通常ページをPDF化します")
-                    pdf_bytes = create_fullpage_pdf(url)
-                send_to_channel("PDFをアップロードしています")
-                file_json = upload_file_to_channel(pdf_bytes)
-                send_pdf_message(file_json)
-                send_to_channel("完了しました")
             except Exception as e:
-                send_to_channel(f"エラー: {e}")
-            processed_messages.add(latest_msg["id"])
         except Exception as e:
-            print("error:", e)
         time.sleep(15)
 if __name__ == "__main__":
     main()

 import time
 import json
 import requests
 from datetime import datetime, timezone
+from bs4 import BeautifulSoup
 from io import BytesIO
+from PIL import Image
+from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Spacer, Paragraph
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.styles import getSampleStyleSheet
 # ===== Channel.io 設定 =====
+GET_URL = "https://desk-api.channel.io/desk/channels/200605/groups/551316/messages"
 POST_URL = GET_URL
 PARAMS = {
     "sortOrder": "desc",
     "limit": 36,
 }
 X_ACCOUNT = os.getenv("dmsendertoken")
 if not X_ACCOUNT:
+    raise RuntimeError("環境変数が設定されていません")
 HEADERS_GET = {
     "accept": "application/json",
     "x-account": X_ACCOUNT,
 }
 HEADERS_POST = {
     "accept": "application/json",
     "content-type": "application/json",
     "x-account": X_ACCOUNT,
 }
 # ===== Utils =====
 def parse_updated_at(value):
     if isinstance(value, (int, float)):
         return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
     return None
 def extract_url(text):
+    urls = re.findall(r"https?://[^\s]+", text)
+    return urls[0] if urls else None
 # ===== PDF作成 =====
 def create_pdf_from_images(image_urls):
+    buffer = BytesIO()
+    doc = SimpleDocTemplate(buffer, pagesize=A4)
+    elements = []
     for url in image_urls:
+        try:
+            res = requests.get(url, timeout=30)
+            img = Image.open(BytesIO(res.content))
+            img_buffer = BytesIO()
+            img.convert("RGB").save(img_buffer, format="JPEG")
+            img_buffer.seek(0)
+            rl_img = RLImage(img_buffer, width=500, height=700)
+            elements.append(rl_img)
+            elements.append(Spacer(1, 20))
+        except Exception:
+            continue
+    doc.build(elements)
+    buffer.seek(0)
+    return buffer.read()
def create_pdf_from_page(url):
    """Fetch a web page and render its visible text as an A4 PDF.

    The page is parsed with BeautifulSoup, its text is split on newlines,
    and every non-blank line becomes one paragraph followed by a small gap.

    Args:
        url: Address of the page to convert.

    Returns:
        The PDF document as ``bytes``.
    """
    # Local import: the file-level import block is not changed by this edit.
    from xml.sax.saxutils import escape

    res = requests.get(url, timeout=30)
    soup = BeautifulSoup(res.text, "html.parser")
    text = soup.get_text()
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    normal_style = getSampleStyleSheet()["Normal"]
    # NOTE(review): the sample "Normal" style presumably uses a built-in
    # Latin font, so Japanese text may not render; confirm whether a CJK
    # font has to be registered.
    elements = []
    for line in text.split("\n"):
        if not line.strip():
            continue
        # Paragraph parses its text as XML-like markup, so literal "<", ">"
        # and "&" from the page have to be escaped first.
        elements.append(Paragraph(escape(line), normal_style))
        elements.append(Spacer(1, 10))
    doc.build(elements)
    return buffer.getvalue()
+# ===== mmnga専用 =====
def extract_mmnga_images(url):
    """Collect the image URLs inside the ``<div id="post">`` of a page.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of absolute image URLs in document order; empty when the
        page has no ``div#post`` or that element contains no ``<img>``
        with a ``src`` attribute.
    """
    # Local import: the file-level import block is not changed by this edit.
    from urllib.parse import urljoin

    res = requests.get(url, timeout=30)
    soup = BeautifulSoup(res.text, "html.parser")
    post_div = soup.find("div", id="post")
    if not post_div:
        return []
    sources = (img.get("src") for img in post_div.find_all("img"))
    # src may be relative ("/a.jpg") or protocol-relative ("//host/a.jpg");
    # resolve it against the page URL so the result can be downloaded.
    return [urljoin(url, src) for src in sources if src]
 # ===== Channel送信 =====
def send_text(text):
    """Post a plain-text message to the group chat.

    Args:
        text: Message body, sent as a single text block.
    """
    payload = {
        # Millisecond timestamp used as the request id.
        "requestId": f"req-{int(time.time()*1000)}",
        "blocks": [{"type": "text", "value": text}],
    }
    # Without a timeout, requests waits indefinitely on a stalled
    # connection, which would freeze the polling loop.
    res = requests.post(
        POST_URL,
        headers=HEADERS_POST,
        data=json.dumps(payload),
        timeout=30,
    )
    if not res.ok:
        # Status messages stay best effort (no exception), but a rejected
        # post should at least be visible in the log.
        print("send_text failed: HTTP", res.status_code)
+def upload_pdf(file_bytes):
+    url = "https://media.channel.io/cht/v1/pri-file/200605/groups/551316/result.pdf"
     headers = {
         "x-account": X_ACCOUNT,
         "Content-Length": str(len(file_bytes)),
     }
+    res = requests.post(url, headers=headers, data=file_bytes)
+    return res.json()
def send_file(file_json):
    """Post a message that attaches a previously uploaded file.

    Args:
        file_json: File descriptor to attach; it is placed unchanged in
            the message's ``files`` list.
    """
    payload = {
        # Millisecond timestamp used as the request id.
        "requestId": f"req-{int(time.time()*1000)}",
        "blocks": [{"type": "text", "value": "PDFです"}],
        "files": [file_json],
    }
    # Without a timeout, requests waits indefinitely on a stalled
    # connection, which would freeze the polling loop.
    res = requests.post(
        POST_URL,
        headers=HEADERS_POST,
        data=json.dumps(payload),
        timeout=30,
    )
    if not res.ok:
        # Stay best effort (no exception), but make a rejected post
        # visible in the log.
        print("send_file failed: HTTP", res.status_code)
 # ===== Main =====
 def main():
+    processed = set()
     while True:
         try:
+            res = requests.get(GET_URL, headers=HEADERS_GET, params=PARAMS)
             messages = res.json().get("messages", [])
+            latest = None
             latest_time = None
             for msg in messages:
                 msg_id = msg.get("id")
                 text = msg.get("plainText")
+                updated = msg.get("updatedAt")
+                if not text or msg_id in processed:
                     continue
+                t = parse_updated_at(updated)
                 if not t:
                     continue
                 if latest_time is None or t > latest_time:
+                    latest = msg
                     latest_time = t
+            if not latest:
                 time.sleep(10)
                 continue
+            url = extract_url(latest["plainText"])
             if not url:
                 time.sleep(10)
                 continue
+            send_text("PDF作成を開始します...")
             try:
+                if "mmnga.com/magazine/" in url:
+                    send_text("漫画モードで処理中...")
+                    images = extract_mmnga_images(url)
+                    if not images:
+                        send_text("画像が見つかりませんでした")
+                        continue
+                    pdf_bytes = create_pdf_from_images(images)
                 else:
+                    send_text("通常ページをPDF化します...")
+                    pdf_bytes = create_pdf_from_page(url)
+                send_text("アップロード中...")
+                file_json = upload_pdf(pdf_bytes)
+                send_file(file_json)
+                send_text("完了しました！")
             except Exception as e:
+                send_text(f"エラー: {e}")
+            processed.add(latest["id"])
         except Exception as e:
+            print("エラー:", e)
         time.sleep(15)
 if __name__ == "__main__":
     main()