izuemon committed on
Commit
e9d3b0a
·
verified ·
1 Parent(s): 4b2c40c

Update mmng.py

Browse files
Files changed (1) hide show
  1. mmng.py +88 -164
mmng.py CHANGED
@@ -3,147 +3,116 @@ import re
3
  import time
4
  import json
5
  import requests
6
- import img2pdf
7
- import pdfkit
8
- from bs4 import BeautifulSoup
9
  from datetime import datetime, timezone
 
10
  from io import BytesIO
 
 
 
 
11
 
12
  # ===== Channel.io 設定 =====
13
- GROUP_ID = "551316"
14
-
15
- GET_URL = f"https://desk-api.channel.io/desk/channels/200605/groups/{GROUP_ID}/messages"
16
  POST_URL = GET_URL
17
 
18
  PARAMS = {
19
  "sortOrder": "desc",
20
  "limit": 36,
21
- "logFolded": "false",
22
  }
23
 
24
  X_ACCOUNT = os.getenv("dmsendertoken")
25
  if not X_ACCOUNT:
26
- raise RuntimeError("環境変数 dmsendertoken が設定されていません")
27
 
28
  HEADERS_GET = {
29
  "accept": "application/json",
30
- "accept-language": "ja",
31
  "x-account": X_ACCOUNT,
32
  }
33
 
34
  HEADERS_POST = {
35
  "accept": "application/json",
36
- "accept-language": "ja",
37
  "content-type": "application/json",
38
  "x-account": X_ACCOUNT,
39
  }
40
 
41
  # ===== Utils =====
42
-
43
  def parse_updated_at(value):
44
  if isinstance(value, (int, float)):
45
  return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
46
- elif isinstance(value, str):
47
- return datetime.fromisoformat(value.replace("Z", "+00:00"))
48
  return None
49
 
50
-
51
  def extract_url(text):
52
- url_pattern = r"https?://[^\s]+"
53
- m = re.search(url_pattern, text)
54
- if m:
55
- return m.group(0)
56
- return None
57
-
58
-
59
- def is_mmnga_magazine(url):
60
- return re.search(r"momon-ga\.com/magazine/", url)
61
-
62
 
63
  # ===== PDF作成 =====
64
-
65
  def create_pdf_from_images(image_urls):
66
-
67
- image_bytes = []
 
68
 
69
  for url in image_urls:
70
- r = requests.get(url, timeout=60)
71
- r.raise_for_status()
72
- image_bytes.append(r.content)
73
-
74
- pdf_bytes = img2pdf.convert(image_bytes)
75
-
76
- return pdf_bytes
77
-
78
-
79
- def create_fullpage_pdf(url):
80
-
81
- pdf = pdfkit.from_url(url, False)
82
-
83
- return pdf
84
-
85
-
86
- # ===== mmnga処理 =====
87
-
88
- def get_mmnga_images(url):
89
-
90
- r = requests.get(url, timeout=60)
91
- r.raise_for_status()
92
 
93
- soup = BeautifulSoup(r.text, "html.parser")
 
 
94
 
95
- post = soup.find("div", id="post")
 
 
96
 
97
- if not post:
98
- raise Exception("post divが見つかりません")
99
 
100
- imgs = post.find_all("img")
 
 
101
 
102
- image_urls = []
 
 
103
 
104
- for img in imgs:
105
- src = img.get("src")
106
- if not src:
107
- continue
108
 
109
- if src.startswith("//"):
110
- src = "https:" + src
 
111
 
112
- if src.startswith("/"):
113
- src = "https://momon-ga.com" + src
 
 
 
114
 
115
- image_urls.append(src)
 
 
116
 
117
- if not image_urls:
118
- raise Exception("画像が見つかりません")
 
 
119
 
120
- return image_urls
 
 
121
 
 
 
122
 
123
  # ===== Channel送信 =====
124
-
125
- def send_to_channel(text):
126
-
127
  payload = {
128
- "requestId": f"desk-web-{int(time.time()*1000)}",
129
- "blocks": [
130
- {"type": "text", "value": text}
131
- ],
132
  }
 
133
 
134
- r = requests.post(
135
- POST_URL,
136
- headers=HEADERS_POST,
137
- data=json.dumps(payload),
138
- timeout=30
139
- )
140
-
141
- r.raise_for_status()
142
-
143
-
144
- def upload_file_to_channel(file_bytes):
145
-
146
- upload_url = f"https://media.channel.io/cht/v1/pri-file/200605/groups/{GROUP_ID}/message/send_pdf_file.pdf"
147
 
148
  headers = {
149
  "x-account": X_ACCOUNT,
@@ -151,132 +120,87 @@ def upload_file_to_channel(file_bytes):
151
  "Content-Length": str(len(file_bytes)),
152
  }
153
 
154
- r = requests.post(
155
- upload_url,
156
- headers=headers,
157
- data=file_bytes,
158
- timeout=300
159
- )
160
-
161
- r.raise_for_status()
162
-
163
- return r.json()
164
-
165
-
166
- def send_pdf_message(file_json):
167
 
 
168
  payload = {
169
- "requestId": f"desk-web-{int(time.time()*1000)}",
170
- "blocks": [
171
- {"type": "text", "value": "PDFプレビュー"}
172
- ],
173
  "files": [file_json],
174
  }
175
-
176
- r = requests.post(
177
- POST_URL,
178
- headers=HEADERS_POST,
179
- data=json.dumps(payload),
180
- timeout=30
181
- )
182
-
183
- r.raise_for_status()
184
-
185
 
186
  # ===== Main =====
187
-
188
  def main():
189
-
190
- processed_messages = set()
191
 
192
  while True:
193
-
194
  try:
195
-
196
- res = requests.get(
197
- GET_URL,
198
- headers=HEADERS_GET,
199
- params=PARAMS,
200
- timeout=30
201
- )
202
-
203
- res.raise_for_status()
204
-
205
  messages = res.json().get("messages", [])
206
 
207
- latest_msg = None
208
  latest_time = None
209
 
210
  for msg in messages:
211
-
212
  msg_id = msg.get("id")
213
  text = msg.get("plainText")
214
- updated_at = msg.get("updatedAt")
215
-
216
- if not text:
217
- continue
218
 
219
- if msg_id in processed_messages:
220
  continue
221
 
222
- t = parse_updated_at(updated_at)
223
-
224
  if not t:
225
  continue
226
 
227
  if latest_time is None or t > latest_time:
 
228
  latest_time = t
229
- latest_msg = msg
230
 
231
- if not latest_msg:
232
  time.sleep(10)
233
  continue
234
 
235
- url = extract_url(latest_msg["plainText"])
236
-
237
  if not url:
238
- processed_messages.add(latest_msg["id"])
239
  time.sleep(10)
240
  continue
241
 
242
- send_to_channel("PDF成を開始します")
243
 
244
  try:
 
 
245
 
246
- if is_mmnga_magazine(url):
247
-
248
- send_to_channel("mmnga漫ページを検出しました")
 
249
 
250
- image_urls = get_mmnga_images(url)
251
-
252
- pdf_bytes = create_pdf_from_images(image_urls)
253
 
254
  else:
 
 
255
 
256
- send_to_channel("通常ペジをPDF化します")
257
-
258
- pdf_bytes = create_fullpage_pdf(url)
259
-
260
- send_to_channel("PDFをアップロードしています")
261
-
262
- file_json = upload_file_to_channel(pdf_bytes)
263
 
264
- send_pdf_message(file_json)
 
265
 
266
- send_to_channel("完了しました")
267
 
268
  except Exception as e:
 
269
 
270
- send_to_channel(f"エラー: {e}")
271
-
272
- processed_messages.add(latest_msg["id"])
273
 
274
  except Exception as e:
275
-
276
- print("error:", e)
277
 
278
  time.sleep(15)
279
 
280
-
281
  if __name__ == "__main__":
282
  main()
 
3
  import time
4
  import json
5
  import requests
 
 
 
6
  from datetime import datetime, timezone
7
+ from bs4 import BeautifulSoup
8
  from io import BytesIO
9
+ from PIL import Image
10
+ from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Spacer, Paragraph
11
+ from reportlab.lib.pagesizes import A4
12
+ from reportlab.lib.styles import getSampleStyleSheet
13
 
14
# ===== Channel.io configuration =====
# Desk API endpoint for channel 200605 / group 551316; the same endpoint is
# used both for reading recent messages (GET) and posting new ones (POST).
GET_URL = "https://desk-api.channel.io/desk/channels/200605/groups/551316/messages"

POST_URL = GET_URL

# Poll parameters: newest messages first, at most 36 per request.
PARAMS = {
    "sortOrder": "desc",
    "limit": 36,
}

# API token is taken from the environment; fail fast at import time if absent.
X_ACCOUNT = os.getenv("dmsendertoken")
if not X_ACCOUNT:
    raise RuntimeError("環境変数が設定されていません")

# Headers for reading messages.
HEADERS_GET = {
    "accept": "application/json",
    "x-account": X_ACCOUNT,
}

# Headers for posting JSON message payloads.
HEADERS_POST = {
    "accept": "application/json",
    "content-type": "application/json",
    "x-account": X_ACCOUNT,
}
37
 
38
  # ===== Utils =====
 
39
def parse_updated_at(value):
    """Convert a Channel.io ``updatedAt`` value to an aware UTC datetime.

    Accepts either an epoch timestamp in *milliseconds* (int/float) or an
    ISO-8601 string (a trailing ``Z`` is normalized to ``+00:00``), which
    restores the string handling the previous revision supported.

    Returns ``None`` for any other type or an unparseable string.
    """
    if isinstance(value, (int, float)):
        # API delivers epoch milliseconds; fromtimestamp expects seconds.
        return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
    if isinstance(value, str):
        try:
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            # Malformed string: treat like any other unusable value.
            return None
    return None
43
 
 
44
def extract_url(text):
    """Return the first http(s) URL appearing in *text*, or None."""
    match = re.search(r"https?://[^\s]+", text)
    return match.group(0) if match else None
 
 
 
 
 
 
 
 
47
 
48
  # ===== PDF作成 =====
 
49
def create_pdf_from_images(image_urls):
    """Download each image URL and lay the images out in a single A4 PDF.

    Images that fail to download or decode are silently skipped
    (best-effort: one broken image must not abort the whole document).
    Returns the finished PDF as raw bytes.
    """
    out = BytesIO()
    document = SimpleDocTemplate(out, pagesize=A4)
    flowables = []

    for image_url in image_urls:
        try:
            response = requests.get(image_url, timeout=30)
            picture = Image.open(BytesIO(response.content))

            # Re-encode as JPEG so reportlab receives a format it handles.
            jpeg_buf = BytesIO()
            picture.convert("RGB").save(jpeg_buf, format="JPEG")
            jpeg_buf.seek(0)

            flowables.append(RLImage(jpeg_buf, width=500, height=700))
            flowables.append(Spacer(1, 20))
        except Exception:
            continue

    document.build(flowables)
    out.seek(0)
    return out.read()
73
 
74
def create_pdf_from_page(url):
    """Fetch *url* and render its visible text into a simple A4 PDF.

    Raises ``requests.HTTPError`` for non-2xx responses instead of
    silently turning an HTTP error page into a PDF.
    Returns the PDF as raw bytes.
    """
    res = requests.get(url, timeout=30)
    res.raise_for_status()  # don't PDF-ify a 404/500 error page
    soup = BeautifulSoup(res.text, "html.parser")

    text = soup.get_text()

    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()

    elements = []
    for line in text.split("\n"):
        if line.strip():
            elements.append(Paragraph(line, styles["Normal"]))
            elements.append(Spacer(1, 10))

    # NOTE(review): reportlab's default fonts cannot render Japanese glyphs;
    # registering a CID font may be required — confirm with a real page.
    doc.build(elements)
    buffer.seek(0)
    return buffer.read()
93
 
94
+ # ===== mmnga専用 =====
95
def extract_mmnga_images(url):
    """Return absolute image URLs from the ``div#post`` block of a manga page.

    Relative and protocol-relative ``src`` values are resolved against the
    page URL so callers can download them directly (the previous revision
    normalized these prefixes; returning them raw makes the downstream
    ``requests.get`` fail).

    Returns an empty list when the page has no ``div#post``.
    Raises ``requests.HTTPError`` for non-2xx responses.
    """
    from urllib.parse import urljoin  # local import keeps this fix self-contained

    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    post_div = soup.find("div", id="post")
    if not post_div:
        return []

    # urljoin passes absolute URLs through unchanged and resolves
    # "/path" and "//host/path" forms against the page URL.
    return [
        urljoin(url, img.get("src"))
        for img in post_div.find_all("img")
        if img.get("src")
    ]
105
 
106
  # ===== Channel送信 =====
107
def send_text(text):
    """Post a plain-text message to the Channel.io group.

    Best-effort: the response is not checked, but a timeout is set so a
    stalled POST cannot hang the polling loop forever.
    """
    payload = {
        # Unique-enough request id derived from current epoch milliseconds.
        "requestId": f"req-{int(time.time()*1000)}",
        "blocks": [{"type": "text", "value": text}],
    }
    requests.post(POST_URL, headers=HEADERS_POST, data=json.dumps(payload), timeout=30)
113
 
114
def upload_pdf(file_bytes):
    """Upload raw PDF bytes to Channel.io media storage.

    Returns the parsed JSON describing the uploaded file.
    Raises ``requests.HTTPError`` when the upload is rejected, instead of
    failing later while decoding an error-shaped body.
    """
    url = "https://media.channel.io/cht/v1/pri-file/200605/groups/551316/result.pdf"

    # NOTE(review): the diff view hides one header line here (original
    # line 119, likely a content-type header) — confirm against the repo.
    headers = {
        "x-account": X_ACCOUNT,
        "Content-Length": str(len(file_bytes)),
    }

    # Generous timeout: the PDF can be large, but the call must not hang forever.
    res = requests.post(url, headers=headers, data=file_bytes, timeout=300)
    res.raise_for_status()
    return res.json()
 
 
 
 
 
 
 
 
 
 
 
125
 
126
def send_file(file_json):
    """Post a message to the group that attaches the uploaded file JSON.

    Best-effort like ``send_text``; the timeout keeps the polling loop
    responsive if the API stalls.
    """
    payload = {
        "requestId": f"req-{int(time.time()*1000)}",
        "blocks": [{"type": "text", "value": "PDFです"}],
        "files": [file_json],
    }
    requests.post(POST_URL, headers=HEADERS_POST, data=json.dumps(payload), timeout=30)
 
 
 
 
 
 
 
 
 
133
 
134
  # ===== Main =====
 
135
def main():
    """Poll the Channel.io group and turn posted URLs into PDFs.

    Infinite loop: fetch recent messages, pick the newest unprocessed text
    message, extract a URL from it, build a PDF (image mode for mmnga
    magazine links, plain-text mode otherwise), upload it, and post it
    back to the same group.
    """
    # Message ids already handled; in-memory only, lost on restart.
    processed = set()

    while True:
        try:
            # NOTE(review): no timeout on this GET — a stalled connection
            # would hang the loop; consider timeout=30 as elsewhere.
            res = requests.get(GET_URL, headers=HEADERS_GET, params=PARAMS)
            messages = res.json().get("messages", [])

            latest = None
            latest_time = None

            # Select the newest unprocessed text message by updatedAt.
            for msg in messages:
                msg_id = msg.get("id")
                text = msg.get("plainText")
                updated = msg.get("updatedAt")

                if not text or msg_id in processed:
                    continue

                t = parse_updated_at(updated)
                if not t:
                    continue

                if latest_time is None or t > latest_time:
                    latest = msg
                    latest_time = t

            if not latest:
                time.sleep(10)
                continue

            url = extract_url(latest["plainText"])
            if not url:
                # NOTE(review): the id is NOT added to `processed` here, so a
                # URL-less newest message is re-examined every cycle — confirm
                # this is intended.
                time.sleep(10)
                continue

            send_text("PDF成を開始します...")

            try:
                if "mmnga.com/magazine/" in url:
                    send_text("漫画モードで処理中...")

                    images = extract_mmnga_images(url)
                    if not images:
                        send_text("画像が見つかりせんでした")
                        # NOTE(review): this `continue` skips processed.add()
                        # below, so the same image-less message is retried —
                        # and the notice re-sent — every cycle; confirm.
                        continue

                    pdf_bytes = create_pdf_from_images(images)

                else:
                    send_text("通常ページをPDF化します...")
                    pdf_bytes = create_pdf_from_page(url)

                send_text("アップロド中...")

                file_json = upload_pdf(pdf_bytes)
                send_file(file_json)

                send_text("完了しました")

            except Exception as e:
                # Report the failure into the channel but keep the bot alive.
                send_text(f"エラー: {e}")

            # Mark handled whether the PDF pipeline succeeded or raised.
            processed.add(latest["id"])

        except Exception as e:
            # Top-level guard: never let one bad poll kill the loop.
            print("エラー:", e)

        time.sleep(15)
204
 
 
205
# Script entry point: start the polling loop only when run directly.
if __name__ == "__main__":
    main()