| import time |
| import datetime |
| from typing import Union, Generator, List, Dict, Any, Optional |
|
|
| from pyrogram import Client |
| from pyrogram.types import Message |
|
|
|
|
class PyroSource:
    """Load chat history through a Pyrogram client and return it as plain dicts."""

    def __init__(
        self,
        api_id: Union[int, str],
        api_hash: str,
        app_name: str = "default_app",
    ):
        """
        api_id: Telegram API id
        api_hash: Telegram API hash
        app_name: name passed to the Pyrogram ``Client``
        """
        self.client = Client(name=app_name, api_id=api_id, api_hash=api_hash)

    @staticmethod
    def _to_record(msg: "Message", channel_id: Union[int, str]) -> Dict[str, Any]:
        """Flatten one message into the dict shape returned by the loaders."""
        forwarded_from = msg.forward_from_chat
        return {
            "message_dt": msg.date.strftime("%Y-%m-%d"),
            "message_id": msg.id,
            "channel_id": channel_id,
            "content": msg.text or msg.caption or "",
            "views": msg.views,
            # Normalise a missing username to "" so the field is always a
            # string, matching the value used for non-forwarded messages.
            "original_author": (
                (forwarded_from.username or "") if forwarded_from else ""
            ),
        }

    def load_messages(
        self,
        channel_id: Union[int, str],
        limit: int,
        offset: int = 0,
        offset_id: int = 0,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load messages from the chat history.

        channel_id: channel id or username
        limit: number of messages to load
        offset: offset index
        offset_id: message id offset
        time_sleep: pause in seconds before each message is processed

        Returns one dict per message with the keys ``message_dt``,
        ``message_id``, ``channel_id``, ``content``, ``views`` and
        ``original_author``.
        """
        posts = []

        with self.client as app:
            messages = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset=offset,
                offset_id=offset_id,
            )

            for msg in messages:
                time.sleep(time_sleep)
                posts.append(self._to_record(msg, channel_id))

        return posts

    def load_days(
        self,
        channel_id: Union[int, str],
        from_date: datetime.date,
        to_date: Optional[datetime.date] = None,
        limit: int = 1000,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load messages dated within [from_date, to_date].

        channel_id: channel id or username
        from_date: first day of the range (inclusive)
        to_date: last day of the range (inclusive); when omitted only
            ``from_date`` itself is loaded
        limit: safety limit on the number of messages requested
        time_sleep: pause in seconds before each message is processed

        Returns the same record dicts as ``load_messages``; an empty list
        when ``to_date`` is earlier than ``from_date``.
        """
        last_day = from_date if to_date is None else to_date
        if last_day < from_date:
            # Inverted range: nothing can match, so skip the network round trip.
            return []

        # The history is walked from newer to older messages (the loop below
        # relies on that to stop early), so the walk has to start just after
        # the END of the range: midnight following the last requested day.
        offset_date = datetime.datetime.combine(
            last_day + datetime.timedelta(days=1),
            datetime.time.min,
        )

        posts = []

        with self.client as app:
            messages = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset_date=offset_date,
            )

            for msg in messages:
                time.sleep(time_sleep)

                msg_date = msg.date.date()

                # Everything from here on is older than the range.
                if msg_date < from_date:
                    break

                # Guard in case the history yields something newer than asked.
                if msg_date > last_day:
                    continue

                posts.append(self._to_record(msg, channel_id))

        return posts
|
|