KoreanPartyCommunication / unified_crawler.py
hanjunlee's picture
Upload 23 files
3a36548 verified
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Unified political-party crawler.

- Crawls the Democratic Party of Korea, People Power Party, Rebuilding Korea
  Party, Reform Party, Basic Income Party, and Progressive Party concurrently
- Independent Hugging Face upload for each party
- Asynchronous parallel processing

Note: if CLI argument support is needed, use main.py instead.
"""
import asyncio
import logging
from datetime import datetime
from minjoo_crawler_async import MinjooAsyncCrawler
from ppp_crawler_async import PPPAsyncCrawler
from rebuilding_crawler_async import RebuildingAsyncCrawler
from reform_crawler_async import ReformAsyncCrawler
from basic_income_crawler_async import BasicIncomeAsyncCrawler
from jinbo_crawler_async import JinboAsyncCrawler
# Configure the root logger at import time: INFO and above are written both to
# a UTF-8 log file in the current working directory and to the default stream
# of StreamHandler.
logging.basicConfig(
    handlers=[
        logging.FileHandler("unified_crawler.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# Registry mapping each party's display name (used in log messages) to the
# crawler class that handles it. crawl_all_parties() instantiates every class
# with no arguments and awaits its run_incremental() coroutine; iteration
# follows the insertion order below.
CRAWLERS = {
    '๋”๋ถˆ์–ด๋ฏผ์ฃผ๋‹น': MinjooAsyncCrawler,
    '๊ตญ๋ฏผ์˜ํž˜': PPPAsyncCrawler,
    '์กฐ๊ตญํ˜์‹ ๋‹น': RebuildingAsyncCrawler,
    '๊ฐœํ˜์‹ ๋‹น': ReformAsyncCrawler,
    '๊ธฐ๋ณธ์†Œ๋“๋‹น': BasicIncomeAsyncCrawler,
    '์ง„๋ณด๋‹น': JinboAsyncCrawler,
}
async def crawl_all_parties():
"""6๊ฐœ ์ •๋‹น ๋™์‹œ ํฌ๋กค๋ง"""
logger.info("=" * 60)
logger.info("ํ†ตํ•ฉ ์ •๋‹น ํฌ๋กค๋Ÿฌ ์‹œ์ž‘")
logger.info(" + ".join(CRAWLERS.keys()))
logger.info("=" * 60)
start_time = datetime.now()
crawlers = [cls() for cls in CRAWLERS.values()]
party_names = list(CRAWLERS.keys())
results = await asyncio.gather(
*[crawler.run_incremental() for crawler in crawlers],
return_exceptions=True
)
for party, result in zip(party_names, results):
if isinstance(result, Exception):
logger.error(f"{party} ํฌ๋กค๋ง ์‹คํŒจ: {result}")
else:
logger.info(f"{party} ํฌ๋กค๋ง ์™„๋ฃŒ")
duration = (datetime.now() - start_time).total_seconds()
logger.info("=" * 60)
logger.info(f"์ „์ฒด ํฌ๋กค๋ง ์™„๋ฃŒ")
logger.info(f"์†Œ์š” ์‹œ๊ฐ„: {duration:.1f}์ดˆ ({duration / 60:.1f}๋ถ„)")
logger.info("=" * 60)
# Kept for backward compatibility.
async def crawl_both_parties():
    """Alias that simply delegates to ``crawl_all_parties()``."""
    return await crawl_all_parties()
async def main():
    """Entry coroutine for script execution: run one crawl of all parties."""
    return await crawl_all_parties()
# Start the crawl only when this file is executed as a script; importing the
# module does not trigger a crawl.
if __name__ == "__main__":
    asyncio.run(main())