|
|
|
|
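"""
Unified political-party crawler.

- Crawls the Democratic Party of Korea (더불어민주당), People Power Party
  (국민의힘), Rebuilding Korea Party (조국혁신당), Reform Party (개혁신당),
  Basic Income Party (기본소득당) and Progressive Party (진보당) concurrently
- Independent Hugging Face upload for each party
- Asynchronous, parallel processing

Note: if you need CLI argument support, use main.py instead.
"""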
|
|
|
| import asyncio
|
| import logging
|
| from datetime import datetime
|
|
|
| from minjoo_crawler_async import MinjooAsyncCrawler
|
| from ppp_crawler_async import PPPAsyncCrawler
|
| from rebuilding_crawler_async import RebuildingAsyncCrawler
|
| from reform_crawler_async import ReformAsyncCrawler
|
| from basic_income_crawler_async import BasicIncomeAsyncCrawler
|
| from jinbo_crawler_async import JinboAsyncCrawler
|
|
|
# Root logging setup, performed at import time: records at INFO and above go
# to a UTF-8 encoded file ('unified_crawler.log', resolved against the current
# working directory) and to a stream handler (stderr by default).
logging.basicConfig(
    handlers=[
        logging.FileHandler('unified_crawler.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the crawl functions in this file.
logger = logging.getLogger(__name__)
|
|
|
# Registry of party display name -> async crawler class.
# The keys are the Korean party names and are written verbatim to the log;
# dict insertion order is the order in which the crawlers are created and
# their results are reported.
CRAWLERS = {
    '더불어민주당': MinjooAsyncCrawler,      # Democratic Party of Korea
    '국민의힘': PPPAsyncCrawler,             # People Power Party
    '조국혁신당': RebuildingAsyncCrawler,    # Rebuilding Korea Party
    '개혁신당': ReformAsyncCrawler,          # Reform Party
    '기본소득당': BasicIncomeAsyncCrawler,   # Basic Income Party
    '진보당': JinboAsyncCrawler,             # Progressive Party
}
|
|
|
|
|
async def crawl_all_parties():
    """Run the incremental crawl of every party in ``CRAWLERS`` concurrently.

    One crawler is instantiated per registered class, and the awaitables
    returned by their ``run_incremental()`` methods are awaited together.
    An exception raised by one crawl is logged for that party and does not
    stop the other crawls. An exception raised while *constructing* a
    crawler is not caught here and propagates to the caller.

    Returns:
        None. Per-party outcomes and the total elapsed time are reported
        through the module logger only.
    """
    separator = "=" * 60
    logger.info(separator)
    logger.info("통합 정당 크롤러 시작")
    logger.info(" + ".join(CRAWLERS))
    logger.info(separator)

    start_time = datetime.now()

    party_names = list(CRAWLERS)
    crawlers = [crawler_cls() for crawler_cls in CRAWLERS.values()]

    # return_exceptions=True hands each crawler's exception back as a result
    # value, so a single failing party cannot abort the remaining crawls.
    results = await asyncio.gather(
        *(crawler.run_incremental() for crawler in crawlers),
        return_exceptions=True,
    )

    for party, result in zip(party_names, results):
        # Test against BaseException rather than Exception: a cancelled crawl
        # is returned as asyncio.CancelledError, which is not an Exception
        # subclass on Python 3.8+ and would otherwise be logged as a success.
        if isinstance(result, BaseException):
            # exc_info attaches the crawler's traceback to the log record.
            logger.error("%s 크롤링 실패: %s", party, result, exc_info=result)
        else:
            logger.info("%s 크롤링 완료", party)

    duration = (datetime.now() - start_time).total_seconds()
    logger.info(separator)
    logger.info("전체 크롤링 완료")
    logger.info("소요 시간: %.1f초 (%.1f분)", duration, duration / 60)
    logger.info(separator)
|
|
|
|
|
|
|
async def crawl_both_parties():
    """Alias that simply awaits ``crawl_all_parties()``.

    NOTE(review): the name suggests it dates from when only two parties were
    crawled and is kept for existing callers -- confirm before removing.
    """
    await crawl_all_parties()
|
|
|
|
|
async def main():
    """Script entry coroutine: await ``crawl_all_parties()`` and return None."""
    await crawl_all_parties()
|
|
|
|
|
# Start the crawl only when this file is executed as a script; importing the
# module does not run it.
if __name__ == "__main__":
    asyncio.run(main())
|
|
|