|
|
|
|
"""
Political party press-release crawler - main entry point.

Supported parties: Democratic Party of Korea (더불어민주당), People Power Party (국민의힘),
Rebuilding Korea Party (조국혁신당), Reform Party (개혁신당),
Basic Income Party (기본소득당), Progressive Party (진보당)

Usage:
    python main.py                           # incremental update for all parties
    python main.py --party minjoo            # Democratic Party of Korea only
    python main.py --party ppp               # People Power Party only
    python main.py --party rebuilding        # Rebuilding Korea Party only
    python main.py --party reform            # Reform Party only
    python main.py --party basic_income      # Basic Income Party only
    python main.py --party jinbo             # Progressive Party only
    python main.py --start-date 2024-01-01   # specify a date range
    python main.py --party ppp --start-date 2024-01-01 --end-date 2024-06-30
"""
|
|
|
| import asyncio
|
| import argparse
|
| import logging
|
| from datetime import datetime
|
|
|
| from minjoo_crawler_async import MinjooAsyncCrawler
|
| from ppp_crawler_async import PPPAsyncCrawler
|
| from rebuilding_crawler_async import RebuildingAsyncCrawler
|
| from reform_crawler_async import ReformAsyncCrawler
|
| from basic_income_crawler_async import BasicIncomeAsyncCrawler
|
| from jinbo_crawler_async import JinboAsyncCrawler
|
|
|
# Configure the root logger at import time: INFO and above are written both
# to the file main.log (UTF-8, relative to the working directory) and to the
# default console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("main.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
|
|
|
# Human-readable (Korean) display name for every value accepted by --party.
# The keys double as the CLI choices; 'all' is a pseudo-entry that stands for
# every party in ALL_PARTIES rather than a single crawler.
# NOTE(review): the labels were restored from mis-decoded text (UTF-8 read as
# a Thai single-byte code page) — confirm against the upstream file.
PARTY_LABELS = {
    'minjoo':       '더불어민주당',
    'ppp':          '국민의힘',
    'rebuilding':   '조국혁신당',
    'reform':       '개혁신당',
    'basic_income': '기본소득당',
    'jinbo':        '진보당',
    'all':          '전체 (6개 정당)',
}

# Concrete party codes that are crawled when --party is 'all'
# (every PARTY_LABELS key except the 'all' pseudo-entry, in the same order).
ALL_PARTIES = ['minjoo', 'ppp', 'rebuilding', 'reform', 'basic_income', 'jinbo']
|
|
|
|
|
def parse_args(argv=None):
    """Parse the crawler's command-line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            the original behaviour.

    Returns:
        argparse.Namespace with three attributes:
        ``party`` (one of the PARTY_LABELS keys, default ``'all'``),
        ``start_date`` and ``end_date`` (the raw strings given on the command
        line, or ``None`` when the option is omitted). The date strings are
        not validated here.
    """
    parser = argparse.ArgumentParser(
        description='정당 보도자료 크롤러',
        # Raw formatter keeps the hand-written line breaks of the help texts.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        '--party',
        choices=list(PARTY_LABELS),
        default='all',
        help=(
            '크롤링할 정당 선택 (기본값: all)\n'
            '  minjoo       : 더불어민주당\n'
            '  ppp          : 국민의힘\n'
            '  rebuilding   : 조국혁신당\n'
            '  reform       : 개혁신당\n'
            '  basic_income : 기본소득당\n'
            '  jinbo        : 진보당\n'
            '  all          : 전체 동시 크롤링'
        ),
    )
    parser.add_argument(
        '--start-date',
        metavar='YYYY-MM-DD',
        default=None,
        # Each help text is a single literal; the line break is the explicit \n.
        help='수집 시작 날짜 (예: 2024-01-01)\n미입력 시 마지막 크롤링 이후부터 (증분 업데이트)',
    )
    parser.add_argument(
        '--end-date',
        metavar='YYYY-MM-DD',
        default=None,
        help='수집 종료 날짜 (예: 2024-12-31)\n미입력 시 오늘 날짜',
    )
    return parser.parse_args(argv)
|
|
|
|
|
def get_crawler(party: str):
    """Return a new crawler instance for the given party code.

    The code is looked up in a fixed table of the six crawler classes and the
    matching class is instantiated with no arguments. A code that is not in
    the table (this includes the 'all' pseudo-choice) raises ``KeyError``.
    """
    crawler_classes = dict(
        minjoo=MinjooAsyncCrawler,
        ppp=PPPAsyncCrawler,
        rebuilding=RebuildingAsyncCrawler,
        reform=ReformAsyncCrawler,
        basic_income=BasicIncomeAsyncCrawler,
        jinbo=JinboAsyncCrawler,
    )
    crawler_cls = crawler_classes[party]
    return crawler_cls()
|
|
|
|
|
async def run_party(party: str, start_date=None, end_date=None):
    """Run the crawl for a single party.

    With neither date given, the crawler's own ``run_incremental()`` is
    awaited. If at least one of ``start_date`` / ``end_date`` is truthy, the
    range is collected via ``collect_all(start_date, end_date)`` and, unless
    the result reports ``.empty``, it is saved locally and then uploaded to
    Hugging Face. Always returns ``None``.
    """
    crawler = get_crawler(party)

    # No explicit bound on either side: delegate to the incremental run.
    if not (start_date or end_date):
        await crawler.run_incremental()
        return

    # presumably a pandas DataFrame — only its `.empty` flag is used here
    collected = await crawler.collect_all(start_date, end_date)
    if collected.empty:
        # Nothing was collected for the range, so skip saving and uploading.
        return

    crawler.save_local(collected)
    crawler.upload_to_huggingface(collected)
|
|
|
|
|
async def main():
    """Parse the CLI options, crawl the selected parties and log a summary.

    A single selected party is awaited directly, so an exception from its
    crawl propagates to the caller. With ``--party all`` every party in
    ALL_PARTIES is crawled concurrently through ``asyncio.gather``; a failure
    of one party is logged and does not stop the others.
    """
    args = parse_args()
    start_time = datetime.now()

    target_parties = ALL_PARTIES if args.party == 'all' else [args.party]
    banner = "=" * 60

    logger.info(banner)
    logger.info("정당 보도자료 크롤러 시작")
    logger.info(f"대상 정당 : {PARTY_LABELS[args.party]}")
    # Missing bounds are shown as "incremental update" (start) and "today" (end).
    logger.info(f"수집 기간 : {args.start_date or '증분 업데이트'} ~ {args.end_date or '오늘'}")
    logger.info(banner)

    if len(target_parties) == 1:
        await run_party(target_parties[0], args.start_date, args.end_date)
    else:
        # return_exceptions=True: failures come back as result values instead
        # of aborting the whole gather, so each party is reported separately.
        results = await asyncio.gather(
            *[run_party(p, args.start_date, args.end_date) for p in target_parties],
            return_exceptions=True,
        )
        for party, result in zip(target_parties, results):
            # BaseException rather than Exception: a cancelled crawl is
            # returned as CancelledError, which is not an Exception subclass,
            # and must not be reported as a successful run.
            if isinstance(result, BaseException):
                logger.error(f"{PARTY_LABELS[party]} 크롤링 실패: {result}")
            else:
                logger.info(f"{PARTY_LABELS[party]} 크롤링 완료")

    duration = (datetime.now() - start_time).total_seconds()
    logger.info(banner)
    logger.info(f"전체 완료! 소요 시간: {duration:.1f}초 ({duration / 60:.1f}분)")
    logger.info(banner)
|
|
|
|
|
if __name__ == "__main__":
    # Runs only when the file is executed as a script: create the main()
    # coroutine and let asyncio.run drive it to completion.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
|
|
|