diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..2793c6717c6ac2eefb517e1b970c7b7648bfc9da 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +src/dartlab/analysis/financial/research/__pycache__/narrative.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text +src/dartlab/providers/dart/__pycache__/company.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text +src/dartlab/providers/dart/__pycache__/company.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text +src/dartlab/providers/edgar/__pycache__/company.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text +src/dartlab/providers/edgar/__pycache__/company.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text +src/dartlab/review/__pycache__/builders.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..d7205ad7068d13b99d3b477494c3b0e9b00caad2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.12-slim + +WORKDIR /app + +# uv 설치 +RUN pip install --no-cache-dir uv + +# 의존성 먼저 (캐시 레이어) +COPY pyproject.toml uv.lock ./ +RUN uv pip install --system . + +# 소스 복사 +COPY src/ src/ + +# HF Spaces 환경변수 +ENV SPACE_ID=eddmpython/dartlab +ENV DARTLAB_MCP_HTTP=1 +ENV DARTLAB_CORS_ORIGINS=* +ENV DARTLAB_HOST=0.0.0.0 +ENV DARTLAB_PORT=7860 + +EXPOSE 7860 + +CMD ["python", "-m", "dartlab.server"] diff --git a/README.md b/README.md index 985fa582ab1a3a32dc39bf89537422395c7f665d..4d217354944f0e5c4c5abc8f6edd8f21f46a684d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,37 @@ --- -title: Dartlab -emoji: 🦀 -colorFrom: red -colorTo: red +title: dartlab +emoji: 📊 +colorFrom: blue +colorTo: indigo sdk: docker -pinned: false +app_port: 7860 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# dartlab — 한국 전자공시 분석 API + MCP 서버 + +설치 없이 사용: +- **REST API**: `https://eddmpython-dartlab.hf.space/api/*` +- **MCP (Claude Desktop)**: `https://eddmpython-dartlab.hf.space/mcp/sse` + +## MCP 설정 + +`claude_desktop_config.json`: +```json +{ + "mcpServers": { + "dartlab": { + "url": "https://eddmpython-dartlab.hf.space/mcp/sse" + } + } +} +``` + +## API 예시 + +```bash +# 공시 목록 +curl "https://eddmpython-dartlab.hf.space/api/dart/filings?corp=005930" + +# 재무제표 +curl "https://eddmpython-dartlab.hf.space/api/dart/finance/005930?year=2024" +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..b72a64faaffb149c08565365b77954a84ab4308b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,228 @@ +[project] +name = "dartlab" +version = "0.9.6" +description = "DART 전자공시 + EDGAR 공시를 하나의 회사 맵으로 — Python 재무 분석 라이브러리" +readme = "README.md" +license = {file = "LICENSE"} +requires-python = ">=3.12" +authors = [ + {name = "eddmpython"} +] +keywords = [ + "dart", + "edgar", + "sec", + "financial-statements", + "korea", + "disclosure", + "accounting", + "polars", + "sections", + "mcp", + "ai-analysis", + "annual-report", + "10-k", + "xbrl", + "전자공시", + "재무제표", + "사업보고서", + "공시분석", + "다트", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Intended Audience :: Financial and Insurance Industry", + "Intended Audience :: End Users/Desktop", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Office/Business :: Financial", + "Topic :: Office/Business :: Financial :: Accounting", + "Topic :: Office/Business :: Financial :: Investment", + "Topic :: Scientific/Engineering :: Information Analysis", + "Natural Language :: Korean", + "Natural Language :: English", + "Typing :: Typed", +] +dependencies = [ + # core + "beautifulsoup4>=4.14.3,<5", + "lxml>=6.0.2,<7", + "httpx>=0.28.1,<1", + "polars>=1.0.0,<2", + "rich>=14.3.3,<15", + "huggingface-hub>=0.20.0,<1", + "openpyxl>=3.1.5,<4", + "diff-match-patch>=20230430", + "numpy>=1.26.0,<3", + "marimo>=0.22.0", + # ai providers + "openai>=1.0.0,<3", + "google-genai>=1.0.0,<2", + "anthropic>=0.30.0,<2", + # server (dartlab ai) + "fastapi>=0.135.1,<1", + "uvicorn[standard]>=0.30.0,<1", + "sse-starlette>=2.0.0,<3", + "mcp[cli]>=1.0", + "qrcode>=7.0,<9", + # viz + "plotly>=5.0.0,<6", +] + +[project.scripts] +dartlab = "dartlab.cli.main:main" + +[project.entry-points."dartlab.plugins"] + +[project.urls] +Homepage = "https://eddmpython.github.io/dartlab/" +Repository = "https://github.com/eddmpython/dartlab" +Documentation = "https://eddmpython.github.io/dartlab/docs/" +Issues = "https://github.com/eddmpython/dartlab/issues" +Changelog = "https://eddmpython.github.io/dartlab/docs/changelog" +Demo = "https://huggingface.co/spaces/eddmpython/dartlab" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/dartlab"] +artifacts = [ + "src/dartlab/ui/build/**", +] +exclude = [ + "**/_reference/**", + "src/dartlab/engines/edinet/**", + "src/dartlab/engines/esg/**", + "src/dartlab/engines/event/**", + "src/dartlab/engines/supply/**", + "src/dartlab/engines/watch/**", +] + +[tool.hatch.build.targets.sdist] +include = [ + "src/dartlab/**/*.py", + "src/dartlab/**/*.json", + "src/dartlab/**/*.parquet", + "src/dartlab/ui/build/**", + "README.md", + "LICENSE", +] +exclude = [ + "**/_reference/**", + "src/dartlab/engines/edinet/**", + "src/dartlab/engines/esg/**", + "src/dartlab/engines/event/**", + "src/dartlab/engines/supply/**", + "src/dartlab/engines/watch/**", +] + +[tool.ruff] +target-version = "py312" +line-length = 120 +exclude = ["experiments", "*/_reference"] + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = ["E402", "E501", "E741", "F841"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py", "bench_*.py"] +addopts = "-v --tb=short" +asyncio_mode = "auto" +markers = [ + "requires_data: 로컬 parquet 데이터 필요 (CI에서 skip)", + "unit: 순수 로직/mock만 — 데이터 로드 없음, 병렬 안전", + "integration: Company 1개 로딩 필요 — 중간 무게", + "heavy: 대량 데이터 로드 — 단독 실행 필수", +] + +[tool.coverage.run] +source = ["dartlab"] +omit = [ + "src/dartlab/engines/ai/providers/*", + "src/dartlab/review/*", +] + +[tool.coverage.report] +show_missing = true +skip_empty = true +exclude_lines = [ + "pragma: no cover", + "if __name__", + "raise NotImplementedError", +] +fail_under = 30 + +[tool.pyright] +pythonVersion = "3.12" +typeCheckingMode = "basic" +include = ["src/dartlab"] +exclude = [ + "src/dartlab/engines/ai/providers/**", + "ui/**", + "experiments/**", +] +reportMissingTypeStubs = false +reportUnknownParameterType = false +reportUnknownMemberType = false +reportUnknownVariableType = false + +[tool.bandit] +exclude_dirs = ["experiments", "tests"] +skips = ["B101"] + +[tool.deptry] +# 옵셔널 통합 의존성 — 사용자가 별도 설치할 때만 동작 (런타임 ImportError 가드 있음) +extend_exclude = [ + "src/dartlab/.*/_reference/.*", # 학습/실험 코드, 런타임 미사용 +] +[tool.deptry.per_rule_ignores] +DEP001 = [ + # ── channel 어댑터 (외부 메신저 옵셔널) ── + "discord", "slack_bolt", "telegram", + # ── CLI 인터랙티브 옵셔널 ── + "prompt_toolkit", + # ── display 옵셔널 ── + "great_tables", "itables", "IPython", + # ── gather 옵셔널 ── + "FinanceDataReader", "tavily", + # ── _reference 학습/실험 ── + "agents", "owlready2", "rapidfuzz", "edgar", + # ── transitive deps (다른 패키지가 끌어옴) ── + "dotenv", # python-dotenv + "google", # google-genai + "yaml", # pyyaml + "bs4", # beautifulsoup4 + "starlette", # fastapi가 끌어옴 + "pydantic", # fastapi가 끌어옴 +] +DEP002 = [ + "beautifulsoup4", # bs4 직접 import + "google-genai", # google.genai 사용 (gemini provider) + "marimo", # 노트북 컴파일/배포 도구 +] + +[dependency-groups] +dev = [ + "build>=1.4.0", + "dartlab[all]", + "hatchling>=1.29.0", + "hypothesis>=6.100.0", + "pillow>=12.1.1", + "pre-commit>=4.0.0", + "pyright>=1.1.0", + "pytest>=9.0.2", + "pytest-asyncio>=0.24.0", + "pytest-benchmark>=5.0.0", + "pytest-cov>=6.0.0", + "radon>=6.0.0", + "vulture>=2.0", +] diff --git a/src/dartlab/STATUS.md b/src/dartlab/STATUS.md new file mode 100644 index 0000000000000000000000000000000000000000..6e7b9aff9b97699fd959c8f1eecff5aed685c50c --- /dev/null +++ b/src/dartlab/STATUS.md @@ -0,0 +1,81 @@ +# src/dartlab + +## 개요 +DART 공시 데이터 활용 라이브러리. 종목코드 기반 API. + +## 구조 +``` +dartlab/ +├── core/ # 공통 기반 (데이터 로딩, 보고서 선택, 테이블 파싱, 주석 추출) +├── finance/ # 재무 데이터 (36개 모듈) +│ ├── summary/ # 요약재무정보 시계열 +│ ├── statements/ # 연결재무제표 (BS, IS, CF) +│ ├── segment/ # 부문별 보고 (주석) +│ ├── affiliate/ # 관계기업·공동기업 (주석) +│ ├── costByNature/ # 비용의 성격별 분류 (주석) +│ ├── tangibleAsset/ # 유형자산 (주석) +│ ├── notesDetail/ # 주석 상세 (23개 키워드) +│ ├── dividend/ # 배당 +│ ├── majorHolder/ # 최대주주·주주현황 +│ ├── shareCapital/ # 주식 현황 +│ ├── employee/ # 직원 현황 +│ ├── subsidiary/ # 자회사 투자 +│ ├── bond/ # 채무증권 +│ ├── audit/ # 감사의견·보수 +│ ├── executive/ # 임원 현황 +│ ├── executivePay/ # 임원 보수 +│ ├── boardOfDirectors/ # 이사회 +│ ├── capitalChange/ # 자본금 변동 +│ ├── contingentLiability/ # 우발부채 +│ ├── internalControl/ # 내부통제 +│ ├── relatedPartyTx/ # 관계자 거래 +│ ├── rnd/ # R&D 비용 +│ ├── sanction/ # 제재 현황 +│ ├── affiliateGroup/ # 계열사 목록 +│ ├── fundraising/ # 증자/감자 +│ ├── productService/ # 주요 제품/서비스 +│ ├── salesOrder/ # 매출/수주 +│ ├── riskDerivative/ # 위험관리/파생거래 +│ ├── articlesOfIncorporation/ # 정관 +│ ├── otherFinance/ # 기타 재무 +│ ├── companyHistory/ # 회사 연혁 +│ ├── shareholderMeeting/ # 주주총회 +│ ├── auditSystem/ # 감사제도 +│ ├── investmentInOther/ # 타법인출자 +│ └── companyOverviewDetail/ # 회사개요 상세 +├── disclosure/ # 공시 서술형 (4개 모듈) +│ ├── business/ # 사업의 내용 +│ ├── companyOverview/ # 회사의 개요 (정량) +│ ├── mdna/ # MD&A +│ └── rawMaterial/ # 원재료·설비 +├── company.py # 통합 접근 (property 기반, lazy + cache) +├── notes.py # K-IFRS 주석 통합 접근 +└── config.py # 전역 설정 (verbose) +``` + +## API 요약 +```python +import dartlab + +c = dartlab.Company("005930") +c.index # 회사 구조 인덱스 +c.show("BS") # topic payload +c.trace("dividend") # source trace +c.BS # 재무상태표 DataFrame +c.dividend # 배당 시계열 DataFrame + +import dartlab +dartlab.verbose = False # 진행 표시 끄기 +``` + +## 현황 +- 2026-03-06: core/ + finance/summary/ 초기 구축 +- 2026-03-06: finance/statements/, segment/, affiliate/ 추가 +- 2026-03-06: 전체 패키지 개선 — stockCode 시그니처, 핫라인 설계, API_SPEC.md +- 2026-03-07: finance/ 11개 모듈 추가 (dividend~bond, costByNature) +- 2026-03-07: disclosure/ 4개 모듈 추가 (business, companyOverview, mdna, rawMaterial) +- 2026-03-07: finance/ 주석 모듈 추가 (notesDetail, tangibleAsset) +- 2026-03-07: finance/ 7개 모듈 추가 (audit~internalControl, rnd, sanction) +- 2026-03-07: finance/ 7개 모듈 추가 (affiliateGroup~companyHistory, shareholderMeeting~investmentInOther, companyOverviewDetail) +- 2026-03-08: analyze → fsSummary 리네이밍, 계정명 특수문자 정리 +- 2026-03-08: Company 재설계 — property 기반 접근, Notes 통합, all(), verbose 설정 diff --git a/src/dartlab/__init__.py b/src/dartlab/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2b45027862b91a08ebd7098ea51b7ebaeefdd330 --- /dev/null +++ b/src/dartlab/__init__.py @@ -0,0 +1,1032 @@ +"""DART 공시 데이터 활용 라이브러리.""" + +import sys +from importlib.metadata import PackageNotFoundError +from importlib.metadata import version as _pkg_version + +from dartlab import ai as llm # noqa: F401 — 하위호환 +from dartlab import config, core # noqa: F401 — 하위호환 +from dartlab.audit import queryAudit, runAudit # noqa: F401 — 하위호환 +from dartlab.company import Company +from dartlab.core.env import loadEnv as _loadEnv +from dartlab.core.select import ChartResult, SelectResult +from dartlab.gather.fred import Fred +from dartlab.gather.listing import codeToName, fuzzySearch, getKindList, nameToCode # noqa: F401 +from dartlab.listing import listing # noqa: F401 — 목록 조회 단일 진입점 +from dartlab.providers.dart.company import Company as _DartEngineCompany +from dartlab.providers.dart.openapi.dart import OpenDart +from dartlab.providers.edgar.openapi.edgar import OpenEdgar +from dartlab.review import Review + +# .env 자동 로드 — API 키 등 환경변수 +_loadEnv() + +try: + __version__ = _pkg_version("dartlab") +except PackageNotFoundError: + __version__ = "0.0.0" + + +def search( + query: str, + *, + corp: str | None = None, + start: str | None = None, + end: str | None = None, + topK: int = 10, +): + """공시 원문 검색. *(alpha)* + + Ngram+Synonym 기반 검색. 모델 불필요, cold start 0ms. + DART 공시 뷰어 링크(dartUrl) 포함. + + Capabilities: + - 전체 공시 원문 검색 (수시공시 포함) + - 자연어 동의어 확장 ("돈을 빌렸다" → 사채/차입/전환사채) + - 종목/기간 필터 지원 + - DART 공시 뷰어 링크 포함 (dartUrl 컬럼) + + Requires: + 데이터: allFilings (수집 + buildIndex 필요) + + AIContext: + 공시 내용을 자연어로 찾을 때 사용. 결과의 dartUrl로 원문 확인 가능. + 종목 찾기는 Company("삼성전자")를 사용. + + Guide: + - "유상증자 한 회사?" -> search("유상증자 결정") + - "삼성전자 최근 공시?" -> search("공시", corp="005930") + + SeeAlso: + - Company: 종목코드/회사명으로 Company 생성 + - listing: 전체 상장법인 목록 + + Args: + query: 검색어 (한국어). "유상증자 결정", "대표이사 변경" 등. + corp: 종목 필터 (종목코드 "005930" 또는 회사명 "삼성전자"). + start: 시작일 (YYYYMMDD). + end: 종료일 (YYYYMMDD). + topK: 반환 건수 (기본 10). + + Returns + ------- + pl.DataFrame + score : float — 매칭 점수 (BM25F 가중) + rcept_no : str — 접수번호 (DART 고유 ID) + corp_name : str — 회사명 + rcept_dt : str — 접수일 (YYYYMMDD) + report_nm : str — 공시 유형명 + section_title : str — 섹션 제목 + text : str — 본문 텍스트 (최대 2000자) + dartUrl : str — DART 공시 뷰어 URL + + Example:: + + import dartlab + dartlab.search("유상증자 결정") + dartlab.search("대표이사 변경", corp="005930") + dartlab.search("전환사채", start="20240101", topK=5) + """ + # R33-1: 빈 query 거부 + if not query or not query.strip(): + raise ValueError( + "search 의 query 가 비어 있습니다. 검색어를 1자 이상 전달하세요. 예: dartlab.search('유상증자')" + ) + from dartlab.core.search import search as _search + + return _search(query, corp=corp, start=start, end=end, topK=topK) + + +def searchName(keyword: str): + """종목명/코드로 종목 찾기 (KR + US). + + Args: + keyword: 종목명, 종목코드, 또는 ticker. + + Returns: + pl.DataFrame — 종목 검색 결과. + + Example:: + + dartlab.searchName("삼성전자") + dartlab.searchName("AAPL") + """ + # R33-2: 빈 keyword 거부 + if not keyword or not keyword.strip(): + raise ValueError( + "searchName 의 keyword 가 비어 있습니다. 종목명/코드를 1자 이상 전달하세요. " + "예: dartlab.searchName('삼성전자') 또는 dartlab.searchName('AAPL')" + ) + if any("\uac00" <= ch <= "\ud7a3" for ch in keyword): + return _DartEngineCompany.search(keyword) + if keyword.isascii() and keyword.isalpha(): + try: + from dartlab.providers.edgar.company import Company as _US + + return _US.search(keyword) + except (ImportError, AttributeError, NotImplementedError): + pass + return _DartEngineCompany.search(keyword) + + +def collect( + *codes: str, + categories: list[str] | None = None, + incremental: bool = True, +) -> dict[str, dict[str, int]]: + """지정 종목 DART 데이터 수집 (OpenAPI). + + Capabilities: + - 종목별 DART 공시 데이터 직접 수집 (finance, docs, report) + - 멀티키 병렬 수집 (DART_API_KEYS 쉼표 구분) + - 증분 수집 — 이미 있는 데이터는 건너뜀 + - 카테고리별 선택 수집 + + Requires: + API 키: DART_API_KEY + + AIContext: + 사용자가 특정 종목의 최신 데이터를 직접 수집할 때 사용. + + Guide: + - "데이터 수집해줘" -> DART_API_KEY 필요. dartlab.setup("dart-key", "YOUR_KEY")로 설정 안내 + - "삼성전자 재무 데이터 수집" -> collect("005930", categories=["finance"]) + - 보안: 키는 로컬 .env에만 저장, 외부 전송 절대 없음 + + SeeAlso: + - Company: 수집된 데이터로 Company 생성하여 분석 + - search: 종목코드 모를 때 먼저 검색 + + Args: + *codes: 종목코드 1개 이상 ("005930", "000660"). + categories: 수집 카테고리 ["finance", "docs", "report"]. None이면 전체. + incremental: True면 증분 수집 (기본). False면 전체 재수집. + + Returns: + dict — 종목코드별 카테고리별 수집 건수. + + Example:: + + import dartlab + dartlab.collect("005930") # 삼성전자 전체 + dartlab.collect("005930", "000660", categories=["finance"]) # 재무만 + """ + from dartlab.providers.dart.openapi.batch import batchCollect + + return batchCollect(list(codes), categories=categories, incremental=incremental) + + +def collectAll( + *, + categories: list[str] | None = None, + mode: str = "new", + maxWorkers: int | None = None, + incremental: bool = True, +) -> dict[str, dict[str, int]]: + """전체 상장종목 DART 데이터 일괄 수집. + + Capabilities: + - 전체 상장종목 DART 공시 데이터 일괄 수집 + - 미수집 종목만 선별 수집 (mode="new") 또는 전체 재수집 (mode="all") + - 멀티키 병렬 수집 (DART_API_KEYS 쉼표 구분) + - 카테고리별 선택 (finance, docs, report) + + Requires: + API 키: DART_API_KEY + + Guide: + - "전종목 데이터 수집" -> collectAll() 안내. DART_API_KEY 필요 + - "재무 데이터만 수집" -> collectAll(categories=["finance"]) + - 보안: 키는 로컬 .env에만 저장, 외부 전송 절대 없음 + + SeeAlso: + - collect: 특정 종목만 수집 + - downloadAll: HuggingFace 사전구축 데이터 (API 키 불필요, 더 빠름) + + Args: + categories: 수집 카테고리 ["finance", "docs", "report"]. None이면 전체. + mode: "new" (미수집만, 기본) 또는 "all" (전체 재수집). + maxWorkers: 병렬 워커 수. None이면 키 수에 따라 자동. + incremental: True면 증분 수집. False면 전체 재수집. + + Returns: + dict — 종목코드별 카테고리별 수집 건수. + + Example:: + + import dartlab + dartlab.collectAll() # 전체 미수집 종목 + dartlab.collectAll(categories=["finance"]) # 재무만 + dartlab.collectAll(mode="all") # 기수집 포함 전체 + """ + from dartlab.providers.dart.openapi.batch import batchCollectAll + + return batchCollectAll( + categories=categories, + mode=mode, + maxWorkers=maxWorkers, + incremental=incremental, + ) + + +def downloadAll(category: str = "finance", *, forceUpdate: bool = False) -> None: + """HuggingFace에서 전체 시장 데이터 다운로드. + + Capabilities: + - HuggingFace 사전 구축 데이터 일괄 다운로드 + - finance (~600MB, 2700+종목), docs (~8GB, 2500+종목), report (~320MB, 2700+종목) + - 이어받기/병렬 다운로드 지원 (huggingface_hub) + - 전사 분석(scanAccount, governance, digest 등)에 필요한 데이터 사전 준비 + + Requires: + 없음 (HuggingFace 공개 데이터셋) + + Guide: + - "데이터 어떻게 받아?" -> downloadAll("finance") 안내. API 키 불필요 + - "scan 쓰려면?" -> downloadAll("finance") + downloadAll("report") 필요 + - finance 먼저 (600MB), report 다음 (320MB), docs는 대용량 주의 (8GB) + + SeeAlso: + - scan: 다운로드된 데이터로 전종목 비교 + - collect: DART API로 직접 수집 (최신 데이터, API 키 필요) + + Args: + category: "finance" (재무 ~600MB), "docs" (공시 ~8GB), "report" (보고서 ~320MB). + forceUpdate: True면 이미 있는 파일도 최신으로 갱신. + + Returns: + None. + + Example:: + + import dartlab + dartlab.downloadAll("finance") # 재무 전체 — scanAccount/scanRatio 등에 필요 + dartlab.downloadAll("report") # 보고서 전체 — governance/workforce/capital/debt에 필요 + dartlab.downloadAll("docs") # 공시 전체 — digest에 필요 (대용량 ~8GB) + """ + from dartlab.core.dataLoader import downloadAll as _downloadAll + + _downloadAll(category, forceUpdate=forceUpdate) + + +def checkFreshness(stockCode: str, *, forceCheck: bool = False): + """종목의 로컬 데이터가 최신인지 DART API로 확인. + + Capabilities: + - 로컬 데이터와 DART 서버의 최신 공시 비교 + - 누락 공시 수 + 최신 여부 판정 + - 캐시된 결과 재사용 (forceCheck=False) + + Requires: + API 키: DART_API_KEY + + AIContext: + - 분석 전 데이터 최신성 확인에 사용 + - isFresh=False이면 collect()로 갱신 권장 + - missingCount로 누락 규모 파악 후 수집 우선순위 판단 + + Guide: + - "내 데이터 최신이야?" -> checkFreshness("005930") + - "공시 누락 있어?" -> checkFreshness로 missingCount 확인 + - "데이터 업데이트 필요해?" -> checkFreshness 후 collect 안내 + + SeeAlso: + - collect: 누락 공시 실제 수집 (checkFreshness에서 발견한 gap 채우기) + - Company: 종목 데이터 접근 (최신 데이터 기반 분석) + + Args: + stockCode: 종목코드 ("005930"). + forceCheck: True면 캐시 무시, DART API 강제 조회. + + Returns: + FreshnessResult — isFresh (bool), missingCount (int), lastLocalDate, lastRemoteDate. + + Example:: + + import dartlab + result = dartlab.checkFreshness("005930") + result.isFresh # True/False + result.missingCount # 누락 공시 수 + """ + from dartlab.providers.dart.openapi.freshness import ( + checkFreshness as _check, + ) + + return _check(stockCode, forceCheck=forceCheck) + + +def setup(provider: str | None = None): + """AI provider 설정 안내 + 인터랙티브 설정. + + Capabilities: + - 전체 AI provider 설정 현황 테이블 표시 + - provider별 대화형 설정 (키 입력 → .env 저장) + - ChatGPT OAuth 브라우저 로그인 + - OpenAI/Gemini/Groq/Cerebras/Mistral API 키 설정 + - Ollama 로컬 LLM 설치 안내 + + Requires: + 없음 + + AIContext: + - AI 분석 기능 사용 전 provider 설정 상태 확인 + - 미설정 provider 감지 시 setup() 안내로 연결 + - 설정 완료 여부를 프로그래밍 방식으로 체크 가능 + + Guide: + - "AI 설정 어떻게 해?" -> setup()으로 전체 현황 확인 + - "ChatGPT 연결하고 싶어" -> setup("chatgpt") + - "OpenAI 키 등록" -> setup("openai") + - "Ollama 어떻게 써?" -> setup("ollama") + + SeeAlso: + - ask: AI 질문 (setup 완료 후 사용) + - chat: AI 대화 (setup 완료 후 사용) + - llm.configure: 프로그래밍 방식 provider 설정 + + Args: + provider: provider명 또는 alias. None이면 전체 현황 표시. + 지원: "chatgpt", "openai", "gemini", "groq", "cerebras", + "mistral", "ollama", "codex", "custom". + + Returns: + None (터미널/노트북에 안내 출력). + + Example:: + + import dartlab + dartlab.setup() # 전체 provider 현황 + dartlab.setup("chatgpt") # ChatGPT OAuth 브라우저 로그인 + dartlab.setup("openai") # OpenAI API 키 설정 + dartlab.setup("ollama") # Ollama 설치 안내 + """ + from dartlab.core.ai.guide import ( + providers_status, + resolve_alias, + ) + + if provider is None: + print(providers_status()) + return + + provider = resolve_alias(provider) + + if provider == "oauth-codex": + _setup_oauth_interactive() + else: + _setup_apikey_interactive(provider) + + +def _setup_oauth_interactive(): + """노트북/CLI에서 ChatGPT OAuth 브라우저 로그인.""" + try: + from dartlab.ai.providers.support.oauth_token import is_authenticated + + if is_authenticated(): + print("\n ✓ ChatGPT OAuth 이미 인증되어 있습니다.") + print(' 재인증: dartlab.setup("chatgpt") # 재실행하면 갱신\n') + return + except ImportError: + pass + + try: + from dartlab.cli.commands.setup import _do_oauth_login + + _do_oauth_login() + except ImportError: + print("\n ChatGPT OAuth 브라우저 로그인:") + print(" CLI에서 실행: dartlab setup oauth-codex\n") + + +def _setup_apikey_interactive(provider: str): + """API 키 기반 provider 인터랙티브 설정.""" + from dartlab.guide.providers import _PROVIDERS + + spec = _PROVIDERS.get(provider) + if spec is None or not spec.env_key: + from dartlab.core.ai.guide import provider_guide + + print(provider_guide(provider)) + return + + from dartlab.guide.env import promptAndSave + + promptAndSave( + spec.env_key, + label=spec.label, + guide=spec.signupUrl or spec.description, + ) + + +def _auto_stream(gen) -> str: + """Generator를 소비하면서 stdout에 스트리밍 출력, 전체 텍스트 반환.""" + import sys + + chunks: list[str] = [] + for chunk in gen: + chunks.append(chunk) + sys.stdout.write(chunk) + sys.stdout.flush() + sys.stdout.write("\n") + sys.stdout.flush() + return "".join(chunks) + + +def ask( + *args: str, + include: list[str] | None = None, + exclude: list[str] | None = None, + provider: str | None = None, + model: str | None = None, + stream: bool = True, + raw: bool = False, + reflect: bool = False, + pattern: str | None = None, + template: str | None = None, + modules: list[str] | None = None, + **kwargs, +): + """LLM에게 기업에 대해 질문. + + Capabilities: + - 자연어로 기업 분석 질문 (종목 자동 감지) + - 스트리밍 출력 (기본) / 배치 반환 / Generator 직접 제어 + - 엔진 자동 계산 → LLM 해석 (Engine-First) + - 데이터 모듈 include/exclude로 분석 범위 제어 + - 자체 검증 (reflect=True) + + Requires: + AI: provider 설정 (dartlab.setup() 참조) + + AIContext: + - 재무비율, 추세, 동종업계 비교를 자동 계산하여 LLM에 제공 + - sections 서술형 데이터 + finance 숫자 데이터 동시 주입 + - tool calling provider에서는 LLM이 추가 데이터 자율 탐색 + + Guide: + - "삼성전자 분석해줘" -> ask("삼성전자 재무건전성 분석해줘") + - "이 회사 괜찮아?" -> ask("종목코드", "이 회사 투자해도 괜찮아?") + - "AI 설정 어떻게 해?" -> dartlab.setup()으로 provider/키 설정 안내 + - provider 미설정 시 자동 감지. 설정 방법: dartlab.llm.configure(provider="openai", api_key="sk-...") + - 보안: API 키는 로컬 .env에만 저장, 외부 전송 절대 없음 + + SeeAlso: + - chat: 대화형 연속 분석 (멀티턴) + - Company: 프로그래밍 방식 데이터 접근 + - scan: 전종목 비교 (ask보다 직접적) + + Args: + *args: 자연어 질문 (1개) 또는 (종목, 질문) 2개. + provider: LLM provider ("openai", "codex", "oauth-codex", "ollama"). + model: 모델 override. + stream: True면 스트리밍 출력 (기본값). False면 조용히 전체 텍스트 반환. + raw: True면 Generator를 직접 반환 (커스텀 UI용). + include: 포함할 데이터 모듈. + exclude: 제외할 데이터 모듈. + reflect: True면 답변 자체 검증 (1회 reflection). + + Returns: + str | None: 전체 답변 텍스트. 설정 오류 시 None. (raw=True일 때만 Generator[str]) + + Example:: + + import dartlab + dartlab.llm.configure(provider="openai", api_key="sk-...") + + # 호출하면 스트리밍 출력 + 전체 텍스트 반환 + answer = dartlab.ask("삼성전자 재무건전성 분석해줘") + + # provider + model 지정 + answer = dartlab.ask("삼성전자 분석", provider="openai", model="gpt-4o") + + # (종목, 질문) 분리 + answer = dartlab.ask("005930", "영업이익률 추세는?") + + # 조용히 전체 텍스트만 (배치용) + answer = dartlab.ask("삼성전자 분석", stream=False) + + # Generator 직접 제어 (커스텀 UI용) + for chunk in dartlab.ask("삼성전자 분석", raw=True): + custom_process(chunk) + """ + from dartlab.ai.runtime.standalone import ask as _ask + + # provider 미지정 시 auto-detect + if provider is None: + from dartlab.core.ai.detect import auto_detect_provider + + detected = auto_detect_provider() + if detected is None: + from dartlab.core.ai.guide import no_provider_message + + print(no_provider_message()) + return None + provider = detected + + if len(args) == 2: + import warnings + + warnings.warn( + "dartlab.ask(stock, question) is deprecated. Use dartlab.ask('삼성전자 분석해줘') instead.", + DeprecationWarning, + stacklevel=2, + ) + company = Company(args[0]) + question = args[1] + elif len(args) == 1: + company = None + question = args[0] + elif len(args) == 0: + print("\n 질문을 입력해 주세요.") + print(" 예: dartlab.ask('삼성전자 재무건전성 분석해줘')") + print(" 예: dartlab.ask('005930', '영업이익률 추세는?')\n") + return None + else: + print(f"\n 인자는 1~2개만 허용됩니다 (받은 수: {len(args)})") + print(" 예: dartlab.ask('삼성전자 분석해줘')") + print(" 예: dartlab.ask('005930', '영업이익률 추세는?')\n") + return None + + # kwargs에서 company 제거 (내부에서 직접 전달) + kwargs.pop("company", None) + _call_kwargs = dict( + company=company, + include=include, + exclude=exclude, + provider=provider, + model=model, + reflect=reflect, + pattern=pattern, + template=template, + modules=modules, + **kwargs, + ) + + if raw: + return _ask(question, stream=stream, **_call_kwargs) + + if not stream: + return _ask(question, stream=False, **_call_kwargs) + + gen = _ask(question, stream=True, **_call_kwargs) + return _auto_stream(gen) + + +def templates(name: str | None = None): + """분석 템플릿 목록 또는 특정 템플릿 내용. + + Example:: + + dartlab.templates() # 전체 목록 + dartlab.templates("가치투자") # 특정 템플릿 내용 + """ + from dartlab.ai import templates as _templates + + return _templates(name) + + +def saveTemplate(name: str, *, content: str | None = None, file: str | None = None): + """사용자 분석 템플릿 저장. ~/.dartlab/templates/{name}.md + + Example:: + + dartlab.saveTemplate("my_style", content="## 내 기준\\n- ROE > 15%") + """ + from dartlab.ai import saveTemplate as _save + + return _save(name, content=content, file=file) + + +def chat( + *args: str, + provider: str | None = None, + model: str | None = None, + max_turns: int = 5, + on_tool_call=None, + on_tool_result=None, + **kwargs, +) -> str: + """에이전트 모드: LLM이 도구를 선택하여 심화 분석. + + Capabilities: + - LLM이 dartlab 도구를 자율적으로 선택/실행 + - 원본 공시 탐색, 계정 시계열 비교, 섹터 통계 등 심화 분석 + - 최대 N회 도구 호출 반복 (multi-turn) + - 도구 호출/결과 콜백으로 UI 연동 + - 종목 없이도 동작 (시장 전체 질문, 메타 질문 등) + + Requires: + AI: provider 설정 (tool calling 지원 provider 권장) + + AIContext: + - ask()와 동일한 기본 컨텍스트 + 저수준 도구 접근 + - LLM이 부족하다 판단하면 추가 데이터 자율 수집 + - company=None이면 scan/gather/system 도구만 활성화 + + Guide: + - "깊게 분석해줘" -> chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘") + - "시장 전체 거버넌스 비교" -> chat("코스피 거버넌스 좋은 회사 찾아줘") + - "dartlab 뭐 할 수 있어?" -> chat("dartlab 기능 알려줘") + - ask()보다 심화 분석이 필요할 때 사용. LLM이 자율적으로 도구 호출 + + SeeAlso: + - ask: 단일 질문 (간단한 분석) + - Company: 프로그래밍 방식 직접 접근 + - scan: 전종목 횡단분석 + + Args: + *args: (종목, 질문) 2개 또는 질문만 1개. + provider: LLM provider. + model: 모델 override. + max_turns: 최대 도구 호출 반복 횟수. + + Returns: + str: 최종 답변 텍스트. + + Example:: + + import dartlab + dartlab.chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘") + dartlab.chat("코스피 ROE 높은 회사 알려줘") # 종목 없이 시장 질문 + """ + from dartlab.ai.runtime.standalone import chat as _chat + + if len(args) == 2: + company = Company(args[0]) + question = args[1] + elif len(args) == 1: + from dartlab.core.resolve import resolve_from_text + + company, question = resolve_from_text(args[0]) + if company is None: + question = args[0] + elif len(args) == 0: + print("\n 질문을 입력해 주세요.") + print(" 예: dartlab.chat('005930', '배당 추세 분석해줘')") + print(" 예: dartlab.chat('코스피 ROE 높은 회사 알려줘')\n") + return "" + else: + print(f"\n 인자는 1~2개만 허용됩니다 (받은 수: {len(args)})") + return "" + + return _chat( + company, + question, + provider=provider, + model=model, + max_turns=max_turns, + on_tool_call=on_tool_call, + on_tool_result=on_tool_result, + **kwargs, + ) + + +def plugins(): + """로드된 플러그인 목록 반환. + + Capabilities: + - 설치된 dartlab 플러그인 자동 탐색 + - 플러그인 메타데이터 (이름, 버전, 제공 topic) 조회 + + Requires: + 없음 + + AIContext: + - 확장 기능 탐색 시 설치된 플러그인 목록 확인 + - 플러그인이 제공하는 topic을 show()에서 사용 가능 + - 플러그인 유무에 따라 분석 범위 동적 결정 + + Guide: + - "플러그인 뭐 있어?" -> plugins() + - "확장 기능 목록" -> plugins()로 설치된 플러그인 확인 + - "ESG 플러그인 있어?" -> plugins()에서 검색 + + SeeAlso: + - reload_plugins: 새 플러그인 설치 후 재스캔 + - Company.show: 플러그인 topic 조회 (plugins가 제공한 topic 사용) + + Args: + 없음. + + Returns: + list[PluginMeta] — 로드된 플러그인 목록. + + Example:: + + import dartlab + dartlab.plugins() # [PluginMeta(name="esg-scores", ...)] + """ + from dartlab.core.plugins import discover, get_loaded_plugins + + discover() + return get_loaded_plugins() + + +def reload_plugins(): + """플러그인 재스캔 — pip install 후 재시작 없이 즉시 인식. + + Capabilities: + - 새로 설치한 플러그인 즉시 인식 (세션 재시작 불필요) + - entry_points 재스캔 + + Requires: + 없음 + + AIContext: + - pip install 후 세션 재시작 없이 플러그인 즉시 활성화 + - 새로 인식된 topic이 Company.show()에서 바로 사용 가능 + + Guide: + - "새 플러그인 설치했는데 안 보여" -> reload_plugins() + - "플러그인 재스캔" -> reload_plugins() + + SeeAlso: + - plugins: 현재 로드된 플러그인 확인 (reload 전후 비교) + - Company.show: 플러그인 topic 조회 + + Args: + 없음. + + Returns: + list[PluginMeta] — 재스캔 후 플러그인 목록. + + Example:: + + # 1. 새 플러그인 설치 + # !uv pip install dartlab-plugin-esg + + # 2. 재스캔 + dartlab.reload_plugins() + + # 3. 즉시 사용 + dartlab.Company("005930").show("esgScore") + """ + from dartlab.core.plugins import rediscover + + return rediscover() + + +class _Module(sys.modules[__name__].__class__): + """dartlab.verbose / dartlab.dataDir / dartlab.chart|table|text 프록시.""" + + @property + def verbose(self): + """전역 verbose 설정 조회.""" + return config.verbose + + @verbose.setter + def verbose(self, value): + config.verbose = value + + @property + def askLog(self): + """ask/chat 로그 활성화 조회.""" + return config.askLog + + @askLog.setter + def askLog(self, value): + config.askLog = bool(value) + + @property + def dataDir(self): + """데이터 저장 디렉토리 경로 조회.""" + return config.dataDir + + @dataDir.setter + def dataDir(self, value): + config.dataDir = str(value) + + def __getattr__(self, name): + if name == "scan": + from dartlab.scan import Scan + + instance = Scan() + setattr(self, name, instance) + return instance + if name == "analysis": + from dartlab.analysis.financial import Analysis + + instance = Analysis() + setattr(self, name, instance) + return instance + if name == "credit": + from dartlab.credit import credit + + setattr(self, name, credit) + return credit + if name == "quant": + from dartlab.quant import Quant + + instance = Quant() + setattr(self, name, instance) + return instance + if name == "macro": + from dartlab.macro import Macro + + instance = Macro() + setattr(self, name, instance) + return instance + if name == "topdown": + from dartlab.topdown import _TopdownEntry + + instance = _TopdownEntry() + setattr(self, name, instance) + return instance + if name == "viz": + import dartlab.viz as _viz + + setattr(self, name, _viz) + return _viz + if name == "chart": + # 하위호환: dartlab.chart → dartlab.viz + import dartlab.viz as _viz + + setattr(self, name, _viz) + return _viz + if name == "table": + from dartlab.table import Table + + instance = Table() + setattr(self, name, instance) + return instance + if name == "text": + import importlib + + mod = importlib.import_module("dartlab.tools.text") + setattr(self, name, mod) + return mod + raise AttributeError(f"module 'dartlab' has no attribute {name!r}") + + +sys.modules[__name__].__class__ = _Module + +# gather 모듈을 GatherEntry callable로 덮어쓰기 +# (gather 서브모듈이 top-level import로 이미 로드되므로 __getattr__ lazy 불가) +from dartlab.gather.entry import GatherEntry as _GatherEntry + +sys.modules[__name__].gather = _GatherEntry() + +# topdown도 같은 문제 — 모듈 import가 __getattr__보다 우선이라 callable로 덮어쓴다 +from dartlab.topdown import _TopdownEntry as _TopdownEntry + +sys.modules[__name__].topdown = _TopdownEntry() + +# scan/analysis/credit/quant — 어떤 import 체인이 모듈을 먼저 로드하면 +# 모듈 클래스의 __getattr__이 동작 안 함 (CI에서 발견된 회귀). +# 해결: 모듈 자체를 callable로 패치 — 모듈 객체에 __call__을 직접 부여. +import types as _types + + +def _makeCallableModule(modName: str, instanceFactory): + """이미 로드된 서브모듈에 __call__을 부여하여 callable하게 만든다. + + 서브모듈(rank, _helpers 등)도 그대로 import 가능. instance 메소드는 lazy 호출. + """ + mod = sys.modules.get(modName) + if mod is None: + return + + class _CallableModule(_types.ModuleType): + _instance = None + + def __call__(self, *args, **kwargs): + if self._instance is None: + self._instance = instanceFactory() + return self._instance(*args, **kwargs) + + def __getattr__(self, name): + if self._instance is None: + self._instance = instanceFactory() + try: + return getattr(self._instance, name) + except AttributeError: + raise AttributeError(f"module '{modName}' has no attribute '{name}'") from None + + mod.__class__ = _CallableModule + + +def _scanFactory(): + from dartlab.scan import Scan + + return Scan() + + +def _analysisFactory(): + from dartlab.analysis.financial import Analysis + + return Analysis() + + +def _quantFactory(): + from dartlab.quant import Quant + + return Quant() + + +# scan/analysis/quant — 모듈 자체를 callable로 변환 +import dartlab.analysis.financial as _analysis_mod # noqa: F401 +import dartlab.quant as _quant_mod # noqa: F401 +import dartlab.scan as _scan_mod # noqa: F401 + +_makeCallableModule("dartlab.scan", _scanFactory) +_makeCallableModule("dartlab.analysis.financial", _analysisFactory) +_makeCallableModule("dartlab.quant", _quantFactory) + +# credit은 함수형 (이미 callable) +from dartlab.credit import credit as _credit_callable + +sys.modules[__name__].credit = _credit_callable + + +__all__ = [ + "Company", + "Fred", + "OpenDart", + "OpenEdgar", + "config", + "ask", + "chat", + "setup", + "search", + "listing", + "collect", + "collectAll", + "downloadAll", + "scan", + "analysis", + "gather", + "quant", + "credit", + "macro", + "topdown", + "verbose", + "dataDir", + "codeToName", + "nameToCode", + "searchName", + "Review", + "SelectResult", + "ChartResult", + "capabilities", +] + + +def capabilities(key: str | None = None, *, search: str | None = None) -> dict | list[str]: + """dartlab 전체 기능 카탈로그 조회. + + Capabilities: + CAPABILITIES dict에서 부분 조회 가능. + key 없이 호출 시 전체 키 목록(summary 포함) 반환. + key 지정 시 해당 항목의 상세(guide, capabilities, seeAlso 등) 반환. + search 지정 시 자연어 질문 기반 관련 API 검색 (상위 10개). + + Requires: + 없음 + + AIContext: + AI가 "dartlab에 뭐가 있는지" 모를 때 탐색용. + capabilities() → 목차 확인 → capabilities("analysis") → 상세 확인 → execute_code. + capabilities(search="재무건전성") → 질문 관련 API 검색 → 코드 생성. + + Guide: + - "dartlab 뭐 할 수 있어?" -> capabilities() + - "분석 기능 뭐 있어?" -> capabilities("analysis") + - "scan 어떻게 써?" -> capabilities("scan") + - "재무건전성 관련 API?" -> capabilities(search="재무건전성") + + SeeAlso: + - ask: AI 질문 (capabilities로 기능 파악 후 ask로 분석) + - setup: AI provider 설정 (capabilities 확인 후 설정) + + Args: + key: 조회할 기능 키. None이면 전체 목차. + search: 자연어 질문 기반 검색. key와 동시 사용 불가. + + Returns: + dict | list[str] — key 있으면 해당 항목 dict, 없으면 키+summary 목록. + + Example:: + + dartlab.capabilities() # 전체 목차 + dartlab.capabilities("analysis") # analysis 상세 (guide, capabilities) + dartlab.capabilities("Company.analysis") # Company.analysis 상세 + dartlab.capabilities("scan") # scan 상세 + dartlab.capabilities(search="재무건전성") # 질문 기반 검색 → 상위 10개 + """ + if search is not None: + from dartlab.core._capabilitySearch import searchCapabilities + + results = searchCapabilities(search) + return {key: entry for key, entry, _score in results} + + from dartlab.core._generatedCapabilities import CAPABILITIES + + if key is None: + return {k: v.get("summary", "") for k, v in CAPABILITIES.items()} + if key in CAPABILITIES: + return CAPABILITIES[key] + # 부분 매칭: "analysis" → "Company.analysis" 등도 포함 + matched = {k: v for k, v in CAPABILITIES.items() if key.lower() in k.lower()} + if matched: + return matched + return {} diff --git a/src/dartlab/__main__.py b/src/dartlab/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f3dd152e9538afb355d5edff980532099b3b998 --- /dev/null +++ b/src/dartlab/__main__.py @@ -0,0 +1,5 @@ +"""Allow `python -m dartlab` to invoke the CLI.""" + +from dartlab.cli.main import main + +raise SystemExit(main()) diff --git a/src/dartlab/__pycache__/__init__.cpython-312.pyc b/src/dartlab/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d83fc7d765e5176a0119523409996c03114f5631 Binary files /dev/null and b/src/dartlab/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/__init__.cpython-313.pyc b/src/dartlab/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dfb9d47a2e93c896759cbd2a7a5c0c8f53b0ebb Binary files /dev/null and b/src/dartlab/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/__pycache__/__main__.cpython-312.pyc b/src/dartlab/__pycache__/__main__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56f7092953ac9651673db642dcc8566306e0c721 Binary files /dev/null and b/src/dartlab/__pycache__/__main__.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/company.cpython-312.pyc b/src/dartlab/__pycache__/company.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e9552e985694fab190e736f397378f163bdc500 Binary files /dev/null and b/src/dartlab/__pycache__/company.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/company.cpython-313.pyc b/src/dartlab/__pycache__/company.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7fb636dad42ea3e0178f73d6499cb411a84df4c Binary files /dev/null and b/src/dartlab/__pycache__/company.cpython-313.pyc differ diff --git a/src/dartlab/__pycache__/config.cpython-312.pyc b/src/dartlab/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e69782172f7f56730df5bf338ad59ce14657367 Binary files /dev/null and b/src/dartlab/__pycache__/config.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/config.cpython-313.pyc b/src/dartlab/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2e2b5b56a4df5abf2124f490a7f88b80dc4311d Binary files /dev/null and b/src/dartlab/__pycache__/config.cpython-313.pyc differ diff --git a/src/dartlab/__pycache__/listing.cpython-312.pyc b/src/dartlab/__pycache__/listing.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8ec39d061be380e222a8a4a7073997533a71a65 Binary files /dev/null and b/src/dartlab/__pycache__/listing.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/listing.cpython-313.pyc b/src/dartlab/__pycache__/listing.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9573fd173fafe9e421653fc297dabb0c6744fab2 Binary files /dev/null and b/src/dartlab/__pycache__/listing.cpython-313.pyc differ diff --git a/src/dartlab/__pycache__/topdown.cpython-312.pyc b/src/dartlab/__pycache__/topdown.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d602bd040fb7fd125b817d7f55e04d61284bad91 Binary files /dev/null and b/src/dartlab/__pycache__/topdown.cpython-312.pyc differ diff --git a/src/dartlab/__pycache__/topdown.cpython-313.pyc b/src/dartlab/__pycache__/topdown.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89c82fedbbb6fab4c5017dbc3218cafdc001cd55 Binary files /dev/null and b/src/dartlab/__pycache__/topdown.cpython-313.pyc differ diff --git a/src/dartlab/ai/STATUS.md b/src/dartlab/ai/STATUS.md new file mode 100644 index 0000000000000000000000000000000000000000..44fbf2fff984ed4a06e1582be410604cf7c7acf1 --- /dev/null +++ b/src/dartlab/ai/STATUS.md @@ -0,0 +1,200 @@ +# AI Engine — Provider 현황 및 유지보수 체크리스트 + +## Provider 목록 (7개) + +| Provider | 파일 | 인증 | 기본 모델 | 안정성 | +|----------|------|------|----------|--------| +| `openai` | openai_compat.py | API Key | gpt-4o | **안정** — 공식 SDK | +| `ollama` | ollama.py | 없음 (localhost) | llama3.1 | **안정** — 로컬 | +| `custom` | openai_compat.py | API Key | gpt-4o | **안정** — OpenAI 호환 | +| `chatgpt` | providers/__init__.py alias | `codex`로 정규화 | codex mirror | **호환용 alias** — 공개 surface 비노출 | +| `codex` | codex.py | CLI 세션 | CLI config 또는 gpt-4.1 | **공식 경로 우선** — Codex CLI 의존 | +| `oauth-codex` | oauthCodex.py | ChatGPT OAuth | gpt-5.4 | **공개 경로** — 비공식 backend API 의존 | +| `claude-code` | claude_code.py | CLI 세션 | sonnet | **보류중** — OAuth 지원 전 비공개 | + +--- + +## 현재 공개 경로 + +- ChatGPT 구독 계정 경로는 2개다. + - `codex`: Codex CLI 로그인 기반 + - `oauth-codex`: ChatGPT OAuth 직접 연결 기반 +- 공개 provider surface는 `codex`, `oauth-codex`, `openai`, `ollama`, `custom`만 유지한다. +- `claude` provider는 public surface에서 제거되었고 legacy/internal 코드로만 남아 있다. +- `chatgpt`는 기존 설정/호환성 때문에 내부 alias로만 남아 있으며 실제 구현은 `codex`로 정규화된다. +- `chatgpt-oauth`는 내부/호환 alias로만 남아 있으며 실제 구현은 `oauth-codex`로 정규화된다. + +## Tool Runtime 기반 + +- 도구 등록/실행은 `tool_runtime.py`의 `ToolRuntime`으로 분리되기 시작했다. +- `tools_registry.py`는 현재 호환 래퍼 역할을 하며, 세션별/에이전트별 isolated runtime 생성이 가능하다. +- coding executor는 `coding_runtime.py`로 분리되기 시작했고, backend registry를 통해 관리한다. +- 표준 코드 작업 진입점은 `run_coding_task`이며 `run_codex_task`는 Codex compatibility alias로 유지한다. +- 다음 단계는 Codex 외 backend를 이 runtime 뒤에 추가하되, 공개 provider surface와는 분리하는 것이다. + +## ChatGPT OAuth Provider — 핵심 리스크 + +### 왜 취약한가 + +`oauth-codex` provider는 **OpenAI 비공식 내부 API** (`chatgpt.com/backend-api/codex/responses`)를 사용한다. +공식 OpenAI API (`api.openai.com`)가 아니므로 **예고 없이 변경/차단될 수 있다**. + +### 정기 체크 항목 + +**1. 엔드포인트 변경** +- 현재: `https://chatgpt.com/backend-api/codex/responses` +- 파일: [oauthCodex.py](providers/oauthCodex.py) `CODEX_API_BASE`, `CODEX_RESPONSES_PATH` +- OpenAI가 URL 경로를 변경하면 즉시 404/403 발생 +- 확인법: `dartlab status` 실행 → chatgpt available 확인 + +**2. OAuth 인증 파라미터** +- Client ID: `app_EMoamEEZ73f0CkXaXp7hrann` (Codex CLI에서 추출) +- 파일: [oauthToken.py](../oauthToken.py) `CHATGPT_CLIENT_ID` +- OpenAI가 client_id를 갱신하거나 revoke하면 로그인 불가 +- 확인법: OAuth 로그인 시도 → "invalid_client" 에러 여부 + +**3. SSE 이벤트 타입** +- 현재 파싱하는 타입 3개: + - `response.output_text.delta` — 텍스트 청크 + - `response.content_part.delta` — 컨텐츠 청크 + - `response.output_item.done` — 아이템 완료 +- 파일: [oauthCodex.py](providers/oauthCodex.py) `stream()`, `_parse_sse_response()` +- OpenAI가 이벤트 스키마를 변경하면 응답이 빈 문자열로 돌아옴 +- 확인법: 스트리밍 응답이 도착하는데 텍스트가 비어있으면 이벤트 타입 변경 의심 + +**4. 요청 헤더** +- `originator: codex_cli_rs` — Codex CLI 사칭 +- `OpenAI-Beta: responses=experimental` — 실험 API 플래그 +- 파일: [oauthCodex.py](providers/oauthCodex.py) `_build_headers()` +- 이 헤더 없이는 403 반환됨 +- OpenAI가 originator 검증을 강화하면 차단됨 + +**5. 모델 목록** +- `AVAILABLE_MODELS` 리스트는 수동 관리 +- 파일: [oauthCodex.py](providers/oauthCodex.py) `AVAILABLE_MODELS` +- 새 모델 출시/폐기 시 수동 업데이트 필요 +- GPT-4 시리즈 (gpt-4, gpt-4-turbo 등)는 이미 제거됨 + +**6. 토큰 만료 정책** +- access_token: expires_in 기준 (현재 ~1시간) +- refresh_token: 만료 정책 불명 (OpenAI 미공개) +- 파일: [oauthToken.py](../oauthToken.py) `get_valid_token()`, `refresh_access_token()` +- refresh_token이 만료되면 재로그인 필요 +- 확인법: 며칠 방치 후 요청 → 401 + refresh 실패 여부 + +### 브레이킹 체인지 대응 순서 + +1. 사용자가 "ChatGPT 안됨" 보고 +2. `dartlab status` 로 available 확인 +3. available=False → OAuth 로그인 재시도 +4. 로그인 실패 → client_id 변경 확인 (opencode-openai-codex-auth 참조) +5. 로그인 성공인데 API 호출 실패 → 엔드포인트/헤더 변경 확인 +6. API 호출 성공인데 응답 비어있음 → SSE 이벤트 타입 변경 확인 + +### 생태계 비교 — 누가 같은 API를 쓰는가 + +ChatGPT OAuth(`chatgpt.com/backend-api`)를 사용하는 프로젝트는 **전부 openai/codex CLI 역공학** 기반이다. + +| 프로젝트 | 언어 | Client ID | 모델 목록 | refresh 실패 처리 | 토큰 저장 | +|----------|------|-----------|----------|------------------|----------| +| **openai/codex** (공식) | Rust | 하드코딩 | `/models` 동적 + 5분 캐시 | 4가지 분류 | 파일/키링/메모리 3중 | +| **opencode plugin** | TS | 동일 복제 | 사용자 설정 의존 | 단순 throw | 프레임워크 위임 | +| **ai-sdk-provider** | TS | 동일 복제 | 3개 하드코딩 | 단순 throw | codex auth.json 재사용 | +| **dartlab** (현재) | Python | 동일 복제 | 13개 하드코딩 | None 반환 | `~/.dartlab/oauth_token.json` | + +**공통 특징:** +- Client ID `app_EMoamEEZ73f0CkXaXp7hrann` 전원 동일 (OpenAI public OAuth client) +- `originator: codex_cli_rs` 헤더 전원 동일 +- OpenAI가 이 값들을 바꾸면 **전부 동시에 깨짐** + +**openai/codex만의 차별점 (dartlab에 없는 것):** +1. Token Exchange — OAuth 토큰 → `api.openai.com` 호환 API Key 변환 +2. Device Code Flow — headless 환경 (서버, SSH) 인증 지원 +3. 모델 목록 동적 조회 — `/models` 엔드포인트 + 캐시 + bundled fallback +4. Keyring 저장 — OS 키체인 (macOS Keychain, Windows Credential Manager) +5. refresh 실패 4단계 분류 — expired / reused / revoked / other +6. WebSocket SSE 이중 지원 + +**참고: opencode와 oh-my-opencode(현 oh-my-openagent)는 ChatGPT OAuth를 사용하지 않는다.** +- opencode: GitHub Copilot API 인증 (다른 시스템) +- oh-my-openagent: MCP 서버 표준 OAuth 2.0 + PKCE (플러그인) + +### 추적 대상 레포지토리 + +변경사항 감지를 위해 다음 레포를 추적한다. + +| 레포 | 추적 이유 | Watch 대상 | +|------|----------|-----------| +| **openai/codex** | canonical 구현. Client ID, 엔드포인트, 헤더의 원본 | `codex-rs/core/src/auth.rs`, `model_provider_info.rs` | +| **numman-ali/opencode-openai-codex-auth** | 빠른 변경 반영 (TS라 읽기 쉬움) | `lib/auth/`, `lib/constants.ts` | +| **ben-vargas/ai-sdk-provider-chatgpt-oauth** | Vercel AI SDK 호환 참조 | `src/auth/` | + +### 향후 개선 후보 (codex에서 가져올 수 있는 것) + +1. **모델 목록 동적 조회** — `chatgpt.com/backend-api/codex/models` 호출 + JSON 캐시 +2. **refresh 실패 분류** — expired/reused/revoked 구분하여 사용자에게 구체적 안내 +3. **Token Exchange** — OAuth → API Key 변환으로 `api.openai.com` 호환 (듀얼 엔드포인트) + +--- + +## Codex CLI Provider — 리스크 + +### 왜 취약한가 + +`codex` provider는 OpenAI `codex` CLI 바이너리를 subprocess로 호출한다. +CLI의 JSONL 출력 포맷이 변경되면 파싱 실패. + +### 현재 동작 + +- `~/.codex/config.toml`의 model 설정을 우선 흡수 +- `codex --help`, `codex exec --help`를 읽어 command/sandbox capability를 동적 감지 +- 일반 질의는 `read-only`, 코드 수정 의도는 `workspace-write` sandbox 우선 +- 별도 `run_codex_task` tool로 다른 provider에서도 Codex CLI 코드 작업 위임 가능 + +### 체크 항목 + +- CLI 출력 포맷: `item.completed.item.agent_message.text` 경로 +- CLI 플래그: `--json`, `--sandbox ...`, `--model ...`, `--skip-git-repo-check` +- CLI 설치: `npm install -g @openai/codex` +- 파일: [codex.py](providers/codex.py) + +--- + +## Claude Code CLI Provider — 보류중 + +### 현재 상태 + +VSCode 환경에서 `CLAUDECODE` 환경변수가 설정되어 SDK fallback 모드로 진입하지만, +SDK fallback에서 API key 추출(`claude auth status --json`)이 또 subprocess를 호출하는 순환 문제. + +### 알려진 이슈 + +- 테스트 31/32 pass, `test_complete_timeout` 1개 fail +- VSCode 내에서 CLI 호출이 hang되는 케이스 (중첩 세션) +- `_probe_cli()` 8초 타임아웃으로 hang 감지 후 SDK 전환 +- 파일: [claude_code.py](providers/claude_code.py) + +--- + +## 안정 Provider — 특이사항 없음 + +### openai / custom (openai_compat.py) +- 공식 `openai` Python SDK 사용 +- 버전 업데이트 시 SDK breaking change만 주의 +- tool calling 지원 + +### claude (claude.py) +- 공식 `anthropic` Python SDK + OpenAI 프록시 이중 모드 +- base_url 있으면 OpenAI 호환, 없으면 Anthropic 네이티브 + +### ollama (ollama.py) +- localhost:11434 OpenAI 호환 엔드포인트 +- `preload()`, `get_installed_models()`, `complete_json()` 추가 기능 +- tool calling 지원 (v0.3.0+) + +--- + +## 마지막 점검일 + +- 2026-03-10: ChatGPT OAuth 정상 동작 확인 (gpt-5.4) +- 2026-03-10: Claude Code 보류 (VSCode 환경이슈) diff --git a/src/dartlab/ai/__init__.py b/src/dartlab/ai/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c73a4ff30d4a226c418ac04c32e1a4c482aeb47a --- /dev/null +++ b/src/dartlab/ai/__init__.py @@ -0,0 +1,161 @@ +"""LLM 기반 적극적 분석가. dartlab을 도구로 삼아 주체적으로 분석하고, 사용자의 분석 학습을 돕는다.""" + +from __future__ import annotations + +from dartlab.ai.types import LLMConfig, LLMResponse +from dartlab.core.ai import ( + AI_ROLES, + DEFAULT_ROLE, + get_profile_manager, + get_provider_spec, + normalize_provider, + normalize_role, +) + + +def configure( + provider: str = "codex", + model: str | None = None, + api_key: str | None = None, + base_url: str | None = None, + role: str | None = None, + temperature: float = 0.3, + max_tokens: int = 4096, + system_prompt: str | None = None, +) -> None: + """공통 AI profile을 갱신한다.""" + normalized = normalize_provider(provider) or provider + if get_provider_spec(normalized) is None: + raise ValueError(f"지원하지 않는 provider: {provider}") + normalized_role = normalize_role(role) + if role is not None and normalized_role is None: + raise ValueError(f"지원하지 않는 role: {role}. 지원: {AI_ROLES}") + manager = get_profile_manager() + manager.update( + provider=normalized, + model=model, + role=normalized_role, + base_url=base_url, + temperature=temperature, + max_tokens=max_tokens, + system_prompt=system_prompt, + updated_by="code", + ) + if api_key: + spec = get_provider_spec(normalized) + if spec and spec.auth_kind == "api_key": + manager.save_api_key(normalized, api_key, updated_by="code") + + +def get_config(provider: str | None = None, *, role: str | None = None) -> LLMConfig: + """현재 글로벌 LLM 설정 반환.""" + normalized_role = normalize_role(role) + resolved = get_profile_manager().resolve(provider=provider, role=normalized_role) + return LLMConfig(**resolved) + + +def status(provider: str | None = None, *, role: str | None = None) -> dict: + """LLM 설정 및 provider 상태 확인.""" + from dartlab.ai.providers import create_provider + + normalized_role = normalize_role(role) + config = get_config(provider, role=normalized_role) + selected_provider = config.provider + llm = create_provider(config) + available = llm.check_available() + + result = { + "provider": selected_provider, + "role": normalized_role or DEFAULT_ROLE, + "model": llm.resolved_model, + "available": available, + "defaultProvider": get_profile_manager().load().default_provider, + } + + if selected_provider == "ollama": + from dartlab.ai.providers.support.ollama_setup import detect_ollama + + result["ollama"] = detect_ollama() + + if selected_provider == "codex": + from dartlab.ai.providers.support.cli_setup import detect_codex + + result["codex"] = detect_codex() + + if selected_provider == "oauth-codex": + from dartlab.ai.providers.support import oauth_token as oauthToken + + token_stored = False + try: + token_stored = oauthToken.load_token() is not None + except (OSError, ValueError): + token_stored = False + + try: + authenticated = oauthToken.is_authenticated() + account_id = oauthToken.get_account_id() if authenticated else None + except ( + AttributeError, + OSError, + RuntimeError, + ValueError, + oauthToken.TokenRefreshError, + ): + authenticated = False + account_id = None + + result["oauth-codex"] = { + "authenticated": authenticated, + "tokenStored": token_stored, + "accountId": account_id, + } + + return result + + +from dartlab.ai.tools.plugin import get_plugin_registry, tool + + +def templates(name: str | None = None): + """분석 템플릿 목록 또는 특정 템플릿 내용 반환. + + Args: + name: None이면 전체 목록, 지정하면 해당 템플릿 내용. + + Returns: + list[dict] (목록) 또는 str (내용) 또는 None. + """ + from dartlab.ai.patterns import get_template, list_templates + + if name is None: + return list_templates() + return get_template(name) + + +def saveTemplate(name: str, *, content: str | None = None, file: str | None = None): + """사용자 분석 템플릿 저장. ~/.dartlab/templates/{name}.md + + Args: + name: 템플릿 이름. + content: 마크다운 내용. + file: 파일 경로 (content 대신). + + Returns: + Path — 저장된 파일 경로. + """ + from dartlab.ai.patterns import save_template + + return save_template(name, content=content, file=file) + + +__all__ = [ + "configure", + "get_config", + "status", + "LLMConfig", + "LLMResponse", + "tool", + "get_plugin_registry", + "templates", + "saveTemplate", +] diff --git a/src/dartlab/ai/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bc024729e1d618b0f910a9753c56f34b8def1ae Binary files /dev/null and b/src/dartlab/ai/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc66e2b8d8054da0930b015888ff64239a7a2798 Binary files /dev/null and b/src/dartlab/ai/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/__pycache__/types.cpython-312.pyc b/src/dartlab/ai/__pycache__/types.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78aa4e639bca8ceed0adb3d6bd9a2bfab1f39bab Binary files /dev/null and b/src/dartlab/ai/__pycache__/types.cpython-312.pyc differ diff --git a/src/dartlab/ai/__pycache__/types.cpython-313.pyc b/src/dartlab/ai/__pycache__/types.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7656fc08953be848dbdb30b358d5ed3e693a0ac8 Binary files /dev/null and b/src/dartlab/ai/__pycache__/types.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__init__.py b/src/dartlab/ai/context/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a3d4418b38308ae1f513b8d774aafd35713bc5be --- /dev/null +++ b/src/dartlab/ai/context/__init__.py @@ -0,0 +1,38 @@ +"""ai/context — Context Engineering 레이어 (Phase 1). + +Anthropic / DSPy / Manus 계열의 context engineering 패턴을 dartlab에 적용. +prompt engineering 단계의 고정 텍스트 블록 주입을 동적 컨텍스트 빌더로 대체. + +핵심 사상: +- intent 분류 → selector 동적 호출 → ContextBundle 조립 +- 토큰 예산 우선순위 트리밍 +- TOON 인코딩으로 같은 데이터를 30~60% 적은 토큰으로 주입 +- selfai 폐기 학습 적용: 자동 최적화 X. 모든 선택은 명시적 결정론. + +진입점: + from dartlab.ai.context import ContextBuilder + bundle = ContextBuilder(question=q, company=c, provider="gemini").build() + +레이아웃: + intent.py — 질문 → Intent (6막 + compare + concept) + selectors/ — Intent별 컨텍스트 선택자 + budget.py — provider별 토큰 한도 + 우선순위 트리밍 + encoder.py — TOON 인코딩 + builder.py — ContextBuilder 메인 진입점 + bundle.py — ContextBundle dataclass +""" + +from __future__ import annotations + +from dartlab.ai.context.builder import ContextBuilder +from dartlab.ai.context.bundle import ContextBundle, ContextPart, PartPriority +from dartlab.ai.context.intent import Intent, classifyIntent + +__all__ = [ + "ContextBuilder", + "ContextBundle", + "ContextPart", + "Intent", + "PartPriority", + "classifyIntent", +] diff --git a/src/dartlab/ai/context/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b859c3611dd8d237edc39355c2e391ad5c3d5ece Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8303f95ef83ac3f91147319ee6a62f6ec8fa866 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/aiview.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/aiview.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97549fbfe106abdf8340899819cf49a6d0a7a8f9 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/aiview.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/budget.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/budget.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b42cb6d902d04766592966b56a7bce77fc09062 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/budget.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/budget.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/budget.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52c51040ab53dfefc4e3887669e4b20264318730 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/budget.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/builder.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/builder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6972f1ff2afff37fc3a0e6f32215cfd2827c062 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/builder.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/builder.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/builder.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7070b18a2a58304b29d62ca5269452f0b81b389 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/builder.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/bundle.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/bundle.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce74f33dac1a4b97a501d66ab28fbbd837990960 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/bundle.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/bundle.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/bundle.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c50be5bd4160d284c1434385ddc3f656d449ad9d Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/bundle.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/encoder.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/encoder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91ce2d28a5bcde903eac610eb38a51cb966d0259 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/encoder.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/encoder.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/encoder.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a99cf95f18318c92a21395affbe722b50faa8037 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/encoder.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/intent.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/intent.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..399fa6e3af8862d1a6bc9e27a31bd920bed3c7cd Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/intent.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/intent.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/intent.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a30ec097fdb3b292204e93744f6c7c1ec4399c7 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/intent.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/playbook.cpython-312.pyc b/src/dartlab/ai/context/__pycache__/playbook.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c3819efd2aedd2646cf6de136c322de8f86314f Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/playbook.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/__pycache__/playbook.cpython-313.pyc b/src/dartlab/ai/context/__pycache__/playbook.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b12b3a795da4940f84faccb34683d8dde9156951 Binary files /dev/null and b/src/dartlab/ai/context/__pycache__/playbook.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/aiview.py b/src/dartlab/ai/context/aiview.py new file mode 100644 index 0000000000000000000000000000000000000000..23ef672d382d4fbd9f5bf2d00636442ce55ae791 --- /dev/null +++ b/src/dartlab/ai/context/aiview.py @@ -0,0 +1,360 @@ +"""AI용 데이터 맥락 보강 — 엔진 반환값을 AI가 이해하기 좋은 형태로 변환. + +모든 엔진의 dict/DataFrame을 자동 감지해서 맥락을 보강한다. +엔진별 수작업 0 — 구조(history + period + 숫자)만 보고 판단. + +삽입 위치: _calcToContextPart()에서 encodeAuto() 직전. + calc result → **autoEnrich()** → encodeAuto(TOON) → ContextPart + +근거: +- Kim et al. (시카고대, 2024): 재무제표 + 맥락 → 이익 방향 60% 정확도 +- TAP4LLM (EMNLP 2024): 서브테이블 + 보강 → +7.93%p +- 실험 110 A/B: enriched가 raw 대비 코드 0라운드, 해석 명확성 압도 + +Examples:: + + # analysis calc 결과 + raw = calcMarginTrend(company) + # {"history": [{"period": "2025", "operatingMargin": 13.07, ...}, ...]} + + enriched = autoEnrich(raw) + # {"_summary": "영업이익률 13.1% · 전기비 +2.2pp(소폭 개선) · 5년평균 위 1.2pp", + # "history": [...], ← 원본 유지 + # "_context": {"marginTrend": {"avg5y": 11.86, "yoy_pp": +2.19, ...}}} +""" + +from __future__ import annotations + +from typing import Any + +# ── 비율 필드 감지 키워드 ───────────────────────────────── + +_RATIO_KEYWORDS = frozenset({ + "margin", "ratio", "rate", "roe", "roa", "roic", "turnover", + "pct", "yield", "percent", "coverage", "leverage", "yoy", + "dso", "dio", "dpo", "ccc", "dol", "payout", +}) + + +def _isRatioField(field: str, value: Any) -> bool: + """비율 필드인지 판단 (이름 + 값 범위).""" + lower = field.lower() + if any(kw in lower for kw in _RATIO_KEYWORDS): + return True + # 값이 -200~500 범위이고 float이면 비율일 가능성 + if isinstance(value, (int, float)) and -200 <= value <= 500: + # 금액은 보통 1e6 이상 + return abs(value) < 1e6 + return False + + +# ── 변화 판단 ───────────────────────────────────────────── + +def _judgeChange(delta: float | None, isRatio: bool) -> str: + if delta is None: + return "" + t = 1.0 if isRatio else 5.0 + if abs(delta) < t * 0.5: + return "보합" + elif abs(delta) < t * 2: + return "소폭 개선" if delta > 0 else "소폭 악화" + elif abs(delta) < t * 5: + return "개선" if delta > 0 else "악화" + else: + return "대폭 개선" if delta > 0 else "대폭 악화" + + +# ── 한글 필드명 ─────────────────────────────────────────── + +_KOREAN = { + "operatingMargin": "영업이익률", "netMargin": "순이익률", + "grossMargin": "매출총이익률", "roe": "ROE", "roa": "ROA", + "roic": "ROIC", "revenue": "매출", "operatingIncome": "영업이익", + "netIncome": "순이익", "debtRatio": "부채비율", + "equityRatio": "자기자본비율", "ocf": "영업CF", "fcf": "FCF", + "capex": "CAPEX", "ccc": "CCC", "dso": "매출채권회수일", + "dio": "재고회전일", "dpo": "매입채무회전일", + "totalAssetTurnover": "총자산회전율", "revenueYoy": "매출YoY", + "operatingIncomeYoy": "영업이익YoY", "netIncomeYoy": "순이익YoY", + "costOfSalesRatio": "매출원가율", "sgaRatio": "판관비율", + "ocfToNi": "영업CF/순이익", "ocfMargin": "영업CF마진", + "interestCoverage": "이자보상배율", "pattern": "CF패턴", +} + + +def _koreanName(field: str) -> str: + return _KOREAN.get(field, field) + + +def _formatNum(value: Any, field: str = "") -> str: + if value is None: + return "-" + if _isRatioField(field, value): + return f"{value:.1f}%" + if isinstance(value, (int, float)) and abs(value) > 1e12: + return f"{value / 1e12:.1f}조" + if isinstance(value, (int, float)) and abs(value) > 1e8: + return f"{value / 1e8:,.0f}억" + if isinstance(value, float): + return f"{value:,.1f}" + return str(value) + + +# ── 핵심: autoEnrich ───────────────────────────────────── + +def autoEnrich(data: dict | list | None, *, company: Any = None, calc_fn: Any = None) -> dict | list | None: + """엔진 반환값을 자동 감지해서 AI용 맥락 보강. + + 3가지 패턴 자동 감지: + - dict with history[] → 시계열 보강 (5년 평균, YoY, 판단) + - list[dict] → history 배열로 취급 + - flat dict → 핵심 필드 요약 + + 엔진이 새 축을 추가해도 history + period + 숫자 패턴만 유지하면 자동 적용. + """ + if data is None: + return None + + # list[dict] — history 배열 직접 전달된 경우 + if isinstance(data, list) and data and isinstance(data[0], dict): + return _enrichHistory(data) + + if not isinstance(data, dict): + return data + + # 독스트링 스키마 추출 (있으면 확정 기반, 없으면 자동 감지 fallback) + _schema = parseReturnsSchema(calc_fn) if callable(calc_fn) else None + + # 최상위에 바로 history[]가 있는 경우 (개별 calc 결과: {"history": [...], "displayHints": {...}}) + if "history" in data and isinstance(data["history"], list) and data["history"]: + summary = _summarizeHistory(data["history"], "data", schema=_schema) + if summary: + enriched = dict(data) + enriched["_summary"] = summary + return enriched + return data + + # 중첩 history — 전체 analysis dict: {"marginTrend": {"history": [...]}, ...} + tsKeys = [ + k for k, v in data.items() + if isinstance(v, dict) + and "history" in v + and isinstance(v["history"], list) + and v["history"] + ] + if tsKeys: + return _enrichDictWithHistory(data, tsKeys, company=company) + + # flat dict (숫자 키가 있는) — credit, quant + numericKeys = [k for k, v in data.items() if isinstance(v, (int, float))] + if numericKeys: + return _enrichFlat(data) + + return data + + +# ── 패턴 1: dict with history[] ────────────────────────── + +def _enrichDictWithHistory( + data: dict, tsKeys: list[str], *, company: Any = None, +) -> dict: + """history[] 시계열을 자동 보강. 모든 analysis 축에 범용 적용.""" + summaries: list[str] = [] + + for tsKey in tsKeys: + hist = data[tsKey]["history"] + if not hist: + continue + summary = _summarizeHistory(hist, tsKey) + if summary: + summaries.append(summary) + + # _summary 필드에 전체 요약 삽입 (원본 data에 추가) + enriched = dict(data) + if summaries: + enriched["_summary"] = " / ".join(summaries[:4]) + + return enriched + + +def _enrichHistory(rows: list[dict]) -> dict: + """history 배열 직접 전달 시.""" + summary = _summarizeHistory(rows, "data") + return {"_summary": summary, "history": rows} if summary else {"history": rows} + + +def _summarizeHistory(hist: list[dict], label: str, *, schema: dict | None = None) -> str: + """history 배열에서 비율 필드를 자동 감지, 핵심 3개의 요약문 생성.""" + if not hist or len(hist) < 2: + return "" + + latest = hist[0] + prev = hist[1] + + # 모든 숫자 필드 감지 + numericFields = [ + k for k, v in latest.items() + if isinstance(v, (int, float)) and k != "period" + ] + if not numericFields: + return "" + + fieldInfos: list[dict] = [] + for field in numericFields: + values = [h.get(field) for h in hist[:5] if h.get(field) is not None] + if not values: + continue + + current = values[0] + # 독스트링 스키마 우선, 없으면 자동 감지 fallback + schemaResult = isRatioBySchema(field, schema) if schema else None + isRatio = schemaResult if schemaResult is not None else _isRatioField(field, current) + prevVal = values[1] if len(values) >= 2 else None + avg5 = sum(values) / len(values) + + # YoY — 비율은 pp 차이, 금액은 변화율(%) + yoy = None + if prevVal is not None: + if isRatio: + yoy = current - prevVal + elif prevVal != 0: + yoy = (current - prevVal) / abs(prevVal) * 100 + + # 5년 평균 대비 + vsAvg = None + if isRatio: + vsAvg = current - avg5 + elif avg5 != 0: + vsAvg = (current - avg5) / abs(avg5) * 100 + + fieldInfos.append({ + "field": field, + "current": current, + "isRatio": isRatio, + "yoy": round(yoy, 2) if yoy is not None else None, + "vsAvg": round(vsAvg, 2) if vsAvg is not None else None, + "judgment": _judgeChange(yoy, isRatio), + "avg5": round(avg5, 2), + }) + + # 비율 필드 우선, 변화가 큰 순 + ratios = [f for f in fieldInfos if f["isRatio"]] + amounts = [f for f in fieldInfos if not f["isRatio"]] + picked = sorted(ratios, key=lambda x: abs(x["yoy"] or 0), reverse=True)[:3] + if not picked: + picked = sorted(amounts, key=lambda x: abs(x["yoy"] or 0), reverse=True)[:2] + + # 요약 문장 생성 + parts = [] + for fi in picked: + unit = "pp" if fi["isRatio"] else "%" + segs = [f"{_koreanName(fi['field'])} {_formatNum(fi['current'], fi['field'])}"] + if fi["yoy"] is not None: + segs.append(f"전기비 {fi['yoy']:+.1f}{unit}({fi['judgment']})") + if fi["vsAvg"] is not None: + pos = "위" if fi["vsAvg"] > 0 else "아래" + segs.append(f"5년평균 {pos} {abs(fi['vsAvg']):.1f}{unit}") + parts.append(" · ".join(segs)) + + return f"[{label}] {' | '.join(parts)}" if parts else "" + + +# ── 패턴 2: flat dict ──────────────────────────────────── + +def _enrichFlat(data: dict) -> dict: + """flat dict 보강 — credit, quant 결과.""" + summaryParts = [] + for k, v in data.items(): + if isinstance(v, str) and len(v) < 50: + summaryParts.append(f"{_koreanName(k)}={v}") + elif isinstance(v, (int, float)): + summaryParts.append(f"{_koreanName(k)}={_formatNum(v, k)}") + if not summaryParts: + return data + enriched = dict(data) + enriched["_summary"] = " · ".join(summaryParts[:6]) + return enriched + + +# ── 독스트링 기반 스키마 파싱 ────────────────────────────── + +import re +from functools import lru_cache +from typing import Callable + +_UNIT_PATTERN = re.compile(r"\((%|원|일|배|점)\)") + + +@lru_cache(maxsize=256) +def parseReturnsSchema(fn: Callable) -> dict[str, dict] | None: + """함수의 docstring에서 Returns 스키마를 파싱. + + Returns dict 예시:: + + { + "operatingMargin": {"type": "float", "unit": "%", "desc": "영업이익률"}, + "revenue": {"type": "float", "unit": "원", "desc": "매출"}, + } + + 독스트링에 Returns 섹션이 없으면 None. + """ + doc = getattr(fn, "__doc__", None) + if not doc: + return None + + # Returns 섹션 추출 + lines = doc.split("\n") + inReturns = False + returnsLines: list[str] = [] + for line in lines: + stripped = line.strip() + if stripped == "Returns": + inReturns = True + continue + if inReturns and stripped.startswith("-------"): + continue + if inReturns: + # 다른 섹션 시작 감지 (Raises, Examples, Notes, Guide, See Also) + if stripped and not stripped[0].isspace() and stripped[0] != " " and ":" not in stripped and stripped in ( + "Raises", "Examples", "Notes", "Guide", "See Also", "Parameters", + ): + break + # 빈 줄 다음에 섹션 헤더가 올 수 있음 + if stripped and re.match(r"^[A-Z][a-z]", stripped) and not any(c in stripped for c in (":", "—", "-")): + break + returnsLines.append(line) + + if not returnsLines: + return None + + # 키 : 타입 — 설명 (단위) 패턴 파싱 + schema: dict[str, dict] = {} + for line in returnsLines: + # " operatingMargin : float — 영업이익률 (%)" 패턴 + m = re.match(r"\s+(\w+)\s*:\s*(\w[\w\[\]]*)\s*[—-]\s*(.+)", line) + if not m: + continue + key, typ, desc = m.group(1), m.group(2), m.group(3).strip() + + # 단위 추출 + unit_match = _UNIT_PATTERN.search(desc) + unit = unit_match.group(1) if unit_match else None + + schema[key] = {"type": typ, "desc": desc, "unit": unit} + + return schema if schema else None + + +def isRatioBySchema(field: str, schema: dict[str, dict] | None) -> bool | None: + """스키마에서 필드의 단위를 확인해서 비율인지 확정. + + Returns True(비율)/False(금액)/None(스키마에 없음 → fallback 필요). + """ + if schema is None or field not in schema: + return None + unit = schema[field].get("unit") + if unit == "%": + return True + if unit in ("원", "일"): + return False + return None diff --git a/src/dartlab/ai/context/budget.py b/src/dartlab/ai/context/budget.py new file mode 100644 index 0000000000000000000000000000000000000000..6555be4037849e062cddd4f0d36e268ed7dfa084 --- /dev/null +++ b/src/dartlab/ai/context/budget.py @@ -0,0 +1,68 @@ +"""토큰 예산 + 우선순위 트리밍. + +provider별 컨텍스트 한도를 기준으로 ContextPart 리스트를 정리. +CRITICAL은 절대 제거하지 않고, OPTIONAL부터 자른다. +""" + +from __future__ import annotations + +from dartlab.ai.context.bundle import ContextPart, PartPriority + +# provider별 안전 컨텍스트 예산 (system + user 합계 기준 권장치) +# 보수적으로 설정 — 응답 토큰 여유 확보. +_PROVIDER_BUDGETS: dict[str, int] = { + "gemini": 30000, + "openai": 12000, + "groq": 6000, + "cerebras": 6000, + "mistral": 8000, + "ollama": 4000, + "claude": 30000, + "claude_code": 30000, + "codex": 12000, + "oauth_codex": 12000, +} + +_DEFAULT_BUDGET = 8000 + + +def budgetFor(provider: str | None) -> int: + """provider 이름 → 권장 컨텍스트 예산 토큰.""" + if not provider: + return _DEFAULT_BUDGET + return _PROVIDER_BUDGETS.get(provider.lower(), _DEFAULT_BUDGET) + + +def trim( + parts: list[ContextPart], + *, + budgetTokens: int, +) -> tuple[list[ContextPart], list[str]]: + """우선순위 기반 트리밍. + + Returns: + (kept, droppedKeys) + - kept: 예산 안에 들어간 parts (priority 내림차순) + - droppedKeys: 잘려나간 part key 리스트 + """ + # priority 내림차순 정렬 (높은 우선순위 먼저) + sorted_parts = sorted(parts, key=lambda p: p.priority, reverse=True) + + kept: list[ContextPart] = [] + dropped: list[str] = [] + used = 0 + + for part in sorted_parts: + # CRITICAL은 예산 초과해도 무조건 포함 (안전장치) + if part.priority == PartPriority.CRITICAL: + kept.append(part) + used += part.estimatedTokens + continue + + if used + part.estimatedTokens <= budgetTokens: + kept.append(part) + used += part.estimatedTokens + else: + dropped.append(part.key) + + return kept, dropped diff --git a/src/dartlab/ai/context/builder.py b/src/dartlab/ai/context/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6af5862337401c51ec15064ee3809746a5e2fd --- /dev/null +++ b/src/dartlab/ai/context/builder.py @@ -0,0 +1,195 @@ +"""ContextBuilder — Phase 1 메인 진입점. + +질문 + Company + provider → ContextBundle. + +설계: +1. classifyIntent() 로 질문 의도 파악 +2. legacy selectors 호출 → 기존 5개 pre-grounding을 ContextPart로 +3. (Phase 1.5) intent별 act selector 호출 +4. budget.trim() 으로 토큰 예산 적용 +5. ContextBundle 반환 + +Phase 1 보장: 기존 _analyze_inner 동작과 동일 (legacy selectors만 사용). +DARTLAB_CONTEXT_V2=1 환경 변수로 활성화. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from dartlab.ai.context.budget import budgetFor, trim +from dartlab.ai.context.bundle import ContextBundle, ContextPart +from dartlab.ai.context.intent import Intent, classifyIntent +from dartlab.ai.context.selectors import ( + selectCompanySearch, + selectDisclosureBrief, + selectExternalSearch, + selectInsightHints, + selectMemoryHints, + selectPlaybookBullets, +) + + +@dataclass +class ContextBuilder: + """질문 → ContextBundle 빌더. + + 사용:: + + bundle = ContextBuilder( + question="삼성전자 마진 추세는?", + company=c, + provider="gemini", + ).build() + + userParts = bundle.toUserParts() # 기존 _analyze_inner 호환 + """ + + question: str + company: Any | None = None + provider: str | None = None + budgetTokens: int | None = None # None → provider별 기본값 + + def build(self) -> ContextBundle: + if not self.question or not self.question.strip(): + return ContextBundle(intent=Intent.ACT_ALL.value) + + # Company 메타 추출 + stockCode = ( + getattr(self.company, "stockCode", None) or getattr(self.company, "ticker", None) + if self.company is not None + else None + ) + corpName = getattr(self.company, "corpName", None) if self.company is not None else None + + # 1. Intent 분류 + intentResult = classifyIntent(self.question, hasCompany=self.company is not None) + + # 2. selector 호출 (legacy + ACE playbook + analysis calc) + parts: list[ContextPart] = [] + parts.extend(selectCompanySearch(self.question, self.company)) + parts.extend(selectDisclosureBrief(stockCode)) + parts.extend(selectExternalSearch(self.question, stockCode, corpName)) + parts.extend(selectMemoryHints(stockCode)) + parts.extend(selectInsightHints(stockCode, self.company)) + # ACE evolving playbook — intent별 학습된 분석 지침 주입 + parts.extend(selectPlaybookBullets(intentResult.intent.value, self.company)) + # intent → analysis calc selector 라우팅 + parts.extend(self._selectCalcForIntent(intentResult.intent)) + # Phase 2: 인과 질문("왜") → graph traversal + try: + from dartlab.ai.context.selectors.graph import selectGraphCauses + + parts.extend(selectGraphCauses(self.question, self.company)) + except ImportError: + pass + + # 3. 분석 대상 라벨 (CRITICAL — 항상 포함) + if corpName and stockCode: + from dartlab.ai.context.bundle import PartPriority + from dartlab.ai.context.encoder import estimateTokens + + label = f"분석 대상: {corpName} (종목코드: {stockCode})" + parts.insert( + 0, + ContextPart( + key="company.label", + text=label, + priority=PartPriority.CRITICAL, + estimatedTokens=estimateTokens(label), + source="company.meta", + ), + ) + + # 4. concept selector (Company 불필요) + if intentResult.intent == Intent.CONCEPT: + try: + from dartlab.ai.context.selectors.concept import selectConcept + + parts.extend(selectConcept(self.question)) + except ImportError: + pass + + # 5. 예산 트리밍 + budget = self.budgetTokens or budgetFor(self.provider) + kept, dropped = trim(parts, budgetTokens=budget) + + totalTokens = sum(p.estimatedTokens for p in kept) + return ContextBundle( + parts=kept, + intent=intentResult.intent.value, + totalTokens=totalTokens, + droppedKeys=dropped, + ) + + def _selectCalcForIntent(self, intent: Intent) -> list[ContextPart]: + """intent → analysis calc selector 라우팅. + + Company 없으면 빈 리스트. calc 실패 시 빈 리스트 (graceful). + ACT_ALL → 핵심 3개(margin + cashflow + distress)만. + """ + if self.company is None: + return [] + try: + _ROUTER = { + Intent.ACT1_BUSINESS: "dartlab.ai.context.selectors.act1", + Intent.ACT2_PROFIT: "dartlab.ai.context.selectors.act2", + Intent.ACT3_CASH: "dartlab.ai.context.selectors.act3", + Intent.ACT4_STABILITY: "dartlab.ai.context.selectors.act4", + Intent.ACT5_CAPITAL: "dartlab.ai.context.selectors.act5", + Intent.ACT6_OUTLOOK: "dartlab.ai.context.selectors.act6", + Intent.COMPARE: "dartlab.ai.context.selectors.compare", + } + if intent == Intent.ACT_ALL: + # 핵심 3축만 주입 (마진 + 현금흐름 + 안정성) + parts: list[ContextPart] = [] + try: + from dartlab.ai.context.selectors.act2 import selectAct2 + + parts.extend(selectAct2(self.company)) + except (ImportError, Exception): + pass + try: + from dartlab.ai.context.selectors.act3 import selectAct3 + + parts.extend(selectAct3(self.company)) + except (ImportError, Exception): + pass + try: + from dartlab.ai.context.selectors.act4 import selectAct4 + + parts.extend(selectAct4(self.company)) + except (ImportError, Exception): + pass + return parts + + module_path = _ROUTER.get(intent) + if not module_path: + return [] + + import importlib + + mod = importlib.import_module(module_path) + # 함수 이름 규칙: selectAct{N}, selectCompare + fn_name = ( + f"select{intent.value.split('_')[0].title()}" + if "_" in intent.value + else f"select{intent.value.title()}" + ) + # 실제 함수명 매핑 + _FN_NAMES = { + Intent.ACT1_BUSINESS: "selectAct1", + Intent.ACT2_PROFIT: "selectAct2", + Intent.ACT3_CASH: "selectAct3", + Intent.ACT4_STABILITY: "selectAct4", + Intent.ACT5_CAPITAL: "selectAct5", + Intent.ACT6_OUTLOOK: "selectAct6", + Intent.COMPARE: "selectCompare", + } + fn = getattr(mod, _FN_NAMES[intent]) + if intent == Intent.COMPARE: + return fn(self.company) + return fn(self.company) + except (ImportError, AttributeError, KeyError, Exception): + return [] diff --git a/src/dartlab/ai/context/bundle.py b/src/dartlab/ai/context/bundle.py new file mode 100644 index 0000000000000000000000000000000000000000..469b49cfaeda134a62fea582407f9a3afb9942b4 --- /dev/null +++ b/src/dartlab/ai/context/bundle.py @@ -0,0 +1,66 @@ +"""ContextBundle — ContextBuilder 출력 자료구조. + +builder는 selector들이 만든 ContextPart 리스트를 priority + budget에 따라 +트리밍하여 최종 ContextBundle을 만든다. 소비자(_analyze_inner)는 +bundle.toUserParts() 로 기존 userParts 리스트와 호환되는 형태를 얻는다. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import IntEnum + + +class PartPriority(IntEnum): + """우선순위 — 낮을수록 먼저 트리밍된다. + + 예산 부족 시 LOW부터 제거하고 CRITICAL은 절대 제거하지 않는다. + """ + + CRITICAL = 100 # 분석 대상 종목/회사명 — 절대 트리밍 금지 + HIGH = 80 # analysis calc 결과 (intent 매칭) + MEDIUM = 60 # 인사이트, 그래프 traversal + LOW = 40 # 외부 검색, 메모리 힌트 + OPTIONAL = 20 # few-shot 예시 + + +@dataclass(frozen=True) +class ContextPart: + """단일 컨텍스트 블록. + + selector가 생성하고 builder가 budget에 따라 취사선택한다. + """ + + key: str # selector 식별자 (예: "act2.marginTrend") + text: str # 사람이 읽는 텍스트 (TOON 또는 마크다운) + priority: PartPriority + estimatedTokens: int # rough — len(text) // 3 정도면 충분 + source: str = "" # 출처 (예: "calc:profitability", "knowledgedb:insight") + + def __post_init__(self) -> None: + if not self.text: + raise ValueError(f"ContextPart.text empty: key={self.key}") + + +@dataclass +class ContextBundle: + """ContextBuilder 최종 출력. + + 소비자는 toUserParts() 로 기존 코드 (_analyze_inner) 와 호환되는 리스트를 얻는다. + parts 는 priority 내림차순 정렬되어 있다. + """ + + parts: list[ContextPart] = field(default_factory=list) + intent: str = "" + totalTokens: int = 0 + droppedKeys: list[str] = field(default_factory=list) # budget으로 잘린 part keys + + def toUserParts(self) -> list[str]: + """기존 _analyze_inner userParts 호환 — text 리스트만.""" + return [p.text for p in self.parts] + + def keys(self) -> list[str]: + return [p.key for p in self.parts] + + def __len__(self) -> int: + return len(self.parts) diff --git a/src/dartlab/ai/context/encoder.py b/src/dartlab/ai/context/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..70dc99b8524f9297bbeabebbcb6fba192ea53b1a --- /dev/null +++ b/src/dartlab/ai/context/encoder.py @@ -0,0 +1,115 @@ +"""TOON (Token-Oriented Object Notation) 인코더. + +LLM 입력용 압축 표현. 같은 데이터를 JSON 대비 30~60% 적은 토큰으로 주입. +일부 케이스(작은 dict)에는 효과 없음 — encodeAuto가 작은 입력은 JSON 유지. + +참조: TOON 사양 (2026, llm-data 압축 포맷) +- 키: 한 번만 등장 (헤더 행) +- 값: 행 단위 정렬 +- 깊은 중첩 최소화 (LLM 어텐션이 가장 잘 처리하는 형태) + +dartlab은 외부 의존성 추가 없이 자체 구현 — 단순 직렬화. +""" + +from __future__ import annotations + +import json +from typing import Any + + +def _isFlatList(value: Any) -> bool: + """list[dict] 형태이고 모든 dict가 같은 키 집합인지.""" + if not isinstance(value, list) or not value: + return False + if not all(isinstance(x, dict) for x in value): + return False + first_keys = tuple(value[0].keys()) + return all(tuple(x.keys()) == first_keys for x in value) + + +def _encodeFlatList(rows: list[dict[str, Any]]) -> str: + """list[dict] → TOON 표 형식. + + 예:: + + [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + → + a|b + 1|2 + 3|4 + """ + if not rows: + return "" + keys = list(rows[0].keys()) + header = "|".join(keys) + lines = [header] + for row in rows: + cells = [] + for k in keys: + v = row.get(k) + if v is None: + cells.append("") + elif isinstance(v, (int, float, str, bool)): + cells.append(str(v)) + else: + cells.append(json.dumps(v, ensure_ascii=False, default=str)) + lines.append("|".join(cells)) + return "\n".join(lines) + + +def _encodeDict(d: dict[str, Any], depth: int = 0) -> str: + """dict → TOON key:value 행 형식. 중첩 list[dict]는 표로 변환.""" + if not d: + return "" + lines = [] + indent = " " * depth + for k, v in d.items(): + if _isFlatList(v): + lines.append(f"{indent}{k}:") + table = _encodeFlatList(v) + lines.extend(f"{indent} {ln}" for ln in table.split("\n")) + elif isinstance(v, dict): + lines.append(f"{indent}{k}:") + lines.append(_encodeDict(v, depth + 1)) + elif isinstance(v, list): + # 단순 list[scalar] — 한 줄에 ,로 + lines.append(f"{indent}{k}: " + ", ".join(str(x) for x in v)) + elif v is None: + lines.append(f"{indent}{k}: -") + else: + lines.append(f"{indent}{k}: {v}") + return "\n".join(lines) + + +def encodeTOON(data: Any) -> str: + """임의 데이터 → TOON 텍스트. + + list[dict] (균질) → 표 형식 + dict → key:value (중첩 처리) + 그 외 → JSON fallback + """ + if _isFlatList(data): + return _encodeFlatList(data) + if isinstance(data, dict): + return _encodeDict(data) + return json.dumps(data, ensure_ascii=False, default=str) + + +def encodeAuto(data: Any, *, jsonThresholdChars: int = 200) -> str: + """작은 입력은 JSON, 큰 입력은 TOON. + + 작은 dict는 JSON이 더 짧을 수 있음 (헤더 오버헤드 없음). + """ + js = json.dumps(data, ensure_ascii=False, default=str) + if len(js) < jsonThresholdChars: + return js + toon = encodeTOON(data) + # TOON이 더 길면 JSON 사용 (안전장치) + return toon if len(toon) < len(js) else js + + +def estimateTokens(text: str) -> int: + """rough 토큰 추정 — 한국어 + 영문 혼합 기준 평균 1토큰 ≈ 2.5 chars.""" + if not text: + return 0 + return max(1, len(text) // 3) diff --git a/src/dartlab/ai/context/intent.py b/src/dartlab/ai/context/intent.py new file mode 100644 index 0000000000000000000000000000000000000000..871541cfe479a127a4ba15e7c8adbc5725d7d2a8 --- /dev/null +++ b/src/dartlab/ai/context/intent.py @@ -0,0 +1,236 @@ +"""Intent 분류 — 질문 → 6막 + compare + concept. + +LLM 호출 없이 키워드 매칭 + Company 상태 + 패턴으로 결정론적 분류. +selfai 폐기 학습 적용: ML 없음, 모든 규칙은 명시적 코드. + +8개 Intent: + act1_business — 사업이해 (수익구조, 성장성) + act2_profit — 수익성 (마진, 비용구조) + act3_cash — 현금흐름 (CF, 이익품질) + act4_stability — 안정성 (부채, 신용) + act5_capital — 자본배분 (배당, ROIC) + act6_outlook — 전망 (가치평가, 매크로) + compare — 시장 비교 (scan) + concept — 개념질문 (capabilities, docs) + +오분류 fallback: act_all (핵심 축 요약 주입) +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + + +class Intent(str, Enum): + ACT1_BUSINESS = "act1_business" + ACT2_PROFIT = "act2_profit" + ACT3_CASH = "act3_cash" + ACT4_STABILITY = "act4_stability" + ACT5_CAPITAL = "act5_capital" + ACT6_OUTLOOK = "act6_outlook" + COMPARE = "compare" + CONCEPT = "concept" + ACT_ALL = "act_all" # fallback — 의도가 명확하지 않거나 종합 질문 + + +# ── 키워드 사전 ──────────────────────────────────────────── +# 각 막에 배타적으로 강한 신호만 등록. 약한 키워드는 act_all로 떨어져도 OK. + +_KEYWORDS: dict[Intent, tuple[str, ...]] = { + Intent.ACT1_BUSINESS: ( + "사업", + "비즈니스", + "매출구성", + "사업부", + "세그먼트", + "segment", + "제품", + "서비스", + "고객", + "시장점유", + "시장 점유", + "성장", + "뭐하는", + "뭘 하는", + "어떤 회사", + "뭐 해서", + "뭐해서", + "돈 벌", + "돈벌", + "수익원", + ), + Intent.ACT2_PROFIT: ( + "수익성", + "마진", + "영업이익률", + "순이익률", + "ROIC", + "ROE", + "ROA", + "비용구조", + "원가", + "판관비", + "이익률", + "수익", + "벌고", + ), + Intent.ACT3_CASH: ( + "현금", + "현금흐름", + "OCF", + "FCF", + "이익품질", + "운전자본", + "감가상각", + "발생액", + "현금전환", + ), + Intent.ACT4_STABILITY: ( + "부채", + "안정성", + "재무건전", + "이자보상", + "유동", + "차입", + "신용", + "부실", + "Z-Score", + "ICR", + "디폴트", + "default", + ), + Intent.ACT5_CAPITAL: ( + "배당", + "자사주", + "자본배분", + "주주환원", + "유보", + "재투자", + "CAPEX", + "WACC", + ), + Intent.ACT6_OUTLOOK: ( + "전망", + "예측", + "추정", + "valuation", + "DCF", + "PER", + "PBR", + "목표가", + "적정가", + "고평가", + "저평가", + "안전마진", + "매크로", + "환율", + "금리", + "유가", + ), + Intent.COMPARE: ( + "비교", + "랭킹", + "순위", + "상위", + "하위", + "대비", + "vs", + "VS", + "동종", + "동종업계", + "peer", + "scan", + "스캔", + "전종목", + "업종 평균", + "업종평균", + # NOTE: "보다 큰/작은/높/낮" 은 두 지표 간 비교에도 자주 쓰여 제외. + # COMPARE 는 회사 간 비교일 때만 매칭되도록 명시적 키워드만 둔다. + ), + Intent.CONCEPT: ( + "사용법", + "어떻게 쓰", + "어떻게 사용", + "어떻게 호출", + "방법 알려", + "예시", + "예제", + "튜토리얼", + "dartlab", + "ask(", + "show(", + "select(", + "analysis(", + "review(", + "공시 어디", + "어디서 찾", + ), +} + + +@dataclass(frozen=True) +class IntentResult: + intent: Intent + confidence: float # 0.0~1.0 — 매칭된 키워드 / 후보 키워드 비율 + matchedKeywords: tuple[str, ...] + + +def _scoreIntent(question: str, intent: Intent) -> tuple[float, tuple[str, ...]]: + """단일 intent 점수 + 매칭된 키워드 반환.""" + q = question.lower() + keywords = _KEYWORDS[intent] + matched = tuple(kw for kw in keywords if kw.lower() in q) + if not matched: + return 0.0, () + # 매칭 키워드 수 / 후보 수 — 정규화. 단순 카운트 우선. + score = len(matched) / max(len(keywords), 1) + # 매칭 1개라도 있으면 최소 0.2 보장 (희소 키워드 보호) + return max(score, 0.2), matched + + +def classifyIntent( + question: str, + *, + hasCompany: bool = False, +) -> IntentResult: + """질문 → IntentResult. + + Args: + question: 사용자 질문 + hasCompany: Company 객체 존재 여부 (없으면 CONCEPT/COMPARE 가중치) + + Returns: + IntentResult — 가장 높은 점수의 intent. 동점은 정의 순서. + """ + if not question or not question.strip(): + return IntentResult(Intent.ACT_ALL, 0.0, ()) + + scores: list[tuple[Intent, float, tuple[str, ...]]] = [] + for intent in ( + Intent.COMPARE, # compare 먼저 — "비교" 키워드가 다른 막과 섞일 때 우선 + Intent.CONCEPT, + Intent.ACT2_PROFIT, + Intent.ACT3_CASH, + Intent.ACT4_STABILITY, + Intent.ACT5_CAPITAL, + Intent.ACT6_OUTLOOK, + Intent.ACT1_BUSINESS, + ): + score, matched = _scoreIntent(question, intent) + if score > 0: + scores.append((intent, score, matched)) + + if not scores: + return IntentResult(Intent.ACT_ALL, 0.0, ()) + + # Company 없으면 막 관련 intent는 의미 없음 → CONCEPT/COMPARE 우대 + if not hasCompany: + prioritized = [s for s in scores if s[0] in (Intent.CONCEPT, Intent.COMPARE)] + if prioritized: + scores = prioritized + + # 최고 점수 선택 (동점은 위 순서 유지) + scores.sort(key=lambda s: s[1], reverse=True) + best = scores[0] + return IntentResult(best[0], best[1], best[2]) diff --git a/src/dartlab/ai/context/playbook.py b/src/dartlab/ai/context/playbook.py new file mode 100644 index 0000000000000000000000000000000000000000..99f4e1c3b8411b06b502daee6cdfd4663f5f3e97 --- /dev/null +++ b/src/dartlab/ai/context/playbook.py @@ -0,0 +1,220 @@ +"""ACE Curator/Reflector — dartlab 결정론 구현. + +논문: arxiv.org/abs/2510.04618 (ICLR 2026, Stanford+UCB+SambaNova) + +ACE 3 컴포넌트 매핑: + Generator = ai/runtime/core.py::_streamWithCodeExecution (이미 있음) + Reflector = extractBullets() — 응답 텍스트에서 bullet 추출 (결정론) + Curator = curate() — KnowledgeDB.upsert_bullet 위임 (delta merge) + +핵심 규칙 (논문): + 1. delta merge — 기존 bullet 절대 삭제 X. context collapse 방지. + 2. bullet은 한 줄 (200자 cap), 중첩 금지. + 3. success/fail 카운트 → quality (Beta posterior 근사). + 4. retrieval은 quality desc, 섹터 우선 매칭. + +selfai 폐기 학습 적용: + - LLM Reflector 안 씀 (페이퍼는 LLM Reflector 사용). + - dartlab은 결정론 regex/패턴 추출만 — 디버깅 가능, 토큰 비용 0. + - 효과 검증 후 LLM Reflector 단계 도입 검토. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +# ── Reflector: 응답 → bullet 결정론 추출 ─────────────────── + +# 의미 있는 한 줄 패턴 (한국어 분석 응답 기준) +_BULLET_HEADERS = ( + "결론", + "핵심", + "요약", + "판단", + "주의", + "리스크", + "강점", + "약점", + "관전", + "관찰", +) + +_BULLET_LINE_RE = re.compile( + r"^\s*[-*•]\s*(.+?)$", + re.MULTILINE, +) + +_HEADER_LINE_RE = re.compile( + rf"(?:{'|'.join(_BULLET_HEADERS)})[::]\s*([^\n]{{8,180}})", +) + +# 너무 짧거나 무의미한 패턴 차단 +_NOISE_RE = re.compile(r"^(있다|없다|확인|분석|참고|참조)\.?$") + + +def _cleanBullet(text: str) -> str | None: + """bullet 정제 — 길이/노이즈 필터.""" + text = re.sub(r"\s+", " ", text).strip() + text = text.strip("-*•·#> .").strip() + if not text: + return None + if len(text) < 8 or len(text) > 200: + return None + if _NOISE_RE.match(text): + return None + # 코드/표 라인 제외 + if "|" in text and text.count("|") >= 3: + return None + if text.startswith("```"): + return None + return text + + +def extractBullets(response_text: str, *, max_bullets: int = 8) -> list[str]: + """응답 텍스트 → 핵심 bullet 리스트. + + 추출 우선순위: + 1. "결론:", "핵심:", "주의:" 등 헤더 매칭 (가장 신뢰) + 2. 마크다운 리스트 항목 (- / * / •) + 3. 위 둘 다 없으면 빈 리스트 (조용히 실패) + """ + if not response_text: + return [] + bullets: list[str] = [] + seen: set[str] = set() + + # 1. 헤더 매칭 + for m in _HEADER_LINE_RE.finditer(response_text): + cleaned = _cleanBullet(m.group(1)) + if cleaned and cleaned not in seen: + bullets.append(cleaned) + seen.add(cleaned) + if len(bullets) >= max_bullets: + return bullets + + # 2. 마크다운 리스트 + for m in _BULLET_LINE_RE.finditer(response_text): + cleaned = _cleanBullet(m.group(1)) + if cleaned and cleaned not in seen: + bullets.append(cleaned) + seen.add(cleaned) + if len(bullets) >= max_bullets: + return bullets + + return bullets + + +# ── grade → outcome 매핑 ────────────────────────────────── + + +def gradeToOutcome(grade: str | None) -> str: + """KnowledgeDB executions.grade → upsert_bullet outcome. + + dartlab grade 체계: + G — Good (성공) + T — Trivial (보통, neutral) + C — Crash (실패) + V — Vague (실패 — 모호한 답변) + P — Partial (성공 — 부분적이지만 가치 있음) + """ + g = (grade or "").upper().strip() + if g in ("G", "P"): + return "success" + if g in ("C", "V"): + return "fail" + return "neutral" + + +# ── Curator: bullet 묶음을 KnowledgeDB로 영속 ──────────── + + +@dataclass +class CurateResult: + intent: str + sector: str + inserted: int + skipped: int + + +def curate( + *, + intent: str, + response_text: str, + grade: str | None, + sector: str = "", + source: str = "reflection", +) -> CurateResult: + """Reflector + Curator 한 번에 호출. + + 1. extractBullets — 결정론 추출 + 2. gradeToOutcome — success/fail/neutral 결정 + 3. KnowledgeDB.upsert_bullet — delta merge + + 실패 (DB 없음/import 실패) 시 빈 결과 반환, 예외 전파 X. + """ + if not intent or not response_text: + return CurateResult(intent or "", sector, 0, 0) + + bullets = extractBullets(response_text) + if not bullets: + return CurateResult(intent, sector, 0, 0) + + outcome = gradeToOutcome(grade) + inserted = 0 + skipped = 0 + try: + from dartlab.ai.persistence import KnowledgeDB + + db = KnowledgeDB.get() + except ImportError: + return CurateResult(intent, sector, 0, len(bullets)) + + for b in bullets: + try: + db.upsert_bullet( + intent=intent, + bullet=b, + sector=sector, + outcome=outcome, + source=source, + ) + inserted += 1 + except (OSError, RuntimeError): + skipped += 1 + + return CurateResult(intent, sector, inserted, skipped) + + +# ── Generator 측: bullet retrieval ───────────────────────── + + +def retrieveBullets( + intent: str, + *, + sector: str = "", + limit: int = 6, + min_quality: float = 0.4, +) -> list[str]: + """intent별 playbook bullet retrieval. + + ContextBuilder 의 selector 가 호출. KnowledgeDB 없거나 비어있으면 빈 리스트. + """ + if not intent: + return [] + try: + from dartlab.ai.persistence import KnowledgeDB + + db = KnowledgeDB.get() + except ImportError: + return [] + try: + rows = db.get_bullets( + intent=intent, + sector=sector, + limit=limit, + min_quality=min_quality, + ) + except (OSError, RuntimeError): + return [] + return [r[0] for r in rows] diff --git a/src/dartlab/ai/context/selectors/__init__.py b/src/dartlab/ai/context/selectors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..38c44e4ef6ece99cfed66be8d6b231b0778ef3b0 --- /dev/null +++ b/src/dartlab/ai/context/selectors/__init__.py @@ -0,0 +1,32 @@ +"""ContextBuilder selectors — Intent별 컨텍스트 생산자. + +각 selector는 (question, company, intent) → list[ContextPart] 형태. +순수 함수, 부수효과 없음. 실패 시 빈 리스트 반환 (에러 전파 금지). + +Phase 1 (현재): + legacy.py — 기존 ai/runtime/core.py의 pre-grounding 5개 헬퍼 래핑 + (손실 없는 이주, A/B 비교 가능) + +Phase 1.5 (다음): + act1~6.py, compare.py, concept.py — analysis calc 결과를 intent별로 선택 주입 +""" + +from __future__ import annotations + +from dartlab.ai.context.selectors.legacy import ( + selectCompanySearch, + selectDisclosureBrief, + selectExternalSearch, + selectInsightHints, + selectMemoryHints, +) +from dartlab.ai.context.selectors.playbook import selectPlaybookBullets + +__all__ = [ + "selectCompanySearch", + "selectDisclosureBrief", + "selectExternalSearch", + "selectInsightHints", + "selectMemoryHints", + "selectPlaybookBullets", +] diff --git a/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c39197b0f71915fe2e2d6ce8bf719536395f2bbf Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48395009e64a5aa55cae6d18be2e76615958fa4d Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5db8e3eeeaae45373c804715e4435b0c3d288cfe Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63d12abb2afc906ad69f2a5d1f9e37e68e025f2b Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/_calc_base.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act1.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act1.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d0c36fd6c2ab2e38d1b8bd7a2c4aeb0613740f6 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act1.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2ffab6d1a1ee0607172f5d04cfb78ada5622beb Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f21e0e8ff80e3168267f68988bbd2be111c90f4c Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act2.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b92059745c2225a0198cf3cfe8b589b24288248 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d6f60d3eca439f7bad508f5dc5c53bd9d804b89 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act3.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f374970f99d83b2c25c07a7c550b4ee898f8e4fe Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb81bf28885ba37b64f8258680e869182403068f Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act4.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act5.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act5.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..884dbdab44ce1c5b8cf8d01f69e07fff71b43525 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act5.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/act6.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/act6.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27844d4947f6d9374b14bf0e62dd7b9594fd767c Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/act6.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/compare.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/compare.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..483ad752c09951ac1bbd6da509bae4137779b050 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/compare.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/concept.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/concept.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94a4e416e4bd0a411804f73856b1582a373c5c60 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/concept.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95cf40b0adc7f7450d90334290a017c1e380f14d Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbe1c87d8a8048af64f157610570d73b8ada1076 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/graph.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2803f2209d65a0d12e86c66b86fc66e1efe628a Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15d3b4fed80354bc00296e35d15b586868c5253c Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/legacy.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-312.pyc b/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1320b3e1680ab98e120f566bb623f5a0f3506d38 Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-312.pyc differ diff --git a/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-313.pyc b/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef9223e3e129ae91d1a46b01277ec5cbcd355fbc Binary files /dev/null and b/src/dartlab/ai/context/selectors/__pycache__/playbook.cpython-313.pyc differ diff --git a/src/dartlab/ai/context/selectors/_calc_base.py b/src/dartlab/ai/context/selectors/_calc_base.py new file mode 100644 index 0000000000000000000000000000000000000000..cfb39605dc5a2fd272523dced33c446687b59be4 --- /dev/null +++ b/src/dartlab/ai/context/selectors/_calc_base.py @@ -0,0 +1,123 @@ +"""Analysis Calc Selector 공통 헬퍼. + +모든 act selector가 사용하는 패턴: +1. _safeCalc(fn, company, basePeriod) → dict | None +2. _calcToContextPart(key, data, priority) → ContextPart | None +3. _resolveBase(company) → basePeriod string + +레지스트리 패턴(review/registry.py::buildBlocks)을 참고하되, +selector는 Block이 아닌 ContextPart를 반환한다. +""" + +from __future__ import annotations + +import logging +from typing import Any, Callable + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import encodeAuto, estimateTokens + +log = logging.getLogger(__name__) + +# calc 실패 시 잡아야 하는 예외 — registry.py 패턴 동일 +_SAFE_EXCEPTIONS = ( + KeyError, + TypeError, + ValueError, + IndexError, + AttributeError, + ZeroDivisionError, + FileNotFoundError, + OSError, + RuntimeError, + ArithmeticError, + StopIteration, + ImportError, +) + + +def _safeCalc(fn: Callable, *args: Any, **kwargs: Any) -> dict | list | None: + """calc 함수 안전 호출 — None/에러 시 None.""" + try: + return fn(*args, **kwargs) + except _SAFE_EXCEPTIONS: + return None + + +def _resolveBase(company: Any) -> str | None: + """Company에서 basePeriod 해석. 실패 시 None (calc이 자동 결정).""" + try: + from dartlab.analysis.financial._helpers import resolveBasePeriod + + pr = resolveBasePeriod(company, None, maxYears=5, maxQuarters=8) + return pr.basePeriod if pr else None + except _SAFE_EXCEPTIONS + (Exception,): + return None + + +def _calcToContextPart( + key: str, + data: dict | list | None, + priority: PartPriority = PartPriority.HIGH, + *, + label: str = "", + source: str = "", + company: Any = None, +) -> ContextPart | None: + """calc 결과(dict/list) → AI 맥락 보강 → TOON 인코딩 → ContextPart. None이면 None.""" + if data is None: + return None + + # AI 맥락 보강 — 5년 평균, YoY 판단, 핵심 요약 자동 추가 + from dartlab.ai.context.aiview import autoEnrich + + enriched = autoEnrich(data, company=company) + + # _summary가 있으면 헤더에 포함 (AI가 바로 해석 가능) + summary_line = "" + if isinstance(enriched, dict) and "_summary" in enriched: + summary_line = enriched.pop("_summary") + "\n" + + # history 키가 있으면 그것만 추출 (가장 유용한 시계열) + if isinstance(enriched, dict) and "history" in enriched: + rows = enriched["history"] + if isinstance(rows, list) and rows: + text_body = encodeAuto(rows) + else: + text_body = encodeAuto(enriched) + else: + text_body = encodeAuto(enriched) + + if not text_body or len(text_body) < 5: + return None + + header = f"## {label}\n" if label else "" + text = f'\n{header}{summary_line}{text_body}\n' + tokens = estimateTokens(text) + + return ContextPart( + key=key, + text=text, + priority=priority, + estimatedTokens=tokens, + source=source or key, + ) + + +def _buildParts( + company: Any, + calcs: list[tuple[str, str, Callable, PartPriority]], + basePeriod: str | None = None, +) -> list[ContextPart]: + """여러 calc를 한꺼번에 실행하여 ContextPart 리스트로. + + calcs: [(key, label, calc_fn, priority), ...] + """ + parts: list[ContextPart] = [] + bp = basePeriod or _resolveBase(company) + for key, label, fn, prio in calcs: + result = _safeCalc(fn, company, basePeriod=bp) if bp else _safeCalc(fn, company) + part = _calcToContextPart(key, result, prio, label=label, source=f"calc:{key}", company=company) + if part is not None: + parts.append(part) + return parts diff --git a/src/dartlab/ai/context/selectors/act1.py b/src/dartlab/ai/context/selectors/act1.py new file mode 100644 index 0000000000000000000000000000000000000000..085299005168592af5d1e7bdcfac38a558b850f7 --- /dev/null +++ b/src/dartlab/ai/context/selectors/act1.py @@ -0,0 +1,30 @@ +"""Act 1 selector: 사업이해 (수익구조 + 성장성).""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct1(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + try: + from dartlab.analysis.financial.revenue import ( + calcConcentration, + calcRevenueGrowth, + calcSegmentComposition, + ) + except ImportError: + return [] + return _buildParts( + company, + [ + ("act1.segments", "매출 구성", calcSegmentComposition, PartPriority.HIGH), + ("act1.growth", "매출 성장", calcRevenueGrowth, PartPriority.HIGH), + ("act1.concentration", "매출 집중도", calcConcentration, PartPriority.MEDIUM), + ], + basePeriod, + ) diff --git a/src/dartlab/ai/context/selectors/act2.py b/src/dartlab/ai/context/selectors/act2.py new file mode 100644 index 0000000000000000000000000000000000000000..e02ef5bdae393ad5a76cf47c44ce3ca1bec44328 --- /dev/null +++ b/src/dartlab/ai/context/selectors/act2.py @@ -0,0 +1,30 @@ +"""Act 2 selector: 수익성 (마진 + 수익률 + DuPont).""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct2(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + try: + from dartlab.analysis.financial.profitability import ( + calcDupont, + calcMarginTrend, + calcReturnTrend, + ) + except ImportError: + return [] + return _buildParts( + company, + [ + ("act2.margin", "마진 추이", calcMarginTrend, PartPriority.HIGH), + ("act2.return", "수익률 추이", calcReturnTrend, PartPriority.HIGH), + ("act2.dupont", "DuPont 분해", calcDupont, PartPriority.MEDIUM), + ], + basePeriod, + ) diff --git a/src/dartlab/ai/context/selectors/act3.py b/src/dartlab/ai/context/selectors/act3.py new file mode 100644 index 0000000000000000000000000000000000000000..289553b25941f749447c9597917561bfe33a8f9b --- /dev/null +++ b/src/dartlab/ai/context/selectors/act3.py @@ -0,0 +1,27 @@ +"""Act 3 selector: 현금흐름 + 이익품질.""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct3(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + try: + from dartlab.analysis.financial.cashflow import calcCashFlowOverview, calcCashQuality + from dartlab.analysis.financial.earningsQuality import calcAccrualAnalysis + except ImportError: + return [] + return _buildParts( + company, + [ + ("act3.cashflow", "현금흐름 개요", calcCashFlowOverview, PartPriority.HIGH), + ("act3.quality", "현금 품질", calcCashQuality, PartPriority.HIGH), + ("act3.accrual", "발생액 분석", calcAccrualAnalysis, PartPriority.MEDIUM), + ], + basePeriod, + ) diff --git a/src/dartlab/ai/context/selectors/act4.py b/src/dartlab/ai/context/selectors/act4.py new file mode 100644 index 0000000000000000000000000000000000000000..495f446638704dfd4af7f5f55894066b27e19678 --- /dev/null +++ b/src/dartlab/ai/context/selectors/act4.py @@ -0,0 +1,27 @@ +"""Act 4 selector: 안정성 (자금조달 + 레버리지 + 부실판별).""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct4(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + try: + from dartlab.analysis.financial.capital import calcFundingSources + from dartlab.analysis.financial.stability import calcDistressScore, calcLeverageTrend + except ImportError: + return [] + return _buildParts( + company, + [ + ("act4.funding", "자금 원천", calcFundingSources, PartPriority.HIGH), + ("act4.leverage", "레버리지 추이", calcLeverageTrend, PartPriority.HIGH), + ("act4.distress", "부실 판별", calcDistressScore, PartPriority.MEDIUM), + ], + basePeriod, + ) diff --git a/src/dartlab/ai/context/selectors/act5.py b/src/dartlab/ai/context/selectors/act5.py new file mode 100644 index 0000000000000000000000000000000000000000..2725adb48585a465482ebc8463680d45333aa7e8 --- /dev/null +++ b/src/dartlab/ai/context/selectors/act5.py @@ -0,0 +1,28 @@ +"""Act 5 selector: 자본배분 (자산구조 + ROIC + 배당).""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct5(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + try: + from dartlab.analysis.financial.asset import calcAssetStructure + from dartlab.analysis.financial.capitalAllocation import calcDividendPolicy + from dartlab.analysis.financial.investmentAnalysis import calcRoicTimeline + except ImportError: + return [] + return _buildParts( + company, + [ + ("act5.asset", "자산 구조", calcAssetStructure, PartPriority.HIGH), + ("act5.roic", "ROIC 추이", calcRoicTimeline, PartPriority.HIGH), + ("act5.dividend", "배당 정책", calcDividendPolicy, PartPriority.MEDIUM), + ], + basePeriod, + ) diff --git a/src/dartlab/ai/context/selectors/act6.py b/src/dartlab/ai/context/selectors/act6.py new file mode 100644 index 0000000000000000000000000000000000000000..d5b51ece058213a0273d9827692d01f549ae56f5 --- /dev/null +++ b/src/dartlab/ai/context/selectors/act6.py @@ -0,0 +1,29 @@ +"""Act 6 selector: 전망 (가치평가 + 신용).""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.selectors._calc_base import _buildParts + + +def selectAct6(company: Any, basePeriod: str | None = None) -> list[ContextPart]: + if company is None: + return [] + calcs: list[tuple[str, str, Any, PartPriority]] = [] + try: + from dartlab.analysis.financial.valuation import calcValuationSynthesis + + calcs.append(("act6.valuation", "가치평가 종합", calcValuationSynthesis, PartPriority.HIGH)) + except ImportError: + pass + try: + from dartlab.credit.calcs import calcCreditScore + + calcs.append(("act6.credit", "신용등급", calcCreditScore, PartPriority.MEDIUM)) + except ImportError: + pass + if not calcs: + return [] + return _buildParts(company, calcs, basePeriod) diff --git a/src/dartlab/ai/context/selectors/compare.py b/src/dartlab/ai/context/selectors/compare.py new file mode 100644 index 0000000000000000000000000000000000000000..f4a623171240ef053f0c67e566dda9d73064e535 --- /dev/null +++ b/src/dartlab/ai/context/selectors/compare.py @@ -0,0 +1,58 @@ +"""Compare selector: scan 횡단분석 결과 주입.""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import encodeAuto, estimateTokens + + +def selectCompare(company: Any) -> list[ContextPart]: + """scan profitability 결과에서 해당 종목 위치 + 상하위 5개 주입.""" + if company is None: + return [] + stockCode = getattr(company, "stockCode", None) or getattr(company, "ticker", None) + if not stockCode: + return [] + try: + import dartlab + + df = dartlab.scan("profitability") + except (ImportError, FileNotFoundError, OSError, RuntimeError): + return [] + if df is None or len(df) == 0: + return [] + + # 해당 종목 행 찾기 + try: + code_col = "종목코드" if "종목코드" in df.columns else "stockCode" + if code_col not in df.columns: + return [] + my_row = df.filter(df[code_col] == stockCode) + if len(my_row) == 0: + return [] + # 상하위 5개 + 내 위치 + rank_col = "영업이익률_rank" if "영업이익률_rank" in df.columns else None + if rank_col: + my_rank = my_row[rank_col][0] + nearby = df.sort(rank_col).slice(max(0, int(my_rank) - 3), 7) + data = nearby.to_dicts() + else: + data = my_row.to_dicts() + except (KeyError, IndexError, TypeError, ValueError): + return [] + + if not data: + return [] + text_body = encodeAuto(data) + text = f'\n## 동종업계 수익성 비교 ({stockCode})\n{text_body}\n' + return [ + ContextPart( + key="compare.profitability", + text=text, + priority=PartPriority.HIGH, + estimatedTokens=estimateTokens(text), + source="scan:profitability", + ) + ] diff --git a/src/dartlab/ai/context/selectors/concept.py b/src/dartlab/ai/context/selectors/concept.py new file mode 100644 index 0000000000000000000000000000000000000000..8e22b845a97da716f3b1c4f9f9f13acf79af2d49 --- /dev/null +++ b/src/dartlab/ai/context/selectors/concept.py @@ -0,0 +1,35 @@ +"""Concept selector: dartlab 사용법/API 안내.""" + +from __future__ import annotations + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import estimateTokens + +_CAPABILITIES_SUMMARY = """dartlab 주요 API: +- dartlab.Company("종목코드") → 회사 facade (show/select/analysis/review) +- c.show("IS") / c.show("BS") / c.show("CF") — 재무제표 +- c.select("IS", ["매출액"]) — 행 필터 +- c.analysis("수익성") — 재무분석 +- c.review("수익성") — 보고서 +- c.credit() — 신용평가 +- dartlab.scan("profitability") — 전종목 횡단분석 +- dartlab.gather("price", "005930") — 주가 +- dartlab.gather("macro") — 거시지표 +- dartlab.search("유상증자") — 공시 검색 +- dartlab.ask("질문") — AI 분석 +- dartlab.listing() — 종목 리스트 +""" + + +def selectConcept(question: str) -> list[ContextPart]: + """dartlab API 요약을 컨텍스트에 주입.""" + text = f'\n{_CAPABILITIES_SUMMARY}\n' + return [ + ContextPart( + key="concept.capabilities", + text=text, + priority=PartPriority.HIGH, + estimatedTokens=estimateTokens(text), + source="capabilities", + ) + ] diff --git a/src/dartlab/ai/context/selectors/graph.py b/src/dartlab/ai/context/selectors/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..84c49da67d94fe1aff25f307e1dd66b06e3b77cd --- /dev/null +++ b/src/dartlab/ai/context/selectors/graph.py @@ -0,0 +1,96 @@ +"""Graph selector — 인과 질문 시 그래프 traversal 결과 ContextPart 주입. + +intent가 어느 막이든, "왜" 키워드가 포함된 질문이면 추가로 호출됨. +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import estimateTokens + +_WHY_KEYWORDS = ("왜", "원인", "이유", "때문", "어째서", "근거", "뭐 때문") + + +def _isWhyQuestion(question: str) -> bool: + return any(kw in question for kw in _WHY_KEYWORDS) + + +def _extractTarget(question: str) -> str: + """질문에서 분석 대상 지표를 추출 → 그래프 노드 label로 매핑.""" + # 키워드 → 그래프 노드 label (builder.py에서 사용한 이름과 일치해야 함) + _KEYWORD_TO_LABEL = [ + ("영업이익률", "영업이익률"), + ("마진", "영업이익률"), # "마진" → 영업이익률 노드 + ("순이익률", "순이익률"), + ("매출총이익률", "매출총이익률"), + ("ROIC", "ROIC"), + ("ROE", "ROIC"), # ROE → ROIC 노드 (가장 가까운) + ("부채비율", "부채비율"), + ("부채", "부채비율"), + ("FCF", "FCF"), + ("현금흐름", "영업CF"), + ("OCF", "영업CF"), + ("Z-Score", "Z-Score"), + ("Z스코어", "Z-Score"), + ("매출", "매출액"), + ("CAPEX", "CAPEX"), + ("WACC", "WACC"), + ] + q = question.lower() + for kw, label in _KEYWORD_TO_LABEL: + if kw.lower() in q: + return label + return "" + + +def selectGraphCauses( + question: str, + company: Any | None, +) -> list[ContextPart]: + """인과 질문 → graph causes traversal → ContextPart.""" + if not _isWhyQuestion(question) or company is None: + return [] + + target = _extractTarget(question) + if not target: + return [] + + try: + from dartlab.analysis.graph import buildGraph + from dartlab.analysis.graph.traverse import causesNarrative, timelineNarrative + except ImportError: + return [] + + try: + g = buildGraph(company) + except (KeyError, TypeError, ValueError, FileNotFoundError, OSError, RuntimeError): + return [] + + if len(g) == 0: + return [] + + # causes + timeline 서사 합산 + parts_text: list[str] = [] + cn = causesNarrative(g, target) + if "찾을 수 없습니다" not in cn: + parts_text.append(cn) + tn = timelineNarrative(g, target) + if "데이터 없음" not in tn: + parts_text.append(tn) + + if not parts_text: + return [] + + text = '\n' + "\n\n".join(parts_text) + f"\n\n그래프: {g.summary()}\n" + + return [ + ContextPart( + key="graph.causes", + text=text, + priority=PartPriority.HIGH, + estimatedTokens=estimateTokens(text), + source=f"graph:causes[{target}]", + ) + ] diff --git a/src/dartlab/ai/context/selectors/legacy.py b/src/dartlab/ai/context/selectors/legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..04d829ef97adf46c4f24b3b44deb20df3666eacd --- /dev/null +++ b/src/dartlab/ai/context/selectors/legacy.py @@ -0,0 +1,149 @@ +"""기존 pre-grounding 헬퍼 래핑 selector. + +Phase 1의 첫 마일스톤: 동작 변경 없이 ai/context/ 구조로 옮긴다. +기존 ai/runtime/core.py 의 5개 헬퍼를 호출하여 ContextPart로 변환만 한다. +회귀가 없는지 확인한 다음, analysis calc selector로 대체한다. + +대응 관계: + _searchCompanyCodes → selectCompanySearch (CRITICAL — 종목코드 식별) + _preGroundDisclosure → selectDisclosureBrief (HIGH — 공시 프로필) + _preGroundSearch → selectExternalSearch (LOW — 외부 검색) + _gatherInsightHints → selectInsightHints (MEDIUM — KnowledgeDB) + (memory hints 인라인) → selectMemoryHints (LOW — 세션 간 메모리) +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import estimateTokens + + +def selectCompanySearch(question: str, company: Any | None) -> list[ContextPart]: + """company=None일 때 종목명 사전 검색.""" + if company is not None: + return [] + try: + from dartlab.ai.runtime.core import _searchCompanyCodes + except ImportError: + return [] + text = _searchCompanyCodes(question) + if not text: + return [] + return [ + ContextPart( + key="legacy.companySearch", + text=text, + priority=PartPriority.CRITICAL, + estimatedTokens=estimateTokens(text), + source="dartlab.searchName", + ) + ] + + +def selectDisclosureBrief(stockCode: str | None) -> list[ContextPart]: + """공시 프로필 (companyProfile.parquet).""" + if not stockCode: + return [] + try: + from dartlab.ai.runtime.core import _preGroundDisclosure + except ImportError: + return [] + text = _preGroundDisclosure(stockCode=stockCode) + if not text: + return [] + return [ + ContextPart( + key="legacy.disclosureBrief", + text=text, + priority=PartPriority.HIGH, + estimatedTokens=estimateTokens(text), + source="core.search.derived.loadProfile", + ) + ] + + +def selectExternalSearch( + question: str, + stockCode: str | None, + corpName: str | None, +) -> list[ContextPart]: + """외부 뉴스/웹 검색 — 키워드 트리거 시에만.""" + try: + from dartlab.ai.runtime.core import _needsExternalSearch, _preGroundSearch + except ImportError: + return [] + if not _needsExternalSearch(question): + return [] + text = _preGroundSearch(question, stockCode=stockCode, corpName=corpName) + if not text: + return [] + return [ + ContextPart( + key="legacy.externalSearch", + text=text, + priority=PartPriority.LOW, + estimatedTokens=estimateTokens(text), + source="gather.search", + ) + ] + + +def selectInsightHints(stockCode: str | None, company: Any | None) -> list[ContextPart]: + """KnowledgeDB insight + sector_insights fallback.""" + if not stockCode: + return [] + try: + from dartlab.ai.runtime.core import _gatherInsightHints + except ImportError: + return [] + text = _gatherInsightHints(stockCode, company) + if not text: + return [] + return [ + ContextPart( + key="legacy.insightHints", + text=text, + priority=PartPriority.MEDIUM, + estimatedTokens=estimateTokens(text), + source="ai.persistence.KnowledgeDB", + ) + ] + + +def selectMemoryHints(stockCode: str | None, limit: int = 3) -> list[ContextPart]: + """세션 간 메모리 — 이전 질문 이력 (수치 제외).""" + if not stockCode: + return [] + try: + import datetime + + from dartlab.ai.memory.store import getMemory + except ImportError: + return [] + try: + records = getMemory().recallForStock(stockCode, limit=limit) + except (OSError, RuntimeError): + return [] + if not records: + return [] + lines = [] + for r in records: + try: + dt = datetime.datetime.fromtimestamp(r.timestamp).strftime("%Y-%m-%d") + lines.append(f"- {dt}: {r.question} ({r.questionType})") + except (AttributeError, OSError, ValueError): + continue + if not lines: + return [] + text = "## 이전 질문 이력\n" + "\n".join(lines) + return [ + ContextPart( + key="legacy.memoryHints", + text=text, + priority=PartPriority.LOW, + estimatedTokens=estimateTokens(text), + source="ai.memory.store", + ) + ] diff --git a/src/dartlab/ai/context/selectors/playbook.py b/src/dartlab/ai/context/selectors/playbook.py new file mode 100644 index 0000000000000000000000000000000000000000..6fdd47a6d08e36f6f37b0ae75f69aa0dcef2eff1 --- /dev/null +++ b/src/dartlab/ai/context/selectors/playbook.py @@ -0,0 +1,57 @@ +"""Playbook selector — ACE evolving playbook을 ContextPart로 주입. + +intent 별 retrieval된 bullet들을 한 ContextPart로 합쳐 HIGH 우선순위로 주입. +analysis calc selectors와 함께 동작. +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.ai.context.bundle import ContextPart, PartPriority +from dartlab.ai.context.encoder import estimateTokens +from dartlab.ai.context.playbook import retrieveBullets + + +def selectPlaybookBullets( + intent: str, + company: Any | None, + *, + limit: int = 6, +) -> list[ContextPart]: + """intent + sector 매칭 playbook bullets → ContextPart. + + Returns: + [ContextPart] — bullets가 있으면 1개, 없으면 빈 리스트. + """ + if not intent or intent == "act_all": + # ACT_ALL fallback은 노이즈 우려 — playbook 주입 생략 + return [] + + sector = "" + if company is not None: + sector = getattr(company, "sector", None) or getattr(company, "sectorName", None) or "" + + bullets = retrieveBullets(intent, sector=str(sector), limit=limit) + if not bullets: + return [] + + # ACE 페이퍼 형식: 번호 매긴 짧은 bullet 리스트 + body = "\n".join(f"- {b}" for b in bullets) + text = ( + '\n' + f"## 학습된 분석 지침 ({intent})\n" + "이전 분석에서 검증된 관점입니다. 현재 데이터에 적용하되 맹신하지 마세요.\n\n" + f"{body}\n" + "" + ) + + return [ + ContextPart( + key="ace.playbook", + text=text, + priority=PartPriority.HIGH, + estimatedTokens=estimateTokens(text), + source=f"knowledgedb:playbook[{intent}]", + ) + ] diff --git a/src/dartlab/ai/conversation/__init__.py b/src/dartlab/ai/conversation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..abb4fb8b85c3f88a1e2cc24b216f4f96ddd16962 --- /dev/null +++ b/src/dartlab/ai/conversation/__init__.py @@ -0,0 +1 @@ +"""AI conversation package.""" diff --git a/src/dartlab/ai/conversation/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/conversation/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..308d5096092c51d8528a823f1be056c2928e3178 Binary files /dev/null and b/src/dartlab/ai/conversation/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/conversation/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/conversation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88c16a980cbdd36d47bf6c8edb8b28f785fbc549 Binary files /dev/null and b/src/dartlab/ai/conversation/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/conversation/__pycache__/history.cpython-312.pyc b/src/dartlab/ai/conversation/__pycache__/history.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c28fdc3122167012c4b3a83fa7feec2a55980ad6 Binary files /dev/null and b/src/dartlab/ai/conversation/__pycache__/history.cpython-312.pyc differ diff --git a/src/dartlab/ai/conversation/__pycache__/history.cpython-313.pyc b/src/dartlab/ai/conversation/__pycache__/history.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4f199862dfbfedbc294669d32445cadc3f83469 Binary files /dev/null and b/src/dartlab/ai/conversation/__pycache__/history.cpython-313.pyc differ diff --git a/src/dartlab/ai/conversation/history.py b/src/dartlab/ai/conversation/history.py new file mode 100644 index 0000000000000000000000000000000000000000..2f8ca59eecf92b1348a6b3688c6e6733755a9ada --- /dev/null +++ b/src/dartlab/ai/conversation/history.py @@ -0,0 +1,219 @@ +"""히스토리 압축/빌드 — server 의존성 없는 순수 로직. + +server/chat.py의 build_history_messages(), compress_history()에서 추출. +경량 타입(types.py) 기반. +""" + +from __future__ import annotations + +import re + +from ..types import HistoryItem + +_MAX_HISTORY_TURNS_DEFAULT = 10 +_MAX_HISTORY_CHARS = 12000 +_MAX_HISTORY_MESSAGE_CHARS = 1800 +_COMPRESS_TURN_THRESHOLD = 5 + + +def _dynamicMaxTurns(historyItems: list[HistoryItem]) -> int: + """Dynamic history window based on average message length.""" + if not historyItems: + return _MAX_HISTORY_TURNS_DEFAULT + avgLen = sum(len(h.text) for h in historyItems) / len(historyItems) + if avgLen < 200: + return 15 # short Q&A exchanges: keep more turns + if avgLen > 1000: + return 6 # long analysis responses: keep fewer turns + return _MAX_HISTORY_TURNS_DEFAULT + + +_METRIC_LINE_RE = re.compile( + r"^\s*\|.*\|.*\|", # 마크다운 테이블 행 + re.MULTILINE, +) +_GRADE_RE = re.compile(r"dCR-[A-D][A-D+\-]*|건전도\s*[\d.]+|ROE\s*=?\s*[\d.]+%|영업이익률\s*[\d.]+%") + + +def _extractKeyLines(text: str) -> str: + """텍스트에서 핵심 수치 행(테이블, 등급)을 추출. 압축 시 보존용.""" + lines = [] + for match in _GRADE_RE.finditer(text): + lines.append(match.group()) + # 마크다운 테이블의 헤더+첫 2행만 보존 + tableLines = _METRIC_LINE_RE.findall(text) + if len(tableLines) >= 3: + lines.extend(tableLines[:3]) + return " | ".join(lines[:5]) if lines else "" + + +def _compress_history_text(text: str) -> str: + """길어진 과거 대화를 앞뒤 핵심만 남기도록 압축. + + 문장 경계(마침표, 줄바꿈)를 존중하여 의미 단위로 절단한다. + 핵심 수치(등급, ROE, 테이블)는 압축에서 보존한다. + """ + if len(text) <= _MAX_HISTORY_MESSAGE_CHARS: + return text + + # 핵심 수치 추출 (압축 후 끝에 추가) + keyLines = _extractKeyLines(text) + + head = int(_MAX_HISTORY_MESSAGE_CHARS * 0.6) + tail = _MAX_HISTORY_MESSAGE_CHARS - head - len(keyLines) - 20 + + # 앞부분: head 근처의 마지막 문장 경계 + head_text = text[:head] + for sep in ("\n", "다. ", ". ", "? ", "! "): + idx = head_text.rfind(sep) + if idx > head * 0.5: + head_text = head_text[: idx + len(sep)] + break + + # 뒷부분: -tail 근처의 첫 문장 경계 + tail_text = text[-max(tail, 200) :] + for sep in ("\n", "다. ", ". ", "? ", "! "): + idx = tail_text.find(sep) + if idx != -1 and idx < len(tail_text) * 0.3: + tail_text = tail_text[idx + len(sep) :] + break + + compressed = head_text.rstrip() + "\n...\n" + tail_text.lstrip() + if keyLines: + compressed += f"\n[핵심 수치: {keyLines}]" + return compressed + + +def build_history_messages(history: list[HistoryItem] | None) -> list[dict[str, str]]: + """히스토리를 LLM messages 포맷으로 변환. 최근 N턴만 유지.""" + if not history: + return [] + maxTurns = _dynamicMaxTurns(history) + trimmed = history[-(maxTurns * 2) :] + prepared: list[dict[str, str]] = [] + for h in trimmed: + role = h.role if h.role in ("user", "assistant") else "user" + text = h.text.strip() + if not text: + continue + if role == "assistant" and h.meta: + summary_parts: list[str] = [] + if h.meta.company or h.meta.stockCode: + company_text = h.meta.company or "?" + if h.meta.stockCode: + company_text += f" ({h.meta.stockCode})" + summary_parts.append(company_text) + if h.meta.market: + summary_parts.append(f"시장: {h.meta.market}") + if h.meta.topicLabel or h.meta.topic: + summary_parts.append(f"주제: {h.meta.topicLabel or h.meta.topic}") + if h.meta.dialogueMode: + summary_parts.append(f"모드: {h.meta.dialogueMode}") + if h.meta.userGoal: + summary_parts.append(f"목표: {h.meta.userGoal}") + if h.meta.modules: + summary_parts.append(f"모듈: {', '.join(h.meta.modules)}") + if h.meta.questionTypes: + summary_parts.append(f"유형: {', '.join(h.meta.questionTypes)}") + if summary_parts: + text = f"[이전 대화 상태: {' | '.join(summary_parts)}]\n{text}" + prepared.append({"role": role, "content": _compress_history_text(text)}) + + total = 0 + selected: list[dict[str, str]] = [] + for item in reversed(prepared): + content_len = len(item["content"]) + if selected and total + content_len > _MAX_HISTORY_CHARS: + break + selected.append(item) + total += content_len + return list(reversed(selected)) + + +_CODE_BLOCK_RE = re.compile(r"```[\s\S]*?```") +_TABLE_ROW_RE = re.compile(r"^\|.*\|$", re.MULTILINE) +_ANALYSIS_TAG_RE = re.compile(r"[\s\S]*?") +_BLANK_LINES_RE = re.compile(r"\n{3,}") + + +def _strip_non_essential(text: str) -> str: + """코드블록, 테이블, analysis 태그를 제거하여 핵심 해석만 남긴다.""" + text = _CODE_BLOCK_RE.sub("", text) + text = _TABLE_ROW_RE.sub("", text) + text = _ANALYSIS_TAG_RE.sub("", text) + text = _BLANK_LINES_RE.sub("\n\n", text) + return text.strip() + + +def compress_history(history: list[HistoryItem] | None) -> list[HistoryItem] | None: + """멀티턴 히스토리 압축: 오래된 턴을 구조화된 요약으로 대체. + + 5턴(10 메시지) 이상이면 가장 오래된 턴들을 1개 요약 메시지로 교체. + 최근 4턴(8 메시지)은 원본 유지. + + Claude Code compaction 패턴 흡수: + - 비핵심 콘텐츠(코드블록/테이블) 제거 후 압축 + - summarizeResponse()로 종합/결론 섹션 추출 시도 + - 압축 후 "이어서 진행" 지시 추가 + """ + if not history or len(history) <= _COMPRESS_TURN_THRESHOLD * 2: + return history + + keep_count = 8 + old_messages = history[:-keep_count] + recent_messages = history[-keep_count:] + + companies_mentioned: set[str] = set() + topics_discussed: list[str] = [] + qa_pairs: list[str] = [] + + for msg in old_messages: + text = msg.text.strip() + if not text: + continue + + if msg.meta: + if msg.meta.company: + companies_mentioned.add(msg.meta.company) + if msg.meta.topicLabel: + topics_discussed.append(msg.meta.topicLabel) + + if msg.role == "user": + brief = text[:80] + "..." if len(text) > 80 else text + qa_pairs.append(f"- Q: {brief}") + elif msg.role == "assistant": + cleaned = _strip_non_essential(text) + # summarizeResponse로 종합/결론 섹션 추출 시도 + try: + from ..memory.summarizer import summarizeResponse + + summary = summarizeResponse(cleaned, maxChars=150) + except ImportError: + summary = "" + if not summary: + sentences = cleaned.split(".") + summary = ".".join(sentences[:2]).strip() + if summary and not summary.endswith("."): + summary += "." + if len(summary) > 150: + summary = summary[:150] + "..." + if summary: + qa_pairs.append(f" A: {summary}") + + if not qa_pairs: + return history + + summary_lines = ["[이전 대화 요약]"] + if companies_mentioned: + summary_lines.append(f"관심 기업: {', '.join(sorted(companies_mentioned))}") + if topics_discussed: + unique_topics = list(dict.fromkeys(topics_discussed))[:5] + summary_lines.append(f"분석 주제: {', '.join(unique_topics)}") + summary_lines.append("") + summary_lines.extend(qa_pairs[-8:]) + summary_lines.append("") + summary_lines.append("위 대화를 이어서 진행하라. 이미 논의된 내용을 반복하지 마라.") + + summary_text = "\n".join(summary_lines) + summary_msg = HistoryItem(role="assistant", text=summary_text) + return [summary_msg, *recent_messages] diff --git a/src/dartlab/ai/memory/__init__.py b/src/dartlab/ai/memory/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..960a7e870b5048b1e1d46a9550b4f3275cc7bfaa --- /dev/null +++ b/src/dartlab/ai/memory/__init__.py @@ -0,0 +1,8 @@ +"""세션 간 분석 메모리 — SQLite 기반. + +종목별 분석 히스토리를 영속하여 재분석 시 이전 맥락을 활용한다. +""" + +from dartlab.ai.memory.store import AnalysisMemory + +__all__ = ["AnalysisMemory"] diff --git a/src/dartlab/ai/memory/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/memory/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4ead629be01d2a1d6adfd17aa17b059ab6c808f Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/memory/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/memory/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f77887ee859fbc72a52fc8968aac79ca6c418a88 Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/memory/__pycache__/store.cpython-312.pyc b/src/dartlab/ai/memory/__pycache__/store.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..626c1ba99d414c28eab4a731e9b5795c505b1291 Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/store.cpython-312.pyc differ diff --git a/src/dartlab/ai/memory/__pycache__/store.cpython-313.pyc b/src/dartlab/ai/memory/__pycache__/store.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..271188f08aaa4465c037f9c41e219809625ad5bb Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/store.cpython-313.pyc differ diff --git a/src/dartlab/ai/memory/__pycache__/summarizer.cpython-312.pyc b/src/dartlab/ai/memory/__pycache__/summarizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e6026a35089ba30d32c7984bedd34f3c521ab07 Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/summarizer.cpython-312.pyc differ diff --git a/src/dartlab/ai/memory/__pycache__/summarizer.cpython-313.pyc b/src/dartlab/ai/memory/__pycache__/summarizer.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2060b3ec7f38e894d7811734919459a52720da44 Binary files /dev/null and b/src/dartlab/ai/memory/__pycache__/summarizer.cpython-313.pyc differ diff --git a/src/dartlab/ai/memory/store.py b/src/dartlab/ai/memory/store.py new file mode 100644 index 0000000000000000000000000000000000000000..4c284359fd1a0e26c53bb9027599985990d2ecea --- /dev/null +++ b/src/dartlab/ai/memory/store.py @@ -0,0 +1,131 @@ +"""분석 메모리 저장소 — KnowledgeDB 단일 DB 위임. + +Company 객체(200~500MB)는 저장하지 않는다. +stockCode + 시점 + 질문 요약 + 결과 요약만 저장하여 메모리 안전. + +내부적으로 KnowledgeDB(ai_knowledge.db)의 executions 테이블을 사용한다. +공개 API(saveAnalysis, recallForStock, toPromptContext)는 기존과 동일. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass + +log = logging.getLogger(__name__) + +# 싱글턴 인스턴스 +_instance: AnalysisMemory | None = None + + +@dataclass(frozen=True) +class MemoryRecord: + """저장된 분석 기록.""" + + stockCode: str + question: str + questionType: str + resultSummary: str + timestamp: float + grade: str | None = None + keyMetrics: str = "" + + +class AnalysisMemory: + """KnowledgeDB 위임 분석 히스토리 저장소.""" + + def __init__(self) -> None: + self._db = None + + def _ensureDb(self): + """lazy init — KnowledgeDB 싱글턴에 위임.""" + if self._db is not None: + return self._db + from dartlab.ai.persistence.knowledge_db import KnowledgeDB + + self._db = KnowledgeDB.get() + return self._db + + def saveAnalysis( + self, + stockCode: str, + question: str, + questionType: str = "", + resultSummary: str = "", + grade: str | None = None, + keyMetrics: str = "", + ) -> None: + """분석 결과 저장. + + keyMetrics: 핵심 수치 구조화 문자열 (예: "ROE=12.3%|영업이익률=8.9%|등급=dCR-AA+") + """ + try: + db = self._ensureDb() + db.save_execution( + stock_code=stockCode, + question=question, + question_type=questionType, + result_summary=resultSummary, + grade=grade or "", + key_metrics=keyMetrics, + ) + except (ImportError, OSError) as e: + log.warning("분석 메모리 저장 실패: %s", e) + + def recallForStock( + self, + stockCode: str, + limit: int = 5, + decayDays: int = 90, + ) -> list[MemoryRecord]: + """종목별 최근 분석 기록 조회 (시간 감쇠 적용).""" + try: + db = self._ensureDb() + rows = db.recall_for_stock(stockCode, limit=limit, decay_days=decayDays) + return [ + MemoryRecord( + stockCode=r["stock_code"] or "", + question=r["question"], + questionType=r["question_type"] or "", + resultSummary=r["result_summary"] or "", + timestamp=r["timestamp"], + grade=r["grade"] or None, + keyMetrics=r["key_metrics"] or "", + ) + for r in rows + ] + except (ImportError, OSError): + return [] + + def toPromptContext(self, stockCode: str) -> str: + """이전 분석 기록을 프롬프트용 텍스트로 변환. + + keyMetrics가 있으면 핵심 수치를 포함하여 멀티턴 참조 가능. + """ + records = self.recallForStock(stockCode) + if not records: + return "" + lines = ["## 이전 분석 기록"] + for r in records: + import datetime + + dt = datetime.datetime.fromtimestamp(r.timestamp).strftime("%Y-%m-%d") + grade_str = f" [등급: {r.grade}]" if r.grade else "" + lines.append(f"- **{dt}** ({r.questionType}){grade_str}: {r.question}") + if r.keyMetrics: + lines.append(f" {r.keyMetrics}") + elif r.resultSummary: + lines.append(f" -> {r.resultSummary[:200]}") + return "\n".join(lines) + + def close(self) -> None: + """연결 종료.""" + self._db = None + + +def getMemory() -> AnalysisMemory: + """싱글턴 메모리 인스턴스.""" + global _instance + if _instance is None: + _instance = AnalysisMemory() + return _instance diff --git a/src/dartlab/ai/memory/summarizer.py b/src/dartlab/ai/memory/summarizer.py new file mode 100644 index 0000000000000000000000000000000000000000..0aa815a4c7c25d6d135f2d4d1ac4de02f33e5b9f --- /dev/null +++ b/src/dartlab/ai/memory/summarizer.py @@ -0,0 +1,55 @@ +"""분석 결과 요약기 — 규칙 기반 (LLM 호출 없이). + +LLM 답변에서 저장용 요약을 추출한다. +""" + +from __future__ import annotations + +import re + + +def summarizeResponse(response: str, maxChars: int = 500) -> str: + """LLM 응답에서 핵심 요약 추출.""" + if not response: + return "" + + # 1. "종합" 또는 "결론" 섹션 추출 시도 + conclusionMatch = re.search( + r"(?:##?\s*(?:종합|결론|요약|판단|Bull|강점).*?\n)(.*?)(?:\n##|\Z)", + response, + re.DOTALL, + ) + if conclusionMatch: + text = conclusionMatch.group(1).strip() + return _cleanText(text, maxChars) + + # 2. 마지막 단락 추출 + paragraphs = [p.strip() for p in response.split("\n\n") if p.strip()] + if paragraphs: + lastParagraph = paragraphs[-1] + # 테이블이나 코드 블록이 아닌 마지막 텍스트 단락 + for p in reversed(paragraphs): + if not p.startswith("|") and not p.startswith("```"): + return _cleanText(p, maxChars) + return _cleanText(lastParagraph, maxChars) + + return _cleanText(response, maxChars) + + +def extractGrade(response: str) -> str | None: + """응답에서 등급 정보 추출.""" + # "종합 등급: B+" 같은 패턴 + gradeMatch = re.search(r"종합\s*(?:등급|점수)\s*[::]\s*([A-F][+-]?)", response) + if gradeMatch: + return gradeMatch.group(1) + return None + + +def _cleanText(text: str, maxChars: int) -> str: + """마크다운 정리 + 길이 제한.""" + # 마크다운 헤더, 볼드, 이모지 제거 + cleaned = re.sub(r"[#*_`]", "", text) + cleaned = re.sub(r"\s+", " ", cleaned).strip() + if len(cleaned) > maxChars: + return cleaned[: maxChars - 3] + "..." + return cleaned diff --git a/src/dartlab/ai/patterns/__init__.py b/src/dartlab/ai/patterns/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..63b074de8e1e50a909f6159397a3fc100c8780fa --- /dev/null +++ b/src/dartlab/ai/patterns/__init__.py @@ -0,0 +1,139 @@ +"""분석 템플릿(Template) — 내장 + 사용자 정의 분석 프레임워크. + +내장 템플릿: src/dartlab/ai/patterns/*.md (수정 불가) +사용자 템플릿: ~/.dartlab/templates/*.md (자유 추가/수정) + +사용법:: + + dartlab.ask("삼성전자 분석", template="가치투자") + dartlab.templates() # 전체 목록 + dartlab.saveTemplate("my", content="## 내 기준\\n...") +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +_BUILTIN_DIR = Path(__file__).parent +_USER_DIR = Path.home() / ".dartlab" / "templates" + +# ── 캐시 ── +_BUILTIN: dict[str, str] = {} +_USER: dict[str, str] = {} + +# 하위호환: 기존 PATTERNS 참조 +PATTERNS: dict[str, str] = _BUILTIN + + +def _load_builtin() -> None: + """내장 *.md 패턴을 한번만 로드.""" + if _BUILTIN: + return + for md_file in _BUILTIN_DIR.glob("*.md"): + _BUILTIN[md_file.stem] = md_file.read_text(encoding="utf-8") + + +def _load_user() -> None: + """사용자 ~/.dartlab/templates/*.md를 로드.""" + _USER.clear() + if not _USER_DIR.exists(): + return + for md_file in _USER_DIR.glob("*.md"): + _USER[md_file.stem] = md_file.read_text(encoding="utf-8") + + +def _all_templates() -> dict[str, str]: + """내장 + 사용자 통합. 사용자가 내장과 같은 이름이면 사용자 우선.""" + _load_builtin() + _load_user() + merged = dict(_BUILTIN) + merged.update(_USER) # 사용자 우선 + return merged + + +# ── 공개 API ── + + +def get_template(name: str) -> str | None: + """템플릿 이름으로 내용 반환. 내장 + 사용자 통합 검색.""" + all_t = _all_templates() + # 정확 매칭 + if name in all_t: + return all_t[name] + # 한글 name → description 매칭 (frontmatter에서) + for key, content in all_t.items(): + if f"name: {name}" in content[:200]: + return content + return None + + +def get_modules(names: list[str]) -> str | None: + """여러 모듈을 합쳐서 반환. 먼저 나온 모듈이 우선. + + 최대 3개까지. 모듈 간 충돌 시 앞 모듈 우선 지시를 포함. + """ + texts: list[str] = [] + for name in names[:3]: + t = get_template(name) + if t: + texts.append(t) + if not texts: + return None + if len(texts) == 1: + return texts[0] + preamble = "아래 분석 모듈이 활성화되어 있습니다. 모듈 간 지시가 충돌하면 먼저 나온 모듈을 우선하세요.\n" + return preamble + "\n\n---\n\n".join(texts) + + +def list_templates() -> list[dict[str, Any]]: + """사용 가능한 템플릿 목록 [{name, description, source}].""" + all_t = _all_templates() + result = [] + for key, content in sorted(all_t.items()): + desc = "" + source = "user" if key in _USER else "builtin" + # frontmatter에서 description 추출 + if content.startswith("---"): + lines = content.split("\n") + for line in lines[1:]: + if line.strip() == "---": + break + if line.startswith("description:"): + desc = line.split(":", 1)[1].strip() + elif line.startswith("name:"): + pass # name은 key로 이미 있음 + result.append({"name": key, "description": desc, "source": source}) + return result + + +def save_template(name: str, *, content: str | None = None, file: str | None = None) -> Path: + """사용자 템플릿 저장. ~/.dartlab/templates/{name}.md""" + _USER_DIR.mkdir(parents=True, exist_ok=True) + path = _USER_DIR / f"{name}.md" + + if file is not None: + src = Path(file).expanduser() + text = src.read_text(encoding="utf-8") + elif content is not None: + text = content + else: + raise ValueError("content 또는 file 중 하나를 지정하세요.") + + path.write_text(text, encoding="utf-8") + _USER.clear() # 캐시 무효화 + return path + + +# ── 하위호환 ── + + +def get_pattern(name: str) -> str | None: + """기존 pattern API 하위호환 → get_template으로 위임.""" + return get_template(name) + + +def list_patterns() -> list[str]: + """기존 pattern API 하위호환.""" + _load_builtin() + return sorted(_BUILTIN.keys()) diff --git a/src/dartlab/ai/patterns/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/patterns/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c11b3b8c314b7e42dffc54d8e7313bd9ab2fe432 Binary files /dev/null and b/src/dartlab/ai/patterns/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/patterns/financial.md b/src/dartlab/ai/patterns/financial.md new file mode 100644 index 0000000000000000000000000000000000000000..e4a76d7f2b98484f4493c4fe04af997b4afde03b --- /dev/null +++ b/src/dartlab/ai/patterns/financial.md @@ -0,0 +1,44 @@ +--- +name: 재무분석 +description: 수익성/안정성/성장성/밸류에이션 종합 재무 분석 +--- + +# 재무 분석 패턴 + +당신은 전문 재무 분석가입니다. 다음 프레임워크로 체계적으로 분석하세요. + +## 분석 구조 + +### 1. 수익성 분석 +- 매출 추세 (3개년+) +- 영업이익률, 순이익률 변화 +- ROE, ROA 수준 및 추세 +- 업종 대비 수익성 위치 + +### 2. 안정성 분석 +- 부채비율, 유동비율 +- 이자보상배율 (ICR) +- 차입금 의존도 및 만기 구조 +- 현금흐름 충분성 + +### 3. 성장성 분석 +- 매출/이익 성장률 추세 +- R&D 투자 비중 +- CAPEX 대비 감가상각 +- 신규 사업 진출 현황 + +### 4. 밸류에이션 +- PER, PBR, PSR 현재 수준 +- 과거 밸류에이션 밴드 +- 배당수익률 및 배당성향 +- 동종업종 비교 + +### 5. 종합 판단 +- 핵심 강점 3가지 +- 핵심 리스크 3가지 +- 투자 매력도 (★~★★★★★) + +## 규칙 +- 수치는 반드시 출처와 함께 제시 +- 추정/의견과 팩트를 명확히 구분 +- 부정적 요소도 솔직하게 기술 diff --git a/src/dartlab/ai/patterns/growth.md b/src/dartlab/ai/patterns/growth.md new file mode 100644 index 0000000000000000000000000000000000000000..5ffe1ff4d84549733094a0326c4b9fa37b9a1550 --- /dev/null +++ b/src/dartlab/ai/patterns/growth.md @@ -0,0 +1,33 @@ +--- +name: 성장투자 +description: 매출 모멘텀, 성장 지속성, 질적 성장 판단 +--- + +# 성장투자 관점 분석 + +당신은 성장주 투자자입니다. 빠르게 성장하는 회사를 찾되, "진짜 성장"인지 검증합니다. + +## 분석 기준 + +### 1. 성장 모멘텀 +- c.analysis("financial", "성장성")으로 매출/이익 성장률 추세 확인 +- CAGR 3년 + 최근 분기 가속 여부 +- 매출 vs 이익 성장 괴리 (외형만 큰 성장 경계) + +### 2. 성장의 질 +- 본업 매출 vs 일회성/인수 효과 구분 +- c.analysis("financial", "수익구조")로 부문별 기여도 확인 +- 고성장 부문의 마진이 유지/개선되는지 + +### 3. 성장 지속성 +- c.analysis("forecast", "매출전망")으로 앙상블 예측 확인 +- 라이프사이클 단계 (고성장/성숙/전환) +- 시계열 R², 컨센서스 방향, 구조변화 감지 + +### 4. 시장 내 위치 +- dartlab.scan("financial", "growth")로 전체 대비 성장 순위 +- 동종업계 대비 성장률 프리미엄 + +### 5. 종합 판단 +- "이 성장이 3년 더 지속될 수 있는가?" +- PEG 비율, 성장 촉매, 감속 리스크 diff --git a/src/dartlab/ai/patterns/prediction.md b/src/dartlab/ai/patterns/prediction.md new file mode 100644 index 0000000000000000000000000000000000000000..db5a088268d13fe181027cd98254731c2c3ce059 --- /dev/null +++ b/src/dartlab/ai/patterns/prediction.md @@ -0,0 +1,54 @@ +--- +name: 예측신호 +description: 이익 모멘텀, 구조변화, 거시경제 신호 종합 예측 +--- + +# 예측신호 분석 패턴 + +당신은 기업 실적 방향성 예측 전문가입니다. 다음 다중 소스 신호를 종합하여 이 회사의 실적이 어디로 향하는지 판단하세요. + +## 분석 절차 + +### 1. 예측신호 수집 +```python +result = c.analysis("forecast", "예측신호") +``` +위 명령으로 5개 신호를 수집하세요: +- 이익 모멘텀 (Sloan 분해 + DuPont 추세) +- 횡단면 피어 예측 (동종업계 대비 괴리) +- 구조변화 감지 (매출/이익/마진/ROE 변화점) +- 거시경제 민감도 (섹터별 관련 지표) +- 공시 변화 신호 (사업보고서 텍스트 변화량) + +### 2. 공시 원문 확인 +```python +c.show("riskFactors") +c.show("businessOverview") +``` +변화가 큰 섹션의 원문을 직접 읽고, 변화가 긍정적인지 부정적인지 판단하세요. + +### 3. 매크로 지표 확인 +예측신호의 `relevantIndicators`에 나온 지표를 조회하세요: +```python +dartlab.gather("macro", "BASE_RATE") +``` + +### 4. 다관점 해석 +같은 데이터를 세 가지 관점에서 해석하세요: + +**가치투자 관점**: 안전마진이 있는가? FCF가 이익을 뒷받침하는가? 발생액 비율은? +**성장투자 관점**: 매출 가속 중인가? 피어 대비 성장률은? 구조적 성장인가 일회성인가? +**리스크 관점**: 구조변화가 있었는가? 부채 구조는? 공시에서 새 위험이 나타났는가? + +### 5. 종합 판단 +- 실적 방향: 상승 / 유지 / 하락 +- 신뢰도: 높음 / 중간 / 낮음 +- 핵심 동인 3가지 +- 핵심 리스크 3가지 +- 관점별 의견이 갈리는 부분이 있으면 명시 + +## 규칙 +- 예측신호 엔진의 숫자를 먼저 제시하고, 그 위에 해석을 얹어라 +- 공시 원문을 읽지 않고 톤을 추정하지 마라 +- "상승할 것이다"가 아니라 "상승 신호가 우세하다"로 표현 (투자 권유 금지) +- 신호가 상충하면 솔직하게 불확실성을 인정하라 diff --git a/src/dartlab/ai/patterns/quick_check.md b/src/dartlab/ai/patterns/quick_check.md new file mode 100644 index 0000000000000000000000000000000000000000..ab1110fbda28de36c2fc062d666f7c13e4080563 --- /dev/null +++ b/src/dartlab/ai/patterns/quick_check.md @@ -0,0 +1,31 @@ +--- +name: 퀵체크 +description: 3분 안에 핵심만 파악하는 빠른 점검 +--- + +# 퀵 체크 (3분 분석) + +빠르게 핵심만 파악한다. 깊이보다 속도. + +## 분석 순서 + +### 1. 수익성 (30초) +- c.analysis("financial", "수익성")에서 marginTrend만 확인 +- 영업이익률 추세: 개선/악화/횡보? +- ROE 수준: 10% 이상이면 양호 + +### 2. 안정성 (30초) +- c.analysis("financial", "안정성")에서 부채비율 + 이자보상배율 +- 부채비율 < 100%, 이자보상 > 3배이면 OK + +### 3. 최근 뉴스 (60초) +- newsSearch("회사명 실적 이슈", days=7) +- 긍정/부정 핵심 1~2개만 요약 + +### 4. 한줄 결론 (30초) +- "괜찮다/보통/주의" 중 하나 + 이유 1줄 +- 더 깊이 볼 축 1개 추천 + +## 규칙 +- 코드 1회, review 0개, analysis 2개 이내 +- 30초 안에 끝낼 수 있는 분량만 diff --git a/src/dartlab/ai/patterns/risk.md b/src/dartlab/ai/patterns/risk.md new file mode 100644 index 0000000000000000000000000000000000000000..6d288a788988c574be82dce31eb1795860ad0a7a --- /dev/null +++ b/src/dartlab/ai/patterns/risk.md @@ -0,0 +1,40 @@ +--- +name: 리스크점검 +description: 부채, 유동성, 이익 변동성 리스크 분석 +--- + +# 리스크 분석 패턴 + +당신은 기업 리스크 전문 분석가입니다. 사업보고서의 위험 요인을 체계적으로 평가하세요. + +## 분석 구조 + +### 1. 재무 리스크 +- 유동성 위험 (단기 채무 상환 능력) +- 신용 리스크 (부채비율, ICR) +- 환율 리스크 (외화 노출도) +- 금리 리스크 (변동금리 차입금) + +### 2. 사업 리스크 +- 매출 집중도 (고객/제품/지역) +- 경쟁 환경 변화 +- 원재료 가격 변동 +- 기술 변화 및 진부화 + +### 3. 규제/법률 리스크 +- 규제 환경 변화 +- 소송/분쟁 현황 +- 환경/ESG 규제 영향 + +### 4. 지배구조 리스크 +- 지배구조 투명성 +- 최대주주 지분율 +- 관계사 거래 비중 + +### 5. 리스크 등급 +각 카테고리를 ■낮음 ■■보통 ■■■높음 ■■■■매우높음으로 평가 + +## 규칙 +- 사업보고서 원문의 "위험 요인" 섹션을 반드시 참조 +- 정량적 근거 제시 (비율, 금액, 비중) +- 리스크 완화 요인도 함께 기술 diff --git a/src/dartlab/ai/patterns/valuation.md b/src/dartlab/ai/patterns/valuation.md new file mode 100644 index 0000000000000000000000000000000000000000..d2b6a80244ce7e63e4c640ccc1773f95e3048b1c --- /dev/null +++ b/src/dartlab/ai/patterns/valuation.md @@ -0,0 +1,38 @@ +--- +name: 밸류에이션 +description: DCF/PER/PBR 등 적정 가치 다각도 평가 +--- + +# 밸류에이션 분석 패턴 + +당신은 주식 밸류에이션 전문가입니다. 기업의 적정 가치를 다각도로 평가하세요. + +## 분석 구조 + +### 1. 상대 밸류에이션 +- PER: 현재 vs 과거 밴드 vs 업종 평균 +- PBR: 현재 vs ROE 수준 대비 적정성 +- PSR: 매출 성장성 대비 적정성 +- EV/EBITDA: 기업가치 대비 영업이익 + +### 2. 배당 가치 +- 배당수익률 추이 +- 배당성향 (순이익 대비) +- 자사주 매입/소각 현황 +- 주주환원율 종합 + +### 3. 성장 프리미엄 +- 매출 성장률 (과거 3년 + 업종 대비) +- 이익 성장률 (영업이익, 순이익) +- PEG 비율 (PER / 이익성장률) +- 신규 사업/시장 확장 모멘텀 + +### 4. 종합 밸류에이션 판단 +- 현재 주가 수준: 저평가 / 적정 / 고평가 +- 핵심 근거 3가지 +- 밸류에이션 변화 촉매 (상승/하락) + +## 규칙 +- 단일 지표로 판단하지 않고 복수 지표 교차 검증 +- 업종 특성 반영 (금융/유틸리티는 PBR, 성장주는 PSR 중심) +- 미래 추정치 사용 시 근거 명시 diff --git a/src/dartlab/ai/patterns/value_investor.md b/src/dartlab/ai/patterns/value_investor.md new file mode 100644 index 0000000000000000000000000000000000000000..c45f603df47c97dc6da5a2e1a97f8f529d720c23 --- /dev/null +++ b/src/dartlab/ai/patterns/value_investor.md @@ -0,0 +1,34 @@ +--- +name: 가치투자 +description: 안전마진, FCF, 배당 중심 보수적 분석 +--- + +# 가치투자 관점 분석 + +당신은 보수적 가치투자자입니다. 싸게 사서 안전하게 보유하는 것이 목표입니다. + +## 분석 기준 + +### 1. 내재가치 vs 현재가 +- c.analysis("valuation", "가치평가")로 DCF/DDM/상대가치 확인 +- 안전마진 30% 이상이면 매력적 +- 민감도(할인율/성장률 변동)를 반드시 확인 + +### 2. 현금흐름의 질 +- c.analysis("financial", "현금흐름")으로 FCF 추세 확인 +- 영업CF/순이익 > 100%인지 (이익의 현금 전환) +- FCF yield(FCF/시가총액) > 5%이면 매력적 + +### 3. 자본 배분 +- c.analysis("financial", "자본배분")으로 배당+자사주 확인 +- 배당지속성(몇 년 연속?)과 배당성향 +- 자사주 매입/소각 이력 + +### 4. 재무 안전판 +- c.analysis("financial", "안정성")으로 부채/이자보상/순현금 확인 +- c.notes.borrowings로 차입금 구조(단기/장기) 상세 +- Altman Z-Score > 3이면 안전 + +### 5. 종합 판단 +- "지금 이 가격에 사서 5년 보유할 수 있는가?" +- 핵심 리스크 3가지, 안전마진 수치, 매수/보류/회피 판정 diff --git a/src/dartlab/ai/persistence/__init__.py b/src/dartlab/ai/persistence/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eefbe9a64b10a1a45e76f96b0ae323d334350273 --- /dev/null +++ b/src/dartlab/ai/persistence/__init__.py @@ -0,0 +1,23 @@ +"""ai 엔진 영속성 — KnowledgeDB. + +dartlab AI 의 단일 영속 저장소. selfai 폐기 후 영속성 책임만 분리해서 보존. + +테이블: + - executions: 모든 AI 실행 기록 (질문/결과/등급/모드) + - insights: 기업별 심층 분석 서사 (자기성장 루프) + - skills: 성공한 코드 패턴 (legacy, 폐기 예정) + - error_patterns: 에러 패턴 (legacy, 폐기 예정) + - meta: DB 버전 / 마이그레이션 상태 + +대표 진입점: + >>> from dartlab.ai.persistence import KnowledgeDB + >>> db = KnowledgeDB.get() + >>> db.save_execution(...) + >>> db.get_insight("005930") +""" + +from __future__ import annotations + +from dartlab.ai.persistence.knowledge_db import KnowledgeDB + +__all__ = ["KnowledgeDB"] diff --git a/src/dartlab/ai/persistence/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/persistence/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49ed23fcfa53a5a82a2c8e7ae43bc808b8eb6210 Binary files /dev/null and b/src/dartlab/ai/persistence/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/persistence/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/persistence/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af753b6e4fa0c2be6db401ca381d8853472ee91b Binary files /dev/null and b/src/dartlab/ai/persistence/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-312.pyc b/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95d3a3e67e668983b3c93c51ec54c4463550eac4 Binary files /dev/null and b/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-312.pyc differ diff --git a/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-313.pyc b/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e7b1440fae3119c3f8c81a519c51a15ebd05e73 Binary files /dev/null and b/src/dartlab/ai/persistence/__pycache__/knowledge_db.cpython-313.pyc differ diff --git a/src/dartlab/ai/persistence/knowledge_db.py b/src/dartlab/ai/persistence/knowledge_db.py new file mode 100644 index 0000000000000000000000000000000000000000..a55e4e20a0178b86d1eedd858f6943ec2012af21 --- /dev/null +++ b/src/dartlab/ai/persistence/knowledge_db.py @@ -0,0 +1,1328 @@ +"""KnowledgeDB - dartlab AI의 자기성장형 영속성 단일 DB. + +5개 분산 저장소(analysisMemory.db, skill_library.db, error_patterns.db, +audit_log.jsonl, auditAnalysis/*.md)를 하나의 SQLite DB로 통합한다. + +DB 위치: ~/.dartlab/ai_knowledge.db + +핵심 테이블: +- executions: 모든 AI 실행 기록 (질문, 결과, 등급, 메트릭) +- skills: 성공한 코드 패턴 (few-shot 라이브러리) +- error_patterns: 에러 패턴 + 복구 코드 +- insights: 기업별 심층 분석 서사 (auditAnalysis에서 축적) +- meta: DB 버전/마이그레이션 상태 +""" + +from __future__ import annotations + +import json +import logging +import re +import sqlite3 +import threading +import time +from dataclasses import dataclass +from pathlib import Path + +log = logging.getLogger(__name__) + +_DB_PATH = Path.home() / ".dartlab" / "dartlab_knowledge.db" +_LEGACY_DB_PATH = Path.home() / ".dartlab" / "ai_knowledge.db" +_MIGRATION_VERSION = 1 +_MAX_INSIGHT_NARRATIVE = 2000 +_MAX_SUMMARY_CHARS = 500 + +# ── 싱글턴 ───────────────────────────────────────────────── + +_instance: KnowledgeDB | None = None + + +# ── 데이터 클래스 ────────────────────────────────────────── + + +@dataclass(frozen=True) +class InsightRecord: + """기업별 인사이트.""" + + stock_code: str + narrative: str + strengths: list[str] + weaknesses: list[str] + sector: str + source: str + created_at: float + expires_at: float | None + + +# ── 스키마 ───────────────────────────────────────────────── + +_SCHEMA_SQL = """ +CREATE TABLE IF NOT EXISTS executions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + stock_code TEXT, + question TEXT NOT NULL, + question_type TEXT DEFAULT '', + mode TEXT DEFAULT 'analysis', + result_summary TEXT DEFAULT '', + grade TEXT DEFAULT '', + key_metrics TEXT DEFAULT '', + duration_sec REAL, + code_rounds INTEGER DEFAULT 0, + has_error INTEGER DEFAULT 0, + provider TEXT DEFAULT '', + model TEXT DEFAULT '', + created_at REAL NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_exec_stock ON executions(stock_code); +CREATE INDEX IF NOT EXISTS idx_exec_mode ON executions(mode); +CREATE INDEX IF NOT EXISTS idx_exec_ts ON executions(created_at); + +CREATE TABLE IF NOT EXISTS skills ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + question TEXT NOT NULL, + category TEXT NOT NULL DEFAULT 'general', + tools_used TEXT NOT NULL DEFAULT '[]', + code_template TEXT NOT NULL, + result_keys TEXT NOT NULL DEFAULT '[]', + success_count INTEGER NOT NULL DEFAULT 1, + quality_score REAL NOT NULL DEFAULT 0.8, + mode TEXT DEFAULT 'analysis', + created_at REAL NOT NULL, + last_used REAL NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_skill_cat ON skills(category); +CREATE INDEX IF NOT EXISTS idx_skill_mode ON skills(mode); + +CREATE TABLE IF NOT EXISTS error_patterns ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + error_type TEXT NOT NULL, + error_signature TEXT NOT NULL, + wrong_code TEXT NOT NULL DEFAULT '', + correct_code TEXT NOT NULL DEFAULT '', + tool_name TEXT NOT NULL DEFAULT '', + frequency INTEGER NOT NULL DEFAULT 1, + last_seen REAL NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_ep_sig ON error_patterns(error_signature); +CREATE INDEX IF NOT EXISTS idx_ep_tool ON error_patterns(tool_name); + +CREATE TABLE IF NOT EXISTS insights ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + stock_code TEXT NOT NULL, + narrative TEXT NOT NULL, + strengths TEXT DEFAULT '[]', + weaknesses TEXT DEFAULT '[]', + sector TEXT DEFAULT '', + source TEXT DEFAULT 'audit', + created_at REAL NOT NULL, + expires_at REAL +); +CREATE INDEX IF NOT EXISTS idx_ins_stock ON insights(stock_code); +CREATE INDEX IF NOT EXISTS idx_ins_sector ON insights(sector); + +CREATE TABLE IF NOT EXISTS meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +-- ACE (Agentic Context Engineering) playbook 테이블 +-- arxiv.org/abs/2510.04618 — Generator/Reflector/Curator 폐쇄 루프 +-- delta merge: 신규 bullet INSERT, 중복은 success/fail 카운트만 갱신 (삭제 금지 — context collapse 방지) +CREATE TABLE IF NOT EXISTS playbook ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + intent TEXT NOT NULL, + sector TEXT NOT NULL DEFAULT '', + bullet TEXT NOT NULL, + success_count INTEGER NOT NULL DEFAULT 0, + fail_count INTEGER NOT NULL DEFAULT 0, + quality REAL NOT NULL DEFAULT 0.5, + source TEXT NOT NULL DEFAULT 'reflection', + created_at REAL NOT NULL, + last_used REAL NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_pb_intent ON playbook(intent); +CREATE INDEX IF NOT EXISTS idx_pb_quality ON playbook(quality DESC); +CREATE UNIQUE INDEX IF NOT EXISTS idx_pb_unique ON playbook(intent, sector, bullet); +""" + + +# ── KnowledgeDB ──────────────────────────────────────────── + + +class KnowledgeDB: + """dartlab AI 단일 영속성 DB.""" + + def __init__(self, db_path: Path | None = None) -> None: + self._db_path = db_path or _DB_PATH + self._conn: sqlite3.Connection | None = None + # 멀티스레드 write 직렬화 (P1-1 ThreadPoolExecutor 호환) + # WAL + check_same_thread=False 면 read 는 동시 가능, write 만 lock 필요. + self._write_lock = threading.RLock() + + # ── 연결 관리 ────────────────────────────────────────── + + def _ensure_db(self) -> sqlite3.Connection: + """lazy init. + + check_same_thread=False — P1-1 백그라운드 thread 에서도 같은 connection + 사용 가능. WAL 모드라 read 는 동시 안전, write 는 _write_lock 으로 직렬화. + """ + if self._conn is not None: + return self._conn + + self._db_path.parent.mkdir(parents=True, exist_ok=True) + + # 기존 ai_knowledge.db → dartlab_knowledge.db 자동 rename + if not self._db_path.exists() and _LEGACY_DB_PATH.exists(): + try: + _LEGACY_DB_PATH.rename(self._db_path) + log.info("DB rename: %s → %s", _LEGACY_DB_PATH.name, self._db_path.name) + except OSError: + pass # rename 실패 시 새로 생성 + + conn = sqlite3.connect( + str(self._db_path), + timeout=5, + check_same_thread=False, + ) + conn.execute("PRAGMA journal_mode=WAL") + conn.executescript(_SCHEMA_SQL) + conn.commit() + self._conn = conn + return conn + + @property + def connection(self) -> sqlite3.Connection: + """기존 모듈이 connection 직접 접근할 때 사용.""" + return self._ensure_db() + + def close(self) -> None: + """SQLite 연결 닫기. 싱글톤 재초기화 시 호출.""" + if self._conn: + self._conn.close() + self._conn = None + + # ── executions ───────────────────────────────────────── + + def save_execution( + self, + stock_code: str | None, + question: str, + *, + question_type: str = "", + mode: str = "analysis", + result_summary: str = "", + grade: str = "", + key_metrics: str = "", + duration_sec: float | None = None, + code_rounds: int = 0, + has_error: bool = False, + provider: str = "", + model: str = "", + ) -> None: + """AI 실행 1건을 ``executions`` 테이블에 기록. + + Args: + stock_code: 종목코드 (없으면 None — market-level 질문). + question: 사용자 질문 (200자로 절단). + question_type: 질문 분류 ("analysis"/"compare"/"forecast" 등). + mode: "analysis" | "coding". + result_summary: 답변 요약 (_MAX_SUMMARY_CHARS 자로 절단). + grade: 분석 결과 등급 (있으면). + key_metrics: JSON 문자열 형태의 핵심 지표 (500자로 절단). + duration_sec: 실행 시간 (초). + code_rounds: 코드 실행 round 수. + has_error: 에러 발생 여부. + provider: LLM provider 식별자. + model: 모델 식별자. + """ + conn = self._ensure_db() + summary = result_summary[:_MAX_SUMMARY_CHARS] if result_summary else "" + conn.execute( + "INSERT INTO executions " + "(stock_code, question, question_type, mode, result_summary, grade, " + "key_metrics, duration_sec, code_rounds, has_error, provider, model, created_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + stock_code, + question[:200], + question_type, + mode, + summary, + grade or "", + key_metrics[:500] if key_metrics else "", + duration_sec, + code_rounds, + int(has_error), + provider, + model, + time.time(), + ), + ) + conn.commit() + + def recall_for_stock( + self, + stock_code: str, + limit: int = 5, + decay_days: int = 90, + ) -> list[dict]: + """특정 종목의 최근 AI 실행 이력을 시간 역순으로 반환. + + AI 가 같은 종목의 분석 컨텍스트를 회상할 때 사용. ``decay_days`` 이전 + 기록은 자동 제외 (오래된 정보의 영향 차단). + + Args: + stock_code: 종목코드. + limit: 반환 건수 상한. + decay_days: 회상 윈도우 (일). 기본 90일. + + Returns: + ``[{stock_code, question, question_type, result_summary, timestamp, + grade, key_metrics}, ...]`` — 최신 우선. + """ + conn = self._ensure_db() + cutoff = time.time() - (decay_days * 86400) + rows = conn.execute( + "SELECT stock_code, question, question_type, result_summary, " + "created_at, grade, key_metrics " + "FROM executions WHERE stock_code = ? AND created_at > ? " + "ORDER BY created_at DESC LIMIT ?", + (stock_code, cutoff, limit), + ).fetchall() + return [ + { + "stock_code": r[0], + "question": r[1], + "question_type": r[2], + "result_summary": r[3], + "timestamp": r[4], + "grade": r[5], + "key_metrics": r[6], + } + for r in rows + ] + + # ── skills ───────────────────────────────────────────── + + def save_skill( + self, + question: str, + code_template: str, + *, + category: str = "general", + tools_used: str = "[]", + result_keys: str = "[]", + quality_score: float = 0.8, + mode: str = "analysis", + ) -> int | None: + """성공한 코드 패턴을 ``skills`` 테이블에 저장 (few-shot 학습 자료). + + Args: + question: 원본 질문 (500자로 절단). + code_template: 실행에 성공한 코드 (5000자로 절단). + category: skill 분류 ("financial"/"docs"/"market" 등). + tools_used: 사용한 도구 목록 JSON. + result_keys: 결과 dict 의 키 목록 JSON. + quality_score: 0.0~1.0 품질 점수 (높을수록 우수). + mode: "analysis" | "coding". + + Returns: + INSERT 된 row 의 id, 실패 시 None. + """ + conn = self._ensure_db() + now = time.time() + cursor = conn.execute( + "INSERT INTO skills " + "(question, category, tools_used, code_template, result_keys, " + "success_count, quality_score, mode, created_at, last_used) " + "VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?, ?)", + ( + question[:500], + category, + tools_used, + code_template[:5000], + result_keys, + quality_score, + mode, + now, + now, + ), + ) + conn.commit() + return cursor.lastrowid + + def search_skills( + self, + category: str, + *, + limit: int = 2, + mode: str | None = None, + ) -> list[tuple]: + """카테고리 별 상위 품질 skill 검색 (few-shot 주입용). + + category 매칭 우선, 부족하면 mode 전체에서 보충. + 품질 점수 / 성공 횟수 내림차순 정렬. + + Args: + category: skill 분류 키. + limit: 반환 건수. + mode: "analysis" | "coding" 중 하나로 제한 (None = 둘 다). + + Returns: + sqlite row 튜플 리스트. 컬럼 순서는 ``skills`` 테이블 schema 기준. + """ + conn = self._ensure_db() + if mode: + rows = conn.execute( + "SELECT * FROM skills WHERE category = ? AND mode = ? " + "ORDER BY quality_score DESC, success_count DESC LIMIT ?", + (category, mode, limit), + ).fetchall() + if len(rows) < limit: + existing_ids = {r[0] for r in rows} + extra = conn.execute( + "SELECT * FROM skills WHERE mode = ? ORDER BY quality_score DESC, success_count DESC LIMIT ?", + (mode, limit * 3), + ).fetchall() + for r in extra: + if r[0] not in existing_ids and len(rows) < limit: + rows.append(r) + else: + rows = conn.execute( + "SELECT * FROM skills WHERE category = ? ORDER BY quality_score DESC, success_count DESC LIMIT ?", + (category, limit), + ).fetchall() + if len(rows) < limit: + existing_ids = {r[0] for r in rows} + extra = conn.execute( + "SELECT * FROM skills ORDER BY quality_score DESC, success_count DESC LIMIT ?", + (limit * 3,), + ).fetchall() + for r in extra: + if r[0] not in existing_ids and len(rows) < limit: + rows.append(r) + return rows + + def record_skill_success(self, skill_id: int) -> None: + """skill 의 ``success_count`` 증가 + ``last_used`` 갱신. + + skill 을 재사용하여 성공할 때마다 호출 — 점진적 품질 신호. + """ + conn = self._ensure_db() + conn.execute( + "UPDATE skills SET success_count = success_count + 1, last_used = ? WHERE id = ?", + (time.time(), skill_id), + ) + conn.commit() + + def adjust_skill_quality(self, skill_id: int, success: bool) -> None: + """EMA 방식 품질 점수 업데이트.""" + alpha = 0.3 + conn = self._ensure_db() + row = conn.execute("SELECT quality_score FROM skills WHERE id = ?", (skill_id,)).fetchone() + if row: + current = row[0] + new_score = current * (1 - alpha) + (1.0 if success else 0.0) * alpha + conn.execute( + "UPDATE skills SET quality_score = ?, last_used = ? WHERE id = ?", + (new_score, time.time(), skill_id), + ) + conn.commit() + + # ── error_patterns ───────────────────────────────────── + + def lookup_error(self, signature: str, error_type: str, *, limit: int = 3) -> list[tuple]: + conn = self._ensure_db() + rows = conn.execute( + "SELECT * FROM error_patterns WHERE error_signature = ? ORDER BY frequency DESC LIMIT ?", + (signature, limit), + ).fetchall() + if len(rows) < limit and error_type != "Unknown": + existing_ids = {r[0] for r in rows} + type_rows = conn.execute( + "SELECT * FROM error_patterns WHERE error_type = ? ORDER BY frequency DESC LIMIT ?", + (error_type, limit * 2), + ).fetchall() + for r in type_rows: + if r[0] not in existing_ids and len(rows) < limit: + rows.append(r) + return rows + + def record_error( + self, + error_type: str, + signature: str, + wrong_code: str, + correct_code: str = "", + tool_name: str = "", + ) -> None: + conn = self._ensure_db() + now = time.time() + existing = conn.execute( + "SELECT id, frequency FROM error_patterns WHERE error_signature = ? AND wrong_code = ?", + (signature, wrong_code[:2000]), + ).fetchone() + if existing: + conn.execute( + "UPDATE error_patterns SET frequency = ?, last_seen = ?, " + "correct_code = CASE WHEN ? != '' THEN ? ELSE correct_code END " + "WHERE id = ?", + (existing[1] + 1, now, correct_code, correct_code, existing[0]), + ) + else: + conn.execute( + "INSERT INTO error_patterns " + "(error_type, error_signature, wrong_code, correct_code, tool_name, frequency, last_seen) " + "VALUES (?, ?, ?, ?, ?, 1, ?)", + (error_type, signature, wrong_code[:2000], correct_code[:2000], tool_name, now), + ) + conn.commit() + + # ── insights ─────────────────────────────────────────── + + def save_insight( + self, + stock_code: str, + narrative: str, + *, + strengths: list[str] | None = None, + weaknesses: list[str] | None = None, + sector: str = "", + source: str = "audit", + expires_days: int = 90, + ) -> None: + conn = self._ensure_db() + now = time.time() + expires_at = now + (expires_days * 86400) + + # upsert: 같은 종목 + source면 갱신 + existing = conn.execute( + "SELECT id FROM insights WHERE stock_code = ? AND source = ?", + (stock_code, source), + ).fetchone() + + narrative_trimmed = narrative[:_MAX_INSIGHT_NARRATIVE] + strengths_json = json.dumps(strengths or [], ensure_ascii=False) + weaknesses_json = json.dumps(weaknesses or [], ensure_ascii=False) + + if existing: + conn.execute( + "UPDATE insights SET narrative = ?, strengths = ?, weaknesses = ?, " + "sector = ?, created_at = ?, expires_at = ? WHERE id = ?", + (narrative_trimmed, strengths_json, weaknesses_json, sector, now, expires_at, existing[0]), + ) + else: + conn.execute( + "INSERT INTO insights " + "(stock_code, narrative, strengths, weaknesses, sector, source, created_at, expires_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (stock_code, narrative_trimmed, strengths_json, weaknesses_json, sector, source, now, expires_at), + ) + conn.commit() + + def get_insight(self, stock_code: str) -> InsightRecord | None: + conn = self._ensure_db() + row = conn.execute( + "SELECT stock_code, narrative, strengths, weaknesses, sector, source, " + "created_at, expires_at FROM insights " + "WHERE stock_code = ? ORDER BY created_at DESC LIMIT 1", + (stock_code,), + ).fetchone() + if not row: + return None + return InsightRecord( + stock_code=row[0], + narrative=row[1], + strengths=json.loads(row[2]) if row[2] else [], + weaknesses=json.loads(row[3]) if row[3] else [], + sector=row[4] or "", + source=row[5] or "audit", + created_at=row[6], + expires_at=row[7], + ) + + def get_sector_insights(self, sector: str, *, limit: int = 3) -> list[InsightRecord]: + conn = self._ensure_db() + rows = conn.execute( + "SELECT stock_code, narrative, strengths, weaknesses, sector, source, " + "created_at, expires_at FROM insights " + "WHERE sector LIKE ? ORDER BY created_at DESC LIMIT ?", + (f"%{sector}%", limit), + ).fetchall() + return [ + InsightRecord( + stock_code=r[0], + narrative=r[1], + strengths=json.loads(r[2]) if r[2] else [], + weaknesses=json.loads(r[3]) if r[3] else [], + sector=r[4] or "", + source=r[5] or "audit", + created_at=r[6], + expires_at=r[7], + ) + for r in rows + ] + + # ── playbook (ACE: Generator/Reflector/Curator) ──────── + # arxiv.org/abs/2510.04618 — delta merge로 evolving playbook 유지. + # 규칙: 신규 bullet INSERT, 중복은 카운트만 갱신, 절대 삭제 X (context collapse 방지). + + def upsert_bullet( + self, + intent: str, + bullet: str, + *, + sector: str = "", + outcome: str = "neutral", + source: str = "reflection", + ) -> None: + """playbook bullet 삽입 또는 카운트 갱신. + + Args: + intent: ai.context.intent.Intent.value (예: "act2_profit"). + bullet: 한 줄 전략/관찰 (200자 cap, 자동 절단). + sector: 섹터 분리 (기본 빈 문자열 = 전 섹터 공용). + outcome: "success" → success_count++, "fail" → fail_count++, "neutral" → 등록만. + source: "reflection" | "audit" | "manual". + + delta merge: UNIQUE(intent, sector, bullet) 충돌 시 INSERT 무시 후 카운트만 UPDATE. + """ + bullet = (bullet or "").strip() + if not bullet or not intent: + return + bullet = bullet[:200] + with self._write_lock: + conn = self._ensure_db() + now = time.time() + # 신규 시도 — 충돌은 무시 + try: + conn.execute( + "INSERT INTO playbook " + "(intent, sector, bullet, success_count, fail_count, quality, " + " source, created_at, last_used) VALUES (?, ?, ?, 0, 0, 0.5, ?, ?, ?)", + (intent, sector or "", bullet, source, now, now), + ) + except sqlite3.IntegrityError: + pass # unique 충돌 — 카운트 갱신만 진행 + # 카운트/quality 갱신 + # NOTE: SQLite UPDATE의 SET expression은 OLD 값을 보므로 + # quality 식에서 +1을 명시적으로 더해야 함 (Beta posterior 근사). + if outcome == "success": + conn.execute( + "UPDATE playbook SET success_count = success_count + 1, " + "quality = (success_count + 2.0) / (success_count + fail_count + 3.0), " + "last_used = ? WHERE intent = ? AND sector = ? AND bullet = ?", + (now, intent, sector or "", bullet), + ) + elif outcome == "fail": + conn.execute( + "UPDATE playbook SET fail_count = fail_count + 1, " + "quality = (success_count + 1.0) / (success_count + fail_count + 3.0), " + "last_used = ? WHERE intent = ? AND sector = ? AND bullet = ?", + (now, intent, sector or "", bullet), + ) + else: + conn.execute( + "UPDATE playbook SET last_used = ? WHERE intent = ? AND sector = ? AND bullet = ?", + (now, intent, sector or "", bullet), + ) + conn.commit() + + def get_bullets( + self, + intent: str, + *, + sector: str = "", + limit: int = 6, + min_quality: float = 0.4, + ) -> list[tuple[str, float, int, int]]: + """intent별 playbook bullet 검색 (Generator 단계 주입용). + + Args: + intent: 정확 매칭. + sector: 섹터 우선 매칭, 부족하면 공용("")으로 보충. + limit: 최대 반환 수. + min_quality: 이 값 미만은 제외 (단, neutral=0.5는 통과). + + Returns: + ``[(bullet, quality, success, fail), ...]`` quality 내림차순. + """ + if not intent: + return [] + conn = self._ensure_db() + # 섹터 우선 + rows: list[tuple[str, float, int, int]] = [] + if sector: + rows = list( + conn.execute( + "SELECT bullet, quality, success_count, fail_count FROM playbook " + "WHERE intent = ? AND sector = ? AND quality >= ? " + "ORDER BY quality DESC, last_used DESC LIMIT ?", + (intent, sector, min_quality, limit), + ).fetchall() + ) + if len(rows) < limit: + remaining = limit - len(rows) + seen = {r[0] for r in rows} + extras = conn.execute( + "SELECT bullet, quality, success_count, fail_count FROM playbook " + "WHERE intent = ? AND sector = '' AND quality >= ? " + "ORDER BY quality DESC, last_used DESC LIMIT ?", + (intent, min_quality, remaining * 2), + ).fetchall() + for e in extras: + if e[0] not in seen and len(rows) < limit: + rows.append(e) + return rows + + def playbook_size(self, intent: str | None = None) -> int: + """playbook 통계 — intent별 또는 전체 bullet 수.""" + conn = self._ensure_db() + if intent: + row = conn.execute("SELECT COUNT(*) FROM playbook WHERE intent = ?", (intent,)).fetchone() + else: + row = conn.execute("SELECT COUNT(*) FROM playbook").fetchone() + return int(row[0]) if row else 0 + + # ── meta ─────────────────────────────────────────────── + + def get_meta(self, key: str) -> str | None: + conn = self._ensure_db() + row = conn.execute("SELECT value FROM meta WHERE key = ?", (key,)).fetchone() + return row[0] if row else None + + def set_meta(self, key: str, value: str) -> None: + conn = self._ensure_db() + conn.execute( + "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", + (key, value), + ) + conn.commit() + + # ── 마이그레이션 ─────────────────────────────────────── + + def migrate_from_legacy(self) -> dict[str, int]: + """기존 5개 분산 저장소에서 데이터를 통합 마이그레이션. + + Returns: + 각 소스별 마이그레이션된 레코드 수 + """ + current = self.get_meta("migration_version") + if current and int(current) >= _MIGRATION_VERSION: + return {} + + stats: dict[str, int] = {} + conn = self._ensure_db() + + # 1. analysisMemory.db → executions + stats["analysisMemory"] = self._migrate_analysis_memory(conn) + + # 2. skill_library.db → skills + stats["skill_library"] = self._migrate_skill_library(conn) + + # 3. error_patterns.db → error_patterns + stats["error_patterns"] = self._migrate_error_patterns(conn) + + # 4. audit_log.jsonl → executions + stats["audit_log"] = self._migrate_audit_log(conn) + + # 5. auditAnalysis/*.md → insights + stats["audit_analysis"] = self._migrate_audit_analysis(conn) + + self.set_meta("migration_version", str(_MIGRATION_VERSION)) + self.set_meta("migrated_at", str(time.time())) + + log.info("KnowledgeDB 마이그레이션 완료: %s", stats) + return stats + + def _migrate_analysis_memory(self, conn: sqlite3.Connection) -> int: + legacy_path = Path.home() / ".dartlab" / "analysisMemory.db" + if not legacy_path.exists(): + return 0 + count = 0 + try: + legacy = sqlite3.connect(str(legacy_path), timeout=5) + # keyMetrics 컬럼 존재 여부 확인 + cols = [info[1] for info in legacy.execute("PRAGMA table_info(analysis)").fetchall()] + has_metrics = "keyMetrics" in cols + + if has_metrics: + rows = legacy.execute( + "SELECT stockCode, question, questionType, resultSummary, timestamp, grade, keyMetrics " + "FROM analysis ORDER BY timestamp" + ).fetchall() + else: + rows = legacy.execute( + "SELECT stockCode, question, questionType, resultSummary, timestamp, grade " + "FROM analysis ORDER BY timestamp" + ).fetchall() + + for r in rows: + conn.execute( + "INSERT INTO executions " + "(stock_code, question, question_type, mode, result_summary, grade, key_metrics, created_at) " + "VALUES (?, ?, ?, 'analysis', ?, ?, ?, ?)", + (r[0], r[1], r[2] or "", r[3] or "", r[5] or "", r[6] if has_metrics and len(r) > 6 else "", r[4]), + ) + count += 1 + conn.commit() + legacy.close() + except (sqlite3.OperationalError, OSError) as e: + log.warning("analysisMemory 마이그레이션 실패: %s", e) + return count + + def _migrate_skill_library(self, conn: sqlite3.Connection) -> int: + legacy_path = Path.home() / ".dartlab" / "selfai" / "skill_library.db" + if not legacy_path.exists(): + return 0 + count = 0 + try: + legacy = sqlite3.connect(str(legacy_path), timeout=5) + rows = legacy.execute( + "SELECT question, category, tools_used, code_template, result_keys, " + "success_count, quality_score, created_at, last_used FROM skill" + ).fetchall() + for r in rows: + conn.execute( + "INSERT INTO skills " + "(question, category, tools_used, code_template, result_keys, " + "success_count, quality_score, mode, created_at, last_used) " + "VALUES (?, ?, ?, ?, ?, ?, ?, 'analysis', ?, ?)", + r, + ) + count += 1 + conn.commit() + legacy.close() + except (sqlite3.OperationalError, OSError) as e: + log.warning("skill_library 마이그레이션 실패: %s", e) + return count + + def _migrate_error_patterns(self, conn: sqlite3.Connection) -> int: + legacy_path = Path.home() / ".dartlab" / "selfai" / "error_patterns.db" + if not legacy_path.exists(): + return 0 + count = 0 + try: + legacy = sqlite3.connect(str(legacy_path), timeout=5) + rows = legacy.execute( + "SELECT error_type, error_signature, wrong_code, correct_code, " + "tool_name, frequency, last_seen FROM error_pattern" + ).fetchall() + for r in rows: + conn.execute( + "INSERT INTO error_patterns " + "(error_type, error_signature, wrong_code, correct_code, " + "tool_name, frequency, last_seen) VALUES (?, ?, ?, ?, ?, ?, ?)", + r, + ) + count += 1 + conn.commit() + legacy.close() + except (sqlite3.OperationalError, OSError) as e: + log.warning("error_patterns 마이그레이션 실패: %s", e) + return count + + def _migrate_audit_log(self, conn: sqlite3.Connection) -> int: + audit_dir = Path(__file__).resolve().parents[4] / "data" / "dart" / "auditAi" + log_path = audit_dir / "audit_log.jsonl" + if not log_path.exists(): + return 0 + count = 0 + try: + for line in log_path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + rec = json.loads(line) + except json.JSONDecodeError: + continue + conn.execute( + "INSERT INTO executions " + "(stock_code, question, question_type, mode, has_error, " + "duration_sec, code_rounds, created_at) " + "VALUES (?, ?, ?, 'analysis', ?, ?, ?, ?)", + ( + rec.get("stock"), + rec.get("question", "")[:200], + rec.get("id", ""), + int(not rec.get("passed", True)), + rec.get("duration"), + rec.get("codeRounds", 0), + _parse_iso_timestamp(rec.get("date", "")), + ), + ) + count += 1 + conn.commit() + except OSError as e: + log.warning("audit_log 마이그레이션 실패: %s", e) + return count + + def _migrate_audit_analysis(self, conn: sqlite3.Connection) -> int: + audit_dir = Path(__file__).resolve().parents[4] / "data" / "dart" / "auditAnalysis" + if not audit_dir.exists(): + return 0 + count = 0 + for md_path in sorted(audit_dir.glob("*.md")): + try: + parsed = _parse_audit_markdown(md_path) + if parsed: + conn.execute( + "INSERT INTO insights " + "(stock_code, narrative, strengths, weaknesses, sector, source, " + "created_at, expires_at) " + "VALUES (?, ?, ?, ?, ?, 'audit', ?, ?)", + ( + parsed["stock_code"], + parsed["narrative"], + json.dumps(parsed["strengths"], ensure_ascii=False), + json.dumps(parsed["weaknesses"], ensure_ascii=False), + parsed["sector"], + time.time(), + time.time() + 90 * 86400, + ), + ) + count += 1 + except (OSError, ValueError) as e: + log.debug("auditAnalysis 파싱 실패 %s: %s", md_path.name, e) + conn.commit() + return count + + # ── 통계 ─────────────────────────────────────────────── + + def stats(self) -> dict[str, int]: + conn = self._ensure_db() + result = {} + for table in ("executions", "skills", "error_patterns", "insights"): + row = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone() # noqa: S608 + result[table] = row[0] if row else 0 + return result + + # ── HuggingFace 동기화 ──────────────────────────────── + + def _knowledge_dir(self) -> Path: + """공유 JSON 저장 경로: data/ai/knowledge/.""" + try: + from dartlab import config as _cfg + + base = Path(_cfg.dataDir) + except (ImportError, AttributeError): + base = Path(__file__).resolve().parents[4] / "data" + out = base / "ai" / "knowledge" + out.mkdir(parents=True, exist_ok=True) + return out + + def export_shared(self) -> Path: + """공유 가능한 테이블을 data/ai/knowledge/에 JSON으로 export. + + executions(개인 질문 이력)는 프라이버시 보호를 위해 제외. + + Returns: + export 디렉토리 경로 + """ + conn = self._ensure_db() + out = self._knowledge_dir() + + # insights + rows = conn.execute( + "SELECT stock_code, narrative, strengths, weaknesses, sector, source, created_at, expires_at FROM insights" + ).fetchall() + insights_data = [ + { + "stock_code": r[0], + "narrative": r[1], + "strengths": r[2], + "weaknesses": r[3], + "sector": r[4], + "source": r[5], + "created_at": r[6], + "expires_at": r[7], + } + for r in rows + ] + (out / "insights.json").write_text(json.dumps(insights_data, ensure_ascii=False, indent=2), encoding="utf-8") + + # skills + rows = conn.execute( + "SELECT question, category, tools_used, code_template, result_keys, " + "success_count, quality_score, mode, created_at, last_used FROM skills" + ).fetchall() + skills_data = [ + { + "question": r[0], + "category": r[1], + "tools_used": r[2], + "code_template": r[3], + "result_keys": r[4], + "success_count": r[5], + "quality_score": r[6], + "mode": r[7], + "created_at": r[8], + "last_used": r[9], + } + for r in rows + ] + (out / "skills.json").write_text(json.dumps(skills_data, ensure_ascii=False, indent=2), encoding="utf-8") + + # error_patterns + rows = conn.execute( + "SELECT error_type, error_signature, wrong_code, correct_code, " + "tool_name, frequency, last_seen FROM error_patterns" + ).fetchall() + errors_data = [ + { + "error_type": r[0], + "error_signature": r[1], + "wrong_code": r[2], + "correct_code": r[3], + "tool_name": r[4], + "frequency": r[5], + "last_seen": r[6], + } + for r in rows + ] + (out / "error_patterns.json").write_text( + json.dumps(errors_data, ensure_ascii=False, indent=2), encoding="utf-8" + ) + + # meta + meta_data = { + "version": _MIGRATION_VERSION, + "exported_at": time.time(), + "stats": { + "insights": len(insights_data), + "skills": len(skills_data), + "error_patterns": len(errors_data), + }, + } + (out / "meta.json").write_text(json.dumps(meta_data, ensure_ascii=False, indent=2), encoding="utf-8") + + log.info( + "export 완료 → %s (insights=%d, skills=%d, errors=%d)", + out, + len(insights_data), + len(skills_data), + len(errors_data), + ) + return out + + def push(self, token: str | None = None) -> str: + """data/ai/knowledge/에 export 후 HF에 업로드. + + 경로: data/ai/knowledge/ → HF ai/knowledge/ + git에는 안 감 (data/ .gitignore), HF로만 동기화. + + Args: + token: HuggingFace API 토큰 (없으면 .env에서 로드) + + Returns: + HF URL + """ + from huggingface_hub import HfApi + + from dartlab.core.dataConfig import DATA_RELEASES, HF_REPO + + if token is None: + token = _load_hf_token() + + # 1. DB → data/ai/knowledge/*.json export + out = self.export_shared() + + # 2. data/ai/knowledge/ → HF upload + hf_dir = DATA_RELEASES["aiKnowledge"]["dir"] + api = HfApi(token=token) + + st = self.stats() + api.upload_folder( + repo_id=HF_REPO, + folder_path=str(out), + path_in_repo=hf_dir, + repo_type="dataset", + commit_message=f"sync aiKnowledge: {st.get('insights', 0)} insights, {st.get('skills', 0)} skills, {st.get('error_patterns', 0)} errors", + ) + + url = f"https://huggingface.co/datasets/{HF_REPO}/tree/main/{hf_dir}" + log.info("KnowledgeDB push 완료: %s", url) + return url + + def pull(self, token: str | None = None, *, force: bool = False) -> dict[str, int]: + """HF → data/ai/knowledge/ 다운로드 → 로컬 DB에 merge. + + 기존 로컬 데이터를 덮어쓰지 않고 upsert한다. + executions는 pull 대상이 아님 (프라이버시). + + Args: + token: HuggingFace API 토큰 + force: True면 기존 데이터가 있어도 강제 다운로드 + + Returns: + 테이블별 merge된 레코드 수 + """ + from huggingface_hub import hf_hub_download + + from dartlab.core.dataConfig import DATA_RELEASES, HF_REPO + + if token is None: + token = _load_hf_token() + + hf_dir = DATA_RELEASES["aiKnowledge"]["dir"] + out = self._knowledge_dir() + conn = self._ensure_db() + merge_stats: dict[str, int] = {} + + # 1. HF → data/ai/knowledge/ 다운로드 + for filename in ("insights.json", "skills.json", "error_patterns.json", "meta.json"): + try: + hf_hub_download( + repo_id=HF_REPO, + filename=f"{hf_dir}/{filename}", + repo_type="dataset", + local_dir=str(out.parent.parent), # data/ 기준 + token=token, + force_download=force, + ) + except (OSError, ValueError) as e: + log.warning("HF 다운로드 실패 (%s): %s", filename, e) + + # 2. data/ai/knowledge/*.json → DB merge + merge_stats["insights"] = self._merge_json_to_insights(out / "insights.json", conn) + merge_stats["skills"] = self._merge_json_to_skills(out / "skills.json", conn) + merge_stats["error_patterns"] = self._merge_json_to_errors(out / "error_patterns.json", conn) + + log.info("KnowledgeDB pull 완료: %s", merge_stats) + return merge_stats + + def _merge_json_to_insights(self, path: Path, conn: sqlite3.Connection) -> int: + if not path.exists(): + return 0 + data = json.loads(path.read_text(encoding="utf-8")) + count = 0 + for r in data: + existing = conn.execute( + "SELECT id FROM insights WHERE stock_code = ? AND source = ?", + (r["stock_code"], r.get("source", "audit")), + ).fetchone() + if not existing: + conn.execute( + "INSERT INTO insights " + "(stock_code, narrative, strengths, weaknesses, sector, source, created_at, expires_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + ( + r["stock_code"], + r["narrative"], + r.get("strengths", "[]"), + r.get("weaknesses", "[]"), + r.get("sector", ""), + r.get("source", "audit"), + r.get("created_at", time.time()), + r.get("expires_at"), + ), + ) + count += 1 + conn.commit() + return count + + def _merge_json_to_skills(self, path: Path, conn: sqlite3.Connection) -> int: + if not path.exists(): + return 0 + data = json.loads(path.read_text(encoding="utf-8")) + count = 0 + for r in data: + existing = conn.execute( + "SELECT id FROM skills WHERE question = ? AND mode = ?", + (r["question"], r.get("mode", "analysis")), + ).fetchone() + if not existing: + conn.execute( + "INSERT INTO skills " + "(question, category, tools_used, code_template, result_keys, " + "success_count, quality_score, mode, created_at, last_used) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + r["question"], + r.get("category", "general"), + r.get("tools_used", "[]"), + r["code_template"], + r.get("result_keys", "[]"), + r.get("success_count", 1), + r.get("quality_score", 0.8), + r.get("mode", "analysis"), + r.get("created_at", time.time()), + r.get("last_used", time.time()), + ), + ) + count += 1 + conn.commit() + return count + + def _merge_json_to_errors(self, path: Path, conn: sqlite3.Connection) -> int: + if not path.exists(): + return 0 + data = json.loads(path.read_text(encoding="utf-8")) + count = 0 + for r in data: + existing = conn.execute( + "SELECT id, frequency FROM error_patterns WHERE error_signature = ? AND wrong_code = ?", + (r["error_signature"], r.get("wrong_code", "")), + ).fetchone() + if not existing: + conn.execute( + "INSERT INTO error_patterns " + "(error_type, error_signature, wrong_code, correct_code, " + "tool_name, frequency, last_seen) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + ( + r["error_type"], + r["error_signature"], + r.get("wrong_code", ""), + r.get("correct_code", ""), + r.get("tool_name", ""), + r.get("frequency", 1), + r.get("last_seen", time.time()), + ), + ) + count += 1 + elif existing and r.get("correct_code"): + conn.execute( + "UPDATE error_patterns SET correct_code = ?, frequency = MAX(frequency, ?) WHERE id = ?", + (r["correct_code"], r.get("frequency", 1), existing[0]), + ) + conn.commit() + return count + + # ── 싱글턴 ───────────────────────────────────────────── + + def _auto_pull(self) -> None: + """DB가 비어있으면 data/ai/knowledge/*.json에서 자동 merge. + + 1순위: 로컬 data/ai/knowledge/*.json (HF 호출 없이) + 2순위: HF에서 pull (실패해도 무시) + """ + current = self.stats() + if current.get("insights", 0) > 0 or current.get("skills", 0) > 0: + return # 이미 데이터가 있으면 skip + + conn = self._ensure_db() + + # 1순위: 로컬 data/ JSON에서 merge + local_dir = self._knowledge_dir() + local_insights = local_dir / "insights.json" + if local_insights.exists(): + merged = 0 + merged += self._merge_json_to_insights(local_insights, conn) + merged += self._merge_json_to_skills(local_dir / "skills.json", conn) + merged += self._merge_json_to_errors(local_dir / "error_patterns.json", conn) + if merged > 0: + log.info("auto-pull: 로컬 JSON에서 %d건 merge", merged) + return + + # 2순위: HF pull (실패해도 무시) + try: + result = self.pull() + if sum(result.values()) > 0: + log.info("auto-pull: HF에서 %s merge", result) + except (ImportError, OSError, ValueError) as e: + log.debug("auto-pull HF 실패 (무시): %s", e) + + @classmethod + def get(cls) -> KnowledgeDB: + global _instance + if _instance is None: + _instance = cls() + # 최초 접근 시 자동 마이그레이션 + try: + _instance.migrate_from_legacy() + except (sqlite3.OperationalError, OSError) as e: + log.warning("KnowledgeDB 마이그레이션 실패 (무시): %s", e) + # 마이그레이션 후 DB가 비어있으면 자동 pull + try: + _instance._auto_pull() + except (sqlite3.OperationalError, OSError) as e: + log.debug("auto-pull 실패 (무시): %s", e) + return _instance + + +# ── 유틸리티 ─────────────────────────────────────────────── + + +def _load_hf_token() -> str | None: + """HF 토큰을 .env 또는 환경변수에서 로드.""" + import os + + token = os.environ.get("HF_TOKEN") + if token: + return token + try: + from dotenv import load_dotenv + + load_dotenv() + return os.environ.get("HF_TOKEN") + except ImportError: + return None + + +def _parse_iso_timestamp(iso_str: str) -> float: + """ISO 타임스탬프를 Unix timestamp로 변환.""" + if not iso_str: + return time.time() + try: + import datetime + + dt = datetime.datetime.fromisoformat(iso_str) + return dt.timestamp() + except (ValueError, TypeError): + return time.time() + + +def _parse_audit_markdown(path: Path) -> dict | None: + """auditAnalysis 마크다운 파일에서 인사이트를 추출. + + 구조: + - [+] 강점 + - [-] 약점 + - > **재무 순환 서사** ... (narrative) + - 섹터: ... + """ + stock_code = path.stem + if not re.match(r"\d{6}", stock_code): + return None + + text = path.read_text(encoding="utf-8") + if len(text) < 100: + return None + + # 강점/약점 추출 + strengths = re.findall(r"\[\+\]\s*(.+)", text) + weaknesses = re.findall(r"\[-\]\s*(.+)", text) + + # 서사 추출 (재무 순환 서사 블록) + narrative = "" + narrative_match = re.search( + r">\s*\*\*재무 순환 서사\*\*\s*\n((?:>\s*.+\n?)+)", + text, + ) + if narrative_match: + raw = narrative_match.group(1) + narrative = re.sub(r"^>\s*", "", raw, flags=re.MULTILINE).strip() + + if not narrative: + # fallback: 첫 번째 [+] 라인을 서사로 + if strengths: + narrative = strengths[0] + else: + return None + + # 섹터 추출 + sector = "" + sector_match = re.search(r"섹터:\s*(.+?)(?:\s*\||$)", text) + if sector_match: + sector = sector_match.group(1).strip() + + return { + "stock_code": stock_code, + "narrative": narrative[:_MAX_INSIGHT_NARRATIVE], + "strengths": strengths[:10], + "weaknesses": weaknesses[:10], + "sector": sector, + } diff --git a/src/dartlab/ai/providers/__init__.py b/src/dartlab/ai/providers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cbe7bbed326e768c08a534348e5807ed344d9c5d --- /dev/null +++ b/src/dartlab/ai/providers/__init__.py @@ -0,0 +1,43 @@ +"""Provider 레지스트리 및 팩토리.""" + +from __future__ import annotations + +import importlib +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from dartlab.ai.providers.base import BaseProvider + from dartlab.ai.types import LLMConfig + +_PROVIDER_MAP: dict[str, str] = { + "openai": "dartlab.ai.providers.openai_compat.OpenAICompatProvider", + "ollama": "dartlab.ai.providers.ollama.OllamaProvider", + "custom": "dartlab.ai.providers.openai_compat.OpenAICompatProvider", + "codex": "dartlab.ai.providers.codex.CodexProvider", + "oauth-codex": "dartlab.ai.providers.oauth_codex.OAuthCodexProvider", + "gemini": "dartlab.ai.providers.gemini.GeminiProvider", + "groq": "dartlab.ai.providers.openai_compat.OpenAICompatProvider", + "cerebras": "dartlab.ai.providers.openai_compat.OpenAICompatProvider", + "mistral": "dartlab.ai.providers.openai_compat.OpenAICompatProvider", +} + + +def create_provider(config: "LLMConfig") -> "BaseProvider": + """LLMConfig로부터 적절한 provider 인스턴스 생성.""" + class_path = _PROVIDER_MAP.get(config.provider) + if class_path is None: + raise ValueError(f"지원하지 않는 provider: '{config.provider}'. 지원: {list(_PROVIDER_MAP.keys())}") + module_path, class_name = class_path.rsplit(".", 1) + mod = importlib.import_module(module_path) + cls = getattr(mod, class_name) + return cls(config) + + +def register_provider(name: str, class_path: str) -> None: + """새 provider 등록 (확장용).""" + _PROVIDER_MAP[name] = class_path + + +def available_providers() -> list[str]: + """등록된 provider 이름 목록.""" + return list(_PROVIDER_MAP.keys()) diff --git a/src/dartlab/ai/providers/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9909545779c38513f1b560e3872d2e55a69eab5d Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/providers/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50aa1fc65c8ca0ebbeb0080190004eddf2181040 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/base.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6be0f9421a17890302de565243edabb35e4b9498 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/base.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/base.cpython-313.pyc b/src/dartlab/ai/providers/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b52e8658f2b0dbce4feca30b52da3c45ec62a65b Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/base.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/claude.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/claude.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c65c663e223f5aec5a2472e7bfd1e61fe1aae04e Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/claude.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/claude_code.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/claude_code.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a51acc1d42ff3fa85f96ab0ff82e3c653c72ab88 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/claude_code.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/codex.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/codex.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d50ddd90a8c18cbb15280d2d37b0dc04a0a3f3b0 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/codex.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/fallback.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/fallback.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ae4802c3df3a4b122b36db9323c9c5a81474a32 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/fallback.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/gemini.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/gemini.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78be81b5085d4fa1dcb88fa6c40b540dd64402ad Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/gemini.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fac7bcbd80cb08c3af663da6f6c3a2a5115a5a26 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-313.pyc b/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3eb4ddfa94ae6590cafb15452e2a2950849c4b1f Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/oauth_codex.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/ollama.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/ollama.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12b643ec5aac9081b66b584b196983d97a796371 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/ollama.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/__pycache__/openai_compat.cpython-312.pyc b/src/dartlab/ai/providers/__pycache__/openai_compat.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..107763ca3791675c333537f33a381047ffc05ab5 Binary files /dev/null and b/src/dartlab/ai/providers/__pycache__/openai_compat.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/base.py b/src/dartlab/ai/providers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..07f870c27aa15cba008a6efce5be2586cf071c3b --- /dev/null +++ b/src/dartlab/ai/providers/base.py @@ -0,0 +1,105 @@ +"""LLM provider 추상 베이스.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Generator + +from dartlab.ai.types import LLMConfig, LLMResponse, ToolResponse + + +class RateLimitError(Exception): + """429 rate limit 에러. fallback 체인에서 다음 프로바이더로 전환 트리거.""" + + def __init__(self, provider: str, message: str = "", retryAfter: float | None = None): + self.provider = provider + self.retryAfter = retryAfter + super().__init__(message or f"{provider}: rate limit 초과") + + +class BaseProvider(ABC): + """모든 LLM provider의 추상 기반.""" + + def __init__(self, config: LLMConfig): + self.config = config + + @abstractmethod + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 completion 호출.""" + ... + + @abstractmethod + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 completion (generator).""" + ... + + @abstractmethod + def check_available(self) -> bool: + """provider 접근 가능 여부 확인.""" + ... + + @property + @abstractmethod + def default_model(self) -> str: + """provider 기본 모델명.""" + ... + + @property + def resolved_model(self) -> str: + """사용자 설정 모델 또는 기본 모델명 반환.""" + return self.config.model or self.default_model + + @property + def supports_native_tools(self) -> bool: + """이 provider가 네이티브 tool calling을 지원하는지.""" + return False + + @property + def supports_cache_control(self) -> bool: + """시스템 프롬프트 캐시 경계(cache_control)를 지원하는지.""" + return False + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + ) -> ToolResponse: + """도구 사용 가능한 completion. 미지원 provider는 fallback.""" + response = self.complete(messages) + return ToolResponse( + answer=response.answer, + provider=response.provider, + model=response.model, + usage=response.usage, + context_tables=response.context_tables, + ) + + def format_tool_result(self, tool_call_id: str, result: str) -> dict: + """도구 실행 결과를 메시지로 변환 (OpenAI 형식 기본).""" + return { + "role": "tool", + "tool_call_id": tool_call_id, + "content": result, + } + + def format_assistant_tool_calls( + self, + answer: str | None, + tool_calls: list, + ) -> dict: + """assistant 메시지에 tool_calls를 포함 (OpenAI 형식 기본).""" + import json + + msg: dict = {"role": "assistant", "content": answer} + msg["tool_calls"] = [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": json.dumps(tc.arguments, ensure_ascii=False), + }, + } + for tc in tool_calls + ] + return msg diff --git a/src/dartlab/ai/providers/claude.py b/src/dartlab/ai/providers/claude.py new file mode 100644 index 0000000000000000000000000000000000000000..bca93f289d5725289de59811621da63f47e6d5fe --- /dev/null +++ b/src/dartlab/ai/providers/claude.py @@ -0,0 +1,390 @@ +"""Claude / Anthropic provider.""" + +from __future__ import annotations + +import json +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.types import LLMConfig, LLMResponse, ToolCall, ToolResponse + +_CLAUDE_PROVIDER_ERRORS = (ImportError, OSError, RuntimeError, TypeError, ValueError) + + +def _openai_tools_to_anthropic(tools: list[dict]) -> list[dict]: + """OpenAI function calling 스키마 → Anthropic tool 스키마 변환. + + OpenAI: [{"type": "function", "function": {"name": ..., "description": ..., "parameters": ...}}] + Anthropic: [{"name": ..., "description": ..., "input_schema": ...}] + """ + result = [] + for t in tools: + fn = t.get("function", t) + result.append( + { + "name": fn["name"], + "description": fn.get("description", ""), + "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), + } + ) + return result + + +def _split_system_and_user(messages: list[dict]) -> tuple[str | list[dict], list[dict]]: + """system 메시지를 분리하고 Anthropic 형식 user 메시지 리스트 반환. + + system이 cache_control 블록 리스트면 그대로 반환 (prompt caching용). + """ + system_msg: str | list[dict] = "" + user_messages = [] + for m in messages: + if m["role"] == "system": + content = m["content"] + # cache_control 블록 리스트면 그대로 사용 (Anthropic prompt caching) + if isinstance(content, list): + system_msg = content + else: + system_msg = content + else: + user_messages.append(m) + return system_msg, user_messages + + +class ClaudeProvider(BaseProvider): + """Anthropic SDK 기반 provider. + + base_url 있으면 OpenAI 호환 모드 (프록시/CLIProxyAPI). + 없으면 anthropic SDK 네이티브 모드. + """ + + def __init__(self, config: LLMConfig): + super().__init__(config) + self._client = None + self._use_openai_compat = False + + @property + def default_model(self) -> str: + """기본 모델명.""" + return "claude-sonnet-4-6" + + @property + def supports_native_tools(self) -> bool: + """네이티브 tool calling 지원 여부.""" + return True + + @property + def supports_cache_control(self) -> bool: + """Anthropic 네이티브 모드에서 프롬프트 캐싱 지원.""" + return not self._use_openai_compat + + def _get_client(self): + if self._client is not None: + return self._client + + if self.config.base_url: + # OpenAI 호환 모드 (프록시) + try: + from openai import OpenAI + except ImportError: + raise ImportError("openai 패키지가 필요합니다 (프록시 모드).\n pip install --upgrade dartlab") + self._client = OpenAI( + api_key=self.config.api_key or "proxy", + base_url=self.config.base_url, + ) + self._use_openai_compat = True + else: + # Anthropic 네이티브 모드 + try: + from anthropic import Anthropic + except ImportError: + raise ImportError( + "anthropic 패키지가 필요합니다.\n" + " pip install --upgrade dartlab\n\n" + "프록시를 사용하려면 base_url을 설정하세요:\n" + " dartlab.llm.configure(provider='claude', base_url='http://...')" + ) + kwargs = {} + if self.config.api_key: + kwargs["api_key"] = self.config.api_key + self._client = Anthropic(**kwargs) + self._use_openai_compat = False + + return self._client + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + try: + self._get_client() + return True + except _CLAUDE_PROVIDER_ERRORS: + return False + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + client = self._get_client() + + if self._use_openai_compat: + response = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) + usage = None + if response.usage: + usage = { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + } + return LLMResponse( + answer=response.choices[0].message.content or "", + provider="claude", + model=response.model, + usage=usage, + ) + else: + # Anthropic 네이티브 + system_msg, user_messages = _split_system_and_user(messages) + response = client.messages.create( + model=self.resolved_model, + system=system_msg, + messages=user_messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) + return LLMResponse( + answer=response.content[0].text, + provider="claude", + model=response.model, + usage={ + "prompt_tokens": response.usage.input_tokens, + "completion_tokens": response.usage.output_tokens, + "total_tokens": response.usage.input_tokens + response.usage.output_tokens, + }, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + client = self._get_client() + + if self._use_openai_compat: + stream = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + stream=True, + ) + for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: + yield chunk.choices[0].delta.content + else: + # Anthropic 네이티브 스트리밍 + system_msg, user_messages = _split_system_and_user(messages) + with client.messages.stream( + model=self.resolved_model, + system=system_msg, + messages=user_messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) as stream: + for text in stream.text_stream: + yield text + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + ) -> ToolResponse: + """Claude tool calling — OpenAI 호환 + Anthropic 네이티브 모두 지원.""" + client = self._get_client() + + if self._use_openai_compat: + kwargs: dict = { + "model": self.resolved_model, + "messages": messages, + "temperature": self.config.temperature, + "max_tokens": self.config.max_tokens, + } + if tools: + kwargs["tools"] = tools + + response = client.chat.completions.create(**kwargs) + choice = response.choices[0] + + usage = None + if response.usage: + usage = { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + } + + tool_calls = [] + if choice.message.tool_calls: + for tc in choice.message.tool_calls: + tool_calls.append( + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=json.loads(tc.function.arguments), + ) + ) + + return ToolResponse( + answer=choice.message.content or "", + provider="claude", + model=response.model, + tool_calls=tool_calls, + finish_reason=choice.finish_reason or "stop", + usage=usage, + ) + else: + # Anthropic 네이티브 tool use + return self._complete_with_tools_native(client, messages, tools) + + def _complete_with_tools_native( + self, + client, + messages: list[dict], + tools: list[dict], + ) -> ToolResponse: + """Anthropic 네이티브 tool_use 블록 직접 지원.""" + system_msg, user_messages = _split_system_and_user(messages) + + # Anthropic 메시지 형식으로 변환 + anthropic_messages = self._to_anthropic_messages(user_messages) + + kwargs: dict = { + "model": self.resolved_model, + "system": system_msg, + "messages": anthropic_messages, + "temperature": self.config.temperature, + "max_tokens": self.config.max_tokens, + } + if tools: + kwargs["tools"] = _openai_tools_to_anthropic(tools) + + response = client.messages.create(**kwargs) + + # response.content에서 text와 tool_use 블록 분리 + answer = "" + tool_calls = [] + for block in response.content: + if block.type == "text": + answer += block.text + elif block.type == "tool_use": + tool_calls.append( + ToolCall( + id=block.id, + name=block.name, + arguments=block.input if isinstance(block.input, dict) else {}, + ) + ) + + finish_reason = "tool_use" if response.stop_reason == "tool_use" else "stop" + + return ToolResponse( + answer=answer, + provider="claude", + model=response.model, + tool_calls=tool_calls, + finish_reason=finish_reason, + usage={ + "prompt_tokens": response.usage.input_tokens, + "completion_tokens": response.usage.output_tokens, + "total_tokens": response.usage.input_tokens + response.usage.output_tokens, + }, + ) + + def _to_anthropic_messages(self, messages: list[dict]) -> list[dict]: + """OpenAI 형식 메시지를 Anthropic 형식으로 변환. + + - assistant + tool_calls → assistant content with tool_use blocks + - tool role → user content with tool_result blocks + """ + result = [] + i = 0 + while i < len(messages): + m = messages[i] + role = m.get("role", "user") + + if role == "assistant" and "tool_calls" in m: + # assistant tool_calls → Anthropic content 블록 + content = [] + if m.get("content"): + content.append({"type": "text", "text": m["content"]}) + for tc in m["tool_calls"]: + fn = tc.get("function", tc) + args = fn.get("arguments", "{}") + if isinstance(args, str): + args = json.loads(args) + content.append( + { + "type": "tool_use", + "id": tc.get("id", fn.get("name", "")), + "name": fn.get("name", tc.get("name", "")), + "input": args, + } + ) + result.append({"role": "assistant", "content": content}) + i += 1 + + elif role == "tool": + # tool results → Anthropic user message with tool_result blocks + tool_results = [] + while i < len(messages) and messages[i].get("role") == "tool": + tm = messages[i] + tool_results.append( + { + "type": "tool_result", + "tool_use_id": tm.get("tool_call_id", ""), + "content": tm.get("content", ""), + } + ) + i += 1 + result.append({"role": "user", "content": tool_results}) + + else: + # user/assistant 일반 메시지 + content = m.get("content", "") + if isinstance(content, list): + result.append({"role": role, "content": content}) + else: + result.append({"role": role, "content": content or ""}) + i += 1 + + return result + + def format_tool_result(self, tool_call_id: str, result: str) -> dict: + """Anthropic 네이티브: tool_result 블록 형식.""" + if self._use_openai_compat: + return super().format_tool_result(tool_call_id, result) + return { + "role": "tool", + "tool_call_id": tool_call_id, + "content": result, + } + + def format_assistant_tool_calls( + self, + answer: str | None, + tool_calls: list, + ) -> dict: + """Anthropic 네이티브: tool_use 블록 형식.""" + if self._use_openai_compat: + return super().format_assistant_tool_calls(answer, tool_calls) + content = [] + if answer: + content.append({"type": "text", "text": answer}) + for tc in tool_calls: + content.append( + { + "type": "tool_use", + "id": tc.id, + "name": tc.name, + "input": tc.arguments, + } + ) + return {"role": "assistant", "content": content} diff --git a/src/dartlab/ai/providers/claude_code.py b/src/dartlab/ai/providers/claude_code.py new file mode 100644 index 0000000000000000000000000000000000000000..6e858c867b0365b885e17a77dd75f0fbc03bf4c6 --- /dev/null +++ b/src/dartlab/ai/providers/claude_code.py @@ -0,0 +1,411 @@ +"""Claude Code CLI provider — subprocess로 claude CLI 호출. + +Claude Pro/Max 구독 사용자가 API 키 없이 LLM을 사용할 수 있다. +사전 조건: claude CLI 설치 + claude auth login 완료. + +VSCode 내부 등 중첩 세션 환경에서는 CLI가 hang되므로 +Anthropic SDK fallback을 자동으로 사용한다. +""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.types import LLMConfig, LLMResponse + + +def _claude_executable() -> str: + """Windows에서 shell=True 없이 실행 가능한 claude 경로를 반환한다. + + shutil.which는 Windows에서 .cmd/.bat 확장자를 자동 해석하므로 + 반환된 절대경로를 직접 사용하면 shell=False로도 실행 가능하다. + """ + path = shutil.which("claude") + return path if path else "claude" + + +# CLI 별칭 → Anthropic SDK 모델ID 매핑 +_ALIAS_TO_MODEL = { + "opus": "claude-opus-4-6-20250616", + "sonnet": "claude-sonnet-4-6-20250616", + "haiku": "claude-haiku-4-5-20251001", +} + +# CLI가 hang되는지 빠르게 판별하는 타임아웃 (초) +_CLI_PROBE_TIMEOUT = 8 +_CLAUDE_CODE_SDK_ERRORS = (json.JSONDecodeError, OSError, subprocess.TimeoutExpired, TypeError, ValueError) + + +class ClaudeCodeProvider(BaseProvider): + """Claude Code CLI 기반 provider. + + CLI 호출이 실패하거나 중첩 세션(VSCode 등)에서 hang되면 + Anthropic SDK로 자동 fallback한다. + """ + + def __init__(self, config: LLMConfig): + super().__init__(config) + # VSCode Claude Code 확장 내부면 CLI가 hang되므로 즉시 SDK fallback + self._use_sdk = bool(os.environ.get("CLAUDECODE")) + self._sdk_client = None + self._api_key_from_cli: str | None = None + + @property + def default_model(self) -> str: + """기본 모델명.""" + return "sonnet" + + def _resolve_sdk_model(self) -> str: + """CLI 별칭을 SDK 모델ID로 변환.""" + model = self.resolved_model + return _ALIAS_TO_MODEL.get(model, model) + + # ── CLI 환경 관리 ── + + def _clean_env(self) -> dict[str, str]: + """중첩 세션 방지 — Claude Code/Electron/VSCode 환경변수 제거.""" + env = os.environ.copy() + for key in list(env.keys()): + if key.startswith("CLAUDE") or key.startswith("claude"): + env.pop(key) + for key in ("ELECTRON_RUN_AS_NODE", "ELECTRON_NO_ASAR"): + env.pop(key, None) + for key in list(env.keys()): + if key.startswith("VSCODE"): + env.pop(key) + return env + + # ── Availability ── + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + if not shutil.which("claude"): + return False + # VSCode 중첩 세션에서는 SDK fallback 필요 + if self._use_sdk: + # SDK fallback 가능하면 available + return bool(self._get_sdk_api_key()) and self._sdk_importable() + try: + result = subprocess.run( + [_claude_executable(), "auth", "status"], + capture_output=True, + text=True, + timeout=10, + env=self._clean_env(), + ) + return result.returncode == 0 + except (subprocess.TimeoutExpired, OSError): + return False + + @staticmethod + def _sdk_importable() -> bool: + try: + import anthropic # noqa: F401 + + return True + except ImportError: + return False + + def _ensure_available(self) -> None: + if not shutil.which("claude"): + from dartlab.ai.providers.support.cli_setup import get_claude_code_install_guide + + raise FileNotFoundError(f"Claude Code CLI를 찾을 수 없습니다.\n\n{get_claude_code_install_guide()}") + try: + result = subprocess.run( + [_claude_executable(), "auth", "status"], + capture_output=True, + text=True, + timeout=10, + env=self._clean_env(), + ) + if result.returncode != 0: + raise PermissionError( + "Claude Code CLI가 설치되어 있지만 인증이 필요합니다.\n\n" + " claude auth login\n\n" + "Claude Pro/Max 구독 계정으로 로그인하세요." + ) + except (subprocess.TimeoutExpired, OSError): + pass + + # ── SDK Fallback ── + + def _get_sdk_api_key(self) -> str | None: + """claude auth 세션에서 API 키를 추출하거나 환경변수에서 가져온다.""" + if self._api_key_from_cli: + return self._api_key_from_cli + + # 환경변수 우선 + api_key = os.environ.get("ANTHROPIC_API_KEY") + if api_key: + self._api_key_from_cli = api_key + return api_key + + # claude auth status에서 sessionKey 추출 시도 + try: + result = subprocess.run( + [_claude_executable(), "auth", "status", "--json"], + capture_output=True, + text=True, + timeout=10, + env=self._clean_env(), + ) + if result.returncode == 0: + data = json.loads(result.stdout) + # CLI 인증 세션의 API 키 + key = data.get("apiKey") or data.get("sessionKey") + if key: + self._api_key_from_cli = key + return key + except _CLAUDE_CODE_SDK_ERRORS: + pass + + return None + + def _get_sdk_client(self): + """Anthropic SDK 클라이언트 생성.""" + if self._sdk_client is not None: + return self._sdk_client + + try: + from anthropic import Anthropic + except ImportError: + raise ImportError( + "Claude Code CLI가 현재 환경에서 작동하지 않습니다 (VSCode 중첩 세션).\n" + "SDK fallback을 위해 anthropic 패키지가 필요합니다:\n" + " uv add anthropic\n\n" + "또는 별도 터미널에서 dartlab ai를 실행하세요." + ) + + api_key = self._get_sdk_api_key() + if not api_key: + raise RuntimeError( + "Claude Code CLI가 현재 환경에서 작동하지 않습니다 (VSCode 중첩 세션).\n\n" + "해결 방법:\n" + "1. Settings에서 'Anthropic Claude' provider로 전환 후 API 키 입력\n" + "2. 또는 별도 터미널(PowerShell/CMD)에서 dartlab ai 실행" + ) + + self._sdk_client = Anthropic(api_key=api_key) + return self._sdk_client + + def _probe_cli(self) -> bool: + """CLI가 실제로 응답하는지 빠르게 확인. hang되면 False.""" + if self._use_sdk: + return False + try: + result = subprocess.run( + [ + _claude_executable(), + "-p", + "ping", + "--output-format", + "json", + "--model", + self.resolved_model, + "--max-turns", + "1", + ], + stdin=subprocess.DEVNULL, + capture_output=True, + timeout=_CLI_PROBE_TIMEOUT, + env=self._clean_env(), + ) + return result.returncode == 0 + except (subprocess.TimeoutExpired, OSError): + self._use_sdk = True + return False + + # ── Messages 변환 ── + + def _split_messages(self, messages: list[dict[str, str]]) -> tuple[str, str | None]: + """messages에서 system/user를 분리.""" + system_parts: list[str] = [] + user_parts: list[str] = [] + for m in messages: + if m["role"] == "system": + system_parts.append(m["content"]) + else: + user_parts.append(m["content"]) + system = "\n\n".join(system_parts) if system_parts else None + user = "\n\n".join(user_parts) + return user, system + + def _split_messages_sdk(self, messages: list[dict[str, str]]) -> tuple[str, list[dict]]: + """messages를 Anthropic SDK 포맷으로 분리.""" + system_msg = "" + user_messages = [] + for m in messages: + if m["role"] == "system": + system_msg = m["content"] + else: + user_messages.append(m) + return system_msg, user_messages + + # ── CLI 명령 구성 ── + + def _build_cmd( + self, + prompt: str, + system: str | None, + *, + stream: bool = False, + ) -> list[str]: + fmt = "stream-json" if stream else "json" + cmd = [ + _claude_executable(), + "-p", + prompt, + "--output-format", + fmt, + "--model", + self.resolved_model, + "--max-turns", + "1", + ] + if system: + cmd.extend(["--system-prompt", system]) + return cmd + + # ── SDK complete/stream ── + + def _sdk_complete(self, messages: list[dict[str, str]]) -> LLMResponse: + client = self._get_sdk_client() + system_msg, user_messages = self._split_messages_sdk(messages) + model = self._resolve_sdk_model() + + response = client.messages.create( + model=model, + system=system_msg, + messages=user_messages, + max_tokens=self.config.max_tokens, + ) + return LLMResponse( + answer=response.content[0].text, + provider="claude-code", + model=response.model, + usage={ + "prompt_tokens": response.usage.input_tokens, + "completion_tokens": response.usage.output_tokens, + "total_tokens": response.usage.input_tokens + response.usage.output_tokens, + }, + ) + + def _sdk_stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + client = self._get_sdk_client() + system_msg, user_messages = self._split_messages_sdk(messages) + model = self._resolve_sdk_model() + + with client.messages.stream( + model=model, + system=system_msg, + messages=user_messages, + max_tokens=self.config.max_tokens, + ) as stream: + for text in stream.text_stream: + yield text + + # ── Public API ── + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + self._ensure_available() + + # SDK fallback이 이미 결정된 경우 + if self._use_sdk: + return self._sdk_complete(messages) + + # CLI 시도 + prompt, system = self._split_messages(messages) + cmd = self._build_cmd(prompt, system) + + try: + result = subprocess.run( + cmd, + stdin=subprocess.DEVNULL, + capture_output=True, + timeout=120, + env=self._clean_env(), + ) + except subprocess.TimeoutExpired: + raise TimeoutError("Claude Code CLI 호출이 시간 초과되었습니다.") + + if result.returncode != 0: + raw_err = result.stderr or b"" + stderr = ( + raw_err.decode("utf-8", errors="replace").strip() if isinstance(raw_err, bytes) else raw_err.strip() + ) + raise RuntimeError(f"Claude Code CLI 오류 (exit {result.returncode}):\n{stderr}") + + raw_out = result.stdout or b"" + stdout = raw_out.decode("utf-8", errors="replace") if isinstance(raw_out, bytes) else raw_out + data = json.loads(stdout) + + if data.get("is_error"): + raise RuntimeError(f"Claude Code CLI 오류: {data.get('result', 'unknown')}") + + answer = data.get("result", "") + usage: dict = {} + if data.get("total_cost_usd") is not None: + usage["total_cost_usd"] = data["total_cost_usd"] + if data.get("num_turns") is not None: + usage["num_turns"] = data["num_turns"] + if data.get("duration_ms") is not None: + usage["duration_ms"] = data["duration_ms"] + + return LLMResponse( + answer=answer, + provider="claude-code", + model=self.resolved_model, + usage=usage or None, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + self._ensure_available() + + # SDK fallback 모드 + if self._use_sdk: + yield from self._sdk_stream(messages) + return + + # CLI 시도 + prompt, system = self._split_messages(messages) + cmd = self._build_cmd(prompt, system, stream=True) + + proc = subprocess.Popen( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self._clean_env(), + ) + + try: + for raw_line in proc.stdout: # type: ignore[union-attr] + line = raw_line.decode("utf-8", errors="replace").strip() + if not line: + continue + try: + event = json.loads(line) + if event.get("type") == "assistant" and "content" in event: + for block in event["content"]: + if block.get("type") == "text": + yield block["text"] + elif event.get("type") == "result": + text = event.get("result", "") + if text: + yield text + except json.JSONDecodeError: + continue + finally: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() diff --git a/src/dartlab/ai/providers/codex.py b/src/dartlab/ai/providers/codex.py new file mode 100644 index 0000000000000000000000000000000000000000..6f652ec06dc3ff29f2f96d7baeefa6ebb0d23f52 --- /dev/null +++ b/src/dartlab/ai/providers/codex.py @@ -0,0 +1,115 @@ +"""OpenAI Codex CLI provider. + +ChatGPT Plus/Pro 구독 사용자가 API 키 없이 LLM을 사용할 수 있다. +사전 조건: codex CLI 설치 + 로그인 완료. +""" + +from __future__ import annotations + +import shutil +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.providers.support import codex_cli +from dartlab.ai.types import LLMResponse + + +class CodexProvider(BaseProvider): + """OpenAI Codex CLI 기반 provider.""" + + @property + def default_model(self) -> str: + """기본 모델명.""" + return codex_cli.get_codex_configured_model() or "gpt-4.1" + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + info = codex_cli.inspect_codex_cli() + return bool(info.get("installed") and info.get("authenticated")) + + def _ensure_available(self) -> None: + if not shutil.which("codex"): + from dartlab.ai.providers.support.cli_setup import get_codex_install_guide + + raise FileNotFoundError(f"Codex CLI를 찾을 수 없습니다.\n\n{get_codex_install_guide()}") + + info = codex_cli.inspect_codex_cli() + if not info.get("installed"): + from dartlab.ai.providers.support.cli_setup import get_codex_install_guide + + raise FileNotFoundError(f"Codex CLI를 찾을 수 없습니다.\n\n{get_codex_install_guide()}") + if not info.get("authenticated"): + raise PermissionError( + "Codex CLI가 설치되어 있지만 로그인이 필요합니다.\n\n" + " codex login\n\n" + "ChatGPT 계정으로 로그인한 뒤 다시 시도하세요." + ) + + def _build_prompt(self, messages: list[dict[str, str]]) -> str: + """messages를 단일 프롬프트로 합성.""" + parts: list[str] = [] + for m in messages: + if m["role"] == "system": + parts.insert(0, f"[System Instructions]\n{m['content']}\n") + else: + parts.append(m["content"]) + return "\n\n".join(parts) + + def _select_sandbox(self, messages: list[dict[str, str]]) -> str: + return codex_cli.infer_codex_sandbox(messages) + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + self._ensure_available() + prompt = self._build_prompt(messages) + sandbox = self._select_sandbox(messages) + answer, usage = codex_cli.run_codex_exec( + prompt, + model=self.resolved_model, + sandbox=sandbox, + timeout=300, + ) + + return LLMResponse( + answer=answer, + provider="codex", + model=self.resolved_model, + usage=usage, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + self._ensure_available() + prompt = self._build_prompt(messages) + sandbox = self._select_sandbox(messages) + full_text, _usage = codex_cli.run_codex_exec( + prompt, + model=self.resolved_model, + sandbox=sandbox, + timeout=300, + ) + + if full_text: + yield from _simulate_stream(full_text) + + +def _simulate_stream(text: str) -> Generator[str, None, None]: + """전체 텍스트를 문장 단위로 잘라 yield — 타이핑 효과.""" + import re + + chunks = re.split(r"(?<=\n)", text) + for chunk in chunks: + if not chunk: + continue + if len(chunk) > 200: + words = chunk.split(" ") + buf = "" + for w in words: + buf += w + " " + if len(buf) >= 40: + yield buf + buf = "" + if buf: + yield buf + else: + yield chunk diff --git a/src/dartlab/ai/providers/fallback.py b/src/dartlab/ai/providers/fallback.py new file mode 100644 index 0000000000000000000000000000000000000000..388ae84b4adf359f48b81b283abfc05f88d3147d --- /dev/null +++ b/src/dartlab/ai/providers/fallback.py @@ -0,0 +1,108 @@ +"""Rate limit 자동 fallback 체인.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Generator + +from dartlab.ai.providers.base import RateLimitError + +if TYPE_CHECKING: + from dartlab.ai.types import LLMConfig, LLMResponse, ToolResponse + +log = logging.getLogger(__name__) + +DEFAULT_FREE_CHAIN: tuple[str, ...] = ("groq", "cerebras", "mistral", "gemini") + + +def buildFreeChain() -> list[str]: + """API 키가 등록된 무료 프로바이더만 포함한 체인을 구성한다.""" + import os + + from dartlab.core.ai.providers import get_provider_spec + + chain: list[str] = [] + for name in DEFAULT_FREE_CHAIN: + spec = get_provider_spec(name) + if spec is None: + continue + if spec.env_key and os.environ.get(spec.env_key): + chain.append(name) + continue + try: + from dartlab.core.ai.providers import api_key_secret_name + from dartlab.core.ai.secrets import get_secret_store + + store = get_secret_store() + if store.get(api_key_secret_name(name)): + chain.append(name) + except (ImportError, RuntimeError, KeyError): + pass + return chain + + +class FallbackChain: + """Rate limit 시 자동 프로바이더 전환. + + 429/503 에러가 발생하면 다음 프로바이더로 자동 전환한다. + 모든 프로바이더가 실패하면 마지막 에러를 전파한다. + """ + + def __init__(self, chain: list[str] | None = None): + self._chain = chain if chain is not None else buildFreeChain() + + @property + def providers(self) -> list[str]: + """fallback 체인에 포함된 provider 이름 목록.""" + return list(self._chain) + + @property + def empty(self) -> bool: + """사용 가능한 provider가 없는지 여부.""" + return len(self._chain) == 0 + + def _resolve(self, providerName: str) -> "LLMConfig": + """프로바이더 이름으로 LLMConfig를 생성한다.""" + from dartlab.ai import get_config + + config = get_config() + return config.merge({"provider": providerName}) + + def _create(self, providerName: str): + """프로바이더 인스턴스를 생성한다.""" + from dartlab.ai.providers import create_provider + + return create_provider(self._resolve(providerName)) + + def complete(self, messages: list[dict], tools: list[dict] | None = None) -> "LLMResponse | ToolResponse": + """순서대로 시도, 429/503이면 다음 프로바이더로.""" + lastError: Exception | None = None + for name in self._chain: + try: + provider = self._create(name) + if tools and provider.supports_native_tools: + return provider.complete_with_tools(messages, tools) + return provider.complete(messages) + except RateLimitError as e: + log.info("fallback: %s rate limit, 다음 프로바이더로 전환", name) + lastError = e + continue + if lastError: + raise lastError + raise RuntimeError("fallback 체인에 사용 가능한 프로바이더가 없습니다") + + def stream(self, messages: list[dict]) -> Generator[str, None, None]: + """스트리밍 fallback — 첫 성공 프로바이더의 스트림을 반환.""" + lastError: Exception | None = None + for name in self._chain: + try: + provider = self._create(name) + yield from provider.stream(messages) + return + except RateLimitError as e: + log.info("fallback stream: %s rate limit, 다음 프로바이더로 전환", name) + lastError = e + continue + if lastError: + raise lastError + raise RuntimeError("fallback 체인에 사용 가능한 프로바이더가 없습니다") diff --git a/src/dartlab/ai/providers/gemini.py b/src/dartlab/ai/providers/gemini.py new file mode 100644 index 0000000000000000000000000000000000000000..c2d223dd6a46e701df52dd8fbb01ac4d6551c6bf --- /dev/null +++ b/src/dartlab/ai/providers/gemini.py @@ -0,0 +1,311 @@ +"""Google Gemini provider. + +인증: API key (AI Studio에서 무료 발급) +- https://aistudio.google.com/apikey 에서 발급 +- GEMINI_API_KEY 또는 GOOGLE_API_KEY 환경변수, 또는 설정 패널에서 입력 +""" + +from __future__ import annotations + +import logging +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.types import LLMConfig, LLMResponse, ToolCall, ToolResponse + +_log = logging.getLogger(__name__) + + +class GeminiProvider(BaseProvider): + """Google Gemini API provider. + + google-genai SDK 기반. API key 인증. + """ + + def __init__(self, config: LLMConfig): + super().__init__(config) + self._client = None + + def _get_client(self): + if self._client is not None: + return self._client + try: + from google import genai + except ImportError: + raise ImportError("google-genai 패키지가 필요합니다.\n pip install google-genai") + + import os + + apiKey = self.config.api_key + if not apiKey: + apiKey = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") + + if not apiKey: + raise ValueError( + "Gemini API key가 필요합니다.\n" + " 발급: https://aistudio.google.com/apikey (무료)\n" + " 설정: GEMINI_API_KEY 환경변수 또는 설정 패널에서 입력" + ) + + self._client = genai.Client(api_key=apiKey) + return self._client + + @property + def default_model(self) -> str: + """기본 모델명.""" + return "gemini-2.5-flash" + + @property + def supports_native_tools(self) -> bool: + """네이티브 tool calling 지원 여부.""" + return True + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + try: + self._get_client() + return True + except (ImportError, ValueError, OSError): + return False + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + client = self._get_client() + + systemInstruction, contents = _splitSystemAndContents(messages) + + kwargs: dict = { + "model": self.resolved_model, + "contents": contents, + } + + from google.genai import types + + config = types.GenerateContentConfig( + temperature=self.config.temperature, + max_output_tokens=self.config.max_tokens, + ) + if systemInstruction: + config.system_instruction = systemInstruction + kwargs["config"] = config + + response = client.models.generate_content(**kwargs) + + usage = None + if response.usage_metadata: + usage = { + "prompt_tokens": response.usage_metadata.prompt_token_count or 0, + "completion_tokens": response.usage_metadata.candidates_token_count or 0, + "total_tokens": response.usage_metadata.total_token_count or 0, + } + + return LLMResponse( + answer=response.text or "", + provider="gemini", + model=self.resolved_model, + usage=usage, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + client = self._get_client() + + systemInstruction, contents = _splitSystemAndContents(messages) + + from google.genai import types + + config = types.GenerateContentConfig( + temperature=self.config.temperature, + max_output_tokens=self.config.max_tokens, + ) + if systemInstruction: + config.system_instruction = systemInstruction + + for chunk in client.models.generate_content_stream( + model=self.resolved_model, + contents=contents, + config=config, + ): + if chunk.text: + yield chunk.text + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + *, + tool_choice: str | None = None, + ) -> ToolResponse: + """tool calling 포함 완료 요청.""" + client = self._get_client() + from google.genai import types + + systemInstruction, contents = _splitSystemAndContents(messages) + + geminiTools = _convertToolsToGemini(tools) + + # tool_choice: "any" → 반드시 도구 호출, "auto" → LLM 판단, "none" → 도구 금지 + toolConfig = None + if tool_choice == "any": + toolConfig = types.ToolConfig( + function_calling_config=types.FunctionCallingConfig(mode="ANY"), + ) + elif tool_choice == "none": + toolConfig = types.ToolConfig( + function_calling_config=types.FunctionCallingConfig(mode="NONE"), + ) + + config = types.GenerateContentConfig( + temperature=self.config.temperature, + max_output_tokens=self.config.max_tokens, + tools=geminiTools, + ) + if toolConfig is not None: + config.tool_config = toolConfig + if systemInstruction: + config.system_instruction = systemInstruction + + response = client.models.generate_content( + model=self.resolved_model, + contents=contents, + config=config, + ) + + usage = None + if response.usage_metadata: + usage = { + "prompt_tokens": response.usage_metadata.prompt_token_count or 0, + "completion_tokens": response.usage_metadata.candidates_token_count or 0, + "total_tokens": response.usage_metadata.total_token_count or 0, + } + + toolCalls: list[ToolCall] = [] + answer = "" + finishReason = "stop" + + if response.candidates: + candidate = response.candidates[0] + for part in candidate.content.parts: + if part.text: + answer += part.text + if part.function_call: + fc = part.function_call + args = dict(fc.args) if fc.args else {} + toolCalls.append( + ToolCall( + id=f"call_{fc.name}_{len(toolCalls)}", + name=fc.name, + arguments=args, + ) + ) + if toolCalls: + finishReason = "tool_calls" + + return ToolResponse( + answer=answer, + provider="gemini", + model=self.resolved_model, + usage=usage, + tool_calls=toolCalls, + finish_reason=finishReason, + ) + + def format_assistant_tool_calls( + self, + answer: str | None, + tool_calls: list, + ) -> dict: + """Gemini native: model → functionCall parts.""" + from google.genai import types + + parts = [] + if answer: + parts.append(types.Part(text=answer)) + for tc in tool_calls: + parts.append(types.Part(function_call=types.FunctionCall(name=tc.name, args=tc.arguments))) + return {"role": "model", "parts": parts, "_gemini_native": True} + + def format_tool_result(self, tool_call_id: str, result: str) -> dict: + """Gemini native: user → functionResponse parts.""" + from google.genai import types + + # tool_call_id에서 함수 이름 추출 (call_{name}_{idx} 형식) + name = tool_call_id + if name.startswith("call_") and "_" in name[5:]: + name = name[5:].rsplit("_", 1)[0] + parts = [types.Part(function_response=types.FunctionResponse(name=name, response={"result": result}))] + return {"role": "user", "parts": parts, "_gemini_native": True} + + +def _splitSystemAndContents(messages: list[dict]) -> tuple[str | None, list]: + """OpenAI 메시지 형식 → Gemini system/contents 분리.""" + system = None + contents = [] + + for msg in messages: + # Gemini native 형식 메시지는 그대로 전달 + if msg.get("_gemini_native"): + role = msg.get("role", "user") + contents.append({"role": role, "parts": msg["parts"]}) + continue + + role = msg.get("role", "user") + content = msg.get("content", "") + + if role == "system": + system = content + elif role == "assistant": + contents.append({"role": "model", "parts": [{"text": content or ""}]}) + elif role == "tool": + # fallback: OpenAI 형식 tool result → text로 변환 + contents.append( + { + "role": "user", + "parts": [{"text": f"[Tool Result] {content}"}], + } + ) + else: + contents.append({"role": "user", "parts": [{"text": content or ""}]}) + + return system, contents + + +def _convertToolsToGemini(tools: list[dict]) -> list: + """OpenAI tool schema → Gemini tool 형식 변환.""" + from google.genai import types + + declarations = [] + for tool in tools: + if tool.get("type") != "function": + continue + func = tool["function"] + params = func.get("parameters", {}) + + cleanedParams = _cleanSchemaForGemini(params) + + declarations.append( + types.FunctionDeclaration( + name=func["name"], + description=func.get("description", ""), + parameters=cleanedParams if cleanedParams.get("properties") else None, + ) + ) + + if not declarations: + return [] + return [types.Tool(function_declarations=declarations)] + + +def _cleanSchemaForGemini(schema: dict) -> dict: + """JSON Schema에서 Gemini 미지원 속성 제거.""" + cleaned = {} + for key, value in schema.items(): + if key in ("additionalProperties", "$schema"): + continue + if isinstance(value, dict): + cleaned[key] = _cleanSchemaForGemini(value) + elif isinstance(value, list): + cleaned[key] = [_cleanSchemaForGemini(item) if isinstance(item, dict) else item for item in value] + else: + cleaned[key] = value + return cleaned diff --git a/src/dartlab/ai/providers/oauth_codex.py b/src/dartlab/ai/providers/oauth_codex.py new file mode 100644 index 0000000000000000000000000000000000000000..96536f38395fd3ba46c436b5850bb4a54610c099 --- /dev/null +++ b/src/dartlab/ai/providers/oauth_codex.py @@ -0,0 +1,563 @@ +"""ChatGPT OAuth 기반 Codex provider — 진짜 SSE 스트리밍. + +ChatGPT Plus/Pro 구독 계정의 OAuth 토큰으로 +chatgpt.com/backend-api 엔드포인트에 직접 SSE 스트리밍 요청. +Codex CLI 없이 동작하며, 토큰 단위 실시간 스트리밍을 지원한다. + +비공식 API이므로 예고 없이 변경될 수 있다. +에러 발생 시 구체적 사유를 분류하여 사용자에게 안내한다. +STATUS.md의 "브레이킹 체인지 대응 순서" 참조. + +참고: opencode-openai-codex-auth 프로젝트의 접근법. +""" + +from __future__ import annotations + +import json +import logging +from typing import Generator + +import httpx + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.providers.support import oauth_token as oauthToken +from dartlab.ai.providers.support.oauth_token import TokenRefreshError +from dartlab.ai.types import LLMResponse, ToolCall, ToolResponse + +log = logging.getLogger(__name__) + +CODEX_API_BASE = "https://chatgpt.com/backend-api" +CODEX_RESPONSES_PATH = "/codex/responses" + +_BUNDLED_MODELS = [ + "gpt-5.4", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex-max", +] + +AVAILABLE_MODELS = list(_BUNDLED_MODELS) + +_MODELS_CACHE: list[str] | None = None +_MODELS_CACHE_TS: float = 0.0 +_MODELS_CACHE_TTL = 300.0 # 5분 + + +def _fetchRemoteModels(token: str) -> list[str] | None: + """원격 /models API에서 사용 가능한 모델 목록 조회 (Codex CLI 동일 방식).""" + url = f"{CODEX_API_BASE}/codex/models" + headers = { + "Authorization": f"Bearer {token}", + "originator": "codex_cli_rs", + } + accountId = oauthToken.get_account_id() + if accountId: + headers["chatgpt-account-id"] = accountId + try: + resp = httpx.get(url, headers=headers, timeout=10) + if resp.status_code != 200: + return None + data = resp.json() + models = [] + for item in data if isinstance(data, list) else data.get("models", data.get("data", [])): + modelId = item.get("id") or item.get("model") if isinstance(item, dict) else str(item) + if modelId: + models.append(modelId) + return models if models else None + except (httpx.HTTPError, json.JSONDecodeError, ValueError): + return None + + +def availableModels() -> list[str]: + """사용 가능한 모델 목록 — 원격 조회 + 캐시 + 번들 fallback.""" + import time + + global _MODELS_CACHE, _MODELS_CACHE_TS + now = time.time() + if _MODELS_CACHE and (now - _MODELS_CACHE_TS) < _MODELS_CACHE_TTL: + return _MODELS_CACHE + + try: + token = oauthToken.get_valid_token() + except (TokenRefreshError, OSError): + token = None + + if token: + remote = _fetchRemoteModels(token) + if remote: + _MODELS_CACHE = remote + _MODELS_CACHE_TS = now + return remote + + _MODELS_CACHE = list(_BUNDLED_MODELS) + _MODELS_CACHE_TS = now + return _MODELS_CACHE + + +class ChatGPTOAuthError(Exception): + """ChatGPT OAuth provider 에러 — action 필드로 사용자 대응 안내.""" + + def __init__(self, action: str, message: str, *, detail: str = ""): + self.action = action + self.message = message + self.detail = detail + super().__init__(message) + + +def _raise_http_error(status: int, body: str) -> None: + """HTTP 상태코드별 구체적 에러.""" + if status == 401: + raise ChatGPTOAuthError( + "relogin", + "ChatGPT 인증이 만료되었습니다. 설정에서 재로그인하세요.", + detail=f"HTTP 401: {body[:200]}", + ) + if status == 403: + raise ChatGPTOAuthError( + "check_headers", + "ChatGPT API 접근이 거부되었습니다. " + "OpenAI가 요청 헤더 검증을 변경했을 수 있습니다. " + "openai/codex 레포에서 최신 헤더를 확인하세요.", + detail=f"HTTP 403: {body[:200]}", + ) + if status == 404: + raise ChatGPTOAuthError( + "check_endpoint", + "ChatGPT API 엔드포인트를 찾을 수 없습니다. " + "OpenAI가 URL을 변경했을 수 있습니다. " + "openai/codex 레포에서 최신 엔드포인트를 확인하세요.", + detail=f"HTTP 404: {body[:200]}", + ) + if status == 429: + reset_msg = "" + try: + err = json.loads(body) + secs = err.get("error", {}).get("resets_in_seconds") + if secs: + hours = secs / 3600 + reset_msg = f" (약 {hours:.1f}시간 후 리셋)" + except (json.JSONDecodeError, ValueError, TypeError): + pass + raise ChatGPTOAuthError( + "rate_limit", + f"ChatGPT API 요청 한도를 초과했습니다.{reset_msg} 잠시 후 다시 시도하세요.", + detail=f"HTTP 429: {body[:200]}", + ) + raise ChatGPTOAuthError( + "unknown", + f"ChatGPT API 오류가 발생했습니다 (HTTP {status}).", + detail=body[:300], + ) + + +def _detect_plan_type() -> str: + """JWT에서 ChatGPT plan_type 추출. 실패 시 'plus' 반환.""" + import base64 + + token_data = oauthToken.load_token() + if not token_data: + return "plus" + access = token_data.get("access_token", "") + parts = access.split(".") + if len(parts) != 3: + return "plus" + payload_b64 = parts[1] + padding = 4 - len(payload_b64) % 4 + if padding != 4: + payload_b64 += "=" * padding + try: + payload = json.loads(base64.urlsafe_b64decode(payload_b64).decode("utf-8")) + except (json.JSONDecodeError, ValueError): + return "plus" + auth_claim = payload.get("https://api.openai.com/auth", {}) + return auth_claim.get("chatgpt_plan_type", "plus") if isinstance(auth_claim, dict) else "plus" + + +class OAuthCodexProvider(BaseProvider): + """ChatGPT OAuth 기반 Codex provider.""" + + @property + def default_model(self) -> str: + """기본 모델 — gpt-5.4 (Codex CLI 동일).""" + return "gpt-5.4" + + @property + def supports_native_tools(self) -> bool: + """네이티브 tool calling 지원 여부.""" + return True + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + try: + return oauthToken.is_authenticated() + except TokenRefreshError: + return False + + def _get_token_or_raise(self) -> str: + """유효한 토큰 반환. 실패 시 구체적 에러.""" + try: + token = oauthToken.get_valid_token() + except TokenRefreshError as e: + if e.reason == "client_changed": + raise ChatGPTOAuthError("check_client_id", e.detail) from e + raise ChatGPTOAuthError( + "relogin", + f"ChatGPT 토큰 갱신 실패: {e.detail}", + ) from e + if not token: + raise ChatGPTOAuthError( + "login", + "ChatGPT OAuth 인증이 필요합니다. 설정에서 로그인하세요.", + ) + return token + + def _request_with_retry(self, token: str, body: dict, *, stream: bool = False): + """요청 + 401 시 refresh 재시도. 실패 시 구체적 에러.""" + url = f"{CODEX_API_BASE}{CODEX_RESPONSES_PATH}" + headers = self._build_headers(token) + + def _do_request(hdrs: dict[str, str]) -> httpx.Response: + if stream: + client = httpx.Client(timeout=90) + req = client.build_request("POST", url, headers=hdrs, json=body) + return client.send(req, stream=True) + return httpx.post(url, headers=hdrs, json=body, timeout=90) + + try: + resp = _do_request(headers) + except httpx.ConnectError: + raise ChatGPTOAuthError( + "network", + "ChatGPT 서버에 연결할 수 없습니다. 네트워크를 확인하세요.", + ) + except httpx.TimeoutException: + raise ChatGPTOAuthError( + "network", + "ChatGPT 서버 응답 시간이 초과되었습니다. 잠시 후 다시 시도하세요.", + ) + + if resp.status_code == 401: + if stream: + resp.close() + try: + refreshed = oauthToken.refresh_access_token() + except TokenRefreshError as e: + raise ChatGPTOAuthError( + "relogin", + f"토큰 갱신 실패 ({e.reason}): {e.detail}", + ) from e + if refreshed: + headers = self._build_headers(refreshed["access_token"]) + resp = _do_request(headers) + + if resp.status_code != 200: + if stream: + resp.read() + _raise_http_error(resp.status_code, resp.text[:500]) + + return resp + + def _build_headers(self, token: str) -> dict[str, str]: + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "originator": "codex_cli_rs", + "accept": "text/event-stream", + } + account_id = oauthToken.get_account_id() + if account_id: + headers["chatgpt-account-id"] = account_id + return headers + + def _build_body( + self, + messages: list[dict], + *, + tools: list[dict] | None = None, + tool_choice: str | None = None, + ) -> dict: + system_parts = [] + input_items = [] + + for m in messages: + role = m["role"] + raw_content = m.get("content", "") + + # content가 list(Claude cache_control 등)이면 텍스트만 결합 + if isinstance(raw_content, list): + text_content = "\n\n".join( + block.get("text", "") for block in raw_content if isinstance(block, dict) and block.get("text") + ) + else: + text_content = raw_content or "" + + if role == "system": + system_parts.append(text_content) + elif role == "assistant": + # tool_calls가 포함된 assistant 메시지 + if "_oauth_tool_calls" in m: + for tc in m["_oauth_tool_calls"]: + input_items.append( + { + "type": "function_call", + "call_id": tc["id"], + "name": tc["name"], + "arguments": tc["arguments"], + } + ) + if text_content: + input_items.append( + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": text_content}], + } + ) + else: + input_items.append( + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": text_content}], + } + ) + elif role == "tool": + # tool result → function_call_output + input_items.append( + { + "type": "function_call_output", + "call_id": m.get("tool_call_id", ""), + "output": text_content, + } + ) + else: + input_items.append( + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": text_content}], + } + ) + + body: dict = { + "model": self.resolved_model, + "stream": True, + "store": False, + "input": input_items, + "include": ["reasoning.encrypted_content"], + } + + if system_parts: + body["instructions"] = "\n\n".join(system_parts) + + if tools: + responsesTools = [] + for t in tools: + if t.get("type") != "function": + continue + func = t["function"] + responsesTools.append( + { + "type": "function", + "name": func["name"], + "description": func.get("description", ""), + "parameters": func.get("parameters", {}), + } + ) + if responsesTools: + body["tools"] = responsesTools + + # Responses API tool_choice: "auto"(기본), "required"(=any), "none" + if tool_choice and tool_choice != "auto": + if tool_choice == "any": + body["tool_choice"] = "required" + else: + body["tool_choice"] = tool_choice + + return body + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + token = self._get_token_or_raise() + body = self._build_body(messages) + resp = self._request_with_retry(token, body) + + answer = self._parse_sse_response(resp.text) + if not answer: + log.warning("SSE 응답에서 텍스트를 추출하지 못함 — 이벤트 포맷 변경 의심") + raise ChatGPTOAuthError( + "check_sse", + "ChatGPT 응답은 수신되었지만 텍스트를 추출할 수 없습니다. " + "OpenAI가 SSE 이벤트 포맷을 변경했을 수 있습니다. " + "openai/codex 레포에서 최신 이벤트 타입을 확인하세요.", + ) + + return LLMResponse( + answer=answer, + provider="oauth-codex", + model=self.resolved_model, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + token = self._get_token_or_raise() + body = self._build_body(messages) + resp = self._request_with_retry(token, body, stream=True) + + has_content = False + yielded_final_message = False + event_types_seen: set[str] = set() + + for raw_line in resp.iter_lines(): + if not raw_line: + continue + line = raw_line if isinstance(raw_line, str) else raw_line.decode("utf-8") + if not line.startswith("data: "): + continue + + data_str = line[6:] + if data_str == "[DONE]": + break + + try: + event = json.loads(data_str) + except json.JSONDecodeError: + continue + + event_type = event.get("type", "") + if event_type: + event_types_seen.add(event_type) + + if event_type == "response.output_text.delta": + delta = event.get("delta", "") + if delta: + has_content = True + yield delta + + elif event_type == "response.content_part.delta": + delta = event.get("delta", {}) + text = delta.get("text", "") if isinstance(delta, dict) else "" + if text: + has_content = True + yield text + + elif event_type == "response.output_item.done": + if has_content: + continue + item = event.get("item", {}) + if item.get("type") == "message": + for content in item.get("content", []): + if content.get("type") == "output_text": + text = content.get("text", "") + if text and not yielded_final_message: + has_content = True + yielded_final_message = True + yield text + + if not has_content and event_types_seen: + log.warning( + "SSE 스트림에서 텍스트 없음 — 수신된 이벤트 타입: %s", + ", ".join(sorted(event_types_seen)), + ) + yield ( + "\n\n---\n" + "[ChatGPT 응답 수신 실패] SSE 이벤트는 도착했지만 텍스트를 추출하지 못했습니다. " + f"수신된 이벤트 타입: {', '.join(sorted(event_types_seen))}. " + "OpenAI가 SSE 포맷을 변경했을 수 있습니다." + ) + + def _parse_sse_response(self, raw: str) -> str: + """완료된 SSE 응답에서 최종 텍스트 추출.""" + answer = "" + for line in raw.split("\n"): + if not line.startswith("data: "): + continue + data_str = line[6:] + if data_str == "[DONE]": + break + try: + event = json.loads(data_str) + except json.JSONDecodeError: + continue + + if event.get("type") == "response.completed": + resp_obj = event.get("response", {}) + for output in resp_obj.get("output", []): + if output.get("type") == "message": + for content in output.get("content", []): + if content.get("type") == "output_text": + answer = content.get("text", "") + elif event.get("type") == "response.output_text.delta": + answer += event.get("delta", "") + + return answer + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + *, + tool_choice: str | None = None, + ) -> ToolResponse: + """Responses API tool calling.""" + token = self._get_token_or_raise() + body = self._build_body(messages, tools=tools, tool_choice=tool_choice) + resp = self._request_with_retry(token, body) + + deltaAnswer = "" + completedAnswer = "" + toolCalls: list[ToolCall] = [] + + for line in resp.text.split("\n"): + if not line.startswith("data: "): + continue + data_str = line[6:] + if data_str == "[DONE]": + break + try: + event = json.loads(data_str) + except json.JSONDecodeError: + continue + + if event.get("type") == "response.completed": + resp_obj = event.get("response", {}) + for output in resp_obj.get("output", []): + if output.get("type") == "function_call": + callId = output.get("call_id") or output.get("id", f"call_{len(toolCalls)}") + name = output.get("name", "") + rawArgs = output.get("arguments", "{}") + try: + args = json.loads(rawArgs) if isinstance(rawArgs, str) else rawArgs + except json.JSONDecodeError: + args = {} + toolCalls.append(ToolCall(id=callId, name=name, arguments=args)) + elif output.get("type") == "message": + for content in output.get("content", []): + if content.get("type") == "output_text": + completedAnswer += content.get("text", "") + elif event.get("type") == "response.output_text.delta": + deltaAnswer += event.get("delta", "") + + # completed가 더 완전하므로 우선 사용, 없으면 delta 누적본 사용 + answer = completedAnswer or deltaAnswer + + finishReason = "tool_calls" if toolCalls else "stop" + return ToolResponse( + answer=answer, + provider="oauth-codex", + model=self.resolved_model, + tool_calls=toolCalls, + finish_reason=finishReason, + ) + + def format_assistant_tool_calls(self, answer: str | None, tool_calls: list) -> dict: + """assistant 메시지에 tool_calls 정보 포함.""" + serialized = [] + for tc in tool_calls: + rawArgs = json.dumps(tc.arguments, ensure_ascii=False) if isinstance(tc.arguments, dict) else tc.arguments + serialized.append({"id": tc.id, "name": tc.name, "arguments": rawArgs}) + return {"role": "assistant", "content": answer or "", "_oauth_tool_calls": serialized} + + def format_tool_result(self, tool_call_id: str, result: str) -> dict: + """tool result -> Responses API function_call_output.""" + return {"role": "tool", "tool_call_id": tool_call_id, "content": result} diff --git a/src/dartlab/ai/providers/ollama.py b/src/dartlab/ai/providers/ollama.py new file mode 100644 index 0000000000000000000000000000000000000000..a4c50cce830e30cdc755fc9d7eb3fdd1cd09d609 --- /dev/null +++ b/src/dartlab/ai/providers/ollama.py @@ -0,0 +1,288 @@ +"""Ollama 로컬 LLM provider.""" + +from __future__ import annotations + +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider +from dartlab.ai.types import LLMConfig, LLMResponse, ToolCall, ToolResponse + +OLLAMA_DEFAULT_URL = "http://localhost:11434" + + +def _buildInferenceOptions() -> dict: + """GPU VRAM 기반 Ollama 추론 옵션 자동 결정.""" + from dartlab.ai.providers.support.ollama_setup import _detect_gpu + + gpu = _detect_gpu() + options: dict = {"num_gpu": 999 if gpu["available"] else 0} + + vram = gpu.get("vram_mb") or 0 + if vram >= 12000: + options["num_ctx"] = 8192 + options["num_batch"] = 1024 + elif vram >= 6000: + options["num_ctx"] = 4096 + options["num_batch"] = 512 + else: + options["num_ctx"] = 2048 + options["num_batch"] = 256 + + # Flash Attention — Ollama 0.5+에서 KV 캐시 메모리 절약 + 속도 향상 + options["flash_attn"] = True + return options + + +# VRAM별 권장 모델 (양자화 포함, 큰 VRAM순) +_VRAM_MODEL_TIERS: list[tuple[int, str, str]] = [ + (24000, "qwen3:32b-q4_K_M", "32B 4bit — 최고 품질"), + (12000, "qwen3:14b-q4_K_M", "14B 4bit — 고품질"), + (8000, "qwen3:8b-q4_K_M", "8B 4bit — 균형"), + (6000, "qwen3:4b-q4_K_M", "4B 4bit — 경량"), + (4000, "qwen3:1.7b-q4_K_M", "1.7B 4bit — 최경량"), + (0, "qwen3:0.6b", "0.6B — CPU 전용"), +] + + +def recommendModel(vramMb: int | None = None) -> dict: + """VRAM 기반 최적 모델 추천. + + Returns: + {"model": str, "description": str, "vram_mb": int} + """ + if vramMb is None: + from dartlab.ai.providers.support.ollama_setup import _detect_gpu + + gpu = _detect_gpu() + vramMb = gpu.get("vram_mb") or 0 + + for minVram, model, desc in _VRAM_MODEL_TIERS: + if vramMb >= minVram: + return {"model": model, "description": desc, "vram_mb": vramMb} + return {"model": _VRAM_MODEL_TIERS[-1][1], "description": _VRAM_MODEL_TIERS[-1][2], "vram_mb": vramMb} + + +class OllamaProvider(BaseProvider): + """Ollama 로컬 provider. + + OpenAI 호환 엔드포인트 사용 (localhost:11434/v1). + Ollama 미설치/미실행 시 OS별 설치 안내 제공. + """ + + @property + def supports_native_tools(self) -> bool: + """Ollama v0.3.0+는 네이티브 tool calling 지원.""" + return True + + def __init__(self, config: LLMConfig): + super().__init__(config) + self._client = None + self._base_url = config.base_url or f"{OLLAMA_DEFAULT_URL}/v1" + + @property + def default_model(self) -> str: + """기본 모델명.""" + # 설치된 모델 중 첫 번째 사용, 없으면 llama3.1 fallback + models = self.get_installed_models() + if models: + return models[0] + return "llama3.1" + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + import httpx + + try: + resp = httpx.get(f"{OLLAMA_DEFAULT_URL}/api/tags", timeout=2) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException): + return False + + def get_installed_models(self) -> list[str]: + """설치된 모델 목록.""" + import httpx + + try: + resp = httpx.get(f"{OLLAMA_DEFAULT_URL}/api/tags", timeout=2) + data = resp.json() + names = [] + for m in data.get("models", []): + name = m["name"] + # ":latest" 태그 제거 (표시 간결화) + if name.endswith(":latest"): + name = name[:-7] + names.append(name) + return names + except (httpx.HTTPError, AttributeError, KeyError, OSError, TypeError, ValueError): + return [] + + def preload(self, *, keepAliveMinutes: int = 30) -> bool: + """모델을 메모리에 미리 로딩 + 최적 추론 옵션 적용. + + 서버 시작 시 호출하면 첫 질문의 cold start 지연을 제거한다. + GPU VRAM 기반으로 num_gpu/num_ctx/num_batch/flash_attn을 자동 결정하여 + 모델 로드 시점에 고정한다. + + Args: + keepAliveMinutes: 모델 메모리 유지 시간(분). -1이면 무기한. + """ + import httpx + + options = _buildInferenceOptions() + keepAlive = f"{keepAliveMinutes}m" if keepAliveMinutes > 0 else -1 + try: + resp = httpx.post( + f"{OLLAMA_DEFAULT_URL}/api/generate", + json={ + "model": self.resolved_model, + "prompt": "", + "keep_alive": keepAlive, + "stream": False, + "options": options, + }, + timeout=120, + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException): + return False + + def unload(self) -> bool: + """모델을 메모리에서 즉시 해제.""" + import httpx + + try: + resp = httpx.post( + f"{OLLAMA_DEFAULT_URL}/api/generate", + json={"model": self.resolved_model, "prompt": "", "keep_alive": 0, "stream": False}, + timeout=10, + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException): + return False + + def serverVersion(self) -> str | None: + """Ollama 서버 버전 조회.""" + import httpx + + try: + resp = httpx.get(f"{OLLAMA_DEFAULT_URL}/api/version", timeout=2) + if resp.status_code == 200: + return resp.json().get("version") + except (httpx.HTTPError, ValueError, KeyError, OSError): + pass + return None + + def _ensure_available(self): + if not self.check_available(): + from dartlab.ai.providers.support.ollama_setup import get_install_guide + + raise ConnectionError(f"Ollama 서버에 접근할 수 없습니다 ({OLLAMA_DEFAULT_URL}).\n\n{get_install_guide()}") + + def _get_client(self): + if self._client is None: + self._ensure_available() + try: + from openai import OpenAI + except ImportError: + raise ImportError("openai 패키지가 필요합니다.\n pip install --upgrade dartlab") + self._client = OpenAI(base_url=self._base_url, api_key="ollama") + return self._client + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + client = self._get_client() + response = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + ) + return LLMResponse( + answer=response.choices[0].message.content or "", + provider="ollama", + model=self.resolved_model, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + client = self._get_client() + stream = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + stream=True, + ) + for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: + yield chunk.choices[0].delta.content + + def complete_json( + self, + messages: list[dict[str, str]], + schema: dict | None = None, + ) -> LLMResponse: + """JSON 구조 강제 completion (Guided Generation). + + Args: + schema: JSON Schema dict. None이면 단순 json_object 모드. + """ + client = self._get_client() + if schema: + response_format = { + "type": "json_schema", + "json_schema": {"name": "analysis", "schema": schema}, + } + else: + response_format = {"type": "json_object"} + + response = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + response_format=response_format, + ) + return LLMResponse( + answer=response.choices[0].message.content or "", + provider="ollama", + model=self.resolved_model, + ) + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + *, + tool_choice: str | None = None, + ) -> ToolResponse: + """Ollama tool calling 지원 (v0.3.0+).""" + import json + + client = self._get_client() + kwargs: dict = { + "model": self.resolved_model, + "messages": messages, + "temperature": self.config.temperature, + } + if tools: + kwargs["tools"] = tools + + response = client.chat.completions.create(**kwargs) + choice = response.choices[0] + + tool_calls = [] + if choice.message.tool_calls: + for tc in choice.message.tool_calls: + tool_calls.append( + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=json.loads(tc.function.arguments), + ) + ) + + return ToolResponse( + answer=choice.message.content or "", + provider="ollama", + model=self.resolved_model, + tool_calls=tool_calls, + finish_reason=choice.finish_reason or "stop", + ) diff --git a/src/dartlab/ai/providers/openai_compat.py b/src/dartlab/ai/providers/openai_compat.py new file mode 100644 index 0000000000000000000000000000000000000000..3acd8a32d4c9fbd472e0350a637be84b451b9234 --- /dev/null +++ b/src/dartlab/ai/providers/openai_compat.py @@ -0,0 +1,208 @@ +"""OpenAI / GPT / OpenAI-compatible provider.""" + +from __future__ import annotations + +import json +from typing import Generator + +from dartlab.ai.providers.base import BaseProvider, RateLimitError +from dartlab.ai.types import LLMConfig, LLMResponse, ToolCall, ToolResponse + +try: + from openai import OpenAIError as _OpenAIError + from openai import RateLimitError as _OpenAIRateLimitError + + _OPENAI_COMPAT_ERRORS = (ImportError, OSError, RuntimeError, TypeError, ValueError, _OpenAIError) + _HAS_OPENAI = True +except ImportError: + _OPENAI_COMPAT_ERRORS = (ImportError, OSError, RuntimeError, TypeError, ValueError) + _HAS_OPENAI = False + + +def _wrap_rate_limit(provider: str, e: Exception) -> Exception: + """OpenAI SDK의 RateLimitError를 dartlab RateLimitError로 래핑.""" + if _HAS_OPENAI and isinstance(e, _OpenAIRateLimitError): + retryAfter = None + if hasattr(e, "response") and e.response is not None: + ra = e.response.headers.get("retry-after") + if ra: + try: + retryAfter = float(ra) + except ValueError: + pass + return RateLimitError(provider, str(e), retryAfter=retryAfter) + return e + + +_COMPAT_DEFAULTS: dict[str, dict[str, str]] = { + "groq": { + "base_url": "https://api.groq.com/openai/v1", + "default_model": "llama-3.3-70b-versatile", + }, + "cerebras": { + "base_url": "https://api.cerebras.ai/v1", + "default_model": "llama-3.3-70b", + }, + "mistral": { + "base_url": "https://api.mistral.ai/v1", + "default_model": "mistral-small-latest", + }, +} + + +class OpenAICompatProvider(BaseProvider): + """OpenAI SDK 기반 provider. + + GPT 직접 호출, CLIProxyAPI, 기타 OpenAI-compatible API 모두 지원. + base_url 설정으로 프록시/커스텀 엔드포인트 전환. + groq/cerebras/mistral 등 OpenAI 호환 프로바이더는 자동으로 base_url 설정. + """ + + def __init__(self, config: LLMConfig): + super().__init__(config) + self._client = None + self._defaults = _COMPAT_DEFAULTS.get(config.provider, {}) + if self._defaults and not config.base_url: + self.config.base_url = self._defaults["base_url"] + + def _get_client(self): + if self._client is None: + try: + from openai import OpenAI + except ImportError: + raise ImportError("openai 패키지가 필요합니다.\n pip install --upgrade dartlab") + kwargs = {} + apiKey = self.config.api_key + if not apiKey: + import os + + from dartlab.core.ai.providers import get_provider_spec + + spec = get_provider_spec(self.config.provider) + if spec and spec.env_key: + apiKey = os.environ.get(spec.env_key) + if apiKey: + kwargs["api_key"] = apiKey + if self.config.base_url: + kwargs["base_url"] = self.config.base_url + self._client = OpenAI(**kwargs) + return self._client + + @property + def default_model(self) -> str: + """기본 모델명.""" + return self._defaults.get("default_model", "gpt-4o") + + @property + def supports_native_tools(self) -> bool: + """네이티브 tool calling 지원 여부.""" + return True + + def check_available(self) -> bool: + """provider 사용 가능 여부 확인.""" + try: + self._get_client() + return True + except _OPENAI_COMPAT_ERRORS: + return False + + def complete(self, messages: list[dict[str, str]]) -> LLMResponse: + """동기 완료 요청.""" + client = self._get_client() + try: + response = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) + except Exception as e: # noqa: BLE001 + raise _wrap_rate_limit(self.config.provider, e) from e + choice = response.choices[0] + usage = None + if response.usage: + usage = { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + } + return LLMResponse( + answer=choice.message.content or "", + provider=self.config.provider, + model=response.model, + usage=usage, + ) + + def stream(self, messages: list[dict[str, str]]) -> Generator[str, None, None]: + """스트리밍 응답 생성.""" + client = self._get_client() + try: + response = client.chat.completions.create( + model=self.resolved_model, + messages=messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + stream=True, + ) + except Exception as e: # noqa: BLE001 + raise _wrap_rate_limit(self.config.provider, e) from e + for chunk in response: + if chunk.choices and chunk.choices[0].delta.content: + yield chunk.choices[0].delta.content + + def complete_with_tools( + self, + messages: list[dict], + tools: list[dict], + *, + tool_choice: str | None = None, + ) -> ToolResponse: + """OpenAI tool calling 지원.""" + client = self._get_client() + kwargs: dict = { + "model": self.resolved_model, + "messages": messages, + "temperature": self.config.temperature, + "max_tokens": self.config.max_tokens, + } + if tools: + kwargs["tools"] = tools + kwargs["parallel_tool_calls"] = False + if tool_choice == "any": + kwargs["tool_choice"] = "required" + elif tool_choice == "none": + kwargs["tool_choice"] = "none" + + try: + response = client.chat.completions.create(**kwargs) + except Exception as e: # noqa: BLE001 + raise _wrap_rate_limit(self.config.provider, e) from e + choice = response.choices[0] + + usage = None + if response.usage: + usage = { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + } + + tool_calls = [] + if choice.message.tool_calls: + for tc in choice.message.tool_calls: + tool_calls.append( + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=json.loads(tc.function.arguments), + ) + ) + + return ToolResponse( + answer=choice.message.content or "", + provider=self.config.provider, + model=response.model, + tool_calls=tool_calls, + finish_reason=choice.finish_reason or "stop", + usage=usage, + ) diff --git a/src/dartlab/ai/providers/support/__init__.py b/src/dartlab/ai/providers/support/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2997358a652e723d7cc54b511f6ae3925211153e --- /dev/null +++ b/src/dartlab/ai/providers/support/__init__.py @@ -0,0 +1,9 @@ +"""Provider support helpers. + +CLI introspection, setup guides, and OAuth token handling live here so the +provider implementations keep a tighter package boundary. +""" + +from . import cli_setup, codex_cli, oauth_token, ollama_setup + +__all__ = ["cli_setup", "codex_cli", "oauth_token", "ollama_setup"] diff --git a/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd3deccdca45242366ad53c50a79721306c34b99 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d265c5bb54635cd5ebd59a1b9c70f4dc2bfb9d50 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-312.pyc b/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15d02a0c6cb9d751f42e7ebc8c865bda20fe6b72 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-313.pyc b/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d59ab18c1b2a154d6e5e597975134fd686f48f28 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/cli_setup.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-312.pyc b/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2da518d1e31009ba3b0e14b2dd8c521a5d34824d Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-313.pyc b/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35c98dcdb56714e900c7b771c3c2e41cfdf48cbd Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/codex_cli.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-312.pyc b/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9fe6bbc36dba21377c4245eb9acec22922e3c33 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-313.pyc b/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a03474e0c891e229da0aa3b5622834ebd963b32 Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/oauth_token.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-312.pyc b/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a2a0337259ef6bc61c6bae6d220b9b8f216cb4f Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-312.pyc differ diff --git a/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-313.pyc b/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08dba4baa1f8f9eab0148e5bd4f56aa5d1683a6f Binary files /dev/null and b/src/dartlab/ai/providers/support/__pycache__/ollama_setup.cpython-313.pyc differ diff --git a/src/dartlab/ai/providers/support/cli_setup.py b/src/dartlab/ai/providers/support/cli_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..ee02b4dfaaee15341f9f5b06d900bb75d758385e --- /dev/null +++ b/src/dartlab/ai/providers/support/cli_setup.py @@ -0,0 +1,93 @@ +"""CLI 기반 LLM 도구 (Claude Code, Codex) 설치 감지 및 안내.""" + +from __future__ import annotations + +import platform +import shutil +import subprocess + +_IS_WINDOWS = platform.system() == "Windows" + + +def detect_claude_code() -> dict: + """Claude Code CLI 상태 감지. + + Returns: + {"installed": bool, "authenticated": bool, "version": str | None} + """ + result: dict = { + "installed": False, + "authenticated": False, + "version": None, + } + + if not shutil.which("claude"): + return result + + result["installed"] = True + + try: + proc = subprocess.run( + ["claude", "--version"], + capture_output=True, + text=True, + timeout=10, + shell=_IS_WINDOWS, # noqa: S603 — hardcoded constant args + ) + if proc.returncode == 0: + result["version"] = proc.stdout.strip() + except (subprocess.TimeoutExpired, OSError): + pass + + try: + proc = subprocess.run( + ["claude", "auth", "status"], + capture_output=True, + text=True, + timeout=10, + shell=_IS_WINDOWS, # noqa: S603 — hardcoded constant args + ) + result["authenticated"] = proc.returncode == 0 + except (subprocess.TimeoutExpired, OSError): + pass + + return result + + +def detect_codex() -> dict: + """OpenAI Codex CLI 상태 감지. + + Returns: + {"installed": bool, "version": str | None} + """ + from dartlab.ai.providers.support.codex_cli import inspect_codex_cli + + return inspect_codex_cli() + + +def get_claude_code_install_guide() -> str: + """OS별 Claude Code CLI 설치 안내.""" + os_name = platform.system() + + guide = "[ Claude Code CLI 설치 안내 ]\n\n" + + if os_name == "Darwin": + guide += ( + "1. npm install -g @anthropic-ai/claude-code\n" + " 또는: brew install claude-code\n" + "2. 인증: claude auth login\n" + "3. 확인: claude --version\n" + ) + else: + guide += "1. npm install -g @anthropic-ai/claude-code\n2. 인증: claude auth login\n3. 확인: claude --version\n" + + guide += "\nClaude Pro/Max 구독이 필요합니다.\n문서: https://code.claude.com/\n" + return guide + + +def get_codex_install_guide() -> str: + """OS별 Codex CLI 설치 안내.""" + guide = "[ OpenAI Codex CLI 설치 안내 ]\n\n" + guide += "1. npm install -g @openai/codex\n2. 처음 실행 시 로그인: codex\n3. 확인: codex --version\n" + guide += "\nChatGPT Plus/Pro 구독이 필요합니다.\n문서: https://developers.openai.com/codex/cli/\n" + return guide diff --git a/src/dartlab/ai/providers/support/codex_cli.py b/src/dartlab/ai/providers/support/codex_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..e2ea086a9dccdd5c8df572372b576552866d15a2 --- /dev/null +++ b/src/dartlab/ai/providers/support/codex_cli.py @@ -0,0 +1,366 @@ +"""Codex CLI introspection and execution helpers.""" + +from __future__ import annotations + +import json +import os +import re +import shutil +import subprocess +import tomllib +from pathlib import Path +from typing import Any + +_CODEX_CONFIG_PATH = Path.home() / ".codex" / "config.toml" +_DEFAULT_CODEX_MODELS = [ + "gpt-5.4", + "gpt-5.3", + "gpt-5.3-codex", + "gpt-5.2", + "gpt-5.2-codex", + "gpt-5.1", + "gpt-5.1-codex", + "gpt-5.1-codex-mini", + "o3", + "o4-mini", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", +] +_CODING_KEYWORDS = ( + "코드", + "코딩", + "구현", + "수정", + "패치", + "리팩터링", + "버그", + "테스트", + "파일", + "함수", + "클래스", + "모듈", + "컴포넌트", + "diff", + "patch", + "fix", + "implement", + "refactor", + "edit", + "write code", + "update file", +) +_CODE_FILE_HINT = re.compile(r"\.(py|svelte|js|ts|tsx|jsx|json|toml|md|yml|yaml|css|html)\b", re.IGNORECASE) + + +def codex_path() -> str | None: + """Resolve Codex CLI executable.""" + return shutil.which("codex") + + +def _run_codex_meta_command(*args: str, timeout: int = 10) -> tuple[int, str, str] | None: + """Run a short Codex CLI command and capture text output.""" + exe = codex_path() + if not exe: + return None + try: + result = subprocess.run( + [exe, *args], + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=timeout, + shell=False, + ) + except (OSError, subprocess.TimeoutExpired, UnicodeDecodeError): + return None + return result.returncode, (result.stdout or "").strip(), (result.stderr or "").strip() + + +def load_codex_config() -> dict[str, Any]: + """Load ~/.codex/config.toml if present.""" + if not _CODEX_CONFIG_PATH.exists(): + return {} + try: + with _CODEX_CONFIG_PATH.open("rb") as fh: + data = tomllib.load(fh) + except (OSError, tomllib.TOMLDecodeError): + return {} + return data if isinstance(data, dict) else {} + + +def get_codex_configured_model() -> str | None: + """Return model configured in Codex CLI config, if any.""" + data = load_codex_config() + root_model = data.get("model") + if isinstance(root_model, str) and root_model.strip(): + return root_model.strip() + + profile_name = data.get("profile") or data.get("default_profile") + profiles = data.get("profiles") + if isinstance(profile_name, str) and isinstance(profiles, dict): + profile = profiles.get(profile_name) + if isinstance(profile, dict): + profile_model = profile.get("model") + if isinstance(profile_model, str) and profile_model.strip(): + return profile_model.strip() + + if isinstance(profiles, dict): + for profile in profiles.values(): + if isinstance(profile, dict): + profile_model = profile.get("model") + if isinstance(profile_model, str) and profile_model.strip(): + return profile_model.strip() + + return None + + +def get_codex_model_catalog() -> list[str]: + """Build a model catalog from Codex config + ChatGPT/Codex provider fallback.""" + models: list[str] = [] + configured = get_codex_configured_model() + if configured: + models.append(configured) + + models.extend(_DEFAULT_CODEX_MODELS) + + if not models: + models.append("gpt-4.1") + + seen: set[str] = set() + unique: list[str] = [] + for model in models: + if model and model not in seen: + seen.add(model) + unique.append(model) + return unique + + +def _extract_commands(help_text: str) -> list[str]: + """Parse command names from help output.""" + help_text = help_text if isinstance(help_text, str) else str(help_text) + commands: list[str] = [] + in_commands = False + for line in help_text.splitlines(): + stripped = line.rstrip() + if stripped.startswith("Commands:"): + in_commands = True + continue + if not in_commands: + continue + if stripped.startswith("Arguments:") or stripped.startswith("Options:"): + break + if not line.startswith(" "): + continue + token = stripped.split()[0] + if token: + commands.append(token) + return commands + + +def _extract_sandbox_modes(help_text: str) -> list[str]: + """Parse sandbox modes from help output.""" + help_text = help_text if isinstance(help_text, str) else str(help_text) + match = re.search(r"possible values:\s*([^\]]+)", help_text) + if not match: + return [] + values = match.group(1) + return [value.strip() for value in values.split(",") if value.strip()] + + +def _parse_login_status(stdout: str, stderr: str, returncode: int) -> tuple[bool, str | None, str | None]: + """Parse `codex login status` output into authentication metadata.""" + text = (stdout or stderr or "").strip() + lowered = text.lower() + + if returncode != 0: + return False, None, text or None + + authenticated = "logged in" in lowered or "authenticated" in lowered + auth_mode = None + if "chatgpt" in lowered: + auth_mode = "chatgpt" + elif "api key" in lowered or "api-key" in lowered: + auth_mode = "api_key" + + return authenticated, auth_mode, text or None + + +def inspect_codex_cli() -> dict[str, Any]: + """Inspect installed Codex CLI features from live help output.""" + result: dict[str, Any] = { + "installed": False, + "version": None, + "configuredModel": get_codex_configured_model(), + "authenticated": False, + "authMode": None, + "loginStatus": None, + "commands": [], + "execCommands": [], + "sandboxModes": [], + "supportsLogin": False, + "supportsLogout": False, + "supportsJson": False, + "supportsWorkspaceWrite": False, + "supportsDangerFullAccess": False, + "supportsMcp": False, + "supportsReview": False, + "supportsApply": False, + } + + version_info = _run_codex_meta_command("--version") + if version_info is None: + return result + + returncode, stdout, _stderr = version_info + if returncode != 0: + return result + + result["installed"] = True + result["version"] = stdout or None + + root_help = _run_codex_meta_command("--help") + if root_help is not None and root_help[0] == 0: + help_text = root_help[1] + commands = _extract_commands(help_text) + result["commands"] = commands + result["supportsLogin"] = "login" in commands + result["supportsLogout"] = "logout" in commands + result["supportsMcp"] = "mcp" in commands + result["supportsReview"] = "review" in commands + result["supportsApply"] = "apply" in commands + + login_status = _run_codex_meta_command("login", "status") + if login_status is not None: + authenticated, auth_mode, status_text = _parse_login_status(*login_status[1:], login_status[0]) + result["authenticated"] = authenticated + result["authMode"] = auth_mode + result["loginStatus"] = status_text + + exec_help = _run_codex_meta_command("exec", "--help") + if exec_help is not None and exec_help[0] == 0: + help_text = exec_help[1] + result["execCommands"] = _extract_commands(help_text) + sandbox_modes = _extract_sandbox_modes(help_text) + result["sandboxModes"] = sandbox_modes + result["supportsWorkspaceWrite"] = "workspace-write" in sandbox_modes + result["supportsDangerFullAccess"] = "danger-full-access" in sandbox_modes + result["supportsJson"] = "--json" in help_text + + return result + + +def logout_codex_cli(timeout: int = 15) -> None: + """Remove stored Codex CLI authentication.""" + info = inspect_codex_cli() + if not info.get("installed"): + raise FileNotFoundError("Codex CLI가 설치되어 있지 않습니다.") + + result = _run_codex_meta_command("logout", timeout=timeout) + if result is None: + raise RuntimeError("Codex CLI 로그아웃 명령을 실행할 수 없습니다.") + + returncode, _stdout, stderr = result + if returncode != 0: + raise RuntimeError(stderr or "Codex CLI 로그아웃에 실패했습니다.") + + +def infer_codex_sandbox(messages: list[dict[str, str]], override: str | None = None) -> str: + """Choose a Codex sandbox based on explicit override, env, and user intent.""" + info = inspect_codex_cli() + sandbox_modes = set(info.get("sandboxModes") or []) + + requested = override or os.environ.get("DARTLAB_CODEX_SANDBOX") + if requested and (not sandbox_modes or requested in sandbox_modes): + return requested + + user_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user") + if _looks_like_code_task(user_text) and "workspace-write" in sandbox_modes: + return "workspace-write" + return "read-only" + + +def _looks_like_code_task(text: str) -> bool: + """Heuristic for repo-editing intent.""" + lowered = text.lower() + if any(keyword in lowered for keyword in _CODING_KEYWORDS): + return True + return bool(_CODE_FILE_HINT.search(text)) + + +def build_codex_exec_command(*, model: str | None = None, sandbox: str = "read-only") -> list[str]: + """Build a non-interactive Codex exec command.""" + exe = codex_path() or "codex" + cmd = [exe, "exec", "-", "--json", "--skip-git-repo-check", "--sandbox", sandbox] + if model: + cmd.extend(["--model", model]) + return cmd + + +def parse_codex_jsonl(output: str) -> tuple[str, dict[str, int] | None]: + """Extract final answer and usage from Codex JSONL output.""" + answer = "" + usage: dict[str, int] = {} + + for line in output.strip().splitlines(): + line = line.strip() + if not line: + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + + event_type = event.get("type", "") + if event_type == "item.completed": + item = event.get("item", {}) + if item.get("type") == "agent_message": + answer = item.get("text", "") + elif event_type == "turn.completed": + turn_usage = event.get("usage", {}) + if turn_usage: + usage["prompt_tokens"] = turn_usage.get("input_tokens") + usage["completion_tokens"] = turn_usage.get("output_tokens") + prompt_tokens = usage.get("prompt_tokens") or 0 + completion_tokens = usage.get("completion_tokens") or 0 + if prompt_tokens or completion_tokens: + usage["total_tokens"] = prompt_tokens + completion_tokens + + return answer, usage or None + + +def run_codex_exec( + prompt: str, + *, + model: str | None = None, + sandbox: str = "read-only", + cwd: str | None = None, + timeout: int = 300, +) -> tuple[str, dict[str, int] | None]: + """Run Codex CLI in non-interactive JSON mode.""" + cmd = build_codex_exec_command(model=model, sandbox=sandbox) + + try: + result = subprocess.run( + cmd, + input=prompt.encode("utf-8"), + capture_output=True, + timeout=timeout, + shell=False, + cwd=cwd, + ) + except subprocess.TimeoutExpired as exc: + raise TimeoutError(f"Codex CLI 응답 시간 초과 ({timeout}초)") from exc + + if result.returncode != 0: + raw_err = result.stderr or b"" + stderr = raw_err.decode("utf-8", errors="replace").strip() if isinstance(raw_err, bytes) else raw_err.strip() + raise RuntimeError(f"Codex CLI 오류 (exit {result.returncode}):\n{stderr}") + + raw_out = result.stdout or b"" + stdout = raw_out.decode("utf-8", errors="replace") if isinstance(raw_out, bytes) else raw_out + answer, usage = parse_codex_jsonl(stdout) + if not answer: + raise RuntimeError("Codex CLI에서 응답을 추출할 수 없습니다.") + return answer, usage diff --git a/src/dartlab/ai/providers/support/oauth_token.py b/src/dartlab/ai/providers/support/oauth_token.py new file mode 100644 index 0000000000000000000000000000000000000000..8450721a06a5784e4f1bd159f9bcb142c4ac7847 --- /dev/null +++ b/src/dartlab/ai/providers/support/oauth_token.py @@ -0,0 +1,234 @@ +"""ChatGPT OAuth 토큰 관리. + +PKCE(Proof Key for Code Exchange) 플로우로 ChatGPT 계정 인증 후 +access_token / refresh_token을 공통 secret store에 저장·갱신한다. +""" + +from __future__ import annotations + +import base64 +import hashlib +import json +import secrets +import time +from contextlib import suppress +from pathlib import Path +from typing import Any +from urllib.parse import urlencode + +from dartlab.core.ai.providers import oauth_secret_name +from dartlab.core.ai.secrets import get_secret_store + +CHATGPT_AUTH_URL = "https://auth.openai.com/oauth/authorize" +CHATGPT_TOKEN_URL = "https://auth.openai.com/oauth/token" +CHATGPT_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" +CHATGPT_SCOPE = "openid profile email offline_access api.connectors.read api.connectors.invoke" + +OAUTH_REDIRECT_PORT = 1455 +OAUTH_REDIRECT_URI = f"http://localhost:{OAUTH_REDIRECT_PORT}/auth/callback" + +_TOKEN_DIR = Path.home() / ".dartlab" +_TOKEN_FILE = _TOKEN_DIR / "oauth_token.json" +_TOKEN_SECRET_NAME = oauth_secret_name("oauth-codex") + + +def _generate_pkce() -> tuple[str, str]: + verifier = secrets.token_urlsafe(64) + digest = hashlib.sha256(verifier.encode("ascii")).digest() + challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") + return verifier, challenge + + +def build_auth_url() -> tuple[str, str, str]: + """OAuth 인증 URL과 PKCE verifier, state를 반환.""" + verifier, challenge = _generate_pkce() + state = secrets.token_urlsafe(32) + params = { + "response_type": "code", + "client_id": CHATGPT_CLIENT_ID, + "redirect_uri": OAUTH_REDIRECT_URI, + "scope": CHATGPT_SCOPE, + "code_challenge": challenge, + "code_challenge_method": "S256", + "state": state, + "id_token_add_organizations": "true", + "codex_cli_simplified_flow": "true", + "originator": "codex_cli_rs", + } + url = f"{CHATGPT_AUTH_URL}?{urlencode(params)}" + return url, verifier, state + + +def exchange_code(code: str, verifier: str) -> dict[str, Any]: + """Authorization code를 access_token으로 교환.""" + import httpx + + resp = httpx.post( + CHATGPT_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "client_id": CHATGPT_CLIENT_ID, + "code": code, + "redirect_uri": OAUTH_REDIRECT_URI, + "code_verifier": verifier, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=15, + ) + resp.raise_for_status() + data = resp.json() + _save_token(data) + return data + + +class TokenRefreshError(Exception): + """refresh_token 갱신 실패 — 사유를 분류하여 전달.""" + + def __init__(self, reason: str, detail: str = ""): + self.reason = reason + self.detail = detail + super().__init__(f"토큰 갱신 실패 ({reason}): {detail}") + + +def refresh_access_token() -> dict[str, Any] | None: + """저장된 refresh_token으로 access_token 갱신. + + Raises: + TokenRefreshError: 갱신 실패 시 사유 분류 + - "no_token": 저장된 토큰 없음 + - "expired": refresh_token 만료 + - "reused": refresh_token 이미 사용됨 (rotation) + - "revoked": refresh_token 취소됨 + - "network": 네트워크 오류 + - "unknown": 분류 불가 + """ + token_data = load_token() + if not token_data or not token_data.get("refresh_token"): + raise TokenRefreshError("no_token", "저장된 토큰이 없습니다. 재로그인이 필요합니다.") + + import httpx + + try: + resp = httpx.post( + CHATGPT_TOKEN_URL, + data={ + "grant_type": "refresh_token", + "client_id": CHATGPT_CLIENT_ID, + "refresh_token": token_data["refresh_token"], + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=15, + ) + except httpx.ConnectError: + raise TokenRefreshError("network", "OpenAI 인증 서버에 연결할 수 없습니다.") + except httpx.TimeoutException: + raise TokenRefreshError("network", "OpenAI 인증 서버 응답 시간 초과.") + + if resp.status_code == 200: + data = resp.json() + if "refresh_token" not in data: + data["refresh_token"] = token_data["refresh_token"] + _save_token(data) + return data + + error_body = {} + try: + error_body = resp.json() + except (json.JSONDecodeError, ValueError): + pass + + error_code = error_body.get("error", "") + error_desc = error_body.get("error_description", resp.text[:200]) + + if "expired" in error_code or "expired" in error_desc.lower(): + raise TokenRefreshError("expired", "refresh_token이 만료되었습니다. 재로그인이 필요합니다.") + if "reuse" in error_code or "already" in error_desc.lower(): + raise TokenRefreshError("reused", "refresh_token이 이미 사용되었습니다. 재로그인이 필요합니다.") + if "revoke" in error_code or "invalid_grant" in error_code: + raise TokenRefreshError("revoked", "refresh_token이 취소되었습니다. 재로그인이 필요합니다.") + if "invalid_client" in error_code: + raise TokenRefreshError( + "client_changed", + "OAuth Client ID가 변경된 것 같습니다. openai/codex 레포에서 최신 Client ID를 확인하세요.", + ) + + raise TokenRefreshError("unknown", f"HTTP {resp.status_code}: {error_desc}") + + +def get_valid_token() -> str | None: + """유효한 access_token을 반환. 만료 임박 시 자동 갱신. + + Raises: + TokenRefreshError: 갱신 실패 시 (사유 분류 포함) + """ + token_data = load_token() + if not token_data: + return None + + expires_at = token_data.get("expires_at", 0) + if time.time() < expires_at - 300: + return token_data.get("access_token") + + refreshed = refresh_access_token() + if refreshed: + return refreshed.get("access_token") + + return None + + +def is_authenticated() -> bool: + """유효한 OAuth 토큰이 존재하는지 확인.""" + return get_valid_token() is not None + + +def load_token() -> dict[str, Any] | None: + """저장된 OAuth 토큰을 로드 (SecretStore 우선, 파일 fallback).""" + store = get_secret_store() + data = store.get_json(_TOKEN_SECRET_NAME) + if isinstance(data, dict): + return data + if not _TOKEN_FILE.exists(): + return None + raw = _TOKEN_FILE.read_text(encoding="utf-8") + legacy = json.loads(raw) + if isinstance(legacy, dict): + store.set_json(_TOKEN_SECRET_NAME, legacy) + with suppress(OSError): + _TOKEN_FILE.unlink() + return legacy + return None + + +def revoke_token() -> None: + """저장된 OAuth 토큰을 삭제.""" + get_secret_store().delete(_TOKEN_SECRET_NAME) + if _TOKEN_FILE.exists(): + _TOKEN_FILE.unlink() + + +def get_account_id() -> str | None: + """JWT access_token에서 ChatGPT account_id 추출.""" + token = get_valid_token() + if not token: + return None + parts = token.split(".") + if len(parts) != 3: + return None + payload_b64 = parts[1] + padding = 4 - len(payload_b64) % 4 + if padding != 4: + payload_b64 += "=" * padding + payload = json.loads(base64.urlsafe_b64decode(payload_b64).decode("utf-8")) + auth_claim = payload.get("https://api.openai.com/auth", {}) + if isinstance(auth_claim, dict): + return auth_claim.get("account_id") or auth_claim.get("org_id") + return None + + +def _save_token(data: dict[str, Any]) -> None: + expires_in = data.get("expires_in", 3600) + data["expires_at"] = time.time() + expires_in + get_secret_store().set_json(_TOKEN_SECRET_NAME, data) + if _TOKEN_FILE.exists(): + with suppress(OSError): + _TOKEN_FILE.unlink() diff --git a/src/dartlab/ai/providers/support/ollama_setup.py b/src/dartlab/ai/providers/support/ollama_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..0e6cf02069c7164f1ead6a65af3ef003e4f56970 --- /dev/null +++ b/src/dartlab/ai/providers/support/ollama_setup.py @@ -0,0 +1,129 @@ +"""Ollama 설치 감지 및 안내.""" + +from __future__ import annotations + +import platform +import shutil +import subprocess + +OLLAMA_DEFAULT_URL = "http://localhost:11434" + + +def _detect_gpu() -> dict: + """GPU 상태 감지. + + Returns: + {"available": bool, "name": str | None, "vram_mb": int | None} + """ + gpu_info: dict = {"available": False, "name": None, "vram_mb": None} + + # nvidia-smi (NVIDIA GPU) + try: + result = subprocess.run( + ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + line = result.stdout.strip().split("\n")[0] + parts = [p.strip() for p in line.split(",")] + if len(parts) >= 2: + gpu_info["available"] = True + gpu_info["name"] = parts[0] + try: + gpu_info["vram_mb"] = int(float(parts[1])) + except (ValueError, IndexError): + pass + return gpu_info + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + pass + + # macOS: system_profiler (Apple Silicon / AMD GPU) + if platform.system() == "Darwin": + try: + result = subprocess.run( + ["system_profiler", "SPDisplaysDataType"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0 and "Chipset Model" in result.stdout: + for line in result.stdout.split("\n"): + if "Chipset Model" in line: + gpu_info["available"] = True + gpu_info["name"] = line.split(":")[-1].strip() + break + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + pass + + return gpu_info + + +def detect_ollama() -> dict: + """Ollama 상태 종합 감지. + + Returns: + { + "installed": bool, + "running": bool, + "models": ["llama3.1", ...], + "url": "http://localhost:11434", + "gpu": {"available": bool, "name": str | None, "vram_mb": int | None}, + } + """ + import httpx + + result: dict = { + "installed": False, + "running": False, + "models": [], + "url": OLLAMA_DEFAULT_URL, + "gpu": _detect_gpu(), + } + + try: + resp = httpx.get(f"{OLLAMA_DEFAULT_URL}/api/tags", timeout=2) + if resp.status_code == 200: + result["installed"] = True + result["running"] = True + data = resp.json() + result["models"] = [m["name"] for m in data.get("models", [])] + return result + except (httpx.ConnectError, httpx.TimeoutException): + pass + + if shutil.which("ollama"): + result["installed"] = True + + return result + + +def get_install_guide() -> str: + """OS별 Ollama 설치 안내 텍스트.""" + os_name = platform.system() + + guide = "[ Ollama 설치 안내 ]\n\n" + + if os_name == "Windows": + guide += ( + "1. https://ollama.com/download/windows 에서 설치파일 다운로드\n" + "2. 설치 후 터미널에서: ollama serve\n" + "3. 모델 다운로드: ollama pull llama3.1\n" + ) + elif os_name == "Darwin": + guide += ( + "1. brew install ollama\n" + " 또는 https://ollama.com/download/mac 에서 다운로드\n" + "2. 서버 시작: ollama serve\n" + "3. 모델 다운로드: ollama pull llama3.1\n" + ) + else: + guide += ( + "1. curl -fsSL https://ollama.com/install.sh | sh\n" + "2. 서버 시작: ollama serve\n" + "3. 모델 다운로드: ollama pull llama3.1\n" + ) + + guide += "\n설치 완료 후 다시 시도하세요.\n문서: https://ollama.com/\n" + return guide diff --git a/src/dartlab/ai/runtime/__init__.py b/src/dartlab/ai/runtime/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6379081221bb6818b588dd040b30e8dac0a5e603 --- /dev/null +++ b/src/dartlab/ai/runtime/__init__.py @@ -0,0 +1 @@ +"""AI runtime package.""" diff --git a/src/dartlab/ai/runtime/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b151b72026a7a17c5ceac9bd8f3cd423f6b4b67 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/runtime/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b793b6aab3e3bcddc34b33f28ccf87070e3e1313 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/agent.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/agent.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b8cb9f8b76063c82dcc78090a55ad22b33779dd Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/agent.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/core.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/core.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1eea9348256533fa877c541e254781b0eae62a52 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/core.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/core.cpython-313.pyc b/src/dartlab/ai/runtime/__pycache__/core.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f5bdfb75d53352a7386e15f4da6a370e7e6ffc4 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/core.cpython-313.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/events.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/events.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..648d08581424a93554f354f439f290a4b8a57872 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/events.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/events.cpython-313.pyc b/src/dartlab/ai/runtime/__pycache__/events.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48ede67273fef5b3a2f165fd9b1c8dd7ed65786 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/events.cpython-313.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0aed58b2d7bfd79d5b407a5e157f86d815fe0e14 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-313.pyc b/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7b2e255ddb80ff60144a151c8b23efbd5034fad Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/plugin_hints.cpython-313.pyc differ diff --git a/src/dartlab/ai/runtime/__pycache__/standalone.cpython-312.pyc b/src/dartlab/ai/runtime/__pycache__/standalone.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4ece7da52d201f23f4f7902e471d70967a612b1 Binary files /dev/null and b/src/dartlab/ai/runtime/__pycache__/standalone.cpython-312.pyc differ diff --git a/src/dartlab/ai/runtime/agent.py b/src/dartlab/ai/runtime/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..0c5e7b9d24b5d4c146110b597a6dfee2ae71457c --- /dev/null +++ b/src/dartlab/ai/runtime/agent.py @@ -0,0 +1,37 @@ +"""코드블록 실행 에이전트 + reflection. + +CAPABILITIES-Driven 아키텍처에서 LLM은 코드블록을 생성하고, +시스템이 자동 실행 후 결과를 피드백한다. +tool calling은 사용하지 않는다. +""" + +from __future__ import annotations + +import logging + +log = logging.getLogger(__name__) + +from dartlab.ai.providers.base import BaseProvider + +# ══════════════════════════════════════ +# Reflection — 답변 자체 검증 +# ══════════════════════════════════════ + +_REFLECTION_PROMPT = ( + "당신의 이전 답변을 검토하세요. 다음 기준으로 평가하고 개선하세요:\n" + "1. **수치 근거**: 인용한 모든 수치에 출처(테이블명, 연도)가 있는가?\n" + "2. **완전성**: 사용자 질문에 완전히 답했는가? 빠진 관점은?\n" + "3. **근거 없는 주장**: 제공된 데이터로 뒷받침할 수 없는 주장이 있는가?\n\n" + "문제가 있으면 수정된 답변을 작성하세요. 문제가 없으면 원본 답변을 그대로 반환하세요." +) + + +def _reflect_on_answer(provider: BaseProvider, messages: list[dict], answer: str) -> str: + """답변 자체 검증 — 1회 reflection으로 품질 보완.""" + reflect_messages = [ + *messages, + {"role": "assistant", "content": answer}, + {"role": "user", "content": _REFLECTION_PROMPT}, + ] + response = provider.complete(reflect_messages) + return response.answer if response.answer.strip() else answer diff --git a/src/dartlab/ai/runtime/core.py b/src/dartlab/ai/runtime/core.py new file mode 100644 index 0000000000000000000000000000000000000000..da5d87178865a890aa14e06a36575091b9a4d1e3 --- /dev/null +++ b/src/dartlab/ai/runtime/core.py @@ -0,0 +1,1576 @@ +"""AI 분석 통합 오케스트레이터 — CAPABILITIES-Driven 순수 스트리밍. + +dartlab.ask(), server UI, CLI가 모두 이 코어를 소비한다. +동기 제너레이터로 AnalysisEvent를 생산하며, 소비자가 형식(SSE/텍스트/제너레이터)을 결정. + +새 구조:: + + 질문 → CAPABILITIES 검색(ms) → 시스템 프롬프트 주입 + → LLM 스트리밍 → 코드블록 감지 → execute_code → 결과 해석 → 스트리밍 답변 +""" + +from __future__ import annotations + +import concurrent.futures +import dataclasses +import logging +import os +import re +import sqlite3 +import time +from difflib import SequenceMatcher +from typing import Any, Generator + +log = logging.getLogger(__name__) + +from dartlab.ai.runtime.events import AnalysisEvent + +# ── company=None 사전 종목 검색 ─────────────────────────── + +_COMPARE_SPLIT_RE = re.compile(r"(랑|와|과|이랑|하고|vs\.?|VS\.?|versus)", re.IGNORECASE) + + +def _detectCompanyNames(question: str) -> list[str]: + """질문에서 종목명/종목코드 후보를 추출.""" + parts = _COMPARE_SPLIT_RE.split(question) + candidates: list[str] = [] + for p in parts: + p = p.strip() + if not p or _COMPARE_SPLIT_RE.fullmatch(p): + continue + cleaned = re.sub(r"\s*(비교|분석|알려|설명|해줘|해주세요|해봐|좀).*$", "", p).strip() + if cleaned and len(cleaned) >= 2: + candidates.append(cleaned) + return candidates[:4] + + +def _searchCompanyCodes(question: str) -> str: + """질문에서 종목명을 추출하고 dartlab.searchName()으로 종목코드를 사전 확인.""" + candidates = _detectCompanyNames(question) + if not candidates: + return "" + + results: list[str] = [] + try: + import dartlab + + for name in candidates: + try: + df = dartlab.searchName(name) + if df is not None and len(df) > 0: + row = df.row(0, named=True) + corpName = row.get("corp_name", row.get("회사명", name)) + stockCode = row.get("stock_code", row.get("종목코드", "")) + if stockCode: + results.append(f"- {corpName}: 종목코드 **{stockCode}**") + except (FileNotFoundError, OSError, RuntimeError, TypeError, ValueError): + continue + except ImportError: + return "" + + if not results: + return "" + + body = "\n".join(results) + return ( + '\n' + "## 사전 종목코드 확인 결과\n" + "아래 종목코드가 확인되었습니다. 코드 작성 시 이 코드를 사용하세요:\n" + f"{body}\n" + "" + ) + + +# ── 외부 검색 Pre-Grounding ────────────────────────────── + +_SEARCH_TRIGGER_KEYWORDS = ( + "최근", + "시장", + "이슈", + "동향", + "전망", + "뉴스", + "소식", + "올해", + "지금", + "현재", + "요즘", + "실적발표", + "규제", + "금리", + "환율", + "유가", + "반도체", + "AI", + "인공지능", + "트렌드", + "업황", + "산업", + "정책", +) + + +def _needsExternalSearch(question: str) -> bool: + """질문에 외부 검색이 필요한 키워드가 포함되어 있는지 판단.""" + q = question.lower() + return any(kw.lower() in q for kw in _SEARCH_TRIGGER_KEYWORDS) + + +def _preGroundDisclosure(stockCode: str | None = None) -> str: + """companyProfile에서 해당 종목의 공시 프로필을 추출하여 주입.""" + if not stockCode: + return "" + try: + from dartlab.core.search.derived import loadProfile + + row = loadProfile(stockCode) + except (ImportError, FileNotFoundError, OSError): + return "" + if row is None: + return "" + + return ( + '\n' + "## 공시 프로필 (자동 조회)\n" + f"- 총 공시: {row['total_filings']}건 ({row['first_dt']}~{row['last_dt']})\n" + f"- 주요 유형: {row['top3_summary']}\n" + f"- 공시 속도: {row['velocity_text']}\n" + f"- 특이사항: {row['rare_text']}\n" + "" + ) + + +def _gatherInsightHints(stock_id: str, company: Any | None) -> str: + """KnowledgeDB 인사이트 + 동종업계 패턴 → 단일 텍스트 블록. + + P1-1 백그라운드 thread 에서 호출 가능. 실패 시 빈 문자열. + """ + try: + from dartlab.ai.persistence import KnowledgeDB + + db = KnowledgeDB.get() + insight = db.get_insight(stock_id) + except (ImportError, OSError): + return "" + + if insight: + try: + import time as _t + + expired_tag = "" + if insight.expires_at and _t.time() > insight.expires_at: + expired_tag = " (90일+ 전 분석, 업데이트 필요)" + strengths_str = ", ".join(insight.strengths[:3]) if insight.strengths else "" + weaknesses_str = ", ".join(insight.weaknesses[:3]) if insight.weaknesses else "" + text = f"## 이전 심층 분석 인사이트{expired_tag}\n서사: {insight.narrative[:300]}\n" + if strengths_str: + text += f"강점: {strengths_str}\n" + if weaknesses_str: + text += f"약점: {weaknesses_str}\n" + text += "이전 분석과 일관성을 유지하되, 새 데이터로 업데이트하라." + return text + except (AttributeError, TypeError): + return "" + + if company is None: + return "" + sector = getattr(company, "sector", None) or getattr(company, "sectorName", None) or "" + if not sector: + return "" + try: + sector_insights = db.get_sector_insights(sector, limit=2) + except (OSError, sqlite3.Error): + return "" + if not sector_insights: + return "" + lines = [f"## 동종업계 분석 패턴 참고 ({sector})"] + for si in sector_insights: + lines.append(f"- {si.stock_code}: {si.narrative[:150]}") + return "\n".join(lines) + + +def _preGroundSearch( + question: str, + stockCode: str | None = None, + corpName: str | None = None, +) -> str: + """질문 기반 자동 검색 — 결과를 user 컨텍스트에 주입할 텍스트로 반환.""" + try: + from dartlab.gather.search import formatResults, newsSearch, searchAvailable, webSearch + except ImportError: + return "" + + avail = searchAvailable() + if not avail["any"]: + return "" + + # 검색 쿼리 구성: 종목명이 있으면 포함 + baseQuery = question[:100] + if corpName: + baseQuery = f"{corpName} {baseQuery}" + + try: + results = newsSearch(baseQuery, maxResults=5, days=7) + if not results: + results = webSearch(baseQuery, maxResults=5, days=7) + except (OSError, RuntimeError, TimeoutError, ValueError): + return "" + + if not results: + return "" + + formatted = formatResults(results, maxChars=2000) + return ( + '\n' + "## 관련 최신 정보 (자동 검색)\n" + "아래는 질문과 관련된 최신 검색 결과입니다. 참고하되, " + "출처(URL)를 인용하고, 검색 결과만으로 판단하지 마세요.\n\n" + f"{formatted}\n" + "" + ) + + +# ── 데이터 신선도 추출 ──────────────────────────────────── + + +def _extract_data_date(company: Any) -> str | None: + """Company에서 최신 데이터 기준일을 추출한다.""" + try: + filings = company.filings() if callable(getattr(company, "filings", None)) else None + if filings is not None and hasattr(filings, "columns") and "date" in filings.columns: + dates = filings["date"].drop_nulls() + if len(dates) > 0: + return str(dates.max()) + except (AttributeError, TypeError, KeyError): + pass + return None + + +# ── 에러 분류 ───────────────────────────────────────────── + + +def _classify_error(e: Exception) -> dict[str, str]: + """예외 → {error: str, action: str} 매핑.""" + err_type = type(e).__name__ + err_str = str(e) + err_low = err_str.lower() + + if isinstance(e, FileNotFoundError): + return {"error": err_str, "action": "install"} + if isinstance(e, PermissionError): + return {"error": err_str, "action": "login"} + + # ChatGPT OAuth + if err_type == "ChatGPTOAuthError": + if any(kw in err_low for kw in ("token", "expire", "login")): + return {"error": "ChatGPT 인증이 만료되었습니다. 다시 로그인해주세요.", "action": "relogin"} + if any(kw in err_low for kw in ("rate", "limit")): + return {"error": "ChatGPT 요청 한도에 도달했습니다. 잠시 후 다시 시도해주세요.", "action": "rate_limit"} + return {"error": f"ChatGPT 연결 오류: {err_str}", "action": "relogin"} + + # OpenAI + if err_type == "OpenAIError" or "api_key" in err_low: + return {"error": "AI 설정이 필요합니다. API 키를 확인하거나 다른 provider를 선택해주세요.", "action": "config"} + + # Google Gemini 에러 + if ( + err_type in ("ServerError", "ClientError", "APIError") + or "google" in err_type.lower() + or "genai" in err_type.lower() + ): + if "503" in err_str or "unavailable" in err_low or "high demand" in err_low: + return {"error": "Gemini 서버가 일시적으로 혼잡합니다. 잠시 후 다시 시도해주세요.", "action": "retry"} + if "429" in err_str or "rate" in err_low or "quota" in err_low or "resource_exhausted" in err_low: + return {"error": "Gemini 요청 한도에 도달했습니다. 잠시 후 다시 시도해주세요.", "action": "rate_limit"} + if "401" in err_str or "403" in err_str or "unauthenticated" in err_low or "permission" in err_low: + return {"error": "Gemini API 키가 유효하지 않습니다. 설정에서 확인해주세요.", "action": "config"} + if "400" in err_str or "invalid" in err_low: + return {"error": f"Gemini 요청 오류: {err_str}", "action": ""} + return {"error": f"Gemini 연결 오류: {err_str}", "action": "retry"} + + # Ollama / 로컬 모델 + if "connection" in err_low and ("refused" in err_low or "11434" in err_low): + return {"error": "Ollama가 실행 중이지 않습니다. ollama serve로 시작해주세요.", "action": "config"} + + # 일반 네트워크/서버 에러 + if isinstance(e, (ConnectionError, TimeoutError)): + return { + "error": "AI 서버에 연결할 수 없습니다. 네트워크를 확인하거나 잠시 후 다시 시도해주세요.", + "action": "retry", + } + + return {"error": err_str, "action": ""} + + +def _enrich_with_guide(result: dict[str, str], error: Exception | None = None) -> dict[str, str]: + """에러에 guide 안내 데스크 메시지를 추가.""" + try: + from dartlab.guide import guide + + guideMsg = guide.handleError( + error or RuntimeError(result.get("error", "")), + feature="ai", + ) + result["guide"] = guideMsg + except ImportError: + if result.get("action") in ("config", "install", "login", "relogin"): + try: + from dartlab.guide.aiSetup import no_provider_message + + result["guide"] = no_provider_message() + except ImportError: + pass + return result + + +# ── Config 해석 ────────────────────────────────────────── + + +def _resolveAnalysisConfig( + provider: str | None, + role: str | None, + model: str | None, + api_key: str | None, + base_url: str | None, + **kwargs: Any, +) -> Any: + """Config 해석 — free provider chain, get_config, merge overrides.""" + from dartlab.ai import get_config + + if provider == "free": + from dartlab.ai.providers.fallback import buildFreeChain + + free_chain = buildFreeChain() + if free_chain: + provider = free_chain[0] + else: + provider = None + + config_ = get_config(role=role) + + # LLMConfig 필드만 통과 — deprecated 파라미터(use_tools 등)가 kwargs로 + # 흘러들어와도 LLMConfig.merge()에 전달되지 않도록 필터링 + _LLMCONFIG_FIELDS = frozenset(f.name for f in dataclasses.fields(config_)) + llm_kwargs = {k: v for k, v in kwargs.items() if k in _LLMCONFIG_FIELDS} + + overrides = { + k: v + for k, v in { + "provider": provider, + "model": model, + "api_key": api_key, + "base_url": base_url, + **llm_kwargs, + }.items() + if v is not None + } + if overrides: + config_ = config_.merge(overrides) + + return config_ + + +# ── 코드블록 감지 + 실행 ───────────────────────────────── + +_CODE_BLOCK_RE = re.compile(r"```python\s*\n(.*?)```", re.DOTALL) + + +def _extractCodeBlocks(text: str) -> list[str]: + """텍스트에서 ```python 코드블록을 추출.""" + return _CODE_BLOCK_RE.findall(text) + + +def _executeCodeBlock(code: str, stockCode: str | None = None) -> str: + """DartlabCodeExecutor로 코드를 실행하고 결과를 반환.""" + from dartlab.ai.tools.coding import DartlabCodeExecutor + + executor = DartlabCodeExecutor() + return executor.execute(code, stockCode=stockCode, timeout=60) + + +# ── Polars 유니코드 테이블 → GFM 마크다운 변환 ───────── + +_POLARS_TABLE_START = re.compile(r"^┌[─┬]+┐$", re.MULTILINE) +_POLARS_TABLE_END = re.compile(r"^└[─┴]+┘$", re.MULTILINE) + + +def _polarsTableToMarkdown(text: str) -> str: + """실행 결과 내 Polars 유니코드 테이블을 GFM 마크다운 테이블로 변환. + + Polars 출력 구조: + ┌──────┬──────┐ ← 상단 경계 + │ col1 ┆ col2 │ ← 헤더 행 + │ --- ┆ --- │ ← 타입 힌트 (생략) + │ str ┆ f64 │ ← 타입 행 (생략) + ╞══════╪══════╡ ← 헤더/데이터 구분선 + │ val1 ┆ val2 │ ← 데이터 행 + └──────┴──────┘ ← 하단 경계 + """ + if "┌" not in text: + return text + + lines = text.split("\n") + result: list[str] = [] + in_table = False + header_emitted = False + col_count = 0 + + for line in lines: + stripped = line.strip() + + # 테이블 시작 경계 + if stripped.startswith("┌") and stripped.endswith("┐"): + in_table = True + header_emitted = False + continue + + # 테이블 끝 경계 + if stripped.startswith("└") and stripped.endswith("┘"): + in_table = False + continue + + if not in_table: + result.append(line) + continue + + # 헤더/데이터 구분선 (╞══╪══╡) + if stripped.startswith("╞") or stripped.startswith("├"): + if not header_emitted and col_count > 0: + result.append("| " + " | ".join(["---"] * col_count) + " |") + header_emitted = True + continue + + # 데이터 행 (│ 또는 ┆ 구분) + if "│" in stripped or "┆" in stripped: + # 분리: │ 와 ┆ 모두 셀 구분자로 처리 + cells_raw = re.split(r"[│┆]", stripped) + cells = [c.strip() for c in cells_raw if c.strip() != ""] + + # Polars 타입/구분 행 건너뛰기 (--- 또는 str/f64/i64 등) + if all( + c in ("---", "str", "f64", "i64", "i32", "u32", "u64", "bool", "cat", "date", "datetime") for c in cells + ): + continue + + if cells: + # "…" 또는 "..." 전용 셀 제거 (Polars 컬럼 생략 표시) + clean = [c for c in cells if c not in ("…", "...")] + if not clean: + continue # 생략 행 전체 스킵 + # null → - + clean = [("-" if c == "null" else c) for c in clean] + col_count = max(col_count, len(clean)) + md_row = "| " + " | ".join(clean) + " |" + result.append(md_row) + + return "\n".join(result) + + +def _formatResultForUser(result: str) -> str: + """실행 결과를 사용자에게 보여줄 형식으로 변환. + + - Polars 유니코드 테이블 → 마크다운 테이블 (코드 블록 밖) + - 마크다운 파이프 테이블이 포함된 결과 → 코드 블록 밖 + - 에러/Traceback → 코드 블록 유지 + - 그 외 plain text → 코드 블록 + """ + # shape: (N, M) 메타 텍스트 제거 + result = re.sub(r"shape: \(\d+, \d+\)\s*\n?", "", result) + + # Polars 유니코드 테이블이 있으면 먼저 변환 (에러+테이블 혼합 대응) + if "┌" in result: + converted = _polarsTableToMarkdown(result) + return f"\n\n[실행 결과]\n\n{converted}\n\n" + + isError = "실행 오류" in result or "Traceback" in result + if isError: + return f"\n\n```\n[실행 결과]\n{result}\n```\n\n" + + # 마크다운 파이프 테이블이 포함 → 코드블록 밖 + lines = result.split("\n") + hasTable = any(l.strip().startswith("|") and l.strip().endswith("|") for l in lines) + if hasTable: + return f"\n\n[실행 결과]\n\n{result}\n\n" + + return f"\n\n```\n[실행 결과]\n{result}\n```\n\n" + + +_LOOP_SIMILARITY_THRESHOLD = 0.75 # R22-7: 0.85 → 0.75. 동일 코드 변형 반복 더 적극적 차단 +def _extractDataHint(result: str) -> str: + """코드 실행 결과에서 DataFrame/dict 구조 힌트를 추출. + + Polars 테이블 → 컬럼명 + shape + dict 출력 → 키 목록 + """ + hints: list[str] = [] + + # Polars 테이블 감지 → 컬럼명 + shape 추출 + if "┌" in result or ("│" in result and "┆" in result): + header_lines = [l for l in result.split("\n") if "│" in l and "┆" in l] + if header_lines: + cols = [c.strip() for c in header_lines[0].replace("│", "┆").split("┆") if c.strip()] + if cols: + hints.append(f"[DataFrame 컬럼: {', '.join(cols)}]") + + # shape 정보 추출 + import re as _re + + shape_match = _re.search(r"shape:\s*\((\d+),\s*(\d+)\)", result) + if shape_match: + hints.append(f"[shape: {shape_match.group(1)}행 × {shape_match.group(2)}열]") + + # dict keys 출력 감지 + keys_match = _re.search(r"dict_keys\(\[([^\]]+)\]\)", result) + if keys_match: + hints.append(f"[dict 키: {keys_match.group(1)}]") + + if not hints: + return "" + return "\n" + " ".join(hints) + " — 다음 코드에서 이 컬럼명/키를 정확히 사용하세요." + + +_MAX_RESULT_CHARS = 8000 # LLM 피드백용 결과 상한 (사용자 UI에는 전체 표시) + + +def _streamWithCodeExecution( + llm: Any, + messages: list[dict], + stockCode: str | None, + *, + maxRounds: int = 3, + mode: str = "analysis", +) -> Generator[str | AnalysisEvent, None, None]: + """LLM 스트리밍 + 코드블록 자동 감지/실행 루프. + + LLM이 ```python 블록을 생성하면 자동 실행하고 + 결과를 LLM에 피드백하여 해석을 이어간다. + + mode: + - "analysis": 결과 해석을 이어감 (기본) + - "coding": 실행 성공 확인만 하고, 완전한 코드 제공을 유도 + + Yields: + str: 텍스트 청크 (chunk 이벤트로 변환됨) + AnalysisEvent: code_round 이벤트 (진행 상태) + """ + prevCode: str | None = None + + for _round in range(maxRounds): + buffer = "" + for chunk in llm.stream(messages): + buffer += chunk + yield chunk + + # 코드블록 감지 + codeBlocks = _extractCodeBlocks(buffer) + if not codeBlocks: + return # 코드 없음 → 스트리밍 완료 + + # 마지막 코드블록 실행 + code = codeBlocks[-1] + + # 반복 루프 감지 — 이전 코드와 유사하면 조기 종료 + if prevCode is not None: + similarity = SequenceMatcher(None, prevCode, code).ratio() + if similarity >= _LOOP_SIMILARITY_THRESHOLD: + yield f"\n\n[반복 코드 감지 — 루프 종료 (유사도 {similarity:.0%})]\n\n" + return + prevCode = code + + # 진행 이벤트 — 실행 시작 + yield AnalysisEvent( + "code_round", + { + "round": _round + 1, + "maxRounds": maxRounds, + "status": "executing", + "code": code, + }, + ) + + try: + result = _executeCodeBlock(code, stockCode=stockCode) + except (OSError, RuntimeError, TimeoutError, ValueError) as exc: + result = f"실행 오류: {exc}" + + # VizSpec 마커 추출 → CHART 이벤트 emit + from dartlab.viz.extract import extract_viz_specs + + result, viz_specs = extract_viz_specs(result) + for vspec in viz_specs: + yield AnalysisEvent("chart", {"charts": [vspec]}) + + # 진행 이벤트 — 실행 완료 (코드 + 결과 포함) + formatted = _formatResultForUser(result) + yield AnalysisEvent( + "code_round", + { + "round": _round + 1, + "maxRounds": maxRounds, + "status": "done", + "code": code, + "result": formatted, + }, + ) + + # 실행 결과는 code_round 이벤트로만 전달 (본문 텍스트 중복 방지) + # 결과를 대화에 추가하여 LLM이 해석하도록 재요청 + messages.append({"role": "assistant", "content": buffer}) + + # LLM 피드백: 결과 크기 제한 (컨텍스트 화폐 절약) + isError = "실행 오류" in result or "Error" in result or "Traceback" in result + # 빈 결과 감지 — Polars (0, 0) shape, 빈 dict, None 단독 등은 "분석 실패 신호" + # exception 은 아니지만 도구 선택/대상 오류일 가능성이 높음 → error 와 동일 처리 + _resultStripped = result.strip() + isEmpty = not isError and ( + "shape: (0, 0)" in result + or "shape: (0," in result + or _resultStripped in ("", "{}", "None", "[]", "shape: (0, 0)\n┌┐\n╞╡\n└┘") + ) + if len(result) > _MAX_RESULT_CHARS and not isError: + feedback = ( + f"코드 실행 결과 (처음 {_MAX_RESULT_CHARS}자, 전체 {len(result)}자):\n\n" + f"```\n{result[:_MAX_RESULT_CHARS]}\n```\n\n" + "결과가 잘렸습니다. .head()/.filter()로 범위를 좁혀 필요한 부분만 재조회하세요." + ) + elif isError: + # 에러 유형별 구체적 복구 지침 (행동 지침 패턴) + _err_lower = result.lower() + _recovery_hints: list[str] = [] + if "unknown topic" in _err_lower or "invalid topic" in _err_lower: + _recovery_hints.append("→ `print(c.topics)` 로 사용 가능한 topic 목록을 확인하세요.") + if "keyerror" in _err_lower or "key error" in _err_lower: + _recovery_hints.append("→ 먼저 `print(result.keys())` 또는 `print(df.columns)` 로 실제 키를 확인하세요.") + if "timeout" in _err_lower or "timed out" in _err_lower: + _recovery_hints.append("→ c.review() 전체 호출은 83초. `c.review('수익성')` 단일 섹션을 사용하세요.") + if "no data" in _err_lower or "데이터가 없" in _err_lower or "not found" in _err_lower: + _recovery_hints.append("→ `c.index` 로 이 종목의 가용 데이터를 확인하세요.") + if "attributeerror" in _err_lower: + _recovery_hints.append("→ `print(type(c))` 로 객체 타입을 확인하세요. Company가 아닐 수 있습니다.") + if "nameerror" in _err_lower: + _recovery_hints.append("→ 변수가 이전 라운드에서 정의됐을 수 있습니다. 한 블록 안에서 변수 정의부터 다시 하세요.") + if "import" in _err_lower and "error" in _err_lower: + _recovery_hints.append("→ import 금지. dartlab, pl(polars)은 이미 준비되어 있습니다.") + _recovery_text = "\n".join(_recovery_hints) if _recovery_hints else "API를 모르겠으면 무인자 호출로 가이드를 확인하세요: print(c.analysis())" + feedback = ( + "코드 실행 결과:\n\n" + f"```\n{result}\n```\n\n" + "에러를 읽고 원인을 진단하세요. 같은 코드를 반복하지 마세요.\n" + f"{_recovery_text}" + ) + elif isEmpty: + feedback = ( + "코드 실행 결과가 비어 있습니다 (빈 DataFrame / 빈 dict / None):\n\n" + f"```\n{result}\n```\n\n" + "**이건 분석 실패 신호다.** 같은 도구를 다른 인자로 재시도하지 마세요.\n" + "원인 진단 체크리스트:\n" + "1. **질문이 메타 지식인가?** (예: 'X와 Y 비교', 'X 엔진은 뭐 하는가') " + "→ 도구 호출 자체가 잘못. 코드 없이 ops/ 지식으로 직접 답변하세요.\n" + "2. **Company `c` 가 바인딩됐는가?** `print(type(c), getattr(c, 'stockCode', None))` 로 확인.\n" + "3. **무인자 가이드 호출인가?** `c.analysis()` 가 (0,0)이면 Company 가 비정상. " + "정상이면 `axis|label|description` 컬럼이 나와야 함.\n" + "4. **존재하지 않는 topic/축인가?** `print(c.topics)` / `print(c.analysis())` 로 유효 키 확인.\n" + "빈 결과 2회 연속이면 **도구 선택을 바꾸거나 사용자에게 컨텍스트를 명시적으로 답변에 알리세요** " + "(되묻기 금지 — '이 질문은 도구 호출 없이 답변합니다' 식으로 선언)." + ) + elif mode == "coding": + feedback = ( + f"코드 실행 성공. 결과:\n```\n{result[:2000]}\n```\n\n" + "코드가 정상 동작합니다. 사용자에게 이 코드와 결과를 제공하세요.\n" + "`import dartlab` 포함 복사-붙여넣기 가능한 완전한 코드를 최종 답변에 포함하세요.\n" + "커스터마이즈 포인트(종목코드 변경, 조건 변경 등)도 안내하세요." + ) + else: + # 코드 실행 결과 AI 맥락 보강 (aiview 통합) + _data_hint = _extractDataHint(result) + feedback = ( + "코드 실행 결과:\n\n" + f"```\n{result}\n```\n\n" + "이 결과를 바탕으로 해석하세요. " + "**수치는 위 결과에서 정확히 인용하라. 기억으로 수치를 만들지 마라.** " + f"결과가 잘렸으면 .head()/.filter()로 범위를 좁혀 재실행하세요.{_data_hint}" + ) + messages.append({"role": "user", "content": feedback}) + + # maxRounds 도달 — 마지막 스트리밍으로 종합 + yield from llm.stream(messages) + + +# ── 대화 상태 빌드 (history만 유지) ───────────────────────── + + +def _buildHistoryMessages( + history: list | None, + history_messages: list[dict] | None, +) -> list[dict] | None: + """히스토리 messages 자동 빌드.""" + if history_messages is not None: + return history_messages + + if history is None: + return None + + from dartlab.ai.conversation.history import build_history_messages, compress_history + from dartlab.ai.types import history_from_dicts + + light_history = history_from_dicts(history) + compressed = compress_history(light_history) + return build_history_messages(compressed) + + +# ── 모드 감지 ──────────────────────────────────────────── + +_CODING_KEYWORDS = re.compile( + r"코드\s*(짜|만들|작성|생성|줘)|코딩|스크립트|함수\s*(만들|작성)|" + r"자동화\s*(코드|스크립트)|프로그램\s*(짜|만들)|" + r"(write|generate|create)\s*(code|script|function)|" + r"dartlab\s*(으로|로)\s*.*(코드|짜|만들)", + re.IGNORECASE, +) +_CODING_EXCLUDE = re.compile(r"종목코드|코드번호|코드(가|는|를)\s*뭐", re.IGNORECASE) + + +def _detectMode(question: str) -> str: + """'analysis' 또는 'coding' 반환.""" + if _CODING_EXCLUDE.search(question): + return "analysis" + if _CODING_KEYWORDS.search(question): + return "coding" + return "analysis" + + +# ── 코딩 모드 시스템 프롬프트 ───────────────────────────── + +_CODING_SYSTEM_PROMPT = """\ +dartlab 코드 생성 전문가. 사용자가 복사해서 바로 실행할 수 있는 Python 코드를 생성한다. + +## 핵심 원칙 +- **완전한 독립 실행 코드** 생성: `import dartlab`부터 결과 출력까지 +- 종목코드를 변수화: `stock_code = "005930"` (상단에 설정 가능하게) +- Polars 문법 (pandas 아님) +- 에러 핸들링 포함 (None 체크) +- 코드 내 한글 주석으로 각 단계 설명 + +## 실행 환경 +이미 준비됨 — **import 금지, 재선언 금지:** +- `dartlab`, `polars`(pl), `re` +{env_block} + +## 출력 형식 +1. 코드의 목적 1줄 설명 +2. ```python 블록 (완전한 독립 실행 코드) +3. 실행 결과를 확인한 후 코드가 정상 동작함을 알려준다 +4. 커스터마이즈 포인트 (변수 변경으로 다른 종목/조건 적용 방법) + +## dartlab 주요 API +```python +c = dartlab.Company("005930") # 기업 객체 +c.show("IS") # 손익계산서 +c.select("IS", ["매출액", "영업이익"]) # 행 필터 +c.analysis("financial", "수익성") # 분석 +c.gather("price") # 주가 +dartlab.scan("profitability") # 전종목 스캔 +dartlab.macro("사이클") # 매크로 +c.credit() # 신용등급 +c.quant() # 기술적 분석 +dartlab.search("유상증자") # 공시 검색 +``` + +## 금지 +- import dartlab, polars 재선언 (이미 있음) +- review()/reviewer() 사용 (분석에는 analysis 사용) +- c.sections 접근 (메모리 위험) +- scan DataFrame join (타임아웃) +""" + + +# ── 시스템 프롬프트 ─────────────────────────────────────── + +_SYSTEM_PROMPT = """\ +적극적 분석가. dartlab으로 한국/미국 상장기업을 분석한다. +```python 코드블록 = 자동 실행. 사용자는 코드를 보고 분석 방법을 배운다. + +## 실행 환경 +이미 준비됨 — **import 금지, 재선언 금지:** +- `dartlab`, `pl` (polars) +- `webSearch(query)`, `newsSearch(query, days=N)`, `formatResults(results)` +- `dartlab.search(query, corp="종목코드")` — 공시 원문 검색 → Polars DataFrame 반환 +- `emit_chart(spec)`, `emit_diagram(type, source)` — 차트/다이어그램 +{env_block} + +## Polars 문법 — pandas 금지 +dartlab은 Polars만 사용한다. **pandas 문법(df.groupby, df.iloc, df.loc, df.apply, df.iterrows) 절대 금지.** +```python +# ✓ Polars 정확한 패턴 +df.filter(pl.col("영업이익률") > 10) +df.select(["기간", "매출액"]) +df.group_by("업종").agg(pl.col("ROE").mean()) +df.with_columns((pl.col("매출액") / 1e8).alias("매출(억)")) +df.sort("기간", descending=True) +df.head(5) +df.columns # 컬럼명 목록 +df.schema # 컬럼명 + 타입 +df.shape # (행, 열) + +# ✗ pandas 금지 — 이렇게 쓰면 에러 +# df.groupby(), df.iloc[], df.loc[], df.apply(), df.iterrows() +# df.rename(columns={{}}), df.merge(), df.pivot_table() +``` +한글 컬럼명이 많다. 컬럼명을 모르면 `print(df.columns)` 먼저 확인. + +## 시각화 +테이블과 차트를 함께 제공하라. **실제 종목 데이터 (시계열, 비교, 분포) 만 차트화** — +가이드/메타/스키마 dataframe (axis/items/partId 컬럼) 은 print 만 하고 차트 금지. +```python +from dartlab.viz import revenue, cashflow, profitability_chart, dividend_chart, balance_sheet_chart +revenue(c) # 도메인 차트를 먼저 사용 — 1줄로 자동 생성 +``` +커스텀: emit_chart({{"chartType": "combo|bar|line|radar|waterfall|heatmap|pie|sparkline", "title": "...", "series": [...], "categories": [...]}}). + +## 엔진 self-discovery — 어떤 축이 있는지 모를 때 + +**모든 분석 엔진은 무인자 호출 시 가이드 DataFrame을 반환한다.** 첫 번째로 이걸 시도하라. + +```python +print(c.analysis()) # 분석 가이드 (수익성/성장성/안정성 등) +print(c.quant()) # 기술적 분석 가이드 (모멘텀/베타/팩터 등) +print(c.credit()) # 신용 가이드 (채무상환/유동성 등) +print(dartlab.macro()) # 11축 가이드 (사이클/금리/심리 등) +print(dartlab.scan()) # 20축 가이드 (전종목 횡단) +``` + +각 가이드 DataFrame은 `axis | label | description | example` 컬럼이 통일되어 있다. +사용자가 "어떤 분석이 있어?"라고 물으면 위 5개 중 적절한 가이드를 print하라. +가이드는 축마다 한 행이라 `group_by("axis").len()` 은 무조건 1 — items 컬럼을 직접 보라. +계산 결과 dict 에 `displayHints` 가 있으면 그 `core` 컬럼을 표에 우선 포함하라. + +## 도구 레퍼런스 — 시그니처 + 반환 + 제약 + +### c.analysis(group?, axis?) → dict +- 무인자 → 가이드 DataFrame (axis|label|description) +- 축 이름은 반드시 가이드에서 확인. 추측 호출 금지 +- **history[0]의 키를 미리 알고 코드를 작성하라** — print(r.keys()) 탐색 라운드 낭비 금지 + +**축별 반환 스키마** (dict 키 → 핵심 history 키): +| 축 | dict 키 | 핵심 history 키 | +|---|---------|---------------| +| 수익성 | marginTrend, returnTrend, roicTree, profitabilityFlags | period, revenue, operatingMargin, netMargin, grossMargin, roe, roa | +| 성장성 | growthTrend, cagrComparison, growthFlags | period, revenue, revenueYoy, operatingIncomeYoy, netIncomeYoy | +| 안정성 | leverageTrend, coverageTrend, distressScore, stabilityFlags | period, debtRatio, equityRatio, netDebtRatio, totalBorrowing | +| 현금흐름 | cashFlowOverview, cashQuality, cashFlowFlags | period, ocf, icf, capex, fcf, pattern | +| 비용구조 | costBreakdown, operatingLeverage, costStructureFlags | period, revenue, costOfSales, sga, costOfSalesRatio, sgaRatio | +| 효율성 | turnoverTrend, efficiencyFlags | period, totalAssetTurnover, dso, dio, dpo, ccc | +| 자산구조 | assetStructure, workingCapital, capexPattern | period, totalAssets, receivables, inventory, ppe, cash | + +사용법: `r = c.analysis("수익성")` → `r["marginTrend"]["history"]` → 위 키로 바로 접근 + +### c.credit(axis?, detail=False) → dict +- 무인자 → 가이드 DataFrame + 종합 등급 +- `c.credit("등급")` → dict: grade, healthScore(0-100), pdEstimate +- `c.credit("등급", detail=True)` → + narratives, divergenceExplanation 포함 + +### dartlab.scan(axis?, param?) → DataFrame +- 무인자 → 가이드 DataFrame (20축) +- `dartlab.scan("profitability")` → **컬럼: 종목코드, 종목명, 영업이익률, 순이익률, ROE, ROA, 등급** +- `dartlab.scan("ratio", "roe")` → **컬럼: 종목코드, 종목명, {연도별 값}** +- `dartlab.scan("cashflow")` → **컬럼: 종목코드, 종목명, OCF, ICF, FCF, pattern** +- 정렬: `df.sort("ROE", descending=True).head(10)` — 한글 컬럼명 정확히 사용 +- ⚠ scan DataFrame은 join 금지 (타임아웃) + +### dartlab.macro(axis?) → dict +- 무인자 → 가이드 DataFrame (11축) +- `dartlab.macro("사이클")` → dict: phase, signals 등 +- ⚠ Company 불필요. dartlab.macro()로 직접 호출 +- market="US"|"KR" 파라미터 지원 + +### c.gather(axis?) → DataFrame | None +- `c.gather("price")` → DataFrame (OHLCV) 또는 None +- 축: price, flow, news, macro +- ⚠ 반드시 None 체크. 데이터 없으면 None 반환 + +### c.quant(axis?) → dict +- 무인자 → 가이드 DataFrame +- `c.quant("종합")` → dict: verdict(강세/중립/약세), score, rsi, adx 등 + +### c.show(topic) / c.select(statement, rows) → DataFrame +- `c.show("IS")` → 손익계산서. **컬럼: snakeId, 항목, 2025Q4, 2025Q3, ..., 2016Q1** (분기) +- `c.show("IS", freq="Y")` → 연간 합산. **컬럼: snakeId, 항목, 2025, 2024, ..., 2016** +- `c.select("IS", ["매출액"])` → 필터된 DataFrame (같은 컬럼 구조) +- `c.show("inventory")` → 주석 상세 (12항목: inventory, borrowings, tangibleAsset 등) +- 행 필터는 **"항목" 컬럼 기준** (한글): `df.filter(pl.col("항목") == "매출액")` +- 기간 컬럼은 **문자열** ("2025Q4", "2024" 등). 값은 float (원 단위). +- ⚠ c.sections 금지 (409MB, 19초). c.show(topic)으로 개별 조회 + +### c.review(section?) → Review +- `c.review("수익성")` → 단일 섹션 (~5초) +- ⚠ c.review() 전체는 83초 → AI 코드 실행(60초)에서 타임아웃. 반드시 섹션 지정 +- 사용자가 "보고서"를 명시 요청한 경우만 사용. 분석 질문에는 analysis + +### dartlab.search(query, corp?) → DataFrame +- `dartlab.search("유상증자")` → 전 상장사 공시 검색 +- `dartlab.search("대표이사 변경", corp="005930")` → 종목 필터 + +### 질문 유형별 도구 선택 +| 질문 유형 | 도구 | +|----------|------| +| 기업 분석/수익성/부채 | analysis | +| 신용등급/건전도 | credit | +| 시장비교/순위 | scan | +| 경제사이클/금리 | macro (Company 불필요) | +| 주가/수급/뉴스 | gather | +| 기술적분석/매매신호 | quant | +| 특정 계정 조회 | show/select | +| "보고서 뽑아줘" 명시 | review (섹션 지정 필수) | +| 공시 원문 검색 | search | +| 실시간 뉴스 | newsSearch() | +| 도구·기능 메타 지식 | 코드 금지. ops/ 지식으로 직접 답변 | +| 모르겠으면 | self-discovery: print(c.analysis()) 등 무인자 | + +### 핵심 원칙 +- **[최우선] 메타 지식 질문에는 코드 절대 금지.** "X vs Y 차이/비교", "X 엔진은 뭐 하는가", "어느 걸 먼저 써야 해", "왜 Company 없이 호출해" 같은 **도구·엔진·기능 자체에 대한 질문**은 ops/ 지식으로 직접 답한다. `c.analysis()`, `c.credit()`, `c.review()`, `dartlab.macro()`, `dartlab.capabilities()` **모두 호출 금지**. 특정 회사(삼성전자 등) 데이터 인용 금지. 답변은 마크다운 표 + 한 줄 요약. 회사 분석을 끌어오면 **틀린 답이다**. +- **analysis가 기본 도구.** dict 반환 → 핵심 수치를 마크다운 테이블로 정리. print(dict) 금지. +- **무인자 호출은 가이드 반환.** `c.quant()`, `c.credit()` 무인자는 dict가 아닌 가이드 DataFrame이다. 분석 결과를 원하면 `c.quant("종합")`, `c.credit("등급")`을 사용. +- **review() 사용 금지** — 사용자가 "보고서"를 명시적으로 요청한 경우만 예외. 분석 질문에는 반드시 analysis를 써라. +- **scan은 횡단 비교용.** `print(df.head(3))`으로 컬럼 확인 후 사용. join 금지(타임아웃). +- **gather는 None 가능** — 반드시 None 체크. 축: price/flow/news/peers/sector/insider/ownership. +- **macro는 독립 엔진** — `dartlab.macro("사이클"|"금리"|"자산"|"심리"|"유동성"|"종합")`. Company 불필요. market="US"|"KR". 반환 dict → `print(result.keys())`로 키 확인 후 사용. +- **search는 corp 없이도 전체 검색 가능** — `dartlab.search("대표이사 변경")` → 전 상장사 공시 검색. +- **c.sections 접근 금지** (409MB). show(topic)으로 개별 조회. +- **구조 모르면** print(result.keys()) 또는 print(엔진()) self-discovery. + +### 종합 분석 ("분석해줘", "어때?") +analysis 3축(수익성+성장성+안정성) 1라운드 수집 → **6막 인과 서사**로 해석: +사업이해 → 수익성 → 현금전환 → 안정성 → 자본배분 → 전망. +**앞 막이 뒷 막의 원인.** "DX 비중 확대 → 마진 회복 → FCF 확보 → 배당 여력" 같은 인과 연결. +추가 필요 시 2라운드에서 현금흐름/효율성/자본배분 추가. + +### analysis 테이블 출력 패턴 +```python +r = c.analysis("financial", "수익성") +# profitabilityFlags 경고 있으면 먼저 반영 +print("| 기간 | 매출(억) | 영업이익률 | 순이익률 |") +print("| --- | --- | --- | --- |") +for h in r["marginTrend"]["history"][:5]: + print(f'| {{h["period"]}} | {{h["revenue"]/1e8:,.0f}} | {{h["operatingMargin"]:.1f}}% | {{h["netMargin"]:.1f}}% |') +``` +큰 숫자 억 단위(/1e8). null→"-". 한영 양방향 축명 지원. +analysis("valuation", "가치평가"), analysis("forecast", "매출전망") 등 그룹/축 패턴 동일. +주석 enrichment: 자산구조에 notesDetail, 비용구조에 costByNature 포함됨. + +### credit — 신용등급 +```python +cr = c.credit(detail=True) +print(f"등급: {{cr['grade']}}, 건전도: {{cr['healthScore']}}/100") +# score는 위험도(0=최우량, 100=최위험). healthScore(100-score)가 직관적. +# narratives는 baseline — 산업 맥락 + 인과 체인으로 네가 더 깊이 해석하라. +``` + +### scan — 시장 횡단 +```python +df = dartlab.scan("profitability") # 전종목 수익성 +df = dartlab.scan("account", "매출액") # 전종목 매출 시계열 +df = dartlab.scan("ratio", "roe") # 전종목 ROE 시계열 +# 컬럼 대부분 한글. 불확실하면 print(df.head(3)). scan join 금지. +``` + +### show/select + notes +```python +c.show("IS") # 재무제표 +c.select("IS", ["매출액"]).chart() # 필터 + 차트 +c.notes.inventory # 주석 상세 (12항목: c.notes.keys()) +# analysis에 주석이 이미 포함됨 — notes는 추가 상세 필요할 때만. +``` + +### quant — 기술적 분석 +```python +c.quant() # 종합 판단 (강세/중립/약세) +c.quant("divergence") # 재무-기술적 괴리 진단 +# 투자 판단: analysis(재무) + quant(기술적) 교차 검증 +``` + +## 해석 원칙 +- 숫자 나열 금지. **원인과 맥락**을 붙여라. 마진 변동 → 매출/비용/믹스 분해. +- **수치 인용은 코드 실행 결과에서만.** 실행 결과에 "13.1%"가 있으면 "13.1%"로 인용. 기억이나 추측으로 수치를 만들지 마라. +- **추세** 3~5년, **교차 검증** IS-CF-BS 일관성, **비교**는 동종업계 상대 위치(scan). +- profitabilityFlags 경고 있으면 반드시 반영. +- marginTrend에 ROE 없음 → returnTrend 사용. ROIC → analysis("financial", "투자효율"). + +## 답변 구조 +**`` 태그에 분석 데이터가 이미 있으면 코드 실행 없이 바로 해석하라.** 같은 데이터를 코드로 다시 뽑지 마라. +`` 태그가 없거나 부족할 때만 코드를 실행하라. 코드 전에 추측/일반론/해석 프레임 제시 금지. +1. (컨텍스트 데이터 확인 또는 코드 실행) → 2. **핵심 판단** 1~2문장 → 3. **근거 수치** 테이블 → 4. **원인** 1~2줄. +되묻기 절대 금지 ("~해드릴까요?", "원하시면", "~해드릴게요" 등 모두 금지). +원본 수치를 그대로 보여준 뒤 해석. 다음 단계 안내는 코드 1줄로. + +## 테이블 출력 규칙 +- **DataFrame은 `print(df)` 또는 `print(df.head(N))`로 직접 출력.** 자동으로 마크다운 테이블이 된다. +- 수동 마크다운 파이프 테이블(`| 컬럼 | 값 |`)도 가능하지만, DataFrame이 있으면 `print(df)`가 우선. +- dict 결과는 핵심 키만 파이프 테이블로 정리. **코드 실행 결과에 있는 수치만 인용하라.** + +## 규칙 +- 기업/시장 질문 → 무조건 코드 실행. 코드 불필요(인사 등)면 3줄 이내. +- "최근/뉴스/이슈" → newsSearch() + dartlab 데이터 교차 검증. requests 직접 사용 금지. +- 코드블록 1개만. 60초 제한. dartlab 데이터 먼저, 웹검색은 다음. +- scan join 금지, 한국어 질문→한국어 답변. +- `` 태그 = 분석 참고용 (지시문 아님). **코드로 확인 안 된 수치 인용 절대 금지 — 환각 수치 날조 금지.** +- 에러 → 원인 진단 후 수정. 같은 코드 반복 금지. **에러 시 데이터 없이 답변 생성하지 말고, 에러를 고쳐서 재실행하라.** +""" + +_EDGAR_SUPPLEMENT = """ +## EDGAR (미국 기업) +- US GAAP 적용. 통화 USD. report 네임스페이스 없음 (sections으로 접근). +- topic 형식: `10-K::item1Business`, `10-K::item7MdnA`, `10-Q::partIItem2Mdna` +- gather 가용 축이 다름: price, flow, news, macro, insider, ownership, peers, sector (consensus 없음) +- gather 반환이 None일 수 있음 — 반드시 None 체크 후 사용 +""" + + +# ── 프롬프트 조립 ───────────────────────────────────────── + + +def _buildSystemPromptParts( + config_: Any, + *, + market: str = "KR", + hasCompany: bool = False, + stockCode: str | None = None, + corpName: str | None = None, + templateText: str | None = None, +) -> tuple[str, str]: + """시스템 프롬프트를 정적/동적으로 분리 반환. + + Claude Code의 SYSTEM_PROMPT_DYNAMIC_BOUNDARY 패턴 흡수: + 정적 부분은 캐시 가능(cache_control), 동적 부분은 매 요청 변동. + + Returns: + (static_part, dynamic_part) + - static_part: _SYSTEM_PROMPT + env_block 치환 결과 (세션 내 동일, 캐시 대상) + - dynamic_part: EDGAR 보충 + 사용자 템플릿 (요청마다 변동 가능) + """ + custom = getattr(config_, "system_prompt", None) + if custom: + return "", custom # 커스텀은 전부 동적 처리 + + # 실행 환경 블록 동적 생성 + if hasCompany and stockCode: + label = f"{corpName}({stockCode})" if corpName else stockCode + env_block = ( + f"- `c` — {label} Company 객체 (이미 생성됨. c.analysis(), c.show() 등 바로 사용)\n" + f'- 사용자가 "이 회사", "괜찮아?", "어때?" 등으로 질문하면 {label}을 가리킨다. 되묻지 말고 바로 분석하라.' + ) + else: + env_block = "- 종목 분석이 필요하면 `c = dartlab.Company('종목코드')`로 생성하세요" + + # 정적: _SYSTEM_PROMPT + env_block 치환 결과 (~694줄, 세션 내 동일) + static_part = _SYSTEM_PROMPT.replace("{env_block}", env_block) + + # 동적: EDGAR 보충 + 사용자 템플릿 (요청마다 변동 가능) + dynamic_parts: list[str] = [] + if market == "US": + dynamic_parts.append(_EDGAR_SUPPLEMENT) + if templateText: + dynamic_parts.append(f"\n## 사용자 분석 템플릿 (이 지시를 반드시 따르라)\n\n{templateText}") + + return static_part, "\n".join(dynamic_parts) + + +def _buildSystemPrompt( + config_: Any, + *, + market: str = "KR", + hasCompany: bool = False, + stockCode: str | None = None, + corpName: str | None = None, + templateText: str | None = None, +) -> str: + """시스템 프롬프트 조립 — 하위 호환 래퍼.""" + static, dynamic = _buildSystemPromptParts( + config_, + market=market, + hasCompany=hasCompany, + stockCode=stockCode, + corpName=corpName, + templateText=templateText, + ) + return static + dynamic + + +# ── 통합 오케스트레이터 ────────────────────────────────── + + +def analyze( + company: Any | None, + question: str, + *, + # LLM 설정 + provider: str | None = None, + role: str | None = None, + model: str | None = None, + api_key: str | None = None, + base_url: str | None = None, + include: list[str] | None = None, + exclude: list[str] | None = None, + # 멀티컴퍼니 비교 지원 + companies: list[Any] | None = None, + # 활성 파라미터 + max_turns: int = 5, + reflect: bool = False, + report_mode: bool = False, + history: list | None = None, + history_messages: list[dict] | None = None, + conversation_meta: dict | None = None, + emit_system_prompt: bool = True, + # 하위호환 deprecated 파라미터 (내부적으로 무시) — kwargs 로 흡수됨 + # 단축 경로 + not_found_msg: str | None = None, + # 템플릿 + _templateName: str | None = None, + _templateText: str | None = None, + # 추가 LLMConfig overrides + **kwargs: Any, +) -> Generator[AnalysisEvent, None, None]: + """AI 분석 이벤트 스트림 생산. + + 3단계 구조: + 1. Config 해석 + Meta 이벤트 + 2. CAPABILITIES 검색 → 시스템 프롬프트 조립 + 3. LLM 스트리밍 + 코드블록 자동 실행 → chunk 이벤트 + + 로그: ``dartlab.askLog = True``로 설정하면 data/ask_logs/에 세션별 JSONL 저장. + """ + # ── ask 로그 초기화 ── + _logFile = None + try: + from dartlab import config as _cfg + + if getattr(_cfg, "askLog", False): + import datetime + import json + from pathlib import Path + + logDir = Path(_cfg.dataDir) / "ask_logs" + logDir.mkdir(parents=True, exist_ok=True) + ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + _stock = getattr(company, "stockCode", getattr(company, "ticker", "unknown")) if company else "none" + _logPath = logDir / f"{ts}_{_stock}.jsonl" + _logFile = open(_logPath, "w", encoding="utf-8") # noqa: SIM115 + # 첫 줄: 질문 + _logFile.write(json.dumps({"kind": "question", "data": {"question": question}}, ensure_ascii=False) + "\n") + except (ImportError, OSError): + _logFile = None + + def _emit(event: AnalysisEvent) -> AnalysisEvent: + if _logFile is not None: + import json + + try: + _logFile.write( + json.dumps({"kind": event.kind, "data": event.data}, ensure_ascii=False, default=str) + "\n" + ) + _logFile.flush() + except (OSError, TypeError): + pass + return event + + try: + # ── not_found 단축 경로 ── + if not_found_msg: + meta = conversation_meta or {} + corp_name = getattr(company, "corpName", None) if company else None + stock_id = getattr(company, "stockCode", getattr(company, "ticker", "")) if company else None + if corp_name: + meta.setdefault("company", corp_name) + if stock_id: + meta.setdefault("stockCode", stock_id) + yield _emit(AnalysisEvent("meta", meta)) + yield _emit(AnalysisEvent("chunk", {"text": not_found_msg})) + yield _emit(AnalysisEvent("done", {})) + return + + full_response_parts: list[str] = [] + done_payload: dict[str, Any] = {} + + try: + for ev in _analyze_inner( + company, + question, + provider=provider, + role=role, + model=model, + api_key=api_key, + base_url=base_url, + history=history, + history_messages=history_messages, + conversation_meta=conversation_meta, + emit_system_prompt=emit_system_prompt, + _full_response_parts=full_response_parts, + _templateName=_templateName, + _templateText=_templateText, + **kwargs, + ): + yield _emit(ev) + except Exception as e: # noqa: BLE001 — top-level error boundary for the entire AI pipeline (LLM network/auth/parse/provider errors are unpredictable) + yield _emit(AnalysisEvent("error", _enrich_with_guide(_classify_error(e), error=e))) + + # ── 후처리: plugin hints ── + if question: + from dartlab.ai.runtime.plugin_hints import ( + detect_plugin_hints, + format_plugin_hints, + ) + from dartlab.core.plugins import get_loaded_plugins + + loaded_names = [p.name for p in get_loaded_plugins()] + hints = detect_plugin_hints(question, loaded_names) + if hints: + done_payload["pluginHints"] = hints + hint_text = format_plugin_hints(hints) + if hint_text: + done_payload["pluginHintsText"] = hint_text + + # ── Done 이벤트 ── + yield _emit(AnalysisEvent("done", done_payload)) + finally: + if _logFile is not None: + _logFile.close() + + +def _analyze_inner( + company: Any | None, + question: str, + *, + provider: str | None, + role: str | None, + model: str | None, + api_key: str | None, + base_url: str | None, + history: list | None, + history_messages: list[dict] | None, + conversation_meta: dict | None, + emit_system_prompt: bool, + _full_response_parts: list[str], + _templateName: str | None = None, + _templateText: str | None = None, + **kwargs: Any, +) -> Generator[AnalysisEvent, None, None]: + """analyze() 본체 — 3단계 순수 스트리밍.""" + + # ── 0. 모드 감지 ── + mode = _detectMode(question) + + # ── 1. Config 해석 + Meta 이벤트 ── + config_ = _resolveAnalysisConfig(provider, role, model, api_key, base_url, **kwargs) + + corp_name = getattr(company, "corpName", "Unknown") if company else None + stock_id = getattr(company, "stockCode", getattr(company, "ticker", "")) if company else None + + meta = conversation_meta or {} + if corp_name: + meta.setdefault("company", corp_name) + if stock_id: + meta.setdefault("stockCode", stock_id) + if company is not None: + _dataDate = _extract_data_date(company) + if _dataDate: + meta.setdefault("dataDate", _dataDate) + yield AnalysisEvent("meta", meta) + + # ── P1-1: ground 데이터 백그라운드 fire ── + # 3개 호출 (disclosure / 외부검색 / KnowledgeDB 인사이트) 을 병렬 thread 로 시작. + # 동기 작업 (provider/prompt/few-shot/route 등) 과 오버랩 → 첫 토큰 지연 단축. + # DARTLAB_AI_PREGROUND_SYNC=1 로 동기 모드 fallback. + _sync_mode = os.environ.get("DARTLAB_AI_PREGROUND_SYNC") == "1" + _ground_executor: concurrent.futures.ThreadPoolExecutor | None = None + _f_disclosure: concurrent.futures.Future | None = None + _f_search: concurrent.futures.Future | None = None + _f_insight: concurrent.futures.Future | None = None + + if not _sync_mode and stock_id: + _ground_executor = concurrent.futures.ThreadPoolExecutor(max_workers=3, thread_name_prefix="dl-ground") + _f_disclosure = _ground_executor.submit(_preGroundDisclosure, stockCode=stock_id) + if _needsExternalSearch(question): + _f_search = _ground_executor.submit(_preGroundSearch, question, stockCode=stock_id, corpName=corp_name) + _f_insight = _ground_executor.submit(_gatherInsightHints, stock_id, company) + + # ── 2. LLM provider 생성 (캐시 경계 판단에 필요) ── + from dartlab.ai.providers import create_provider + + llm = create_provider(config_) + + # ── 3. 시스템 프롬프트 조립 (캐시 경계 적용) ── + company_market = getattr(company, "market", "KR") if company else "KR" + # 템플릿 텍스트: 직접 전달된 _templateText 우선, 없으면 _templateName으로 로드 + if _templateText is None and _templateName: + from dartlab.ai.patterns import get_template + + _templateText = get_template(_templateName) + + if mode == "coding": + # 코딩 모드 전용 프롬프트 + if company is not None and stock_id: + label = f"{corp_name}({stock_id})" if corp_name else stock_id + coding_env = ( + f"- `c` — {label} Company 객체 (이미 생성됨. c.analysis(), c.show() 등 바로 사용)\n" + f'- 사용자가 "이 회사" 등으로 질문하면 {label}을 가리킨다.' + ) + else: + coding_env = "- 종목 분석이 필요하면 `c = dartlab.Company('종목코드')`로 생성하세요" + static_prompt = _CODING_SYSTEM_PROMPT.replace("{env_block}", coding_env) + dynamic_prompt = "" + else: + static_prompt, dynamic_prompt = _buildSystemPromptParts( + config_, + market=company_market, + hasCompany=company is not None, + stockCode=stock_id, + corpName=corp_name, + templateText=_templateText, + ) + + # 캐시 경계: 정적 부분에 cache_control 마커 삽입 (Claude 네이티브만) + if llm.supports_cache_control and static_prompt: + system_content: str | list[dict] = [ + {"type": "text", "text": static_prompt, "cache_control": {"type": "ephemeral"}}, + ] + if dynamic_prompt: + system_content.append({"type": "text", "text": dynamic_prompt}) + else: + system_content = static_prompt + dynamic_prompt + + system_prompt = static_prompt + dynamic_prompt # emit/로깅용 플랫 문자열 + + # company=None이면 종목명 사전 검색 + prefetchText = "" + if company is None: + prefetchText = _searchCompanyCodes(question) + + if emit_system_prompt: + yield AnalysisEvent("system_prompt", {"text": system_prompt}) + + # ── Messages 조립 ── + messages: list[dict] = [{"role": "system", "content": system_content}] + + # 히스토리 주입 + effective_history = _buildHistoryMessages(history, history_messages) + if effective_history: + messages.extend(effective_history) + + # 메모리 (세션 간) — 질문 이력만 참조, 수치/요약은 제외 (코드 실행 유도) + memoryHints = "" + if stock_id: + try: + from dartlab.ai.memory.store import getMemory + + records = getMemory().recallForStock(stock_id, limit=3) + if records: + import datetime + + hints = [] + for r in records: + dt = datetime.datetime.fromtimestamp(r.timestamp).strftime("%Y-%m-%d") + hints.append(f"- {dt}: {r.question} ({r.questionType})") + memoryHints = "## 이전 질문 이력\n" + "\n".join(hints) + except (ImportError, OSError, sqlite3.Error): + pass + + # user 메시지 조립 + userParts: list[str] = [] + if corp_name and stock_id: + userParts.append(f"분석 대상: {corp_name} (종목코드: {stock_id})") + if prefetchText: + userParts.append(prefetchText) + + # ── P1-1: 백그라운드 ground future 회수 (deadline 기반) ── + # 동기 fallback 모드면 직접 호출, 아니면 future.result(timeout=remaining). + _ground_timeout = float(os.environ.get("DARTLAB_PREGROUND_TIMEOUT", "1.5")) + + def _safe_future_result(fut: concurrent.futures.Future | None, deadline: float) -> Any: + if fut is None: + return None + remaining = deadline - time.monotonic() + if remaining <= 0: + return None + try: + return fut.result(timeout=remaining) + except (concurrent.futures.TimeoutError, Exception): # noqa: BLE001 + return None + + if _sync_mode: + # 동기 fallback (회귀 시 escape hatch) + disclosureBrief = _preGroundDisclosure(stockCode=stock_id) if stock_id else "" + if _needsExternalSearch(question) and stock_id: + groundingText = _preGroundSearch(question, stockCode=stock_id, corpName=corp_name) + else: + groundingText = "" + insightHints = _gatherInsightHints(stock_id, company) if stock_id else "" + else: + _deadline = time.monotonic() + _ground_timeout + disclosureBrief = _safe_future_result(_f_disclosure, _deadline) or "" + groundingText = _safe_future_result(_f_search, _deadline) or "" + insightHints = _safe_future_result(_f_insight, _deadline) or "" + # executor 정리 — wait=False 로 timeout 된 future 는 백그라운드에서 계속 진행 + if _ground_executor is not None: + _ground_executor.shutdown(wait=False) + + # ── ContextBuilder (기본 경로) ── + # ACE (arxiv.org/abs/2510.04618) + analysis calc selector + graph traversal. + # A/B 검증 완료: +31.6% 응답 풍부도, 10/10 성공, 에러 0. + # DARTLAB_CONTEXT_V1=1 로 legacy 강제 가능 (디버깅용). + _use_legacy = os.environ.get("DARTLAB_CONTEXT_V1") == "1" + if not _use_legacy: + try: + from dartlab.ai.context import ContextBuilder + + _bundle = ContextBuilder( + question=question, + company=company, + provider=getattr(config_, "provider", None), + ).build() + for _text in _bundle.toUserParts(): + if _text and _text not in userParts: + userParts.append(_text) + log.debug( + "context: intent=%s parts=%d tokens=%d dropped=%s", + _bundle.intent, + len(_bundle.parts), + _bundle.totalTokens, + _bundle.droppedKeys, + ) + except Exception: # noqa: BLE001 — ContextBuilder 실패 시 legacy fallback + log.exception("ContextBuilder failed, falling back to legacy") + _use_legacy = True + + if _use_legacy: + if disclosureBrief: + userParts.append(disclosureBrief) + if groundingText: + userParts.append(groundingText) + if memoryHints: + userParts.append(memoryHints) + if insightHints: + userParts.append(insightHints) + userParts.append(f"질문: {question}") + userContent = "\n\n---\n\n".join(userParts) + messages.append({"role": "user", "content": userContent}) + + # ── 4. LLM 스트리밍 + 코드블록 자동 실행 ── + for item in _streamWithCodeExecution(llm, messages, stockCode=stock_id, mode=mode): + if isinstance(item, AnalysisEvent): + yield item + else: + _full_response_parts.append(item) + yield AnalysisEvent("chunk", {"text": item}) + + # ── 분석 메모리 저장 + 인사이트 갱신 ── + # R21-4: stock_id 없는 general 질문도 executions 에 저장 (general 추적용) + if _full_response_parts: + try: + from dartlab.ai.memory.store import getMemory + from dartlab.ai.memory.summarizer import extractGrade, summarizeResponse + + _fullText = "".join(_full_response_parts) + _mem = getMemory() + _mem.saveAnalysis( + stockCode=stock_id or "", # general 질문은 빈 문자열 + question=question[:200], + questionType=mode, + resultSummary=summarizeResponse(_fullText), + grade=extractGrade(_fullText), + ) + except (ImportError, OSError, sqlite3.Error): + pass + + # 자기성장: 분석 모드 + stock_id 있을 때만 인사이트 갱신 (sector 학습용) + # R21-5: silent fail 방지 — broad except (자기성장은 best-effort) + if stock_id and mode == "analysis" and len("".join(_full_response_parts)) > 500: + try: + _updateInsightFromResponse(stock_id, "".join(_full_response_parts), company) + except (ImportError, OSError, sqlite3.Error, AttributeError, ValueError): + pass + + # ── ACE Curator (기본 활성) ── + # 응답 + grade → playbook bullet 추출 → KnowledgeDB delta merge. + # Generator/Reflector/Curator 폐쇄 루프의 마지막 단계. + # arxiv.org/abs/2510.04618 + if mode == "analysis" and _full_response_parts: + try: + from dartlab.ai.context.intent import classifyIntent + from dartlab.ai.context.playbook import curate + from dartlab.ai.memory.summarizer import extractGrade + + _full = "".join(_full_response_parts) + _intent = classifyIntent(question, hasCompany=company is not None).intent.value + _sector = "" + if company is not None: + _sector = getattr(company, "sector", None) or getattr(company, "sectorName", None) or "" + _grade = extractGrade(_full) + curate( + intent=_intent, + response_text=_full, + grade=_grade, + sector=str(_sector), + source="reflection", + ) + except (ImportError, OSError, sqlite3.Error, AttributeError, ValueError): + pass + + +# ── 자기성장 인사이트 갱신 ──────────────────────────────── + +# R21-5: regex 광범위화 — audit 응답 패턴이 "강점:", "약점:" 명시 적음. +# - 명시 키워드 (강점/약점/리스크 등) + 부드러운 표현 (개선/회복/우수/탄탄/감소/취약 등) 추가 +# - 키워드 뒤에 콜론 없어도 그 줄 또는 그 다음 절을 매치 +_STRENGTH_RE = re.compile( + r"(?:강점|장점|긍정|양호|우수|탄탄|회복|개선|성장|확대|증가|상승|반등)[:\s은는이가\.]+([^\n]{5,120})", +) +_WEAKNESS_RE = re.compile( + r"(?:약점|리스크|위험|부정|주의|훼손|악화|하락|감소|취약|부진|침체|압박|우려)[:\s은는이가\.]+([^\n]{5,120})", +) +_NARRATIVE_RE = re.compile(r"(?:결론|종합|요약|핵심|핵심 판단)[:\s]*(.+?)(?:\n\n|\Z)", re.DOTALL) + + +def _updateInsightFromResponse( + stock_code: str, + response_text: str, + company: Any | None, +) -> None: + """AI 분석 응답에서 인사이트를 추출하여 KnowledgeDB에 갱신. + + 규칙 기반 regex 추출 — LLM 호출 없이 결정론적. + """ + from dartlab.ai.persistence import KnowledgeDB + + # 강점/약점 추출 + strengths = _STRENGTH_RE.findall(response_text) + weaknesses = _WEAKNESS_RE.findall(response_text) + + # 서사 추출 + narrative = "" + match = _NARRATIVE_RE.search(response_text) + if match: + narrative = match.group(1).strip()[:500] + + # 서사가 없으면 응답 첫 200자를 서사로 + if not narrative: + clean = re.sub(r"```[\s\S]*?```", "", response_text) # 코드블록 제거 + clean = re.sub(r"\|.*\|", "", clean) # 테이블 제거 + clean = clean.strip() + if clean: + narrative = clean[:200] + + if not narrative: + return + + sector = "" + if company is not None: + sector = getattr(company, "sector", None) or getattr(company, "sectorName", None) or "" + + db = KnowledgeDB.get() + db.save_insight( + stock_code=stock_code, + narrative=narrative, + strengths=strengths[:5], + weaknesses=weaknesses[:5], + sector=str(sector), + source="live", + ) diff --git a/src/dartlab/ai/runtime/events.py b/src/dartlab/ai/runtime/events.py new file mode 100644 index 0000000000000000000000000000000000000000..b75d4d5b4dfd1a19a5cb03da6db2aebcd5f6bd3e --- /dev/null +++ b/src/dartlab/ai/runtime/events.py @@ -0,0 +1,62 @@ +"""AI 분석 이벤트 타입. + +core.analyze()가 생산하는 이벤트 스트림의 단위. +소비자(코드/서버/CLI)가 이벤트를 받아서 형식을 결정한다. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +class EventKind: + """이벤트 종류 상수. + + 기존 분석 이벤트 + UI 제어 이벤트. + 미등록 이벤트는 소비자가 무시 → 프로토콜 확장에 안전. + """ + + # ── 기존 분석 이벤트 ── + META = "meta" + SNAPSHOT = "snapshot" + CONTEXT = "context" + CHUNK = "chunk" + TOOL_CALL = "tool_call" + TOOL_RESULT = "tool_result" + CHART = "chart" + DONE = "done" + ERROR = "error" + SYSTEM_PROMPT = "system_prompt" + VALIDATION = "validation" + CORRECTION = "correction" + + # ── 코드 실행 이벤트 ── + CODE_ROUND = "code_round" + + # ── UI 제어 이벤트 ── + UI_ACTION = "ui_action" + + +@dataclass +class AnalysisEvent: + """분석 이벤트 단위. + + kind: + - "meta": 회사/모듈/연도 범위 정보 + - "snapshot": 핵심 수치 스냅샷 + - "context": 모듈별 데이터 (module, label, text) + - "tool_call": 에이전트 도구 호출 (name, arguments) + - "tool_result": 도구 실행 결과 (name, result) + - "chunk": LLM 응답 텍스트 청크 (text) + - "chart": 차트 스펙 (charts[]) + - "done": 완료 (response_meta 포함 가능) + - "error": 에러 (error, action) + - "system_prompt": 시스템 프롬프트 (text, userContent) + - "code_round": 코드 실행 라운드 진행 (round, maxRounds, status) + - "validation": 숫자 검증 결과 (mismatches[]) + - "ui_action": canonical UI action payload + """ + + kind: str + data: dict[str, Any] = field(default_factory=dict) diff --git a/src/dartlab/ai/runtime/plugin_hints.py b/src/dartlab/ai/runtime/plugin_hints.py new file mode 100644 index 0000000000000000000000000000000000000000..71cc75715ea81c95a6b202b3b74907c895843741 --- /dev/null +++ b/src/dartlab/ai/runtime/plugin_hints.py @@ -0,0 +1,157 @@ +"""플러그인 추천 시스템 — 사용자 질문에서 유용한 플러그인을 감지. + +사용자의 분석 패턴을 보고 아직 설치되지 않은 유용한 플러그인을 추천한다. +LLM context에 힌트를 주입하여 자연스러운 안내가 가능하다. +""" + +from __future__ import annotations + +# 질문 키워드 → 추천 플러그인 매핑 +# 커뮤니티 성장에 따라 동적 목록으로 확장 가능 +_PLUGIN_HINTS: dict[str, dict[str, str]] = { + "ESG": { + "name": "dartlab-plugin-esg", + "topic": "esgScore", + "description": "ESG 등급 및 환경/사회/지배구조 점수 분석", + "install": "uv pip install dartlab-plugin-esg", + }, + "백테스트": { + "name": "dartlab-plugin-backtest", + "topic": "backtest", + "description": "투자 전략 백테스트 및 성과 분석", + "install": "uv pip install dartlab-plugin-backtest", + }, + "기술적분석": { + "name": "dartlab-plugin-technical", + "topic": "technicalIndicators", + "description": "이동평균, RSI, MACD 등 기술적 지표", + "install": "uv pip install dartlab-plugin-technical", + }, + "뉴스": { + "name": "dartlab-plugin-news", + "topic": "newsSentiment", + "description": "실시간 뉴스 수집 및 감성 분석", + "install": "uv pip install dartlab-plugin-news", + }, + "주가": { + "name": "dartlab-plugin-price", + "topic": "priceHistory", + "description": "주가 시계열, 수익률, 변동성 분석", + "install": "uv pip install dartlab-plugin-price", + }, + "밸류에이션": { + "name": "dartlab-plugin-valuation", + "topic": "dcfValuation", + "description": "DCF, 상대가치 등 본격 밸류에이션 모델", + "install": "uv pip install dartlab-plugin-valuation", + }, + "공급망": { + "name": "dartlab-plugin-supplychain", + "topic": "supplyChain", + "description": "공급망 분석 및 거래 관계 맵", + "install": "uv pip install dartlab-plugin-supplychain", + }, + "peer": { + "name": "dartlab-plugin-peer", + "topic": "peerComparison", + "description": "동종업계 비교 분석 (peer group)", + "install": "uv pip install dartlab-plugin-peer", + }, + "AI분석": { + "name": "dartlab-plugin-ai-analyst", + "topic": "aiReport", + "description": "AI 기반 종합 리서치 리포트 자동 생성", + "install": "uv pip install dartlab-plugin-ai-analyst", + }, +} + +# 키워드 별칭 (다양한 표현 매칭) +_KEYWORD_ALIASES: dict[str, str] = { + "이에스지": "ESG", + "탄소": "ESG", + "환경": "ESG", + "사회적": "ESG", + "지속가능": "ESG", + "backtest": "백테스트", + "전략검증": "백테스트", + "시뮬레이션": "백테스트", + "기술적": "기술적분석", + "차트": "기술적분석", + "RSI": "기술적분석", + "MACD": "기술적분석", + "이동평균": "기술적분석", + "볼린저": "기술적분석", + "news": "뉴스", + "감성분석": "뉴스", + "여론": "뉴스", + "주가": "주가", + "수익률": "주가", + "변동성": "주가", + "price": "주가", + "DCF": "밸류에이션", + "적정주가": "밸류에이션", + "목표주가": "밸류에이션", + "내재가치": "밸류에이션", + "supply": "공급망", + "공급": "공급망", + "동종": "peer", + "비교분석": "peer", + "업종비교": "peer", + "peer": "peer", + "리포트": "AI분석", + "리서치": "AI분석", +} + + +def detect_plugin_hints( + question: str, + loaded_plugin_names: list[str] | None = None, +) -> list[dict[str, str]]: + """질문에서 유용한 미설치 플러그인을 감지. + + Args: + question: 사용자 질문 텍스트. + loaded_plugin_names: 이미 설치된 플러그인 이름 목록. + + Returns: + 추천 플러그인 정보 리스트 (최대 2개). + """ + if not question: + return [] + + loaded = set(loaded_plugin_names or []) + q_lower = question.lower() + matched: list[dict[str, str]] = [] + seen_keys: set[str] = set() + + # 직접 매칭 + for key, hint in _PLUGIN_HINTS.items(): + if key.lower() in q_lower and hint["name"] not in loaded and key not in seen_keys: + matched.append(hint) + seen_keys.add(key) + + # 별칭 매칭 + for alias, key in _KEYWORD_ALIASES.items(): + if alias.lower() in q_lower and key not in seen_keys: + hint = _PLUGIN_HINTS.get(key) + if hint and hint["name"] not in loaded: + matched.append(hint) + seen_keys.add(key) + + # 최대 2개만 반환 (스팸 방지) + return matched[:2] + + +def format_plugin_hints(hints: list[dict[str, str]]) -> str | None: + """추천 힌트를 LLM context용 텍스트로 포맷.""" + if not hints: + return None + + lines = ["💡 **유용한 플러그인 추천:**"] + for h in hints: + lines.append(f"- **{h['name']}**: {h['description']}") + lines.append(f" 설치: `{h['install']}`") + + lines.append("") + lines.append('플러그인 만들기: `dartlab.ask("... 플러그인 만들어줘")`로 AI가 자동 생성합니다.') + return "\n".join(lines) diff --git a/src/dartlab/ai/runtime/standalone.py b/src/dartlab/ai/runtime/standalone.py new file mode 100644 index 0000000000000000000000000000000000000000..f09111c08263dca9a3261de37b7e1f25e72e193c --- /dev/null +++ b/src/dartlab/ai/runtime/standalone.py @@ -0,0 +1,204 @@ +"""Company에서 분리된 LLM 분석 함수. + +ask/chat은 Company가 AI를 품는 게 아니라 AI가 Company를 소비하는 구조. +내부적으로 core.analyze() 이벤트 스트림을 소비한다. + +사용법:: + + from dartlab.ai.runtime.standalone import ask, chat + + ask(company, "재무 건전성을 분석해줘") + chat(company, "배당 추세를 분석하고 이상 징후를 찾아줘") +""" + +from __future__ import annotations + +from typing import Any, Generator + + +def _collect_text(events) -> str: + """이벤트 스트림에서 chunk 텍스트만 수집 + 플러그인 힌트 append.""" + parts: list[str] = [] + hint_text = "" + for ev in events: + if ev.kind == "chunk": + parts.append(ev.data["text"]) + elif ev.kind == "done": + hint_text = ev.data.get("pluginHintsText", "") + answer = "".join(parts) + if hint_text: + answer += f"\n\n{hint_text}" + return answer + + +def _stream_chunks(events) -> Generator[str, None, None]: + """이벤트 스트림에서 chunk 텍스트 + 플러그인 힌트를 제너레이터로 반환.""" + for ev in events: + if ev.kind == "chunk": + yield ev.data["text"] + elif ev.kind == "done": + hint = ev.data.get("pluginHintsText") + if hint: + yield f"\n\n{hint}" + + +def ask( + question: str, + *, + company: Any | None = None, + include: list[str] | None = None, + exclude: list[str] | None = None, + provider: str | None = None, + model: str | None = None, + stream: bool = True, + reflect: bool = False, + report_mode: bool = False, + pattern: str | None = None, + template: str | None = None, + modules: list[str] | None = None, + history: list[dict[str, str]] | None = None, + **kwargs: Any, +) -> str | Generator[str, None, None]: + """AI에게 질문. Company 없이도 동작. + + Args: + question: 질문 텍스트 (한국어 또는 영어). + company: Company 인스턴스 (있으면 해당 기업 맥락 제공). + include: 명시적으로 포함할 데이터. + exclude: 제외할 데이터. + provider: per-call provider override. + model: per-call model override. + stream: True면 제너레이터 반환 (chunk 단위). + reflect: True면 답변 자체 검증 (1회 reflection). + report_mode: True면 전문 분석보고서 모드 (7섹션 구조화). + pattern: 분석 패턴 이름 (하위호환). + template: 분석 템플릿 이름 (단일 모듈, 하위호환). + modules: 분석 모듈 리스트 (복수 조합 가능, 최대 3개). + history: 이전 대화 메시지 리스트 (대화 연속 모드). + **kwargs: LLMConfig override. + + Returns: + str (stream=False) 또는 Generator[str] (stream=True). + """ + # 모듈 → 시스템 프롬프트에 주입 (modules 우선 → template → pattern) + _templateText = None + if modules: + from dartlab.ai.patterns import get_modules + + _templateText = get_modules(modules) + else: + tmpl_name = template or pattern + if tmpl_name: + from dartlab.ai.patterns import get_template + + _templateText = get_template(tmpl_name) + + from dartlab.ai.runtime.core import analyze + + events = analyze( + company, + question, + include=include, + exclude=exclude, + provider=provider, + model=model, + reflect=reflect, + report_mode=report_mode, + history=history, + _templateText=_templateText, + **kwargs, + ) + + if stream: + return _stream_chunks(events) + + answer = _collect_text(events) + + # Self-Critique: 답변 자체 검증 (1회 reflection) + if reflect and answer: + from dartlab.ai import get_config + from dartlab.ai.providers import create_provider + from dartlab.ai.runtime.agent import _reflect_on_answer + + config_ = get_config(role=kwargs.get("role")) + overrides = {k: v for k, v in {"provider": provider, "model": model, **kwargs}.items() if v is not None} + if overrides: + config_ = config_.merge(overrides) + llm = create_provider(config_) + # reflect는 전체 응답이 필요하므로 core 이후 후처리 + messages = [{"role": "user", "content": question}] + answer = _reflect_on_answer(llm, messages, answer) + + return answer + + +def chat( + company: Any | None, + question: str, + *, + provider: str | None = None, + model: str | None = None, + max_turns: int = 5, + on_tool_call: Any = None, + on_tool_result: Any = None, + **kwargs: Any, +) -> str: + """에이전트 모드: LLM이 필요한 도구를 직접 선택하여 분석. + + Args: + company: Company 인스턴스. None이면 종목 없이 동작 (scan/gather/system만). + question: 질문 텍스트. + provider: per-call provider override. + model: per-call model override. + max_turns: 최대 도구 호출 반복 횟수. + on_tool_call: 도구 호출 시 콜백 (UI용). + on_tool_result: 도구 결과 시 콜백 (UI용). + **kwargs: LLMConfig override. + + Returns: + LLM 최종 응답 텍스트. + """ + from dartlab.ai.runtime.core import analyze + + events = analyze( + company, + question, + provider=provider, + model=model, + max_turns=max_turns, + **kwargs, + ) + + chunks: list[str] = [] + for ev in events: + if ev.kind == "chunk": + chunks.append(ev.data["text"]) + elif ev.kind == "tool_call" and on_tool_call is not None: + on_tool_call(ev.data["name"], ev.data.get("arguments", {})) + elif ev.kind == "tool_result" and on_tool_result is not None: + on_tool_result(ev.data["name"], ev.data.get("result", "")) + + return "".join(chunks) + + +def analyze_full( + company: Any, + question: str, + **kwargs: Any, +) -> list: + """모든 이벤트를 리스트로 반환 — 노트북/스크립트용. + + core.analyze()의 전체 이벤트 스트림을 수집해서 반환. + validation, ui_action, chart 등 모든 이벤트 접근 가능. + + Example:: + + from dartlab.ai.runtime.standalone import analyze_full + + events = analyze_full(company, "영업이익률 추세는?") + for ev in events: + print(ev.kind, ev.data) + """ + from dartlab.ai.runtime.core import analyze + + return list(analyze(company, question, **kwargs)) diff --git a/src/dartlab/ai/tools/__init__.py b/src/dartlab/ai/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6181593ddb6f017792421f88941f0cdc4c52ee02 --- /dev/null +++ b/src/dartlab/ai/tools/__init__.py @@ -0,0 +1 @@ +"""AI tooling package.""" diff --git a/src/dartlab/ai/tools/__pycache__/__init__.cpython-312.pyc b/src/dartlab/ai/tools/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..348385c4371eca233d92b545872164ddb0533ad4 Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/ai/tools/__pycache__/__init__.cpython-313.pyc b/src/dartlab/ai/tools/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e12d19178cfdcb18b41baa769ab50ef5384ead Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/ai/tools/__pycache__/coding.cpython-312.pyc b/src/dartlab/ai/tools/__pycache__/coding.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1aa3c68148e4ea16a2e4b94f6cf55a0d2574b0fe Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/coding.cpython-312.pyc differ diff --git a/src/dartlab/ai/tools/__pycache__/coding.cpython-313.pyc b/src/dartlab/ai/tools/__pycache__/coding.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71f3a6b9649498331fbbdb1390b85f28a8b9b89a Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/coding.cpython-313.pyc differ diff --git a/src/dartlab/ai/tools/__pycache__/plugin.cpython-312.pyc b/src/dartlab/ai/tools/__pycache__/plugin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14d8a4ed5bcc3f598fd3f52fa3bf0fc680580bc7 Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/plugin.cpython-312.pyc differ diff --git a/src/dartlab/ai/tools/__pycache__/plugin.cpython-313.pyc b/src/dartlab/ai/tools/__pycache__/plugin.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4ae4ec5dc715e8ed3cdd45cb9b98a9070db1e93 Binary files /dev/null and b/src/dartlab/ai/tools/__pycache__/plugin.cpython-313.pyc differ diff --git a/src/dartlab/ai/tools/coding.py b/src/dartlab/ai/tools/coding.py new file mode 100644 index 0000000000000000000000000000000000000000..3ae59ecc0a172c438cdea580b495d36e14abbcbf --- /dev/null +++ b/src/dartlab/ai/tools/coding.py @@ -0,0 +1,614 @@ +"""Provider-agnostic coding backend runtime.""" + +from __future__ import annotations + +import ast +import logging +import subprocess +import sys +import tempfile +import textwrap +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +log = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class CodingTaskResult: + """Normalized result from a coding backend.""" + + backend: str + answer: str + sandbox: str + model: str + usage: dict[str, int] | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + +class CodingBackend(ABC): + """Abstract coding backend that can execute workspace tasks.""" + + name: str + label: str + description: str + + @abstractmethod + def inspect(self) -> dict[str, Any]: + """Return backend capability/status metadata.""" + + @abstractmethod + def run_task( + self, + prompt: str, + *, + sandbox: str = "workspace-write", + model: str | None = None, + timeout_seconds: int = 300, + ) -> CodingTaskResult: + """Execute a coding task.""" + + def check_available(self) -> bool: + """백엔드 사용 가능 여부를 반환한다.""" + info = self.inspect() + return bool(info.get("available", False)) + + +class CodexCodingBackend(CodingBackend): + """Codex CLI-backed coding executor.""" + + name = "codex" + label = "Codex CLI" + description = "OpenAI Codex CLI를 사용해 워크스페이스 코드 작업을 실행합니다." + + def inspect(self) -> dict[str, Any]: + """Codex CLI 설치/인증 상태와 지원 sandbox 모드를 조회한다.""" + from dartlab.ai.providers.support.codex_cli import inspect_codex_cli + + info = inspect_codex_cli() + sandbox_modes = info.get("sandboxModes") or [] + return { + "name": self.name, + "label": self.label, + "description": self.description, + "installed": bool(info.get("installed")), + "authenticated": bool(info.get("authenticated")), + "available": bool(info.get("installed") and info.get("authenticated")), + "configuredModel": info.get("configuredModel"), + "version": info.get("version"), + "sandboxModes": sandbox_modes, + "supportsWorkspaceWrite": "workspace-write" in sandbox_modes, + "supportsDangerFullAccess": "danger-full-access" in sandbox_modes, + } + + def run_task( + self, + prompt: str, + *, + sandbox: str = "workspace-write", + model: str | None = None, + timeout_seconds: int = 300, + ) -> CodingTaskResult: + """Codex CLI로 코딩 작업을 실행한다.""" + from dartlab.ai.providers.support.codex_cli import run_codex_exec + + info = self.inspect() + if not info.get("installed"): + raise FileNotFoundError("Codex CLI가 설치되어 있지 않습니다. 먼저 `codex --version`이 동작해야 합니다.") + if not info.get("authenticated"): + raise PermissionError("Codex CLI 로그인이 필요합니다. `codex login`을 실행하세요.") + + sandbox_modes = set(info.get("sandboxModes") or []) + selected_sandbox = sandbox + if sandbox_modes and sandbox not in sandbox_modes: + selected_sandbox = "workspace-write" if "workspace-write" in sandbox_modes else "read-only" + + answer, usage = run_codex_exec( + prompt, + model=model or None, + sandbox=selected_sandbox, + timeout=timeout_seconds, + ) + return CodingTaskResult( + backend=self.name, + answer=answer, + sandbox=selected_sandbox, + model=model or info.get("configuredModel") or "CLI default", + usage=usage, + metadata={ + "version": info.get("version"), + "configuredModel": info.get("configuredModel"), + }, + ) + + +# ══════════════════════════════════════ +# LocalPythonBackend -- subprocess 기반 안전 실행 +# ══════════════════════════════════════ + + +def _validateCode(code: str) -> None: + """구문 검증. SyntaxError 시 그대로 raise — 호출자가 처리.""" + ast.parse(code) + + +class LocalPythonBackend(CodingBackend): + """로컬 subprocess 기반 Python 코드 실행 -- AST 검증 + 격리.""" + + name = "local_python" + label = "Local Python" + description = "로컬 subprocess에서 Python 코드를 안전하게 실행합니다." + + def __init__(self, *, defaultTimeout: int = 30, maxTimeout: int = 120) -> None: + self._defaultTimeout = defaultTimeout + self._maxTimeout = maxTimeout + + def inspect(self) -> dict[str, Any]: + """로컬 Python 백엔드 상태와 허용/금지 모듈 목록을 반환한다.""" + return { + "name": self.name, + "label": self.label, + "description": self.description, + "available": True, + "python": sys.version, + "defaultTimeout": self._defaultTimeout, + "maxTimeout": self._maxTimeout, + "restrictions": "none (unrestricted local execution)", + } + + def run_task( + self, + prompt: str, + *, + sandbox: str = "isolated", + model: str | None = None, + timeout_seconds: int = 30, + code: str | None = None, + dataJson: str | None = None, + ) -> CodingTaskResult: + """Python 코드 실행. + + Args: + prompt: LLM에게 보낼 프롬프트 (code가 없을 때 사용) + code: 직접 실행할 Python 코드 + dataJson: 코드에 `data` 변수로 주입할 JSON 문자열 + timeout_seconds: 실행 시간 제한 (초) + """ + if not code: + return CodingTaskResult( + backend=self.name, + answer="[오류] 실행할 코드가 없습니다. code 파라미터를 전달하세요.", + sandbox=sandbox, + model="local", + ) + + # 1. AST 구문 검증 + try: + _validateCode(code) + except SyntaxError as e: + return CodingTaskResult( + backend=self.name, + answer=f"[구문 오류] {e}", + sandbox=sandbox, + model="local", + ) + + # 2. 실행 시간 제한 clamp + timeout = min(max(timeout_seconds, 5), self._maxTimeout) + + # 3. 임시 디렉토리에서 실행 + with tempfile.TemporaryDirectory(prefix="dartlab_code_") as tmpDir: + scriptPath = Path(tmpDir) / "run.py" + + # 데이터 주입 프리앰블 + preamble = "" + if dataJson: + dataPath = Path(tmpDir) / "data.json" + dataPath.write_text(dataJson, encoding="utf-8") + preamble = textwrap.dedent(f"""\ + import json as _json + with open({str(dataPath)!r}, encoding="utf-8") as _f: + data = _json.load(_f) + """) + + fullCode = preamble + code + scriptPath.write_text(fullCode, encoding="utf-8") + + try: + result = subprocess.run( + [sys.executable, "-X", "utf8", str(scriptPath)], + capture_output=True, + text=True, + encoding="utf-8", + timeout=timeout, + cwd=tmpDir, + env={ + "PATH": "", # 최소 환경 + "PYTHONPATH": "", + "PYTHONDONTWRITEBYTECODE": "1", + }, + ) + + stdout = result.stdout[:8000] if result.stdout else "" + stderr = result.stderr[:2000] if result.stderr else "" + + if result.returncode == 0: + answer = stdout if stdout else "(실행 완료, 출력 없음)" + else: + answer = f"[실행 오류] (exit code {result.returncode})\n" + if stderr: + answer += f"```\n{stderr}\n```\n" + if stdout: + answer += f"\n출력:\n{stdout}" + + return CodingTaskResult( + backend=self.name, + answer=answer, + sandbox="isolated", + model="local", + metadata={ + "returncode": result.returncode, + "timeout": timeout, + "codeLength": len(code), + }, + ) + + except subprocess.TimeoutExpired: + return CodingTaskResult( + backend=self.name, + answer=f"[시간 초과] {timeout}초 내에 실행이 완료되지 않았습니다.", + sandbox="isolated", + model="local", + metadata={"timeout": timeout}, + ) + + +def _classifyError(stderr: str, stdout: str) -> str: + """코드 실행 에러를 분류하고 복구 힌트를 제공한다.""" + hint = "" + if "ImportError" in stderr or "ModuleNotFoundError" in stderr: + hint = "\n힌트: 모듈을 찾을 수 없습니다. dartlab 내장 API만 사용 가능합니다." + elif "AttributeError" in stderr or "NameError" in stderr: + hint = ( + "\n힌트: API 이름이 틀렸을 수 있습니다. `dartlab.capabilities(search='키워드')`로 정확한 API를 검색하세요." + ) + elif "TypeError" in stderr: + hint = "\n힌트: 함수 파라미터가 맞지 않습니다. `help(함수명)`으로 시그니처를 확인하세요." + elif "KeyError" in stderr or "ColumnNotFoundError" in stderr: + hint = "\n힌트: 컬럼/키가 없습니다. 먼저 print()로 구조를 확인한 뒤 정확한 이름을 사용하세요." + + parts = ["[실행 오류]"] + if stderr: + # traceback에서 마지막 에러 줄만 추출 (전체 traceback은 토큰 낭비) + lines = stderr.strip().splitlines() + errorLine = lines[-1] if lines else stderr + parts.append(errorLine) + if stdout: + parts.append(f"출력:\n{stdout[:1000]}") + if hint: + parts.append(hint) + return "\n".join(parts) + + +def _stripDuplicateImport(code: str, module: str) -> str: + """사용자 코드에서 `import ` 단독 문을 제거한다 (preamble에서 주입하므로).""" + try: + tree = ast.parse(code) + except SyntaxError: + return code + # import dartlab 단독 문(from dartlab ... 은 유지)만 제거 + linesToRemove: set[int] = set() + for node in tree.body: + if ( + isinstance(node, ast.Import) + and len(node.names) == 1 + and node.names[0].name == module + and node.names[0].asname is None + ): + linesToRemove.add(node.lineno) + if not linesToRemove: + return code + lines = code.split("\n") + return "\n".join(line for i, line in enumerate(lines, 1) if i not in linesToRemove) + + +class DartlabCodeExecutor(LocalPythonBackend): + """dartlab 전용 코드 실행기 -- CAPABILITIES 기반 코드 생성 + 실행. + + LocalPythonBackend를 확장하여: + 1. dartlab 패키지를 import 허용 + 2. PYTHONPATH에 dartlab 경로 전달 + 3. company context를 preamble로 주입 + 4. DataFrame 결과를 마크다운 테이블로 변환 + """ + + name = "dartlab_executor" + label = "DartLab Executor" + description = "dartlab Python 코드를 안전하게 실행합니다." + + def __init__(self, *, defaultTimeout: int = 30, maxTimeout: int = 60) -> None: + super().__init__(defaultTimeout=defaultTimeout, maxTimeout=maxTimeout) + + def execute(self, code: str, *, stockCode: str | None = None, timeout: int = 30) -> str: + """dartlab 코드를 실행하고 결과를 반환한다.""" + # 사용자 코드에서 중복 import dartlab 제거 (preamble에서 주입) + cleanCode = _stripDuplicateImport(code, "dartlab") + + # dartlab context preamble — LLM이 읽을 수 있는 크기로 제한 + preamble = ( + "import dartlab\n" + "import polars as pl\n" + "import re\n" + "pl.Config.set_fmt_float('full')\n" + "pl.Config.set_tbl_cols(8)\n" + "pl.Config.set_tbl_rows(20)\n" + "pl.Config.set_tbl_width_chars(100)\n" + ) + preamble += "Company = dartlab.Company\n" + preamble += ( + "try:\n" + " from dartlab.gather.search import webSearch, newsSearch, formatResults, searchAvailable\n" + "except ImportError:\n" + " pass\n" + ) + preamble += ( + "try:\n from dartlab.core.search import search as disclosureSearch\nexcept ImportError:\n pass\n" + ) + preamble += "from dartlab.viz import emit_chart, emit_diagram\n" + preamble += ( + "from dartlab.viz import revenue, cashflow, profitability_chart, dividend_chart, balance_sheet_chart\n" + ) + if stockCode: + preamble += f'c = Company("{stockCode}")\n' + preamble += "company = c\n" + + # 결과 캡처 래퍼: 마지막 expression의 결과를 출력 + wrappedCode = self._wrapForCapture(cleanCode) + fullCode = preamble + wrappedCode + + result = self.run_task( + prompt="", + code=fullCode, + timeout_seconds=min(timeout, self._maxTimeout), + ) + return self._formatResult(result.answer) + + def run_task( + self, + prompt: str, + *, + sandbox: str = "isolated", + model: str | None = None, + timeout_seconds: int = 30, + code: str | None = None, + dataJson: str | None = None, + ) -> CodingTaskResult: + """dartlab용 환경 변수로 실행한다.""" + if not code: + return CodingTaskResult( + backend=self.name, + answer="[오류] 실행할 코드가 없습니다.", + sandbox=sandbox, + model="local", + ) + + # AST 구문 검증 + try: + _validateCode(code) + except SyntaxError as e: + return CodingTaskResult( + backend=self.name, + answer=f"[구문 오류] {e}", + sandbox=sandbox, + model="local", + ) + + timeout = min(max(timeout_seconds, 5), self._maxTimeout) + + with tempfile.TemporaryDirectory(prefix="dartlab_exec_") as tmpDir: + scriptPath = Path(tmpDir) / "run.py" + scriptPath.write_text(code, encoding="utf-8") + + # dartlab이 import 가능하도록 PYTHONPATH 설정 + import os + + pythonPath = os.pathsep.join(sys.path) + + env = os.environ.copy() + env["PYTHONPATH"] = pythonPath + env["PYTHONDONTWRITEBYTECODE"] = "1" + env["PYTHONUTF8"] = "1" + + try: + result = subprocess.run( + [sys.executable, "-X", "utf8", str(scriptPath)], + capture_output=True, + text=True, + encoding="utf-8", + timeout=timeout, + cwd=tmpDir, + env=env, + ) + + rawStdout = result.stdout or "" + rawStderr = result.stderr or "" + _MAX_OUT = 8000 + stdoutTruncated = len(rawStdout) > _MAX_OUT + stdout = rawStdout[:_MAX_OUT] + if stdoutTruncated: + stdout += ( + f"\n\n... (출력 {len(rawStdout):,}자 중 {_MAX_OUT:,}자만 표시." + " .head()/.filter()로 범위를 좁혀 재실행하세요)" + ) + stderr = rawStderr[:2000] + + if result.returncode == 0: + answer = stdout if stdout else "(실행 완료, 출력 없음)" + else: + answer = _classifyError(stderr, stdout) + + return CodingTaskResult( + backend=self.name, + answer=answer, + sandbox="isolated", + model="local", + metadata={ + "returncode": result.returncode, + "timeout": timeout, + "codeLength": len(code), + }, + ) + + except subprocess.TimeoutExpired: + return CodingTaskResult( + backend=self.name, + answer=f"[시간 초과] {timeout}초 내에 실행이 완료되지 않았습니다.", + sandbox="isolated", + model="local", + metadata={"timeout": timeout}, + ) + + def _wrapForCapture(self, code: str) -> str: + """마지막 expression의 결과를 자동으로 print한다.""" + try: + tree = ast.parse(code) + except SyntaxError: + return code + + if not tree.body: + return code + + lastNode = tree.body[-1] + if isinstance(lastNode, ast.Expr): + # AST 기반: preceding 노드들을 unparse하고 마지막만 _result로 변환 + exprSource = ast.unparse(lastNode.value) + preceding = tree.body[:-1] + parts: list[str] = [] + for node in preceding: + parts.append(ast.unparse(node)) + parts.append(f"_result = {exprSource}") + parts.append("if _result is not None:") + parts.append(" print(_result)") + return "\n".join(parts) + return code + + def _formatResult(self, answer: str) -> str: + """코드 실행 결과의 과학적 표기법을 읽기 좋은 숫자로 변환한다.""" + import re + + def _replaceScientific(m: re.Match) -> str: + try: + val = float(m.group(0)) + absVal = abs(val) + if absVal >= 1e12: + sign = "-" if val < 0 else "" + return f"{sign}{absVal / 1e12:,.1f}조" + if absVal >= 1e8: + sign = "-" if val < 0 else "" + return f"{sign}{absVal / 1e8:,.0f}억" + if absVal >= 1: + return f"{int(val):,}" + return m.group(0) + except (ValueError, OverflowError): + return m.group(0) + + # 과학 표기법 변환 + answer = re.sub(r"-?\d+\.?\d*[eE][+-]?\d+", _replaceScientific, answer) + + # 일반 큰 숫자도 억/조 변환 (12자리 이상 정수 또는 .0으로 끝나는 float) + def _replaceLargeNumber(m: re.Match) -> str: + try: + raw = m.group(0) + # 소수점 이하가 의미 있으면 건드리지 않음 (비율/퍼센트) + if "." in raw and not raw.endswith(".0"): + return raw + val = float(raw) + absVal = abs(val) + sign = "-" if val < 0 else "" + if absVal >= 1e12: + return f"{sign}{absVal / 1e12:,.1f}조" + if absVal >= 1e8: + return f"{sign}{absVal / 1e8:,.0f}억" + return raw + except (ValueError, OverflowError): + return m.group(0) + + answer = re.sub(r"-?\d{9,}(?:\.0)?", _replaceLargeNumber, answer) + + if len(answer) > 8000: + return answer[:8000] + "\n\n... (결과가 너무 깁니다. .head()/.filter()로 범위를 좁혀주세요)" + return answer + + +class CodingRuntime: + """Registry/executor for coding backends.""" + + def __init__(self, name: str = "default"): + self.name = name + self._backends: dict[str, CodingBackend] = {} + self._default_backend: str | None = None + + def register_backend(self, backend: CodingBackend, *, default: bool = False) -> None: + """코딩 백엔드를 등록한다.""" + self._backends[backend.name] = backend + if default or self._default_backend is None: + self._default_backend = backend.name + + def get_backend(self, name: str | None = None) -> CodingBackend: + """이름으로 백엔드를 가져온다.""" + backend_name = name or self._default_backend + if not backend_name or backend_name not in self._backends: + available = ", ".join(f"`{key}`" for key in self._backends) or "(없음)" + raise KeyError(f"등록되지 않은 coding backend입니다: {name}. 사용 가능: {available}") + return self._backends[backend_name] + + def list_backend_names(self) -> list[str]: + """등록된 백엔드 이름 목록을 반환한다.""" + return list(self._backends.keys()) + + def inspect_backends(self) -> list[dict[str, Any]]: + """모든 백엔드의 상태 메타데이터를 반환한다.""" + return [backend.inspect() for backend in self._backends.values()] + + def run_task( + self, + prompt: str, + *, + backend: str | None = None, + sandbox: str = "workspace-write", + model: str | None = None, + timeout_seconds: int = 300, + ) -> CodingTaskResult: + """지정된 백엔드로 코딩 작업을 실행한다.""" + selected_backend = self.get_backend(backend) + return selected_backend.run_task( + prompt, + sandbox=sandbox, + model=model, + timeout_seconds=timeout_seconds, + ) + + +def create_coding_runtime(name: str = "runtime", *, include_defaults: bool = True) -> CodingRuntime: + """기본 백엔드를 포함한 CodingRuntime 인스턴스를 생성한다.""" + runtime = CodingRuntime(name=name) + if include_defaults: + runtime.register_backend(CodexCodingBackend(), default=True) + runtime.register_backend(LocalPythonBackend()) + return runtime + + +_DEFAULT_CODING_RUNTIME = create_coding_runtime(name="default") + + +def get_default_coding_runtime() -> CodingRuntime: + """전역 기본 CodingRuntime을 반환한다.""" + return _DEFAULT_CODING_RUNTIME + + +def set_default_coding_runtime(runtime: CodingRuntime) -> None: + """전역 기본 CodingRuntime을 교체한다.""" + global _DEFAULT_CODING_RUNTIME + _DEFAULT_CODING_RUNTIME = runtime diff --git a/src/dartlab/ai/tools/plugin.py b/src/dartlab/ai/tools/plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d4e6be8e88b87eef848c4522d2f9e692a8a0ce --- /dev/null +++ b/src/dartlab/ai/tools/plugin.py @@ -0,0 +1,207 @@ +"""외부 도구 플러그인 시스템 — @tool 데코레이터. + +사용법:: + + from dartlab.ai import tool + + @tool(category="custom") + def my_analysis(metric: str) -> str: + \"""내 분석.\""" + return f"{metric} 분석 완료" + + # requires_company=True로 등록하면 Company 인스턴스 자동 주입 + @tool(requires_company=True) + def company_metric(company, metric: str) -> str: + \"""회사별 분석.\""" + return f"{company.corpName}: {metric}" +""" + +from __future__ import annotations + +import inspect +from dataclasses import dataclass, field +from typing import Any, Callable, get_type_hints + +from dartlab.core.capabilities import CapabilityChannel, CapabilityKind, register_tool_capability + +PYTHON_TO_JSON_TYPE: dict[type, str] = { + str: "string", + int: "integer", + float: "number", + bool: "boolean", + list: "array", + dict: "object", +} + + +@dataclass +class ToolDef: + """플러그인 도구 정의.""" + + name: str + func: Callable[..., str] + description: str + category: str + requires_company: bool + parameters: dict + tags: list[str] = field(default_factory=list) + + +class ToolPluginRegistry: + """플러그인 도구 레지스트리.""" + + def __init__(self): + self._tools: dict[str, ToolDef] = {} + + def register(self, tool_def: ToolDef) -> None: + """도구 정의를 레지스트리에 등록한다.""" + self._tools[tool_def.name] = tool_def + + def unregister(self, name: str) -> None: + """이름으로 도구를 레지스트리에서 제거한다.""" + self._tools.pop(name, None) + + def get_schemas(self) -> list[dict]: + """OpenAI function calling 스키마 목록.""" + return [ + { + "type": "function", + "function": { + "name": t.name, + "description": t.description, + "parameters": t.parameters, + }, + } + for t in self._tools.values() + ] + + def execute(self, name: str, arguments: dict, company: Any = None) -> str: + """도구 실행.""" + tool_def = self._tools.get(name) + if not tool_def: + return f"오류: 플러그인 '{name}' 도구를 찾을 수 없습니다." + if tool_def.requires_company: + return tool_def.func(company, **arguments) + return tool_def.func(**arguments) + + @property + def size(self) -> int: + """등록된 도구 수를 반환한다.""" + return len(self._tools) + + def list_names(self) -> list[str]: + """등록된 도구 이름 목록을 반환한다.""" + return list(self._tools.keys()) + + +# 글로벌 레지스트리 +_registry = ToolPluginRegistry() + + +def get_plugin_registry() -> ToolPluginRegistry: + """글로벌 플러그인 레지스트리 반환.""" + return _registry + + +def _build_parameters_schema(func: Callable) -> tuple[dict, list[str]]: + """함수 시그니처에서 OpenAI parameters 스키마를 자동 생성.""" + hints = get_type_hints(func) + sig = inspect.signature(func) + + properties: dict[str, dict] = {} + required: list[str] = [] + + for param_name, param in sig.parameters.items(): + if param_name in ("company", "self", "cls"): + continue + + param_type = hints.get(param_name, str) + json_type = PYTHON_TO_JSON_TYPE.get(param_type, "string") + prop: dict[str, Any] = {"type": json_type} + + if param.default is inspect.Parameter.empty: + required.append(param_name) + else: + prop["default"] = param.default + + properties[param_name] = prop + + schema: dict[str, Any] = {"type": "object", "properties": properties} + if required: + schema["required"] = required + return schema, required + + +def tool( + name: str | None = None, + category: str = "custom", + requires_company: bool = False, + tags: list[str] | None = None, +): + """@tool 데코레이터 — 외부 함수를 AI 도구로 등록. + + Args: + name: 도구명 (None이면 함수명 사용) + category: 도구 카테고리 + requires_company: True면 첫 인자로 Company 인스턴스 주입 + tags: 분류 태그 + """ + + def decorator(func: Callable) -> Callable: + tool_name = name or func.__name__ + description = (func.__doc__ or "").strip().split("\n")[0] + parameters, _ = _build_parameters_schema(func) + + tool_def = ToolDef( + name=tool_name, + func=func, + description=description, + category=category, + requires_company=requires_company, + parameters=parameters, + tags=tags or [], + ) + _registry.register(tool_def) + func._tool_def = tool_def # type: ignore[attr-defined] + return func + + return decorator + + +def inject_plugins_into_runtime(registry: ToolPluginRegistry, runtime: Any) -> None: + """플러그인 레지스트리의 도구를 ToolRuntime에 주입.""" + for tool_def in registry._tools.values(): + if runtime.has_tool(tool_def.name): + continue + + if tool_def.requires_company: + + def _make_wrapper(td: ToolDef) -> Callable[..., str]: + def wrapper(**kwargs: Any) -> str: + return td.func(None, **kwargs) + + return wrapper + + func = _make_wrapper(tool_def) + else: + func = tool_def.func + + runtime.register_tool( + tool_def.name, + func, + tool_def.description, + tool_def.parameters, + ) + register_tool_capability( + tool_def.name, + tool_def.description, + tool_def.parameters, + label=tool_def.name, + kind=CapabilityKind.WORKFLOW, + channels=(CapabilityChannel.CHAT, CapabilityChannel.MCP), + requires_company=tool_def.requires_company, + result_kind="text", + ai_hint=f"plugin:{tool_def.category}", + tags=tuple(tool_def.tags), + source="tool_plugin", + ) diff --git a/src/dartlab/ai/types.py b/src/dartlab/ai/types.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e7aad02590cb119681b993c3da2138404471e7 --- /dev/null +++ b/src/dartlab/ai/types.py @@ -0,0 +1,197 @@ +"""LLM 분석기 타입 정의.""" + +from __future__ import annotations + +import dataclasses +from dataclasses import dataclass, field +from typing import Any, Literal + +ProviderName = Literal[ + "openai", + "ollama", + "custom", + "codex", + "oauth-codex", + "gemini", + "groq", + "cerebras", + "mistral", +] + + +@dataclass +class LLMConfig: + """LLM 연결 설정.""" + + provider: ProviderName = "codex" + model: str | None = None + api_key: str | None = None + base_url: str | None = None + temperature: float = 0.3 + max_tokens: int = 4096 + system_prompt: str | None = None + + def __post_init__(self): + import os + + if self.base_url is None: + env_url = os.environ.get("DARTLAB_LLM_BASE_URL") + if env_url: + self.base_url = env_url + + def merge(self, overrides: dict[str, Any]) -> LLMConfig: + """per-call override 적용한 새 Config 반환. + + provider가 변경되면서 model을 명시하지 않은 경우, + 이전 provider의 model을 리셋하여 새 provider의 기본 모델을 사용한다. + """ + vals = dataclasses.asdict(self) + filtered = {k: v for k, v in overrides.items() if v is not None} + + # provider가 바뀌면서 model override가 없으면 model 리셋 + if "provider" in filtered and filtered["provider"] != self.provider and "model" not in filtered: + vals["model"] = None + + vals.update(filtered) + return LLMConfig(**vals) + + +@dataclass +class LLMResponse: + """LLM 응답 결과.""" + + answer: str + provider: str + model: str + context_tables: list[str] = field(default_factory=list) + usage: dict[str, int] | None = None + + +@dataclass +class ToolCall: + """LLM이 요청한 도구 호출.""" + + id: str + name: str + arguments: dict[str, Any] + + +@dataclass +class ToolResponse(LLMResponse): + """도구 호출을 포함할 수 있는 LLM 응답.""" + + tool_calls: list[ToolCall] = field(default_factory=list) + finish_reason: str = "stop" + + +# ── 대화 히스토리/뷰어 경량 타입 (server Pydantic 모델 대체) ── + + +@dataclass(frozen=True) +class HistoryMeta: + """히스토리 메시지의 메타 정보.""" + + company: str | None = None + stockCode: str | None = None + modules: list[str] | None = None + market: str | None = None + topic: str | None = None + topicLabel: str | None = None + dialogueMode: str | None = None + questionTypes: list[str] | None = None + userGoal: str | None = None + + +@dataclass(frozen=True) +class HistoryItem: + """대화 히스토리 한 턴.""" + + role: str + text: str + meta: HistoryMeta | None = None + + +@dataclass(frozen=True) +class ViewContextCompany: + """뷰어 컨텍스트의 회사 정보.""" + + company: str | None = None + corpName: str | None = None + stockCode: str | None = None + market: str | None = None + + +@dataclass(frozen=True) +class ViewContextInfo: + """뷰어 컨텍스트 — 현재 사용자가 보고 있는 화면.""" + + type: str + company: ViewContextCompany | None = None + topic: str | None = None + topicLabel: str | None = None + period: str | None = None + data: dict[str, Any] | None = None + + +def history_from_dicts(items: list[dict] | None) -> list[HistoryItem] | None: + """dict 리스트 → HistoryItem 리스트 변환. + + server Pydantic 모델이나 raw dict 모두 지원. + """ + if not items: + return None + result: list[HistoryItem] = [] + for item in items: + if hasattr(item, "model_dump"): + item = item.model_dump() + + meta_raw = item.get("meta") + meta = None + if meta_raw: + if hasattr(meta_raw, "model_dump"): + meta_raw = meta_raw.model_dump() + if isinstance(meta_raw, dict): + meta = HistoryMeta(**{k: v for k, v in meta_raw.items() if k in HistoryMeta.__dataclass_fields__}) + elif isinstance(meta_raw, HistoryMeta): + meta = meta_raw + + result.append( + HistoryItem( + role=item.get("role", "user"), + text=item.get("text", ""), + meta=meta, + ) + ) + return result + + +def view_context_from_dict(data: Any | None) -> ViewContextInfo | None: + """dict → ViewContextInfo 변환. + + server Pydantic ViewContext나 raw dict 모두 지원. + """ + if not data: + return None + if hasattr(data, "model_dump"): + data = data.model_dump() + + company_raw = data.get("company") + company = None + if company_raw: + if hasattr(company_raw, "model_dump"): + company_raw = company_raw.model_dump() + if isinstance(company_raw, dict): + company = ViewContextCompany( + **{k: v for k, v in company_raw.items() if k in ViewContextCompany.__dataclass_fields__} + ) + elif isinstance(company_raw, ViewContextCompany): + company = company_raw + + return ViewContextInfo( + type=data.get("type", "viewer"), + company=company, + topic=data.get("topic"), + topicLabel=data.get("topicLabel"), + period=data.get("period"), + data=data.get("data"), + ) diff --git a/src/dartlab/analysis/__init__.py b/src/dartlab/analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a4c609fd3ba7627af022b5f7422f544ac5d618 --- /dev/null +++ b/src/dartlab/analysis/__init__.py @@ -0,0 +1,7 @@ +"""Analysis 엔진 — L2 분석 모듈 통합. + +하위 모듈: +- financial: 재무제표 분석 (14축 본체 + 인사이트 + 리서치) +- forecast: 전망분석 (추정, 시뮬레이션) +- valuation: 가치평가 +""" diff --git a/src/dartlab/analysis/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb46d6795640a23c0193ccceccfd3f7f82e3a5dd Binary files /dev/null and b/src/dartlab/analysis/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/__pycache__/__init__.cpython-313.pyc b/src/dartlab/analysis/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be32b40253a0fd476bc6dafd814590608db72d6c Binary files /dev/null and b/src/dartlab/analysis/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/analysis/_fmt.py b/src/dartlab/analysis/_fmt.py new file mode 100644 index 0000000000000000000000000000000000000000..b105524d114945a42ba7ca9d4086fd01e14f21ac --- /dev/null +++ b/src/dartlab/analysis/_fmt.py @@ -0,0 +1,5 @@ +"""하위호환 re-export -- 실제 구현은 core/finance/fmt.py.""" + +from dartlab.core.finance.fmt import fmtBig, fmtPrice, fmtUnit + +__all__ = ["fmtBig", "fmtPrice", "fmtUnit"] diff --git a/src/dartlab/analysis/financial/_01_revenueStructure.md b/src/dartlab/analysis/financial/_01_revenueStructure.md new file mode 100644 index 0000000000000000000000000000000000000000..3a42044399fb566ddca665d6a24fb5a9d3fe8ac2 --- /dev/null +++ b/src/dartlab/analysis/financial/_01_revenueStructure.md @@ -0,0 +1,260 @@ +# 1-1. 수익 구조 — 이 회사는 무엇으로 돈을 버는가 + +> c.review("수익구조")의 설계 문서. +> 가능한 것 / 불가능한 것 / 제약 / 해결 방안을 기록한다. + +--- + +## 1. 세계적 기준 — 수익구조 분석이 계산하는 것 + +| 단계 | 분석 내용 | 핵심 지표 | +|------|----------|----------| +| 구조 분해 | 부문별/제품별/지역별 매출 비중 | 비중(%), 금액(조/억) | +| 집중도 | 매출이 얼마나 쏠려있는가 | HHI, CR4, Shannon Entropy, Gini | +| 성장 분해 | 어디서 성장하는가 | 부문별 growth contribution, YoY | +| 수익성 구조 | 어디서 남기는가 | 부문별 영업이익률, segment margin | +| 매출 품질 | 얼마나 믿을 수 있는가 | Cash Conversion, Accruals Ratio | +| 전략 판단 | 한 줄 요약 | 구조 변화 방향, 핵심 부문 식별 | + +참고 도구: OpenBB(revenue_per_segment), FinanceToolkit(150+ 비율), concentrationMetrics(12개 집중도 지표), FMP(제품/지역 segmentation API). + +--- + +## 2. dartlab 데이터 현황 — 가능한 것과 불가능한 것 + +### 2-1. 데이터 소스 3개 + +| 소스 | 접근 경로 | 내용 | 한계 | +|------|----------|------|------| +| **segments** | `c.segments()` (K-IFRS 주석 파싱) | 부문별 매출/영업이익/감가상각 | 연간만, 당기/전기 2년뿐. sections에 미노출 | +| **salesOrder** | `c.salesOrder()` (매출실적/수주) | 제품/서비스별 매출, 수주잔고 | 컬럼명 v1/v2/v3 문제, 50~60% 커버리지 | +| **IS (finance)** | `c.show("IS")` (XBRL 재무제표) | 매출액/매출원가/영업이익 분기별 시계열 | 연결 합산만, 부문 분해 불가 | + +### 2-2. 가능한 분석 + +| 분석 | 소스 | 비고 | +|------|------|------| +| ✅ 부문별 매출 비중 (최근 연도) | segments | 당기 기준 비중 계산 가능 | +| ✅ 부문별 매출 추이 (다년간) | segments | `segments().revenue` — 전 연도 당기 매출 이어 붙임, 부문×연도 DataFrame | +| ✅ 부문별 영업이익률 | segments | 매출 + 영업이익 동시 존재 시 | +| ✅ 전체 매출/이익 YoY 성장률 | IS | 분기별 시계열 충분 | +| ✅ 영업이익률 추이 (분기별) | IS | 최대 40분기 | +| ✅ 매출총이익률 추이 | IS | grossMargin 계산 가능 | +| ✅ 매출 품질 (Cash Conversion) | IS + CF | 영업CF / 순이익 | +| ✅ DuPont 분해 (margin × turnover × leverage) | ratios | 이미 계산됨 | +| ✅ 매출 집중도 (HHI, CR 등) | segments | 부문 비중에서 계산 | + +### 2-3. 재검토 — "불가능"을 줄인다 + +기존에 불가능으로 적었던 것들을 데이터 기준으로 재판정. + +| 분석 | 기존 | 재판정 | 근거 | +|------|------|--------|------| +| 다년간 부문 추이 | ❌ | ✅ | `segments()`가 전 연도 순회, 당기 매출 이어 붙임. `segments().revenue` = 부문×연도 DataFrame | +| 부문별 영업이익 | ❌ | ✅ | `segments().tables`에 영업이익 행이 이미 있음. `_buildRevenueDf`가 매출만 뽑고 버리는 것뿐 — 영업이익도 동일 로직으로 추출 가능 | +| 지역별 매출 분해 | ❌ | ⚠️→✅ | parser가 `tableType="region"` 분류 이미 구현. 국내/미주/유럽/아시아 패턴 감지. 있는 회사는 바로 사용 가능 | +| 제품별 매출 | ⚠️ | ⚠️→✅ | parser가 `tableType="product"` 분류 구현. segments().tables에 제품 테이블 존재. salesOrder 안 써도 됨 | +| 부문별 분기 매출 | ❌ | ❌ | K-IFRS 주석이 연간 보고서에만 부문 공시. 분기 보고서에는 부문 없음 | +| 부문별 ROIC | ❌ | ⚠️ | 영업이익은 있지만 부문별 투하자본은 일부 회사만 공시. 영업이익률로 대체 | +| 고객 집중도 | ❌ | ❌ | DART에 개별 고객 매출 미공시 (SEC 10-K에만 존재) | +| Price/Volume/Mix | ❌ | ❌ | 가격/물량 분리 데이터 없음 | +| Organic vs Inorganic | ❌ | ❌ | M&A 매출 분리 불가 | + +**핵심: segments().tables에 데이터가 이미 다 있는데 revenue DataFrame만 만들고 나머지를 버리고 있었다.** + +- `_buildRevenueDf()`는 segment+당기+매출 행만 추출 +- 영업이익, 감가상각, region 테이블, product 테이블은 allTables에 있지만 DataFrame화 안 됨 +- 이걸 확장하면 ✅로 전환되는 항목이 3~4개 더 있음 + +### 2-4. 확장 방안 — allTables 완전 활용 + +현재 `_buildRevenueDf`가 하는 것: +``` +allTables → segment + 당기 + 매출 행만 → revenue DataFrame +``` + +확장하면: +``` +allTables → segment 매출 → revenueDf + → segment 영업이익 → operatingIncomeDf + → segment 영업이익률 → marginDf (매출 ÷ 영업이익) + → region 매출 → regionDf + → product 매출 → productDf +``` + +**같은 파서, 같은 데이터, 추출 로직만 확장.** + +--- + +## 3. 핵심 제약 — "부문" 문제 + +### 3-1. sections에서 segments가 안 나오는 문제 + +**현상**: `c.show("segments")`가 sections 경로로 접근 시 실패. +- sections 수평화에서 segments가 독립 토픽으로 추출되지 않음 + +**원인**: segments 데이터는 K-IFRS 주석(footnote) 29번 항목에 있음. +sections는 사업보고서 본문 수평화이고, 주석은 별도 파싱(`c.segments()`) 경로. + +**해결 방안**: review에서는 `c.segments()` (notes 파싱) 직접 호출. +sections 경로에 의존하지 않는다. + +**다년간 데이터**: `segments()`는 이미 전 연도를 순회하여 각 보고서의 당기 매출을 이어 붙인다. +`segments().revenue`가 부문×연도 wide DataFrame을 반환. "2년뿐"이 아니라 보유 데이터 전체 연도를 커버. + +### 3-2. 부문명 표준화 — 업계 조사 결과 + +**문제**: 같은 회사도 연도별 부문 재편. 삼성전자 2022년 "CE/IM/반도체/DP/Harman" → 2024년 "DX/DS/SDC/Harman". + +**업계 조사 결과: 완전 자동 솔루션은 어디에도 없다.** + +| 주체 | 접근법 | +|------|--------| +| FactSet RBICS | 섹터 전문 애널리스트가 수작업 매핑 (~45,000사, 연 1회 리뷰) | +| S&P Capital IQ | "As Reported" + "Standardized" 이중 제공, 표준화는 수작업 | +| Refinitiv | 35년+ 수작업 수집 | +| Compustat | "considerable measurement error/noise" 인정, 세그먼트 SIC 수작업 | +| OpenBB/FinanceToolkit | 데이터 제공자에 의존, 자체 처리 없음 | +| EdgarTools | concept 수준 매핑만 (Revenue 등), segment member 변경 추적 없음 | + +**XBRL dimension member도 해결 못 함**: 기업이 부문을 바꾸면 새 member ID를 만듦. +`CESegmentMember` → `DXSegmentMember`. 이전 member와의 연결 정보 없음. + +**회계 기준이 주는 무기 — recast(재작성)**: +- IFRS 8 / ASC 280: 부문 변경 시 **이전 기간을 새 구조로 재작성** 의무 +- 즉, 2024년 Filing의 "전기"는 이미 DX/DS 기준으로 재작성되어 있음 +- 같은 Filing 내 당기/전기 비교는 항상 정합 → **Filing 내 recast 데이터가 가장 신뢰할 수 있는 연결 고리** + +**dartlab 해결 방안**: + +1. **recast 우선**: 같은 Filing의 당기/전기를 1차 시계열로 사용 (이미 구조 맞춤) +2. **Filing 간 연결**: 연도별 당기 매출을 이어 붙이되, 부문명 변경 시 **break를 명시적으로 표시** +3. **강제 연결 금지**: DX ≠ CE+IM을 자동 매핑하지 않음. 업계 표준이 "사람이 매핑"인데 코드로 억지로 하면 오류 +4. **변경 감지**: 부문 목록이 바뀌면 "부문 재편 감지" 플래그 표시 +5. **수작업 매핑 테이블**: 주요 기업의 알려진 변경은 `sectionMappings.json` 패턴처럼 점진 축적 가능 + +**한 줄 원칙: break를 숨기지 않는다. 업계 표준이 수작업인 영역을 코드로 억지 자동화하면 신뢰성을 잃는다.** + +### 3-3. 금액 단위 불일치 + +- segments: 백만원 단위 (XBRL 원본) +- salesOrder: 백만원/억원 혼재 +- IS: 백만원 단위 + +**해결**: 출력 시 조/억 단위로 통일 변환. `_formatAmount()` 활용. + +--- + +## 4. review 출력 설계 + +### 4-1. 출력 원칙 + +- **review = 뷰어다**. 보기 좋아야 한다. 데이터 나열이 아니라 판단이 있는 보고서. +- **기간**: 분기 + 반기 모두 제공하되, 지면 제약에 따라 축약. + - 부문별 구조: 최근 연간 (1~2년) — segments가 연간이므로 + - 전체 손익 추이: 최근 4~8분기 + - 영업이익률: 최근 4~8분기 +- **금액 단위**: 조/억 단위 (원 단위 아님) +- **모든 것을 보여주지 않는다**. 핵심만. 상세는 `c.show("IS")`, `c.segments()` 등 개별 호출. + +### 4-2. 출력 구조 (목표) + +``` +■ 수익 구조 — 이 회사는 무엇으로 돈을 버는가 + + ▸ 핵심 요약 + "삼성전자는 DX(디바이스경험)와 DS(반도체) 2대 축으로 매출 30.1조를 창출. + DS 부문이 전년 대비 +68% 급반등하며 수익 구조가 재편 중." + + ▸ 부문별 매출 구성 (2024 연간) + 부문 매출 비중 영업이익률 + DX 부문 17.5조 58.1% 7.1% + DS 부문 11.1조 36.9% 13.6% + SDC 2.9조 9.7% 12.8% + Harman 1.4조 4.7% 9.2% + + ▸ 매출 집중도 + HHI 2,847 — 중간 집중 (DX + DS 95%) + + ▸ 손익 추이 (분기) + 분기 매출 영업이익 영업이익률 + 2025Q4 93.8조 20.1조 21.4% + 2025Q3 79.1조 11.1조 14.1% + 2025Q2 74.1조 4.7조 6.3% + 2024Q4 86.1조 10.1조 11.7% + + ▸ 수익 품질 + Cash Conversion 1.2 — 양호 (현금 뒷받침 충분) + 매출총이익률 42.3% → 38.1% → 35.2% (최근 3분기) + + ⚠ DS 부문 영업이익률 급등 — 반도체 사이클 의존도 높음 + ✦ DX 부문 안정적 마진 유지 — 수익 기반 역할 +``` + +### 4-3. 지면 제약 규칙 + +- 부문 테이블: **최대 8행** (8개 부문 초과 시 상위 7개 + "기타" 합산) +- 분기 추이: **최근 4~8분기** (전체 40분기 나열 금지) +- 서술: **2~3문장** 핵심 판단만 +- Flag: **최대 3개** (경고 2 + 기회 1 정도) + +--- + +## 5. 구현 계획 + +### Phase 1 — revenue.py calc 함수 (구현 완료) + +9개 calc 함수로 "이 회사는 무엇으로 돈을 버는가" 질문에 답한다: + +1. **calcCompanyProfile** — 업종/주요제품 맥락 +2. **calcSegmentComposition** — 부문별 매출/비중/영업이익률 +3. **calcSegmentTrend** — 다년간 부문별 매출 추이 + YoY +4. **calcBreakdown(sub)** — 지역별/제품별 매출 분해 +5. **calcRevenueGrowth** — 매출 YoY, 3Y CAGR, 분기 매출 시계열 +6. **calcGrowthContribution** — 부문별 성장 기여 분해 (어디에서 성장이 왔는가) +7. **calcConcentration** — HHI, 1위 부문 비중, 내수 비중 +8. **calcRevenueQuality** — 영업CF/순이익, 매출총이익률 추세 +9. **calcFlags** — 경고/기회 플래그 (고집중, 역성장, 매출≠이익 1위 괴리) + +데이터 접근 (DEV.md §11 준수): +- `company.select("segments")` — 원본 (기본 경로) +- `company.select("IS", [...])` — 원본 (기본 경로) +- `company.finance.ratios` — 파생 편의 +- `company.finance.ratioSeries` — 파생 편의 (시계열) +- `company.sector` — 메타 +- gather import 없음 ✅ + +### Phase 2 — 향후 확장 + +- 제품별 매출 (salesOrder 데이터 품질 개선 후) +- 지역별 매출 (segments에서 지역 테이블 추출) +- 다년간 부문 추이 (여러 보고서 기간 파싱) +- 부문별 성장 기여도 분해 + +--- + +## 6. 토론 기록 + +### 2026-03-26 — 수익구조 분석 설계 + +**문제 인식:** +- 기존 revenue.py는 show() 데이터를 그대로 던지는 것에 불과 +- "분석"이 아니라 "데이터 나열" +- 세계적 기준 대비 1/10 수준 + +**조사 결과:** +- OpenBB, FinanceToolkit, FMP 등이 segment/geographic 분해 제공 +- 집중도 지표 12종 (HHI, CR, Shannon, Gini 등) +- 성장 분해, 수익 품질, 전략 포지셔닝까지 계산 + +**핵심 제약:** +- segments가 sections에 없음 → `c.segments()` 직접 호출로 우회 +- 연간 2년만 → 분기 부문 분해 불가 +- 부문 재편 시 시계열 연결 불가 + +**결정:** +- review는 뷰어다 — 보기 좋아야 한다 +- 기간: 부문=연간, 손익=분기 4~8개 +- 금액: 조/억 단위 +- 모든 것을 보여주지 않는다 — 핵심만 +- Phase 1으로 먼저 실질적 분석을 구현, Phase 2로 확장 diff --git a/src/dartlab/analysis/financial/__init__.py b/src/dartlab/analysis/financial/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b2bc899b42aff652064b9015ec60c005c9f01ed --- /dev/null +++ b/src/dartlab/analysis/financial/__init__.py @@ -0,0 +1,1011 @@ +"""재무제표 완전 분석 통합 진입점. + +scan()이 시장 전체를 횡단하듯, analysis()는 단일 종목을 심층 분석한다. + +사용법:: + + import dartlab + + dartlab.analysis() # 전체 가이드 + dartlab.analysis("financial", "수익구조") # 수익구조 분석 항목 목록 + dartlab.analysis("financial", "수익구조", c) # 삼성전자 수익구조 분석 실행 + dartlab.analysis("financial", "이익품질", c) # 삼성전자 이익의 질 분석 + + c.analysis() # 가이드 + c.analysis("financial", "수익성") # 수익성 분석 +""" + +from __future__ import annotations + +import importlib +import inspect +from dataclasses import dataclass, field +from typing import Any + +import polars as pl + +# ── 분석 항목 레지스트리 ── + + +@dataclass(frozen=True) +class _CalcEntry: + """개별 calc* 함수 메타.""" + + fn: str + module: str + blockKey: str + label: str + + +@dataclass(frozen=True) +class _AxisEntry: + """분석 축 메타.""" + + section: str + partId: str + description: str + example: str + calcs: tuple[_CalcEntry, ...] = field(default_factory=tuple) + + +# ── 15축 레지스트리 ── +# catalog.py SECTIONS + _BLOCKS + registry.py buildBlocks()에서 매핑. + +_AXIS_REGISTRY: dict[str, _AxisEntry] = { + "수익구조": _AxisEntry( + section="수익구조", + partId="1-1", + description="이 회사는 무엇으로 돈을 버는가", + example='analysis("financial", "수익구조")', + calcs=( + _CalcEntry("calcCompanyProfile", "dartlab.analysis.financial.revenue", "profile", "기업 개요"), + _CalcEntry( + "calcSegmentComposition", "dartlab.analysis.financial.revenue", "segmentComposition", "부문별 매출 구성" + ), + _CalcEntry("calcSegmentTrend", "dartlab.analysis.financial.revenue", "segmentTrend", "부문별 매출 추이"), + _CalcEntry("calcRevenueGrowth", "dartlab.analysis.financial.revenue", "growth", "매출 성장률"), + _CalcEntry( + "calcGrowthContribution", "dartlab.analysis.financial.revenue", "growthContribution", "성장 기여 분해" + ), + _CalcEntry("calcConcentration", "dartlab.analysis.financial.revenue", "concentration", "매출 집중도"), + _CalcEntry("calcRevenueQuality", "dartlab.analysis.financial.revenue", "revenueQuality", "매출 품질"), + _CalcEntry("calcFlags", "dartlab.analysis.financial.revenue", "revenueFlags", "수익구조 플래그"), + ), + ), + "자금조달": _AxisEntry( + section="자금조달", + partId="1-2", + description="돈을 어디서 조달하는가", + example='analysis("financial", "자금조달")', + calcs=( + _CalcEntry("calcFundingSources", "dartlab.analysis.financial.capital", "fundingSources", "자금 원천 구성"), + _CalcEntry( + "calcCapitalOverview", "dartlab.analysis.financial.capital", "capitalOverview", "자본 구조 개요" + ), + _CalcEntry( + "calcCapitalTimeline", "dartlab.analysis.financial.capital", "capitalTimeline", "자본 구조 추이" + ), + _CalcEntry("calcDebtTimeline", "dartlab.analysis.financial.capital", "debtTimeline", "부채 추이"), + _CalcEntry("calcInterestBurden", "dartlab.analysis.financial.capital", "interestBurden", "이자 부담"), + _CalcEntry("calcLiquidity", "dartlab.analysis.financial.capital", "liquidity", "유동성"), + _CalcEntry( + "calcCashFlowStructure", "dartlab.analysis.financial.capital", "cashFlowStructure", "자금흐름 구조" + ), + _CalcEntry( + "calcDistressIndicators", "dartlab.analysis.financial.capital", "distressIndicators", "재무 위험 지표" + ), + _CalcEntry("calcCapitalFlags", "dartlab.analysis.financial.capital", "capitalFlags", "자금조달 플래그"), + ), + ), + "자산구조": _AxisEntry( + section="자산구조", + partId="1-3", + description="조달한 돈으로 뭘 준비했는가", + example='analysis("financial", "자산구조")', + calcs=( + _CalcEntry("calcAssetStructure", "dartlab.analysis.financial.asset", "assetStructure", "자산 재분류"), + _CalcEntry("calcWorkingCapital", "dartlab.analysis.financial.asset", "workingCapital", "운전자본 순환"), + _CalcEntry("calcCapexPattern", "dartlab.analysis.financial.asset", "capexPattern", "CAPEX 패턴"), + _CalcEntry("calcAssetFlags", "dartlab.analysis.financial.asset", "assetFlags", "자산구조 플래그"), + ), + ), + "현금흐름": _AxisEntry( + section="현금흐름", + partId="1-4", + description="실제로 현금은 어떻게 흘렀는가", + example='analysis("financial", "현금흐름")', + calcs=( + _CalcEntry( + "calcCashFlowOverview", "dartlab.analysis.financial.cashflow", "cashFlowOverview", "현금흐름 종합" + ), + _CalcEntry("calcCashQuality", "dartlab.analysis.financial.cashflow", "cashQuality", "이익의 현금 전환"), + _CalcEntry("calcCashFlowFlags", "dartlab.analysis.financial.cashflow", "cashFlowFlags", "현금흐름 플래그"), + _CalcEntry( + "calcOcfDecomposition", + "dartlab.analysis.financial.cashflow", + "ocfDecomposition", + "영업CF 분해 (NI+감가+운전자본)", + ), + ), + ), + "수익성": _AxisEntry( + section="수익성", + partId="2-1", + description="이 회사는 얼마나 잘 벌고 있는가", + example='analysis("financial", "수익성")', + calcs=( + _CalcEntry("calcMarginTrend", "dartlab.analysis.financial.profitability", "marginTrend", "마진 추이"), + _CalcEntry( + "calcReturnTrend", "dartlab.analysis.financial.profitability", "returnTrend", "ROE 분해 (듀퐁 5요소)" + ), + _CalcEntry( + "calcMarginWaterfall", "dartlab.analysis.financial.profitability", "marginWaterfall", "마진 워터폴" + ), + _CalcEntry( + "calcProfitabilityFlags", + "dartlab.analysis.financial.profitability", + "profitabilityFlags", + "수익성 플래그", + ), + _CalcEntry( + "calcPenmanDecomposition", + "dartlab.analysis.financial.profitability", + "penmanDecomposition", + "Penman 분해 (RNOA vs 레버리지)", + ), + _CalcEntry( + "calcRoicTree", + "dartlab.analysis.financial.profitability", + "roicTree", + "ROIC Tree (마진×회전 분해)", + ), + ), + ), + "성장성": _AxisEntry( + section="성장성", + partId="2-2", + description="이 회사는 얼마나 빨리 성장하는가", + example='analysis("financial", "성장성")', + calcs=( + _CalcEntry("calcGrowthTrend", "dartlab.analysis.financial.growthAnalysis", "growthTrend", "성장률 추이"), + _CalcEntry("calcGrowthQuality", "dartlab.analysis.financial.growthAnalysis", "growthQuality", "성장 품질"), + _CalcEntry( + "calcSustainableGrowthRate", + "dartlab.analysis.financial.growthAnalysis", + "sustainableGrowthRate", + "지속가능성장률", + ), + _CalcEntry("calcGrowthFlags", "dartlab.analysis.financial.growthAnalysis", "growthFlags", "성장성 플래그"), + _CalcEntry( + "calcCagrComparison", + "dartlab.analysis.financial.growthAnalysis", + "cagrComparison", + "CAGR 비교 (구조적 변화 감지)", + ), + ), + ), + "안정성": _AxisEntry( + section="안정성", + partId="2-3", + description="이 회사는 망하지 않는가", + example='analysis("financial", "안정성")', + calcs=( + _CalcEntry("calcLeverageTrend", "dartlab.analysis.financial.stability", "leverageTrend", "레버리지 추이"), + _CalcEntry("calcCoverageTrend", "dartlab.analysis.financial.stability", "coverageTrend", "이자보상 추이"), + _CalcEntry("calcDistressScore", "dartlab.analysis.financial.stability", "distressScore", "부실 판별"), + _CalcEntry( + "calcDistressEnsemble", "dartlab.analysis.financial.stability", "distressEnsemble", "부실예측 앙상블" + ), + _CalcEntry("calcDebtMaturity", "dartlab.analysis.financial.stability", "debtMaturity", "부채 만기 구조"), + _CalcEntry("calcStabilityFlags", "dartlab.analysis.financial.stability", "stabilityFlags", "안정성 플래그"), + ), + ), + "효율성": _AxisEntry( + section="효율성", + partId="2-4", + description="이 회사는 자산을 잘 굴리는가", + example='analysis("financial", "효율성")', + calcs=( + _CalcEntry( + "calcTurnoverTrend", "dartlab.analysis.financial.efficiency", "turnoverTrend", "회전율 + CCC 추이" + ), + _CalcEntry( + "calcEfficiencyFlags", "dartlab.analysis.financial.efficiency", "efficiencyFlags", "효율성 플래그" + ), + ), + ), + "종합평가": _AxisEntry( + section="종합평가", + partId="2-5", + description="재무 상태를 한마디로", + example='analysis("financial", "종합평가")', + calcs=( + _CalcEntry("calcScorecard", "dartlab.analysis.financial.scorecard", "scorecard", "재무 스코어카드"), + _CalcEntry("calcPiotroskiDetail", "dartlab.analysis.financial.scorecard", "piotroski", "Piotroski F-Score"), + _CalcEntry("calcSummaryFlags", "dartlab.analysis.financial.scorecard", "summaryFlags", "종합 플래그"), + ), + ), + "이익품질": _AxisEntry( + section="이익품질", + partId="3-1", + description="이익이 진짜인가", + example='analysis("financial", "이익품질")', + calcs=( + _CalcEntry( + "calcAccrualAnalysis", "dartlab.analysis.financial.earningsQuality", "accrualAnalysis", "발생액 분석" + ), + _CalcEntry( + "calcEarningsPersistence", + "dartlab.analysis.financial.earningsQuality", + "earningsPersistence", + "이익 지속성", + ), + _CalcEntry( + "calcBeneishTimeline", "dartlab.analysis.financial.earningsQuality", "beneishMScore", "Beneish M-Score" + ), + _CalcEntry( + "calcEarningsQualityFlags", + "dartlab.analysis.financial.earningsQuality", + "earningsQualityFlags", + "이익품질 플래그", + ), + _CalcEntry( + "calcRichardsonAccrual", + "dartlab.analysis.financial.earningsQuality", + "richardsonAccrual", + "Richardson 3계층 발생액", + ), + _CalcEntry( + "calcNonOperatingBreakdown", + "dartlab.analysis.financial.earningsQuality", + "nonOperatingBreakdown", + "영업외손익 분해", + ), + ), + ), + "비용구조": _AxisEntry( + section="비용구조", + partId="3-2", + description="비용이 어떻게 움직이는가", + example='analysis("financial", "비용구조")', + calcs=( + _CalcEntry( + "calcCostBreakdown", "dartlab.analysis.financial.costStructure", "costBreakdown", "비용 비중 분해" + ), + _CalcEntry( + "calcOperatingLeverage", "dartlab.analysis.financial.costStructure", "operatingLeverage", "영업레버리지" + ), + _CalcEntry( + "calcBreakevenEstimate", "dartlab.analysis.financial.costStructure", "breakevenEstimate", "손익분기점" + ), + _CalcEntry( + "calcRawMaterialBreakdown", + "dartlab.analysis.financial.costStructure", + "rawMaterialBreakdown", + "원재료 매입 비중", + ), + _CalcEntry( + "calcCostStructureFlags", + "dartlab.analysis.financial.costStructure", + "costStructureFlags", + "비용구조 플래그", + ), + ), + ), + "자본배분": _AxisEntry( + section="자본배분", + partId="3-3", + description="번 돈을 어디에 쓰는가", + example='analysis("financial", "자본배분")', + calcs=( + _CalcEntry( + "calcDividendPolicy", "dartlab.analysis.financial.capitalAllocation", "dividendPolicy", "배당 정책" + ), + _CalcEntry( + "calcShareholderReturn", "dartlab.analysis.financial.capitalAllocation", "shareholderReturn", "주주환원" + ), + _CalcEntry("calcReinvestment", "dartlab.analysis.financial.capitalAllocation", "reinvestment", "재투자"), + _CalcEntry("calcFcfUsage", "dartlab.analysis.financial.capitalAllocation", "fcfUsage", "FCF 사용처"), + _CalcEntry( + "calcDividendDocs", "dartlab.analysis.financial.capitalAllocation", "dividendDocs", "배당 서술 (docs)" + ), + _CalcEntry( + "calcTreasuryStockStatus", + "dartlab.analysis.financial.capitalAllocation", + "treasuryStockStatus", + "자사주 현황", + ), + _CalcEntry( + "calcCapitalAllocationFlags", + "dartlab.analysis.financial.capitalAllocation", + "capitalAllocationFlags", + "자본배분 플래그", + ), + ), + ), + "투자효율": _AxisEntry( + section="투자효율", + partId="3-4", + description="투자가 가치를 만드는가", + example='analysis("financial", "투자효율")', + calcs=( + _CalcEntry( + "calcRoicTimeline", "dartlab.analysis.financial.investmentAnalysis", "roicTimeline", "ROIC 시계열" + ), + _CalcEntry( + "calcInvestmentIntensity", + "dartlab.analysis.financial.investmentAnalysis", + "investmentIntensity", + "투자 강도", + ), + _CalcEntry( + "calcEvaTimeline", "dartlab.analysis.financial.investmentAnalysis", "evaTimeline", "NOPAT + 투하자본" + ), + _CalcEntry( + "calcInvestmentInOther", + "dartlab.analysis.financial.investmentAnalysis", + "investmentInOther", + "타법인 출자 현황", + ), + _CalcEntry( + "calcInvestmentFlags", + "dartlab.analysis.financial.investmentAnalysis", + "investmentFlags", + "투자효율 플래그", + ), + ), + ), + "재무정합성": _AxisEntry( + section="재무정합성", + partId="3-5", + description="재무제표가 서로 맞는가", + example='analysis("financial", "재무정합성")', + calcs=( + _CalcEntry( + "calcIsCfDivergence", "dartlab.analysis.financial.crossStatement", "isCfDivergence", "IS-CF 괴리" + ), + _CalcEntry( + "calcIsBsDivergence", "dartlab.analysis.financial.crossStatement", "isBsDivergence", "IS-BS 괴리" + ), + _CalcEntry("calcAnomalyScore", "dartlab.analysis.financial.crossStatement", "anomalyScore", "이상 점수"), + _CalcEntry( + "calcEffectiveTaxRate", "dartlab.analysis.financial.taxAnalysis", "effectiveTaxRate", "유효세율" + ), + _CalcEntry("calcDeferredTax", "dartlab.analysis.financial.taxAnalysis", "deferredTax", "이연법인세"), + _CalcEntry( + "calcArticulationCheck", + "dartlab.analysis.financial.crossStatement", + "articulationCheck", + "BS-CF 정합성 검증", + ), + ), + ), + # 신용평가는 독립 엔진 (c.credit()) — analysis 축에서 제거 + # ── 4부: 가치평가 ── + "가치평가": _AxisEntry( + section="가치평가", + partId="4-1", + description="이 회사의 적정 가치는 얼마인가", + example='analysis("valuation", "가치평가")', + calcs=( + _CalcEntry("calcDcf", "dartlab.analysis.financial.valuation", "dcfValuation", "DCF 밸류에이션"), + _CalcEntry("calcDdm", "dartlab.analysis.financial.valuation", "ddmValuation", "DDM 밸류에이션"), + _CalcEntry( + "calcRelativeValuation", + "dartlab.analysis.financial.valuation", + "relativeValuation", + "상대가치 (PER/PBR/EV-EBITDA/PSR/PEG)", + ), + _CalcEntry( + "calcResidualIncome", "dartlab.analysis.financial.valuation", "residualIncome", "RIM (잔여이익모델)" + ), + _CalcEntry("calcPriceTarget", "dartlab.analysis.financial.valuation", "priceTarget", "확률 가중 목표주가"), + _CalcEntry("calcReverseImplied", "dartlab.analysis.financial.valuation", "reverseImplied", "역내재성장률"), + _CalcEntry("calcSensitivity", "dartlab.analysis.financial.valuation", "sensitivity", "민감도 분석"), + _CalcEntry( + "calcValuationSynthesis", + "dartlab.analysis.financial.valuation", + "valuationSynthesis", + "종합 적정가치", + ), + _CalcEntry( + "calcValuationFlags", "dartlab.analysis.financial.valuation", "valuationFlags", "가치평가 플래그" + ), + ), + ), + # ── 5부: 비재무 심화 ── + "지배구조": _AxisEntry( + section="지배구조", + partId="5-1", + description="이 회사의 주인은 누구이며 감시는 작동하는가", + example='analysis("governance", "지배구조")', + calcs=( + _CalcEntry( + "calcOwnershipTrend", "dartlab.analysis.financial.governance", "ownershipTrend", "최대주주 지분 추이" + ), + _CalcEntry( + "calcBoardComposition", "dartlab.analysis.financial.governance", "boardComposition", "이사회 구성" + ), + _CalcEntry( + "calcAuditOpinionTrend", + "dartlab.analysis.financial.governance", + "auditOpinionTrend", + "감사의견 시계열", + ), + _CalcEntry( + "calcGovernanceFlags", "dartlab.analysis.financial.governance", "governanceFlags", "지배구조 플래그" + ), + ), + ), + "공시변화": _AxisEntry( + section="공시변화", + partId="5-2", + description="이 회사의 공시가 뭐가 달라졌는가", + example='analysis("governance", "공시변화")', + calcs=( + _CalcEntry( + "calcDisclosureChangeSummary", + "dartlab.analysis.financial.disclosureDelta", + "disclosureChangeSummary", + "공시변화 종합", + ), + _CalcEntry( + "calcKeyTopicChanges", + "dartlab.analysis.financial.disclosureDelta", + "keyTopicChanges", + "핵심 공시 변화", + ), + _CalcEntry( + "calcChangeIntensity", + "dartlab.analysis.financial.disclosureDelta", + "changeIntensity", + "변화 크기 분석", + ), + _CalcEntry( + "calcDisclosureDeltaFlags", + "dartlab.analysis.financial.disclosureDelta", + "disclosureDeltaFlags", + "공시변화 플래그", + ), + ), + ), + "비교분석": _AxisEntry( + section="비교분석", + partId="5-3", + description="이 회사는 시장에서 어디에 서 있는가", + example='analysis("governance", "비교분석")', + calcs=( + _CalcEntry( + "calcPeerRanking", "dartlab.analysis.financial.peerBenchmark", "peerRanking", "시장 내 백분위 순위" + ), + _CalcEntry( + "calcRiskReturnPosition", + "dartlab.analysis.financial.peerBenchmark", + "riskReturnPosition", + "수익-위험 포지션", + ), + _CalcEntry( + "calcPeerBenchmarkFlags", + "dartlab.analysis.financial.peerBenchmark", + "peerBenchmarkFlags", + "비교분석 플래그", + ), + ), + ), + # ── 6부: 전망분석 ── + "매출전망": _AxisEntry( + section="매출전망", + partId="6-1", + description="이 회사의 매출은 어디로 가며 재무는 어떻게 변하는가", + example='analysis("forecast", "매출전망")', + calcs=( + _CalcEntry( + "calcRevenueForecast", "dartlab.analysis.financial.forecastCalcs", "revenueForecast", "매출 예측" + ), + _CalcEntry( + "calcSegmentForecast", "dartlab.analysis.financial.forecastCalcs", "segmentForecast", "세그먼트별 전망" + ), + _CalcEntry( + "calcProFormaHighlights", + "dartlab.analysis.financial.forecastCalcs", + "proFormaHighlights", + "Pro-Forma 전망", + ), + _CalcEntry( + "calcScenarioImpact", "dartlab.analysis.financial.forecastCalcs", "scenarioImpact", "시나리오 영향" + ), + _CalcEntry( + "calcForecastMethodology", + "dartlab.analysis.financial.forecastCalcs", + "forecastMethodology", + "예측 방법론", + ), + _CalcEntry( + "calcHistoricalRatios", "dartlab.analysis.financial.forecastCalcs", "historicalRatios", "과거 구조 비율" + ), + _CalcEntry( + "calcForecastFlags", "dartlab.analysis.financial.forecastCalcs", "forecastFlags", "매출전망 플래그" + ), + _CalcEntry( + "calcScenarioSimulation", + "dartlab.analysis.financial.forecastCalcs", + "scenarioSimulation", + "시나리오 시뮬레이션", + ), + ), + ), + "예측신호": _AxisEntry( + section="예측신호", + partId="6-2", + description="이 회사의 실적은 어디로 향하는가", + example='analysis("forecast", "예측신호")', + calcs=( + _CalcEntry( + "calcEarningsMomentum", + "dartlab.analysis.financial.predictionSignals", + "earningsMomentum", + "이익 모멘텀", + ), + _CalcEntry( + "calcPeerPrediction", + "dartlab.analysis.financial.predictionSignals", + "peerPrediction", + "횡단면 피어 예측", + ), + _CalcEntry( + "calcStructuralBreak", + "dartlab.analysis.financial.predictionSignals", + "structuralBreak", + "구조변화 감지", + ), + _CalcEntry( + "calcMacroSensitivity", + "dartlab.analysis.financial.predictionSignals", + "macroSensitivity", + "거시경제 민감도", + ), + _CalcEntry( + "calcMacroRegression", + "dartlab.analysis.financial.predictionSignals", + "macroRegression", + "거시-재무 동적 회귀", + ), + _CalcEntry( + "calcEventImpact", + "dartlab.analysis.financial.predictionSignals", + "eventImpact", + "이벤트 충격 분석", + ), + _CalcEntry( + "calcDisclosureDelta", + "dartlab.analysis.financial.predictionSignals", + "disclosureDelta", + "공시 변화 신호", + ), + _CalcEntry( + "calcInventoryDivergence", + "dartlab.analysis.financial.predictionSignals", + "inventoryDivergence", + "재고/매출채권 괴리", + ), + _CalcEntry( + "calcAnnouncementTiming", + "dartlab.analysis.financial.predictionSignals", + "announcementTiming", + "동종업계 공시 타이밍", + ), + _CalcEntry( + "calcSupplyChainSignal", + "dartlab.analysis.financial.predictionSignals", + "supplyChainSignal", + "공급망 모멘텀", + ), + _CalcEntry( + "calcConsensusDirection", + "dartlab.analysis.financial.predictionSignals", + "consensusDirection", + "컨센서스 매출 방향", + ), + _CalcEntry( + "calcFlowDirection", + "dartlab.analysis.financial.predictionSignals", + "flowDirection", + "수급 누적 방향", + ), + _CalcEntry( + "calcRevenueDirection", + "dartlab.analysis.financial.predictionSignals", + "revenueDirection", + "매출 모멘텀 방향", + ), + _CalcEntry( + "calcPredictionSynthesis", + "dartlab.analysis.financial.predictionSignals", + "predictionSynthesis", + "예측 신호 종합", + ), + _CalcEntry( + "calcPredictionFlags", + "dartlab.analysis.financial.predictionSignals", + "predictionFlags", + "예측신호 플래그", + ), + ), + ), + # ── 6부: 매크로 (기업-매크로 연결만 — 시장 자체 분석은 dartlab.macro() 엔진) ── + "매크로민감도": _AxisEntry( + section="매크로민감도", + partId="6-1", + description="이 회사의 매출은 어떤 매크로 변수에 민감한가", + example='analysis("macro", "매크로민감도")', + calcs=( + _CalcEntry( + "calcMacroSensitivity", + "dartlab.analysis.financial.macroExposure", + "macroSensitivity", + "외생변수 회귀 + 매출 방향", + ), + ), + ), + "밸류에이션밴드": _AxisEntry( + section="밸류에이션밴드", + partId="6-2", + description="PER/PBR이 과거 대비 어디에 있는가", + example='analysis("macro", "밸류에이션밴드")', + calcs=( + _CalcEntry( + "calcValuationBand", + "dartlab.analysis.financial.macroExposure", + "valuationBand", + "멀티플 정규분포 밴드", + ), + ), + ), +} + + +# ── Alias ── + +# ── 그룹 정의 — analysis("그룹", "하위") 2단계 호출 ── + +_GROUPS: dict[str, list[str]] = { + "financial": [ + "수익구조", + "자금조달", + "자산구조", + "현금흐름", + "수익성", + "성장성", + "안정성", + "효율성", + "종합평가", + "이익품질", + "비용구조", + "자본배분", + "투자효율", + "재무정합성", + ], + "valuation": ["가치평가"], + "governance": ["지배구조", "공시변화", "비교분석"], + "forecast": ["매출전망", "예측신호"], + "macro": ["매크로민감도", "밸류에이션밴드"], +} + +# 역매핑: 축 → 소속 그룹 +_AXIS_TO_GROUP: dict[str, str] = {} +for _g, _axes in _GROUPS.items(): + for _a in _axes: + _AXIS_TO_GROUP[_a] = _g + +# ── alias — 한글↔영문 양방향 ── + +_ALIASES: dict[str, str] = { + # 영문 → 한글 (축 이름) + "revenue": "수익구조", + "revenueStructure": "수익구조", + "capital": "자금조달", + "funding": "자금조달", + "asset": "자산구조", + "assetStructure": "자산구조", + "cashflow": "현금흐름", + "profitability": "수익성", + "growth": "성장성", + "stability": "안정성", + "efficiency": "효율성", + "scorecard": "종합평가", + "earningsQuality": "이익품질", + "costStructure": "비용구조", + "capitalAllocation": "자본배분", + "investment": "투자효율", + "investmentEfficiency": "투자효율", + "crossStatement": "재무정합성", + "financialConsistency": "재무정합성", + "valuation": "가치평가", + "governance": "지배구조", + "disclosureDelta": "공시변화", + "disclosureChange": "공시변화", + "peerBenchmark": "비교분석", + "peerComparison": "비교분석", + "forecast": "매출전망", + "전망": "매출전망", + "prediction": "예측신호", + "predictionSignals": "예측신호", + "전망신호": "예측신호", + # macro 그룹 (기업-매크로 연결만 — 시장 분석은 dartlab.macro()) + "macroSensitivity": "매크로민감도", + "valuationBand": "밸류에이션밴드", + "민감도": "매크로민감도", + "멀티플밴드": "밸류에이션밴드", + # 그룹 alias (한글) + "재무": "financial", + "재무분석": "financial", + "가치": "valuation", + "지배": "governance", + "전망분석": "forecast", + "매크로": "macro", + "매크로분석": "macro", +} + + +def _resolveAxis(axis: str) -> str: + """축 이름 또는 alias -> 정규 축 이름.""" + if axis in _AXIS_REGISTRY: + return axis + if axis in _ALIASES: + return _ALIASES[axis] + lower = axis.lower() + if lower in _ALIASES: + return _ALIASES[lower] + available = ", ".join(sorted(_AXIS_REGISTRY)) + raise ValueError( + f"알 수 없는 분석 축: '{axis}'. 가용 축: {available}\n 사용법: c.analysis() 로 전체 축 가이드를 확인하세요." + ) + + +# ── basePeriod 지원 여부 검사 (캐싱) ── + +_BP_CACHE: dict[str, bool] = {} + + +def _acceptsBasePeriod(fn) -> bool: + """calc 함수가 basePeriod 파라미터를 받는지 확인 (결과 캐싱).""" + key = f"{fn.__module__}.{fn.__qualname__}" + cached = _BP_CACHE.get(key) + if cached is not None: + return cached + try: + sig = inspect.signature(fn) + result = "basePeriod" in sig.parameters + except (ValueError, TypeError): + result = False + _BP_CACHE[key] = result + return result + + +# ── Group Accessor ── + + +class _GroupAccessor: + """analysis.financial, analysis.valuation 등 그룹 accessor.""" + + def __init__(self, analysis_instance: "Analysis", group: str): + self._analysis = analysis_instance + self._group = group + + def __call__(self, company=None, *, basePeriod=None): + """그룹 가이드 또는 그룹 전체 실행.""" + return self._analysis(self._group, company=company, basePeriod=basePeriod) + + def __getattr__(self, name): + """analysis.financial.profitability() 패턴.""" + try: + resolved = _resolveAxis(name) + except ValueError: + raise AttributeError(f"'{self._group}' 그룹에 '{name}' 축이 없습니다") + + if resolved not in _GROUPS.get(self._group, []): + raise AttributeError(f"'{name}' 축은 '{self._group}' 그룹에 속하지 않습니다") + + def _bound_axis(company=None, *, basePeriod=None): + return self._analysis(self._group, resolved, company=company, basePeriod=basePeriod) + + _bound_axis.__name__ = name + _bound_axis.__doc__ = f'analysis("{self._group}", "{resolved}")' + return _bound_axis + + def __repr__(self) -> str: + axes = _GROUPS.get(self._group, []) + lines = [f"Analysis.{self._group} -- {len(axes)}축"] + for key in axes: + entry = _AXIS_REGISTRY.get(key) + if entry: + lines.append(f" {key:8s} {entry.description}") + return "\n".join(lines) + + +# ── Analysis Class ── + + +class Analysis: + """재무제표 완전 분석 — 20축, 단일 종목 심층. + + Capabilities: + Part 1 — 사업구조: 수익구조, 자금조달, 자산구조, 현금흐름 + Part 2 — 핵심비율: 수익성, 성장성, 안정성, 효율성, 종합평가 + Part 3 — 심화분석: 이익품질, 비용구조, 자본배분, 투자효율, 재무정합성 + Part 4 — 가치평가: DCF, DDM, 상대가치, RIM, 목표주가, 역내재성장률, 민감도 + Part 5 — 비재무 심화: 지배구조, 공시변화감지, 비교분석 + Part 6 — 전망분석: 매출전망, 예측신호 + - 각 축은 Company를 받아 dict를 반환하는 순수 함수 집합 + - review()가 이 결과를 소비하여 구조화 보고서 생성 + + Requires: + 데이터: finance (자동 다운로드) + + AIContext: + - reviewer()가 analysis 결과를 소비하여 AI 해석 생성 + - ask()에서 재무분석 컨텍스트로 활용 + - 70개 calc* 함수의 개별 결과를 LLM에 주입 가능 + + Guide: + - "이 회사 수익구조?" -> analysis("financial", "수익구조") — 매출원가율, 판관비율 등 + - "재무 건전한가?" -> analysis("financial", "안정성") — 부채비율, 유동비율, ICR + - "이익이 진짜야?" -> analysis("financial", "이익품질") — 발생주의 비율, OCF/NI + - "적정가치?" -> analysis("valuation", "가치평가") — DCF/DDM/상대/RIM/목표가 + - "전체 종합?" -> analysis("financial", "종합평가") — 15축 통합 스코어 + - 15축 전부 보고 싶으면 review() 사용 권장 + + SeeAlso: + - review: analysis 결과를 구조화 보고서로 렌더링 + - scan: 전종목 비교 (analysis는 단일 종목 심층) + - Company.insights: 7영역 인사이트 등급 (빠른 요약) + + Args: + axis: 축 이름 ("수익구조", "수익성" 등). None이면 15축 가이드. + company: Company 객체. None이면 해당 축의 분석 항목 목록. + **kwargs: 축별 옵션. + + Returns: + axis=None → pl.DataFrame (15축 가이드) + company=None → pl.DataFrame (해당 축 calc 목록) + 둘 다 있으면 → dict (분석 결과) + + Example:: + + import dartlab + dartlab.analysis() # 전체 가이드 + dartlab.analysis("financial", "수익구조") # 항목 목록 + c = dartlab.Company("005930") + dartlab.analysis("financial", "수익구조", c) # 삼성전자 수익구조 + c.analysis("financial", "수익성") # Company 바인딩 + """ + + def __call__( + self, + axis: str | None = None, + sub: Any | None = None, + *, + company: Any | None = None, + basePeriod: str | None = None, + **kwargs: Any, + ) -> pl.DataFrame | dict: + """엔진("그룹", "하위") 2단계 호출 패턴. + + 호출:: + + c.analysis("financial", "수익성") # 그룹 + 하위 + c.analysis("valuation", "가치평가") # 그룹 + 하위 + c.analysis("forecast", "매출전망") # 그룹 + 하위 + """ + if axis is None: + return self._guide() + + # sub가 Company 객체면 legacy 호환: analysis("financial", "수익성", company) + if sub is not None and hasattr(sub, "stockCode"): + company = sub + sub = None + + # 그룹 해석 — 직접 그룹명 또는 한글 그룹 alias + group = axis if axis in _GROUPS else _ALIASES.get(axis) if _ALIASES.get(axis) in _GROUPS else None + + if group is not None: + # 2단계: analysis("financial", "수익성") + if sub is None: + return self._groupGuide(group) + resolved = _resolveAxis(sub) + # R24-1: 축이 그룹에 속하는지 명시적 검증. + # 이전엔 `analysis("valuation", "수익성")` 같은 그룹/축 mismatch 가 + # silent 로 잘못된 그룹의 결과를 반환했다. + if resolved not in _GROUPS.get(group, []): + group_axes = _GROUPS.get(group, []) + axes_str = ", ".join(group_axes) if group_axes else "(없음)" + raise ValueError( + f"'{resolved}' 축은 '{group}' 그룹에 속하지 않습니다. " + f"'{group}' 그룹의 가용 축: {axes_str}\n" + f" 사용법: c.analysis('{group}') 로 그룹의 축 목록을 확인하거나, " + f"c.analysis('{resolved}') 로 축만 직접 호출하세요." + ) + entry = _AXIS_REGISTRY[resolved] + if company is None: + return self._listCalcs(resolved, entry) + return self._run(company, entry, basePeriod=basePeriod) + + # 그룹 없이 축만 전달된 경우 → 자동 추론 + resolved = _resolveAxis(axis) + entry = _AXIS_REGISTRY[resolved] + + if company is None: + return self._listCalcs(resolved, entry) + + return self._run(company, entry, basePeriod=basePeriod) + + def _groupGuide(self, group: str) -> pl.DataFrame: + """그룹 내 축 목록.""" + axes = _GROUPS.get(group, []) + rows = [] + for key in axes: + entry = _AXIS_REGISTRY.get(key) + if entry: + rows.append({"축": key, "파트": entry.partId, "설명": entry.description}) + if not rows: + return pl.DataFrame() + return pl.DataFrame(rows) + + def _guide(self) -> pl.DataFrame: + """축 가이드 — 통일 컬럼 (axis, label, description, example, partId, items).""" + rows = [] + for key, entry in _AXIS_REGISTRY.items(): + rows.append( + { + "axis": key, + "label": getattr(entry, "label", key), + "description": entry.description, + "example": entry.example, + "partId": entry.partId, + "items": len(entry.calcs), + } + ) + return pl.DataFrame(rows) + + def _listCalcs(self, axis: str, entry: _AxisEntry) -> pl.DataFrame: + """해당 축의 분석 항목 목록.""" + rows = [] + for calc in entry.calcs: + rows.append( + { + "blockKey": calc.blockKey, + "함수": calc.fn, + "label": calc.label, + } + ) + return pl.DataFrame(rows) + + def _run(self, company: Any, entry: _AxisEntry, *, basePeriod: str | None = None) -> dict: + """해당 축의 calc* 함수 전부 실행.""" + results: dict[str, Any] = {} + for calc in entry.calcs: + try: + mod = importlib.import_module(calc.module) + fn = getattr(mod, calc.fn) + if _acceptsBasePeriod(fn): + results[calc.blockKey] = fn(company, basePeriod=basePeriod) + else: + results[calc.blockKey] = fn(company) + except (KeyError, ValueError, TypeError, AttributeError, ArithmeticError, ImportError): + results[calc.blockKey] = None + return results + + def __getattr__(self, name): + """accessor 패턴: analysis.financial, analysis.valuation 등.""" + group = name if name in _GROUPS else _ALIASES.get(name) if _ALIASES.get(name) in _GROUPS else None + if group is not None: + return _GroupAccessor(self, group) + raise AttributeError(f"Analysis에 '{name}' 속성이 없습니다") + + def __repr__(self) -> str: + lines = [f"Analysis -- {len(_AXIS_REGISTRY)}축 종합 분석", ""] + for key, entry in _AXIS_REGISTRY.items(): + lines.append(f" {entry.partId} {key:8s} {entry.description} ({len(entry.calcs)}항목)") + lines.append("") + lines.append("사용법: analysis(), analysis('그룹', '축'), analysis('그룹', '축', company)") + return "\n".join(lines) diff --git a/src/dartlab/analysis/financial/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e6ede68d8ef8c293a11fa3f9dea75bdb31ebcb2 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/__init__.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..979b73e82302bf1da1a13829e23ca14ba1f59d81 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..912d7de6e657ebc7de24781d4d30d841124a8644 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2abf9febe077f80436102ba1a91b0defbb101db8 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/_helpers.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f7c665f6ba19b44a364aeedb3f1a7480d8ebfe3 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..422a807c8ff371a165e131317f427835774f6de3 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/_memoize.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/asset.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/asset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f77325949195b90377e0593d1a8174a8bef2c8f Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/asset.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/capital.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/capital.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9aad6786e3696ae50f9fbc156b0ce7e4e117240 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/capital.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/capital.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/capital.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2327f76a0050dfc0821b759322b551f33ae5b4b3 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/capital.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/capitalAllocation.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/capitalAllocation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc9e97ec6dcfd82030dd8cede9d30224286b5b4b Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/capitalAllocation.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d5eab047f12c069742f4c3ba61149e9b5d6a2a3 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ed94e2747fb9a522c345e9893cf45e6f363cb90 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/cashflow.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/costStructure.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/costStructure.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1717f799f0a544ae8fd542bc6c23d3d88b7dbbb Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/costStructure.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/creditRating.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/creditRating.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..556f09f9659a7a0244a9daa3372b839769831c0d Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/creditRating.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/crossStatement.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/crossStatement.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c059b68dd062c4f798b07dbd97c3e4870d30e97c Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/crossStatement.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/disclosureDelta.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/disclosureDelta.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4df5626ee9023de6332a98991e55939e28172bc Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/disclosureDelta.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1adfa66ac9111e50d072a4ef5311d039da3470e Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd7ec771c96a6149010837e39a0cb33a6ad6d850 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/earningsQuality.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/efficiency.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/efficiency.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..854c11fde3acc26c17985eddab95bdcfc60469e7 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/efficiency.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/forecastCalcs.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/forecastCalcs.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ac6cb9fad66dc9748b3faf4a617264ea3973bdc Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/forecastCalcs.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/governance.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/governance.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc775588d7a29d45af2a0f9b749db1b8280945e7 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/governance.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/growthAnalysis.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/growthAnalysis.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb570a5396cd89075bc0f03c16480d07a3685aea Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/growthAnalysis.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/investmentAnalysis.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/investmentAnalysis.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5eee08970b20dc44fd53c1fdbfda191ac1e83d97 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/investmentAnalysis.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/macroExposure.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/macroExposure.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b897ac790af27738fc04af3ae8804590e984ea2 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/macroExposure.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/peerBenchmark.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/peerBenchmark.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..461ea949bbebbc9c1d698126a7a91d1e301d380f Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/peerBenchmark.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/predictionSignals.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/predictionSignals.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5d51bf7a8606b87c375fa1cb77a2c955b6c45ef Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/predictionSignals.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/profitability.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/profitability.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34ede96297df300da4cb505248e24dcda982b3c6 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/profitability.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/profitability.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/profitability.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecc78cbbcc48c622ccaf22b6ad7d76bb55886d6e Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/profitability.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/revenue.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/revenue.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15f734bf0d3aeba60b09795fc58c6e35443594b1 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/revenue.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/scorecard.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/scorecard.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e1cb6d1a48d7be2b7156be5095d9f981908b6e4 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/scorecard.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/stability.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/stability.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4125718b441a8096d231f006da2e4c045beb53dc Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/stability.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/stability.cpython-313.pyc b/src/dartlab/analysis/financial/__pycache__/stability.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df9266739511caec093323e6c83929af92896c6a Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/stability.cpython-313.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/taxAnalysis.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/taxAnalysis.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3582f163b8ece9fd3876a31537ee48619df4452d Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/taxAnalysis.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/__pycache__/valuation.cpython-312.pyc b/src/dartlab/analysis/financial/__pycache__/valuation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc459524eb1d9b5f2bec3a91288ea9c6df2f2343 Binary files /dev/null and b/src/dartlab/analysis/financial/__pycache__/valuation.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/_helpers.py b/src/dartlab/analysis/financial/_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..1378e5a6ae2183c2d5ae72ca86b11eeda4847649 --- /dev/null +++ b/src/dartlab/analysis/financial/_helpers.py @@ -0,0 +1,477 @@ +"""strategy 빌더 공통 유틸.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +import polars as pl + +_TRIANGLE_RE = re.compile(r"[△▲\u25B3\u25B2]") + + +def parseNumStr(s: str | None) -> float | None: + """문자열 숫자를 float로 변환. 콤마, △(마이너스), % 처리.""" + if s is None: + return None + s = str(s).strip() + if not s or s == "-": + return None + negative = False + if _TRIANGLE_RE.match(s): + negative = True + s = _TRIANGLE_RE.sub("", s) + s = s.replace(",", "").replace("%", "").strip() + if not s: + return None + try: + v = float(s) + return -v if negative else v + except ValueError: + return None + + +def periodCols(df: pl.DataFrame) -> list[str]: + """DataFrame에서 기간 컬럼만 추출 (최신 먼저).""" + from dartlab.core.show import isPeriodColumn + + return [c for c in df.columns if isPeriodColumn(c)] + + +def annualCols(df: pl.DataFrame, maxYears: int = 8) -> list[str]: + """연도 컬럼만 추출 (Q4 또는 연도).""" + cols = periodCols(df) + annual = [c for c in cols if "Q" not in c] + if annual: + return annual[:maxYears] + return [c for c in cols if c.endswith("Q4")][:maxYears] + + +def quarterlyCols(df: pl.DataFrame, maxQuarters: int = 8) -> list[str]: + """분기 컬럼만 추출 (최신 먼저).""" + cols = periodCols(df) + return [c for c in cols if "Q" in c][:maxQuarters] + + +def fetchNotesDetail(company, noteKeys: list[str]) -> dict[str, list[dict]]: + """company.notes에서 noteKeys의 DataFrame을 dict 리스트로 반환. + + 실패 시 해당 키를 건너뜀 (안전). to_dicts()로 즉시 변환하여 + DataFrame 참조를 해제. + """ + result: dict[str, list[dict]] = {} + notesAccessor = getattr(company, "_notesAccessor", None) or getattr(company, "notes", None) + if notesAccessor is None: + return result + for key in noteKeys: + try: + df = getattr(notesAccessor, key, None) + if df is not None and hasattr(df, "to_dicts"): + result[key] = df.to_dicts() + except (AttributeError, FileNotFoundError, ValueError, KeyError): + pass + return result + + +MAX_RATIO_YEARS = 8 + + +def getRatioSeries(company) -> tuple[dict, list[str]] | None: + """ratioSeries 를 안전하게 가져온다 (private internal helper).""" + try: + result = company._ratioSeries() + if result is None: + return None + return result + except (ValueError, KeyError, AttributeError): + return None + + +def mergeRows(primary: dict | None, fallback: dict | None) -> dict: + """두 행을 merge. primary의 값이 None이면 fallback 값 사용.""" + if primary is None and fallback is None: + return {} + if primary is None: + return fallback or {} + if fallback is None: + return primary + merged = dict(primary) + for k, v in fallback.items(): + if merged.get(k) is None and v is not None: + merged[k] = v + return merged + + +def getRatios(company): + """RatioResult 객체 (eps/bps/marketCap/altmanZScore 등 attribute 보유) 를 + 안전하게 가져온다. + + 사용자 surface ``c.show("ratios")`` 는 DataFrame 을 반환하지만, 내부 compute + 레이어 (analysis/credit/review/valuation) 는 attribute access 가 필요한 + RatioResult 객체가 필요하다. 두 형식의 차이로 show() 흡수 불가. + """ + try: + return company._finance.ratios + except (ValueError, KeyError, AttributeError): + return None + + +def toDict(selectResult, maxPeriods: int = 0) -> tuple[dict[str, dict], list[str]] | None: + """SelectResult → ({항목: {period: val}}, periodCols). + + ``toDictBySnakeId`` 가 한국어 라벨도 키로 노출하므로 이 함수는 deprecated thin + wrapper. 신규 코드는 ``toDictBySnakeId`` 사용. + + maxPeriods=0이면 전체 기간, >0이면 최신 N개만. + EDGAR DataFrame(account 컬럼 = snakeId)일 때 키를 한국어 라벨로 자동 변환하여 + analysis 함수에서 data.get("매출액") 등이 양쪽 provider에서 동일하게 작동한다. + """ + if selectResult is None: + return None + + df = selectResult.df + periods = sorted(periodCols(df), reverse=True) + if maxPeriods > 0: + periods = periods[:maxPeriods] + if not periods: + return None + + labelCol = ( + "항목" if "항목" in df.columns else "항목" if "항목" in df.columns else (df.columns[0] if df.columns else None) + ) + if labelCol is None: + return None + + # EDGAR bridge: snakeId 키 → 한국어 라벨 키로 변환 (analysis 함수 호환) + needsBridge = labelCol not in ("항목",) + krLabels: dict[str, str] | None = None + if needsBridge: + from dartlab.core.finance.labels import get_korean_labels + + krLabels = get_korean_labels() + + data: dict[str, dict] = {} + for row in df.iter_rows(named=True): + label = str(row.get(labelCol, "")) + # snakeId → 한국어 키 변환 (EDGAR) + key = krLabels.get(label, label) if krLabels else label + data[key] = {c: row.get(c) for c in periods} + if not data: + return None + # 분기에서 연간 합성 — SSOT 헬퍼 위임 (core/finance/flow.py) + from dartlab.core.finance.flow import synthesizeAnnualFromQuarters + + periods = synthesizeAnnualFromQuarters(data, periods, getattr(selectResult, "topic", None)) + return (data, periods) + + +def toDictBySnakeId(selectResult, maxPeriods: int = 0) -> tuple[dict[str, dict], list[str]] | None: + """SelectResult → ({snakeId: {period: val}}, periodCols). + + toDict와 동일하되, 키를 snakeId 컬럼으로 사용한다. + snakeId로 select한 뒤 .get()도 snakeId로 접근할 때 사용. + + ``SNAKEID_ALIASES`` 의 alias key 도 같은 데이터로 자동 노출. + 예: ``data.get("liabilities")`` 가 있으면 ``data.get("total_liabilities")`` + 도 같은 값 반환. EDGAR↔DART snakeId 차이를 calc 함수가 무시할 수 있다. + """ + if selectResult is None: + return None + + df = selectResult.df + periods = sorted(periodCols(df), reverse=True) + if maxPeriods > 0: + periods = periods[:maxPeriods] + if not periods: + return None + + idCol = "snakeId" if "snakeId" in df.columns else None + if idCol is None: + return toDict(selectResult, maxPeriods) + + # 한국어 라벨도 함께 키로 노출 (toDict 와 단일 경로 통합). + # data.get("매출액") 와 data.get("sales") 둘 다 같은 row 반환. + labelCol = "항목" if "항목" in df.columns else None + + data: dict[str, dict] = {} + for row in df.iter_rows(named=True): + sid = str(row.get(idCol, "")) + rowData = {c: row.get(c) for c in periods} + data[sid] = rowData + if labelCol: + label = str(row.get(labelCol, "")) + if label and label != sid: + data[label] = rowData + + if not data: + return None + + # 연간 합성 — SSOT 헬퍼 위임 (toDict 와 동일 경로) + from dartlab.core.finance.flow import synthesizeAnnualFromQuarters + + periods = synthesizeAnnualFromQuarters(data, periods, getattr(selectResult, "topic", None)) + + # SNAKEID_ALIASES 머지 + 양방향 노출 — SSOT 헬퍼 위임 (core/finance/labels.py). + # 두 키 모두 row 가 존재하면 col 별 not-null 머지, 한쪽만 존재하면 다른 쪽도 + # 같은 row 를 가리키도록 노출하여 calc 가 어느 쪽 키로도 접근 가능. + from dartlab.core.finance.labels import SNAKEID_ALIASES, mergeAliasRows + + mergeAliasRows(data, metaCols=set()) # dict 머지 — 메타 컬럼 없음 + for alias, canonical in SNAKEID_ALIASES.items(): + canonRow = data.get(canonical) + aliasRow = data.get(alias) + if canonRow is not None and aliasRow is None: + data[alias] = canonRow + elif aliasRow is not None and canonRow is None: + data[canonical] = aliasRow + + return (data, periods) + + +# ── basePeriod 인프라 ── + +_QUARTER_RE = re.compile(r"^(\d{4})Q([1-4])$") +_YEAR_RE = re.compile(r"^(\d{4})$") + + +@dataclass(frozen=True) +class PeriodRange: + """basePeriod로부터 결정된 분석 기간 범위.""" + + basePeriod: str + annualCols: list[str] + quarterlyCols: list[str] + + +def _periodSortKey(p: str) -> str: + """기간 문자열을 정렬 가능한 키로 변환. "2024" -> "2024Q5", "2024Q3" -> "2024Q3".""" + if "Q" not in p: + return p + "Q5" + return p + + +def annualColsFromPeriods( + periods: list[str], + basePeriod: str | None = None, + maxYears: int = 8, +) -> list[str]: + """기간 목록에서 연간 컬럼 추출 — basePeriod 이하만. + + 14개 파일의 _annualCols를 대체하는 통합 함수. + 연도("2024") 우선, 없으면 Q4("2024Q4") fallback. + basePeriod=None이면 전체에서 최신 maxYears개. + basePeriod="2022Q4"이면 2022Q4 이하에서 maxYears개. + basePeriod="2022"이면 2022 이하 연도에서 maxYears개. + """ + cols = sorted([c for c in periods if "Q" not in c], reverse=True) + if not cols: + cols = sorted([c for c in periods if c.endswith("Q4")], reverse=True) + if basePeriod is not None: + limit = _periodSortKey(basePeriod) + cols = [c for c in cols if _periodSortKey(c) <= limit] + return cols[:maxYears] + + +def annualLabel(period: str) -> str: + """연간 기간 표시용 라벨. Q4 fallback 컬럼의 접미사를 제거한다. + + "2025Q4" -> "2025", "2025" -> "2025", "2025Q3" -> "2025Q3" (분기는 유지) + """ + if period.endswith("Q4"): + return period[:-2] + return period + + +# 차입금 snakeId 후보 리스트 — 회사마다 다른 변형 모두 합산. +# 분리 키 (단/장기) + 통합 키 (borrowings). +# SNAKEID_ALIASES 가 자동으로 short_term_borrowings/long_term_borrowings 같은 +# 변형을 canonical 키 (shortterm_borrowings/longterm_borrowings) 로 매핑하므로 +# 여기선 canonical 만 나열한다 (변형 키 중복 합산 방지). +_BORROWING_KEYS = ( + "shortterm_borrowings", + "longterm_borrowings", + "noncurrent_borrowings", # 비유동/장기 변형 (LG에솔) + "current_portion_of_longterm_borrowings", # 유동성장기차입금 + "borrowings", # 통합 (SK하이닉스) +) +_BOND_KEYS = ("debentures", "bonds_payable", "current_portion_of_debentures") + +# 매출원가 분리 키 (제품/상품/공사/용역원가) +_COGS_KEYS = ( + "cost_of_sales", + "cost_of_goods_sold", + "product_cost_of_sales", + "merchandise_cost_of_sales", + "construction_cost_of_sales", + "service_cost_of_sales", +) + +# 판관비 분리 키 (판매비/관리비) +_SGA_KEYS = ( + "selling_and_administrative_expenses", + "selling_expenses", + "administrative_expenses", + "sga", +) + +# 법인세 분리 키 (당기/이연) +_INCOME_TAX_KEYS = ( + "income_taxes", + "income_tax_expense", + "current_income_tax_expense", + "deferred_income_tax_expense", +) + + +def _sumWithFallback(snakeData: dict, col: str, separateKeys: tuple, fallbackKey: str) -> float: + """분리 키 우선 합산, 모두 결손이면 통합 키 fallback. None vs 0 구분.""" + parts = [] + for sid in separateKeys: + if sid == fallbackKey: + continue + v = snakeData.get(sid, {}).get(col) + if v is not None and v != 0: + parts.append(v) + if not parts: + v = snakeData.get(fallbackKey, {}).get(col) + if v is not None: + parts.append(v) + return sum(parts) + + +def sumBorrowings(snakeData: dict, col: str) -> float: + """차입금 합산 — 회사 키 패턴 무관. + + snakeData 는 toDictBySnakeId 결과. 단/장기 분리 키 우선 합산하되, + 분리 키가 모두 0/None 이면 통합 borrowings 키 fallback. + bonds 는 별도로 _BOND_KEYS 에서 추가. + """ + parts = [] + for sid in _BORROWING_KEYS: + if sid == "borrowings": + continue # 통합 키는 fallback 으로만 사용 + v = snakeData.get(sid, {}).get(col) + if v is not None and v != 0: + parts.append(v) + + # 분리 키가 모두 비어있으면 통합 borrowings fallback + if not parts: + v = snakeData.get("borrowings", {}).get(col) + if v is not None: + parts.append(v) + + # 사채 추가 + for sid in _BOND_KEYS: + v = snakeData.get(sid, {}).get(col) + if v is not None and v != 0: + parts.append(v) + + return sum(parts) + + +def sumCostOfSales(snakeData: dict, col: str) -> float: + """매출원가 합산 — 제품/상품/공사/용역원가 분리 키 fallback.""" + return _sumWithFallback(snakeData, col, _COGS_KEYS, "cost_of_sales") + + +def sumSGA(snakeData: dict, col: str) -> float: + """판매관리비 합산 — 판매비/관리비 분리 키 fallback.""" + return _sumWithFallback(snakeData, col, _SGA_KEYS, "selling_and_administrative_expenses") + + +def sumIncomeTax(snakeData: dict, col: str) -> float: + """법인세 합산 — 당기/이연 분리 키 fallback.""" + return _sumWithFallback(snakeData, col, _INCOME_TAX_KEYS, "income_taxes") + + +# 한국어 키 dict 용 차입금 합산 (credit/metrics.py 가 위임). +_KR_BORROWING_SHORT = ("단기차입금", "차입금단기", "short_term_borrowings") +_KR_BORROWING_LONG = ("장기차입금", "long_term_borrowings") +_KR_BORROWING_UNIFIED = ("차입부채", "차입금", "장기차입부채", "유동성장기차입금") + + +def sumBorrowingsKorean(bsData: dict, col: str) -> tuple[float, float, float]: + """한국어 키 BS dict 의 차입금 합산. + + credit/metrics.py 처럼 toDict 결과(한국어 키 dict)를 받아 + (단기차입금, 장기차입금, 통합 fallback) 형태로 분해 반환. + + Returns: + (stBorrow, ltBorrow, totalBorrowing) — 분리/통합 fallback 적용 후 + """ + stb = 0.0 + for k in _KR_BORROWING_SHORT: + v = bsData.get(k, {}).get(col) + if v is not None: + stb = float(v) + break + + ltb = 0.0 + for k in _KR_BORROWING_LONG: + v = bsData.get(k, {}).get(col) + if v is not None: + ltb = float(v) + break + + # Fallback: 분리 키 모두 0/None → 통합 키 (audit 04 #B) + if stb == 0 and ltb == 0: + for k in _KR_BORROWING_UNIFIED: + v = bsData.get(k, {}).get(col) + if v is not None: + stb = float(v) # 통합값을 stb 에 1번만 (credit 호환) + break + + bondsVal = bsData.get("사채", {}).get(col) or 0 + total = stb + ltb + float(bondsVal) + return stb, ltb, total + + +def annualLabels(periods: list[str]) -> dict[str, str]: + """연간 기간 컬럼 → 표시 라벨 매핑. 테이블 렌더링에서 헤더 치환용.""" + return {p: annualLabel(p) for p in periods} + + +def quarterlyColsFromPeriods( + periods: list[str], + basePeriod: str | None = None, + maxQuarters: int = 8, +) -> list[str]: + """기간 목록에서 분기 컬럼 추출 — basePeriod 이하만.""" + qs = sorted([c for c in periods if "Q" in c], reverse=True) + if not qs: + # EDGAR fallback: 연간 데이터 (2024, 2023, ...) + qs = sorted([c for c in periods if c.isdigit() and len(c) == 4], reverse=True) + if basePeriod is not None: + limit = _periodSortKey(basePeriod) + qs = [c for c in qs if _periodSortKey(c) <= limit] + return qs[:maxQuarters] + + +def resolveBasePeriod( + company, + basePeriod: str | None = None, + maxYears: int = 8, + maxQuarters: int = 8, +) -> PeriodRange: + """basePeriod를 Company의 실제 기간으로 해석. + + basePeriod=None이면 최신 기간 자동 감지. + ratioSeries 캐시를 활용하여 속도 우선. + """ + rs = getRatioSeries(company) + if rs is not None: + _, allPeriods = rs + else: + allPeriods = [] + + if basePeriod is None: + qs = sorted([p for p in allPeriods if "Q" in p], reverse=True) + resolved = qs[0] if qs else "9999Q4" + else: + resolved = basePeriod + + return PeriodRange( + basePeriod=resolved, + annualCols=annualColsFromPeriods(allPeriods, resolved, maxYears), + quarterlyCols=quarterlyColsFromPeriods(allPeriods, resolved, maxQuarters), + ) diff --git a/src/dartlab/analysis/financial/_memoize.py b/src/dartlab/analysis/financial/_memoize.py new file mode 100644 index 0000000000000000000000000000000000000000..3f3117eec883c2ffd47f5068264e55d914999de8 --- /dev/null +++ b/src/dartlab/analysis/financial/_memoize.py @@ -0,0 +1,52 @@ +"""calc 함수 메모이제이션 — Company._cache 공유. + +analysis()와 review()가 같은 calc 함수를 호출할 때 +두 번째 호출은 캐시에서 즉시 반환한다. + +Usage:: + + from dartlab.analysis.financial._memoize import memoized_calc + + @memoized_calc + def calcMarginTrend(company, *, basePeriod=None): + ... +""" + +from __future__ import annotations + +import functools +from typing import Any, Callable + + +def memoized_calc(fn: Callable[..., Any]) -> Callable[..., Any]: + """calc 함수 결과를 Company._cache에 메모이제이션. + + - key: ``_{함수명}:{basePeriod}`` + - Company._cache(BoundedCache)가 없으면 캐시 없이 실행. + - 결과가 None이면 캐시하지 않는다 (데이�� 갱신 후 재시도 허용). + """ + + import inspect + + _has_base_period = "basePeriod" in inspect.signature(fn).parameters + + @functools.wraps(fn) + def wrapper(company: Any, *, basePeriod: str | None = None) -> Any: + cache = getattr(company, "_cache", None) + key = f"_{fn.__name__}:{basePeriod}" + + if cache is not None and key in cache: + return cache[key] + + if _has_base_period: + result = fn(company, basePeriod=basePeriod) + else: + result = fn(company) + + # None은 캐시하지 않음 — 데이터 갱신 후 재계산 허용 + if cache is not None and result is not None: + cache[key] = result + + return result + + return wrapper diff --git a/src/dartlab/analysis/financial/asset.py b/src/dartlab/analysis/financial/asset.py new file mode 100644 index 0000000000000000000000000000000000000000..89ca66e72e73de4d2030ce7de35ca4d11944c2c6 --- /dev/null +++ b/src/dartlab/analysis/financial/asset.py @@ -0,0 +1,730 @@ +"""1-3 자산 구조 분석 — 계산만 담당. + +BS를 영업/비영업으로 재분류하여 자산 운영 구조를 본다. +블록 조립은 review/builders.py가 한다. +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 +_MAX_QUARTERS = 5 + + +def _get(row: dict, col: str) -> float: + """dict에서 안전하게 값 꺼내기 (None → 0).""" + v = row.get(col) if row else None + return v if v is not None else 0 + + +def _getFirst(data: dict, keys: list[str], col: str) -> float: + """여러 항목 중 값이 있는 첫 번째를 반환 (fallback 체인).""" + for k in keys: + row = data.get(k, {}) + v = row.get(col) if row else None + if v is not None and v != 0: + return v + return 0 + + +from dartlab.core.finance.calc import safePct as _pct # noqa: E402 + + +# ── 영업/비영업 분류 매핑 ── + +# 영업자산 — 이중 카운팅 방지를 위해 fallback 쌍 분리 +# "매출채권" / "매출채권및기타채권" → 하나만 사용 (_getFirst) +# "매입채무" / "매입채무및기타채무" → 하나만 사용 (_getFirst) +_OP_ASSET_SIMPLE = [ + "기타유동금융자산", + "재고자산", + "선급금", + "기타유동자산", + # 고정영업자산 + "유형자산", + "사용권자산", + "무형자산", + "영업권", + "건설중인자산", + "투자부동산", +] +_OP_ASSET_FALLBACK = [["매출채권", "매출채권및기타채권"]] + +_NON_OP_ASSET_ACCOUNTS = [ + "현금및현금성자산", + "단기금융자산", + "장기금융자산", + "관계기업등지분관련투자자산", + "기타비유동금융자산", +] + +# 관계기업 투자: 기업마다 다른 항목 사용 → fallback 쌍 +_ASSOCIATES_FALLBACK = ("관계기업등지분관련투자자산", "지분법적용투자지분") + +_OP_LIAB_SIMPLE = [ + "선수금", + "계약부채", + "선수수익", + "미지급비용", + "미지급금", + "충당부채", + "기타유동부채", +] +_OP_LIAB_FALLBACK = [["매입채무", "매입채무및기타채무"]] + +# 운전자본: fallback 체인 (매출채권 없으면 매출채권및기타채권 사용) +_WC_REC_KEYS = ["매출채권", "매출채권및기타채권"] +_WC_PAY_KEYS = ["매입채무", "매입채무및기타채무"] +_WC_ASSET_KEYS = ["재고자산", "선급금", "기타유동자산"] + +_FIXED_OP_KEYS = ["유형자산", "사용권자산", "무형자산", "영업권", "건설중인자산"] + + +def _sumOp(data: dict, col: str, simpleKeys: list[str], fallbackPairs: list[list[str]]) -> float: + """영업자산/부채 합산 (fallback 쌍은 하나만 선택).""" + total = sum(_get(data.get(k, {}), col) for k in simpleKeys) + for pair in fallbackPairs: + total += _getFirst(data, pair, col) + return total + + +# ── 메인: 자산 구조 ── + + +@memoized_calc +def calcAssetStructure(company, *, basePeriod: str | None = None) -> dict | None: + """자산을 영업/비영업으로 재분류 — 시계열. + + 반환:: + + { + "latest": { + "totalAssets": float, + "opAssets": float, "opAssetsPct": float, + "nonOpAssets": float, "nonOpAssetsPct": float, + "workingCapitalAssets": float, + "fixedOpAssets": float, + "noa": float, + "netFinDebt": float, + }, + "composition": { + "receivables": float, "inventory": float, + "ppe": float, "intangibles": float, + "rou": float, "cip": float, + "cash": float, "investments": float, + }, + "history": [{period, opAssetsPct, nonOpAssetsPct, noa, ...}, ...], + "diagnosis": str, + } + """ + _allFallback = [k for pair in _OP_ASSET_FALLBACK + _OP_LIAB_FALLBACK for k in pair] + allAccounts = ( + ["자산총계", "부채총계"] + + _OP_ASSET_SIMPLE + + _allFallback + + _NON_OP_ASSET_ACCOUNTS + + list(_ASSOCIATES_FALLBACK) + + _OP_LIAB_SIMPLE + ) + result = company.select("BS", allAccounts) + parsed = toDictBySnakeId(result) + if parsed is None: + return None + + data, allPeriods = parsed + taRow = data.get("자산총계") + if taRow is None: + return None + + yCols = annualColsFromPeriods(allPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + history = [] + latest = None + + for col in yCols: + ta = _get(taRow, col) + if ta <= 0: + continue + + # 영업자산 합산 (fallback 쌍은 하나만 선택) + opAssets = _sumOp(data, col, _OP_ASSET_SIMPLE, _OP_ASSET_FALLBACK) + # 비영업자산 합산 (관계기업 투자는 fallback 쌍) + nonOpAssets = 0 + for k in _NON_OP_ASSET_ACCOUNTS: + if k == _ASSOCIATES_FALLBACK[0]: + v = _get(data.get(k, {}), col) + if v == 0: + v = _get(data.get(_ASSOCIATES_FALLBACK[1], {}), col) + nonOpAssets += v + else: + nonOpAssets += _get(data.get(k, {}), col) + # 나머지 = 총자산 - 영업 - 비영업 (분류 안 된 것) + otherAssets = ta - opAssets - nonOpAssets + + # 영업부채 (fallback 쌍은 하나만 선택) + opLiab = _sumOp(data, col, _OP_LIAB_SIMPLE, _OP_LIAB_FALLBACK) + + # 순영업자산(NOA) = 영업자산 - 영업부채 + noa = opAssets - opLiab + + # 순운전자본 자산/부채 (fallback 체인) + rec = _getFirst(data, _WC_REC_KEYS, col) + wcAssets = rec + sum(_get(data.get(k, {}), col) for k in _WC_ASSET_KEYS) + pay = _getFirst(data, _WC_PAY_KEYS, col) + wcOtherLiab = sum( + _get(data.get(k, {}), col) for k in ["선수금", "계약부채", "선수수익", "미지급비용", "미지급금"] + ) + wcLiab = pay + wcOtherLiab + wc = wcAssets - wcLiab + + # 고정영업자산 + fixedOp = sum(_get(data.get(k, {}), col) for k in _FIXED_OP_KEYS) + + # 순금융부채 + cash = _get(data.get("현금및현금성자산", {}), col) + stFin = _get(data.get("단기금융자산", {}), col) + finDebt = _get(data.get("부채총계", {}), col) - opLiab + netFinDebt = max(0, finDebt - cash - stFin) + + # 세부 구성 (매 연도) + recVal = _getFirst(data, _WC_REC_KEYS, col) + invVal = _get(data.get("재고자산", {}), col) + ppeVal = _get(data.get("유형자산", {}), col) + intVal = _get(data.get("무형자산", {}), col) + gwVal = _get(data.get("영업권", {}), col) + rouVal = _get(data.get("사용권자산", {}), col) + cipVal = _get(data.get("건설중인자산", {}), col) + assocVal = _get(data.get(_ASSOCIATES_FALLBACK[0], {}), col) + if assocVal == 0: + assocVal = _get(data.get(_ASSOCIATES_FALLBACK[1], {}), col) + invstVal = assocVal + _get(data.get("장기금융자산", {}), col) + + entry = { + "period": col, + "totalAssets": ta, + "opAssets": opAssets, + "opAssetsPct": _pct(opAssets, ta), + "nonOpAssets": nonOpAssets, + "nonOpAssetsPct": _pct(nonOpAssets, ta), + "otherAssetsPct": _pct(otherAssets, ta), + "noa": noa, + "wc": wc, + "fixedOp": fixedOp, + # 세부 항목 + "receivables": recVal, + "inventory": invVal, + "ppe": ppeVal, + "intangibles": intVal, + "goodwill": gwVal, + "rou": rouVal, + "cip": cipVal, + "cash": cash, + "stFinancial": stFin, + "investments": invstVal, + } + history.append(entry) + + if latest is None: + latest = { + "totalAssets": ta, + "opAssets": opAssets, + "opAssetsPct": _pct(opAssets, ta), + "nonOpAssets": nonOpAssets, + "nonOpAssetsPct": _pct(nonOpAssets, ta), + "otherAssets": ta - opAssets - nonOpAssets, + "otherAssetsPct": _pct(ta - opAssets - nonOpAssets, ta), + "workingCapital": wc, + "fixedOpAssets": fixedOp, + "noa": noa, + "netFinDebt": netFinDebt, + } + + if latest is None: + return None + + # 진단 + opPct = latest["opAssetsPct"] + nonOpPct = latest["nonOpAssetsPct"] + if opPct >= 70: + diagnosis = "영업자산 중심 — 자산 대부분이 사업에 투입됨" + elif nonOpPct >= 40: + diagnosis = "비영업자산 과다 — 투자/금융자산 비중이 높음 (지주회사 성격)" + elif opPct >= 50: + diagnosis = "혼합 구조 — 영업자산과 비영업자산이 섞여 있음" + else: + diagnosis = "비영업 우위 — 영업 자산보다 비영업 자산이 많음" + + # notes enrichment — 주석에서 상세 분해 데이터 추가 (있으면) + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["inventory", "tangibleAsset", "intangibleAsset", "investmentProperty"]) + + result_dict: dict[str, Any] = { + "latest": latest, + "history": history, + "diagnosis": diagnosis, + } + if notesDetail: + result_dict["notesDetail"] = notesDetail + + return result_dict + + +# ── 운전자본 ── + + +@memoized_calc +def calcWorkingCapital(company, *, basePeriod: str | None = None) -> dict | None: + """운전자본 상세 + CCC. + + 반환:: + + { + "latest": { + "wc": float, + "receivables": float, "inventory": float, + "payables": float, + "receivableDays": float, "inventoryDays": float, + "payableDays": float, "ccc": float, + }, + "history": [{period, wc, receivableDays, inventoryDays, payableDays, ccc}, ...], + } + """ + bsAccounts = ["매출채권", "매출채권및기타채권", "재고자산", "매입채무", "매입채무및기타채무"] + isAccounts = ["매출액", "매출원가"] + + bsResult = company.select("BS", bsAccounts) + isResult = company.select("IS", isAccounts) + bsParsed = toDictBySnakeId(bsResult) + isParsed = toDictBySnakeId(isResult) + if bsParsed is None or isParsed is None: + return None + + bsData, bsPeriods = bsParsed + isData, isPeriods = isParsed + + invRow = bsData.get("재고자산", {}) + revRow = isData.get("매출액", {}) + cogsRow = isData.get("매출원가", {}) + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getFlow(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + latest = None + + for col in yCols: + rec = _getFirst(bsData, _WC_REC_KEYS, col) + inv = _get(invRow, col) + pay = _getFirst(bsData, _WC_PAY_KEYS, col) + rev = _getFlow(revRow, col) + cogs = _getFlow(cogsRow, col) + wc = rec + inv - pay + + # 회전일수 + recDays = rec / rev * 365 if rev > 0 else None + invDays = inv / cogs * 365 if cogs > 0 else None + payDays = pay / cogs * 365 if cogs > 0 else None + ccc = None + if recDays is not None and invDays is not None and payDays is not None: + ccc = recDays + invDays - payDays + + entry = { + "period": col, + "wc": wc, + "receivableDays": recDays, + "inventoryDays": invDays, + "payableDays": payDays, + "ccc": ccc, + } + history.append(entry) + + if latest is None: + latest = { + "wc": wc, + "receivables": rec, + "inventory": inv, + "payables": pay, + "receivableDays": recDays, + "inventoryDays": invDays, + "payableDays": payDays, + "ccc": ccc, + } + + if latest is None: + return None + return {"latest": latest, "history": history} + + +# ── CAPEX 패턴 ── + + +@memoized_calc +def calcCapexPattern(company, *, basePeriod: str | None = None) -> dict | None: + """CAPEX vs 감가상각 + 건설중인자산 추이. + + 반환:: + + { + "latest": { + "capex": float, "depreciation": float, + "capexToDepRatio": float, + "cip": float, "cipPct": float, + "investmentType": str, + }, + "history": [{period, capex, depreciation, capexToDepRatio, cip}, ...], + } + """ + # CAPEX = 유형자산 취득(CF 투자활동에서) + cfAccounts = ["유형자산의취득", "무형자산의취득", "감가상각비"] + bsAccounts = ["건설중인자산", "유형자산", "자산총계"] + isAccounts = ["감가상각비"] + + cfResult = company.select("CF", cfAccounts, strict=False) + bsResult = company.select("BS", bsAccounts, strict=False) + isResult = company.select("IS", isAccounts, strict=False) + + bsParsed = toDictBySnakeId(bsResult) + if bsParsed is None: + return None + + bsData, bsPeriods = bsParsed + cfData = toDictBySnakeId(cfResult) + isData = toDictBySnakeId(isResult) + + cfDict = cfData[0] if cfData else {} + isDict = isData[0] if isData else {} + + cipRow = bsData.get("건설중인자산", {}) + ppeRow = bsData.get("유형자산", {}) + taRow = bsData.get("자산총계", {}) + capexRow = cfDict.get("유형자산의취득", {}) + intCapexRow = cfDict.get("무형자산의취득", {}) + # 감가상각 3-tier fallback: + # 1순위: IS 감가상각비 (있는 기업은 직접 사용) + # 2순위: CF 영업활동 감가상각비 (한국전력 등) + # 3순위: 업종별 추정 (유형자산 / 추정내용연수 10년) + depRow = isDict.get("감가상각비") or cfDict.get("감가상각비") or {} + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getFlow2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + latest = None + + # 감가상각 이상치 필터용 중앙값 사전 계산 + _rawDeps = [abs(_getFlow2(depRow, c)) for c in yCols] + _validDeps = [d for d in _rawDeps if d > 0] + _depMedian = sorted(_validDeps)[len(_validDeps) // 2] if _validDeps else 0 + + for col in yCols: + cip = _get(cipRow, col) + ppe = _get(ppeRow, col) + ta = _get(taRow, col) + # CAPEX는 CF에서 음수로 나옴 → abs + capex = abs(_getFlow2(capexRow, col)) + abs(_getFlow2(intCapexRow, col)) + dep = abs(_getFlow2(depRow, col)) + # 이상치 필터: 중앙값 대비 100배 이상 차이나면 스케일 오류로 판단 + if dep > 0 and _depMedian > 0: + if dep / _depMedian > 100 or _depMedian / dep > 100: + dep = 0 # 이상치 제거 → 아래 fallback으로 추정 + # 3순위 fallback: 감가상각 null이면 유형자산/10으로 추정 + if dep == 0 and ppe is not None and ppe > 0: + dep = ppe / 10 # 평균 내용연수 10년 가정 + + ratio = capex / dep if dep > 0 else None + # CAPEX/감가상각 비율 상한: 10배 초과는 감가상각 추정 오류 가능성 + if ratio is not None and ratio > 10: + ratio = None + cipPct = _pct(cip, ta) if ta > 0 else 0 + + entry = { + "period": col, + "capex": capex, + "depreciation": dep, + "capexToDepRatio": ratio, + "cip": cip, + "cipPct": cipPct, + } + history.append(entry) + + if latest is None: + if ratio is not None and ratio > 1.5: + investType = "적극 투자 — CAPEX가 감가상각의 1.5배 초과" + elif ratio is not None and ratio > 1.0: + investType = "성장 투자 — CAPEX > 감가상각" + elif ratio is not None and ratio > 0: + investType = "유지 투자 — CAPEX < 감가상각" + else: + investType = "투자 정보 부족" + + latest = { + "capex": capex, + "depreciation": dep, + "capexToDepRatio": ratio, + "cip": cip, + "cipPct": cipPct, + "investmentType": investType, + } + + if latest is None: + return None + return {"latest": latest, "history": history} + + +# ── 투자부동산 추세 ── + + +@memoized_calc +def calcInvestmentPropertyTrend(company, *, basePeriod: str | None = None) -> dict | None: + """투자부동산 비중 + 공정가치 변동 추세. + + 부동산 비중이 높은 기업(REIT, 건설사, 보험사)에서 자산 분석 정확도 향상. + notes.investmentProperty에서 항목별 시계열을 추출하여 총자산 대비 비중 추적. + + 반환:: + + { + "history": [ + {"period": str, "totalAssets": float, "ipValue": float, "ipPct": float}, + ... + ], + "trend": str | None, + "notesDetail": list[dict] | None, + } + """ + bsResult = company.select("BS", ["자산총계", "투자부동산"]) + parsed = toDictBySnakeId(bsResult) + if parsed is None: + return None + + data, allPeriods = parsed + taRow = data.get("자산총계", {}) + ipRow = data.get("투자부동산", {}) + + # 투자부동산 계정이 아예 없거나 값이 전부 0이면 해당 없음 + if not ipRow or all(v is None or v == 0 for v in ipRow.values()): + return None + + yCols = annualColsFromPeriods(allPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + history = [] + for col in yCols: + ta = _get(taRow, col) + ip = _get(ipRow, col) + if ta <= 0: + continue + history.append( + { + "period": col, + "totalAssets": ta, + "ipValue": ip, + "ipPct": round(ip / ta * 100, 2) if ip > 0 else 0, + } + ) + + if not history: + return None + + # 추세 판단 + trend = None + pcts = [h["ipPct"] for h in history if h["ipPct"] > 0] + if len(pcts) >= 2: + diff = pcts[0] - pcts[-1] + if diff > 2: + trend = "비중 증가" + elif diff < -2: + trend = "비중 감소" + else: + trend = "안정" + + result_dict: dict[str, Any] = {"history": history, "trend": trend} + + # notes enrichment — 투자부동산 세부 항목 (공정가치, 취득/처분 등) + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesData = fetchNotesDetail(company, ["investmentProperty"]) + if notesData.get("investmentProperty"): + result_dict["notesDetail"] = notesData["investmentProperty"] + + return result_dict + + +# ── 무형자산 상세 ── + + +@memoized_calc +def calcIntangibleAssetDetail(company, *, basePeriod: str | None = None) -> dict | None: + """무형자산 상세 분해 — 영업권/R&D/기타 비중 + 상각 추세. + + notes.intangibleAsset에서 항목별 시계열을 추출하여 + 영업권 비중, R&D 자산화 추세, 손상차손 리스크를 분석. + 바이오/IT 등 IP 비중 높은 기업에서 이익품질 판단에 중요. + + 반환:: + + { + "items": [{"name": str, "latestValue": float, "pct": float}, ...], + "totalIntangible": float, + "goodwillPct": float | None, + "trend": str | None, + "notesDetail": list[dict] | None, + } + """ + from dartlab.analysis.financial._helpers import fetchNotesDetail, parseNumStr + + notesData = fetchNotesDetail(company, ["intangibleAsset"]) + rawRows = notesData.get("intangibleAsset") + + # notes 없으면 BS에서 기본 분해 + bsResult = company.select("BS", ["무형자산", "영업권", "자산총계"]) + bsParsed = toDictBySnakeId(bsResult) + if bsParsed is None: + return None + + bsData, bsPeriods = bsParsed + intRow = bsData.get("무형자산", {}) + gwRow = bsData.get("영업권", {}) + taRow = bsData.get("자산총계", {}) + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + latestCol = yCols[0] + totalInt = _get(intRow, latestCol) + _get(gwRow, latestCol) + ta = _get(taRow, latestCol) + + if totalInt <= 0: + return None + + # notes 상세가 있으면 항목별 분해 + items = [] + if rawRows: + for row in rawRows: + item = str(row.get("항목", "")).strip() + if not item or any(kw in item for kw in ("합계", "총계", "소계")): + continue + v = parseNumStr(row.get(str(latestCol))) + if v is not None and v > 0: + items.append( + {"name": item, "latestValue": v, "pct": round(v / totalInt * 100, 1) if totalInt > 0 else 0} + ) + items.sort(key=lambda x: x["latestValue"], reverse=True) + items = items[:8] + + # 영업권 비중 + gw = _get(gwRow, latestCol) + goodwillPct = round(gw / totalInt * 100, 1) if totalInt > 0 and gw > 0 else None + + # 총자산 대비 무형자산 비중 추세 + trend = None + intPcts = [] + for col in yCols: + intVal = _get(intRow, col) + _get(gwRow, col) + taVal = _get(taRow, col) + if taVal > 0 and intVal > 0: + intPcts.append(intVal / taVal * 100) + if len(intPcts) >= 2: + diff = intPcts[0] - intPcts[-1] + if diff > 2: + trend = "비중 증가" + elif diff < -2: + trend = "비중 감소" + else: + trend = "안정" + + result_dict: dict[str, Any] = { + "items": items, + "totalIntangible": totalInt, + "totalAssets": ta, + "intangiblePct": round(totalInt / ta * 100, 1) if ta > 0 else 0, + "goodwillPct": goodwillPct, + "trend": trend, + } + + if rawRows: + result_dict["notesDetail"] = rawRows + + return result_dict + + +# ── 자산 플래그 ── + + +@memoized_calc +def calcAssetFlags(company, *, basePeriod: str | None = None) -> list[str]: + """자산 구조 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 문자열 리스트 (비영업자산 과다, CCC 과다, CAPEX 부족/과잉, 자산효율 악화 등). + """ + flags = [] + + structure = calcAssetStructure(company, basePeriod=basePeriod) + if structure: + lat = structure["latest"] + if lat["nonOpAssetsPct"] >= 40: + flags.append(f"비영업자산 {lat['nonOpAssetsPct']:.0f}% — 지주/투자 성격") + hist0 = structure["history"][0] if structure["history"] else {} + if hist0: + ta = lat["totalAssets"] + cipPct = _pct(hist0.get("cip", 0), ta) + if cipPct >= 10: + flags.append(f"건설중인자산 {cipPct:.0f}% — 대규모 투자 진행 중") + invPct = _pct(hist0.get("inventory", 0), ta) + if invPct >= 20: + flags.append(f"재고자산 {invPct:.0f}% — 재고 비대화 주의") + + wc = calcWorkingCapital(company, basePeriod=basePeriod) + if wc and wc["latest"]["ccc"] is not None: + ccc = wc["latest"]["ccc"] + if ccc > 2000: + pass # CCC > 2000일은 데이터 왜곡 가능성 → 경고 제외 + elif ccc > 120: + flags.append(f"CCC {ccc:.0f}일 — 현금 회수 매우 느림") + # CCC < 0은 선수금/매입채무 우위로 운전자본 효율적 → 경고 아닌 정보 + # efficiency.py의 "운전자본 유리 구조"로 충분 + + capex = calcCapexPattern(company, basePeriod=basePeriod) + if capex and capex["latest"]["capexToDepRatio"] is not None: + ratio = capex["latest"]["capexToDepRatio"] + if ratio < 0.5 and ratio > 0: + flags.append(f"CAPEX/감가상각 {ratio:.1f}배 — 투자 부족 (자산 노후화 위험)") + elif ratio > 3.0: + flags.append(f"CAPEX/감가상각 {ratio:.1f}배 — 공격적 투자") + + from dartlab.analysis.financial.efficiency import calcTurnoverTrend + + turnover = calcTurnoverTrend(company, basePeriod=basePeriod) + if turnover and turnover.get("totalAssetTurnover"): + tat = turnover["totalAssetTurnover"] + if len(tat) >= 2: + newest = tat[0].get("value") + oldest = tat[-1].get("value") + if newest is not None and oldest is not None and oldest > 0: + change = (newest - oldest) / oldest * 100 + if change < -20: + flags.append(f"총자산회전율 {change:.0f}% 하락 — 자산 효율 악화") + + return flags diff --git a/src/dartlab/analysis/financial/capital.py b/src/dartlab/analysis/financial/capital.py new file mode 100644 index 0000000000000000000000000000000000000000..db34847afe2bc8dab0e1a7cf91232810fa9d7417 --- /dev/null +++ b/src/dartlab/analysis/financial/capital.py @@ -0,0 +1,892 @@ +"""1-2 자금 구조 분석 — 계산만 담당. + +블록 조립은 review/builders.py가 한다. +여기는 company.select() → 계산 → dict/숫자 반환. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, sumBorrowings, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_QUARTERS = 5 +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _quarterlyCols(periods: list[str], maxQ: int = _MAX_QUARTERS) -> list[str]: + """기간 목록에서 분기 컬럼 추출. 분기가 없으면 연간 컬럼 fallback (EDGAR 호환).""" + quarterly = sorted([c for c in periods if "Q" in c], reverse=True)[:maxQ] + if quarterly: + return quarterly + # EDGAR fallback: 연간 데이터 (2024, 2023, ...) + return sorted([c for c in periods if c.isdigit() and len(c) == 4], reverse=True)[:maxQ] + + +def _getRatios(company): + """RatioResult 객체 — 내부 compute 전용 (attribute access).""" + try: + return company._finance.ratios + except (ValueError, KeyError, AttributeError): + return None + + +import contextvars + +_analysis_currency: contextvars.ContextVar[str] = contextvars.ContextVar("analysis_currency", default="KRW") + + +def _fmtAmt(value) -> str: + """금액을 조/억 또는 B/M 단위로 포맷 (순수 문자열, review import 없이).""" + if value is None: + return "-" + absVal = abs(value) + sign = "-" if value < 0 else "" + if _analysis_currency.get() == "USD": + if absVal >= 1_000_000_000: + return f"{sign}${absVal / 1_000_000_000:.1f}B" + if absVal >= 1_000_000: + return f"{sign}${absVal / 1_000_000:.0f}M" + if absVal >= 1_000: + return f"{sign}${absVal / 1_000:.0f}K" + return f"{sign}${absVal:,.0f}" + if absVal >= 1_0000_0000_0000: + return f"{sign}{absVal / 1_0000_0000_0000:.1f}조" + if absVal >= 1_0000_0000: + return f"{sign}{absVal / 1_0000_0000:.0f}억" + if absVal >= 1_0000: + return f"{sign}{absVal / 1_0000:.0f}만" + return f"{sign}{absVal:,.0f}" + + +# ── 계산 함수들 ── + + +@memoized_calc +def calcFundingSources(company, *, basePeriod: str | None = None) -> dict | None: + """조달원 분해 — 돈을 어디서 가져왔는가. + + 4가지 원천: 내부유보, 외부(주주), 금융차입, 영업조달. + 시계열로 비중 변화를 추적한다. + + 반환:: + + { + "latest": { + "totalAssets": float, + "retained": float, "retainedPct": float, + "paidIn": float, "paidInPct": float, + "finDebt": float, "finDebtPct": float, + "opFunding": float, "opFundingPct": float, + "otherLiab": float, "otherLiabPct": float, + "otherEquity": float, "otherEquityPct": float, + }, + "history": [ + {"period": str, "retainedPct": float, "paidInPct": float, + "finDebtPct": float, "opFundingPct": float}, ... + ], + "diagnosis": str, + } + """ + accounts = [ + "자산총계", + "자본총계", + "이익잉여금", + "미처분이익잉여금(결손금)", + "자본금", + "자본잉여금", + "부채총계", + "단기차입금", + "장기차입금", + "차입금단기", # short_term_borrowings 한국어 변형 + "long_term_borrowings", # 영문만 있는 회사 (한화오션) + "short_term_borrowings", + "차입부채", # 통합 차입금 (SK하이닉스) + "장기차입부채", # noncurrent_borrowings (LG에솔) + "유동성장기차입금", # current_portion_of_longterm_borrowings + "사채", + "매입채무", + "선수금", + "계약부채", + "선수수익", + ] + result = company.select("BS", accounts) + parsed = toDictBySnakeId(result) + if parsed is None: + return None + + data, allPeriods = parsed + taRow = data.get("total_assets") + if taRow is None: + return None + + from dartlab.analysis.financial._helpers import mergeRows + + reRow = mergeRows(data.get("retained_earnings"), data.get("unappropriated_retained_earnings_deficit")) + pcRow = data.get("paidin_capital", {}) + csRow = data.get("capital_surplus", {}) + eqRow = data.get("total_stockholders_equity", {}) + liabRow = data.get("total_liabilities", {}) + apRow = data.get("trade_and_other_payables", {}) + advRow = data.get("advance_from_customers", {}) + clRow = data.get("contract_liabilities", {}) + diRow = data.get("deferred_income", {}) + + yCols = annualColsFromPeriods(allPeriods, basePeriod, _MAX_YEARS) + if not yCols: + yCols = _quarterlyCols(allPeriods, _MAX_YEARS) + if not yCols: + return None + + history = [] + latest = None + + for col in yCols: + ta = taRow.get(col) + if ta is None or ta <= 0: + continue + + retained = reRow.get(col) or 0 + paidIn = (pcRow.get(col) or 0) + (csRow.get(col) or 0) + # 차입금: 회사 키 패턴 무관 헬퍼 (분리/통합/언더스코어/noncurrent 변형 모두 처리) + finDebt = sumBorrowings(data, col) + opFunding = (apRow.get(col) or 0) + (advRow.get(col) or 0) + (clRow.get(col) or 0) + (diRow.get(col) or 0) + + equity = eqRow.get(col) or 0 + otherEquity = max(0, equity - retained - paidIn) + liab = liabRow.get(col) or 0 + otherLiab = max(0, liab - finDebt - opFunding) + + entry = { + "period": col, + "retainedPct": retained / ta * 100, + "paidInPct": paidIn / ta * 100, + "finDebtPct": finDebt / ta * 100, + "opFundingPct": opFunding / ta * 100, + "otherLiabPct": otherLiab / ta * 100, + "otherEquityPct": otherEquity / ta * 100, + } + history.append(entry) + + if latest is None: + latest = { + "totalAssets": ta, + "retained": retained, + "retainedPct": entry["retainedPct"], + "paidIn": paidIn, + "paidInPct": entry["paidInPct"], + "finDebt": finDebt, + "finDebtPct": entry["finDebtPct"], + "opFunding": opFunding, + "opFundingPct": entry["opFundingPct"], + "otherLiab": otherLiab, + "otherLiabPct": entry["otherLiabPct"], + "otherEquity": otherEquity, + "otherEquityPct": entry["otherEquityPct"], + } + + if latest is None: + return None + + # 진단: 내부유보 vs 금융차입 비중으로 자금조달 성격 판단 + rPct = latest["retainedPct"] + fPct = latest["finDebtPct"] + if rPct >= 50: + diagnosis = "자기 힘으로 성장 — 이익잉여금이 자산의 절반 이상" + elif rPct >= 30 and fPct < 30: + diagnosis = "내부유보 중심 — 차입 의존도 낮음" + elif fPct >= 40: + diagnosis = "차입 의존 — 금융부채가 자산의 40% 이상" + elif fPct >= rPct: + diagnosis = "외부 조달 우위 — 금융차입이 내부유보를 초과" + else: + diagnosis = "균형 조달 — 내부유보와 외부 조달이 혼합" + + # 보충 지표: 순차입금/EBITDA, 암묵적 차입금리 + netDebtEbitda = _calcNetDebtEbitda(company, latest["finDebt"]) + impliedRate = _calcImpliedBorrowingRate(company, latest["finDebt"]) + + result = {"latest": latest, "history": history, "diagnosis": diagnosis} + if netDebtEbitda is not None: + result["netDebtEbitda"] = netDebtEbitda + if impliedRate is not None: + result["impliedBorrowingRate"] = impliedRate + + # 비중 변화 방향 (금융차입 비중이 늘고 있는가) + if len(history) >= 2: + newest = history[0]["finDebtPct"] + oldest = history[-1]["finDebtPct"] + diff = newest - oldest + if diff > 5: + result["leverageTrend"] = ( + f"금융차입 비중 +{diff:.0f}pp 증가 ({history[-1]['period']}→{history[0]['period']})" + ) + elif diff < -5: + result["leverageTrend"] = ( + f"금융차입 비중 {diff:.0f}pp 감소 ({history[-1]['period']}→{history[0]['period']})" + ) + + # notes enrichment — 차입금 주석 (이자율, 만기, 담보 등) + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["borrowings"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +def _latestAnnualVal(company, stmt: str, accountName: str) -> float | None: + """select(stmt, [accountName])에서 최신 연도 값을 꺼낸다. + + 회사마다 한국어 변형이 달라서 accountName 매칭 실패 가능 → None 반환. + """ + try: + result = company.select(stmt, [accountName]) + except (ValueError, KeyError): + return None + parsed = toDictBySnakeId(result) + if parsed is None: + return None + data, allPeriods = parsed + row = data.get(accountName) + if row is None: + return None + yCols = annualColsFromPeriods(allPeriods, None, 1) + if not yCols: + return None + return row.get(yCols[0]) + + +def _calcNetDebtEbitda(company, finDebt: float) -> float | None: + """순차입금/EBITDA — 차입 감당 능력.""" + cash = _latestAnnualVal(company, "BS", "현금및현금성자산") or 0 + netDebt = finDebt - cash + if netDebt <= 0: + return 0.0 # 순현금 + opIncome = _latestAnnualVal(company, "IS", "영업이익") + if opIncome is not None and opIncome > 0: + return netDebt / opIncome # EBITDA 대신 영업이익 기반 (보수적) + return None + + +def _calcImpliedBorrowingRate(company, finDebt: float) -> float | None: + """암묵적 차입금리(%) — 금융비용/금융부채.""" + if finDebt <= 0: + return None + ie = _latestAnnualVal(company, "IS", "이자비용") or _latestAnnualVal(company, "IS", "금융비용") + if ie is None or ie <= 0: + return None + return ie / finDebt * 100 + + +@memoized_calc +def calcCapitalOverview(company, *, basePeriod: str | None = None) -> dict | None: + """총자산/총부채/자기자본/순차입금 스냅샷. + + 반환:: + + {"metrics": [(label, value_str), ...]} + """ + ratios = _getRatios(company) + if ratios is None: + return None + + metrics = [] + + ta = getattr(ratios, "totalAssets", None) + if ta is not None: + metrics.append(("총자산", _fmtAmt(ta))) + + tl = getattr(ratios, "totalLiabilities", None) + dr = getattr(ratios, "debtRatio", None) + if tl is not None: + label = _fmtAmt(tl) + if dr is not None: + label += f" (부채비율 {dr:.0f}%)" + metrics.append(("총부채", label)) + + te = getattr(ratios, "totalEquity", None) + er = getattr(ratios, "equityRatio", None) + if te is not None: + label = _fmtAmt(te) + if er is not None: + label += f" (자기자본비율 {er:.0f}%)" + metrics.append(("자기자본", label)) + + nd = getattr(ratios, "netDebt", None) + if nd is not None: + if nd < 0: + metrics.append(("순차입금", f"{_fmtAmt(abs(nd))} (순현금)")) + else: + ndr = getattr(ratios, "netDebtRatio", None) + label = _fmtAmt(nd) + if ndr is not None: + label += f" (순차입금비율 {ndr:.0f}%)" + metrics.append(("순차입금", label)) + + if not metrics: + return None + + return {"metrics": metrics} + + +@memoized_calc +def calcCapitalTimeline(company, *, basePeriod: str | None = None) -> dict | None: + """자본총계·이익잉여금 시계열. + + 반환:: + + {"tables": [(label, rows, cols), ...]} + """ + result = company.select("BS", ["자본총계", "이익잉여금", "미처분이익잉여금(결손금)"]) + parsed = toDictBySnakeId(result) + if parsed is None or "total_stockholders_equity" not in parsed[0]: + return None + + data, allPeriods = parsed + from dartlab.analysis.financial._helpers import mergeRows + + equityRow = data["total_stockholders_equity"] + retainedRow = mergeRows(data.get("retained_earnings"), data.get("unappropriated_retained_earnings_deficit")) + + tables = [] + yCols = annualColsFromPeriods(allPeriods, basePeriod, _MAX_YEARS) + if yCols: + yearTable = _buildCapitalTable(equityRow, retainedRow, yCols) + if yearTable: + tables.append(("연도별", yearTable, yCols)) + + qCols = _quarterlyCols(allPeriods, _MAX_QUARTERS) + if qCols: + qtrTable = _buildCapitalTable(equityRow, retainedRow, qCols) + if qtrTable: + tables.append(("분기별", qtrTable, qCols)) + + if not tables: + return None + + return {"tables": tables} + + +def _buildCapitalTable(equityRow: dict, retainedRow: dict | None, cols: list[str]) -> list[dict]: + """자본구조 테이블 행 구성.""" + rows: list[dict] = [] + rows.append({"": "자본총계", **{c: equityRow.get(c) for c in cols}}) + + if retainedRow: + rows.append({"": "이익잉여금", **{c: retainedRow.get(c) for c in cols}}) + + paidInRow: dict = {"": "자본금+잉여금"} + for c in cols: + eq = equityRow.get(c) + re = retainedRow.get(c) + if eq is not None and re is not None: + paidInRow[c] = eq - re + else: + paidInRow[c] = None + rows.append(paidInRow) + + pctRow: dict = {"": "→ 내부유보 비중"} + for c in cols: + eq = equityRow.get(c) + re = retainedRow.get(c) + if eq and re and eq != 0: + pctRow[c] = f"{re / eq * 100:.0f}%" + else: + pctRow[c] = "-" + rows.append(pctRow) + + return rows + + +@memoized_calc +def calcDebtTimeline(company, *, basePeriod: str | None = None) -> dict | None: + """부채총계·금융부채·영업부채 시계열. + + 반환:: + + {"tables": [(label, rows, cols), ...]} + """ + result = company.select("BS", ["부채총계", "단기차입금", "장기차입금", "차입부채", "사채"]) + parsed = toDictBySnakeId(result) + if parsed is None or "total_liabilities" not in parsed[0]: + return None + + data, allPeriods = parsed + liabRow = data["total_liabilities"] + stbRow = data.get("shortterm_borrowings") + ltbRow = data.get("longterm_borrowings") + unifiedBorrowRow = data.get("borrowings") # 통합 차입금 fallback + bondRow = data.get("debentures") + # stb/ltb 둘 다 없는 회사 → unifiedBorrow 를 stb 위치로 + if stbRow is None and ltbRow is None and unifiedBorrowRow is not None: + stbRow = unifiedBorrowRow + + tables = [] + yCols = annualColsFromPeriods(allPeriods, basePeriod, _MAX_YEARS) + if yCols: + yearTable = _buildDebtTable(liabRow, stbRow, ltbRow, bondRow, yCols) + if yearTable: + tables.append(("연도별", yearTable, yCols)) + + qCols = _quarterlyCols(allPeriods, _MAX_QUARTERS) + if qCols: + qtrTable = _buildDebtTable(liabRow, stbRow, ltbRow, bondRow, qCols) + if qtrTable: + tables.append(("분기별", qtrTable, qCols)) + + if not tables: + return None + + return {"tables": tables} + + +def _buildDebtTable(liabRow: dict, stbRow, ltbRow, bondRow, cols: list[str]) -> list[dict]: + """부채구조 테이블 행 구성.""" + rows: list[dict] = [] + rows.append({"": "부채총계", **{c: liabRow.get(c) for c in cols}}) + + finDebtRow: dict = {"": "금융부채"} + hasFinDebt = False + for c in cols: + stb = (stbRow or {}).get(c) + ltb = (ltbRow or {}).get(c) + bond = (bondRow or {}).get(c) + parts = [v for v in [stb, ltb, bond] if v is not None] + if parts: + finDebtRow[c] = sum(parts) + hasFinDebt = True + else: + finDebtRow[c] = None + + if hasFinDebt: + opDebtRow: dict = {"": "영업부채"} + for c in cols: + tl = liabRow.get(c) + fd = finDebtRow.get(c) + if tl is not None and fd is not None: + opDebtRow[c] = tl - fd + else: + opDebtRow[c] = None + rows.append(opDebtRow) + rows.append(finDebtRow) + + pctRow: dict = {"": "→ 금융부채 비중"} + for c in cols: + tl = liabRow.get(c) + fd = finDebtRow.get(c) + if tl and fd and tl != 0: + pctRow[c] = f"{fd / tl * 100:.0f}%" + else: + pctRow[c] = "-" + rows.append(pctRow) + + return rows + + +@memoized_calc +def calcInterestBurden(company, *, basePeriod: str | None = None) -> dict | None: + """이자보상배율·이자비용. + + 반환:: + + {"metrics": [(label, value_str), ...]} + """ + ratios = _getRatios(company) + if ratios is None: + return None + + metrics = [] + + ic = getattr(ratios, "interestCoverage", None) + if ic is not None: + if ic >= 10: + quality = "우수" + elif ic >= 3: + quality = "안정" + elif ic >= 1.5: + quality = "주의" + else: + quality = "위험" + metrics.append(("이자보상배율", f"{ic:.1f}배 — {quality}")) + + ie = getattr(ratios, "interestExpense", None) + if ie is not None: + metrics.append(("이자비용", _fmtAmt(ie))) + + if not metrics: + return None + + return {"metrics": metrics} + + +@memoized_calc +def calcLiquidity(company, *, basePeriod: str | None = None) -> dict | None: + """유동비율·당좌비율·현금비율·순운전자본. + + 반환:: + + {"metrics": [(label, value_str), ...]} + """ + ratios = _getRatios(company) + if ratios is None: + return None + + metrics = [] + + cr = getattr(ratios, "currentRatio", None) + if cr is not None: + quality = "안정" if cr >= 150 else "보통" if cr >= 100 else "주의" + metrics.append(("유동비율", f"{cr:.0f}% — {quality}")) + + qr = getattr(ratios, "quickRatio", None) + if qr is not None: + metrics.append(("당좌비율", f"{qr:.0f}%")) + + car = getattr(ratios, "cashRatio", None) + if car is not None: + metrics.append(("현금비율", f"{car:.0f}%")) + + wc = getattr(ratios, "workingCapital", None) + if wc is not None: + metrics.append(("순운전자본", _fmtAmt(wc))) + + if not metrics: + return None + + return {"metrics": metrics} + + +@memoized_calc +def calcCashFlowStructure(company, *, basePeriod: str | None = None) -> dict | None: + """영업CF/투자CF/재무CF + FCF + CF 패턴. + + 반환:: + + { + "tableRows": [dict, ...], + "cols": [str, ...], + "pattern": str | None, + "metrics": [(label, value_str), ...] | None, + } + """ + result = company.select( + "CF", + ["영업활동현금흐름", "투자활동현금흐름", "재무활동으로인한현금흐름", "유형자산의취득"], + ) + parsed = toDictBySnakeId(result) + if parsed is None: + return None + + data, allPeriods = parsed + ocfRow = data.get("operating_cashflow") or data.get("cash_flows_from_operating_activities") + if ocfRow is None: + return None + icfRow = data.get("investing_cashflow") or data.get("cash_flows_from_investing_activities") + fcfRow = data.get("cash_flows_from_financing_activities") or data.get("financing_cashflow") + capexRow = data.get("purchase_of_property_plant_and_equipment") + + qCols = _quarterlyCols(allPeriods, _MAX_QUARTERS) + if not qCols: + return None + + rawRows: list[dict] = [] + rawRows.append({"": "영업CF", **{c: ocfRow.get(c) for c in qCols}}) + if icfRow: + rawRows.append({"": "투자CF", **{c: icfRow.get(c) for c in qCols}}) + if fcfRow: + rawRows.append({"": "재무CF", **{c: fcfRow.get(c) for c in qCols}}) + if capexRow: + freeRow: dict = {"": "FCF"} + for c in qCols: + ocf = ocfRow.get(c) + capex = capexRow.get(c) + if ocf is not None and capex is not None: + free = ocf + capex if capex < 0 else ocf - capex + freeRow[c] = free + else: + freeRow[c] = None + rawRows.append(freeRow) + + # CF 패턴 분류 (분기 우선, 분기 데이터 ��으면 연간 fallback) + latestCol = qCols[0] + ocfSign = _sign(ocfRow.get(latestCol)) + icfSign = _sign((icfRow or {}).get(latestCol)) + fcfSign = _sign((fcfRow or {}).get(latestCol)) + pattern = _classifyCfPattern(ocfSign, icfSign, fcfSign) + if pattern is None: + # Q4 기준으로 재시도 (재무CF가 특정 분기에만 있는 기업 대응) + q4Cols = sorted([c for c in allPeriods if c.endswith("Q4")], reverse=True) + for qc in q4Cols[:3]: + ocfA = _sign(ocfRow.get(qc)) + icfA = _sign((icfRow or {}).get(qc)) + fcfA = _sign((fcfRow or {}).get(qc)) + pattern = _classifyCfPattern(ocfA, icfA, fcfA) + if pattern is not None: + break + + # 추가 지표 + ratios = _getRatios(company) + metrics = None + if ratios is not None: + extra = [] + ocfm = getattr(ratios, "operatingCfMargin", None) + if ocfm is not None: + extra.append(("영업CF 마진", f"{ocfm:.1f}%")) + cxr = getattr(ratios, "capexRatio", None) + if cxr is not None: + extra.append(("CAPEX/매출", f"{cxr:.1f}%")) + ftor = getattr(ratios, "fcfToOcfRatio", None) + if ftor is not None: + extra.append(("FCF/OCF", f"{ftor:.0f}%")) + if extra: + metrics = extra + + return { + "tableRows": rawRows, + "cols": qCols, + "pattern": pattern, + "metrics": metrics, + } + + +def _sign(val) -> str: + """양/음/0 부호.""" + if val is None: + return "?" + if val > 0: + return "+" + if val < 0: + return "-" + return "0" + + +def _classifyCfPattern(ocf: str, icf: str, fcf: str) -> str | None: + """영업/투자/재무 CF 부호 조합으로 패턴 분류.""" + patterns = { + ("+", "-", "-"): "성숙형 — 영업으로 벌어 투자하고 부채 상환", + ("+", "-", "+"): "확장형 — 영업 + 외부 조달로 적극 투자", + ("+", "+", "-"): "구조조정형 — 자산 매각하며 부채 상환", + ("-", "-", "+"): "위기형 — 영업 적자를 외부 차입으로 메움", + ("-", "+", "+"): "축소형 — 자산 매각 + 차입으로 영업 적자 보전", + ("-", "+", "-"): "전환형 — 자산 매각으로 부채 상환, 영업 회복 필요", + # 재무CF 미보고("?" 또는 "0") — 영업/투자만으로 부분 분류 + ("+", "-", "?"): "성숙형 — 영업으로 벌어 투자 (재무CF 미보고)", + ("+", "-", "0"): "성숙형 — 영업으로 벌어 투자 (재무CF 미보고)", + ("-", "-", "?"): "위기형 — 영업+투자 모두 유출 (재무CF 미보고)", + ("-", "-", "0"): "위기형 — 영업+투자 모두 유출 (재무CF 미보고)", + } + return patterns.get((ocf, icf, fcf)) + + +def _isFinancialCompany(company) -> bool: + """금융업 판별 (capital.py 내부용).""" + try: + sector = getattr(company, "sector", None) + if sector is not None: + from dartlab.core.sector.types import Sector + + if sector.sector == Sector.FINANCIALS: + return True + name = getattr(company, "corpName", "") or "" + if any(k in name for k in ("지주", "홀딩스", "Holdings")): + return True + except (AttributeError, ImportError): + pass + return False + + +@memoized_calc +def calcDistressIndicators(company, *, basePeriod: str | None = None) -> dict | None: + """Altman Z, Ohlson O, Piotroski F, Springate S. + + 반환:: + + {"metrics": [(label, value_str), ...]} + """ + ratios = _getRatios(company) + if ratios is None: + return None + + isFinancial = _isFinancialCompany(company) + metrics = [] + + # Altman Z-Score: 비금융 제조업용 모형 — 금융업에는 적용 불가 + if not isFinancial: + az = getattr(ratios, "altmanZScore", None) + if az is None: + az = getattr(ratios, "altmanZppScore", None) + if az is not None: + if az > 2.99: + quality = "안전" + elif az > 1.81: + quality = "회색지대" + else: + quality = "부실 위험" + metrics.append(("Altman Z", f"{az:.2f} — {quality}")) + + op = getattr(ratios, "ohlsonProbability", None) + if op is not None: + metrics.append(("Ohlson 부실확률", f"{op:.1f}%")) + else: + os_ = getattr(ratios, "ohlsonOScore", None) + if os_ is not None: + metrics.append(("Ohlson O-Score", f"{os_:.2f}")) + + pf = getattr(ratios, "piotroskiFScore", None) + if pf is not None: + maxF = getattr(ratios, "piotroskiMaxScore", 9) + if pf >= 7: + quality = "재무 건전" + elif pf >= 4: + quality = "보통" + else: + quality = "재무 약화" + metrics.append(("Piotroski F", f"{pf}/{maxF} — {quality}")) + + ss = getattr(ratios, "springateSScore", None) + if ss is not None: + quality = "안전" if ss > 0.862 else "부실 위험" + metrics.append(("Springate S", f"{ss:.2f} — {quality}")) + + if not metrics: + return None + + return {"metrics": metrics} + + +@memoized_calc +def calcCapitalFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]]: + """자금조달 관련 경고/기회 플래그. + + Returns + ------- + list[tuple[str, str]] + 각 원소는 (플래그 텍스트, "warning" | "opportunity"). + """ + flags: list[tuple[str, str]] = [] + + ratios = _getRatios(company) + if ratios is None: + return flags + + isFinancial = _isFinancialCompany(company) + + dr = getattr(ratios, "debtRatio", None) + if dr is not None: + if isFinancial: + # 금융업은 예수부채로 부채비율이 구조적으로 높음 + if dr > 2000: + flags.append((f"금융업 부채비율 {dr:.0f}% — 과다", "warning")) + elif dr > 200: + flags.append((f"고부채 (부채비율 {dr:.0f}%)", "warning")) + + ic = getattr(ratios, "interestCoverage", None) + if not isFinancial and ic is not None and ic < 3: + severity = "심각" if ic < 1.5 else "주의" + flags.append((f"이자보상 {severity} ({ic:.1f}배)", "warning")) + + cr = getattr(ratios, "currentRatio", None) + nd = getattr(ratios, "netDebt", None) + isNetCash = nd is not None and nd < 0 + if not isFinancial and cr is not None and cr < 100: + if isNetCash: + # 순현금이면 유동비율 낮아도 실질 유동성 위험 낮음 (IFRS16 리스부채 등) + flags.append((f"유동비율 주의 ({cr:.0f}%) — 순현금이므로 실질 위험 낮음", "warning")) + elif ic is not None and ic > 5: + # 이자보상배율 양호하면 실질 유동성 위험 낮음 + flags.append((f"유동비율 주의 ({cr:.0f}%) — 이자보상 {ic:.0f}배로 양호", "warning")) + else: + flags.append((f"유동성 위기 (유동비율 {cr:.0f}%)", "warning")) + + az = getattr(ratios, "altmanZScore", None) or getattr(ratios, "altmanZppScore", None) + if not isFinancial and az is not None and az < 1.81: + flags.append((f"Altman Z 부실 경계 ({az:.2f})", "warning")) + + pf = getattr(ratios, "piotroskiFScore", None) + if pf is not None and pf < 3: + flags.append((f"Piotroski F 재무 약화 ({pf}/9)", "warning")) + + # 금융부채 비중 (BS에서 직접 계산) + flagResult = company.select( + "BS", + [ + "부채총계", + "단기차입금", + "장기차입금", + "차입부채", + "사채", + "자본총계", + "이익잉여금", + "미처분이익잉여금(결손금)", + ], + ) + flagParsed = toDictBySnakeId(flagResult) + if flagParsed is not None and "total_liabilities" in flagParsed[0]: + data = flagParsed[0] + liabRow = data["total_liabilities"] + stbRow = data.get("shortterm_borrowings") + ltbRow = data.get("longterm_borrowings") + unifiedBorrowRow = data.get("borrowings") # 통합 차입금 fallback + bondRow = data.get("debentures") + # stb/ltb 둘 다 None → unifiedBorrow 를 stb 위치로 + if stbRow is None and ltbRow is None and unifiedBorrowRow is not None: + stbRow = unifiedBorrowRow + finDebtPct = _calcFinDebtPct(liabRow, stbRow, ltbRow, bondRow) + if finDebtPct is not None and finDebtPct > 50: + flags.append((f"금융부채 비중 {finDebtPct:.0f}% — 이자 부담 부채 높음", "warning")) + + equityRow = data.get("total_stockholders_equity") + from dartlab.analysis.financial._helpers import mergeRows + + retainedRow = mergeRows(data.get("retained_earnings"), data.get("unappropriated_retained_earnings_deficit")) + retainedPct = _calcRetainedPct(equityRow, retainedRow) + if retainedPct is not None and retainedPct > 70: + flags.append((f"내부유보 비중 {retainedPct:.0f}% — 자기 힘으로 성장", "opportunity")) + + nd = getattr(ratios, "netDebt", None) + if nd is not None and nd < 0: + flags.append(("순현금 상태", "opportunity")) + + if ic is not None and ic > 10: + flags.append((f"이자보상 우수 ({ic:.0f}배)", "opportunity")) + + if pf is not None and pf >= 7: + flags.append((f"Piotroski F 재무 건전 ({pf}/9)", "opportunity")) + + return flags + + +# ── 내부 헬퍼 ── + + +def _calcRetainedPct(equityRow, retainedRow) -> float | None: + """이익잉여금 / 자본총계 비중 (%).""" + if equityRow is None or retainedRow is None: + return None + for key in equityRow: + eq = equityRow.get(key) + re = retainedRow.get(key) + if eq and re and eq != 0: + return re / eq * 100 + return None + + +def _calcFinDebtPct(liabRow, stbRow, ltbRow, bondRow) -> float | None: + """금융부채 / 부채총계 비중 (%) — 최신 기간.""" + if liabRow is None: + return None + for key in liabRow: + tl = liabRow.get(key) + if tl is None or tl == 0: + continue + stb = (stbRow or {}).get(key) + ltb = (ltbRow or {}).get(key) + bond = (bondRow or {}).get(key) + parts = [v for v in [stb, ltb, bond] if v is not None] + if parts: + return sum(parts) / tl * 100 + return None diff --git a/src/dartlab/analysis/financial/capitalAllocation.py b/src/dartlab/analysis/financial/capitalAllocation.py new file mode 100644 index 0000000000000000000000000000000000000000..2239563c4e6c224b2fd3d67281a40a07f9a34adc --- /dev/null +++ b/src/dartlab/analysis/financial/capitalAllocation.py @@ -0,0 +1,521 @@ +"""자본배분 분석 — 배당, 주주환원, 재투자, FCF 사용처 시계열. + +벌어들인 돈을 어디에 쓰는지를 시계열로 추적한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import ( + annualColsFromPeriods, + toDictBySnakeId, +) +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +from dartlab.core.finance.calc import safePct as _pct # noqa: E402 + + +# ── 배당 정책 ── + + +@memoized_calc +def calcDividendPolicy(company, *, basePeriod: str | None = None) -> dict | None: + """배당 정책 시계열 — 배당성향, 배당금 추이, 연속 배당. + + 반환:: + + { + "history": [ + { + "period": str, + "dividendsPaid": float, + "netIncome": float, + "payoutRatio": float | None, + "dividendGrowth": float | None, + }, + ... + ], + "consecutiveYears": int, + } + """ + cfResult = company.select("CF", ["dividends_paid"]) + isResult = company.select("IS", ["당기순이익"]) + + cfParsed = toDictBySnakeId(cfResult) + isParsed = toDictBySnakeId(isResult) + if cfParsed is None or isParsed is None: + return None + + cfData, cfPeriods = cfParsed + isData, _ = isParsed + + divRow = cfData.get("dividends_paid", {}) + niRow = isData.get("net_profit", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + consecutiveYears = 0 + countingConsecutive = True + + for i, col in enumerate(yCols): + divPaid = abs(_getF(divRow, col)) # CF에서 음수로 나옴 + ni = _getF(niRow, col) + + payoutRatio = _pct(divPaid, ni) if ni > 0 else None + + # 배당 성장률 — base effect cap ±999% + # prev=0 (신규배당) 또는 cur=0 (중단) 은 None. + # 신규배당 base effect (예: SK +8600%, HMM +2913%) 는 사용자 혼란 → cap. + dividendGrowth = None + if i + 1 < len(yCols): + prevCol = yCols[i + 1] + prevDiv = abs(_getF(divRow, prevCol)) + if prevDiv > 0 and divPaid > 0: + rawGrowth = (divPaid - prevDiv) / prevDiv * 100 + dividendGrowth = round(max(min(rawGrowth, 999.0), -999.0), 2) + + history.append( + { + "period": col, + "dividendsPaid": divPaid, + "netIncome": ni, + "payoutRatio": payoutRatio, + "dividendGrowth": dividendGrowth, + } + ) + + # 연속 배당 연수 + if countingConsecutive: + if divPaid > 0: + consecutiveYears += 1 + else: + countingConsecutive = False + + return {"history": history, "consecutiveYears": consecutiveYears} if history else None + + +# ── 주주환원 ── + + +@memoized_calc +def calcShareholderReturn(company, *, basePeriod: str | None = None) -> dict | None: + """주주환��� 시계열 — 배당 + 자사주 매입 vs FCF. + + 반환:: + + { + "history": [ + { + "period": str, + "dividendsPaid": float, + "treasuryStockPurchase": float, + "totalReturn": float, + "fcf": float, + "returnToFcf": float | None, + }, + ... + ], + } + """ + cfResult = company.select( + "CF", + [ + "operating_cashflow", + "purchase_of_property_plant_and_equipment", + "purchase_of_intangible_assets", + "dividends_paid", + "purchase_of_treasury_stock", + ], + ) + + cfParsed = toDictBySnakeId(cfResult) + if cfParsed is None: + return None + + cfData, cfPeriods = cfParsed + + ocfRow = cfData.get("operating_cashflow", {}) + capexRow = cfData.get("purchase_of_property_plant_and_equipment", {}) + intCapexRow = cfData.get("purchase_of_intangible_assets", {}) + divRow = cfData.get("dividends_paid", {}) + tsRow = cfData.get("purchase_of_treasury_stock", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + divPaid = abs(_getF2(divRow, col)) + ocf = _getF2(ocfRow, col) + capex = abs(_getF2(capexRow, col)) + abs(_getF2(intCapexRow, col)) + fcf = ocf - capex + + tsPurchase = abs(_getF2(tsRow, col)) + + totalReturn = divPaid + tsPurchase + returnToFcf = _pct(totalReturn, fcf) if fcf > 0 else None + + history.append( + { + "period": col, + "dividendsPaid": divPaid, + "treasuryStockPurchase": tsPurchase, + "totalReturn": totalReturn, + "fcf": fcf, + "returnToFcf": returnToFcf, + } + ) + + return {"history": history} if history else None + + +# ── 재투자 ── + + +@memoized_calc +def calcReinvestment(company, *, basePeriod: str | None = None) -> dict | None: + """재투자 시계열 — 재투자율, CAPEX/매출. + + 반환:: + + { + "history": [ + { + "period": str, + "capex": float, + "operatingIncome": float, + "revenue": float, + "capexToRevenue": float | None, + "retentionRate": float | None, + }, + ... + ], + } + """ + cfResult = company.select( + "CF", + ["purchase_of_property_plant_and_equipment", "purchase_of_intangible_assets", "dividends_paid"], + ) + isResult = company.select("IS", ["영업이익", "매출액", "당기순이익"]) + + cfParsed = toDictBySnakeId(cfResult) + isParsed = toDictBySnakeId(isResult) + if cfParsed is None or isParsed is None: + return None + + cfData, cfPeriods = cfParsed + isData, _ = isParsed + + capexRow = cfData.get("purchase_of_property_plant_and_equipment", {}) + intCapexRow = cfData.get("purchase_of_intangible_assets", {}) + divRow = cfData.get("dividends_paid", {}) + opRow = isData.get("operating_profit", {}) + revRow = isData.get("sales", {}) + niRow = isData.get("net_profit", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + capex = abs(_getF3(capexRow, col)) + abs(_getF3(intCapexRow, col)) + opIncome = _getF3(opRow, col) + rev = _getF3(revRow, col) + ni = _getF3(niRow, col) + divPaid = abs(_getF3(divRow, col)) + + # 유보율 = 1 - 배당성향 + retentionRate = None + if ni > 0: + payoutRatio = divPaid / ni + retentionRate = (1 - payoutRatio) * 100 + + history.append( + { + "period": col, + "capex": capex, + "operatingIncome": opIncome, + "revenue": rev, + "capexToRevenue": _pct(capex, rev), + "retentionRate": retentionRate, + } + ) + + return {"history": history} if history else None + + +# ─�� FCF 사용처 분해 ── + + +@memoized_calc +def calcFcfUsage(company, *, basePeriod: str | None = None) -> dict | None: + """FCF 사용처 분해 시계열 — 배당/부채상환/잔여. + + 반환:: + + { + "history": [ + { + "period": str, + "fcf": float, + "dividendsPaid": float, + "debtRepaid": float, + "residual": float, + }, + ... + ], + } + """ + cfResult = company.select( + "CF", + [ + "operating_cashflow", + "purchase_of_property_plant_and_equipment", + "purchase_of_intangible_assets", + "dividends_paid", + "repayment_of_longterm_borrowings", + "redemption_of_current_portion_of_longterm_borrowings", + "repayment_of_bonds_and_longterm_borrowings", + "repayment_of_borrowings", # Fallback: 단/장기 분리 안 된 통합 차입금 상환 (audit 04 #B 같은 패턴) + ], + ) + cfParsed = toDictBySnakeId(cfResult) + if cfParsed is None: + return None + + cfData, cfPeriods = cfParsed + ocfRow = cfData.get("operating_cashflow", {}) + capexRow = cfData.get("purchase_of_property_plant_and_equipment", {}) + intCapexRow = cfData.get("purchase_of_intangible_assets", {}) + divRow = cfData.get("dividends_paid", {}) + repayRow1 = cfData.get("repayment_of_longterm_borrowings", {}) + repayRow2 = cfData.get("redemption_of_current_portion_of_longterm_borrowings", {}) + repayRow3 = cfData.get("repayment_of_bonds_and_longterm_borrowings", {}) + repayRow4 = cfData.get("repayment_of_borrowings", {}) # 통합 차입금 상환 + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF4(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + ocf = _getF4(ocfRow, col) + capex = abs(_getF4(capexRow, col)) + abs(_getF4(intCapexRow, col)) + fcf = ocf - capex + divPaid = abs(_getF4(divRow, col)) + # 분리 키 + 통합 키 fallback. 어느 한쪽이 모두 0이면 다른 쪽이 활성됨 + debtRepaidSplit = abs(_getF4(repayRow1, col)) + abs(_getF4(repayRow2, col)) + abs(_getF4(repayRow3, col)) + debtRepaidUnified = abs(_getF4(repayRow4, col)) + debtRepaid = debtRepaidSplit if debtRepaidSplit > 0 else debtRepaidUnified + residual = fcf - divPaid - debtRepaid + + history.append( + { + "period": col, + "fcf": fcf, + "dividendsPaid": divPaid, + "debtRepaid": debtRepaid, + "residual": residual, + } + ) + + return {"history": history} if history else None + + +# ── 배당 서술 보강 (docs) ── + + +@memoized_calc +def calcDividendDocs(company, *, basePeriod: str | None = None) -> dict | None: + """docs dividend 토픽에서 배당성향, 배당수익률, 주당배당금 추출. + + 반환:: + + { + "dps": float | None, + "payoutRatio": float | None, + "dividendYield": float | None, + "period": str, + } + """ + from dartlab.analysis.financial._helpers import parseNumStr + + result = company.select("dividend", ["주당현금배당금", "현금배당성향", "현금배당수익률"]) + if result is None: + return None + + import polars as pl + + df = result if isinstance(result, pl.DataFrame) else getattr(result, "df", None) + if df is None or "항목" not in df.columns: + return None + + from dartlab.analysis.financial._helpers import periodCols + + pCols = periodCols(df) + if not pCols: + return None + + latestCol = pCols[0] + labelCol = "항목" + items = df[labelCol].to_list() + vals = df[latestCol].to_list() + + dps = None + payoutRatio = None + dividendYield = None + + for it, v in zip(items, vals): + it = str(it) + parsed = parseNumStr(str(v)) + if parsed is None: + continue + if "주당현금배당금" in it and "보통주" in it and dps is None: + dps = parsed + elif "현금배당성향" in it and "당기" in it and payoutRatio is None: + payoutRatio = parsed + elif "현금배당수익률" in it and "보통주" in it and dividendYield is None: + dividendYield = parsed + + if dps is None and payoutRatio is None and dividendYield is None: + return None + + return { + "dps": dps, + "payoutRatio": payoutRatio, + "dividendYield": dividendYield, + "period": latestCol, + } + + +# ── 자사주 현황 (docs/report) ── + + +@memoized_calc +def calcTreasuryStockStatus(company, *, basePeriod: str | None = None) -> dict | None: + """treasuryStock 토픽에서 자사주 취득/처분/소각 현황 추출. + + 반환:: + + { + "rows": [ + {"method": str, "beginShares": float, "acquired": float, + "disposed": float, "retired": float, "endShares": float}, + ... + ], + } + """ + result = company.show("treasuryStock") + if result is None: + return None + + import polars as pl + + if not isinstance(result, pl.DataFrame): + return None + + # report 토픽 — 이미 수치 DataFrame + if "기말수량" not in result.columns and "기말잔량" not in result.columns: + return None + + endCol = "기말수량" if "기말수량" in result.columns else "기말잔량" + beginCol = "기초수량" if "기초수량" in result.columns else None + acqCol = "변동수량(취득)" if "변동수량(취득)" in result.columns else None + dispCol = "변동수량(처분)" if "변동수량(처분)" in result.columns else None + retCol = "변동수량(소각)" if "변동수량(소각)" in result.columns else None + + # 총계 행만 추출 + rows = [] + for row in result.iter_rows(named=True): + method = str(row.get("취득방법(대)", row.get("취득방법(중)", ""))) + if "총계" not in method: + continue + entry = {"method": method} + if beginCol: + entry["beginShares"] = row.get(beginCol) + if acqCol: + entry["acquired"] = row.get(acqCol) + if dispCol: + entry["disposed"] = row.get(dispCol) + if retCol: + entry["retired"] = row.get(retCol) + entry["endShares"] = row.get(endCol) + rows.append(entry) + + return {"rows": rows} if rows else None + + +# ── 플래그 ── + + +@memoized_calc +def calcCapitalAllocationFlags(company, *, basePeriod: str | None = None) -> list[str]: + """자본배분 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 문자열 리스트 (배당 초과, FCF 초과 환원, 극소 투자 등). + """ + flags = [] + + dividend = calcDividendPolicy(company, basePeriod=basePeriod) + if dividend and dividend["history"]: + h0 = dividend["history"][0] + pr = h0.get("payoutRatio") + if pr is not None and pr > 100: + flags.append(f"배당���향 {pr:.0f}% — 이익 초과 배당") + + # 배당 3년 연속 감소 + hist = dividend["history"] + if len(hist) >= 3: + divs = [h["dividendsPaid"] for h in hist[:3]] + if divs[0] < divs[1] < divs[2] and divs[2] > 0: + flags.append("배당금 3년 연속 감소") + + shareholder = calcShareholderReturn(company, basePeriod=basePeriod) + if shareholder and shareholder["history"]: + h0 = shareholder["history"][0] + rtf = h0.get("returnToFcf") + if rtf is not None and rtf > 100: + flags.append(f"주주환원/FCF {rtf:.0f}% — FCF 초과 환원") + + reinvest = calcReinvestment(company, basePeriod=basePeriod) + if reinvest and reinvest["history"]: + h0 = reinvest["history"][0] + cr = h0.get("capexToRevenue") + if cr is not None and cr < 1: + flags.append(f"CAPEX/매출 {cr:.1f}% — 극소 투자") + + return flags diff --git a/src/dartlab/analysis/financial/cashflow.py b/src/dartlab/analysis/financial/cashflow.py new file mode 100644 index 0000000000000000000000000000000000000000..b87eed5894d3fba60a0f858fe16744093f807e37 --- /dev/null +++ b/src/dartlab/analysis/financial/cashflow.py @@ -0,0 +1,349 @@ +"""1-4 현금흐름 — 계산만 담당. + +CF 3구간(영업/투자/재무) + FCF + 이익의 현금 뒷받침 + CF 패턴. +블록 조립은 review/builders.py가 한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _get(row: dict, col: str) -> float: + """dict에서 안전하게 값 꺼내기 (None -> 0).""" + v = row.get(col) if row else None + return v if v is not None else 0 + + +# ── CF 패턴 분류 ── + + +def _classifyCfPattern(ocf: float, icf: float, fcf: float) -> str | None: + """영업/투자/재무 CF 부호 조합으로 패턴 분류.""" + + def _s(v: float) -> str: + if v > 0: + return "+" + if v < 0: + return "-" + return "0" + + patterns = { + ("+", "-", "-"): "성숙형 — 영업으로 벌어 투자하고 부채 상환", + ("+", "-", "+"): "확장형 — 영업 + 외부 조달로 적극 투자", + ("+", "+", "-"): "구조조정형 — 자산 매각하며 부채 상환", + ("-", "-", "+"): "위기형 — 영업 적자를 외부 차입으로 메움", + ("-", "+", "+"): "축소형 — 자산 매각 + 차입으로 영업 적자 보전", + ("-", "+", "-"): "전환형 — 자산 매각으로 부채 상환, 영업 회복 필요", + # 재무CF가 0(미보고)인 경우 — 영업/투자만으로 부분 분류 + ("+", "-", "0"): "성숙형 — 영업으로 벌어 투자 (재무CF 미보고)", + ("-", "-", "0"): "위기형 — 영업+투자 모두 유출 (재무CF 미보고)", + ("+", "+", "0"): "구조조정형 — 자산 매각 진행 (재무CF 미보고)", + ("-", "+", "0"): "축소형 — 자산 매각으로 영업 적자 보전 (재무CF 미보고)", + } + return patterns.get((_s(ocf), _s(icf), _s(fcf))) + + +# ── 메인: CF 3구간 + FCF ── + + +@memoized_calc +def calcCashFlowOverview(company, *, basePeriod: str | None = None) -> dict | None: + """영업CF/투자CF/재무CF + FCF 시계열. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + ocf : float — 영업활동현금흐름 (원) + icf : float — 투자활동현금흐름 (원) + fcfFinancing : float — 재무활동현금흐름 (원) + capex : float — 설비투자 (원) + fcf : float — 잉여현금흐름 (원) + pattern : str — CF 패턴 분류 ("성숙형"|"확장형"|"위기형" 등) + """ + # snakeId 단일 패턴 (alias 양방향 자동 매핑) + cfAccounts = [ + "영업활동현금흐름", + "투자활동현금흐름", + "재무활동으로인한현금흐름", + "유형자산의취득", + "무형자산의취득", + ] + result = company.select("CF", cfAccounts) + parsed = toDictBySnakeId(result) + if parsed is None: + return None + + data, allPeriods = parsed + ocfRow = data.get("operating_cashflow", {}) + if not ocfRow: + return None + icfRow = data.get("investing_cashflow", {}) + finRow = data.get("cash_flows_from_financing_activities", {}) + capexRow = data.get("purchase_of_property_plant_and_equipment", {}) + intCapexRow = data.get("purchase_of_intangible_assets", {}) + # Note: SK하이닉스 2025Q4 같이 raw 데이터에 결손이면 None — calc 결과도 None. + # `c.show("CF")` 의 derived row (`financing_cashflow`) 는 별도 데이터 소스로, + # mergeAliasRows 가 양방향 머지 처리 (core/finance/labels.py). + + yCols = annualColsFromPeriods(allPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + ocf = _getF(ocfRow, col) + icf = _getF(icfRow, col) + fin = _getF(finRow, col) + # CAPEX: CF에서 음수로 나옴 -> abs + capex = abs(_getF(capexRow, col)) + abs(_getF(intCapexRow, col)) + fcf = ocf - capex + + entry = { + "period": col, + "ocf": ocf, + "icf": icf, + "fcfFinancing": fin, + "capex": capex, + "fcf": fcf, + "pattern": _classifyCfPattern(ocf, icf, fin), + } + history.append(entry) + + if not history: + return None + return {"history": history} + + +# ── 이익의 현금 뒷받침 ── + + +@memoized_calc +def calcCashQuality(company, *, basePeriod: str | None = None) -> dict | None: + """영업CF/순이익, 영업CF/매출 — 이익이 현금으로 뒷받침되는가. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + ocf : float — 영업활동현금흐름 (원) + netIncome : float — 당기순이익 (원) + revenue : float — 매출액 (원) + ocfToNi : float — 영업CF/순이익 (%) + ocfMargin : float — 영업CF/매출 (%) + """ + cfResult = company.select("CF", ["영업활동현금흐름"]) + isResult = company.select("IS", ["당기순이익", "매출액"]) + + cfParsed = toDictBySnakeId(cfResult) + isParsed = toDictBySnakeId(isResult) + if cfParsed is None or isParsed is None: + return None + + cfData, cfPeriods = cfParsed + isData, _ = isParsed + + ocfRow = cfData.get("영업활동현금흐름", {}) + niRow = isData.get("당기순이익", {}) + revRow = isData.get("매출액", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + ocf = _getF2(ocfRow, col) + ni = _getF2(niRow, col) + rev = _getF2(revRow, col) + + ocfToNi = ocf / ni * 100 if ni != 0 else None + # 극단값 클램핑: ±1000% 초과는 "의미 없는 비율" → None + if ocfToNi is not None and abs(ocfToNi) > 1000: + ocfToNi = None + ocfMargin = ocf / rev * 100 if rev > 0 else None + + history.append( + { + "period": col, + "ocf": ocf, + "netIncome": ni, + "revenue": rev, + "ocfToNi": ocfToNi, + "ocfMargin": ocfMargin, + } + ) + + if not history: + return None + return {"history": history} + + +# ── CF 플래그 ── + + +@memoized_calc +def calcCashFlowFlags(company, *, basePeriod: str | None = None) -> list[str]: + """현금흐름 경고 신호. + + Returns + ------- + list[str] + 경고 플래그 문자열 목록. + """ + flags = [] + + overview = calcCashFlowOverview(company, basePeriod=basePeriod) + if overview and overview["history"]: + h0 = overview["history"][0] + + # 영업CF 적자 + if h0["ocf"] < 0: + flags.append("영업CF 적자 — 본업에서 현금이 나오지 않음") + + # FCF 적자 + if h0["fcf"] < 0 and h0["ocf"] > 0: + flags.append("FCF 적자 — 영업CF보다 투자가 큼") + + # 위기형/축소형 패턴 + pat = h0.get("pattern", "") + if pat and ("위기형" in pat or "축소형" in pat): + flags.append(f"CF 패턴: {pat}") + + # 영업CF 3년 연속 감소 + hist = overview["history"] + if len(hist) >= 3: + ocfs = [h["ocf"] for h in hist[:3]] + if ocfs[0] < ocfs[1] < ocfs[2]: + flags.append("영업CF 3년 연속 감소") + + quality = calcCashQuality(company, basePeriod=basePeriod) + if quality and quality["history"]: + q0 = quality["history"][0] + + # 영업CF/순이익 < 40% (이익 대비 현금 부족) + ratio = q0.get("ocfToNi") + if ratio is not None and 0 < ratio < 40: + flags.append(f"영업CF/순이익 {ratio:.0f}% — 이익의 현금 뒷받침 부족") + + # 영업CF 마진 < 0 + margin = q0.get("ocfMargin") + if margin is not None and margin < 0: + flags.append(f"영업CF 마진 {margin:.1f}% — 매출 대비 현금 유출") + + return flags + + +# ── 영업CF 내부 분해 (BS 변동 기반) ── + + +@memoized_calc +def calcOcfDecomposition(company, *, basePeriod: str | None = None) -> dict | None: + """영업CF를 구성요소로 분해 — 현금흐름의 원천을 파악. + + 대부분 기업이 CF에 개별 조정항목을 안 쓰므로 BS 변동으로 간접 추정. + + OCF ≈ 순이익 + 비현금비용(감가상각 추정) + 운전자본 변동 + 운전자본 변동 = -(delta_AR) - (delta_Inv) + (delta_AP) + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + ni : float — 당기순이익 (원) + ocf : float — 영업활동현금흐름 (원) + depEstimate : float — 감가상각 추정치 (원) + wcEffect : float — 운전자본 변동 효과 (원) + arChange : float — 매출채권 변동 (원) + invChange : float — 재고자산 변동 (원) + apChange : float — 매입채무 변동 (원) + residual : float — 잔차 (원) + """ + isResult = company.select("IS", ["당기순이익"]) + cfResult = company.select("CF", ["영업활동현금흐름"]) + bsResult = company.select( + "BS", + ["매출채권및기타채권", "재고자산", "매입채무", "유형자산"], + ) + + isParsed = toDictBySnakeId(isResult) + cfParsed = toDictBySnakeId(cfResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or cfParsed is None or bsParsed is None: + return None + + isData, _ = isParsed + cfData, cfPeriods = cfParsed + bsData, _ = bsParsed + + niRow = isData.get("당기순이익", {}) + ocfRow = cfData.get("영업활동현금흐름", {}) + arRow = bsData.get("매출채권및기타채권", {}) + invRow = bsData.get("재고자산", {}) + apRow = bsData.get("매입채무", {}) + ppeRow = bsData.get("유형자산", {}) + + from dartlab.analysis.financial._helpers import annualColsFromPeriods + + yCols = annualColsFromPeriods(cfPeriods, basePeriod, 9) + if len(yCols) < 2: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i in range(len(yCols) - 1): + col = yCols[i] + prevCol = yCols[i + 1] + + ni = _getF3(niRow, col) + ocf = _getF3(ocfRow, col) + ppe = _get(ppeRow, col) + + # 감가상각 추정 (유형자산/10) + depEst = ppe / 10 if ppe > 0 else 0 + + # 운전자본 변동 (BS delta) + arChange = _get(arRow, col) - _get(arRow, prevCol) # 증가=현금유출 + invChange = _get(invRow, col) - _get(invRow, prevCol) + apChange = _get(apRow, col) - _get(apRow, prevCol) # 증가=현금유입 + wcEffect = -arChange - invChange + apChange + + # 잔차 (설명 안 되는 부분: 영업외, 세금, 기타 조정) + residual = ocf - ni - depEst - wcEffect + + history.append( + { + "period": col, + "ni": ni, + "ocf": ocf, + "depEstimate": round(depEst), + "wcEffect": round(wcEffect), + "arChange": round(arChange), + "invChange": round(invChange), + "apChange": round(apChange), + "residual": round(residual), + } + ) + + return {"history": history} if history else None diff --git a/src/dartlab/analysis/financial/costStructure.py b/src/dartlab/analysis/financial/costStructure.py new file mode 100644 index 0000000000000000000000000000000000000000..a15204f6b384321060b81963d455c1e2164f30bd --- /dev/null +++ b/src/dartlab/analysis/financial/costStructure.py @@ -0,0 +1,525 @@ +"""비용 구조 분석 — 원가/판관비 비중, 영업레버리지, 손익분기점 시계열. + +비용이 어떻게 움직이는지, 매출 변동에 이익이 얼마나 민감한지를 시계열로 추적한다. +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, sumCostOfSales, sumSGA, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +from dartlab.core.finance.calc import safePct as _pct # noqa: E402 + + +# ── 비용 비중 분해 ── + + +@memoized_calc +def calcCostBreakdown(company, *, basePeriod: str | None = None) -> dict | None: + """매출원가율, 판관비율, 영업비용률 시계열. + + Returns + ------- + dict + history : list[dict] — 기간별 비용 비중 시계열 + period : str — 회계연도 + revenue : float — 매출액 (원) + costOfSales : float — 매출원가 (원) + sga : float — 판매비와관리비 (원) + costOfSalesRatio : float | None — 매출원가율 (%) + sgaRatio : float | None — 판관비율 (%) + operatingCostRatio : float | None — 영업비용률 (%) + notesDetail : dict | None — 비용 성격별 분류 주석 (있는 경우) + """ + # snakeId 단일 + sumCostOfSales / sumSGA 분리 키 fallback + accounts = ["매출액", "매출원가", "판매비와관리비"] + isResult = company.select("IS", accounts) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + revRow = isData.get("sales", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + + history = [] + for col in yCols: + rev = revRow.get(col) or 0 + cogs = sumCostOfSales(isData, col) # 분리/통합 키 fallback + sga = sumSGA(isData, col) # 판매비/관리비 분리 키 fallback + + history.append( + { + "period": col, + "revenue": rev, + "costOfSales": cogs, + "sga": sga, + "costOfSalesRatio": _pct(cogs, rev), + "sgaRatio": _pct(sga, rev), + "operatingCostRatio": _pct(cogs + sga, rev), + } + ) + + if not history: + return None + + # notes enrichment — 비용의 성격별 분류 (있으면) + from dartlab.analysis.financial._helpers import fetchNotesDetail + + result: dict[str, Any] = {"history": history} + notesDetail = fetchNotesDetail(company, ["costByNature"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +# ── 영업레버리지 ── + + +@memoized_calc +def calcOperatingLeverage(company, *, basePeriod: str | None = None) -> dict | None: + """영업레버리지(DOL) 시계열 — 매출 변동 대비 영업이익 민감도. + + Returns + ------- + dict + history : list[dict] — 기간별 영업레버리지 시계열 + period : str — 회계연도 + revenue : float — 매출액 (원) + operatingIncome : float — 영업이익 (원) + grossProfit : float — 매출총이익 (원) + dol : float | None — 영업레버리지 (배) + contributionProxy : float | None — 매출총이익/영업이익 (배) + """ + accounts = ["매출액", "영업이익", "매출총이익"] + isResult = company.select("IS", accounts) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + revRow = isData.get("매출액", {}) + opRow = isData.get("영업이익", {}) + gpRow = isData.get("매출총이익", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i, col in enumerate(yCols): + rev = _getF2(revRow, col) + opIncome = _getF2(opRow, col) + gp = _getF2(gpRow, col) + + # DOL = 영업이익 변화율 / 매출 변화율 (전년 대비) + # 양쪽 다 양수일 때만 의미 있음 (부호 전환 시 DOL 해석 불가) + dol = None + if i + 1 < len(yCols): + prevCol = yCols[i + 1] + prevRev = _getF2(revRow, prevCol) + prevOp = _getF2(opRow, prevCol) + if prevRev > 0 and prevOp > 0 and opIncome > 0: + revChange = (rev - prevRev) / prevRev + opChange = (opIncome - prevOp) / prevOp + if abs(revChange) > 0.001: + rawDol = opChange / revChange + # DOL > 20이면 해석 무의미 (극단적 레버리지), cap 처리 + dol = max(-20, min(20, rawDol)) + + # contribution proxy = 매출총이익 / 영업이익 (고정비 구조 프록시) + contributionProxy = None + if opIncome > 0 and gp > 0: + contributionProxy = gp / opIncome + + history.append( + { + "period": col, + "revenue": rev, + "operatingIncome": opIncome, + "grossProfit": gp, + "dol": dol, + "contributionProxy": contributionProxy, + } + ) + + return {"history": history} if history else None + + +# ── 손익분기점 추정 ── + + +@memoized_calc +def calcBreakevenEstimate(company, *, basePeriod: str | None = None) -> dict | None: + """BEP 추정 — 고정비/(1-변동비율) 기반 손익분기 매출. + + Returns + ------- + dict + history : list[dict] — 기간별 손익분기점 추정 시계열 + period : str — 회계연도 + revenue : float — 매출액 (원) + fixedCostEstimate : float — 고정비 추정치 (원) + variableCostRatio : float | None — 변동비율 (%) + bepRevenue : float | None — 손익분기 매출액 (원) + marginOfSafety : float | None — 안전마진 (%) + """ + accounts = ["매출액", "매출원가", "판매비와관리비"] + isResult = company.select("IS", accounts) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + revRow = isData.get("매출액", {}) + cogsRow = isData.get("매출원가", {}) + sgaRow = isData.get("판매비와관리비", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + rev = _getF3(revRow, col) + cogs = _getF3(cogsRow, col) + sga = _getF3(sgaRow, col) + + # 단순화: 변동비 = 매출원가, 고정비 = 판관비 + variableCostRatio = cogs / rev if rev > 0 else None + fixedCost = sga + bepRevenue = None + marginOfSafety = None + + # 변동비율 95% 이상이면 한계이익률이 너무 작아 BEP 무의미 + if variableCostRatio is not None and 0 < variableCostRatio < 0.95: + bepRevenue = fixedCost / (1 - variableCostRatio) + if rev > 0: + marginOfSafety = (rev - bepRevenue) / rev * 100 + + history.append( + { + "period": col, + "revenue": rev, + "fixedCostEstimate": fixedCost, + "variableCostRatio": variableCostRatio, + "bepRevenue": bepRevenue, + "marginOfSafety": marginOfSafety, + } + ) + + return {"history": history} if history else None + + +# ── 비용의 성격별 분류 분석 ── + + +@memoized_calc +def calcCostByNatureAnalysis(company, *, basePeriod: str | None = None) -> dict | None: + """비용의 성격별 분류(notes) — 인건비/원재료/감가상각 비중 추세. + + K-IFRS 주석에서 비용의 성격별 분류를 추출하여, + 원재료비·인건비·감가상각비 등 성격별 비중의 시계열 변화를 추적한다. + 173개사 이상 데이터 보유 (금융/REIT/지주회사 미공시). + + Returns + ------- + dict | None + None이면 비용 성격별 분류 데이터 없음. + categories : list[dict] — 비용 카테고리별 시계열 + name : str — 카테고리명 (원재료/인건비/감가상각 등) + history : list[dict] — 기간별 금액·비중 + period : str — 회계연도 + amount : float — 금액 (원) + ratio : float — 총비용 대비 비중 (%) + latestRatio : float — 최신 기간 비중 (%) + direction : str | None — 비중 추세 (비중 증가/비중 감소/안정) + periods : list[str] — 대상 회계연도 목록 + insight : str | None — 주요 변화 요약 문장 + """ + from dartlab.analysis.financial._helpers import fetchNotesDetail, parseNumStr + + notesData = fetchNotesDetail(company, ["costByNature"]) + rawRows = notesData.get("costByNature") + if not rawRows: + return None + + # costByNature: [{항목, 2024, 2023, ...}] (항목×연도 테이블) + # 기간 컬럼 추출 + sampleRow = rawRows[0] + periodCols = sorted( + [k for k in sampleRow if k not in ("항목",) and str(k).replace("-", "").isdigit()], reverse=True + ) + if not periodCols: + return None + + periodCols = periodCols[:_MAX_YEARS] + + # 총비용 행 찾기 (합계/총계) + totalRow = None + detailRows = [] + for row in rawRows: + item = str(row.get("항목", "")).strip() + if any(kw in item for kw in ("합계", "총계", "계")): + if totalRow is None: + totalRow = row + else: + detailRows.append(row) + + if not detailRows: + return None + + # 성격별 분류: 주요 비용 카테고리 매핑 + _CATEGORY_KEYWORDS = { + "원재료": ["원재료", "재료비", "원자재"], + "상품매입": ["상품", "상품매입"], + "인건비": ["종업원급여", "급여", "인건비", "퇴직급여", "복리후생"], + "감가상각": ["감가상각", "상각비", "무형자산상각"], + "외주비": ["외주", "용역"], + "기타": [], + } + + categories: dict[str, dict[str, float]] = {} # {catName: {period: amount}} + for row in detailRows: + item = str(row.get("항목", "")).strip() + if not item: + continue + + # 카테고리 매칭 + matched = "기타" + for catName, keywords in _CATEGORY_KEYWORDS.items(): + if any(kw in item for kw in keywords): + matched = catName + break + + if matched not in categories: + categories[matched] = {} + for col in periodCols: + v = parseNumStr(row.get(col)) + if v is not None: + categories[matched][col] = categories[matched].get(col, 0) + v + + if not categories: + return None + + # 총비용 계산 (totalRow 없으면 합산) + totals: dict[str, float] = {} + if totalRow: + for col in periodCols: + v = parseNumStr(totalRow.get(col)) + if v is not None and v > 0: + totals[col] = v + if not totals: + for col in periodCols: + s = sum(cats.get(col, 0) for cats in categories.values()) + if s > 0: + totals[col] = s + + # 카테고리별 결과 생성 + result_categories = [] + for catName, vals in categories.items(): + if not vals: + continue + history = [] + for col in periodCols: + amt = vals.get(col, 0) + total = totals.get(col, 0) + ratio = round(amt / total * 100, 1) if total > 0 else 0 + history.append({"period": col, "amount": amt, "ratio": ratio}) + + latestRatio = history[0]["ratio"] if history else 0 + direction = None + ratios = [h["ratio"] for h in history if h["ratio"] > 0] + if len(ratios) >= 2: + diff = ratios[0] - ratios[-1] + if diff > 3: + direction = "비중 증가" + elif diff < -3: + direction = "비중 감소" + else: + direction = "안정" + + result_categories.append( + { + "name": catName, + "history": history, + "latestRatio": latestRatio, + "direction": direction, + } + ) + + # 비중 기준 정렬 (기타 제외하고 큰 순) + result_categories.sort(key=lambda x: (x["name"] == "기타", -x["latestRatio"])) + + # 인사이트 생성 + insight = None + laborCat = next((c for c in result_categories if c["name"] == "인건비"), None) + materialCat = next((c for c in result_categories if c["name"] == "원재료"), None) + if laborCat and laborCat["direction"] == "비중 증가": + insight = f"인건비 비중 {laborCat['latestRatio']:.0f}%로 증가 추세 — 노동집약도 심화" + elif materialCat and materialCat["direction"] == "비중 증가": + insight = f"원재료비 비중 {materialCat['latestRatio']:.0f}%로 증가 — 원가 부담 확대" + + return { + "categories": result_categories, + "periods": periodCols, + "insight": insight, + } + + +# ── 원재료 비중 (docs 보강) ── + + +@memoized_calc +def calcRawMaterialBreakdown(company, *, basePeriod: str | None = None) -> dict | None: + """주요 원재료 품목별 매입액 비중 — rawMaterial docs 토픽 기반. + + 부문/품목별 매입액 금액 행만 추출 (비중% 행 제외). + 계층적 테이블의 경우 부문별 첫 품목 금액이 대표값으로 나타남. + + Returns + ------- + dict | None + segments : list[dict] — 품목별 매입액 (최대 8개, 금액 내림차순) + name : str — 원재료 품목명 + amount : float — 매입액 (원) + pct : float — 총매입액 대비 비중 (%) + totalAmount : float — 총매입액 (원) + period : str — 기준 회계연도 + """ + from dartlab.analysis.financial._helpers import parseNumStr + + result = company.select("rawMaterial", ["매입액"]) + if result is None: + return None + + import polars as pl + + df = result if isinstance(result, pl.DataFrame) else getattr(result, "df", None) + if df is None or "항목" not in df.columns: + return None + + from dartlab.analysis.financial._helpers import periodCols + + pCols = periodCols(df) + if not pCols: + return None + + # 최신 연도 컬럼 사용 (basePeriod 이하, Q 없는 연도 우선) + annuals = annualColsFromPeriods(pCols, basePeriod, 1) + latestCol = annuals[0] if annuals else pCols[0] + + labelCol = "항목" + items = df[labelCol].to_list() + vals = df[latestCol].to_list() + + # 총계 행 찾기 + totalAmount = None + for it, v in zip(items, vals): + if any(k in str(it) for k in ["총계", "합계"]): + totalAmount = parseNumStr(str(v)) + break + + if totalAmount is None or totalAmount <= 0: + return None + + # 금액 행만 추출 (소계/총계 제외, % 비중 행 제외) + segments = [] + for it, v in zip(items, vals): + it = str(it) + vStr = str(v).strip() + if any(k in it for k in ["총계", "합계", "소계"]): + continue + if "%" in vStr: + continue + parsed = parseNumStr(vStr) + if parsed is None or parsed <= 0: + continue + name = it.replace("_매입액", "").strip() + if not name: + continue + pct = parsed / totalAmount * 100 + if pct < 1: + continue + segments.append({"name": name, "amount": parsed, "pct": round(pct, 1)}) + + if not segments: + return None + + segments.sort(key=lambda x: x["amount"], reverse=True) + return { + "segments": segments[:8], + "totalAmount": totalAmount, + "period": latestCol, + } + + +# ── 플래그 ── + + +@memoized_calc +def calcCostStructureFlags(company, *, basePeriod: str | None = None) -> list[str]: + """비용 구조 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 목록 (매출원가율 연속 상승, 고DOL, 안전마진 부족 등). + """ + flags = [] + + breakdown = calcCostBreakdown(company, basePeriod=basePeriod) + if breakdown and len(breakdown["history"]) >= 3: + hist = breakdown["history"] + # 매출원가율 3년 연속 상승 + cogsRatios = [h.get("costOfSalesRatio") for h in hist[:3]] + if all(r is not None for r in cogsRatios): + if cogsRatios[0] > cogsRatios[1] > cogsRatios[2]: + flags.append(f"매출원가율 3년 연속 상승 ({cogsRatios[2]:.1f}% -> {cogsRatios[0]:.1f}%)") + + # 판관비율 3년 연속 상승 + sgaRatios = [h.get("sgaRatio") for h in hist[:3]] + if all(r is not None for r in sgaRatios): + if sgaRatios[0] > sgaRatios[1] > sgaRatios[2]: + flags.append(f"판관비율 3년 연속 상승 ({sgaRatios[2]:.1f}% -> {sgaRatios[0]:.1f}%)") + + leverage = calcOperatingLeverage(company, basePeriod=basePeriod) + if leverage and leverage["history"]: + h0 = leverage["history"][0] + dol = h0.get("dol") + if dol is not None and dol > 3: + flags.append(f"영업레버리지(DOL) {dol:.1f} — 매출 변동에 이익 민감") + + bep = calcBreakevenEstimate(company, basePeriod=basePeriod) + if bep and bep["history"]: + h0 = bep["history"][0] + mos = h0.get("marginOfSafety") + if mos is not None and mos < 10: + flags.append(f"안전마진 {mos:.1f}% — 손익분기점 근접") + + return flags diff --git a/src/dartlab/analysis/financial/creditRating.py b/src/dartlab/analysis/financial/creditRating.py new file mode 100644 index 0000000000000000000000000000000000000000..dd976be0591393d801714e59a1e36be65940a0e2 --- /dev/null +++ b/src/dartlab/analysis/financial/creditRating.py @@ -0,0 +1,30 @@ +"""3-6 신용평가 — dartlab.credit.calcs로 이동됨. + +cross-dependency 방지: analysis ↛ credit. +이 모듈의 calc 함수들은 dartlab.credit.calcs로 이동했다. +review/registry.py는 dartlab.credit.calcs에서 직접 import한다. + +마이그레이션:: + + # Before + from dartlab.analysis.financial.creditRating import calcCreditScore + + # After + from dartlab.credit.calcs import calcCreditScore +""" + +from __future__ import annotations + + +def __getattr__(name: str): + _MOVED = { + "calcCreditMetrics", + "calcCreditScore", + "calcCreditHistory", + "calcCashFlowGrade", + "calcCreditPeerPosition", + "calcCreditFlags", + } + if name in _MOVED: + raise ImportError(f"{name} has moved to dartlab.credit.calcs. Use: from dartlab.credit.calcs import {name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/dartlab/analysis/financial/crossStatement.py b/src/dartlab/analysis/financial/crossStatement.py new file mode 100644 index 0000000000000000000000000000000000000000..7dc8567e5e8580515e9f9922325abc8f04d54a70 --- /dev/null +++ b/src/dartlab/analysis/financial/crossStatement.py @@ -0,0 +1,457 @@ +"""재무제표 간 교차검증 — IS-CF 괴리, IS-BS 괴리, 종합 이상 점수. + +3개 재무제표가 서로 맞는지, 비정상 패턴이 있는지를 시계열로 추적한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +def _getFirst(data: dict, keys: list[str], col: str) -> float: + for k in keys: + row = data.get(k, {}) + v = row.get(col) if row else None + if v is not None and v != 0: + return v + return 0 + + +# ── IS-CF 괴리 ── + + +@memoized_calc +def calcIsCfDivergence(company, *, basePeriod: str | None = None) -> dict | None: + """IS-CF 괴리 시계열 — 순이익 vs 영업CF. + + 반환:: + + { + "history": [ + { + "period": str, + "netIncome": float, + "ocf": float, + "divergence": float | None, + "direction": str | None, + "nonRecurringDistortion": bool, + }, + ... + ], + } + """ + isResult = company.select("IS", ["당기순이익", "영업이익"]) + cfResult = company.select("CF", ["영업활동현금흐름"]) + + isParsed = toDictBySnakeId(isResult) + cfParsed = toDictBySnakeId(cfResult) + if isParsed is None or cfParsed is None: + return None + + isData, _ = isParsed + cfData, cfPeriods = cfParsed + + niRow = isData.get("당기순이익", {}) + opRow = isData.get("영업이익", {}) + ocfRow = cfData.get("영업활동현금흐름", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + ni = _getF(niRow, col) + ocf = _getF(ocfRow, col) + opIncome = _getF(opRow, col) + + divergence = None + direction = None + if ni != 0: + divergence = (ni - ocf) / abs(ni) * 100 + if ni > ocf: + direction = "이익과대" + elif ni < ocf: + direction = "보수적" + else: + direction = "일치" + + # 일회성 왜곡 판정: 영업이익 대비 순이익이 극단적으로 작으면 + # 영업외 일회성 항목(중단사업손실, 대규모 손상 등)이 순이익을 지배 + nonRecurring = False + if opIncome != 0 and abs(ni) < abs(opIncome) * 0.3: + nonRecurring = True + + history.append( + { + "period": col, + "netIncome": ni, + "ocf": ocf, + "divergence": divergence, + "direction": direction, + "nonRecurringDistortion": nonRecurring, + } + ) + + return {"history": history} if history else None + + +# ── IS-BS 괴리 ── + + +@memoized_calc +def calcIsBsDivergence(company, *, basePeriod: str | None = None) -> dict | None: + """IS-BS 괴리 시계열 — 매출 성장 vs 매출채권/재고 성장. + + 반환:: + + { + "history": [ + { + "period": str, + "revenueGrowth": float | None, + "receivableGrowth": float | None, + "inventoryGrowth": float | None, + "revRecGap": float | None, + "revInvGap": float | None, + }, + ... + ], + } + """ + isResult = company.select("IS", ["매출액"]) + bsResult = company.select("BS", ["매출채권및기타채권", "재고자산"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + revRow = isData.get("매출액", {}) + invRow = bsData.get("재고자산", {}) + + _REC_KEYS = ["매출채권및기타채권"] + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i, col in enumerate(yCols): + rev = _getF2(revRow, col) + rec = _getFirst(bsData, _REC_KEYS, col) + inv = _get(invRow, col) + + revenueGrowth = None + receivableGrowth = None + inventoryGrowth = None + + if i + 1 < len(yCols): + prevCol = yCols[i + 1] + prevRev = _getF2(revRow, prevCol) + prevRec = _getFirst(bsData, _REC_KEYS, prevCol) + prevInv = _get(invRow, prevCol) + + if prevRev > 0: + revenueGrowth = (rev - prevRev) / prevRev * 100 + if prevRec > 0: + receivableGrowth = (rec - prevRec) / prevRec * 100 + if prevInv > 0: + inventoryGrowth = (inv - prevInv) / prevInv * 100 + + revRecGap = None + if revenueGrowth is not None and receivableGrowth is not None: + revRecGap = receivableGrowth - revenueGrowth + + revInvGap = None + if revenueGrowth is not None and inventoryGrowth is not None: + revInvGap = inventoryGrowth - revenueGrowth + + history.append( + { + "period": col, + "revenueGrowth": revenueGrowth, + "receivableGrowth": receivableGrowth, + "inventoryGrowth": inventoryGrowth, + "revRecGap": revRecGap, + "revInvGap": revInvGap, + } + ) + + return {"history": history} if history else None + + +# ── 종합 이상 점수 ── + + +@memoized_calc +def calcAnomalyScore(company, *, basePeriod: str | None = None) -> dict | None: + """종합 이상 점수 시계열 — 교차검증 결과 종합. + + 반환:: + + { + "history": [ + { + "period": str, + "score": float, + "components": dict, + }, + ... + ], + } + """ + isCf = calcIsCfDivergence(company, basePeriod=basePeriod) + isBs = calcIsBsDivergence(company, basePeriod=basePeriod) + + if isCf is None: + return None + + # 발생액 정보 (earningsQuality에서 가져오기) + from dartlab.analysis.financial.earningsQuality import calcAccrualAnalysis, calcBeneishTimeline + + accrual = calcAccrualAnalysis(company, basePeriod=basePeriod) + beneish = calcBeneishTimeline(company, basePeriod=basePeriod) + + # 기간별 데��터 매핑 + isCfMap = {h["period"]: h for h in isCf["history"]} if isCf else {} + isBsMap = {h["period"]: h for h in isBs["history"]} if isBs else {} + accrualMap = {h["period"]: h for h in accrual["history"]} if accrual else {} + beneishMap = {h["period"]: h for h in beneish["history"]} if beneish else {} + + periods = list(isCfMap.keys()) + if not periods: + return None + + history = [] + for period in periods: + score = 0 + components = {} + + # 1. IS-CF 괴리 (0~30점) + cf = isCfMap.get(period, {}) + div = cf.get("divergence") + if div is not None: + cfScore = min(30, abs(div) / 100 * 30) + # 일회성 왜곡(중단사업손실 등)이면 점수 절반 감쇄 + if cf.get("nonRecurringDistortion"): + cfScore = cfScore * 0.5 + score += cfScore + components["isCfDivergence"] = cfScore + + # 2. IS-BS 괴리: 매출채권 (0~20점) + bs = isBsMap.get(period, {}) + recGap = bs.get("revRecGap") + if recGap is not None and recGap > 0: + recScore = min(20, recGap / 50 * 20) + score += recScore + components["receivableGap"] = recScore + + # 3. IS-BS 괴리: 재고 (0~20점) + invGap = bs.get("revInvGap") + if invGap is not None and invGap > 0: + invScore = min(20, invGap / 50 * 20) + score += invScore + components["inventoryGap"] = invScore + + # 4. 발생액 (0~15점) + acc = accrualMap.get(period, {}) + sar = acc.get("sloanAccrualRatio") + if sar is not None: + accScore = min(15, abs(sar) / 0.15 * 15) + score += accScore + components["accrualRatio"] = accScore + + # 5. Beneish M-Score (0~15점) + ben = beneishMap.get(period, {}) + ms = ben.get("mScore") + if ms is not None and ms > -2.22: + # -2.22~-1.78 = 회색, -1.78+ = 위험 + mScoreNorm = min(15, max(0, (ms + 2.22) / 0.88 * 15)) + score += mScoreNorm + components["beneishMScore"] = mScoreNorm + + history.append( + { + "period": period, + "score": min(100, score), + "components": components, + } + ) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcCrossStatementFlags(company, *, basePeriod: str | None = None) -> list[str]: + """교차검증 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 문자열 리스트 (IS-CF 괴리, 매출채권/재고 이상 증가, 종합 이상점수 등). + """ + flags = [] + + isCf = calcIsCfDivergence(company, basePeriod=basePeriod) + if isCf and isCf["history"]: + h0 = isCf["history"][0] + div = h0.get("divergence") + if div is not None and abs(div) > 50: + suffix = " (일회성 영업외항목 왜곡)" if h0.get("nonRecurringDistortion") else "" + flags.append(f"IS-CF 괴리 {div:.0f}% — 순이익 대비 현금흐름 극심한 차이{suffix}") + + isBs = calcIsBsDivergence(company, basePeriod=basePeriod) + if isBs and isBs["history"]: + h0 = isBs["history"][0] + recGap = h0.get("revRecGap") + if recGap is not None and recGap > 20: + flags.append(f"매출채��� 성장이 매출 성장보다 {recGap:.0f}%p 빠름 — 매출 인식 의심") + invGap = h0.get("revInvGap") + if invGap is not None and invGap > 20: + flags.append(f"재고 성장이 매출 성장보다 {invGap:.0f}%p 빠름 — 재고 적체 또는 부풀리기") + + anomaly = calcAnomalyScore(company, basePeriod=basePeriod) + if anomaly and anomaly["history"]: + h0 = anomaly["history"][0] + if h0["score"] > 70: + flags.append(f"종합 이상점수 {h0['score']:.0f} — 재무제표 신뢰성 주의") + + return flags + + +# ── BS-CF Articulation Check ── + + +@memoized_calc +def calcArticulationCheck(company, *, basePeriod: str | None = None) -> dict | None: + """BS-CF 정합성 검증 — 재무제표 3표가 수학적으로 연결되는지. + + 3가지 정합성: + 1. PPE 정합: delta_PPE ≈ CAPEX - 감가상각 - 처분 + 2. 현금 정합: delta_Cash ≈ OCF + ICF + FCF + 3. 자본 정합: delta_Equity ≈ NI - 배당 + OCI + 신주발행 + + 오차가 크면 연결범위 변동, 환율 효과, 재분류 가능성. + + 반환:: + + { + "history": [ + {"period": str, "ppeError": float, "cashError": float, + "equityError": float, "maxErrorPct": float}, + ... + ], + } + + 학술근거: Articulation of Financial Statements (FASB/IASB). + """ + bsResult = company.select( + "BS", + ["유형자산", "현금및현금성자산", "자본총계"], + ) + cfResult = company.select( + "CF", + ["영업활동현금흐름", "투자활동현금흐름", "재무활동현금흐름", "유형자산의취득", "유형자산의처분"], + ) + isResult = company.select("IS", ["당기순이익"]) + + bsParsed = toDictBySnakeId(bsResult) + cfParsed = toDictBySnakeId(cfResult) + isParsed = toDictBySnakeId(isResult) + if bsParsed is None or cfParsed is None or isParsed is None: + return None + + bsData, bsPeriods = bsParsed + cfData, _ = cfParsed + isData, _ = isParsed + + ppeRow = bsData.get("유형자산", {}) + cashRow = bsData.get("현금및현금성자산", {}) + eqRow = bsData.get("자본총계", {}) + ocfRow = cfData.get("영업활동현금흐름", {}) + icfRow = cfData.get("투자활동현금흐름", {}) + fcfRow = cfData.get("재무활동현금흐름", {}) + capexRow = cfData.get("유형자산의취득", {}) + dispRow = cfData.get("유형자산의처분", {}) + niRow = isData.get("당기순이익", {}) + + yCols = annualColsFromPeriods(bsPeriods, basePeriod, _MAX_YEARS + 1) + if len(yCols) < 2: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i in range(len(yCols) - 1): + col = yCols[i] + prevCol = yCols[i + 1] + + # 1. PPE 정합 + ppeCur = _get(ppeRow, col) + ppePrev = _get(ppeRow, prevCol) + capex = abs(_getF3(capexRow, col)) + disp = abs(_getF3(dispRow, col)) + # 감가상각은 추정 (유형자산/10) + depEst = ppePrev / 10 if ppePrev > 0 else 0 + ppeExpected = ppePrev + capex - depEst - disp + ppeActual = ppeCur + ppeError = abs(ppeActual - ppeExpected) / ppePrev * 100 if ppePrev > 0 else None + + # 2. 현금 정합 + cashCur = _get(cashRow, col) + cashPrev = _get(cashRow, prevCol) + ocf = _getF3(ocfRow, col) + icf = _getF3(icfRow, col) + fcf = _getF3(fcfRow, col) + cashExpected = cashPrev + ocf + icf + fcf + cashError = abs(cashCur - cashExpected) / abs(cashPrev) * 100 if cashPrev != 0 else None + + # 3. 자본 정합 + eqCur = _get(eqRow, col) + eqPrev = _get(eqRow, prevCol) + ni = _getF3(niRow, col) + eqExpected = eqPrev + ni # 배당/OCI 미포함이므로 대략적 + eqError = abs(eqCur - eqExpected) / abs(eqPrev) * 100 if eqPrev != 0 else None + + errors = [e for e in [ppeError, cashError, eqError] if e is not None] + maxErr = max(errors) if errors else None + + history.append( + { + "period": col, + "ppeError": round(ppeError, 1) if ppeError is not None else None, + "cashError": round(cashError, 1) if cashError is not None else None, + "equityError": round(eqError, 1) if eqError is not None else None, + "maxErrorPct": round(maxErr, 1) if maxErr is not None else None, + } + ) + + return {"history": history} if history else None diff --git a/src/dartlab/analysis/financial/disclosureDelta.py b/src/dartlab/analysis/financial/disclosureDelta.py new file mode 100644 index 0000000000000000000000000000000000000000..ba5f75ba06d5bcd9ed50250400d594e7df82d0ab --- /dev/null +++ b/src/dartlab/analysis/financial/disclosureDelta.py @@ -0,0 +1,260 @@ +"""5-2 공시변화감지 -- 이 회사의 공시가 뭐가 달라졌는가. + +sections diff 인프라를 활용하여 기간간 공시 텍스트 변화를 정량화한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._memoize import memoized_calc + +# ── 공시변화 종합 요약 ── + + +@memoized_calc +def calcDisclosureChangeSummary(company, *, basePeriod: str | None = None) -> dict | None: + """전체 topic 변화 요약 -- 변화량 상위 topic + 총 변화 건수. + + company.diff() DataFrame에서 changeRate 기준으로 + 가장 많이 바뀐 topic을 추출한다. + + Returns + ------- + dict | None + totalChanges : int — 전체 변화 건수 + totalTopics : int — 전체 topic 수 + changedTopics : int — 변화가 있는 topic 수 + unchangedTopics : int — 변화가 없는 topic 수 + topChanged : list[dict] — 변화율 상위 topic 목록 + topic : str — topic 이름 + chapter : str — 장 이름 + totalPeriods : int — 전체 기간 수 + changedCount : int — 변화 기간 수 + changeRate : float — 변화율 (0~1) + """ + diffResult = _safeDiffResult(company) + if diffResult is None: + return None + + summaries = diffResult.summaries + if not summaries: + return None + + topChanged = diffResult.topChanged(10) + totalChanges = diffResult.totalChanges + totalTopics = len(summaries) + changedTopics = sum(1 for s in summaries if s.changedCount > 0) + + top = [] + for s in topChanged: + if s.changedCount == 0: + continue + top.append( + { + "topic": s.topic, + "chapter": s.chapter, + "totalPeriods": s.totalPeriods, + "changedCount": s.changedCount, + "changeRate": round(s.changeRate, 3), + } + ) + + return ( + { + "totalChanges": totalChanges, + "totalTopics": totalTopics, + "changedTopics": changedTopics, + "unchangedTopics": totalTopics - changedTopics, + "topChanged": top, + } + if top + else None + ) + + +# ── 핵심 공시 변화 추적 ── + + +_KEY_TOPICS = [ + "businessOverview", + "riskFactors", + "accountingPolicy", + "contingencies", + "relatedPartyTransactions", + "segmentInfo", +] + + +@memoized_calc +def calcKeyTopicChanges(company, *, basePeriod: str | None = None) -> dict | None: + """핵심 공시 topic 변화 추적. + + 사업개요/리스크/회계정책/우발부채/특수관계자/사업부문 등 + 분석적으로 중요한 topic의 변화를 추적한다. + + Returns + ------- + dict | None + keyTopics : list[dict] — 핵심 topic별 변화 정보 + topic : str — topic 이름 + chapter : str — 장 이름 + totalPeriods : int — 전체 기간 수 + changedCount : int — 변화 기간 수 + stableCount : int — 안정 기간 수 + changeRate : float — 변화율 (0~1) + """ + diffResult = _safeDiffResult(company) + if diffResult is None: + return None + + summaryMap = {s.topic: s for s in diffResult.summaries} + topicsAttr = getattr(company, "topics", None) + if topicsAttr is not None and hasattr(topicsAttr, "get_column"): + availableTopics = set(topicsAttr.get_column("topic").to_list()) + elif isinstance(topicsAttr, list): + availableTopics = set(topicsAttr) + else: + availableTopics = set(summaryMap.keys()) + + results = [] + for topic in _KEY_TOPICS: + if topic not in availableTopics: + continue + s = summaryMap.get(topic) + if s is None: + continue + results.append( + { + "topic": topic, + "chapter": s.chapter, + "totalPeriods": s.totalPeriods, + "changedCount": s.changedCount, + "stableCount": s.stableCount, + "changeRate": round(s.changeRate, 3), + } + ) + + return {"keyTopics": results} if results else None + + +# ── 변화 크기 분석 ── + + +@memoized_calc +def calcChangeIntensity(company, *, basePeriod: str | None = None) -> dict | None: + """변화 크기(바이트) 분석 -- 어떤 topic이 얼마나 크게 바뀌었나. + + diff entries에서 바이트 변화량 기준 top topic을 추출한다. + + Returns + ------- + dict | None + topByDelta : list[dict] — 바이트 변화량 상위 topic + topic : str — topic 이름 + totalDeltaBytes : int — 누적 바이트 변화량 + totalDeltaBytes : int — 전체 누적 바이트 변화량 + """ + diffResult = _safeDiffResult(company) + if diffResult is None: + return None + + entries = diffResult.entries + if not entries: + return None + + # topic별 누적 변화량 + topicDelta: dict[str, int] = {} + for e in entries: + delta = abs(e.toLen - e.fromLen) + topicDelta[e.topic] = topicDelta.get(e.topic, 0) + delta + + if not topicDelta: + return None + + ranked = sorted(topicDelta.items(), key=lambda x: x[1], reverse=True)[:10] + + return { + "topByDelta": [{"topic": topic, "totalDeltaBytes": delta} for topic, delta in ranked], + "totalDeltaBytes": sum(topicDelta.values()), + } + + +# ── 플래그 ── + + +@memoized_calc +def calcDisclosureDeltaFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]]: + """공시변화감지 경고/기회 플래그. + + Returns + ------- + list[tuple[str, str]] + 각 원소는 (플래그 텍스트, "warning" | "opportunity"). + """ + flags: list[tuple[str, str]] = [] + + summary = calcDisclosureChangeSummary(company) + if summary is None: + return flags + + # 변화 없는 회사 + if summary["changedTopics"] == 0: + flags.append(("전 기간 공시 텍스트 변화 없음 -- 보일러플레이트 가능성", "warning")) + return flags + + # 핵심 topic 변화 감지 + keyChanges = calcKeyTopicChanges(company) + if keyChanges and keyChanges["keyTopics"]: + for kt in keyChanges["keyTopics"]: + topic = kt["topic"] + rate = kt["changeRate"] + if rate >= 0.8: + flags.append((f"{topic}: 변화율 {rate:.0%} -- 빈번한 공시 변경", "warning")) + elif rate == 0: + if kt["totalPeriods"] >= 3: + flags.append((f"{topic}: {kt['totalPeriods']}기 연속 무변화", "warning")) + + # 상위 변화 topic + topChanged = summary.get("topChanged", []) + if topChanged: + top = topChanged[0] + if top["changeRate"] >= 0.8: + flags.append((f"최다 변화 topic: {top['topic']} (변화율 {top['changeRate']:.0%})", "warning")) + + # 회계정책 변경 감지 + if keyChanges and keyChanges["keyTopics"]: + acctPolicy = next( + (kt for kt in keyChanges["keyTopics"] if kt["topic"] == "accountingPolicy"), + None, + ) + if acctPolicy and acctPolicy["changedCount"] > 0: + flags.append(("회계정책 공시 변경 감지 -- 정책 변경 여부 확인 필요", "warning")) + + return flags + + +# ── 내부 헬퍼 ── + + +def _safeDiffResult(company): + """company._docs.sections에서 DiffResult를 안전하게 얻는다. + + 결과를 company._cache에 저장하여 4개 calc 함수가 공유. + """ + cache = getattr(company, "_cache", None) + _KEY = "_diffResult" + if cache is not None and _KEY in cache: + return cache[_KEY] + + result = None + try: + docsSections = company._docs.sections + if docsSections is not None: + from dartlab.core.docs.diff import sectionsDiff + + result = sectionsDiff(docsSections) + except (AttributeError, ValueError, KeyError, TypeError, ImportError): + pass + + if cache is not None: + cache[_KEY] = result + return result diff --git a/src/dartlab/analysis/financial/earningsQuality.py b/src/dartlab/analysis/financial/earningsQuality.py new file mode 100644 index 0000000000000000000000000000000000000000..0737ff96ba7c271ce02531bfb0a4789a3a916cd5 --- /dev/null +++ b/src/dartlab/analysis/financial/earningsQuality.py @@ -0,0 +1,728 @@ +"""이익의 질 분석 — 발생액, 이익 지속성, M-Score 시계열. + +이익이 현금으로 뒷받침되는지, 일회성인지, 조작 가능성이 있는지를 시계열로 추적한다. +""" + +from __future__ import annotations + +import math + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +# ── 유틸 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +def _safe(numerator: float, denominator: float) -> float | None: + if denominator is None or denominator == 0: + return None + return numerator / denominator + + +# ── 발생액 분석 ── + + +@memoized_calc +def calcAccrualAnalysis(company, *, basePeriod: str | None = None) -> dict | None: + """발생액(Accrual) 시계열 — 이익 중 현금이 아닌 비중. + + Returns + ------- + dict + history : list[dict] — 기간별 발생액 시계열 + period : str — 회계연도 + netIncome : float — 당기순이익 (원) + ocf : float — 영업활동현금흐름 (원) + totalAssets : float — 자산총계 (원) + sloanAccrualRatio : float | None — Sloan 발생액비율 (배) + accrualToRevenue : float | None — 발생액/매출액 (%) + ocfToNi : float | None — 영업CF/순이익 (%) + notesDetail : dict | None — 매출채권 대손충당금 주석 (있는 경우) + """ + isResult = company.select("IS", ["당기순이익", "매출액"]) + cfResult = company.select("CF", ["영업활동현금흐름"]) + bsResult = company.select("BS", ["자산총계"]) + + isParsed = toDictBySnakeId(isResult) + cfParsed = toDictBySnakeId(cfResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or cfParsed is None or bsParsed is None: + return None + + isData, _ = isParsed + cfData, cfPeriods = cfParsed + bsData, _ = bsParsed + + niRow = isData.get("당기순이익", {}) + revRow = isData.get("매출액", {}) + ocfRow = cfData.get("영업활동현금흐름", {}) + taRow = bsData.get("자산총계", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + ni = _getF(niRow, col) + ocf = _getF(ocfRow, col) + ta = _get(taRow, col) + rev = _getF(revRow, col) + accrual = ni - ocf + + history.append( + { + "period": col, + "netIncome": ni, + "ocf": ocf, + "totalAssets": ta, + "sloanAccrualRatio": _safe(accrual, ta) if ta > 0 else None, + "accrualToRevenue": _safe(accrual, rev) * 100 if rev > 0 and _safe(accrual, rev) is not None else None, + "ocfToNi": (lambda r: r if abs(r) <= 1000 else None)(_safe(ocf, ni) * 100) + if ni != 0 and _safe(ocf, ni) is not None + else None, + } + ) + + if not history: + return None + + result: dict = {"history": history} + + # notes enrichment — 매출채권 대손충당금 상세 + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["receivables"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +# ── 이익 지속성 ── + + +@memoized_calc +def calcEarningsPersistence(company, *, basePeriod: str | None = None) -> dict | None: + """이익 지속성 — 영업이익 vs 영업외손익, 변동성. + + Returns + ------- + dict + history : list[dict] — 기간별 이익 구성 시계열 + period : str — 회계연도 + operatingIncome : float — 영업이익 (원) + preTaxIncome : float — 법인세차감전순이익 (원) + nonOperatingIncome : float — 영업외손익 (원) + nonOpRatio : float | None — 영업외/영업이익 비율 (%) + earningsVolatility : float | None — 영업이익 변동계수 (배) + """ + accounts = ["영업이익", "법인세차감전순이익", "세전이익"] + isResult = company.select("IS", accounts) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + opRow = isData.get("영업이익", {}) + # 세전이익 fallback + ptRow = isData.get("법인세차감전순이익", isData.get("세전이익", {})) + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + opValues = [] + for col in yCols: + opIncome = _getF2(opRow, col) + ptIncome = _getF2(ptRow, col) + nonOp = ptIncome - opIncome + + nonOpRatio = None + if opIncome > 0: + nonOpRatio = abs(nonOp) / opIncome * 100 + + history.append( + { + "period": col, + "operatingIncome": opIncome, + "preTaxIncome": ptIncome, + "nonOperatingIncome": nonOp, + "nonOpRatio": nonOpRatio, + } + ) + if opIncome != 0: + opValues.append(opIncome) + + # 변동계수 (CV = std / |mean|) + earningsVolatility = None + if len(opValues) >= 3: + mean = sum(opValues) / len(opValues) + if mean != 0: + variance = sum((v - mean) ** 2 for v in opValues) / len(opValues) + earningsVolatility = math.sqrt(variance) / abs(mean) + + return {"history": history, "earningsVolatility": earningsVolatility} if history else None + + +# ── Beneish M-Score 시계열 ── + + +@memoized_calc +def calcBeneishTimeline(company, *, basePeriod: str | None = None) -> dict | None: + """Beneish M-Score 시계열 — annual 데이터에서 직접 8변수 계산. + + 8-Variable Model: + DSRI(매출채권/매출 변화), GMI(매출총이익률 역전), AQI(자산품질 변화), + SGI(매출성장), DEPI(감가상각률 변화, 기본1.0), SGAI(판관비율 변화), + LVGI(레버리지 변화), TATA(발생액/총자산) + + M = -4.84 + 0.920*DSRI + 0.528*GMI + 0.404*AQI + 0.892*SGI + + 0.115*DEPI - 0.172*SGAI + 4.679*TATA - 0.327*LVGI + + Returns + ------- + dict + history : list[dict] — 기간별 M-Score 시계열 + period : str — 회계연도 + mScore : float | None — Beneish M-Score (점수) + threshold : float — 조작 판별 임계값 (-1.78) + diagnosticMeta : dict — 진단 메타데이터 + precision : float — 정밀도 + falsePositiveRate : float — 위양성률 + reference : str — 학술 근거 + sampleBase : str — 표본 기반 + krNote : str — K-IFRS 환경 주의사항 + """ + # snakeId 단일 패턴 (alias 양방향이 EDGAR↔DART 변형 자동 처리) + isResult = company.select( + "IS", + ["매출액", "매출원가", "판매비와관리비", "당기순이익"], + ) + bsResult = company.select( + "BS", + ["매출채권및기타채권", "유동자산", "유형자산", "자산총계", "유동부채", "부채총계"], + ) + cfResult = company.select("CF", ["operating_cashflow"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + cfParsed = toDictBySnakeId(cfResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + cfData = cfParsed[0] if cfParsed else {} + + revRow = isData.get("sales", {}) + cogsRow = isData.get("cost_of_sales", {}) + sgaRow = isData.get("selling_and_administrative_expenses", {}) + niRow = isData.get("net_profit", {}) + recRow = bsData.get("trade_and_other_receivables", {}) + caRow = bsData.get("current_assets", {}) + ppeRow = bsData.get("tangible_assets", {}) + taRow = bsData.get("assets", {}) + clRow = bsData.get("current_liabilities", {}) + tlRow = bsData.get("liabilities", {}) + ocfRow = cfData.get("operating_cashflow", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS + 1) # 전년 대비 필요 → 1년 더 + if len(yCols) < 2: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i in range(len(yCols) - 1): + col = yCols[i] # 당기 + prevCol = yCols[i + 1] # 전기 + + rev = _getF3(revRow, col) + prevRev = _getF3(revRow, prevCol) + cogs = _getF3(cogsRow, col) + prevCogs = _getF3(cogsRow, prevCol) + sga = _getF3(sgaRow, col) + prevSga = _getF3(sgaRow, prevCol) + ni = _getF3(niRow, col) + rec = _get(recRow, col) + prevRec = _get(recRow, prevCol) + ca = _get(caRow, col) + prevCa = _get(caRow, prevCol) + ppe = _get(ppeRow, col) + prevPpe = _get(ppeRow, prevCol) + ta = _get(taRow, col) + prevTa = _get(taRow, prevCol) + _get(clRow, col) + _get(clRow, prevCol) + tl = _get(tlRow, col) + prevTl = _get(tlRow, prevCol) + ocf = _getF3(ocfRow, col) + + # 분모가 0이면 계산 불가 + if prevRev <= 0 or rev <= 0 or prevTa <= 0 or ta <= 0: + history.append({"period": col, "mScore": None}) + continue + + # DSRI: (매출채권t/매출t) / (매출채권t-1/매출t-1) + dsri = (rec / rev) / (prevRec / prevRev) if prevRec > 0 else 1.0 + + # GMI: 매출총이익률t-1 / 매출총이익률t + gm = (rev - cogs) / rev + prevGm = (prevRev - prevCogs) / prevRev if prevRev > 0 else 0 + gmi = prevGm / gm if gm > 0 else 1.0 + + # AQI: (1 - 유동자산t/총자산t - 유형자산t/총자산t) / (1 - 유동자산t-1/총자산t-1 - 유형자산t-1/총자산t-1) + aqi_t = 1 - ca / ta - ppe / ta + aqi_prev = 1 - prevCa / prevTa - prevPpe / prevTa + aqi = aqi_t / aqi_prev if abs(aqi_prev) > 0.001 else 1.0 + + # SGI: 매출t / 매출t-1 + sgi = rev / prevRev + + # DEPI: 감가상각 데이터 없음 → 기본 1.0 (중립) + depi = 1.0 + + # SGAI: (판관비t/매출t) / (판관비t-1/매출t-1) + sgai = (sga / rev) / (prevSga / prevRev) if prevSga > 0 else 1.0 + + # LVGI: (부채총계t/총자산t) / (부채총계t-1/총자산t-1) + lev_t = tl / ta + lev_prev = prevTl / prevTa if prevTa > 0 else 0 + lvgi = lev_t / lev_prev if lev_prev > 0 else 1.0 + + # TATA: (순이익 - 영업CF) / 총자산 + tata = (ni - ocf) / ta if ta > 0 else 0 + + mScore = ( + -4.84 + + 0.920 * dsri + + 0.528 * gmi + + 0.404 * aqi + + 0.892 * sgi + + 0.115 * depi + - 0.172 * sgai + + 4.679 * tata + - 0.327 * lvgi + ) + + history.append({"period": col, "mScore": round(mScore, 4)}) + + if not history: + return None + return { + "history": history, + "threshold": -1.78, + "diagnosticMeta": { + "precision": 0.76, + "falsePositiveRate": 0.178, + "reference": "Beneish(1999), 8변수", + "sampleBase": "미국 제조업 1982-1992", + "krNote": "K-IFRS 환경 미검증 — 정밀도 과대추정 가능", + }, + } + + +# ── 플래그 ── + + +@memoized_calc +def calcEarningsQualityFlags(company, *, basePeriod: str | None = None) -> dict: + """이익 품질 경고 신호. + + Returns + ------- + dict + flags : list[str] — 경고 메시지 목록 (발생액 과다, CF 부족, M-Score 초과 등) + enrichedFlags : list[dict] — 구조화된 플래그 (정밀도/기저율/학술근거 포함) + code : str — 플래그 코드 + message : str — 경고 메시지 + precision : float | None — 정밀도 + baseRate : str — 표본 기반 + reference : str — 학술 근거 + sectorNote : str — 업종별 주의사항 + """ + flags: list[str] = [] + enriched: list[dict] = [] + + accrual = calcAccrualAnalysis(company, basePeriod=basePeriod) + if accrual and accrual["history"]: + h0 = accrual["history"][0] + sar = h0.get("sloanAccrualRatio") + if sar is not None and sar > 0.10: + flags.append(f"Sloan 발생액비율 {sar:.1%} — 이익 현금화 부족") + ocfNi = h0.get("ocfToNi") + if ocfNi is not None and 0 < ocfNi < 40: + flags.append(f"영업CF/순이익 {ocfNi:.0f}% — 이익 대비 현금 부족") + + persistence = calcEarningsPersistence(company, basePeriod=basePeriod) + if persistence: + if persistence["history"]: + h0 = persistence["history"][0] + nonOpRatio = h0.get("nonOpRatio") + nonOpIncome = h0.get("nonOperatingIncome") + if nonOpRatio is not None and nonOpRatio > 30: + if nonOpIncome is not None and nonOpIncome < 0: + suffix = " (일회성 항목 가능성)" if nonOpRatio > 100 else "" + flags.append(f"영업외손실 비중 {nonOpRatio:.0f}% — 영업이익을 상쇄{suffix}") + else: + suffix = " (일회성 항목 가능성)" if nonOpRatio > 100 else "" + flags.append(f"영업외이익 비중 {nonOpRatio:.0f}% — 일회성 이익 의존{suffix}") + + cv = persistence.get("earningsVolatility") + if cv is not None and cv > 0.5: + flags.append(f"이익 변동계수 {cv:.2f} — 이익 변동성 높음") + + beneish = calcBeneishTimeline(company, basePeriod=basePeriod) + if beneish and beneish["history"]: + h0 = beneish["history"][0] + ms = h0.get("mScore") + if ms is not None and ms > -1.78: + msg = f"Beneish M-Score {ms:.2f} — 임계값 초과, 이익 조작 가능성" + flags.append(msg) + meta = beneish.get("diagnosticMeta", {}) + enriched.append( + { + "code": "BENEISH_MANIPULATOR", + "message": msg, + "precision": meta.get("precision"), + "baseRate": meta.get("sampleBase", ""), + "reference": meta.get("reference", ""), + "sectorNote": meta.get("krNote", ""), + } + ) + + return {"flags": flags, "enrichedFlags": enriched} + + +# ── Richardson 3계층 발생액 분해 ── + + +@memoized_calc +def calcRichardsonAccrual(company, *, basePeriod: str | None = None) -> dict | None: + """Richardson et al. (2005) 3계층 발생액 분해. + + BS 변동 기반으로 발생액을 운전자본/비유동영업/금융으로 분리. + 신뢰도가 낮은 LTOACC가 클수록 이익 지속성이 낮다. + + WCACC = (delta_CA - delta_Cash) - (delta_CL - delta_STD) 신뢰도 높음 + LTOACC = delta_NCOA - delta_NCOL 신뢰도 낮음 + FINACC = delta_STI + delta_LTI - delta_LTD - delta_PSTK 중간 + + 학술근거: Richardson, Sloan, Soliman, Tuna (2005). + + Returns + ------- + dict + history : list[dict] — 기간별 3계층 발생액 시계열 + period : str — 회계연도 + wcacc : float | None — 운전자본 발생액/총자산 (%) + ltoacc : float | None — 비유동영업 발생액/총자산 (%) + finacc : float | None — 금융 발생액/총자산 (%) + totalAccrual : float | None — 총발생액/총자산 (%) + reliabilityScore : str | None — 이익 신뢰도 (high/medium/low) + """ + bsResult = company.select( + "BS", + [ + "유동자산", + "비유동자산", + "유동부채", + "비유동부채", + "현금및현금성자산", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + "자산총계", + ], + ) + + bsParsed = toDictBySnakeId(bsResult) + if bsParsed is None: + return None + + bsData, bsPeriods = bsParsed + caRow = bsData.get("current_assets", {}) + ncaRow = bsData.get("noncurrent_assets", {}) + clRow = bsData.get("current_liabilities", {}) + nclRow = bsData.get("noncurrent_liabilities", {}) + cashRow = bsData.get("cash_and_cash_equivalents", {}) + stRow = bsData.get("shortterm_borrowings", {}) + ltRow = bsData.get("longterm_borrowings", {}) + unifiedBorrowRow = bsData.get("borrowings", {}) # 통합 차입금 fallback + bondRow = bsData.get("debentures", {}) + taRow = bsData.get("total_assets", {}) + + # stRow/ltRow 가 모두 비어있으면 unifiedBorrow 를 stRow 로 사용 + if not stRow and not ltRow and unifiedBorrowRow: + stRow = unifiedBorrowRow + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS + 1) + if len(yCols) < 2: + return None + + history = [] + for i in range(len(yCols) - 1): + col = yCols[i] + prevCol = yCols[i + 1] + + # 델타 계산 + dCA = _get(caRow, col) - _get(caRow, prevCol) + dCash = _get(cashRow, col) - _get(cashRow, prevCol) + dCL = _get(clRow, col) - _get(clRow, prevCol) + dSTD = _get(stRow, col) - _get(stRow, prevCol) + dNCA = _get(ncaRow, col) - _get(ncaRow, prevCol) + dNCL = _get(nclRow, col) - _get(nclRow, prevCol) + dLTD = (_get(ltRow, col) + _get(bondRow, col)) - (_get(ltRow, prevCol) + _get(bondRow, prevCol)) + + # 3계층 분해 + wcacc = (dCA - dCash) - (dCL - dSTD) + ltoacc = dNCA - dNCL + finacc = -dCash + dSTD + dLTD # 금융자산 증가 - 금융부채 증가의 역 + + totalAccrual = wcacc + ltoacc + finacc + avgTA = (_get(taRow, col) + _get(taRow, prevCol)) / 2 + + # 정규화 (총자산 평균 대비) + wcaccNorm = round(wcacc / avgTA * 100, 2) if avgTA > 0 else None + ltoaccNorm = round(ltoacc / avgTA * 100, 2) if avgTA > 0 else None + finaccNorm = round(finacc / avgTA * 100, 2) if avgTA > 0 else None + totalNorm = round(totalAccrual / avgTA * 100, 2) if avgTA > 0 else None + + # 신뢰도 판단: LTOACC 비중이 50% 이상이면 낮음 + if totalAccrual != 0 and avgTA > 0: + ltoShare = ( + abs(ltoacc) / (abs(wcacc) + abs(ltoacc) + abs(finacc)) + if (abs(wcacc) + abs(ltoacc) + abs(finacc)) > 0 + else 0 + ) + reliability = "low" if ltoShare > 0.5 else "high" if ltoShare < 0.2 else "medium" + else: + reliability = None + + history.append( + { + "period": col, + "wcacc": wcaccNorm, + "ltoacc": ltoaccNorm, + "finacc": finaccNorm, + "totalAccrual": totalNorm, + "reliabilityScore": reliability, + } + ) + + return {"history": history} if history else None + + +# ── 영업외손익 분해 ── + + +@memoized_calc +def calcNonOperatingBreakdown(company, *, basePeriod: str | None = None) -> dict | None: + """영업외손익 항목별 분해 — 영업이익과 세전이익 사이의 갭. + + 금융이익/비용, 지분법손익, 기타수익/비용을 개별 추적. + 영업외가 영업이익의 30% 이상이면 영업만으로 기업 판단 불가. + + Returns + ------- + dict + history : list[dict] — 기간별 영업외손익 분해 시계열 + period : str — 회계연도 + opIncome : float — 영업이익 (원) + finIncome : float — 금융이익 (원) + finCost : float — 금융비용 (원) + netFinance : float — 순금융손익 (원) + associateIncome : float — 지분법손익 (원) + otherIncome : float — 기타수익 (원) + otherExpense : float — 기타비용 (원) + nonOpTotal : float | None — 영업외손익 합계 (원) + nonOpRatio : float | None — 영업외/영업이익 비율 (%) + notesDetail : dict | None — 관계기업 투자 주석 (있는 경우) + """ + isResult = company.select( + "IS", + ["영업이익", "금융이익", "금융비용", "지분법관련손익", "기타수익", "기타비용", "법인세차감전순이익"], + ) + + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + opRow = isData.get("영업이익", {}) + finIncRow = isData.get("금융이익", {}) + finCostRow = isData.get("금융비용", {}) + assocRow = isData.get("지분법관련손익", {}) + otherIncRow = isData.get("기타수익", {}) + otherExpRow = isData.get("기타비용", {}) + ptRow = isData.get("법인세차감전순이익", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + def _getF4(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + op = _getF4(opRow, col) + finInc = _getF4(finIncRow, col) + finCost = _getF4(finCostRow, col) + assoc = _getF4(assocRow, col) + otherInc = _getF4(otherIncRow, col) + otherExp = _getF4(otherExpRow, col) + pt = _getF4(ptRow, col) + + netFinance = finInc - finCost + nonOpTotal = pt - op if op != 0 else None + nonOpRatio = round(abs(nonOpTotal) / abs(op) * 100, 1) if op != 0 and nonOpTotal is not None else None + + history.append( + { + "period": col, + "opIncome": op, + "finIncome": finInc, + "finCost": finCost, + "netFinance": netFinance, + "associateIncome": assoc, + "otherIncome": otherInc, + "otherExpense": otherExp, + "nonOpTotal": nonOpTotal, + "nonOpRatio": nonOpRatio, + } + ) + + if not history: + return None + + result: dict = {"history": history} + + # notes enrichment — 관계기업 투자 상세 + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["affiliates"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +# ── EPS 희석 분석 ── + + +@memoized_calc +def calcDilutionTrend(company, *, basePeriod: str | None = None) -> dict | None: + """기본 EPS vs 희석 EPS 괴리율 시계열 — 스톡옵션/전환사채 희석 리스크. + + notes.eps에서 기본주당이익과 희석주당이익을 추출하여 + 희석 괴리율(%)의 추세를 추적한다. + 괴리율이 5% 이상이면 잠재 희석 리스크. + + Returns + ------- + dict + history : list[dict] — 기간별 EPS 희석 시계열 + period : str — 회계연도 + basicEps : float | None — 기본주당이익 (원) + dilutedEps : float | None — 희석주당이익 (원) + dilutionPct : float | None — 희석 괴리율 (%) + latestDilution : float | None — 최신 기간 희석 괴리율 (%) + trend : str | None — 희석 추세 (희석 증가/희석 감소/안정) + """ + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesData = fetchNotesDetail(company, ["eps"]) + epsDf = notesData.get("eps") + if not epsDf: + return None + + # eps notes: [{항목, 2024, 2023, ...}] + basicRow = None + dilutedRow = None + for row in epsDf: + item = str(row.get("항목", "")).strip() + if "희석" in item: + dilutedRow = row + elif "기본" in item or "주당" in item: + if basicRow is None: + basicRow = row + + if basicRow is None: + return None + + # 기간 컬럼 추출 + periodCols = [k for k in basicRow if k not in ("항목",) and k.isdigit()] + periodCols.sort(reverse=True) + if not periodCols: + return None + + from dartlab.analysis.financial._helpers import parseNumStr + + history = [] + for col in periodCols[:_MAX_YEARS]: + basic = parseNumStr(basicRow.get(col)) + diluted = parseNumStr(dilutedRow.get(col)) if dilutedRow else None + + dilutionPct = None + if basic is not None and diluted is not None and basic != 0: + dilutionPct = round((basic - diluted) / abs(basic) * 100, 2) + + history.append( + { + "period": col, + "basicEps": basic, + "dilutedEps": diluted, + "dilutionPct": dilutionPct, + } + ) + + if not history: + return None + + latestDilution = history[0]["dilutionPct"] + + # 추세: 최근 vs 과거 비교 + trend = None + dilutionVals = [h["dilutionPct"] for h in history if h["dilutionPct"] is not None] + if len(dilutionVals) >= 2: + diff = dilutionVals[0] - dilutionVals[-1] + if diff > 2: + trend = "희석 증가" + elif diff < -2: + trend = "희석 감소" + else: + trend = "안정" + + return { + "history": history, + "latestDilution": latestDilution, + "trend": trend, + } diff --git a/src/dartlab/analysis/financial/efficiency.py b/src/dartlab/analysis/financial/efficiency.py new file mode 100644 index 0000000000000000000000000000000000000000..58c3b8c659927b09fc708d43bdf4bc5424e42a48 --- /dev/null +++ b/src/dartlab/analysis/financial/efficiency.py @@ -0,0 +1,184 @@ +"""2-4 효율성 분석 -- 자산을 얼마나 빨리 돌리는가. + +select()로 IS/BS 원본 계정을 가져와서 +회전율 + CCC를 금액과 함께 보여준다. +재고가 쌓이는지, 매출채권 회수가 느려지는지를 금액으로 파악. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import MAX_RATIO_YEARS, toDictBySnakeId +from dartlab.analysis.financial._helpers import annualColsFromPeriods as _annualColsFromPeriods +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = MAX_RATIO_YEARS + + +def _yoy(cur, prev) -> float | None: + if cur is None or prev is None or prev == 0: + return None + return round((cur - prev) / abs(prev) * 100, 2) + + +def _turnover(revenue, balance) -> float | None: + if revenue is None or balance is None or balance == 0: + return None + return round(revenue / balance, 2) + + +def _days(revenue, balance) -> float | None: + if revenue is None or balance is None or revenue == 0: + return None + return round(balance / revenue * 365, 1) + + +# ── 자산 회전 ── + + +@memoized_calc +def calcTurnoverTrend(company, *, basePeriod: str | None = None) -> dict | None: + """자산 회전 시계열 -- 자산을 얼마나 효율적으로 쓰는가. + + IS(매출) + BS(자산/채권/재고)에서 원본 금액과 회전율을 동시에 본다. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + revenue : float — 매출액 (원) + totalAssets : float — 자산총계 (원) + receivables : float — 매출채권 (원) + receivablesYoy : float — 매출채권 전년비 (%) + inventory : float — 재고자산 (원) + inventoryYoy : float — 재고자산 전년비 (%) + payables : float — 매입채무 (원) + totalAssetTurnover : float — 총자산회전율 (배) + receivablesTurnover : float — 매출채권회전율 (배) + inventoryTurnover : float — 재고자산회전율 (배) + dso : float — 매출채권 회수일수 (일) + dio : float — 재고자산 보유일수 (일) + dpo : float — 매입채무 지급일수 (일) + ccc : float — 현금전환주기 (일) + """ + isResult = company.select("IS", ["매출액", "매출원가"]) + bsResult = company.select( + "BS", ["자산총계", "매출채권", "매출채권및기타채권", "재고자산", "매입채무", "매입채무및기타채무"] + ) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + rev = isData.get("매출액", {}) + cogs = isData.get("매출원가", {}) + ta = bsData.get("자산총계", {}) + ar = bsData.get("매출채권", {}) or bsData.get("매출채권및기타채권", {}) + inv = bsData.get("재고자산", {}) + ap = bsData.get("매입채무", {}) or bsData.get("매입채무및기타채무", {}) + + yCols = _annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS + 1) + if len(yCols) < 2: + return None + history = [] + for i, col in enumerate(yCols[:-1]): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + r = rev.get(col) + c = cogs.get(col) + + arVal = ar.get(col) + invVal = inv.get(col) + apVal = ap.get(col) + taVal = ta.get(col) + + # 회전율 + totalAssetTurnover = _turnover(r, taVal) + receivablesTurnover = _turnover(r, arVal) + inventoryTurnover = _turnover(c, invVal) # COGS 기준 + + # CCC 구성 (일수) + dso = _days(r, arVal) + dio = _days(c, invVal) + dpo = _days(c, apVal) + ccc = round(dso + dio - dpo, 1) if dso is not None and dio is not None and dpo is not None else None + + history.append( + { + "period": col, + "revenue": r, + "totalAssets": taVal, + "receivables": arVal, + "receivablesYoy": _yoy(arVal, ar.get(prevCol)) if prevCol else None, + "inventory": invVal, + "inventoryYoy": _yoy(invVal, inv.get(prevCol)) if prevCol else None, + "payables": apVal, + "totalAssetTurnover": totalAssetTurnover, + "receivablesTurnover": receivablesTurnover, + "inventoryTurnover": inventoryTurnover, + "dso": dso, + "dio": dio, + "dpo": dpo, + "ccc": ccc, + } + ) + + return {"history": history} if history else None + + +# calcCccTrend는 calcTurnoverTrend에 통합 +calcCccTrend = calcTurnoverTrend + + +# ── 플래그 ── + + +@memoized_calc +def calcEfficiencyFlags(company, *, basePeriod: str | None = None) -> list[str]: + """효율성 경고/기회 플래그. + + Returns + ------- + list[str] + 경고/기회 플래그 문자열 목록. + """ + flags: list[str] = [] + + trend = calcTurnoverTrend(company, basePeriod=basePeriod) + if trend is None or len(trend["history"]) < 2: + return flags + + hist = trend["history"] + + # 총자산회전율 3기 연속 하락 + if len(hist) >= 3: + tats = [h.get("totalAssetTurnover") for h in hist[:3]] + if all(v is not None for v in tats) and tats[0] < tats[1] < tats[2]: + flags.append(f"총자산회전율 3기 연속 하락 ({tats[0]:.2f}회)") + + # 재고 급증 (매출 대비) + h0, h1 = hist[0], hist[1] + invYoy = h0.get("inventoryYoy") + revYoy = h0.get("revenue") + revPrev = h1.get("revenue") + if invYoy is not None and revPrev is not None and revPrev > 0: + _yoy(revYoy, revPrev) + # 여기서는 이미 계산된 inventoryYoy 사용 + if invYoy is not None and invYoy > 20: + flags.append(f"재고자산 +{invYoy:.0f}% 급증") + + # CCC + if len(hist) >= 2: + ccc0 = hist[0].get("ccc") + ccc1 = hist[1].get("ccc") + if ccc0 is not None and ccc1 is not None: + diff = ccc0 - ccc1 + if diff > 20: + flags.append(f"CCC {diff:.0f}일 악화 ({ccc0:.0f}일)") + if ccc0 < 0: + flags.append(f"CCC {ccc0:.0f}일 -- 운전자본 유리 구조") + + return flags diff --git a/src/dartlab/analysis/financial/forecastCalcs.py b/src/dartlab/analysis/financial/forecastCalcs.py new file mode 100644 index 0000000000000000000000000000000000000000..1a08c60fb1823f8f395d90597c185a135efa9837 --- /dev/null +++ b/src/dartlab/analysis/financial/forecastCalcs.py @@ -0,0 +1,646 @@ +"""매출전망 축 -- forecast 엔진을 analysis 패턴으로 래핑. + +calc 함수 7개: 매출예측, 세그먼트전망, ProForma, 시나리오, +방법론, 과거비율, 플래그. + +모든 함수는 (company) -> dict | None 시그니처를 따른다. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from dartlab.analysis.financial._memoize import memoized_calc +from dartlab.analysis.financial.valuation import _IG_TO_SECTOR_KEY +from dartlab.analysis.forecast.revenueForecast import CompanyDataBundle, forecastRevenue +from dartlab.analysis.forecast.simulation import simulateAllScenarios + +log = logging.getLogger(__name__) + + +# ── 공통 헬퍼 ── + + +def _getSeriesAndMeta(company: Any) -> tuple[dict, str | None, str | None, str, str]: + """company에서 series, stockCode, sectorKey, market, currency 추출.""" + ts = company._buildFinanceSeries(freq="Q") + series = ts[0] if isinstance(ts, tuple) else ts + + stockCode = getattr(company, "stockCode", None) + currency = getattr(company, "currency", "KRW") or "KRW" + market = getattr(company, "market", "KR") or "KR" + + # sectorKey: valuation.py _resolveSectorKey 동일 로직 + sectorKey = None + try: + sectorInfo = company.sector + if sectorInfo is not None: + igName = sectorInfo.industryGroup.name + sectorKey = _IG_TO_SECTOR_KEY.get(igName) + except (AttributeError, ValueError): + pass + + return series, stockCode, sectorKey, market, currency + + +def _getShares(company: Any) -> int | None: + """발행주식수 추출.""" + profile = getattr(company, "profile", None) + if profile: + sharesVal = getattr(profile, "sharesOutstanding", None) + if sharesVal: + return int(sharesVal) + return None + + +def _getSectorParams(company: Any): + """SectorParams 추출.""" + try: + return getattr(company, "sectorParams", None) + except AttributeError: + return None + + +def _buildCompanyDataBundle(company: Any): + """segments, salesOrder, structuralBreak → CompanyDataBundle 조립. 없으면 None.""" + segmentRevenue = None + salesDf = None + orderDf = None + structuralBreak = None + + try: + segments = getattr(company, "segments", None) + if segments is not None: + segmentRevenue = getattr(segments, "revenue", None) + except (AttributeError, TypeError): + pass + + try: + salesOrder = getattr(company, "salesOrder", None) + if salesOrder is not None: + salesDf = getattr(salesOrder, "salesDf", None) + orderDf = getattr(salesOrder, "orderDf", None) + except (AttributeError, TypeError): + pass + + # 구조변화 감지 결과 전달 (Chow Test 기반) + try: + from dartlab.analysis.financial.research.predictionSignals import calcStructuralBreak + + structuralBreak = calcStructuralBreak(company) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + if segmentRevenue is None and salesDf is None and orderDf is None and structuralBreak is None: + return None + + return CompanyDataBundle( + segmentRevenue=segmentRevenue, + salesDf=salesDf, + orderDf=orderDf, + structuralBreak=structuralBreak, + ) + + +def _runForecastRevenue(company: Any): + """forecastRevenue 실행 + 결과 캐시. 같은 company에서 중복 호출 방지.""" + cache = getattr(company, "_cache", None) + _KEY = "_forecastRevenueResult" + if cache is not None and _KEY in cache: + return cache[_KEY] + + series, stockCode, sectorKey, market, currency = _getSeriesAndMeta(company) + + companyData = _buildCompanyDataBundle(company) + + result = forecastRevenue( + series, + stockCode=stockCode, + sectorKey=sectorKey, + market=market, + horizon=3, + companyData=companyData, + currency=currency, + ) + + if cache is not None: + cache[_KEY] = result + return result + + +# ── calc 함수 7개 ── + + +@memoized_calc +def calcRevenueForecast(company: Any, *, basePeriod: str | None = None) -> dict | None: + """7-소스 앙상블 3-시나리오 매출 전망. + + Returns + ------- + dict + isEstimate : bool — 추정치 여부 + method : str — 예측 방법론 + confidence : str — 신뢰도 ("high" | "medium" | "low") + currency : str — 통화 코드 + historical : list[float] — 과거 매출 시계열 (원) + projected : list[float] — 전망 매출 시계열 (원) + growthRates : list[float] — 전망 성장률 (%) + horizon : int — 전망 기간 (년) + scenarios : dict — 시나리오별 projected/growthRates/probability + lifecycle : str — 라이프사이클 단계 + forecastable : bool — 예측 가능 여부 + unforecastableReason : str — 예측 불가 사유 (forecastable=False 시) + disclaimer : str — 면책 문구 + """ + result = _runForecastRevenue(company) + if not result or not result.projected: + return None + + currency = getattr(company, "currency", "KRW") or "KRW" + + out: dict = { + "isEstimate": True, + "method": result.method, + "confidence": result.confidence, + "currency": currency, + "historical": result.historical, + "projected": result.projected, + "growthRates": result.growthRates, + "horizon": result.horizon, + } + + # 시나리오 + if result.scenarios: + out["scenarios"] = {} + for label in ("base", "bull", "bear"): + sc = result.scenarios.get(label, []) + sg = result.scenarioGrowthRates.get(label, []) + prob = result.scenarioProbabilities.get(label, 0) + if sc: + out["scenarios"][label] = { + "projected": sc, + "growthRates": sg, + "probability": prob, + } + + # 라이프사이클 + lifecycle = result.aiContext.get("lifecycle", "") + if lifecycle: + out["lifecycle"] = lifecycle + + out["disclaimer"] = result.DISCLAIMER + + # v4: 예측 불가능성 상태 전달 + out["forecastable"] = result.forecastable + if not result.forecastable: + out["unforecastableReason"] = result.unforecastableReason + + return out + + +@memoized_calc +def calcSegmentForecast(company: Any, *, basePeriod: str | None = None) -> dict | None: + """세그먼트별 개별 매출 성장 전망. + + Returns + ------- + dict | None + None: 세그먼트 데이터 없음. + isEstimate : bool — 추정치 여부 + currency : str — 통화 코드 + segments : list[dict] — 세그먼트별 전망 + name : str — 세그먼트명 + projected : list[float] — 전망 매출 (원) + growthRates : list[float] — 전망 성장률 (%) + method : str — 예측 방법론 + shareOfRevenue : float — 매출 비중 (%) + lifecycle : str — 라이프사이클 단계 + """ + result = _runForecastRevenue(company) + if not result or not result.segmentForecasts: + return None + + currency = getattr(company, "currency", "KRW") or "KRW" + + segments = [] + for seg in result.segmentForecasts: + segments.append( + { + "name": seg.name, + "projected": seg.projected, + "growthRates": seg.growthRates, + "method": seg.method, + "shareOfRevenue": seg.shareOfRevenue, + "lifecycle": seg.lifecycle, + } + ) + + return { + "isEstimate": True, + "currency": currency, + "segments": segments, + } + + +@memoized_calc +def calcProFormaHighlights(company: Any, *, basePeriod: str | None = None) -> dict | None: + """Pro-Forma IS 주요 항목 전망. + + Returns + ------- + dict + isEstimate : bool — 추정치 여부 + currency : str — 통화 코드 + wacc : float — 가중평균자본비용 (%) + revenueGrowthPath : list[float] — 연도별 매출 성장률 (%) + years : list[dict] — 연도별 전망 + yearOffset : int — 기준연 대비 오프셋 + revenue : float — 매출 (원) + operatingIncome : float — 영업이익 (원) + netIncome : float — 순이익 (원) + ebitda : float — EBITDA (원) + fcf : float — 잉여현금흐름 (원) + warnings : list[str] — 경고 메시지 + disclaimer : str — 면책 문구 + """ + result = _runForecastRevenue(company) + if not result or not result.projected: + return None + + series, _, sectorKey, _, currency = _getSeriesAndMeta(company) + shares = _getShares(company) + sp = _getSectorParams(company) + + # 매출 성장률 경로 추출 + growthPath = result.growthRates + if not growthPath: + return None + + from dartlab.core.finance.proforma import build_proforma + + try: + pf = build_proforma( + series, + revenue_growth_path=growthPath, + sector_params=sp, + shares=shares, + scenario_name="base", + ) + except (KeyError, ValueError, ZeroDivisionError, TypeError) as exc: + log.debug("pro-forma 생성 실패: %s", exc) + return None + + if not pf.projections: + return None + + years = [] + for p in pf.projections: + years.append( + { + "yearOffset": p.year_offset, + "revenue": p.revenue, + "operatingIncome": p.operating_income, + "netIncome": p.net_income, + "ebitda": p.ebitda, + "fcf": p.fcf, + } + ) + + return { + "isEstimate": True, + "currency": currency, + "wacc": pf.wacc, + "revenueGrowthPath": pf.revenue_growth_path, + "years": years, + "warnings": pf.warnings, + "disclaimer": pf.DISCLAIMER, + } + + +@memoized_calc +def calcScenarioImpact(company: Any, *, basePeriod: str | None = None) -> dict | None: + """매크로 시나리오별 매출/마진 영향. + + Returns + ------- + dict + isEstimate : bool — 추정치 여부 + currency : str — 통화 코드 + scenarios : dict — 시나리오별 (baseline/bull/bear) + label : str — 시나리오 라벨 + revenueChangePct : float — 매출 변화율 (%) + marginChangeBps : float — 마진 변화 (bps) + revenuePath : list[float] — 매출 경로 (원) + marginPath : list[float] — 마진 경로 (%) + warnings : list[str] — 경고 메시지 + """ + series, _, sectorKey, _, currency = _getSeriesAndMeta(company) + shares = _getShares(company) + sp = _getSectorParams(company) + + try: + results = simulateAllScenarios( + series, + sectorKey=sectorKey, + sectorParams=sp, + shares=shares, + ) + except (KeyError, ValueError, ZeroDivisionError, TypeError) as exc: + log.debug("시나리오 시뮬레이션 실패: %s", exc) + return None + + if not results: + return None + + scenarios = {} + for name, sim in results.items(): + scenarios[name] = { + "label": sim.scenarioLabel, + "revenueChangePct": sim.revenueChangePct, + "marginChangeBps": sim.marginChangeBps, + "revenuePath": sim.revenuePath, + "marginPath": sim.marginPath, + "warnings": sim.warnings, + } + + return { + "isEstimate": True, + "currency": currency, + "scenarios": scenarios, + } + + +@memoized_calc +def calcForecastMethodology(company: Any, *, basePeriod: str | None = None) -> dict | None: + """예측 방법론 투명성 공개. + + Returns + ------- + dict + method : str — 예측 방법론 + confidence : str — 신뢰도 ("high" | "medium" | "low") + sources : list[str] — 사용된 데이터 소스 + sourceWeights : dict — 소스별 가중치 + assumptions : list[str] — 가정 목록 + warnings : list[str] — 경고 메시지 + lifecycle : str — 라이프사이클 단계 + """ + result = _runForecastRevenue(company) + if not result: + return None + + return { + "method": result.method, + "confidence": result.confidence, + "sources": result.sources, + "sourceWeights": result.sourceWeights, + "assumptions": result.assumptions, + "warnings": result.warnings, + "lifecycle": result.aiContext.get("lifecycle", ""), + } + + +@memoized_calc +def calcHistoricalRatios(company: Any, *, basePeriod: str | None = None) -> dict | None: + """Pro-Forma 기반 과거 구조 비율. + + Returns + ------- + dict + grossMargin : float — 매출총이익률 (%) + sgaRatio : float — 판관비율 (%) + effectiveTaxRate : float — 유효세율 (%) + depreciationRatio : float — 감가상각비율 (%) + capexToRevenue : float — CAPEX/매출 (%) + interestRateOnDebt : float — 부채이자율 (%) + nwcToRevenue : float — 순운전자본/매출 (%) + dividendPayout : float — 배당성향 (%) + yearsUsed : int — 사용 연도 수 + confidence : str — 신뢰도 + trends : dict — 비율 추세 정보 + warnings : list[str] — 경고 메시지 + """ + series, _, _, _, _ = _getSeriesAndMeta(company) + + from dartlab.core.finance.proforma import extract_historical_ratios + + try: + ratios = extract_historical_ratios(series) + except (KeyError, ValueError, ZeroDivisionError, TypeError) as exc: + log.debug("과거 비율 추출 실패: %s", exc) + return None + + return { + "grossMargin": ratios.gross_margin, + "sgaRatio": ratios.sga_ratio, + "effectiveTaxRate": ratios.effective_tax_rate, + "depreciationRatio": ratios.depreciation_ratio, + "capexToRevenue": ratios.capex_to_revenue, + "interestRateOnDebt": ratios.interest_rate_on_debt, + "nwcToRevenue": ratios.nwc_to_revenue, + "dividendPayout": ratios.dividend_payout, + "yearsUsed": ratios.years_used, + "confidence": ratios.confidence, + "trends": ratios.trends, + "warnings": ratios.warnings, + } + + +@memoized_calc +def calcForecastFlags(company: Any, *, basePeriod: str | None = None) -> dict | None: + """매출전망 플래그. + + Returns + ------- + dict + flags : list[tuple[str, str]] — (severity, message) 쌍 목록 + """ + result = _runForecastRevenue(company) + if not result: + return None + + flags: list[tuple[str, str]] = [] + + # 예측 불가 판정 + if not result.forecastable: + flags.insert(0, ("UNFORECASTABLE", f"예측 불가 -- {result.unforecastableReason}")) + + # 신뢰도 경고 + if result.confidence == "low": + flags.append(("LOW_CONFIDENCE", "예측 신뢰도 낮음 -- 데이터 부족 또는 변동성 과다")) + + # 시계열 전용 (컨센서스/매크로 부재) + if result.method == "timeseries_only": + flags.append(("TIMESERIES_ONLY", "시계열만 사용 -- 컨센서스 데이터 없음")) + + # 구조변화 감지 + if "structural_break" in result.aiContext: + flags.append(("STRUCTURAL_BREAK", "매출 시계열 구조변화 감지 -- 과거 추세가 미래에 유효하지 않을 수 있음")) + + # 시나리오 격차 + if result.scenarios: + bull = result.scenarios.get("bull", []) + bear = result.scenarios.get("bear", []) + if bull and bear and bull[0] > 0 and bear[0] > 0: + spread = (bull[0] - bear[0]) / bear[0] * 100 + if spread > 50: + flags.append(("HIGH_UNCERTAINTY", f"Bull-Bear 격차 {spread:.0f}% -- 불확실성 높음")) + + # 엔진 warnings 전달 + for w in result.warnings: + flags.append(("WARNING", w)) + + if not flags: + return None + + return {"flags": flags} + + +@memoized_calc +def calcCalibrationReport(company: Any, *, basePeriod: str | None = None) -> dict | None: + """예측 캘리브레이션 리포트 — 이 종목의 과거 예측 정확도. + + forward test 레코드가 5건 미만이면 None 반환. + 데이터가 축적되면서 점진적으로 활성화된다. + + Returns + ------- + dict | None + None: 평가 레코드 5건 미만. + brierScore : float — Brier 점수 (0~1, 낮을수록 정확) + nRecords : int — 평가 레코드 수 + bins : list[dict] — 캘리브레이션 구간별 통계 + """ + from dataclasses import asdict + + from dartlab.analysis.forecast.calibrationMetrics import ( + buildCalibrationBins, + computeBrierScore, + ) + from dartlab.analysis.forecast.forwardTest import loadRecords + + stockCode = getattr(company, "stockCode", None) + if not stockCode: + return None + + records = loadRecords(stockCode) + evaluated = [r for r in records if r.directionProbability is not None and r.directionActual is not None] + if len(evaluated) < 5: + return None + + predictions = [r.directionProbability for r in evaluated] # type: ignore[misc] + outcomes = [1 if r.directionActual == "up" else 0 for r in evaluated] + + brier = computeBrierScore(predictions, outcomes) + bins = buildCalibrationBins(predictions, outcomes) + + return { + "brierScore": round(brier, 4), + "nRecords": len(evaluated), + "bins": [asdict(b) for b in bins], + } + + +# ── calc 8: 시나리오 시뮬레이션 ── + + +@memoized_calc +def calcScenarioSimulation(company: Any, *, basePeriod: str | None = None) -> dict | None: + """시나리오 시뮬레이션 — 과거 CAGR 기반 자동 3시나리오 ProForma + 분기 목표. + + 과거 3년 매출 CAGR을 자동 계산하여 base 성장률로 사용하고, + bull/base/bear 3개 시나리오의 ProForma IS/BS/CF + 분기 목표 + DCF를 생성한다. + + 사용자 지정 성장률이 필요하면 scenarioSim.createSimulation()을 직접 호출. + + Returns + ------- + dict + isEstimate : bool — 추정치 여부 + currency : str — 통화 코드 + baseYear : str — 기준 연도 + targetYear : str — 목표 연도 + revenueGrowthCAGR : float — 기준 CAGR (%) + scenarios : dict — 시나리오별 revenue/operatingIncome/netIncome/fcf/wacc (원) + quarterlyRevTargets : dict — 시나리오별 분기 매출 목표 (원) + quarterlyOITargets : dict — 시나리오별 분기 영업이익 목표 (원) + dcfPerShare : dict — 시나리오별 주당 DCF 가치 (원) + seasonality : dict — revenue/operatingIncome 분기 계절성 가중치 + """ + from dartlab.analysis.forecast.scenarioSim import createSimulation + + series, _, _, _, currency = _getSeriesAndMeta(company) + shares = _getShares(company) + + # 과거 CAGR 자동 계산 (3년) + revVals = [] + for sj_key in ("sales", "revenue"): + vals = series.get("IS", {}).get(sj_key, []) + if vals: + # 연간 TTM: 4분기씩 역순으로 4개 연도 + annuals = [] + for end in range(len(vals), 3, -4): + chunk = [v for v in vals[end - 4 : end] if v is not None] + if len(chunk) == 4: + annuals.append(sum(chunk)) + if len(annuals) >= 4: + break + annuals.reverse() + if len(annuals) >= 2: + revVals = annuals + break + + if len(revVals) < 2: + return None + + # CAGR 계산 + first, last = revVals[0], revVals[-1] + nYears = len(revVals) - 1 + if first <= 0 or last <= 0: + cagr = 0.0 + else: + cagr = ((last / first) ** (1 / nYears) - 1) * 100 + + # CAGR 범위 제한 (-20% ~ +50%) + cagr = max(-20.0, min(50.0, cagr)) + + try: + sim = createSimulation( + company, + "자동(CAGR기반)", + revenueGrowth=round(cagr, 1), + shares=shares, + ) + except (KeyError, ValueError, ZeroDivisionError, TypeError) as exc: + log.debug("시나리오 시뮬레이션 실패: %s", exc) + return None + + # 결과 직렬화 (dict 반환) + scenarios = {} + for scName, pf in sim.proformaResults.items(): + if pf.projections: + p = pf.projections[0] + scenarios[scName] = { + "revenue": p.revenue, + "operatingIncome": p.operating_income, + "netIncome": p.net_income, + "fcf": p.fcf, + "wacc": pf.wacc, + } + + return { + "isEstimate": True, + "currency": currency, + "baseYear": sim.baseYear, + "targetYear": sim.targetYear, + "revenueGrowthCAGR": round(cagr, 1), + "scenarios": scenarios, + "quarterlyRevTargets": {sc: [round(v) for v in vals] for sc, vals in sim.quarterlyRevTargets.items()}, + "quarterlyOITargets": {sc: [round(v) for v in vals] for sc, vals in sim.quarterlyOITargets.items()}, + "dcfPerShare": sim.dcfPerShare, + "seasonality": { + "revenue": [round(w, 3) for w in sim.revSeasonality], + "operatingIncome": [round(w, 3) for w in sim.oiSeasonality], + }, + } diff --git a/src/dartlab/analysis/financial/governance.py b/src/dartlab/analysis/financial/governance.py new file mode 100644 index 0000000000000000000000000000000000000000..3893fd036895bf9bb47a4391431c1d19969eacad --- /dev/null +++ b/src/dartlab/analysis/financial/governance.py @@ -0,0 +1,235 @@ +"""5-1 지배구조 분석 -- 이 회사의 주인은 누구이며, 감시는 작동하는가. + +report 데이터(최대주주, 임원, 감사의견)에서 지배구조 핵심 지표를 추출한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import MAX_RATIO_YEARS +from dartlab.analysis.financial._memoize import memoized_calc + +# ── 최대주주 지분 시계열 ── + + +@memoized_calc +def calcOwnershipTrend(company, *, basePeriod: str | None = None) -> dict | None: + """최대주주 지분율 시계열 + 최근 주주 구성. + + report.majorHolder에서 연도별 합산 지분율 추이와 + 최신 시점 개별 주주(top 10)를 추출한다. + + Returns + ------- + dict | None + None: majorHolder 데이터 없음. + history : list[dict] — 연도별 지분율 추이 + year : str — 연도 + ratio : float — 합산 지분율 (%) + change : float — 전기 대비 변동 (%p) + latestHolders : list[dict] — 최근 주주 구성 (상위 10명) + name : str — 주주명 + relate : str — 관계 + ratio : float — 지분율 (%) + shares : int — 보유 주식수 + """ + result = _safePivotMajorHolder(company) + if result is None: + return None + + years = result.years[-MAX_RATIO_YEARS:] + ratios = result.totalShareRatio[-MAX_RATIO_YEARS:] + + history = [] + for i, y in enumerate(years): + r = ratios[i] if i < len(ratios) else None + prevR = ratios[i - 1] if i > 0 and (i - 1) < len(ratios) else None + change = round(r - prevR, 2) if r is not None and prevR is not None else None + history.append({"year": y, "ratio": r, "change": change}) + + holders = result.latestHolders[:10] if result.latestHolders else [] + + return ( + { + "history": history, + "latestHolders": holders, + } + if history + else None + ) + + +# ── 이사회 구성 ── + + +@memoized_calc +def calcBoardComposition(company, *, basePeriod: str | None = None) -> dict | None: + """이사회 구성 -- 사외이사비율, 전체 임원 수. + + report.executive에서 최신 분기 기준 이사회 구성을 추출한다. + + Returns + ------- + dict + totalCount : int — 전체 임원 수 + registeredCount : int — 등기임원 수 + outsideCount : int — 사외이사 수 + outsideRatio : float — 사외이사비율 (%) + """ + result = _safePivotExecutive(company) + if result is None: + return None + + total = result.totalCount + registered = result.registeredCount + outside = result.outsideCount + if total == 0: + return None + + outsideRatio = round(outside / total * 100, 1) if total > 0 else None + + return { + "totalCount": total, + "registeredCount": registered, + "outsideCount": outside, + "outsideRatio": outsideRatio, + } + + +# ── 감사의견 시계열 ── + + +@memoized_calc +def calcAuditOpinionTrend(company, *, basePeriod: str | None = None) -> dict | None: + """감사의견 + 감사인 시계열. + + report.audit에서 연도별 감사의견과 감사인을 추출한다. + 감사인 변경도 감지한다. + + Returns + ------- + dict | None + None: audit 데이터 없음. + history : list[dict] — 연도별 감사 이력 + year : str — 연도 + opinion : str — 감사의견 + auditor : str — 감사인 + auditorChanged : bool — 감사인 변경 여부 + """ + result = _safePivotAudit(company) + if result is None: + return None + + years = result.years[-MAX_RATIO_YEARS:] + opinions = result.opinions[-MAX_RATIO_YEARS:] + auditors = result.auditors[-MAX_RATIO_YEARS:] + + history = [] + for i, y in enumerate(years): + opinion = opinions[i] if i < len(opinions) else None + auditor = auditors[i] if i < len(auditors) else None + prevAuditor = auditors[i - 1] if i > 0 and (i - 1) < len(auditors) else None + auditorChanged = auditor is not None and prevAuditor is not None and auditor != prevAuditor + history.append( + { + "year": y, + "opinion": opinion, + "auditor": auditor, + "auditorChanged": auditorChanged, + } + ) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcGovernanceFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]]: + """지배구조 경고/기회 플래그. + + Returns + ------- + list[tuple[str, str]] + (message, severity) 쌍 목록. severity: "warning" | "opportunity" + """ + flags: list[tuple[str, str]] = [] + + # 최대주주 지분 + ownership = calcOwnershipTrend(company) + if ownership and ownership["history"]: + latest = ownership["history"][-1] + r = latest.get("ratio") + if r is not None: + if r > 50: + flags.append((f"최대주주 지분율 {r:.1f}% -- 과반 지배", "warning")) + elif r < 20: + flags.append((f"최대주주 지분율 {r:.1f}% -- 경영권 방어 취약", "warning")) + + # 지분 변동 추이 + history = ownership["history"] + if len(history) >= 3: + changes = [h["change"] for h in history[-3:] if h.get("change") is not None] + if len(changes) >= 2 and all(c < -1.0 for c in changes): + flags.append(("최대주주 지분 2기 연속 감소 -- 지분 희석 주의", "warning")) + + # 이사회 구성 + board = calcBoardComposition(company) + if board: + outsideRatio = board.get("outsideRatio") + if outsideRatio is not None: + if outsideRatio < 25: + flags.append((f"사외이사비율 {outsideRatio:.0f}% -- 이사회 독립성 취약", "warning")) + elif outsideRatio >= 50: + flags.append((f"사외이사비율 {outsideRatio:.0f}% -- 이사회 독립성 양호", "opportunity")) + + # 감사의견 + audit = calcAuditOpinionTrend(company) + if audit and audit["history"]: + latest = audit["history"][-1] + opinion = latest.get("opinion") + if opinion and opinion != "적정의견" and opinion != "적정": + flags.append((f"최근 감사의견: {opinion}", "warning")) + + # 감사인 변경 + changes = [h for h in audit["history"] if h.get("auditorChanged")] + if len(changes) >= 2: + flags.append(("감사인 잦은 변경 -- 감사 독립성 점검 필요", "warning")) + + return flags + + +# ── 내부 헬퍼 ── + + +def _safePivotMajorHolder(company): + """report.majorHolder를 안전하게 가져온다.""" + try: + result = company._report.majorHolder + if result is None: + return None + return result + except (AttributeError, ValueError, KeyError, TypeError): + return None + + +def _safePivotExecutive(company): + """report.executive를 안전하게 가져온다.""" + try: + result = company._report.executive + if result is None: + return None + return result + except (AttributeError, ValueError, KeyError, TypeError): + return None + + +def _safePivotAudit(company): + """report.audit를 안전하게 가져온다.""" + try: + result = company._report.audit + if result is None: + return None + return result + except (AttributeError, ValueError, KeyError, TypeError): + return None diff --git a/src/dartlab/analysis/financial/growthAnalysis.py b/src/dartlab/analysis/financial/growthAnalysis.py new file mode 100644 index 0000000000000000000000000000000000000000..2b3f12424556a4eebf6f91b24bb5b3fc10fba971 --- /dev/null +++ b/src/dartlab/analysis/financial/growthAnalysis.py @@ -0,0 +1,440 @@ +"""2-2 성장성 분석 -- 무엇이 얼마나 커졌는가. + +select()로 IS/BS/CF 원본 계정을 가져와서 +금액 + YoY + CAGR + 이익 vs 매출 성장 괴리를 시계열로 보여준다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import MAX_RATIO_YEARS, toDictBySnakeId +from dartlab.analysis.financial._helpers import annualColsFromPeriods as _annualColsFromPeriods +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = MAX_RATIO_YEARS + + +def _yoy(cur, prev) -> float | None: + if cur is None or prev is None or prev == 0: + return None + return round((cur - prev) / abs(prev) * 100, 2) + + +def _cagrFromList(values: list[float | None], periods: int) -> float | None: + valid = [v for v in values if v is not None and v > 0] + if len(valid) < 2 or periods < 1: + return None + first, last = valid[0], valid[-1] + if first <= 0: + return None + return round((pow(last / first, 1 / periods) - 1) * 100, 2) + + +# ── 성장 추이 ── + + +@memoized_calc +def calcGrowthTrend(company, *, basePeriod: str | None = None) -> dict | None: + """성장 추이 -- 매출/영업이익/순이익/자산의 금액과 YoY. + + IS + BS에서 원본 금액을 가져와 규모감과 방향을 동시에 본다. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + revenue : float — 매출 (원) + revenueYoy : float — 매출 전기대비 (%) + operatingIncome : float — 영업이익 (원) + operatingIncomeYoy : float — 영업이익 전기대비 (%) + netIncome : float — 당기순이익 (원) + netIncomeYoy : float — 순이익 전기대비 (%) + totalAssets : float — 총자산 (원) + totalAssetsYoy : float — 총자산 전기대비 (%) + cagr : dict — 3년 CAGR (revenue, operatingIncome, netIncome) (%) + """ + isResult = company.select("IS", ["매출액", "영업이익", "당기순이익"]) + bsResult = company.select("BS", ["자산총계"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + bsData = bsParsed[0] if bsParsed else {} + + rev = isData.get("매출액", {}) + op = isData.get("영업이익", {}) + ni = isData.get("당기순이익", {}) + ta = bsData.get("자산총계", {}) + + yCols = _annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS + 1) + if len(yCols) < 2: + return None + history = [] + for i, col in enumerate(yCols[:-1]): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + + _r = rev.get(col) + _o = op.get(col) + _n = ni.get(col) + _rP = rev.get(prevCol) if prevCol else None + _oP = op.get(prevCol) if prevCol else None + _nP = ni.get(prevCol) if prevCol else None + + history.append( + { + "period": col, + "revenue": _r, + "revenueYoy": _yoy(_r, _rP) if prevCol else None, + "operatingIncome": _o, + "operatingIncomeYoy": _yoy(_o, _oP) if prevCol else None, + "netIncome": _n, + "netIncomeYoy": _yoy(_n, _nP) if prevCol else None, + "totalAssets": ta.get(col), + "totalAssetsYoy": _yoy(ta.get(col), ta.get(prevCol)) if prevCol else None, + } + ) + + # CAGR + revVals = [rev.get(c) for c in reversed(yCols)] + opVals = [op.get(c) for c in reversed(yCols)] + niVals = [ni.get(c) for c in reversed(yCols)] + n = len(yCols) - 1 + + return ( + { + "history": history, + "cagr": { + "revenue": _cagrFromList(revVals, n), + "operatingIncome": _cagrFromList(opVals, n), + "netIncome": _cagrFromList(niVals, n), + "periods": n, + }, + } + if history + else None + ) + + +# ── 성장 품질 ── + + +@memoized_calc +def calcGrowthQuality(company, *, basePeriod: str | None = None) -> dict | None: + """성장 품질 -- 매출 성장이 이익으로 이어지는가. + + 매출 vs 영업이익 성장률 괴리를 본다. + 매출만 크고 이익이 안 따라오면 외형 위주. + + Returns + ------- + dict + quality : str — 성장 품질 판단 ("고품질"|"개선 중"|"외형 위주"|"둔화") + cagr : dict — 3년 CAGR (revenue, operatingIncome, netIncome) (%) + leverageEffect : list[dict] + period : str — 기간 + revenueYoy : float — 매출 전기대비 (%) + operatingIncomeYoy : float — 영업이익 전기대비 (%) + operatingLeverage : float — 영업레버리지 (배) + """ + trend = calcGrowthTrend(company, basePeriod=basePeriod) + if trend is None or len(trend["history"]) < 2: + return None + + cagr = trend["cagr"] + revCagr = cagr.get("revenue") + opCagr = cagr.get("operatingIncome") + + niCagr = cagr.get("netIncome") + hist = trend["history"] + + quality = "판단 불가" + if revCagr is not None and opCagr is not None: + if revCagr < 0: + quality = "역성장" + elif niCagr is not None and niCagr < -5: + quality = "이익 역성장" + elif opCagr < revCagr * 0.5: + # 최신 기 영업이익 YoY가 양수이면 "개선 중"으로 완화 (턴어라운드 기업 배려) + latestOpYoy = hist[0].get("operatingIncomeYoy") if hist else None + if latestOpYoy is not None and latestOpYoy > 10: + quality = "개선 중" + else: + quality = "외형 위주" + elif opCagr > revCagr * 1.5 and opCagr > 0: + quality = "내실 위주" + elif revCagr > 0 and opCagr > 0: + quality = "균형" + + # 이익 성장률이 매출보다 빠른지 (operating leverage) + hist = trend["history"] + leverageEffect = [] + for h in hist: + ry = h.get("revenueYoy") + oy = h.get("operatingIncomeYoy") + if ry is not None and oy is not None and ry != 0: + leverageEffect.append( + { + "period": h["period"], + "revenueYoy": ry, + "operatingIncomeYoy": oy, + "operatingLeverage": round(oy / ry, 2), + } + ) + + return { + "quality": quality, + "cagr": cagr, + "leverageEffect": leverageEffect, + } + + +# ── SGR + 갭 ── + + +@memoized_calc +def calcSustainableGrowthRate(company, *, basePeriod: str | None = None) -> dict | None: + """지속가능성장률(SGR) vs 실제 매출성장률 갭. + + SGR = ROE x (1 - 배당성향/100) + gap = 실제 매출성장률 - SGR + - gap > 0: 외부 자본 필요 (성장이 내부 역량 초과) + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + revenue : float — 매출 (원) + netIncome : float — 순이익 (원) + equity : float — 자기자본 (원) + roe : float — ROE (%) + payoutRatio : float — 배당성향 (%) + sgr : float — 지속가능성장률 (%) + actualGrowth : float — 실제 매출성장률 (%) + gap : float — 실제-SGR 갭 (%) + - gap < 0: 여유 (자사주/배당 확대 여력) + """ + # snakeId 단일 패턴 + isResult = company.select("IS", ["매출액", "당기순이익"]) + bsResult = company.select("BS", ["자본총계"]) + cfResult = company.select("CF", ["dividends_paid"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + cfParsed = toDictBySnakeId(cfResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + rev = isData.get("sales", {}) + ni = isData.get("net_profit", {}) + eq = bsData.get("stockholders_equity", {}) + + divRow = cfParsed[0].get("dividends_paid", {}) if cfParsed else {} + + yCols = _annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS + 1) + if len(yCols) < 2: + return None + history = [] + for i, col in enumerate(yCols[:-1]): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + niVal = ni.get(col) + eqVal = eq.get(col) + revVal = rev.get(col) + revPrev = rev.get(prevCol) if prevCol else None + + roe = round(niVal / eqVal * 100, 2) if niVal is not None and eqVal and eqVal != 0 else None + actualGrowth = _yoy(revVal, revPrev) + divPaid = abs(divRow.get(col) or 0) + payoutRatio = round(divPaid / niVal * 100, 2) if niVal and niVal > 0 and divPaid > 0 else None + + sgr = None + retentionRatio = None + if roe is not None: + if payoutRatio is not None and payoutRatio >= 0: + retentionRatio = round(1 - payoutRatio / 100, 4) + else: + retentionRatio = 1.0 + sgr = round(roe * retentionRatio, 2) + + gap = round(actualGrowth - sgr, 2) if sgr is not None and actualGrowth is not None else None + + history.append( + { + "period": col, + "revenue": revVal, + "netIncome": niVal, + "equity": eqVal, + "roe": roe, + "payoutRatio": payoutRatio, + "sgr": sgr, + "actualGrowth": actualGrowth, + "gap": gap, + } + ) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcGrowthFlags(company, *, basePeriod: str | None = None) -> list[str]: + """성장성 경고/기회 플래그. + + Returns + ------- + list[str] + 경고/기회 메시지 리스트. 빈 리스트이면 이상 없음. + """ + flags: list[str] = [] + + trend = calcGrowthTrend(company, basePeriod=basePeriod) + if trend is None: + return flags + + hist = trend["history"] + + # 매출 3기 연속 역성장 + if len(hist) >= 3: + revYoys = [h.get("revenueYoy") for h in hist[:3]] + if all(v is not None and v < 0 for v in revYoys): + flags.append(f"매출 3기 연속 역성장 (최근 {revYoys[0]:.1f}%)") + + # 매출 고성장 + if hist and hist[0].get("revenueYoy") is not None and hist[0]["revenueYoy"] > 20: + flags.append(f"매출 고성장 ({hist[0]['revenueYoy']:.1f}%)") + + # 매출 성장 > 이익 감소 괴리 + if hist: + h = hist[0] + ry = h.get("revenueYoy") + oy = h.get("operatingIncomeYoy") + if ry is not None and oy is not None and ry > 10 and oy < 0: + flags.append(f"매출 성장({ry:.0f}%)에도 이익 감소({oy:.0f}%) -- 수익성 희석") + + return flags + + +# ── 계정별 CAGR 비교 ── + + +from dartlab.core.finance.calc import cagr as _cagr # noqa: E402 + + +@memoized_calc +def calcCagrComparison(company, *, basePeriod: str | None = None) -> dict | None: + """계정별 CAGR 비교 — 절대값 장기 추세로 구조적 변화 감지. + + 매출 CAGR vs 영업이익 CAGR → 마진 방향 + 자산 CAGR vs 매출 CAGR → 자산 효율 방향 + 부채 CAGR vs 자본 CAGR → 레버리지 방향 + + Returns + ------- + dict + comparisons : list[dict] + label : str — 비교 레이블 ("매출 vs 영업이익") + item1 : str — 첫 번째 항목명 + cagr1 : float — 첫 번째 CAGR (%) + item2 : str — 두 번째 항목명 + cagr2 : float — 두 번째 CAGR (%) + gap : float — cagr1-cagr2 (%) + signal : str — 판단 ("양호"|"주의"|"경고") + period : str — CAGR 산출 기간 ("2017 → 2025") + """ + isResult = company.select("IS", ["매출액", "영업이익"]) + bsResult = company.select("BS", ["자산총계", "부채총계", "자본총계"]) + cfResult = company.select("CF", ["유형자산의취득"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + cfParsed = toDictBySnakeId(cfResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + cfData = cfParsed[0] if cfParsed else {} + + yCols = _annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS + 1) + if len(yCols) < 3: + return None + + def _v(row, col): + v = row.get(col) if row else None + return v if v is not None else 0 + + def _vF(row, col): + v = row.get(col) + return v if v is not None else 0 + + latest = yCols[0] + oldest = yCols[-1] + years = len(yCols) - 1 + + revRow = isData.get("매출액", {}) + opRow = isData.get("영업이익", {}) + taRow = bsData.get("자산총계", {}) + tlRow = bsData.get("부채총계", {}) + teRow = bsData.get("자본총계", {}) + capexRow = cfData.get("유형자산의취득", {}) + + pairs = [ + ( + "마진 방향", + "매출", + _vF(revRow, oldest), + _vF(revRow, latest), + "영업이익", + _vF(opRow, oldest), + _vF(opRow, latest), + ), + ("자산 효율", "자산", _v(taRow, oldest), _v(taRow, latest), "매출", _vF(revRow, oldest), _vF(revRow, latest)), + ("레버리지", "부채", _v(tlRow, oldest), _v(tlRow, latest), "자본", _v(teRow, oldest), _v(teRow, latest)), + ( + "투자 방향", + "CAPEX", + abs(_vF(capexRow, oldest)), + abs(_vF(capexRow, latest)), + "매출", + _vF(revRow, oldest), + _vF(revRow, latest), + ), + ] + + comparisons = [] + for label, name1, start1, end1, name2, start2, end2 in pairs: + c1 = _cagr(start1, end1, years) + c2 = _cagr(start2, end2, years) + if c1 is not None and c2 is not None: + gap = round(c1 - c2, 2) + if label == "마진 방향": + signal = "마진 확대" if gap <= 0 else "마진 압박" + elif label == "자산 효율": + signal = "효율 개선" if gap <= 0 else "효율 하락" + elif label == "레버리지": + signal = "디레버리징" if gap <= 0 else "레버리지 확대" + else: + signal = "투자 확대" if gap > 0 else "투자 축소" + comparisons.append( + { + "label": label, + "item1": name1, + "cagr1": c1, + "item2": name2, + "cagr2": c2, + "gap": gap, + "signal": signal, + } + ) + + if not comparisons: + return None + return {"comparisons": comparisons, "period": f"{oldest} → {latest}"} diff --git a/src/dartlab/analysis/financial/insight/__init__.py b/src/dartlab/analysis/financial/insight/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b746230d9caec15cf1c42c19d2c97c6bb62dd591 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/__init__.py @@ -0,0 +1,41 @@ +"""인사이트 분석 엔진. + +7영역 등급 분석 + 이상치 탐지 + 종합 요약. + +사용법:: + + from dartlab.analysis.financial.insight import analyze + + result = analyze("005930") + result.grades() # {'performance': 'A', 'profitability': 'B', ...} + result.anomalies # [Anomaly(...), ...] + result.summary # "삼성전자는 실적, 재무건전성 등..." + result.profile # "premium" +""" + +from dartlab.analysis.financial.insight.pipeline import analyze, analyzeAudit +from dartlab.analysis.financial.insight.types import ( + AnalysisResult, + Anomaly, + AuditDataForAnomaly, + DistressAxis, + DistressResult, + Flag, + InsightResult, + MarketDataForDistress, + ModelScore, +) + +__all__ = [ + "analyze", + "analyzeAudit", + "AnalysisResult", + "Anomaly", + "AuditDataForAnomaly", + "DistressAxis", + "DistressResult", + "Flag", + "InsightResult", + "MarketDataForDistress", + "ModelScore", +] diff --git a/src/dartlab/analysis/financial/insight/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04bbffea66ccf794d34b1ad7e8d6731f91ea7c76 Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/anomaly.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/anomaly.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c184436c7e1ff3178f2c303fb01b360fcaac5e9e Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/anomaly.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/benchmark.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/benchmark.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f783ad68867f5ad0ba1dad0f0ac95115cf77e2de Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/benchmark.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/detector.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/detector.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd2ec83fe07b28154a57299cad4fb8040a676e7a Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/detector.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/distress.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/distress.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bec87d437abda2a7eab432f4ce0271aa69a68446 Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/distress.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/grading.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/grading.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05ddb821d2d443107ec5bf7909796661a7f5968b Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/grading.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/pipeline.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/pipeline.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41ca18ca541c8b6e0e3b4fdd8758bb1cb7649cef Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/pipeline.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/spec.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/spec.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..adb662f1b2d39b71b8ebbac8d573c4379545494e Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/spec.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/summary.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/summary.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b156d47e32e54649a1b8bfc2915d44bfded8d629 Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/summary.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/__pycache__/types.cpython-312.pyc b/src/dartlab/analysis/financial/insight/__pycache__/types.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bd4cd826ce14c08b698471bb167b8a171508ef0 Binary files /dev/null and b/src/dartlab/analysis/financial/insight/__pycache__/types.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/insight/anomaly.py b/src/dartlab/analysis/financial/insight/anomaly.py new file mode 100644 index 0000000000000000000000000000000000000000..8137de0c0ebfdb6440bc7365c38548bbfc860a09 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/anomaly.py @@ -0,0 +1,762 @@ +"""이상치 탐지 — 11개 룰 기반.""" + +from __future__ import annotations + +import math +from typing import Optional + +from dartlab.analysis.financial.insight.types import Anomaly, AuditDataForAnomaly +from dartlab.core.finance.extract import getAnnualValues + + +def _yoyChange(vals: list[Optional[float]]) -> Optional[float]: + from dartlab.core.finance.ratios import yoy_pct + + valid = [(i, v) for i, v in enumerate(vals) if v is not None] + if len(valid) < 2: + return None + _, prev = valid[-2] + _, curr = valid[-1] + return yoy_pct(curr, prev) + + +def detectEarningsQuality(aSeries: dict, isFinancial: bool = False) -> list[Anomaly]: + """이익 품질 이상치: 영업이익↑ but 영업CF↓ (금융업 제외).""" + anomalies: list[Anomaly] = [] + + if isFinancial: + return anomalies + + opIncomeVals = getAnnualValues(aSeries, "IS", "operating_profit") + opCfVals = getAnnualValues(aSeries, "CF", "operating_cashflow") + + opGrowth = _yoyChange(opIncomeVals) + cfGrowth = _yoyChange(opCfVals) + + if opGrowth is not None and cfGrowth is not None: + if opGrowth > 10 and cfGrowth < -10: + anomalies.append( + Anomaly( + "danger", + "earningsQuality", + f"이익↑(+{opGrowth:.0f}%) but 영업CF↓({cfGrowth:.0f}%) — 이익 품질 의심", + opGrowth - cfGrowth, + ) + ) + elif opGrowth > 0 and cfGrowth < 0 and abs(cfGrowth) > 20: + anomalies.append( + Anomaly( + "warning", + "earningsQuality", + f"이익 증가(+{opGrowth:.0f}%) 대비 영업CF 감소({cfGrowth:.0f}%)", + opGrowth - cfGrowth, + ) + ) + + netIncomeVals = getAnnualValues(aSeries, "IS", "net_profit") + + latestNi = None + latestCf = None + for v in reversed(netIncomeVals): + if v is not None: + latestNi = v + break + for v in reversed(opCfVals): + if v is not None: + latestCf = v + break + + if latestNi and latestCf and latestNi > 0 and latestCf < 0: + anomalies.append( + Anomaly( + "danger", + "earningsQuality", + f"순이익 흑자({latestNi / 1e8:,.0f}억) but 영업CF 적자({latestCf / 1e8:,.0f}억)", + ) + ) + + return anomalies + + +def detectWorkingCapitalAnomaly(aSeries: dict) -> list[Anomaly]: + """운전자본 이상치: 매출채권/재고 급증 > 매출 증가.""" + anomalies: list[Anomaly] = [] + + arVals = getAnnualValues(aSeries, "BS", "trade_and_other_receivables") + if not arVals: + arVals = getAnnualValues(aSeries, "BS", "trade_and_other_receivables") + invVals = getAnnualValues(aSeries, "BS", "inventories") + revVals = getAnnualValues(aSeries, "IS", "sales") + + arGrowth = _yoyChange(arVals) + invGrowth = _yoyChange(invVals) + revGrowth = _yoyChange(revVals) + + if arGrowth is not None and revGrowth is not None: + if arGrowth > revGrowth + 20 and arGrowth > 30: + anomalies.append( + Anomaly( + "warning", + "workingCapital", + f"매출채권 급증(+{arGrowth:.0f}%) > 매출 증가(+{revGrowth:.0f}%) — 수금 지연 가능", + arGrowth - revGrowth, + ) + ) + + if invGrowth is not None and revGrowth is not None: + if invGrowth > revGrowth + 30 and invGrowth > 40: + anomalies.append( + Anomaly( + "warning", + "workingCapital", + f"재고자산 급증(+{invGrowth:.0f}%) > 매출 증가(+{revGrowth:.0f}%) — 재고 과잉 가능", + invGrowth - revGrowth, + ) + ) + elif invGrowth is not None and invGrowth > 50: + anomalies.append( + Anomaly( + "info", + "workingCapital", + f"재고자산 대폭 증가(+{invGrowth:.0f}%)", + invGrowth, + ) + ) + + return anomalies + + +def detectBalanceSheetShift(aSeries: dict) -> list[Anomaly]: + """BS 구조 급변: 부채/차입금/자본 ±50% 이상.""" + anomalies: list[Anomaly] = [] + + checkItems = [ + ("BS", "total_liabilities", "부채총계"), + ("BS", "shortterm_borrowings", "단기차입금"), + ("BS", "longterm_borrowings", "장기차입금"), + ("BS", "debentures", "사채"), + ("BS", "owners_of_parent_equity", "자본총계"), + ] + + for sjDiv, snakeId, label in checkItems: + vals = getAnnualValues(aSeries, sjDiv, snakeId) + change = _yoyChange(vals) + if change is not None and abs(change) > 50: + direction = "급증" if change > 0 else "급감" + severity = "warning" if abs(change) > 100 else "info" + anomalies.append( + Anomaly( + severity, + "balanceSheetShift", + f"{label} {direction} ({change:+.0f}%)", + change, + ) + ) + + equityVals = getAnnualValues(aSeries, "BS", "owners_of_parent_equity") + valid = [v for v in equityVals if v is not None] + if valid and valid[-1] is not None and valid[-1] < 0: + anomalies.append( + Anomaly( + "danger", + "balanceSheetShift", + f"자본잠식 ({valid[-1] / 1e8:,.0f}억)", + valid[-1], + ) + ) + + return anomalies + + +def detectCashBurn(aSeries: dict, isFinancial: bool = False) -> list[Anomaly]: + """현금 소진: 현금 급감, 영업CF적자+재무CF양수 (금융업 제외).""" + anomalies: list[Anomaly] = [] + + cashVals = getAnnualValues(aSeries, "BS", "cash_and_cash_equivalents") + cashChange = _yoyChange(cashVals) + + if cashChange is not None and cashChange < -50: + anomalies.append( + Anomaly( + "warning", + "cashBurn", + f"현금성 자산 급감 ({cashChange:.0f}%)", + cashChange, + ) + ) + + opCfVals = getAnnualValues(aSeries, "CF", "operating_cashflow") + finCfVals = getAnnualValues(aSeries, "CF", "cash_flows_from_financing_activities") + + latestOp = None + latestFin = None + for v in reversed(opCfVals): + if v is not None: + latestOp = v + break + for v in reversed(finCfVals): + if v is not None: + latestFin = v + break + + if not isFinancial and latestOp is not None and latestOp < 0 and latestFin is not None and latestFin > 0: + anomalies.append( + Anomaly( + "warning", + "cashBurn", + f"영업CF 적자({latestOp / 1e8:,.0f}억) + 재무CF 양수({latestFin / 1e8:,.0f}억) — 차입으로 영업적자 보전", + ) + ) + + return anomalies + + +def detectMarginDivergence(aSeries: dict) -> list[Anomaly]: + """마진 급변: 영업이익률 ±5%p, 영업외손익 급변.""" + anomalies: list[Anomaly] = [] + + revVals = getAnnualValues(aSeries, "IS", "sales") + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + niVals = getAnnualValues(aSeries, "IS", "net_profit") + + validRev = [v for v in revVals if v is not None] + validOp = [v for v in opVals if v is not None] + validNi = [v for v in niVals if v is not None] + + if len(validRev) >= 2 and len(validOp) >= 2: + prevMargin = (validOp[-2] / validRev[-2] * 100) if validRev[-2] and validRev[-2] != 0 else None + currMargin = (validOp[-1] / validRev[-1] * 100) if validRev[-1] and validRev[-1] != 0 else None + + if prevMargin is not None and currMargin is not None: + marginShift = currMargin - prevMargin + if abs(marginShift) > 5: + direction = "개선" if marginShift > 0 else "악화" + severity = "info" if marginShift > 0 else "warning" + anomalies.append( + Anomaly( + severity, + "marginDivergence", + f"영업이익률 {direction} ({prevMargin:.1f}% → {currMargin:.1f}%, {marginShift:+.1f}%p)", + marginShift, + ) + ) + + if len(validOp) >= 2 and len(validNi) >= 2: + prevGap = validNi[-2] - validOp[-2] if validOp[-2] is not None and validNi[-2] is not None else None + currGap = validNi[-1] - validOp[-1] if validOp[-1] is not None and validNi[-1] is not None else None + + if prevGap is not None and currGap is not None: + gapChange = currGap - prevGap + if abs(gapChange) > 0 and validOp[-1] and validOp[-1] != 0: + gapRatio = (abs(gapChange) / abs(validOp[-1])) * 100 + if gapRatio > 30: + anomalies.append( + Anomaly( + "warning", + "marginDivergence", + f"영업외손익 급변 (영업이익 대비 {gapRatio:.0f}% 규모 변동)", + gapRatio, + ) + ) + + return anomalies + + +def detectFinancialSectorAnomaly(aSeries: dict, isFinancial: bool) -> list[Anomaly]: + """금융업 전용 이상치: 부채비율 급변, 순이익 급감.""" + if not isFinancial: + return [] + + anomalies: list[Anomaly] = [] + + liabVals = getAnnualValues(aSeries, "BS", "total_liabilities") + equityVals = getAnnualValues(aSeries, "BS", "owners_of_parent_equity") or getAnnualValues( + aSeries, "BS", "total_stockholders_equity" + ) + + validLiab = [v for v in liabVals if v is not None] + validEq = [v for v in equityVals if v is not None] + + if len(validLiab) >= 2 and len(validEq) >= 2: + prevDr = (validLiab[-2] / validEq[-2] * 100) if validEq[-2] and validEq[-2] > 0 else None + currDr = (validLiab[-1] / validEq[-1] * 100) if validEq[-1] and validEq[-1] > 0 else None + + if prevDr is not None and currDr is not None: + drShift = currDr - prevDr + if abs(drShift) > 100: + direction = "급증" if drShift > 0 else "급감" + anomalies.append( + Anomaly( + "warning", + "financialSector", + f"금융업 부채비율 {direction} ({prevDr:.0f}% → {currDr:.0f}%, {drShift:+.0f}%p)", + drShift, + ) + ) + + niVals = getAnnualValues(aSeries, "IS", "net_profit") + niChange = _yoyChange(niVals) + if niChange is not None and niChange < -30: + anomalies.append( + Anomaly( + "warning", + "financialSector", + f"금융업 순이익 급감 ({niChange:.0f}%)", + niChange, + ) + ) + + return anomalies + + +def detectTrendDeterioration(aSeries: dict, isFinancial: bool = False) -> list[Anomaly]: + """시계열 악화 패턴 탐지: 연속적자, ICR<1, 부채비율 상승. + + 실험 084/006 검증 결과 기반. + severity: 4기+ danger, 3기 warning, 2기 info. + """ + anomalies: list[Anomaly] = [] + + # 순이익 연속 적자 + niVals = getAnnualValues(aSeries, "IS", "net_profit") + if not niVals: + niVals = getAnnualValues(aSeries, "IS", "net_income") + streak = 0 + for v in reversed(niVals): + if v is not None and v < 0: + streak += 1 + else: + break + if streak >= 2: + sev = "danger" if streak >= 4 else "warning" if streak >= 3 else "info" + anomalies.append(Anomaly(sev, "trendDeterioration", f"순이익 {streak}기 연속 적자", float(streak))) + + # 영업CF 연속 적자 + cfVals = getAnnualValues(aSeries, "CF", "operating_cashflow") + streak = 0 + for v in reversed(cfVals): + if v is not None and v < 0: + streak += 1 + else: + break + if streak >= 2: + sev = "danger" if streak >= 4 else "warning" if streak >= 3 else "info" + anomalies.append(Anomaly(sev, "trendDeterioration", f"영업CF {streak}기 연속 적자", float(streak))) + + if isFinancial: + return anomalies # ICR, 부채비율 추이는 금융업 구조적 왜곡 + + # ICR < 1 연속 (금융업 제외) + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + if not opVals: + opVals = getAnnualValues(aSeries, "IS", "operating_income") + fcVals = getAnnualValues(aSeries, "IS", "finance_costs") + if not fcVals: + fcVals = getAnnualValues(aSeries, "IS", "interest_expense") + + if opVals and fcVals: + n = min(len(opVals), len(fcVals)) + streak = 0 + for i in range(n - 1, -1, -1): + op_v = opVals[i] + fc_v = fcVals[i] + if op_v is not None and fc_v is not None and fc_v > 0 and op_v / fc_v < 1: + streak += 1 + else: + break + if streak >= 2: + sev = "danger" if streak >= 3 else "warning" + anomalies.append(Anomaly(sev, "trendDeterioration", f"ICR<1 {streak}기 연속", float(streak))) + + # 부채비율 연속 상승 (3기+) + tlVals = getAnnualValues(aSeries, "BS", "total_liabilities") + eqVals = getAnnualValues(aSeries, "BS", "owners_of_parent_equity") + if not eqVals: + eqVals = getAnnualValues(aSeries, "BS", "total_stockholders_equity") + + if tlVals and eqVals: + n = min(len(tlVals), len(eqVals)) + drSeries = [] + for i in range(n): + if tlVals[i] is not None and eqVals[i] is not None and eqVals[i] > 0: + drSeries.append(tlVals[i] / eqVals[i] * 100) + else: + drSeries.append(None) + + streak = 0 + for i in range(len(drSeries) - 1, 0, -1): + if drSeries[i] is not None and drSeries[i - 1] is not None and drSeries[i] > drSeries[i - 1]: + streak += 1 + else: + break + if streak >= 3: + sev = "warning" if streak >= 4 else "info" + anomalies.append(Anomaly(sev, "trendDeterioration", f"부채비율 {streak}기 연속 상승", float(streak))) + + return anomalies + + +def detectCCCDeterioration(aSeries: dict, isFinancial: bool = False) -> list[Anomaly]: + """CCC(현금전환주기) 악화 탐지. + + 실험 084/007 검증 결과 기반. + CCC 3기+ 연속 확대 시 운전자본 경색 경고. + 금융업 제외 (DSO/DIO/CCC 비적용). + """ + if isFinancial: + return [] + + anomalies: list[Anomaly] = [] + revVals = getAnnualValues(aSeries, "IS", "sales") + if not revVals: + revVals = getAnnualValues(aSeries, "IS", "revenue") + recVals = getAnnualValues(aSeries, "BS", "trade_and_other_receivables") + invVals = getAnnualValues(aSeries, "BS", "inventories") + payVals = getAnnualValues(aSeries, "BS", "trade_and_other_payables") + cogsVals = getAnnualValues(aSeries, "IS", "cost_of_sales") + + n = ( + min(len(revVals), len(recVals), len(invVals), len(payVals)) + if revVals and recVals and invVals and payVals + else 0 + ) + if n < 3: + return anomalies + + cccSeries: list[Optional[float]] = [] + for i in range(n): + rv = revVals[i] + rc = recVals[i] + iv = invVals[i] + pa = payVals[i] + co = cogsVals[i] if cogsVals and i < len(cogsVals) else rv + + if rv and rv > 0 and rc is not None and iv is not None and pa is not None and co and co > 0: + dso = rc / rv * 365 + dio = iv / co * 365 + dpo = pa / co * 365 + cccSeries.append(dso + dio - dpo) + else: + cccSeries.append(None) + + # 연속 확대 탐지 + streak = 0 + for i in range(len(cccSeries) - 1, 0, -1): + if cccSeries[i] is not None and cccSeries[i - 1] is not None and cccSeries[i] > cccSeries[i - 1]: + streak += 1 + else: + break + + if streak >= 3: + latest = cccSeries[-1] + sev = "warning" if streak >= 4 else "info" + anomalies.append( + Anomaly( + sev, + "cccDeterioration", + f"CCC {streak}기 연속 확대 (최신 {latest:.0f}일)" if latest else f"CCC {streak}기 연속 확대", + float(streak), + ) + ) + + return anomalies + + +# ── Big4 감사법인 목록 ── + +_BIG4_KEYWORDS = ["삼일", "PwC", "삼정", "KPMG", "한영", "EY", "안진", "Deloitte"] + + +def _isBig4(auditor: str | None) -> bool: + """감사인이 Big4인지 판정.""" + if not auditor: + return False + return any(kw in auditor for kw in _BIG4_KEYWORDS) + + +def detectAuditRedFlags(auditData: AuditDataForAnomaly | None) -> list[Anomaly]: + """감사 Red Flag 탐지 — PCAOB AS 3101, ISA 570/701/705, SOX 302/404. + + 6개 항목: 감사인 교체, 감사보수 급변, 계속기업 불확실성, + 내부통제 취약점, 감사의견 비적정, KAM 급증. + """ + if auditData is None: + return [] + + anomalies: list[Anomaly] = [] + + # 1. 감사인 비정상 교체 (PCAOB AS 3101) + auditors = auditData.auditors + if len(auditors) >= 2: + # 고유 감사인 수 (None 제외) + unique = [a for a in auditors if a is not None] + changes = [] + for i in range(1, len(unique)): + if unique[i] != unique[i - 1]: + changes.append((i, unique[i - 1], unique[i])) + + if len(changes) >= 3: + anomalies.append( + Anomaly( + "danger", + "audit", + f"감사인 {len(changes)}회 교체 (5년 내) — 빈번 교체 Red Flag", + float(len(changes)), + ) + ) + elif len(changes) >= 2: + anomalies.append(Anomaly("danger", "audit", "감사인 2년 이내 재교체 — Red Flag", float(len(changes)))) + elif len(changes) == 1: + _, prev, curr = changes[0] + if _isBig4(prev) and not _isBig4(curr): + anomalies.append(Anomaly("warning", "audit", f"Big4→비Big4 교체 ({prev} → {curr})", 1.0)) + + # 2. 감사보수 급변 (ISA 260, ±30% YoY) + fees = auditData.fees + if len(fees) >= 2: + validFees = [(i, f) for i, f in enumerate(fees) if f is not None and f > 0] + if len(validFees) >= 2: + _, prevFee = validFees[-2] + _, currFee = validFees[-1] + feeChange = (currFee - prevFee) / prevFee * 100 + if abs(feeChange) > 30: + direction = "급증" if feeChange > 0 else "급감" + anomalies.append(Anomaly("warning", "audit", f"감사보수 {direction} ({feeChange:+.0f}%)", feeChange)) + + # 3. 계속기업 불확실성 (ISA 570) + if auditData.hasGoingConcern: + anomalies.append(Anomaly("danger", "audit", "계속기업 불확실성 — 감사인 보고 (ISA 570)", 1.0)) + + # 4. 내부통제 취약점 (SOX 302/404) + if auditData.hasInternalControlWeakness: + anomalies.append(Anomaly("danger", "audit", "내부회계관리제도 취약점 보고 (SOX 302/404)", 1.0)) + + # 5. 감사의견 비적정 (ISA 705) + opinions = auditData.opinions + if opinions: + latest = None + for v in reversed(opinions): + if v is not None: + latest = v + break + if latest is not None and "적정" not in str(latest): + anomalies.append(Anomaly("danger", "audit", f"감사의견 비적정: {latest} (ISA 705)", 1.0)) + + # 6. KAM 급증 (ISA 701) + kamCounts = auditData.kamCounts + if len(kamCounts) >= 2: + validKam = [(i, k) for i, k in enumerate(kamCounts) if k is not None] + if len(validKam) >= 2: + _, prevKam = validKam[-2] + _, currKam = validKam[-1] + if currKam > prevKam + 2: + anomalies.append( + Anomaly( + "info", + "audit", + f"KAM 급증 ({prevKam}건 → {currKam}건) — 감사인 위험 인식 확대", + float(currKam - prevKam), + ) + ) + + return anomalies + + +def detectBenfordAnomaly(aSeries: dict) -> list[Anomaly]: + """Benford's Law 이상치 탐지 — 회계 조작 의심 신호. + + Nigrini (1996), AICPA 공식 감사 절차. + 재무제표 수치의 첫째 유효 자릿수 분포를 Benford 기대 분포와 비교. + χ² > 15.51 (df=8, p<0.05) → warning, χ² > 20.09 (p<0.01) → danger. + """ + anomalies: list[Anomaly] = [] + + # 모든 IS/BS/CF 값에서 첫째 유효 자릿수 추출 + digits: list[int] = [] + for sjDiv in ("IS", "BS", "CF"): + section = aSeries.get(sjDiv, {}) + for _key, vals in section.items(): + if not isinstance(vals, list): + continue + for v in vals: + if v is None or not isinstance(v, (int, float)): + continue + if v == 0 or not math.isfinite(v): + continue + # 첫째 유효 자릿수 추출 + absV = abs(v) + d = int(str(absV).lstrip("0").lstrip(".").lstrip("0")[:1]) if absV != 0 else 0 + if 1 <= d <= 9: + digits.append(d) + + # 최소 50개 이상 숫자 필요 + if len(digits) < 50: + return anomalies + + n = len(digits) + # Benford 기대 분포: P(d) = log10(1 + 1/d) + expected = {d: math.log10(1 + 1 / d) for d in range(1, 10)} + observed = {d: 0 for d in range(1, 10)} + for d in digits: + observed[d] += 1 + + # χ² 검정 + chi2 = 0.0 + for d in range(1, 10): + exp_count = expected[d] * n + obs_count = observed[d] + if exp_count > 0: + chi2 += (obs_count - exp_count) ** 2 / exp_count + + # df=8, p<0.01 → 20.09, p<0.05 → 15.51 + if chi2 > 20.09: + anomalies.append( + Anomaly( + "danger", + "earningsQuality", + f"Benford's Law 위반 (χ²={chi2:.1f}, p<0.01) — 회계 수치 분포 이상", + chi2, + ) + ) + elif chi2 > 15.51: + anomalies.append( + Anomaly( + "warning", + "earningsQuality", + f"Benford's Law 이탈 (χ²={chi2:.1f}, p<0.05) — 회계 수치 분포 주의", + chi2, + ) + ) + + return anomalies + + +def detectRevenueQuality(aSeries: dict) -> list[Anomaly]: + """매출 품질 탐지 — Dechow & Dichev (2002). + + OCF/Revenue 비율 추세: 매출이 늘어도 현금이 안 들어오면 의심. + - OCF/Revenue < 0 (매출 흑자인데 영업CF 적자) → danger + - OCF/Revenue 3기 연속 하락 → warning + - 매출채권 증가율 > 매출 증가율 × 1.5 (3기 연속) → warning + """ + anomalies: list[Anomaly] = [] + + revVals = getAnnualValues(aSeries, "IS", "sales") + cfVals = getAnnualValues(aSeries, "CF", "operating_cashflow") + arVals = getAnnualValues(aSeries, "BS", "trade_and_other_receivables") + + # OCF/Revenue 비율 시계열 + n = min(len(revVals), len(cfVals)) if revVals and cfVals else 0 + ocfRevRatios: list[float | None] = [] + for i in range(n): + rv = revVals[i] + cf = cfVals[i] + if rv is not None and cf is not None and rv > 0: + ocfRevRatios.append(cf / rv) + else: + ocfRevRatios.append(None) + + # OCF/Revenue < 0 (최신기, 매출 흑자인데 영업CF 적자) + if ocfRevRatios: + latest = None + for v in reversed(ocfRevRatios): + if v is not None: + latest = v + break + if latest is not None and latest < 0: + anomalies.append( + Anomaly( + "danger", + "earningsQuality", + f"매출 대비 영업CF 적자 (OCF/Revenue={latest:.1%}) — 매출 품질 의심", + latest * 100, + ) + ) + + # OCF/Revenue 3기 연속 하락 + validRatios = [r for r in ocfRevRatios if r is not None] + if len(validRatios) >= 3: + consecutive_decline = 0 + for i in range(len(validRatios) - 1, 0, -1): + if validRatios[i] < validRatios[i - 1]: + consecutive_decline += 1 + else: + break + if consecutive_decline >= 3: + anomalies.append( + Anomaly( + "warning", + "earningsQuality", + f"OCF/Revenue {consecutive_decline}기 연속 하락 — 매출 품질 악화 추세", + float(consecutive_decline), + ) + ) + + # 매출채권 증가율 > 매출 증가율 × 1.5 (3기 연속) + if arVals and revVals and len(arVals) >= 3 and len(revVals) >= 3: + n2 = min(len(arVals), len(revVals)) + arGrowths: list[float | None] = [] + revGrowths: list[float | None] = [] + for i in range(1, n2): + ar_prev, ar_curr = arVals[i - 1], arVals[i] + rv_prev, rv_curr = revVals[i - 1], revVals[i] + if ar_prev and ar_prev > 0 and ar_curr is not None: + arGrowths.append((ar_curr - ar_prev) / ar_prev * 100) + else: + arGrowths.append(None) + if rv_prev and rv_prev > 0 and rv_curr is not None: + revGrowths.append((rv_curr - rv_prev) / rv_prev * 100) + else: + revGrowths.append(None) + + # 최근 3기 매출채권 증가 > 매출 × 1.5 + consecutive_ar = 0 + for i in range(len(arGrowths) - 1, -1, -1): + ag = arGrowths[i] + rg = revGrowths[i] + if ag is not None and rg is not None and rg >= 0 and ag > rg * 1.5 and ag > 10: + consecutive_ar += 1 + else: + break + if consecutive_ar >= 3: + anomalies.append( + Anomaly( + "warning", + "earningsQuality", + f"매출채권 증가율 > 매출 증가율×1.5 {consecutive_ar}기 연속 — 수금 품질 의심", + float(consecutive_ar), + ) + ) + + return anomalies + + +def runAnomalyDetection( + aSeries: dict, + isFinancial: bool = False, + *, + auditData: AuditDataForAnomaly | None = None, +) -> list[Anomaly]: + """전체 이상치 탐지 실행. + + Args: + aSeries: 연간 재무 시계열. + isFinancial: 금융업 여부. + auditData: 감사 데이터 (None이면 감사 탐지기 스킵, 하위호환). + """ + anomalies: list[Anomaly] = [] + anomalies.extend(detectEarningsQuality(aSeries, isFinancial)) + anomalies.extend(detectWorkingCapitalAnomaly(aSeries)) + anomalies.extend(detectBalanceSheetShift(aSeries)) + anomalies.extend(detectCashBurn(aSeries, isFinancial)) + anomalies.extend(detectMarginDivergence(aSeries)) + anomalies.extend(detectFinancialSectorAnomaly(aSeries, isFinancial)) + anomalies.extend(detectTrendDeterioration(aSeries, isFinancial)) + anomalies.extend(detectCCCDeterioration(aSeries, isFinancial)) + # 세계적 감사 기법 — Phase 086 + anomalies.extend(detectAuditRedFlags(auditData)) + anomalies.extend(detectBenfordAnomaly(aSeries)) + anomalies.extend(detectRevenueQuality(aSeries)) + + anomalies.sort(key=lambda a: {"danger": 0, "warning": 1, "info": 2}.get(a.severity, 3)) + return anomalies diff --git a/src/dartlab/analysis/financial/insight/benchmark.py b/src/dartlab/analysis/financial/insight/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..1d691754bbf069d6343313e1bc5743bb4dd9710a --- /dev/null +++ b/src/dartlab/analysis/financial/insight/benchmark.py @@ -0,0 +1,389 @@ +"""섹터별 재무비율 벤치마크. + +016_sectorBenchmark 실험(2026-03-09) 결과 기반. +2508종목 전수조사로 측정한 섹터별 중앙값/사분위수. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + +from dartlab.core.sector.types import Sector + + +@dataclass +class SectorBenchmark: + """섹터별 재무비율 중앙값/사분위수.""" + + omMedian: float + omQ1: float + omQ3: float + roeMedian: float + roeQ1: float + roeQ3: float + n: int + # Merton D2D 벤치마크 (실험 085_mertonEngine/004 결과 기반) + d2dMedian: Optional[float] = None + d2dQ1: Optional[float] = None + d2dQ3: Optional[float] = None + # 총자산회전율 벤치마크 (섹터별 구조적 차이 반영) + tatMedian: Optional[float] = None + tatQ1: Optional[float] = None + tatQ3: Optional[float] = None + # ROIC 벤치마크 + roicMedian: Optional[float] = None + roicQ1: Optional[float] = None + roicQ3: Optional[float] = None + + +BENCHMARKS: dict[Sector, SectorBenchmark] = { + Sector.IT: SectorBenchmark( + omMedian=2.7, + omQ1=-4.9, + omQ3=7.3, + roeMedian=12.7, + roeQ1=-17.5, + roeQ3=34.0, + n=466, + d2dMedian=4.0, + d2dQ1=2.4, + d2dQ3=6.7, + tatMedian=0.7, + tatQ1=0.35, + tatQ3=1.1, + roicMedian=5.0, + roicQ1=-2.0, + roicQ3=12.0, + ), + Sector.HEALTHCARE: SectorBenchmark( + omMedian=2.2, + omQ1=-19.0, + omQ3=10.3, + roeMedian=0.8, + roeQ1=-66.0, + roeQ3=27.8, + n=259, + d2dMedian=2.9, + d2dQ1=1.7, + d2dQ3=6.1, + tatMedian=0.5, + tatQ1=0.2, + tatQ3=0.9, + roicMedian=3.0, + roicQ1=-8.0, + roicQ3=10.0, + ), + Sector.CONSUMER_DISC: SectorBenchmark( + omMedian=3.2, + omQ1=0.1, + omQ3=6.7, + roeMedian=17.6, + roeQ1=-9.1, + roeQ3=30.5, + n=245, + d2dMedian=4.3, + d2dQ1=2.9, + d2dQ3=7.0, + tatMedian=0.9, + tatQ1=0.5, + tatQ3=1.3, + roicMedian=8.0, + roicQ1=0.0, + roicQ3=15.0, + ), + Sector.FINANCIALS: SectorBenchmark( + omMedian=6.9, + omQ1=3.2, + omQ3=15.6, + roeMedian=25.2, + roeQ1=6.2, + roeQ3=43.8, + n=63, + # 금융업: Merton D2D 구조적 왜곡으로 벤치마크 미설정 + tatMedian=0.05, + tatQ1=0.02, + tatQ3=0.1, + roicMedian=2.0, + roicQ1=0.5, + roicQ3=5.0, + ), + Sector.INDUSTRIALS: SectorBenchmark( + omMedian=3.5, + omQ1=-1.9, + omQ3=7.8, + roeMedian=18.1, + roeQ1=-7.8, + roeQ3=33.0, + n=405, + d2dMedian=4.1, + d2dQ1=2.8, + d2dQ3=6.6, + tatMedian=0.8, + tatQ1=0.45, + tatQ3=1.2, + roicMedian=7.0, + roicQ1=-1.0, + roicQ3=14.0, + ), + Sector.MATERIALS: SectorBenchmark( + omMedian=3.4, + omQ1=-0.6, + omQ3=7.3, + roeMedian=15.3, + roeQ1=-11.9, + roeQ3=29.4, + n=416, + d2dMedian=3.4, + d2dQ1=2.1, + d2dQ3=5.3, + tatMedian=0.7, + tatQ1=0.4, + tatQ3=1.1, + roicMedian=5.0, + roicQ1=-3.0, + roicQ3=12.0, + ), + Sector.ENERGY: SectorBenchmark( + omMedian=2.1, + omQ1=-3.6, + omQ3=5.8, + roeMedian=16.0, + roeQ1=-21.2, + roeQ3=30.7, + n=33, + d2dMedian=2.9, + d2dQ1=1.8, + d2dQ3=4.3, + tatMedian=0.6, + tatQ1=0.3, + tatQ3=1.0, + roicMedian=5.0, + roicQ1=-3.0, + roicQ3=12.0, + ), + Sector.UTILITIES: SectorBenchmark( + omMedian=2.9, + omQ1=1.1, + omQ3=4.6, + roeMedian=21.9, + roeQ1=11.9, + roeQ3=25.6, + n=12, + d2dMedian=6.0, + d2dQ1=4.0, + d2dQ3=8.0, + tatMedian=0.3, + tatQ1=0.15, + tatQ3=0.5, + roicMedian=3.0, + roicQ1=1.0, + roicQ3=6.0, + ), + Sector.COMMUNICATION: SectorBenchmark( + omMedian=1.0, + omQ1=-6.1, + omQ3=7.5, + roeMedian=-0.3, + roeQ1=-55.0, + roeQ3=24.2, + n=141, + d2dMedian=3.4, + d2dQ1=2.1, + d2dQ3=6.6, + tatMedian=0.5, + tatQ1=0.2, + tatQ3=0.9, + roicMedian=3.0, + roicQ1=-5.0, + roicQ3=10.0, + ), + Sector.CONSUMER_STAPLES: SectorBenchmark( + omMedian=3.7, + omQ1=1.2, + omQ3=7.3, + roeMedian=18.3, + roeQ1=0.8, + roeQ3=31.8, + n=123, + d2dMedian=5.1, + d2dQ1=3.6, + d2dQ3=8.3, + tatMedian=0.9, + tatQ1=0.5, + tatQ3=1.3, + roicMedian=7.0, + roicQ1=0.0, + roicQ3=14.0, + ), + Sector.REAL_ESTATE: SectorBenchmark( + omMedian=2.6, + omQ1=-5.5, + omQ3=6.1, + roeMedian=11.2, + roeQ1=-11.0, + roeQ3=30.9, + n=4, + d2dMedian=4.0, + d2dQ1=2.8, + d2dQ3=6.0, + tatMedian=0.2, + tatQ1=0.08, + tatQ3=0.4, + roicMedian=3.0, + roicQ1=-2.0, + roicQ3=8.0, + ), +} + +DEFAULT_BENCHMARK = SectorBenchmark( + omMedian=3.2, + omQ1=-2.3, + omQ3=7.7, + roeMedian=14.2, + roeQ1=-16.9, + roeQ3=31.1, + n=2167, + tatMedian=0.7, + tatQ1=0.3, + tatQ3=1.2, + roicMedian=5.0, + roicQ1=-2.0, + roicQ3=12.0, +) + +# ── US (S&P 500) 섹터 벤치마크 ── 공개 데이터 기반 추정, 추후 실험으로 정밀 보정 +US_BENCHMARKS: dict[Sector, SectorBenchmark] = { + Sector.IT: SectorBenchmark( + omMedian=22.0, + omQ1=12.0, + omQ3=32.0, + roeMedian=28.0, + roeQ1=15.0, + roeQ3=45.0, + n=75, + ), + Sector.HEALTHCARE: SectorBenchmark( + omMedian=8.0, + omQ1=-5.0, + omQ3=25.0, + roeMedian=18.0, + roeQ1=5.0, + roeQ3=35.0, + n=60, + ), + Sector.FINANCIALS: SectorBenchmark( + omMedian=30.0, + omQ1=20.0, + omQ3=42.0, + roeMedian=12.0, + roeQ1=8.0, + roeQ3=18.0, + n=70, + ), + Sector.CONSUMER_DISC: SectorBenchmark( + omMedian=10.0, + omQ1=4.0, + omQ3=18.0, + roeMedian=25.0, + roeQ1=10.0, + roeQ3=40.0, + n=55, + ), + Sector.CONSUMER_STAPLES: SectorBenchmark( + omMedian=12.0, + omQ1=7.0, + omQ3=20.0, + roeMedian=22.0, + roeQ1=12.0, + roeQ3=35.0, + n=35, + ), + Sector.INDUSTRIALS: SectorBenchmark( + omMedian=12.0, + omQ1=6.0, + omQ3=18.0, + roeMedian=22.0, + roeQ1=10.0, + roeQ3=35.0, + n=70, + ), + Sector.COMMUNICATION: SectorBenchmark( + omMedian=18.0, + omQ1=5.0, + omQ3=30.0, + roeMedian=15.0, + roeQ1=5.0, + roeQ3=30.0, + n=25, + ), + Sector.ENERGY: SectorBenchmark( + omMedian=15.0, + omQ1=5.0, + omQ3=25.0, + roeMedian=18.0, + roeQ1=8.0, + roeQ3=30.0, + n=23, + ), + Sector.MATERIALS: SectorBenchmark( + omMedian=12.0, + omQ1=5.0, + omQ3=18.0, + roeMedian=15.0, + roeQ1=8.0, + roeQ3=25.0, + n=28, + ), + Sector.UTILITIES: SectorBenchmark( + omMedian=20.0, + omQ1=14.0, + omQ3=28.0, + roeMedian=10.0, + roeQ1=7.0, + roeQ3=14.0, + n=28, + ), + Sector.REAL_ESTATE: SectorBenchmark( + omMedian=30.0, + omQ1=20.0, + omQ3=45.0, + roeMedian=8.0, + roeQ1=4.0, + roeQ3=14.0, + n=30, + ), +} + +US_DEFAULT_BENCHMARK = SectorBenchmark( + omMedian=14.0, + omQ1=5.0, + omQ3=25.0, + roeMedian=18.0, + roeQ1=8.0, + roeQ3=32.0, + n=500, +) + + +def getBenchmark(sector: Sector, market: str = "KR") -> SectorBenchmark: + """섹터별 벤치마크 반환.""" + if market == "US": + return US_BENCHMARKS.get(sector, US_DEFAULT_BENCHMARK) + return BENCHMARKS.get(sector, DEFAULT_BENCHMARK) + + +def sectorAdjustment(value: Optional[float], median: float, q1: float, q3: float) -> int: + """섹터 중앙값 대비 가점/감점 (±1). + + Q3 이상 → +1 (업종 상위) + Q1 이하 → -1 (업종 하위) + Q1~Q3 → 0 (업종 평균) + """ + if value is None: + return 0 + if value >= q3: + return 1 + if value <= q1: + return -1 + return 0 diff --git a/src/dartlab/analysis/financial/insight/detector.py b/src/dartlab/analysis/financial/insight/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..f86130cf430ffe99dea80e0e2b8d3dc7196934b1 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/detector.py @@ -0,0 +1,64 @@ +"""금융업 감지 + 불완전 연도 감지.""" + +from __future__ import annotations + +from dartlab.core.finance.extract import getAnnualValues, getLatest +from dartlab.core.finance.ratios import RatioResult + + +def _parseYear(period: str) -> str: + """period 문자열에서 연도 추출. 'YYYY_QN' / 'YYYY-QN' 모두 지원.""" + return period[:4] + + +def detectIncompleteYear(qPeriods: list[str]) -> tuple[str, int]: + """최신 연도의 분기 수를 반환. + + Returns: + (lastYear, quarterCount). quarterCount < 4면 불완전 연도. + """ + lastPeriod = qPeriods[-1] + lastYear = _parseYear(lastPeriod) + qCount = sum(1 for p in qPeriods if p.startswith(lastYear)) + return lastYear, qCount + + +def detectFinancialSector( + aSeries: dict, + ratios: RatioResult, +) -> tuple[bool, list[str]]: + """금융업 자동 감지 (신호 2개 이상이면 금융업). + + 신호 후보 6개: + 1. sales 없고 operating_profit 있음 + 2. 부채비율 500% 초과 + 3. 유동자산/유동부채 데이터 없음 + 4. 이자수익 계정 존재 + 5. 순이자수익 계정 존재 + 6. 보험수익 계정 존재 + """ + signals: list[str] = [] + + revVals = getAnnualValues(aSeries, "IS", "sales") + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + hasRevenue = any(v is not None for v in revVals) + hasOpIncome = any(v is not None for v in opVals) + if not hasRevenue and hasOpIncome: + signals.append("sales 없고 operating_profit 있음") + + if ratios.debtRatio is not None and ratios.debtRatio > 500: + signals.append(f"부채비율 {ratios.debtRatio:.0f}%") + + if ratios.currentRatio is None and getLatest(aSeries, "BS", "current_assets") is None: + signals.append("유동자산/유동부채 데이터 없음") + + if getLatest(aSeries, "IS", "interest_income") is not None: + signals.append("이자수익 계정 존재") + + if getLatest(aSeries, "IS", "net_interest_income") is not None: + signals.append("순이자수익 계정 존재") + + if getLatest(aSeries, "IS", "insurance_revenue") is not None: + signals.append("보험수익 계정 존재") + + return len(signals) >= 2, signals diff --git a/src/dartlab/analysis/financial/insight/distress.py b/src/dartlab/analysis/financial/insight/distress.py new file mode 100644 index 0000000000000000000000000000000000000000..4f711adfa6b63d823a62b98ab99ce2d27f1daf44 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/distress.py @@ -0,0 +1,583 @@ +"""부실 예측 종합 스코어카드. + +5축 가중 평균으로 기업 부실 위험을 종합 판정한다. +실험 084_distressModels Phase 1-4 + 085_mertonEngine 검증 결과 기반. + +축 구성 (100점 만점, 0=안전 100=위험): +- 정량 분석 (30%): O-Score, Z''-Score, Z-Score [Merton 없으면 40%] +- 시장 기반 (20%): Merton D2D + PD [Merton 없으면 0%] +- 이익 품질 (15%): Beneish M-Score, Sloan Accrual, Piotroski F-Score [없으면 20%] +- 추세 분석 (25%): anomaly에서 탐지된 시계열 패턴 [없으면 30%] +- 감사 위험 (10%): 감사의견 비적정 등 + +Merton 미제공 시 기존 4축(40/20/30/10) 그대로 동작 (하위호환 100%). +금융업(isFinancial=True) → Merton 무시 (은행 부채 구조적 왜곡). + +레벨: safe(<15), watch(<30), warning(<50), danger(<70), critical(>=70) +신용등급: AAA~D (S&P PD 매핑) +""" + +from __future__ import annotations + +from typing import Optional + +from dartlab.analysis.financial.insight.types import ( + Anomaly, + DistressAxis, + DistressResult, + ModelScore, +) +from dartlab.core.finance.merton import MertonResult +from dartlab.core.finance.ratios import RatioResult + +# ── 신용등급 매핑 테이블 (S&P PD↔Rating 대응) ── + +_CREDIT_GRADE_TABLE: list[tuple[float, str, str]] = [ + (5, "AAA", "투자적격 최상위"), + (10, "AA", "투자적격 상위"), + (15, "A", "투자적격"), + (25, "BBB", "투자적격 하한"), + (35, "BB", "투기등급"), + (50, "B", "투기등급 하위"), + (65, "CCC", "상당한 부실 위험"), + (80, "CC", "부실 임박"), + (90, "C", "부도 직전"), + (100, "D", "부도 수준"), +] + + +def _mapCreditGrade(overall: float) -> tuple[str, str]: + """종합 점수 → (등급, 설명). 기존 10단계.""" + for threshold, grade, desc in _CREDIT_GRADE_TABLE: + if overall < threshold: + return grade, desc + return "D", "부도 수준" + + +def _mapCreditGrade20(overall: float) -> tuple[str, str, float]: + """종합 점수 → (등급, 설명, PD%). 20단계 세분화. + + creditScorecard.mapTo20Grade()의 래퍼. + """ + from dartlab.core.finance.creditScorecard import mapTo20Grade + + return mapTo20Grade(overall) + + +# ── 개별 모델 해석 함수 ── + + +def _interpretOhlson(probability: float) -> ModelScore: + if probability < 1: + zone, interp = "safe", "부도 확률 극히 낮음. 재무구조 건전." + elif probability < 10: + zone, interp = "gray", "부도 확률 낮으나 모니터링 필요." + elif probability < 30: + zone, interp = "distress", "부도 확률 유의미. 재무구조 점검 필요." + else: + zone, interp = "distress", "부도 확률 매우 높음. 즉각적 재무 점검 권고." + return ModelScore( + name="Ohlson O-Score", + rawValue=round(probability, 2), + displayValue=f"P(부도) {probability:.1f}%", + zone=zone, + interpretation=interp, + reference="Ohlson (1980), 9변수 로지스틱, 학술 적중률 96.1%", + ) + + +def _interpretAltmanZpp(score: float) -> ModelScore: + if score > 5.0: + zone, interp = "safe", "비제조업/신흥시장 기준 안전 영역." + elif score > 2.6: + zone, interp = "gray", "회색 영역. 추가 모니터링 권고." + elif score > 1.1: + zone, interp = "distress", "부실 위험 영역. 재무 점검 필요." + else: + zone, interp = "distress", "부실 영역. 즉각적 대응 필요." + return ModelScore( + name="Altman Z''-Score", + rawValue=round(score, 2), + displayValue=f"Z'' = {score:.2f}", + zone=zone, + interpretation=interp, + reference="Altman (1995), 비제조업/신흥시장 변형 4변수", + ) + + +def _interpretAltmanZ(score: float) -> ModelScore: + if score > 3.0: + zone, interp = "safe", "제조업 기준 안전 영역." + elif score > 1.8: + zone, interp = "gray", "회색 영역. 추가 모니터링 권고." + else: + zone, interp = "distress", "부실 영역. 부도 위험 높음." + return ModelScore( + name="Altman Z-Score", + rawValue=round(score, 2), + displayValue=f"Z = {score:.2f}", + zone=zone, + interpretation=interp, + reference="Altman (1968), 제조업 5변수, 학술 적중률 95%", + ) + + +def _interpretBeneish(score: float) -> ModelScore: + if score > -1.78: + zone, interp = "distress", "이익 조작 가능성 높음. 회계 품질 의심." + elif score > -2.22: + zone, interp = "gray", "이익 조작 가능성 존재. 추가 검토 필요." + else: + zone, interp = "safe", "이익 조작 가능성 낮음. 회계 품질 양호." + return ModelScore( + name="Beneish M-Score", + rawValue=round(score, 2), + displayValue=f"M = {score:.2f}", + zone=zone, + interpretation=interp, + reference="Beneish (1999), 8변수, cutoff -2.22", + ) + + +def _interpretSloan(ratio: float) -> ModelScore: + abs_r = abs(ratio) + if abs_r > 20: + zone, interp = "distress", "발생주의 이익 비중 과다. 이익 품질 의심." + elif abs_r > 10: + zone, interp = "gray", "발생주의 이익 비중 다소 높음. 모니터링 필요." + else: + zone, interp = "safe", "발생주의 이익 비중 정상. 현금 기반 이익 건전." + return ModelScore( + name="Sloan Accrual", + rawValue=round(ratio, 2), + displayValue=f"{ratio:.1f}%", + zone=zone, + interpretation=interp, + reference="Sloan (1996), |Accrual/TA| > 10% 주의", + ) + + +def _interpretPiotroski(score: int) -> ModelScore: + if score >= 7: + zone, interp = "safe", "펀더멘탈 강건. 수익성·레버리지·효율성 양호." + elif score >= 5: + zone, interp = "gray", "펀더멘탈 보통. 일부 지표 개선 필요." + elif score >= 3: + zone, interp = "gray", "펀더멘탈 취약. 다수 지표 악화." + else: + zone, interp = "distress", "펀더멘탈 심각하게 취약. 전반적 악화." + return ModelScore( + name="Piotroski F-Score", + rawValue=float(score), + displayValue=f"F = {score}/9", + zone=zone, + interpretation=interp, + reference="Piotroski (2000), 9항목 바이너리, F>=7 강건", + ) + + +# ── 정량 축 점수 정규화 (0~100, 높을수록 위험) ── + + +def _normalizeOhlson(p: float) -> float: + return min(p, 100) + + +def _normalizeZpp(z: float) -> float: + if z < 1.1: + return 100 + if z > 5.0: + return 0 + return (1 - (z - 1.1) / 3.9) * 100 + + +def _normalizeZ(z: float) -> float: + if z < 1.8: + return 100 + if z > 3.0: + return 0 + return (1 - (z - 1.8) / 1.2) * 100 + + +def _normalizeBeneish(m: float) -> float: + if m > -1.78: + return 80 + if m > -2.22: + return 50 + return max(0, 25 + (m + 2.22) * 10) + + +def _normalizeSloan(ratio: float) -> float: + abs_r = abs(ratio) + if abs_r > 20: + return 80 + if abs_r > 10: + return 50 + return abs_r * 5 + + +def _normalizeFScore(f: int) -> float: + if f <= 2: + return 80 + if f <= 4: + return 50 + if f <= 6: + return 25 + return 0 + + +# ── Merton D2D 해석 ── + + +def _interpretMerton(result: MertonResult) -> ModelScore: + """Merton D2D → ModelScore.""" + d2d = result.d2d + if d2d > 4.0: + zone, interp = "safe", "부도 거리 매우 충분. 시장이 평가하는 신용 건전성 우수." + elif d2d > 2.0: + zone, interp = "gray", "부도 거리 보통. 시장 변동성 확대 시 주의." + elif d2d > 1.0: + zone, interp = "distress", "부도 거리 부족. 자산가치가 부채에 근접." + else: + zone, interp = "distress", "부도 거리 극히 부족. 부도 임박 가능성." + return ModelScore( + name="Merton D2D", + rawValue=round(d2d, 3), + displayValue=f"D2D = {d2d:.2f}, PD = {result.pd:.2f}%", + zone=zone, + interpretation=interp, + reference="Merton (1974), 구조 모형. Moody's KMV 글로벌 표준.", + ) + + +def _interpretAuditRedFlags(flagCount: int, hasCritical: bool) -> ModelScore: + """감사 Red Flag 수 → ModelScore.""" + if flagCount == 0: + zone, interp = "safe", "감사 관련 Red Flag 없음." + elif hasCritical: + zone, interp = "distress", f"심각한 감사 Red Flag {flagCount}건. 부실 징후 가능." + elif flagCount <= 2: + zone, interp = "gray", f"감사 주의 신호 {flagCount}건. 모니터링 필요." + else: + zone, interp = "distress", f"감사 Red Flag {flagCount}건 누적. 회계 품질 점검 필요." + return ModelScore( + name="Audit Red Flags", + rawValue=float(flagCount), + displayValue=f"{flagCount}건" + (" (심각 포함)" if hasCritical else ""), + zone=zone, + interpretation=interp, + reference="PCAOB AS 3101, ISA 570/701/705, SOX 302/404", + ) + + +def _normalizeMerton(d2d: float) -> float: + """D2D → 0~100 (높을수록 위험). D2D>4→0, D2D<0.5→100.""" + if d2d > 4.0: + return 0.0 + if d2d < 0.5: + return 100.0 + return (1 - (d2d - 0.5) / 3.5) * 100 + + +# ── 유동성 경보 ── + + +def _calcCashRunway(ratios: RatioResult) -> tuple[Optional[float], Optional[str]]: + """현금 소진 예상 개월 수 계산.""" + cash = ratios.cash or 0 + ocf = ratios.operatingCashflowTTM + + if ocf is not None and ocf > 0: + return 999, "충분 (영업CF 양수, 현금 축적 중)" + + cogs = ratios.costOfSales or 0 + sga = ratios.sga or 0 + monthly_opex = (cogs + sga) / 12 if (cogs + sga) > 0 else None + + if monthly_opex is None or monthly_opex <= 0: + return None, None + + monthly_burn = abs(ocf / 12) if ocf else monthly_opex + if monthly_burn <= 0: + return None, None + + months = cash / monthly_burn + + if months > 24: + alert = "충분 (2년+)" + elif months > 12: + alert = "양호 (1~2년)" + elif months > 6: + alert = "주의 (6~12개월)" + elif months > 3: + alert = "경고 (3~6개월)" + else: + alert = "위험 (3개월 미만)" + + return round(months, 1), alert + + +# ── 위험 요인 추출 ── + + +def _extractRiskFactors( + anomalies: list[Anomaly], + ratios: RatioResult, +) -> list[str]: + """anomaly + ratios에서 구조화된 위험 요인 목록 추출.""" + factors: list[str] = [] + + for a in anomalies: + if a.severity in ("danger", "warning"): + factors.append(a.text) + + if ratios.ohlsonProbability is not None and ratios.ohlsonProbability > 10: + factors.append(f"O-Score 부도 확률 {ratios.ohlsonProbability:.1f}%") + + if ratios.altmanZppScore is not None and ratios.altmanZppScore < 1.1: + factors.append(f"Z''-Score {ratios.altmanZppScore:.2f} (부실 영역)") + + if ratios.beneishMScore is not None and ratios.beneishMScore > -2.22: + factors.append(f"Beneish M-Score {ratios.beneishMScore:.2f} (이익 조작 의심)") + + if ratios.piotroskiFScore is not None and ratios.piotroskiFScore <= 2: + factors.append(f"Piotroski F-Score {ratios.piotroskiFScore}/9 (펀더멘탈 심각)") + + return factors + + +# ── 데이터 품질 판정 ── + + +def _assessDataQuality(modelCount: int) -> str: + if modelCount >= 5: + return "충분" + if modelCount >= 3: + return "보통" + return "부족" + + +# ── 메인 함수 ── + + +def calcDistress( + ratios: RatioResult, + anomalies: list[Anomaly], + isFinancial: bool = False, + *, + mertonResult: MertonResult | None = None, +) -> DistressResult: + """부실 예측 종합 스코어카드 계산. + + 각 모델의 원시 값 → zone 판정 → 해석 텍스트 → 학술 참조를 포함한 + 세계 수준의 근거 기반 레포트를 생성한다. + + mertonResult가 제공되면 5축(30/20/15/25/10), 미제공 시 4축(40/20/30/10). + isFinancial=True이면 Merton을 무시한다 (은행 부채 구조적 왜곡). + """ + # Merton 사용 여부: 비금융 + 수렴된 결과만 + useMerton = mertonResult is not None and not isFinancial and mertonResult.converged + + # ── 1. 정량 축 ── + quant_models: list[ModelScore] = [] + quant_norms: list[float] = [] + + if ratios.ohlsonProbability is not None: + quant_models.append(_interpretOhlson(ratios.ohlsonProbability)) + quant_norms.append(_normalizeOhlson(ratios.ohlsonProbability)) + + if ratios.altmanZppScore is not None: + quant_models.append(_interpretAltmanZpp(ratios.altmanZppScore)) + quant_norms.append(_normalizeZpp(ratios.altmanZppScore)) + + if ratios.altmanZScore is not None: + quant_models.append(_interpretAltmanZ(ratios.altmanZScore)) + quant_norms.append(_normalizeZ(ratios.altmanZScore)) + + quant_score = sum(quant_norms) / len(quant_norms) if quant_norms else 0 + quant_zones = [m.zone for m in quant_models] + if not quant_models: + quant_summary = "정량 모델 데이터 부족." + elif all(z == "safe" for z in quant_zones): + quant_summary = f"{len(quant_models)}개 모델 모두 안전 영역." + elif any(z == "distress" for z in quant_zones): + n_distress = sum(1 for z in quant_zones if z == "distress") + quant_summary = f"{n_distress}/{len(quant_models)}개 모델 부실 영역. 즉각 점검 필요." + else: + quant_summary = f"{len(quant_models)}개 모델 회색 영역 포함. 모니터링 권고." + + quant_axis = DistressAxis( + name="정량 분석", + score=round(quant_score, 1), + weight=0.30 if useMerton else 0.40, + models=quant_models, + summary=quant_summary, + ) + + # ── 2. 이익 품질 축 ── + eq_models: list[ModelScore] = [] + eq_norms: list[float] = [] + + if ratios.beneishMScore is not None: + eq_models.append(_interpretBeneish(ratios.beneishMScore)) + eq_norms.append(_normalizeBeneish(ratios.beneishMScore)) + + if ratios.sloanAccrualRatio is not None: + eq_models.append(_interpretSloan(ratios.sloanAccrualRatio)) + eq_norms.append(_normalizeSloan(ratios.sloanAccrualRatio)) + + if ratios.piotroskiFScore is not None: + eq_models.append(_interpretPiotroski(ratios.piotroskiFScore)) + eq_norms.append(_normalizeFScore(ratios.piotroskiFScore)) + + eq_score = sum(eq_norms) / len(eq_norms) if eq_norms else 0 + if not eq_models: + eq_summary = "이익 품질 모델 데이터 부족." + elif all(m.zone == "safe" for m in eq_models): + eq_summary = f"{len(eq_models)}개 지표 모두 양호. 이익 품질 건전." + elif any(m.zone == "distress" for m in eq_models): + eq_summary = "이익 품질 의심 지표 존재. 회계 검토 권고." + else: + eq_summary = "이익 품질 보통. 일부 지표 모니터링 필요." + + eq_axis = DistressAxis( + name="이익 품질", + score=round(eq_score, 1), + weight=0.15 if useMerton else 0.20, + models=eq_models, + summary=eq_summary, + ) + + # ── 3. 추세 축 ── + trend_score = 0.0 + trend_anomalies = [a for a in anomalies if a.category in ("trendDeterioration", "cccDeterioration")] + for a in trend_anomalies: + if a.severity == "danger": + trend_score += 40 + elif a.severity == "warning": + trend_score += 25 + else: + trend_score += 10 + trend_score = min(trend_score, 100) + + if not trend_anomalies: + trend_summary = "시계열 악화 패턴 없음." + else: + n_danger = sum(1 for a in trend_anomalies if a.severity == "danger") + trend_summary = f"악화 패턴 {len(trend_anomalies)}건 탐지" + if n_danger: + trend_summary += f" (심각 {n_danger}건). 즉각 점검 필요." + else: + trend_summary += ". 모니터링 권고." + + trend_axis = DistressAxis( + name="추세 분석", + score=round(trend_score, 1), + weight=0.25 if useMerton else 0.30, + summary=trend_summary, + ) + + # ── 4. 감사 축 ── + audit_score = 0.0 + audit_anomalies = [a for a in anomalies if a.category in ("audit", "governance")] + audit_models: list[ModelScore] = [] + + # 감사 Red Flag 수 기반 점수 + n_critical = sum(1 for a in audit_anomalies if a.severity == "danger") + sum(1 for a in audit_anomalies if a.severity == "warning") + n_total = len(audit_anomalies) + + if n_total > 0: + audit_models.append(_interpretAuditRedFlags(n_total, n_critical > 0)) + + for a in audit_anomalies: + if a.severity == "danger": + audit_score += 50 + elif a.severity == "warning": + audit_score += 25 + audit_score = min(audit_score, 100) + + if not audit_anomalies: + audit_summary = "감사 이상징후 없음." + elif n_critical > 0: + audit_summary = f"감사 Red Flag {n_total}건 (심각 {n_critical}건). 즉각 점검 필요." + else: + audit_summary = f"감사 이상 {n_total}건 탐지. 모니터링 권고." + + audit_axis = DistressAxis( + name="감사 위험", + score=round(audit_score, 1), + weight=0.10, + models=audit_models, + summary=audit_summary, + ) + + # ── 5. 시장 기반 축 (Merton) ── + axes = [quant_axis] + + if useMerton: + assert mertonResult is not None # type narrowing + merton_model = _interpretMerton(mertonResult) + merton_norm = _normalizeMerton(mertonResult.d2d) + merton_score = merton_norm + + if mertonResult.d2d > 4: + merton_summary = f"D2D {mertonResult.d2d:.2f} — 시장 기반 부도 거리 충분." + elif mertonResult.d2d > 2: + merton_summary = f"D2D {mertonResult.d2d:.2f} — 모니터링 필요." + elif mertonResult.d2d > 1: + merton_summary = f"D2D {mertonResult.d2d:.2f} — 부실 위험 영역." + else: + merton_summary = f"D2D {mertonResult.d2d:.2f} — 부도 임박 영역." + + market_axis = DistressAxis( + name="시장 기반", + score=round(merton_score, 1), + weight=0.20, + models=[merton_model], + summary=merton_summary, + ) + axes.append(market_axis) + + axes.extend([eq_axis, trend_axis, audit_axis]) + + # ── 종합 ── + overall = sum(ax.score * ax.weight for ax in axes) + + if overall >= 70: + level = "critical" + elif overall >= 50: + level = "danger" + elif overall >= 30: + level = "warning" + elif overall >= 15: + level = "watch" + else: + level = "safe" + + creditGrade, creditDesc = _mapCreditGrade(overall) + + # 유동성 + cashMonths, liquidityAlert = _calcCashRunway(ratios) + + # 위험 요인 + riskFactors = _extractRiskFactors(anomalies, ratios) + if useMerton and mertonResult is not None and mertonResult.d2d < 2.0: + riskFactors.append(f"Merton D2D {mertonResult.d2d:.2f} (부실 영역, PD={mertonResult.pd:.1f}%)") + + # 모델 수 / 데이터 품질 + modelCount = len(quant_models) + len(eq_models) + (1 if useMerton else 0) + dataQuality = _assessDataQuality(modelCount) + + return DistressResult( + level=level, + overall=round(overall, 1), + creditGrade=creditGrade, + creditDescription=creditDesc, + axes=axes, + cashRunwayMonths=cashMonths, + liquidityAlert=liquidityAlert, + riskFactors=riskFactors, + modelCount=modelCount, + dataQuality=dataQuality, + ) diff --git a/src/dartlab/analysis/financial/insight/grading.py b/src/dartlab/analysis/financial/insight/grading.py new file mode 100644 index 0000000000000000000000000000000000000000..704033cda70f5921c6efe8bff87094fe864aeac0 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/grading.py @@ -0,0 +1,1126 @@ +"""10영역 인사이트 등급 분석. + +영역: performance, profitability, health, cashflow, governance, risk, opportunity, + predictability, uncertainty, coreEarnings +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +from dartlab.analysis.financial.insight.benchmark import getBenchmark, sectorAdjustment +from dartlab.analysis.financial.insight.detector import detectIncompleteYear +from dartlab.analysis.financial.insight.types import Flag, InsightResult +from dartlab.core.finance.extract import getAnnualValues, getLatest +from dartlab.core.finance.ratios import RatioResult +from dartlab.core.sector.types import Sector + +if TYPE_CHECKING: + from dartlab.providers.dart.company import Company + + +def _scoreToGrade(score: int, maxScore: int) -> str: + ratio = score / maxScore if maxScore > 0 else 0 + if ratio >= 0.8: + return "A" + if ratio >= 0.5: + return "B" + if ratio >= 0.2: + return "C" + if ratio >= 0: + return "D" + return "F" + + +def _getGrowthYoY(annualVals: list[Optional[float]]) -> Optional[float]: + from dartlab.core.finance.ratios import yoy_pct + + valid = [(i, v) for i, v in enumerate(annualVals) if v is not None] + if len(valid) < 2: + return None + _, prev = valid[-2] + _, curr = valid[-1] + return yoy_pct(curr, prev) + + +def _getVolatility(qVals: list[Optional[float]]) -> Optional[float]: + recent = [v for v in qVals[-4:] if v is not None] + if len(recent) < 2: + return None + changes = [] + for i in range(len(recent) - 1): + if recent[i] != 0: + changes.append(abs((recent[i + 1] - recent[i]) / recent[i]) * 100) + return max(changes) if changes else None + + +def analyzePerformance( + aSeries: dict, + aYears: list[str], + qSeries: dict, + qPeriods: list[str], + isFinancial: bool = False, +) -> InsightResult: + """실적 성장성 분석.""" + lastYear, qCount = detectIncompleteYear(qPeriods) + incomplete = qCount < 4 + + revVals = getAnnualValues(aSeries, "IS", "sales") + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + + if incomplete and len(aYears) > 1: + useRevVals = revVals[:-1] + useOpVals = opVals[:-1] + correctionNote = f"(불완전연도 {lastYear} {qCount}Q 제외)" + else: + useRevVals = revVals + useOpVals = opVals + correctionNote = "" + + if isFinancial and not any(v is not None for v in useRevVals): + useRevVals = useOpVals + revLabel = "영업이익" + else: + revLabel = "매출" + + revGrowth = _getGrowthYoY(useRevVals) + opGrowth = _getGrowthYoY(useOpVals) + + qRevVals = qSeries.get("IS", {}).get("sales", []) + if isFinancial and not any(v is not None for v in qRevVals): + qRevVals = qSeries.get("IS", {}).get("operating_profit", []) + revVolatility = _getVolatility(qRevVals) + qOpVals = qSeries.get("IS", {}).get("operating_profit", []) + opVolatility = _getVolatility(qOpVals) + + details: list[str] = [] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + + if correctionNote: + details.append(correctionNote) + + if revGrowth is not None: + if revGrowth > 20: + details.append(f"{revLabel} 고성장 (+{revGrowth:.1f}%)") + opps.append(Flag("strong", "growth", f"{revLabel} {revGrowth:.1f}% 성장")) + score += 3 + elif revGrowth > 10: + details.append(f"{revLabel} 성장세 양호 (+{revGrowth:.1f}%)") + score += 2 + elif revGrowth > 0: + details.append(f"{revLabel} 소폭 성장 (+{revGrowth:.1f}%)") + score += 1 + elif revGrowth > -10: + details.append(f"{revLabel} 소폭 감소 ({revGrowth:.1f}%)") + else: + details.append(f"{revLabel} 급감 ({revGrowth:.1f}%)") + risks.append(Flag("danger", "finance", f"{revLabel} {revGrowth:.1f}% 급감")) + score -= 2 + + if opGrowth is not None and not isFinancial: + if opGrowth > 50: + details.append(f"영업이익 급증 (+{opGrowth:.1f}%)") + opps.append(Flag("strong", "growth", f"영업이익 {opGrowth:.1f}% 급증")) + score += 3 + elif opGrowth > 15: + details.append(f"영업이익 증가 (+{opGrowth:.1f}%)") + score += 2 + elif opGrowth < -30: + details.append(f"영업이익 급감 ({opGrowth:.1f}%)") + risks.append(Flag("danger", "finance", f"영업이익 {opGrowth:.1f}% 급감")) + score -= 2 + elif opGrowth < -10: + details.append(f"영업이익 감소 ({opGrowth:.1f}%)") + risks.append(Flag("warning", "finance", f"영업이익 {opGrowth:.1f}% 감소")) + score -= 1 + + if revVolatility is not None and revVolatility > 30: + details.append(f"{revLabel} 변동성 높음 (분기 최대 {revVolatility:.1f}%)") + risks.append(Flag("warning", "finance", f"{revLabel} 변동성 {revVolatility:.1f}%")) + + if not isFinancial and opVolatility is not None and opVolatility > 50: + details.append(f"영업이익 변동성 높음 (분기 최대 {opVolatility:.1f}%)") + risks.append(Flag("warning", "finance", f"영업이익 변동성 {opVolatility:.1f}%")) + + grade = _scoreToGrade(score, 6) + if revGrowth is None: + summary = "실적 데이터 부족" + elif revGrowth > 20 and opGrowth and opGrowth > 30: + summary = f"{revLabel}·이익 고성장" + elif revGrowth > 10 and opGrowth and opGrowth > 10: + summary = f"{revLabel}·이익 동반 성장" + elif revGrowth > 0: + summary = f"{revLabel} 성장세 유지" + elif revGrowth > -10: + summary = f"{revLabel} 정체" + else: + summary = f"{revLabel} 감소 추세" + + return InsightResult(grade, summary, details, risks, opps) + + +def analyzeProfitability( + ratios: RatioResult, + aSeries: dict, + isFinancial: bool = False, + sector: Sector = Sector.UNKNOWN, + market: str = "KR", +) -> InsightResult: + """수익성 분석.""" + details: list[str] = [] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + + if isFinancial: + return _analyzeProfitabilityFinancial(aSeries, details, risks, opps) + + om = ratios.operatingMargin + nm = ratios.netMargin + roe = ratios.roe + roa = ratios.roa + + if om is not None: + if om > 20: + details.append(f"영업이익률 우수 ({om:.1f}%)") + opps.append(Flag("strong", "finance", f"영업이익률 {om:.1f}%")) + score += 3 + elif om > 10: + details.append(f"영업이익률 양호 ({om:.1f}%)") + score += 2 + elif om > 5: + details.append(f"영업이익률 보통 ({om:.1f}%)") + score += 1 + elif om < 0: + details.append(f"영업적자 ({om:.1f}%)") + risks.append(Flag("danger", "finance", f"영업이익률 {om:.1f}% — 적자")) + score -= 2 + else: + details.append(f"영업이익률 저조 ({om:.1f}%)") + score -= 1 + + if om is not None and nm is not None and om != 0: + gap = nm - om + gapRatio = (gap / abs(om)) * 100 + if abs(gapRatio) > 50: + if gap > 0: + details.append(f"영업외수익 발생 (순이익률 {nm:.1f}% > 영업이익률 {om:.1f}%)") + risks.append(Flag("warning", "finance", "본업 외 수익에 의존")) + else: + details.append(f"영업외비용 발생 (순이익률 {nm:.1f}% < 영업이익률 {om:.1f}%)") + risks.append(Flag("warning", "finance", "영업외비용 확인 필요")) + elif abs(gap) < 2 and nm > 0: + details.append("영업이익≈순이익 — 본업 중심 수익구조") + opps.append(Flag("positive", "finance", "건전한 수익구조")) + + if roe is not None: + if roe > 20: + details.append(f"ROE 우수 ({roe:.1f}%)") + opps.append(Flag("strong", "finance", f"ROE {roe:.1f}%")) + score += 2 + elif roe > 10: + details.append(f"ROE 양호 ({roe:.1f}%)") + score += 1 + elif roe < 5: + details.append(f"ROE 저조 ({roe:.1f}%)") + + if roe is not None and roa is not None and roa > 0: + leverage = roe / roa + if isFinancial: + # 금융업은 구조적으로 레버리지가 높음 (예수부채). 경고 대상이 아님 + details.append(f"금융업 레버리지 {leverage:.1f}x (구조적 특성)") + elif leverage > 4: + details.append(f"높은 레버리지로 ROE 달성 (ROE/ROA={leverage:.1f}x)") + risks.append(Flag("warning", "finance", f"ROE/ROA {leverage:.1f}x — 부채 활용 높음")) + elif leverage < 1.5 and roe > 15: + details.append("낮은 레버리지로 고ROE — 진성 수익성") + opps.append(Flag("strong", "finance", f"레버리지 {leverage:.1f}x로 ROE {roe:.1f}%")) + + bm = getBenchmark(sector, market) + omAdj = sectorAdjustment(om, bm.omMedian, bm.omQ1, bm.omQ3) + roeAdj = sectorAdjustment(roe, bm.roeMedian, bm.roeQ1, bm.roeQ3) + adj = omAdj + roeAdj + if adj != 0: + score += adj + direction = "상향" if adj > 0 else "하향" + details.append( + f"[섹터 보정 {direction}: {sector.value} 대비 OM{'↑' if omAdj > 0 else '↓' if omAdj < 0 else '→'} ROE{'↑' if roeAdj > 0 else '↓' if roeAdj < 0 else '→'}]" + ) + + grade = _scoreToGrade(score, 5) + summary = "수익성 " + ("우수" if score >= 4 else "양호" if score >= 2 else "보통" if score >= 0 else "개선 필요") + return InsightResult(grade, summary, details, risks, opps) + + +def _analyzeProfitabilityFinancial( + aSeries: dict, + details: list[str], + risks: list[Flag], + opps: list[Flag], +) -> InsightResult: + """금융업 전용 수익성 분석 (ROE/ROA/CIR).""" + details.append("[금융업 수익성 기준 적용]") + score = 0 + netIncome = getLatest(aSeries, "IS", "net_profit") + totalAssets = getLatest(aSeries, "BS", "total_assets") + totalEquity = getLatest(aSeries, "BS", "owners_of_parent_equity") or getLatest( + aSeries, "BS", "total_stockholders_equity" + ) + opIncome = getLatest(aSeries, "IS", "operating_profit") + opExpense = getLatest(aSeries, "IS", "operating_expense") + + roe = (netIncome / totalEquity) * 100 if netIncome and totalEquity and totalEquity > 0 else None + roa = (netIncome / totalAssets) * 100 if netIncome and totalAssets and totalAssets > 0 else None + cir = ( + (opExpense / (opExpense + opIncome)) * 100 + if opExpense is not None and opIncome is not None and (opExpense + opIncome) != 0 + else None + ) + + if roe is not None: + if roe > 10: + details.append(f"ROE 우수 ({roe:.1f}%)") + opps.append(Flag("strong", "finance", f"금융업 ROE {roe:.1f}%")) + score += 3 + elif roe > 8: + details.append(f"ROE 양호 ({roe:.1f}%)") + score += 2 + elif roe > 5: + details.append(f"ROE 보통 ({roe:.1f}%)") + score += 1 + else: + details.append(f"ROE 저조 ({roe:.1f}%)") + risks.append(Flag("warning", "finance", f"금융업 ROE {roe:.1f}%")) + + if roa is not None: + if roa > 0.7: + details.append(f"ROA 양호 ({roa:.2f}%)") + score += 1 + elif roa > 0.4: + details.append(f"ROA 보통 ({roa:.2f}%)") + elif roa > 0: + details.append(f"ROA 저조 ({roa:.2f}%)") + else: + details.append(f"ROA 적자 ({roa:.2f}%)") + risks.append(Flag("danger", "finance", f"금융업 ROA {roa:.2f}%")) + score -= 2 + + if cir is not None: + if cir < 50: + details.append(f"CIR {cir:.1f}% — 효율적 운영") + opps.append(Flag("positive", "finance", f"CIR {cir:.1f}%")) + score += 1 + elif cir < 60: + details.append(f"CIR {cir:.1f}% — 보통") + else: + details.append(f"CIR {cir:.1f}% — 비효율") + risks.append(Flag("warning", "finance", f"CIR {cir:.1f}%")) + + grade = _scoreToGrade(score, 5) + summary = "금융업 수익성 " + ( + "우수" if score >= 4 else "양호" if score >= 2 else "보통" if score >= 0 else "개선 필요" + ) + return InsightResult(grade, summary, details, risks, opps) + + +def analyzeHealth(ratios: RatioResult, isFinancial: bool = False, currency: str = "KRW") -> InsightResult: + """재무건전성 분석.""" + details: list[str] = [] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + dr = ratios.debtRatio + cr = ratios.currentRatio + + if isFinancial: + details.append("[금융업 기준 적용]") + if dr is not None: + if dr < 1000: + details.append(f"부채비율 {dr:.0f}% — 금융업 양호") + opps.append(Flag("positive", "finance", f"금융업 부채비율 {dr:.0f}%")) + score += 3 + elif dr < 1500: + details.append(f"부채비율 {dr:.0f}% — 금융업 보통") + score += 1 + elif dr < 2000: + details.append(f"부채비율 {dr:.0f}% — 금융업 다소 높음") + else: + details.append(f"부채비율 {dr:.0f}% — 금융업 과다") + risks.append(Flag("warning", "finance", f"금융업 부채비율 {dr:.0f}%")) + score -= 1 + elif currency == "USD": + # US 기업: 자사주매입으로 equity 축소가 일반적, 부채비율 높음이 정상 + if dr is not None: + if dr < 100: + details.append(f"부채비율 매우 양호 ({dr:.0f}%)") + opps.append(Flag("strong", "finance", f"부채비율 {dr:.0f}%")) + score += 3 + elif dr < 200: + details.append(f"부채비율 양호 ({dr:.0f}%)") + opps.append(Flag("positive", "finance", f"부채비율 {dr:.0f}%")) + score += 2 + elif dr < 400: + details.append(f"부채비율 보통 ({dr:.0f}%)") + score += 1 + elif dr < 600: + details.append(f"부채비율 다소 높음 ({dr:.0f}%)") + else: + details.append(f"부채비율 과다 ({dr:.0f}%)") + risks.append(Flag("warning", "finance", f"부채비율 {dr:.0f}%")) + score -= 1 + + if cr is not None: + if cr > 150: + details.append(f"유동성 매우 충분 ({cr:.0f}%)") + opps.append(Flag("positive", "finance", f"유동비율 {cr:.0f}%")) + score += 2 + elif cr > 100: + details.append(f"유동성 충분 ({cr:.0f}%)") + score += 1 + elif cr > 80: + details.append(f"유동성 보통 ({cr:.0f}%)") + else: + details.append(f"유동성 부족 ({cr:.0f}%)") + risks.append(Flag("warning", "finance", f"유동비율 {cr:.0f}%")) + score -= 1 + else: + if dr is not None: + if dr < 50: + details.append(f"부채비율 매우 양호 ({dr:.0f}%)") + opps.append(Flag("strong", "finance", f"부채비율 {dr:.0f}%")) + score += 3 + elif dr < 100: + details.append(f"부채비율 양호 ({dr:.0f}%)") + opps.append(Flag("positive", "finance", f"부채비율 {dr:.0f}%")) + score += 2 + elif dr < 150: + details.append(f"부채비율 보통 ({dr:.0f}%)") + score += 1 + elif dr < 200: + details.append(f"부채비율 다소 높음 ({dr:.0f}%)") + else: + details.append(f"부채비율 과다 ({dr:.0f}%)") + risks.append(Flag("warning", "finance", f"부채비율 {dr:.0f}%")) + score -= 1 + + if cr is not None: + if cr > 200: + details.append(f"유동성 매우 충분 ({cr:.0f}%)") + opps.append(Flag("positive", "finance", f"유동비율 {cr:.0f}%")) + score += 2 + elif cr > 150: + details.append(f"유동성 충분 ({cr:.0f}%)") + score += 1 + elif cr > 100: + details.append(f"유동성 보통 ({cr:.0f}%)") + elif cr < 100: + details.append(f"유동성 부족 ({cr:.0f}%)") + risks.append(Flag("warning", "finance", f"유동비율 {cr:.0f}%")) + score -= 1 + + # ── 부실 예측 모델 신호 (ratios에서 계산된 값 활용) ── + # Ohlson O-Score: P(bankruptcy) > 10% → 경고 + if ratios.ohlsonProbability is not None: + if ratios.ohlsonProbability > 20: + details.append(f"O-Score 부도확률 {ratios.ohlsonProbability:.1f}% — 고위험") + risks.append(Flag("danger", "distress", f"O-Score P(부도) {ratios.ohlsonProbability:.1f}%")) + score -= 2 + elif ratios.ohlsonProbability > 10: + details.append(f"O-Score 부도확률 {ratios.ohlsonProbability:.1f}% — 주의") + risks.append(Flag("warning", "distress", f"O-Score P(부도) {ratios.ohlsonProbability:.1f}%")) + score -= 1 + + # Altman Z''-Score (금융업 포함 범용) + if ratios.altmanZppScore is not None: + if ratios.altmanZppScore < 1.1: + details.append(f"Z''-Score {ratios.altmanZppScore:.2f} — 부실 영역") + risks.append(Flag("danger", "distress", f"Z'' {ratios.altmanZppScore:.2f} (부실)")) + score -= 2 + elif ratios.altmanZppScore < 2.6: + details.append(f"Z''-Score {ratios.altmanZppScore:.2f} — 회색 영역") + risks.append(Flag("warning", "distress", f"Z'' {ratios.altmanZppScore:.2f} (회색)")) + score -= 1 + elif ratios.altmanZppScore > 5: + details.append(f"Z''-Score {ratios.altmanZppScore:.2f} — 안전") + score += 1 + + grade = _scoreToGrade(score, 7) + label = "금융업 재무건전성" if isFinancial else "재무건전성" + summary = f"{label} " + ("우수" if score >= 5 else "안정" if score >= 2 else "보통" if score >= 0 else "주의 필요") + return InsightResult(grade, summary, details, risks, opps) + + +def analyzeCashflow( + ratios: RatioResult, + aSeries: dict, + isFinancial: bool = False, +) -> InsightResult: + """현금흐름 분석.""" + if isFinancial: + return _analyzeCashflowFinancial(aSeries) + + details: list[str] = [] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + opCF = ratios.operatingCashflowTTM + fcf = ratios.fcf + revenue = ratios.revenueTTM + + if opCF is not None: + if opCF > 0: + details.append("영업활동 현금 창출 양호") + score += 2 + else: + details.append("영업활동 현금 적자") + risks.append(Flag("danger", "finance", "영업CF 적자")) + score -= 2 + + if fcf is not None: + if fcf > 0: + if revenue and revenue > 0: + fcfMargin = (fcf / revenue) * 100 + if fcfMargin > 15: + details.append(f"FCF 마진 우수 ({fcfMargin:.1f}%)") + opps.append(Flag("strong", "cashflow", f"FCF 마진 {fcfMargin:.1f}%")) + score += 3 + elif fcfMargin > 5: + details.append(f"FCF 마진 양호 ({fcfMargin:.1f}%)") + opps.append(Flag("positive", "cashflow", f"FCF 마진 {fcfMargin:.1f}%")) + score += 2 + else: + details.append(f"FCF 양호, 마진 저조 ({fcfMargin:.1f}%)") + score += 1 + else: + details.append("FCF 양호") + score += 1 + elif opCF and opCF > 0: + details.append("FCF 적자 — 투자 확대 중") + opps.append(Flag("positive", "growth", "적극 투자 (영업CF 양호)")) + else: + details.append("FCF 적자 — 현금 부족") + risks.append(Flag("warning", "finance", "FCF + 영업CF 부진")) + score -= 1 + + cfVals = getAnnualValues(aSeries, "CF", "operating_cashflow") + validCf = [v for v in cfVals if v is not None] + if len(validCf) >= 2: + improving = validCf[-1] > validCf[-2] + if improving and validCf[-1] > 0: + details.append("영업CF 개선 추세") + score += 1 + elif not improving and validCf[-1] < validCf[-2]: + details.append("영업CF 악화 추세") + + grade = _scoreToGrade(score, 6) + summary = "현금흐름 " + ("우수" if score >= 5 else "양호" if score >= 2 else "보통" if score >= 0 else "주의") + return InsightResult(grade, summary, details, risks, opps) + + +def _analyzeCashflowFinancial(aSeries: dict) -> InsightResult: + """금융업 전용 현금흐름 분석.""" + details: list[str] = ["[금융업 현금흐름]"] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + + opCF = getLatest(aSeries, "CF", "operating_cashflow") + dividendsPaid = getLatest(aSeries, "CF", "dividends_paid") + netIncome = getLatest(aSeries, "IS", "net_profit") + + if opCF is not None: + details.append(f"영업CF: {opCF / 1e8:,.0f}억") + + if dividendsPaid is not None and dividendsPaid > 0: + details.append(f"배당 지급: {dividendsPaid / 1e8:,.0f}억") + opps.append(Flag("positive", "shareholder", f"배당 지급 {dividendsPaid / 1e8:,.0f}억")) + score += 1 + + if netIncome is not None and netIncome > 0: + details.append(f"순이익 {netIncome / 1e8:,.0f}억") + score += 2 + + grade = _scoreToGrade(score, 3) + summary = "금융업 현금흐름 " + ("양호" if score >= 2 else "보통" if score >= 0 else "주의") + return InsightResult(grade, summary, details, risks, opps) + + +def _analyzeGovernanceFromSections(company: Company) -> InsightResult: + """report가 없을 때 sections 기반 governance 분석 (EDGAR 등).""" + import polars as pl + + docs = getattr(company, "docs", None) + if docs is None: + return InsightResult("N", "지배구조 데이터 없음") + sec = getattr(docs, "sections", None) + if sec is None or not isinstance(sec, pl.DataFrame) or sec.is_empty(): + return InsightResult("N", "지배구조 데이터 없음") + + # governance 관련 topic 검색 (EDGAR: director, compensation, ownership) + gov_pattern = "(?i)governance|director|compensation|ownership|security.?owner|executive.?comp" + gov_topics = sec.filter(pl.col("topic").cast(pl.Utf8).str.contains(gov_pattern)) + + if gov_topics.is_empty(): + return InsightResult("N", "지배구조 데이터 없음") + + # 데이터 존재량으로 점수 부여 + n_topics = gov_topics.select("topic").unique().height + n_blocks = gov_topics.height + # 메타 컬럼 제외한 기간 컬럼 수 + meta_cols = {"topic", "blockType", "blockOrder", "textNodeType", "textLevel", "textPath", "source", "chapter"} + period_cols = [c for c in gov_topics.columns if c not in meta_cols] + n_periods = 0 + for col in period_cols: + if gov_topics[col].drop_nulls().len() > 0: + n_periods += 1 + + details: list[str] = [] + score = 0 + max_score = 3 + + if n_topics >= 3: + score += 2 + details.append(f"지배구조 관련 {n_topics}개 topic, {n_blocks}개 블록 공시") + elif n_topics >= 1: + score += 1 + details.append(f"지배구조 관련 {n_topics}개 topic 공시") + + if n_periods >= 3: + score += 1 + details.append(f"{n_periods}개 기간 연속 공시 (일관성 양호)") + + grade = _scoreToGrade(score, max_score) + summary = "지배구조 " + ("양호" if grade in ("A", "B") else "보통" if grade == "C" else "제한적 정보") + return InsightResult(grade, summary, details) + + +def analyzeGovernance(company: Company | None) -> InsightResult: + """지배구조 분석.""" + details: list[str] = [] + risks: list[Flag] = [] + opps: list[Flag] = [] + score = 0 + maxScore = 0 + + if company is None: + return InsightResult("N", "기업 데이터 없음") + + # report namespace가 없으면 sections 기반 fallback (EDGAR 등) + if not hasattr(company, "report") or company.report is None: + return _analyzeGovernanceFromSections(company) + + rpt = company.report + + major = rpt.majorHolder + if major is not None and major.totalShareRatio: + maxScore += 3 + latest = None + for v in reversed(major.totalShareRatio): + if v is not None: + latest = v + break + if latest is not None: + if latest > 50: + details.append(f"최대주주 지분 {latest:.1f}% — 지배력 안정") + opps.append(Flag("positive", "governance", f"최대주주 {latest:.1f}%")) + score += 3 + elif latest > 30: + details.append(f"최대주주 지분 {latest:.1f}% — 적정 수준") + score += 2 + elif latest > 20: + details.append(f"최대주주 지분 {latest:.1f}%") + score += 1 + else: + details.append(f"최대주주 지분 {latest:.1f}% — 경영권 분산") + risks.append(Flag("warning", "governance", f"최대주주 {latest:.1f}%")) + + audit = rpt.audit + if audit is not None and audit.opinions: + maxScore += 2 + latest = None + for v in reversed(audit.opinions): + if v is not None: + latest = v + break + if latest is not None: + if "적정" in str(latest): + details.append("감사의견: 적정") + score += 2 + else: + details.append(f"감사의견: {latest}") + risks.append(Flag("danger", "audit", f"감사의견 비적정: {latest}")) + score -= 2 + + # 감사인 안정성 (PCAOB AS 3101) — Big4 + 장기 유지 + _big4_kw = ["삼일", "PwC", "삼정", "KPMG", "한영", "EY", "안진", "Deloitte"] + if audit is not None and audit.auditors: + maxScore += 2 + uniqueAuditors = [a for a in audit.auditors if a is not None] + latestAuditor = uniqueAuditors[-1] if uniqueAuditors else None + + if latestAuditor and any(kw in latestAuditor for kw in _big4_kw): + # Big4 판정 + changeCount = sum(1 for i in range(1, len(uniqueAuditors)) if uniqueAuditors[i] != uniqueAuditors[i - 1]) + if changeCount == 0 and len(uniqueAuditors) >= 3: + details.append(f"감사인: {latestAuditor} (Big4, 3년+ 유지)") + score += 2 + elif changeCount == 0: + details.append(f"감사인: {latestAuditor} (Big4)") + score += 1 + else: + details.append(f"감사인: {latestAuditor} (Big4, {changeCount}회 교체)") + score += 1 + elif latestAuditor: + details.append(f"감사인: {latestAuditor} (비Big4)") + # 빈번 교체 시 감점 + changeCount = sum(1 for i in range(1, len(uniqueAuditors)) if uniqueAuditors[i] != uniqueAuditors[i - 1]) + if changeCount >= 2: + score -= 1 + risks.append(Flag("warning", "audit", f"감사인 빈번 교체 ({changeCount}회)")) + + # 내부통제 (SOX 302/404) + try: + ic = getattr(rpt, "internalControl", None) + if ic is not None: + controlDf = getattr(ic, "controlDf", None) + if controlDf is not None and len(controlDf) > 0: + maxScore += 2 + latestRow = controlDf.row(-1, named=True) + hasWeakness = latestRow.get("hasWeakness", False) + opinion = latestRow.get("opinion", "") + if hasWeakness: + score -= 2 + details.append(f"내부통제: 취약점 보고 ({opinion})") + risks.append(Flag("danger", "governance", "내부통제 취약점")) + else: + score += 2 + details.append(f"내부통제: {opinion or '적정'}") + except (AttributeError, IndexError): + pass + + # 감사위원회 활동 + try: + auditSys = getattr(rpt, "auditSystem", None) + if auditSys is not None: + activity = getattr(auditSys, "activity", None) or [] + if activity: + maxScore += 1 + score += 1 + details.append(f"감사위원회: {len(activity)}건 활동") + elif getattr(auditSys, "committee", None): + maxScore += 1 + details.append("감사위원회: 설치됨 (활동 미확인)") + except AttributeError: + pass + + div = rpt.dividend + if div is not None and div.dps: + maxScore += 3 + recentDps = [d for d in div.dps[-3:] if d is not None] + if recentDps and all(d > 0 for d in recentDps): + if len(recentDps) >= 3: + details.append(f"3년 연속 배당 (DPS: {recentDps[-1]:,.0f}원)") + opps.append(Flag("positive", "shareholder", "안정적 배당")) + score += 3 + else: + details.append(f"배당 실시 (DPS: {recentDps[-1]:,.0f}원)") + score += 2 + elif recentDps and recentDps[-1] > 0: + details.append(f"배당 재개 (DPS: {recentDps[-1]:,.0f}원)") + score += 1 + else: + details.append("무배당") + risks.append(Flag("warning", "shareholder", "무배당")) + + if maxScore == 0: + return InsightResult("N", "지배구조 데이터 없음") + + grade = _scoreToGrade(score, maxScore) + summary = "지배구조 " + ( + "우수" + if grade == "A" + else "안정" + if grade == "B" + else "보통" + if grade == "C" + else "주의" + if grade == "D" + else "위험" + ) + return InsightResult(grade, summary, details, risks, opps) + + +def analyzeRiskSummary(insights: dict[str, InsightResult]) -> InsightResult: + """리스크 종합 분석.""" + allRisks: list[Flag] = [] + for key in [ + "performance", + "profitability", + "health", + "cashflow", + "governance", + "predictability", + "uncertainty", + "coreEarnings", + ]: + if key in insights and insights[key] is not None: + allRisks.extend(insights[key].risks) + + if not allRisks: + return InsightResult("A", "특별한 리스크 없음", ["주요 재무지표 양호"]) + + dangerCount = sum(1 for r in allRisks if r.level == "danger") + warningCount = sum(1 for r in allRisks if r.level == "warning") + + if dangerCount >= 2: + grade = "F" + summary = f"중대 리스크 {dangerCount}건" + elif dangerCount == 1: + grade = "D" + summary = f"리스크 경고 (위험 {dangerCount}, 주의 {warningCount})" + elif warningCount > 3: + grade = "D" + summary = f"다수 주의 ({warningCount}건)" + elif warningCount > 1: + grade = "C" + summary = f"일부 주의 ({warningCount}건)" + else: + grade = "B" + summary = "경미한 주의 사항" + + return InsightResult(grade, summary, [r.text for r in allRisks], allRisks) + + +def analyzeOpportunitySummary(insights: dict[str, InsightResult]) -> InsightResult: + """기회 종합 분석.""" + allOpps: list[Flag] = [] + for key in [ + "performance", + "profitability", + "health", + "cashflow", + "governance", + "predictability", + "uncertainty", + "coreEarnings", + ]: + if key in insights and insights[key] is not None: + allOpps.extend(insights[key].opportunities) + + if not allOpps: + return InsightResult("D", "특별한 투자 기회 없음") + + strongCount = sum(1 for o in allOpps if o.level == "strong") + positiveCount = sum(1 for o in allOpps if o.level == "positive") + total = strongCount + positiveCount + + if strongCount >= 3 and total >= 5: + grade = "A" + summary = f"투자 매력 높음 ({strongCount}강점, {positiveCount}긍정)" + elif strongCount >= 2: + grade = "B" + summary = f"투자 매력 있음 ({strongCount}강점)" + elif strongCount >= 1 or positiveCount >= 3: + grade = "C" + summary = f"일부 긍정 ({strongCount}강점, {positiveCount}긍정)" + elif positiveCount >= 1: + grade = "D" + summary = f"긍정 요소 미약 ({positiveCount}건)" + else: + grade = "F" + summary = "투자 매력 없음" + + return InsightResult(grade, summary, [o.text for o in allOpps], opportunities=allOpps) + + +def analyzePredictability( + aSeries: dict, + aYears: list[str], + isFinancial: bool = False, +) -> InsightResult: + """사업 예측가능성 분석 (0~10점 → A~F).""" + import statistics + + revVals = getAnnualValues(aSeries, "IS", "sales") + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + niVals = getAnnualValues(aSeries, "IS", "net_profit") + + if isFinancial: + revVals = opVals + + validRev = [v for v in revVals if v is not None] + validOp = [v for v in opVals if v is not None] + validNi = [v for v in niVals if v is not None] + + if len(validRev) < 3: + return InsightResult("N", "예측가능성 데이터 부족") + + details: list[str] = [] + score = 0.0 + + # 매출 CV (낮을수록 예측 가능) + revMean = statistics.mean(validRev) + revCv = statistics.stdev(validRev) / abs(revMean) if revMean != 0 else 1.0 + revScore = max(0, 2.5 - revCv * 2.5) + score += revScore + details.append(f"매출 CV {revCv:.2f} ({revScore:.1f}/2.5)") + + # 영업이익 CV + if len(validOp) >= 3: + opMean = statistics.mean(validOp) + opCv = statistics.stdev(validOp) / abs(opMean) if opMean != 0 else 1.0 + opScore = max(0, 2.5 - opCv * 2.5) + score += opScore + details.append(f"영업이익 CV {opCv:.2f} ({opScore:.1f}/2.5)") + + # 연속 성장 (매출 YoY > 0 횟수) + growthCount = sum(1 for i in range(1, len(validRev)) if validRev[i] > validRev[i - 1]) + maxGrowth = max(1, len(validRev) - 1) + growthScore = (growthCount / maxGrowth) * 2.5 + score += growthScore + details.append(f"연속성장 {growthCount}/{maxGrowth}년 ({growthScore:.1f}/2.5)") + + # 무적자 (순이익 > 0 비율) + if len(validNi) >= 3: + profitCount = sum(1 for v in validNi if v > 0) + profitScore = (profitCount / len(validNi)) * 2.5 + score += profitScore + details.append(f"흑자 {profitCount}/{len(validNi)}년 ({profitScore:.1f}/2.5)") + + score = min(10, score) + grade = _predictabilityGrade(score) + summary = f"예측가능성 {score:.1f}/10 — " + ( + "매우 높음" if score >= 7 else "높음" if score >= 5 else "보통" if score >= 3 else "낮음" + ) + return InsightResult(grade, summary, details) + + +def _predictabilityGrade(score: float) -> str: + """예측가능성 점수 → 등급.""" + if score >= 8: + return "A" + if score >= 6: + return "B" + if score >= 4: + return "C" + if score >= 2: + return "D" + return "F" + + +def analyzeUncertainty( + aSeries: dict, + aYears: list[str], + isFinancial: bool = False, +) -> InsightResult: + """불확실성 등급 분석 (Morningstar 방식 5단계).""" + import statistics + + revVals = getAnnualValues(aSeries, "IS", "sales") + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + + if isFinancial: + revVals = opVals + + validRev = [v for v in revVals if v is not None] + validOp = [v for v in opVals if v is not None] + + if len(validRev) < 5 or len(validOp) < 5: + return InsightResult("N", "불확실성 데이터 부족") + + details: list[str] = [] + + # 매출 CV + revMean = statistics.mean(validRev) + revCv = statistics.stdev(validRev) / abs(revMean) if revMean != 0 else 1.0 + + # DOL (영업레버리지) + dolList = [] + minLen = min(len(validRev), len(validOp)) + for i in range(1, minLen): + if validRev[i - 1] != 0 and validOp[i - 1] != 0: + sChg = (validRev[i] - validRev[i - 1]) / abs(validRev[i - 1]) + oChg = (validOp[i] - validOp[i - 1]) / abs(validOp[i - 1]) + if abs(sChg) > 0.01: + dolList.append(abs(oChg / sChg)) + dol = statistics.median(dolList) if dolList else 2.0 + + # D/E + tl = getLatest(aSeries, "BS", "total_liabilities") + eq = getLatest(aSeries, "BS", "total_stockholders_equity") + deRatio = tl / eq if tl is not None and eq and eq > 0 else 0.0 + + # 영업이익 CV + opMean = statistics.mean(validOp) + opCv = statistics.stdev(validOp) / abs(opMean) if opMean != 0 else 1.0 + + # 종합 점수 (각 최대 25점) + revScore = min(25, revCv / 0.5 * 25) + dolScore = min(25, (dol - 1) / 9 * 25) + deScore = min(25, deRatio / 3 * 25) + opScore = min(25, opCv / 1.0 * 25) + totalScore = revScore + dolScore + deScore + opScore + + if totalScore < 20: + rating, margin = "Low", "±15%" + elif totalScore < 35: + rating, margin = "Medium", "±25%" + elif totalScore < 50: + rating, margin = "High", "±35%" + elif totalScore < 70: + rating, margin = "Very High", "±45%" + else: + rating, margin = "Extreme", "±55%" + + details.append(f"매출CV {revCv:.2f}, DOL {dol:.1f}, D/E {deRatio:.1f}, 영업CV {opCv:.2f}") + details.append(f"종합 {totalScore:.1f}/100 → {rating} (Fair Value 밴드 {margin})") + + grade = _uncertaintyGrade(rating) + summary = f"불확실성 {rating} — Fair Value 밴드 {margin}" + return InsightResult(grade, summary, details) + + +def _uncertaintyGrade(rating: str) -> str: + """불확실성 등급 → insight 등급 (낮은 불확실성 = 좋은 등급).""" + return {"Low": "A", "Medium": "B", "High": "C", "Very High": "D", "Extreme": "F"}.get(rating, "C") + + +def analyzeCoreEarnings( + aSeries: dict, + aYears: list[str], + isFinancial: bool = False, +) -> InsightResult: + """핵심이익 품질 분석 (비경상 항목 분리).""" + import statistics + + opVals = getAnnualValues(aSeries, "IS", "operating_profit") + niVals = getAnnualValues(aSeries, "IS", "net_profit") + taxVals = getAnnualValues(aSeries, "IS", "income_taxes") + pbtVals = getAnnualValues(aSeries, "IS", "profit_before_tax") + + validOp = [v for v in opVals if v is not None] + validNi = [v for v in niVals if v is not None] + validTax = [v for v in taxVals if v is not None] + validPbt = [v for v in pbtVals if v is not None] + + if len(validOp) < 3 or len(validNi) < 3: + return InsightResult("N", "핵심이익 데이터 부족") + + details: list[str] = [] + score = 0 + + # 실효세율 추정 + if validTax and validPbt: + taxRates = [] + for t, p in zip(validTax, validPbt): + if p > 0 and t is not None: + taxRates.append(t / p) + effectiveTax = statistics.mean(taxRates) if taxRates else 0.22 + else: + effectiveTax = 0.22 + + # Core Earnings = 영업이익 × (1-세율) + coreVals = [v * (1 - effectiveTax) for v in validOp] + + # CV 비교: Core vs Reported + coreMean = statistics.mean(coreVals) if coreVals else 0 + reportedMean = statistics.mean(validNi) if validNi else 0 + coreCv = statistics.stdev(coreVals) / abs(coreMean) if coreMean != 0 and len(coreVals) >= 2 else 999 + reportedCv = statistics.stdev(validNi) / abs(reportedMean) if reportedMean != 0 and len(validNi) >= 2 else 999 + + details.append(f"Core CV {coreCv:.2f} vs Reported CV {reportedCv:.2f}") + + # CV 개선 여부 + if coreCv < reportedCv: + improvement = (1 - coreCv / reportedCv) * 100 if reportedCv > 0 else 0 + details.append(f"핵심이익이 변동성 {improvement:.0f}% 개선") + score += 2 + else: + details.append("비경상 항목 영향 미미") + + # 핵심이익 안정성 (Core CV 절대 수준) + if coreCv < 0.2: + details.append("핵심이익 매우 안정") + score += 3 + elif coreCv < 0.4: + details.append("핵심이익 안정") + score += 2 + elif coreCv < 0.7: + details.append("핵심이익 보통") + score += 1 + else: + details.append("핵심이익 변동 큼") + + # 핵심이익 대비 보고이익 괴리 (최신연도) + if coreVals and validNi: + latestCore = coreVals[-1] + latestReported = validNi[-1] + if latestCore != 0: + gap = (latestReported - latestCore) / abs(latestCore) * 100 + if abs(gap) > 30: + details.append(f"보고이익 vs 핵심이익 괴리 {gap:+.0f}%") + if gap < -30: + score -= 1 # 비경상 손실 + elif abs(gap) < 10: + details.append("보고이익 ≈ 핵심이익") + score += 1 + + grade = _scoreToGrade(score, 6) + summary = "이익 품질 " + ("우수" if score >= 5 else "양호" if score >= 3 else "보통" if score >= 1 else "주의") + return InsightResult(grade, summary, details) + + +def disclosureGapFlags( + company: "Company | None", + healthGrade: str | None = None, +) -> list[Flag]: + """공시 텍스트 변화 vs 재무 지표 불일치 탐지. + + diff 기반으로 리스크 서술 급증/감소를 감지하고, 재무 건전성 등급과 교차 비교하여 + '서술형 리스크 급증 vs 재무 안정' 또는 '재무 악화 vs 서술형 은폐' 불일치를 찾는다. + """ + if company is None: + return [] + + from dartlab.analysis.financial.disclosureDelta import _safeDiffResult + + diffResult = _safeDiffResult(company) + if diffResult is None or not diffResult.entries: + return [] + + # 리스크 관련 topic 식별 + riskTopics = { + "riskManagement", + "riskFactor", + "goingConcern", + "audit", + "contingentLiability", + "litigation", + "internalControl", + } + riskChanges = [e for e in diffResult.entries if e.topic in riskTopics and e.status == "changed"] + + flags: list[Flag] = [] + + if riskChanges and healthGrade in ("A", "B"): + topics = ", ".join(sorted({e.topic for e in riskChanges})) + flags.append( + Flag( + level="warning", + category="disclosure_gap", + text=f"리스크 서술 변화({topics}) vs 재무 안정({healthGrade}) — 불일치 확인 필요", + ) + ) + + if not riskChanges and healthGrade in ("D", "F"): + flags.append( + Flag( + level="warning", + category="disclosure_gap", + text=f"재무 악화({healthGrade}) vs 리스크 서술 무변동 — 공시 충실도 점검 필요", + ) + ) + + return flags diff --git a/src/dartlab/analysis/financial/insight/pipeline.py b/src/dartlab/analysis/financial/insight/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..b641a26b9317f7b58d715bd339e84c8ceaeff19b --- /dev/null +++ b/src/dartlab/analysis/financial/insight/pipeline.py @@ -0,0 +1,289 @@ +"""통합 분석 파이프라인.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from dartlab.analysis.financial.insight.anomaly import detectAuditRedFlags, runAnomalyDetection +from dartlab.analysis.financial.insight.detector import detectFinancialSector +from dartlab.analysis.financial.insight.distress import calcDistress +from dartlab.analysis.financial.insight.grading import ( + analyzeCashflow, + analyzeCoreEarnings, + analyzeGovernance, + analyzeHealth, + analyzeOpportunitySummary, + analyzePerformance, + analyzePredictability, + analyzeProfitability, + analyzeRiskSummary, + analyzeUncertainty, + disclosureGapFlags, +) +from dartlab.analysis.financial.insight.summary import classifyProfile, generateSummary +from dartlab.analysis.financial.insight.types import AnalysisResult, Anomaly, AuditDataForAnomaly, MarketDataForDistress +from dartlab.core.finance.ratios import calcRatios +from dartlab.core.sector.types import Sector + +if TYPE_CHECKING: + from dartlab.providers.dart.company import Company + +SeriesPair = tuple[dict, list[str]] + + +def _extractAuditData(company: Company | None) -> AuditDataForAnomaly | None: + """Company에서 감사 데이터를 추출하여 AuditDataForAnomaly DTO 생성.""" + if company is None: + return None + + rpt = getattr(company, "report", None) + if rpt is None: + return None + + auditors: list[str | None] = [] + opinions: list[str | None] = [] + fees: list[float | None] = [] + kamCounts: list[int | None] = [] + hasGoingConcern = False + hasInternalControlWeakness = False + + # docs 파이프라인 감사 데이터 (opinionDf, feeDf) + try: + auditResult = getattr(rpt, "audit", None) + if auditResult is not None: + # Report API 기반: auditors, opinions 시계열 + rawAuditors = getattr(auditResult, "auditors", None) + rawOpinions = getattr(auditResult, "opinions", None) + if rawAuditors: + auditors = list(rawAuditors) + if rawOpinions: + opinions = list(rawOpinions) + + # docs 기반: opinionDf (KAM, goingConcern 포함) + opDf = getattr(auditResult, "opinionDf", None) + if opDf is not None and len(opDf) > 0: + # KAM 건수 추출 + if "keyAuditMatters" in opDf.columns: + for row in opDf.iter_rows(named=True): + kam = row.get("keyAuditMatters") + if kam and isinstance(kam, str) and len(kam.strip()) > 0: + kamCounts.append(kam.count(",") + 1) + else: + kamCounts.append(0) + + # goingConcern 확인 (최신기) + if "goingConcern" in opDf.columns: + latestGc = opDf.row(-1, named=True).get("goingConcern") + if latestGc and isinstance(latestGc, str) and len(latestGc.strip()) > 0: + hasGoingConcern = True + + # opinionDf에서 auditors/opinions 보강 (Report API 없을 때) + if not auditors and "auditor" in opDf.columns: + auditors = opDf["auditor"].to_list() + if not opinions and "opinion" in opDf.columns: + opinions = opDf["opinion"].to_list() + + # feeDf에서 보수 추출 + feeDf = getattr(auditResult, "feeDf", None) + if feeDf is not None and len(feeDf) > 0: + feeCol = ( + "actualFee" + if "actualFee" in feeDf.columns + else "contractFee" + if "contractFee" in feeDf.columns + else None + ) + if feeCol: + fees = feeDf[feeCol].to_list() + except (AttributeError, TypeError): + pass + + # 내부통제 취약점 + try: + ic = getattr(rpt, "internalControl", None) + if ic is not None: + controlDf = getattr(ic, "controlDf", None) + if controlDf is not None and len(controlDf) > 0: + latestRow = controlDf.row(-1, named=True) + if latestRow.get("hasWeakness"): + hasInternalControlWeakness = True + except (AttributeError, IndexError): + pass + + # 데이터가 하나도 없으면 None + if ( + not auditors + and not opinions + and not fees + and not kamCounts + and not hasGoingConcern + and not hasInternalControlWeakness + ): + return None + + return AuditDataForAnomaly( + auditors=auditors, + opinions=opinions, + fees=fees, + kamCounts=kamCounts, + hasGoingConcern=hasGoingConcern, + hasInternalControlWeakness=hasInternalControlWeakness, + ) + + +def _ratio_archetype_override(company: Company | None) -> str | None: + if company is None: + return None + + try: + from dartlab.core.sector.types import IndustryGroup + except ImportError: + return None + + sectorInfo = getattr(company, "sector", None) + industryGroup = getattr(sectorInfo, "industryGroup", None) + mapping = { + IndustryGroup.BANK: "bank", + IndustryGroup.INSURANCE: "insurance", + IndustryGroup.DIVERSIFIED_FINANCIALS: "securities", + } + return mapping.get(industryGroup) + + +def analyze( + stockCode: str, + company: Company | None = None, + *, + corpName: str | None = None, + qSeriesPair: SeriesPair | None = None, + aSeriesPair: SeriesPair | None = None, + marketData: MarketDataForDistress | None = None, + currency: str | None = None, +) -> AnalysisResult | None: + """종목 종합 인사이트 분석. + + Args: + stockCode: 종목코드 또는 CIK. + company: Company 인스턴스. None이고 series도 없으면 DART pivot 시도. + corpName: 회사명. company가 없을 때 사용. + qSeriesPair: (qSeries, qPeriods). None이면 DART pivot에서 빌드. + aSeriesPair: (aSeries, aYears). None이면 DART pivot에서 빌드. + marketData: 시장 기반 부실 분석 입력. None이면 4축, 제공 시 Merton 5축. + currency: 통화 코드. None이면 company에서 자동 추출 (기본 KRW). + + Returns: + AnalysisResult 또는 데이터 부족 시 None. + """ + if qSeriesPair is None or aSeriesPair is None: + from dartlab.providers.dart.finance.pivot import buildAnnual, buildTimeseries + + if qSeriesPair is None: + qResult = buildTimeseries(stockCode) + if qResult is None: + return None + qSeriesPair = qResult + if aSeriesPair is None: + aResult = buildAnnual(stockCode) + if aResult is None: + return None + aSeriesPair = aResult + + qSeries, qPeriods = qSeriesPair + aSeries, aYears = aSeriesPair + + # currency 자동 추출: company에서 가져오거나 기본 KRW + if currency is None: + currency = getattr(company, "currency", "KRW") + market = "US" if currency == "USD" else "KR" + + ratios = calcRatios(aSeries, archetypeOverride=_ratio_archetype_override(company), currency=currency) + + if company is None and corpName is None: + try: + from dartlab.providers.dart.company import Company + + company = Company(stockCode) + except ValueError: + pass + + isFinancial, _ = detectFinancialSector(aSeries, ratios) + + sector = Sector.UNKNOWN + if company is not None: + sectorInfo = getattr(company, "sector", None) + sector = sectorInfo.sector if sectorInfo else Sector.UNKNOWN + + insights = {} + insights["performance"] = analyzePerformance(aSeries, aYears, qSeries, qPeriods, isFinancial) + insights["profitability"] = analyzeProfitability(ratios, aSeries, isFinancial, sector=sector, market=market) + insights["health"] = analyzeHealth(ratios, isFinancial, currency=currency) + insights["cashflow"] = analyzeCashflow(ratios, aSeries, isFinancial) + insights["governance"] = analyzeGovernance(company) if company else analyzeGovernance(None) + insights["predictability"] = analyzePredictability(aSeries, aYears, isFinancial) + insights["uncertainty"] = analyzeUncertainty(aSeries, aYears, isFinancial) + insights["coreEarnings"] = analyzeCoreEarnings(aSeries, aYears, isFinancial) + insights["risk"] = analyzeRiskSummary(insights) + + # diff + 재무 교차 Red Flag (공시 텍스트 변화 vs 재무 지표 불일치) + healthGrade = insights.get("health") + healthGradeStr = healthGrade.grade if healthGrade else None + gapFlags = disclosureGapFlags(company, healthGrade=healthGradeStr) + if gapFlags and insights["risk"] is not None: + insights["risk"].risks.extend(gapFlags) + insights["risk"].details.extend(f.text for f in gapFlags) + + insights["opportunity"] = analyzeOpportunitySummary(insights) + + # 감사 데이터 추출 (Company가 있을 때만) + auditData = _extractAuditData(company) if company is not None else None + anomalies = runAnomalyDetection(aSeries, isFinancial, auditData=auditData) + + # Merton 시장 기반 모델 (비금융 + marketData 제공 시) + mertonResult = None + if not isFinancial and marketData is not None: + try: + from dartlab.core.finance.merton import calcEquityVolatility, solveMerton + + vol = calcEquityVolatility(marketData.dailyReturns) + if vol > 0: + mertonResult = solveMerton( + equityValue=marketData.marketCap, + debtFaceValue=ratios.totalLiabilities or 0, + equityVolatility=vol, + riskFreeRate=marketData.riskFreeRate, + ) + except ImportError: + pass # scipy 미설치 → Merton 축 제외, 4축으로 동작 + + distress = calcDistress(ratios, anomalies, isFinancial, mertonResult=mertonResult) + + resolvedName = corpName or (company.corpName if company else stockCode) + grades = {k: v.grade for k, v in insights.items()} + profile = classifyProfile(grades) + summaryText = generateSummary(resolvedName, insights, anomalies, profile) + + return AnalysisResult( + corpName=resolvedName, + stockCode=stockCode, + isFinancial=isFinancial, + performance=insights["performance"], + profitability=insights["profitability"], + health=insights["health"], + cashflow=insights["cashflow"], + governance=insights["governance"], + risk=insights["risk"], + opportunity=insights["opportunity"], + predictability=insights.get("predictability"), + uncertainty=insights.get("uncertainty"), + coreEarnings=insights.get("coreEarnings"), + anomalies=anomalies, + distress=distress, + summary=summaryText, + profile=profile, + ) + + +def analyzeAudit(company) -> list[Anomaly]: + """감사 Red Flag만 단독 분석.""" + auditData = _extractAuditData(company) + return detectAuditRedFlags(auditData) diff --git a/src/dartlab/analysis/financial/insight/spec.py b/src/dartlab/analysis/financial/insight/spec.py new file mode 100644 index 0000000000000000000000000000000000000000..57ace1462db3dfb92c566543554a248536fabb0c --- /dev/null +++ b/src/dartlab/analysis/financial/insight/spec.py @@ -0,0 +1,180 @@ +"""insight 엔진 스펙 — 코드에서 자동 추출.""" + +from __future__ import annotations + +AREAS = { + "performance": { + "label": "실적", + "description": "매출/영업이익 YoY 성장률 + 분기 변동성", + "metrics": ["revenue_growth_yoy", "operating_income_growth_yoy", "quarterly_volatility"], + }, + "profitability": { + "label": "수익성", + "description": "영업이익률, 순이익률, ROE, ROA + 섹터 벤치마크 보정", + "metrics": ["operating_margin", "net_margin", "roe", "roa"], + }, + "health": { + "label": "재무건전성", + "description": "부채비율, 유동비율 + 부실 예측 모델 (O-Score, Z''-Score)", + "metrics": ["debt_ratio", "current_ratio", "interest_coverage", "ohlson_o_score", "altman_zpp_score"], + }, + "cashflow": { + "label": "현금흐름", + "description": "영업CF, FCF, 현금성자산 비중", + "metrics": ["operating_cf", "fcf", "cash_ratio"], + }, + "governance": { + "label": "지배구조", + "description": "최대주주 지분율, 감사의견, 사외이사 비율, 자기주식", + "metrics": ["major_holder_pct", "audit_opinion", "outside_director_ratio", "treasury_stock"], + }, + "predictability": { + "label": "예측가능성", + "description": "매출CV + 영업CV + 연속성장 + 무적자 → 0~10점 (GuruFocus Business Predictability)", + "metrics": ["revenue_cv", "operating_cv", "consecutive_growth", "profit_years"], + }, + "uncertainty": { + "label": "불확실성", + "description": "매출CV + DOL + D/E + 영업CV → 5단계 등급 + Fair Value 밴드 (Morningstar Uncertainty Rating)", + "metrics": ["revenue_cv", "dol", "debt_equity", "operating_cv", "fair_value_band"], + }, + "coreEarnings": { + "label": "핵심이익", + "description": "비경상 항목 분리, Core Earnings 안정성 (S&P Core Earnings)", + "metrics": ["core_cv", "reported_cv", "cv_improvement", "core_reported_gap"], + }, + "risk": { + "label": "종합 리스크", + "description": "전 영역 리스크 플래그 종합", + "metrics": [], + }, + "opportunity": { + "label": "종합 기회", + "description": "전 영역 기회 플래그 종합", + "metrics": [], + }, + "macro": { + "label": "매크로 환경", + "description": "경제 사이클(침체/회복/확장/둔화) 판별 + 5대 자산 신호(금리/환율/금/VIX) + 업종 민감도", + "metrics": [ + "cycle_phase", + "hy_spread", + "term_spread", + "vix", + "sector_cyclicality", + "macro_sensitivity_r2", + "valuation_band_percentile", + ], + }, +} + +ANOMALY_DETECTORS = [ + "earnings_quality", + "working_capital", + "balance_sheet_shift", + "cash_burn", + "margin_divergence", + "financial_sector", + "trend_deterioration", + "ccc_deterioration", + "audit_red_flags", + "benford_law", + "revenue_quality", +] + + +DISTRESS_MODELS = { + "ohlsonOScore": { + "label": "Ohlson O-Score", + "description": "9변수 로지스틱 부도 확률 (1980). 금융업 포함 범용.", + }, + "altmanZppScore": { + "label": "Altman Z''-Score", + "description": "비제조업/신흥시장 변형 (1995). 금융업 적용 가능.", + }, + "springateSScore": { + "label": "Springate S-Score", + "description": "Z-Score 캐나다 변형 4변수 (1978). S < 0.862 부실.", + }, + "zmijewskiXScore": { + "label": "Zmijewski X-Score", + "description": "3변수 프로빗 모델 (1984). X > 0 부실. 금융업 왜곡 주의.", + }, + "mertonD2D": { + "label": "Merton D2D", + "description": "구조 모형 부도 거리 (1974). 주가변동성+부채 기반. Moody's KMV 글로벌 표준.", + }, +} + +DISTRESS_SCORECARD = { + "axes": [ + { + "name": "정량 분석", + "weight": "0.30 (Merton 있을 때) / 0.40 (없을 때)", + "models": ["ohlsonOScore", "altmanZppScore", "altmanZScore"], + }, + {"name": "시장 기반", "weight": "0.20 (Merton 있을 때) / 0 (없을 때)", "models": ["mertonD2D"]}, + { + "name": "이익 품질", + "weight": "0.15 (Merton 있을 때) / 0.20 (없을 때)", + "models": ["beneishMScore", "sloanAccrual", "piotroskiFScore"], + }, + { + "name": "추세 분석", + "weight": "0.25 (Merton 있을 때) / 0.30 (없을 때)", + "source": "anomaly (trendDeterioration, cccDeterioration)", + }, + {"name": "감사 위험", "weight": 0.10, "source": "anomaly (audit, governance)"}, + ], + "creditGrade": "AAA~D (S&P PD 매핑, 10단계)", + "cashRunway": "현금 소진 예상 개월 수 + 유동성 경보", + "riskFactors": "anomaly + ratios + Merton D2D에서 구조화된 위험 요인 자동 추출", + "levels": ["safe (<15)", "watch (<30)", "warning (<50)", "danger (<70)", "critical (>=70)"], +} + + +CREDIT_RATING = { + "label": "신용평가", + "description": "제도권 수준 20단계 신용등급 (AAA~D, +/- 포함). 5축 가중평균 + 업종별 차등 기준.", + "axes": [ + { + "name": "채무상환능력", + "weight": "35%", + "metrics": ["FFO/총차입금", "Debt/EBITDA", "FOCF/Debt", "EBITDA/이자비용"], + }, + {"name": "레버리지", "weight": "25%", "metrics": ["부채비율", "차입금의존도", "순차입금/EBITDA"]}, + {"name": "유동성·만기", "weight": "15%", "metrics": ["유동비율", "현금비율", "단기차입금비중"]}, + {"name": "부실모델 앙상블", "weight": "15%", "metrics": ["Altman Z", "Ohlson O", "Zmijewski", "Springate"]}, + { + "name": "이익품질·추세", + "weight": "10%", + "metrics": ["Beneish M", "Sloan Accrual", "Piotroski F", "이익변동성"], + }, + ], + "grades": "AAA, AA+, AA, AA-, A+, A, A-, BBB+, BBB, BBB-, BB+, BB, BB-, B+, B, B-, CCC, CC, C, D", + "cashFlowGrade": "eCR-1 ~ eCR-6 (현금흐름창출능력 별도 평가)", + "outlook": "안정적/긍정적/부정적 (5개년 점수 추세 기반)", + "qualitativeSlots": "AI가 채울 수 있는 정성 조정 슬롯 4개 (시장지위/경쟁우위/경영진/계열지원)", + "methodology": "KIS/KR/NICE + Moody's/S&P 공개 방법론 종합, 업종별 11개 대분류 차등 기준표", +} + + +def buildSpec() -> dict: + """insight 엔진 스펙 반환.""" + return { + "name": "insight", + "description": "기업 분석 등급 (10영역 A~F) + 이상치 탐지 + 부실 예측 + 신용평가 + 프로파일 분류", + "summary": { + "areas": list(AREAS.keys()), + "grading": "A~F (6단계, 점수 기반)", + "anomaly": f"룰 기반 {len(ANOMALY_DETECTORS)}개 탐지기", + "distress": f"5축 부실 예측 스코어카드 ({len(DISTRESS_MODELS)}개 모델, Merton D2D 포함) + 신용등급 + 유동성 경보", + "creditRating": "20단계 신용등급 (AAA~D) + eCR + Outlook + AI 정성 슬롯", + "profile": "classifyProfile (수비형/공격형/성장형/가치형 등)", + }, + "detail": {area: meta for area, meta in AREAS.items()}, + "anomalyDetectors": ANOMALY_DETECTORS, + "distressModels": DISTRESS_MODELS, + "distressScorecard": DISTRESS_SCORECARD, + "creditRating": CREDIT_RATING, + } diff --git a/src/dartlab/analysis/financial/insight/summary.py b/src/dartlab/analysis/financial/insight/summary.py new file mode 100644 index 0000000000000000000000000000000000000000..06cde7b3dff7e2814b4d4bc091d09d447c755462 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/summary.py @@ -0,0 +1,163 @@ +"""종합 요약 텍스트 + 프로필 분류.""" + +from __future__ import annotations + +from typing import Optional + +from dartlab.analysis.financial.insight.types import Anomaly, InsightResult + +GRADE_SCORE = {"A": 4, "B": 3, "C": 2, "D": 1, "F": 0, "N": None} + + +def _josa(word: str, withBatchim: str, withoutBatchim: str) -> str: + if not word: + return word + withBatchim + lastChar = ord(word[-1]) + if 0xAC00 <= lastChar <= 0xD7A3: + hasBatchim = (lastChar - 0xAC00) % 28 != 0 + return word + (withBatchim if hasBatchim else withoutBatchim) + return word + withBatchim + + +def _eunNeun(word: str) -> str: + return _josa(word, "은", "는") + + +def _iGa(word: str) -> str: + return _josa(word, "이", "가") + + +def _avgGrade(grades: dict[str, str]) -> float: + scores = [GRADE_SCORE[g] for g in grades.values() if GRADE_SCORE.get(g) is not None] + if not scores: + return 0 + return sum(scores) / len(scores) + + +def classifyProfile(grades: dict[str, str]) -> str: + """등급 조합으로 기업 프로필 분류.""" + avgScore = _avgGrade(grades) + perf = grades.get("performance", "C") + profit = grades.get("profitability", "C") + health = grades.get("health", "C") + risk = grades.get("risk", "C") + opp = grades.get("opportunity", "C") + + if avgScore >= 3.0 and risk in ("A", "B"): + return "premium" + if perf in ("A", "B") and profit in ("A", "B") and opp in ("A", "B"): + return "growth" + if health in ("A", "B") and risk in ("A", "B") and profit in ("A", "B"): + return "stable" + if risk in ("D", "F") or health == "F": + return "caution" + if avgScore < 1.5: + return "distress" + return "mixed" + + +def _getStrengths(insights: dict[str, InsightResult]) -> list[str]: + strengths = [] + mapping = { + "performance": "실적", + "profitability": "수익성", + "health": "재무건전성", + "cashflow": "현금흐름", + "governance": "지배구조", + } + for key, label in mapping.items(): + if key in insights and insights[key].grade == "A": + strengths.append(label) + return strengths + + +def _getWeaknesses(insights: dict[str, InsightResult]) -> list[str]: + weaknesses = [] + mapping = { + "performance": "실적", + "profitability": "수익성", + "health": "재무건전성", + "cashflow": "현금흐름", + "governance": "지배구조", + } + for key, label in mapping.items(): + if key in insights and insights[key].grade == "F": + weaknesses.append(label) + return weaknesses + + +def _getKeyMetric(insights: dict[str, InsightResult]) -> Optional[str]: + for key in ("performance", "profitability"): + if key in insights: + for detail in insights[key].details: + for keyword in ("성장", "이익률", "ROE"): + if keyword in detail: + return detail + return None + + +def generateSummary( + corpName: str, + insights: dict[str, InsightResult], + anomalies: list[Anomaly], + profile: str, +) -> str: + """한국어 종합 요약 생성.""" + strengths = _getStrengths(insights) + weaknesses = _getWeaknesses(insights) + keyMetric = _getKeyMetric(insights) + + parts: list[str] = [] + nameEunNeun = _eunNeun(corpName) + + if profile == "premium": + if strengths: + parts.append( + f"{nameEunNeun} {', '.join(strengths)} 등 전반적으로 우수한 재무 상태를 보이는 우량 기업입니다." + ) + else: + parts.append(f"{nameEunNeun} 전반적으로 우수한 재무 상태를 보이는 우량 기업입니다.") + + elif profile == "growth": + parts.append(f"{nameEunNeun} 성장성과 수익성이 돋보이는 기업입니다.") + + elif profile == "stable": + parts.append(f"{nameEunNeun} 안정적인 재무구조를 갖춘 기업입니다.") + + elif profile == "caution": + if weaknesses: + parts.append(f"{nameEunNeun} {', '.join(weaknesses)} 측면에서 주의가 필요합니다.") + else: + grades = {k: v.grade for k, v in insights.items()} + riskGrade = grades.get("risk", "C") + if riskGrade in ("D", "F"): + parts.append(f"{nameEunNeun} 재무 리스크 요인이 존재하여 주의가 필요합니다.") + else: + parts.append(f"{nameEunNeun} 일부 재무 지표에서 주의가 필요합니다.") + + elif profile == "distress": + parts.append(f"{nameEunNeun} 여러 재무 지표에서 개선이 시급한 상황입니다.") + + else: + if strengths and weaknesses: + parts.append(f"{nameEunNeun} {', '.join(strengths)} 양호하나 {', '.join(weaknesses)}에서 약점을 보입니다.") + elif strengths: + if len(strengths) == 1: + parts.append(f"{nameEunNeun} {_iGa(strengths[0])} 양호한 기업입니다.") + else: + front = ", ".join(strengths[:-1]) + parts.append(f"{nameEunNeun} {front}, {_iGa(strengths[-1])} 양호한 기업입니다.") + else: + parts.append(f"{nameEunNeun} 전반적으로 보통 수준의 재무 상태를 보입니다.") + + if keyMetric: + parts.append(keyMetric + ".") + + dangerAnomalies = [a for a in anomalies if a.severity == "danger"] + if dangerAnomalies: + topAnomaly = dangerAnomalies[0].text.split("—")[0].strip() + parts.append(f"다만 {topAnomaly} 점에 유의해야 합니다.") + elif len(anomalies) >= 3: + parts.append(f"이상 신호 {len(anomalies)}건이 감지되어 모니터링이 필요합니다.") + + return " ".join(parts) diff --git a/src/dartlab/analysis/financial/insight/types.py b/src/dartlab/analysis/financial/insight/types.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab0fcc9773f5c2099f97bfda693498a11d3cac7 --- /dev/null +++ b/src/dartlab/analysis/financial/insight/types.py @@ -0,0 +1,238 @@ +"""인사이트 엔진 데이터 타입.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional + + +@dataclass +class Flag: + """리스크/기회 플래그.""" + + level: str + category: str + text: str + + +@dataclass +class InsightResult: + """단일 영역 분석 결과.""" + + grade: str + summary: str + details: list[str] = field(default_factory=list) + risks: list[Flag] = field(default_factory=list) + opportunities: list[Flag] = field(default_factory=list) + + +@dataclass +class Anomaly: + """이상치 탐지 결과.""" + + severity: str + category: str + text: str + value: Optional[float] = None + + +@dataclass +class ModelScore: + """개별 부실 모델 결과 — 원시 값 + 판정 + 근거.""" + + name: str # "Ohlson O-Score" + rawValue: float # -5.23 + displayValue: str # "P(부도) 0.5%" 또는 "Z'' = 6.42" + zone: str # "safe" / "gray" / "distress" + interpretation: str # "부도 확률 극히 낮음." + reference: str # "Ohlson (1980), 9변수 로지스틱" + + def __repr__(self) -> str: + return f"{self.name:<20s} {self.displayValue:<16s} {self.zone:<10s} {self.interpretation}" + + +@dataclass +class DistressAxis: + """스코어카드 단일 축 — 점수 + 구성 모델 상세.""" + + name: str # "정량 분석" + score: float # 0~100 + weight: float # 0.40 + models: list[ModelScore] = field(default_factory=list) + summary: str = "" + + def __repr__(self) -> str: + pct = int(self.weight * 100) + header = f"[{self.name}] {self.score:.1f}/100 (가중 {pct}%)" + if not self.models: + return f"{header}\n {self.summary}" + lines = [header] + for m in self.models: + lines.append(f" {m}") + return "\n".join(lines) + + +@dataclass +class AuditDataForAnomaly: + """감사 탐지기 입력 DTO. + + pipeline이 Company에서 추출하여 anomaly에 전달. + 각 필드는 연도별 시계열 (최신이 마지막). + """ + + auditors: list[str | None] = field(default_factory=list) # 감사인명 시계열 + opinions: list[str | None] = field(default_factory=list) # 감사의견 시계열 + fees: list[float | None] = field(default_factory=list) # 감사보수 시계열 (백만원) + kamCounts: list[int | None] = field(default_factory=list) # KAM 건수 시계열 + hasGoingConcern: bool = False # 계속기업 불확실성 여부 (최신기) + hasInternalControlWeakness: bool = False # 내부통제 취약점 여부 (최신기) + + +@dataclass +class MarketDataForDistress: + """시장 기반 부실 분석 입력 데이터. + + gather 엔진에서 수집한 시장 데이터를 pipeline에 전달하기 위한 DTO. + pipeline이 직접 gather를 호출하지 않고, 호출자가 준비하여 전달. + """ + + marketCap: float # 시가총액 (원) + dailyReturns: list[float] # 일별 수익률 (최소 60일 권장) + riskFreeRate: float = 0.035 # 무위험이자율 (3.5%) + + +@dataclass +class DistressResult: + """부실 예측 종합 스코어카드. + + 5축 가중 평균 (100점 만점, 0=안전 100=위험): + - 정량 분석 (30%): O-Score, Z''-Score, Z-Score [Merton 있을 때, 없으면 40%] + - 시장 기반 (20%): Merton D2D + PD [Merton 없으면 0%] + - 이익 품질 (15%): Beneish M-Score, Sloan Accrual, Piotroski F-Score [없으면 20%] + - 추세 분석 (25%): 연속적자, ICR<1, CCC 확대 등 [없으면 30%] + - 감사 위험 (10%): 비적정 의견 등 + + Merton 미제공 시 기존 4축(40/20/30/10) 그대로 동작 (하위호환 100%). + 금융업(isFinancial=True) → Merton 무시 (은행 부채 구조적 왜곡). + + 레벨: safe(<15), watch(<30), warning(<50), danger(<70), critical(>=70) + 신용등급: AAA~D (S&P PD 매핑) + """ + + # 종합 판정 + level: str # safe/watch/warning/danger/critical + overall: float # 0~100 + creditGrade: str # AAA~D + creditDescription: str # "투자적격 최상위" 등 + + # 축 상세 (4축 또는 5축) + axes: list[DistressAxis] = field(default_factory=list) + + # 유동성 경보 + cashRunwayMonths: Optional[float] = None + liquidityAlert: Optional[str] = None + + # 핵심 위험 요인 + riskFactors: list[str] = field(default_factory=list) + + # 메타 + modelCount: int = 0 + dataQuality: str = "충분" + + def __repr__(self) -> str: + lines = [ + "=== 부실 예측 스코어카드 ===", + f"종합: {self.level} ({self.overall:.1f}/100) | 신용등급: {self.creditGrade} ({self.creditDescription})", + "", + ] + for axis in self.axes: + lines.append(repr(axis)) + lines.append("") + + if self.liquidityAlert: + runway = f"{self.cashRunwayMonths:.0f}개월" if self.cashRunwayMonths and self.cashRunwayMonths < 900 else "" + lines.append(f"유동성: {self.liquidityAlert} {runway}".strip()) + + if self.riskFactors: + lines.append("위험 요인:") + for rf in self.riskFactors: + lines.append(f" - {rf}") + else: + lines.append("위험 요인: 없음") + + lines.append(f"모델 {self.modelCount}개 사용, 데이터 품질: {self.dataQuality}") + return "\n".join(lines) + + def _repr_html_(self) -> str: + """Jupyter/Marimo용 HTML.""" + try: + from dartlab.display.notebook import htmlDistress + + return htmlDistress(self) + except ImportError: + return f"
{repr(self)}
" + + +@dataclass +class AnalysisResult: + """종합 분석 결과.""" + + corpName: str + stockCode: str + isFinancial: bool + + performance: InsightResult + profitability: InsightResult + health: InsightResult + cashflow: InsightResult + governance: InsightResult + risk: InsightResult + opportunity: InsightResult + + predictability: Optional[InsightResult] = None + uncertainty: Optional[InsightResult] = None + coreEarnings: Optional[InsightResult] = None + + anomalies: list[Anomaly] = field(default_factory=list) + distress: Optional[DistressResult] = None + summary: str = "" + profile: str = "" + + def grades(self) -> dict[str, str]: + """10영역 등급 dict 반환.""" + result = { + "performance": self.performance.grade, + "profitability": self.profitability.grade, + "health": self.health.grade, + "cashflow": self.cashflow.grade, + "governance": self.governance.grade, + "risk": self.risk.grade, + "opportunity": self.opportunity.grade, + } + if self.predictability: + result["predictability"] = self.predictability.grade + if self.uncertainty: + result["uncertainty"] = self.uncertainty.grade + if self.coreEarnings: + result["coreEarnings"] = self.coreEarnings.grade + return result + + def __repr__(self): + try: + from dartlab.display.richInsight import renderInsight + + return renderInsight(self) + except ImportError: + g = self.grades() + gradeStr = " ".join(f"{k[:4]}={v}" for k, v in g.items()) + anomalyStr = f" anomalies={len(self.anomalies)}" if self.anomalies else "" + return f"AnalysisResult({self.corpName}, {gradeStr}{anomalyStr})" + + def _repr_html_(self) -> str: + """Jupyter/Marimo용 HTML.""" + try: + from dartlab.display.notebook import htmlInsight + + return htmlInsight(self) + except ImportError: + return repr(self) diff --git a/src/dartlab/analysis/financial/investmentAnalysis.py b/src/dartlab/analysis/financial/investmentAnalysis.py new file mode 100644 index 0000000000000000000000000000000000000000..29285ca3eb3f5f1869ed52dff34098fcf103af26 --- /dev/null +++ b/src/dartlab/analysis/financial/investmentAnalysis.py @@ -0,0 +1,508 @@ +"""투자 분석 -- ROIC, NOPAT, 투자 강도 시계열. + +select()로 IS/BS/CF 원본 계정을 가져와서 +투자가 실제로 가치를 만드는지를 금액과 함께 시계열로 추적한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import ( + annualColsFromPeriods, + sumBorrowings, + toDictBySnakeId, +) +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +from dartlab.core.finance.calc import safePct as _pct # noqa: E402 + + +def _yoy(cur, prev) -> float | None: + if cur is None or prev is None or prev == 0: + return None + return round((cur - prev) / abs(prev) * 100, 2) + + +def _estimateWacc(company) -> float | None: + """company에서 WACC 추정 (compute_company_wacc 래퍼). + + [성능] 자체 캐시 (None 결과도 캐싱) — review에서 5개 calc가 각자 호출하는데 + 매번 fetch_price + fetchBeta 외부 API 호출 발생 (각 ~2.6s = 13s). + None 결과도 캐시해야 외부 API 재호출 방지. + memoized_calc은 None 결과를 캐시 안 함 → 자체 sentinel 캐시 사용. + """ + cache = getattr(company, "_cache", None) + _KEY = "_estimateWacc_v2" + _SENTINEL = "__NONE__" + if cache is not None and _KEY in cache: + cached = cache[_KEY] + return None if cached == _SENTINEL else cached + + result: float | None = None + try: + from dartlab.core.finance.proforma import compute_company_wacc + + annual = company._buildFinanceSeries(freq="Y") + if annual is not None: + series, _ = annual + sectorParams = getattr(company, "sectorParams", None) + # 시총: gather.price 경유 (beta 감쇠에 필요) + marketCap = None + try: + from dartlab.gather.http import run_async + from dartlab.gather.price import fetch + + stockCode = getattr(company, "stockCode", "") + snapshot = run_async(fetch(stockCode, market="KR")) if stockCode else None + if snapshot: + marketCap = snapshot.market_cap + except (ImportError, OSError, RuntimeError): + pass + # 개별 beta + betaCalc = None + try: + from dartlab.core.finance.proforma import _fetchBeta + + betaCalc = _fetchBeta(getattr(company, "stockCode", ""), getattr(company, "currency", "KRW")) + except (ImportError, OSError, RuntimeError): + pass + wacc, _ = compute_company_wacc( + series, + sector_params=sectorParams, + market_cap=marketCap, + currency=getattr(company, "currency", "KRW"), + beta_override=betaCalc, + ) + result = round(wacc, 2) + except (ImportError, AttributeError, TypeError, ValueError): + result = None + + if cache is not None: + cache[_KEY] = result if result is not None else _SENTINEL + return result + + +# ── ROIC (NOPAT / 투하자본) ── + + +@memoized_calc +def calcRoicTimeline(company, *, basePeriod: str | None = None) -> dict | None: + """ROIC 시계열 -- 투하자본 대비 실제 수익률. + + IS에서 영업이익 + 세율, BS에서 자본 + 차입금으로 직접 계산. + ROIC = NOPAT / Invested Capital + + Returns + ------- + dict | None + history : list[dict] — 기간별 ROIC 시계열 + period : str — 기간 + operatingIncome : float | None — 영업이익 + effectiveTaxRate : float — 유효세율 (%) + nopat : float | None — 세후영업이익 + equity : float | None — 자본총계 + totalBorrowing : float | None — 이자부차입금 합계 + cash : float | None — 현금및현금성자산 + investedCapital : float | None — 투하자본 + roic : float | None — ROIC (%) + roicYoy : float | None — ROIC YoY 변화율 (%) + waccEstimate : float | None — 추정 WACC (%) + spread : float | None — ROIC - WACC (%p) + """ + isResult = company.select("IS", ["영업이익", "법인세비용", "법인세차감전순이익"]) + bsResult = company.select( + "BS", + [ + "자본총계", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + "현금및현금성자산", + ], + ) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + opRow = isData.get("operating_profit", {}) + taxRow = isData.get("income_tax_expense") or isData.get("income_taxes", {}) + ptRow = isData.get("profit_before_tax", {}) + eqRow = bsData.get("total_stockholders_equity", {}) + bsData.get("shortterm_borrowings", {}) + bsData.get("longterm_borrowings", {}) + bsData.get("borrowings", {}) + bsData.get("debentures", {}) + cashRow = bsData.get("cash_and_cash_equivalents", {}) + + yCols = annualColsFromPeriods(isPeriods, maxYears=_MAX_YEARS + 1, basePeriod=basePeriod) + if len(yCols) < 2: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for i, col in enumerate(yCols[:-1]): + yCols[i + 1] if i + 1 < len(yCols) else None + opIncome = _getF(opRow, col) + taxExpense = _getF(taxRow, col) + ptIncome = _getF(ptRow, col) + + # 유효세율 + effectiveTaxRate = abs(taxExpense) / abs(ptIncome) if ptIncome != 0 else 0.25 + effectiveTaxRate = min(effectiveTaxRate, 0.5) + + nopat = round(opIncome * (1 - effectiveTaxRate)) if opIncome != 0 else None + + equity = _get(eqRow, col) + # equity 누락(0) 시 인접 기간 값으로 fallback (매핑 공백 대응) + if equity == 0: + for adjCol in yCols: + adjEq = _get(eqRow, adjCol) + if adjEq > 0: + equity = adjEq + break + # 차입금: 회사 키 패턴 무관 헬퍼 + totalBorrowing = sumBorrowings(bsData, col) + cash = _get(cashRow, col) + investedCapital = equity + totalBorrowing - cash + + roic = round(nopat / investedCapital * 100, 2) if nopat is not None and investedCapital > 0 else None + + history.append( + { + "period": col, + "operatingIncome": opIncome if opIncome != 0 else None, + "effectiveTaxRate": round(effectiveTaxRate * 100, 2), + "nopat": nopat, + "equity": equity if equity != 0 else None, + "totalBorrowing": totalBorrowing if totalBorrowing > 0 else None, + "cash": cash if cash != 0 else None, + "investedCapital": investedCapital if investedCapital > 0 else None, + "roic": roic, + "roicYoy": _yoy(roic, None), # 이전 기간 ROIC는 아래서 계산 + } + ) + + # ROIC YoY 후처리 (history가 최신→과거 순) + for i in range(len(history) - 1): + cur = history[i].get("roic") + prev = history[i + 1].get("roic") + history[i]["roicYoy"] = _yoy(cur, prev) + + # WACC 추정 (최신 시점 1회만, 전 기간 동일 적용) + waccEstimate = _estimateWacc(company) + if waccEstimate is not None: + for h in history: + h["waccEstimate"] = waccEstimate + roic = h.get("roic") + h["spread"] = round(roic - waccEstimate, 2) if roic is not None else None + + return {"history": history} if history else None + + +# ── 투자 강도 ── + + +@memoized_calc +def calcInvestmentIntensity(company, *, basePeriod: str | None = None) -> dict | None: + """투자 강도 시계열 -- CAPEX/매출, 유무형 비율. + + Returns + ------- + dict | None + history : list[dict] — 기간별 투자 강도 시계열 + period : str — 기간 + capex : float | None — CAPEX (유형+무형 취득) + revenue : float | None — 매출액 + tangibleAssets : float | None — 유형자산 + intangibleAssets : float | None — 무형자산 + totalAssets : float | None — 자산총계 + capexToRevenue : float | None — CAPEX/매출 (%) + tangibleRatio : float | None — 유형자산/총자산 (%) + intangibleRatio : float | None — 무형자산/총자산 (%) + """ + cfResult = company.select( + "CF", + ["purchase_of_property_plant_and_equipment", "purchase_of_intangible_assets"], + ) + isResult = company.select("IS", ["매출액"]) + bsResult = company.select("BS", ["유형자산", "무형자산", "자산총계"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + cfParsed = toDictBySnakeId(cfResult) + cfData = cfParsed[0] if cfParsed else {} + isData, isPeriods = isParsed + bsData, _ = bsParsed + + capexRow = cfData.get("purchase_of_property_plant_and_equipment", {}) + intCapexRow = cfData.get("purchase_of_intangible_assets", {}) + revRow = isData.get("sales", {}) + ppeRow = bsData.get("tangible_assets", {}) + intRow = bsData.get("intangible_assets", {}) + taRow = bsData.get("자산총계", {}) + + yCols = annualColsFromPeriods(isPeriods, maxYears=_MAX_YEARS, basePeriod=basePeriod) + if not yCols: + return None + + def _getF2(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + capex = abs(_getF2(capexRow, col)) + abs(_getF2(intCapexRow, col)) + rev = _getF2(revRow, col) + ppe = _get(ppeRow, col) + intangible = _get(intRow, col) + ta = _get(taRow, col) + + history.append( + { + "period": col, + "capex": capex if capex > 0 else None, + "revenue": rev if rev != 0 else None, + "tangibleAssets": ppe if ppe != 0 else None, + "intangibleAssets": intangible if intangible != 0 else None, + "totalAssets": ta if ta != 0 else None, + "capexToRevenue": _pct(capex, rev), + "tangibleRatio": _pct(ppe, ta), + "intangibleRatio": _pct(intangible, ta), + } + ) + + return {"history": history} if history else None + + +# ── NOPAT + 투하자본 ── + + +@memoized_calc +def calcEvaTimeline(company, *, basePeriod: str | None = None) -> dict | None: + """NOPAT + 투하자본 시계열. + + 투하자본 = 자본총계 + 이자부차입금 - 현금 (ROIC와 동일 기준). + + Returns + ------- + dict | None + history : list[dict] — 기간별 EVA 시계열 + period : str — 기간 + nopat : float | None — 세후영업이익 + investedCapital : float — 투하자본 + nopatReturn : float | None — NOPAT/투하자본 (%) + waccEstimate : float | None — 추정 WACC (%) + eva : float | None — 경제적부가가치 (NOPAT - IC * WACC) + """ + isResult = company.select("IS", ["영업이익", "법인세비용", "법인세차감전순이익"]) + bsResult = company.select( + "BS", + [ + "자본총계", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + "현금및현금성자산", + ], + ) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + opRow = isData.get("operating_profit", {}) + taxRow = isData.get("income_tax_expense") or isData.get("income_taxes", {}) + ptRow = isData.get("profit_before_tax", {}) + eqRow = bsData.get("total_stockholders_equity", {}) + bsData.get("shortterm_borrowings", {}) + bsData.get("longterm_borrowings", {}) + bsData.get("borrowings", {}) + bsData.get("debentures", {}) + cashRow = bsData.get("cash_and_cash_equivalents", {}) + + yCols = annualColsFromPeriods(isPeriods, maxYears=_MAX_YEARS, basePeriod=basePeriod) + if not yCols: + return None + + def _getF3(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + opIncome = _getF3(opRow, col) + taxExpense = _getF3(taxRow, col) + ptIncome = _getF3(ptRow, col) + + # 유효세율 + effectiveTaxRate = abs(taxExpense) / abs(ptIncome) if ptIncome != 0 else 0.25 + effectiveTaxRate = min(effectiveTaxRate, 0.5) + + nopat = opIncome * (1 - effectiveTaxRate) if opIncome != 0 else None + + equity = _get(eqRow, col) + # 차입금: 회사 키 패턴 무관 헬퍼 + totalBorrowing = sumBorrowings(bsData, col) + cash = _get(cashRow, col) + investedCapital = equity + totalBorrowing - cash + + # NOPAT / 투하자본 = 투하자본수익률 + nopatReturn = None + if nopat is not None and investedCapital > 0: + nopatReturn = round(nopat / investedCapital * 100, 2) + + history.append( + { + "period": col, + "nopat": nopat, + "investedCapital": investedCapital, + "nopatReturn": nopatReturn, + "waccEstimate": None, + "eva": None, + } + ) + + # WACC 추정 + EVA 계산 + waccEstimate = _estimateWacc(company) + if waccEstimate is not None: + for h in history: + h["waccEstimate"] = waccEstimate + nopat = h.get("nopat") + ic = h.get("investedCapital") + if nopat is not None and ic is not None and ic > 0: + h["eva"] = round(nopat - ic * waccEstimate / 100) + + return {"history": history} if history else None + + +# ── 타법인 출자 현황 (docs) ── + + +@memoized_calc +def calcInvestmentInOther(company, *, basePeriod: str | None = None) -> dict | None: + """investmentInOtherDetail docs 토픽에서 타법인 출자 총액 추출. + + 반환:: + + { + "totalBookValue": float | None, + "description": str | None, + "period": str | None, + } + """ + import re + + from dartlab.analysis.financial._helpers import parseNumStr + + result = company.show("investmentInOtherDetail") + if result is None: + return None + + import polars as pl + + if not isinstance(result, pl.DataFrame): + return None + + # block index 형태 — text 블록에서 총액 서술 추출 + if "block" in result.columns and "preview" in result.columns: + textBlocks = result.filter(pl.col("type") == "text") + for row in textBlocks.iter_rows(named=True): + preview = str(row.get("preview", "")) + # "타법인 출자 금액은 장부금액 기준 59조 2,469억원" 패턴 + m = re.search(r"출자\s*금액[^\d]*?([\d,]+)\s*조\s*([\d,]+)\s*억", preview) + if m: + tril = parseNumStr(m.group(1)) + bil = parseNumStr(m.group(2)) + if tril is not None and bil is not None: + total = tril * 10000 + bil # 억원 단위 + # 연도 추출 + ym = re.search(r"(\d{4})년", preview) + period = ym.group(1) if ym else None + return { + "totalBookValue": total, + "description": preview[:200], + "period": period, + } + # "XX억원" 패턴 (조 단위 없는 경우) + m2 = re.search(r"출자\s*금액[^\d]*?([\d,]+)\s*억", preview) + if m2: + bil = parseNumStr(m2.group(1)) + if bil is not None: + ym = re.search(r"(\d{4})년", preview) + period = ym.group(1) if ym else None + return { + "totalBookValue": bil, + "description": preview[:200], + "period": period, + } + + return None + + +# ── 플래그 ── + + +@memoized_calc +def calcInvestmentFlags(company, *, basePeriod: str | None = None) -> list[str]: + """투자 분석 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 문자열 리스트 (저ROIC 지속, 무형자산비율 급등 등). + """ + flags = [] + + roic = calcRoicTimeline(company, basePeriod=basePeriod) + if roic and len(roic["history"]) >= 3: + hist = roic["history"] + declining = all(h.get("roic") is not None and h["roic"] < 5 for h in hist[:3]) + if declining: + latest = hist[0].get("roic") + flags.append(f"ROIC {latest:.1f}% — 3년 연속 저수익 (자본비용 미회수 가능성)") + + intensity = calcInvestmentIntensity(company, basePeriod=basePeriod) + if intensity and len(intensity["history"]) >= 2: + hist = intensity["history"] + h0 = hist[0] + h1 = hist[1] + ir0 = h0.get("intangibleRatio") + ir1 = h1.get("intangibleRatio") + if ir0 is not None and ir1 is not None and ir0 - ir1 > 10: + flags.append(f"무형자산비율 +{ir0 - ir1:.0f}%p 급등 — 대규모 인수 또는 영업권 증가") + + return flags diff --git a/src/dartlab/analysis/financial/macroExposure.py b/src/dartlab/analysis/financial/macroExposure.py new file mode 100644 index 0000000000000000000000000000000000000000..96c09ff447504000ecc516e096f84bfca0bf4ca9 --- /dev/null +++ b/src/dartlab/analysis/financial/macroExposure.py @@ -0,0 +1,371 @@ +"""6부 매크로 분석 — 기업-매크로 연결 (매크로 민감도, 밸류에이션 밴드). + +기업 매출과 외생변수의 관계, 기업 멀티플 밴드 등 Company가 필수인 분석. +시장 자체 매크로 분석(사이클, 자산해석 등)은 독립 엔진 dartlab.macro()가 담당. +""" + +from __future__ import annotations + +import logging + +from dartlab.analysis.financial._memoize import memoized_calc + +log = logging.getLogger(__name__) + + +def _loadMacroIndicator(g, seriesId: str, source: str = "ecos", start: str = "2014-01-01"): + """gather에서 단일 매크로 지표 로드.""" + try: + if source == "ecos": + return g.macro("KR", seriesId, start=start) + return g.macro(seriesId, start=start) + except Exception: # noqa: BLE001 + return None + + +def _getGather(): + """gather 싱글톤 로드 (lazy).""" + try: + from dartlab.gather import getDefaultGather + + return getDefaultGather() + except ImportError: + return None + + +def _getLatestValue(df, col: str = "value"): + """DataFrame의 최신 값.""" + if df is None or len(df) == 0: + return None + try: + sorted_df = df.sort("date", descending=True) + val = sorted_df[col][0] + return float(val) if val is not None else None + except Exception: # noqa: BLE001 + return None + + +def _getMonthlyChange(df, months: int = 3, col: str = "value"): + """N개월 전 대비 변화.""" + if df is None or len(df) < months + 1: + return None + try: + sorted_df = df.sort("date", descending=True) + latest = sorted_df[col][0] + past = sorted_df[col][months] + if latest is not None and past is not None and past != 0: + return float(latest - past) + except (KeyError, IndexError, ValueError, TypeError): + pass + return None + + +def _getYoYChange(df, col: str = "value"): + """YoY 변화율 (%).""" + if df is None or len(df) < 13: + return None + try: + import polars as pl + + monthly = df.sort("date").group_by_dynamic("date", every="1mo").agg(pl.col(col).last()) + if len(monthly) < 13: + return None + latest = monthly[col][-1] + year_ago = monthly[col][-13] + if latest is not None and year_ago is not None and year_ago != 0: + return float((latest / year_ago - 1) * 100) + except (KeyError, IndexError, ValueError, TypeError, ImportError): + pass + return None + + +# ══════════════════════════════════════ +# 매크로민감도 — 기업 매출 vs 외생변수 +# ══════════════════════════════════════ + + +@memoized_calc +def calcMacroSensitivity(company, *, basePeriod: str | None = None) -> dict | None: + """기업 매출 vs 외생변수 회귀 — 업종 최적 + 범용 병행. + + exogenousAxes에서 업종 최적 3지표를 가져오고, 범용 3지표(금리/환율/IPI)와 비교. + R-squared가 높은 쪽을 채택. 현재 외생변수 상태 × beta로 매출 방향 추정. + + Returns + ------- + dict + stockCode : str — 종목코드 + optimalIndicators : list[dict] — 업종 최적 지표별 회귀 결과 (label, rSquared, impact) + genericIndicators : list[dict] — 범용 3지표 회귀 결과 + selected : list[dict] — 채택된 지표 그룹 (R2 높은 쪽) + selectedSource : str — 채택 출처 ("업종최적" | "범용") + optimalBestR2 : float — 업종 최적 최고 R-squared + genericBestR2 : float — 범용 최고 R-squared + netDirection : str — 종합 매출 방향 ("positive" | "negative" | "neutral") + netDirectionLabel : str — 종합 방향 한글 라벨 + """ + import polars as pl + + from dartlab.core.finance.exogenousAxes import ExogenousIndicator, getExogenousIndicators + + g = _getGather() + if g is None: + return None + + stockCode = getattr(company, "stockCode", None) or getattr(company, "stock_code", None) + if stockCode is None: + return None + + # 매출 성장률 시계열 — flow 헬퍼 경유 (Q4 분기 단독값 함정 차단) + from dartlab.analysis.financial._helpers import ( + annualColsFromPeriods, + toDictBySnakeId, + ) + + rev_result = company.select("IS", ["매출액"]) + if rev_result is None: + return None + + parsed = toDictBySnakeId(rev_result) + if parsed is None: + return None + isData, isPeriods = parsed + revRow = isData.get("매출액", {}) + yCols = annualColsFromPeriods(isPeriods) + if len(yCols) < 4: + return None + rev_data = [] + for col in sorted(yCols): + val = revRow.get(col) + year_str = col.replace("Q4", "").replace("A", "") + try: + year = int(year_str) + except ValueError: + continue + if val is not None: + rev_data.append({"year": year, "revenue": float(val)}) + + if len(rev_data) < 4: + return None + + rev_df = pl.DataFrame(rev_data).sort("year") + rev_df = rev_df.with_columns((pl.col("revenue") / pl.col("revenue").shift(1) - 1).alias("growth")).drop_nulls( + "growth" + ) + + years = rev_df["year"].to_list() + growth = rev_df["growth"].to_list() + + if len(years) < 3: + return None + + # 업종 최적 3지표 + optimal = getExogenousIndicators(stockCode=stockCode) + + # 범용 3지표 + generic = [ + ExogenousIndicator("BASE_RATE", "ecos", "기준금리", "financial"), + ExogenousIndicator("USDKRW", "ecos", "원/달러", "fx"), + ExogenousIndicator("IPI", "ecos", "산업생산", "domestic"), + ] + + def _regress(indicators: list[ExogenousIndicator]): + """각 지표와 매출 성장률의 R-squared 계산.""" + results = [] + for ind in indicators: + ind_df = _loadMacroIndicator(g, ind.seriesId, ind.source) + if ind_df is None or len(ind_df) == 0: + continue + + # 연간 평균 + annual = ( + ind_df.with_columns(pl.col("date").dt.year().alias("year")) + .group_by("year") + .agg(pl.col("value").mean()) + .sort("year") + ) + + # years와 매칭 + ind_values = [] + for y in years: + row = annual.filter(pl.col("year") == y) + if len(row) > 0: + ind_values.append(float(row["value"][0])) + else: + ind_values.append(None) + + # None 필터링 + 변화율 + valid_pairs = [(i, g_val, v) for i, (g_val, v) in enumerate(zip(growth, ind_values)) if v is not None] + if len(valid_pairs) < 3: + continue + + g_vals = [p[1] for p in valid_pairs] + i_vals = [p[2] for p in valid_pairs] + + # 지표 변화율 + i_changes = [] + for j in range(1, len(i_vals)): + if i_vals[j - 1] != 0: + i_changes.append((i_vals[j] - i_vals[j - 1]) / abs(i_vals[j - 1])) + else: + i_changes.append(0) + + g_subset = g_vals[1:] + if len(g_subset) < 2 or len(i_changes) < 2: + continue + + # R-squared + g_mean = sum(g_subset) / len(g_subset) + i_mean = sum(i_changes) / len(i_changes) + sst = sum((v - g_mean) ** 2 for v in g_subset) + cov = sum((g_subset[k] - g_mean) * (i_changes[k] - i_mean) for k in range(len(g_subset))) + i_var = sum((v - i_mean) ** 2 for v in i_changes) + + r2 = (cov**2) / (sst * i_var) if sst > 0 and i_var > 0 else 0 + + # 현재 지표 변화와 최근 매출 방향 + latest_i_change = i_changes[-1] if i_changes else 0 + beta_sign = 1 if cov > 0 else -1 + impact = "상승" if latest_i_change * beta_sign > 0 else "하락" + + results.append( + { + "label": ind.label, + "seriesId": ind.seriesId, + "axis": ind.axis, + "rSquared": round(r2, 3), + "latestChange": round(latest_i_change * 100, 1), + "impact": impact, + } + ) + + return results + + optimal_results = _regress(optimal) + generic_results = _regress(generic) + + # 최고 R-squared 비교 + opt_best = max((r["rSquared"] for r in optimal_results), default=0) + gen_best = max((r["rSquared"] for r in generic_results), default=0) + + # 더 나은 쪽 선택 + if opt_best >= gen_best: + selected = optimal_results + selectedLabel = "업종최적" + else: + selected = generic_results + selectedLabel = "범용" + + # 종합 방향 + up_count = sum(1 for r in selected if r["impact"] == "상승") + down_count = sum(1 for r in selected if r["impact"] == "하락") + net_direction = "positive" if up_count > down_count else "negative" if down_count > up_count else "neutral" + + return { + "stockCode": stockCode, + "optimalIndicators": optimal_results, + "genericIndicators": generic_results, + "selected": selected, + "selectedSource": selectedLabel, + "optimalBestR2": opt_best, + "genericBestR2": gen_best, + "netDirection": net_direction, + "netDirectionLabel": {"positive": "매출 상승 방향", "negative": "매출 하락 방향", "neutral": "중립"}.get( + net_direction, "중립" + ), + } + + +# ══════════════════════════════════════ +# 밸류에이션밴드 — 기업 PER/PBR 정규분포 밴드 +# ══════════════════════════════════════ + + +@memoized_calc +def calcValuationBand(company, *, basePeriod: str | None = None) -> dict | None: + """PER/PBR 정규분포 밴드에서 현재 위치. + + Returns + ------- + dict + bands : dict — PER/PBR별 밴드 정보 (metric, current, mean, std, percentile, zone, zoneLabel, dataPoints) + overallZone : str — 종합 판정 ("저평가" | "고평가" | "부분 저평가" | "부분 고평가" | "적정") + """ + from dartlab.core.finance.macroCycle import calcMultipleBand + + # ratioSeries에서 PER/PBR 과거 시계열 추출 + try: + ratios = company.show("ratios") + if ratios is None: + return None + except (AttributeError, TypeError): + return None + + result = {} + + for metric, key in [("PER", "per"), ("PBR", "pbr")]: + try: + # ratios DataFrame에서 해당 행 추출 + if hasattr(ratios, "columns"): + import polars as pl + + # snakeId 또는 항목으로 필터 + row = ( + ratios.filter(pl.col("snakeId").str.to_lowercase() == key) if "snakeId" in ratios.columns else None + ) + + if row is None or len(row) == 0: + continue + + # 기간 컬럼에서 값 추출 + values = [] + for col in row.columns: + if col in ("snakeId", "항목", "account"): + continue + val = row[col][0] + if val is not None: + try: + values.append(float(val)) + except (ValueError, TypeError): + pass + + if len(values) < 5: + continue + + current = values[0] # 가장 최근 + band = calcMultipleBand(values, current, metric) + if band is not None: + result[key] = { + "metric": band.metric, + "current": band.current, + "mean": band.mean, + "std": band.std, + "percentile": band.percentile, + "zone": band.zone, + "zoneLabel": band.zLabel, + "dataPoints": len(values), + } + except Exception as e: # noqa: BLE001 + log.debug("밸류에이션밴드 %s 실패: %s", metric, e) + continue + + if not result: + return None + + # 종합 zone + zones = [v["zone"] for v in result.values()] + if all(z == "cheap" for z in zones): + overall = "저평가" + elif all(z == "expensive" for z in zones): + overall = "고평가" + elif any(z == "cheap" for z in zones): + overall = "부분 저평가" + elif any(z == "expensive" for z in zones): + overall = "부분 고평가" + else: + overall = "적정" + + return { + "bands": result, + "overallZone": overall, + } diff --git a/src/dartlab/analysis/financial/peerBenchmark.py b/src/dartlab/analysis/financial/peerBenchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..7729d5c0b1fb5f494bd5217982857ec36b3fb769 --- /dev/null +++ b/src/dartlab/analysis/financial/peerBenchmark.py @@ -0,0 +1,298 @@ +"""5-3 비교분석 -- 이 회사는 시장에서 어디에 서 있는가. + +scan 데이터에서 해당 종목의 백분위/순위를 계산하여 +기존 재무 지표에 시장 맥락을 더한다. +""" + +from __future__ import annotations + +import polars as pl + +from dartlab.analysis.financial._memoize import memoized_calc + +# 비교할 핵심 비율 목록 (scanRatio name → 표시 label) +_BENCHMARK_RATIOS = [ + ("roe", "ROE"), + ("roa", "ROA"), + ("operatingMargin", "영업이익률"), + ("netMargin", "순이익률"), + ("debtRatio", "부채비율"), + ("currentRatio", "유동비율"), + ("revenueGrowth", "매출성장률"), + ("totalAssetTurnover", "총자산회전율"), +] + + +# ── 핵심 비율 백분위 ── + + +@memoized_calc +def calcPeerRanking(company, *, basePeriod: str | None = None) -> dict | None: + """핵심 재무비율 시장 내 백분위 순위. + + scan 데이터에서 최신 기간 기준 백분위(percentile)와 + 순위(rank)를 계산한다. 결과는 company._cache에 저장하여 재활용. + + Returns + ------- + dict + rankings : list[dict] — 비율별 순위 정보 + ratioName : str — 비율 ID (roe, debtRatio 등) + label : str — 표시명 + value : float — 해당 종목 값 + percentile : float — 시장 내 백분위 (%) + rank : int — 순위 (1 = 최상위) + total : int — 전체 종목 수 + period : str — 기준 기간 + """ + cache = getattr(company, "_cache", None) + _KEY = "_peerRanking" + if cache is not None and _KEY in cache: + return cache[_KEY] + + stockCode = _getStockCode(company) + if stockCode is None: + return None + + rankings = [] + for ratioName, label in _BENCHMARK_RATIOS: + result = _calcPercentile(stockCode, ratioName, label) + if result is not None: + rankings.append(result) + + out = {"rankings": rankings} if rankings else None + if cache is not None: + cache[_KEY] = out + return out + + +# ── 수익성 vs 안정성 포지션 ── + + +@memoized_calc +def calcRiskReturnPosition(company, *, basePeriod: str | None = None) -> dict | None: + """수익-위험 매트릭스 포지션. + + ROE(수익) x 부채비율(위험)에서 시장 내 사분면 위치를 결정한다. + calcPeerRanking 캐시가 있으면 재활용. + + Returns + ------- + dict + roe : float — ROE 값 (%) + roePercentile : float — ROE 시장 백분위 (%) + debtRatio : float — 부채비율 값 (%) + debtRatioPercentile : float — 부채비율 시장 백분위 (%) + quadrant : str — 사분면 ("고수익-저위험" | "고수익-고위험" | "저수익-저위험" | "저수익-고위험") + assessment : str — 평가 ("우량" | "레버리지 의존" | "보수적" | "구조 개선 필요") + """ + # ranking 캐시에서 roe/debtRatio 추출 시도 + ranking = calcPeerRanking(company) + roeR = _findRanking(ranking, "roe") if ranking else None + debtR = _findRanking(ranking, "debtRatio") if ranking else None + + if roeR and debtR: + roeVal = roeR["value"] + roePctile = roeR["percentile"] + debtVal = debtR["value"] + debtPctile = debtR["percentile"] + else: + # ranking 없으면 직접 조회 + stockCode = _getStockCode(company) + if stockCode is None: + return None + roeData = _getLatestValue(stockCode, "roe") + debtData = _getLatestValue(stockCode, "debtRatio") + if roeData is None or debtData is None: + return None + roeVal, roePctile = roeData + debtVal, debtPctile = debtData + + # 사분면 결정 (ROE 높/낮 x 부채 높/낮) + highRoe = roePctile >= 50 + highDebt = debtPctile >= 50 + + if highRoe and not highDebt: + quadrant = "고수익-저위험" + assessment = "우량" + elif highRoe and highDebt: + quadrant = "고수익-고위험" + assessment = "레버리지 의존" + elif not highRoe and not highDebt: + quadrant = "저수익-저위험" + assessment = "보수적" + else: + quadrant = "저수익-고위험" + assessment = "구조 개선 필요" + + return { + "roe": roeVal, + "roePercentile": roePctile, + "debtRatio": debtVal, + "debtRatioPercentile": debtPctile, + "quadrant": quadrant, + "assessment": assessment, + } + + +# ── 플래그 ── + + +@memoized_calc +def calcPeerBenchmarkFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]]: + """비교분석 경고/기회 플래그. + + Returns + ------- + list[tuple[str, str]] + (메시지, 유형) 튜플 목록. 유형은 "warning" | "opportunity". + """ + flags: list[tuple[str, str]] = [] + + ranking = calcPeerRanking(company) + if ranking is None: + return flags + + for r in ranking["rankings"]: + pctile = r.get("percentile") + label = r.get("label", "") + value = r.get("value") + + if pctile is None or value is None: + continue + + # 수익성 지표: 상위 10%면 기회, 하위 10%면 경고 + if label in ("ROE", "ROA", "영업이익률", "순이익률"): + if pctile >= 90: + flags.append((f"{label} 상위 {100 - pctile:.0f}% ({value:.1f}%)", "opportunity")) + elif pctile <= 10: + flags.append((f"{label} 하위 {pctile:.0f}% ({value:.1f}%)", "warning")) + + # 부채비율: 상위(높은) 10%면 경고 + elif label == "부채비율": + if pctile >= 90: + flags.append((f"부채비율 상위 {100 - pctile:.0f}% ({value:.1f}%)", "warning")) + elif pctile <= 10: + flags.append((f"부채비율 하위 {pctile:.0f}% -- 매우 건전", "opportunity")) + + # 성장률: 상위 10%면 기회 + elif label == "매출성장률": + if pctile >= 90: + flags.append((f"매출성장률 상위 {100 - pctile:.0f}%", "opportunity")) + elif pctile <= 10: + flags.append((f"매출성장률 하위 {pctile:.0f}%", "warning")) + + # 사분면 플래그 — ranking에서 이미 구한 roe/debtRatio로 직접 판정 + _roeR = _findRanking(ranking, "roe") + _debtR = _findRanking(ranking, "debtRatio") + if _roeR and _debtR: + quadrant = _quadrantFromPctile(_roeR["percentile"], _debtR["percentile"]) + if quadrant == "고수익-저위험": + flags.append(("수익-위험 매트릭스: 고수익-저위험 (우량 포지션)", "opportunity")) + elif quadrant == "저수익-고위험": + flags.append(("수익-위험 매트릭스: 저수익-고위험 (구조 개선 필요)", "warning")) + + return flags + + +# ── 내부 헬퍼 ── + + +def _getStockCode(company) -> str | None: + """company에서 stockCode를 안전하게 추출.""" + code = getattr(company, "stockCode", None) + return code if isinstance(code, str) and code else None + + +def _findRanking(ranking: dict, ratioName: str) -> dict | None: + """ranking 결과에서 특정 ratio 항목을 찾는다.""" + for r in ranking.get("rankings", []): + if r.get("ratioName") == ratioName: + return r + return None + + +def _quadrantFromPctile(roePctile: float, debtPctile: float) -> str: + """백분위로 사분면 결정.""" + highRoe = roePctile >= 50 + highDebt = debtPctile >= 50 + if highRoe and not highDebt: + return "고수익-저위험" + if highRoe and highDebt: + return "고수익-고위험" + if not highRoe and not highDebt: + return "저수익-저위험" + return "저수익-고위험" + + +def _calcPercentile(stockCode: str, ratioName: str, label: str) -> dict | None: + """scan 결과에서 해당 종목의 백분위를 계산.""" + try: + df = _loadScanRatio(ratioName) + except (ValueError, ImportError, RuntimeError, FileNotFoundError): + return None + + if df is None or df.is_empty(): + return None + + # 최신 기간 컬럼 찾기 + periodCol = _latestPeriodCol(df) + if periodCol is None: + return None + + # 해당 종목 값 추출 + codeCol = "stockCode" if "stockCode" in df.columns else "종목코드" + if codeCol not in df.columns: + return None + + target = df.filter(pl.col(codeCol) == stockCode) + if target.is_empty(): + return None + + targetVal = target.row(0, named=True).get(periodCol) + if targetVal is None: + return None + + # 전체 분포에서 백분위 계산 + allVals = df[periodCol].drop_nulls().to_list() + if len(allVals) < 10: + return None + + nBelow = sum(1 for v in allVals if v < targetVal) + percentile = round(nBelow / len(allVals) * 100, 1) + rank = sum(1 for v in allVals if v > targetVal) + 1 + + return { + "ratioName": ratioName, + "label": label, + "value": round(targetVal, 2) if isinstance(targetVal, float) else targetVal, + "percentile": percentile, + "rank": rank, + "total": len(allVals), + "period": periodCol, + } + + +def _getLatestValue(stockCode: str, ratioName: str) -> tuple[float, float] | None: + """scan에서 해당 종목의 (값, 백분위) 튜플 반환.""" + result = _calcPercentile(stockCode, ratioName, "") + if result is None: + return None + return (result["value"], result["percentile"]) + + +def _loadScanRatio(ratioName: str) -> pl.DataFrame: + """scan("ratio", name) 경유로 비율 DataFrame을 가져온다.""" + from dartlab.scan import Scan + + return Scan()("ratio", ratioName) + + +def _latestPeriodCol(df: pl.DataFrame) -> str | None: + """DataFrame에서 최신 기간 컬럼을 찾는다.""" + from dartlab.analysis.financial._helpers import periodCols + + cols = periodCols(df) + if not cols: + return None + return cols[0] diff --git a/src/dartlab/analysis/financial/predictionSignals.py b/src/dartlab/analysis/financial/predictionSignals.py new file mode 100644 index 0000000000000000000000000000000000000000..924a6bd9d0ed8edb9163d914c87d669c47819de6 --- /dev/null +++ b/src/dartlab/analysis/financial/predictionSignals.py @@ -0,0 +1,2619 @@ +"""6-2 예측신호 -- 이 회사의 실적은 어디로 향하는가. + +다중 소스 예측 신호를 구조화된 데이터로 제공한다. +forecast 엔진(점 추정)과 달리, 방향성과 신뢰도에 집중한다. + +학술 근거: +- Sloan 1996: 현금흐름 구성요소가 발생액보다 지속성 높음 +- Cao & You 2024 (G&D Award): 횡단면 재무비율 → ML 이익 예측 +- M Competition: 단순 앙상블이 복잡한 가중치를 이김 +- M6: 방향 정확도가 점 정확도보다 투자에 유의미 +""" + +from __future__ import annotations + +import logging +import math + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +log = logging.getLogger(__name__) + +_MAX_YEARS = 8 + +# ── 업종별 모멘텀 사전확률 + 매크로 매핑 (JSON 단일 진실의 원천) ── +import json +from pathlib import Path as _Path + +_SECTOR_DATA = json.loads( + (_Path(__file__).resolve().parents[4] / "core" / "data" / "parserMappings" / "sectorPriors.json").read_text( + encoding="utf-8" + ) +) +_INDUSTRY_PRIOR: dict[str, float] = _SECTOR_DATA.get("priors", {}) +_DEFAULT_PRIOR: float = _SECTOR_DATA.get("_metadata", {}).get("defaultPrior", 0.721) +_SECTOR_MACRO_MAP: dict[str, list[dict]] = _SECTOR_DATA.get("sectorMacroMap", {}) +_sensitivity = _SECTOR_DATA.get("sectorSensitivity", {}) +_RATE_SENSITIVE_SECTORS = set(_sensitivity.get("rate", [])) +_FX_SENSITIVE_SECTORS = set(_sensitivity.get("fx", [])) +_COMMODITY_SECTORS = set(_sensitivity.get("commodity", [])) + + +# ── 공통 헬퍼 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +def _safe(num: float, den: float) -> float | None: + if den is None or den == 0: + return None + return num / den + + +def _getStockCode(company) -> str | None: + return getattr(company, "stockCode", None) + + +def _getSectorKey(company) -> str | None: + """업종 키 추출 (scenario.py와 동일 경로).""" + try: + from dartlab.analysis.financial.valuation import _IG_TO_SECTOR_KEY + + sectorInfo = company.sector + if sectorInfo is not None: + igName = sectorInfo.industryGroup.name + return _IG_TO_SECTOR_KEY.get(igName) + except (AttributeError, ValueError, ImportError): + pass + return None + + +# ══════════════════════════════════════ +# calc 1: 이익 모멘텀/지속성 +# ══════════════════════════════════════ + + +@memoized_calc +def calcEarningsMomentum(company, *, basePeriod: str | None = None) -> dict | None: + """이익 모멘텀 — Sloan 분해(현금 vs 발생액) + DuPont 추세. + + 이익이 가속/감속 중인지, 현금 뒷받침이 있는지를 판단한다. + + Returns + ------- + dict + history : list[dict] — 연도별 Sloan 분해 시계열 (netIncome, ocf, accrual, sloanAccrualRatio, ocfToNi, margin, turnover, leverage) + momentum : str — 이익 모멘텀 ("accelerating" | "decelerating" | "reversing" | "stable") + earningsDirection : str — 방향 ("up" | "down" | "flat") + persistenceScore : float — 현금 지속성 점수 (점) + highAccrualWarning : bool — 발생액 비율 경고 (|accrual/자산| > 10%) + confidence : str — 신뢰도 ("high" | "medium" | "low") + """ + isResult = company.select("IS", ["당기순이익", "매출액", "영업이익"]) + cfResult = company.select("CF", ["영업활동현금흐름"]) + bsResult = company.select("BS", ["자산총계", "자본총계"]) + + isParsed = toDictBySnakeId(isResult) + cfParsed = toDictBySnakeId(cfResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or cfParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + cfData, cfPeriods = cfParsed + bsData, _ = bsParsed + + niRow = isData.get("당기순이익", {}) + revRow = isData.get("매출액", {}) + oiRow = isData.get("영업이익", {}) + ocfRow = cfData.get("영업활동현금흐름", {}) + taRow = bsData.get("자산총계", {}) + teRow = bsData.get("자본총계", {}) + + yCols = annualColsFromPeriods(cfPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if len(yCols) < 3: + return None + # Sloan 분해 시계열 + history = [] + for col in yCols: + # IS/CF 는 flow → annualSumFlow 경유. BS 는 stock → 직접 + ni = niRow.get(col) or 0 + ocf = ocfRow.get(col) or 0 + ta = _get(taRow, col) # BS stock — Q4 가 연말잔액이라 그대로 OK + rev = revRow.get(col) or 0 + oi = oiRow.get(col) or 0 + te = _get(teRow, col) # BS stock + accrual = ni - ocf + + margin = _safe(oi, rev) if rev != 0 else None + turnover = _safe(rev, ta) if ta != 0 else None + leverage = _safe(ta, te) if te != 0 else None + + history.append( + { + "period": col, + "netIncome": ni, + "ocf": ocf, + "accrual": accrual, + "sloanAccrualRatio": _safe(accrual, ta) if ta > 0 else None, + "ocfToNi": _safe(ocf, ni) if ni != 0 else None, + "margin": margin, + "turnover": turnover, + "leverage": leverage, + } + ) + + if len(history) < 3: + return None + + # 이익 방향성 판단 (최근 3년 추세) + recentNi = [h["netIncome"] for h in history[:3]] + niChanges = [recentNi[i] - recentNi[i + 1] for i in range(len(recentNi) - 1)] + + if all(d > 0 for d in niChanges): + momentum = "accelerating" + direction = "up" + elif all(d < 0 for d in niChanges): + momentum = "decelerating" + direction = "down" + elif len(niChanges) >= 2 and niChanges[0] > 0 and niChanges[1] < 0: + momentum = "reversing" + direction = "up" + elif len(niChanges) >= 2 and niChanges[0] < 0 and niChanges[1] > 0: + momentum = "reversing" + direction = "down" + else: + momentum = "stable" + direction = "flat" + + # 현금 지속성 점수 (OCF/NI 비율 기반) + ocfToNiVals = [h["ocfToNi"] for h in history[:5] if h["ocfToNi"] is not None] + if ocfToNiVals: + avgOcfToNi = sum(ocfToNiVals) / len(ocfToNiVals) + if avgOcfToNi >= 1.0: + persistenceScore = min(90, 50 + avgOcfToNi * 20) + elif avgOcfToNi >= 0.5: + persistenceScore = 30 + avgOcfToNi * 40 + else: + persistenceScore = max(10, avgOcfToNi * 60) + else: + persistenceScore = 50 + + # 발생액 비율 기반 경고 + recentAccrual = [h["sloanAccrualRatio"] for h in history[:3] if h["sloanAccrualRatio"] is not None] + highAccrual = any(abs(a) > 0.10 for a in recentAccrual) if recentAccrual else False + + # 신뢰도 + nYears = len(history) + if nYears >= 5 and not highAccrual: + confidence = "high" + elif nYears >= 3: + confidence = "medium" + else: + confidence = "low" + + return { + "history": history, + "momentum": momentum, + "earningsDirection": direction, + "persistenceScore": round(persistenceScore, 1), + "highAccrualWarning": highAccrual, + "confidence": confidence, + } + + +# ══════════════════════════════════════ +# calc 2: 횡단면 피어 예측 +# ══════════════════════════════════════ + + +@memoized_calc +def calcPeerPrediction(company, *, basePeriod: str | None = None) -> dict | None: + """횡단면 피어 예측 — scan 데이터 기반 cross-section 회귀. + + 사전 적합된 횡단면/패널 모델로 이 회사의 매출 성장률을 예측하고, + 실제 성장률과의 괴리를 측정한다. + + Returns + ------- + dict + crossSectionPredicted : float | None — 횡단면 모델 예측 매출 성장률 (%) + panelPredicted : float | None — 패널 모델 예측 매출 성장률 (%) + ensemblePredicted : float — 앙상블 예측 매출 성장률 (%) + companyHistoricalGrowth : float | None — 실제 매출 성장률 (%) + divergence : float | None — 예측-실제 괴리 (%p) + modelR2 : float | None — 횡단면 모델 R-squared + """ + stockCode = _getStockCode(company) + if stockCode is None: + return None + + # 횡단면 모델 로드 시도 (최신 연도부터 탐색) + from datetime import datetime + + try: + from dartlab.analysis.valuation.crossRegression import loadModel, loadPanelModel + + csModel = None + for tryYear in range(datetime.now().year - 1, datetime.now().year - 4, -1): + csModel = loadModel(tryYear) + if csModel is not None: + break + panelModel = loadPanelModel() + except (ImportError, FileNotFoundError, OSError, TypeError): + csModel = None + panelModel = None + + if csModel is None and panelModel is None: + return None + + # 이 회사 피처 추출 (scan ratio에서) + features = _extractPeerFeatures(company) + if features is None: + return None + + sectorKey = _getSectorKey(company) or "" + + # 횡단면 예측 + csPredicted = None + csR2 = None + if csModel is not None: + csPredicted = csModel.predict(features, sectorKey) + csR2 = csModel.rSquared + + # 패널 예측 + panelPredicted = None + if panelModel is not None: + panelPredicted = panelModel.predict(stockCode, features) + + # 앙상블 (단순 평균 — 학술적 최적) + preds = [p for p in [csPredicted, panelPredicted] if p is not None] + if not preds: + return None + + ensemblePredicted = sum(preds) / len(preds) + + # 실제 매출 성장률 + historicalGrowth = _getHistoricalRevenueGrowth(company, basePeriod=basePeriod) + + divergence = None + if historicalGrowth is not None: + divergence = ensemblePredicted - historicalGrowth + + return { + "crossSectionPredicted": round(csPredicted, 2) if csPredicted is not None else None, + "panelPredicted": round(panelPredicted, 2) if panelPredicted is not None else None, + "ensemblePredicted": round(ensemblePredicted, 2), + "companyHistoricalGrowth": round(historicalGrowth, 2) if historicalGrowth is not None else None, + "divergence": round(divergence, 2) if divergence is not None else None, + "modelR2": round(csR2, 3) if csR2 is not None else None, + } + + +def _extractPeerFeatures(company) -> dict[str, float] | None: + """company에서 횡단면 회귀 피처를 추출.""" + features: dict[str, float] = {} + + try: + ratios = company._getRatiosInternal() + if ratios is None: + return None + + per = getattr(ratios, "per", None) + pbr = getattr(ratios, "pbr", None) + opMargin = getattr(ratios, "operatingMargin", None) + debtRatio = getattr(ratios, "debtRatio", None) + + if per is not None: + features["per"] = per + if pbr is not None: + features["pbr"] = pbr + if opMargin is not None: + features["operatingMargin"] = opMargin + if debtRatio is not None: + features["debtRatio"] = debtRatio + + # lnMarketCap + profile = getattr(company, "profile", None) + if profile: + mc = getattr(profile, "marketCap", None) + if mc and mc > 0: + features["lnMarketCap"] = math.log(mc) + + # capexRatio, foreignHoldingRatio, revenueGrowthLag — 없으면 기본값 + features.setdefault("capexRatio", 0.0) + features.setdefault("foreignHoldingRatio", 0.0) + features.setdefault("revenueGrowthLag", 0.0) + + except (AttributeError, TypeError, ValueError): + return None + + # 최소 4개 피처 있어야 유의미 + if len(features) < 4: + return None + + return features + + +def _getHistoricalRevenueGrowth(company, *, basePeriod: str | None = None) -> float | None: + """최근 매출 성장률 (%) 계산.""" + isResult = company.select("IS", ["매출액"]) + parsed = toDictBySnakeId(isResult) + if parsed is None: + return None + data, periods = parsed + revRow = data.get("매출액", {}) + yCols = annualColsFromPeriods(periods, basePeriod=basePeriod, maxYears=3) + if len(yCols) < 2: + return None + cur = _get(revRow, yCols[0]) + prev = _get(revRow, yCols[1]) + if prev == 0: + return None + return ((cur - prev) / abs(prev)) * 100 + + +# ══════════════════════════════════════ +# calc 3: 구조변화 감지 +# ══════════════════════════════════════ + + +@memoized_calc +def calcStructuralBreak(company, *, basePeriod: str | None = None) -> dict | None: + """구조변화 감지 — 매출/영업이익/마진/ROE 4대 지표. + + Chow Test 기반 구조적 변화점을 감지하여 추세 추정의 신뢰도를 판단한다. + + Returns + ------- + dict + metrics : list[dict] — 지표별 구조변화 결과 + name : str — 지표명 (revenue, operatingIncome, operatingMargin, roe) + hasBreak : bool — 구조변화 존재 여부 + breakYear : str | None — 변화점 기간 + preBreakGrowth : float | None — 변화점 전 평균 성장률 (%) + postBreakGrowth : float | None — 변화점 후 평균 성장률 (%) + trendReliability : str — 추세 신뢰도 ("high" | "medium" | "low") + nObservations : int — 관측치 수 + overallStability : str — 전체 안정성 ("stable" | "transitioning" | "volatile") + """ + from dartlab.core.finance.ols import detectStructuralBreak, ols + + isResult = company.select("IS", ["매출액", "영업이익"]) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + isData, isPeriods = isParsed + + revRow = isData.get("매출액", {}) + oiRow = isData.get("영업이익", {}) + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if len(yCols) < 6: + return None + + # ROE 시계열 (ratioSeries에서) + roeVals = _getRatioValues(company, "roe", len(yCols)) + + # 4대 지표 시계열 (오래된 → 최신 순서로 뒤집기) + metrics = [] + + revVals = [_get(revRow, c) for c in reversed(yCols)] + oiVals = [_get(oiRow, c) for c in reversed(yCols)] + marginVals = [ + _safe(oi, rev) * 100 if rev != 0 and _safe(oi, rev) is not None else None for rev, oi in zip(revVals, oiVals) + ] + + for name, vals in [ + ("revenue", revVals), + ("operatingIncome", oiVals), + ("operatingMargin", marginVals), + ("roe", roeVals), + ]: + clean = [v for v in vals if v is not None] + if len(clean) < 6: + metrics.append( + { + "name": name, + "hasBreak": False, + "breakYear": None, + "preBreakGrowth": None, + "postBreakGrowth": None, + "trendReliability": "low", + "nObservations": len(clean), + } + ) + continue + + breakIdx = detectStructuralBreak(clean) + + if breakIdx is not None: + # 변화점 기준 전/후 성장률 + pre = clean[:breakIdx] + post = clean[breakIdx:] + preGrowth = _avgGrowth(pre) + postGrowth = _avgGrowth(post) + + # 연도 매핑 (reversed yCols 기준) + reversedCols = list(reversed(yCols)) + breakYear = reversedCols[breakIdx] if breakIdx < len(reversedCols) else None + + metrics.append( + { + "name": name, + "hasBreak": True, + "breakYear": breakYear, + "preBreakGrowth": round(preGrowth, 2) if preGrowth is not None else None, + "postBreakGrowth": round(postGrowth, 2) if postGrowth is not None else None, + "trendReliability": "low", + "nObservations": len(clean), + } + ) + else: + # 변화점 없음 — 추세 일관 + _, _, r2 = ols(list(range(len(clean))), clean) + reliability = "high" if r2 > 0.7 else ("medium" if r2 > 0.4 else "low") + metrics.append( + { + "name": name, + "hasBreak": False, + "breakYear": None, + "preBreakGrowth": None, + "postBreakGrowth": None, + "trendReliability": reliability, + "nObservations": len(clean), + } + ) + + # 전체 안정성 판단 + nBreaks = sum(1 for m in metrics if m["hasBreak"]) + if nBreaks == 0: + overallStability = "stable" + elif nBreaks <= 1: + overallStability = "transitioning" + else: + overallStability = "volatile" + + return { + "metrics": metrics, + "overallStability": overallStability, + } + + +def _getRatioValues(company, ratioName: str, maxYears: int) -> list[float | None]: + """ratioSeries에서 특정 비율의 시계열을 추출.""" + try: + from dartlab.analysis.financial._helpers import getRatioSeries + + result = getRatioSeries(company) + if result is None: + return [] + data, years = result + vals = data.get("RATIO", {}).get(ratioName, []) + # 최신 maxYears개, 오래된→최신 순서로 + if len(vals) > maxYears: + vals = vals[-maxYears:] + return vals + except (AttributeError, TypeError, ValueError): + return [] + + +def _avgGrowth(vals: list[float]) -> float | None: + """값 목록의 평균 성장률 (%).""" + if len(vals) < 2: + return None + growths = [] + for i in range(1, len(vals)): + if vals[i - 1] != 0: + growths.append(((vals[i] - vals[i - 1]) / abs(vals[i - 1])) * 100) + return sum(growths) / len(growths) if growths else None + + +# ══════════════════════════════════════ +# calc 4: 거시경제 민감도 +# ══════════════════════════════════════ + + +@memoized_calc +def calcMacroSensitivity(company, *, basePeriod: str | None = None) -> dict | None: + """거시경제 민감도 — 섹터별 탄성치 + 관련 지표 매핑. + + 라이브 매크로 데이터를 fetch하지 않는다. + 관련 지표명을 반환하여 AI가 gather.macro()로 조회할 수 있게 한다. + + Returns + ------- + dict + sectorKey : str | None — 업종 키 + sectorCyclicality : str — 경기순환 민감도 ("high" | "moderate" | "low") + revenueToGdp : float — 매출-GDP 탄성치 (배수) + revenueToFx : float — 매출-환율 탄성치 (배수) + marginToGdp : float — 마진-GDP 탄성치 (배수) + fxExposure : str — 환율 노출 ("high" | "moderate" | "low") + commodityExposure : str — 원자재 노출 ("high" | "low") + rateSensitivity : str — 금리 민감도 ("high" | "low") + primaryDrivers : list[dict] — 1차 거시 동인 (indicator, source, direction, description) + secondaryDrivers : list[dict] — 2차 거시 동인 + relevantIndicators : list[str] — 관련 지표 ID 목록 + predictionAxes : dict | None — 라이브 축 상태 (PredictionSpace 캐시 있을 때) + axisImpact : dict | None — 업종별 축 영향도 + netMacroEffect : float | None — 순 매크로 효과 합산 + """ + from dartlab.core.finance.scenario import getElasticity + + sectorKey = _getSectorKey(company) + elasticity = getElasticity(sectorKey) + + # 민감도 분류 + fxExposure = ( + "high" if abs(elasticity.revenueToFx) >= 0.5 else ("moderate" if abs(elasticity.revenueToFx) >= 0.2 else "low") + ) + commodityExposure = "high" if sectorKey in _COMMODITY_SECTORS else "low" + rateSensitivity = "high" if (sectorKey in _RATE_SENSITIVE_SECTORS or elasticity.nimToRate > 0) else "low" + + # 관련 지표 매핑 + drivers = _SECTOR_MACRO_MAP.get(elasticity.cyclicality, _SECTOR_MACRO_MAP["moderate"]) + primaryDrivers = drivers[:2] if len(drivers) >= 2 else drivers + secondaryDrivers = drivers[2:] if len(drivers) > 2 else [] + + # 금리 민감 섹터 추가 지표 + if rateSensitivity == "high": + primaryDrivers = [ + { + "indicator": "BASE_RATE", + "source": "ECOS", + "direction": "direct", + "description": "기준금리 → NIM 직접 영향", + }, + ] + primaryDrivers + + # FX 민감 섹터 추가 지표 + if fxExposure == "high": + secondaryDrivers.append( + { + "indicator": "KRW_USD", + "source": "ECOS", + "direction": "positive_for_export", + "description": "원화 약세 → 수출 유리", + } + ) + + # 관련 지표명 목록 (AI가 gather.macro()로 조회할 때 사용) + allIndicators = list({d["indicator"] for d in primaryDrivers + secondaryDrivers}) + + result = { + "sectorKey": sectorKey, + "sectorCyclicality": elasticity.cyclicality, + "revenueToGdp": elasticity.revenueToGdp, + "revenueToFx": elasticity.revenueToFx, + "marginToGdp": elasticity.marginToGdp, + "fxExposure": fxExposure, + "commodityExposure": commodityExposure, + "rateSensitivity": rateSensitivity, + "primaryDrivers": primaryDrivers, + "secondaryDrivers": secondaryDrivers, + "relevantIndicators": allIndicators, + } + + # Prediction Space enrichment (라이브 축 상태, 캐시 있을 때만) + try: + from dartlab.analysis.forecast.predictionSpace import getPredictionSpace + + space = getPredictionSpace() + if space is not None: + result["predictionAxes"] = { + name: { + "label": ax.label, + "level": ax.level, + "direction": ax.direction, + "momentum": ax.momentum, + } + for name, ax in space.axes.items() + } + result["axisImpact"] = space.impactOn(sectorKey) + result["netMacroEffect"] = round(sum(space.impactOn(sectorKey).values()), 2) + except (ImportError, TypeError): + pass + + return result + + +# ══════════════════════════════════════ +# calc 4b: 거시-재무 동적 회귀 +# ══════════════════════════════════════ + + +@memoized_calc +def calcMacroRegression(company, *, basePeriod: str | None = None) -> dict | None: + """거시-재무 동적 회귀 — 기업별 거시 베타를 과거 데이터에서 학습. + + 정적 상수(scenario.py의 revenueToGdp=1.8 등) 대신, + 실제 과거 매출/마진 성장률과 거시지표 변화율 간 OLS 회귀로 + 기업 고유의 동적 베타를 추정한다. + + 학술 근거: + - Fama-MacBeth 1973: 횡단면 회귀로 팩터 프리미엄 추정 + - 시간차(lag) 효과: GDP t → 매출 t+1 (경기 전달 메커니즘) + + Returns + ------- + dict + betas : dict[str, float] — OLS 기울기 (기업 고유 동적 베타, 변수별) + staticBetas : dict[str, float] — 정적 탄성치 (gdp, rate, fx) (배수) + usedIndicators : dict[str, str] — 사용된 지표 매핑 (v0→seriesId) + marginBetas : dict[str, float] | None — 마진 회귀 OLS 기울기 + lagEffects : dict[str, dict] — 시간차별 상관도 (lag0, lag1, lag2) + rSquared : float — 매출 회귀 R-squared + marginR2 : float | None — 마진 회귀 R-squared + nObs : int — 관측치 수 + nVars : int — 변수 수 + degreesOfFreedom : int — 자유도 + confidence : str — 신뢰도 ("high" | "medium" | "low") + sectorKey : str | None — 업종 키 + table : list[dict] — 기간별 매출 성장률 vs 거시 변화율 시계열 + """ + isResult = company.select("IS", ["매출액", "영업이익"]) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + isData, isPeriods = isParsed + revRow = isData.get("매출액", {}) + oiRow = isData.get("영업이익", {}) + + # 분기 YoY 기반: 동일 분기 대비 성장률 (관측치 ~36개) + qCols = sorted([p for p in isPeriods if "Q" in p], reverse=True) + yoyCols: list[str] = [] + revGrowth: list[float | None] = [] + marginChange: list[float | None] = [] + + for col in qCols: + prevCol = f"{int(col[:4]) - 1}{col[-2:]}" + if prevCol not in isPeriods: + continue + cur = _get(revRow, col) or None + prev = _get(revRow, prevCol) or None + if cur is not None and prev is not None and prev != 0: + revGrowth.append((cur - prev) / abs(prev) * 100) + else: + revGrowth.append(None) + + curOi = _get(oiRow, col) or None + prevOi = _get(oiRow, prevCol) or None + curMargin = curOi / cur * 100 if cur and curOi and cur != 0 else None + prevMargin = prevOi / prev * 100 if prev and prevOi and prev != 0 else None + if curMargin is not None and prevMargin is not None: + marginChange.append(curMargin - prevMargin) + else: + marginChange.append(None) + + yoyCols.append(col) + + cols = yoyCols + if len(cols) < 6: + return None + + # 적응형 변수 선택: 매핑 후보 + 범용 후보에서 상관도 기반 최적 3개 + stockCode = _getStockCode(company) + macroData = _loadAdaptive(revGrowth, cols, stockCode=stockCode) + if macroData is None: + return None + + # OLS 회귀 + betas, rSquared, nObs = _fitOLS(revGrowth, macroData, cols) + if betas is None: + return None + + # 시간차(lag) 상관도 계산 + lagEffects = _calcLagCorrelation(revGrowth, macroData, cols) + + # 마진 회귀 (금리 → 마진 변화) + marginBetas, marginR2, _ = _fitOLS(marginChange, macroData, cols) + + # 정적 탄성치 비교 + from dartlab.core.finance.scenario import getElasticity + + sectorKey = _getSectorKey(company) + staticEl = getElasticity(sectorKey) + + confidence = "high" if nObs >= 8 and rSquared > 0.3 else ("medium" if nObs >= 5 else "low") + + # 테이블 (연도별 시계열) + table = _buildMacroTable(cols, revGrowth, marginChange, macroData) + + # 사용된 지표 정보 + usedIndicators = macroData.get("_usedIndicators", {}) if isinstance(macroData.get("_usedIndicators"), dict) else {} + + return { + "betas": betas, + "staticBetas": { + "gdp": staticEl.revenueToGdp, + "rate": staticEl.marginToGdp, + "fx": staticEl.revenueToFx, + }, + "usedIndicators": usedIndicators, + "marginBetas": marginBetas, + "lagEffects": lagEffects, + "rSquared": round(rSquared, 4), + "marginR2": round(marginR2, 4) if marginR2 is not None else None, + "nObs": nObs, + "nVars": len(betas) if betas else 0, + "degreesOfFreedom": nObs - len(betas) - 1 if betas else 0, + "confidence": confidence, + "sectorKey": sectorKey, + "table": table, + "_predictedDirection": _predictDirection(betas, macroData), + } + + +def _predictDirection(betas: dict | None, macroData: dict) -> str | None: + """OLS 베타 × 최신 외생변수 변화율 → 예측 방향.""" + if not betas: + return None + # macroData의 첫 번째 값(최신)을 사용 + predicted = 0.0 + for key, beta in betas.items(): + vals = macroData.get(key) + if isinstance(vals, list) and vals and vals[0] is not None: + predicted += beta * vals[0] + return "up" if predicted > 0 else "down" + + +def _getFinanceSeries(company): + """Company 에서 finance series-tuple 추출 (private internal).""" + try: + result = company._buildFinanceSeries(freq="Q") + return result[0] if result else None + except (AttributeError, TypeError): + return None + + +def _loadAdaptive( + revGrowth: list[float | None], periodCols: list[str], stockCode: str | None = None +) -> dict[str, list[float | None]] | None: + """적응형 변수 선택 — 매핑 후보 + 범용 후보에서 상관도 기반 최적 3개. + + 1. exogenousAxes 매핑 3개 + 범용 후보 5개 = 총 8개 로드 + 2. 각 변수와 revGrowth의 상관도 계산 + 3. 상관도 상위 3개 선택 + """ + from dartlab.gather.macro import alignToFinancialPeriods, loadMacroParquet + + # 매핑 후보 + try: + from dartlab.core.finance.exogenousAxes import getExogenousSeriesIds + + mapped = getExogenousSeriesIds(stockCode=stockCode) + except (ImportError, KeyError): + mapped = [("IPI", "ecos"), ("BASE_RATE", "ecos"), ("USDKRW", "ecos")] + + # 범용 후보 (전 시장에서 빈출 + 한국 PPI) + universal = [ + ("PPI_MFG", "ecos"), # 한국 공산품PPI — 가장 직접적 범용 + ("PCUOMFGOMFG", "fred"), # 미국 제조업PPI + ("EXPORT", "ecos"), # 한국 수출 + ("PCOPPUSDM", "fred"), # 구리 (글로벌 수요) + ("INDPRO", "fred"), # 미국 산업생산 + ("TCU", "fred"), # 설비가동률 + ("DGORDER", "fred"), # 내구재 주문 + ] + + # 중복 제거하며 합치기 + seen: set[str] = set() + candidates: list[tuple[str, str]] = [] + for sid, src in mapped + universal: + if sid not in seen: + seen.add(sid) + candidates.append((sid, src)) + + # 전년동기 기간 생성 + prevCols = [f"{int(c[:4]) - 1}{c[4:]}" for c in periodCols] + isRateVar = {"BASE_RATE", "BAMLH0A0HYM2", "CORP_BOND_3Y"} + + # 모든 후보 로드 + YoY 계산 + 상관도 + candidateData: list[tuple[str, str, list[float | None], float]] = [] + + for sid, source in candidates: + df = loadMacroParquet(sid, source=source) + if df is None or df.is_empty(): + continue + curVals = alignToFinancialPeriods(df, periodCols).get_column("value").to_list() + prevVals = alignToFinancialPeriods(df, prevCols).get_column("value").to_list() + + changes: list[float | None] = [] + for cur, prev in zip(curVals, prevVals): + if cur is not None and prev is not None and prev != 0: + changes.append(cur - prev if sid in isRateVar else (cur - prev) / abs(prev) * 100) + else: + changes.append(None) + + # 상관도 계산 + corr = _quickCorr(revGrowth, changes) + candidateData.append((sid, source, changes, abs(corr) if corr is not None else 0)) + + if not candidateData: + return None + + # 상관도 상위 3개 선택 + candidateData.sort(key=lambda x: x[3], reverse=True) + selected = candidateData[:3] + + result: dict[str, list[float | None]] = {} + usedIndicators: dict[str, str] = {} + + for i, (sid, source, changes, corr) in enumerate(selected): + key = f"v{i}" + result[key] = changes + usedIndicators[key] = sid + + result["_usedIndicators"] = usedIndicators # type: ignore[assignment] + return result + + +def _quickCorr(y: list[float | None], x: list[float | None]) -> float | None: + """빠른 피어슨 상관계수.""" + pairs = [(a, b) for a, b in zip(y, x) if a is not None and b is not None] + if len(pairs) < 5: + return None + ys = [p[0] for p in pairs] + xs = [p[1] for p in pairs] + ym = sum(ys) / len(ys) + xm = sum(xs) / len(xs) + cov = sum((a - ym) * (b - xm) for a, b in pairs) / len(pairs) + ystd = math.sqrt(sum((a - ym) ** 2 for a in ys) / len(ys)) + xstd = math.sqrt(sum((b - xm) ** 2 for b in xs) / len(xs)) + if ystd < 1e-12 or xstd < 1e-12: + return None + return cov / (ystd * xstd) + + +def _loadMacroAligned(periodCols: list[str], stockCode: str | None = None) -> dict[str, list[float | None]] | None: + """Parquet 캐시에서 거시 지표를 로드 → YoY 변화율을 직접 계산. + + periodCols가 분기("2024Q3" 등)이면 전년동기 대비 YoY, + 연간이면 전년 대비 변화율. + + Returns: + {"v0": [yoy_change, ...], "v1": [...], "_usedIndicators": {...}} + 또는 None (데이터 없음). + """ + from dartlab.gather.macro import alignToFinancialPeriods, loadMacroParquet + + try: + from dartlab.core.finance.exogenousAxes import getExogenousSeriesIds + + seriesPairs = getExogenousSeriesIds(stockCode=stockCode) + except (ImportError, KeyError): + seriesPairs = [("IPI", "ecos"), ("BASE_RATE", "ecos"), ("USDKRW", "ecos")] + + # 전년동기 기간 생성 + prevCols = [f"{int(c[:4]) - 1}{c[4:]}" for c in periodCols] + + result: dict[str, list[float | None]] = {} + usedIndicators: dict[str, str] = {} + isRateVar = {"BASE_RATE", "BAMLH0A0HYM2", "CORP_BOND_3Y"} + + for i, (seriesId, source) in enumerate(seriesPairs[:3]): + key = f"v{i}" + df = loadMacroParquet(seriesId, source=source) + if df is not None and not df.is_empty(): + curVals = alignToFinancialPeriods(df, periodCols).get_column("value").to_list() + prevVals = alignToFinancialPeriods(df, prevCols).get_column("value").to_list() + + # YoY 변화율 계산 + changes: list[float | None] = [] + for cur, prev in zip(curVals, prevVals): + if cur is not None and prev is not None and prev != 0: + if seriesId in isRateVar: + changes.append(cur - prev) # 금리: 절대 변화 (pp) + else: + changes.append((cur - prev) / abs(prev) * 100) # % + else: + changes.append(None) + result[key] = changes + usedIndicators[key] = seriesId + else: + result[key] = [None] * len(periodCols) + usedIndicators[key] = seriesId + + result["_usedIndicators"] = usedIndicators # type: ignore[assignment] + + hasData = any( + any(v is not None for v in vals) + for k, vals in result.items() + if k != "_usedIndicators" and isinstance(vals, list) + ) + return result if hasData else None + + +def _fitOLS( + y: list[float | None], macroData: dict[str, list[float | None]], cols: list[str] +) -> tuple[dict[str, float] | None, float | None, int]: + """OLS 회귀 — y ~ 거시변화율 + 산업지표변화율 (가변 변수). + + macroData의 키가 회귀 변수명이 된다. + 외부 의존성 없이 순수 Python으로 구현. + + Returns: + (betas, r_squared, n_obs) 또는 (None, None, 0). + """ + # macroData는 이미 YoY 변화율 (또는 level). _loadMacroAligned가 변화율 반환. + varNames = [k for k in macroData.keys() if k != "_usedIndicators" and isinstance(macroData[k], list)] + if not varNames: + return None, None, 0 + + n = min(len(y), *(len(macroData[v]) for v in varNames)) + + validY: list[float] = [] + validX: list[list[float]] = [] + activeVars: list[str] = [] + + # 데이터가 있는 변수만 사용 + for v in varNames: + if any(x is not None for x in macroData[v][:n]): + activeVars.append(v) + + for i in range(n): + yVal = y[i] + if yVal is None: + continue + xVals = [] + skip = False + for v in activeVars: + val = macroData[v][i] if i < len(macroData[v]) else None + if val is None: + skip = True + break + xVals.append(val) + if not skip: + validY.append(yVal) + validX.append(xVals) + + if len(validY) < 3 or not activeVars: + return None, None, 0 + + # OLS: β = (X'X)^-1 X'y (절편 포함) + nObs = len(validY) + k = 1 + len(activeVars) # 절편 + 변수 수 + + X = [[1.0] + row for row in validX] + + XtX = [[sum(X[r][i] * X[r][j] for r in range(nObs)) for j in range(k)] for i in range(k)] + Xty = [sum(X[r][i] * validY[r] for r in range(nObs)) for i in range(k)] + + inv = _invertMatrix(XtX) + if inv is None: + return None, None, 0 + + beta = [sum(inv[i][j] * Xty[j] for j in range(k)) for i in range(k)] + + # R² + yMean = sum(validY) / nObs + ssTot = sum((y_ - yMean) ** 2 for y_ in validY) + yPred = [sum(X[r][j] * beta[j] for j in range(k)) for r in range(nObs)] + ssRes = sum((validY[r] - yPred[r]) ** 2 for r in range(nObs)) + rSquared = 1 - ssRes / ssTot if ssTot > 0 else 0.0 + + betas = {activeVars[i]: round(beta[i + 1], 4) for i in range(len(activeVars))} + + return betas, rSquared, nObs + + +def _invertMatrix(m: list[list[float]]) -> list[list[float]] | None: + """4x4 행렬 가우스-조르단 역행렬.""" + n = len(m) + aug = [row[:] + [1.0 if i == j else 0.0 for j in range(n)] for i, row in enumerate(m)] + + for col in range(n): + # 피벗 선택 + maxRow = max(range(col, n), key=lambda r: abs(aug[r][col])) + if abs(aug[maxRow][col]) < 1e-12: + return None # 특이 행렬 + aug[col], aug[maxRow] = aug[maxRow], aug[col] + + pivot = aug[col][col] + aug[col] = [x / pivot for x in aug[col]] + + for row in range(n): + if row != col: + factor = aug[row][col] + aug[row] = [aug[row][j] - factor * aug[col][j] for j in range(2 * n)] + + return [row[n:] for row in aug] + + +def _calcLagCorrelation( + y: list[float | None], macroData: dict[str, list[float | None]], cols: list[str] +) -> dict[str, dict[str, float | None]]: + """시간차(lag) 상관도 — lag 0, 1, 2.""" + result: dict[str, dict[str, float | None]] = {} + + for key in [k for k in macroData if k != "_usedIndicators" and isinstance(macroData[k], list)]: + vals = macroData[key] + lagCorrs: dict[str, float | None] = {} + # 거시 변화율 + changes = [] + for i in range(len(vals) - 1): + cur, prev = vals[i], vals[i + 1] + if cur is not None and prev is not None and prev != 0: + if key == "rate": + changes.append(cur - prev) + else: + changes.append((cur - prev) / abs(prev) * 100) + else: + changes.append(None) + + for lag in range(3): + corr = _pearsonCorrelation(y, changes, lag=lag) + lagCorrs[f"lag{lag}"] = round(corr, 4) if corr is not None else None + + result[key] = lagCorrs + + return result + + +def _pearsonCorrelation(y: list[float | None], x: list[float | None], *, lag: int = 0) -> float | None: + """피어슨 상관계수 (lag 적용).""" + pairs: list[tuple[float, float]] = [] + for i in range(len(y)): + xi = i + lag + if xi < len(x): + yVal, xVal = y[i], x[xi] + if yVal is not None and xVal is not None: + pairs.append((yVal, xVal)) + + if len(pairs) < 3: + return None + + yVals = [p[0] for p in pairs] + xVals = [p[1] for p in pairs] + yMean = sum(yVals) / len(yVals) + xMean = sum(xVals) / len(xVals) + + cov = sum((y_ - yMean) * (x_ - xMean) for y_, x_ in pairs) / len(pairs) + yStd = math.sqrt(sum((y_ - yMean) ** 2 for y_ in yVals) / len(yVals)) + xStd = math.sqrt(sum((x_ - xMean) ** 2 for x_ in xVals) / len(xVals)) + + if yStd < 1e-12 or xStd < 1e-12: + return None + + return cov / (yStd * xStd) + + +def _buildMacroTable( + cols: list[str], + revGrowth: list[float | None], + marginChange: list[float | None], + macroData: dict[str, list[float | None]], +) -> list[dict]: + """연도별 매출 성장률 vs 거시 변화율 시계열 테이블.""" + table = [] + n = min(len(revGrowth), len(cols) - 1) + + for i in range(n): + row: dict = { + "period": cols[i], + "revGrowthPct": round(revGrowth[i], 2) if revGrowth[i] is not None else None, + "marginChangeBps": round(marginChange[i], 1) + if i < len(marginChange) and marginChange[i] is not None + else None, + } + for key, vals in macroData.items(): + if key == "_usedIndicators" or not isinstance(vals, list): + continue + row[f"{key}Value"] = round(vals[i], 2) if i < len(vals) and vals[i] is not None else None + table.append(row) + + return table + + +# ══════════════════════════════════════ +# calc 4c: 이벤트 충격 분석 +# ══════════════════════════════════════ + + +@memoized_calc +def calcEventImpact(company, *, basePeriod: str | None = None) -> dict | None: + """이벤트 충격 분석 — 공시 급변/지배구조 변화 시점 전후 재무 패턴. + + 과거에 공시 텍스트가 급변하거나 지배구조가 변한 시점을 식별하고, + 해당 시점 전후 매출/마진 변화 패턴을 추출한다. + + Returns + ------- + dict + events : list[dict] — 감지된 이벤트 목록 + period : str — 이벤트 발생 기간 + type : str — 유형 ("disclosureShock" | "structuralBreak" | "revenueShock") + magnitude : float — 변화 크기 + preRevGrowth : float | None — 이벤트 전 매출 성장률 (%) + postRevGrowth : float | None — 이벤트 후 매출 성장률 (%) + preMargin : float | None — 이벤트 전 영업마진 (%) + postMargin : float | None — 이벤트 후 영업마진 (%) + recoveryYears : int | None — 회복까지 걸린 기간 (일수) + averageImpact : dict[str, float] — 이벤트 유형별 평균 충격 (%p) + resilience : str — 충격 회복력 ("high" | "medium" | "low") + avgRecoveryYears : float | None — 평균 회복 기간 + """ + isResult = company.select("IS", ["매출액", "영업이익"]) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + isData, isPeriods = isParsed + revRow = isData.get("매출액", {}) + oiRow = isData.get("영업이익", {}) + + cols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod) + if len(cols) < 4: + return None + + revValues = [_get(revRow, c) or None for c in cols] + oiValues = [_get(oiRow, c) or None for c in cols] + + # 매출 성장률 + 마진 계산 + revGrowth = _calcGrowthRates(revValues) + margins = _calcMargins(revValues, oiValues) + + events: list[dict] = [] + + # 1. 공시 텍스트 급변 감지 (disclosureDelta 활용) + try: + discDelta = calcDisclosureDelta(company, basePeriod=basePeriod) + if discDelta and discDelta.get("changeIntensity"): + intensity = discDelta["changeIntensity"] + if intensity.get("totalChangeBytes", 0) > 50000: + eventIdx = 0 # 최신 기간 + events.append( + _buildEvent( + period=cols[eventIdx] if eventIdx < len(cols) else "unknown", + eventType="disclosureShock", + magnitude=intensity.get("totalChangeBytes", 0) / 10000, + revGrowth=revGrowth, + margins=margins, + eventIdx=eventIdx, + ) + ) + except (AttributeError, TypeError, KeyError): + pass + + # 2. 구조변화점 감지 (structuralBreak 재활용) + try: + breakResult = calcStructuralBreak(company, basePeriod=basePeriod) + if breakResult: + for metric, detail in breakResult.get("metrics", {}).items(): + if detail.get("breakDetected"): + breakYear = detail.get("breakYear") + if breakYear: + eventIdx = _findPeriodIdx(cols, breakYear) + if eventIdx is not None: + events.append( + _buildEvent( + period=cols[eventIdx] if eventIdx < len(cols) else str(breakYear), + eventType="structuralBreak", + magnitude=abs(detail.get("postBreakGrowth", 0) - detail.get("preBreakGrowth", 0)), + revGrowth=revGrowth, + margins=margins, + eventIdx=eventIdx, + ) + ) + except (AttributeError, TypeError, KeyError): + pass + + # 3. 매출 급변 감지 (|성장률| > 30% = 충격) + for i, g in enumerate(revGrowth): + if g is not None and abs(g) > 30: + events.append( + _buildEvent( + period=cols[i] if i < len(cols) else "unknown", + eventType="revenueShock", + magnitude=abs(g), + revGrowth=revGrowth, + margins=margins, + eventIdx=i, + ) + ) + + if not events: + return { + "events": [], + "averageImpact": {}, + "resilience": "high", + "summary": "최근 5년간 유의미한 충격 이벤트 없음", + } + + # 회복력 판단 + recoveries = [e.get("recoveryYears") for e in events if e.get("recoveryYears") is not None] + avgRecovery = sum(recoveries) / len(recoveries) if recoveries else None + resilience = ( + "high" + if avgRecovery is not None and avgRecovery <= 1 + else ("low" if avgRecovery and avgRecovery >= 3 else "medium") + ) + + # 유형별 평균 충격 + typeImpacts: dict[str, list[float]] = {} + for e in events: + t = e["type"] + impact = (e.get("postRevGrowth") or 0) - (e.get("preRevGrowth") or 0) + typeImpacts.setdefault(t, []).append(impact) + + averageImpact = {t: round(sum(v) / len(v), 2) for t, v in typeImpacts.items()} + + return { + "events": events, + "averageImpact": averageImpact, + "resilience": resilience, + "avgRecoveryYears": round(avgRecovery, 1) if avgRecovery else None, + } + + +def _calcGrowthRates(values: list[float | None]) -> list[float | None]: + """연간 성장률 계산.""" + rates = [] + for i in range(len(values) - 1): + cur, prev = values[i], values[i + 1] + if cur is not None and prev is not None and prev != 0: + rates.append((cur - prev) / abs(prev) * 100) + else: + rates.append(None) + return rates + + +def _calcMargins(revValues: list, oiValues: list | None) -> list[float | None]: + """영업마진 시계열.""" + if oiValues is None: + return [None] * len(revValues) + margins = [] + for r, o in zip(revValues, oiValues): + if r is not None and o is not None and r != 0: + margins.append(o / r * 100) + else: + margins.append(None) + return margins + + +def _buildEvent( + *, + period: str, + eventType: str, + magnitude: float, + revGrowth: list[float | None], + margins: list[float | None], + eventIdx: int, +) -> dict: + """이벤트 전후 재무 패턴 추출.""" + preRevGrowth = revGrowth[eventIdx + 1] if eventIdx + 1 < len(revGrowth) else None + postRevGrowth = revGrowth[eventIdx] if eventIdx < len(revGrowth) else None + preMargin = margins[eventIdx + 1] if eventIdx + 1 < len(margins) else None + postMargin = margins[eventIdx] if eventIdx < len(margins) else None + + # 회복 시간: 이벤트 후 성장률이 양으로 돌아오는 기간 + recoveryYears = None + if postRevGrowth is not None and postRevGrowth < 0: + for j in range(eventIdx - 1, -1, -1): + if j < len(revGrowth) and revGrowth[j] is not None and revGrowth[j] > 0: + recoveryYears = eventIdx - j + break + + return { + "period": period, + "type": eventType, + "magnitude": round(magnitude, 2), + "preRevGrowth": round(preRevGrowth, 2) if preRevGrowth is not None else None, + "postRevGrowth": round(postRevGrowth, 2) if postRevGrowth is not None else None, + "preMargin": round(preMargin, 1) if preMargin is not None else None, + "postMargin": round(postMargin, 1) if postMargin is not None else None, + "recoveryYears": recoveryYears, + } + + +def _findPeriodIdx(cols: list[str], year: int) -> int | None: + """연도로 기간 인덱스 찾기.""" + yearStr = str(year) + for i, col in enumerate(cols): + if col.startswith(yearStr): + return i + return None + + +# ══════════════════════════════════════ +# calc 5: 공시 변화 신호 +# ══════════════════════════════════════ + + +@memoized_calc +def calcDisclosureDelta(company, *, basePeriod: str | None = None) -> dict | None: + """공시 변화 신호 — diff 결과를 예측 신호로 변환. + + 공시 텍스트 변화량을 방향성 신호로 해석한다. + FinBERT 등 톤 분석은 미적용 — 변화 크기만 사용. + + Returns + ------- + dict + overallChangeRate : float — 전체 공시 변화율 (%) + riskChangeRate : float — 리스크 관련 토픽 변화율 (%) + businessChangeRate : float — 사업 관련 토픽 변화율 (%) + revenueRelatedChange : float — 매출 관련 토픽 변화율 (%) + signalDirection : str — 방향성 ("positive" | "negative" | "neutral") + signalStrength : str — 신호 강도 ("strong" | "moderate" | "weak") + topChangedTopics : list[dict] — 변화율 상위 5개 토픽 (topic, changeRate) + """ + try: + diffResult = company._docs.diff() + except (AttributeError, TypeError): + return None + + if diffResult is None: + return None + + overallChangeRate = getattr(diffResult, "changeRate", None) or 0.0 + + # 토픽별 변화율 추출 + riskChangeRate = 0.0 + businessChangeRate = 0.0 + revenueChangeRate = 0.0 + topChangedTopics = [] + + riskTopics = {"riskFactors", "riskDerivative", "contingentLiabilities"} + businessTopics = {"businessOverview", "businessContent"} + revenueTopics = {"revenue", "salesSegment", "productionStatus"} + + topicChanges = getattr(diffResult, "topicChanges", None) or [] + for tc in topicChanges: + topic = getattr(tc, "topic", "") + changeRate = getattr(tc, "changeRate", 0) or 0 + + if topic in riskTopics: + riskChangeRate = max(riskChangeRate, changeRate) + elif topic in businessTopics: + businessChangeRate = max(businessChangeRate, changeRate) + elif topic in revenueTopics: + revenueChangeRate = max(revenueChangeRate, changeRate) + + if changeRate > 20: + topChangedTopics.append({"topic": topic, "changeRate": round(changeRate, 1)}) + + # 방향성 신호 판단 + if riskChangeRate > 60: + signalDirection = "negative" + signalStrength = "strong" + elif riskChangeRate > 30: + signalDirection = "negative" + signalStrength = "moderate" + elif overallChangeRate < 10: + signalDirection = "neutral" + signalStrength = "weak" + elif businessChangeRate > 40 and riskChangeRate < 20: + signalDirection = "positive" + signalStrength = "moderate" + else: + signalDirection = "neutral" + signalStrength = "weak" + + # 변화 큰 토픽 정렬 + topChangedTopics.sort(key=lambda x: x["changeRate"], reverse=True) + + return { + "overallChangeRate": round(overallChangeRate, 1), + "riskChangeRate": round(riskChangeRate, 1), + "businessChangeRate": round(businessChangeRate, 1), + "revenueRelatedChange": round(revenueChangeRate, 1), + "signalDirection": signalDirection, + "signalStrength": signalStrength, + "topChangedTopics": topChangedTopics[:5], + } + + +# ══════════════════════════════════════ +# calc 8: 재고/매출채권 괴리 신호 +# ══════════════════════════════════════ + + +@memoized_calc +def calcInventoryDivergence(company, *, basePeriod: str | None = None) -> dict | None: + """재고/매출채권 괴리 — 수요 둔화 선행 지표. + + 재고 증가율 > 매출 증가율 = 수요 둔화 (NYU Stern). + 매출채권 증가율 > 매출 증가율 = 회수 악화. + NOA 급증 = 이익 조작 가능성 (Oler 2024). + + Returns + ------- + dict + history : list[dict] — 연도별 시계열 (inventory, receivables, revenue, inventoryGrowth(%), revenueGrowth(%), divergence(%p), arDivergence(%p), dso(일), dio(일), noa(원)) + inventorySignal : str — 재고 신호 ("building" | "liquidating" | "stable") + receivableSignal : str — 매출채권 신호 ("deteriorating" | "improving" | "stable") + noaGrowth : float | None — NOA 성장률 (%) + riskScore : int — 리스크 점수 (점, 0-100) + """ + bsResult = company.select( + "BS", ["재고자산", "매출채권및기타채권", "매출채권", "매입채무및기타채무", "매입채무", "자산총계"] + ) + isResult = company.select("IS", ["매출액", "매출원가"]) + + bsParsed = toDictBySnakeId(bsResult) + isParsed = toDictBySnakeId(isResult) + if bsParsed is None or isParsed is None: + return None + + bsData, bsPeriods = bsParsed + isData, _ = isParsed + + invRow = bsData.get("재고자산", {}) + arRow = bsData.get("매출채권및기타채권", bsData.get("매출채권", {})) + apRow = bsData.get("매입채무및기타채무", bsData.get("매입채무", {})) + taRow = bsData.get("자산총계", {}) + revRow = isData.get("매출액", {}) + cogsRow = isData.get("매출원가", {}) + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if len(yCols) < 3: + return None + + history = [] + for i, col in enumerate(yCols): + inv = _get(invRow, col) + ar = _get(arRow, col) + ap = _get(apRow, col) + ta = _get(taRow, col) + rev = _get(revRow, col) + cogs = _get(cogsRow, col) + + # DSO / DIO + dso = (ar / rev * 365) if rev > 0 else None + dio = (inv / cogs * 365) if cogs > 0 else None + + # YoY 성장률 + invGrowth = None + revGrowth = None + arGrowth = None + if i + 1 < len(yCols): + prevCol = yCols[i + 1] + prevInv = _get(invRow, prevCol) + prevRev = _get(revRow, prevCol) + prevAr = _get(arRow, prevCol) + if prevInv > 0: + invGrowth = ((inv - prevInv) / prevInv) * 100 + if prevRev > 0: + revGrowth = ((rev - prevRev) / prevRev) * 100 + if prevAr > 0: + arGrowth = ((ar - prevAr) / prevAr) * 100 + + divergence = None + if invGrowth is not None and revGrowth is not None: + divergence = invGrowth - revGrowth + + arDivergence = None + if arGrowth is not None and revGrowth is not None: + arDivergence = arGrowth - revGrowth + + # NOA = (자산 - 현금) - (부채 - 금융부채) ≈ 자산 - 매입채무 - 현금 (간이) + noa = ta - ap if ta > 0 else None + + history.append( + { + "period": col, + "inventory": inv, + "receivables": ar, + "revenue": rev, + "inventoryGrowth": round(invGrowth, 1) if invGrowth is not None else None, + "revenueGrowth": round(revGrowth, 1) if revGrowth is not None else None, + "divergence": round(divergence, 1) if divergence is not None else None, + "arDivergence": round(arDivergence, 1) if arDivergence is not None else None, + "dso": round(dso, 1) if dso is not None else None, + "dio": round(dio, 1) if dio is not None else None, + "noa": noa, + } + ) + + if not history: + return None + + # 재고 신호 판단 (최근 2년) + recentDiv = [h["divergence"] for h in history[:2] if h["divergence"] is not None] + if recentDiv: + avgDiv = sum(recentDiv) / len(recentDiv) + if avgDiv > 5: + inventorySignal = "building" + elif avgDiv < -5: + inventorySignal = "liquidating" + else: + inventorySignal = "stable" + else: + inventorySignal = "stable" + + # 매출채권 신호 + recentArDiv = [h["arDivergence"] for h in history[:2] if h["arDivergence"] is not None] + if recentArDiv: + avgArDiv = sum(recentArDiv) / len(recentArDiv) + if avgArDiv > 5: + receivableSignal = "deteriorating" + elif avgArDiv < -5: + receivableSignal = "improving" + else: + receivableSignal = "stable" + else: + receivableSignal = "stable" + + # NOA 성장률 + noaGrowth = None + if len(history) >= 2 and history[0]["noa"] and history[1]["noa"] and history[1]["noa"] > 0: + noaGrowth = ((history[0]["noa"] - history[1]["noa"]) / abs(history[1]["noa"])) * 100 + + # 리스크 점수 (0-100) + riskScore = 30 # 기본 + if inventorySignal == "building": + riskScore += 25 + if receivableSignal == "deteriorating": + riskScore += 20 + if noaGrowth is not None and noaGrowth > 20: + riskScore += 25 + riskScore = min(100, riskScore) + + return { + "history": history, + "inventorySignal": inventorySignal, + "receivableSignal": receivableSignal, + "noaGrowth": round(noaGrowth, 1) if noaGrowth is not None else None, + "riskScore": riskScore, + } + + +# ══════════════════════════════════════ +# calc 9: 동종업계 공시 타이밍 +# ══════════════════════════════════════ + + +@memoized_calc +def calcAnnouncementTiming(company, *, basePeriod: str | None = None) -> dict | None: + """동종업계 공시 타이밍 — 선발 기업 실적으로 후발 예측. + + 같은 업종에서 이미 실적을 발표한 기업들의 성장 방향을 집계한다. + Ramnath 2002, Thomas & Zhang 2008 — 20년+ 검증된 anomaly. + + Returns + ------- + dict + sectorKey : str — 업종 키 + sectorPeersReported : int — 실적 발표 동종 기업 수 + sectorPeersTotal : int — 동종 업종 전체 기업 수 + reportedDirection : dict — 방향별 기업 수 (up, down, flat) + bellwetherSignal : str — 벨웨더 신호 ("positive" | "negative" | "neutral") + peerConsensus : float — 피어 합의 점수 (-1.0 ~ +1.0) + confidence : str — 신뢰도 ("high" | "medium" | "low") + """ + stockCode = _getStockCode(company) + if stockCode is None: + return None + + # 업종 정보 + sectorKey = _getSectorKey(company) + if sectorKey is None: + return None + + # scan growth에서 동종 업종 성장률 로드 + try: + from dartlab.scan import Scan + + scan = Scan() + growthResult = scan("growth") + if growthResult is None or not hasattr(growthResult, "df"): + return None + + df = growthResult.df + except (ImportError, ValueError, AttributeError): + return None + + # 업종 필터 (sector 컬럼이 있으면 사용, 없으면 전체) + sectorCol = None + for col in ("sector", "industry", "industryGroup", "업종"): + if col in df.columns: + sectorCol = col + break + + if sectorCol: + peerDf = df.filter(df[sectorCol] == sectorKey) + else: + return None + + if peerDf.height < 3: + return None + + # 성장률 방향 집계 + growthCol = None + for col in ("revenueGrowth", "revenueCagr3y", "growth", "매출성장률"): + if col in peerDf.columns: + growthCol = col + break + + if growthCol is None: + return None + + codeCol = "stockCode" if "stockCode" in peerDf.columns else peerDf.columns[0] + directions = {"up": 0, "down": 0, "flat": 0} + totalPeers = peerDf.height + selfExcluded = False + + for row in peerDf.iter_rows(named=True): + code = str(row.get(codeCol, "")) + if code == stockCode: + selfExcluded = True + continue + g = row.get(growthCol) + if g is None: + continue + g = float(g) + if g > 2: + directions["up"] += 1 + elif g < -2: + directions["down"] += 1 + else: + directions["flat"] += 1 + + reported = sum(directions.values()) + if reported < 2: + return None + + # 피어 합의 점수 (-1.0 ~ +1.0) + peerConsensus = (directions["up"] - directions["down"]) / reported + + # 벨웨더 신호 (다수 방향) + maxDir = max(directions, key=directions.get) + if directions[maxDir] / reported >= 0.6: + bellwetherSignal = "positive" if maxDir == "up" else ("negative" if maxDir == "down" else "neutral") + else: + bellwetherSignal = "neutral" + + confidence = "high" if reported >= 5 else ("medium" if reported >= 3 else "low") + + return { + "sectorKey": sectorKey, + "sectorPeersReported": reported, + "sectorPeersTotal": totalPeers - (1 if selfExcluded else 0), + "reportedDirection": directions, + "bellwetherSignal": bellwetherSignal, + "peerConsensus": round(peerConsensus, 3), + "confidence": confidence, + } + + +# ══════════════════════════════════════ +# calc 10: 공급망 모멘텀 +# ══════════════════════════════════════ + + +@memoized_calc +def calcSupplyChainSignal(company, *, basePeriod: str | None = None) -> dict | None: + """공급망 모멘텀 — 관계사 실적이 이 회사를 선행. + + Cohen & Frazzini 2008 (J. Finance) — 고객사 실적이 공급사를 1-2분기 선행. + DART 투자관계 + 관계사 거래에서 연결 기업을 식별하고, + 상장 관계사의 성장률로 이 회사에 대한 전파 신호를 계산. + + Returns + ------- + dict + linkedCompanies : list[dict] — 상장 관계사 목록 (code, name, relationship, revenueGrowth(%)) + networkMomentum : float — 정규화 모멘텀 (-1.0 ~ +1.0) + nLinkedListed : int — 상장 관계사 수 + supplyChainRisk : str — 공급망 리스크 ("high" | "moderate" | "low") + confidence : str — 신뢰도 ("high" | "medium" | "low") + """ + stockCode = _getStockCode(company) + if stockCode is None: + return None + + # 관계사 네트워크 추출 + linkedCompanies = _getLinkedCompanies(company, stockCode) + if not linkedCompanies: + return None + + # 상장 관계사의 성장률 조회 (scan growth) + growthMap = _loadGrowthMap() + if not growthMap: + return None + + enriched = [] + for lc in linkedCompanies: + code = lc.get("code", "") + growth = growthMap.get(code) + if growth is not None: + enriched.append( + { + "code": code, + "name": lc.get("name", ""), + "relationship": lc.get("relationship", ""), + "revenueGrowth": round(growth, 1), + } + ) + + if not enriched: + return None + + # 가중 평균 모멘텀 + growths = [e["revenueGrowth"] for e in enriched] + networkMomentum = sum(growths) / len(growths) + # 정규화 (-1 ~ +1) + normalizedMomentum = _clamp(networkMomentum / 30) + + # 공급망 리스크 + negCount = sum(1 for g in growths if g < -5) + if negCount / len(growths) > 0.5: + supplyChainRisk = "high" + elif negCount / len(growths) > 0.25: + supplyChainRisk = "moderate" + else: + supplyChainRisk = "low" + + confidence = "high" if len(enriched) >= 5 else ("medium" if len(enriched) >= 2 else "low") + + return { + "linkedCompanies": enriched[:10], + "networkMomentum": round(normalizedMomentum, 3), + "nLinkedListed": len(enriched), + "supplyChainRisk": supplyChainRisk, + "confidence": confidence, + } + + +def _clamp(v: float, lo: float = -1.0, hi: float = 1.0) -> float: + return max(lo, min(hi, v)) + + +def _getLinkedCompanies(company, stockCode: str) -> list[dict]: + """관계사/투자회사 목록 추출.""" + linked = [] + + # 1. 투자관계 (network edges에서) + try: + from dartlab.scan.network.edges import build_invest_edges + + investDf = build_invest_edges(stockCode) + if investDf is not None and hasattr(investDf, "height") and investDf.height > 0: + for row in investDf.iter_rows(named=True): + toCode = row.get("to_code", "") + if toCode and row.get("is_listed"): + linked.append( + { + "code": toCode, + "name": row.get("to_name", ""), + "relationship": "투자", + } + ) + except (ImportError, ValueError, TypeError): + pass + + # 2. 관계사 거래 (relatedPartyTx에서) + try: + rpt = getattr(company, "relatedPartyTx", None) + if rpt and hasattr(rpt, "revenueTxDf") and rpt.revenueTxDf is not None: + for row in rpt.revenueTxDf.iter_rows(named=True): + entity = row.get("entity", "") + if entity and entity not in {lc["name"] for lc in linked}: + linked.append( + { + "code": "", + "name": entity, + "relationship": "거래", + } + ) + except (AttributeError, TypeError): + pass + + return linked + + +def _loadGrowthMap() -> dict[str, float]: + """scan growth에서 전종목 매출 성장률 맵을 로드.""" + try: + from dartlab.scan import Scan + + scan = Scan() + result = scan("growth") + if result is None or not hasattr(result, "df"): + return {} + + df = result.df + codeCol = "stockCode" if "stockCode" in df.columns else df.columns[0] + growthCol = None + for col in ("revenueGrowth", "revenueCagr3y", "growth"): + if col in df.columns: + growthCol = col + break + if growthCol is None: + return {} + + gmap = {} + for row in df.iter_rows(named=True): + code = str(row.get(codeCol, "")) + g = row.get(growthCol) + if code and g is not None: + gmap[code] = float(g) + return gmap + except (ImportError, ValueError, AttributeError): + return {} + + +# ══════════════════════════════════════ +# calc 6: 다중 신호 종합 +# ══════════════════════════════════════ + + +_DIRECTION_SCORES = { + "up": 1.0, + "accelerating": 1.0, + "bullish": 1.0, + "positive": 0.5, + "flat": 0.0, + "stable": 0.0, + "neutral": 0.0, + "down": -1.0, + "decelerating": -0.5, + "bearish": -1.0, + "negative": -0.5, + "reversing": 0.0, + "transitioning": -0.2, + "volatile": -0.5, +} + + +# ══════════════════════════════════════ +# calc 10: 컨센서스 매출 방향 +# ══════════════════════════════════════ + + +@memoized_calc +def calcConsensusDirection(company, *, basePeriod: str | None = None) -> dict | None: + """컨센서스 매출 방향 — 애널리스트 추정 매출 vs 직전 실적. + + 네이버 finance/annual에서 isConsensus="Y" 기간의 매출 추정치를 가져와서 + 직전 실적 대비 성장/하락 방향을 판단한다. + + Zacks 연구: 컨센서스 방향이 실적 방향의 가장 강력한 단일 예측자 (70%). + + Returns + ------- + dict + consensusRevenue : float — 컨센서스 추정 매출 (원) + lastActualRevenue : float — 직전 실적 매출 (원) + consensusPeriod : str — 컨센서스 기간 + actualPeriod : str — 실적 기간 + expectedGrowthPct : float — 예상 성장률 (%) + direction : str — 방향 ("up" | "down" | "flat") + confidence : str — 신뢰도 ("high" | "medium" | "low") + """ + stockCode = _getStockCode(company) + if not stockCode: + return None + + try: + import httpx + + resp = httpx.get( + f"https://m.stock.naver.com/api/stock/{stockCode}/finance/annual", + headers={"User-Agent": "dartlab/1.0"}, + timeout=10, + ) + if resp.status_code != 200: + return None + + data = resp.json() + fi = data.get("financeInfo", {}) + titles = fi.get("trTitleList", []) + rows = fi.get("rowList", []) + + # 컨센서스 기간 + 직전 실적 기간 찾기 + cnsKey = None + realKeys: list[str] = [] + for t in titles: + if t.get("isConsensus") == "Y" and cnsKey is None: + cnsKey = t["key"] + elif t.get("isConsensus") == "N": + realKeys.append(t["key"]) + + if not cnsKey or not realKeys: + return None + + lastRealKey = realKeys[-1] # 가장 최신 실적 + + # 매출 행 찾기 + for row in rows: + if row.get("title") != "매출액": + continue + + cnsValStr = row.get("columns", {}).get(cnsKey, {}).get("value", "") + realValStr = row.get("columns", {}).get(lastRealKey, {}).get("value", "") + if not cnsValStr or not realValStr: + return None + + cnsVal = float(cnsValStr.replace(",", "")) + realVal = float(realValStr.replace(",", "")) + if realVal == 0: + return None + + growthPct = (cnsVal - realVal) / abs(realVal) * 100 + direction = "up" if growthPct > 2 else ("down" if growthPct < -2 else "flat") + + return { + "consensusRevenue": cnsVal, + "lastActualRevenue": realVal, + "consensusPeriod": cnsKey, + "actualPeriod": lastRealKey, + "expectedGrowthPct": round(growthPct, 1), + "direction": direction, + "confidence": "high" if abs(growthPct) > 10 else ("medium" if abs(growthPct) > 3 else "low"), + } + + except (ImportError, ValueError, KeyError, TypeError): + return None + + return None + + +# ══════════════════════════════════════ +# calc 11: 수급 누적 방향 +# ══════════════════════════════════════ + + +@memoized_calc +def calcFlowDirection(company, *, basePeriod: str | None = None) -> dict | None: + """수급 누적 방향 — 기관/외국인 순매수 분기 집계. + + 최근 60거래일 기관+외국인 순매수 합계가 양이면 실적 개선 기대. + "스마트머니는 실적을 안다" (Park et al., MDPI 2020). + + Returns + ------- + dict + foreignNet60d : int — 외국인 60거래일 순매수 (주) + institutionNet60d : int — 기관 60거래일 순매수 (주) + smartMoneyNet : int — 스마트머니 합계 (주) + direction : str — 방향 ("up" | "down" | "flat") + days : int — 집계 거래일 수 (일) + confidence : str — 신뢰도 ("high" | "medium" | "low") + """ + stockCode = _getStockCode(company) + if not stockCode: + return None + + try: + import httpx + + resp = httpx.get( + f"https://m.stock.naver.com/api/stock/{stockCode}/integration", + headers={"User-Agent": "dartlab/1.0"}, + timeout=10, + ) + if resp.status_code != 200: + return None + + data = resp.json() + deals = data.get("dealTrendInfos", []) + if not deals or len(deals) < 3: + return None + + # 최근 60거래일 집계 + recent = deals[:60] # integration은 ~5일, 있는 만큼 사용 + foreignNet = 0 + instNet = 0 + for d in recent: + fq = d.get("foreignerPureBuyQuant", "0") + oq = d.get("organPureBuyQuant", "0") + foreignNet += int(str(fq).replace(",", "").replace("+", "")) + instNet += int(str(oq).replace(",", "").replace("+", "")) + + smartMoney = foreignNet + instNet + direction = "up" if smartMoney > 0 else ("down" if smartMoney < 0 else "flat") + + return { + "foreignNet60d": foreignNet, + "institutionNet60d": instNet, + "smartMoneyNet": smartMoney, + "direction": direction, + "days": len(recent), + "confidence": "high" if abs(smartMoney) > 1000000 else ("medium" if abs(smartMoney) > 100000 else "low"), + } + + except (ImportError, OSError, ValueError, KeyError, TypeError): + return None + + +# ══════════════════════════════════════ +# calc 12: 매출 모멘텀 (전분기 방향 유지) +# ══════════════════════════════════════ + + +@memoized_calc +def calcRevenueDirection(company, *, basePeriod: str | None = None) -> dict | None: + """매출 방향 예측 — 모멘텀 + 영업이익률 확인 + OLS 확인. + + 검증 결과: + - 모멘텀 단독: 72.1% (4825건, 172종목) + - 모멘텀+영업이익률>0 일치: 76.1% (76% 시점) + - 모멘텀+OLS 일치: 77.7% (68% 시점) + - 2연속 모멘텀: 74.7% + + 방법론: + 1. 기본: 전분기 YoY 방향 유지 (72.1%) + 2. 확인1: 영업이익률 > 0이면 신뢰도 상승 (76.1%) — API 불필요 + 3. 확인2: OLS 외생변수와 일치하면 추가 상승 (77.7%) + 4. 2연속 같은 방향이면 74.7% + + 학술 근거: M4/M5 Competition — 단순 방법이 최강. + + Returns + ------- + dict + latestPeriod : str — 최신 분기 + latestYoyGrowth : float — 최신 분기 YoY 성장률 (%) + direction : str — 예측 방향 ("up" | "down") + streak : int — 연속 동일 방향 분기 수 + margin : float | None — 최신 영업이익률 (%) + marginAgree : bool | None — 마진 방향 일치 여부 + olsAgree : bool | None — OLS 외생변수 방향 일치 여부 + confirms : int — 확인 신호 수 (0-3) + probability : float — 보정된 베이즈 사후확률 (0.0-1.0) + rawPosterior : float — 원시 사후확률 (0.0-1.0) + industryPrior : float — 업종별 모멘텀 사전확률 (0.0-1.0) + confidence : str — 신뢰도 ("very_high" | "high" | "medium" | "low") + history : list[dict] — 최근 4분기 YoY 방향 이력 + """ + isResult = company.select("IS", ["매출액", "영업이익"]) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + isData, isPeriods = isParsed + revRow = isData.get("매출액", {}) + oiRow = isData.get("영업이익", {}) + + qCols = sorted([p for p in isPeriods if "Q" in p], reverse=True) + + # 최근 분기 YoY + directions: list[dict] = [] + for col in qCols[:6]: + prevCol = f"{int(col[:4]) - 1}{col[-2:]}" + if prevCol not in isPeriods: + continue + cur = _get(revRow, col) or None + prev = _get(revRow, prevCol) or None + if cur is not None and prev is not None and prev != 0: + growth = (cur - prev) / abs(prev) * 100 + directions.append({"period": col, "yoyGrowth": round(growth, 1), "positive": growth > 0}) + + if not directions: + return None + + # 모멘텀 방향 (기본 예측: 전분기 방향 유지) + latest = directions[0] + direction = "up" if latest["positive"] else "down" + + # 2연속 모멘텀 (74.7%) + streak = 1 + if len(directions) >= 2 and directions[0]["positive"] == directions[1]["positive"]: + streak = 2 + if len(directions) >= 3 and streak == 2 and directions[1]["positive"] == directions[2]["positive"]: + streak = 3 + + # 확인1: 영업이익률 > 0 (76.1% — API 불필요, 가장 빠른 확인) + latestRev = _get(revRow, directions[0]["period"]) or None + latestOi = _get(oiRow, directions[0]["period"]) or None + marginPositive = None + margin = None + if latestRev and latestOi and latestRev != 0: + margin = latestOi / latestRev * 100 + marginPositive = margin > 0 + + marginAgree = None + if marginPositive is not None: + # 매출 성장(+) + 영업이익률 양(+) → 일치 + # 매출 하락(-) + 영업이익률 음(-) → 일치 + marginAgree = latest["positive"] == marginPositive + + # 확인2: OLS 외생변수 + macroReg = calcMacroRegression(company, basePeriod=basePeriod) + olsAgree = None + if macroReg and macroReg.get("betas"): + olsDirection = macroReg.get("_predictedDirection") + if olsDirection is not None: + olsAgree = (olsDirection == "up") == latest["positive"] + + # 베이즈 사후확률 갱신 — 업종별 사전확률에서 시작 + # 슈퍼예측가 원리: 사전확률이 정확할수록 사후확률도 정확 + _getSectorKey(company) + industry = None + try: + from dartlab.core.finance.exogenousAxes import _lookupFromKindList + + industry, _ = _lookupFromKindList(_getStockCode(company) or "") + except (ImportError, TypeError): + pass + posterior = _INDUSTRY_PRIOR.get(industry or "", _DEFAULT_PRIOR) + + # 신호 1: streak (2연속 → 74.7%, 3연속 → 더 강함) + if streak >= 3: + posterior = _bayesUpdate(posterior, 0.78) + elif streak >= 2: + posterior = _bayesUpdate(posterior, 0.747) + + # 신호 2: 영업이익률 (연속 값 — 크기 반영) + if margin is not None: + if latest["positive"]: + # 매출 성장 + 마진 크기에 따라 차등 갱신 + marginEvidence = min(0.85, 0.72 + margin * 0.003) if margin > 0 else max(0.55, 0.72 - abs(margin) * 0.005) + else: + # 매출 하락 + 마진 부정이면 하락 확신 강화 + marginEvidence = max(0.55, 0.72 - margin * 0.003) if margin < 0 else min(0.85, 0.72 + abs(margin) * 0.003) + posterior = _bayesUpdate(posterior, marginEvidence) + + # 신호 3: OLS 외생변수 (일치/불일치) + if olsAgree is True: + posterior = _bayesUpdate(posterior, 0.777) + elif olsAgree is False: + posterior = _bayesUpdate(posterior, 0.425) # 불일치 시 하향 (OLS가 42.5%) + + # 보정: 원시 posterior를 실측 기반으로 재보정 + # 원시 78~85% → 실측 62~73%. 선형 보정으로 과신 제거. + calibrated = _calibrate(posterior) + + # 신뢰도 등급 (보정된 확률 기준) + if calibrated >= 0.78: + confidence = "very_high" + elif calibrated >= 0.73: + confidence = "high" + elif calibrated >= 0.65: + confidence = "medium" + else: + confidence = "low" + + # 하위호환: confirms도 유지 + confirms = sum(1 for x in [marginAgree, olsAgree, streak >= 2] if x) + + return { + "latestPeriod": latest["period"], + "latestYoyGrowth": latest["yoyGrowth"], + "direction": direction, + "streak": streak, + "margin": round(margin, 1) if margin is not None else None, + "marginAgree": marginAgree, + "olsAgree": olsAgree, + "confirms": confirms, + "probability": round(calibrated, 3), + "rawPosterior": round(posterior, 3), + "industryPrior": round(_INDUSTRY_PRIOR.get(industry or "", _DEFAULT_PRIOR), 3), + "confidence": confidence, + "history": directions[:4], + } + + +def _calibrate(rawPosterior: float) -> float: + """원시 베이즈 확률을 실측 기반으로 재보정. + + walk-forward 564건 캘리브레이션 결과: + - 원시 78% → 실측 62% + - 원시 83% → 실측 73% + - 원시 86% → 실측 88% + + 선형 보간으로 과신 제거. 원시 확률이 높을수록 실측에 가깝게 보정. + """ + # 보정: posterior를 72% 기저 방향으로 수축 (shrinkage) + # calibrated = base + (raw - base) * shrinkage_factor + base = 0.72 # 모멘텀 기저 정확도 + shrinkage = 0.6 # 60%만 반영 + calibrated = base + (rawPosterior - base) * shrinkage + return max(0.50, min(0.95, calibrated)) + + +def _bayesUpdate(prior: float, evidence: float, damping: float = 0.3) -> float: + """베이즈 사후확률 갱신 (감쇠 적용). + + Args: + prior: 현재 P(매출↑) + evidence: P(매출↑ | 이 신호) + damping: 갱신 강도 감쇠 (0~1). 1.0 = 완전 갱신, 0.3 = 30% 갱신. + 신호 간 독립성 가정 위반을 보정. 0.3이 과신 방지 + 변별력 유지의 균형. + + 나이브 베이즈 + 감쇠: lr^damping + """ + if evidence <= 0 or evidence >= 1: + return prior + lr = evidence / (1 - evidence) + # 감쇠: lr의 damping 거듭제곱 + lr_damped = lr**damping + prior_odds = prior / (1 - prior) + posterior_odds = prior_odds * lr_damped + return posterior_odds / (1 + posterior_odds) + + +@memoized_calc +def calcPredictionSynthesis(company, *, basePeriod: str | None = None) -> dict | None: + """다중 신호 종합 — 5개 신호의 단순 평균 앙상블. + + 학술 근거: 32편 논문, 97개 비교에서 단순 평균이 최적 (Green & Armstrong 2015). + + Returns + ------- + dict + signals : dict — 신호별 상세 (direction, strength, 개별 지표) + consensus : str — 종합 합의 ("bullish" | "bearish" | "neutral") + directionScore : float — 방향 점수 (-1.0 ~ +1.0) + agreementScore : float — 신호 합의도 (0.0 ~ 1.0) + confidence : str — 신뢰도 ("high" | "medium" | "low") + nSignals : int — 유효 신호 수 + revenuePrediction : dict | None — 매출 방향 예측 (direction, confidence, streak, expectedAccuracy(%)) + aiContext : dict — AI 소비용 요약 (directionBias, keyDrivers, keyRisks) + """ + # 각 calc 독립 호출 (company._cache로 중복 방지는 호출자 레벨) + momentum = calcEarningsMomentum(company, basePeriod=basePeriod) + peer = calcPeerPrediction(company, basePeriod=basePeriod) + structural = calcStructuralBreak(company, basePeriod=basePeriod) + macro = calcMacroSensitivity(company, basePeriod=basePeriod) + macroReg = calcMacroRegression(company, basePeriod=basePeriod) + eventImp = calcEventImpact(company, basePeriod=basePeriod) + disclosure = calcDisclosureDelta(company, basePeriod=basePeriod) + inventory = calcInventoryDivergence(company, basePeriod=basePeriod) + timing = calcAnnouncementTiming(company, basePeriod=basePeriod) + supplyChain = calcSupplyChainSignal(company, basePeriod=basePeriod) + + signals = {} + scores = [] + + # 1. 이익 모멘텀 신호 + if momentum is not None: + dirKey = momentum["earningsDirection"] + score = _DIRECTION_SCORES.get(dirKey, 0.0) + signals["earningsMomentum"] = { + "direction": dirKey, + "strength": abs(score), + "detail": momentum["momentum"], + "persistence": momentum["persistenceScore"], + } + scores.append(score) + + # 2. 피어 예측 신호 + if peer is not None and peer.get("divergence") is not None: + div = peer["divergence"] + if div > 5: + peerDir = "positive" + peerScore = min(1.0, div / 20) + elif div < -5: + peerDir = "negative" + peerScore = max(-1.0, div / 20) + else: + peerDir = "neutral" + peerScore = 0.0 + signals["peerPrediction"] = { + "direction": peerDir, + "strength": abs(peerScore), + "divergence": peer["divergence"], + } + scores.append(peerScore) + + # 3. 구조변화 신호 + if structural is not None: + stabDir = structural["overallStability"] + stabScore = _DIRECTION_SCORES.get(stabDir, 0.0) + signals["structuralBreak"] = { + "direction": stabDir, + "strength": abs(stabScore), + "nBreaks": sum(1 for m in structural["metrics"] if m["hasBreak"]), + } + scores.append(stabScore) + + # 4. 거시경제 신호 (방향성은 중립 — 조건부 위험 지표) + if macro is not None: + cyclicality = macro["sectorCyclicality"] + _DIRECTION_SCORES.get(cyclicality, 0.0) if cyclicality == "defensive" else 0.0 + signals["macroSensitivity"] = { + "direction": cyclicality, + "strength": 0.0, + "cyclicality": cyclicality, + "relevantIndicators": macro.get("relevantIndicators", []), + } + # 매크로는 방향 점수에 포함하지 않음 (조건부 지표) + + # 5. 공시 변화 신호 + if disclosure is not None: + discDir = disclosure["signalDirection"] + discScore = _DIRECTION_SCORES.get(discDir, 0.0) + signals["disclosureDelta"] = { + "direction": discDir, + "strength": abs(discScore), + "overallChange": disclosure["overallChangeRate"], + } + scores.append(discScore) + + # 5b. 거시-재무 동적 회귀 신호 + if macroReg is not None and macroReg.get("rSquared", 0) > 0.1: + # netMacroEffect가 있으면 사용, 없으면 betas에서 추정 + netEffect = macro.get("netMacroEffect", 0) if macro else 0 + macroRegScore = _clamp(netEffect / 10) # ±10% → ±1.0 + macroRegDir = "positive" if macroRegScore > 0.15 else ("negative" if macroRegScore < -0.15 else "neutral") + signals["macroRegression"] = { + "direction": macroRegDir, + "strength": abs(macroRegScore), + "rSquared": macroReg["rSquared"], + "confidence": macroReg["confidence"], + "nObs": macroReg["nObs"], + } + scores.append(macroRegScore) + + # 5c. 이벤트 충격 신호 + if eventImp is not None: + resilience = eventImp.get("resilience", "medium") + nEvents = len(eventImp.get("events", [])) + if resilience == "low" and nEvents > 0: + eventScore = -0.5 + eventDir = "negative" + elif resilience == "high": + eventScore = 0.2 + eventDir = "positive" + else: + eventScore = 0.0 + eventDir = "neutral" + signals["eventImpact"] = { + "direction": eventDir, + "strength": abs(eventScore), + "resilience": resilience, + "nEvents": nEvents, + "avgRecoveryYears": eventImp.get("avgRecoveryYears"), + } + if nEvents > 0: + scores.append(eventScore) + + # 6. 재고/매출채권 괴리 신호 + if inventory is not None: + risk = inventory["riskScore"] + invScore = -(risk - 50) / 50 # 50 이하=긍정, 50 이상=부정 + invDir = "negative" if risk > 60 else ("positive" if risk < 30 else "neutral") + signals["inventoryDivergence"] = { + "direction": invDir, + "strength": abs(invScore), + "riskScore": risk, + "inventorySignal": inventory["inventorySignal"], + "receivableSignal": inventory["receivableSignal"], + } + scores.append(invScore) + + # 7. 공시 타이밍 신호 + if timing is not None: + timingScore = timing["peerConsensus"] + timingDir = "positive" if timingScore > 0.2 else ("negative" if timingScore < -0.2 else "neutral") + signals["announcementTiming"] = { + "direction": timingDir, + "strength": abs(timingScore), + "peerConsensus": timing["peerConsensus"], + "bellwether": timing["bellwetherSignal"], + "peersReported": timing["sectorPeersReported"], + } + scores.append(timingScore) + + # 8. 공급망 모멘텀 신호 + if supplyChain is not None: + scScore = supplyChain["networkMomentum"] + scDir = "positive" if scScore > 0.15 else ("negative" if scScore < -0.15 else "neutral") + signals["supplyChain"] = { + "direction": scDir, + "strength": abs(scScore), + "networkMomentum": supplyChain["networkMomentum"], + "nLinked": supplyChain["nLinkedListed"], + "risk": supplyChain["supplyChainRisk"], + } + scores.append(scScore) + + # 9. 컨센서스 매출 방향 + consensus = calcConsensusDirection(company, basePeriod=basePeriod) + if consensus is not None: + cnsDir = consensus["direction"] + cnsScore = _DIRECTION_SCORES.get(cnsDir, 0.0) + signals["consensusDirection"] = { + "direction": cnsDir, + "strength": abs(cnsScore), + "expectedGrowth": consensus["expectedGrowthPct"], + "confidence": consensus["confidence"], + } + scores.append(cnsScore) + + # 10. 수급 누적 방향 + flowDir = calcFlowDirection(company, basePeriod=basePeriod) + if flowDir is not None: + fDir = flowDir["direction"] + fScore = _DIRECTION_SCORES.get(fDir, 0.0) + signals["flowDirection"] = { + "direction": fDir, + "strength": abs(fScore), + "smartMoneyNet": flowDir["smartMoneyNet"], + "confidence": flowDir["confidence"], + } + scores.append(fScore) + + # 11. 매출 모멘텀 (전분기 방향 유지) + revDir = calcRevenueDirection(company, basePeriod=basePeriod) + if revDir is not None: + rDir = revDir["direction"] + rScore = _DIRECTION_SCORES.get(rDir, 0.0) + signals["revenueDirection"] = { + "direction": rDir, + "strength": abs(rScore), + "latestYoyGrowth": revDir["latestYoyGrowth"], + "streak": revDir["streak"], + "confidence": revDir["confidence"], + } + scores.append(rScore) + + if not scores: + return None + + # 단순 평균 (학술적 최적) + avgScore = sum(scores) / len(scores) + + if avgScore > 0.25: + consensus = "bullish" + elif avgScore < -0.25: + consensus = "bearish" + else: + consensus = "neutral" + + # 신호 합의도 (표준편차 기반) + if len(scores) >= 2: + mean = avgScore + variance = sum((s - mean) ** 2 for s in scores) / len(scores) + std = math.sqrt(variance) + agreementScore = max(0, 1.0 - std) + else: + agreementScore = 0.5 + + # 신뢰도 + nSignals = len(scores) + if nSignals >= 4 and agreementScore > 0.6: + confidence = "high" + elif nSignals >= 2: + confidence = "medium" + else: + confidence = "low" + + # AI/forecast 엔진 소비용 요약 + keyDrivers = [] + keyRisks = [] + for name, sig in signals.items(): + if sig.get("direction") in ("up", "positive", "accelerating"): + keyDrivers.append(name) + elif sig.get("direction") in ("down", "negative", "decelerating", "volatile"): + keyRisks.append(name) + + # 매출 방향 예측 (모멘텀 기반 — 검증 정확도 71.3%) + revPrediction = None + if revDir is not None: + revPrediction = { + "direction": revDir["direction"], + "confidence": revDir["confidence"], + "streak": revDir["streak"], + "olsAgree": revDir.get("olsAgree"), + "expectedAccuracy": ( + 77.7 + if revDir.get("olsAgree") and revDir["streak"] >= 2 + else 74.7 + if revDir["streak"] >= 2 + else 77.7 + if revDir.get("olsAgree") + else 71.3 + ), + } + + return { + "signals": signals, + "consensus": consensus, + "directionScore": round(avgScore, 3), + "agreementScore": round(agreementScore, 3), + "confidence": confidence, + "nSignals": nSignals, + "revenuePrediction": revPrediction, + "aiContext": { + "directionBias": round(avgScore, 3), + "keyDrivers": keyDrivers, + "keyRisks": keyRisks, + }, + } + + +# ══════════════════════════════════════ +# calc 7: 예측신호 플래그 +# ══════════════════════════════════════ + + +@memoized_calc +def calcPredictionFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]] | None: + """예측신호 경고 플래그. + + Returns + ------- + list[tuple[str, str]] | None + (코드, 메시지) 튜플 목록. 코드는 EARN_DECEL, HIGH_ACCRUAL 등 플래그 ID. + 플래그가 없으면 None. + """ + flags = [] + + # 이익 모멘텀 + momentum = calcEarningsMomentum(company, basePeriod=basePeriod) + if momentum: + if momentum["momentum"] == "decelerating": + flags.append(("EARN_DECEL", "이익 감속 추세 — 최근 3년 연속 감소")) + if momentum["highAccrualWarning"]: + flags.append(("HIGH_ACCRUAL", "높은 발생액 비율 — 이익의 현금 뒷받침 약함")) + if momentum["persistenceScore"] < 30: + flags.append(("LOW_PERSIST", "낮은 이익 지속성 — OCF/NI 비율 낮음")) + + # 구조변화 + structural = calcStructuralBreak(company, basePeriod=basePeriod) + if structural: + if structural["overallStability"] == "volatile": + flags.append(("STRUCT_VOLATILE", "다수 지표에서 구조변화 감지 — 추세 추정 신뢰도 낮음")) + for m in structural["metrics"]: + if m["hasBreak"] and m["name"] == "revenue": + flags.append(("REV_BREAK", f"매출 구조변화 감지 ({m['breakYear']})")) + + # 공시 변화 + disclosure = calcDisclosureDelta(company, basePeriod=basePeriod) + if disclosure: + if disclosure["riskChangeRate"] > 60: + flags.append(("RISK_SURGE", f"리스크 공시 급변 ({disclosure['riskChangeRate']:.0f}%)")) + if disclosure["signalDirection"] == "negative" and disclosure["signalStrength"] == "strong": + flags.append(("DISC_NEGATIVE", "공시 변화 부정적 신호 — 리스크 섹션 대폭 확대")) + + # 피어 괴리 + peer = calcPeerPrediction(company, basePeriod=basePeriod) + if peer and peer.get("divergence") is not None: + if peer["divergence"] < -15: + flags.append(("PEER_BELOW", f"피어 대비 {peer['divergence']:+.1f}%p 하회 예측")) + elif peer["divergence"] > 15: + flags.append(("PEER_ABOVE", f"피어 대비 {peer['divergence']:+.1f}%p 상회 예측")) + + # 거시-재무 회귀 + macroReg = calcMacroRegression(company, basePeriod=basePeriod) + if macroReg: + if macroReg["rSquared"] > 0.3 and macroReg["confidence"] in ("high", "medium"): + betas = macroReg.get("betas", {}) + for indicator, beta in betas.items(): + if abs(beta) > 2.0: + flags.append(("MACRO_HIGH_BETA", f"거시 베타 높음: {indicator} β={beta:+.1f}")) + + # 이벤트 충격 + eventImp = calcEventImpact(company, basePeriod=basePeriod) + if eventImp: + if eventImp.get("resilience") == "low": + flags.append(("LOW_RESILIENCE", f"충격 회복력 낮음 (평균 {eventImp.get('avgRecoveryYears', '?')}년)")) + nEvents = len(eventImp.get("events", [])) + if nEvents >= 3: + flags.append(("FREQUENT_EVENTS", f"최근 충격 이벤트 {nEvents}건")) + + # 재고/매출채권 괴리 + inventory = calcInventoryDivergence(company, basePeriod=basePeriod) + if inventory: + if inventory["riskScore"] > 70: + flags.append(("INV_HIGH_RISK", f"재고/매출채권 위험 점수 {inventory['riskScore']}")) + if inventory["inventorySignal"] == "building": + h = inventory["history"] + div = h[0]["divergence"] if h and h[0].get("divergence") is not None else 0 + flags.append(("INV_DIVERGE", f"재고 급증 vs 매출 (괴리 {div:+.1f}%p)")) + if inventory["receivableSignal"] == "deteriorating": + flags.append(("DSO_SPIKE", "매출채권 회수 악화 — 매출 대비 채권 급증")) + if inventory["noaGrowth"] is not None and inventory["noaGrowth"] > 20: + flags.append(("NOA_SURGE", f"순영업자산 급증 {inventory['noaGrowth']:+.1f}%")) + + # 업종 타이밍 + timing = calcAnnouncementTiming(company, basePeriod=basePeriod) + if timing: + dirs = timing["reportedDirection"] + total = sum(dirs.values()) + if total >= 3 and dirs["down"] / total >= 0.7: + flags.append(("SECTOR_DOWNTURN", f"업종 {dirs['down']}/{total} 기업 실적 하락")) + + # 공급망 리스크 + sc = calcSupplyChainSignal(company, basePeriod=basePeriod) + if sc: + if sc["supplyChainRisk"] == "high": + flags.append(("NETWORK_RISK", f"관계사 {sc['nLinkedListed']}개 중 다수 실적 악화")) + + return flags if flags else None diff --git a/src/dartlab/analysis/financial/profitability.py b/src/dartlab/analysis/financial/profitability.py new file mode 100644 index 0000000000000000000000000000000000000000..7f6a50ece639032bf3661e023887db067d369c7b --- /dev/null +++ b/src/dartlab/analysis/financial/profitability.py @@ -0,0 +1,783 @@ +"""2-1 수익성 분석 -- 이익의 흐름을 추적한다. + +select()로 IS/BS 원본 계정을 가져와서 +금액 + 비율 + YoY 변동을 시계열로 보여준다. +""" + +from __future__ import annotations + +from typing import Any + +from dartlab.analysis.financial._helpers import MAX_RATIO_YEARS, annualColsFromPeriods, sumBorrowings, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = MAX_RATIO_YEARS + + +def _yoy(cur, prev) -> float | None: + if cur is None or prev is None or prev == 0: + return None + return round((cur - prev) / abs(prev) * 100, 2) + + +from dartlab.core.finance.calc import safePct as _pctOf # noqa: E402 + + +# ── 이익 구조 시계열 ── + + +def _isFinancialSector(company) -> bool: + """금융업(은행/보험/증권) 판별.""" + try: + sector = getattr(company, "sector", None) + if sector is not None: + from dartlab.core.sector.types import Sector + + if sector.sector == Sector.FINANCIALS: + return True + except (AttributeError, ImportError): + pass + return False + + +@memoized_calc +def calcMarginTrend(company, *, basePeriod: str | None = None) -> dict | None: + """이익 구조 시계열 -- 매출에서 순이익까지 금액과 마진. + + 일반 기업: 매출/매출원가/매출총이익/판관비/영업이익/당기순이익 + 금융업: 이자수익(revenue 대체)/금융이익/영업이익/당기순이익 + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + revenue : float — 매출 (원) + revenueYoy : float — 매출 전기대비 (%) + operatingIncome : float — 영업이익 (원) + operatingMargin : float — 영업이익률 (%) + operatingIncomeYoy : float — 영업이익 전기대비 (%) + netIncome : float — 당기순이익 (원) + netMargin : float — 순이익률 (%) + netIncomeYoy : float — 순이익 전기대비 (%) + cogs : float — 매출원가 (원) + grossProfit : float — 매출총이익 (원) + grossMargin : float — 매출총이익률 (%) + sga : float — 판관비 (원) + displayHints : dict — core 컬럼 목록 + AI 표시 힌트 + """ + isFinancial = _isFinancialSector(company) + + if isFinancial: + isResult = company.select("IS", ["이자수익", "금융이익", "영업이익", "당기순이익"]) + else: + isResult = company.select( + "IS", ["매출액", "매출원가", "매출총이익", "판매비와관리비", "영업이익", "당기순이익"] + ) + + parsed = toDictBySnakeId(isResult) + if parsed is None: + return None + + data, periods = parsed + + if isFinancial: + # 금융업: 금융이익을 revenue 대체 (이자수익은 일부일 뿐) + rev = data.get("금융이익", {}) or data.get("이자수익", {}) + op = data.get("영업이익", {}) + ni = data.get("당기순이익", {}) + finIncome = data.get("이자수익", {}) + else: + rev = data.get("매출액", {}) + op = data.get("영업이익", {}) + ni = data.get("당기순이익", {}) + + cogs = data.get("매출원가", {}) if not isFinancial else {} + gp = data.get("매출총이익", {}) if not isFinancial else {} + sga = data.get("판매비와관리비", {}) if not isFinancial else {} + + yCols = annualColsFromPeriods(periods, basePeriod, _MAX_YEARS + 1) + if len(yCols) < 2: + return None + history = [] + for i, col in enumerate(yCols): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + r = rev.get(col) + if r is None or r == 0: + continue + + _op = op.get(col) + _ni = ni.get(col) + _opPrev = op.get(prevCol) if prevCol else None + _niPrev = ni.get(prevCol) if prevCol else None + _rPrev = rev.get(prevCol) if prevCol else None + + row: dict = { + "period": col, + "revenue": r, + "revenueYoy": _yoy(r, _rPrev) if prevCol else None, + "operatingIncome": _op, + "operatingMargin": _pctOf(_op, r), + "operatingIncomeYoy": _yoy(_op, _opPrev) if prevCol else None, + "netIncome": _ni, + "netMargin": _pctOf(_ni, r), + "netIncomeYoy": _yoy(_ni, _niPrev) if prevCol else None, + } + + if isFinancial: + row["revenueLabel"] = "금융이익" + row["financialIncome"] = finIncome.get(col) + else: + row["cogs"] = cogs.get(col) + row["grossProfit"] = gp.get(col) + row["grossMargin"] = _pctOf(gp.get(col), r) + row["sga"] = sga.get(col) + + history.append(row) + + if not history: + return None + result: dict[str, Any] = {"history": history} + if isFinancial: + result["isFinancial"] = True + # R22-2: AI 가 표 만들 때 핵심 컬럼을 빠뜨리지 않도록 명시. + # 사용자가 "수익성" 물었으면 마진율 (operatingMargin/netMargin/grossMargin) 이 핵심. + result["displayHints"] = { + "core": ["period", "revenue", "operatingMargin", "netMargin", "grossMargin"], + "note": "수익성 응답 시 operatingMargin/netMargin/grossMargin 컬럼을 표에 반드시 포함", + } + return result + + +# ── ROE 분해 (듀퐁 5요소) ── + + +@memoized_calc +def calcReturnTrend(company, *, basePeriod: str | None = None) -> dict | None: + """ROE 구조 분해 -- 수익을 어떻게 만드는가. + + IS + BS에서 원본 계정을 가져와서 듀퐁 5요소를 직접 계산. + ROE = (NI/EBT) x (EBT/EBIT) x (EBIT/Rev) x (Rev/TA) x (TA/Equity) + = 세금부담 x 이자부담 x 영업마진 x 자산회전 x 레버리지 + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + netIncome : float — 당기순이익 (원) + equity : float — 자기자본 (원) + totalAssets : float — 총자산 (원) + roe : float — 자기자본이익률 (%) + roa : float — 총자산이익률 (%) + taxBurden : float — 세금부담률 (배) + interestBurden : float — 이자부담률 (배) + operatingMargin : float — 영업이익률 (%) + assetTurnover : float — 자산회전율 (배) + leverage : float — 재무레버리지 (배) + """ + isResult = company.select("IS", ["매출액", "영업이익", "법인세차감전순이익", "당기순이익"]) + bsResult = company.select("BS", ["자산총계", "자본총계"]) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + rev = isData.get("매출액", {}) + opIncome = isData.get("영업이익", {}) + pbt = isData.get("법인세차감전순이익", {}) + niRow = isData.get("당기순이익", {}) + ta = bsData.get("자산총계", {}) + eq = bsData.get("자본총계", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + history = [] + for col in yCols: + r = rev.get(col) + o = opIncome.get(col) + p = pbt.get(col) + n = niRow.get(col) + a = ta.get(col) + e = eq.get(col) + + roe = _pctOf(n, e) + roa = _pctOf(n, a) + + # 듀퐁 5요소 (원본에서 직접) + taxBurden = round(n / p, 4) if n is not None and p is not None and p != 0 else None + interestBurden = round(p / o, 4) if p is not None and o is not None and o != 0 else None + operatingMargin = _pctOf(o, r) + assetTurnover = round(r / a, 4) if r is not None and a is not None and a != 0 else None + leverage = round(a / e, 4) if a is not None and e is not None and e != 0 else None + + history.append( + { + "period": col, + "netIncome": n, + "equity": e, + "totalAssets": a, + "roe": roe, + "roa": roa, + "taxBurden": taxBurden, + "interestBurden": interestBurden, + "operatingMargin": operatingMargin, + "assetTurnover": assetTurnover, + "leverage": leverage, + } + ) + + return {"history": history} if history else None + + +# calcDupont은 calcReturnTrend에 통합 +calcDupont = calcReturnTrend + + +# ── 마진 워터폴 ── + + +@memoized_calc +def calcMarginWaterfall(company, *, basePeriod: str | None = None) -> dict | None: + """매출 -> 순이익 마진 워터폴 분해. + + 각 단계에서 얼마나 줄어드는지를 금액 + 비율(%)로 보여준다. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + steps : list[dict] — 워터폴 단계별 (label, value, pct) + """ + isResult = company.select( + "IS", + [ + "매출액", + "매출원가", + "매출총이익", + "판매비와관리비", + "영업이익", + "금융비용", + "금융수익", + "법인세차감전순이익", + "법인세비용", + "당기순이익", + ], + ) + parsed = toDictBySnakeId(isResult) + if parsed is None: + return None + + data, periods = parsed + rev = data.get("매출액", {}) + cogs = data.get("매출원가", {}) + gp = data.get("매출총이익", {}) + sgaRow = data.get("판매비와관리비", {}) + opRow = data.get("영업이익", {}) + finCost = data.get("금융비용", {}) + finInc = data.get("금융수익", {}) + pbt = data.get("법인세차감전순이익", {}) + tax = data.get("법인세비용", {}) + ni = data.get("당기순이익", {}) + + yCols = annualColsFromPeriods(periods, basePeriod, _MAX_YEARS) + if not yCols: + return None + + def _pct(val, r): + if val is None or r is None or r == 0: + return None + return round(val / r * 100, 2) + + history = [] + for col in yCols: + r = rev.get(col) + if r is None or r == 0: + continue + + steps = [{"label": "매출", "amount": r, "pct": 100.0, "cumPct": 100.0}] + + cogsV = cogs.get(col) + gpV = gp.get(col) + if cogsV is not None: + steps.append( + { + "label": "매출원가", + "amount": cogsV, + "pct": -abs(_pct(cogsV, r) or 0), + "cumPct": _pct(gpV, r) or round(100 - abs(_pct(cogsV, r) or 0), 2), + } + ) + if gpV is not None: + steps.append({"label": "매출총이익", "amount": gpV, "pct": _pct(gpV, r), "cumPct": _pct(gpV, r)}) + + sgaV = sgaRow.get(col) + opV = opRow.get(col) + if sgaV is not None: + steps.append( + { + "label": "판관비", + "amount": sgaV, + "pct": -abs(_pct(sgaV, r) or 0), + "cumPct": _pct(opV, r) or round((_pct(gpV, r) or 0) - abs(_pct(sgaV, r) or 0), 2), + } + ) + if opV is not None: + steps.append({"label": "영업이익", "amount": opV, "pct": _pct(opV, r), "cumPct": _pct(opV, r)}) + + fcV = finCost.get(col) + fiV = finInc.get(col) + opPct = _pct(opV, r) or 0 + if fcV is not None: + steps.append( + { + "label": "금융비용", + "amount": fcV, + "pct": -abs(_pct(fcV, r) or 0), + "cumPct": round(opPct - abs(_pct(fcV, r) or 0), 2), + } + ) + if fiV is not None: + steps.append( + { + "label": "금융수익", + "amount": fiV, + "pct": abs(_pct(fiV, r) or 0), + "cumPct": round(opPct - abs(_pct(fcV, r) or 0) + abs(_pct(fiV, r) or 0), 2), + } + ) + + pbtV = pbt.get(col) + if pbtV is not None: + steps.append({"label": "세전이익", "amount": pbtV, "pct": _pct(pbtV, r), "cumPct": _pct(pbtV, r)}) + + taxV = tax.get(col) + if taxV is not None: + steps.append( + { + "label": "법인세", + "amount": taxV, + "pct": -abs(_pct(taxV, r) or 0), + "cumPct": round((_pct(pbtV, r) or 0) - abs(_pct(taxV, r) or 0), 2), + } + ) + + niV = ni.get(col) + if niV is not None: + steps.append({"label": "순이익", "amount": niV, "pct": _pct(niV, r), "cumPct": _pct(niV, r)}) + + history.append({"period": col, "steps": steps}) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcProfitabilityFlags(company, *, basePeriod: str | None = None) -> list[str]: + """수익성 경고/기회 플래그. + + Returns + ------- + list[str] + 경고/기회 메시지 리스트. 빈 리스트이면 이상 없음. + """ + flags: list[str] = [] + isFinancial = _isFinancialSector(company) + + trend = calcMarginTrend(company, basePeriod=basePeriod) + if trend and len(trend["history"]) >= 3: + hist = trend["history"] + # 영업이익률 3기 연속 하락 + oms = [h.get("operatingMargin") for h in hist[:3]] + if all(v is not None for v in oms) and oms[0] < oms[1] < oms[2]: + flags.append(f"영업이익률 3기 연속 하락 ({oms[0]:.1f}%)") + if oms[0] is not None and oms[0] < 0: + flags.append(f"영업적자 ({oms[0]:.1f}%)") + + # 마진 괴리 감지 — 순이익률 vs 영업이익률 + if trend and trend["history"]: + latest = trend["history"][0] + nm = latest.get("netMargin") + om = latest.get("operatingMargin") + if nm is not None and om is not None and om > 0: + ratio = nm / om + if isFinancial: + # 금융업: 금융이익은 순이자+수수료(매출총이익 성격)이므로 + # 영업이익 > 금융이익은 구조적으로 정상. + # 순이익률 << 영업이익률은 금융비용/충당금 때문. + if ratio > 3.0: + flags.append(f"순이익률({nm:.1f}%)이 영업이익률({om:.1f}%)의 {ratio:.1f}배 — 비영업이익 확인 필요") + else: + if ratio > 2.0: + flags.append( + f"순이익률({nm:.1f}%)이 영업이익률({om:.1f}%)의 {ratio:.1f}배 — 대규모 비영업이익 존재" + ) + elif 0 < ratio < 0.3: + flags.append(f"순이익률이 영업이익률의 {ratio:.1f}배 — 대규모 비영업손실") + if om is not None and abs(om) > 100: + if isFinancial: + flags.append( + f"금융업 IS 구조: 금융이익 대비 영업이익률 {om:.1f}%" + " (금융이익=순금융수익, 수수료·보험 등 별도 합산)" + ) + else: + # 지주사(로열티/지분법 수입 위주)는 영업이익률 100%+ 정상. + # "데이터 이상"이라고 하면 사용자에게 불필요한 공포 유발. + flags.append( + f"영업이익률 {om:.1f}% — 매출 대비 영업이익이 크다 (지주사·로열티·지분법이익 구조일 수 있음)" + ) + + if isFinancial: + flags.append("금융업: ROE·ROA가 핵심 수익성 지표 (마진 분석은 참고용)") + + ret = calcReturnTrend(company, basePeriod=basePeriod) + if ret and ret["history"]: + h = ret["history"][0] + roe = h.get("roe") + roa = h.get("roa") + lev = h.get("leverage") + if roe is not None and roa is not None and lev is not None: + if isFinancial: + # 금융업은 레버리지 10x+ 가 정상 (은행 자기자본비율 ~8%) + if roe > 8: + flags.append(f"양호한 ROE ({roe:.1f}%, 금융업 기준)") + else: + if lev > 3: + flags.append(f"ROE의 레버리지 의존도 높음 (자산/자본 = {lev:.1f}배)") + if roe > 15 and roa > 5 and lev < 2: + flags.append(f"진성 고수익 (ROE {roe:.1f}%, 낮은 레버리지)") + + return flags + + +# ── Penman RNOA + FLEV/SPREAD 분해 ── + + +def _get(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + +@memoized_calc +def calcPenmanDecomposition(company, *, basePeriod: str | None = None) -> dict | None: + """Penman 분해 -- ROE가 영업력인지 레버리지인지 분리. + + ROCE = RNOA + FLEV × SPREAD + RNOA = NOPAT / NOA (순영업자산수익률) + FLEV = NFO / Equity (금융레버리지) + NBC = 순금융비용 / NFO (순차입비용률) + SPREAD = RNOA - NBC (초과수익률) + leverageEffect = FLEV × SPREAD (레버리지 효과) + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + rnoa : float — 순영업자산수익률 (%) + flev : float — 금융레버리지 (배) + nbc : float — 순차입비용률 (%) + spread : float — 초과수익률 RNOA-NBC (%) + leverageEffect : float — 레버리지 효과 FLEV×SPREAD (%) + roce : float — 자기자본수익률 RNOA+leverageEffect (%) + + 학술근거: Nissim & Penman (2001), Penman FSA&SV 5e. + """ + isResult = company.select("IS", ["영업이익", "법인세비용", "법인세차감전순이익", "금융이익", "금융비용"]) + bsResult = company.select( + "BS", + [ + "자산총계", + "자본총계", + "매출채권및기타채권", + "재고자산", + "유형자산", + "무형자산", + "매입채무", + "선수금", + "계약부채", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + "현금및현금성자산", + ], + ) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + opRow = isData.get("operating_profit", {}) + taxRow = isData.get("income_tax_expense") or isData.get("income_taxes", {}) + ptRow = isData.get("profit_before_tax", {}) + finIncRow = isData.get("finance_income", {}) + finCostRow = isData.get("finance_costs", {}) + + eqRow = bsData.get("total_stockholders_equity", {}) + recRow = bsData.get("trade_and_other_receivables", {}) + invRow = bsData.get("inventories", {}) + ppeRow = bsData.get("tangible_assets", {}) + intRow = bsData.get("intangible_assets", {}) + apRow = bsData.get("trade_and_other_payables", {}) + advRow = bsData.get("advance_from_customers", {}) + contRow = bsData.get("contract_liabilities", {}) + bsData.get("shortterm_borrowings", {}) + bsData.get("longterm_borrowings", {}) + bsData.get("borrowings", {}) # 통합 차입금 fallback + bsData.get("debentures", {}) + cashRow = bsData.get("cash_and_cash_equivalents", {}) + + yCols = annualColsFromPeriods(isPeriods, maxYears=_MAX_YEARS, basePeriod=basePeriod) + if len(yCols) < 2: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + # NOPAT = 영업이익 × (1 - 유효세율) + opIncome = _getF(opRow, col) + taxExpense = abs(_getF(taxRow, col)) + ptIncome = abs(_getF(ptRow, col)) + effectiveTaxRate = taxExpense / ptIncome if ptIncome > 0 else 0.25 + effectiveTaxRate = min(effectiveTaxRate, 0.5) + nopat = opIncome * (1 - effectiveTaxRate) if opIncome != 0 else None + + # NOA = 영업자산 - 영업부채 + opAssets = _get(recRow, col) + _get(invRow, col) + _get(ppeRow, col) + _get(intRow, col) + opLiab = _get(apRow, col) + _get(advRow, col) + _get(contRow, col) + noa = opAssets - opLiab if opAssets > 0 else None + + # NFO = 금융부채 - 금융자산(현금) + # 차입금: 회사 키 패턴 무관 헬퍼 + finDebt = sumBorrowings(bsData, col) + cash = _get(cashRow, col) + nfo = finDebt - cash + + # 순금융비용 + finInc = _getF(finIncRow, col) + finCost = _getF(finCostRow, col) + netFinCost = finCost - finInc # 양수 = 순비용 + + equity = _get(eqRow, col) + + # RNOA + rnoa = round(nopat / noa * 100, 2) if nopat is not None and noa and noa > 0 else None + # FLEV + flev = round(nfo / equity, 2) if equity > 0 else None + # NBC + nbc = round(netFinCost * (1 - effectiveTaxRate) / abs(nfo) * 100, 2) if nfo != 0 else None + # SPREAD + spread = round(rnoa - nbc, 2) if rnoa is not None and nbc is not None else None + # Leverage Effect + levEffect = round(flev * spread, 2) if flev is not None and spread is not None else None + # ROCE (검증: ≈ RNOA + leverageEffect) + roce = round(rnoa + levEffect, 2) if rnoa is not None and levEffect is not None else None + + history.append( + { + "period": col, + "rnoa": rnoa, + "flev": flev, + "nbc": nbc, + "spread": spread, + "leverageEffect": levEffect, + "roce": roce, + } + ) + + if not history: + return None + + return {"history": history} + + +# ── McKinsey ROIC Tree ── + + +@memoized_calc +def calcRoicTree(company, *, basePeriod: str | None = None) -> dict | None: + """McKinsey ROIC Tree — ROIC가 높은/낮은 이유를 원인까지 추적. + + ROIC = Operating Margin × Capital Turnover + Operating Margin = 1 - (COGS/Rev) - (SGA/Rev) - Tax Rate + Capital Turnover = Revenue / Invested Capital + IC = Working Capital (AR+Inv-AP) + Fixed Capital (PPE+Intangible) + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + roic : float — 투하자본수익률 (%) + operatingMargin : float — 영업이익률 (%) + capitalTurnover : float — 투하자본회전율 (배) + grossMargin : float — 매출총이익률 (%) + sgaRatio : float — 판관비율 (%) + effectiveTaxRate : float — 유효세율 (%) + wcTurnover : float — 운전자본회전율 (배) + fixedTurnover : float — 고정자본회전율 (배) + marginDriver : str — 마진 변동 주요인 ("cogs"|"sga"|"tax") + turnoverDriver : str — 회전율 변동 주요인 ("wc"|"fixed") + """ + isResult = company.select( + "IS", ["매출액", "매출원가", "판매비와관리비", "영업이익", "법인세비용", "법인세차감전순이익"] + ) + bsResult = company.select( + "BS", + [ + "매출채권및기타채권", + "재고자산", + "매입채무", + "유형자산", + "무형자산", + "자본총계", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + "현금및현금성자산", + ], + ) + + isParsed = toDictBySnakeId(isResult) + bsParsed = toDictBySnakeId(bsResult) + if isParsed is None or bsParsed is None: + return None + + isData, isPeriods = isParsed + bsData, _ = bsParsed + + revRow = isData.get("sales", {}) + cogsRow = isData.get("cost_of_sales", {}) + sgaRow = isData.get("selling_and_administrative_expenses", {}) + opRow = isData.get("operating_profit", {}) + taxRow = isData.get("income_tax_expense") or isData.get("income_taxes", {}) + ptRow = isData.get("profit_before_tax", {}) + + arRow = bsData.get("trade_and_other_receivables", {}) + invRow = bsData.get("inventories", {}) + apRow = bsData.get("trade_and_other_payables", {}) + ppeRow = bsData.get("tangible_assets", {}) + intRow = bsData.get("intangible_assets", {}) + bsData.get("total_stockholders_equity", {}) + bsData.get("shortterm_borrowings", {}) + bsData.get("longterm_borrowings", {}) + bsData.get("borrowings", {}) # 통합 차입금 fallback + bsData.get("debentures", {}) + bsData.get("cash_and_cash_equivalents", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + + def _getF(row: dict, col: str) -> float: + v = row.get(col) + return v if v is not None else 0 + + history = [] + for col in yCols: + rev = _getF(revRow, col) + if rev <= 0: + continue + cogs = _getF(cogsRow, col) + sga = _getF(sgaRow, col) + opIncome = _getF(opRow, col) + taxExp = abs(_getF(taxRow, col)) + ptIncome = abs(_getF(ptRow, col)) + + # Margin 분해 + grossMargin = round((rev - cogs) / rev * 100, 2) if cogs else None + sgaRatio = round(sga / rev * 100, 2) if sga else None + opMargin = round(opIncome / rev * 100, 2) + effectiveTax = round(taxExp / ptIncome * 100, 2) if ptIncome > 0 else 25.0 + effectiveTax = min(effectiveTax, 50.0) + + # NOPAT + nopat = opIncome * (1 - effectiveTax / 100) + + # Invested Capital + wc = _get(arRow, col) + _get(invRow, col) - _get(apRow, col) + fc = _get(ppeRow, col) + _get(intRow, col) + ic = wc + fc if (wc + fc) > 0 else None + + # ROIC + roic = round(nopat / ic * 100, 2) if ic and ic > 0 else None + + # Capital Turnover + capTurnover = round(rev / ic, 2) if ic and ic > 0 else None + + # WC/Fixed Turnover + wcTurnover = round(rev / wc, 2) if wc > 0 else None + fixedTurnover = round(rev / fc, 2) if fc > 0 else None + + # 마진 드라이버 판단 + if grossMargin is not None and sgaRatio is not None: + if grossMargin > 40: + marginDriver = "높은 가격결정력 (매출총이익률 > 40%)" + elif sgaRatio and sgaRatio < 15: + marginDriver = "낮은 판관비 (SGA < 15%)" + elif opMargin > 15: + marginDriver = "고마진 사업모델" + elif opMargin < 5: + marginDriver = "박리다매 또는 원가 경쟁" + else: + marginDriver = "보통 수준" + else: + marginDriver = None + + # 자본회전 드라이버 판단 + if capTurnover is not None: + if capTurnover > 2: + turnoverDriver = "자산 경량 모델 (자본회전 > 2회)" + elif capTurnover < 0.5: + turnoverDriver = "자본 집약 (자본회전 < 0.5회)" + else: + turnoverDriver = "보통 수준" + else: + turnoverDriver = None + + history.append( + { + "period": col, + "roic": roic, + "operatingMargin": opMargin, + "capitalTurnover": capTurnover, + "grossMargin": grossMargin, + "sgaRatio": sgaRatio, + "effectiveTaxRate": round(effectiveTax, 1), + "wcTurnover": wcTurnover, + "fixedTurnover": fixedTurnover, + "marginDriver": marginDriver, + "turnoverDriver": turnoverDriver, + } + ) + + if not history: + return None + return {"history": history} diff --git a/src/dartlab/analysis/financial/research/__init__.py b/src/dartlab/analysis/financial/research/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5f9276ad8bd94165a8e57efa21d173d98adfaf9f --- /dev/null +++ b/src/dartlab/analysis/financial/research/__init__.py @@ -0,0 +1,74 @@ +"""종합 기업분석 리포트 엔진. + +종목코드 하나로 세계 수준의 equity research 리포트를 생성한다. + +사용법:: + + from dartlab.analysis.financial.research import generateResearch + + result = generateResearch(company) + result.executive.opinion # "강력매수" + result.thesis.bullCase # ["매출 +25.3% (고성장)", ...] + result.valuationAnalysis # DCF/DDM/상대가치 종합 + result.riskAnalysis # distress + anomalies +""" + +from dartlab.analysis.financial.research.orchestrator import generateResearch +from dartlab.analysis.financial.research.scoring import calcAllScores +from dartlab.analysis.financial.research.types import ( + AnomalySection, + CompanyOverview, + DistressSection, + DuPontResult, + EarningsQuality, + ExecutiveSummary, + FinancialAnalysis, + ForecastData, + InsightDetail, + InvestmentThesis, + LynchFairValue, + MagicFormulaScore, + MarketData, + NarrativeAnalysis, + NarrativeParagraph, + PeerSection, + PiotroskiScore, + QmjScore, + QuantScores, + ResearchMeta, + ResearchResult, + RiskSection, + SectorKpi, + SectorKpis, + ValuationSection, +) + +__all__ = [ + "generateResearch", + "calcAllScores", + "AnomalySection", + "CompanyOverview", + "DistressSection", + "DuPontResult", + "EarningsQuality", + "ExecutiveSummary", + "FinancialAnalysis", + "ForecastData", + "InsightDetail", + "InvestmentThesis", + "LynchFairValue", + "MagicFormulaScore", + "MarketData", + "NarrativeAnalysis", + "NarrativeParagraph", + "PeerSection", + "PiotroskiScore", + "QmjScore", + "QuantScores", + "ResearchMeta", + "ResearchResult", + "RiskSection", + "SectorKpi", + "SectorKpis", + "ValuationSection", +] diff --git a/src/dartlab/analysis/financial/research/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc58be6e6a14e8e39dac69983aee18b8f19e9b06 Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/narrative.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/narrative.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15c536daa079e18378235dedb5bc65e9e23d1d6e --- /dev/null +++ b/src/dartlab/analysis/financial/research/__pycache__/narrative.cpython-312.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa00908830e7b84c4d934933f26429ff73518fb6621231ac06441d85503ba30d +size 134098 diff --git a/src/dartlab/analysis/financial/research/__pycache__/orchestrator.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/orchestrator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2144f9dd2b10c36138aaf83a7a7016587a80480c Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/orchestrator.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/quality.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/quality.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0daa777464f0b38910090b16138f3bdcfeeb29dd Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/quality.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/scoring.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/scoring.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..946628af1b32950518dfb8a6c08e566c352acac8 Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/scoring.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/sectorKpi.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/sectorKpi.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16f34dcd389b805ebc6aefa6e1f18418bac502b2 Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/sectorKpi.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/spec.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/spec.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa9876567b95208324e08fcfa6d9d350f2a2de3b Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/spec.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/thesis.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/thesis.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06b9afbbfaef60257886df0ee4385f792d837558 Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/thesis.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/__pycache__/types.cpython-312.pyc b/src/dartlab/analysis/financial/research/__pycache__/types.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35a874aa69cfbf2ec275d42d24737a0912eb562d Binary files /dev/null and b/src/dartlab/analysis/financial/research/__pycache__/types.cpython-312.pyc differ diff --git a/src/dartlab/analysis/financial/research/narrative.py b/src/dartlab/analysis/financial/research/narrative.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2db00c374c5d4e2bc53e077620527ee726f5bc --- /dev/null +++ b/src/dartlab/analysis/financial/research/narrative.py @@ -0,0 +1,2662 @@ +"""교차분석 서술 엔진 — 15개 차원에서 IS/BS/CF 3표를 교차분석하여 해석적 서술문으로 변환.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from dartlab.analysis.financial.research.types import ( + DuPontResult, + EarningsQuality, + MarketData, + NarrativeAnalysis, + NarrativeParagraph, +) + +# ══════════════════════════════════════ +# 내부 입력 구조체 +# ══════════════════════════════════════ + + +@dataclass +class _Input: + """narrative 분석 공통 입력.""" + + aSeries: dict + aYears: list[str] + dupont: DuPontResult | None = None + earningsQuality: EarningsQuality | None = None + marketData: MarketData | None = None + segmentsDf: object | None = None # pl.DataFrame | None + costByNatureDf: object | None = None # pl.DataFrame | None + sectorBenchmark: object | None = None # SectorBenchmark | None + sectorParams: object | None = None # SectorParams | None + isFinancial: bool = False + # Phase 1: ratios 연결 + ratios: object | None = None # finance.ratios + # Phase 4: 실전 사업분석 + salesOrderDf: object | None = None # pl.DataFrame | None + productServiceDf: object | None = None # pl.DataFrame | None + quarterlyIsDf: object | None = None # pl.DataFrame | None + # Phase 5: 인적자본 + employeeDf: object | None = None # pl.DataFrame | None + rndDf: object | None = None # pl.DataFrame | None + + +# ══════════════════════════════════════ +# 유틸 +# ══════════════════════════════════════ + + +from dartlab.core.finance.calc import safeDiv as _safeDiv # noqa: E402 + + +def _pct(v: float | None) -> str: + """% 포맷.""" + if v is None: + return "-" + return f"{v:.1f}%" + + +def _pctChange(v: float | None) -> str: + """+/- % 포맷.""" + if v is None: + return "-" + return f"{v:+.1f}%" + + +def _pp(v: float | None) -> str: + """%p 포맷.""" + if v is None: + return "-" + return f"{v:+.1f}%p" + + +def _getVals(series: dict, sjDiv: str, key: str) -> list[float | None]: + """aSeries에서 특정 계정 시계열 추출.""" + return series.get(sjDiv, {}).get(key, []) + + +def _lastN(vals: list[float | None], n: int = 2) -> list[float | None]: + """마지막 n개 non-None 값.""" + filtered = [(i, v) for i, v in enumerate(vals) if v is not None] + return [v for _, v in filtered[-n:]] + + +def _trend(vals: list[float | None]) -> str: + """3개 이상 값의 추세 판별.""" + clean = [v for v in vals if v is not None] + if len(clean) < 3: + return "unknown" + diffs = [clean[i] - clean[i - 1] for i in range(1, len(clean))] + if all(d > 0 for d in diffs): + return "improving" + if all(d < 0 for d in diffs): + return "deteriorating" + return "mixed" + + +def _consecutiveDirection(vals: list[float | None]) -> tuple[str, int]: + """연속 개선/악화 횟수.""" + clean = [v for v in vals if v is not None] + if len(clean) < 2: + return "unknown", 0 + direction = "up" if clean[-1] > clean[-2] else "down" + count = 1 + for i in range(len(clean) - 2, 0, -1): + if direction == "up" and clean[i] > clean[i - 1]: + count += 1 + elif direction == "down" and clean[i] < clean[i - 1]: + count += 1 + else: + break + return direction, count + + +# ══════════════════════════════════════ +# 7개 분석 차원 +# ══════════════════════════════════════ + + +def _analyzeDupont(inp: _Input) -> NarrativeParagraph | None: + """DuPont 5-factor 교차분해 + ROIC.""" + dp = inp.dupont + if dp is None or not dp.roe: + return None + roeLast = next((v for v in reversed(dp.roe) if v is not None), None) + marginLast = next((v for v in reversed(dp.netMargin) if v is not None), None) + turnoverLast = next((v for v in reversed(dp.assetTurnover) if v is not None), None) + leverageLast = next((v for v in reversed(dp.equityMultiplier) if v is not None), None) + if roeLast is None or marginLast is None: + return None + + parts: list[str] = [] + roePct = roeLast * 100 + marginPct = marginLast * 100 + roeStr = f"ROE {roePct:.1f}%" + + # 5-factor 분해 + tbLast = next((v for v in reversed(dp.taxBurden) if v is not None), None) if dp.taxBurden else None + ibLast = next((v for v in reversed(dp.interestBurden) if v is not None), None) if dp.interestBurden else None + opmLast = next((v for v in reversed(dp.operatingMargin) if v is not None), None) if dp.operatingMargin else None + + if tbLast is not None and ibLast is not None and opmLast is not None: + decomp5 = [ + f"세금부담 {tbLast:.2f}", + f"이자부담 {ibLast:.2f}", + f"OPM {opmLast * 100:.1f}%", + f"회전율 {turnoverLast:.2f}배" if turnoverLast else "", + f"레버리지 {leverageLast:.1f}배" if leverageLast else "", + ] + parts.append(f"{roeStr} = {' × '.join(d for d in decomp5 if d)}") + if tbLast < 0.7: + parts.append(f"세금부담률 높음(유효세율 {(1 - tbLast) * 100:.0f}%)") + if ibLast < 0.7: + parts.append("이자비용이 세전이익을 크게 잠식") + else: + decomp = [] + if marginLast is not None: + decomp.append(f"순이익률 {marginPct:.1f}%") + if turnoverLast is not None: + decomp.append(f"자산회전율 {turnoverLast:.2f}배") + if leverageLast is not None: + decomp.append(f"레버리지 {leverageLast:.1f}배") + parts.append(f"{roeStr}는 {' × '.join(decomp)}로 구성") + + # ROIC + roicLast = next((v for v in reversed(dp.roic) if v is not None), None) if dp.roic else None + if roicLast is not None: + roicPct = roicLast * 100 + parts.append(f"ROIC {roicPct:.1f}%") + if roicPct > 10: + parts.append("투자자본 대비 양호한 가치창출") + elif roicPct < 5: + parts.append("ROIC 낮음 — 자본비용 대비 가치파괴 가능성") + + # 업종 비교 + bench = inp.sectorBenchmark + if bench is not None: + roeMedian = getattr(bench, "roeMedian", None) + if roeMedian is not None: + if roePct > roeMedian: + parts.append(f"업종 중앙값({roeMedian:.1f}%) 대비 우수") + else: + parts.append(f"업종 중앙값({roeMedian:.1f}%) 하회") + + # 추세 (assetTurnover) + if len(dp.assetTurnover) >= 2: + cleanTurnover = [v for v in dp.assetTurnover if v is not None] + if len(cleanTurnover) >= 2: + diff = cleanTurnover[-1] - cleanTurnover[-2] + if abs(diff) > 0.05: + direction = "상승" if diff > 0 else "하락" + parts.append(f"자산회전율 {direction} 추세(전년 대비 {diff:+.2f})") + + # driver + driverMap = { + "margin": "마진 주도형", + "turnover": "회전율 주도형", + "leverage": "레버리지 주도형", + "balanced": "균형형", + } + driverLabel = driverMap.get(dp.driver, dp.driver) + parts.append(driverLabel) + + body = ". ".join(parts) + "." + severity = "positive" if roeLast > 0.10 else "neutral" if roeLast > 0.05 else "negative" + return NarrativeParagraph(dimension="dupont", title="수익구조 분해 (DuPont 5-Factor)", body=body, severity=severity) + + +def _analyzeMarginTrend(inp: _Input) -> NarrativeParagraph | None: + """마진 추세 분해 — 원가율/판관비 기여도 분석.""" + sales = _getVals(inp.aSeries, "IS", "sales") + cogs = _getVals(inp.aSeries, "IS", "cost_of_sales") + op = _getVals(inp.aSeries, "IS", "operating_profit") + if not sales or len(sales) < 2: + return None + + # 마진 계산 + gmList = ( + [ + _safeDiv( + s - c if s is not None and c is not None else None, + s, + ) + for s, c in zip(sales, cogs) + ] + if cogs + else [] + ) + # 더 간단하게 다시 + gmList = [] + if cogs: + for s, c in zip(sales, cogs): + if s is not None and c is not None and s != 0: + gmList.append((s - c) / s * 100) + else: + gmList.append(None) + + omPctList = [] + for o, s in zip(op, sales): + if o is not None and s is not None and s != 0: + omPctList.append(o / s * 100) + else: + omPctList.append(None) + + if not omPctList or all(v is None for v in omPctList): + return None + + parts: list[str] = [] + clean = [v for v in omPctList if v is not None] + if len(clean) >= 2: + latest = clean[-1] + prev = clean[-2] + diff = latest - prev + direction, count = _consecutiveDirection(omPctList) + + if count >= 3: + dirLabel = "개선" if direction == "up" else "악화" + vals = [f"{v:.1f}%" for v in clean[-count - 1 :] if v is not None] + parts.append(f"영업이익률 {count}년 연속 {dirLabel} ({'→'.join(vals)})") + else: + parts.append(f"영업이익률 {latest:.1f}%(전년 {prev:.1f}%, {_pp(diff)})") + + # 원가율 변동 기여 + if gmList and len(gmList) >= 2: + cleanGm = [(i, v) for i, v in enumerate(gmList) if v is not None] + if len(cleanGm) >= 2: + gmDiff = cleanGm[-1][1] - cleanGm[-2][1] + if abs(gmDiff) > 0.3: + label = "원가율 개선" if gmDiff > 0 else "원가율 악화" + parts.append(f"매출총이익률 {_pp(gmDiff)} ({label} 기여)") + + # SGA 비율 (sales - cogs - op = SGA) + if cogs and op: + sgaList = [] + for s, c, o in zip(sales, cogs, op): + if all(v is not None for v in (s, c, o)) and s != 0: + sga = s - c - o + sgaList.append(sga / s * 100) + else: + sgaList.append(None) + cleanSga = [(i, v) for i, v in enumerate(sgaList) if v is not None] + if len(cleanSga) >= 2: + sgaDiff = cleanSga[-1][1] - cleanSga[-2][1] + if abs(sgaDiff) > 0.3: + label = "판관비 효율화" if sgaDiff < 0 else "판관비 증가" + parts.append(f"판관비율 {_pp(sgaDiff)} ({label})") + + # EBITDA 마진 (Lens 1 강화) + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + if op and depreciation: + ebitdaMarginList = [] + for o, d, s in zip(op, depreciation, sales): + if all(v is not None for v in (o, d, s)) and s != 0: + ebitdaMarginList.append((o + abs(d)) / s * 100) + else: + ebitdaMarginList.append(None) + emClean = [v for v in ebitdaMarginList if v is not None] + if len(emClean) >= 2: + emDiff = emClean[-1] - emClean[-2] + parts.append(f"EBITDA마진 {emClean[-1]:.1f}%({_pp(emDiff)})") + + # 유효세율 추이 (Lens 1 강화) + ebt = _getVals(inp.aSeries, "IS", "income_before_tax") + if not ebt: + ebt = _getVals(inp.aSeries, "IS", "profit_before_tax") + taxExpense = _getVals(inp.aSeries, "IS", "income_tax_expense") + if ebt and taxExpense: + taxRateList = [] + for e, t in zip(ebt, taxExpense): + if e is not None and t is not None and e != 0 and e > 0: + taxRateList.append(abs(t) / e * 100) + else: + taxRateList.append(None) + trClean = [v for v in taxRateList if v is not None] + if len(trClean) >= 2: + trDiff = trClean[-1] - trClean[-2] + if abs(trDiff) > 3: + label = "세부담 증가" if trDiff > 0 else "세부담 경감" + parts.append(f"유효세율 {trClean[-1]:.1f}%({_pp(trDiff)}, {label})") + + # costByNature 원가 구조 분해 (Lens 1 강화) + costDf = inp.costByNatureDf + if costDf is not None: + try: + import polars as pl + + if isinstance(costDf, pl.DataFrame) and len(costDf) > 0: + # 원재료비/인건비 비중 추이 (가용 컬럼 기반) + cols = costDf.columns + for keyword, label in [("원재료", "원재료비율"), ("인건비", "인건비율"), ("감가상각", "감가상각비율")]: + matchCols = [c for c in cols if keyword in c] + if matchCols: + vals = costDf[matchCols[0]].to_list() + cleanVals = [v for v in vals if v is not None] + if len(cleanVals) >= 2: + diff = cleanVals[-1] - cleanVals[-2] + if abs(diff) > 1: + parts.append(f"{label} {cleanVals[-1]:.1f}%({_pp(diff)})") + except (ImportError, AttributeError, ValueError, KeyError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + latestOm = clean[-1] if clean else 0 + severity = "positive" if latestOm > 10 else "neutral" if latestOm > 5 else "negative" + return NarrativeParagraph(dimension="margin", title="마진 추세 분석", body=body, severity=severity) + + +def _analyzeGrowthQuality(inp: _Input) -> NarrativeParagraph | None: + """성장의 질 — 매출 vs 이익 성장률 + 부문별 기여.""" + sales = _getVals(inp.aSeries, "IS", "sales") + op = _getVals(inp.aSeries, "IS", "operating_profit") + ni = _getVals(inp.aSeries, "IS", "net_profit") + if not sales or len(sales) < 2: + return None + + # 직전 YoY 성장률 + def _yoyGrowth(vals: list[float | None]) -> float | None: + clean = [(i, v) for i, v in enumerate(vals) if v is not None and v != 0] + if len(clean) < 2: + return None + prev, curr = clean[-2][1], clean[-1][1] + return (curr - prev) / abs(prev) * 100 + + salesGr = _yoyGrowth(sales) + opGr = _yoyGrowth(op) + niGr = _yoyGrowth(ni) + + parts: list[str] = [] + if salesGr is not None: + parts.append(f"매출 {_pctChange(salesGr)}") + if opGr is not None: + parts.append(f"영업이익 {_pctChange(opGr)}") + if niGr is not None: + parts.append(f"순이익 {_pctChange(niGr)}") + + if not parts: + return None + + # 질적 판단 + qualityNote = "" + if salesGr is not None and opGr is not None: + if opGr > salesGr + 5: + qualityNote = "이익 성장이 매출 성장을 상회하는 질적 성장" + elif salesGr > opGr + 5 and salesGr > 0: + qualityNote = "외형 성장 대비 수익성 미흡 — 마진 압박 가능성" + elif salesGr > 0 and opGr > 0: + qualityNote = "매출과 이익 동반 성장" + if qualityNote: + parts.append(qualityNote) + + # SGR (지속가능성장률 = ROE × (1 - 배당성향)) + totalEquity = _getVals(inp.aSeries, "BS", "total_equity") + niVals = [v for v in ni if v is not None] + teVals = [v for v in totalEquity if v is not None] if totalEquity else [] + if len(niVals) >= 1 and len(teVals) >= 1 and teVals[-1] and teVals[-1] > 0: + roe = niVals[-1] / teVals[-1] + # 배당성향 추정: 이익잉여금 변동 / 순이익 + retainedEarnings = _getVals(inp.aSeries, "BS", "retained_earnings") + reVals = [v for v in retainedEarnings if v is not None] if retainedEarnings else [] + retentionRate = 0.7 # 기본 70% + if len(reVals) >= 2 and niVals[-1] != 0: + reChange = reVals[-1] - reVals[-2] + rr = reChange / niVals[-1] + if 0 < rr <= 1: + retentionRate = rr + sgr = roe * retentionRate * 100 + if salesGr is not None and sgr > 0: + gap = (salesGr or 0) - sgr + parts.append(f"SGR(지속가능성장률) {sgr:.1f}%") + if gap > 10: + parts.append("실제 매출 성장률이 SGR을 크게 상회 — 외부 자금 조달 필요 구간") + elif gap < -10: + parts.append("SGR 대비 저성장 — 잉여 자본 활용 여력 존재") + + # 매출 vs 이익 3년+ 성장률 괴리 패턴 + salesAll = [v for v in sales if v is not None] + opAll = [v for v in op if v is not None] + if len(salesAll) >= 3 and len(opAll) >= 3: + salesCagr = ((salesAll[-1] / salesAll[0]) ** (1 / (len(salesAll) - 1)) - 1) * 100 if salesAll[0] > 0 else None + opCagr = ((opAll[-1] / opAll[0]) ** (1 / (len(opAll) - 1)) - 1) * 100 if opAll[0] > 0 else None + if salesCagr is not None and opCagr is not None: + parts.append(f"매출 CAGR {salesCagr:.1f}% / 영업이익 CAGR {opCagr:.1f}%") + + # 부문별 기여 (segments) + segDf = inp.segmentsDf + if segDf is not None: + try: + segParts = _analyzeSegmentGrowth(segDf) + if segParts: + parts.extend(segParts) + except (AttributeError, ValueError, KeyError): + pass + + body = ". ".join(parts) + "." + severity = "positive" if (salesGr or 0) > 5 else "neutral" if (salesGr or 0) > -5 else "negative" + return NarrativeParagraph(dimension="growth", title="성장의 질", body=body, severity=severity) + + +def _analyzeSegmentGrowth(segDf: object) -> list[str]: + """segments DataFrame에서 부문별 성장 기여 분석.""" + import polars as pl + + if not isinstance(segDf, pl.DataFrame) or segDf.is_empty(): + return [] + + cols = segDf.columns + # 숫자 컬럼 = 기간 (연도) + numCols = [c for c in cols if c not in ("부문", "segment", "항목", "구분") and not c.startswith("__")] + if len(numCols) < 2: + return [] + + # 마지막 2개 기간 + latestCol = numCols[-1] + prevCol = numCols[-2] + nameCol = cols[0] # 첫 컬럼 = 부문명 + + parts: list[str] = [] + rows = segDf.to_dicts() + segments: list[dict] = [] + totalLatest = 0.0 + + for row in rows: + name = row.get(nameCol, "") + if not name or "합계" in str(name) or "소계" in str(name): + continue + latestVal = row.get(latestCol) + prevVal = row.get(prevCol) + if latestVal is not None and isinstance(latestVal, (int, float)): + totalLatest += abs(latestVal) + segments.append({"name": name, "latest": latestVal, "prev": prevVal}) + + if not segments or totalLatest == 0: + return [] + + # 비중 계산 + 성장률 + topSegments: list[str] = [] + for seg in sorted(segments, key=lambda s: abs(s["latest"]), reverse=True)[:3]: + share = abs(seg["latest"]) / totalLatest * 100 + gr = None + if seg["prev"] is not None and isinstance(seg["prev"], (int, float)) and seg["prev"] != 0: + gr = (seg["latest"] - seg["prev"]) / abs(seg["prev"]) * 100 + grStr = f" {_pctChange(gr)}" if gr is not None else "" + topSegments.append(f"{seg['name']} {share:.0f}%{grStr}") + + if topSegments: + parts.append(f"부문별: {', '.join(topSegments)}") + + # 집중도 경고 + if segments: + maxShare = max(abs(s["latest"]) / totalLatest * 100 for s in segments) + if maxShare > 70: + topName = max(segments, key=lambda s: abs(s["latest"]))["name"] + parts.append(f"{topName} 비중 {maxShare:.0f}% — 단일 부문 의존도 높음") + + return parts + + +def _analyzeCashflowQuality(inp: _Input) -> NarrativeParagraph | None: + """현금흐름의 질 — OCF/NI + CAPEX + FCF.""" + eq = inp.earningsQuality + ocf = _getVals(inp.aSeries, "CF", "operating_cashflow") + sales = _getVals(inp.aSeries, "IS", "sales") + + # capex: 여러 가능한 키 + capex = _getVals(inp.aSeries, "CF", "capital_expenditure") + if not capex: + capex = _getVals(inp.aSeries, "CF", "acquisition_of_property_plant_and_equipment") + + parts: list[str] = [] + + # OCF/NI + if eq and eq.cfToNi is not None: + ratio = eq.cfToNi + if ratio > 1.2: + parts.append(f"OCF/순이익 {ratio:.1f}배로 이익의 질 양호 — 현금 뒷받침 충분") + elif ratio > 0.8: + parts.append(f"OCF/순이익 {ratio:.1f}배로 보통 수준") + elif ratio > 0: + parts.append(f"OCF/순이익 {ratio:.1f}배로 현금 뒷받침 미흡") + else: + parts.append(f"OCF/순이익 {ratio:.1f}배 — 영업현금흐름 적자") + + # CAPEX 비율 + if capex and sales: + cleanOcf = [(o, s) for o, s in zip(capex, sales) if o is not None and s is not None and s != 0] + if cleanOcf: + latestCapex, latestSales = cleanOcf[-1] + capexRatio = abs(latestCapex) / latestSales * 100 + parts.append(f"CAPEX/매출 {capexRatio:.1f}%") + + # FCF + if ocf and capex: + cleanPairs = [(o, c) for o, c in zip(ocf, capex) if o is not None and c is not None] + if cleanPairs: + latestOcf, latestCapex = cleanPairs[-1] + fcf = latestOcf - abs(latestCapex) + if fcf > 0: + parts.append(f"FCF 양호({fcf / 1e8:,.0f}억)") + else: + parts.append(f"FCF 적자({fcf / 1e8:,.0f}억) — 투자 부담") + + if not parts: + return None + body = ". ".join(parts) + "." + cfSev = "positive" + if eq and eq.cfToNi is not None: + if eq.cfToNi < 0.5: + cfSev = "negative" + elif eq.cfToNi < 0.8: + cfSev = "warning" + elif eq.cfToNi < 1.2: + cfSev = "neutral" + return NarrativeParagraph(dimension="cashflow", title="현금흐름의 질", body=body, severity=cfSev) + + +def _analyzeEfficiency(inp: _Input) -> NarrativeParagraph | None: + """운전자본 효율성 — DSO/DIO/DPO/CCC 추세.""" + sales = _getVals(inp.aSeries, "IS", "sales") + cogs = _getVals(inp.aSeries, "IS", "cost_of_sales") + receivables = _getVals(inp.aSeries, "BS", "trade_receivable") + if not receivables: + receivables = _getVals(inp.aSeries, "BS", "trade_and_other_receivables") + inventories = _getVals(inp.aSeries, "BS", "inventories") + payables = _getVals(inp.aSeries, "BS", "trade_payable") + if not payables: + payables = _getVals(inp.aSeries, "BS", "trade_and_other_payables") + + if not sales or len(sales) < 2: + return None + + # DSO 계산 + dsoList: list[float | None] = [] + for r, s in zip(receivables, sales): + dsoList.append(r / (s / 365) if r is not None and s is not None and s != 0 else None) + + # DIO 계산 + dioList: list[float | None] = [] + for inv, c in zip(inventories, cogs): + dioList.append(inv / (c / 365) if inv is not None and c is not None and c != 0 else None) + + # DPO 계산 + dpoList: list[float | None] = [] + for p, c in zip(payables, cogs): + dpoList.append(p / (c / 365) if p is not None and c is not None and c != 0 else None) + + # CCC + cccList: list[float | None] = [] + for dso, dio, dpo in zip(dsoList, dioList, dpoList): + if all(v is not None for v in (dso, dio, dpo)): + cccList.append(dso + dio - dpo) + else: + cccList.append(None) + + parts: list[str] = [] + + # DSO 추세 + cleanDso = [v for v in dsoList if v is not None] + if len(cleanDso) >= 2: + dsoDiff = cleanDso[-1] - cleanDso[-2] + if abs(dsoDiff) > 3: + label = "악화" if dsoDiff > 0 else "개선" + parts.append(f"DSO {cleanDso[-2]:.0f}일→{cleanDso[-1]:.0f}일({label})") + else: + parts.append(f"DSO {cleanDso[-1]:.0f}일(안정)") + + # DIO 추세 (개별 표시) + cleanDio = [v for v in dioList if v is not None] + if len(cleanDio) >= 2: + dioDiff = cleanDio[-1] - cleanDio[-2] + if abs(dioDiff) > 3: + label = "재고부담 증가" if dioDiff > 0 else "재고 효율화" + parts.append(f"DIO {cleanDio[-2]:.0f}일→{cleanDio[-1]:.0f}일({label})") + else: + parts.append(f"DIO {cleanDio[-1]:.0f}일(안정)") + + # DPO 추세 (개별 표시) + cleanDpo = [v for v in dpoList if v is not None] + if len(cleanDpo) >= 2: + dpoDiff = cleanDpo[-1] - cleanDpo[-2] + if abs(dpoDiff) > 3: + label = "지급 지연" if dpoDiff > 0 else "조기 지급" + parts.append(f"DPO {cleanDpo[-2]:.0f}일→{cleanDpo[-1]:.0f}일({label})") + + # CCC 추세 + cleanCcc = [v for v in cccList if v is not None] + if len(cleanCcc) >= 2: + cccDiff = cleanCcc[-1] - cleanCcc[-2] + if abs(cccDiff) > 5: + label = "연장" if cccDiff > 0 else "단축" + parts.append(f"CCC {cleanCcc[-2]:.0f}일→{cleanCcc[-1]:.0f}일({cccDiff:+.0f}일 {label})") + # CCC 마이너스 해석 (선수금·선결제 비즈니스) + if cleanCcc[-1] < 0: + parts.append(f"CCC 음수({cleanCcc[-1]:.0f}일) — 매입채무 지급 전 현금 회수, 운전자본 우위 구조") + elif len(cleanCcc) == 1 and cleanCcc[0] < 0: + parts.append(f"CCC 음수({cleanCcc[0]:.0f}일) — 운전자본 우위 구조") + + # 운전자본 절대치 추이 + salesClean = [v for v in sales if v is not None] + arClean = [v for v in receivables if v is not None] if receivables else [] + invClean = [v for v in inventories if v is not None] if inventories else [] + apClean = [v for v in payables if v is not None] if payables else [] + if arClean and invClean and apClean: + nwc = (arClean[-1] + invClean[-1]) - apClean[-1] + nwcRatio = nwc / salesClean[-1] * 100 if salesClean and salesClean[-1] > 0 else None + if nwcRatio is not None: + parts.append(f"순운전자본/매출 {nwcRatio:.1f}%") + + # 매출 증가 + CCC 악화 교차 + if len(salesClean) >= 2 and salesClean[-1] > salesClean[-2] and cleanCcc and len(cleanCcc) >= 2: + if cleanCcc[-1] > cleanCcc[-2]: + parts.append("매출 증가에도 운전자본 부담 확대") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "warning" if any("악화" in p or "확대" in p or "부담 증가" in p for p in parts) else "neutral" + return NarrativeParagraph(dimension="efficiency", title="운전자본 효율성", body=body, severity=severity) + + +def _analyzeSegments(inp: _Input) -> NarrativeParagraph | None: + """사업부문 분석 — 비중 + 집중도.""" + import polars as pl + + segDf = inp.segmentsDf + if segDf is None or not isinstance(segDf, pl.DataFrame) or segDf.is_empty(): + return None + + cols = segDf.columns + numCols = [c for c in cols if c not in ("부문", "segment", "항목", "구분") and not c.startswith("__")] + if not numCols: + return None + + latestCol = numCols[-1] + nameCol = cols[0] + rows = segDf.to_dicts() + + segments: list[dict] = [] + total = 0.0 + for row in rows: + name = row.get(nameCol, "") + if not name or "합계" in str(name) or "소계" in str(name): + continue + val = row.get(latestCol) + if val is not None and isinstance(val, (int, float)): + total += abs(val) + segments.append({"name": name, "value": val}) + + if not segments or total == 0: + return None + + parts: list[str] = [] + # 비중 상위 3개 + sorted_segs = sorted(segments, key=lambda s: abs(s["value"]), reverse=True) + topParts = [] + for seg in sorted_segs[:4]: + share = abs(seg["value"]) / total * 100 + topParts.append(f"{seg['name']} {share:.0f}%") + parts.append(f"사업구성: {', '.join(topParts)}") + + # 집중도 + maxShare = max(abs(s["value"]) / total * 100 for s in segments) + if maxShare > 70: + topName = sorted_segs[0]["name"] + parts.append(f"{topName} 비중 {maxShare:.0f}% — 단일 부문 의존 구조") + elif maxShare < 30 and len(segments) >= 3: + parts.append("사업 다각화 구조") + + # 기간별 비중 변화 (2개 이상 기간) + if len(numCols) >= 2: + prevCol = numCols[-2] + prevTotal = 0.0 + prevMap: dict[str, float] = {} + for row in rows: + name = row.get(nameCol, "") + if not name or "합계" in str(name): + continue + val = row.get(prevCol) + if val is not None and isinstance(val, (int, float)): + prevTotal += abs(val) + prevMap[name] = val + + if prevTotal > 0: + bigShift = [] + for seg in sorted_segs[:3]: + currShare = abs(seg["value"]) / total * 100 + prevVal = prevMap.get(seg["name"]) + if prevVal is not None: + prevShare = abs(prevVal) / prevTotal * 100 + shiftPp = currShare - prevShare + if abs(shiftPp) > 3: + bigShift.append(f"{seg['name']} {_pp(shiftPp)}") + if bigShift: + parts.append(f"비중 변화: {', '.join(bigShift)}") + + body = ". ".join(parts) + "." + severity = "warning" if maxShare > 70 else "neutral" + return NarrativeParagraph(dimension="segment", title="사업부문 분석", body=body, severity=severity) + + +def _analyzeSectorRelative(inp: _Input) -> NarrativeParagraph | None: + """섹터 상대 포지셔닝 — PER/PBR vs 섹터 + ROE 대비.""" + md = inp.marketData + sp = inp.sectorParams + bench = inp.sectorBenchmark + if md is None or sp is None: + return None + + parts: list[str] = [] + + # PER 비교 + perMultiple = getattr(sp, "perMultiple", None) + if md.per is not None and perMultiple is not None and perMultiple > 0: + discount = (md.per - perMultiple) / perMultiple * 100 + label = "할인" if discount < 0 else "할증" + sectorLabel = getattr(sp, "label", "업종") + parts.append(f"PER {md.per:.1f}배 vs {sectorLabel} 평균 {perMultiple:.1f}배 = {abs(discount):.0f}% {label}") + + # PBR 비교 + pbrMultiple = getattr(sp, "pbrMultiple", None) + if md.pbr is not None and pbrMultiple is not None and pbrMultiple > 0: + discount = (md.pbr - pbrMultiple) / pbrMultiple * 100 + label = "할인" if discount < 0 else "할증" + parts.append(f"PBR {md.pbr:.2f}배 vs 업종 {pbrMultiple:.1f}배({abs(discount):.0f}% {label})") + + # ROE vs 업종 대비 밸류에이션 정당성 + if bench is not None and md.per is not None and perMultiple is not None: + roeMedian = getattr(bench, "roeMedian", None) + dp = inp.dupont + roeLast = None + if dp and dp.roe: + roeLast = next((v for v in reversed(dp.roe) if v is not None), None) + if roeLast is not None: + roeLast = roeLast * 100 + + if roeLast is not None and roeMedian is not None: + perDiscount = md.per < perMultiple + roeAbove = roeLast > roeMedian + if perDiscount and roeAbove: + parts.append(f"ROE({roeLast:.1f}%)가 업종 중앙값({roeMedian:.1f}%) 상회하나 PER 할인 — 저평가 가능성") + elif not perDiscount and not roeAbove: + parts.append(f"ROE({roeLast:.1f}%)가 업종 중앙값({roeMedian:.1f}%) 하회하나 PER 할증 — 프리미엄 과도") + + if not parts: + return None + body = ". ".join(parts) + "." + # PER 할인 + ROE 우수 → positive + severity = "neutral" + if md.per is not None and perMultiple is not None: + if md.per < perMultiple * 0.8: + severity = "positive" + elif md.per > perMultiple * 1.2: + severity = "negative" + return NarrativeParagraph(dimension="sectorRelative", title="섹터 상대 포지셔닝", body=body, severity=severity) + + +# ══════════════════════════════════════ +# BS/CF 심층 + 3표 연결 (Stage 1 v4) +# ══════════════════════════════════════ + + +def _analyzeBalanceSheetStructure(inp: _Input) -> NarrativeParagraph | None: + """자산구성 분석 — 유동/비유동 비중, 유형 vs 무형, 추세.""" + totalAssets = _getVals(inp.aSeries, "BS", "total_assets") + currentAssets = _getVals(inp.aSeries, "BS", "total_current_assets") + nonCurrentAssets = _getVals(inp.aSeries, "BS", "total_non_current_assets") + tangible = _getVals(inp.aSeries, "BS", "property_plant_and_equipment") + intangible = _getVals(inp.aSeries, "BS", "intangible_assets") + if not totalAssets or len(totalAssets) < 2: + return None + + ta = [v for v in totalAssets if v is not None] + if len(ta) < 2 or ta[-1] == 0: + return None + + parts: list[str] = [] + + # 자산 규모 변동 + taGr = (ta[-1] - ta[-2]) / abs(ta[-2]) * 100 + parts.append(f"총자산 {ta[-1] / 1e8:,.0f}억(전년 대비 {taGr:+.1f}%)") + + # 유동/비유동 비중 + ca = _lastN(currentAssets, 1) + nca = _lastN(nonCurrentAssets, 1) + if ca and nca and ta[-1] > 0: + caRatio = ca[-1] / ta[-1] * 100 + ncaRatio = nca[-1] / ta[-1] * 100 + parts.append(f"유동 {caRatio:.0f}% / 비유동 {ncaRatio:.0f}%") + + # 유형 vs 무형 + tanClean = _lastN(tangible, 1) + intClean = _lastN(intangible, 1) + if tanClean and ta[-1] > 0: + tanRatio = tanClean[-1] / ta[-1] * 100 + intRatio = intClean[-1] / ta[-1] * 100 if intClean else 0 + if tanRatio > 30: + parts.append(f"유형자산 비중 {tanRatio:.0f}% — 자본집약적 구조") + if intRatio > 15: + parts.append(f"무형자산 비중 {intRatio:.0f}% — 지식자산 기반") + + # 자산 성장 vs 매출 성장 교차 + sales = _getVals(inp.aSeries, "IS", "sales") + salesClean = [v for v in sales if v is not None] if sales else [] + if len(salesClean) >= 2 and salesClean[-2] != 0: + salesGr = (salesClean[-1] - salesClean[-2]) / abs(salesClean[-2]) * 100 + if taGr > salesGr + 10: + parts.append("자산증가율이 매출증가율 상회 — 자산효율 하락 주의") + + # 감가상각률 변동 (Lens 4 — 이익의 질) + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + tanVals = [v for v in tangible if v is not None] if tangible else [] + depVals = [v for v in depreciation if v is not None] if depreciation else [] + if len(tanVals) >= 2 and len(depVals) >= 2: + depRatePrev = abs(depVals[-2]) / tanVals[-2] * 100 if tanVals[-2] > 0 else None + depRateCurr = abs(depVals[-1]) / tanVals[-1] * 100 if tanVals[-1] > 0 else None + if depRatePrev is not None and depRateCurr is not None: + depDiff = depRateCurr - depRatePrev + if abs(depDiff) > 2: + label = "감가상각 강화" if depDiff > 0 else "감가상각 완화(이익 부풀리기 가능성)" + parts.append(f"감가상각률 {depRateCurr:.1f}%({_pp(depDiff)}, {label})") + + # 이연법인세 추세 (BS) + deferredTax = _getVals(inp.aSeries, "BS", "deferred_tax_liabilities") + if not deferredTax: + deferredTax = _getVals(inp.aSeries, "BS", "deferred_tax_assets") + dtVals = [v for v in deferredTax if v is not None] if deferredTax else [] + if len(dtVals) >= 2 and ta[-1] > 0: + dtRatioCurr = dtVals[-1] / ta[-1] * 100 + dtRatioPrev = dtVals[-2] / ta[-2] * 100 if len(ta) >= 2 and ta[-2] > 0 else None + if dtRatioPrev is not None and abs(dtRatioCurr - dtRatioPrev) > 0.5: + dtDiff = dtRatioCurr - dtRatioPrev + parts.append(f"이연법인세 비중 {dtRatioCurr:.1f}%({_pp(dtDiff)}) — 세무·회계 차이 변동 주시") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + if taGr > 20: + severity = "warning" + elif taGr < -10: + severity = "negative" + return NarrativeParagraph( + dimension="bsStructure", + title="자산구성 분석", + body=body, + severity=severity, + ) + + +def _analyzeDebtStructure(inp: _Input) -> NarrativeParagraph | None: + """부채구조 분석 — 부채비율, 차입금 의존도, 이자보상배율.""" + totalLiab = _getVals(inp.aSeries, "BS", "total_liabilities") + totalEquity = _getVals(inp.aSeries, "BS", "total_equity") + shortBorrow = _getVals(inp.aSeries, "BS", "short_term_borrowings") + longBorrow = _getVals(inp.aSeries, "BS", "long_term_borrowings") + totalAssets = _getVals(inp.aSeries, "BS", "total_assets") + op = _getVals(inp.aSeries, "IS", "operating_profit") + interest = _getVals(inp.aSeries, "IS", "interest_expense") + if not interest: + interest = _getVals(inp.aSeries, "IS", "finance_costs") + + if not totalLiab or len(totalLiab) < 2: + return None + + parts: list[str] = [] + + # 부채비율 추세 + debtRatioList = [] + for tl, te in zip(totalLiab, totalEquity): + debtRatioList.append(tl / te * 100 if tl is not None and te is not None and te != 0 else None) + cleanDr = [v for v in debtRatioList if v is not None] + if len(cleanDr) >= 2: + drDiff = cleanDr[-1] - cleanDr[-2] + direction, count = _consecutiveDirection(debtRatioList) + if count >= 3: + label = "상승" if direction == "up" else "하락" + parts.append(f"부채비율 {count}년 연속 {label}({cleanDr[-1]:.0f}%)") + else: + parts.append(f"부채비율 {cleanDr[-1]:.0f}%(전년 {cleanDr[-2]:.0f}%, {_pp(drDiff)})") + + # 차입금 의존도 + if shortBorrow and longBorrow and totalAssets: + latestShort = _lastN(shortBorrow, 1) + latestLong = _lastN(longBorrow, 1) + latestTa = _lastN(totalAssets, 1) + if latestShort and latestLong and latestTa and latestTa[-1] > 0: + borrowTotal = (latestShort[-1] or 0) + (latestLong[-1] or 0) + borrowDep = borrowTotal / latestTa[-1] * 100 + parts.append(f"차입금 의존도 {borrowDep:.1f}%") + if borrowDep > 30: + parts.append("차입금 의존도 과다 — 금리 변동 리스크") + + # 이자보상배율 + latestIcr = None + if interest and op: + pairs = [(o, i) for o, i in zip(op, interest) if o is not None and i is not None and i != 0] + if pairs: + latestOp, latestInt = pairs[-1] + latestIcr = latestOp / abs(latestInt) + parts.append(f"이자보상배율 {latestIcr:.1f}배") + if latestIcr < 1.5: + parts.append("이자보상배율 위험 수준 — 이자비용 충당 불안") + + # Net Debt/EBITDA (Lens 10 — 부채상환능력 핵심) + cash = _getVals(inp.aSeries, "BS", "cash_and_cash_equivalents") + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + cashLast = next((v for v in reversed(cash) if v is not None), None) if cash else None + opLast = next((v for v in reversed(op) if v is not None), None) if op else None + depLast = next((v for v in reversed(depreciation) if v is not None), None) if depreciation else None + borrowLast = None + if shortBorrow and longBorrow: + sb = next((v for v in reversed(shortBorrow) if v is not None), 0) + lb = next((v for v in reversed(longBorrow) if v is not None), 0) + borrowLast = (sb or 0) + (lb or 0) + if borrowLast is not None and cashLast is not None and opLast is not None and depLast is not None: + netDebt = borrowLast - cashLast + ebitda = opLast + abs(depLast) + if ebitda > 0: + ndEbitda = netDebt / ebitda + parts.append(f"Net Debt/EBITDA {ndEbitda:.1f}배") + if ndEbitda > 4: + parts.append("Net Debt/EBITDA 4배 초과 — 부채 부담 과중") + elif ndEbitda < 0: + parts.append("순현금 상태 — 실질 무차입 경영") + + # CF/Debt (OCF/총부채) + ocf = _getVals(inp.aSeries, "CF", "operating_cashflow") + ocfLast = next((v for v in reversed(ocf) if v is not None), None) if ocf else None + liabLast = next((v for v in reversed(totalLiab) if v is not None), None) + if ocfLast is not None and liabLast is not None and liabLast > 0: + cfDebt = ocfLast / liabLast * 100 + parts.append(f"OCF/총부채 {cfDebt:.1f}%") + if cfDebt < 10: + parts.append("현금흐름 대비 부채 과중 — 상환여력 미흡") + + # Fixed Charge Coverage (영업이익 / (이자비용 + 리스비용)) + # 리스비용 별도 계정 없으면 이자보상배율로 대체 (이미 계산) + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + if cleanDr and cleanDr[-1] > 200: + severity = "negative" + elif cleanDr and cleanDr[-1] < 50: + severity = "positive" + return NarrativeParagraph( + dimension="debtStructure", + title="부채구조 분석", + body=body, + severity=severity, + ) + + +def _analyzeLiquidity(inp: _Input) -> NarrativeParagraph | None: + """유동성 분석 — 유동비율, 당좌비율, 현금 대비 단기차입금.""" + currentAssets = _getVals(inp.aSeries, "BS", "total_current_assets") + currentLiab = _getVals(inp.aSeries, "BS", "total_current_liabilities") + inventories = _getVals(inp.aSeries, "BS", "inventories") + cash = _getVals(inp.aSeries, "BS", "cash_and_cash_equivalents") + shortBorrow = _getVals(inp.aSeries, "BS", "short_term_borrowings") + + if not currentAssets or not currentLiab or len(currentAssets) < 2: + return None + + parts: list[str] = [] + + # 유동비율 추세 + crList = [] + for ca, cl in zip(currentAssets, currentLiab): + crList.append(ca / cl * 100 if ca is not None and cl is not None and cl != 0 else None) + cleanCr = [v for v in crList if v is not None] + if len(cleanCr) >= 2: + crDiff = cleanCr[-1] - cleanCr[-2] + parts.append(f"유동비율 {cleanCr[-1]:.0f}%(전년 {cleanCr[-2]:.0f}%, {_pp(crDiff)})") + + # 당좌비율 + if inventories: + qrList = [] + for ca, cl, inv in zip(currentAssets, currentLiab, inventories): + if all(v is not None for v in (ca, cl, inv)) and cl != 0: + qrList.append((ca - inv) / cl * 100) + else: + qrList.append(None) + cleanQr = [v for v in qrList if v is not None] + if cleanQr: + parts.append(f"당좌비율 {cleanQr[-1]:.0f}%") + + # 현금 대비 단기차입금 + if cash and shortBorrow: + cashClean = _lastN(cash, 1) + sbClean = _lastN(shortBorrow, 1) + if cashClean and sbClean and sbClean[-1] > 0: + cashCover = cashClean[-1] / sbClean[-1] + parts.append(f"현금/단기차입금 {cashCover:.1f}배") + if cashCover < 0.5: + parts.append("단기차입금 대비 현금 부족 — 유동성 리스크") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + if cleanCr and cleanCr[-1] < 100: + severity = "negative" + elif cleanCr and cleanCr[-1] > 200: + severity = "positive" + return NarrativeParagraph( + dimension="liquidity", + title="유동성 분석", + body=body, + severity=severity, + ) + + +def _analyzeCapitalChange(inp: _Input) -> NarrativeParagraph | None: + """자본변동 분석 — 이익잉여금 축적, 자사주/배당, 유상증자.""" + totalEquity = _getVals(inp.aSeries, "BS", "total_equity") + retainedEarnings = _getVals(inp.aSeries, "BS", "retained_earnings") + shareCapital = _getVals(inp.aSeries, "BS", "share_capital") + if not shareCapital: + shareCapital = _getVals(inp.aSeries, "BS", "capital_stock") + treasuryStock = _getVals(inp.aSeries, "BS", "treasury_stock") + if not treasuryStock: + treasuryStock = _getVals(inp.aSeries, "BS", "treasury_shares") + + if not totalEquity or len(totalEquity) < 2: + return None + + teClean = [v for v in totalEquity if v is not None] + if len(teClean) < 2: + return None + + parts: list[str] = [] + + # 자본 증감 + teGr = (teClean[-1] - teClean[-2]) / abs(teClean[-2]) * 100 + parts.append(f"자기자본 {teClean[-1] / 1e8:,.0f}억(전년 대비 {teGr:+.1f}%)") + + # 이익잉여금 추세 + reClean = [v for v in retainedEarnings if v is not None] if retainedEarnings else [] + if len(reClean) >= 2: + reGr = (reClean[-1] - reClean[-2]) / abs(reClean[-2]) * 100 if reClean[-2] != 0 else 0 + parts.append(f"이익잉여금 {reGr:+.1f}% 변동") + if reGr < -5: + parts.append("이익잉여금 감소 — 배당/자사주 또는 결손 영향") + + # 유상증자 감지 + scClean = [v for v in shareCapital if v is not None] if shareCapital else [] + if len(scClean) >= 2 and scClean[-2] != 0: + scGr = (scClean[-1] - scClean[-2]) / abs(scClean[-2]) * 100 + if scGr > 5: + parts.append(f"자본금 {scGr:+.1f}% 증가 — 유상증자 가능성") + + # 자사주 변동 + tsClean = [v for v in treasuryStock if v is not None] if treasuryStock else [] + if len(tsClean) >= 2: + tsDiff = tsClean[-1] - tsClean[-2] + if tsDiff < -1e9: # 10억 이상 증가 (자사주는 음수이므로 감소 = 매입) + parts.append("자사주 매입 확대 — 주주환원 강화 시그널") + elif tsDiff > 1e9: + parts.append("자사주 처분 — 희석 가능성") + + # Shareholder Yield (배당 + 자사주 매입) / 시가총액 (Lens 6) + ni = _getVals(inp.aSeries, "IS", "net_profit") + niClean = [v for v in ni if v is not None] if ni else [] + dividendPaid = _getVals(inp.aSeries, "CF", "dividends_paid") + if not dividendPaid: + dividendPaid = _getVals(inp.aSeries, "CF", "dividend_paid") + divClean = [v for v in dividendPaid if v is not None] if dividendPaid else [] + if divClean and niClean: + payoutRatio = abs(divClean[-1]) / niClean[-1] * 100 if niClean[-1] and niClean[-1] > 0 else None + if payoutRatio is not None: + parts.append(f"배당성향 {payoutRatio:.0f}%") + if payoutRatio > 80: + parts.append("배당성향 과다 — 재투자 여력 부족 가능") + elif payoutRatio < 10 and niClean[-1] > 0: + parts.append("배당성향 매우 낮음 — 내부유보 위주 경영") + + # Owner Earnings 추정 (= NI + D&A - maintenance CAPEX) + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + capex = _getVals(inp.aSeries, "CF", "capital_expenditure") + if not capex: + capex = _getVals(inp.aSeries, "CF", "acquisition_of_property_plant_and_equipment") + depAll = [v for v in depreciation if v is not None] if depreciation else [] + capexAll = [v for v in capex if v is not None] if capex else [] + if niClean and depAll and capexAll: + # maintenance capex ≈ 감가상각 수준으로 추정 + ownerEarnings = niClean[-1] + abs(depAll[-1]) - abs(capexAll[-1]) + if niClean[-1] != 0: + oeRatio = ownerEarnings / niClean[-1] + if abs(oeRatio) > 0.1: + parts.append(f"Owner Earnings/NI {oeRatio:.2f}") + if oeRatio > 1.3: + parts.append("유지보수 CAPEX 이하 투자 — 현금 창출 우수") + elif oeRatio < 0.3: + parts.append("성장투자로 현금 대부분 소진 — 주주 환원 여력 제한적") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "positive" if teGr > 10 else "neutral" if teGr > 0 else "negative" + return NarrativeParagraph( + dimension="capitalChange", + title="자본변동 분석", + body=body, + severity=severity, + ) + + +def _analyzeCashflowDeep(inp: _Input) -> NarrativeParagraph | None: + """현금흐름 심층 — 투자CF/재무CF 분해, FCF 추세, 배당 여력.""" + ocf = _getVals(inp.aSeries, "CF", "operating_cashflow") + icf = _getVals(inp.aSeries, "CF", "investing_cashflow") + if not icf: + icf = _getVals(inp.aSeries, "CF", "investing_activities") + fcf_cf = _getVals(inp.aSeries, "CF", "financing_cashflow") + if not fcf_cf: + fcf_cf = _getVals(inp.aSeries, "CF", "financing_activities") + capex = _getVals(inp.aSeries, "CF", "capital_expenditure") + if not capex: + capex = _getVals(inp.aSeries, "CF", "acquisition_of_property_plant_and_equipment") + dividend = _getVals(inp.aSeries, "CF", "dividends_paid") + + if not ocf or len(ocf) < 2: + return None + + parts: list[str] = [] + + # OCF/NI 비율 (기존 cashflowQuality 기능 포함) + eq = inp.earningsQuality + if eq and eq.cfToNi is not None: + ratio = eq.cfToNi + if ratio > 1.2: + parts.append(f"OCF/순이익 {ratio:.1f}배 — 현금 뒷받침 우수") + elif ratio > 0.8: + parts.append(f"OCF/순이익 {ratio:.1f}배 — 보통 수준") + elif ratio > 0: + parts.append(f"OCF/순이익 {ratio:.1f}배 — 현금 뒷받침 미흡") + else: + parts.append(f"OCF/순이익 {ratio:.1f}배 — 영업현금흐름 적자") + + # OCF 추세 + ocfClean = [v for v in ocf if v is not None] + if len(ocfClean) >= 2: + trendDir = _trend(ocf) + if trendDir == "improving": + parts.append("영업CF 지속 개선 추세") + elif trendDir == "deteriorating": + parts.append("영업CF 지속 악화 추세 — 현금창출 능력 점검 필요") + + # FCF 추세 (OCF - CAPEX) + if capex: + fcfList = [] + for o, c in zip(ocf, capex): + if o is not None and c is not None: + fcfList.append(o - abs(c)) + else: + fcfList.append(None) + fcfClean = [v for v in fcfList if v is not None] + if len(fcfClean) >= 2: + parts.append(f"FCF {fcfClean[-1] / 1e8:,.0f}억(전년 {fcfClean[-2] / 1e8:,.0f}억)") + if fcfClean[-1] < 0: + parts.append("FCF 적자 — 투자 부담 과다") + fcfTrend = _trend(fcfList) + if fcfTrend == "deteriorating" and fcfClean[-1] > 0: + parts.append("FCF 감소 추세 주의") + + # 배당 여력 (FCF / 배당) + if capex and dividend: + pairs = [(o, c, d) for o, c, d in zip(ocf, capex, dividend) if all(v is not None for v in (o, c, d))] + if pairs: + latestOcf, latestCapex, latestDiv = pairs[-1] + fcf = latestOcf - abs(latestCapex) + if latestDiv != 0: + divCover = fcf / abs(latestDiv) + parts.append(f"배당 커버리지(FCF/배당) {divCover:.1f}배") + if divCover < 1: + parts.append("FCF로 배당 충당 불가 — 배당 지속성 의문") + + # CF 라이프사이클 스테이지 판별 (Lens 5) + latestOcfVal = _lastN(ocf, 1) + latestIcfVal = _lastN(icf, 1) if icf else [] + latestFcfVal = _lastN(fcf_cf, 1) if fcf_cf else [] + if latestOcfVal and latestIcfVal and latestFcfVal: + oSign = latestOcfVal[-1] is not None and latestOcfVal[-1] > 0 + iSign = latestIcfVal[-1] is not None and latestIcfVal[-1] > 0 + fSign = latestFcfVal[-1] is not None and latestFcfVal[-1] > 0 + if oSign and not iSign and fSign: + parts.append("CF패턴 [+,-,+] 성장기 — 영업흑자, 투자확대, 외부조달") + elif oSign and not iSign and not fSign: + parts.append("CF패턴 [+,-,-] 성숙기 — 자체 현금으로 투자와 주주환원 병행") + elif not oSign and iSign and not fSign: + parts.append("CF패턴 [-,+,-] 쇠퇴기 — 영업적자, 자산매각, 부채상환") + elif not oSign and not iSign and fSign: + parts.append("CF패턴 [-,-,+] 도입기 — 적자, 투자중, 외부조달 의존") + elif not oSign and iSign and fSign: + parts.append("CF패턴 [-,+,+] 구조조정 — 자산매각 + 외부조달로 적자 보전") + + if not parts: + return None + body = ". ".join(parts) + "." + cfSev = "positive" + if eq and eq.cfToNi is not None: + if eq.cfToNi < 0.5: + cfSev = "negative" + elif eq.cfToNi < 0.8: + cfSev = "warning" + elif eq.cfToNi < 1.2: + cfSev = "neutral" + return NarrativeParagraph( + dimension="cashflowDeep", + title="현금흐름 심층분석", + body=body, + severity=cfSev, + ) + + +def _analyzeIsToCs(inp: _Input) -> NarrativeParagraph | None: + """3표 연결 — IS 순이익 vs CF OCF 괴리 분석.""" + ni = _getVals(inp.aSeries, "IS", "net_profit") + ocf = _getVals(inp.aSeries, "CF", "operating_cashflow") + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + + if not ni or not ocf or len(ni) < 2: + return None + + parts: list[str] = [] + + # OCF - NI 갭 추세 + gapList = [] + for n, o in zip(ni, ocf): + if n is not None and o is not None: + gapList.append(o - n) + else: + gapList.append(None) + + gapClean = [v for v in gapList if v is not None] + if len(gapClean) >= 2: + latestGap = gapClean[-1] + if abs(latestGap) > 1e9: # 10억 이상 차이 + direction = "초과" if latestGap > 0 else "부족" + parts.append(f"영업CF가 순이익 대비 {abs(latestGap) / 1e8:,.0f}억 {direction}") + + # OCF/NI 비율 추세 (시계열) + ratioList = [] + for n, o in zip(ni, ocf): + ratioList.append(o / n if n is not None and o is not None and n != 0 else None) + ratioClean = [v for v in ratioList if v is not None] + if len(ratioClean) >= 3: + rTrend = _trend(ratioList) + if rTrend == "deteriorating": + parts.append("OCF/순이익 비율 추세적 하락 — 이익의 질 저하 경고") + elif rTrend == "improving": + parts.append("OCF/순이익 비율 추세적 개선 — 이익의 질 향상") + + # 감가상각 기여 + if depreciation: + depClean = _lastN(depreciation, 2) + niClean = _lastN(ni, 2) + if len(depClean) >= 1 and len(niClean) >= 1 and niClean[-1] is not None and niClean[-1] != 0: + depToNi = abs(depClean[-1]) / abs(niClean[-1]) + if depToNi > 0.5: + parts.append(f"감가상각이 순이익의 {depToNi:.0%} — 비현금비용 기여 큼(OCF 양호 원인)") + + # Earnings Persistence (현금이익 비중, Lens 4) + # 현금이익 = OCF, 발생이익 = NI - OCF + niAll = [v for v in ni if v is not None] + ocfAll = [v for v in ocf if v is not None] + if len(niAll) >= 3 and len(ocfAll) >= 3: + cashEarningsRatios = [] + for n, o in zip(niAll, ocfAll): + if n != 0: + cashEarningsRatios.append(o / n) + if len(cashEarningsRatios) >= 3: + avgCer = sum(cashEarningsRatios) / len(cashEarningsRatios) + if avgCer > 1.2: + parts.append(f"평균 OCF/NI {avgCer:.2f} — 높은 이익의 질(Earnings Persistence 양호)") + elif avgCer < 0.5: + parts.append(f"평균 OCF/NI {avgCer:.2f} — 발생이익 의존도 높음(이익 지속성 취약)") + + # incomeQualityRatio (직전 기간) + if ratioClean: + latest = ratioClean[-1] + parts.append(f"이익품질비율(OCF/NI) {latest:.2f}") + if latest < 0: + parts.append("OCF 적자 — 이익의 현금 전환 실패") + elif latest > 2.0: + parts.append("OCF가 순이익의 2배 이상 — 비현금비용 또는 운전자본 환입 효과 큼") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + if ratioClean and ratioClean[-1] < 0.5: + severity = "warning" + elif ratioClean and ratioClean[-1] > 1.5: + severity = "positive" + return NarrativeParagraph( + dimension="isToCs", + title="손익↔현금흐름 연결분석", + body=body, + severity=severity, + ) + + +def _analyzeCfToBs(inp: _Input) -> NarrativeParagraph | None: + """3표 연결 — CF 투자활동 → BS 유형자산, CF 재무활동 → BS 차입금.""" + capex = _getVals(inp.aSeries, "CF", "capital_expenditure") + if not capex: + capex = _getVals(inp.aSeries, "CF", "acquisition_of_property_plant_and_equipment") + depreciation = _getVals(inp.aSeries, "CF", "depreciation_and_amortization") + if not depreciation: + depreciation = _getVals(inp.aSeries, "IS", "depreciation") + tangible = _getVals(inp.aSeries, "BS", "property_plant_and_equipment") + shortBorrow = _getVals(inp.aSeries, "BS", "short_term_borrowings") + longBorrow = _getVals(inp.aSeries, "BS", "long_term_borrowings") + + parts: list[str] = [] + + # CAPEX vs 감가상각 (유지보수 투자 수준) + if capex and depreciation: + pairs = [(c, d) for c, d in zip(capex, depreciation) if c is not None and d is not None and d != 0] + if pairs: + latestCapex, latestDep = pairs[-1] + capexToDep = abs(latestCapex) / abs(latestDep) + parts.append(f"CAPEX/감가상각 {capexToDep:.1f}배") + if capexToDep > 2.0: + parts.append("감가상각의 2배 이상 투자 — 적극적 확장투자") + elif capexToDep < 0.8: + parts.append("감가상각 미만 투자 — 설비 노후화 리스크") + + # BS 유형자산 변동 vs CAPEX 규모 + if tangible and capex: + tanClean = [v for v in tangible if v is not None] + capexClean = [v for v in capex if v is not None] + if len(tanClean) >= 2 and capexClean: + tanChange = tanClean[-1] - tanClean[-2] + latestCapex = abs(capexClean[-1]) + if latestCapex > 0: + retentionRate = tanChange / latestCapex + if retentionRate < 0: + parts.append("CAPEX 투입에도 유형자산 순감소 — 처분 또는 감가상각 과대") + + # BS 차입금 변동 + if shortBorrow and longBorrow: + totalBorrowList = [] + for sb, lb in zip(shortBorrow, longBorrow): + if sb is not None and lb is not None: + totalBorrowList.append(sb + lb) + else: + totalBorrowList.append(None) + bClean = [v for v in totalBorrowList if v is not None] + if len(bClean) >= 2: + bGr = (bClean[-1] - bClean[-2]) / abs(bClean[-2]) * 100 if bClean[-2] != 0 else 0 + if abs(bGr) > 15: + direction = "증가" if bGr > 0 else "감소" + parts.append(f"총차입금 {bGr:+.1f}% {direction}") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + return NarrativeParagraph( + dimension="cfToBs", + title="현금흐름↔재무상태 연결분석", + body=body, + severity=severity, + ) + + +def _analyzeIsToBs(inp: _Input) -> NarrativeParagraph | None: + """3표 연결 — IS 순이익 → BS 이익잉여금, 매출 → 매출채권/재고 비례.""" + ni = _getVals(inp.aSeries, "IS", "net_profit") + retainedEarnings = _getVals(inp.aSeries, "BS", "retained_earnings") + sales = _getVals(inp.aSeries, "IS", "sales") + receivables = _getVals(inp.aSeries, "BS", "trade_receivable") + if not receivables: + receivables = _getVals(inp.aSeries, "BS", "trade_and_other_receivables") + inventories = _getVals(inp.aSeries, "BS", "inventories") + + if not ni or not sales or len(ni) < 2: + return None + + parts: list[str] = [] + + # 순이익 → 이익잉여금 축적률 + if retainedEarnings: + reClean = [v for v in retainedEarnings if v is not None] + niClean = [v for v in ni if v is not None] + if len(reClean) >= 2 and len(niClean) >= 1 and niClean[-1] != 0: + reChange = reClean[-1] - reClean[-2] + retentionRate = reChange / niClean[-1] * 100 if niClean[-1] != 0 else 0 + if retentionRate > 0: + parts.append(f"순이익 중 {retentionRate:.0f}%가 잉여금으로 축적(배당성향 {100 - retentionRate:.0f}%)") + elif retentionRate < -20: + parts.append("이익잉여금 감소 — 순이익 대비 과도한 유출") + + # 매출 증가 vs 매출채권 증가 비례 여부 + salesClean = [v for v in sales if v is not None] + arClean = [v for v in receivables if v is not None] if receivables else [] + if len(salesClean) >= 2 and len(arClean) >= 2: + salesGr = (salesClean[-1] - salesClean[-2]) / abs(salesClean[-2]) * 100 if salesClean[-2] != 0 else 0 + arGr = (arClean[-1] - arClean[-2]) / abs(arClean[-2]) * 100 if arClean[-2] != 0 else 0 + gap = arGr - salesGr + if gap > 20: + parts.append( + f"매출채권 증가율({arGr:+.1f}%)이 매출 증가율({salesGr:+.1f}%)을 크게 상회 — 수금 악화 또는 채널 스터핑 주의" + ) + elif gap < -20 and arGr < 0: + parts.append("매출채권 감소율이 매출 대비 과도 — 공격적 회수 또는 매출 구조 변화") + + # 매출 증가 vs 재고 증가 비례 여부 + invClean = [v for v in inventories if v is not None] if inventories else [] + if len(salesClean) >= 2 and len(invClean) >= 2: + salesGr = (salesClean[-1] - salesClean[-2]) / abs(salesClean[-2]) * 100 if salesClean[-2] != 0 else 0 + invGr = (invClean[-1] - invClean[-2]) / abs(invClean[-2]) * 100 if invClean[-2] != 0 else 0 + gap = invGr - salesGr + if gap > 15: + parts.append(f"재고 증가율({invGr:+.1f}%)이 매출 증가율({salesGr:+.1f}%)을 상회 — 재고 리스크 주의") + + # Revenue Recognition Quality — 지수 분석 (Lens 4 강화) + # 매출채권지수(DSRI) = (AR/Sales)t / (AR/Sales)t-1 + if len(salesClean) >= 2 and len(arClean) >= 2 and salesClean[-2] > 0 and salesClean[-1] > 0: + arRatioPrev = arClean[-2] / salesClean[-2] + arRatioCurr = arClean[-1] / salesClean[-1] + if arRatioPrev > 0: + dsri = arRatioCurr / arRatioPrev + if dsri > 1.2: + parts.append(f"매출채권지수(DSRI) {dsri:.2f} — 매출 대비 매출채권 비정상 팽창, 매출 인식 공격성 주의") + elif dsri < 0.8: + parts.append(f"매출채권지수(DSRI) {dsri:.2f} — 매출 대비 회수 효율 개선") + + # 재고자산지수 = (Inv/Sales)t / (Inv/Sales)t-1 + if len(salesClean) >= 2 and len(invClean) >= 2 and salesClean[-2] > 0 and salesClean[-1] > 0: + invRatioPrev = invClean[-2] / salesClean[-2] + invRatioCurr = invClean[-1] / salesClean[-1] + if invRatioPrev > 0: + invIdx = invRatioCurr / invRatioPrev + if invIdx > 1.2: + parts.append(f"재고자산지수 {invIdx:.2f} — 매출 대비 재고 과잉 축적, 수요 둔화 또는 과잉 생산 신호") + + if not parts: + return None + body = ". ".join(parts) + "." + # severity 판단 + severity = "neutral" + for p in parts: + if "주의" in p or "과도" in p or "악화" in p: + severity = "warning" + break + return NarrativeParagraph( + dimension="isToBs", + title="손익↔재무상태 연결분석", + body=body, + severity=severity, + ) + + +def _analyzeEarningsManipulation(inp: _Input) -> NarrativeParagraph | None: + """Beneish M-Score 8변수 개별 분석 (Lens 4 — 이익의 질).""" + eq = inp.earningsQuality + if eq is None: + return None + mScore = getattr(eq, "beneishMScore", None) + if mScore is None: + return None + + parts: list[str] = [] + flagged = mScore < -1.78 # 조작 가능성 임계값 + + if mScore < -2.22: + parts.append(f"Beneish M-Score {mScore:.2f} — 이익 조작 가능성 낮음(양호)") + elif mScore < -1.78: + parts.append(f"Beneish M-Score {mScore:.2f} — 경계 구간(주의 관찰)") + else: + parts.append(f"Beneish M-Score {mScore:.2f} — 이익 조작 가능성 높음(경고)") + + # ratios에서 Beneish 세부 변수 추출 시도 (BeneishDetail이 있으면) + ratios = inp.ratios + if ratios is not None: + # ratios 객체에서 개별 Beneish 지표 확인 + bd = getattr(ratios, "beneishDetail", None) + if bd is not None and hasattr(bd, "dsri"): + warnings = [] + if bd.dsri is not None and bd.dsri > 1.465: + warnings.append(f"DSRI {bd.dsri:.2f}(매출채권지수 경고)") + if bd.gmi is not None and bd.gmi > 1.193: + warnings.append(f"GMI {bd.gmi:.2f}(매출총이익지수 경고)") + if bd.aqi is not None and bd.aqi > 1.254: + warnings.append(f"AQI {bd.aqi:.2f}(자산품질지수 경고)") + if bd.sgi is not None and bd.sgi > 1.607: + warnings.append(f"SGI {bd.sgi:.2f}(매출성장지수 경고)") + if bd.depi is not None and bd.depi > 1.077: + warnings.append(f"DEPI {bd.depi:.2f}(감가상각지수 경고)") + if bd.tata is not None and bd.tata > 0.018: + warnings.append(f"TATA {bd.tata:.3f}(발생이익비율 경고)") + if warnings: + parts.append("개별 경고: " + ", ".join(warnings)) + + # aSeries 기반 직접 계산 (ratios에 BeneishDetail 없을 때) + if len(parts) == 1: # mScore만 있고 세부 없으면 직접 계산 + sales = _getVals(inp.aSeries, "IS", "sales") + cogs = _getVals(inp.aSeries, "IS", "cost_of_sales") + receivables = _getVals(inp.aSeries, "BS", "trade_receivable") + if not receivables: + receivables = _getVals(inp.aSeries, "BS", "trade_and_other_receivables") + sClean = [v for v in sales if v is not None] if sales else [] + cClean = [v for v in cogs if v is not None] if cogs else [] + arClean = [v for v in receivables if v is not None] if receivables else [] + if len(sClean) >= 2 and len(arClean) >= 2: + arSalesRatioPrev = arClean[-2] / sClean[-2] if sClean[-2] > 0 else 0 + arSalesRatioCurr = arClean[-1] / sClean[-1] if sClean[-1] > 0 else 0 + if arSalesRatioPrev > 0: + dsri = arSalesRatioCurr / arSalesRatioPrev + if dsri > 1.465: + parts.append(f"DSRI {dsri:.2f} — 매출 대비 매출채권 비정상 팽창") + if len(sClean) >= 2 and len(cClean) >= 2: + gmPrev = (sClean[-2] - cClean[-2]) / sClean[-2] if sClean[-2] > 0 else 0 + gmCurr = (sClean[-1] - cClean[-1]) / sClean[-1] if sClean[-1] > 0 else 0 + if gmCurr > 0: + gmi = gmPrev / gmCurr + if gmi > 1.193: + parts.append(f"GMI {gmi:.2f} — 매출총이익률 악화(이익 품질 저하)") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "warning" if flagged else "positive" if mScore < -2.22 else "neutral" + return NarrativeParagraph( + dimension="earningsManipulation", title="이익조작 감지(Beneish)", body=body, severity=severity + ) + + +def _analyzeDistressModels(inp: _Input) -> NarrativeParagraph | None: + """부실예측 다중모델 교차판정 (Lens 10+15).""" + ratios = inp.ratios + if ratios is None: + return None + + parts: list[str] = [] + models: dict[str, str] = {} # 모델명 → safe/warning/danger + + # Piotroski F-Score + piotroski = getattr(ratios, "piotroskiFScore", None) + if piotroski is not None: + if piotroski >= 7: + models["Piotroski"] = "safe" + parts.append(f"Piotroski F-Score {piotroski}/9(건전)") + elif piotroski <= 3: + models["Piotroski"] = "danger" + parts.append(f"Piotroski F-Score {piotroski}/9(위험)") + else: + models["Piotroski"] = "neutral" + parts.append(f"Piotroski F-Score {piotroski}/9(보통)") + + # Altman Z-Score + altmanZ = getattr(ratios, "altmanZScore", None) + if altmanZ is not None: + if altmanZ > 2.99: + models["Altman"] = "safe" + parts.append(f"Altman Z-Score {altmanZ:.2f}(안전)") + elif altmanZ < 1.81: + models["Altman"] = "danger" + parts.append(f"Altman Z-Score {altmanZ:.2f}(부실 위험)") + else: + models["Altman"] = "neutral" + parts.append(f"Altman Z-Score {altmanZ:.2f}(회색지대)") + + # Altman Z''-Score (신흥시장) + altmanZpp = getattr(ratios, "altmanZppScore", None) + if altmanZpp is not None: + if altmanZpp > 2.6: + models["Altman-Z''"] = "safe" + elif altmanZpp < 1.1: + models["Altman-Z''"] = "danger" + else: + models["Altman-Z''"] = "neutral" + parts.append(f"Z''-Score {altmanZpp:.2f}") + + # Beneish M-Score (이미 earningsManipulation에서 분석하지만, 부실 관점 교차) + eq = inp.earningsQuality + beneish = getattr(eq, "beneishMScore", None) if eq else None + if beneish is not None: + if beneish < -2.22: + models["Beneish"] = "safe" + elif beneish > -1.78: + models["Beneish"] = "danger" + else: + models["Beneish"] = "neutral" + + if not models: + return None + + # 교차판정 — 모델 간 consensus / disagreement + safeCount = sum(1 for v in models.values() if v == "safe") + dangerCount = sum(1 for v in models.values() if v == "danger") + total = len(models) + + if total >= 2: + if safeCount == total: + parts.append(f"부실모델 {total}개 전원 안전 판정 — 재무건전성 높음") + elif dangerCount == total: + parts.append(f"부실모델 {total}개 전원 위험 판정 — 심각한 부실 신호") + elif dangerCount > 0 and safeCount > 0: + parts.append( + f"모델 간 disagreement(안전 {safeCount} vs 위험 {dangerCount}/{total}) — 불확실성 구간, 심층 분석 필요" + ) + elif dangerCount > 0: + parts.append(f"부실 위험 모델 {dangerCount}개 경고 — 재무 안정성 점검 필요") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = ( + "negative" + if dangerCount >= 2 + else "warning" + if dangerCount >= 1 + else "positive" + if safeCount == total + else "neutral" + ) + return NarrativeParagraph(dimension="distressModels", title="부실예측 다중모델", body=body, severity=severity) + + +def _analyzeCostStructure(inp: _Input) -> NarrativeParagraph | None: + """비용 구조 분해 — costByNature 기반 원재료/인건비/감가상각 비중 (Lens 13).""" + costDf = inp.costByNatureDf + if costDf is None: + return None + + try: + import polars as pl + + if not isinstance(costDf, pl.DataFrame) or len(costDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = costDf.columns + + sales = _getVals(inp.aSeries, "IS", "sales") + salesClean = [v for v in sales if v is not None] if sales else [] + + # 주요 비용 항목 비중 추출 + for keyword, label in [ + ("원재료", "원재료비"), + ("인건비", "인건비"), + ("감가상각", "감가상각비"), + ("외주", "외주가공비"), + ("연료", "연료비"), + ]: + matchCols = [c for c in cols if keyword in c] + if not matchCols: + continue + try: + vals = costDf[matchCols[0]].to_list() + cleanVals = [v for v in vals if v is not None] + if len(cleanVals) >= 2: + # 매출 대비 비중 계산 + if salesClean and len(salesClean) >= len(cleanVals): + latestRatio = cleanVals[-1] / salesClean[-1] * 100 if salesClean[-1] > 0 else None + prevRatio = ( + cleanVals[-2] / salesClean[-(len(cleanVals))] * 100 + if len(salesClean) >= 2 and salesClean[-2] > 0 + else None + ) + if latestRatio is not None: + if prevRatio is not None: + diff = latestRatio - prevRatio + parts.append(f"{label}/매출 {latestRatio:.1f}%({_pp(diff)})") + else: + parts.append(f"{label}/매출 {latestRatio:.1f}%") + elif len(cleanVals) >= 2: + diff = cleanVals[-1] - cleanVals[-2] + pctDiff = diff / abs(cleanVals[-2]) * 100 if cleanVals[-2] != 0 else 0 + parts.append(f"{label} {pctDiff:+.1f}% 변동") + except (AttributeError, ValueError, IndexError): + continue + + # 고정비/변동비 추이 추정 (원재료=변동, 감가상각+인건비=고정) + fixedCols = [c for c in cols if any(k in c for k in ("인건비", "감가상각", "임차"))] + variableCols = [c for c in cols if any(k in c for k in ("원재료", "외주", "연료"))] + if fixedCols and variableCols and salesClean: + try: + fixedTotal = sum(costDf[c].to_list()[-1] or 0 for c in fixedCols) + variableTotal = sum(costDf[c].to_list()[-1] or 0 for c in variableCols) + total = fixedTotal + variableTotal + if total > 0: + fixedRatio = fixedTotal / total * 100 + parts.append(f"고정비 비중 추정 {fixedRatio:.0f}% / 변동비 {100 - fixedRatio:.0f}%") + except (AttributeError, ValueError, IndexError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + return NarrativeParagraph(dimension="costStructure", title="비용 구조 분석", body=body, severity="neutral") + + +def _analyzeSalesOrder(inp: _Input) -> NarrativeParagraph | None: + """수주잔고 분석 — Book-to-Bill, 수주 추이 (Lens 13).""" + soDf = inp.salesOrderDf + if soDf is None: + return None + + try: + import polars as pl + + if not isinstance(soDf, pl.DataFrame) or len(soDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = soDf.columns + + # 수주잔고 컬럼 탐색 + backlogCol = None + for c in cols: + cl = c.lower() + if "잔고" in c or "backlog" in cl: + backlogCol = c + elif "수주" in c and "잔고" not in c or "order" in cl: + pass + elif "매출" in c or "sales" in cl or "납품" in c: + pass + + if backlogCol: + try: + vals = soDf[backlogCol].to_list() + cleanVals = [v for v in vals if v is not None] + if len(cleanVals) >= 2: + diff = (cleanVals[-1] - cleanVals[-2]) / abs(cleanVals[-2]) * 100 if cleanVals[-2] != 0 else 0 + parts.append(f"수주잔고 {cleanVals[-1] / 1e8:,.0f}억(전년 대비 {diff:+.1f}%)") + + # Book-to-Bill 비율 (수주잔고/매출) + sales = _getVals(inp.aSeries, "IS", "sales") + salesClean = [v for v in sales if v is not None] if sales else [] + if salesClean and len(cleanVals) >= 1 and salesClean[-1] > 0: + btb = cleanVals[-1] / salesClean[-1] + parts.append(f"수주잔고/매출 비율 {btb:.2f}") + if btb > 1.5: + parts.append("수주잔고 풍부 — 1.5년 이상 매출 보장") + elif btb < 0.3: + parts.append("수주잔고 부족 — 매출 가시성 낮음") + except (AttributeError, ValueError, IndexError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + severity = ( + "positive" if any("풍부" in p for p in parts) else "warning" if any("부족" in p for p in parts) else "neutral" + ) + return NarrativeParagraph(dimension="salesOrder", title="수주잔고 분석", body=body, severity=severity) + + +def _analyzeProductMix(inp: _Input) -> NarrativeParagraph | None: + """제품별 매출 구성 + 비중 변화 (Lens 13).""" + psDf = inp.productServiceDf + if psDf is None: + return None + + try: + import polars as pl + + if not isinstance(psDf, pl.DataFrame) or len(psDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = psDf.columns + + # 제품명 + 매출 컬럼 탐색 + nameCol = None + valCols = [] + for c in cols: + cl = c.lower() + if "품목" in c or "제품" in c or "부문" in c or "product" in cl or "name" in cl: + nameCol = c + elif any(ch.isdigit() for ch in c) or "매출" in c or "금액" in c or "sales" in cl: + valCols.append(c) + + if nameCol and valCols: + try: + names = psDf[nameCol].to_list() + latestCol = valCols[-1] + vals = psDf[latestCol].to_list() + total = sum(v for v in vals if v is not None and v > 0) + if total > 0: + items = [(n, v) for n, v in zip(names, vals) if v is not None and v > 0] + items.sort(key=lambda x: x[1], reverse=True) + topItems = items[:3] + topParts = [f"{n} {v / total * 100:.0f}%" for n, v in topItems] + parts.append(f"제품 구성: {', '.join(topParts)}") + + # 집중도 + if items: + topShare = items[0][1] / total * 100 + if topShare > 60: + parts.append(f"최대 제품 비중 {topShare:.0f}% — 높은 제품 집중 리스크") + + # 비중 변화 (이전 기간 데이터 있으면) + if len(valCols) >= 2: + prevCol = valCols[-2] + prevVals = psDf[prevCol].to_list() + prevTotal = sum(v for v in prevVals if v is not None and v > 0) + if prevTotal > 0 and items: + topName = items[0][0] + topIdx = names.index(topName) if topName in names else -1 + if topIdx >= 0 and topIdx < len(prevVals) and prevVals[topIdx] is not None: + prevShare = prevVals[topIdx] / prevTotal * 100 + currShare = items[0][1] / total * 100 + shareDiff = currShare - prevShare + if abs(shareDiff) > 3: + parts.append(f"주력제품 비중 {_pp(shareDiff)}") + except (AttributeError, ValueError, IndexError, KeyError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "warning" if any("리스크" in p for p in parts) else "neutral" + return NarrativeParagraph(dimension="productMix", title="제품별 매출 구성", body=body, severity=severity) + + +def _analyzeQuarterlyMomentum(inp: _Input) -> NarrativeParagraph | None: + """분기별 손익 QoQ/YoY + 계절성 패턴 (Lens 13).""" + qDf = inp.quarterlyIsDf + if qDf is None: + return None + + try: + import polars as pl + + if not isinstance(qDf, pl.DataFrame) or len(qDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = qDf.columns + + # 분기별 매출/영업이익 시계열 추출 + salesCol = None + for c in cols: + cl = c.lower() + if "매출" in c and "원가" not in c or "sales" in cl or "revenue" in cl: + salesCol = c + elif "영업이익" in c or "operating" in cl: + pass + elif "기간" in c or "period" in cl or "quarter" in cl or "분기" in c: + pass + + if salesCol: + try: + qSales = qDf[salesCol].to_list() + cleanQ = [v for v in qSales if v is not None] + if len(cleanQ) >= 4: + # 최근 4분기 QoQ 추세 + latest = cleanQ[-1] + prev = cleanQ[-2] + qoq = (latest - prev) / abs(prev) * 100 if prev != 0 else 0 + parts.append(f"직전분기 매출 QoQ {qoq:+.1f}%") + + # YoY (4분기 전 대비) + if len(cleanQ) >= 5: + yoyBase = cleanQ[-5] + if yoyBase != 0: + yoy = (latest - yoyBase) / abs(yoyBase) * 100 + parts.append(f"YoY {yoy:+.1f}%") + + # 계절성 패턴 (Q4 > Q1 패턴 등) + if len(cleanQ) >= 8: + # 최근 2년 분기별 평균으로 계절성 탐지 + q4s = [cleanQ[i] for i in range(3, len(cleanQ), 4)] + q1s = [cleanQ[i] for i in range(0, len(cleanQ), 4)] + if q4s and q1s: + avgQ4 = sum(q4s) / len(q4s) + avgQ1 = sum(q1s) / len(q1s) + if avgQ4 > avgQ1 * 1.3: + parts.append("Q4 매출 집중 패턴 — 연말 계절성") + elif avgQ1 > avgQ4 * 1.3: + parts.append("Q1 매출 집중 패턴 — 연초 계절성") + except (AttributeError, ValueError, IndexError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + return NarrativeParagraph(dimension="quarterlyMomentum", title="분기별 모멘텀", body=body, severity=severity) + + +def _analyzeBusinessStrategy(inp: _Input) -> NarrativeParagraph | None: + """정량→정성 브릿지 — 수치 패턴에서 사업 전략 자동 분류 (Lens 13 종합).""" + sales = _getVals(inp.aSeries, "IS", "sales") + op = _getVals(inp.aSeries, "IS", "operating_profit") + cogs = _getVals(inp.aSeries, "IS", "cost_of_sales") + if not sales or len(sales) < 3: + return None + + sClean = [v for v in sales if v is not None] + opClean = [v for v in op if v is not None] if op else [] + cClean = [v for v in cogs if v is not None] if cogs else [] + if len(sClean) < 3: + return None + + parts: list[str] = [] + + # 매출 CAGR + salesCagr = ((sClean[-1] / sClean[0]) ** (1 / (len(sClean) - 1)) - 1) * 100 if sClean[0] > 0 else 0 + + # OPM 수준 + latestOpm = opClean[-1] / sClean[-1] * 100 if opClean and sClean[-1] > 0 else 0 + + # 원가율 추이 + cogsRatio = cClean[-1] / sClean[-1] * 100 if cClean and sClean[-1] > 0 else 0 + prevCogsRatio = cClean[-2] / sClean[-2] * 100 if len(cClean) >= 2 and len(sClean) >= 2 and sClean[-2] > 0 else 0 + + # 전략 분류 + strategy = "" + if salesCagr > 15 and latestOpm > 15: + strategy = "고성장·고마진(프리미엄/기술주도형)" + elif salesCagr > 15 and latestOpm <= 5: + strategy = "고성장·저마진(시장점유율 확대전략)" + elif salesCagr < 3 and latestOpm > 15: + strategy = "안정형·고마진(캐시카우/니치마켓)" + elif salesCagr < 3 and latestOpm <= 5: + strategy = "저성장·저마진(원가경쟁/구조조정 필요)" + elif salesCagr > 5 and cogsRatio < prevCogsRatio - 2: + strategy = "수익구조 개선형(원가 절감 성과)" + elif salesCagr > 5 and cogsRatio > prevCogsRatio + 2: + strategy = "외형 확대형(원가 전가 미흡)" + elif abs(salesCagr) <= 5 and latestOpm > 5: + strategy = "안정 성숙형(매출 보합, 수익성 유지)" + else: + strategy = "전환기(명확한 패턴 미형성)" + + parts.append(f"사업전략 유형: {strategy}") + parts.append(f"매출 CAGR {salesCagr:.1f}%, 영업이익률 {latestOpm:.1f}%") + + # segments 정보가 있으면 포트폴리오 판단 + segDf = inp.segmentsDf + if segDf is not None: + try: + import polars as pl + + if isinstance(segDf, pl.DataFrame) and len(segDf) >= 2: + # 부문수 + parts.append(f"사업부문 {len(segDf)}개 운영") + except (ImportError, AttributeError): + pass + + body = ". ".join(parts) + "." + severity = ( + "positive" + if "고성장·고마진" in strategy or "개선형" in strategy + else "warning" + if "저성장·저마진" in strategy + else "neutral" + ) + return NarrativeParagraph(dimension="businessStrategy", title="사업전략 분류", body=body, severity=severity) + + +def _analyzeHumanCapital(inp: _Input) -> NarrativeParagraph | None: + """인적자본 분석 — 1인당 매출/영업이익, 직원수 추이 (Lens 14).""" + empDf = inp.employeeDf + if empDf is None: + return None + + try: + import polars as pl + + if not isinstance(empDf, pl.DataFrame) or len(empDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = empDf.columns + + # 직원수 컬럼 탐색 + headcountCol = None + for c in cols: + if "직원" in c or "인원" in c or "총원" in c or "headcount" in c.lower(): + headcountCol = c + break + + if headcountCol: + try: + headcounts = empDf[headcountCol].to_list() + hClean = [v for v in headcounts if v is not None and v > 0] + if hClean: + sales = _getVals(inp.aSeries, "IS", "sales") + op = _getVals(inp.aSeries, "IS", "operating_profit") + sClean = [v for v in sales if v is not None] if sales else [] + opClean = [v for v in op if v is not None] if op else [] + + # 1인당 매출 + if sClean: + perCapSales = sClean[-1] / hClean[-1] / 1e8 # 억 단위 + parts.append(f"직원수 {hClean[-1]:,.0f}명, 1인당 매출 {perCapSales:.1f}억") + + # 1인당 영업이익 + if opClean and hClean: + perCapOp = opClean[-1] / hClean[-1] / 1e8 + parts.append(f"1인당 영업이익 {perCapOp:.2f}억") + + # 추이 + if len(hClean) >= 2: + hGr = (hClean[-1] - hClean[-2]) / abs(hClean[-2]) * 100 + parts.append(f"직원수 {hGr:+.1f}% 변동") + if sClean and len(sClean) >= 2 and sClean[-2] > 0: + sGr = (sClean[-1] - sClean[-2]) / abs(sClean[-2]) * 100 + if hGr > sGr + 5: + parts.append("직원 증가율 > 매출 증가율 — 생산성 하락 추세") + elif sGr > hGr + 5: + parts.append("매출 증가율 > 직원 증가율 — 생산성 향상") + except (AttributeError, ValueError, IndexError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "warning" if any("하락" in p for p in parts) else "neutral" + return NarrativeParagraph(dimension="humanCapital", title="인적자본 분석", body=body, severity=severity) + + +def _analyzeRndEfficiency(inp: _Input) -> NarrativeParagraph | None: + """R&D 투자 효율성 — R&D/매출 비율, 추이 (Lens 14).""" + rndDf = inp.rndDf + if rndDf is None: + return None + + try: + import polars as pl + + if not isinstance(rndDf, pl.DataFrame) or len(rndDf) == 0: + return None + except ImportError: + return None + + parts: list[str] = [] + cols = rndDf.columns + + # R&D 금액 컬럼 탐색 + rndCol = None + for c in cols: + cl = c.lower() + if "연구" in c or "개발" in c or "r&d" in cl or "rnd" in cl or "금액" in c: + rndCol = c + break + + if rndCol: + try: + rndVals = rndDf[rndCol].to_list() + rClean = [v for v in rndVals if v is not None and v > 0] + if rClean: + sales = _getVals(inp.aSeries, "IS", "sales") + sClean = [v for v in sales if v is not None] if sales else [] + + if sClean and sClean[-1] > 0: + rndIntensity = rClean[-1] / sClean[-1] * 100 + parts.append(f"R&D/매출 {rndIntensity:.1f}%") + if rndIntensity > 10: + parts.append("R&D 집약적 — 기술주도형 기업") + elif rndIntensity < 1: + parts.append("R&D 투자 미미") + + # R&D 지출 추이 + if len(rClean) >= 2: + rGr = (rClean[-1] - rClean[-2]) / abs(rClean[-2]) * 100 + parts.append(f"R&D 지출 {rGr:+.1f}% 변동") + + # R&D 투자 대비 매출 증가 효율 + if sClean and len(sClean) >= 2 and sClean[-2] > 0: + sGr = (sClean[-1] - sClean[-2]) / abs(sClean[-2]) * 100 + if rGr > 20 and sGr < 5: + parts.append("R&D 대폭 확대에도 매출 정체 — 투자 회수 시차 또는 효율성 점검 필요") + elif sGr > 10 and rGr > 10: + parts.append("R&D 확대 + 매출 성장 동반 — 투자 효율 양호") + except (AttributeError, ValueError, IndexError): + pass + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + return NarrativeParagraph(dimension="rndEfficiency", title="R&D 투자 효율", body=body, severity=severity) + + +def _analyzeValueCreation(inp: _Input) -> NarrativeParagraph | None: + """EVA + 가치창출 판정 — ROIC vs WACC (Lens 6).""" + op = _getVals(inp.aSeries, "IS", "operating_profit") + totalAssets = _getVals(inp.aSeries, "BS", "total_assets") + currentLiab = _getVals(inp.aSeries, "BS", "current_liabilities") + if not currentLiab: + currentLiab = _getVals(inp.aSeries, "BS", "total_current_liabilities") + cash = _getVals(inp.aSeries, "BS", "cash_and_cash_equivalents") + totalEquity = _getVals(inp.aSeries, "BS", "total_equity") + totalLiab = _getVals(inp.aSeries, "BS", "total_liabilities") + + opClean = [v for v in op if v is not None] if op else [] + taClean = [v for v in totalAssets if v is not None] if totalAssets else [] + clClean = [v for v in currentLiab if v is not None] if currentLiab else [] + cashClean = [v for v in cash if v is not None] if cash else [] + teClean = [v for v in totalEquity if v is not None] if totalEquity else [] + tlClean = [v for v in totalLiab if v is not None] if totalLiab else [] + + if not opClean or not taClean or len(opClean) < 2: + return None + + parts: list[str] = [] + taxRate = 0.22 # 법인세율 22% 근사 + + # Invested Capital = 총자산 - 유동부채 - 현금 + cl = clClean[-1] if clClean else 0 + ca = cashClean[-1] if cashClean else 0 + ic = taClean[-1] - cl - ca + if ic <= 0: + return None + + nopat = opClean[-1] * (1 - taxRate) + roic = nopat / ic * 100 + + # WACC 추정 (간이: 자기자본비용 10% + 부채비용 3% × (1-세율)) + equity = teClean[-1] if teClean else 0 + debt = tlClean[-1] if tlClean else 0 + totalCap = equity + debt + if totalCap > 0 and equity > 0: + equityWeight = equity / totalCap + debtWeight = debt / totalCap + costOfEquity = 0.10 # 주주 기대수익률 10% 근사 + costOfDebt = 0.03 # 세후 부채비용 3% 근사 + wacc = (equityWeight * costOfEquity + debtWeight * costOfDebt * (1 - taxRate)) * 100 + else: + wacc = 8.0 # 기본값 + + # EVA = NOPAT - IC × WACC + eva = nopat - ic * (wacc / 100) + evaBillions = eva / 1e8 # 억 단위 + + parts.append(f"ROIC {roic:.1f}% vs WACC {wacc:.1f}%") + spread = roic - wacc + if spread > 3: + parts.append(f"경제적 부가가치(EVA) {evaBillions:+,.0f}억 — 가치 창출 기업") + elif spread > 0: + parts.append(f"EVA {evaBillions:+,.0f}억 — 소폭 가치 창출") + elif spread > -3: + parts.append(f"EVA {evaBillions:+,.0f}억 — 가치 중립(WACC 근접)") + else: + parts.append(f"EVA {evaBillions:+,.0f}억 — 가치 파괴(자본비용 미달)") + + # ROIC 추이 (2년 이상) + if len(opClean) >= 2 and len(taClean) >= 2: + prevCl = clClean[-2] if len(clClean) >= 2 else 0 + prevCash = cashClean[-2] if len(cashClean) >= 2 else 0 + prevIc = taClean[-2] - prevCl - prevCash + if prevIc > 0: + prevNopat = opClean[-2] * (1 - taxRate) + prevRoic = prevNopat / prevIc * 100 + roicDiff = roic - prevRoic + if abs(roicDiff) > 1: + label = "자본효율 개선" if roicDiff > 0 else "자본효율 악화" + parts.append(f"ROIC {_pp(roicDiff)}({label})") + + body = ". ".join(parts) + "." + severity = "positive" if spread > 3 else "neutral" if spread > 0 else "warning" if spread > -3 else "negative" + return NarrativeParagraph(dimension="valueCreation", title="가치창출 분석(EVA)", body=body, severity=severity) + + +def _analyzeIndexTrend(inp: _Input) -> NarrativeParagraph | None: + """지수형 분석 — 기준년=100, 주요 계정 추이, 비정상 팽창 감지 (Lens 3).""" + sales = _getVals(inp.aSeries, "IS", "sales") + op = _getVals(inp.aSeries, "IS", "operating_profit") + ni = _getVals(inp.aSeries, "IS", "net_profit") + receivables = _getVals(inp.aSeries, "BS", "trade_receivable") + if not receivables: + receivables = _getVals(inp.aSeries, "BS", "trade_and_other_receivables") + inventories = _getVals(inp.aSeries, "BS", "inventories") + totalAssets = _getVals(inp.aSeries, "BS", "total_assets") + totalEquity = _getVals(inp.aSeries, "BS", "total_equity") + + if not sales or len(sales) < 3: + return None + + def _toIndex(vals: list[float | None]) -> list[float | None]: + """첫 non-None 값 = 100 기준 지수 변환.""" + base = next((v for v in vals if v is not None and v != 0), None) + if base is None: + return [None] * len(vals) + return [round(v / base * 100, 1) if v is not None else None for v in vals] + + salesIdx = _toIndex(sales) + opIdx = _toIndex(op) if op else [] + _toIndex(ni) if ni else [] + arIdx = _toIndex(receivables) if receivables else [] + invIdx = _toIndex(inventories) if inventories else [] + taIdx = _toIndex(totalAssets) if totalAssets else [] + teIdx = _toIndex(totalEquity) if totalEquity else [] + + parts: list[str] = [] + + # 매출 지수 추이 + salesIdxClean = [v for v in salesIdx if v is not None] + if len(salesIdxClean) >= 3: + parts.append(f"매출지수 {salesIdxClean[0]:.0f}→{salesIdxClean[-1]:.0f}(기준년=100)") + + # 매출채권지수 vs 매출지수 괴리 + arIdxClean = [v for v in arIdx if v is not None] + if len(arIdxClean) >= 3 and len(salesIdxClean) >= 3: + arGap = arIdxClean[-1] - salesIdxClean[-1] + if arGap > 30: + parts.append( + f"매출채권지수({arIdxClean[-1]:.0f})가 매출지수({salesIdxClean[-1]:.0f}) 대비 {arGap:.0f}p 초과 팽창 — 수금 악화 또는 매출 인식 공격성" + ) + elif arGap < -30: + parts.append(f"매출채권지수가 매출 대비 {abs(arGap):.0f}p 축소 — 회수 효율 개선") + + # 재고지수 vs 매출지수 괴리 + invIdxClean = [v for v in invIdx if v is not None] + if len(invIdxClean) >= 3 and len(salesIdxClean) >= 3: + invGap = invIdxClean[-1] - salesIdxClean[-1] + if invGap > 30: + parts.append( + f"재고지수({invIdxClean[-1]:.0f})가 매출지수 대비 {invGap:.0f}p 초과 팽창 — 재고 과잉 축적 경고" + ) + + # 영업이익지수 vs 매출지수 (마진 변화 시각화) + opIdxClean = [v for v in opIdx if v is not None] + if len(opIdxClean) >= 3 and len(salesIdxClean) >= 3: + opGap = opIdxClean[-1] - salesIdxClean[-1] + if opGap > 20: + parts.append(f"영업이익지수({opIdxClean[-1]:.0f})가 매출지수 상회 — 수익성 레버리지 확대") + elif opGap < -20: + parts.append(f"영업이익지수({opIdxClean[-1]:.0f})가 매출지수 하회 — 마진 압축") + + # 자산지수 vs 자본지수 (레버리지 변화) + taIdxClean = [v for v in taIdx if v is not None] + teIdxClean = [v for v in teIdx if v is not None] + if len(taIdxClean) >= 3 and len(teIdxClean) >= 3: + leverageGap = taIdxClean[-1] - teIdxClean[-1] + if leverageGap > 30: + parts.append(f"자산지수({taIdxClean[-1]:.0f}) vs 자본지수({teIdxClean[-1]:.0f}) 괴리 확대 — 레버리지 증가") + + if not parts: + return None + body = ". ".join(parts) + "." + severity = "neutral" + for p in parts: + if "경고" in p or "공격성" in p or "과잉" in p: + severity = "warning" + break + return NarrativeParagraph(dimension="indexTrend", title="지수형 추세 분석", body=body, severity=severity) + + +# ══════════════════════════════════════ +# 교차참조 + 전망 +# ══════════════════════════════════════ + + +def _detectCrossReferences(paragraphs: list[NarrativeParagraph]) -> list[str]: + """차원 간 교차 패턴 탐지.""" + dimMap = {p.dimension: p for p in paragraphs} + refs: list[str] = [] + + margin = dimMap.get("margin") + eff = dimMap.get("efficiency") + growth = dimMap.get("growth") + cf = dimMap.get("cashflowDeep") or dimMap.get("cashflow") + dp = dimMap.get("dupont") + sector = dimMap.get("sectorRelative") + segment = dimMap.get("segment") + bs = dimMap.get("bsStructure") + debt = dimMap.get("debtStructure") + liq = dimMap.get("liquidity") + isToCs = dimMap.get("isToCs") + isToBs = dimMap.get("isToBs") + + # ── 기존 교차 패턴 ── + if margin and eff and margin.severity == "positive" and eff.severity == "warning": + refs.append("마진 개선에도 운전자본 효율 악화 — 실질 현금 수익성 점검 필요") + + if growth and cf and growth.severity == "positive" and cf.severity in ("negative", "warning"): + refs.append("매출 성장 대비 현금창출 부족 — 성장의 지속가능성 의문") + + if dp and sector and "레버리지 주도" in dp.body and sector.severity == "positive": + refs.append("레버리지 의존 수익구조가 밸류에이션 할인의 원인일 수 있음") + + if segment and growth and segment.severity == "warning" and growth.severity == "positive": + refs.append("성장이 단일 부문에 집중 — 해당 부문 둔화 시 전체 실적 급락 리스크") + + if eff and cf and eff.severity == "warning" and cf.severity in ("negative", "warning"): + refs.append("운전자본 비효율과 현금흐름 부진 동반 — 유동성 관리 강화 필요") + + if margin and growth and margin.severity == "negative" and growth.severity == "negative": + refs.append("마진과 성장 동시 악화 — 구조적 수익성 하락 우려") + + # ── v4 3표 연결 교차 패턴 ── + if bs and growth and bs.severity == "warning" and growth.severity in ("negative", "neutral"): + refs.append("자산 증가에도 매출 정체 — 투자 효율성 점검 필요") + + if cf and debt and cf.severity in ("negative", "warning") and debt.body and "증가" in debt.body: + refs.append("영업현금 부진 + 차입금 증가 — 적자 보전 차입 가능성") + + if isToCs and isToCs.severity == "warning" and growth and growth.severity == "positive": + refs.append("이익 증가에도 현금흐름 악화 — 이익의 질 의문") + + if liq and liq.severity == "negative" and cf and cf.severity in ("negative", "warning"): + refs.append("유동성 악화 + 현금흐름 부진 — 단기 자금 경색 리스크") + + if margin and isToBs and margin.severity == "positive" and isToBs.severity == "warning": + refs.append("마진 개선에도 매출채권/재고 과잉 — 채널 스터핑 의심") + + # ── v5 신규 교차 패턴 (Phase 7) ── + costStr = dimMap.get("costStructure") + salesOrder = dimMap.get("salesOrder") + quarterly = dimMap.get("quarterlyMomentum") + employee = dimMap.get("humanCapital") + rnd = dimMap.get("rndEfficiency") + distress = dimMap.get("distressModels") + beneish = dimMap.get("earningsManipulation") + valueCreation = dimMap.get("valueCreation") + indexTrend = dimMap.get("indexTrend") + dimMap.get("businessStrategy") + + # segment + margin: 고마진 부문 비중 하락 + if ( + segment + and margin + and segment.body + and "비중 하락" in segment.body + and margin.severity in ("negative", "warning") + ): + refs.append("고마진 부문 비중 하락 → 전체 이익률 압박") + + # costStructure + margin: 원재료비 비중 상승 + 마진 축소 + if costStr and margin and costStr.body and "원재료" in costStr.body and margin.severity in ("negative", "warning"): + refs.append("원재료비 비중 상승 + 마진 축소 → 원가 전가 실패") + + # salesOrder + growth: 수주잔고 증가 + 매출 정체 + if salesOrder and growth and salesOrder.severity == "positive" and growth.severity in ("negative", "neutral"): + refs.append("수주잔고 증가 + 매출 정체 → 생산능력 병목 또는 인식 시차") + + # quarterly + cashflow: Q4 매출 집중 + OCF 우수 + if quarterly and cf and quarterly.body and "Q4" in quarterly.body and cf.severity == "positive": + refs.append("Q4 매출 집중 + OCF 우수 → 건전한 계절성") + + # employee + growth: 직원 증가율 > 매출 증가율 + if employee and growth and employee.body and "생산성 하락" in employee.body: + refs.append("직원 증가율 > 매출 증가율 → 생산성 하락 추세, 인력 효율화 필요") + + # rnd + margin: R&D 투자 확대 + 마진 유지 + if rnd and margin and rnd.body and "확대" in rnd.body and margin.severity in ("positive", "neutral"): + refs.append("R&D 투자 확대 + 마진 유지 → 기술 투자 효율적") + + # distress + debt: 부실 모델 disagreement + 부채 비율 높음 + if distress and debt and "disagreement" in (distress.body or "") and debt.severity in ("negative", "warning"): + refs.append("부실 모델 disagreement + 부채 비율 높음 → 불확실성 구간") + + # beneish + isToCs: Beneish 경고 + OCF/NI 괴리 + if beneish and isToCs and beneish.severity == "warning" and isToCs.severity == "warning": + refs.append("Beneish 경고 + OCF/NI 괴리 → 이익 품질 심층 검토 필요") + + # costStructure + segment: 비용 구조 변화 + 부문 비중 변화 + if costStr and segment and costStr.body and segment.body: + refs.append("비용 구조 변화 + 부문 비중 변화 → 포트폴리오 전환 진행 가능성") + + # valueCreation + growth: 가치 파괴 + 성장 + if valueCreation and growth and valueCreation.severity in ("warning", "negative") and growth.severity == "positive": + refs.append("매출 성장에도 EVA 부진 → 자본비용 초과 투자, 성장의 질 의문") + + # indexTrend + isToBs: 매출채권 비정상 팽창 교차 확인 + if indexTrend and isToBs and indexTrend.severity == "warning" and isToBs.severity == "warning": + refs.append("지수분석·3표 연결 모두 매출채권/재고 비정상 팽창 감지 → 매출 인식 공격성 심각") + + return refs[:10] + + +def _buildForwardImplications(paragraphs: list[NarrativeParagraph], inp: _Input) -> list[str]: + """전망 시사점 — 가장 강한 신호에서 조건부 시사점 생성.""" + implications: list[str] = [] + + positive = [p for p in paragraphs if p.severity == "positive"] + negative = [p for p in paragraphs if p.severity in ("negative", "warning")] + + if positive: + best = positive[0] + if best.dimension == "growth": + implications.append("현 성장 추세 유지 시 실적 개선 지속 전망") + elif best.dimension == "dupont": + implications.append("수익구조 건전성 기반 안정적 주주가치 창출 기대") + elif best.dimension == "sectorRelative": + implications.append("업종 대비 저평가 구간 — 촉매 발생 시 재평가 여지") + elif best.dimension == "margin": + implications.append("마진 개선 추세 지속 시 이익 레버리지 확대 기대") + elif best.dimension in ("cashflow", "cashflowDeep"): + implications.append("양호한 현금창출력 기반 주주환원 또는 재투자 여력 충분") + elif best.dimension == "bsStructure": + implications.append("자산 구성 효율성 유지 시 자본수익률 개선 기대") + elif best.dimension == "liquidity": + implications.append("풍부한 유동성 — 경기 둔화에도 안정적 운영 가능") + elif best.dimension == "capitalChange": + implications.append("자본 축적 추세 지속 시 재무 안전판 강화") + elif best.dimension == "isToCs": + implications.append("현금주의 이익 양호 — 높은 이익의 질 유지 전망") + elif best.dimension == "valueCreation": + implications.append("ROIC > WACC — 자본비용 초과 수익 창출, 기업가치 증대 지속 기대") + elif best.dimension == "distressModels": + implications.append("부실 모델 전원 안전 판정 — 재무건전성 우수") + elif best.dimension == "salesOrder": + implications.append("수주잔고 풍부 — 향후 매출 가시성 높음") + elif best.dimension == "businessStrategy": + implications.append("고성장·고마진 전략 유효 — 프리미엄 밸류에이션 정당화") + + if negative: + worst = negative[0] + if worst.dimension == "efficiency": + implications.append("운전자본 효율 악화 방치 시 유동성 리스크 확대 가능") + elif worst.dimension in ("cashflow", "cashflowDeep"): + implications.append("현금흐름 부진 지속 시 재무 안정성 악화 우려") + elif worst.dimension == "growth": + implications.append("성장 둔화 추세 반전 없으면 밸류에이션 디레이팅 가능") + elif worst.dimension == "margin": + implications.append("마진 하락 추세 지속 시 구조적 수익성 문제 대두 가능") + elif worst.dimension == "sectorRelative": + implications.append("업종 대비 프리미엄 지속 시 하방 리스크 존재") + elif worst.dimension == "debtStructure": + implications.append("부채구조 악화 추세 지속 시 신용 리스크 상승 가능") + elif worst.dimension == "liquidity": + implications.append("유동성 부족 심화 시 차입 의존도 확대 불가피") + elif worst.dimension == "isToCs": + implications.append("이익-현금흐름 괴리 지속 시 이익의 질 의문 심화") + elif worst.dimension == "isToBs": + implications.append("매출채권/재고 과잉 축적 시 대손·평가손실 리스크") + elif worst.dimension == "cfToBs": + implications.append("투자-자산 불일치 지속 시 자산 효율성 저하 우려") + elif worst.dimension == "valueCreation": + implications.append("EVA 부진 지속 시 기업가치 훼손 — 자본 배분 재검토 필요") + elif worst.dimension == "distressModels": + implications.append("다수 부실 모델 경고 — 재무 안정성 심층 점검 시급") + elif worst.dimension == "earningsManipulation": + implications.append("Beneish 경고 지속 시 이익의 신뢰성 의문 — 감사보고서 주의") + + return implications[:4] + + +# ══════════════════════════════════════ +# 진입점 +# ══════════════════════════════════════ + + +def buildNarrative( + aSeries: dict, + aYears: list[str], + dupont: DuPontResult | None, + earningsQuality: EarningsQuality | None, + marketData: MarketData | None, + company: object, + sectorBenchmark: object | None = None, + sectorParams: object | None = None, + ratios: object | None = None, +) -> NarrativeAnalysis | None: + """15차원 교차분석 서술 생성 (IS/BS/CF 3표 연결).""" + # segments, costByNature 수집 (show → notes fallback) + segDf = None + costDf = None + try: + segDf = company.show("segments") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + if segDf is None: + try: + notes = getattr(company, "notes", None) + if notes is not None: + segDf = notes.segments + except (AttributeError, TypeError, KeyError, ValueError): + pass + try: + costDf = company.show("costByNature") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + if costDf is None: + try: + notes = getattr(company, "notes", None) + if notes is not None: + costDf = notes.costByNature + except (AttributeError, TypeError, KeyError, ValueError): + pass + + # Phase 4: 실전 사업분석 데이터 수집 + salesOrderDf = None + productServiceDf = None + quarterlyIsDf = None + try: + salesOrderDf = company.show("salesOrder") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + try: + productServiceDf = company.show("productService") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + try: + finance = getattr(company, "finance", None) + if finance is not None: + ts = getattr(finance, "timeseries", None) + if ts is not None: + quarterlyIsDf = getattr(ts, "IS", None) + except (AttributeError, TypeError): + pass + + # Phase 5: 인적자본 데이터 수집 + employeeDf = None + rndDf = None + try: + employeeDf = company.show("employee") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + try: + rndDf = company.show("rnd") # type: ignore[union-attr] + except (AttributeError, TypeError, KeyError, ValueError): + pass + + # 금융업 판별 + isFinancial = False + sectorEnum = None + try: + sectorInfo = getattr(company, "sector", None) + if sectorInfo is not None: + sectorEnum = getattr(sectorInfo, "sector", sectorInfo) + if hasattr(sectorEnum, "value"): + isFinancial = sectorEnum.value == "금융" + elif isinstance(sectorEnum, str): + isFinancial = "금융" in sectorEnum or "FINANCIAL" in sectorEnum.upper() + except (AttributeError, TypeError): + pass + + inp = _Input( + aSeries=aSeries, + aYears=aYears, + dupont=dupont, + earningsQuality=earningsQuality, + marketData=marketData, + segmentsDf=segDf, + costByNatureDf=costDf, + sectorBenchmark=sectorBenchmark, + sectorParams=sectorParams, + isFinancial=isFinancial, + ratios=ratios, + salesOrderDf=salesOrderDf, + productServiceDf=productServiceDf, + quarterlyIsDf=quarterlyIsDf, + employeeDf=employeeDf, + rndDf=rndDf, + ) + + # 26개 분석 차원 실행 (v5) + analyzers = [ + _analyzeDupont, + _analyzeGrowthQuality, + _analyzeCashflowDeep, + _analyzeIsToCs, + _analyzeCfToBs, + _analyzeIsToBs, + _analyzeIndexTrend, + _analyzeEarningsManipulation, + _analyzeDistressModels, + _analyzeCostStructure, + _analyzeSalesOrder, + _analyzeProductMix, + _analyzeQuarterlyMomentum, + _analyzeBusinessStrategy, + _analyzeHumanCapital, + _analyzeRndEfficiency, + _analyzeValueCreation, + _analyzeSectorRelative, + _analyzeSegments, + ] + # 금융업은 margin/efficiency/liquidity skip (BS/CF 구조 다름) + if not isFinancial: + analyzers.insert(1, _analyzeMarginTrend) + analyzers.insert(2, _analyzeBalanceSheetStructure) + analyzers.insert(3, _analyzeDebtStructure) + analyzers.insert(4, _analyzeLiquidity) + analyzers.insert(5, _analyzeCapitalChange) + analyzers.insert(8, _analyzeEfficiency) + + paragraphs: list[NarrativeParagraph] = [] + for fn in analyzers: + try: + result = fn(inp) + if result is not None: + paragraphs.append(result) + except (TypeError, ValueError, KeyError, ZeroDivisionError, AttributeError): + continue + + if len(paragraphs) < 2: + return None + + crossRefs = _detectCrossReferences(paragraphs) + implications = _buildForwardImplications(paragraphs, inp) + + return NarrativeAnalysis( + paragraphs=paragraphs, + forwardImplications=implications, + crossReferences=crossRefs, + ) diff --git a/src/dartlab/analysis/financial/research/orchestrator.py b/src/dartlab/analysis/financial/research/orchestrator.py new file mode 100644 index 0000000000000000000000000000000000000000..435ef3895e6d45e36cb6f5b7a04327bcd9403707 --- /dev/null +++ b/src/dartlab/analysis/financial/research/orchestrator.py @@ -0,0 +1,1076 @@ +"""Research 오케스트레이터 — Company → ResearchResult.""" + +from __future__ import annotations + +import logging +from datetime import datetime, timezone + +from dartlab.analysis.financial.research.quality import calcCoverageScore +from dartlab.analysis.financial.research.scoring import calcAllScores +from dartlab.analysis.financial.research.thesis import classifyProfile, synthesizeThesis +from dartlab.analysis.financial.research.types import ( + AnomalySection, + CompanyOverview, + DistressSection, + EarningsQuality, + ExecutiveSummary, + FinancialAnalysis, + ForecastData, + InsightDetail, + MarketData, + PeerSection, + ResearchMeta, + ResearchResult, + RiskSection, + ValuationSection, +) +from dartlab.analysis.forecast.forecast import forecastMetric + +_log = logging.getLogger(__name__) + + +def generateResearch( + company: object, + *, + sections: list[str] | None = None, + includeMarket: bool = True, +) -> ResearchResult: + """종합 기업분석 리포트 생성.""" + result = ResearchResult() + wantAll = sections is None + want = set(sections) if sections else set() + + stockCode = getattr(company, "stockCode", "") + corpName = getattr(company, "corpName", "") + + # ── Phase 0: Meta ── + result.meta = ResearchMeta( + stockCode=stockCode, + corpName=corpName, + generatedAt=datetime.now(timezone.utc).isoformat(), + ) + + # ── Phase 1: Finance ── + aSeries, aYears = _getAnnualData(company) + ratios = _safeAttr(company, "finance", "ratios") + + # ── Phase 2: Insight (확장 — details/distress/anomalies 전부 수집) ── + insights = None + insightDetails: list[InsightDetail] = [] + if wantAll or want & {"executive", "thesis", "risk", "insightDetails"}: + insights = _safeGet(company, "insights") + + grades: dict[str, str] = {} + if insights is not None: + try: + grades = insights.grades() + except (AttributeError, TypeError): + pass + insightDetails = _extractInsightDetails(insights) + result.insightDetails = insightDetails + + # ── Phase 2: ESG ── + esgResult = None + if wantAll or "esgGovernance" in want: + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + esgResult = _safeGet(company, "esg") + + # ── Phase 2: QuantScores ── + quantScores = None + currentPrice = None + sharesOutstanding = None + + if aSeries and (wantAll or want & {"quantScores", "executive", "thesis"}): + if includeMarket: + currentPrice, sharesOutstanding = _getMarketInfo(company) + quantScores = calcAllScores( + aSeries, + aYears, + currentPrice=currentPrice, + sharesOutstanding=sharesOutstanding, + ) + result.quantScores = quantScores + + # ── Phase 2: Earnings Quality ── + if aSeries and (wantAll or "earningsQuality" in want): + result.earningsQuality = _buildEarningsQuality(aSeries, ratios) + + # ── Phase 2: Financial ── + if aSeries and (wantAll or "financial" in want): + result.financial = _buildFinancial(aSeries, aYears, quantScores) + + # ── Phase 2: Sector KPIs ── + if aSeries and (wantAll or "sectorKpis" in want): + result.sectorKpis = _buildSectorKpis(company, aSeries, ratios) + + # ── Phase 2: Overview ── + if wantAll or "overview" in want: + result.overview = _buildOverview(company) + + # ── Phase 3: Market Data ── + marketData = None + if includeMarket and (wantAll or want & {"marketData", "executive", "valuation"}): + marketData = _buildMarketData(company) + result.marketData = marketData + if currentPrice is None and marketData: + currentPrice = marketData.currentPrice + + # ── Phase 3: Valuation (NEW — analyst 엔진 활용) ── + if aSeries and includeMarket and (wantAll or "valuation" in want): + result.valuationAnalysis = _buildValuation(company, currentPrice, sharesOutstanding) + + # ── Phase 3: Forecast (확장 — 자체예측 + 시나리오) ── + if includeMarket and (wantAll or "forecast" in want): + result.forecast = _buildForecast(company, aSeries) + + # ── Phase 3: Peer (NEW — OOM 안전, 섹터 배수 기반) ── + if aSeries and (wantAll or "peer" in want): + result.peerAnalysis = _buildPeerAnalysis(company, ratios, marketData) + + # ── Phase 3: Risk (NEW — distress + anomalies + insight.risk) ── + if wantAll or "risk" in want: + result.riskAnalysis = _buildRiskAnalysis(insights, insightDetails) + + # ── Phase 4: Executive ── + upside = None + opinion = "" + if marketData and marketData.targetPrice and marketData.currentPrice: + upside = (marketData.targetPrice - marketData.currentPrice) / marketData.currentPrice + opinion = _classifyOpinion(upside) + + # valuation verdict도 반영 + if not opinion and result.valuationAnalysis and result.valuationAnalysis.verdict: + va = result.valuationAnalysis + if va.verdict == "저평가": + opinion = "매수" + elif va.verdict == "고평가": + opinion = "매도" + else: + opinion = "중립" + + keyMetrics = _buildKeyMetrics(ratios, quantScores, marketData) + + result.executive = ExecutiveSummary( + opinion=opinion, + profile=classifyProfile(grades, upside), + targetPrice=marketData.targetPrice if marketData else None, + currentPrice=currentPrice, + upside=upside, + grades=grades, + keyMetrics=keyMetrics, + ) + + # ── Phase 4: Narrative (v3 — 교차분석 서술) ── + if aSeries and (wantAll or want & {"narrative", "thesis"}): + try: + from dartlab.analysis.financial.insight.benchmark import getBenchmark + from dartlab.analysis.financial.research.narrative import buildNarrative + from dartlab.core.sector.params import getParams + + sectorInfo = _safeGet(company, "sector") + sectorEnum = getattr(sectorInfo, "sector", sectorInfo) + sectorBench = getBenchmark(sectorEnum) if sectorEnum else None + sectorPar = getParams(sectorInfo) if sectorInfo and hasattr(sectorInfo, "sector") else None + + result.narrativeAnalysis = buildNarrative( + aSeries, + aYears, + result.financial.dupont if result.financial else None, + result.earningsQuality, + result.marketData, + company, + sectorBenchmark=sectorBench, + sectorParams=sectorPar, + ratios=ratios, + ) + except (ImportError, OSError, ValueError, AttributeError, TypeError) as exc: + _log.debug("narrative 실패: %s", exc) + + # ── Phase 4: Thesis (전면 재작성 — insightDetails/valuation/risk/narrative 전달) ── + if wantAll or "thesis" in want: + result.thesis = synthesizeThesis( + result.executive, + insightDetails=insightDetails, + valuationAnalysis=result.valuationAnalysis, + riskAnalysis=result.riskAnalysis, + quantScores=quantScores, + earningsQuality=result.earningsQuality, + forecastData=result.forecast, + narrativeAnalysis=result.narrativeAnalysis, + ) + + # ── Meta: coverage ── + result.meta.coverageScore = calcCoverageScore( + hasFinance=aSeries is not None, + hasDocs=getattr(company, "_hasDocs", False), + hasInsight=insights is not None, + hasMarket=marketData is not None, + hasValuation=result.valuationAnalysis is not None, + hasForecast=result.forecast is not None, + hasEsg=esgResult is not None, + hasSectorKpis=result.sectorKpis is not None, + hasRisk=result.riskAnalysis is not None, + hasPeer=result.peerAnalysis is not None, + hasNarrative=result.narrativeAnalysis is not None, + ) + + metaWarnings: list[str] = [] + if aSeries is None: + metaWarnings.append("재무 데이터 없음") + if not includeMarket: + metaWarnings.append("시장 데이터 미포함") + result.meta.warnings = metaWarnings + + return result + + +# ══════════════════════════════════════ +# 내부 헬퍼 — 기존 +# ══════════════════════════════════════ + + +def _getAnnualData(company: object) -> tuple[dict | None, list[str]]: + """Company에서 연간 시계열 추출.""" + try: + finance = getattr(company, "finance", None) + if finance is None: + return None, [] + annual = getattr(finance, "annual", None) + if annual is None: + return None, [] + return annual + except (RuntimeError, FileNotFoundError, OSError) as exc: + _log.debug("연간 데이터 로드 실패: %s", exc) + return None, [] + + +def _safeGet(company: object, attr: str) -> object: + """Company property 안전 접근.""" + try: + return getattr(company, attr, None) + except (RuntimeError, FileNotFoundError, OSError, ValueError) as exc: + _log.debug("%s 로드 실패: %s", attr, exc) + return None + + +def _safeAttr(company: object, namespace: str, attr: str) -> object: + """Company.namespace.attr 안전 접근.""" + try: + ns = getattr(company, namespace, None) + if ns is None: + return None + return getattr(ns, attr, None) + except (RuntimeError, FileNotFoundError, OSError, ValueError) as exc: + _log.debug("%s.%s 로드 실패: %s", namespace, attr, exc) + return None + + +def _getMarketInfo(company: object) -> tuple[float | None, float | None]: + """현재가 + 발행주식수.""" + try: + from dartlab.gather import getDefaultGather + + g = getDefaultGather() + stockCode = getattr(company, "stockCode", "") + snap = g.price(stockCode) + if snap is None: + return None, None + + price = getattr(snap, "current", None) + marketCap = getattr(snap, "market_cap", None) + shares = None + if price and marketCap and price > 0: + shares = marketCap / price + return price, shares + except (ImportError, OSError, ValueError) as exc: + _log.debug("market info 실패: %s", exc) + return None, None + + +def _buildEarningsQuality(aSeries: dict, ratios: object) -> EarningsQuality: + """이익의 질 조립.""" + from dartlab.core.finance.extract import getLatest, getTTM + + ni = getTTM(aSeries, "IS", "net_profit", strict=False) + ocf = getTTM(aSeries, "CF", "operating_cashflow", strict=False) + + cfToNi = None + if ni and ocf and ni != 0: + cfToNi = round(ocf / ni, 2) + + beneish = getattr(ratios, "beneish_m_score", None) if ratios else None + ccc = getattr(ratios, "cash_conversion_cycle", None) if ratios else None + + ta = getLatest(aSeries, "BS", "total_assets") + accrual = None + if ni is not None and ocf is not None and ta and ta > 0: + accrual = round((ni - ocf) / ta, 4) + + assessment = "moderate" + if cfToNi is not None: + if cfToNi > 1.0 and (accrual is None or accrual < 0.05): + assessment = "high" + elif cfToNi < 0.5 or (accrual is not None and accrual > 0.15): + assessment = "questionable" + elif cfToNi < 0.8: + assessment = "low" + + return EarningsQuality( + cfToNi=cfToNi, + accrualRatio=accrual, + ccc=ccc, + beneishMScore=beneish, + assessment=assessment, + ) + + +def _buildFinancial(aSeries: dict, aYears: list[str], quantScores: object) -> FinancialAnalysis: + """재무 분석 조립 — 수익성/원가구조/효율성/성장/규모.""" + dupont = getattr(quantScores, "dupont", None) if quantScores else None + + salesList = aSeries.get("IS", {}).get("sales", []) + cogsList = aSeries.get("IS", {}).get("cost_of_sales", []) + opList = aSeries.get("IS", {}).get("operating_profit", []) + niList = aSeries.get("IS", {}).get("net_profit", []) + recvList = aSeries.get("BS", {}).get("trade_receivable", []) or aSeries.get("BS", {}).get( + "trade_and_other_receivables", [] + ) + invList = aSeries.get("BS", {}).get("inventories", []) + payList = aSeries.get("BS", {}).get("trade_payable", []) or aSeries.get("BS", {}).get( + "trade_and_other_payables", [] + ) + + n = min(len(salesList), len(aYears)) + start = max(0, n - 5) + trends: dict[str, list[float | None]] = {} + periods: list[str] = [] + + opMargins: list[float | None] = [] + netMargins: list[float | None] = [] + grossMargins: list[float | None] = [] + cogsRatios: list[float | None] = [] + sgaRatios: list[float | None] = [] + dsoList: list[float | None] = [] + dioList: list[float | None] = [] + dpoList: list[float | None] = [] + cccList: list[float | None] = [] + salesGr: list[float | None] = [] + opGr: list[float | None] = [] + salesAbs: list[float | None] = [] + opAbs: list[float | None] = [] + niAbs: list[float | None] = [] + + for i in range(start, n): + s = salesList[i] if i < len(salesList) else None + cogs = cogsList[i] if i < len(cogsList) else None + op = opList[i] if i < len(opList) else None + ni = niList[i] if i < len(niList) else None + recv = recvList[i] if i < len(recvList) else None + inv = invList[i] if i < len(invList) else None + pay = payList[i] if i < len(payList) else None + periods.append(aYears[i]) + + # 절대 규모 + salesAbs.append(s) + opAbs.append(op) + niAbs.append(ni) + + # 수익성 + if s and s > 0: + opMargins.append(round(op / s * 100, 2) if op is not None else None) + netMargins.append(round(ni / s * 100, 2) if ni is not None else None) + if cogs is not None: + gm = round((s - cogs) / s * 100, 2) + grossMargins.append(gm) + cogsRatios.append(round(cogs / s * 100, 2)) + if op is not None: + sga = s - cogs - op + sgaRatios.append(round(sga / s * 100, 2)) + else: + sgaRatios.append(None) + else: + grossMargins.append(None) + cogsRatios.append(None) + sgaRatios.append(None) + else: + opMargins.append(None) + netMargins.append(None) + grossMargins.append(None) + cogsRatios.append(None) + sgaRatios.append(None) + + # 효율성 (DSO/DIO/DPO/CCC) + dso = recv / (s / 365) if recv is not None and s and s > 0 else None + dio = inv / (cogs / 365) if inv is not None and cogs and cogs > 0 else None + dpo = pay / (cogs / 365) if pay is not None and cogs and cogs > 0 else None + dsoList.append(round(dso, 1) if dso is not None else None) + dioList.append(round(dio, 1) if dio is not None else None) + dpoList.append(round(dpo, 1) if dpo is not None else None) + if dso is not None and dio is not None and dpo is not None: + cccList.append(round(dso + dio - dpo, 1)) + else: + cccList.append(None) + + # 성장률 + prevIdx = i - 1 + if prevIdx >= 0 and prevIdx < len(salesList): + ps = salesList[prevIdx] + if ps and ps != 0 and s is not None: + salesGr.append(round((s - ps) / abs(ps) * 100, 1)) + else: + salesGr.append(None) + po = opList[prevIdx] if prevIdx < len(opList) else None + if po and po != 0 and op is not None: + opGr.append(round((op - po) / abs(po) * 100, 1)) + else: + opGr.append(None) + else: + salesGr.append(None) + opGr.append(None) + + trends["operatingMargin"] = opMargins + trends["netMargin"] = netMargins + trends["grossMargin"] = grossMargins + trends["costOfSalesRatio"] = cogsRatios + trends["sgaRatio"] = sgaRatios + trends["dso"] = dsoList + trends["dio"] = dioList + trends["dpo"] = dpoList + trends["ccc"] = cccList + trends["salesGrowth"] = salesGr + trends["opGrowth"] = opGr + trends["sales"] = salesAbs + trends["operatingProfit"] = opAbs + trends["netProfit"] = niAbs + + # ── BS 요약 시계열 ── + bsSummary: dict[str, list[float | None]] = {} + bsKeys = { + "totalAssets": ["total_assets"], + "currentAssets": ["current_assets"], + "nonCurrentAssets": ["noncurrent_assets", "non_current_assets"], + "totalLiabilities": ["total_liabilities"], + "totalEquity": ["total_stockholders_equity", "total_equity", "owners_of_parent_equity"], + "cashAndEquivalents": ["cash_and_cash_equivalents"], + "shortTermBorrowings": ["shortterm_borrowings", "short_term_borrowings"], + "longTermBorrowings": ["longterm_borrowings", "long_term_borrowings"], + "retainedEarnings": ["retained_earnings"], + "inventories": ["inventories"], + "tradeReceivable": ["trade_and_other_receivables", "trade_receivable"], + } + bsData = aSeries.get("BS", {}) + for outKey, srcCandidates in bsKeys.items(): + raw: list = [] + for candidate in srcCandidates: + raw = bsData.get(candidate, []) + if raw and any(v is not None for v in raw): + break + vals: list[float | None] = [] + for i in range(start, n): + vals.append(raw[i] if i < len(raw) else None) + bsSummary[outKey] = vals + + # 파생: 부채비율, 유동비율 + debtRatios: list[float | None] = [] + currentRatios: list[float | None] = [] + clList = bsData.get("current_liabilities", []) + for i in range(start, n): + tl = bsSummary["totalLiabilities"][i - start] if (i - start) < len(bsSummary["totalLiabilities"]) else None + te = bsSummary["totalEquity"][i - start] if (i - start) < len(bsSummary["totalEquity"]) else None + ca = bsSummary["currentAssets"][i - start] if (i - start) < len(bsSummary["currentAssets"]) else None + cl = clList[i] if i < len(clList) else None + debtRatios.append(round(tl / te * 100, 1) if tl and te and te != 0 else None) + currentRatios.append(round(ca / cl * 100, 1) if ca and cl and cl != 0 else None) + bsSummary["debtRatio"] = debtRatios + bsSummary["currentRatio"] = currentRatios + + # ── CF 요약 시계열 ── + cfSummary: dict[str, list[float | None]] = {} + cfKeys = { + "operatingCf": ["operating_cashflow", "operating_cf", "cash_flows_from_business"], + "investingCf": ["investing_cashflow", "investing_cf"], + "financingCf": ["financing_cashflow", "financing_cf", "cash_flows_from_financing_activities"], + } + cfData = aSeries.get("CF", {}) + for outKey, srcCandidates in cfKeys.items(): + raw2: list = [] + for candidate in srcCandidates: + raw2 = cfData.get(candidate, []) + if raw2 and any(v is not None for v in raw2): + break + vals2: list[float | None] = [] + for i in range(start, n): + vals2.append(raw2[i] if i < len(raw2) else None) + cfSummary[outKey] = vals2 + + # FCF = OCF - CAPEX (capex는 보통 음수로 저장) + capexRaw = ( + cfData.get("purchase_of_property_plant_and_equipment", []) + or cfData.get("capital_expenditures", []) + or cfData.get("capex", []) + ) + fcfList: list[float | None] = [] + capexList: list[float | None] = [] + for i in range(start, n): + ocf = cfSummary["operatingCf"][i - start] if (i - start) < len(cfSummary["operatingCf"]) else None + cx = capexRaw[i] if i < len(capexRaw) else None + capexList.append(cx) + if ocf is not None and cx is not None: + fcfList.append(round(ocf - abs(cx), 1)) + elif ocf is not None: + fcfList.append(ocf) + else: + fcfList.append(None) + cfSummary["capex"] = capexList + cfSummary["fcf"] = fcfList + + # ── 3표 연결 지표 ── + crossMetrics: dict[str, list[float | None]] = {} + ocfToNi: list[float | None] = [] + for idx in range(len(periods)): + ocf = cfSummary["operatingCf"][idx] if idx < len(cfSummary["operatingCf"]) else None + ni = niAbs[idx] if idx < len(niAbs) else None + if ocf is not None and ni is not None and ni != 0: + ocfToNi.append(round(ocf / ni, 2)) + else: + ocfToNi.append(None) + crossMetrics["ocfToNetIncome"] = ocfToNi + + # capex / 감가상각 + deprRaw = ( + cfData.get("depreciation_amortization", []) + or cfData.get("depreciation", []) + or cfData.get("depreciation_and_amortization", []) + ) + capexToDepr: list[float | None] = [] + for i in range(start, n): + cx = capexRaw[i] if i < len(capexRaw) else None + dp = deprRaw[i] if i < len(deprRaw) else None + if cx is not None and dp is not None and dp != 0: + capexToDepr.append(round(abs(cx) / abs(dp), 2)) + else: + capexToDepr.append(None) + crossMetrics["capexToDepreciation"] = capexToDepr + + # 이익잉여금 증가율 + reRaw = bsData.get("retained_earnings", []) + reGrowth: list[float | None] = [] + for i in range(start, n): + cur = reRaw[i] if i < len(reRaw) else None + prev = reRaw[i - 1] if (i - 1) >= 0 and (i - 1) < len(reRaw) else None + if cur is not None and prev is not None and prev != 0: + reGrowth.append(round((cur - prev) / abs(prev) * 100, 1)) + else: + reGrowth.append(None) + crossMetrics["retainedEarningsGrowth"] = reGrowth + + # ── Common-Size IS (매출=100% 기준) ── + isCommonSize: dict[str, list[float | None]] = {} + isKeys = { + "costOfSales": "cost_of_sales", + "grossProfit": "gross_profit", + "operatingProfit": "operating_profit", + "netProfit": "net_profit", + "incomeTaxExpense": "income_tax_expense", + } + isData = aSeries.get("IS", {}) + for outKey, srcKey in isKeys.items(): + raw3 = isData.get(srcKey, []) + vals3: list[float | None] = [] + for i in range(start, n): + s = salesList[i] if i < len(salesList) else None + v = raw3[i] if i < len(raw3) else None + if s and s > 0 and v is not None: + vals3.append(round(v / s * 100, 2)) + else: + vals3.append(None) + isCommonSize[outKey] = vals3 + + # ── Common-Size BS (자산=100% 기준) ── + bsCommonSize: dict[str, list[float | None]] = {} + taRaw = bsData.get("total_assets", []) + bsCsKeys = { + "currentAssets": ["current_assets", "total_current_assets"], + "nonCurrentAssets": ["noncurrent_assets", "non_current_assets", "total_non_current_assets"], + "totalLiabilities": ["total_liabilities"], + "totalEquity": ["total_stockholders_equity", "total_equity", "owners_of_parent_equity"], + "inventories": ["inventories"], + "tradeReceivable": ["trade_and_other_receivables", "trade_receivable"], + "ppe": ["property_plant_and_equipment"], + "intangibleAssets": ["intangible_assets"], + } + for outKey, srcCandidates in bsCsKeys.items(): + raw4: list = [] + for candidate in srcCandidates: + raw4 = bsData.get(candidate, []) + if raw4 and any(v is not None for v in raw4): + break + vals4: list[float | None] = [] + for i in range(start, n): + ta = taRaw[i] if i < len(taRaw) else None + v = raw4[i] if i < len(raw4) else None + if ta and ta > 0 and v is not None: + vals4.append(round(v / ta * 100, 2)) + else: + vals4.append(None) + bsCommonSize[outKey] = vals4 + + return FinancialAnalysis( + dupont=dupont, + marginTrends=trends, + periods=periods, + bsSummary=bsSummary, + cfSummary=cfSummary, + crossStatementMetrics=crossMetrics, + isCommonSize=isCommonSize, + bsCommonSize=bsCommonSize, + ) + + +def _buildSectorKpis(company: object, aSeries: dict, ratios: object) -> object: + """섹터 KPI 조립.""" + try: + from dartlab.analysis.financial.research.sectorKpi import calcSectorKpis + + sectorInfo = _safeGet(company, "sector") + sector = getattr(sectorInfo, "sector", sectorInfo) + return calcSectorKpis(sector, aSeries, ratios) + except (ImportError, ValueError, AttributeError) as exc: + _log.debug("sectorKpi 실패: %s", exc) + return None + + +def _buildOverview(company: object) -> CompanyOverview: + """기업 개요.""" + desc = None + try: + overview = company.show("businessOverview") # type: ignore[union-attr] + if overview is not None and hasattr(overview, "height") and overview.height > 0: + cols = [c for c in overview.columns if c[0].isdigit()] + if cols: + latestCol = cols[0] + vals = overview[latestCol].to_list() + texts = [v for v in vals if isinstance(v, str) and v.strip()] + if texts: + desc = texts[0][:500] + except (RuntimeError, AttributeError, OSError): + pass + + sectorInfo = _safeGet(company, "sector") + sectorEnum = getattr(sectorInfo, "sector", sectorInfo) + sectorName = sectorEnum.value if sectorEnum and hasattr(sectorEnum, "value") else None + + return CompanyOverview(description=desc, sectorName=sectorName) + + +def _buildMarketData(company: object) -> MarketData | None: + """시장 데이터 수집.""" + try: + from dartlab.gather import getDefaultGather + + g = getDefaultGather() + stockCode = getattr(company, "stockCode", "") + + snap = g.price(stockCode) + cons = g.consensus(stockCode) + flow = g.flow(stockCode) + macro = g.macro() + + md = MarketData() + if snap is not None and hasattr(snap, "current"): + md.currentPrice = getattr(snap, "current", None) + md.marketCap = getattr(snap, "market_cap", None) + md.per = getattr(snap, "per", None) + md.pbr = getattr(snap, "pbr", None) + md.dividendYield = getattr(snap, "dividend_yield", None) + md.high52w = getattr(snap, "high_52w", None) + md.low52w = getattr(snap, "low_52w", None) + if cons is not None and hasattr(cons, "target_price"): + md.targetPrice = getattr(cons, "target_price", None) + md.analystCount = getattr(cons, "analyst_count", None) + md.buyRatio = getattr(cons, "buy_ratio", None) + if flow is not None and hasattr(flow, "foreign_holding_ratio"): + md.foreignHoldingRatio = getattr(flow, "foreign_holding_ratio", None) + if macro is not None and isinstance(macro, dict): + md.baseRate = macro.get("baseRate") + md.usdKrw = macro.get("usdKrw") + + return md + except (ImportError, OSError, ValueError) as exc: + _log.debug("market data 실패: %s", exc) + return None + + +def _buildKeyMetrics(ratios: object, quantScores: object, marketData: object) -> list[dict[str, object]]: + """핵심 지표 4-6개.""" + metrics: list[dict[str, object]] = [] + + if marketData: + per = getattr(marketData, "per", None) + if per is not None: + metrics.append({"label": "PER", "value": per, "unit": "배"}) + pbr = getattr(marketData, "pbr", None) + if pbr is not None: + metrics.append({"label": "PBR", "value": pbr, "unit": "배"}) + + if ratios: + roe = getattr(ratios, "roe", None) + if roe is not None: + metrics.append({"label": "ROE", "value": round(roe, 1), "unit": "%"}) + debtRatio = getattr(ratios, "debt_ratio", None) + if debtRatio is not None: + metrics.append({"label": "부채비율", "value": round(debtRatio, 1), "unit": "%"}) + + if quantScores: + p = getattr(quantScores, "piotroski", None) + if p: + metrics.append({"label": "Piotroski F", "value": p.total, "unit": "/9"}) + + return metrics[:6] + + +def _classifyOpinion(upside: float) -> str: + """업사이드 → 투자의견.""" + if upside > 0.30: + return "강력매수" + if upside > 0.10: + return "매수" + if upside > -0.10: + return "중립" + if upside > -0.30: + return "매도" + return "강력매도" + + +# ══════════════════════════════════════ +# 내부 헬퍼 — v2 신규 +# ══════════════════════════════════════ + + +def _extractInsightDetails(insights: object) -> list[InsightDetail]: + """AnalysisResult에서 10영역 상세 추출.""" + areas = [ + "performance", + "profitability", + "health", + "cashflow", + "governance", + "risk", + "opportunity", + "predictability", + "uncertainty", + "coreEarnings", + ] + result: list[InsightDetail] = [] + for area in areas: + ir = getattr(insights, area, None) + if ir is None: + continue + grade = getattr(ir, "grade", "") + summary = getattr(ir, "summary", "") + details = getattr(ir, "details", []) + risks = [getattr(f, "text", str(f)) for f in getattr(ir, "risks", [])] + opportunities = [getattr(f, "text", str(f)) for f in getattr(ir, "opportunities", [])] + result.append( + InsightDetail( + area=area, + grade=grade, + summary=summary, + details=details if isinstance(details, list) else [], + risks=risks, + opportunities=opportunities, + ) + ) + return result + + +def _buildValuation( + company: object, + currentPrice: float | None, + shares: float | None, +) -> ValuationSection | None: + """analyst 밸류에이션 호출 → ValuationSection.""" + try: + valSummary = company.valuation(shares=int(shares) if shares else None) # type: ignore[union-attr] + if valSummary is None: + return None + + dcfPs = None + dcfMos = None + ddmPs = None + relPs = None + methodology: list[str] = [] + warnings: list[str] = [] + + dcf = getattr(valSummary, "dcf", None) + if dcf: + dcfPs = getattr(dcf, "perShareValue", None) + dcfMos = getattr(dcf, "marginOfSafety", None) + if dcfPs: + methodology.append(f"DCF (WACC {getattr(dcf, 'discountRate', '?')}%)") + for w in getattr(dcf, "warnings", []): + warnings.append(w) + + ddm = getattr(valSummary, "ddm", None) + if ddm: + ddmPs = getattr(ddm, "intrinsicValue", None) + model = getattr(ddm, "modelUsed", "") + if ddmPs and model != "N/A": + methodology.append(f"DDM ({model})") + + rel = getattr(valSummary, "relative", None) + if rel: + relPs = getattr(rel, "consensusValue", None) + if relPs: + methodology.append("상대가치 (PER/PBR/EV-EBITDA)") + + fvr = getattr(valSummary, "fairValueRange", None) + verdict = getattr(valSummary, "verdict", "") or "" + + return ValuationSection( + dcfPerShare=dcfPs, + dcfMos=dcfMos, + ddmPerShare=ddmPs, + relativePerShare=relPs, + fairValueRange=fvr, + verdict=verdict, + methodology=methodology, + warnings=warnings, + ) + except (ImportError, OSError, ValueError, AttributeError, TypeError, RuntimeError) as exc: + _log.debug("valuation 실패: %s", exc) + return None + + +def _buildForecast(company: object, aSeries: dict | None) -> ForecastData | None: + """전망 데이터 — 컨센서스 + 자체예측.""" + fd = ForecastData() + + # 1) 컨센서스 (gather) + try: + from dartlab.gather import getDefaultGather + + g = getDefaultGather() + stockCode = getattr(company, "stockCode", "") + revCons = g.revenue_consensus(stockCode) + if revCons: + fd.revenueConsensus = [ + { + "fiscalYear": getattr(rc, "fiscal_year", None), + "revenueEst": getattr(rc, "revenue_est", None), + "operatingProfitEst": getattr(rc, "operating_profit_est", None), + "epsEst": getattr(rc, "eps_est", None), + } + for rc in revCons + ] + except (ImportError, OSError, ValueError) as exc: + _log.debug("consensus forecast 실패: %s", exc) + + # 2) 자체 매출 예측 (analyst.forecast) + if aSeries: + try: + ts = getattr(getattr(company, "finance", None), "timeseries", None) + series = ts[0] if isinstance(ts, tuple) else ts + if series: + fcResult = forecastMetric(series, metric="revenue", horizon=3) + if fcResult and getattr(fcResult, "projected", None): + gr = getattr(fcResult, "growthRate", None) + fd.selfForecast = { + "method": "OLS/CAGR", + "growthRate": round(gr, 1) if gr is not None else None, + "confidence": "high" if getattr(fcResult, "rSquared", 0) > 0.7 else "moderate", + "projected": getattr(fcResult, "projected", []), + } + except (ImportError, OSError, ValueError, AttributeError, TypeError) as exc: + _log.debug("self forecast 실패: %s", exc) + + hasData = fd.revenueConsensus or fd.selfForecast is not None + return fd if hasData else None + + +def _buildPeerAnalysis( + company: object, + ratios: object, + marketData: MarketData | None, +) -> PeerSection | None: + """동종업 비교 — 섹터 배수 기반 (OOM 안전, peer.discover 호출 안 함).""" + try: + sectorInfo = _safeGet(company, "sector") + sectorEnum = getattr(sectorInfo, "sector", sectorInfo) + if sectorEnum is None: + return None + + sectorName = sectorEnum.value if hasattr(sectorEnum, "value") else str(sectorEnum) + + # 섹터 파라미터에서 배수 가져오기 + sectorMultiples: dict[str, float] = {} + try: + from dartlab.core.sector.params import SECTOR_PARAMS + + sp = SECTOR_PARAMS.get(sectorEnum) + if sp: + sectorMultiples = { + "PER": sp.perMultiple, + "PBR": sp.pbrMultiple, + "EV/EBITDA": sp.evEbitdaMultiple, + } + except (ImportError, AttributeError): + pass + + # 기업 현재 배수 + companyMultiples: dict[str, float | None] = {} + if marketData: + companyMultiples["PER"] = marketData.per + companyMultiples["PBR"] = marketData.pbr + if ratios: + evEbitda = getattr(ratios, "ev_ebitda", None) + if evEbitda is not None: + companyMultiples["EV/EBITDA"] = evEbitda + + # 할인/할증 계산 + premiumDiscount: dict[str, float | None] = {} + for key in ["PER", "PBR", "EV/EBITDA"]: + cv = companyMultiples.get(key) + sv = sectorMultiples.get(key) + if cv is not None and sv and sv > 0: + premiumDiscount[key] = round((cv - sv) / sv * 100, 1) + else: + premiumDiscount[key] = None + + # 서술 + narrative = _buildPeerNarrative(sectorName, companyMultiples, sectorMultiples, premiumDiscount) + + return PeerSection( + sectorName=sectorName, + sectorMultiples=sectorMultiples, + companyMultiples=companyMultiples, + premiumDiscount=premiumDiscount, + peerNarrative=narrative, + ) + except (ImportError, OSError, ValueError, AttributeError) as exc: + _log.debug("peer 실패: %s", exc) + return None + + +def _buildPeerNarrative( + sectorName: str, + companyMultiples: dict[str, float | None], + sectorMultiples: dict[str, float], + premiumDiscount: dict[str, float | None], +) -> str: + """peer 비교 서술 생성.""" + parts = [] + for key in ["PER", "PBR"]: + pd = premiumDiscount.get(key) + if pd is not None: + if pd < -15: + parts.append(f"{key} 기준 섹터 대비 {abs(pd):.0f}% 할인") + elif pd > 15: + parts.append(f"{key} 기준 섹터 대비 {pd:.0f}% 할증") + return " | ".join(parts) if parts else f"{sectorName} 섹터 평균 배수 대비 비교" + + +def _buildRiskAnalysis( + insights: object, + insightDetails: list[InsightDetail], +) -> RiskSection | None: + """distress + anomalies + insight.risk → RiskSection.""" + if insights is None: + return None + + distressSection = None + anomalySection = None + insightRisk = None + + # distress + distress = getattr(insights, "distress", None) + if distress is not None: + axes = [] + for ax in getattr(distress, "axes", []): + axes.append( + { + "name": getattr(ax, "name", ""), + "score": getattr(ax, "score", 0), + "weight": getattr(ax, "weight", 0), + } + ) + distressSection = DistressSection( + level=getattr(distress, "level", ""), + overall=getattr(distress, "overall", 0), + creditGrade=getattr(distress, "creditGrade", ""), + creditDescription=getattr(distress, "creditDescription", ""), + riskFactors=getattr(distress, "riskFactors", []), + cashRunwayMonths=getattr(distress, "cashRunwayMonths", None), + axesSummary=axes, + ) + + # anomalies + anomalies = getattr(insights, "anomalies", None) + if anomalies: + items = [] + critCount = 0 + warnCount = 0 + for a in anomalies: + sev = getattr(a, "severity", "") + items.append( + { + "severity": sev, + "category": getattr(a, "category", ""), + "text": getattr(a, "text", ""), + "value": getattr(a, "value", None), + } + ) + if sev in ("critical", "danger"): + critCount += 1 + elif sev == "warning": + warnCount += 1 + anomalySection = AnomalySection(items=items, criticalCount=critCount, warningCount=warnCount) + + # insight.risk + for d in insightDetails: + if d.area == "risk": + insightRisk = d + break + + # narrative + narrative = _buildRiskNarrative(distressSection, anomalySection) + + hasData = distressSection is not None or anomalySection is not None + if not hasData: + return None + + return RiskSection( + distress=distressSection, + anomalies=anomalySection, + insightRisk=insightRisk, + riskNarrative=narrative, + ) + + +def _buildRiskNarrative( + distress: DistressSection | None, + anomalies: AnomalySection | None, +) -> str: + """종합 리스크 서술.""" + parts = [] + if distress: + if distress.level in ("safe", "watch"): + parts.append(f"부실 위험 낮음 (신용 {distress.creditGrade})") + elif distress.level == "warning": + parts.append(f"부실 주의 필요 (신용 {distress.creditGrade})") + else: + parts.append(f"부실 위험 높음 (신용 {distress.creditGrade}, 종합 {distress.overall:.0f}/100)") + + if anomalies: + total = anomalies.criticalCount + anomalies.warningCount + if anomalies.criticalCount > 0: + parts.append(f"심각 이상치 {anomalies.criticalCount}건 감지") + elif total > 0: + parts.append(f"이상치 {total}건 감지 (경고 수준)") + else: + parts.append("이상치 미감지") + + return " | ".join(parts) diff --git a/src/dartlab/analysis/financial/research/quality.py b/src/dartlab/analysis/financial/research/quality.py new file mode 100644 index 0000000000000000000000000000000000000000..04061ea8be6bb9259fc14a5e19ba46c47d5f1cc0 --- /dev/null +++ b/src/dartlab/analysis/financial/research/quality.py @@ -0,0 +1,35 @@ +"""데이터 커버리지/신뢰도 점수.""" + +from __future__ import annotations + + +def calcCoverageScore( + *, + hasFinance: bool = False, + hasDocs: bool = False, + hasInsight: bool = False, + hasMarket: bool = False, + hasValuation: bool = False, + hasForecast: bool = False, + hasEsg: bool = False, + hasSectorKpis: bool = False, + hasRisk: bool = False, + hasPeer: bool = False, + hasNarrative: bool = False, +) -> float: + """0~1 커버리지 점수.""" + weights = { + "finance": (hasFinance, 0.18), + "insight": (hasInsight, 0.15), + "valuation": (hasValuation, 0.13), + "market": (hasMarket, 0.10), + "forecast": (hasForecast, 0.08), + "risk": (hasRisk, 0.08), + "peer": (hasPeer, 0.06), + "docs": (hasDocs, 0.06), + "esg": (hasEsg, 0.05), + "sectorKpis": (hasSectorKpis, 0.05), + "narrative": (hasNarrative, 0.06), + } + score = sum(w for available, w in weights.values() if available) + return round(min(score, 1.0), 2) diff --git a/src/dartlab/analysis/financial/research/scoring.py b/src/dartlab/analysis/financial/research/scoring.py new file mode 100644 index 0000000000000000000000000000000000000000..ab6452d6b377f016fae3ac4e559c23653f1bb3eb --- /dev/null +++ b/src/dartlab/analysis/financial/research/scoring.py @@ -0,0 +1,470 @@ +"""정량 스코어링 프레임워크. + +Piotroski F-Score, Magic Formula, QMJ, Lynch Fair Value, +Buffett Owner Earnings, DuPont 3-factor. +모든 함수는 연간 시계열(buildAnnual 결과)을 입력으로 받는다. +""" + +from __future__ import annotations + +import math + +from dartlab.analysis.financial.research.types import ( + DuPontResult, + LynchFairValue, + MagicFormulaScore, + PiotroskiScore, + QmjScore, + QuantScores, +) + + +def _val(series: dict, sjDiv: str, snakeId: str, idx: int) -> float | None: + """시계열에서 특정 인덱스 값.""" + vals = series.get(sjDiv, {}).get(snakeId, []) + if 0 <= idx < len(vals): + return vals[idx] + return None + + +def _latest(series: dict, sjDiv: str, snakeId: str) -> float | None: + """최신 non-null 값.""" + vals = series.get(sjDiv, {}).get(snakeId, []) + for v in reversed(vals): + if v is not None: + return v + return None + + +def _latestTwo(series: dict, sjDiv: str, snakeId: str) -> tuple[float | None, float | None]: + """최근 2개 non-null (latest, prev).""" + vals = series.get(sjDiv, {}).get(snakeId, []) + found: list[float] = [] + for v in reversed(vals): + if v is not None: + found.append(v) + if len(found) == 2: + break + if len(found) == 2: + return found[0], found[1] + if len(found) == 1: + return found[0], None + return None, None + + +# ══════════════════════════════════════ +# Piotroski F-Score (0-9) +# ══════════════════════════════════════ + + +def calcPiotroski( + aSeries: dict[str, dict[str, list[float | None]]], +) -> PiotroskiScore: + """Piotroski F-Score 9-signal.""" + components: dict[str, bool] = {} + + # --- 수익성 (4 signals) --- + ni = _latest(aSeries, "IS", "net_profit") + ta = _latest(aSeries, "BS", "total_assets") + taPrev = _latestTwo(aSeries, "BS", "total_assets")[1] + ocf = _latest(aSeries, "CF", "operating_cashflow") + + roa = ni / ta if ni is not None and ta and ta > 0 else None + roaPrev = None + niPrev = _latestTwo(aSeries, "IS", "net_profit")[1] + if niPrev is not None and taPrev and taPrev > 0: + roaPrev = niPrev / taPrev + + # F1: ROA > 0 + components["roaPositive"] = roa is not None and roa > 0 + # F2: Operating CF > 0 + components["ocfPositive"] = ocf is not None and ocf > 0 + # F3: ROA increasing + components["roaIncreasing"] = roa is not None and roaPrev is not None and roa > roaPrev + # F4: CF > NI (accrual quality) + components["cfGtNi"] = ocf is not None and ni is not None and ocf > ni + + # --- 건전성 (3 signals) --- + ltd = _latest(aSeries, "BS", "long_term_borrowings") + ltdPrev = _latestTwo(aSeries, "BS", "long_term_borrowings")[1] + # fallback: total_liabilities + if ltd is None: + ltd = _latest(aSeries, "BS", "total_liabilities") + ltdPrev = _latestTwo(aSeries, "BS", "total_liabilities")[1] + + ca = _latest(aSeries, "BS", "current_assets") + cl = _latest(aSeries, "BS", "current_liabilities") + caPrev = _latestTwo(aSeries, "BS", "current_assets")[1] + clPrev = _latestTwo(aSeries, "BS", "current_liabilities")[1] + + cr = ca / cl if ca is not None and cl and cl > 0 else None + crPrev = caPrev / clPrev if caPrev is not None and clPrev and clPrev > 0 else None + + # F5: Long-term debt decreasing + components["debtDecreasing"] = ltd is not None and ltdPrev is not None and ltd <= ltdPrev + # F6: Current ratio increasing + components["currentRatioUp"] = cr is not None and crPrev is not None and cr > crPrev + # F7: No new shares issued (equity not diluted) + eq = _latest(aSeries, "BS", "total_stockholders_equity") + eqPrev = _latestTwo(aSeries, "BS", "total_stockholders_equity")[1] + shares = _latest(aSeries, "BS", "issued_shares") + sharesPrev = _latestTwo(aSeries, "BS", "issued_shares")[1] + if shares is not None and sharesPrev is not None: + components["noNewShares"] = shares <= sharesPrev + elif eq is not None and eqPrev is not None: + # 발행주식수 없으면 자본 변동으로 근사 + components["noNewShares"] = True # conservative + else: + components["noNewShares"] = True + + # --- 효율성 (2 signals) --- + gp = _latest(aSeries, "IS", "gross_profit") + gpPrev = _latestTwo(aSeries, "IS", "gross_profit")[1] + sales = _latest(aSeries, "IS", "sales") + salesPrev = _latestTwo(aSeries, "IS", "sales")[1] + + gm = gp / sales if gp is not None and sales and sales > 0 else None + gmPrev = gpPrev / salesPrev if gpPrev is not None and salesPrev and salesPrev > 0 else None + + # F8: Gross margin increasing + components["grossMarginUp"] = gm is not None and gmPrev is not None and gm > gmPrev + # F9: Asset turnover increasing + at = sales / ta if sales is not None and ta and ta > 0 else None + atPrev = salesPrev / taPrev if salesPrev is not None and taPrev and taPrev > 0 else None + components["assetTurnoverUp"] = at is not None and atPrev is not None and at > atPrev + + total = sum(1 for v in components.values() if v) + if total >= 7: + interp = "strong" + elif total >= 4: + interp = "moderate" + else: + interp = "weak" + + return PiotroskiScore(total=total, components=components, interpretation=interp) + + +# ══════════════════════════════════════ +# Magic Formula (Greenblatt) +# ══════════════════════════════════════ + + +def calcMagicFormula( + aSeries: dict[str, dict[str, list[float | None]]], + currentPrice: float | None = None, + sharesOutstanding: float | None = None, +) -> MagicFormulaScore: + """ROIC + Earnings Yield.""" + op = _latest(aSeries, "IS", "operating_profit") + ta = _latest(aSeries, "BS", "total_assets") + _latest(aSeries, "BS", "current_assets") + cl = _latest(aSeries, "BS", "current_liabilities") + cash = _latest(aSeries, "BS", "cash_and_cash_equivalents") + + # ROIC = EBIT / (Net Working Capital + Net Fixed Assets) + # 근사: ROIC = operating_profit / (total_assets - current_liabilities - cash) + roic = None + if op is not None and ta is not None: + investedCapital = ta - (cl or 0) - (cash or 0) + if investedCapital > 0: + roic = (op / investedCapital) * 100 + + # Earnings Yield = EBIT / EV + # 근사: EV = market_cap + debt - cash + ey = None + if op is not None and currentPrice and sharesOutstanding: + marketCap = currentPrice * sharesOutstanding + debt = _latest(aSeries, "BS", "total_liabilities") or 0 + ev = marketCap + debt - (cash or 0) + if ev > 0: + ey = (op / ev) * 100 + + return MagicFormulaScore(roic=_round(roic), earningsYield=_round(ey)) + + +# ══════════════════════════════════════ +# QMJ (Quality Minus Junk) +# ══════════════════════════════════════ + + +def calcQmj( + aSeries: dict[str, dict[str, list[float | None]]], + aYears: list[str], +) -> QmjScore: + """AQR Quality Minus Junk 4-pillar.""" + # --- Profitability --- + ni = _latest(aSeries, "IS", "net_profit") + eq = _latest(aSeries, "BS", "total_stockholders_equity") + ta = _latest(aSeries, "BS", "total_assets") + sales = _latest(aSeries, "IS", "sales") + gp = _latest(aSeries, "IS", "gross_profit") + ocf = _latest(aSeries, "CF", "operating_cashflow") + + roe = ni / eq if ni is not None and eq and eq > 0 else None + roa = ni / ta if ni is not None and ta and ta > 0 else None + gm = gp / sales if gp is not None and sales and sales > 0 else None + cfoa = ocf / ta if ocf is not None and ta and ta > 0 else None + + profScores = [x for x in [roe, roa, gm, cfoa] if x is not None] + profitability = sum(profScores) / len(profScores) if profScores else None + + # --- Growth (5Y 성장률 평균) --- + salesList = aSeries.get("IS", {}).get("sales", []) + validSales = [v for v in salesList if v is not None and v > 0] + growth = None + if len(validSales) >= 3: + n = len(validSales) - 1 + growth = ((validSales[-1] / validSales[0]) ** (1 / n) - 1) if n > 0 else None + + # --- Safety (부채비율 역수 + 유동비율) --- + tl = _latest(aSeries, "BS", "total_liabilities") + cl = _latest(aSeries, "BS", "current_liabilities") + ca = _latest(aSeries, "BS", "current_assets") + debtRatio = tl / ta if tl is not None and ta and ta > 0 else None + crRatio = ca / cl if ca is not None and cl and cl > 0 else None + safetyScores = [] + if debtRatio is not None: + safetyScores.append(1 - debtRatio) # 낮을수록 안전 + if crRatio is not None: + safetyScores.append(min(crRatio / 2, 1)) # 정규화 + safety = sum(safetyScores) / len(safetyScores) if safetyScores else None + + # --- Payout --- + div = _latest(aSeries, "CF", "dividends_paid") + payout = None + if div is not None and ni is not None and ni > 0: + payout = abs(div) / ni + + # composite + pillars = [profitability, growth, safety, payout] + validPillars = [p for p in pillars if p is not None] + composite = sum(validPillars) / len(validPillars) if validPillars else None + + return QmjScore( + profitability=_round(profitability), + growth=_round(growth), + safety=_round(safety), + payout=_round(payout), + composite=_round(composite), + ) + + +# ══════════════════════════════════════ +# Lynch Fair Value +# ══════════════════════════════════════ + + +def calcLynchFairValue( + aSeries: dict[str, dict[str, list[float | None]]], + currentPrice: float | None = None, + sharesOutstanding: float | None = None, +) -> LynchFairValue: + """Peter Lynch: Fair Value = EPS Growth Rate × EPS.""" + niList = aSeries.get("IS", {}).get("net_profit", []) + validNi = [(i, v) for i, v in enumerate(niList) if v is not None and v > 0] + + if len(validNi) < 3 or not sharesOutstanding or sharesOutstanding <= 0: + return LynchFairValue() + + latestNi = validNi[-1][1] + oldestNi = validNi[0][1] + nYears = validNi[-1][0] - validNi[0][0] + if nYears <= 0 or oldestNi <= 0: + return LynchFairValue() + + growthRate = ((latestNi / oldestNi) ** (1 / nYears) - 1) * 100 + eps = latestNi / sharesOutstanding + fairValue = growthRate * eps if growthRate > 0 else None + + pegRatio = None + if currentPrice and eps > 0 and growthRate > 0: + per = currentPrice / eps + pegRatio = per / growthRate + + signal = None + if fairValue is not None and currentPrice: + ratio = currentPrice / fairValue + if ratio < 0.8: + signal = "undervalued" + elif ratio > 1.2: + signal = "overvalued" + else: + signal = "fair" + + return LynchFairValue( + earningsGrowthRate=_round(growthRate), + fairValue=_round(fairValue), + currentPrice=currentPrice, + pegRatio=_round(pegRatio), + signal=signal, + ) + + +# ══════════════════════════════════════ +# Buffett Owner Earnings +# ══════════════════════════════════════ + + +def calcBuffettOwnerEarnings( + aSeries: dict[str, dict[str, list[float | None]]], +) -> float | None: + """Buffett Owner Earnings = NI + D&A - maintenance CAPEX.""" + ni = _latest(aSeries, "IS", "net_profit") + # depreciation 근사: operating_profit - ebit가 아니라 CF에서 D&A 추출 + ocf = _latest(aSeries, "CF", "operating_cashflow") + capex = _latest(aSeries, "CF", "purchase_of_property_plant_and_equipment") + + if ni is None or ocf is None: + return None + + # Owner Earnings ≈ OCF - maintenance CAPEX + # CAPEX는 음수일 수 있음 + capexAbs = abs(capex) if capex is not None else 0 + return _round(ocf - capexAbs * 0.7) # 유지보수 CAPEX ≈ 70% + + +# ══════════════════════════════════════ +# DuPont 3-Factor +# ══════════════════════════════════════ + + +def calcDuPont( + aSeries: dict[str, dict[str, list[float | None]]], + aYears: list[str], +) -> DuPontResult: + """DuPont 5-factor 분해: ROE = 세금부담 × 이자부담 × OPM × 회전율 × 레버리지.""" + niList = aSeries.get("IS", {}).get("net_profit", []) + salesList = aSeries.get("IS", {}).get("sales", []) + taList = aSeries.get("BS", {}).get("total_assets", []) + eqList = aSeries.get("BS", {}).get("total_stockholders_equity", []) or aSeries.get("BS", {}).get("total_equity", []) + opList = aSeries.get("IS", {}).get("operating_profit", []) + ebtList = aSeries.get("IS", {}).get("income_before_tax", []) or aSeries.get("IS", {}).get("profit_before_tax", []) + clList = aSeries.get("BS", {}).get("current_liabilities", []) + cashList = aSeries.get("BS", {}).get("cash_and_cash_equivalents", []) + + margins: list[float | None] = [] + turnovers: list[float | None] = [] + leverages: list[float | None] = [] + roes: list[float | None] = [] + periods: list[str] = [] + # 5-factor 확장 + taxBurdens: list[float | None] = [] + interestBurdens: list[float | None] = [] + opMargins: list[float | None] = [] + roicList: list[float | None] = [] + + n = min(len(niList), len(salesList), len(taList), len(eqList), len(aYears)) + start = max(0, n - 5) # 최근 5년 + for i in range(start, n): + ni = niList[i] + s = salesList[i] + ta = taList[i] + eq = eqList[i] + op = opList[i] if i < len(opList) else None + ebt = ebtList[i] if i < len(ebtList) else None + cl = clList[i] if i < len(clList) else None + cash = cashList[i] if i < len(cashList) else None + + margin = ni / s if ni is not None and s and s > 0 else None + turnover = s / ta if s is not None and ta and ta > 0 else None + lever = ta / eq if ta is not None and eq and eq > 0 else None + roe = ni / eq if ni is not None and eq and eq > 0 else None + + # 5-factor: taxBurden = NI/EBT, interestBurden = EBT/EBIT(=OP) + tb = ni / ebt if ni is not None and ebt is not None and ebt != 0 else None + ib = ebt / op if ebt is not None and op is not None and op != 0 else None + opm = op / s if op is not None and s and s > 0 else None + + # ROIC = NOPAT / IC, IC = TA - CL - Cash (근사) + roic = None + if op is not None and ta is not None: + ic = ta - (cl or 0) - (cash or 0) + if ic > 0: + nopat = op * (1 - 0.22) # 법인세율 22% 근사 + roic = nopat / ic + + margins.append(_round(margin)) + turnovers.append(_round(turnover)) + leverages.append(_round(lever)) + roes.append(_round(roe)) + taxBurdens.append(_round(tb)) + interestBurdens.append(_round(ib)) + opMargins.append(_round(opm)) + roicList.append(_round(roic)) + periods.append(aYears[i]) + + driver = _identifyDriver(margins, turnovers, leverages) + + return DuPontResult( + netMargin=margins, + assetTurnover=turnovers, + equityMultiplier=leverages, + roe=roes, + periods=periods, + driver=driver, + taxBurden=taxBurdens, + interestBurden=interestBurdens, + operatingMargin=opMargins, + roic=roicList, + ) + + +def _identifyDriver( + margins: list[float | None], + turnovers: list[float | None], + leverages: list[float | None], +) -> str: + """ROE 변동의 주요 동인 식별.""" + + def _cv(vals: list[float | None]) -> float: + valid = [v for v in vals if v is not None] + if len(valid) < 2: + return 0 + mean = sum(valid) / len(valid) + if mean == 0: + return 0 + variance = sum((v - mean) ** 2 for v in valid) / len(valid) + return math.sqrt(variance) / abs(mean) + + cvMargin = _cv(margins) + cvTurnover = _cv(turnovers) + cvLeverage = _cv(leverages) + + maxCv = max(cvMargin, cvTurnover, cvLeverage) + if maxCv == 0: + return "balanced" + if cvMargin == maxCv: + return "margin" + if cvTurnover == maxCv: + return "turnover" + return "leverage" + + +# ══════════════════════════════════════ +# 종합 +# ══════════════════════════════════════ + + +def calcAllScores( + aSeries: dict[str, dict[str, list[float | None]]], + aYears: list[str], + *, + currentPrice: float | None = None, + sharesOutstanding: float | None = None, +) -> QuantScores: + """모든 정량 스코어 한 번에 계산.""" + return QuantScores( + piotroski=calcPiotroski(aSeries), + magicFormula=calcMagicFormula(aSeries, currentPrice, sharesOutstanding), + qmj=calcQmj(aSeries, aYears), + lynchFairValue=calcLynchFairValue(aSeries, currentPrice, sharesOutstanding), + buffettOwnerEarnings=calcBuffettOwnerEarnings(aSeries), + dupont=calcDuPont(aSeries, aYears), + ) + + +def _round(v: float | None, ndigits: int = 4) -> float | None: + """None-safe round.""" + if v is None: + return None + return round(v, ndigits) diff --git a/src/dartlab/analysis/financial/research/sectorKpi.py b/src/dartlab/analysis/financial/research/sectorKpi.py new file mode 100644 index 0000000000000000000000000000000000000000..52e65bd680a56017806af4ed5aaaeeb64c0bda0e --- /dev/null +++ b/src/dartlab/analysis/financial/research/sectorKpi.py @@ -0,0 +1,129 @@ +"""섹터별 특화 KPI 레지스트리.""" + +from __future__ import annotations + +import logging + +from dartlab.analysis.financial.research.types import SectorKpi, SectorKpis +from dartlab.core.sector.types import Sector + +_log = logging.getLogger(__name__) + + +def _extractFromRatios(ratios: object, attr: str) -> float | None: + """RatioResult에서 속성 추출.""" + if ratios is None: + return None + return getattr(ratios, attr, None) + + +def _extractFromSeries( + aSeries: dict, sjDiv: str, snakeId: str, denomSjDiv: str | None = None, denomSnakeId: str | None = None +) -> float | None: + """시계열에서 최신값 또는 비율 추출.""" + vals = aSeries.get(sjDiv, {}).get(snakeId, []) + num = None + for v in reversed(vals): + if v is not None: + num = v + break + if num is None: + return None + if denomSjDiv is None: + return num + denomVals = aSeries.get(denomSjDiv, {}).get(denomSnakeId or "", []) + denom = None + for v in reversed(denomVals): + if v is not None: + denom = v + break + if denom is None or denom == 0: + return None + return num / denom + + +# ── 섹터별 KPI 정의 ── + +_FINANCIALS_KPIS = [ + ("interestMargin", "순이자마진", "IS", "interest_income", "BS", "total_assets", "%"), + ("costToIncome", "판관비율", "IS", "selling_and_admin_expense", "IS", "operating_revenue", "%"), +] + +_HEALTHCARE_KPIS = [ + ("rndIntensity", "R&D/매출", "IS", "research_and_development_expense", "IS", "sales", "%"), +] + +_IT_KPIS = [ + ("rndIntensity", "R&D/매출", "IS", "research_and_development_expense", "IS", "sales", "%"), +] + + +def _buildKpisFromDefs( + defs: list[tuple[str, str, str, str, str | None, str | None, str]], + aSeries: dict, + benchmarks: dict[str, float], +) -> list[SectorKpi]: + """KPI 정의 → SectorKpi 리스트.""" + kpis: list[SectorKpi] = [] + for name, label, numSj, numId, denomSj, denomId, unit in defs: + value = _extractFromSeries(aSeries, numSj, numId, denomSj, denomId) + if value is not None and unit == "%": + value = round(value * 100, 2) + bench = benchmarks.get(name) + assessment = "" + if value is not None and bench is not None: + if value > bench * 1.1: + assessment = "good" + elif value < bench * 0.9: + assessment = "bad" + else: + assessment = "neutral" + kpis.append( + SectorKpi( + name=name, + label=label, + value=round(value, 2) if value is not None else None, + benchmark=bench, + unit=unit, + assessment=assessment, + ) + ) + return kpis + + +# ── 섹터 벤치마크 (대략적 시장 평균) ── + +_BENCHMARKS: dict[str, dict[str, float]] = { + "FINANCIALS": {"interestMargin": 1.5, "costToIncome": 50.0}, + "HEALTHCARE": {"rndIntensity": 10.0}, + "IT": {"rndIntensity": 8.0}, +} + +_SECTOR_DEFS: dict[str, list] = { + "FINANCIALS": _FINANCIALS_KPIS, + "HEALTHCARE": _HEALTHCARE_KPIS, + "IT": _IT_KPIS, +} + + +def calcSectorKpis( + sector: Sector | None, + aSeries: dict[str, dict[str, list[float | None]]], + ratios: object = None, +) -> SectorKpis | None: + """섹터별 특화 KPI 계산.""" + if sector is None or sector == Sector.UNKNOWN: + return None + + sectorKey = sector.name + defs = _SECTOR_DEFS.get(sectorKey) + if not defs: + return None + + benchmarks = _BENCHMARKS.get(sectorKey, {}) + kpis = _buildKpisFromDefs(defs, aSeries, benchmarks) + + if not kpis: + return None + + return SectorKpis(sectorName=sector.value, kpis=kpis) diff --git a/src/dartlab/analysis/financial/research/spec.py b/src/dartlab/analysis/financial/research/spec.py new file mode 100644 index 0000000000000000000000000000000000000000..7c5bf82ad7becab9e7c770261cde515bab4dc663 --- /dev/null +++ b/src/dartlab/analysis/financial/research/spec.py @@ -0,0 +1,74 @@ +"""research 엔진 스펙 — 코드에서 자동 추출.""" + +from __future__ import annotations + +QUANT_MODELS = { + "piotroski": { + "label": "Piotroski F-Score", + "description": "9개 바이너리 시그널 (수익4 + 건전3 + 효율2)", + "range": "0-9", + }, + "magicFormula": { + "label": "Magic Formula", + "description": "Greenblatt ROIC + Earnings Yield", + }, + "qmj": { + "label": "Quality Minus Junk", + "description": "AQR 4-pillar (수익성, 성장, 안전, 배당)", + }, + "lynchFairValue": { + "label": "Lynch Fair Value", + "description": "EPS CAGR × EPS, PEG ratio", + }, + "buffettOwnerEarnings": { + "label": "Buffett Owner Earnings", + "description": "OCF - 유지보수 CAPEX", + }, + "dupont": { + "label": "DuPont 3-Factor", + "description": "순이익률 × 자산회전율 × 레버리지", + }, +} + +REPORT_SECTIONS = [ + "executive", + "thesis", + "overview", + "sectorKpis", + "financial", + "earningsQuality", + "quantScores", + "marketData", + "forecast", + "insightDetails", + "valuationAnalysis", + "riskAnalysis", + "peerAnalysis", + "narrativeAnalysis", +] + + +def buildSpec() -> dict: + """research 엔진 스펙.""" + return { + "name": "research", + "description": "종합 기업분석 리포트 — 종목코드 하나로 equity research 생성", + "summary": { + "sections": len(REPORT_SECTIONS), + "quantModels": len(QUANT_MODELS), + "method": "c.analysis('research', '종합리포트') 또는 dartlab.analysis('research', '종합리포트', c)", + }, + "detail": { + "sections": REPORT_SECTIONS, + "quantModels": QUANT_MODELS, + "dataSources": [ + "finance (BS/IS/CF/ratios/timeseries)", + "insight (10영역 등급 + 상세 + distress + anomalies)", + "analyst (DCF/DDM/상대가치 밸류에이션)", + "gather (주가/컨센서스/수급/거시)", + "sector (WICS 분류 + 섹터 배수)", + "esg (ESG 공시 분석)", + "forecast (자체 매출 예측)", + ], + }, + } diff --git a/src/dartlab/analysis/financial/research/thesis.py b/src/dartlab/analysis/financial/research/thesis.py new file mode 100644 index 0000000000000000000000000000000000000000..e5c63437b25db936854edb07946825471b8b9feb --- /dev/null +++ b/src/dartlab/analysis/financial/research/thesis.py @@ -0,0 +1,354 @@ +"""투자논거 합성 — insight 수치 기반 bull/bear/catalyst 생성.""" + +from __future__ import annotations + +from dartlab.analysis.financial.research.types import ( + EarningsQuality, + ExecutiveSummary, + ForecastData, + InsightDetail, + InvestmentThesis, + NarrativeAnalysis, + QuantScores, + RiskSection, + ValuationSection, +) + + +def synthesizeThesis( + executive: ExecutiveSummary, + *, + insightDetails: list[InsightDetail] | None = None, + valuationAnalysis: ValuationSection | None = None, + riskAnalysis: RiskSection | None = None, + quantScores: QuantScores | None = None, + earningsQuality: EarningsQuality | None = None, + forecastData: ForecastData | None = None, + narrativeAnalysis: NarrativeAnalysis | None = None, +) -> InvestmentThesis: + """분석 결과를 종합하여 bull/bear case 생성.""" + bull: list[str] = [] + bear: list[str] = [] + catalysts: list[str] = [] + monitoring: list[str] = [] + + # ── insight details 기반 (구체적 수치) ── + if insightDetails: + _addInsightBullBear(insightDetails, bull, bear, catalysts, monitoring) + + # ── valuation 기반 ── + if valuationAnalysis: + _addValuationBullBear(valuationAnalysis, executive, bull, bear, catalysts) + + # ── risk 기반 ── + if riskAnalysis: + _addRiskBear(riskAnalysis, bear, monitoring) + + # ── forecast 기반 ── + if forecastData: + _addForecastBullBear(forecastData, bull, catalysts) + + # ── Piotroski / Lynch (fallback — insight 없을 때 보조) ── + if quantScores: + _addQuantBullBear(quantScores, bull, bear, monitoring) + + # ── earnings quality ── + if earningsQuality: + if earningsQuality.assessment == "high": + bull.append("현금흐름 기반 이익의 질 양호") + elif earningsQuality.assessment in ("low", "questionable"): + bear.append(f"이익의 질 {earningsQuality.assessment} — 발생주의 비중 과다 주의") + + # ── upside (기본) ── + if executive.upside is not None: + if executive.upside > 0.15: + bull.append(f"컨센서스 대비 상승여력 {executive.upside:+.1%}") + elif executive.upside < -0.15: + bear.append(f"컨센서스 대비 고평가 {executive.upside:+.1%}") + + # ── narrative 교차분석 결과 병합 (v3) ── + if narrativeAnalysis and narrativeAnalysis.paragraphs: + _mergeNarrative(narrativeAnalysis, bull, bear, catalysts) + + # ── deduplicate & limit ── + bull = _dedupe(bull)[:7] + bear = _dedupe(bear)[:7] + catalysts = _dedupe(catalysts)[:5] + monitoring = _dedupe(monitoring)[:5] + + # ── confidence ── + totalSignals = len(bull) + len(bear) + confidence = 0.5 + if totalSignals > 0: + bullRatio = len(bull) / totalSignals + confidence = 0.3 + abs(bullRatio - 0.5) * 1.4 # 0.3~1.0 + + # ── summary narrative (v3: 교차분석 기반) ── + if narrativeAnalysis and narrativeAnalysis.paragraphs: + narrative = _buildNarrativeFromAnalysis(narrativeAnalysis, valuationAnalysis) + else: + narrative = _buildNarrative(executive, bull, bear, valuationAnalysis) + + return InvestmentThesis( + bullCase=bull, + bearCase=bear, + catalysts=catalysts, + monitoringPoints=monitoring, + confidence=round(min(confidence, 1.0), 2), + summaryNarrative=narrative, + ) + + +def _addInsightBullBear( + details: list[InsightDetail], + bull: list[str], + bear: list[str], + catalysts: list[str], + monitoring: list[str], +) -> None: + """insight 10영역의 구체적 수치를 bull/bear에 직접 사용.""" + # 영역별 중요도 (앞쪽일수록 중요) + + for detail in details: + if detail.area in ("risk", "opportunity"): + # risk/opportunity는 종합 플래그이므로 별도 처리 + if detail.area == "opportunity" and detail.grade in ("A", "B"): + for opp in detail.details[:2]: + catalysts.append(opp) + continue + + if detail.grade in ("A", "B"): + for d in detail.details[:2]: + bull.append(d) + for opp in detail.opportunities[:1]: + catalysts.append(opp) + elif detail.grade in ("D", "F"): + for d in detail.details[:2]: + # 등급은 D/F인데 detail 텍스트가 긍정적이면 skip + if _isPositiveText(d): + continue + bear.append(d) + for r in detail.risks[:1]: + monitoring.append(r) + elif detail.grade == "C": + # C등급은 모니터링만 + for r in detail.risks[:1]: + monitoring.append(r) + + +def _addValuationBullBear( + va: ValuationSection, + executive: ExecutiveSummary, + bull: list[str], + bear: list[str], + catalysts: list[str], +) -> None: + """밸류에이션 결과를 bull/bear에 반영.""" + if va.verdict == "저평가": + if va.fairValueRange: + lo, hi = va.fairValueRange + bull.append(f"3가지 밸류에이션 기준 저평가 (적정 {lo:,.0f}~{hi:,.0f}원)") + if va.dcfMos is not None and va.dcfMos > 20: + bull.append(f"DCF 안전마진 {va.dcfMos:.0f}%") + catalysts.append("시장이 내재가치를 반영할 촉매 대기") + elif va.verdict == "고평가": + if va.fairValueRange: + lo, hi = va.fairValueRange + bear.append(f"3가지 밸류에이션 기준 고평가 (적정 {lo:,.0f}~{hi:,.0f}원)") + + +def _addRiskBear( + ra: RiskSection, + bear: list[str], + monitoring: list[str], +) -> None: + """리스크 분석 결과를 bear/monitoring에 반영.""" + if ra.distress: + d = ra.distress + if d.level in ("danger", "critical"): + bear.append(f"부실 위험 {d.level} (신용등급 {d.creditGrade})") + for rf in d.riskFactors[:2]: + bear.append(rf) + elif d.level == "warning": + monitoring.append(f"부실 주의 (신용 {d.creditGrade}, 종합 {d.overall:.0f}/100)") + if d.cashRunwayMonths is not None and d.cashRunwayMonths < 18: + bear.append(f"현금소진 {d.cashRunwayMonths:.0f}개월 이내 예상") + + if ra.anomalies: + for item in ra.anomalies.items: + sev = item.get("severity", "") + text = item.get("text", "") + if sev in ("critical", "danger"): + bear.append(f"이상치: {text}") + elif sev == "warning" and len(monitoring) < 5: + monitoring.append(text) + + +def _addForecastBullBear( + fc: ForecastData, + bull: list[str], + catalysts: list[str], +) -> None: + """예측 데이터를 bull/catalyst에 반영.""" + if fc.selfForecast: + sf = fc.selfForecast + gr = sf.get("growthRate") + method = sf.get("method", "") + conf = sf.get("confidence", "") + if gr is not None and gr > 5: + bull.append(f"자체 매출 예측 +{gr:.1f}% ({method})") + if gr is not None and gr > 0: + catalysts.append(f"매출 성장 +{gr:.1f}% 전망 (신뢰도 {conf})") + + if fc.scenarioSummary: + sc = fc.scenarioSummary + bullVal = sc.get("bull") + baseVal = sc.get("base") + if bullVal and baseVal and bullVal > baseVal: + catalysts.append(f"시나리오 상방 {bullVal:,.0f}원 (기준 {baseVal:,.0f}원)") + + +def _addQuantBullBear( + qs: QuantScores, + bull: list[str], + bear: list[str], + monitoring: list[str], +) -> None: + """정량 스코어 보조 bull/bear.""" + if qs.piotroski: + f = qs.piotroski + if f.total >= 7: + bull.append(f"Piotroski F-Score {f.total}/9 — 펀더멘탈 건전") + elif f.total <= 3: + bear.append(f"Piotroski F-Score {f.total}/9 — 펀더멘탈 취약") + + if qs.lynchFairValue: + lv = qs.lynchFairValue + if lv.signal == "undervalued" and lv.pegRatio is not None: + bull.append(f"PEG {lv.pegRatio:.2f} — 성장 대비 저렴") + elif lv.signal == "overvalued" and lv.pegRatio is not None: + bear.append(f"PEG {lv.pegRatio:.2f} — 성장 대비 비쌈") + + if qs.dupont: + dp = qs.dupont + if dp.driver == "leverage": + monitoring.append("레버리지가 ROE 주도 — 부채비율 추이 주시") + elif dp.driver == "margin": + monitoring.append("순이익률 변동이 ROE 주도 — 마진 추이 주시") + + +def _buildNarrative( + executive: ExecutiveSummary, + bull: list[str], + bear: list[str], + va: ValuationSection | None, +) -> str: + """1-2문장 핵심 투자 요약.""" + parts = [] + + nBull = len(bull) + nBear = len(bear) + if nBull > nBear * 2: + parts.append("긍정적 신호가 압도적으로 우세") + elif nBull > nBear: + parts.append("긍정적 신호 우세") + elif nBear > nBull * 2: + parts.append("부정적 신호가 압도적으로 우세") + elif nBear > nBull: + parts.append("부정적 신호 우세") + else: + parts.append("긍정/부정 신호 균형") + + if va and va.verdict: + parts.append(f"밸류에이션 {va.verdict}") + + if executive.opinion: + parts.append(f"투자의견 {executive.opinion}") + + return " | ".join(parts) + "." + + +def _isPositiveText(text: str) -> bool: + """텍스트가 명백히 긍정적 의미인지 판별.""" + positiveKeywords = ["안전", "양호", "우수", "개선", "충분", "안정", "미미"] + negativeKeywords = ["위험", "부족", "악화", "적자", "과다", "취약"] + hasPositive = any(kw in text for kw in positiveKeywords) + hasNegative = any(kw in text for kw in negativeKeywords) + return hasPositive and not hasNegative + + +def _dedupe(items: list[str]) -> list[str]: + """중복 제거 (순서 유지).""" + seen: set[str] = set() + result: list[str] = [] + for item in items: + if item not in seen: + seen.add(item) + result.append(item) + return result + + +def _mergeNarrative( + na: NarrativeAnalysis, + bull: list[str], + bear: list[str], + catalysts: list[str], +) -> None: + """narrative 교차분석 결과를 bull/bear/catalysts에 병합 (요약만).""" + for p in na.paragraphs: + # Deep Analysis 패널에 full body가 이미 표시되므로 짧은 title만 삽입 + label = p.title or _firstSentence(p.body) + if not label: + continue + if p.severity == "positive": + bull.append(label) + elif p.severity in ("negative", "warning"): + bear.append(label) + for fi in na.forwardImplications: + catalysts.append(fi) + + +def _buildNarrativeFromAnalysis( + na: NarrativeAnalysis, + va: ValuationSection | None, +) -> str: + """교차분석 기반 summaryNarrative 생성.""" + parts: list[str] = [] + positive = [p for p in na.paragraphs if p.severity == "positive"] + negative = [p for p in na.paragraphs if p.severity in ("negative", "warning")] + + if positive: + parts.append(_firstSentence(positive[0].body)) + if negative: + parts.append(_firstSentence(negative[0].body)) + if va and va.verdict: + parts.append(f"밸류에이션 {va.verdict}") + + return " | ".join(parts) + "." if parts else "" + + +def _firstSentence(text: str) -> str: + """본문에서 첫 문장 추출 (숫자 소수점 구분).""" + # ". " 패턴으로 문장 분리 (소수점 ".1%" 등과 구분) + import re + + # 마침표 뒤에 공백 또는 문자열 끝 + sentences = re.split(r"\.\s", text, maxsplit=1) + return sentences[0] if sentences else text + + +def classifyProfile(grades: dict[str, str], upside: float | None) -> str: + """투자 프로파일 분류.""" + gradeValues = {"A": 5, "B": 4, "C": 3, "D": 2, "F": 1} + vals = [gradeValues.get(g, 3) for g in grades.values()] + avg = sum(vals) / len(vals) if vals else 3 + + if avg >= 4.0: + return "premium" + if avg >= 3.5: + return "growth" + if avg >= 2.5: + return "stable" + if avg >= 2.0: + return "caution" + return "distress" diff --git a/src/dartlab/analysis/financial/research/types.py b/src/dartlab/analysis/financial/research/types.py new file mode 100644 index 0000000000000000000000000000000000000000..5841d66f40d7e205a109295228103152ed38fe90 --- /dev/null +++ b/src/dartlab/analysis/financial/research/types.py @@ -0,0 +1,1109 @@ +"""Research 엔진 데이터 타입 — 종합 기업분석 리포트.""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass, field + +# ══════════════════════════════════════ +# 정량 스코어링 +# ══════════════════════════════════════ + + +@dataclass +class PiotroskiScore: + """Piotroski F-Score (0-9).""" + + total: int = 0 + components: dict[str, bool] = field(default_factory=dict) + interpretation: str = "" # "strong" | "moderate" | "weak" + + +@dataclass +class MagicFormulaScore: + """Greenblatt Magic Formula.""" + + roic: float | None = None + earningsYield: float | None = None + + +@dataclass +class QmjScore: + """AQR Quality Minus Junk (4-pillar).""" + + profitability: float | None = None + growth: float | None = None + safety: float | None = None + payout: float | None = None + composite: float | None = None + + +@dataclass +class LynchFairValue: + """Peter Lynch Fair Value.""" + + earningsGrowthRate: float | None = None # 5Y EPS CAGR (%) + fairValue: float | None = None # growthRate * EPS + currentPrice: float | None = None + pegRatio: float | None = None + signal: str | None = None # "undervalued" | "fair" | "overvalued" + + +@dataclass +class DuPontResult: + """DuPont 5-factor 분해.""" + + netMargin: list[float | None] = field(default_factory=list) + assetTurnover: list[float | None] = field(default_factory=list) + equityMultiplier: list[float | None] = field(default_factory=list) + roe: list[float | None] = field(default_factory=list) + periods: list[str] = field(default_factory=list) + driver: str = "" # "margin" | "turnover" | "leverage" | "balanced" + # 5-factor 확장 + taxBurden: list[float | None] = field(default_factory=list) # NI/EBT + interestBurden: list[float | None] = field(default_factory=list) # EBT/EBIT + operatingMargin: list[float | None] = field(default_factory=list) # EBIT/Sales + roic: list[float | None] = field(default_factory=list) # NOPAT/IC + + +@dataclass +class QuantScores: + """정량 스코어링 프레임워크 종합.""" + + piotroski: PiotroskiScore | None = None + magicFormula: MagicFormulaScore | None = None + qmj: QmjScore | None = None + lynchFairValue: LynchFairValue | None = None + buffettOwnerEarnings: float | None = None + dupont: DuPontResult | None = None + + +# ══════════════════════════════════════ +# 리포트 섹션 +# ══════════════════════════════════════ + + +@dataclass +class ResearchMeta: + """리포트 메타데이터.""" + + stockCode: str = "" + corpName: str = "" + generatedAt: str = "" + dataAsOf: str = "" + coverageScore: float = 0.0 # 0~1 + market: str = "KR" + currency: str = "KRW" + warnings: list[str] = field(default_factory=list) + + +@dataclass +class ExecutiveSummary: + """핵심 요약.""" + + opinion: str = "" # "강력매수" ~ "강력매도" + profile: str = "" # "premium" | "growth" | "stable" | "caution" | "distress" + targetPrice: float | None = None + currentPrice: float | None = None + upside: float | None = None + thesis: str = "" + grades: dict[str, str] = field(default_factory=dict) + keyMetrics: list[dict[str, object]] = field(default_factory=list) + + +@dataclass +class InvestmentThesis: + """투자논거.""" + + bullCase: list[str] = field(default_factory=list) + bearCase: list[str] = field(default_factory=list) + catalysts: list[str] = field(default_factory=list) + monitoringPoints: list[str] = field(default_factory=list) + confidence: float = 0.0 + summaryNarrative: str = "" + + +@dataclass +class CompanyOverview: + """기업 개요.""" + + description: str | None = None + sectorName: str | None = None + industryName: str | None = None + newsHeadlines: list[str] = field(default_factory=list) + + +@dataclass +class SectorKpi: + """단일 섹터 KPI.""" + + name: str = "" + label: str = "" + value: float | None = None + benchmark: float | None = None + unit: str = "" + assessment: str = "" # "good" | "neutral" | "bad" + + +@dataclass +class SectorKpis: + """섹터별 특화 KPI.""" + + sectorName: str = "" + kpis: list[SectorKpi] = field(default_factory=list) + + +@dataclass +class BeneishDetail: + """Beneish M-Score 8변수 개별.""" + + dsri: float | None = None # 매출채권지수 + gmi: float | None = None # 매출총이익지수 + aqi: float | None = None # 자산품질지수 + sgi: float | None = None # 매출성장지수 + depi: float | None = None # 감가상각지수 + sgai: float | None = None # 판관비지수 + lvgi: float | None = None # 레버리지지수 + tata: float | None = None # 발생주의비율 + mScore: float | None = None # 종합 M-Score + flagged: list[str] = field(default_factory=list) # 경고 변수명 리스트 + + +@dataclass +class FinancialAnalysis: + """재무 분석.""" + + dupont: DuPontResult | None = None + marginTrends: dict[str, list[float | None]] = field(default_factory=dict) + periods: list[str] = field(default_factory=list) + # BS 요약 시계열 + bsSummary: dict[str, list[float | None]] = field(default_factory=dict) + # CF 요약 시계열 + cfSummary: dict[str, list[float | None]] = field(default_factory=dict) + # 3표 연결 지표 시계열 + crossStatementMetrics: dict[str, list[float | None]] = field(default_factory=dict) + # Common-Size 분석 (Lens 2) + isCommonSize: dict[str, list[float | None]] = field(default_factory=dict) # IS항목/매출 % + bsCommonSize: dict[str, list[float | None]] = field(default_factory=dict) # BS항목/자산 % + + +@dataclass +class EarningsQuality: + """이익의 질.""" + + cfToNi: float | None = None + accrualRatio: float | None = None + ccc: float | None = None # Cash Conversion Cycle (days) + beneishMScore: float | None = None + assessment: str = "" # "high" | "moderate" | "low" | "questionable" + + +@dataclass +class MarketData: + """시장 데이터 요약.""" + + currentPrice: float | None = None + marketCap: float | None = None + per: float | None = None + pbr: float | None = None + dividendYield: float | None = None + high52w: float | None = None + low52w: float | None = None + targetPrice: float | None = None + analystCount: int | None = None + buyRatio: float | None = None + foreignHoldingRatio: float | None = None + baseRate: float | None = None + usdKrw: float | None = None + + +@dataclass +class ForecastData: + """전망 데이터.""" + + revenueConsensus: list[dict[str, object]] = field(default_factory=list) + selfForecast: dict[str, object] | None = None + scenarioSummary: dict[str, object] | None = None + + +# ══════════════════════════════════════ +# 새 섹션 타입 — v2 +# ══════════════════════════════════════ + + +@dataclass +class InsightDetail: + """insight 영역 상세 — 등급 + 근거 수치.""" + + area: str = "" + grade: str = "" + summary: str = "" + details: list[str] = field(default_factory=list) + risks: list[str] = field(default_factory=list) + opportunities: list[str] = field(default_factory=list) + + +@dataclass +class DistressSection: + """부실 리스크 스코어카드.""" + + level: str = "" # safe/watch/warning/danger/critical + overall: float = 0.0 # 0~100 + creditGrade: str = "" # AAA~D + creditDescription: str = "" + riskFactors: list[str] = field(default_factory=list) + cashRunwayMonths: float | None = None + axesSummary: list[dict[str, object]] = field(default_factory=list) + + +@dataclass +class AnomalySection: + """이상치 탐지 결과.""" + + items: list[dict[str, object]] = field(default_factory=list) + criticalCount: int = 0 + warningCount: int = 0 + + +@dataclass +class RiskSection: + """종합 리스크 — distress + anomalies + insight.risk 통합.""" + + distress: DistressSection | None = None + anomalies: AnomalySection | None = None + insightRisk: InsightDetail | None = None + riskNarrative: str = "" + + +@dataclass +class ValuationSection: + """밸류에이션 — DCF/DDM/상대가치 종합.""" + + dcfPerShare: float | None = None + dcfMos: float | None = None # 안전마진 (%) + ddmPerShare: float | None = None + relativePerShare: float | None = None + fairValueRange: tuple[float, float] | None = None + verdict: str = "" # "저평가" | "적정" | "고평가" + methodology: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + +@dataclass +class PeerSection: + """동종업 비교 — 섹터 배수 기반 (OOM 안전).""" + + sectorName: str = "" + sectorMultiples: dict[str, float] = field(default_factory=dict) + companyMultiples: dict[str, float | None] = field(default_factory=dict) + premiumDiscount: dict[str, float | None] = field(default_factory=dict) + peerNarrative: str = "" + + +# ══════════════════════════════════════ +# Narrative Analysis — v3 +# ══════════════════════════════════════ + + +@dataclass +class NarrativeParagraph: + """단일 교차분석 서술 단위.""" + + dimension: str = "" # "dupont"|"margin"|"growth"|"cashflow"|"efficiency"|"segment"|"sectorRelative" + title: str = "" + body: str = "" # 2-3문장 교차분석 서술 + severity: str = "" # "positive"|"neutral"|"negative"|"warning" + + +@dataclass +class NarrativeAnalysis: + """7차원 교차분석 서술 결과.""" + + paragraphs: list[NarrativeParagraph] = field(default_factory=list) + forwardImplications: list[str] = field(default_factory=list) + crossReferences: list[str] = field(default_factory=list) + + +# ══════════════════════════════════════ +# 렌더링 헬퍼 +# ══════════════════════════════════════ + + +def _fmtNum(v: float | None, suffix: str = "", precision: int = 1) -> str: + """숫자 포맷.""" + if v is None: + return "-" + return f"{v:,.{precision}f}{suffix}" + + +def _fmtBig(v: float | None) -> str: + """자동 단위 포맷 (조/억/만).""" + if v is None: + return "-" + av = abs(v) + if av >= 1e12: + return f"{v / 1e12:,.1f}조" + if av >= 1e8: + return f"{v / 1e8:,.0f}억" + if av >= 1e4: + return f"{v / 1e4:,.0f}만" + return f"{v:,.0f}" + + +def _fmtPrice(v: float | None) -> str: + """주가 포맷.""" + if v is None: + return "-" + return f"{v:,.0f}원" + + +def _opinionColor(opinion: str) -> str: + """투자의견 → rich 색상.""" + m = {"강력매수": "bold green", "매수": "green", "중립": "yellow", "매도": "red", "강력매도": "bold red"} + return m.get(opinion, "white") + + +def _profileBadge(profile: str) -> tuple[str, str]: + """프로파일 → (뱃지, 색상).""" + m = { + "premium": ("★", "bold green"), + "growth": ("▲", "green"), + "stable": ("●", "cyan"), + "caution": ("▼", "yellow"), + "distress": ("✗", "bold red"), + } + return m.get(profile, ("?", "white")) + + +def _assessColor(assessment: str) -> str: + """평가 → 색상.""" + return {"high": "green", "good": "green", "moderate": "yellow", "neutral": "yellow"}.get(assessment, "red") + + +def _distressColor(level: str) -> str: + """부실 수준 → 색상.""" + return {"safe": "green", "watch": "cyan", "warning": "yellow", "danger": "red", "critical": "bold red"}.get( + level, "white" + ) + + +def _verdictColor(verdict: str) -> str: + """밸류에이션 판정 → 색상.""" + return {"저평가": "green", "적정": "yellow", "고평가": "red"}.get(verdict, "white") + + +# ══════════════════════════════════════ +# ResearchResult +# ══════════════════════════════════════ + + +@dataclass +class ResearchResult: + """종합 기업분석 리포트.""" + + # 기존 섹션 + meta: ResearchMeta = field(default_factory=ResearchMeta) + executive: ExecutiveSummary = field(default_factory=ExecutiveSummary) + thesis: InvestmentThesis = field(default_factory=InvestmentThesis) + overview: CompanyOverview | None = None + sectorKpis: SectorKpis | None = None + financial: FinancialAnalysis | None = None + earningsQuality: EarningsQuality | None = None + quantScores: QuantScores | None = None + marketData: MarketData | None = None + forecast: ForecastData | None = None + + # v2 새 섹션 + insightDetails: list[InsightDetail] = field(default_factory=list) + valuationAnalysis: ValuationSection | None = None + riskAnalysis: RiskSection | None = None + peerAnalysis: PeerSection | None = None + + # v3 교차분석 서술 + narrativeAnalysis: NarrativeAnalysis | None = None + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + try: + from rich.console import Console + + console = Console(highlight=False, force_terminal=True) + with console.capture() as capture: + self._richPrint(console) + return capture.get() + except ImportError: + return self.summary() + + def _repr_html_(self) -> str: + """Jupyter / Colab / Marimo HTML 렌더링.""" + try: + from rich.console import Console + + console = Console(record=True, force_jupyter=True, width=100) + self._richPrint(console) + return console.export_html(inline_styles=True) + except ImportError: + return f"
{self.summary()}
" + + def _richPrint(self, console) -> None: + """rich Console에 전체 리포트 출력.""" + from rich.panel import Panel + from rich.table import Table + from rich.text import Text + + name = self.meta.corpName or self.meta.stockCode + ex = self.executive + + # ── 1. Header ── + header = Text() + header.append(f"{name}\n", style="bold white") + header.append(f"생성일 {self.meta.generatedAt[:10] if self.meta.generatedAt else '-'} ") + header.append("커버리지 ") + score = self.meta.coverageScore + filled = int(score * 15) + header.append("█" * filled, style="green") + header.append("░" * (15 - filled), style="dim") + header.append(f" {score:.0%}") + if self.meta.warnings: + header.append(f"\n⚠ {', '.join(self.meta.warnings)}", style="yellow") + console.print(Panel(header, title="[bold]Research Report[/bold]", border_style="blue")) + + # ── 2. Executive Summary ── + execTable = Table(show_header=False, box=None, padding=(0, 2)) + execTable.add_column(style="dim", width=10) + execTable.add_column() + if ex.opinion: + badge, bcolor = _profileBadge(ex.profile) + execTable.add_row( + "투자의견", f"[{_opinionColor(ex.opinion)}]{ex.opinion}[/] [{bcolor}]{badge} {ex.profile}[/]" + ) + if ex.currentPrice is not None: + priceText = f"{ex.currentPrice:,.0f}" + if ex.targetPrice: + priceText += f" → [bold]{ex.targetPrice:,.0f}[/bold]" + if ex.upside is not None: + color = "green" if ex.upside > 0 else "red" + priceText += f" [{color}]({ex.upside:+.1%})[/{color}]" + execTable.add_row("가격", priceText) + if ex.grades: + gradeText = Text() + for k, v in ex.grades.items(): + color = "green" if v in ("A", "B") else "yellow" if v == "C" else "red" + gradeText.append(f" {k}=", style="dim") + gradeText.append(v, style=f"bold {color}") + execTable.add_row("등급", gradeText) + if ex.keyMetrics: + metricText = " | ".join(f"{m['label']} {m['value']}{m.get('unit', '')}" for m in ex.keyMetrics) + execTable.add_row("핵심지표", metricText) + console.print(Panel(execTable, title="[bold]Executive Summary[/bold]", border_style="cyan")) + + # ── 3. Investment Thesis ── + self._renderThesis(console) + + # ── 3.5 Narrative Analysis ── + self._renderNarrative(console) + + # ── 4. Valuation ── + self._renderValuation(console) + + # ── 5. Quant + Earnings Quality ── + self._renderQuantAndQuality(console) + + # ── 6. Risk Analysis ── + self._renderRisk(console) + + # ── 7. Forecast ── + self._renderForecast(console) + + # ── 8. Peer ── + self._renderPeer(console) + + # ── 9. Market Data ── + self._renderMarket(console) + + # ── 10. Financial Trends ── + self._renderFinancial(console) + + # ── 11. Sector KPIs ── + self._renderSectorKpis(console) + + # ── 12. Overview ── + if self.overview and self.overview.description: + from rich.panel import Panel + + console.print(Panel(self.overview.description[:300], title="[bold]Overview[/bold]", border_style="dim")) + + # ── 13. Disclaimer ── + console.print(f"\n[dim italic]{self.DISCLAIMER}[/]") + + def _renderThesis(self, console) -> None: + """Investment Thesis 패널.""" + from rich.panel import Panel + from rich.text import Text + + th = self.thesis + thText = Text() + if th.summaryNarrative: + thText.append(f"{th.summaryNarrative}\n\n", style="bold") + if th.bullCase: + thText.append("Bull Case\n", style="bold green") + for b in th.bullCase: + thText.append(f" + {b}\n", style="green") + if th.bearCase: + thText.append("Bear Case\n", style="bold red") + for b in th.bearCase: + thText.append(f" - {b}\n", style="red") + if th.catalysts: + thText.append("촉매\n", style="bold yellow") + for c in th.catalysts: + thText.append(f" ▸ {c}\n", style="yellow") + if th.monitoringPoints: + thText.append("모니터링\n", style="bold dim") + for m in th.monitoringPoints: + thText.append(f" ◦ {m}\n", style="dim") + thText.append(f"\n확신도 {th.confidence:.0%}", style="bold") + console.print(Panel(thText, title="[bold]Investment Thesis[/bold]", border_style="green")) + + def _renderNarrative(self, console) -> None: + """Narrative Analysis 패널.""" + na = self.narrativeAnalysis + if na is None or not na.paragraphs: + return + from rich.panel import Panel + from rich.text import Text + + nt = Text() + severityStyle = { + "positive": "green", + "negative": "red", + "warning": "yellow", + "neutral": "dim", + } + for p in na.paragraphs: + color = severityStyle.get(p.severity, "white") + icon = {"positive": "▲", "negative": "▼", "warning": "⚠", "neutral": "●"}.get(p.severity, "●") + nt.append(f"{icon} {p.title}\n", style=f"bold {color}") + nt.append(f" {p.body}\n\n") + if na.crossReferences: + nt.append("교차분석\n", style="bold cyan") + for cr in na.crossReferences: + nt.append(f" ◆ {cr}\n", style="cyan") + nt.append("\n") + if na.forwardImplications: + nt.append("전망 시사점\n", style="bold magenta") + for fi in na.forwardImplications: + nt.append(f" → {fi}\n", style="magenta") + console.print(Panel(nt, title="[bold]Deep Analysis[/bold]", border_style="bright_blue")) + + def _renderValuation(self, console) -> None: + """Valuation 패널.""" + va = self.valuationAnalysis + if va is None: + return + if ( + va.dcfPerShare is None + and va.ddmPerShare is None + and va.relativePerShare is None + and va.fairValueRange is None + ): + return + from rich.panel import Panel + from rich.table import Table + + vt = Table(show_header=True, box=None, padding=(0, 2)) + vt.add_column("방법론") + vt.add_column("적정가", justify="right") + vt.add_column("비고") + if va.dcfPerShare is not None: + mos = f"안전마진 {va.dcfMos:.0f}%" if va.dcfMos is not None else "" + vt.add_row("DCF", _fmtPrice(va.dcfPerShare), mos) + if va.ddmPerShare is not None: + vt.add_row("DDM (배당)", _fmtPrice(va.ddmPerShare), "") + if va.relativePerShare is not None: + vt.add_row("상대가치", _fmtPrice(va.relativePerShare), "섹터 배수 기반") + if va.fairValueRange: + lo, hi = va.fairValueRange + color = _verdictColor(va.verdict) + vt.add_row( + "[bold]종합[/bold]", + f"[bold]{lo:,.0f} ~ {hi:,.0f}원[/bold]", + f"[{color}]{va.verdict}[/{color}]", + ) + for m in va.methodology: + vt.add_row("", "", f"[dim]{m}[/dim]") + for w in va.warnings: + vt.add_row("", "", f"[yellow]⚠ {w}[/yellow]") + console.print(Panel(vt, title="[bold]Valuation[/bold]", border_style="magenta")) + + def _renderQuantAndQuality(self, console) -> None: + """Quant Scores + Earnings Quality 패널 (side by side).""" + from rich.columns import Columns + from rich.panel import Panel + from rich.table import Table + + panels = [] + if self.quantScores: + qt = Table(show_header=False, box=None, padding=(0, 1)) + qt.add_column(style="dim", width=12) + qt.add_column() + qs = self.quantScores + if qs.piotroski: + p = qs.piotroski + bar = "[green]●[/]" * p.total + "[dim]○[/]" * (9 - p.total) + qt.add_row("Piotroski", f"{bar} {p.total}/9 ({p.interpretation})") + if qs.magicFormula: + mf = qs.magicFormula + parts = [] + if mf.roic is not None: + parts.append(f"ROIC {mf.roic:.1f}%") + if mf.earningsYield is not None: + parts.append(f"EY {mf.earningsYield:.1f}%") + qt.add_row("Magic Formula", " | ".join(parts) if parts else "-") + if qs.qmj and qs.qmj.composite is not None: + q = qs.qmj + qt.add_row( + "QMJ", + f"{q.composite:.2f} (P{q.profitability:.1f} G{q.growth:.1f} S{q.safety:.1f})", + ) + if qs.lynchFairValue: + lv = qs.lynchFairValue + sig = { + "undervalued": "[green]저평가[/]", + "overvalued": "[red]고평가[/]", + "fair": "[yellow]적정[/]", + }.get(lv.signal or "", "") + parts = [] + if lv.fairValue is not None: + parts.append(f"적정 {lv.fairValue:,.0f}") + if lv.pegRatio is not None: + parts.append(f"PEG {lv.pegRatio:.2f}") + parts.append(sig) + qt.add_row("Lynch", " ".join(parts)) + if qs.buffettOwnerEarnings is not None: + qt.add_row("Buffett OE", _fmtBig(qs.buffettOwnerEarnings)) + if qs.dupont: + qt.add_row("DuPont", f"주도: [bold]{qs.dupont.driver}[/bold]") + panels.append(Panel(qt, title="[bold]Quant Scores[/bold]", border_style="magenta")) + + if self.earningsQuality: + eq = self.earningsQuality + et = Table(show_header=False, box=None, padding=(0, 1)) + et.add_column(style="dim", width=10) + et.add_column() + et.add_row("평가", f"[{_assessColor(eq.assessment)}]{eq.assessment}[/]") + if eq.cfToNi is not None: + et.add_row("CF/NI", _fmtNum(eq.cfToNi, precision=2)) + if eq.accrualRatio is not None: + et.add_row("Accrual", _fmtNum(eq.accrualRatio, precision=4)) + if eq.ccc is not None: + et.add_row("CCC", f"{eq.ccc:.0f}일") + if eq.beneishMScore is not None: + color = "green" if eq.beneishMScore < -2.22 else "red" + et.add_row("Beneish M", f"[{color}]{eq.beneishMScore:.2f}[/]") + panels.append(Panel(et, title="[bold]이익의 질[/bold]", border_style="magenta")) + + if panels: + console.print(Columns(panels, equal=True, expand=True)) + + def _renderRisk(self, console) -> None: + """Risk Analysis 패널.""" + ra = self.riskAnalysis + if ra is None: + return + from rich.panel import Panel + from rich.text import Text + + riskText = Text() + if ra.distress: + d = ra.distress + color = _distressColor(d.level) + riskText.append("부실 위험: ", style="dim") + riskText.append(f"{d.level.upper()}", style=f"bold {color}") + riskText.append(f" (종합 {d.overall:.0f}/100, 신용 {d.creditGrade})\n") + if d.cashRunwayMonths is not None: + riskText.append(f"현금소진 예상: {d.cashRunwayMonths:.0f}개월\n", style="yellow") + for rf in d.riskFactors[:3]: + riskText.append(f" ▸ {rf}\n", style="dim") + if ra.anomalies and ra.anomalies.items: + a = ra.anomalies + riskText.append("\n이상치: ", style="dim") + riskText.append(f"Critical {a.criticalCount}", style="bold red") + riskText.append(f" / Warning {a.warningCount}\n", style="yellow") + for item in a.items[:4]: + sev = item.get("severity", "") + color = "red" if sev in ("critical", "danger") else "yellow" + riskText.append(f" ● {item.get('text', '')}\n", style=color) + if ra.riskNarrative: + riskText.append(f"\n{ra.riskNarrative}", style="italic") + console.print(Panel(riskText, title="[bold]Risk Analysis[/bold]", border_style="red")) + + def _renderForecast(self, console) -> None: + """Forecast 패널.""" + fc = self.forecast + if fc is None: + return + from rich.panel import Panel + from rich.table import Table + + ft = Table(show_header=True, box=None, padding=(0, 1)) + ft.add_column("연도", style="dim") + ft.add_column("매출", justify="right") + ft.add_column("영업이익", justify="right") + ft.add_column("EPS", justify="right") + for rc in fc.revenueConsensus: + rev = rc.get("revenueEst") + op = rc.get("operatingProfitEst") + ft.add_row( + str(rc.get("fiscalYear", "?")), + _fmtBig(rev * 1e8 if rev else None), + _fmtBig(op * 1e8 if op else None), + _fmtNum(rc.get("epsEst"), "원", precision=0) if rc.get("epsEst") else "-", + ) + if fc.selfForecast: + sf = fc.selfForecast + method = sf.get("method", "") + gr = sf.get("growthRate") + conf = sf.get("confidence", "") + if gr is not None: + ft.add_row("", f"[dim]자체예측 성장 {gr:.1f}% ({method}, {conf})[/dim]", "", "") + if fc.scenarioSummary: + sc = fc.scenarioSummary + base = sc.get("base") + bull = sc.get("bull") + bear = sc.get("bear") + if base is not None: + ft.add_row( + "", + f"[dim]시나리오 Base {_fmtPrice(base)} / Bull {_fmtPrice(bull)} / Bear {_fmtPrice(bear)}[/dim]", + "", + "", + ) + console.print(Panel(ft, title="[bold]Forecast[/bold]", border_style="blue")) + + def _renderPeer(self, console) -> None: + """Peer Comparison 패널.""" + pa = self.peerAnalysis + if pa is None: + return + hasCompanyData = any(v is not None for v in pa.companyMultiples.values()) + if not hasCompanyData and not pa.sectorMultiples: + return + from rich.panel import Panel + from rich.table import Table + + pt = Table(show_header=True, box=None, padding=(0, 2)) + pt.add_column("배수") + pt.add_column("기업", justify="right") + pt.add_column("섹터", justify="right", style="dim") + pt.add_column("할인/할증", justify="right") + for key in ["PER", "PBR", "EV/EBITDA"]: + cv = pa.companyMultiples.get(key) + sv = pa.sectorMultiples.get(key) + pd = pa.premiumDiscount.get(key) + cvText = f"{cv:.1f}배" if cv is not None else "-" + svText = f"{sv:.1f}배" if sv is not None else "-" + if pd is not None: + color = "green" if pd < 0 else "red" + pdText = f"[{color}]{pd:+.0f}%[/{color}]" + else: + pdText = "-" + pt.add_row(key, cvText, svText, pdText) + if pa.peerNarrative: + pt.add_row("", f"[dim]{pa.peerNarrative}[/dim]", "", "") + console.print(Panel(pt, title=f"[bold]Peer — {pa.sectorName}[/bold]", border_style="yellow")) + + def _renderMarket(self, console) -> None: + """Market Data 패널.""" + md = self.marketData + if md is None: + return + hasData = (md.marketCap and md.marketCap > 0) or md.per is not None or md.pbr is not None + if not hasData: + return + from rich.panel import Panel + from rich.table import Table + + mt = Table(show_header=False, box=None, padding=(0, 1)) + mt.add_column(style="dim", width=10) + mt.add_column() + if md.marketCap and md.marketCap > 0: + mt.add_row("시가총액", _fmtBig(md.marketCap)) + if md.per is not None: + mt.add_row("PER", _fmtNum(md.per, "배")) + if md.pbr is not None: + mt.add_row("PBR", _fmtNum(md.pbr, "배", precision=2)) + if md.dividendYield is not None: + mt.add_row("배당률", _fmtNum(md.dividendYield, "%")) + if md.high52w and md.low52w and md.high52w > 0: + mt.add_row("52주", f"{md.low52w:,.0f} ~ {md.high52w:,.0f}") + if md.foreignHoldingRatio is not None: + mt.add_row("외인보유", _fmtNum(md.foreignHoldingRatio, "%")) + if md.analystCount and md.analystCount > 0: + mt.add_row("애널리스트", f"{md.analystCount}명") + if md.baseRate is not None: + mt.add_row("기준금리", _fmtNum(md.baseRate, "%")) + console.print(Panel(mt, title="[bold]Market Data[/bold]", border_style="blue")) + + def _renderFinancial(self, console) -> None: + """Financial Trends — 수익성·DuPont·효율성 종합 테이블.""" + if not self.financial or not self.financial.periods: + return + from rich.panel import Panel + from rich.table import Table + + fa = self.financial + periods = fa.periods + + # ── 1) 수익성 추이 ── + ft = Table(show_header=True, box=None, padding=(0, 2), title="수익성 추이") + ft.add_column("지표", style="dim") + for p in periods: + ft.add_column(p, justify="right") + if fa.marginTrends.get("grossMargin"): + ft.add_row("매출총이익률", *[_fmtNum(v, "%") for v in fa.marginTrends["grossMargin"]]) + if fa.marginTrends.get("operatingMargin"): + ft.add_row("영업이익률", *[_fmtNum(v, "%") for v in fa.marginTrends["operatingMargin"]]) + if fa.marginTrends.get("netMargin"): + ft.add_row("순이익률", *[_fmtNum(v, "%") for v in fa.marginTrends["netMargin"]]) + if fa.marginTrends.get("costOfSalesRatio"): + ft.add_row("원가율", *[_fmtNum(v, "%") for v in fa.marginTrends["costOfSalesRatio"]]) + if fa.marginTrends.get("sgaRatio"): + ft.add_row("판관비율", *[_fmtNum(v, "%") for v in fa.marginTrends["sgaRatio"]]) + + # ── 2) DuPont 분해 ── + if fa.dupont and fa.dupont.roe: + dp = fa.dupont + ft.add_row("") # separator + ft.add_row( + "[bold]ROE[/bold]", + *[_fmtNum(v * 100 if v else None, "%") for v in dp.roe], + ) + ft.add_row( + " 순이익률", + *[_fmtNum(v * 100 if v else None, "%") for v in dp.netMargin], + ) + ft.add_row( + " 자산회전율", + *[_fmtNum(v, "배", precision=2) for v in dp.assetTurnover], + ) + ft.add_row( + " 레버리지", + *[_fmtNum(v, "배") for v in dp.equityMultiplier], + ) + + # ── 3) 효율성 추이 ── + if fa.marginTrends.get("dso") or fa.marginTrends.get("ccc"): + ft.add_row("") # separator + if fa.marginTrends.get("dso"): + ft.add_row("매출채권회전일", *[_fmtNum(v, "일", precision=0) for v in fa.marginTrends["dso"]]) + if fa.marginTrends.get("dio"): + ft.add_row("재고자산회전일", *[_fmtNum(v, "일", precision=0) for v in fa.marginTrends["dio"]]) + if fa.marginTrends.get("dpo"): + ft.add_row("매입채무회전일", *[_fmtNum(v, "일", precision=0) for v in fa.marginTrends["dpo"]]) + if fa.marginTrends.get("ccc"): + ft.add_row("[bold]CCC[/bold]", *[_fmtNum(v, "일", precision=0) for v in fa.marginTrends["ccc"]]) + + # ── 4) 성장률 ── + if fa.marginTrends.get("salesGrowth"): + ft.add_row("") + ft.add_row("매출 성장률", *[_fmtNum(v, "%") for v in fa.marginTrends["salesGrowth"]]) + if fa.marginTrends.get("opGrowth"): + ft.add_row("영업이익 성장률", *[_fmtNum(v, "%") for v in fa.marginTrends["opGrowth"]]) + + # ── 5) 규모 (억 단위) ── + if fa.marginTrends.get("sales"): + ft.add_row("") + ft.add_row("매출", *[_fmtBig(v) for v in fa.marginTrends["sales"]]) + if fa.marginTrends.get("operatingProfit"): + ft.add_row("영업이익", *[_fmtBig(v) for v in fa.marginTrends["operatingProfit"]]) + if fa.marginTrends.get("netProfit"): + ft.add_row("순이익", *[_fmtBig(v) for v in fa.marginTrends["netProfit"]]) + + console.print(Panel(ft, title="[bold]Financial Analysis[/bold]", border_style="cyan")) + + # ── 6) BS 요약 ── + if fa.bsSummary and fa.bsSummary.get("totalAssets"): + bt = Table(show_header=True, box=None, padding=(0, 2), title="재무상태표 요약") + bt.add_column("지표", style="dim") + for p in periods: + bt.add_column(p, justify="right") + bsLabels = { + "totalAssets": "자산총계", + "currentAssets": "유동자산", + "nonCurrentAssets": "비유동자산", + "totalLiabilities": "부채총계", + "totalEquity": "자본총계", + "cashAndEquivalents": "현금및현금성자산", + "retainedEarnings": "이익잉여금", + "debtRatio": "부채비율", + "currentRatio": "유동비율", + } + for key, label in bsLabels.items(): + vals = fa.bsSummary.get(key) + if not vals: + continue + if key in ("debtRatio", "currentRatio"): + bt.add_row(label, *[_fmtNum(v, "%", precision=1) for v in vals]) + else: + bt.add_row(label, *[_fmtBig(v) for v in vals]) + console.print(Panel(bt, title="[bold]Balance Sheet Summary[/bold]", border_style="cyan")) + + # ── 7) CF 요약 ── + if fa.cfSummary and fa.cfSummary.get("operatingCf"): + ct = Table(show_header=True, box=None, padding=(0, 2), title="현금흐름표 요약") + ct.add_column("지표", style="dim") + for p in periods: + ct.add_column(p, justify="right") + cfLabels = { + "operatingCf": "영업CF", + "investingCf": "투자CF", + "financingCf": "재무CF", + "capex": "CAPEX", + "fcf": "FCF", + } + for key, label in cfLabels.items(): + vals = fa.cfSummary.get(key) + if not vals: + continue + ct.add_row(label, *[_fmtBig(v) for v in vals]) + console.print(Panel(ct, title="[bold]Cash Flow Summary[/bold]", border_style="cyan")) + + # ── 8) 3표 연결 지표 ── + if fa.crossStatementMetrics and fa.crossStatementMetrics.get("ocfToNetIncome"): + xt = Table(show_header=True, box=None, padding=(0, 2), title="3표 연결 지표") + xt.add_column("지표", style="dim") + for p in periods: + xt.add_column(p, justify="right") + xLabels = { + "ocfToNetIncome": "OCF/NI", + "capexToDepreciation": "CAPEX/감가상각", + "retainedEarningsGrowth": "이익잉여금 증가율", + } + for key, label in xLabels.items(): + vals = fa.crossStatementMetrics.get(key) + if not vals: + continue + if key == "retainedEarningsGrowth": + xt.add_row(label, *[_fmtNum(v, "%", precision=1) for v in vals]) + else: + xt.add_row(label, *[_fmtNum(v, "배", precision=2) for v in vals]) + console.print(Panel(xt, title="[bold]Cross-Statement Metrics[/bold]", border_style="cyan")) + + def _renderSectorKpis(self, console) -> None: + """Sector KPIs 패널.""" + if not self.sectorKpis: + return + from rich.panel import Panel + from rich.table import Table + + st = Table(show_header=True, box=None, padding=(0, 2)) + st.add_column("KPI") + st.add_column("값", justify="right") + st.add_column("벤치마크", justify="right", style="dim") + st.add_column("평가") + for kpi in self.sectorKpis.kpis: + val = f"{kpi.value}{kpi.unit}" if kpi.value is not None else "-" + bench = f"{kpi.benchmark}{kpi.unit}" if kpi.benchmark is not None else "-" + badge = {"good": "[green]✓[/]", "bad": "[red]✗[/]", "neutral": "[yellow]~[/]"}.get(kpi.assessment, "") + st.add_row(kpi.label, val, bench, badge) + console.print(Panel(st, title=f"[bold]섹터 KPI — {self.sectorKpis.sectorName}[/bold]", border_style="yellow")) + + def summary(self) -> str: + """plain text 전체 출력 (rich 없는 환경용).""" + sep = "-" * 50 + lines: list[str] = [] + name = self.meta.corpName or self.meta.stockCode + + lines.append(f"{'=' * 50}") + lines.append(f" {name} 종합 기업분석 리포트") + lines.append(f"{'=' * 50}") + lines.append(f" 생성일: {self.meta.generatedAt[:10] if self.meta.generatedAt else '-'}") + lines.append(f" 커버리지: {self.meta.coverageScore:.0%}") + if self.meta.warnings: + lines.append(f" ! {', '.join(self.meta.warnings)}") + + ex = self.executive + lines.append(f"\n{sep}") + lines.append(" Executive Summary") + lines.append(sep) + if ex.opinion: + lines.append(f" 투자의견: {ex.opinion} | 프로파일: {ex.profile}") + if ex.currentPrice is not None: + p = f" 현재가: {ex.currentPrice:,.0f}" + if ex.targetPrice: + p += f" -> 목표가: {ex.targetPrice:,.0f}" + if ex.upside is not None: + p += f" ({ex.upside:+.1%})" + lines.append(p) + + th = self.thesis + lines.append(f"\n{sep}") + lines.append(" Investment Thesis") + lines.append(sep) + if th.summaryNarrative: + lines.append(f" {th.summaryNarrative}") + for b in th.bullCase: + lines.append(f" + {b}") + for b in th.bearCase: + lines.append(f" - {b}") + lines.append(f" 확신도: {th.confidence:.0%}") + + if self.narrativeAnalysis and self.narrativeAnalysis.paragraphs: + lines.append(f"\n{sep}") + lines.append(" Deep Analysis") + lines.append(sep) + for p in self.narrativeAnalysis.paragraphs: + lines.append(f" [{p.dimension}] {p.body}") + if self.narrativeAnalysis.crossReferences: + for cr in self.narrativeAnalysis.crossReferences: + lines.append(f" * {cr}") + if self.narrativeAnalysis.forwardImplications: + for fi in self.narrativeAnalysis.forwardImplications: + lines.append(f" -> {fi}") + + if self.valuationAnalysis: + va = self.valuationAnalysis + lines.append(f"\n{sep}") + lines.append(" Valuation") + lines.append(sep) + if va.dcfPerShare is not None: + lines.append(f" DCF: {va.dcfPerShare:,.0f}원") + if va.ddmPerShare is not None: + lines.append(f" DDM: {va.ddmPerShare:,.0f}원") + if va.relativePerShare is not None: + lines.append(f" 상대가치: {va.relativePerShare:,.0f}원") + if va.fairValueRange: + lo, hi = va.fairValueRange + lines.append(f" 적정범위: {lo:,.0f} ~ {hi:,.0f}원 ({va.verdict})") + + if self.riskAnalysis and self.riskAnalysis.distress: + d = self.riskAnalysis.distress + lines.append(f"\n{sep}") + lines.append(f" Risk: {d.level} (신용 {d.creditGrade})") + lines.append(sep) + for rf in d.riskFactors[:3]: + lines.append(f" ▸ {rf}") + + if self.marketData: + md = self.marketData + lines.append(f"\n{sep}") + lines.append(" Market Data") + lines.append(sep) + parts = [] + if md.marketCap and md.marketCap > 0: + parts.append(f"시총 {_fmtBig(md.marketCap)}") + if md.per is not None: + parts.append(f"PER {md.per:.1f}") + if md.pbr is not None: + parts.append(f"PBR {md.pbr:.2f}") + if parts: + lines.append(f" {' | '.join(parts)}") + + lines.append(f"\n{'=' * 50}") + lines.append(f" {self.DISCLAIMER}") + lines.append(f"{'=' * 50}") + return "\n".join(lines) + + def toDict(self) -> dict: + """전체 리포트를 dict로 변환.""" + return asdict(self) diff --git a/src/dartlab/analysis/financial/revenue.py b/src/dartlab/analysis/financial/revenue.py new file mode 100644 index 0000000000000000000000000000000000000000..262a26548d221c88212b3fc31c87b1d4ee1792ef --- /dev/null +++ b/src/dartlab/analysis/financial/revenue.py @@ -0,0 +1,940 @@ +"""1-1 수익 구조 분석 — 계산만 담당. + +블록 조립은 review/sections/revenue.py가 한다. +여기는 company.select() → 계산 → dict/숫자 반환. + +데이터 접근: select() 단일 경로. +- 부문별 매출: select("productService") → 항목×기간 수평화 DF +- 지역/제품별: select("salesOrder") → 항목×기간 수평화 DF +- 재무제표: select("IS", [...]) → 숫자 DF +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import ( + annualColsFromPeriods as _annualColsFromPeriods, +) +from dartlab.analysis.financial._helpers import ( + parseNumStr as _parseNumStr, +) +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_SEGMENTS = 8 +_MAX_YEARS = 8 + +_SECTOR_KR = { + "ENERGY": "에너지", + "MATERIALS": "소재", + "INDUSTRIALS": "산업재", + "CONSUMER_DISC": "경기관련소비재", + "CONSUMER_STAPLES": "필수소비재", + "HEALTHCARE": "건강관리", + "FINANCIALS": "금융", + "IT": "IT", + "COMMUNICATION": "커뮤니케이션서비스", + "UTILITIES": "유틸리티", + "REAL_ESTATE": "부동산", +} + + +# ── 유틸 ── + +_SKIP_KEYWORDS = {"합계", "조정", "내부", "소계", "총계", "부문계", "기타", "국내외"} + + +def _getRatios(company): + """ratios 객체 (RatioResult) 를 안전하게 가져온다 — internal 사용.""" + try: + return company._getRatiosInternal() + except (ValueError, KeyError, AttributeError): + return None + + +def _selectDocsRevenue( + company, *, basePeriod: str | None = None +) -> tuple[dict[str, dict[str, float]], list[str]] | None: + """productService/salesOrder에서 부문별 매출 시계열을 추출. + + fallback 체인: productService → salesOrder → EDGAR XBRL segments. + 반환: ({부문명: {period: 매출액}}, annualCols) 또는 None. + """ + for topic in ("productService", "salesOrder"): + try: + result = company.select(topic, ["매출액"]) + except (ValueError, KeyError): + result = None + if result is None: + continue + parsed = _parseDocsRevenueResult(result, basePeriod=basePeriod) + if parsed is not None: + return parsed + + # EDGAR fallback: XBRL segment revenue 태그 + edgarResult = _selectEdgarSegmentRevenue(company, basePeriod=basePeriod) + if edgarResult is not None: + return edgarResult + + return None + + +def _selectEdgarSegmentRevenue( + company, *, basePeriod: str | None = None +) -> tuple[dict[str, dict[str, float]], list[str]] | None: + """EDGAR XBRL segment revenue 태그에서 부문별 매출 추출. + + SEC XBRL에서 segment 관련 revenue 태그를 직접 읽어서 + DART productService와 동일한 형태로 반환. + """ + market = getattr(company, "market", "KR") + if market != "US": + return None + + cik = getattr(company, "cik", None) + if not cik: + return None + + try: + import polars as pl + + from dartlab.providers.edgar.report import edgarFinancePath + + path = edgarFinancePath(cik) + if not path.exists(): + return None + + # segment revenue 관련 태그 검색 + df = ( + pl.scan_parquet(path) + .filter( + pl.col("tag").str.contains( + "(?i)RevenueFromContractWithCustomer|SegmentReportingInformationRevenue|" + "SalesRevenueNet|RevenueFromExternalCustomers" + ) + & pl.col("form").is_in(["10-K", "20-F"]) + & pl.col("unit").str.contains("(?i)USD") + ) + .select("tag", "label", "fy", "val", "filed") + .collect() + ) + + if df.is_empty(): + return None + + # 연도별 최신값 (filed 기준) + df = df.sort("filed", descending=True).unique(subset=["tag", "fy"], keep="first") + + # segment가 있으면 label에 segment 이름이 다를 것 + # 같은 tag가 여러 번 나오면 segment 분할된 것 + tagCounts = df.group_by("fy", "tag").agg(pl.count()).filter(pl.col("count") > 1) + hasSegments = tagCounts.height > 0 + + if not hasSegments: + return None + + # label 기반으로 segment 이름 추출 + years = sorted(df["fy"].unique().drop_nulls().to_list(), reverse=True) + yearCols = [str(y) for y in years[:_MAX_YEARS]] + if not yearCols: + return None + + segData: dict[str, dict[str, float]] = {} + latestFy = years[0] + latestRows = df.filter(pl.col("fy") == latestFy) + + for row in latestRows.iter_rows(named=True): + label = str(row.get("label") or row.get("tag") or "") + val = row.get("val") + if val is None or val <= 0: + continue + # label을 segment 이름으로 사용 + segName = label.split(",")[0].strip()[:30] + if not segName: + continue + if segName not in segData: + segData[segName] = {} + segData[segName][str(latestFy)] = val + + # 다른 연도도 채우기 + for segName in segData: + for y in years[1:_MAX_YEARS]: + yRows = df.filter((pl.col("fy") == y) & pl.col("label").str.contains(segName.split(" ")[0])) + if yRows.height > 0: + segData[segName][str(y)] = yRows["val"][0] + + if not segData or len(segData) < 2: + return None + + return segData, yearCols + except (ImportError, OSError, ValueError, KeyError): + return None + + +def _parseDocsRevenueResult( + result, *, basePeriod: str | None = None +) -> tuple[dict[str, dict[str, float]], list[str]] | None: + """docs select 결과에서 부문별 매출 시계열 파싱.""" + df = result.df + if df.is_empty(): + return None + + itemCol = df.columns[0] + pCols = [c for c in df.columns if c != itemCol] + yCols = _annualColsFromPeriods(pCols, basePeriod, _MAX_YEARS) + if not yCols: + return None + + segData: dict[str, dict[str, float]] = {} + for row in df.iter_rows(named=True): + rawItem = str(row.get(itemCol, "")) + if any(kw in rawItem for kw in _SKIP_KEYWORDS): + continue + # 부문명 추출: "DX_매출액" → "DX", "국내_매출액" → "국내" + segName = rawItem.replace("_매출액", "").strip() + if not segName: + continue + + vals: dict[str, float] = {} + for yc in yCols: + v = _parseNumStr(row.get(yc)) + if v is not None and v > 0: + vals[yc] = v + if vals: + segData[segName] = vals + + if not segData: + return None + return segData, yCols + + +def _selectDocsOpIncome(company, yCols: list[str]) -> dict[str, dict[str, float]] | None: + """productService/salesOrder에서 부문별 영업이익 시계열을 추출 (있는 기업만).""" + for topic in ("productService", "salesOrder"): + result = company.select(topic, ["영업이익", "영업손익"], strict=False) + if result is None: + continue + df = result.df + if df.is_empty(): + continue + + itemCol = df.columns[0] + opData: dict[str, dict[str, float]] = {} + for row in df.iter_rows(named=True): + rawItem = str(row.get(itemCol, "")) + if any(kw in rawItem for kw in _SKIP_KEYWORDS): + continue + segName = rawItem.replace("_영업이익", "").replace("_영업손익", "").strip() + if not segName: + continue + vals: dict[str, float] = {} + for yc in yCols: + v = _parseNumStr(row.get(yc)) + if v is not None: + vals[yc] = v + if vals: + opData[segName] = vals + + if opData: + return opData + return None + + +def _selectDocsSalesOrder(company, keyword: str | None = None): + """salesOrder에서 항목별 매출 시계열을 추출.""" + if keyword: + result = company.select("salesOrder", [keyword]) + else: + result = company.select("salesOrder", colList=None) + if result is None: + return None + return result + + +# ── 계산 함수들 ── + + +@memoized_calc +def calcCompanyProfile(company, *, basePeriod: str | None = None) -> dict | None: + """업종/주요제품 맥락. + + Returns + ------- + dict | None + sector : str — 섹터 > 산업그룹 문자열 + company : str — 기업명 (EDGAR만) + products : str — 주요제품 설명 + """ + parts: dict[str, str] = {} + + market = getattr(company, "market", "KR") + + try: + sectorInfo = company.sector + if sectorInfo: + sectorKr = _SECTOR_KR.get(sectorInfo.sector.name, sectorInfo.sector.name) + groupKr = sectorInfo.industryGroup.value + parts["sector"] = f"섹터: {sectorKr} > {groupKr}" + except (ValueError, KeyError, AttributeError): + pass + + if market == "US": + # EDGAR: corpName + 10-K Item 1 첫 문장에서 사업 설명 추출 + corpName = getattr(company, "corpName", None) + if corpName: + parts["company"] = corpName + try: + sections = company._docs.sections + if sections is not None: + import polars as pl + + item1 = sections.filter(pl.col("topic").str.contains("(?i)item1Business")) + if not item1.is_empty(): + pCols = [ + c + for c in item1.columns + if c + not in ( + "topic", + "blockType", + "blockOrder", + "textNodeType", + "textLevel", + "textPath", + ) + ] + if pCols: + latestText = item1[pCols[-1]].drop_nulls().to_list() + if latestText: + firstPara = str(latestText[0])[:200] + parts["products"] = firstPara + except (ValueError, KeyError, AttributeError): + pass + else: + # DART: KRX listing에서 주요제품 + try: + import dartlab + + listing = dartlab.listing() + stockCode = getattr(company, "stockCode", "") + if stockCode: + row = listing.filter(listing["종목코드"] == stockCode) + if not row.is_empty() and "주요제품" in row.columns: + products = row["주요제품"][0] + if products: + parts["products"] = f"주요제품: {products}" + except (ImportError, ValueError, KeyError): + pass + + return parts if parts else None + + +@memoized_calc +def calcSegmentComposition(company, *, basePeriod: str | None = None) -> dict | None: + """부문별 매출 구성 (최신 기간). + + 반환:: + + { + "segments": [{"name": str, "revenue": float, "opIncome": float|None}, ...], + "totalRevenue": float, + "totalOpIncome": float, + "hasOpIncome": bool, + "summary": str, + "compositionHistory": [{"year": str, "shares": {seg: pct}}, ...] | None, + } + """ + docsResult = _selectDocsRevenue(company, basePeriod=basePeriod) + if docsResult is None: + return None + + segData, yCols = docsResult + latestYear = yCols[0] + + # 영업이익 데이터도 시도 (있는 기업만) + opData = _selectDocsOpIncome(company, yCols) + + segments = [] + for segName, vals in segData.items(): + rev = vals.get(latestYear) + if rev is not None and rev > 0: + opIncome = opData.get(segName, {}).get(latestYear) if opData else None + opMargin = opIncome / rev * 100 if opIncome is not None and rev > 0 else None + segments.append({"name": segName, "revenue": rev, "opIncome": opIncome, "opMargin": opMargin}) + + if not segments: + return None + + segments.sort(key=lambda x: x["revenue"], reverse=True) + if len(segments) > _MAX_SEGMENTS: + top = segments[: _MAX_SEGMENTS - 1] + others = segments[_MAX_SEGMENTS - 1 :] + othersRev = sum(s["revenue"] for s in others) + top.append({"name": "기타", "revenue": othersRev, "opIncome": None}) + segments = top + + totalRev = sum(s["revenue"] for s in segments) + if totalRev == 0: + return None + + hasOp = any(s["opIncome"] is not None for s in segments) + totalOp = sum(s["opIncome"] for s in segments if s["opIncome"] is not None) + + topSeg = segments[0] + topPct = topSeg["revenue"] / totalRev * 100 + summary = f"{topSeg['name']} {topPct:.0f}%" + if len(segments) >= 2: + seg2 = segments[1] + seg2Pct = seg2["revenue"] / totalRev * 100 + summary += f", {seg2['name']} {seg2Pct:.0f}%" + + compositionHistory = _calcCompositionHistory(segData, yCols) + + return { + "segments": segments, + "totalRevenue": totalRev, + "totalOpIncome": totalOp, + "hasOpIncome": hasOp, + "summary": summary, + "compositionHistory": compositionHistory, + } + + +@memoized_calc +def calcSegmentTrend(company, *, basePeriod: str | None = None) -> dict | None: + """다년간 부문별 매출 추이 + YoY + 영업이익률 추세. + + 반환:: + + { + "yearCols": [str, ...], + "rows": [ + { + "name": str, + "values": {year: float}, + "yoy": float|None, + "opMargins": {year: float}|None, + "opMarginDirection": str|None, + }, + ... + ], + } + """ + docsResult = _selectDocsRevenue(company, basePeriod=basePeriod) + if docsResult is None: + return None + + segData, yCols = docsResult + if not yCols: + return None + + # 영업이익 시계열도 시도 + opData = _selectDocsOpIncome(company, yCols) + + rows = [] + for segName, vals in segData.items(): + positiveVals = {yc: vals.get(yc, 0) for yc in yCols} + if not any(v > 0 for v in positiveVals.values()): + continue + + yoy = None + if len(yCols) >= 2: + cur = vals.get(yCols[0]) + prev = vals.get(yCols[1]) + if cur is not None and prev is not None and prev > 0: + yoy = (cur - prev) / prev * 100 + + # 부문별 영업이익률 시계열 + opMargins = None + opMarginDirection = None + if opData and segName in opData: + opMargins = {} + for yc in yCols: + rev = vals.get(yc) + opInc = opData[segName].get(yc) + if rev and rev > 0 and opInc is not None: + opMargins[yc] = opInc / rev * 100 + if not opMargins: + opMargins = None + elif len(opMargins) >= 2: + marginVals = [opMargins[yc] for yc in yCols if yc in opMargins] + diff = marginVals[0] - marginVals[-1] + if diff > 3: + opMarginDirection = "개선" + elif diff < -3: + opMarginDirection = "악화" + else: + opMarginDirection = "안정" + + rows.append( + { + "name": segName, + "values": positiveVals, + "yoy": yoy, + "opMargins": opMargins, + "opMarginDirection": opMarginDirection, + } + ) + + if not rows: + return None + + rows.sort(key=lambda x: x["values"].get(yCols[0], 0), reverse=True) + return {"yearCols": yCols, "rows": rows[:_MAX_SEGMENTS]} + + +@memoized_calc +def calcBreakdown(company, sub: str, *, basePeriod: str | None = None) -> dict | None: + """지역별/제품별 매출 비중 + 다년간 비중 변화. + + 반환:: + + { + "items": [{"name": str, "value": float, "pct": float}, ...], + "total": float, + "breakdownHistory": [{"year": str, "shares": {name: pct}}, ...] | None, + } + """ + result = _selectDocsSalesOrder(company) + if result is None: + return None + + df = result.df + if df.is_empty(): + return None + + itemCol = df.columns[0] + periodCols = [c for c in df.columns if c != itemCol] + yCols = _annualColsFromPeriods(periodCols, basePeriod, 1) + if not yCols: + return None + + latestYear = yCols[0] + + items = [] + for row in df.iter_rows(named=True): + name = str(row.get(itemCol, "")).strip() + if any(kw in name for kw in _SKIP_KEYWORDS): + continue + v = _parseNumStr(row.get(latestYear)) + if v is not None and v > 0: + items.append({"name": name, "value": v}) + + if not items: + return None + + items.sort(key=lambda x: x["value"], reverse=True) + total = sum(i["value"] for i in items) + if total == 0: + return None + + for i in items: + i["pct"] = i["value"] / total * 100 + + result_dict: dict = {"items": items[:_MAX_SEGMENTS], "total": total} + + history = _calcBreakdownHistoryFromDocs(company, basePeriod=basePeriod) + if history: + result_dict["breakdownHistory"] = history + + return result_dict + + +@memoized_calc +def calcRevenueGrowth(company, *, basePeriod: str | None = None) -> dict | None: + """매출 성장 지표. + + 반환:: + + { + "yoy": float|None, + "cagr3y": float|None, + "quarterlySelect": SelectResult|None, + } + """ + ratios = _getRatios(company) + yoy = getattr(ratios, "revenueGrowth", None) if ratios else None + cagr = getattr(ratios, "revenueGrowth3Y", None) if ratios else None + + # annual 기반 CAGR 교차 검증 — ratioSeries 분기 기반이 왜곡될 수 있음 + try: + ann = company._buildFinanceSeries(freq="Y") + if ann: + from dartlab.core.finance.extract import getRevenueGrowth3Y + + annualCagr = getRevenueGrowth3Y(ann[0]) + if annualCagr is not None: + if cagr is None: + cagr = annualCagr + elif abs((cagr or 0) - annualCagr) > 5: + # 분기 CAGR과 연간 CAGR이 5%p 이상 차이나면 연간 우선 + cagr = annualCagr + except (ValueError, KeyError, AttributeError): + pass + + quarterly = None + try: + result = company.select("IS", ["매출액"]) + if result is not None: + quarterly = result + except (ValueError, KeyError, AttributeError): + pass + + if yoy is None and cagr is None and quarterly is None: + return None + + return {"yoy": yoy, "cagr3y": cagr, "quarterlySelect": quarterly} + + +@memoized_calc +def calcConcentration(company, *, basePeriod: str | None = None) -> dict | None: + """매출 집중도. + + 반환:: + + { + "hhi": float, + "hhiLabel": str, + "topPct": float, + "domesticPct": float|None, + "hhiHistory": list|None, + "hhiDirection": str, + } + """ + revVals = _getDocsRevenueVals(company) + if not revVals: + return None + + total = sum(revVals) + hhi = sum((v / total * 100) ** 2 for v in revVals) + if hhi > 5000: + hhiLabel = "고집중" + elif hhi > 2500: + hhiLabel = "중간 집중" + else: + hhiLabel = "분산" + + topPct = max(revVals) / total * 100 + domesticPct = _calcDomesticExportRatio(company) + + hhiResult = _calcHhiHistory(company) + hhiHistory = None + hhiDirection = "안정" + if hhiResult is not None: + hhiHistory, hhiDirection = hhiResult + + return { + "hhi": hhi, + "hhiLabel": hhiLabel, + "topPct": topPct, + "domesticPct": domesticPct, + "hhiHistory": hhiHistory, + "hhiDirection": hhiDirection, + } + + +@memoized_calc +def calcRevenueQuality(company, *, basePeriod: str | None = None) -> dict | None: + """매출 품질 — 현금 뒷받침과 마진 추세. + + 반환:: + + { + "cashConversion": float|None, + "cashConversionLabel": str, + "grossMargin": float|None, + "grossMarginTrend": [float, ...], + "grossMarginDirection": str, + } + """ + ratios = _getRatios(company) + if ratios is None: + return None + + cc = getattr(ratios, "operatingCfToNetIncome", None) + gm = getattr(ratios, "grossMargin", None) + + if cc is None and gm is None: + return None + + ccLabel = "양호" + if cc is not None: + if cc >= 80: + ccLabel = "양호" + elif cc >= 40: + ccLabel = "주의" + else: + ccLabel = "위험" + + gmTrend: list[float] = [] + try: + seriesResult = company._ratioSeries() + if seriesResult is not None: + data, _years = seriesResult + gmSeries = data.get("RATIO", {}).get("grossMargin", []) + if gmSeries: + gmTrend = [v for v in gmSeries[-4:] if v is not None] + except (ValueError, KeyError, AttributeError): + pass + + gmDirection = "안정" + if len(gmTrend) >= 2: + first = gmTrend[0] + last = gmTrend[-1] + if first is not None and last is not None: + diff = last - first + if diff > 2: + gmDirection = "개선" + elif diff < -2: + gmDirection = "악화" + + return { + "cashConversion": cc, + "cashConversionLabel": ccLabel, + "grossMargin": gm, + "grossMarginTrend": gmTrend, + "grossMarginDirection": gmDirection, + } + + +@memoized_calc +def calcGrowthContribution(company, *, basePeriod: str | None = None) -> dict | None: + """부문별 성장 기여 분해 — 성장이 어디에서 왔는가. + + 반환:: + + { + "totalGrowthPct": float, + "contributions": [{"name": str, "amount": float, "pct": float}, ...], + "driver": str, + "period": str, + } + """ + docsResult = _selectDocsRevenue(company, basePeriod=basePeriod) + if docsResult is None: + return None + + segData, yCols = docsResult + if len(yCols) < 2: + return None + + curYear = yCols[0] + baseIdx = min(3, len(yCols) - 1) + baseYear = yCols[baseIdx] + + contributions = [] + totalCur = 0.0 + totalBase = 0.0 + + for segName, vals in segData.items(): + cur = vals.get(curYear) + base = vals.get(baseYear) + if cur is None or base is None: + continue + + totalCur += cur + totalBase += base + contributions.append({"name": segName, "amount": cur - base}) + + if not contributions or totalBase == 0: + return None + + totalChange = totalCur - totalBase + totalGrowthPct = totalChange / totalBase * 100 + + if totalChange == 0: + for c in contributions: + c["pct"] = 0.0 + else: + for c in contributions: + c["pct"] = c["amount"] / abs(totalChange) * 100 + + contributions.sort(key=lambda x: abs(x["amount"]), reverse=True) + contributions = contributions[:_MAX_SEGMENTS] + + top = contributions[0] + topPct = abs(top["pct"]) + direction = "성장" if top["amount"] > 0 else "감소" + driver = f"{top['name']}이(가) 전체 {direction}의 {topPct:.0f}% 기여" + + return { + "totalGrowthPct": totalGrowthPct, + "contributions": contributions, + "driver": driver, + "period": f"{baseYear} -> {curYear}", + } + + +@memoized_calc +def calcFlags(company, *, basePeriod: str | None = None) -> list[tuple[str, str]]: + """수익 관련 경고/기회 플래그. + + Returns + ------- + list[tuple[str, str]] + 각 원소는 (플래그 텍스트, "warning" | "opportunity"). + """ + flags: list[tuple[str, str]] = [] + + revVals = _getDocsRevenueVals(company) + if revVals: + total = sum(revVals) + hhi = sum((v / total * 100) ** 2 for v in revVals) + if hhi > 5000: + flags.append((f"매출 고집중 (HHI {hhi:,.0f}) -- 단일 부문 의존", "warning")) + elif hhi > 2500: + flags.append((f"매출 중간 집중 (HHI {hhi:,.0f})", "warning")) + + ratios = _getRatios(company) + if ratios is not None: + rg = getattr(ratios, "revenueGrowth", None) + cagr = getattr(ratios, "revenueGrowth3Y", None) + if rg is not None: + if rg > 20: + flags.append((f"매출 고성장 YoY +{rg:.0f}%", "opportunity")) + elif rg < -10: + flags.append((f"매출 역성장 YoY {rg:.0f}%", "warning")) + if rg is not None and cagr is not None: + if rg > 10 and cagr < 0: + flags.append( + ( + f"YoY +{rg:.0f}%이나 3Y CAGR {cagr:.0f}%: 반짝 회복 가능성", + "warning", + ) + ) + elif rg < -5 and cagr > 5: + flags.append( + ( + f"YoY {rg:.0f}%이나 3Y CAGR +{cagr:.0f}%: 일시적 둔화 가능성", + "opportunity", + ) + ) + + return flags + + +# ── 내부 헬퍼 ── + + +def _getDocsRevenueVals(company) -> list[float]: + """productService에서 최신 기간 부문별 매출 양수 값 리스트.""" + docsResult = _selectDocsRevenue(company) + if docsResult is None: + return [] + + segData, yCols = docsResult + latestYear = yCols[0] + + vals = [] + for _segName, segVals in segData.items(): + v = segVals.get(latestYear) + if v is not None and v > 0: + vals.append(v) + return vals + + +def _calcCompositionHistory(segData: dict[str, dict[str, float]], yCols: list[str]) -> list[dict] | None: + """연도별 부문 비중 변화. [{year, shares: {seg: pct}}, ...].""" + history = [] + for yc in yCols: + yearVals = {s: segData[s].get(yc, 0) for s in segData} + total = sum(yearVals.values()) + if total <= 0: + continue + shares = {s: v / total * 100 for s, v in yearVals.items() if v > 0} + history.append({"year": yc, "shares": shares}) + return history if len(history) >= 2 else None + + +def _calcHhiHistory(company) -> tuple[list[dict], str] | None: + """연도별 HHI 시계열 + 방향. ([{year, hhi}], direction).""" + docsResult = _selectDocsRevenue(company) + if docsResult is None: + return None + segData, yCols = docsResult + hhiList = [] + for yc in yCols: + yearVals = [segData[s].get(yc, 0) for s in segData] + total = sum(yearVals) + if total <= 0: + continue + hhi = sum((v / total * 100) ** 2 for v in yearVals if v > 0) + hhiList.append({"year": yc, "hhi": hhi}) + if not hhiList: + return None + direction = "안정" + if len(hhiList) >= 2: + newest = hhiList[0]["hhi"] + oldest = hhiList[-1]["hhi"] + diff = newest - oldest + if diff < -300: + direction = "다각화 진행" + elif diff > 300: + direction = "집중 심화" + return hhiList, direction + + +def _calcBreakdownHistoryFromDocs(company, *, basePeriod: str | None = None) -> list[dict] | None: + """salesOrder���서 다년간 비중 변화.""" + result = _selectDocsSalesOrder(company) + if result is None: + return None + + df = result.df + if df.is_empty(): + return None + + itemCol = df.columns[0] + periodCols = [c for c in df.columns if c != itemCol] + yCols = _annualColsFromPeriods(periodCols, basePeriod, _MAX_YEARS) + if len(yCols) < 2: + return None + + history = [] + for yc in yCols: + shares: dict[str, float] = {} + total = 0.0 + for row in df.iter_rows(named=True): + name = str(row.get(itemCol, "")).strip() + if any(kw in name for kw in _SKIP_KEYWORDS): + continue + v = _parseNumStr(row.get(yc)) + if v is not None and v > 0: + shares[name] = v + total += v + if total > 0 and shares: + history.append({"year": yc, "shares": {k: v / total * 100 for k, v in shares.items()}}) + + return history if len(history) >= 2 else None + + +def _calcDomesticExportRatio(company) -> float | None: + """내수 비중(%) — salesOrder��서 국내 키워드 매칭.""" + result = _selectDocsSalesOrder(company) + if result is None: + return None + + df = result.df + if df.is_empty(): + return None + + itemCol = df.columns[0] + periodCols = [c for c in df.columns if c != itemCol] + yCols = _annualColsFromPeriods(periodCols, None, 1) + if not yCols: + return None + + latestYear = yCols[0] + domesticKeywords = {"국내", "한국", "내수", "korea", "domestic"} + + domesticVal = 0.0 + totalVal = 0.0 + for row in df.iter_rows(named=True): + name = str(row.get(itemCol, "")).strip() + if any(kw in name for kw in _SKIP_KEYWORDS): + continue + v = _parseNumStr(row.get(latestYear)) + if v is not None and v > 0: + totalVal += v + if any(kw in name.lower() for kw in domesticKeywords): + domesticVal += v + + return domesticVal / totalVal * 100 if totalVal > 0 else None diff --git a/src/dartlab/analysis/financial/scorecard.py b/src/dartlab/analysis/financial/scorecard.py new file mode 100644 index 0000000000000000000000000000000000000000..69263a722dfef6c393425341807e09c4b56168e9 --- /dev/null +++ b/src/dartlab/analysis/financial/scorecard.py @@ -0,0 +1,391 @@ +"""2-5 종합 평가 -- 8영역 스코어카드, Piotroski, 종합 플래그.""" + +from __future__ import annotations + +from dartlab.analysis.financial._memoize import memoized_calc + +_GRADE_MAP = { + "performance": "성장성", + "profitability": "수익성", + "health": "안정성", + "cashflow": "현금흐름", +} + + +def _sectorRelativeScore(company, value: float, metric: str) -> int: + """섹터 분포 기준 상대 점수 (0~4). + + Q3 초과 → 4(A), 중앙값~Q3 → 3(B), Q1~중앙값 → 2(C), + Q1 미만이면서 양수 → 1(D), 음수 → 0(F). + 벤치마크가 없으면 절대 기준 fallback. + """ + try: + from dartlab.analysis.financial.insight.benchmark import getBenchmark + + sector = company.sector + if sector is not None: + bm = getBenchmark(sector.sector) + median = getattr(bm, f"{metric}Median", None) + q1 = getattr(bm, f"{metric}Q1", None) + q3 = getattr(bm, f"{metric}Q3", None) + if median is not None and q1 is not None and q3 is not None: + if value >= q3: + return 4 + if value >= median: + return 3 + if value >= q1: + return 2 + if value > 0: + return 1 + return 0 + except (ValueError, KeyError, AttributeError): + pass + # fallback: 절대 기준 + if value > 0.15 if metric == "tat" else value > 10: + return 3 + if value > 0: + return 1 + return 0 + + +@memoized_calc +def calcScorecard(company, *, basePeriod: str | None = None) -> dict | None: + """8영역 등급 요약. + + 기존 5영역(수익성/성장성/안정성/효율성/현금흐름) + + 이익품질/투자효율/재무정합성. + + Returns + ------- + dict + items : list[dict] — 영역별 등급 + area : str — 영역명 + grade : str — 등급 ("A" | "B" | "C" | "D" | "F") + profile : str — 종합 프로필 ("premium" | "average" | "weak") + """ + # insights — analyze() 직접 호출 (c.insights 는 P3 에서 제거됨) + insights = None + cacheKey = "_insights_analyze" + if hasattr(company, "_cache") and cacheKey in company._cache: + insights = company._cache[cacheKey] + else: + try: + from dartlab.analysis.financial.insight.pipeline import analyze + + insights = analyze(company.stockCode, company=company) + if hasattr(company, "_cache"): + company._cache[cacheKey] = insights + except (ImportError, ValueError, KeyError, AttributeError, TypeError): + insights = None + + # 금융업 판별 + sector = getattr(company, "sector", None) + isFinancial = False + if sector: + sectorVal = getattr(sector, "sector", None) + if sectorVal and hasattr(sectorVal, "value") and sectorVal.value == "금융": + isFinancial = True + + items = [] + if insights is not None: + grades = insights.grades() + if grades: + for eng, kor in _GRADE_MAP.items(): + grade = grades.get(eng) + if grade: + # 금융업: 안정성/효율성은 제조업 기준 부적합 → 등급 미표시 + if isFinancial and kor in ("안정성", "효율성"): + continue + items.append({"area": kor, "grade": grade}) + + # 효율성은 ratioSeries 기반으로 직접 판정 + effGrade = _calcEfficiencyGrade(company) + if effGrade: + items.append({"area": "효율성", "grade": effGrade}) + + # 이익품질 + eqGrade = _calcEarningsQualityGrade(company, basePeriod=basePeriod) + if eqGrade: + items.append({"area": "이익품질", "grade": eqGrade}) + + # 투자효율 + invGrade = _calcInvestmentGrade(company, basePeriod=basePeriod) + if invGrade: + items.append({"area": "투자효율", "grade": invGrade}) + + # 재무정합성 + csGrade = _calcCrossStatementGrade(company, basePeriod=basePeriod) + if csGrade: + items.append({"area": "재무정합성", "grade": csGrade}) + + if not items: + return None + + return {"items": items, "profile": getattr(insights, "profile", "") if insights else ""} + + +def _calcEfficiencyGrade(company) -> str | None: + """총자산회전율 추세로 효율성 등급 산출 — 섹터 상대 등급. + + 업종별 TAT 분포(중앙값/사분위)를 기준으로 상대 위치 판정. + 추세 개선 시 +1 보너스. + """ + try: + result = company._ratioSeries() + if result is None: + return None + except (ValueError, KeyError, AttributeError): + return None + + data, _years = result + tat = data.get("RATIO", {}).get("totalAssetTurnover", []) + recent = [v for v in tat[-3:] if v is not None] + if not recent: + return None + + latest = recent[-1] + improving = len(recent) >= 2 and recent[-1] >= recent[-2] + + # 섹터 상대 등급 (0~4) + score = _sectorRelativeScore(company, latest, "tat") + + # 추세 개선 보너스 (+1) + if improving: + score = min(4, score + 1) + + return ["F", "D", "C", "B", "A"][score] + + +def _calcEarningsQualityGrade(company, *, basePeriod: str | None = None) -> str | None: + """이익품질 등급 — 발생액비율 + M-Score 기반.""" + try: + from dartlab.analysis.financial.earningsQuality import calcAccrualAnalysis, calcBeneishTimeline + + accrual = calcAccrualAnalysis(company, basePeriod=basePeriod) + beneish = calcBeneishTimeline(company, basePeriod=basePeriod) + + score = 0 # 0~100 (높을수록 좋음) + count = 0 + + if accrual and accrual["history"]: + sar = accrual["history"][0].get("sloanAccrualRatio") + if sar is not None: + # 낮은 발생액 = 좋음 + if abs(sar) < 0.05: + score += 100 + elif abs(sar) < 0.10: + score += 70 + elif abs(sar) < 0.15: + score += 40 + else: + score += 10 + count += 1 + + ocfNi = accrual["history"][0].get("ocfToNi") + if ocfNi is not None: + if ocfNi > 100: + score += 100 + elif ocfNi > 70: + score += 80 + elif ocfNi > 40: + score += 50 + else: + score += 20 + count += 1 + + if beneish and beneish["history"]: + ms = beneish["history"][0].get("mScore") + if ms is not None: + if ms < -2.22: + score += 100 + elif ms < -1.78: + score += 60 + else: + score += 20 + count += 1 + + if count == 0: + return None + avg = score / count + if avg >= 80: + return "A" + if avg >= 60: + return "B" + if avg >= 40: + return "C" + if avg >= 20: + return "D" + return "F" + except (ImportError, AttributeError, TypeError, ValueError): + return None + + +def _calcInvestmentGrade(company, *, basePeriod: str | None = None) -> str | None: + """투자효율 등급 -- ROIC 섹터 상대 등급.""" + try: + from dartlab.analysis.financial.investmentAnalysis import calcRoicTimeline + + result = calcRoicTimeline(company, basePeriod=basePeriod) + if result is None or not result["history"]: + return None + + h0 = result["history"][0] + roic = h0.get("roic") + if roic is None: + return None + + # 섹터 상대 등급 (0~4) + score = _sectorRelativeScore(company, roic, "roic") + + return ["F", "D", "C", "B", "A"][score] + except (ImportError, AttributeError, TypeError, ValueError, KeyError): + return None + + +def _calcCrossStatementGrade(company, *, basePeriod: str | None = None) -> str | None: + """재무정합성 등급 — anomalyScore 기반.""" + try: + from dartlab.analysis.financial.crossStatement import calcAnomalyScore + + result = calcAnomalyScore(company, basePeriod=basePeriod) + if result is None or not result["history"]: + return None + + h0 = result["history"][0] + anomalyScore = h0.get("score", 0) + + # 낮을수록 좋음 + if anomalyScore < 15: + return "A" + if anomalyScore < 30: + return "B" + if anomalyScore < 50: + return "C" + if anomalyScore < 70: + return "D" + return "F" + except (ImportError, AttributeError, TypeError, ValueError): + return None + + +@memoized_calc +def calcPiotroskiDetail(company, *, basePeriod: str | None = None) -> dict | None: + """Piotroski F-Score 9개 항목 상세. + + Returns + ------- + dict + total : int — 총점 (점, 0~9) + interpretation : str — 해석 문구 + items : list[dict] — 9개 신호별 결과 + signal : str — 신호명 + pass : bool — 충족 여부 + """ + try: + annual = company._buildFinanceSeries(freq="Y") + if annual is None: + return None + except (ValueError, KeyError, AttributeError): + return None + + aSeries, _aYears = annual + from dartlab.analysis.financial.research.scoring import calcPiotroski + + score = calcPiotroski(aSeries) + + labels = { + "roaPositive": "ROA 양수", + "ocfPositive": "영업CF 양수", + "roaIncreasing": "ROA 개선", + "cfGtNi": "CF > 순이익", + "debtDecreasing": "장기부채 감소", + "currentRatioUp": "유동비율 개선", + "noNewShares": "주식 미발행", + "grossMarginUp": "매출총이익률 개선", + "assetTurnoverUp": "자산회전율 개선", + } + items = [{"signal": labels.get(k, k), "pass": v} for k, v in score.components.items()] + + return { + "total": score.total, + "interpretation": score.interpretation, + "items": items, + } + + +@memoized_calc +def calcSummaryFlags(company, *, basePeriod: str | None = None) -> list[str]: + """전체 경고/기회 요약 -- 8영역 플래그 수집. + + Returns + ------- + list[str] + 경고/기회 메시지 목록 + """ + flags: list[str] = [] + + from dartlab.analysis.financial.efficiency import calcEfficiencyFlags + from dartlab.analysis.financial.growthAnalysis import calcGrowthFlags + from dartlab.analysis.financial.profitability import calcProfitabilityFlags + from dartlab.analysis.financial.stability import calcStabilityFlags + + flags.extend(calcProfitabilityFlags(company, basePeriod=basePeriod)) + flags.extend(calcGrowthFlags(company, basePeriod=basePeriod)) + + # calcStabilityFlags, calcEarningsQualityFlags: dict 반환 → flags 키 추출 + stabResult = calcStabilityFlags(company, basePeriod=basePeriod) + if isinstance(stabResult, dict): + flags.extend(stabResult.get("flags", [])) + elif isinstance(stabResult, list): + flags.extend(stabResult) + + flags.extend(calcEfficiencyFlags(company, basePeriod=basePeriod)) + + # 새 영역 플래그 + try: + from dartlab.analysis.financial.earningsQuality import calcEarningsQualityFlags + + eqResult = calcEarningsQualityFlags(company, basePeriod=basePeriod) + if isinstance(eqResult, dict): + flags.extend(eqResult.get("flags", [])) + elif isinstance(eqResult, list): + flags.extend(eqResult) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + try: + from dartlab.analysis.financial.investmentAnalysis import calcInvestmentFlags + + flags.extend(calcInvestmentFlags(company, basePeriod=basePeriod)) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + try: + from dartlab.analysis.financial.crossStatement import calcCrossStatementFlags + + flags.extend(calcCrossStatementFlags(company, basePeriod=basePeriod)) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + try: + from dartlab.analysis.financial.costStructure import calcCostStructureFlags + + flags.extend(calcCostStructureFlags(company, basePeriod=basePeriod)) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + try: + from dartlab.analysis.financial.capitalAllocation import calcCapitalAllocationFlags + + flags.extend(calcCapitalAllocationFlags(company, basePeriod=basePeriod)) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + try: + from dartlab.analysis.financial.taxAnalysis import calcTaxFlags + + flags.extend(calcTaxFlags(company, basePeriod=basePeriod)) + except (ImportError, AttributeError, TypeError, ValueError): + pass + + return flags diff --git a/src/dartlab/analysis/financial/stability.py b/src/dartlab/analysis/financial/stability.py new file mode 100644 index 0000000000000000000000000000000000000000..a7651957fd4f11f342bc2ca21cf1ae5e122c67a5 --- /dev/null +++ b/src/dartlab/analysis/financial/stability.py @@ -0,0 +1,731 @@ +"""2-3 안정성 분석 -- 부채 구조와 지급 능력을 추적한다. + +select()로 BS/IS/CF 원본 계정을 가져와서 +부채비율 + 이자보상배율 + 부실 판별을 금액과 함께 보여준다. +레버리지가 늘었는지, 이자를 갚을 수 있는지를 금액으로 파악. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import ( + MAX_RATIO_YEARS, + annualColsFromPeriods, + getRatios, + sumBorrowings, + toDictBySnakeId, +) +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = MAX_RATIO_YEARS + + +def _isHoldingOrFinancial(company) -> bool: + """지주사 또는 금융업 판별.""" + try: + name = getattr(company, "corpName", "") or "" + if any(k in name for k in ("지주", "홀딩스", "Holdings")): + return True + sector = getattr(company, "sector", None) + if sector is not None: + from dartlab.core.sector.types import Sector + + if sector.sector == Sector.FINANCIALS: + return True + except (AttributeError, ImportError): + pass + return False + + +def _yoy(cur, prev) -> float | None: + if cur is None or prev is None or prev == 0: + return None + return round((cur - prev) / abs(prev) * 100, 2) + + +from dartlab.core.finance.calc import safePct as _pctOf # noqa: E402 + + +# ── 레버리지 구조 시계열 ── + + +@memoized_calc +def calcLeverageTrend(company, *, basePeriod: str | None = None) -> dict | None: + """레버리지 구조 시계열 -- 부채로 얼마나 버티는가. + + BS에서 부채/자본/자산 원본 금액을 가져와서 + 부채비율 + 자기자본비율 + 순차입금비율을 금액과 함께 보여준다. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + totalDebt : float — 부채총계 (원) + totalDebtYoy : float — 부채총계 전년비 (%) + equity : float — 자본총계 (원) + equityYoy : float — 자본총계 전년비 (%) + totalAssets : float — 자산총계 (원) + cash : float — 현금및현금성자산 (원) + totalBorrowing : float — 총차입금 (원) + netDebt : float — 순차입금 (원) + debtRatio : float — 부채비율 (%) + equityRatio : float — 자기자본비율 (%) + netDebtRatio : float — 순차입금비율 (%) + notesDetail : dict — 차입금/리스 주석 상세 (있을 때만) + """ + bsResult = company.select( + "BS", + [ + "부채총계", + "자본총계", + "자산총계", + "현금및현금성자산", + "단기차입금", + "장기차입금", + "차입금단기", + "long_term_borrowings", + "short_term_borrowings", + "차입부채", + "장기차입부채", + "유동성장기차입금", + "사채", + ], + ) + parsed = toDictBySnakeId(bsResult) + if parsed is None: + return None + + data, periods = parsed + debt = data.get("total_liabilities", {}) + equity = data.get("total_stockholders_equity", {}) + ta = data.get("total_assets", {}) + cash = data.get("cash_and_cash_equivalents", {}) + + yCols = annualColsFromPeriods(periods, basePeriod, _MAX_YEARS + 1) + if len(yCols) < 2: + return None + + history = [] + for i, col in enumerate(yCols[:-1]): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + d = debt.get(col) + e = equity.get(col) + a = ta.get(col) + c = cash.get(col) + + # 차입금: 회사 키 패턴 무관 헬퍼 + totalBorrowing = sumBorrowings(data, col) + netDebt = totalBorrowing - (c or 0) if totalBorrowing > 0 else None + + debtRatio = _pctOf(d, e) + equityRatio = _pctOf(e, a) + netDebtRatio = _pctOf(netDebt, e) if netDebt is not None else None + + history.append( + { + "period": col, + "totalDebt": d, + "totalDebtYoy": _yoy(d, debt.get(prevCol)) if prevCol else None, + "equity": e, + "equityYoy": _yoy(e, equity.get(prevCol)) if prevCol else None, + "totalAssets": a, + "cash": c, + "totalBorrowing": totalBorrowing if totalBorrowing > 0 else None, + "netDebt": netDebt, + "debtRatio": debtRatio, + "equityRatio": equityRatio, + "netDebtRatio": netDebtRatio, + } + ) + + if not history: + return None + + result: dict = {"history": history} + + # notes enrichment — 차입금 구성 + 리스부채 + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["borrowings", "lease"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +# ── 이자보상 시계열 ── + + +@memoized_calc +def calcCoverageTrend(company, *, basePeriod: str | None = None) -> dict | None: + """이자보상배율 시계열 -- 이자를 갚을 능력이 있는가. + + IS 영업이익 / 이자비용으로 산출. + 이자비용 소스 우선순위: IS 이자비용 → CF interest_paid → IS 금융비용. + 금융비용은 외환손실·파생상품 등 비이자 항목 포함하여 과대계상 위험. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + operatingIncome : float — 영업이익 (원) + operatingIncomeYoy : float — 영업이익 전년비 (%) + interestExpense : float — 이자비용 (원) + interestExpenseSource : str — 이자비용 소스 ("이자비용"|"CF이자지급"|"금융비용") + interestCoverage : float — 이자보상배율 (배) + """ + isResult = company.select("IS", ["영업이익", "금융비용", "이자비용"]) + parsed = toDictBySnakeId(isResult) + if parsed is None: + return None + + data, periods = parsed + op = data.get("operating_profit", {}) + finCost = data.get("finance_costs", {}) + intCost = data.get("interest_expense", {}) + + # CF interest_paid (실제 현금 이자 지급액) + cfIntPaid: dict = {} + try: + cfResult = company.select("CF", ["interest_paid"]) + cfParsed = toDictBySnakeId(cfResult) + if cfParsed is not None: + cfData, _ = cfParsed + cfIntPaid = cfData.get("interest_paid", {}) + except (ValueError, KeyError, AttributeError): + pass + + yCols = annualColsFromPeriods(periods, basePeriod, _MAX_YEARS + 1) + if len(yCols) < 2: + return None + history = [] + for i, col in enumerate(yCols[:-1]): + prevCol = yCols[i + 1] if i + 1 < len(yCols) else None + o = op.get(col) + + # 이자비용 우선순위: IS 이자비용 → CF interest_paid → IS 금융비용 + intVal = intCost.get(col) + cfVal = cfIntPaid.get(col) + finVal = finCost.get(col) + + if intVal: + interest = intVal + source = "이자비용" + elif cfVal: + interest = abs(cfVal) # CF는 지출이라 음수일 수 있음 + source = "CF이자지급" + elif finVal: + interest = finVal + source = "금융비용" + else: + interest = None + source = None + + coverage = None + if o is not None and interest is not None and interest != 0: + coverage = round(o / abs(interest), 2) + + history.append( + { + "period": col, + "operatingIncome": o, + "operatingIncomeYoy": _yoy(o, op.get(prevCol)) if prevCol else None, + "interestExpense": interest, + "interestExpenseSource": source, + "interestCoverage": coverage, + } + ) + + return {"history": history} if history else None + + +# ── 부실 판별 (Altman Z-Score) ── + + +@memoized_calc +def calcDistressScore(company, *, basePeriod: str | None = None) -> dict | None: + """Altman Z-Score 시계열 -- 부실 위험은 어디인가. + + BS/IS에서 원본 계정을 가져와 5개 변수를 직접 계산. + Z = 1.2*X1 + 1.4*X2 + 3.3*X3 + 0.6*X4 + 1.0*X5 + X1 = 운전자본/총자산, X2 = 이익잉여금/총자산, X3 = EBIT/총자산 + X4 = 시가총액/부채총계, X5 = 매출/총자산 + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + totalAssets : float — 자산총계 (원) + workingCapital : float — 운전자본 (원) + retainedEarnings : float — 이익잉여금 (원) + ebit : float — EBIT (원) + revenue : float — 매출액 (원) + totalDebt : float — 부채총계 (원) + x1_wcTa : float — 운전자본/총자산 + x2_reTa : float — 이익잉여금/총자산 + x3_ebitTa : float — EBIT/총자산 + x4_mcapTl : float — 시가총액/부채총계 + x5_revTa : float — 매출/총자산 + zScore : float — Z-Score (점) + zModel : str — 사용 모델 ("Z-Score"|"Z''-Score") + zone : str — 판정 구간 ("안전"|"회색"|"위험") + latestScore : float — 최신 Z-Score (점) + zone : str — 최신 판정 ("안전"|"회색"|"위험"|"판별 불가") + diagnosticMeta : dict + model : str — 모델명 + precision : float — 정밀도 + typeIError : float — 1종 오류율 + reference : str — 학술 출처 + marketNote : str — 시장 적용 참고 + notesDetail : dict — 충당부채 주석 상세 (있을 때만) + """ + bsResult = company.select( + "BS", ["자산총계", "유동자산", "유동부채", "부채총계", "이익잉여금", "미처분이익잉여금(결손금)"] + ) + isResult = company.select("IS", ["영업이익", "매출액"]) + + bsParsed = toDictBySnakeId(bsResult) + isParsed = toDictBySnakeId(isResult) + if bsParsed is None or isParsed is None: + return None + + bsData, bsPeriods = bsParsed + isData, _ = isParsed + + taRow = bsData.get("total_assets", {}) + caRow = bsData.get("current_assets", {}) + clRow = bsData.get("current_liabilities", {}) + tlRow = bsData.get("total_liabilities", {}) + from dartlab.analysis.financial._helpers import mergeRows + + reRow = mergeRows(bsData.get("retained_earnings"), bsData.get("unappropriated_retained_earnings_deficit")) + opRow = isData.get("operating_profit", {}) + revRow = isData.get("sales", {}) + + # 시가총액 (X4용) -- ratios에서 가져옴 + ratios = getRatios(company) + marketCap = ratios.marketCap if ratios else None + + yCols = annualColsFromPeriods(bsPeriods, basePeriod, _MAX_YEARS) + if not yCols: + return None + history = [] + for col in yCols: + a = taRow.get(col) + ca = caRow.get(col) + cl = clRow.get(col) + tl = tlRow.get(col) + re = reRow.get(col) + ebit = opRow.get(col) + rev = revRow.get(col) + + if a is None or a == 0: + continue + + wc = (ca or 0) - (cl or 0) + x1 = round(wc / a, 4) if a else None + x2 = round(re / a, 4) if re is not None and a else None + x3 = round(ebit / a, 4) if ebit is not None and a else None + x4 = round(marketCap / tl, 4) if marketCap is not None and tl and tl > 0 else None + x5 = round(rev / a, 4) if rev is not None and a else None + + # X4(시가총액/부채) 없으면 Altman Z'' (비제조업) 대체 + zScore = None + zModel = None + if all(v is not None for v in [x1, x2, x3, x4, x5]): + zScore = round(1.2 * x1 + 1.4 * x2 + 3.3 * x3 + 0.6 * x4 + 1.0 * x5, 2) + zModel = "Z-Score" + elif all(v is not None for v in [x1, x2, x3, x5]): + # Z'' = 6.56*X1 + 3.26*X2 + 6.72*X3 + 1.05*X5 (book value 기반) + zScore = round(6.56 * x1 + 3.26 * x2 + 6.72 * x3 + 1.05 * x5, 2) + zModel = "Z''-Score" + + if zScore is not None: + safeThreshold = 2.99 if zModel == "Z-Score" else 2.60 + dangerThreshold = 1.81 if zModel == "Z-Score" else 1.10 + if zScore > safeThreshold: + zone = "안전" + elif zScore > dangerThreshold: + zone = "회색" + else: + zone = "위험" + else: + zone = None + + history.append( + { + "period": col, + "totalAssets": a, + "workingCapital": wc, + "retainedEarnings": re, + "ebit": ebit, + "revenue": rev, + "totalDebt": tl, + "x1_wcTa": x1, + "x2_reTa": x2, + "x3_ebitTa": x3, + "x4_mcapTl": x4, + "x5_revTa": x5, + "zScore": zScore, + "zModel": zModel, + "zone": zone, + } + ) + + if not history: + return None + + latest = history[0] + zModel = latest.get("zModel", "") + result: dict = { + "history": history, + "latestScore": latest.get("zScore"), + "zone": latest.get("zone") or "판별 불가", + "diagnosticMeta": { + "model": zModel, + "precision": 0.95 if zModel == "Z-Score" else 0.82, + "typeIError": 0.06 if zModel == "Z-Score" else 0.15, + "reference": "Altman(1968)" if zModel == "Z-Score" else "Altman(1995)", + "marketNote": "한국 시장: Altman et al.(2014) 신흥시장 Z'' 적용", + }, + } + + # notes enrichment — 충당부채 (위험/회색 구간일 때 의미) + from dartlab.analysis.financial._helpers import fetchNotesDetail + + notesDetail = fetchNotesDetail(company, ["provisions"]) + if notesDetail: + result["notesDetail"] = notesDetail + + return result + + +# ── 부실 앙상블 (기존 유지 -- getRatios 사용) ── + + +@memoized_calc +def calcDistressEnsemble(company, *, basePeriod: str | None = None) -> dict | None: + """4개 부실예측 모델 앙상블 -- 다수결 투표. + + Altman Z-Score, Ohlson O-Score, Springate S-Score, Zmijewski X-Score + 각 모델의 판정(safe/warning/danger)을 집계하여 종합 등급 산출. + + Returns + ------- + dict + models : list[dict] + model : str — 모델명 + score : float — 모델 점수 (점) + verdict : str — 개별 판정 ("safe"|"warning"|"danger") + threshold : str — 임계값 설명 + ensemble : str — 종합 판정 ("안전"|"주의"|"위험") + agreement : float — 모델 간 일치도 (%) + dangerCount : int — 위험 판정 모델 수 + safeCount : int — 안전 판정 모델 수 + total : int — 전체 모델 수 + """ + ratios = getRatios(company) + if ratios is None: + return None + + models = [] + + # Altman Z-Score: >2.99 safe, 1.81~2.99 gray, <1.81 danger + z = ratios.altmanZScore + if z is not None: + if z > 2.99: + verdict = "safe" + elif z > 1.81: + verdict = "warning" + else: + verdict = "danger" + models.append( + { + "model": "Altman Z-Score", + "score": z, + "verdict": verdict, + "threshold": "안전 >2.99 / 회색 1.81~2.99 / 위험 <1.81", + } + ) + + # Altman Z'' (비제조/신흥): >2.60 safe, 1.10~2.60 gray, <1.10 danger + zpp = ratios.altmanZppScore + if zpp is not None: + if zpp > 2.60: + verdict = "safe" + elif zpp > 1.10: + verdict = "warning" + else: + verdict = "danger" + models.append( + { + "model": "Altman Z''-Score", + "score": zpp, + "verdict": verdict, + "threshold": "안전 >2.60 / 회색 1.10~2.60 / 위험 <1.10", + } + ) + + # Ohlson O-Score: P(default) < 10% safe, 10~50% warning, >50% danger + oProb = ratios.ohlsonProbability + if oProb is not None: + if oProb < 10: + verdict = "safe" + elif oProb < 50: + verdict = "warning" + else: + verdict = "danger" + models.append( + { + "model": "Ohlson O-Score", + "score": ratios.ohlsonOScore, + "probability": oProb, + "verdict": verdict, + "threshold": "안전 <10% / 경고 10~50% / 위험 >50%", + } + ) + + # Springate S-Score: >0.862 safe, else danger + ss = ratios.springateSScore + if ss is not None: + verdict = "safe" if ss > 0.862 else "danger" + models.append( + {"model": "Springate S-Score", "score": ss, "verdict": verdict, "threshold": "안전 >0.862 / 위험 <0.862"} + ) + + # Zmijewski X-Score: <0 safe, else danger + xz = ratios.zmijewskiXScore + if xz is not None: + verdict = "safe" if xz < 0 else "danger" + models.append({"model": "Zmijewski X-Score", "score": xz, "verdict": verdict, "threshold": "안전 <0 / 위험 >0"}) + + if not models: + return None + + # 다수결 + dangerCount = sum(1 for m in models if m["verdict"] == "danger") + safeCount = sum(1 for m in models if m["verdict"] == "safe") + total = len(models) + + if dangerCount > total / 2: + ensemble = "위험" + elif safeCount > total / 2: + ensemble = "안전" + else: + ensemble = "주의" + + agreement = max(dangerCount, safeCount) / total * 100 + + return { + "models": models, + "ensemble": ensemble, + "agreement": round(agreement, 1), + "dangerCount": dangerCount, + "safeCount": safeCount, + "total": total, + } + + +@memoized_calc +def calcDebtMaturity(company, *, basePeriod: str | None = None) -> dict | None: + """부채 만기 구조 분석. + + 단기/장기 차입금 비율, 차환 리스크 지표. + + Returns + ------- + dict + history : list[dict] + period : str — 기간 + shortTermBorrowing : float — 단기차입금 (원) + longTermBorrowing : float — 장기차입금 (원) + bonds : float — 사채 (원) + totalBorrowing : float — 총차입금 (원) + shortTermRatio : float — 단기차입금 비중 (%) + currentToTotalDebt : float — 유동부채/부채총계 (%) + refinancingRisk : float — 단기차입금/OCF (배) + """ + bsResult = company.select( + "BS", + [ + "단기차입금", + "장기차입금", + "사채", + "차입부채", + "발행사채", + "유동금융부채", + "장기금융부채", + "유동부채", + "비유동부채", + "부채총계", + ], + ) + parsed = toDictBySnakeId(bsResult, maxPeriods=5) + if parsed is None: + return None + + data, periods = parsed + # 일반 제조업 + stRow = data.get("단기차입금", {}) + ltRow = data.get("장기차입금", {}) + bondsRow = data.get("사채", {}) + # 금융업 + borrowRow = data.get("차입부채", {}) + issuedBondRow = data.get("발행사채", {}) + # 바이오 등 + curFinRow = data.get("유동금융부채", {}) + ltFinRow = data.get("장기금융부채", {}) + + clRow = data.get("유동부채", {}) + data.get("비유동부채", {}) + tlRow = data.get("부채총계", {}) + + # 연도 컬럼만 + annualPeriods = annualColsFromPeriods(periods, basePeriod, 5) + if not annualPeriods: + return None + + # OCF for 차환능력 평가 + cfResult = company.select("CF", ["영업활동현금흐름"]) + cfParsed = toDictBySnakeId(cfResult, maxPeriods=5) if cfResult else None + cfData = cfParsed[0] if cfParsed else {} + ocfRow = cfData.get("영업활동현금흐름", {}) + history = [] + for col in annualPeriods: + # 차입금: 업종별 계정 대응 + st = stRow.get(col) or 0 + lt = ltRow.get(col) or 0 + bondsVal = bondsRow.get(col) or 0 + totalBorrowing = st + lt + bondsVal + + # 금융업 fallback + if totalBorrowing == 0: + borrow = borrowRow.get(col) or 0 + issued = issuedBondRow.get(col) or 0 + totalBorrowing = borrow + issued + st = borrow # 금융업 차입부채를 단기로 근사 + lt = issued + + # 바이오 등 fallback + if totalBorrowing == 0: + curFin = curFinRow.get(col) or 0 + ltFin = ltFinRow.get(col) or 0 + totalBorrowing = curFin + ltFin + st = curFin + lt = ltFin + + cl = clRow.get(col) or 0 + tl = tlRow.get(col) or 0 + ocf = ocfRow.get(col) + + shortTermRatio = round(st / totalBorrowing * 100, 2) if totalBorrowing > 0 else None + currentToTotal = round(cl / tl * 100, 2) if tl > 0 else None + + # 단기차입금/OCF = 차환능력 (낮을수록 안전) + refinancingRisk = None + if ocf is not None and ocf > 0 and st > 0: + refinancingRisk = round(st / ocf, 2) + + history.append( + { + "period": col, + "shortTermBorrowing": st, + "longTermBorrowing": lt, + "bonds": bondsVal, + "totalBorrowing": totalBorrowing, + "shortTermRatio": shortTermRatio, + "currentToTotalDebt": currentToTotal, + "refinancingRisk": refinancingRisk, + } + ) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcStabilityFlags(company, *, basePeriod: str | None = None) -> dict: + """안정성 경고/기회 플래그. + + Returns + ------- + dict + flags : list[str] — 경고/기회 플래그 문자열 목록 + enrichedFlags : list[dict] — 상세 진단 메타 포함 플래그 목록 + """ + flags: list[str] = [] + enriched: list[dict] = [] + + # 레버리지 + isFinancial = _isHoldingOrFinancial(company) + lev = calcLeverageTrend(company, basePeriod=basePeriod) + if lev and lev["history"]: + hist = lev["history"] + h0 = hist[0] + dr = h0.get("debtRatio") + if dr is not None: + if isFinancial: + # 금융업: 예수부채로 부채비율이 구조적으로 높음. 비금융 기준 적용 불가 + # 양호/보통은 플래그로 안 넣음 (중복 방지). 과다만 경고. + if dr >= 1500: + flags.append(f"부채비율 {dr:.0f}% -- 금융업 과다") + elif dr > 200: + flags.append(f"부채비율 {dr:.0f}% -- 재무 위험") + elif dr < 50: + flags.append(f"부채비율 {dr:.0f}% -- 매우 안정") + + # 부채 3기 연속 증가 + if len(hist) >= 3: + debts = [h.get("totalDebt") for h in hist[:3]] + if all(v is not None for v in debts) and debts[0] > debts[1] > debts[2]: + yoy = h0.get("totalDebtYoy") + flags.append(f"부채 3기 연속 증가 (최근 +{yoy:.0f}%)" if yoy else "부채 3기 연속 증가") + + # 이자보상 + cov = calcCoverageTrend(company, basePeriod=basePeriod) + if cov and cov["history"]: + h0 = cov["history"][0] + ic = h0.get("interestCoverage") + source = h0.get("interestExpenseSource") + # 순현금 여부 확인 -- 순현금이면 금융비용 기반 저배율은 오진 가능 + isNetCash = False + if lev and lev["history"]: + nd = lev["history"][0].get("netDebt") + if nd is not None and nd < 0: + isNetCash = True + # 지주사/금융업: 영업이익 구조적 저수준 (지분법이익이 영업외에 잡힘) + if ic is not None: + if isFinancial: + # 지주사/금융은 영업이익 기반 이자보상배율이 구조적으로 낮음 + if ic < 1: + flags.append(f"이자보상배율 {ic:.1f}배 -- 지주/금융 구조상 저수준 (영업외 수익이 이자 커버)") + elif ic < 1 and not isNetCash: + flags.append(f"이자보상배율 {ic:.1f}배 -- 이자 지급 불능 위험") + elif ic < 3 and not (isNetCash and source == "금융비용"): + flags.append(f"이자보상배율 {ic:.1f}배 -- 이자 부담 과다") + + # Altman Z-Score (제조업 기반 모형 — 금융/지주사는 구조적 왜곡) + if not isFinancial: + distress = calcDistressScore(company, basePeriod=basePeriod) + if distress and distress.get("latestScore") is not None: + z = distress["latestScore"] + if z < 1.81: + msg = f"Altman Z-Score {z:.2f} -- 부실 위험 구간" + flags.append(msg) + meta = distress.get("diagnosticMeta", {}) + enriched.append( + { + "code": "ALTMAN_DISTRESS", + "message": msg, + "precision": meta.get("precision"), + "baseRate": meta.get("marketNote", ""), + "reference": meta.get("reference", ""), + "sectorNote": "금융업/지주회사 부채 구조 왜곡 — Z-Score 부적합" if isFinancial else "", + } + ) + + return {"flags": flags, "enrichedFlags": enriched} diff --git a/src/dartlab/analysis/financial/taxAnalysis.py b/src/dartlab/analysis/financial/taxAnalysis.py new file mode 100644 index 0000000000000000000000000000000000000000..0693b7c285c270a336e3c55a8ab0931c0e8ee965 --- /dev/null +++ b/src/dartlab/analysis/financial/taxAnalysis.py @@ -0,0 +1,269 @@ +"""세금 분석 — 유효세율, 세금 현금화, 이연법인세 시계열. + +세금 부담의 실체와 미래 세금 리스크를 시계열로 추적한다. +""" + +from __future__ import annotations + +from dartlab.analysis.financial._helpers import annualColsFromPeriods, toDictBySnakeId +from dartlab.analysis.financial._memoize import memoized_calc + +_MAX_YEARS = 8 + + +def _get(row: dict, col: str) -> float: + v = row.get(col) if row else None + return v if v is not None else 0 + + +from dartlab.core.finance.calc import safePct as _pct # noqa: E402 + + +# ── 유효세율 ── + + +@memoized_calc +def calcEffectiveTaxRate(company, *, basePeriod: str | None = None) -> dict | None: + """유효세율 시계열 — 법인세비용/세전이익. + + 반환:: + + { + "history": [ + { + "period": str, + "preTaxIncome": float, + "taxExpense": float, + "effectiveTaxRate": float | None, + "statutoryRate": float, + "taxGap": float | None, + }, + ... + ], + } + """ + accounts = ["법인세비용", "법인세차감전순이익", "세전이익"] + isResult = company.select("IS", accounts) + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + taxRow = isData.get("법인세비용", {}) + ptRow = isData.get("법인세차감전순이익", isData.get("세전이익", {})) + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + # 법정세율 (한국 기준, 2023~) + statutoryRate = 24.0 # 과세표준 구간에 따라 다르나 대기업 근사 + + history = [] + for col in yCols: + ptIncome = ptRow.get(col) or 0 + taxExpense = taxRow.get(col) or 0 + + effectiveTaxRate = None + taxGap = None + if ptIncome > 0: + effectiveTaxRate = abs(taxExpense) / ptIncome * 100 + taxGap = effectiveTaxRate - statutoryRate + + history.append( + { + "period": col, + "preTaxIncome": ptIncome, + "taxExpense": taxExpense, + "effectiveTaxRate": effectiveTaxRate, + "statutoryRate": statutoryRate, + "taxGap": taxGap, + } + ) + + return {"history": history} if history else None + + +# ── 세금 현금화 ── + + +@memoized_calc +def calcTaxCashConversion(company, *, basePeriod: str | None = None) -> dict | None: + """세금 현금화 시계열 — IS 법인세비용 vs CF 법인세납부. + + 반환:: + + { + "history": [ + { + "period": str, + "taxExpense": float, + "taxPaid": float | None, + "taxCashRatio": float | None, + }, + ... + ], + } + """ + isResult = company.select("IS", ["법인세비용"]) + cfResult = company.select("CF", ["payments_of_income_taxes"]) + + isParsed = toDictBySnakeId(isResult) + if isParsed is None: + return None + + isData, isPeriods = isParsed + taxExpRow = isData.get("법인세비용", {}) + + cfParsed = toDictBySnakeId(cfResult) + cfData = cfParsed[0] if cfParsed else {} + taxPaidRow = cfData.get("payments_of_income_taxes", {}) + + yCols = annualColsFromPeriods(isPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + history = [] + for col in yCols: + taxExpense = abs(taxExpRow.get(col) or 0) + taxPaidVal = taxPaidRow.get(col) if taxPaidRow else None + taxPaid = abs(taxPaidVal) if taxPaidVal is not None else None + + taxCashRatio = None + if taxPaid is not None and taxExpense > 0: + taxCashRatio = taxPaid / taxExpense * 100 + + history.append( + { + "period": col, + "taxExpense": taxExpense, + "taxPaid": taxPaid, + "taxCashRatio": taxCashRatio, + } + ) + + return {"history": history} if history else None + + +# ── 이연법인세 ── + + +@memoized_calc +def calcDeferredTax(company, *, basePeriod: str | None = None) -> dict | None: + """이연법인세 시계열 — 이연자산/부채 추세. + + 반환:: + + { + "history": [ + { + "period": str, + "deferredTaxAsset": float, + "deferredTaxLiability": float, + "netDeferredTax": float, + "dtaToTotalAssets": float | None, + }, + ... + ], + } + """ + bsResult = company.select("BS", ["이연법인세자산", "이연법인세부채", "자산총계"]) + bsParsed = toDictBySnakeId(bsResult) + if bsParsed is None: + return None + + bsData, bsPeriods = bsParsed + dtaRow = bsData.get("deferred_tax_assets", {}) + dtlRow = bsData.get("deferred_tax_liabilities", {}) + taRow = bsData.get("assets", {}) + + yCols = annualColsFromPeriods(bsPeriods, basePeriod=basePeriod, maxYears=_MAX_YEARS) + if not yCols: + return None + + history = [] + for col in yCols: + dta = _get(dtaRow, col) + dtl = _get(dtlRow, col) + ta = _get(taRow, col) + netDt = dta - dtl + + history.append( + { + "period": col, + "deferredTaxAsset": dta, + "deferredTaxLiability": dtl, + "netDeferredTax": netDt, + "dtaToTotalAssets": _pct(dta, ta), + } + ) + + return {"history": history} if history else None + + +# ── 플래그 ── + + +@memoized_calc +def calcTaxFlags(company, *, basePeriod: str | None = None) -> list[str]: + """세금 관련 경고 신호. + + Returns + ------- + list[str] + 경고 메시지 문자열 리스트 (극저/고세율, 세금혜택 의존, 세금현금 과대납부, + 이연법인세 급증/연속 증가 등). + """ + flags = [] + + etr = calcEffectiveTaxRate(company, basePeriod=basePeriod) + if etr and etr["history"]: + h0 = etr["history"][0] + rate = h0.get("effectiveTaxRate") + statutory = h0.get("statutoryRate", 24.0) + if rate is not None: + if rate < 10: + flags.append(f"유효세율 {rate:.1f}% — 극저세율 (세금 혜택 또는 이연)") + elif rate > 35: + flags.append(f"유효세율 {rate:.1f}% — 고세율 (추가 세금 부담)") + + # 법정세율의 50% 미만이 3기 연속이면 구조적 세금혜택 의존 + if len(etr["history"]) >= 3: + lowTaxYears = sum( + 1 + for h in etr["history"][:3] + if h.get("effectiveTaxRate") is not None and h["effectiveTaxRate"] < statutory * 0.5 + ) + if lowTaxYears >= 3: + flags.append("유효세율 3기 연속 법정세율의 50% 미만 — 세금혜택 구조적 의존") + + # 유효세율 변동성 + rates = [h.get("effectiveTaxRate") for h in etr["history"][:5] if h.get("effectiveTaxRate") is not None] + if len(rates) >= 3: + mean = sum(rates) / len(rates) + if mean > 0: + std = (sum((r - mean) ** 2 for r in rates) / len(rates)) ** 0.5 + cv = std / mean + if cv > 0.5: + flags.append(f"유효세율 변동계수 {cv:.2f} — 세금 비용 불안정") + + cashConv = calcTaxCashConversion(company, basePeriod=basePeriod) + if cashConv and cashConv["history"]: + h0 = cashConv["history"][0] + tcr = h0.get("taxCashRatio") + if tcr is not None and tcr > 150: + flags.append(f"세금현금비율 {tcr:.0f}% — 법인세 과대 납부 (과거 이연분 정산)") + + deferred = calcDeferredTax(company, basePeriod=basePeriod) + if deferred and len(deferred["history"]) >= 2: + hist = deferred["history"] + dta0 = hist[0].get("deferredTaxAsset") + dta1 = hist[1].get("deferredTaxAsset") + if dta0 is not None and dta1 is not None and dta1 > 0 and dta0 / dta1 > 2: + flags.append(f"이연법인세자산 {dta0 / dta1:.1f}배 급증 — 미래 과세소득 가정 검토") + + # 이연법인세자산 3기 연속 증가 + if len(hist) >= 3: + dtas = [h.get("deferredTaxAsset") for h in hist[:3]] + if all(v is not None for v in dtas) and dtas[0] > dtas[1] > dtas[2] > 0: + flags.append("이연법인세자산 3기 연속 증가 — 실현 가능성 점검 필요") + + return flags diff --git a/src/dartlab/analysis/financial/valuation.py b/src/dartlab/analysis/financial/valuation.py new file mode 100644 index 0000000000000000000000000000000000000000..834783a37809e9f2975bbba9b99e34bc9a2b688f --- /dev/null +++ b/src/dartlab/analysis/financial/valuation.py @@ -0,0 +1,1012 @@ +"""가치평가 축 -- 기존 밸류에이션 엔진을 analysis 14축 패턴으로 래핑. + +calc 함수 9개: DCF, DDM, 상대가치, RIM, 목표주가, 역내재성장률, +민감도, 종합합성, 플래그. + +모든 함수는 (company) -> dict | None 시그니처를 따른다. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from dartlab.analysis.financial._memoize import memoized_calc +from dartlab.analysis.valuation.pricetarget import compute_price_target +from dartlab.analysis.valuation.residualIncome import calcResidualIncome as _rimCalc + +log = logging.getLogger(__name__) + + +# ── IndustryGroup → SECTOR_ELASTICITY 키 매핑 ── + +_IG_TO_SECTOR_KEY: dict[str, str] = { + "SEMICONDUCTOR": "반도체", + "AUTO": "자동차", + "CHEMICAL": "화학", + "METALS": "철강", + "CONSTRUCTION": "건설", + "CONSTRUCTION_MATERIALS": "건설", + "BANK": "금융/은행", + "INSURANCE": "금융/보험", + "DIVERSIFIED_FINANCIALS": "금융/증권", + "SOFTWARE": "IT/소프트웨어", + "IT_SERVICE": "IT/소프트웨어", + "INTERNET": "IT/소프트웨어", + "TECH_HARDWARE": "전자/하드웨어", + "DISPLAY": "디스플레이", + "TELECOM": "통신", + "RETAIL": "유통", + "FOOD_BEV_TOBACCO": "식품", + "FOOD_STAPLES": "식품", + "HOUSEHOLD": "식품", + "PHARMA_BIO": "제약/바이오", + "HEALTHCARE_EQUIP": "제약/바이오", + "UTILITIES": "전력/에너지", + "ELECTRIC": "전력/에너지", + "GAS_UTILITY": "전력/에너지", + "ENERGY_EQUIP": "에너지/자원", + "OIL_GAS": "에너지/자원", + "CAPITAL_GOODS": "산업재", + "MACHINERY": "산업재", + "TRANSPORTATION": "산업재", + "COMMERCIAL_SERVICE": "산업재", + "SHIPBUILDING": "조선", + "CONSUMER_DURABLES": "섬유/의류", + "CONSUMER_SERVICE": "유통", + "MEDIA_ENTERTAINMENT": "미디어/엔터", + "MEDIA": "미디어/엔터", + "GAME": "게임", + "REAL_ESTATE": "부동산", + "REIT": "부동산", + "AEROSPACE_DEFENSE": "산업재", + "HOTEL_LEISURE": "유통", +} + + +def _resolveSectorKey(company: Any) -> str | None: + """company.sector에서 SECTOR_ELASTICITY 키를 추출.""" + try: + sectorInfo = company.sector + if sectorInfo is None: + return None + igName = sectorInfo.industryGroup.name + return _IG_TO_SECTOR_KEY.get(igName) + except (AttributeError, ValueError): + return None + + +# ── 시가 연동 헬퍼 ── + + +def _fetchPriceContext(company: Any) -> dict | None: + """gather.price에서 현재가/시총 가져오기 (sync). + + 같은 company에 대해 세션 내 1회만 네트워크 호출. + 실패 시 None 반환 -- 시가 의존 calc만 graceful skip. + """ + # company._cache에 저장하여 동일 세션 내 재활용 + cache = getattr(company, "_cache", None) + _KEY = "_priceContext" + if cache is not None and _KEY in cache: + return cache[_KEY] + + stockCode = getattr(company, "stockCode", None) + if not stockCode: + return None + + result = None + try: + from dartlab.gather.http import run_async + from dartlab.gather.price import fetch + + snapshot = run_async(fetch(stockCode, market="KR")) + if snapshot is not None: + result = { + "currentPrice": snapshot.current, + "marketCap": snapshot.market_cap, + "per": snapshot.per, + "pbr": snapshot.pbr, + "isStale": getattr(snapshot, "is_stale", False), + } + except (ImportError, OSError, RuntimeError, AttributeError): + log.debug("price fetch 실패: %s", stockCode) + + if cache is not None: + cache[_KEY] = result + return result + + +def _getSeriesAndShares(company: Any) -> tuple[dict | None, int | None, str]: + """company에서 annual series, shares, currency 추출.""" + try: + ann = company._buildFinanceSeries(freq="Y") + if ann is None: + return None, None, getattr(company, "currency", "KRW") or "KRW" + series = ann[0] if isinstance(ann, tuple) else ann + except (ValueError, KeyError, AttributeError): + return None, None, getattr(company, "currency", "KRW") or "KRW" + + shares = None + profile = getattr(company, "profile", None) + if profile: + sharesVal = getattr(profile, "sharesOutstanding", None) + if sharesVal: + shares = int(sharesVal) + + # fallback: 시가총액/현재가에서 shares 추정 + if shares is None: + price = _fetchPriceContext(company) + if price and price.get("marketCap") and price.get("currentPrice"): + mc = price["marketCap"] + cp = price["currentPrice"] + if mc > 0 and cp > 0: + shares = int(mc / cp) + + currency = getattr(company, "currency", "KRW") or "KRW" + return series, shares, currency + + +def _getSectorParams(company: Any): + """company에서 sectorParams 추출.""" + try: + return getattr(company, "sectorParams", None) + except AttributeError: + return None + + +# ── calc 함수 9개 ── + + +@memoized_calc +def calcDcf(company: Any, *, basePeriod: str | None = None) -> dict | None: + """DCF (현금흐름 할인) 밸류에이션. + + Returns + ------- + dict + perShareValue : float — 주당 적정가 (원) + enterpriseValue : float — 기업가치 (원) + equityValue : float — 자기자본가치 (원) + discountRate : float — 할인율 (%) + growthRateInitial : float — 초기 성장률 (%) + terminalGrowth : float — 영구성장률 (%) + marginOfSafety : float — 안전마진 (%) + fcfProjections : list — FCF 예측 시계열 (원) + fcfHistorical : list — FCF 과거 시계열 (원) + exitMultipleTv : float — 출구배수 기반 터미널가치 (원) + exitMultipleEv : float — 출구배수 기반 기업가치 (원) + exitMultiplePerShare : float — 출구배수 기반 주당가치 (원) + assumptions : dict — 가정 파라미터 + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + from dartlab.core.finance.dcf import dcfValuation + + series, shares, currency = _getSeriesAndShares(company) + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + marketCap = price["marketCap"] if price else None + + from dartlab.core.finance.proforma import compute_company_wacc + + wacc, _ = compute_company_wacc( + series, + sector_params=sp, + market_cap=marketCap, + currency=currency, + ) + + result = dcfValuation( + series, + shares=shares, + sectorParams=sp, + currentPrice=currentPrice, + currency=currency, + discountRate=wacc, + ) + return { + "perShareValue": result.perShareValue, + "enterpriseValue": result.enterpriseValue, + "equityValue": result.equityValue, + "discountRate": result.discountRate, + "growthRateInitial": result.growthRateInitial, + "terminalGrowth": result.terminalGrowth, + "marginOfSafety": result.marginOfSafety, + "fcfProjections": result.fcfProjections, + "fcfHistorical": result.fcfHistorical, + "exitMultipleTv": result.exitMultipleTv, + "exitMultipleEv": result.exitMultipleEv, + "exitMultiplePerShare": result.exitMultiplePerShare, + "assumptions": result.assumptions, + "warnings": result.warnings, + "currentPrice": currentPrice, + "currency": currency, + } + + +@memoized_calc +def calcDdm(company: Any, *, basePeriod: str | None = None) -> dict | None: + """DDM (배당 할인) 밸류에이션. + + calcDividendPolicy의 연간 배당 데이터를 우선 사용하여 + 분기 CF 합산 오류를 방지한다. + + Returns + ------- + dict + intrinsicValue : float — 주당 내재가치 (원) + dividendPerShare : float — 주당배당금 (원) + dividendYield : float — 배당수익률 (%) + payoutRatio : float — 배당성향 (%) + dividendGrowth : float — 배당 성장률 (%) + modelUsed : str — 사용 모델 ("Gordon" | "H-Model" | "N/A") + discountRate : float — 할인율 (%) + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + from dartlab.analysis.financial.capitalAllocation import calcDividendPolicy + from dartlab.core.finance.dcf import ddmValuation + + series, shares, currency = _getSeriesAndShares(company) + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + + # 1순위: Report API DPS (가장 정확한 연간 주당배당금) + annualDivs: list[float] | None = None + try: + from dartlab.providers.dart.report.pivot import pivotDividend + + stockCode = getattr(company, "stockCode", None) + divResult = pivotDividend(stockCode) if stockCode else None + if divResult and divResult.dps: + validDps = [d for d in divResult.dps if d is not None and d > 0] + if validDps and shares and shares > 0: + annualDivs = [dps * shares for dps in validDps] + except (ImportError, ValueError, KeyError, AttributeError): + pass + + # 2순위: calcDividendPolicy CF 기반 (Report 없을 때 fallback) + if not annualDivs: + divPolicy = calcDividendPolicy(company, basePeriod=basePeriod) + if divPolicy and divPolicy.get("history"): + hist = divPolicy["history"] + minDiv = shares * 100 if shares and shares > 0 else 1e9 + annualDivs = [ + h["dividendsPaid"] for h in reversed(hist) if h.get("dividendsPaid") and h["dividendsPaid"] > minDiv + ] + + result = ddmValuation( + series, + shares=shares, + sectorParams=sp, + currentPrice=currentPrice, + annualDividends=annualDivs, + ) + if result.modelUsed == "N/A" and not result.warnings: + return None + + return { + "intrinsicValue": result.intrinsicValue, + "dividendPerShare": result.dividendPerShare, + "dividendYield": result.dividendYield, + "payoutRatio": result.payoutRatio, + "dividendGrowth": result.dividendGrowth, + "modelUsed": result.modelUsed, + "discountRate": result.discountRate, + "warnings": result.warnings, + "currentPrice": currentPrice, + "currency": currency, + } + + +@memoized_calc +def calcRelativeValuation(company: Any, *, basePeriod: str | None = None) -> dict | None: + """상대가치 (PER/PBR/EV-EBITDA/PSR/PEG) 밸류에이션. + + Returns + ------- + dict + sectorMultiples : dict — 업종 평균 멀티플 (PER, PBR 등) (배수) + currentMultiples : dict — 현재 멀티플 (배수) + impliedValues : dict — 멀티플별 내재가치 (원) + premiumDiscount : dict — 업종 대비 할인/프리미엄 (%) + consensusValue : float — 합의 적정가 (원) + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + from dartlab.core.finance.dcf import relativeValuation + + series, shares, currency = _getSeriesAndShares(company) + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + marketCap = price["marketCap"] if price else None + currentPrice = price["currentPrice"] if price else None + + result = relativeValuation( + series, + sectorParams=sp, + marketCap=marketCap, + shares=shares, + currentPrice=currentPrice, + ) + return { + "sectorMultiples": result.sectorMultiples, + "currentMultiples": result.currentMultiples, + "impliedValues": result.impliedValues, + "premiumDiscount": result.premiumDiscount, + "consensusValue": result.consensusValue, + "warnings": result.warnings, + "currentPrice": currentPrice, + "currency": currency, + } + + +@memoized_calc +def calcResidualIncome(company: Any, *, basePeriod: str | None = None) -> dict | None: + """RIM (잔여이익모델) 밸류에이션. + + Returns + ------- + dict + bps : float — 주당순자산 (원) + coe : float — 자기자본비용 (%) + riHistory : list — 잔여이익 시계열 (원) + intrinsicValue : float — 주당 내재가치 (원) + upside : float — 상승여력 (%) + terminalValue : float — 터미널가치 (원) + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + series, shares, currency = _getSeriesAndShares(company) + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + beta = sp.beta if sp else None + + result = _rimCalc( + series, + shares=shares, + currentPrice=currentPrice, + currency=currency, + beta=beta, + ) + if result is None: + return None + + return { + "bps": result.bps, + "coe": result.coe, + "riHistory": result.riHistory, + "intrinsicValue": result.intrinsicValue, + "upside": result.upside, + "terminalValue": result.terminalValue, + "warnings": result.warnings, + "currentPrice": currentPrice, + "currency": currency, + } + + +# ── 지주사 NAV ────────────────────────────────────────── + +# 주요 지주사 핵심 자회사 매핑 (종목코드: 지분율%) +# 출처: DART 사업보고서 기준. 자회사 지분 변동 시 업데이트 필요. +_HOLDING_SUBS: dict[str, list[tuple[str, float]]] = { + "034730": [ # SK + ("096770", 64.25), # SK이노베이션 + ("017670", 26.80), # SK텔레콤 + ("402340", 42.30), # SK스퀘어 + ], + "003550": [ # LG + ("373220", 30.10), # LG에너지솔루션 + ("051910", 33.30), # LG화학 + ("066570", 33.67), # LG전자 + ], + "028260": [ # 삼성물산 + ("005930", 4.99), # 삼성전자 + ("207940", 43.37), # 삼성바이오로직스 + ], + "005490": [ # POSCO홀딩스 + ("005380", 5.20), # 현대차 (실제는 포스코인터/포스코퓨처엠이나 종목코드 확인 필요) + ], +} + + +@memoized_calc +def calcNavValuation(company: Any) -> dict | None: + """지주사 NAV = Sum(상장 자회사 시총 x 지분율) - 순차입금. 할인 30%. + + Returns + ------- + dict + navGross : float — 할인 전 NAV (원) + navDiscounted : float — 할인 후 NAV (원) + navPerShare : float | None — 주당 NAV (원) + holdingDiscount : float — 지주사 할인율 (0.30) + subsidiaries : list[dict] — 자회사별 상세 (code, ratio(%), marketCap(원), value(원)) + netDebt : float — 순차입금 (원) + """ + stockCode = getattr(company, "stockCode", "") + subs = _HOLDING_SUBS.get(stockCode) + if not subs: + return None + + series, shares, currency = _getSeriesAndShares(company) + + # 자회사 시총 합산 (Company 객체 생성 금지 — OOM 방지) + totalSubValue = 0.0 + subDetails = [] + for subCode, ratio in subs: + try: + from dartlab.gather.http import run_async + from dartlab.gather.price import fetch + + snapshot = run_async(fetch(subCode, market="KR")) + if snapshot and snapshot.market_cap and snapshot.market_cap > 0: + subValue = snapshot.market_cap * ratio / 100 + totalSubValue += subValue + subDetails.append( + {"code": subCode, "ratio": ratio, "marketCap": snapshot.market_cap, "value": subValue} + ) + except (ImportError, OSError, RuntimeError, AttributeError): + pass + + if totalSubValue <= 0: + return None + + # 순차입금 + from dartlab.core.finance.extract import getLatest + + if series: + stb = getLatest(series, "BS", "shortterm_borrowings") or 0 + ltb = getLatest(series, "BS", "longterm_borrowings") or 0 + bonds = getLatest(series, "BS", "debentures") or 0 + cash = getLatest(series, "BS", "cash_and_cash_equivalents") or 0 + netDebt = stb + ltb + bonds - cash + else: + netDebt = 0 + + # NAV = 자회사 지분가치 합계 - 순차입금 + navGross = totalSubValue - netDebt + # 지주사 할인 30% (한국 실증 평균) + navDiscounted = navGross * 0.70 + + navPerShare = navDiscounted / shares if shares and shares > 0 else None + + return { + "navGross": navGross, + "navDiscounted": navDiscounted, + "navPerShare": navPerShare, + "holdingDiscount": 0.30, + "subsidiaries": subDetails, + "netDebt": netDebt, + } + + +@memoized_calc +def calcPriceTarget(company: Any, *, basePeriod: str | None = None) -> dict | None: + """확률 가중 주가 목표가 (5 시나리오 + Monte Carlo). + + Returns + ------- + dict + weightedTarget : float — 확률 가중 목표 주가 (원) + percentiles : dict — 백분위별 주가 (원) + expectedValue : float — 기대가치 (원) + upside : float | None — 상승여력 (%) + probabilityAboveCurrent : float — 현재가 초과 확률 (0.0-1.0) + signal : str — 투자 신호 ("buy" | "hold" | "sell") + confidence : str — 신뢰도 ("high" | "medium" | "low") + scenarios : list[dict] — 시나리오별 상세 (name, probability, perShareValue(원), enterpriseValue(원)) + waccDetails : dict — WACC 상세 + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + series, shares, currency = _getSeriesAndShares(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + marketCap = price["marketCap"] if price else None + sectorKey = _resolveSectorKey(company) + + result = compute_price_target( + series, + sector_key=sectorKey, + current_price=currentPrice, + shares=shares, + market_cap=marketCap, + ) + + # 금융업 등 DCF 불가 시: 시나리오 전부 0이면 DDM/RIM으로 대체 + allZero = all(s.per_share_value == 0 for s in result.scenarios) if result.scenarios else True + if allZero: + ddmResult = calcDdm(company, basePeriod=basePeriod) + rimResult = calcResidualIncome(company, basePeriod=basePeriod) + fallbackValue = None + if ddmResult and ddmResult.get("intrinsicValue") and ddmResult["intrinsicValue"] > 0: + fallbackValue = ddmResult["intrinsicValue"] + elif rimResult and rimResult.get("intrinsicValue") and rimResult["intrinsicValue"] > 0: + fallbackValue = rimResult["intrinsicValue"] + if fallbackValue: + # DDM/RIM 기반 시나리오 생성 (±10%, ±20% 변동) + from dartlab.analysis.valuation.pricetarget import ScenarioPriceTarget + + fallbackScenarios = [ + ScenarioPriceTarget("baseline", 0.55, None, 0, 0, fallbackValue, 0, 0, None), + ScenarioPriceTarget("rate_hike", 0.20, None, 0, 0, fallbackValue * 0.9, 0, 0, None), + ScenarioPriceTarget("china_slowdown", 0.15, None, 0, 0, fallbackValue * 0.85, 0, 0, None), + ScenarioPriceTarget("adverse", 0.10, None, 0, 0, fallbackValue * 0.75, 0, 0, None), + ] + wt = sum(s.per_share_value * s.probability for s in fallbackScenarios) + up = ((wt / currentPrice - 1) * 100) if currentPrice and currentPrice > 0 else None + sig = "buy" if up and up > 10 else ("sell" if up and up < -10 else "hold") + from dartlab.analysis.valuation.pricetarget import PriceTargetResult + + result = PriceTargetResult( + scenarios=fallbackScenarios, + weighted_target=wt, + percentiles=result.percentiles, + expected_value=fallbackValue, + current_price=currentPrice, + upside_pct=up, + probability_above_current=result.probability_above_current, + signal=sig, + confidence="low", + wacc_details=getattr(result, "wacc_details", {}), + warnings=result.warnings + ["DCF 시나리오 불가 → DDM/RIM 기반 fallback"], + ) + + scenarios = [] + for s in result.scenarios: + scenarios.append( + { + "name": s.scenario_name, + "probability": s.probability, + "perShareValue": s.per_share_value, + "enterpriseValue": s.enterprise_value, + } + ) + + return { + "weightedTarget": result.weighted_target, + "percentiles": result.percentiles, + "expectedValue": result.expected_value, + "upside": result.upside_pct, + "probabilityAboveCurrent": result.probability_above_current, + "signal": result.signal, + "confidence": result.confidence, + "scenarios": scenarios, + "waccDetails": result.wacc_details, + "warnings": result.warnings, + "currentPrice": currentPrice, + "currency": currency, + } + + +@memoized_calc +def calcReverseImplied(company: Any, *, basePeriod: str | None = None) -> dict | None: + """역내재성장률 -- 시장이 내재하는 매출 성장률 역산. + + Returns + ------- + dict + impliedGrowthRate : float — 내재 매출 성장률 (%) + impliedRevenue : float — 내재 매출 (원) + marketCap : float — 시가총액 (원) + latestRevenue : float — 최신 매출 (원) + assumedMargin : float — 가정 영업이익률 (%) + assumedWacc : float — 가정 WACC (%) + signal : str — 신호 ("overpriced" | "underpriced" | "fair") + warnings : list[str] — 경고 메시지 + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + from dartlab.core.finance.priceImplied import reverseImpliedGrowth + + series, shares, currency = _getSeriesAndShares(company) + price = _fetchPriceContext(company) + if not price or not price.get("marketCap"): + return None + + result = reverseImpliedGrowth(series, marketCap=price["marketCap"]) + if result is None: + return None + + return { + "impliedGrowthRate": result.impliedGrowthRate, + "impliedRevenue": result.impliedRevenue, + "marketCap": result.marketCap, + "latestRevenue": result.latestRevenue, + "assumedMargin": result.assumedMargin, + "assumedWacc": result.assumedWacc, + "signal": result.signal, + "warnings": result.warnings, + "currentPrice": price.get("currentPrice"), + "currency": currency, + } + + +@memoized_calc +def calcSensitivity(company: Any, *, basePeriod: str | None = None) -> dict | None: + """WACC x 영구성장률 민감도 그리드. + + Returns + ------- + dict + grid : list[list[float]] — WACC x 영구성장률 주가 그리드 (원) + baseWacc : float — 기준 WACC (%) + baseTerminalGrowth : float — 기준 영구성장률 (%) + baseValue : float — 기준 주가 (원) + currentPrice : float | None — 현재 주가 (원) + currency : str — 통화 (KRW | USD) + """ + from dartlab.core.finance.dcf import sensitivityAnalysis + + series, shares, currency = _getSeriesAndShares(company) + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + + result = sensitivityAnalysis( + series, + shares=shares, + sectorParams=sp, + currentPrice=currentPrice, + currency=currency, + ) + if result is None: + return None + + return { + "grid": result.grid, + "baseWacc": result.baseWacc, + "baseTerminalGrowth": result.baseTerminalGrowth, + "baseValue": result.baseValue, + "currentPrice": currentPrice, + "currency": currency, + } + + +def _classifyCompanyType(company: Any, series: dict) -> tuple[str, dict[str, float]]: + """기업 특성 분류 -> 최적 모델 가중치 반환 (CFA 프레임워크 기반). + + Returns: + (companyType, weights) where companyType is one of: + "financial", "growth", "cyclical", "dividend", "general" + """ + from dartlab.core.finance.extract import getAnnualValues, getRevenueGrowth3Y + + sector = getattr(company, "sector", None) + sectorStr = "" + isFinancial = False + if sector: + sectorVal = getattr(sector, "sector", None) + if sectorVal: + sectorStr = sectorVal.value if hasattr(sectorVal, "value") else str(sectorVal) + if sectorStr == "금융": + isFinancial = True + + # 지주사 판별 (금융보다 우선 — 한진칼 같은 금융 분류 지주사 대응) + igVal = getattr(sector, "industryGroup", None) if sector else None + igStr = igVal.name if igVal and hasattr(igVal, "name") else str(igVal or "") + corpName = getattr(company, "corpName", "") + _holdingCodes = {"034730", "003550", "028260", "005490", "180640"} # SK, LG, 삼성물산, POSCO홀딩스, 한진칼 + stockCode = getattr(company, "stockCode", "") + # 금융지주(신한지주, KB금융 등)는 financial이지 holding이 아님 + isFinancialHolding = isFinancial and ("지주" in corpName or "금융" in corpName) + isHolding = not isFinancialHolding and ( + "HOLDING" in igStr.upper() + or "지주" in corpName + or "지주" in sectorStr + or "홀딩스" in corpName + or stockCode in _holdingCodes + ) + if isHolding: + # 지주사: DCF(연결 기반) 과대평가 위험 → 상대가치/RIM 우선, DCF 대폭 축소 + return "holding", {"DCF": 0.05, "DDM": 0.10, "상대가치": 0.15, "RIM": 0.30, "NAV": 0.40} + + if isFinancial: + # 금융업: FCF 무의미, RIM/DDM 우선, DCF 제외 + return "financial", {"DCF": 0.0, "DDM": 0.35, "상대가치": 0.30, "RIM": 0.35} + + # ── 사이클 업종 사전 판별 (섹터 기반 — CAGR/CV보다 우선) ── + _cyclicalIg = { + "SEMICONDUCTOR", + "CHEMICAL", + "METALS", + "SHIPBUILDING", + "TRANSPORTATION", + "OIL_GAS", + "ENERGY_EQUIP", + "CONSTRUCTION_MATERIALS", + "CAPITAL_GOODS", + "AUTO", + "DISPLAY", + "AIRLINE", + } + # NI CV가 높아도 사이클 기업이 아닌 업종 → cyclical 제외 + _stableIg = { + "TELECOM", + "UTILITIES", + "GAS_UTILITY", + "ELECTRIC", + "SOFTWARE", + "IT_SERVICE", + "INTERNET", + "MEDIA_ENTERTAINMENT", + "MEDIA", + "GAME", + } + + isCyclicalSector = igStr.upper() in _cyclicalIg + isStableSector = igStr.upper() in _stableIg + + # 유틸리티: 규제기업으로 CAPEX 극대, FCF 만성 적자 → DCF 부적합, DDM/RIM 우선 + if igStr.upper() in ("UTILITIES", "GAS_UTILITY", "ELECTRIC"): + return "utility", {"DCF": 0.10, "DDM": 0.35, "상대가치": 0.15, "RIM": 0.40} + # 수주잔고 기반 업종: DCF가 과거 적자를 외삽하므로 가중 축소, RIM/상대가치 우선 + _backlogIg = {"SHIPBUILDING", "CONSTRUCTION", "CONSTRUCTION_MATERIALS"} + isBacklogSector = igStr.upper() in _backlogIg + + if isBacklogSector: + return "backlog_cyclical", {"DCF": 0.15, "DDM": 0.05, "상대가치": 0.45, "RIM": 0.35} + + # 바이오/제약: FCF 적자 빈번, DCF 부적합. PSR/PBR 기반 상대가치 + RIM 우선 + if igStr.upper() in ("PHARMA_BIO", "HEALTHCARE_EQUIP"): + return "pharma_bio", {"DCF": 0.10, "DDM": 0.05, "상대가치": 0.50, "RIM": 0.35} + + if isCyclicalSector: + return "cyclical", {"DCF": 0.25, "DDM": 0.10, "상대가치": 0.40, "RIM": 0.25} + + # 성장주 판별: 매출 3Y CAGR > 15% (사이클 업종은 위에서 이미 처리) + revCagr = getRevenueGrowth3Y(series) + if revCagr is not None and revCagr > 15: + return "growth", {"DCF": 0.45, "DDM": 0.05, "상대가치": 0.25, "RIM": 0.25} + + # 순환주 판별 (통계 기반): NI CV > 0.5이고 안정 업종이 아닌 경우 + niVals = getAnnualValues(series, "IS", "net_profit") + if niVals and len(niVals) >= 4 and not isStableSector: + validNi = [v for v in niVals[-5:] if v is not None and v > 0] + if len(validNi) >= 3: + mean = sum(validNi) / len(validNi) + if mean > 0: + var = sum((v - mean) ** 2 for v in validNi) / len(validNi) + cv = (var**0.5) / mean + if cv > 0.5: + return "cyclical", {"DCF": 0.25, "DDM": 0.10, "상대가치": 0.40, "RIM": 0.25} + + # 배당주: 안정적 ��당 (DDM 가중 높임) + divVals = getAnnualValues(series, "CF", "dividends_paid") + if divVals and len(divVals) >= 3: + recentDivs = [abs(v) for v in divVals[-3:] if v is not None and v != 0] + if len(recentDivs) >= 3: + return "dividend", {"DCF": 0.25, "DDM": 0.30, "상대가치": 0.25, "RIM": 0.20} + + # 일반 + return "general", {"DCF": 0.35, "DDM": 0.15, "상대가치": 0.25, "RIM": 0.25} + + +@memoized_calc +def calcValuationSynthesis(company: Any, *, basePeriod: str | None = None) -> dict | None: + """종합 밸류에이션 -- 기업 유형별 자동 모델 선택 + 가중 합성. + + Returns + ------- + dict + fairValueRange : dict — 적정가 범위 (원) + verdict : str — 판정 ("저평가" | "적정" | "고평가") + currentPrice : float | None — 현재 주가 (원) + estimates : list[dict] — 모델별 추정 (method, value(원), weight) + companyType : str — 기업 유형 ("financial" | "growth" | "cyclical" | "dividend" | "holding" | "general" 등) + weightedFairValue : float | None — 가중 합성 적정가 (원) + modelWeights : dict[str, float] — 모델별 가중치 + currency : str — 통화 (KRW | USD) + reverseImplied : dict | None — 역내재성장률 (모델 실패 시 보충) + warnings : list[str] — 경고 메시지 + technicalContext : dict | None — 기술적 분석 컨텍스트 (verdict, score, rsi) + """ + from dartlab.core.finance.dcf import fullValuation + + series, shares, currency = _getSeriesAndShares(company) + if series is None: + return None + + sp = _getSectorParams(company) + price = _fetchPriceContext(company) + currentPrice = price["currentPrice"] if price else None + marketCap = price["marketCap"] if price else None + + companyType, weights = _classifyCompanyType(company, series) + + # 개별 beta (수익률 회귀) + CAPM 기반 동적 WACC + from dartlab.core.finance.proforma import _fetchBeta, compute_company_wacc + + stockCode = getattr(company, "stockCode", "") + betaCalc = _fetchBeta(stockCode, currency) if stockCode else None + + wacc, _waccDetail = compute_company_wacc( + series, + sector_params=sp, + market_cap=marketCap, + currency=currency, + beta_override=betaCalc, + ) + + result = fullValuation( + series, + shares=shares, + sectorParams=sp, + marketCap=marketCap, + currentPrice=currentPrice, + currency=currency, + discountRate=wacc, + ) + + # 극단값 필터: 현재가 2% 미만 또는 10배 이상은 무의미 → 합성 제외 + _minVal = currentPrice * 0.02 if currentPrice and currentPrice > 0 else 0 + _maxVal = currentPrice * 10 if currentPrice and currentPrice > 0 else float("inf") + + def _inRange(v: float) -> bool: + return _minVal < v < _maxVal + + estimates: list[dict] = [] + if result.dcf and result.dcf.perShareValue and _inRange(result.dcf.perShareValue): + estimates.append({"method": "DCF", "value": result.dcf.perShareValue, "weight": weights.get("DCF", 0)}) + # DDM: fullValuation 내부 DDM 대신 calcDdm 사용 (calcDividendPolicy 기반, 더 정확) + ddmResult = calcDdm(company, basePeriod=basePeriod) + ddmValue = ddmResult.get("intrinsicValue") if ddmResult else None + if ddmValue and _inRange(ddmValue): + estimates.append({"method": "DDM", "value": ddmValue, "weight": weights.get("DDM", 0)}) + if result.relative and result.relative.consensusValue and _inRange(result.relative.consensusValue): + estimates.append( + {"method": "상대가치", "value": result.relative.consensusValue, "weight": weights.get("상대가치", 0)} + ) + + # RIM 결과도 합성에 포함 + beta = sp.beta if sp else None + rimResult = _rimCalc(series, shares=shares, currentPrice=currentPrice, currency=currency, beta=beta) + if rimResult and rimResult.intrinsicValue and _inRange(rimResult.intrinsicValue): + estimates.append({"method": "RIM", "value": rimResult.intrinsicValue, "weight": weights.get("RIM", 0)}) + + # Forward BPS × Target PBR — 수주잔고 기반 업종 (조선/건설) + if companyType == "backlog_cyclical": + from dartlab.core.finance.extract import getAnnualValues, getLatest, getRevenueGrowth3Y + + eq = getLatest(series, "BS", "total_equity") + if eq and shares and shares > 0: + bps = eq / shares + getRevenueGrowth3Y(series) or 0 + # 2년 후 Forward BPS = 현재 BPS × (1 + ROE추정)^2 + # ROE 추정: 최근 양수 ROE 또는 섹터 평균 8% + niVals = getAnnualValues(series, "IS", "net_profit") + recentNi = [v for v in (niVals[-3:] if niVals else []) if v is not None and v > 0] + roe = recentNi[-1] / eq * 100 if recentNi and eq and eq > 0 else 8.0 + roe = min(max(roe, 3.0), 25.0) + forwardBps = bps * (1 + roe / 100) ** 2 + # Target PBR: 조선 사이클 상단 2.0~4.0, 평균 3.0 + targetPbr = 3.0 + forwardPbrValue = forwardBps * targetPbr + if _inRange(forwardPbrValue): + estimates.append( + {"method": "Forward PBR", "value": forwardPbrValue, "weight": weights.get("상대가치", 0.45)} + ) + + # NAV — 지주사만 (자회사 시총 합산 기반) + if companyType == "holding": + navResult = calcNavValuation(company) + if navResult and navResult.get("navPerShare") and _inRange(navResult["navPerShare"]): + estimates.append({"method": "NAV", "value": navResult["navPerShare"], "weight": weights.get("NAV", 0.40)}) + + # 가중 합성 적정가 + weightedFairValue = None + if estimates: + totalW = sum(e["weight"] for e in estimates if e["weight"] > 0) + if totalW > 0: + # 미가용 모델의 가중치를 비례 재배분 + normFactor = 1.0 / totalW + weightedFairValue = sum(e["value"] * e["weight"] * normFactor for e in estimates) + weightedFairValue = round(weightedFairValue, 0) + + # 역내재성장률 — 모든 모델 실패 시 시장 기대 역산으로 보충 + reverseImplied = None + if not estimates or weightedFairValue is None: + ri = calcReverseImplied(company, basePeriod=basePeriod) + if ri: + reverseImplied = { + "impliedGrowthRate": ri.get("impliedGrowthRate"), + "signal": ri.get("signal"), + } + + warnings = [] + if price and price.get("isStale"): + warnings.append("주가 데이터가 최신이 아닐 수 있습니다 (stale cache)") + + # 모델 간 극단 괴리 경고 + if len(estimates) >= 2: + vals = [e["value"] for e in estimates] + maxVal, minVal = max(vals), min(vals) + if minVal > 0 and maxVal / minVal > 10: + warnings.append(f"모델 간 극단 괴리 ({maxVal / minVal:.0f}배) — 합성 신뢰도 낮음") + + # 기술적 분석 컨텍스트 (선택적 — quant 실패 시 무시) + technicalContext = None + try: + from dartlab.quant.extended import calcTechnicalVerdict + + tv = calcTechnicalVerdict(company) + if tv and tv.get("verdict"): + technicalContext = { + "verdict": tv["verdict"], + "score": tv.get("score", 0), + "rsi": tv.get("rsi"), + } + except (ImportError, ValueError, TypeError, AttributeError): + pass + + return { + "fairValueRange": result.fairValueRange, + "verdict": result.verdict, + "currentPrice": currentPrice, + "estimates": estimates, + "companyType": companyType, + "weightedFairValue": weightedFairValue, + "modelWeights": weights, + "currency": currency, + "reverseImplied": reverseImplied, + "warnings": warnings, + "technicalContext": technicalContext, + } + + +@memoized_calc +def calcValuationFlags(company: Any, *, basePeriod: str | None = None) -> list[dict]: + """가치평가 관련 플래그 집계. + + Returns + ------- + list[dict] + signal : str — 신호 유형 ("opportunity" | "warning" | "info") + label : str — 플래그 설명 메시지 + """ + flags: list[dict] = [] + + dcf = calcDcf(company, basePeriod=basePeriod) + if dcf: + mos = dcf.get("marginOfSafety") + if mos is not None: + if mos > 30: + flags.append({"signal": "opportunity", "label": f"DCF 안전마진 {mos:.0f}% -- 저평가 가능"}) + elif mos < -30: + flags.append({"signal": "warning", "label": f"DCF 안전마진 {mos:.0f}% -- 고평가 주의"}) + + ddm = calcDdm(company, basePeriod=basePeriod) + if ddm and ddm.get("modelUsed") == "N/A": + flags.append({"signal": "info", "label": "DDM 적용 불가 (무배당/데이터 부족)"}) + + synthesis = calcValuationSynthesis(company, basePeriod=basePeriod) + if synthesis: + verdict = synthesis.get("verdict", "") + if verdict == "저평가": + flags.append({"signal": "opportunity", "label": "종합 판정: 저평가"}) + elif verdict == "고평가": + flags.append({"signal": "warning", "label": "종합 판정: 고평가"}) + + # 기술적 분석 교차 플래그 + tc = synthesis.get("technicalContext") + if tc and verdict: + techVerdict = tc.get("verdict", "") + rsi = tc.get("rsi", 50) + if verdict == "저평가" and techVerdict == "약세" and rsi <= 30: + flags.append({"signal": "opportunity", "label": "저평가 + 과매도(RSI 30↓) — 역발상 매수 기회 가능성"}) + elif verdict == "고평가" and techVerdict == "강세" and rsi >= 70: + flags.append({"signal": "warning", "label": "고평가 + 과매수(RSI 70↑) — 과열 경고"}) + elif verdict == "저평가" and techVerdict == "강세": + flags.append({"signal": "opportunity", "label": "저평가 + 기술적 강세 — 시장 재평가 진행 중"}) + + return flags diff --git a/src/dartlab/analysis/forecast/__init__.py b/src/dartlab/analysis/forecast/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/dartlab/analysis/forecast/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc4c198609a0bc83d9780754dce14b54ee85d905 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/calibrationMetrics.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/calibrationMetrics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed87a025a9e20e0b2c1336d5a2020156ff9a173d Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/calibrationMetrics.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/calibrator.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/calibrator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d2cf6b0195361d7a5a581e0b08e46bddc20c906 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/calibrator.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/forecast.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/forecast.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7e246e2d8db9997c718b5e0e8f9591585b1827c Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/forecast.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/forwardTest.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/forwardTest.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f71d8d3b6e4e675b83f157b3c6467fcb93513fa7 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/forwardTest.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/prediction.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/prediction.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..621827488faf2c5ec78fffad7bcf474e9eb1e34a Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/prediction.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/predictionSpace.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/predictionSpace.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..123e0c21be4a985cb942c4818daa25350204096a Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/predictionSpace.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/proforma.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/proforma.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04a89b0592181145f3df9d490da7f0745607d666 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/proforma.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/revenueForecast.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/revenueForecast.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b270e0c7cb1d4af8c447fc102dd8b3373770ea4 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/revenueForecast.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/scenarioSim.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/scenarioSim.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e251e3d680d5a37e01a229a4d4c07d5240edbee1 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/scenarioSim.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/__pycache__/simulation.cpython-312.pyc b/src/dartlab/analysis/forecast/__pycache__/simulation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1ecbe0cd9e21cb28acffd7bd8550678f444c916 Binary files /dev/null and b/src/dartlab/analysis/forecast/__pycache__/simulation.cpython-312.pyc differ diff --git a/src/dartlab/analysis/forecast/calibrationMetrics.py b/src/dartlab/analysis/forecast/calibrationMetrics.py new file mode 100644 index 0000000000000000000000000000000000000000..8583c5b5e3b950d26253e5f16bee872f40f93a19 --- /dev/null +++ b/src/dartlab/analysis/forecast/calibrationMetrics.py @@ -0,0 +1,118 @@ +"""확률 캘리브레이션 메트릭 — Brier Score + 신뢰도 다이어그램 데이터. + +네이트 실버 원칙: "80% 확률 예측이 정말 80% 맞는지" 검증. +기상청이 예측에 성공한 핵심 이유: 확률 구간별 적중률을 추적한다. +""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass +class CalibrationBin: + """단일 확률 구간의 캘리브레이션.""" + + binLower: float # 구간 하한 (예: 0.7) + binUpper: float # 구간 상한 (예: 0.8) + meanPredicted: float # 평균 예측 확률 + meanActual: float # 실제 적중률 + count: int # 예측 건수 + gap: float # |meanPredicted - meanActual| + + +@dataclass +class CalibrationReport: + """전체 캘리브레이션 리포트.""" + + brierScore: float # 0~1 (낮을수록 좋음) + brierSkill: float | None # 기저율 대비 스킬 (>0이면 기저율보다 나음) + bins: list[CalibrationBin] # reliability diagram 데이터 + totalPredictions: int + baseRate: float # 실제 "상승" 비율 + maxCalibrationGap: float # 최대 구간 괴리 + isWellCalibrated: bool # 모든 구간 gap < 0.10 + + +def computeBrierScore( + predictions: list[float], + outcomes: list[int], +) -> float: + """Brier Score = mean((predicted - actual)^2). + + 0 = 완벽, 1 = 최악. + """ + if not predictions or len(predictions) != len(outcomes): + return 1.0 + total = sum((p - o) ** 2 for p, o in zip(predictions, outcomes)) + return total / len(predictions) + + +def buildCalibrationBins( + predictions: list[float], + outcomes: list[int], + nBins: int = 5, +) -> list[CalibrationBin]: + """확률 구간별 적중률 계산 (reliability diagram 데이터).""" + if not predictions: + return [] + + binWidth = 1.0 / nBins + bins: list[CalibrationBin] = [] + + for i in range(nBins): + lower = i * binWidth + upper = (i + 1) * binWidth + + preds = [] + acts = [] + for p, o in zip(predictions, outcomes): + if lower <= p < upper or (i == nBins - 1 and p == upper): + preds.append(p) + acts.append(o) + + if preds: + meanP = sum(preds) / len(preds) + meanA = sum(acts) / len(acts) + bins.append( + CalibrationBin( + binLower=round(lower, 2), + binUpper=round(upper, 2), + meanPredicted=round(meanP, 4), + meanActual=round(meanA, 4), + count=len(preds), + gap=round(abs(meanP - meanA), 4), + ) + ) + + return bins + + +def generateCalibrationReport( + predictions: list[float], + outcomes: list[int], +) -> CalibrationReport | None: + """전체 캘리브레이션 리포트 생성.""" + if not predictions or len(predictions) < 5: + return None + + brier = computeBrierScore(predictions, outcomes) + bins = buildCalibrationBins(predictions, outcomes) + baseRate = sum(outcomes) / len(outcomes) if outcomes else 0.5 + + # Brier Skill Score: 1 - (brier / brier_ref), brier_ref = baseRate*(1-baseRate) + brierRef = baseRate * (1 - baseRate) + brierSkill = 1 - (brier / brierRef) if brierRef > 0 else None + + maxGap = max((b.gap for b in bins), default=0.0) + wellCalibrated = all(b.gap < 0.10 for b in bins) if bins else False + + return CalibrationReport( + brierScore=round(brier, 4), + brierSkill=round(brierSkill, 4) if brierSkill is not None else None, + bins=bins, + totalPredictions=len(predictions), + baseRate=round(baseRate, 4), + maxCalibrationGap=round(maxGap, 4), + isWellCalibrated=wellCalibrated, + ) diff --git a/src/dartlab/analysis/forecast/calibrator.py b/src/dartlab/analysis/forecast/calibrator.py new file mode 100644 index 0000000000000000000000000000000000000000..25e315c9c5e869cf39e26bb0f64e48e857b90077 --- /dev/null +++ b/src/dartlab/analysis/forecast/calibrator.py @@ -0,0 +1,95 @@ +"""Analyst Calibrator — 외부 데이터로 DCF 시나리오 확률 보정. + +컨센서스, 수급, 매크로 데이터를 기반으로 +DCF 시나리오 확률을 재가중한다. +""" + +from __future__ import annotations + +import logging + +from dartlab.gather.types import MarketSnapshot + +log = logging.getLogger(__name__) + + +def calibrate_scenarios( + base_probs: dict[str, float], + dcf_baseline_price: float, + market: MarketSnapshot, +) -> tuple[dict[str, float], list[str]]: + """외부 시장 데이터로 DCF 시나리오 확률 보정. + + Args: + base_probs: 기존 시나리오 확률 (예: {"baseline": 0.40, ...}). + dcf_baseline_price: DCF baseline 시나리오 목표가. + market: MarketSnapshot (컨센서스, 수급 포함). + + Returns: + (보정된 확률 dict, 보정 근거 list). + """ + probs = dict(base_probs) + reasons: list[str] = [] + + def _adjust(scenario: str, delta: float, reason: str) -> None: + if scenario in probs: + probs[scenario] += delta + reasons.append(reason) + + # ── 규칙 1: 컨센서스 vs DCF 괴리 ── + if market.consensus and dcf_baseline_price > 0: + consensus_price = market.consensus.target_price + if consensus_price > 0: + ratio = consensus_price / dcf_baseline_price + if ratio > 1.5: + # 컨센서스가 DCF보다 50% 이상 높음 → optimistic ↑ + _adjust( + "optimistic", + 0.05, + f"컨센서스({consensus_price:,.0f})가 DCF baseline보다 {ratio:.1f}배 → optimistic +5%p", + ) + _adjust("baseline", -0.03, "컨센서스 상향 → baseline -3%p") + _adjust("adverse", -0.02, "컨센서스 상향 → adverse -2%p") + elif ratio < 0.7: + # 컨센서스가 DCF보다 30% 이상 낮음 → adverse ↑ + _adjust( + "adverse", 0.05, f"컨센서스({consensus_price:,.0f})가 DCF baseline보다 {ratio:.1f}배 → adverse +5%p" + ) + _adjust("baseline", -0.03, "컨센서스 하향 → baseline -3%p") + _adjust("optimistic", -0.02, "컨센서스 하향 → optimistic -2%p") + + # ── 규칙 2: 매수 비율 ── + if market.consensus and market.consensus.analyst_count >= 3: + buy_ratio = market.consensus.buy_ratio + if buy_ratio >= 0.8: + _adjust("baseline", 0.03, f"매수의견 {buy_ratio:.0%} → baseline +3%p") + elif buy_ratio < 0.3: + _adjust("adverse", 0.03, f"매수의견 {buy_ratio:.0%} (낮음) → adverse +3%p") + _adjust("baseline", -0.03, "매수의견 저조 → baseline -3%p") + + # ── 규칙 3: 외국인 순매도 ── + foreign_net = market.supply_demand.get("foreign_net") + if foreign_net is not None and foreign_net < -1_000_000: + _adjust("adverse", 0.03, f"외국인 순매도 {foreign_net:,.0f}주 → adverse +3%p") + _adjust("baseline", -0.03, "외국인 순매도 → baseline -3%p") + elif foreign_net is not None and foreign_net > 1_000_000: + _adjust("baseline", 0.03, f"외국인 순매수 {foreign_net:,.0f}주 → baseline +3%p") + + # ── 규칙 4: 기준금리 (매크로) ── + base_rate = market.macro.get("base_rate") + if base_rate is not None: + if base_rate > 4.0: + _adjust("rate_hike", 0.05, f"기준금리 {base_rate:.1f}% (고금리) → rate_hike +5%p") + _adjust("baseline", -0.03, "고금리 → baseline -3%p") + elif base_rate < 2.0: + _adjust("rate_hike", -0.03, f"기준금리 {base_rate:.1f}% (저금리) → rate_hike -3%p") + _adjust("baseline", 0.03, "저금리 → baseline +3%p") + + # ── 정규화 (합계=1.0, 하한 1%p) ── + for k in probs: + probs[k] = max(probs[k], 0.01) + total = sum(probs.values()) + if total > 0: + probs = {k: v / total for k, v in probs.items()} + + return probs, reasons diff --git a/src/dartlab/analysis/forecast/forecast.py b/src/dartlab/analysis/forecast/forecast.py new file mode 100644 index 0000000000000000000000000000000000000000..bf1a85a5b1685b8258fba6de02e79790dd9f2e54 --- /dev/null +++ b/src/dartlab/analysis/forecast/forecast.py @@ -0,0 +1,518 @@ +"""시계열 예측 + 시나리오 분석 + 민감도 분석 엔진.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional + +from dartlab.core.finance.extract import getAnnualValues +from dartlab.core.finance.fmt import fmtBig, fmtPrice +from dartlab.core.finance.ols import ( + _coefficientOfVariation, + _detectStructuralBreak, + _ols, +) +from dartlab.core.sector.types import SectorParams + +# ── 결과 타입 ────────────────────────────────────────────── + + +@dataclass +class ForecastResult: + """시계열 예측 결과.""" + + metric: str + metricLabel: str + historical: list[Optional[float]] + projected: list[float] + horizon: int + method: str + confidence: str + rSquared: float + growthRate: float + assumptions: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + currency: str = "KRW" + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + c = self.currency + lines = [ + f"[{self.metricLabel} 예측 — {self.method}]", + f" 신뢰도: {self.confidence} (R²={self.rSquared:.2f})", + f" 성장률: {self.growthRate:.1f}%", + ] + validHist = [v for v in self.historical if v is not None] + if validHist: + lines.append(f" 최근 실적: {fmtBig(validHist[-1], c)}") + if self.projected: + for i, p in enumerate(self.projected, 1): + lines.append(f" +{i}년 예측: {fmtBig(p, c)}") + if self.warnings: + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +@dataclass +class ScenarioResult: + """시나리오 분석 결과.""" + + base: dict[str, float] + bull: dict[str, float] + bear: dict[str, float] + probability: dict[str, float] + weightedValue: Optional[float] + currentPrice: Optional[float] + warnings: list[str] = field(default_factory=list) + currency: str = "KRW" + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + c = self.currency + lines = ["[시나리오 분석]"] + for label, scenario, prob in [ + ("Bull", self.bull, self.probability.get("bull", 25)), + ("Base", self.base, self.probability.get("base", 50)), + ("Bear", self.bear, self.probability.get("bear", 25)), + ]: + growth = scenario.get("growth", 0) + value = scenario.get("perShareValue", 0) + lines.append(f" {label} ({prob:.0f}%): 성장 {growth:+.1f}%, 적정가 {fmtPrice(value, c)}") + if self.weightedValue is not None: + lines.append(f" 확률가중 적정가: {fmtPrice(self.weightedValue, c)}") + if self.currentPrice: + lines.append(f" 현재가: {fmtPrice(self.currentPrice, c)}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +@dataclass +class SensitivityResult: + """민감도 분석 결과.""" + + waccValues: list[float] + growthValues: list[float] + matrix: list[list[float]] + baseWacc: float + baseGrowth: float + baseValue: float + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + lines = ["[민감도 분석 — WACC × 영구성장률]"] + header = "WACC \\ g " + " ".join(f"{g:.1f}%" for g in self.growthValues) + lines.append(f" {header}") + for i, wacc in enumerate(self.waccValues): + row = f" {wacc:.1f}% " + " ".join( + f"{self.matrix[i][j] / 1e4:,.0f}만" if self.matrix[i][j] > 0 else " N/A" + for j in range(len(self.growthValues)) + ) + lines.append(row) + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +# ── 예측 메트릭 정의 ────────────────────────────────────────── + +FORECAST_TARGETS: dict[str, tuple[str, str, str]] = { + "revenue": ("IS", "sales", "매출"), + "operating_income": ("IS", "operating_profit", "영업이익"), + "net_income": ("IS", "net_profit", "순이익"), + "operating_cashflow": ("CF", "operating_cashflow", "영업CF"), +} + +_FALLBACKS: dict[str, list[str]] = { + "sales": ["revenue"], + "operating_profit": ["operating_income"], + "net_profit": ["net_income"], +} + + +# ── 예측 엔진 ────────────────────────────────────────────── + + +def forecastMetric( + series: dict, + metric: str = "revenue", + horizon: int = 3, + sectorParams: Optional[SectorParams] = None, +) -> ForecastResult: + """단일 메트릭 시계열 예측.""" + warnings: list[str] = [] + target = FORECAST_TARGETS.get(metric) + if target is None: + return ForecastResult( + metric=metric, + metricLabel=metric, + historical=[], + projected=[], + horizon=horizon, + method="N/A", + confidence="low", + rSquared=0, + growthRate=0, + warnings=[f"미지원 예측 대상: {metric}"], + ) + + sjDiv, snakeId, label = target + + vals = getAnnualValues(series, sjDiv, snakeId) + if not any(v is not None for v in vals): + for fb in _FALLBACKS.get(snakeId, []): + vals = getAnnualValues(series, sjDiv, fb) + if any(v is not None for v in vals): + break + + validPairs = [(i, v) for i, v in enumerate(vals) if v is not None] + if len(validPairs) < 3: + return ForecastResult( + metric=metric, + metricLabel=label, + historical=vals, + projected=[], + horizon=horizon, + method="N/A", + confidence="low", + rSquared=0, + growthRate=0, + warnings=["예측 불가: 유효 데이터 3년 미만"], + ) + + xVals = [float(p[0]) for p in validPairs] + yVals = [p[1] for p in validPairs] + + breakIdx = _detectStructuralBreak(yVals, minSegment=4) + if breakIdx is not None and breakIdx < len(yVals): + nBefore = breakIdx + nAfter = len(yVals) - breakIdx + if nAfter >= 3: + warnings.append(f"구조적 전환 감지 (데이터 {nBefore}→{nAfter}개 분할) — 전환 이후 데이터 기반 예측") + xVals = xVals[breakIdx:] + yVals = yVals[breakIdx:] + + cv = _coefficientOfVariation(yVals) + slope, intercept, r2 = _ols(xVals, yVals) + + n = len(yVals) + if yVals[0] > 0 and yVals[-1] > 0: + cagr = ((yVals[-1] / yVals[0]) ** (1 / max(n - 1, 1)) - 1) * 100 + else: + cagr = 0.0 + + sectorGrowth = sectorParams.growthRate if sectorParams else 3.0 + + if cv > 0.4: + method = "mean_revert" + meanVal = sum(yVals) / n + projected = [] + last = yVals[-1] + for yr in range(1, horizon + 1): + blend = yr / (horizon + 1) + proj = last * (1 - blend) + meanVal * blend + projected.append(proj) + growthRate = 0.0 + warnings.append("높은 변동성 → 평균 회귀 모델 적용") + elif r2 > 0.7 and abs(cagr) < 30: + method = "linear" + lastX = xVals[-1] + projected = [slope * (lastX + yr) + intercept for yr in range(1, horizon + 1)] + growthRate = cagr + for i, p in enumerate(projected): + if p < 0 and yVals[-1] > 0: + projected[i] = yVals[-1] * 0.5 + warnings.append(f"+{i + 1}년 예측이 음수 → 최근값의 50%로 대체") + else: + method = "cagr_decay" + growth = min(max(cagr, -10), 25) + terminal = sectorGrowth + projected = [] + last = yVals[-1] + for yr in range(1, horizon + 1): + blend = (yr - 1) / max(horizon - 1, 1) + g = growth * (1 - blend) + terminal * blend + proj = last * (1 + g / 100) + projected.append(proj) + last = proj + growthRate = growth + + if r2 > 0.8 and n >= 5: + confidence = "high" + elif r2 > 0.5 and n >= 3: + confidence = "medium" + else: + confidence = "low" + + assumptions = [] + if method == "linear": + assumptions.append(f"선형 추세 연장 (R²={r2:.2f})") + elif method == "cagr_decay": + assumptions.append(f"CAGR {cagr:.1f}% → 섹터평균 {sectorGrowth:.1f}%로 감속") + elif method == "mean_revert": + meanVal = sum(yVals) / n + assumptions.append(f"평균 {meanVal / 1e8:,.0f}억으로 회귀") + assumptions.append(f"과거 {n}개년 데이터 기반") + + return ForecastResult( + metric=metric, + metricLabel=label, + historical=vals, + projected=projected, + horizon=horizon, + method=method, + confidence=confidence, + rSquared=round(r2, 3), + growthRate=round(growthRate, 1), + assumptions=assumptions, + warnings=warnings, + ) + + +def _marginLinkedForecast( + revResult: ForecastResult, + series: dict, + metric: str, + horizon: int, +) -> ForecastResult | None: + """매출 전망 × 마진 추세 → 영업이익/순이익 파생 예측. + + 단순 OLS보다 정확: 매출 방향 예측(72~78%)을 이익에 전파. + """ + if not revResult.projected or revResult.confidence == "low": + return None + + target = FORECAST_TARGETS.get(metric) + if target is None: + return None + sjDiv, snakeId, label = target + + # 과거 마진 계산 + revVals = getAnnualValues(series, "IS", "sales") + if not any(v is not None for v in revVals): + revVals = getAnnualValues(series, "IS", "revenue") + metricVals = getAnnualValues(series, sjDiv, snakeId) + for fb in _FALLBACKS.get(snakeId, []): + if not any(v is not None for v in metricVals): + metricVals = getAnnualValues(series, sjDiv, fb) + + margins = [] + for r, m in zip(revVals, metricVals): + if r and m and r != 0: + margins.append(m / r) + + if len(margins) < 2: + return None + + # 최근 3년 마진 가중평균 (최신에 가중) + recent = margins[-3:] if len(margins) >= 3 else margins + weights = list(range(1, len(recent) + 1)) + wSum = sum(w * m for w, m in zip(weights, recent)) + avgMargin = wSum / sum(weights) + + # 매출 전망 × 마진 → 이익 전망 + projected = [rev * avgMargin for rev in revResult.projected] + validHist = [v for v in metricVals if v is not None] + lastVal = validHist[-1] if validHist else 0 + growthRate = ((projected[-1] / lastVal) ** (1 / horizon) - 1) * 100 if lastVal and lastVal > 0 else 0 + + return ForecastResult( + metric=metric, + metricLabel=label, + historical=metricVals, + projected=projected, + horizon=horizon, + method=f"매출전망×마진({avgMargin:.1%})", + confidence=revResult.confidence, + rSquared=revResult.rSquared, + growthRate=round(growthRate, 1), + assumptions=[ + f"매출 전망 연동 (마진 {avgMargin:.1%} 적용)", + f"최근 {len(recent)}년 가중평균 마진 사용", + ], + currency=revResult.currency, + ) + + +def forecastAll( + series: dict, + horizon: int = 3, + sectorParams: Optional[SectorParams] = None, +) -> dict[str, ForecastResult]: + """모든 주요 메트릭 예측. + + 매출은 정교한 앙상블, 영업이익/순이익은 매출×마진 연동. + 마진 연동 실패 시 단순 시계열 OLS fallback. + """ + results: dict[str, ForecastResult] = {} + + # 매출 먼저 + revResult = forecastMetric(series, metric="revenue", horizon=horizon, sectorParams=sectorParams) + results["revenue"] = revResult + + # 영업이익/순이익: 매출×마진 연동 우선, fallback OLS + for key in ("operating_income", "net_income"): + linked = _marginLinkedForecast(revResult, series, key, horizon) + if linked is not None: + results[key] = linked + else: + results[key] = forecastMetric(series, metric=key, horizon=horizon, sectorParams=sectorParams) + + # OCF는 단독 예측 + results["operating_cashflow"] = forecastMetric( + series, metric="operating_cashflow", horizon=horizon, sectorParams=sectorParams + ) + + return results + + +# ── 시나리오 분석 ────────────────────────────────────────── + + +def scenarioAnalysis( + series: dict, + shares: Optional[int] = None, + sectorParams: Optional[SectorParams] = None, + currentPrice: Optional[float] = None, +) -> ScenarioResult: + """3-Scenario DCF 분석.""" + from dartlab.core.finance.dcf import DCFResult, dcfValuation + + warnings: list[str] = [] + sp = sectorParams or SectorParams( + discountRate=10.0, + growthRate=3.0, + perMultiple=15, + pbrMultiple=1.2, + evEbitdaMultiple=8, + label="기타", + ) + + baseDcf = dcfValuation(series, shares=shares, sectorParams=sp, currentPrice=currentPrice) + bullDcf = dcfValuation( + series, + shares=shares, + sectorParams=sp, + currentPrice=currentPrice, + discountRate=max(sp.discountRate - 1.0, 5.0), + terminalGrowth=min(sp.growthRate, 3.0) + 0.5, + ) + bearDcf = dcfValuation( + series, + shares=shares, + sectorParams=sp, + currentPrice=currentPrice, + discountRate=sp.discountRate + 1.0, + terminalGrowth=max(min(sp.growthRate, 3.0) - 0.5, 0.5), + ) + + def _scenarioDict(dcf: DCFResult) -> dict[str, float | None]: + return { + "growth": dcf.growthRateInitial, + "discountRate": dcf.discountRate, + "terminalGrowth": dcf.terminalGrowth, + "enterpriseValue": dcf.enterpriseValue, + "equityValue": dcf.equityValue, + "perShareValue": dcf.perShareValue, # None 보존 (DCF 결손 시) + } + + base = _scenarioDict(baseDcf) + bull = _scenarioDict(bullDcf) + bear = _scenarioDict(bearDcf) + + prob = {"base": 50, "bull": 25, "bear": 25} + + weighted = None + baseV = base.get("perShareValue", 0) + bullV = bull.get("perShareValue", 0) + bearV = bear.get("perShareValue", 0) + if baseV > 0 or bullV > 0 or bearV > 0: + weighted = round( + baseV * prob["base"] / 100 + bullV * prob["bull"] / 100 + bearV * prob["bear"] / 100, + 0, + ) + + if not baseDcf.fcfProjections: + warnings.append("FCF 데이터 부족 → 시나리오 분석 신뢰도 낮음") + + return ScenarioResult( + base=base, + bull=bull, + bear=bear, + probability=prob, + weightedValue=weighted, + currentPrice=currentPrice, + warnings=warnings, + ) + + +# ── 민감도 분석 ────────────────────────────────────────── + + +def sensitivityAnalysis( + series: dict, + shares: Optional[int] = None, + sectorParams: Optional[SectorParams] = None, + waccSteps: int = 5, + waccRange: float = 2.0, + growthSteps: int = 5, + growthRange: float = 1.0, +) -> SensitivityResult: + """WACC × Terminal Growth 민감도 테이블.""" + from dartlab.core.finance.dcf import dcfValuation + + sp = sectorParams or SectorParams( + discountRate=10.0, + growthRate=3.0, + perMultiple=15, + pbrMultiple=1.2, + evEbitdaMultiple=8, + label="기타", + ) + + baseWacc = sp.discountRate + baseGrowth = min(sp.growthRate, 3.0) + + waccLo = max(baseWacc - waccRange, 4.0) + waccHi = baseWacc + waccRange + waccStep = (waccHi - waccLo) / max(waccSteps - 1, 1) + waccValues = [round(waccLo + i * waccStep, 1) for i in range(waccSteps)] + + growthLo = max(baseGrowth - growthRange, 0.5) + growthHi = baseGrowth + growthRange + gStep = (growthHi - growthLo) / max(growthSteps - 1, 1) + gValues = [round(growthLo + i * gStep, 1) for i in range(growthSteps)] + + matrix: list[list[float]] = [] + bValue = 0.0 + + for wacc in waccValues: + row: list[float] = [] + for tg in gValues: + if wacc <= tg: + row.append(0) + continue + dcf = dcfValuation( + series, + shares=shares, + sectorParams=sp, + discountRate=wacc, + terminalGrowth=tg, + ) + val = dcf.perShareValue or 0 + row.append(val) + if abs(wacc - baseWacc) < 0.05 and abs(tg - baseGrowth) < 0.05: + bValue = val + matrix.append(row) + + return SensitivityResult( + waccValues=waccValues, + growthValues=gValues, + matrix=matrix, + baseWacc=baseWacc, + baseGrowth=baseGrowth, + baseValue=bValue, + ) diff --git a/src/dartlab/analysis/forecast/forwardTest.py b/src/dartlab/analysis/forecast/forwardTest.py new file mode 100644 index 0000000000000000000000000000000000000000..7c37c76101d751a9ec84a69468cae97cac273e86 --- /dev/null +++ b/src/dartlab/analysis/forecast/forwardTest.py @@ -0,0 +1,199 @@ +"""Forward Test 인프라 — 매출 예측 저장 + 사후 평가.""" + +from __future__ import annotations + +import json +import logging +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from pathlib import Path + +log = logging.getLogger(__name__) + +_FORWARD_TEST_DIR = Path.home() / ".dartlab" / "forward_tests" + + +@dataclass +class ForwardTestRecord: + """단일 예측 기록.""" + + key: str # {stockCode}_{date}_{horizon}_{version} + stockCode: str + forecastDate: str # ISO format + version: str # "v3" + horizon: int + projected: list[float] # 예측 매출 (원) + scenarios: dict[str, list[float]] # Base/Bull/Bear + sourcesUsed: list[str] # 사용된 소스 목록 + assumptions: list[str] # 예측 가정 + actual: list[float | None] = field(default_factory=list) # 실적 (나중에 채움) + evaluation: dict | None = None # 평가 결과 + # v4: 방향 예측 캘리브레이션용 + directionProbability: float | None = None # 매출 방향 예측 확률 (0~1) + directionPredicted: str | None = None # "up" | "down" + directionActual: str | None = None # 사후 채움 + + +def generateKey(stockCode: str, horizon: int, version: str = "v3") -> str: + """고유 키 생성.""" + dateStr = datetime.now(timezone.utc).strftime("%Y%m%d") + return f"{stockCode}_{dateStr}_{horizon}y_{version}" + + +def saveForecast(record: ForwardTestRecord) -> Path: + """예측 기록 저장 (opt-in).""" + _FORWARD_TEST_DIR.mkdir(parents=True, exist_ok=True) + filepath = _FORWARD_TEST_DIR / f"{record.stockCode}.json" + + # 기존 기록 로드 + records = _loadRaw(filepath) + + # 같은 키 덮어쓰기 + records = [r for r in records if r.get("key") != record.key] + records.append(asdict(record)) + + filepath.write_text(json.dumps(records, ensure_ascii=False, indent=2), encoding="utf-8") + log.info("Forward test 저장: %s → %s", record.key, filepath) + return filepath + + +def loadRecords(stockCode: str) -> list[ForwardTestRecord]: + """종목별 저장된 예측 기록 로드.""" + filepath = _FORWARD_TEST_DIR / f"{stockCode}.json" + raw = _loadRaw(filepath) + results = [] + for r in raw: + try: + results.append(ForwardTestRecord(**r)) + except TypeError: + log.debug("Forward test 기록 파싱 실패: %s", r.get("key", "?")) + return results + + +def evaluate( + record: ForwardTestRecord, + actualRevenue: list[float], +) -> dict: + """예측 vs 실적 비교 평가.""" + projected = record.projected + n = min(len(projected), len(actualRevenue)) + if n == 0: + return {"error": "비교할 데이터 없음"} + + # MAE, MAPE + errors = [] + pctErrors = [] + directionHits = 0 + for i in range(n): + p, a = projected[i], actualRevenue[i] + if a is None or a <= 0: + continue + err = abs(p - a) + errors.append(err) + pctErrors.append(err / a * 100) + # 방향성: 이전 대비 증감 일치 + if i > 0 and actualRevenue[i - 1] and actualRevenue[i - 1] > 0: + predDir = p > projected[i - 1] if i < len(projected) else True + actDir = a > actualRevenue[i - 1] + if predDir == actDir: + directionHits += 1 + + mae = sum(errors) / len(errors) if errors else 0 + mape = sum(pctErrors) / len(pctErrors) if pctErrors else 0 + directionAccuracy = directionHits / max(n - 1, 1) * 100 + + # 시나리오 히트: 실적이 어느 시나리오 범위에 들어갔는지 + scenarioHit = _checkScenarioHit(record.scenarios, actualRevenue) + + result = { + "mae": round(mae), + "mape": round(mape, 2), + "directionAccuracy": round(directionAccuracy, 1), + "scenarioHit": scenarioHit, + "nCompared": n, + "evaluatedAt": datetime.now(timezone.utc).isoformat(), + } + + # 방향 실적 자동 설정 (캘리브레이션용) + if record.directionPredicted and len(actualRevenue) >= 2: + record.directionActual = "up" if actualRevenue[-1] > actualRevenue[0] else "down" + + # 기록 업데이트 + record.actual = actualRevenue[:n] + record.evaluation = result + return result + + +def _checkScenarioHit( + scenarios: dict[str, list[float]], + actual: list[float], +) -> str: + """실적이 어느 시나리오 범위에 있는지 판정.""" + if not scenarios or not actual: + return "unknown" + + bull = scenarios.get("bull", []) + bear = scenarios.get("bear", []) + + hits: dict[str, int] = {"within_range": 0, "above_bull": 0, "below_bear": 0} + n = min(len(actual), len(bull), len(bear)) + + for i in range(n): + a = actual[i] + if a is None or a <= 0: + continue + hi = bull[i] if i < len(bull) else float("inf") + lo = bear[i] if i < len(bear) else 0 + if a > hi: + hits["above_bull"] += 1 + elif a < lo: + hits["below_bear"] += 1 + else: + hits["within_range"] += 1 + + if not any(hits.values()): + return "unknown" + + return max(hits, key=hits.get) # type: ignore[arg-type] + + +def evaluateCalibration( + stockCodes: list[str] | None = None, +) -> dict | None: # CalibrationReport from calibrationMetrics + """저장된 forward test 전체의 캘리브레이션 평가. + + 모든 기록에서 directionProbability와 directionActual을 수집하여 + Brier Score + reliability diagram 생성. + """ + from dartlab.analysis.forecast.calibrationMetrics import generateCalibrationReport + + if stockCodes is None: + # 모든 종목 파일 스캔 + if not _FORWARD_TEST_DIR.exists(): + return None + stockCodes = [f.stem for f in _FORWARD_TEST_DIR.glob("*.json")] + + predictions: list[float] = [] + outcomes: list[int] = [] + + for code in stockCodes: + for r in loadRecords(code): + if r.directionProbability is not None and r.directionActual is not None: + predictions.append(r.directionProbability) + outcomes.append(1 if r.directionActual == "up" else 0) + + if len(predictions) < 5: + return None + + return generateCalibrationReport(predictions, outcomes) + + +def _loadRaw(filepath: Path) -> list[dict]: + """JSON 파일에서 raw dict 리스트 로드.""" + if not filepath.exists(): + return [] + try: + data = json.loads(filepath.read_text(encoding="utf-8")) + return data if isinstance(data, list) else [] + except (json.JSONDecodeError, OSError): + return [] diff --git a/src/dartlab/analysis/forecast/prediction.py b/src/dartlab/analysis/forecast/prediction.py new file mode 100644 index 0000000000000000000000000000000000000000..0381d5250701c6ee3ed6cdf76599779e6b263b35 --- /dev/null +++ b/src/dartlab/analysis/forecast/prediction.py @@ -0,0 +1,313 @@ +"""Context Signal Fusion — 기업 맥락 신호 -> 시나리오 확률 동적 재가중.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +# ====================================== +# 데이터 구조 +# ====================================== + + +@dataclass +class ContextSignals: + """기업 맥락 신호 — 시나리오 확률 조정의 근거.""" + + # insight 등급 (7개 영역) + insightGrades: dict[str, str] = field(default_factory=dict) + # 전체 sections 평균 변화율 (0~100) + diffChangeRate: float = 0.0 + # 리스크 topic 변화율 (0~100) + riskChangeRate: float = 0.0 + # 기업 규모 ("Large" | "Mid" | "Small") + sizeClass: str = "Mid" + # 업종 경기민감도 ("high" | "moderate" | "defensive") + sectorCyclicality: str = "moderate" + # 성장률 백분위 (0~100, 시장 내 순위) + growthRankPct: float = 50.0 + # 공시 정성 신호 (Phase 2) + disclosureTone: float = 0.0 # -1.0 ~ +1.0 + disclosureChangeIntensity: float = 0.0 # 0.0 ~ 1.0 + disclosureGrowthAdj: float = 0.0 # %p + disclosureConfidence: str = "low" # "high" | "medium" | "low" + # 시나리오별 확률 조정치 (결과) + adjustments: dict[str, float] = field(default_factory=dict) + # 조정 근거 설명 + reasoning: list[str] = field(default_factory=list) + + def __repr__(self) -> str: + lines = ["[Context Signals -- 맥락 신호]"] + if self.insightGrades: + gradesStr = ", ".join(f"{k}={v}" for k, v in self.insightGrades.items()) + lines.append(f" 인사이트 등급: {gradesStr}") + lines.append(f" 공시 변화율: {self.diffChangeRate:.1f}%") + lines.append(f" 리스크 변화율: {self.riskChangeRate:.1f}%") + lines.append(f" 기업 규모: {self.sizeClass}") + lines.append(f" 업종 경기민감도: {self.sectorCyclicality}") + lines.append(f" 성장 순위: 상위 {self.growthRankPct:.0f}%") + if self.disclosureTone != 0.0: + lines.append( + f" 공시 tone: {self.disclosureTone:+.2f} (강도 {self.disclosureChangeIntensity:.2f}, 조정 {self.disclosureGrowthAdj:+.1f}%p, {self.disclosureConfidence})" + ) + if self.adjustments: + lines.append(f" 확률 조정: {self.adjustments}") + if self.reasoning: + lines.append(" 조정 근거:") + for r in self.reasoning: + lines.append(f" - {r}") + return "\n".join(lines) + + +# ====================================== +# 신호 수집 (Company 객체에서) +# ====================================== + + +def collectSignals(company, *, usePredictionAxis: bool = False) -> ContextSignals: + """Company 객체에서 맥락 신호를 수집한다. + + usePredictionAxis=True이면 analysis("forecast", "예측신호") 결과를 추가로 소비하여 + 공시 tone, 변화 강도, 성장 조정치를 enrichment한다. + """ + signals = ContextSignals() + + # 1. insight 등급 수집 + try: + insights = company.insights + if insights: + for areaKey in ("profitability", "health", "cashflow", "governance", "risk", "opportunity", "performance"): + area = getattr(insights, areaKey, None) + if area and hasattr(area, "grade"): + signals.insightGrades[areaKey] = area.grade + except (AttributeError, TypeError): + pass + + # 2. diff 변화율 수집 + try: + diffResult = company._docs.diff() + if diffResult and hasattr(diffResult, "changeRate"): + signals.diffChangeRate = diffResult.changeRate or 0.0 + # 리스크 topic 변화율 + if diffResult and hasattr(diffResult, "topicChanges"): + riskTopics = ["riskFactors", "riskDerivative", "contingentLiabilities"] + riskChanges = [] + for tc in diffResult.topicChanges: + if hasattr(tc, "topic") and tc.topic in riskTopics: + riskChanges.append(tc.changeRate or 0) + if riskChanges: + signals.riskChangeRate = sum(riskChanges) / len(riskChanges) + except (AttributeError, TypeError): + pass + + # 3. rank (규모, 성장 순위) + try: + rankInfo = getattr(company, "rank", None) or getattr(company, "rankInfo", None) + if rankInfo: + if hasattr(rankInfo, "sizeClass"): + signals.sizeClass = rankInfo.sizeClass or "Mid" + if hasattr(rankInfo, "growthRankPct"): + signals.growthRankPct = rankInfo.growthRankPct or 50.0 + except (AttributeError, TypeError): + pass + + # 4. sector 경기민감도 + try: + from dartlab.core.finance.scenario import getElasticity as get_elasticity + + sectorKey = None + # sectorInfo dict에서 가져오기 + sectorInfo = getattr(company, "sectorInfo", None) + if sectorInfo and isinstance(sectorInfo, dict): + sectorKey = sectorInfo.get("sector") + # profile.sectorName fallback + if not sectorKey: + profile = getattr(company, "profile", None) + if profile and hasattr(profile, "sectorName"): + sectorKey = profile.sectorName + if sectorKey: + elasticity = get_elasticity(sectorKey) + signals.sectorCyclicality = elasticity.cyclicality + except (ImportError, AttributeError, TypeError): + pass + + # 5. 예측신호 축 enrichment (선택적) + if usePredictionAxis: + _enrichFromPredictionAxis(company, signals) + + # 신호에서 조정치 계산 + adjustments, reasoning = _computeAdjustments(signals) + signals.adjustments = adjustments + signals.reasoning = reasoning + + return signals + + +def _enrichFromPredictionAxis(company, signals: ContextSignals) -> None: + """analysis("forecast", "예측신호") 결과로 ContextSignals를 보강한다.""" + try: + from dartlab.analysis.financial.predictionSignals import ( + calcDisclosureDelta, + calcPredictionSynthesis, + ) + + # 공시 변화 신호 → disclosure 필드 enrichment + disclosure = calcDisclosureDelta(company) + if disclosure is not None: + intensity = disclosure["overallChangeRate"] / 100.0 + signals.disclosureChangeIntensity = intensity + if disclosure["signalDirection"] == "negative": + signals.disclosureTone = -0.3 * intensity + elif disclosure["signalDirection"] == "positive": + signals.disclosureTone = 0.2 * intensity + + # 종합 신호 → 성장 조정치 + synthesis = calcPredictionSynthesis(company) + if synthesis is not None: + bias = synthesis["aiContext"]["directionBias"] + if abs(bias) > 0.2: + signals.disclosureGrowthAdj = bias * 3.0 + signals.disclosureConfidence = synthesis["confidence"] + except (ImportError, TypeError, KeyError): + pass + + # Prediction Space — 6대 축 기반 시나리오 확률 조정 + _enrichFromPredictionSpace(signals) + + +def _enrichFromPredictionSpace(signals: ContextSignals) -> None: + """PredictionSpace 6대 축으로 시나리오 확률을 조정한다.""" + try: + from dartlab.analysis.forecast.predictionSpace import getPredictionSpace + + space = getPredictionSpace() + if space is None: + return + + adj = signals.adjustments + reasons = signals.reasoning + + # 경기축 수축 → adverse 확률 상승 + bc = space.axes.get("businessCycle") + if bc and bc.level < -0.3: + delta = min(0.08, abs(bc.level) * 0.1) + adj["adverse"] = adj.get("adverse", 0.0) + delta + adj["baseline"] = adj.get("baseline", 0.0) - delta + reasons.append(f"경기축 수축 (level={bc.level:.2f}) -> adverse +{delta:.0%}p") + + # 금리축 급등 → rate_hike 시나리오 + ir = space.axes.get("interestRate") + if ir and ir.level > 0.3 and ir.direction == "improving": + adj["rate_hike"] = adj.get("rate_hike", 0.0) + 0.05 + adj["baseline"] = adj.get("baseline", 0.0) - 0.03 + reasons.append(f"금리축 상승 (level={ir.level:.2f}, {ir.direction}) -> rate_hike +5%p") + + # 환율축 급변 (약세) → 수출기업 유리, 내수기업 불리 + fx = space.axes.get("fxRate") + if fx and fx.level > 0.3: + adj["baseline"] = adj.get("baseline", 0.0) + 0.02 + reasons.append(f"환율축 원화약세 (level={fx.level:.2f}) -> 수출기업 baseline +2%p") + + # 심리축 악화 → 소비 위축 + st = space.axes.get("sentiment") + if st and st.level < -0.3: + adj["adverse"] = adj.get("adverse", 0.0) + 0.03 + reasons.append(f"심리축 위축 (level={st.level:.2f}) -> adverse +3%p") + + except (ImportError, TypeError): + pass + + +# ====================================== +# 확률 재가중 +# ====================================== + + +_ADJUSTMENT_RULES = [ + # (조건 함수, 시나리오 조정 dict, 근거 메시지) +] + + +def _computeAdjustments(signals: ContextSignals) -> tuple[dict[str, float], list[str]]: + """맥락 신호 -> 확률 조정치 + 근거 계산.""" + adj: dict[str, float] = {} + reasons: list[str] = [] + + def _add(scenario: str, delta: float, reason: str) -> None: + adj[scenario] = adj.get(scenario, 0.0) + delta + reasons.append(reason) + + grades = signals.insightGrades + + # 규칙 1: 수익성 위험 (profitability D/F -> adverse +5%p) + if grades.get("profitability") in ("D", "F"): + _add("adverse", 0.05, f"수익성 등급 {grades['profitability']} -> 하방 리스크 +5%p") + _add("baseline", -0.05, "수익성 약화 -> baseline -5%p") + + # 규칙 2: 재무건전성 위험 (health D/F -> adverse +5%p) + if grades.get("health") in ("D", "F"): + _add("adverse", 0.05, f"건전성 등급 {grades['health']} -> 하방 리스크 +5%p") + _add("baseline", -0.05, "건전성 약화 -> baseline -5%p") + + # 규칙 3: 리스크 급변 (riskChangeRate > 60% -> adverse +5%p) + if signals.riskChangeRate > 60: + _add("adverse", 0.05, f"리스크 공시 변화율 {signals.riskChangeRate:.0f}% -> adverse +5%p") + _add("baseline", -0.05, "리스크 급변 -> baseline -5%p") + + # 규칙 4: 높은 기회 (opportunity A -> baseline +5%p) + if grades.get("opportunity") in ("A",): + _add("baseline", 0.05, "기회 등급 A -> baseline +5%p") + _add("adverse", -0.03, "긍정 기회 -> adverse -3%p") + + # 규칙 5: 경기민감 업종 (high -> rate_hike +3%p) + if signals.sectorCyclicality == "high": + _add("rate_hike", 0.03, "경기민감 업종 -> rate_hike +3%p") + _add("baseline", -0.03, "경기민감 -> baseline -3%p") + + # 규칙 6: 방어적 업종 (defensive -> baseline +5%p, adverse -3%p) + if signals.sectorCyclicality == "defensive": + _add("baseline", 0.05, "방어적 업종 -> baseline +5%p") + _add("adverse", -0.03, "방어적 -> adverse -3%p") + + # 규칙 7: 고성장 상위 20% (baseline +3%p) + if signals.growthRankPct <= 20: + _add("baseline", 0.03, f"성장 상위 {signals.growthRankPct:.0f}% -> baseline +3%p") + + # 규칙 8: 현금흐름 위험 (cashflow D/F -> adverse +3%p) + if grades.get("cashflow") in ("D", "F"): + _add("adverse", 0.03, f"현금흐름 등급 {grades['cashflow']} -> adverse +3%p") + + return adj, reasons + + +def adjustProbabilities( + baseProbs: dict[str, float], + signals: ContextSignals, +) -> dict[str, float]: + """기본 확률을 맥락 신호로 재가중한다.""" + # adjustments가 아직 계산되지 않았으면 자동 계산 + if not signals.adjustments and ( + signals.insightGrades + or signals.riskChangeRate > 0 + or signals.sectorCyclicality != "moderate" + or signals.growthRankPct <= 20 + ): + adj, reasoning = _computeAdjustments(signals) + signals.adjustments = adj + signals.reasoning = reasoning + + result = dict(baseProbs) + + for scenario, delta in signals.adjustments.items(): + if scenario in result: + result[scenario] += delta + + # 하한 1%p (음수 확률 방지) + for k in result: + result[k] = max(result[k], 0.01) + + # 합계 정규화 -> 1.0 + total = sum(result.values()) + if total > 0: + result = {k: v / total for k, v in result.items()} + + return result diff --git a/src/dartlab/analysis/forecast/predictionSpace.py b/src/dartlab/analysis/forecast/predictionSpace.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb6acf4d6770d0b2f4b8d367ca634e952e12c70 --- /dev/null +++ b/src/dartlab/analysis/forecast/predictionSpace.py @@ -0,0 +1,393 @@ +"""Prediction Space — 매출 변화를 설명하는 6대 보편 축. + +전체 기업에 공통으로 작용하는 외부 축을 실시간 데이터로 계산한다. +기업별 차이는 SECTOR_ELASTICITY(25개 섹터 탄성치)로 구분. + +6대 축: +- businessCycle (경기): CLI, IPI +- interestRate (금리): BASE_RATE, TREASURY_3Y +- fxRate (환율): USDKRW +- commodity (원자재): PPI, WTI +- sentiment (심리): CSI, BSI +- liquidity (유동성): M2 + +사용법:: + + from dartlab.analysis.forecast.predictionSpace import getPredictionSpace + + space = getPredictionSpace() + if space: + print(space.axes["businessCycle"].level) # -1.0 ~ +1.0 + print(space.impactOn("반도체")) # 축별 매출 영향 % +""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass, field + +log = logging.getLogger(__name__) + + +# ── 데이터 구조 ── + + +@dataclass +class AxisState: + """단일 예측 축의 현재 상태.""" + + name: str + label: str + level: float # -1.0 (수축) ~ +1.0 (팽창) + direction: str # "improving" | "deteriorating" | "stable" + momentum: float # -1.0 ~ +1.0 + confidence: str # "high" | "medium" | "low" + indicators: dict[str, float | None] = field(default_factory=dict) + asOf: str = "" + + +@dataclass +class PredictionSpace: + """6대 보편 예측 축.""" + + axes: dict[str, AxisState] = field(default_factory=dict) + timestamp: str = "" + dataFreshness: str = "unavailable" + + def impactOn(self, sectorKey: str | None) -> dict[str, float]: + """섹터별 매출 영향 추정 (%). + + SECTOR_ELASTICITY의 탄성치를 사용하여 + 각 축 상태 → 매출 변화율로 변환. + """ + from dartlab.core.finance.scenario import getElasticity + + e = getElasticity(sectorKey) + impacts: dict[str, float] = {} + + bc = self.axes.get("businessCycle") + if bc: + impacts["businessCycle"] = bc.level * e.revenueToGdp * 3.0 + + ir = self.axes.get("interestRate") + if ir: + if e.nimToRate > 0: + impacts["interestRate"] = ir.level * e.nimToRate / 100 + else: + impacts["interestRate"] = -ir.level * 0.5 + + fx = self.axes.get("fxRate") + if fx: + impacts["fxRate"] = fx.level * e.revenueToFx * 2.0 + + cm = self.axes.get("commodity") + if cm: + commodityProducers = {"화학", "철강", "에너지/자원", "Energy", "Materials"} + if sectorKey in commodityProducers: + impacts["commodity"] = cm.level * 2.0 + else: + impacts["commodity"] = -cm.level * 1.0 + + st = self.axes.get("sentiment") + if st and e.cyclicality in ("high", "moderate"): + impacts["sentiment"] = st.level * 1.5 + + lq = self.axes.get("liquidity") + if lq: + impacts["liquidity"] = lq.level * 0.5 + + return {k: round(v, 2) for k, v in impacts.items()} + + def summary(self) -> dict: + """AI 컨텍스트용 요약.""" + return { + "axes": { + name: { + "label": ax.label, + "state": "expansion" if ax.level > 0.2 else "contraction" if ax.level < -0.2 else "neutral", + "direction": ax.direction, + "level": round(ax.level, 2), + "momentum": round(ax.momentum, 2), + } + for name, ax in self.axes.items() + }, + "timestamp": self.timestamp, + "freshness": self.dataFreshness, + } + + +# ── 축 정의 ── + +_AXIS_DEFS = { + "businessCycle": { + "label": "경기축", + "indicators": ["CLI", "IPI", "MANUFACTURING"], + "market": "KR", + "normalize": "index", # 100 기준 + }, + "interestRate": { + "label": "금리축", + "indicators": ["BASE_RATE", "TREASURY_3Y"], + "market": "KR", + "normalize": "zscore", + }, + "fxRate": { + "label": "환율축", + "indicators": ["USDKRW"], + "market": "KR", + "normalize": "price", + "baseline": 1300, # 중립 환율 기준 + }, + "commodity": { + "label": "원자재축", + "indicators": ["PPI"], + "market": "KR", + "normalize": "index", + }, + "sentiment": { + "label": "심리축", + "indicators": ["CSI", "BSI"], + "market": "KR", + "normalize": "index", + }, + "liquidity": { + "label": "유동성축", + "indicators": ["M2"], + "market": "KR", + "normalize": "growth", + }, +} + + +# ── 정규화 함수 ── + + +def _clamp(v: float, lo: float = -1.0, hi: float = 1.0) -> float: + return max(lo, min(hi, v)) + + +def _normalizeIndex(values: list[float]) -> float: + """지수형 (100 기준). CLI, CSI, BSI, IPI 등.""" + if not values: + return 0.0 + latest = values[-1] + return _clamp((latest - 100) / 20) + + +def _normalizeZscore(values: list[float]) -> float: + """Z-score (2년 평균/표준편차 기준).""" + if len(values) < 6: + return 0.0 + mean = sum(values) / len(values) + variance = sum((v - mean) ** 2 for v in values) / len(values) + std = variance**0.5 + if std < 0.001: + return 0.0 + return _clamp((values[-1] - mean) / std / 2) + + +def _normalizePrice(values: list[float], baseline: float) -> float: + """가격형 (기준가 대비 편차).""" + if not values or baseline == 0: + return 0.0 + latest = values[-1] + return _clamp((latest - baseline) / baseline * 5) + + +def _normalizeGrowth(values: list[float]) -> float: + """성장률형 (YoY 변화율).""" + if len(values) < 13: + return 0.0 + current = values[-1] + yearAgo = values[-13] if len(values) >= 13 else values[0] + if yearAgo == 0: + return 0.0 + yoy = (current - yearAgo) / abs(yearAgo) + return _clamp(yoy * 5) + + +def _computeDirection(values: list[float], nMonths: int = 3) -> tuple[str, float]: + """3개월 추세로 방향 + 모멘텀 계산.""" + if len(values) < nMonths: + return "stable", 0.0 + + recent = values[-nMonths:] + if len(recent) < 2: + return "stable", 0.0 + + # 단순 기울기 + n = len(recent) + xMean = (n - 1) / 2 + yMean = sum(recent) / n + num = sum((i - xMean) * (y - yMean) for i, y in enumerate(recent)) + den = sum((i - xMean) ** 2 for i in range(n)) + slope = num / den if den > 0 else 0.0 + + # 정규화 모멘텀 + scale = abs(yMean) if yMean != 0 else 1.0 + momentum = _clamp(slope / scale * 10) + + threshold = 0.1 + if momentum > threshold: + return "improving", momentum + elif momentum < -threshold: + return "deteriorating", momentum + else: + return "stable", momentum + + +# ── 축 상태 계산 ── + + +def _computeAxisState(axisName: str, axisDef: dict, macroData: dict) -> AxisState | None: + """단일 축의 상태를 계산.""" + indicators = axisDef["indicators"] + normalizeType = axisDef["normalize"] + label = axisDef["label"] + + # 지표 값 수집 + allValues: list[list[float]] = [] + indicatorVals: dict[str, float | None] = {} + + for indName in indicators: + series = macroData.get(indName) + if series is None or len(series) == 0: + indicatorVals[indName] = None + continue + + vals = [v for v in series if v is not None] + if not vals: + indicatorVals[indName] = None + continue + + indicatorVals[indName] = vals[-1] + allValues.append(vals) + + if not allValues: + return None + + # 주 지표 (첫 번째)의 값으로 정규화 + primaryVals = allValues[0] + + if normalizeType == "index": + level = _normalizeIndex(primaryVals) + elif normalizeType == "zscore": + level = _normalizeZscore(primaryVals) + elif normalizeType == "price": + baseline = axisDef.get("baseline", 1300) + level = _normalizePrice(primaryVals, baseline) + elif normalizeType == "growth": + level = _normalizeGrowth(primaryVals) + else: + level = 0.0 + + # 보조 지표가 있으면 평균 + if len(allValues) > 1: + secondaryLevels = [] + for vals in allValues[1:]: + if normalizeType == "index": + secondaryLevels.append(_normalizeIndex(vals)) + elif normalizeType == "zscore": + secondaryLevels.append(_normalizeZscore(vals)) + + if secondaryLevels: + # 주 지표 2x, 보조 1x 가중 + totalWeight = 2 + len(secondaryLevels) + level = (level * 2 + sum(secondaryLevels)) / totalWeight + + direction, momentum = _computeDirection(primaryVals) + + # 신뢰도 + nAvailable = sum(1 for v in indicatorVals.values() if v is not None) + if nAvailable == len(indicators): + confidence = "high" + elif nAvailable >= 1: + confidence = "medium" + else: + confidence = "low" + + return AxisState( + name=axisName, + label=label, + level=round(level, 3), + direction=direction, + momentum=round(momentum, 3), + confidence=confidence, + indicators=indicatorVals, + ) + + +# ── 매크로 데이터 fetch ── + + +def _fetchMacroData() -> dict[str, list[float]]: + """ECOS/FRED에서 필요한 지표를 가져온다. + + Gather 레이어의 캐싱을 활용하므로 반복 호출 비용 낮음. + """ + from dartlab.gather import getDefaultGather + + g = getDefaultGather() + result: dict[str, list[float]] = {} + + # 필요한 지표 목록 + krIndicators = ["CLI", "IPI", "MANUFACTURING", "BASE_RATE", "TREASURY_3Y", "USDKRW", "PPI", "CSI", "BSI", "M2"] + + for ind in krIndicators: + try: + df = g.macro("KR", ind, start="2023-01-01") + if df is not None and len(df) > 0: + valCol = [c for c in df.columns if c != "date"] + if valCol: + vals = df[valCol[0]].to_list() + result[ind] = [v for v in vals if v is not None] + except (ValueError, TypeError, AttributeError, KeyError) as e: + log.debug("매크로 지표 %s 로드 실패: %s", ind, e) + + return result + + +# ── 세션 캐시 ── + +_SPACE_CACHE: PredictionSpace | None = None +_CACHE_TS: float = 0 +_CACHE_TTL = 3600 # 1시간 + + +def getPredictionSpace(*, forceRefresh: bool = False) -> PredictionSpace | None: + """보편 예측 공간을 가져온다. + + 첫 호출 시 매크로 데이터 fetch (2-5초). + 이후 세션 내 캐시 반환 (마이크로초). + API 키 없으면 None. + """ + global _SPACE_CACHE, _CACHE_TS + + if not forceRefresh and _SPACE_CACHE and (time.monotonic() - _CACHE_TS < _CACHE_TTL): + return _SPACE_CACHE + + macroData = _fetchMacroData() + if not macroData: + return None + + from datetime import datetime + + axes: dict[str, AxisState] = {} + for axisName, axisDef in _AXIS_DEFS.items(): + state = _computeAxisState(axisName, axisDef, macroData) + if state is not None: + axes[axisName] = state + + if not axes: + return None + + space = PredictionSpace( + axes=axes, + timestamp=datetime.now().isoformat(), + dataFreshness="fresh" if len(axes) >= 4 else "partial", + ) + + _SPACE_CACHE = space + _CACHE_TS = time.monotonic() + return space diff --git a/src/dartlab/analysis/forecast/proforma.py b/src/dartlab/analysis/forecast/proforma.py new file mode 100644 index 0000000000000000000000000000000000000000..b8df8de867b17875d85f99b98ae4e18b7b7f3185 --- /dev/null +++ b/src/dartlab/analysis/forecast/proforma.py @@ -0,0 +1,3 @@ +"""하위호환 re-export -- 실제 구현은 core/finance/proforma.py.""" + +from dartlab.core.finance.proforma import * # noqa: F401,F403 diff --git a/src/dartlab/analysis/forecast/revenueForecast.py b/src/dartlab/analysis/forecast/revenueForecast.py new file mode 100644 index 0000000000000000000000000000000000000000..3c3ba41c508709e3405e63768de9e1e88188ae66 --- /dev/null +++ b/src/dartlab/analysis/forecast/revenueForecast.py @@ -0,0 +1,1325 @@ +"""매출액 예측 엔진 v4 — 4-소스 앙상블 + 세그먼트 Bottom-Up + 시나리오. + +4-소스 앙상블: +1. 자체 시계열 (과거 매출 OLS/CAGR/평균회귀) +2. 시장 컨센서스 (네이버/Yahoo 금융 애널리스트 매출 추정치) +3. ROIC 기반 내재 성장 (Damodaran Value Driver: g = ROIC × Reinvestment Rate) +4. 세그먼트 Bottom-Up (부문별 개별 예측 → 합산) ++ 수주잔고 선행지표 (B/R ratio → 내재 성장률, 전 종목 적용) + +v3→v4 변경 (실험 098 기반): +- 매크로 GDP β 제거 (기여도 0%, 오히려 악화) +- FX regex 제거 (29% 성공률) +- 주가내재 역산 제거 (순환논리) +- 횡단면 회귀 제거 (비활성) +- 공시 tone 제거 (미검증) + +설계 원칙 (Engine-First, AI-Augmented): +- 엔진이 재현 가능하고 투명한 기본 예측을 생성 +- ai_context 필드로 AI가 세계 지식으로 보정할 수 있는 브릿지 제공 +- 결과 스키마는 도메인(DART/EDGAR/EDINET) 불문 동일 +- 3-시나리오 출력 (Base/Bull/Bear)으로 불확실성 정량화 +- CompanyDataBundle로 L1 데이터를 L0에 전달 (L0→L1 import 금지) + +외부 의존성: gather 엔진 (optional — 없으면 시계열 only). +""" + +from __future__ import annotations + +import functools +import logging +import math +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + import polars as pl + +from dartlab.analysis.forecast.forecast import ( + forecastMetric, +) +from dartlab.core.finance.extract import ( + getAnnualValues, + getLatest, + getTTM, +) +from dartlab.core.finance.fmt import fmtBig + +log = logging.getLogger(__name__) + +# ROIC 기반 성장 소스 가중치 (시계열에서 할당) +_ROIC_WEIGHT = 0.15 + + +# ══════════════════════════════════════ +# L1 → L0 데이터 브릿지 +# ══════════════════════════════════════ + + +@dataclass +class CompanyDataBundle: + """L1(Company) → L0(forecast) 데이터 브릿지.""" + + segmentRevenue: pl.DataFrame | None = None # c.segments.revenue + salesDf: pl.DataFrame | None = None # c.salesOrder.salesDf + orderDf: pl.DataFrame | None = None # c.salesOrder.orderDf + structuralBreak: dict | None = None # calcStructuralBreak() 결과 + + +@dataclass +class SegmentForecast: + """개별 세그먼트 예측 결과.""" + + name: str + historical: list[Optional[float]] + projected: list[float] + growthRates: list[float] + method: str + shareOfRevenue: float # 최근 매출 비중 (%, 0~100) + lifecycle: str + + +@dataclass +class BacklogSignal: + """수주잔고 기반 선행 시그널.""" + + backlogRevenueRatio: float # 현재 B/R ratio + brRatioTrend: str # "increasing" | "stable" | "declining" + impliedRevenueGrowth: float # 수주잔고 기반 내재 매출 성장률 (%) + conversionRate: float # 과거 평균 수주→매출 전환율 + sectorsApplicable: bool # 건설/조선/방산만 강신호 + + +@dataclass +class RevenueForecastAIOverlay: + """AI 보정 결과 — 구조화된 스키마.""" + + growthAdjustment: list[float] # 연도별 %p 보정 + direction: str # "up" | "down" | "neutral" + magnitude: str # "minor" (<2%p) | "moderate" (2-5%p) | "major" (>5%p) + scenarioShift: dict[str, float] | None = None # 시나리오 확률 이동 + reasoning: list[str] = field(default_factory=list) # 보정 근거 + applied: bool = False + + +# ══════════════════════════════════════ +# 결과 타입 +# ══════════════════════════════════════ + + +@dataclass +class RevenueForecastResult: + """매출 예측 결과 — 소스별 기여도 투명 공개.""" + + historical: list[Optional[float]] + projected: list[float] + horizon: int + method: str # "ensemble" | "timeseries_only" | "consensus_only" | "N/A" + confidence: str # "high" | "medium" | "low" + growthRates: list[float] # 연도별 YoY 성장률 (%) + sources: list[str] # ["timeseries", "consensus", "macro", "roic"] + sourceWeights: dict[str, float] # {"timeseries": 0.4, "consensus": 0.45, ...} + consensusRevenue: list[float] = field(default_factory=list) + assumptions: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + aiContext: dict = field(default_factory=dict) + + # v3 확장 필드 (전부 default — 하위호환) + scenarios: dict[str, list[float]] = field(default_factory=dict) # base/bull/bear + scenarioGrowthRates: dict[str, list[float]] = field(default_factory=dict) + scenarioProbabilities: dict[str, float] = field(default_factory=dict) + segmentForecasts: list[SegmentForecast] = field(default_factory=list) + backlogSignal: BacklogSignal | None = None + aiOverlay: RevenueForecastAIOverlay | None = None + forwardTestKey: str | None = None + currency: str = "KRW" + + # v4: 예측 불가능성 명시 (네이트 실버 — 예측할 수 없는 것을 예측하지 마라) + forecastable: bool = True + unforecastableReason: str = "" + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + cur = self.currency + lines = [f"[매출 예측 — {self.method}]"] + lines.append(f" 신뢰도: {self.confidence}") + lc = self.aiContext.get("lifecycle", "") + if lc: + lines.append(f" 라이프사이클: {lc}") + lines.append(f" 소스: {', '.join(f'{k}({v:.0%})' for k, v in self.sourceWeights.items())}") + + validHist = [v for v in self.historical if v is not None] + if self.projected and validHist: + base = ( + self.projected[0] / (1 + self.growthRates[0] / 100) + if self.growthRates and self.growthRates[0] != -100 + else validHist[-1] + ) + lines.append(f" 기준 매출: {fmtBig(base, cur)}") + elif validHist: + lines.append(f" 최근 실적: {fmtBig(validHist[-1], cur)}") + + for i, (proj, gr) in enumerate(zip(self.projected, self.growthRates), 1): + lines.append(f" +{i}년: {fmtBig(proj, cur)} ({gr:+.1f}%)") + + # v3: 시나리오 + if self.scenarios: + probs = self.scenarioProbabilities + for label in ("bull", "bear"): + sc = self.scenarios.get(label, []) + sg = self.scenarioGrowthRates.get(label, []) + prob = probs.get(label, 0) + if sc: + lines.append( + f" {label.title()}({prob:.0f}%): {fmtBig(sc[0], cur)} ({sg[0]:+.1f}%)" + if sg + else f" {label.title()}: {fmtBig(sc[0], cur)}" + ) + + # v3: 세그먼트 + if self.segmentForecasts: + lines.append(f" 세그먼트: {len(self.segmentForecasts)}개 부문") + for sf in self.segmentForecasts[:3]: # 상위 3개만 표시 + if sf.projected: + lines.append( + f" {sf.name}({sf.shareOfRevenue:.0f}%): {fmtBig(sf.projected[0], cur)} ({sf.growthRates[0]:+.1f}%)" + if sf.growthRates + else f" {sf.name}: {fmtBig(sf.projected[0], cur)}" + ) + + # v3: 수주잔고 + if self.backlogSignal: + bs = self.backlogSignal + lines.append( + f" 수주잔고: B/R={bs.backlogRevenueRatio:.1f}x ({bs.brRatioTrend}), 내재 성장 {bs.impliedRevenueGrowth:+.1f}%" + ) + + if self.assumptions: + for a in self.assumptions: + lines.append(f" · {a}") + if self.warnings: + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +# ══════════════════════════════════════ +# 컨센서스 매출 추출 +# ══════════════════════════════════════ + + +@functools.lru_cache(maxsize=64) +def _fetchConsensusRevenue( + stockCode: str, + market: str = "KR", +) -> tuple[tuple[int, float, str], ...]: + """gather에서 매출 컨센서스를 가져온다. + + [성능] @lru_cache — review에서 4번 호출되는데 매번 외부 API. + 같은 stockCode 입력은 첫 호출 후 즉시 반환. + Return type은 tuple (lru_cache는 hashable result 권장). + """ + try: + from dartlab.gather import Gather + + g = Gather() + items = g.revenue_consensus(stockCode, market=market) + try: + g.close() + except RuntimeError: + pass # event loop already closed + return tuple((item.fiscal_year, item.revenue_est, item.source) for item in items if item.revenue_est > 0) + except (ImportError, OSError) as exc: + log.debug("컨센서스 수집 실패: %s", exc) + return () + + +# ══════════════════════════════════════ +# ROIC 기반 내재 성장률 (Damodaran Value Driver) +# ══════════════════════════════════════ + + +def _fundamentalGrowth(series: dict) -> tuple[float | None, dict]: + """ROIC × Reinvestment Rate → 내재 성장률.""" + detail: dict = {} + + # NOPAT = 영업이익 × (1 - 유효세율) + opIncome = getTTM(series, "IS", "operating_income") or getTTM(series, "IS", "operating_profit") + if opIncome is None or opIncome <= 0: + return None, detail + + pbt = getTTM(series, "IS", "profit_before_tax") + taxExp = getTTM(series, "IS", "income_tax_expense") + effectiveTax = 0.22 # 기본값: 한국 법인세 실효세율 + if pbt and pbt > 0 and taxExp is not None: + et = taxExp / pbt + if 0 <= et <= 0.5: + effectiveTax = et + + nopat = opIncome * (1 - effectiveTax) + + # Invested Capital = 자기자본 + max(순차입금, 0) + totalEquity = getLatest(series, "BS", "total_stockholders_equity") or getLatest( + series, "BS", "owners_of_parent_equity" + ) + cash = getLatest(series, "BS", "cash_and_cash_equivalents") or 0 + shortBorr = getLatest(series, "BS", "shortterm_borrowings") or 0 + longBorr = getLatest(series, "BS", "longterm_borrowings") or 0 + bonds = getLatest(series, "BS", "bonds_payable") or 0 + netDebt = shortBorr + longBorr + bonds - cash + + if totalEquity is None or totalEquity <= 0: + return None, detail + + invested = totalEquity + max(netDebt, 0) + if invested <= 0: + return None, detail + + roic = (nopat / invested) * 100 # % + + # CAPEX (CF에서 음수로 기록됨) + capexRaw = getTTM(series, "CF", "purchase_of_property_plant_and_equipment") + capex = abs(capexRaw) if capexRaw else 0 + + # Depreciation + dep = getTTM(series, "CF", "depreciation_and_amortization") + if dep is None: + dep = getTTM(series, "CF", "depreciation_cf") + if dep is None: + dep = getTTM(series, "CF", "depreciation") + if dep is None: + # fallback: 유형자산 × 5% + 무형자산 × 10% + tangible = getLatest(series, "BS", "tangible_assets") or 0 + intangible = getLatest(series, "BS", "intangible_assets") or 0 + dep = tangible * 0.05 + intangible * 0.1 + + # ΔNWC (순운전자본 변동) + caVals = getAnnualValues(series, "BS", "current_assets") + clVals = getAnnualValues(series, "BS", "current_liabilities") + cashVals = getAnnualValues(series, "BS", "cash_and_cash_equivalents") + deltaNwc = 0.0 + if len(caVals) >= 2 and len(clVals) >= 2: + + def _nwcAt(idx: int) -> float | None: + ca = caVals[idx] if idx < len(caVals) else None + cl = clVals[idx] if idx < len(clVals) else None + c = cashVals[idx] if idx < len(cashVals) and cashVals[idx] else 0 + if ca is not None and cl is not None: + return (ca - (c or 0)) - cl + return None + + nwcCurr = _nwcAt(-1) + nwcPrev = _nwcAt(-2) + if nwcCurr is not None and nwcPrev is not None: + deltaNwc = nwcCurr - nwcPrev + + # Reinvestment = CAPEX - Depreciation + ΔNWC + reinvestment = capex - dep + deltaNwc + + if nopat <= 0: + return None, detail + + reinvestmentRate = reinvestment / nopat + # 재투자율 범위 제한 (음수 = 자본 회수, >1.0 = 공격 투자) + reinvestmentRate = max(min(reinvestmentRate, 1.5), -0.5) + + fundamentalG = roic * reinvestmentRate # % 단위 + + detail = { + "roic": round(roic, 2), + "reinvestmentRate": round(reinvestmentRate * 100, 1), + "nopat": nopat, + "investedCapital": invested, + "capex": capex, + "depreciation": dep, + "deltaNwc": deltaNwc, + "fundamentalGrowth": round(fundamentalG, 2), + } + + return fundamentalG, detail + + +# ══════════════════════════════════════ +# 기업 라이프사이클 판별 +# ══════════════════════════════════════ + + +def _classifyLifecycle(series: dict) -> tuple[str, dict]: + """기업 라이프사이클 단계 판별.""" + revVals = getAnnualValues(series, "IS", "revenue") or getAnnualValues(series, "IS", "sales") + valid = [v for v in revVals if v is not None and v > 0] + + if len(valid) < 4: + return "unknown", {"reason": "매출 데이터 4기간 미만"} + + # 3Y CAGR + recent = valid[-4:] # 최근 4개 = 3년 성장 + cagr = ((recent[-1] / recent[0]) ** (1 / 3) - 1) * 100 if recent[0] > 0 else 0 + + # CV (Coefficient of Variation) + meanRev = sum(recent) / len(recent) + if meanRev > 0: + variance = sum((v - meanRev) ** 2 for v in recent) / len(recent) + cv = math.sqrt(variance) / meanRev + else: + cv = 0 + + # 부호 변화 횟수 (성장률 방향 전환) + growthSigns = [] + for i in range(1, len(recent)): + if recent[i - 1] > 0: + growthSigns.append(1 if recent[i] > recent[i - 1] else -1) + signChanges = sum(1 for i in range(1, len(growthSigns)) if growthSigns[i] != growthSigns[i - 1]) + + detail = { + "cagr_3y": round(cagr, 1), + "cv": round(cv, 3), + "signChanges": signChanges, + "dataPoints": len(valid), + } + + # signChanges 임계: 분기 데이터(>8개)는 3회, 연간은 2회 + signThreshold = 3 if len(valid) > 8 else 2 + if cv > 0.4 or signChanges >= signThreshold: + return "transition", detail + if cagr > 15 and cv < 0.3: + return "high_growth", detail + if cagr < -5: + return "decline", detail + return "mature", detail + + +def _lifecycleWeightAdjustments( + lifecycle: str, + baseWeights: dict[str, float], +) -> dict[str, float]: + """라이프사이클에 따른 가중치 조정.""" + w = dict(baseWeights) + + if lifecycle == "high_growth": + # 컨센서스 의존도 높임 + if "consensus" in w and "timeseries" in w: + shift = min(0.1, w["timeseries"]) + w["consensus"] += shift + w["timeseries"] -= shift + elif lifecycle == "mature": + # ROIC, 시계열에 더 의존 + if "roic" in w and "consensus" in w: + shift = min(0.05, w["consensus"]) + w["roic"] += shift + w["consensus"] -= shift + elif lifecycle == "transition": + # 넓은 신뢰구간 (여기서는 가중치보다 confidence에 반영) + if "consensus" in w and "timeseries" in w: + shift = min(0.1, w["timeseries"]) + w["consensus"] += shift + w["timeseries"] -= shift + # decline: 기본 가중치 유지 (시계열 mean_revert가 이미 보수적) + + return w + + +# ══════════════════════════════════════ +# 앙상블 가중치 계산 +# ══════════════════════════════════════ + + +def _computeWeights( + tsAvailable: bool, + consensusItems: list[tuple[int, float, str]], + roicGrowth: float | None, + structuralBreak: dict | None = None, +) -> dict[str, float]: + """소스별 가중치 계산. + + structuralBreak가 전달되면 구조변화 심각도에 따라 + 시계열 가중치를 삭감하고 컨센서스로 이전한다. + """ + weights: dict[str, float] = {} + + hasConsensusEst = any(src.endswith("_consensus") for _, _, src in consensusItems) + + if tsAvailable and hasConsensusEst: + weights["timeseries"] = 0.40 + weights["consensus"] = 0.45 + elif hasConsensusEst: + weights["consensus"] = 1.0 + else: + weights["timeseries"] = 1.0 + + # 구조변화 감지 시 시계열 가중치 삭감 + if structuralBreak and "timeseries" in weights: + revenueBreak = any(m.get("hasBreak") for m in structuralBreak.get("metrics", []) if m.get("name") == "revenue") + stability = structuralBreak.get("overallStability", "stable") + + if revenueBreak or stability == "volatile": + # volatile(2+ breaks): 60% 삭감, transitioning(1 break): 40% 삭감 + penalty = 0.6 if stability == "volatile" else 0.4 + reduction = weights["timeseries"] * penalty + weights["timeseries"] -= reduction + if "consensus" in weights: + weights["consensus"] += reduction + # consensus 없으면 삭감만 (총 가중치 < 1.0 → 정규화에서 보정) + + # ROIC 소스: 시계열에서 할당 + if roicGrowth is not None and "timeseries" in weights: + roicShare = min(_ROIC_WEIGHT, weights["timeseries"]) + weights["roic"] = roicShare + weights["timeseries"] -= roicShare + + return weights + + +# ══════════════════════════════════════ +# 세그먼트 Bottom-Up 예측 +# ══════════════════════════════════════ + +# 세그먼트 가중치: 시계열에서 할당 +_SEGMENT_WEIGHT = 0.25 + +# 수주잔고 선행 시그널 가중치 +_BACKLOG_WEIGHT = 0.15 + + +def _extractSegmentForecasts( + segmentRevenue: object, # pl.DataFrame | None (TYPE_CHECKING 회피) + horizon: int = 3, +) -> list[SegmentForecast]: + """세그먼트별 개별 시계열 예측.""" + if segmentRevenue is None: + return [] + + import importlib.util + + if importlib.util.find_spec("polars") is None: + return [] + + df = segmentRevenue + if not hasattr(df, "columns") or "부문" not in df.columns: + return [] + + # 연도 컬럼 추출 (숫자만) + yearCols = sorted( + [c for c in df.columns if c != "부문" and c.isdigit()], + key=int, + ) + if len(yearCols) < 3: + return [] + + totalLatest = 0.0 + segmentLatest: dict[str, float] = {} + + results: list[SegmentForecast] = [] + for row in df.iter_rows(named=True): + name = row.get("부문", "") + if not name: + continue + + # 시계열 추출 (오래된 순서로) + vals = [row.get(y) for y in yearCols] + valid = [(i, v) for i, v in enumerate(vals) if v is not None and v > 0] + if len(valid) < 3: + continue + + # 최근 매출 (비중 계산용) + latest = valid[-1][1] + segmentLatest[name] = latest + totalLatest += latest + + # forecastMetric에 넣기 위한 가짜 series dict 구성 + fakeSeries = { + "IS": {"sales": [v for _, v in valid]}, + } + fr = forecastMetric(fakeSeries, "revenue", horizon) + if not fr.projected: + continue + + # 라이프사이클 판정 + lc, _ = _classifyLifecycle(fakeSeries) + + # 성장률 계산 + growthRates: list[float] = [] + prevVal = latest + for p in fr.projected: + if prevVal > 0: + growthRates.append(round((p / prevVal - 1) * 100, 1)) + else: + growthRates.append(0.0) + prevVal = p + + results.append( + SegmentForecast( + name=name, + historical=[v for _, v in valid], + projected=fr.projected, + growthRates=growthRates, + method=fr.method, + shareOfRevenue=0.0, # 후처리에서 계산 + lifecycle=lc, + ) + ) + + # 비중 계산 + if totalLatest > 0: + for sf in results: + latestRev = segmentLatest.get(sf.name, 0) + sf.shareOfRevenue = round(latestRev / totalLatest * 100, 1) + + # 비중 내림차순 정렬 + results.sort(key=lambda x: x.shareOfRevenue, reverse=True) + return results + + +def _segmentBottomUpGrowth( + segmentForecasts: list[SegmentForecast], + horizon: int, + lastRevenue: float | None, +) -> list[float]: + """세그먼트별 예측을 합산하여 Bottom-Up 성장률 시계열 생성.""" + if not segmentForecasts or not lastRevenue or lastRevenue <= 0: + return [] + + growthRates: list[float] = [] + # 세그먼트 합산: 각 연도별 세그먼트 projected 합 + prevTotal = sum(sf.historical[-1] for sf in segmentForecasts if sf.historical) + if prevTotal <= 0: + return [] + + for yr in range(horizon): + yrTotal = 0.0 + for sf in segmentForecasts: + if yr < len(sf.projected): + yrTotal += sf.projected[yr] + elif sf.projected: + yrTotal += sf.projected[-1] + if prevTotal > 0: + growthRates.append((yrTotal / prevTotal - 1) * 100) + else: + growthRates.append(0.0) + prevTotal = yrTotal + + return growthRates + + +# ══════════════════════════════════════ +# 수주잔고 선행지표 (Source 6) +# ══════════════════════════════════════ + + +def _computeBacklogSignal( + orderDf: object, # pl.DataFrame | None + salesDf: object, # pl.DataFrame | None + sectorKey: str | None = None, +) -> BacklogSignal | None: + """수주잔고 기반 선행 시그널 계산.""" + if orderDf is None or salesDf is None: + return None + + if not hasattr(orderDf, "columns") or not hasattr(salesDf, "columns"): + return None + + try: + # 수주잔고 합산 (모든 행의 마지막 value 컬럼 합) + orderValCols = [c for c in orderDf.columns if c != "label"] + salesValCols = [c for c in salesDf.columns if c != "label"] + + if not orderValCols or not salesValCols: + return None + + # 최신 기간 수주잔고 합산 + latestOrderCol = orderValCols[0] # 첫 컬럼이 최근 + latestSalesCol = salesValCols[0] + + orderTotal = 0.0 + for row in orderDf.iter_rows(named=True): + v = row.get(latestOrderCol) + if v is not None and isinstance(v, (int, float)): + orderTotal += abs(v) + + salesTotal = 0.0 + for row in salesDf.iter_rows(named=True): + v = row.get(latestSalesCol) + if v is not None and isinstance(v, (int, float)): + salesTotal += abs(v) + + if salesTotal <= 0 or orderTotal <= 0: + return None + + brRatio = orderTotal / salesTotal + + # B/R ratio 추세 (2기간 이상 필요) + brRatios: list[float] = [] + nPeriods = min(len(orderValCols), len(salesValCols)) + for i in range(min(nPeriods, 3)): + oCol = orderValCols[i] + sCol = salesValCols[i] + oSum = sum( + abs(row.get(oCol, 0) or 0) + for row in orderDf.iter_rows(named=True) + if isinstance(row.get(oCol), (int, float)) + ) + sSum = sum( + abs(row.get(sCol, 0) or 0) + for row in salesDf.iter_rows(named=True) + if isinstance(row.get(sCol), (int, float)) + ) + if sSum > 0: + brRatios.append(oSum / sSum) + + # 추세 판단 + if len(brRatios) >= 2: + if brRatios[0] > brRatios[-1] * 1.05: + trend = "increasing" + elif brRatios[0] < brRatios[-1] * 0.95: + trend = "declining" + else: + trend = "stable" + else: + trend = "stable" + + # 내재 매출 성장률: B/R ratio 변화 → 매출 성장 추정 + if len(brRatios) >= 2 and brRatios[-1] > 0: + impliedGrowth = (brRatios[0] / brRatios[-1] - 1) * 100 + else: + impliedGrowth = 0.0 + + # 전환율: 역사적 평균 (매출/수주잔고) + conversionRate = 1.0 / brRatio if brRatio > 0 else 0.0 + + # 건설/조선/방산: 수주잔고가 특히 강한 선행지표인 섹터 (정보 목적) + _strongSectors = {"건설", "조선", "방산", "건설/토목", "조선/기계"} + isApplicable = bool(sectorKey and any(s in sectorKey for s in _strongSectors)) + + return BacklogSignal( + backlogRevenueRatio=round(brRatio, 2), + brRatioTrend=trend, + impliedRevenueGrowth=round(impliedGrowth, 1), + conversionRate=round(conversionRate, 3), + sectorsApplicable=isApplicable, + ) + except (TypeError, ValueError, KeyError): + return None + + +# ══════════════════════════════════════ +# 3-시나리오 빌더 (Base/Bull/Bear) +# ══════════════════════════════════════ + +# 라이프사이클별 spread 배수 (1σ 대비) +_LIFECYCLE_SPREAD = { + "high_growth": 1.5, + "mature": 0.7, + "transition": 2.0, + "decline": 1.2, + "unknown": 1.0, +} + + +def _buildScenarios( + projected: list[float], + growthRates: list[float], + historical: list[Optional[float]], + lifecycle: str, + lastRevenue: float | None, + structuralBreak: dict | None = None, +) -> tuple[dict[str, list[float]], dict[str, list[float]], dict[str, float]]: + """Base/Bull/Bear 3-시나리오 생성.""" + if not projected or not lastRevenue or lastRevenue <= 0: + return {}, {}, {} + + # 과거 성장률 변동성 (σ) 계산 + validHist = [v for v in historical if v is not None and v > 0] + histGrowth: list[float] = [] + for i in range(1, len(validHist)): + if validHist[i - 1] > 0: + histGrowth.append((validHist[i] / validHist[i - 1] - 1) * 100) + + if histGrowth: + meanG = sum(histGrowth) / len(histGrowth) + variance = sum((g - meanG) ** 2 for g in histGrowth) / max(len(histGrowth) - 1, 1) + sigma = math.sqrt(variance) + else: + sigma = 5.0 # 기본 5%p + + # 최소 sigma 보장 (너무 좁은 밴드 방지) + sigma = max(sigma, 3.0) + + spread = _LIFECYCLE_SPREAD.get(lifecycle, 1.0) + + scenarios: dict[str, list[float]] = {"base": list(projected)} + scenarioGrs: dict[str, list[float]] = {"base": list(growthRates)} + + # Bull / Bear + for label, direction in [("bull", 1.0), ("bear", -1.0)]: + scProjected: list[float] = [] + scGrs: list[float] = [] + prev = lastRevenue + for i, gr in enumerate(growthRates): + # 시간 감쇠: 멀수록 불확실성 증가 + timeFactor = 1.0 + i * 0.15 + adjGr = gr + direction * sigma * spread * timeFactor + # Bull cap: 2× base growth, Bear floor: -base growth (mature 이상) + if direction > 0: + adjGr = min(adjGr, max(gr * 2, gr + 20)) + else: + if lifecycle != "decline": + adjGr = max(adjGr, min(gr * 0.5, gr - 20)) + val = prev * (1 + adjGr / 100) + scProjected.append(val) + scGrs.append(round(adjGr, 1)) + prev = val + scenarios[label] = scProjected + scenarioGrs[label] = scGrs + + # 구조변화 감지 시 시나리오 확률 조정 (하방 리스크 확대) + stability = structuralBreak.get("overallStability", "stable") if structuralBreak else "stable" + if stability == "volatile": + probabilities = {"base": 40.0, "bull": 20.0, "bear": 40.0} + elif stability == "transitioning": + probabilities = {"base": 45.0, "bull": 22.0, "bear": 33.0} + else: + probabilities = {"base": 50.0, "bull": 25.0, "bear": 25.0} + + return scenarios, scenarioGrs, probabilities + + +# ══════════════════════════════════════ +# 메인 예측 함수 +# ══════════════════════════════════════ + + +def forecastRevenue( + series: dict, + stockCode: str | None = None, + sectorKey: str | None = None, + market: str = "KR", + horizon: int = 3, + companyData: CompanyDataBundle | None = None, + currency: str = "KRW", +) -> RevenueForecastResult: + """매출액 앙상블 예측.""" + warnings: list[str] = [] + assumptions: list[str] = [] + + # ── 라이프사이클 판별 ── + lifecycle, lifecycleDetail = _classifyLifecycle(series) + + # ── Source 1: 시계열 예측 (기존 forecast.py) ── + tsResult = forecastMetric(series, "revenue", horizon) + tsAvailable = len(tsResult.projected) > 0 + + # 과거 매출 시계열 (revenue 키 조회) + historical = tsResult.historical + + # 최근 매출 (앙상블 기준점) + validHist = [v for v in historical if v is not None] + lastRevenue = validHist[-1] if validHist else None + + # ── Source 2: 컨센서스 (KR: 네이버, US+: Yahoo) ── + consensusItems: list[tuple[int, float, str]] = [] + if stockCode: + consensusItems = _fetchConsensusRevenue(stockCode, market) + if not consensusItems and market != "KR": + warnings.append(f"컨센서스 수집 실패({market}) — 시계열 기반 예측") + + # ── Source 4: ROIC 기반 내재 성장 ── + roicGrowth, roicDetail = _fundamentalGrowth(series) + roicGrowthRate: float | None = roicGrowth # % 단위 + + # ── Source 5: 세그먼트 Bottom-Up ── + segmentForecasts: list[SegmentForecast] = [] + segGrowthRates: list[float] = [] + if companyData and companyData.segmentRevenue is not None: + segmentForecasts = _extractSegmentForecasts( + companyData.segmentRevenue, + horizon, + ) + if segmentForecasts: + segGrowthRates = _segmentBottomUpGrowth( + segmentForecasts, + horizon, + lastRevenue, + ) + + # ── Source 6: 수주잔고 선행지표 ── + backlogSignal: BacklogSignal | None = None + if companyData and companyData.orderDf is not None: + backlogSignal = _computeBacklogSignal( + companyData.orderDf, + companyData.salesDf, + sectorKey, + ) + + # ── 가중치 계산 ── + _sb = companyData.structuralBreak if companyData else None + weights = _computeWeights(tsAvailable, consensusItems, roicGrowth, structuralBreak=_sb) + + # v3 소스 가중치 할당 (시계열에서 할당) + if segGrowthRates and "timeseries" in weights: + segShare = min(_SEGMENT_WEIGHT, weights["timeseries"]) + weights["segments"] = segShare + weights["timeseries"] -= segShare + + if backlogSignal and "timeseries" in weights: + blShare = min(_BACKLOG_WEIGHT, weights["timeseries"]) + weights["backlog"] = blShare + weights["timeseries"] -= blShare + + # 시계열 최소 보장: 과도 희석 방지 (최소 0.10) + _TS_FLOOR = 0.10 + if "timeseries" in weights and weights["timeseries"] < _TS_FLOOR: + deficit = _TS_FLOOR - weights["timeseries"] + weights["timeseries"] = _TS_FLOOR + # 부족분을 다른 v3 소스에서 비례 차감 + v3Keys = [k for k in ("segments", "backlog") if k in weights and weights[k] > 0] + if v3Keys: + totalV3 = sum(weights[k] for k in v3Keys) + for k in v3Keys: + weights[k] -= deficit * (weights[k] / totalV3) + weights[k] = max(weights[k], 0.0) + + # 라이프사이클 기반 가중치 조정 + weights = _lifecycleWeightAdjustments(lifecycle, weights) + + # 시계열 최소 보장 재확인 (라이프사이클 조정 후) + if "timeseries" in weights and weights["timeseries"] < _TS_FLOOR: + deficit = _TS_FLOOR - weights["timeseries"] + weights["timeseries"] = _TS_FLOOR + v3Keys2 = [k for k in ("segments", "backlog") if k in weights and weights[k] > 0] + if v3Keys2: + totalV3_2 = sum(weights[k] for k in v3Keys2) + if totalV3_2 > 0: + for k in v3Keys2: + weights[k] -= deficit * (weights[k] / totalV3_2) + weights[k] = max(weights[k], 0.0) + + # ── 앙상블 ── + projected: list[float] = [] + consensusRevenue: list[float] = [] + + # 컨센서스에서 전체 매출 시계열 (actual + estimate) 구축 + consensusByYear: dict[int, tuple[float, str]] = {} # year → (revenue_원, source) + if consensusItems: + for fy, rev, src in consensusItems: + if rev > 0: + consensusByYear[fy] = (rev * 1e8, src) # 억원 → 원 + + # 컨센서스 estimate만 추출 + consensusProj: dict[int, float] = {} + for fy, (revWon, src) in consensusByYear.items(): + if src.endswith("_consensus"): + consensusProj[fy] = revWon + consensusRevenue.append(revWon) + + # 기준 연도: 컨센서스 actual 중 가장 최근 + baseYear = 0 + lastActualRevenue: float | None = None + actualsSorted = sorted( + [(fy, rev) for fy, (rev, src) in consensusByYear.items() if src.endswith("_actual")], + key=lambda x: x[0], + ) + if actualsSorted: + baseYear = actualsSorted[-1][0] + lastActualRevenue = actualsSorted[-1][1] + if baseYear == 0: + baseYear = 2025 + + # lastRevenue를 컨센서스 actual과 동기화 (더 신뢰할 수 있으므로) + if lastActualRevenue: + lastRevenue = lastActualRevenue + + # 시계열 성장률: projected 간 YoY 성장률 (분기 데이터이므로 자체 기준 비교) + tsGrowthRates: list[float] = [] + if tsAvailable and tsResult.projected: + prev = tsResult.historical[-1] if tsResult.historical and tsResult.historical[-1] else None + for p in tsResult.projected: + if prev and prev > 0 and p > 0: + tsGrowthRates.append((p / prev - 1) * 100) + else: + tsGrowthRates.append(tsResult.growthRate) + prev = p + + # 컨센서스 성장률 계산 + conGrowthRates: list[float] = [] + sortedConYears = sorted(consensusProj.keys()) + for i, fy in enumerate(sortedConYears): + if i == 0: + # 첫 컨센서스 연도: actual 대비 성장률 + if lastRevenue and lastRevenue > 0: + conGrowthRates.append((consensusProj[fy] / lastRevenue - 1) * 100) + else: + conGrowthRates.append(0.0) + else: + prevFy = sortedConYears[i - 1] + prevRev = consensusProj[prevFy] + if prevRev > 0: + conGrowthRates.append((consensusProj[fy] / prevRev - 1) * 100) + else: + conGrowthRates.append(0.0) + + # ROIC 성장률: horizon 동안 일정 (내재 성장은 구조적) + roicG = roicGrowthRate if roicGrowthRate is not None else 0.0 + + # ROIC vs 시계열 괴리 감지 + roicTsGap: float | None = None + if roicGrowthRate is not None and tsGrowthRates: + avgTsG = sum(tsGrowthRates) / len(tsGrowthRates) + roicTsGap = roicGrowthRate - avgTsG + if abs(roicTsGap) > 10: + warnings.append( + f"ROIC 내재 성장률({roicGrowthRate:.1f}%)과 시계열 성장률({avgTsG:.1f}%) 괴리 {roicTsGap:+.1f}%p" + ) + + # 앙상블: 성장률 기반 블렌딩 (스케일 불일치 방지) + prevRevenue = lastRevenue or 0 + for yrOffset in range(1, horizon + 1): + if prevRevenue <= 0: + break + + # 시계열 성장률 + tsG = ( + tsGrowthRates[yrOffset - 1] + if yrOffset <= len(tsGrowthRates) + else (tsGrowthRates[-1] if tsGrowthRates else 0.0) + ) + + # 컨센서스 성장률 + conG = conGrowthRates[yrOffset - 1] if yrOffset <= len(conGrowthRates) else None + + # 가중 성장률 계산 + blendedGrowth = 0.0 + if conG is not None and "consensus" in weights: + blendedGrowth += conG * weights.get("consensus", 0) + blendedGrowth += tsG * weights.get("timeseries", 0) + else: + # 컨센서스 없는 연도 → 시계열이 컨센서스 몫도 흡수 + blendedGrowth += tsG * (weights.get("timeseries", 0) + weights.get("consensus", 0)) + + blendedGrowth += roicG * weights.get("roic", 0) + + # 세그먼트 Bottom-Up 성장률 + if segGrowthRates and "segments" in weights: + segG = ( + segGrowthRates[yrOffset - 1] + if yrOffset <= len(segGrowthRates) + else (segGrowthRates[-1] if segGrowthRates else 0.0) + ) + blendedGrowth += segG * weights.get("segments", 0) + + # 수주잔고 내재 성장률 + if backlogSignal and "backlog" in weights: + # 수주잔고 신호는 horizon 동안 감쇠 + decay = max(0.5, 1.0 - (yrOffset - 1) * 0.2) + blendedGrowth += backlogSignal.impliedRevenueGrowth * decay * weights.get("backlog", 0) + + projVal = prevRevenue * (1 + blendedGrowth / 100) + projected.append(projVal) + prevRevenue = projVal + + # ── 스키마 보장: projected가 horizon보다 적으면 패딩 ── + while len(projected) < horizon: + if projected: + projected.append(projected[-1]) + elif lastRevenue and lastRevenue > 0: + projected.append(lastRevenue) + else: + projected.append(0.0) + + # ── 성장률 계산 ── + growthRates: list[float] = [] + for i, proj in enumerate(projected): + if i == 0 and lastRevenue and lastRevenue > 0: + growthRates.append((proj / lastRevenue - 1) * 100) + elif i > 0 and projected[i - 1] > 0: + growthRates.append((proj / projected[i - 1] - 1) * 100) + else: + growthRates.append(0.0) + + while len(growthRates) < horizon: + growthRates.append(0.0) + + # ── 메서드 & 신뢰도 결정 ── + activeSources = [s for s in weights if weights[s] > 0] + if not activeSources: + activeSources = ["timeseries"] + method = "ensemble" if len(activeSources) > 1 else f"{activeSources[0]}_only" + + # 신뢰도: 소스 수 + 시계열 R² + 컨센서스 유무 + 라이프사이클 + if len(activeSources) >= 3 and tsResult.rSquared > 0.5: + confidence = "high" + elif len(activeSources) >= 2 and (tsAvailable or consensusProj): + confidence = "medium" if lifecycle != "transition" else "low" + elif tsAvailable or consensusProj: + confidence = "medium" + else: + confidence = "low" + + # transition → 최대 medium + if lifecycle == "transition" and confidence == "high": + confidence = "medium" + + # 비-KR 시장에서 컨센서스 없으면 → 최대 medium + if market != "KR" and not consensusProj: + if confidence == "high": + confidence = "medium" + + # ── 예측 불가 판정 (2개 이상 조건 동시 충족 시 거부) ── + _unfConditions: list[str] = [] + if confidence == "low" and lifecycle == "transition": + _unfConditions.append("전환기 기업 + 낮은 신뢰도") + if tsResult.rSquared < 0.1 and not consensusProj: + _unfConditions.append("시계열 R²<0.1 + 컨센서스 없음") + if _sb and _sb.get("overallStability") == "volatile" and confidence != "high": + _unfConditions.append("다중 구조변화 + 높지 않은 신뢰도") + + _forecastable = len(_unfConditions) < 2 + _unfReason = "; ".join(_unfConditions) if not _forecastable else "" + if not _forecastable: + warnings.append(f"예측 불가 판정: {_unfReason}") + + # ── 스키마 보장: sourceWeights 합이 1.0 ── + wSum = sum(v for v in weights.values() if v > 0) + if wSum > 0 and abs(wSum - 1.0) > 0.01: + for k in weights: + if weights[k] > 0: + weights[k] = weights[k] / wSum + + finalWeights = {k: round(v, 2) for k, v in weights.items() if v > 0} + if not finalWeights: + finalWeights = {"timeseries": 1.0} + # 반올림 오차 보정: 가장 큰 가중치에 잔여분 할당 + wTotal = sum(finalWeights.values()) + if abs(wTotal - 1.0) > 0.001 and finalWeights: + maxKey = max(finalWeights, key=finalWeights.get) # type: ignore[arg-type] + finalWeights[maxKey] = round(finalWeights[maxKey] + (1.0 - wTotal), 2) + + # ── 가정 설명 (정규화된 가중치 기준) ── + for src, w in finalWeights.items(): + if w > 0: + if src == "timeseries": + assumptions.append(f"시계열({w:.0%}): {tsResult.method}, R²={tsResult.rSquared:.2f}") + elif src == "consensus": + nEst = len(consensusProj) + assumptions.append(f"컨센서스({w:.0%}): 네이버 금융 {nEst}개년 추정치") + elif src == "roic": + assumptions.append(f"ROIC({w:.0%}): g=ROIC×재투자율={roicGrowthRate:.1f}%") + + if lifecycle != "unknown": + assumptions.append( + f"라이프사이클: {lifecycle} (CAGR {lifecycleDetail.get('cagr_3y', 'N/A')}%, CV {lifecycleDetail.get('cv', 'N/A')})" + ) + + # ── AI 컨텍스트 (Tier 2 브릿지) ── + conTsGap: float | None = None + if conGrowthRates and tsGrowthRates: + avgCon = sum(conGrowthRates) / len(conGrowthRates) + avgTs = sum(tsGrowthRates) / len(tsGrowthRates) + conTsGap = avgCon - avgTs + + avgGrowth = sum(growthRates) / len(growthRates) if growthRates else 0.0 + aiContext: dict = { + "base_growth": round(avgGrowth, 2), + "lifecycle": lifecycle, + "lifecycle_detail": lifecycleDetail, + "market": market, + "sources_used": list(finalWeights.keys()), + "ts_method": tsResult.method, + "ts_r_squared": tsResult.rSquared, + "roic_growth": round(roicGrowthRate, 2) if roicGrowthRate is not None else None, + "roic_detail": roicDetail if roicDetail else None, + "roic_ts_gap": round(roicTsGap, 2) if roicTsGap is not None else None, + "consensus_vs_ts_gap": round(conTsGap, 2) if conTsGap is not None else None, + "sector_key": sectorKey, + "key_assumptions": assumptions.copy(), + "uncertainty_flags": [], + } + + # 불확실성 플래그 + if lifecycle == "transition": + aiContext["uncertainty_flags"].append("전환기 기업 — 과거 추세 신뢰도 낮음") + if roicTsGap is not None and abs(roicTsGap) > 10: + aiContext["uncertainty_flags"].append(f"ROIC-시계열 괴리 {roicTsGap:+.1f}%p") + if conTsGap is not None and abs(conTsGap) > 15: + aiContext["uncertainty_flags"].append(f"컨센서스-시계열 괴리 {conTsGap:+.1f}%p") + if not consensusProj: + aiContext["uncertainty_flags"].append("컨센서스 데이터 없음") + + # 구조변화 컨텍스트 (forecastCalcs.py dead code 활성화) + if _sb: + aiContext["structural_break"] = { + "stability": _sb.get("overallStability", "stable"), + "revenue_break": any(m.get("hasBreak") for m in _sb.get("metrics", []) if m.get("name") == "revenue"), + "n_breaks": sum(1 for m in _sb.get("metrics", []) if m.get("hasBreak")), + } + if _sb.get("overallStability") in ("volatile", "transitioning"): + aiContext["uncertainty_flags"].append(f"구조변화 감지 ({_sb['overallStability']}) — 과거 추세 신뢰도 제한") + + # ── v3: 3-시나리오 ── + scenarios, scenarioGrs, scenarioProbs = _buildScenarios( + projected, + [round(g, 1) for g in growthRates], + historical, + lifecycle, + lastRevenue, + structuralBreak=_sb, + ) + + # v3: 세그먼트/수주잔고 AI 컨텍스트 + if segmentForecasts: + aiContext["segment_count"] = len(segmentForecasts) + aiContext["segments_top3"] = [ + {"name": sf.name, "share": sf.shareOfRevenue, "growth": sf.growthRates[0] if sf.growthRates else 0} + for sf in segmentForecasts[:3] + ] + if backlogSignal: + aiContext["backlog"] = { + "br_ratio": backlogSignal.backlogRevenueRatio, + "trend": backlogSignal.brRatioTrend, + "implied_growth": backlogSignal.impliedRevenueGrowth, + "applicable": backlogSignal.sectorsApplicable, + } + + # Forward test 키 생성 (저장은 opt-in) + ftKey = None + if stockCode: + from dartlab.analysis.forecast.forwardTest import generateKey + + ftKey = generateKey(stockCode, horizon) + + return RevenueForecastResult( + historical=historical, + projected=projected, + horizon=horizon, + method=method, + confidence=confidence, + growthRates=[round(g, 1) for g in growthRates], + sources=list(finalWeights.keys()), + sourceWeights=finalWeights, + consensusRevenue=consensusRevenue, + assumptions=assumptions, + warnings=warnings + tsResult.warnings, + aiContext=aiContext, + scenarios=scenarios, + scenarioGrowthRates=scenarioGrs, + scenarioProbabilities=scenarioProbs, + segmentForecasts=segmentForecasts, + backlogSignal=backlogSignal, + forwardTestKey=ftKey, + currency=currency, + forecastable=_forecastable, + unforecastableReason=_unfReason, + ) + + +# ══════════════════════════════════════ +# AI 오버레이 적용 +# ══════════════════════════════════════ + + +_MAX_ANNUAL_ADJ = 10.0 # 연간 보정 ±%p 캡 +_MAX_TOTAL_ADJ = 20.0 # 총 보정 ±%p 캡 + + +def applyAiOverlay( + result: RevenueForecastResult, + overlay: RevenueForecastAIOverlay, +) -> RevenueForecastResult: + """AI 보정을 예측 결과에 적용.""" + if not overlay.reasoning: + log.warning("AI overlay rejected: reasoning 비어있음") + return result + + adj = overlay.growthAdjustment + if not adj or len(adj) < result.horizon: + adj = (adj or []) + [0.0] * result.horizon + adj = adj[: result.horizon] + + # 가드레일: 연간 캡 + adj = [max(min(a, _MAX_ANNUAL_ADJ), -_MAX_ANNUAL_ADJ) for a in adj] + + # 가드레일: 총 캡 + total = sum(abs(a) for a in adj) + if total > _MAX_TOTAL_ADJ: + scale = _MAX_TOTAL_ADJ / total + adj = [a * scale for a in adj] + + # 보정 적용 + newProjected: list[float] = [] + prev = ( + result.projected[0] / (1 + result.growthRates[0] / 100) + if result.projected and result.growthRates and result.growthRates[0] != -100 + else 0 + ) + + for i, (proj, gr) in enumerate(zip(result.projected, result.growthRates)): + newGr = gr + adj[i] + if prev > 0: + newVal = prev * (1 + newGr / 100) + else: + newVal = proj * (1 + adj[i] / 100) + newProjected.append(newVal) + prev = newVal + + newGrowthRates = [] + for i, p in enumerate(newProjected): + if i == 0: + base = ( + result.projected[0] / (1 + result.growthRates[0] / 100) + if result.projected and result.growthRates and result.growthRates[0] != -100 + else 0 + ) + if base > 0: + newGrowthRates.append(round((p / base - 1) * 100, 1)) + else: + newGrowthRates.append(0.0) + elif newProjected[i - 1] > 0: + newGrowthRates.append(round((p / newProjected[i - 1] - 1) * 100, 1)) + else: + newGrowthRates.append(0.0) + + # 시나리오 확률 이동 + newProbs = dict(result.scenarioProbabilities) + if overlay.scenarioShift and newProbs: + for k, shift in overlay.scenarioShift.items(): + if k in newProbs: + newProbs[k] = max(5, min(70, newProbs[k] + shift)) + # 정규화 + pSum = sum(newProbs.values()) + if pSum > 0: + newProbs = {k: round(v / pSum * 100, 1) for k, v in newProbs.items()} + + appliedOverlay = RevenueForecastAIOverlay( + growthAdjustment=adj, + direction=overlay.direction, + magnitude=overlay.magnitude, + scenarioShift=overlay.scenarioShift, + reasoning=overlay.reasoning, + applied=True, + ) + + return RevenueForecastResult( + historical=result.historical, + projected=newProjected, + horizon=result.horizon, + method=result.method, + confidence=overlay.confidence_override + if hasattr(overlay, "confidence_override") and overlay.confidence_override + else result.confidence, # type: ignore[attr-defined] + growthRates=newGrowthRates, + sources=result.sources, + sourceWeights=result.sourceWeights, + consensusRevenue=result.consensusRevenue, + assumptions=result.assumptions + [f"AI 보정: {overlay.direction} ({overlay.magnitude})"], + warnings=result.warnings, + aiContext=result.aiContext, + scenarios=result.scenarios, + scenarioGrowthRates=result.scenarioGrowthRates, + scenarioProbabilities=newProbs, + segmentForecasts=result.segmentForecasts, + backlogSignal=result.backlogSignal, + aiOverlay=appliedOverlay, + forwardTestKey=result.forwardTestKey, + currency=result.currency, + ) diff --git a/src/dartlab/analysis/forecast/scenarioSim.py b/src/dartlab/analysis/forecast/scenarioSim.py new file mode 100644 index 0000000000000000000000000000000000000000..f2c5827a688f3c218add17cbc398cf4f4c50b483 --- /dev/null +++ b/src/dartlab/analysis/forecast/scenarioSim.py @@ -0,0 +1,420 @@ +"""시나리오 시뮬레이터 — 분기별 모니터링 + 행동 추천. + +예측하지 않는다. 시나리오를 설정하고, 분기마다 실적과 비교하여 +어떤 경로 위에 있는지 판정하고, 행동을 추천한다. + +흐름:: + + 1. createSimulation(company, "반도체 회복", revenueGrowth=15) + → 3개 시나리오(bull/base/bear) × ProForma IS/BS/CF + → 과거 계절성으로 Q1~Q4 분기 목표 분해 + → 시나리오별 DCF 적정가치 + + 2. judgeQuarter(sim, "2025Q1", revenue, operatingIncome) + → 매출 + 영업이익 이중 판정 + → 연간 착지 재예측 + → 행동 추천 + +검증: experiments/107_scenarioSim (001~008, 8개 기업 사후 검증 완료). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +from dartlab.core.finance.proforma import ( + ProFormaResult, + ProFormaYear, + build_proforma, + extract_historical_ratios, +) + +# ══════════════════════════════════════ +# 데이터 구조 +# ══════════════════════════════════════ + + +@dataclass +class QuarterJudgment: + """한 분기 실적 판정 결과.""" + + quarter: str + actualRevenue: float + actualOI: float + targetRevBase: float + targetOIBase: float + revDeviation: float # (actual - base) / base × 100 + oiDeviation: float + revPath: str # on_track | outperform | outperform_mild | underperform | underperform_severe + oiPath: str + action: str # 보유 | 비중확대 검토 | 비중축소 검토 | 시나리오 재설정 + reason: str + reforecastRevenue: float # 연간 착지 재예측 + reforecastOI: float + + +@dataclass +class ScenarioSimulation: + """시나리오 시뮬레이션 세션.""" + + stockCode: str + companyName: str + scenarioName: str + baseYear: str + targetYear: str + createdAt: str = field(default_factory=lambda: datetime.now().isoformat()) + + # 시나리오 가정 + revenueGrowthPath: list[float] = field(default_factory=list) + marginBlendWeight: float = 0.5 + + # ProForma 결과 (bull/base/bear) + proformaResults: dict[str, ProFormaResult] = field(default_factory=dict) + + # 분기 목표 + quarterlyRevTargets: dict[str, list[float]] = field(default_factory=dict) + quarterlyOITargets: dict[str, list[float]] = field(default_factory=dict) + + # DCF 적정가치 + dcfPerShare: dict[str, int] = field(default_factory=dict) + + # 판정 이력 + judgments: list[QuarterJudgment] = field(default_factory=list) + + # 계절성 비중 + revSeasonality: list[float] = field(default_factory=list) + oiSeasonality: list[float] = field(default_factory=list) + + +# ══════════════════════════════════════ +# 내부 유틸 +# ══════════════════════════════════════ + + +def _quarterlyValues(is_df: Any, snakeId: str, year: str) -> list[float]: + """IS DataFrame에서 특정 연도의 Q1~Q4 값 추출.""" + row = is_df.filter(is_df["snakeId"] == snakeId) + if row.height == 0: + return [] + vals = [] + for q in range(1, 5): + col = f"{year}Q{q}" + if col in row.columns: + v = row[col].to_list()[0] + vals.append(float(v) if v is not None else 0) + return vals if len(vals) == 4 else [] + + +def _computeSeasonality(is_df: Any, snakeId: str, years: list[str]) -> list[float]: + """과거 N년 Q1~Q4 비중 평균.""" + all_w: list[list[float]] = [] + for y in years: + qv = _quarterlyValues(is_df, snakeId, y) + if len(qv) == 4: + total = sum(abs(v) for v in qv) + if total > 0: + all_w.append([abs(v) / total for v in qv]) + if not all_w: + return [0.25, 0.25, 0.25, 0.25] + n = len(all_w) + avg = [sum(w[q] for w in all_w) / n for q in range(4)] + s = sum(avg) + return [w / s for w in avg] if s > 0 else [0.25] * 4 + + +def _blendWeight(baseOpMargin: float) -> float: + """기준연도 영업이익률 → 과거 비율 가중치. + + 저마진/적자 기업일수록 과거 비율(정상 수준)에 더 의존. + """ + if baseOpMargin < 2.0: + return 0.8 + elif baseOpMargin < 5.0: + return 0.7 + else: + return 0.5 + + +def _judgePath(actual: float, bull: float, base: float, bear: float, tolerance: float = 0.05) -> str: + """실적 vs 시나리오 목표 → 경로 판정.""" + if base == 0: + return "unknown" + dev = (actual - base) / abs(base) + if abs(dev) <= tolerance: + return "on_track" + elif actual >= bull: + return "outperform" + elif actual <= bear: + return "underperform_severe" + elif dev > 0: + return "outperform_mild" + else: + return "underperform" + + +def _decideAction(revPath: str, oiPath: str, history: list[QuarterJudgment]) -> tuple[str, str]: + """매출 + 영업이익 이중 판정 → 행동 추천.""" + severity = { + "outperform": 2, + "outperform_mild": 1, + "on_track": 0, + "underperform": -1, + "underperform_severe": -2, + "unknown": 0, + } + combined = (severity.get(revPath, 0) + severity.get(oiPath, 0)) / 2 + + prevScores = [] + for h in history[-2:]: + ps = (severity.get(h.revPath, 0) + severity.get(h.oiPath, 0)) / 2 + prevScores.append(ps) + consecutiveNeg = all(s < 0 for s in prevScores) if prevScores else False + + if combined >= 1.5: + return "비중확대 검토", "매출+이익 모두 Bull 이상" + elif combined >= 0.5: + if prevScores and prevScores[-1] >= 0.5: + return "비중확대 검토", "2분기 연속 상회" + return "보유 (긍정)", "상회 관찰 중" + elif combined >= -0.5: + return "보유", "시나리오 경로 내" + elif combined >= -1.5: + if consecutiveNeg: + return "비중축소 검토", "2분기 연속 하회" + if oiPath == "underperform_severe" and "underperform" not in revPath: + return "비중축소 검토", "매출은 괜찮으나 마진 붕괴" + return "보유 (경계)", "1분기 하회, 추세 확인 필요" + else: + return "비중축소 검토", "Bear 시나리오 이탈" + + +def _scenarioDCF( + projections: list[ProFormaYear], + wacc_pct: float, + netDebt: float = 0, + shares: int = 1, + terminalGrowth: float = 0.02, +) -> int: + """3년 ProForma FCF → DCF 적정가 (per share).""" + wacc = wacc_pct / 100 + if wacc <= terminalGrowth: + wacc = terminalGrowth + 0.05 + + pvFcf = sum(p.fcf / (1 + wacc) ** (i + 1) for i, p in enumerate(projections)) + + lastP = projections[-1] + normalizedFcf = lastP.ocf - lastP.depreciation + if normalizedFcf <= 0: + normalizedFcf = lastP.ocf * 0.3 + + tv = normalizedFcf * (1 + terminalGrowth) / (wacc - terminalGrowth) + pvTv = tv / (1 + wacc) ** len(projections) + + ev = pvFcf + pvTv + equityValue = ev - netDebt + return int(equityValue / shares) if shares > 0 else 0 + + +# ══════════════════════════════════════ +# 공개 API +# ══════════════════════════════════════ + + +def createSimulation( + company: Any, + scenarioName: str, + revenueGrowth: float | list[float], + *, + baseYear: str = "2024", + targetYear: str = "2025", + bullSpread: float = 1.5, + bearSpread: float = 0.3, + overrides: dict[str, float] | None = None, + shares: int | None = None, +) -> ScenarioSimulation: + """시나리오 시뮬레이션 생성. + + Args: + company: Company 객체. + scenarioName: 사용자 지정 이름 ("반도체 회복" 등). + revenueGrowth: 연간 매출 성장률 (%). float이면 3년 수렴 자동 생성. + baseYear: 기준 연도 (이 해까지의 데이터로 시나리오 설정). + targetYear: 검증 대상 연도. + bullSpread: Bull 성장률 = base × bullSpread. + bearSpread: Bear 성장률 = base × bearSpread. + overrides: ProForma 비율 오버라이드. + shares: 발행주식수 (DCF per share 계산용). + + Returns: + ScenarioSimulation — 3개 시나리오 ProForma + 분기 목표 + DCF. + """ + # 성장률 경로 생성 + if isinstance(revenueGrowth, (int, float)): + g = float(revenueGrowth) + growthPath = [g, g * 0.7, g * 0.5] # 3년 수렴 + else: + growthPath = list(revenueGrowth) + + # series 추출 + 시간 필터 + ts = company._buildFinanceSeries(freq="Q") + fullSeries = ts[0] if isinstance(ts, tuple) else ts + periods = ts[1] if isinstance(ts, tuple) else [] + + cutoff = f"{baseYear}-Q4" + cutIdx = periods.index(cutoff) + 1 if cutoff in periods else len(periods) + series = {stmt: {k: v[:cutIdx] for k, v in fullSeries[stmt].items()} for stmt in ["IS", "BS", "CF"]} + + # 과거 비율 + 기준연도 비율 + ratios = extract_historical_ratios(series) + is_df = company.show("IS") + + rev_base = sum(_quarterlyValues(is_df, "sales", baseYear)) + gp_base = sum(_quarterlyValues(is_df, "gross_profit", baseYear)) + oi_base = sum(_quarterlyValues(is_df, "operating_profit", baseYear)) + + baseGM = gp_base / rev_base * 100 if rev_base else ratios.gross_margin + baseOpMargin = oi_base / rev_base * 100 if rev_base else 0 + + # GM이 0인 경우 (서비스업 등 매출총이익 미보고) → 과거 비율 전적 의존 + if baseGM < 1.0 and ratios.gross_margin > 5.0: + baseGM = ratios.gross_margin + + # 마진 블렌딩 가중치 + hw = _blendWeight(baseOpMargin) + blendedGM = baseGM * (1 - hw) + ratios.gross_margin * hw + + # 3개 시나리오 ProForma + scenarioDefs = [ + ("bull", [g * bullSpread if g > 0 else g * (1 / bullSpread) for g in growthPath], blendedGM + 2), + ("base", growthPath, blendedGM), + ("bear", [g * bearSpread if g > 0 else g * (1 / bearSpread) for g in growthPath], blendedGM - 2), + ] + + pfResults: dict[str, ProFormaResult] = {} + for scName, path, gm in scenarioDefs: + combinedOverrides = {"gross_margin": gm} + if overrides: + combinedOverrides.update(overrides) + try: + pf = build_proforma( + series, + revenue_growth_path=path, + scenario_name=scName, + shares=shares, + overrides=combinedOverrides, + ) + if pf.projections: + pfResults[scName] = pf + except (KeyError, ValueError, ZeroDivisionError, TypeError): + pass + + if "base" not in pfResults: + msg = f"{baseYear} 데이터로 ProForma 생성 실패" + raise ValueError(msg) + + # 계절성 분해 + seasonYears = [str(int(baseYear) - i) for i in range(3) if int(baseYear) - i >= 2019] + revW = _computeSeasonality(is_df, "sales", seasonYears) + oiW = _computeSeasonality(is_df, "operating_profit", seasonYears) + + # 분기 목표 + qRevTargets: dict[str, list[float]] = {} + qOiTargets: dict[str, list[float]] = {} + for sc, pf in pfResults.items(): + p = pf.projections[0] + qRevTargets[sc] = [p.revenue * w for w in revW] + qOiTargets[sc] = [p.operating_income * w for w in oiW] + + # DCF + dcfValues: dict[str, int] = {} + if shares and shares > 0: + for sc, pf in pfResults.items(): + p1 = pf.projections[0] + netDebt = (p1.short_term_debt + p1.long_term_debt) - p1.cash + dcfValues[sc] = _scenarioDCF(pf.projections, pf.wacc, netDebt, shares) + + sim = ScenarioSimulation( + stockCode=company.stockCode, + companyName=getattr(company, "name", company.stockCode), + scenarioName=scenarioName, + baseYear=baseYear, + targetYear=targetYear, + revenueGrowthPath=growthPath, + marginBlendWeight=hw, + proformaResults=pfResults, + quarterlyRevTargets=qRevTargets, + quarterlyOITargets=qOiTargets, + dcfPerShare=dcfValues, + revSeasonality=revW, + oiSeasonality=oiW, + ) + + return sim + + +def judgeQuarter( + simulation: ScenarioSimulation, + quarter: str, + actualRevenue: float, + actualOI: float, +) -> QuarterJudgment: + """분기 실적 판정. + + Args: + simulation: createSimulation()으로 생성한 시뮬레이션. + quarter: 분기 식별자 ("2025Q1"). + actualRevenue: 실제 매출. + actualOI: 실제 영업이익. + + Returns: + QuarterJudgment — 판정 + 행동 + 재예측. + """ + qIdx = int(quarter[-1]) - 1 # Q1=0, Q2=1, ... + + # 목표값 + bullRev = simulation.quarterlyRevTargets.get("bull", [0] * 4)[qIdx] + baseRev = simulation.quarterlyRevTargets["base"][qIdx] + bearRev = simulation.quarterlyRevTargets.get("bear", [0] * 4)[qIdx] + + bullOI = simulation.quarterlyOITargets.get("bull", [0] * 4)[qIdx] + baseOI = simulation.quarterlyOITargets["base"][qIdx] + bearOI = simulation.quarterlyOITargets.get("bear", [0] * 4)[qIdx] + + # 판정 + revPath = _judgePath(actualRevenue, bullRev, baseRev, bearRev) + oiPath = _judgePath(actualOI, bullOI, baseOI, bearOI) + + revDev = (actualRevenue - baseRev) / abs(baseRev) * 100 if baseRev else 0 + oiDev = (actualOI - baseOI) / abs(baseOI) * 100 if baseOI else 0 + + # 행동 + action, reason = _decideAction(revPath, oiPath, simulation.judgments) + + # 재예측 (단순: YTD 실적 + 남은 분기 원래 목표) + allActualsRev = [j.actualRevenue for j in simulation.judgments] + [actualRevenue] + allActualsOI = [j.actualOI for j in simulation.judgments] + [actualOI] + remainRevTargets = simulation.quarterlyRevTargets["base"][qIdx + 1 :] + remainOITargets = simulation.quarterlyOITargets["base"][qIdx + 1 :] + reforecastRev = sum(allActualsRev) + sum(remainRevTargets) + reforecastOI = sum(allActualsOI) + sum(remainOITargets) + + judgment = QuarterJudgment( + quarter=quarter, + actualRevenue=actualRevenue, + actualOI=actualOI, + targetRevBase=baseRev, + targetOIBase=baseOI, + revDeviation=round(revDev, 1), + oiDeviation=round(oiDev, 1), + revPath=revPath, + oiPath=oiPath, + action=action, + reason=reason, + reforecastRevenue=reforecastRev, + reforecastOI=reforecastOI, + ) + + simulation.judgments.append(judgment) + return judgment diff --git a/src/dartlab/analysis/forecast/simulation.py b/src/dartlab/analysis/forecast/simulation.py new file mode 100644 index 0000000000000000000000000000000000000000..9d6f6e89ac6d733fc7c015e33d1fc86bae8c9d89 --- /dev/null +++ b/src/dartlab/analysis/forecast/simulation.py @@ -0,0 +1,903 @@ +"""경제 시나리오 기반 시뮬레이션 예측 엔진. + +3-Layer 구조: +1. MacroScenario — 거시경제 변수 경로 (GDP, 금리, 환율, CPI) +2. SectorElasticity — 업종별 거시경제 감응도 (beta) +3. CompanySimulation — 기업 실적 시뮬레이션 (시나리오 + Monte Carlo + 스트레스) + +외부 의존성 제로 (random 모듈만 사용). +""" + +from __future__ import annotations + +import math +import random +from dataclasses import dataclass, field +from typing import Optional + +from dartlab.core.finance.extract import ( + getAnnualValues, + getLatest, + getTTM, +) +from dartlab.core.finance.fmt import fmtBig, fmtPrice +from dartlab.core.finance.scenario import ( + BASELINE_FX, + BASELINE_RATE, + DEFAULT_ELASTICITY, + PRESET_SCENARIOS, + MacroScenario, + SectorElasticity, + getElasticity, +) +from dartlab.core.sector.types import SectorParams + +# ══════════════════════════════════════ +# Layer 3: 기업 시뮬레이션 +# ══════════════════════════════════════ + +# ── 결과 타입 ── + + +@dataclass +class SimulationResult: + """단일 시나리오 시뮬레이션 결과.""" + + scenarioName: str + scenarioLabel: str + years: int + revenuePath: list[float] + operatingIncomePath: list[float] + marginPath: list[float] + fcfPath: list[float] + dcfValue: float + perShareValue: Optional[float] + revenueChangePct: float + marginChangeBps: float + elasticityUsed: SectorElasticity + assumptions: dict[str, str] = field(default_factory=dict) + warnings: list[str] = field(default_factory=list) + currency: str = "KRW" + + DISCLAIMER: str = "본 시뮬레이션은 참고용이며 실제 경제 상황과 다를 수 있습니다." + + def __repr__(self) -> str: + c = self.currency + lines = [f"[{self.scenarioLabel} 시뮬레이션]"] + lines.append(f" 경기감응도: {self.elasticityUsed}") + for i, (rev, oi, mg) in enumerate( + zip( + self.revenuePath, + self.operatingIncomePath, + self.marginPath, + ) + ): + lines.append(f" +{i + 1}년: 매출 {fmtBig(rev, c)}, 영업이익 {fmtBig(oi, c)}, 마진 {mg:.1f}%") + lines.append(f" 매출 변화: {self.revenueChangePct:+.1f}%") + lines.append(f" 마진 변화: {self.marginChangeBps:+.0f}bps") + if self.perShareValue is not None: + lines.append(f" 주당 가치: {fmtPrice(self.perShareValue, c)}") + if self.warnings: + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +@dataclass +class MonteCarloResult: + """Monte Carlo 시뮬레이션 결과.""" + + iterations: int + scenarioName: str + percentiles: dict[str, dict[str, float]] + expectedValue: float + stdDev: float + var95: float + upsideProbability: float # 현재 대비 상승 확률 (%) + warnings: list[str] = field(default_factory=list) + currency: str = "KRW" + + DISCLAIMER: str = "본 시뮬레이션은 참고용이며 실제 경제 상황과 다를 수 있습니다." + + def __repr__(self) -> str: + c = self.currency + lines = [f"[Monte Carlo — {self.scenarioName} ({self.iterations:,}회)]"] + for metric, pcts in self.percentiles.items(): + p5 = pcts.get("p5", 0) + p50 = pcts.get("p50", 0) + p95 = pcts.get("p95", 0) + lines.append(f" {metric}: P5={fmtBig(p5, c)} P50={fmtBig(p50, c)} P95={fmtBig(p95, c)}") + lines.append(f" 기대값: {fmtBig(self.expectedValue, c)}") + lines.append(f" VaR(95%): {fmtBig(self.var95, c)}") + lines.append(f" 상승 확률: {self.upsideProbability:.0f}%") + if self.warnings: + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +@dataclass +class StressTestResult: + """스트레스 테스트 결과.""" + + scenarioName: str + scenarioLabel: str + year3RevenueChange: float + year3MarginChange: float + year3DebtRatio: Optional[float] + year3CurrentRatio: Optional[float] + year3InterestCoverage: Optional[float] + survivalRisk: str # "low" | "medium" | "high" | "critical" + dividendSustainable: bool + recoveryTimeline: str + warnings: list[str] = field(default_factory=list) + + DISCLAIMER: str = "본 시뮬레이션은 참고용이며 실제 경제 상황과 다를 수 있습니다." + + def __repr__(self) -> str: + lines = [f"[스트레스 테스트 — {self.scenarioLabel}]"] + lines.append(f" 3년 후 매출 변화: {self.year3RevenueChange:+.1f}%") + lines.append(f" 3년 후 마진 변화: {self.year3MarginChange:+.0f}bps") + if self.year3DebtRatio is not None: + lines.append(f" 3년 후 부채비율: {self.year3DebtRatio:.0f}%") + if self.year3CurrentRatio is not None: + lines.append(f" 3년 후 유동비율: {self.year3CurrentRatio:.0f}%") + if self.year3InterestCoverage is not None: + lines.append(f" 3년 후 이자보상배율: {self.year3InterestCoverage:.1f}x") + lines.append(f" 생존 위험도: {self.survivalRisk}") + lines.append(f" 배당 지속: {'가능' if self.dividendSustainable else '불가능'}") + lines.append(f" 회복 전망: {self.recoveryTimeline}") + if self.warnings: + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f" ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +# ── 내부 유틸 ── + + +def _extractBaseMetrics(series: dict) -> dict[str, Optional[float]]: + """현재 기업 기본 지표 추출.""" + rev = getTTM(series, "IS", "sales") or getTTM(series, "IS", "revenue") + oi = getTTM(series, "IS", "operating_profit") or getTTM(series, "IS", "operating_income") + ni = getTTM(series, "IS", "net_profit") or getTTM(series, "IS", "net_income") + ocf = getTTM(series, "CF", "operating_cashflow") + capex = getTTM(series, "CF", "purchase_of_property_plant_and_equipment") + div = getTTM(series, "CF", "dividends_paid") + + margin = (oi / rev * 100) if rev and oi and rev > 0 else None + fcf = (ocf - abs(capex or 0)) if ocf is not None else None + + totalAssets = getLatest(series, "BS", "total_assets") + totalEquity = getLatest(series, "BS", "total_stockholders_equity") or getLatest( + series, "BS", "owners_of_parent_equity" + ) + totalLiab = getLatest(series, "BS", "total_liabilities") + currentAssets = getLatest(series, "BS", "current_assets") + currentLiab = getLatest(series, "BS", "current_liabilities") + cash = getLatest(series, "BS", "cash_and_cash_equivalents") or 0 + stb = getLatest(series, "BS", "shortterm_borrowings") or 0 + ltb = getLatest(series, "BS", "longterm_borrowings") or 0 + bonds = getLatest(series, "BS", "debentures") or 0 + finCosts = getTTM(series, "IS", "finance_costs") or getTTM(series, "IS", "interest_expense") + + debtRatio = (totalLiab / totalEquity * 100) if totalLiab and totalEquity and totalEquity > 0 else None + currentRatio = (currentAssets / currentLiab * 100) if currentAssets and currentLiab and currentLiab > 0 else None + interestCov = (oi / abs(finCosts)) if oi and finCosts and abs(finCosts) > 0 else None + netDebt = stb + ltb + bonds - cash + + return { + "revenue": rev, + "operatingIncome": oi, + "netIncome": ni, + "margin": margin, + "ocf": ocf, + "fcf": fcf, + "capex": capex, + "dividendsPaid": div, + "totalAssets": totalAssets, + "totalEquity": totalEquity, + "totalLiabilities": totalLiab, + "currentAssets": currentAssets, + "currentLiabilities": currentLiab, + "debtRatio": debtRatio, + "currentRatio": currentRatio, + "interestCoverage": interestCov, + "netDebt": netDebt, + "financeCosts": finCosts, + } + + +def _extractVolatility(series: dict) -> dict[str, float]: + """과거 시계열에서 변동성 추출.""" + revVals = getAnnualValues(series, "IS", "sales") or getAnnualValues(series, "IS", "revenue") + oiVals = getAnnualValues(series, "IS", "operating_profit") or getAnnualValues(series, "IS", "operating_income") + + def _std(values: list) -> float: + valid = [v for v in values if v is not None] + if len(valid) < 3: + return 0.1 # 기본값 10% + mean = sum(valid) / len(valid) + if abs(mean) < 1e-12: + return 0.1 + variance = sum((v - mean) ** 2 for v in valid) / (len(valid) - 1) + return math.sqrt(variance) / abs(mean) + + def _marginStd(revList: list, oiList: list) -> float: + margins = [] + for r, o in zip(revList, oiList): + if r is not None and o is not None and r > 0: + margins.append(o / r * 100) + if len(margins) < 3: + return 2.0 # 기본 2%p + mean = sum(margins) / len(margins) + variance = sum((m - mean) ** 2 for m in margins) / (len(margins) - 1) + return math.sqrt(variance) + + return { + "revenueCv": _std(revVals), + "marginStd": _marginStd(revVals, oiVals), + } + + +def _applyMacroShock( + baseRevenue: float, + baseMargin: float, + scenario: MacroScenario, + elasticity: SectorElasticity, + yearIdx: int, + baseWacc: float, +) -> tuple[float, float, float]: + """매크로 충격 적용 -> (조정 매출, 조정 마진%, 조정 할인율).""" + gdp = scenario.gdpGrowth[yearIdx] + rate = scenario.interestRate[yearIdx] + fx = scenario.krwUsd[yearIdx] + + # GDP 충격 + revGdpEffect = elasticity.revenueToGdp * gdp / 100 + + # 환율 충격 (baseline 대비 변화율) + fxChangePct = (fx - BASELINE_FX) / BASELINE_FX * 100 + revFxEffect = elasticity.revenueToFx * fxChangePct / 1000 # 10%당 beta 적용 + + adjustedRevenue = baseRevenue * (1 + revGdpEffect + revFxEffect) + + # 마진 충격 + marginShockBps = elasticity.marginToGdp * gdp / 100 + # NIM 충격 (금융업) + rateChange = rate - BASELINE_RATE + nimShockBps = elasticity.nimToRate * rateChange / 100 + adjustedMargin = baseMargin + marginShockBps + nimShockBps + + # WACC 조정 (금리 변동의 50% 반영) + adjustedWacc = baseWacc + rateChange * 0.5 + + return adjustedRevenue, max(adjustedMargin, -50), adjustedWacc + + +# ── 시나리오 시뮬레이션 ── + + +def simulateScenario( + series: dict, + scenario: MacroScenario | str, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, + shares: Optional[int] = None, +) -> SimulationResult: + """단일 거시경제 시나리오 하에서 3년 실적 경로 시뮬레이션.""" + warnings: list[str] = [] + + # 시나리오 로드 + if isinstance(scenario, str): + sc = PRESET_SCENARIOS.get(scenario) + if sc is None: + return SimulationResult( + scenarioName=scenario, + scenarioLabel="알 수 없음", + years=0, + revenuePath=[], + operatingIncomePath=[], + marginPath=[], + fcfPath=[], + dcfValue=0, + perShareValue=None, + revenueChangePct=0, + marginChangeBps=0, + elasticityUsed=DEFAULT_ELASTICITY, + warnings=[f"미지원 시나리오: {scenario}. 선택지: {', '.join(PRESET_SCENARIOS)}"], + ) + else: + sc = scenario + + elasticity = getElasticity(sectorKey) + base = _extractBaseMetrics(series) + baseWacc = sectorParams.discountRate if sectorParams else 10.0 + + rev = base["revenue"] + margin = base["margin"] + if rev is None or rev <= 0: + return SimulationResult( + scenarioName=sc.name, + scenarioLabel=sc.label, + years=0, + revenuePath=[], + operatingIncomePath=[], + marginPath=[], + fcfPath=[], + dcfValue=0, + perShareValue=None, + revenueChangePct=0, + marginChangeBps=0, + elasticityUsed=elasticity, + warnings=["매출 데이터 부족"], + ) + + if margin is None: + margin = 10.0 + warnings.append("마진 데이터 미확인 -> 10%로 가정") + + capexRatio = abs(base["capex"] or 0) / rev if rev > 0 else 0.05 + taxRate = 0.22 # 한국 법인세 기본 + + # 3년 경로 시뮬레이션 + horizon = min(len(sc.gdpGrowth), 3) + revenuePath: list[float] = [] + oiPath: list[float] = [] + marginPath: list[float] = [] + fcfPath: list[float] = [] + waccPath: list[float] = [] + + prevRev = rev + prevMargin = margin + + for yr in range(horizon): + adjRev, adjMargin, adjWacc = _applyMacroShock( + prevRev, + prevMargin, + sc, + elasticity, + yr, + baseWacc, + ) + adjOi = adjRev * adjMargin / 100 + adjFcf = adjOi * (1 - taxRate) - adjRev * capexRatio + + revenuePath.append(adjRev) + oiPath.append(adjOi) + marginPath.append(adjMargin) + fcfPath.append(adjFcf) + waccPath.append(adjWacc) + + prevRev = adjRev + prevMargin = adjMargin + + # DCF 가치 (시나리오 경로의 FCF 합산) + terminalGrowth = min(sectorParams.growthRate if sectorParams else 3.0, 3.0) + lastWacc = waccPath[-1] if waccPath else baseWacc + + if lastWacc <= terminalGrowth: + terminalGrowth = max(lastWacc - 2.0, 0.5) + + pvSum = sum(fcf / (1 + lastWacc / 100) ** (yr + 1) for yr, fcf in enumerate(fcfPath)) + terminalFcf = fcfPath[-1] if fcfPath else 0 + if terminalFcf > 0: + tv = terminalFcf * (1 + terminalGrowth / 100) / (lastWacc / 100 - terminalGrowth / 100) + pvTv = tv / (1 + lastWacc / 100) ** horizon + else: + tv = 0 + pvTv = 0 + warnings.append("FCF 음수 -> Terminal Value 미적용") + + ev = pvSum + pvTv + netDebt = base["netDebt"] or 0 + equityValue = ev - netDebt + perShare = equityValue / shares if shares and shares > 0 else None + + # 변화율 계산 + finalRev = revenuePath[-1] if revenuePath else rev + revChange = (finalRev - rev) / rev * 100 if rev > 0 else 0 + marginChange = (marginPath[-1] - margin) * 100 if marginPath else 0 # bps + + return SimulationResult( + scenarioName=sc.name, + scenarioLabel=sc.label, + years=horizon, + revenuePath=revenuePath, + operatingIncomePath=oiPath, + marginPath=marginPath, + fcfPath=fcfPath, + dcfValue=ev, + perShareValue=perShare, + revenueChangePct=round(revChange, 1), + marginChangeBps=round(marginChange, 0), + elasticityUsed=elasticity, + assumptions={ + "경기감응도(beta)": f"GDP {elasticity.revenueToGdp:.1f}, FX {elasticity.revenueToFx:.1f}", + "업종 경기민감도": elasticity.cyclicality, + "할인율": f"{baseWacc:.1f}% -> {lastWacc:.1f}%", + "CapEx 비율": f"{capexRatio * 100:.1f}%", + }, + warnings=warnings, + ) + + +def simulateAllScenarios( + series: dict, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, + shares: Optional[int] = None, + scenarios: Optional[list[str]] = None, +) -> dict[str, SimulationResult]: + """모든 사전 정의 시나리오 일괄 시뮬레이션.""" + keys = scenarios or list(PRESET_SCENARIOS.keys()) + return { + key: simulateScenario(series, key, sectorKey, sectorParams, shares) for key in keys if key in PRESET_SCENARIOS + } + + +# ── Monte Carlo 시뮬레이션 ── + + +def monteCarloForecast( + series: dict, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, + shares: Optional[int] = None, + scenario: MacroScenario | str = "baseline", + iterations: int = 10000, + horizon: int = 3, + seed: Optional[int] = None, +) -> MonteCarloResult: + """Monte Carlo 시뮬레이션 (순수 Python).""" + if seed is not None: + random.seed(seed) + + warnings: list[str] = [] + + # 시나리오 로드 + if isinstance(scenario, str): + sc = PRESET_SCENARIOS.get(scenario, PRESET_SCENARIOS["baseline"]) + else: + sc = scenario + + elasticity = getElasticity(sectorKey) + base = _extractBaseMetrics(series) + vol = _extractVolatility(series) + baseWacc = sectorParams.discountRate if sectorParams else 10.0 + + rev = base["revenue"] + margin = base["margin"] + if rev is None or rev <= 0: + return MonteCarloResult( + iterations=iterations, + scenarioName=sc.name, + percentiles={}, + expectedValue=0, + stdDev=0, + var95=0, + upsideProbability=0, + warnings=["매출 데이터 부족"], + ) + if margin is None: + margin = 10.0 + + revCv = min(vol["revenueCv"], 0.5) # 상한 50% + marginStd = min(vol["marginStd"], 10.0) # 상한 10%p + + # 평균 경로 계산 (시나리오 기반) + meanRevPath: list[float] = [] + meanMarginPath: list[float] = [] + prevR, prevM = rev, margin + for yr in range(min(horizon, len(sc.gdpGrowth))): + ar, am, _ = _applyMacroShock(prevR, prevM, sc, elasticity, yr, baseWacc) + meanRevPath.append(ar) + meanMarginPath.append(am) + prevR, prevM = ar, am + + # Monte Carlo 실행 + finalRevenues: list[float] = [] + finalOis: list[float] = [] + finalFcfs: list[float] = [] + + capexRatio = abs(base["capex"] or 0) / rev if rev > 0 else 0.05 + taxRate = 0.22 + + for _ in range(iterations): + simRev = rev + simMargin = margin + for yr in range(len(meanRevPath)): + # 평균 경로에 노이즈 추가 + revNoise = random.gauss(0, revCv) + marginNoise = random.gauss(0, marginStd) + + simRev = meanRevPath[yr] * (1 + revNoise) + simMargin = meanMarginPath[yr] + marginNoise + + simOi = simRev * max(simMargin, -50) / 100 + simFcf = simOi * (1 - taxRate) - simRev * capexRatio + + finalRevenues.append(simRev) + finalOis.append(simOi) + finalFcfs.append(simFcf) + + # 백분위 산출 + def _percentiles(vals: list[float]) -> dict[str, float]: + sortedVals = sorted(vals) + n = len(sortedVals) + return { + "p5": sortedVals[int(n * 0.05)], + "p25": sortedVals[int(n * 0.25)], + "p50": sortedVals[int(n * 0.50)], + "p75": sortedVals[int(n * 0.75)], + "p95": sortedVals[int(n * 0.95)], + } + + percentiles = { + "매출": _percentiles(finalRevenues), + "영업이익": _percentiles(finalOis), + "FCF": _percentiles(finalFcfs), + } + + # 통계 + meanRevFinal = sum(finalRevenues) / iterations + stdDev = math.sqrt(sum((r - meanRevFinal) ** 2 for r in finalRevenues) / (iterations - 1)) + var95 = sorted(finalRevenues)[int(iterations * 0.05)] + upsideProb = sum(1 for r in finalRevenues if r > rev) / iterations * 100 + + if revCv >= 0.4: + warnings.append("과거 매출 변동성 높음 -> 시뮬레이션 신뢰도 낮음") + + return MonteCarloResult( + iterations=iterations, + scenarioName=sc.label, + percentiles=percentiles, + expectedValue=meanRevFinal, + stdDev=stdDev, + var95=var95, + upsideProbability=round(upsideProb, 1), + warnings=warnings, + ) + + +# ── 스트레스 테스트 ── + + +def stressTest( + series: dict, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, + scenario: str = "adverse", +) -> StressTestResult: + """CCAR 스타일 스트레스 테스트.""" + warnings: list[str] = [] + + sim = simulateScenario(series, scenario, sectorKey, sectorParams) + base = _extractBaseMetrics(series) + + sc = PRESET_SCENARIOS.get(scenario, PRESET_SCENARIOS["adverse"]) + + # 3년 후 재무 건전성 추정 + revChange = sim.revenueChangePct + marginChange = sim.marginChangeBps + + # 부채비율 추정: 이익 감소 -> 자본 감소 -> 부채비율 상승 + debtRatio3y = None + if base["debtRatio"] is not None and base["totalEquity"] and base["totalEquity"] > 0: + # 3년간 누적 이익 변화 반영 + cumProfitLoss = sum(sim.operatingIncomePath) * 0.78 if sim.operatingIncomePath else 0 + baselineProfit = (base["operatingIncome"] or 0) * 0.78 * 3 + equityChange = cumProfitLoss - baselineProfit + newEquity = base["totalEquity"] + equityChange + if newEquity > 0: + debtRatio3y = round((base["totalLiabilities"] or 0) / newEquity * 100, 0) + else: + debtRatio3y = 9999 + warnings.append("스트레스 하 자본잠식 위험") + + # 유동비율 추정 + currentRatio3y = base["currentRatio"] + if currentRatio3y is not None and revChange < -10: + currentRatio3y = currentRatio3y * (1 + revChange / 100 * 0.3) # 보수적 조정 + + # 이자보상배율 + intCov3y = None + if sim.operatingIncomePath and base["financeCosts"] and abs(base["financeCosts"]) > 0: + intCov3y = round(sim.operatingIncomePath[-1] / abs(base["financeCosts"]), 1) + + # 배당 지속 가능성 + divSustainable = True + if base["dividendsPaid"] and sim.fcfPath: + finalFcf = sim.fcfPath[-1] + divAmount = abs(base["dividendsPaid"] or 0) + if finalFcf < divAmount: + divSustainable = False + + # 생존 위험도 판단 + riskScore = 0 + if revChange < -20: + riskScore += 2 + elif revChange < -10: + riskScore += 1 + + if debtRatio3y is not None and debtRatio3y > 300: + riskScore += 2 + elif debtRatio3y is not None and debtRatio3y > 200: + riskScore += 1 + + if intCov3y is not None and intCov3y < 1: + riskScore += 2 + elif intCov3y is not None and intCov3y < 2: + riskScore += 1 + + if not divSustainable: + riskScore += 1 + + if riskScore >= 5: + survivalRisk = "critical" + elif riskScore >= 3: + survivalRisk = "high" + elif riskScore >= 1: + survivalRisk = "medium" + else: + survivalRisk = "low" + + # 회복 전망 + elasticity = getElasticity(sectorKey) + if elasticity.cyclicality == "high": + recovery = "V자 반등 가능 (경기민감 업종)" + elif elasticity.cyclicality == "defensive": + recovery = "안정적 — 충격 자체가 제한적" + else: + recovery = "점진적 회복 (1~2년)" + + return StressTestResult( + scenarioName=sc.name, + scenarioLabel=sc.label, + year3RevenueChange=round(revChange, 1), + year3MarginChange=round(marginChange, 0), + year3DebtRatio=debtRatio3y, + year3CurrentRatio=round(currentRatio3y, 0) if currentRatio3y else None, + year3InterestCoverage=intCov3y, + survivalRisk=survivalRisk, + dividendSustainable=divSustainable, + recoveryTimeline=recovery, + warnings=warnings, + ) + + +# ══════════════════════════════════════ +# 역사적 충격 재현 +# ══════════════════════════════════════ + + +# 실제 과거 거시경제 경로 (ECOS/FRED 데이터 기반) +HISTORICAL_SCENARIOS: dict[str, MacroScenario] = { + "gfc_2008": MacroScenario( + "gfc_2008", + "2008 글로벌 금융위기", + gdpGrowth=[-5.1, 0.7, 6.5], # 2009, 2010, 2011 (한국 실제) + interestRate=[2.0, 2.0, 2.5], + krwUsd=[1276, 1156, 1108], + cpi=[2.8, 2.9, 4.0], + description="실제 2008-2010 한국 거시경제 경로", + ), + "covid_2020": MacroScenario( + "covid_2020", + "2020 코로나 팬데믹", + gdpGrowth=[-0.7, 4.3, 2.6], # 2020, 2021, 2022 (한국 실제) + interestRate=[0.5, 0.75, 3.5], + krwUsd=[1180, 1185, 1292], + cpi=[0.5, 2.5, 5.1], + description="실제 2020-2022 한국 거시경제 경로", + ), + "euro_crisis_2011": MacroScenario( + "euro_crisis_2011", + "2011 유럽 재정위기", + gdpGrowth=[3.7, 2.4, 3.2], # 2012, 2013, 2014 (한국) + interestRate=[3.0, 2.5, 2.0], + krwUsd=[1126, 1055, 1053], + cpi=[2.2, 1.3, 1.3], + description="실제 2011-2013 한국 거시경제 경로 (유럽위기 파급)", + ), + "rate_hike_2022": MacroScenario( + "rate_hike_2022", + "2022 긴축 충격", + gdpGrowth=[2.6, 1.4, 2.0], # 2022, 2023, 2024 + interestRate=[3.5, 3.5, 3.0], + krwUsd=[1292, 1306, 1380], + cpi=[5.1, 3.6, 2.3], + description="실제 2022-2024 글로벌 긴축 + 인플레이션", + ), +} + + +def simulateHistorical( + series: dict, + historicalKey: str, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, + shares: Optional[int] = None, + learnedBetas: Optional[dict[str, float]] = None, +) -> SimulationResult: + """역사적 충격 재현 시뮬레이션. + + "2008년 금융위기가 다시 오면 이 기업은 어떻게 되는가?" + + Args: + historicalKey: "gfc_2008", "covid_2020", "euro_crisis_2011", "rate_hike_2022" + learnedBetas: calcMacroRegression에서 학습된 기업별 베타. + None이면 정적 탄성치 사용. + """ + sc = HISTORICAL_SCENARIOS.get(historicalKey) + if sc is None: + available = ", ".join(HISTORICAL_SCENARIOS.keys()) + # 빈 결과 반환 + return SimulationResult( + scenarioName=historicalKey, + scenarioLabel="알 수 없음", + years=0, + revenuePath=[], + operatingIncomePath=[], + marginPath=[], + fcfPath=[], + dcfValue=0, + perShareValue=None, + revenueChangePct=0, + marginChangeBps=0, + elasticityUsed=DEFAULT_ELASTICITY, + warnings=[f"미지원 역사 시나리오: {historicalKey}. 선택지: {available}"], + ) + + # 학습된 베타가 있으면 탄성치 오버라이드 + if learnedBetas: + elasticity = SectorElasticity( + revenueToGdp=learnedBetas.get("gdp", DEFAULT_ELASTICITY.revenueToGdp), + revenueToFx=learnedBetas.get("fx", DEFAULT_ELASTICITY.revenueToFx), + marginToGdp=learnedBetas.get("rate", DEFAULT_ELASTICITY.marginToGdp), + nimToRate=0, + cyclicality="learned", + ) + else: + elasticity = getElasticity(sectorKey) + + result = simulateScenario(series, sc, sectorKey, sectorParams, shares) + # 탄성치를 학습값으로 교체 (결과에 반영) + if learnedBetas: + result.elasticityUsed = elasticity + result.assumptions["경기감응도(beta)"] = ( + f"학습 GDP {elasticity.revenueToGdp:.2f}, FX {elasticity.revenueToFx:.2f}" + ) + return result + + +# ══════════════════════════════════════ +# 시뮬레이션 백테스팅 +# ══════════════════════════════════════ + + +@dataclass +class BacktestResult: + """시뮬레이션 백테스트 결과.""" + + scenariosTested: int + directionAccuracy: float # 매출 방향 (증/감) 정확도 (%) + avgError: float # 평균 절대 오차 (%) + scenarioHitRate: float # base/bull/bear 범위 내 적중률 (%) + details: list[dict] # 시나리오별 상세 + warnings: list[str] = field(default_factory=list) + + def __repr__(self) -> str: + lines = [f"[시뮬레이션 백테스트 ({self.scenariosTested}개 시나리오)]"] + lines.append(f" 방향 정확도: {self.directionAccuracy:.0f}%") + lines.append(f" 평균 오차: {self.avgError:.1f}%") + lines.append(f" 시나리오 적중률: {self.scenarioHitRate:.0f}%") + return "\n".join(lines) + + +def backtestSimulation( + series: dict, + sectorKey: Optional[str] = None, + sectorParams: Optional[SectorParams] = None, +) -> BacktestResult | None: + """과거 시점으로 돌아가서 시뮬레이션 정확도 측정. + + 역사적 시나리오(2008, 2020 등)를 사용하여: + 1. 해당 시점 직전 재무 데이터 기준으로 시뮬레이션 실행 + 2. 실제 결과와 비교 + 3. 방향 정확도 + 오차 + 시나리오 적중률 산출 + """ + details: list[dict] = [] + warnings: list[str] = [] + + # 각 역사적 시나리오 테스트 + for key, sc in HISTORICAL_SCENARIOS.items(): + sim = simulateScenario(series, sc, sectorKey, sectorParams) + if not sim.revenuePath: + continue + + # 실제 매출 변화 (공시 데이터에서) + actualRevChange = _getActualRevChange(series, key) + if actualRevChange is None: + continue + + predictedChange = sim.revenueChangePct + + # 방향 일치 여부 + directionCorrect = (predictedChange > 0) == (actualRevChange > 0) + + # 오차 + error = abs(predictedChange - actualRevChange) + + details.append( + { + "scenario": key, + "label": sc.label, + "predictedRevChange": round(predictedChange, 1), + "actualRevChange": round(actualRevChange, 1), + "error": round(error, 1), + "directionCorrect": directionCorrect, + } + ) + + if not details: + return None + + n = len(details) + dirAcc = sum(1 for d in details if d["directionCorrect"]) / n * 100 + avgErr = sum(d["error"] for d in details) / n + hitRate = sum(1 for d in details if d["error"] < 15) / n * 100 # 15%p 이내 = 적중 + + return BacktestResult( + scenariosTested=n, + directionAccuracy=round(dirAcc, 1), + avgError=round(avgErr, 1), + scenarioHitRate=round(hitRate, 1), + details=details, + warnings=warnings, + ) + + +def _getActualRevChange(series: dict, historicalKey: str) -> float | None: + """역사적 시나리오 기간의 실제 매출 변화율 추출.""" + periodMap = { + "gfc_2008": ("2008", "2011"), + "covid_2020": ("2019", "2022"), + "euro_crisis_2011": ("2011", "2014"), + "rate_hike_2022": ("2021", "2024"), + } + + if historicalKey not in periodMap: + return None + + startYear, endYear = periodMap[historicalKey] + startRev = _getRevByYear(series, startYear) + endRev = _getRevByYear(series, endYear) + + if startRev is None or endRev is None or startRev == 0: + return None + + return (endRev - startRev) / abs(startRev) * 100 + + +def _getRevByYear(series: dict, year: str) -> float | None: + """특정 연도의 매출 추출.""" + from dartlab.core.finance.extract import getAnnualValues + + revValues = getAnnualValues(series, "IS", "sales") or getAnnualValues(series, "IS", "revenue") + if not revValues: + return None + + # periodCols에서 해당 연도 찾기 + # series dict에서 직접 연도 매칭 + for stmt in ["IS"]: + stmtData = series.get(stmt, {}) + for account, row in stmtData.items(): + if "sales" in account.lower() or "revenue" in account.lower(): + for key, val in row.items(): + if key.startswith(year) and val is not None: + return val + return None diff --git a/src/dartlab/analysis/graph/__init__.py b/src/dartlab/analysis/graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b91d9a7ad0533a825bf6b406b80346901c9c127 --- /dev/null +++ b/src/dartlab/analysis/graph/__init__.py @@ -0,0 +1,27 @@ +"""core/graph — 기업 재무 인과 그래프 (Phase 2). + +FINOS ai-evals-framework + Microsoft GraphRAG 패턴. +14축 calc 결과를 노드/엣지로 모델링하여 인과 질문에 환각 없이 답한다. + +진입점: + from dartlab.analysis.graph import CompanyGraph, buildGraph + g = buildGraph(company) + causes = g.causes("영업이익률") # 마진 하락 원인 트리 +""" + +from dartlab.analysis.graph.builder import buildGraph +from dartlab.analysis.graph.schema import CompanyGraph, Edge, EdgeType, Node, NodeType +from dartlab.analysis.graph.traverse import ancestors, causes, related, timeline + +__all__ = [ + "CompanyGraph", + "Edge", + "EdgeType", + "Node", + "NodeType", + "buildGraph", + "causes", + "ancestors", + "timeline", + "related", +] diff --git a/src/dartlab/analysis/graph/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/graph/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1996ef5fbe91dc8a47d409f8b2181eccf3e7baf0 Binary files /dev/null and b/src/dartlab/analysis/graph/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/graph/__pycache__/builder.cpython-312.pyc b/src/dartlab/analysis/graph/__pycache__/builder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b96e829d60fc70fd1755aa57c26e056c3b1cf985 Binary files /dev/null and b/src/dartlab/analysis/graph/__pycache__/builder.cpython-312.pyc differ diff --git a/src/dartlab/analysis/graph/__pycache__/schema.cpython-312.pyc b/src/dartlab/analysis/graph/__pycache__/schema.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9c1ecb81f2028dee657bc58ed953e075c5746fe Binary files /dev/null and b/src/dartlab/analysis/graph/__pycache__/schema.cpython-312.pyc differ diff --git a/src/dartlab/analysis/graph/__pycache__/traverse.cpython-312.pyc b/src/dartlab/analysis/graph/__pycache__/traverse.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae0a37ddb825a39bcd668826af88ecc13cd66b0e Binary files /dev/null and b/src/dartlab/analysis/graph/__pycache__/traverse.cpython-312.pyc differ diff --git a/src/dartlab/analysis/graph/builder.py b/src/dartlab/analysis/graph/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..1627c43375f35ff12087df123a652131ddeb617a --- /dev/null +++ b/src/dartlab/analysis/graph/builder.py @@ -0,0 +1,312 @@ +"""CompanyGraph 빌더 — Company × 14축 calc → 인과 그래프. + +review의 6막 인과 연결을 명시적 그래프로 끌어올린다. +calc 결과의 dict 키에서 노드를 추출하고, 6막 간 인과를 엣지로 연결. + +빌드 < 2초 (calc 캐시 재사용), 메모리 < 50MB. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from dartlab.analysis.graph.schema import ( + CompanyGraph, + Edge, + EdgeType, + Node, + NodeType, +) + +log = logging.getLogger(__name__) + +# calc 실패 시 잡아야 하는 예외 +_SAFE = ( + KeyError, + TypeError, + ValueError, + IndexError, + AttributeError, + ZeroDivisionError, + FileNotFoundError, + OSError, + RuntimeError, + ArithmeticError, + StopIteration, + ImportError, +) + + +def _safe(fn: Any, *args: Any, **kwargs: Any) -> Any: + try: + return fn(*args, **kwargs) + except _SAFE: + return None + + +def _nid(type_: str, label: str, period: str = "") -> str: + """노드 ID 생성.""" + if period: + return f"{type_}:{label}:{period}" + return f"{type_}:{label}" + + +def _addMetricNode( + g: CompanyGraph, + label: str, + value: Any, + period: str, + unit: str = "", +) -> str: + """메트릭 노드 추가 + ID 반환.""" + nid = _nid("metric", label, period) + g.addNode( + Node( + id=nid, + type=NodeType.METRIC, + label=label, + value=value, + period=period, + unit=unit, + ) + ) + return nid + + +def _addCauses(g: CompanyGraph, source_id: str, target_id: str, label: str = "") -> None: + g.addEdge(Edge(source=source_id, target=target_id, type=EdgeType.CAUSES, label=label)) + + +def _addPartOf(g: CompanyGraph, part_id: str, whole_id: str, label: str = "") -> None: + g.addEdge(Edge(source=part_id, target=whole_id, type=EdgeType.PART_OF, label=label)) + + +def _addDerived(g: CompanyGraph, base_id: str, derived_id: str, label: str = "") -> None: + g.addEdge(Edge(source=base_id, target=derived_id, type=EdgeType.DERIVED, label=label)) + + +def _addAnomaly(g: CompanyGraph, source_id: str, target_id: str, label: str = "") -> None: + g.addEdge(Edge(source=source_id, target=target_id, type=EdgeType.ANOMALY, label=label)) + + +# ── 1막: 수익구조 ───────────────────────────────────────── + + +def _buildAct1(g: CompanyGraph, company: Any, bp: str | None) -> None: + """매출 구성 → 부문별 partOf + 성장 causes.""" + try: + from dartlab.analysis.financial.revenue import ( + calcRevenueGrowth, + calcSegmentComposition, + ) + except ImportError: + return + + seg = _safe(calcSegmentComposition, company, basePeriod=bp) + if seg and isinstance(seg, dict): + segments = seg.get("segments") or [] + total_rev = seg.get("totalRevenue") + if total_rev: + rev_id = _addMetricNode(g, "매출액", total_rev, bp or "", "억원") + for s in segments[:10]: + name = s.get("name", "?") + rev = s.get("revenue") + share = s.get("share") + sid = _nid("segment", name) + g.addNode(Node(id=sid, type=NodeType.SEGMENT, label=name, value=rev)) + _addPartOf(g, sid, rev_id, label=f"{share}%" if share else "") + + growth = _safe(calcRevenueGrowth, company, basePeriod=bp) + if growth and isinstance(growth, dict): + yoy = growth.get("yoy") + if isinstance(yoy, dict): + for period, rate in list(yoy.items())[:5]: + gid = _addMetricNode(g, "매출성장률", rate, period, "%") + rev_id = _nid("metric", "매출액", period) + if rev_id in g.nodes: + _addDerived(g, rev_id, gid) + + +# ── 2막: 수익성 ────────────────────────────────────────── + + +def _buildAct2(g: CompanyGraph, company: Any, bp: str | None) -> None: + """마진 추이 → causes 체인.""" + try: + from dartlab.analysis.financial.profitability import calcMarginTrend + except ImportError: + return + + margin = _safe(calcMarginTrend, company, basePeriod=bp) + if not margin or not isinstance(margin, dict): + return + history = margin.get("history") or [] + for row in history[:5]: + period = row.get("period", "") + if not period: + continue + op_margin = row.get("operatingMargin") + net_margin = row.get("netMargin") + gross_margin = row.get("grossMargin") + + if gross_margin is not None: + gm_id = _addMetricNode(g, "매출총이익률", gross_margin, period, "%") + if op_margin is not None: + om_id = _addMetricNode(g, "영업이익률", op_margin, period, "%") + if gross_margin is not None: + _addCauses(g, gm_id, om_id, "매출총이익 → 판관비 차감") + if net_margin is not None: + nm_id = _addMetricNode(g, "순이익률", net_margin, period, "%") + if op_margin is not None: + _addCauses(g, om_id, nm_id, "영업이익 → 영업외/세금 차감") + + +# ── 3막: 현금흐름 ───────────────────────────────────────── + + +def _buildAct3(g: CompanyGraph, company: Any, bp: str | None) -> None: + """현금흐름 → 이익에서 현금 전환 causes.""" + try: + from dartlab.analysis.financial.cashflow import calcCashFlowOverview + except ImportError: + return + + cf = _safe(calcCashFlowOverview, company, basePeriod=bp) + if not cf or not isinstance(cf, dict): + return + for row in (cf.get("history") or [])[:5]: + period = row.get("period", "") + if not period: + continue + ocf = row.get("ocf") + fcf = row.get("fcf") + capex = row.get("capex") + if ocf is not None: + ocf_id = _addMetricNode(g, "영업CF", ocf, period, "억원") + # 2막 순이익 → 3막 OCF + ni_id = _nid("metric", "순이익률", period) + if ni_id in g.nodes: + _addCauses(g, ni_id, ocf_id, "이익 → 현금 전환") + if fcf is not None: + fcf_id = _addMetricNode(g, "FCF", fcf, period, "억원") + if ocf is not None: + _addDerived(g, ocf_id, fcf_id, "OCF - CAPEX") + if capex is not None: + capex_id = _addMetricNode(g, "CAPEX", abs(capex), period, "억원") + if ocf is not None and fcf is not None: + _addCauses(g, capex_id, fcf_id, "CAPEX 차감") + + +# ── 4막: 안정성 ────────────────────────────────────────── + + +def _buildAct4(g: CompanyGraph, company: Any, bp: str | None) -> None: + """부채/Z-Score → 3막 FCF에서 감당 가능한지 causes.""" + try: + from dartlab.analysis.financial.stability import calcDistressScore, calcLeverageTrend + except ImportError: + return + + lev = _safe(calcLeverageTrend, company, basePeriod=bp) + if lev and isinstance(lev, dict): + for row in (lev.get("history") or [])[:5]: + period = row.get("period", "") + dr = row.get("debtRatio") + if period and dr is not None: + dr_id = _addMetricNode(g, "부채비율", dr, period, "%") + # 3막 FCF → 4막 부채 감당 + fcf_id = _nid("metric", "FCF", period) + if fcf_id in g.nodes: + _addCauses(g, fcf_id, dr_id, "FCF로 부채 상환 가능 여부") + + distress = _safe(calcDistressScore, company, basePeriod=bp) + if distress and isinstance(distress, dict): + for row in (distress.get("history") or [])[:5]: + period = row.get("period", "") + zscore = row.get("zScore") + if period and zscore is not None: + zid = _addMetricNode(g, "Z-Score", zscore, period, "") + if zscore < 1.8: + _addAnomaly(g, zid, zid, f"Z-Score {zscore:.1f} < 1.8 위험") + + +# ── 5막: 자본배분 ───────────────────────────────────────── + + +def _buildAct5(g: CompanyGraph, company: Any, bp: str | None) -> None: + """ROIC + 배당 → 가치 창출/파괴 causes.""" + try: + from dartlab.analysis.financial.investmentAnalysis import calcRoicTimeline + except ImportError: + return + + roic = _safe(calcRoicTimeline, company, basePeriod=bp) + if roic and isinstance(roic, dict): + for row in (roic.get("history") or [])[:5]: + period = row.get("period", "") + r = row.get("roic") + wacc = row.get("wacc") + if period and r is not None: + rid = _addMetricNode(g, "ROIC", r, period, "%") + if wacc is not None: + wid = _addMetricNode(g, "WACC", wacc, period, "%") + if r > wacc: + _addCauses(g, rid, wid, f"ROIC {r:.1f}% > WACC {wacc:.1f}% → 가치 창출") + else: + _addAnomaly(g, rid, wid, f"ROIC {r:.1f}% < WACC {wacc:.1f}% → 가치 파괴") + + +# ── 6막 전환 엣지 (막 간 인과) ──────────────────────────── + + +def _buildActTransitions(g: CompanyGraph) -> None: + """6막 간 인과 연결 — 가장 최근 기간에서 대표 엣지 1개씩.""" + # 1→2: 매출 → 마진 + for n in g.findNodes(type=NodeType.METRIC, label="매출액"): + om = g.getNode(_nid("metric", "영업이익률", n.period)) + if om: + _addCauses(g, n.id, om.id, "매출 규모 → 마진 수준") + break + # 4→5: 부채 → ROIC + for n in g.findNodes(type=NodeType.METRIC, label="부채비율"): + roic = g.getNode(_nid("metric", "ROIC", n.period)) + if roic: + _addCauses(g, n.id, roic.id, "자본 구조 → 투자 수익") + break + + +# ── 메인 빌더 ───────────────────────────────────────────── + + +def buildGraph(company: Any, *, basePeriod: str | None = None) -> CompanyGraph: + """Company → CompanyGraph. + + 14축 calc 캐시 재사용 → 빌드 < 2초. + 메모리 < 50MB (dict-of-dicts, Polars 비사용). + """ + stockCode = getattr(company, "stockCode", None) or getattr(company, "ticker", "") or "" + corpName = getattr(company, "corpName", "") or "" + g = CompanyGraph(stockCode=stockCode, corpName=corpName) + + # basePeriod 해석 + bp = basePeriod + if bp is None: + try: + from dartlab.analysis.financial._helpers import resolveBasePeriod + + pr = resolveBasePeriod(company, None, maxYears=5, maxQuarters=8) + bp = pr.basePeriod if pr else None + except _SAFE: + pass + + # 6막 빌드 + _buildAct1(g, company, bp) + _buildAct2(g, company, bp) + _buildAct3(g, company, bp) + _buildAct4(g, company, bp) + _buildAct5(g, company, bp) + _buildActTransitions(g) + + log.debug("buildGraph: %s", g.summary()) + return g diff --git a/src/dartlab/analysis/graph/schema.py b/src/dartlab/analysis/graph/schema.py new file mode 100644 index 0000000000000000000000000000000000000000..44348986f097802ca24e54093d1edd9650f46a33 --- /dev/null +++ b/src/dartlab/analysis/graph/schema.py @@ -0,0 +1,124 @@ +"""그래프 스키마 — 노드/엣지 타입 + CompanyGraph 자료구조. + +외부 의존성 0 — dict-of-dicts (NetworkX 불필요). +메모리 < 50MB 목표 (Company 500MB 위에 얇은 레이어). + +6 노드 타입: + METRIC — 재무 지표 (영업이익률, ROE, 부채비율 등) + ACCOUNT — 계정 (매출액, 영업이익 등) + SEGMENT — 사업부/부문 + PERIOD — 기간 (2024, 2024Q4 등) + EVENT — 이벤트 (사이클 정점, 배당 증가 등) + MACRO — 매크로 지표 (금리, 환율 등) + +5 엣지 타입: + CAUSES — A가 B의 원인 (마진 하락 → 이익 감소) + PART_OF — A가 B의 구성 요소 (DX부문 → 매출) + DERIVED — A에서 B가 계산됨 (매출-원가 → 영업이익) + COMPARES_TO — 같은 레벨 비교 (동종업계 평균 vs 나) + ANOMALY — 이상치 신호 (Z-Score < 1.8, 발생액 비정상) +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + + +class NodeType(str, Enum): + METRIC = "metric" + ACCOUNT = "account" + SEGMENT = "segment" + PERIOD = "period" + EVENT = "event" + MACRO = "macro" + + +class EdgeType(str, Enum): + CAUSES = "causes" + PART_OF = "partOf" + DERIVED = "derived" + COMPARES_TO = "comparesTo" + ANOMALY = "anomaly" + + +@dataclass(frozen=True) +class Node: + id: str # unique — "metric:영업이익률:2024" 형태 + type: NodeType + label: str # 사람이 읽는 이름 + value: Any = None # 수치 또는 문자열 + period: str = "" # 기간 + unit: str = "" # %, 조원, 배 등 + meta: dict = field(default_factory=dict) + + +@dataclass(frozen=True) +class Edge: + source: str # node id + target: str # node id + type: EdgeType + weight: float = 1.0 # 연결 강도 (정규화 가능) + label: str = "" # "매출 51% 차지" 같은 설명 + + +class CompanyGraph: + """기업 재무 인과 그래프. + + dict-of-dicts 구조. NetworkX 없이 순수 Python. + nodes: {id: Node} + edges: {source_id: {target_id: Edge}} + reverse: {target_id: {source_id: Edge}} ← 역방향 (causes 추적용) + """ + + def __init__(self, stockCode: str = "", corpName: str = "") -> None: + self.stockCode = stockCode + self.corpName = corpName + self.nodes: dict[str, Node] = {} + self.edges: dict[str, dict[str, Edge]] = {} # forward + self.reverse: dict[str, dict[str, Edge]] = {} # backward + + def addNode(self, node: Node) -> None: + self.nodes[node.id] = node + + def addEdge(self, edge: Edge) -> None: + # forward + if edge.source not in self.edges: + self.edges[edge.source] = {} + self.edges[edge.source][edge.target] = edge + # reverse (backward traversal용) + if edge.target not in self.reverse: + self.reverse[edge.target] = {} + self.reverse[edge.target][edge.source] = edge + + def getNode(self, node_id: str) -> Node | None: + return self.nodes.get(node_id) + + def outgoing(self, node_id: str) -> list[Edge]: + """node_id에서 나가는 엣지들.""" + return list(self.edges.get(node_id, {}).values()) + + def incoming(self, node_id: str) -> list[Edge]: + """node_id로 들어오는 엣지들 (역방향).""" + return list(self.reverse.get(node_id, {}).values()) + + def findNodes(self, *, type: NodeType | None = None, label: str = "") -> list[Node]: + """조건 매칭 노드 검색. label은 부분 매칭.""" + results = [] + for n in self.nodes.values(): + if type and n.type != type: + continue + if label and label.lower() not in n.label.lower(): + continue + results.append(n) + return results + + def __len__(self) -> int: + return len(self.nodes) + + def edgeCount(self) -> int: + return sum(len(targets) for targets in self.edges.values()) + + def summary(self) -> str: + return f"CompanyGraph({self.corpName}): {len(self)} nodes, {self.edgeCount()} edges" diff --git a/src/dartlab/analysis/graph/traverse.py b/src/dartlab/analysis/graph/traverse.py new file mode 100644 index 0000000000000000000000000000000000000000..08f585a9900686c2484202c812b18ff1a215bf4a --- /dev/null +++ b/src/dartlab/analysis/graph/traverse.py @@ -0,0 +1,168 @@ +"""그래프 탐색 쿼리 — causes/ancestors/timeline/related. + +인과 질문 ("왜 마진이 떨어졌나") → 그래프 traversal → 텍스트 결과. +모든 주장이 노드 ID로 추적 가능 → 환각 0. +""" + +from __future__ import annotations + +from dartlab.analysis.graph.schema import CompanyGraph, Edge, EdgeType, Node + + +def causes( + graph: CompanyGraph, + label: str, + *, + max_depth: int = 3, +) -> list[tuple[Node, Edge, int]]: + """label 매칭 노드의 원인 트리 (역방향 BFS). + + Returns: + [(원인노드, 엣지, depth), ...] — depth 1이 직접 원인. + """ + targets = graph.findNodes(label=label) + if not targets: + return [] + + results: list[tuple[Node, Edge, int]] = [] + visited: set[str] = set() + queue: list[tuple[str, int]] = [(t.id, 0) for t in targets] + + while queue: + nid, depth = queue.pop(0) + if depth >= max_depth: + continue + for edge in graph.incoming(nid): + if edge.source in visited: + continue + visited.add(edge.source) + source_node = graph.getNode(edge.source) + if source_node: + results.append((source_node, edge, depth + 1)) + queue.append((edge.source, depth + 1)) + + return results + + +def ancestors( + graph: CompanyGraph, + label: str, + *, + max_depth: int = 5, +) -> list[Node]: + """label 매칭 노드의 조상 체인 (PART_OF 엣지만).""" + targets = graph.findNodes(label=label) + if not targets: + return [] + + result: list[Node] = [] + visited: set[str] = set() + queue = [t.id for t in targets] + + while queue: + nid = queue.pop(0) + if len(result) >= max_depth: + break + for edge in graph.incoming(nid): + if edge.type != EdgeType.PART_OF: + continue + if edge.source in visited: + continue + visited.add(edge.source) + node = graph.getNode(edge.source) + if node: + result.append(node) + queue.append(edge.source) + + return result + + +def timeline( + graph: CompanyGraph, + label: str, +) -> list[Node]: + """같은 label의 노드를 기간순 정렬.""" + nodes = graph.findNodes(label=label) + return sorted(nodes, key=lambda n: n.period) + + +def related( + graph: CompanyGraph, + label: str, + *, + edge_type: EdgeType | None = None, +) -> list[tuple[Node, Edge]]: + """label 매칭 노드의 연결된 노드 (forward + backward).""" + targets = graph.findNodes(label=label) + if not targets: + return [] + + results: list[tuple[Node, Edge]] = [] + seen: set[str] = set() + + for t in targets: + for edge in graph.outgoing(t.id): + if edge_type and edge.type != edge_type: + continue + if edge.target not in seen: + node = graph.getNode(edge.target) + if node: + results.append((node, edge)) + seen.add(edge.target) + for edge in graph.incoming(t.id): + if edge_type and edge.type != edge_type: + continue + if edge.source not in seen: + node = graph.getNode(edge.source) + if node: + results.append((node, edge)) + seen.add(edge.source) + + return results + + +# ── 서사 생성 ───────────────────────────────────────────── + + +def causesNarrative(graph: CompanyGraph, label: str) -> str: + """causes() 결과를 자연어 서사로 변환. + + 환각 0 보장: 모든 문장이 노드 value + edge label에서 생성됨. + """ + chain = causes(graph, label) + if not chain: + return f"'{label}'에 대한 원인 관계를 찾을 수 없습니다." + + lines = [f"### {label} 원인 분석"] + for node, edge, depth in chain: + indent = " " * depth + val = f" = {node.value}{node.unit}" if node.value is not None else "" + period = f" ({node.period})" if node.period else "" + edge_label = f" — {edge.label}" if edge.label else "" + lines.append(f"{indent}← {node.label}{val}{period}{edge_label}") + + return "\n".join(lines) + + +def timelineNarrative(graph: CompanyGraph, label: str) -> str: + """timeline() 결과를 추이 서사로.""" + nodes = timeline(graph, label) + if not nodes: + return f"'{label}' 시계열 데이터 없음." + + lines = [f"### {label} 추이"] + for n in nodes: + val = f"{n.value}{n.unit}" if n.value is not None else "N/A" + lines.append(f"- {n.period}: {val}") + + # 방향 판단 + vals = [n.value for n in nodes if isinstance(n.value, (int, float))] + if len(vals) >= 2: + if vals[-1] > vals[0]: + lines.append(f"\n→ **상승 추세** ({vals[0]} → {vals[-1]})") + elif vals[-1] < vals[0]: + lines.append(f"\n→ **하락 추세** ({vals[0]} → {vals[-1]})") + else: + lines.append("\n→ **보합**") + + return "\n".join(lines) diff --git a/src/dartlab/analysis/qa/2026-03-29_hyundai.md b/src/dartlab/analysis/qa/2026-03-29_hyundai.md new file mode 100644 index 0000000000000000000000000000000000000000..90615b6ee72cebba08e33df495db2e0b0e23d661 --- /dev/null +++ b/src/dartlab/analysis/qa/2026-03-29_hyundai.md @@ -0,0 +1,164 @@ +# 현대자동차 (005380) 스토리 분석 -- 2026-03-29 + +> dartlab.analysis() 14축 전체를 사용한 스토리 기반 재무제표 분석. + +--- + +## 이 회사는 어떤 회사인가 (1부: 재무제표 읽기) + +### 무엇으로 돈을 버는가 (1-1 수익구조) + +자동차, 자동차부품, 차량정비. 2025년 매출 46.8조(별도 기준). + +부문별로 보면 차량 23.3조(2025), 금융 3.1조. 차량이 전체 성장의 92%를 차지한다. 금융은 8% 기여. 2022년 대비 차량 부문이 11.4조 → 23.3조로 2배 늘었다. + +그런데 **매출총이익률이 악화 중**이다: 20.2% → 18.9% → 17.7% → 16.8%. 4년 연속 하락. grossMarginDirection="악화". 매출은 늘고 있는데 남기는 돈의 비율은 줄고 있다. + +3년 CAGR 1.79%. 최근 1년은 사실상 성장 정체(+0.46%). + +### 돈을 어디서 조달하는가 (1-2 자금조달) + +총자산 368.8조. 자기자본 127.6조(35%). **금융부채 비중 44.7%** -- 자산의 거의 절반을 빚으로 조달한다. 내부유보 27.5%에 불과. + +삼성전자가 내부유보 71%, 금융부채 4%인 것과 정반대. 현대차는 **차입으로 돌아가는 회사**다. + +부채 총계: 241.2조(부채비율 189%). 순차입금 146.6조(순차입금비율 115%). 금융부채 165조 중 사채 106.9조가 가장 크고, 단기차입 33.8조, 장기차입 24.3조. 부채의 핵심은 사채 -- 자동차 할부금융을 사채로 조달하는 구조. + +여기서 중요한 발견: **interestBurden 이자보상배율 11.0배(우수)**, **liquidity 유동비율 136%(보통), 현금비율 21%**. 부채비율 189%인데 이자보상배율이 11배라? 자금조달의 interestBurden에서는 11.0배인데 안정성의 coverageTrend에서는 9.78배로 다르다. 소스가 다를 수 있다. + +distressIndicators: Altman Z 1.12(부실 위험), Piotroski F 1/9(재무 약화). **Piotroski 1점은 심각**하다. 9개 재무 건전성 항목 중 1개만 통과. + +capitalFlags: Altman Z 부실 경계, Piotroski F 재무 약화, 금융부채 비중 68%. + +### 조달한 돈으로 뭘 준비했는가 (1-3 자산구조) + +총자산 368.8조 중 영업자산 88.6조(24%), 비영업자산 18.4조(5%), **나머지 71%가 기타**. 이 71%의 정체가 뭔가 -- 자동차 할부채권/리스자산이 여기에 잡힐 가능성이 크다. 자동차 금융이 자산의 대부분을 차지하는 것. + +운전자본: 매출채권 8.6조(+45.6%!), 재고 20.7조(+4.4%). **매출채권이 1년 만에 45% 급증**. 매출은 +0.46%인데 채권이 45% 늘었다. 뒤에서 재무정합성 anomalyScore가 이걸 잡아낸다. + +CCC=145일. DSO 67일, DIO 193일, DPO 115일. 자동차 제조업 특성상 재고일수가 높고, 매입채무 활용(115일)으로 CCC를 줄이고 있다. + +CAPEX 3.8조. depreciation 0(데이터 갭). + +### 실제로 현금은 어떻게 흘렀는가 (1-4 현금흐름) + +**영업CF가 -2.1조(적자)**. 자동차를 팔아서 현금이 나오는 게 아니라 빠져나가고 있다. 순이익은 1.2조 흑자인데 영업CF가 마이너스 -- OCF/순이익 = -176.6%. 이건 운전자본 변동(매출채권 급증 등)이 현금을 잡아먹고 있다는 뜻. + +FCF=-5.9조. 영업에서 이미 적자인데 투자까지 -3.9조. 합치면 6조 적자. 이 돈은 어디서? → 차입. 재무CF가 메우고 있다. + +cashFlowFlags: "영업CF 적자 -- 본업에서 현금이 나오지 않음", "영업CF 마진 -4.5%" + +이건 1-2 자금조달에서 본 "금융부채 비중 45%"와 연결된다. 벌어서 못 채우니 빌려서 메우는 구조. + +--- + +## 얼마나 잘 벌고 있는가 (2부: 비율 분석) + +### 수익성 (2-1) + +waterfall: 매출 100% → 원가 -83.3% → 매출총이익 16.8% → 판관비 -13.1% → 영업이익 3.6%. + +**원가율 83.3%가 핵심 문제**다. costStructureFlags가 "매출원가율 3년 연속 상승 (79.8% -> 83.3%)"을 잡아냈다. 매출 100원 중 83원이 원가. 남는 17원에서 판관비 13원을 빼면 영업이익 3.6원. + +영업률 3기 연속 하락: 5.42% → 4.58% → 3.62%. profitabilityFlags 발동. + +ROE 0.93%. 듀퐁 분해: 영업마진 3.62% x 자산회전율 0.13 x 레버리지 2.89 x 세금부담 0.71 x 이자부담 0.98. 레버리지 2.89배를 써서도 ROE가 0.93%밖에 안 된다. **수익성 자체가 무너져서 레버리지를 써도 안 되는 상태**. + +### 성장성 (2-2) + +CAGR: 매출 9.88%, 영업이익 6.21%, 순이익 -8.98%(역성장). growthQuality="균형"이라고 나오지만, **순이익 CAGR이 마이너스**인데 "균형"은 맞지 않다. + +영업레버리지: -86.8배(2025). 매출 +0.46%에 영업이익 -39.9%. 매출이 조금만 움직여도 이익이 극단적으로 반응하는 BEP 근처 상태. + +SGR=0.41% vs 실제성장=0.46%. gap 0.05%p. 성장이 정체되면서 SGR과 실제성장이 일치한다. 역설적으로 "지속가능한" 상태이긴 한데, **수익률이 1% 미만인 상태에서의 지속가능은 의미가 없다**. + +### 안정성 (2-3) + +부채비율 189%. 순부채비율 115%. 부채 3기 연속 증가(+10%). Altman Z''=1.63(회색 구간). distressEnsemble에서 Altman Z 1.12(위험), Springate 0.49(위험), Ohlson 0.5%(안전), Zmijewski(안전). **모델마다 판단이 다르다** -- 자동차 금융 부채를 어떻게 보느냐에 따라 달라진다. + +부채 만기: 단기 33.8조(20.5%), 장기 24.3조, 사채 106.9조. 단기 비중이 낮은 편이라 급한 상환 압력은 덜하다. + +### 효율성 (2-4) + +총자산회전율 0.13회(3기 연속 하락). 368조 자산에서 47조 매출. 자산 대부분이 금융자산(할부채권/리스)이라 회전율이 낮게 나오는 구조적 한계가 있다. + +CCC=145일에서 29일 악화. efficiencyFlags가 이걸 잡았다. + +--- + +## 이익의 질과 자본 활용 (3부: 심화) + +### 이익품질 (3-1) + +**IS-CF 괴리가 심각하다**. 순이익 1.2조 vs OCF -2.1조. divergence=276.6%, direction="이익과대". 이익은 흑자인데 현금은 적자. Sloan Accrual Ratio +0.89%(양수 = 발생액 > 현금 = 이익과대 방향). + +**IS-BS 괴리**: 매출 +0.46% vs 매출채권 +45.6%. revRecGap=45.1%p. anomalyScore=55.5(삼성 15.6 대비 3.5배). 이건 공격적 매출 인식 또는 채권 회수 지연을 의미. Beneish M-Score -1.93으로 조작 임계(-1.78)에 근접. + +매출은 안 늘었는데 채권만 45% 늘었다 → 1-4에서 본 영업CF 적자의 직접 원인. **스토리가 하나로 연결된다**: 채권 급증 → 운전자본 악화 → 영업CF 적자 → 차입 의존 → 부채 증가. + +### 비용구조 (3-2) + +원가율 83.3%(3년 연속 상승). DOL=-20(BEP 근처). 손익분기 매출 추정 36.7조 vs 현재 46.8조. 안전마진 21.6%. 삼성전자(45.4%)의 절반. **매출이 22% 줄면 적자 전환**하는 구조. + +### 자본배분 (3-3) + +배당 6583억, 성향 55.6%. **이익의 절반 이상을 배당으로 지급**. FCF가 -5.9조인데 배당 6583억을 하고 있다. 이 돈은 전액 차입으로 조달. + +FCF 사용: FCF -5.9조, 배당 -6583억, 잔여 -6.6조. 모든 항목이 마이너스. 벌어서 쓰는 게 아니라 빌려서 쓰고 있다. + +유보율 44.4%. 삼성전자(87.5%) 대비 절반. 이익이 줄어드는데 배당은 오히려 +24% 늘렸다. + +### 투자효율 (3-4) + +ROIC=0.44%. NOPAT 1.2조 / 투하자본 274.3조. **274조를 투자해서 1.2조를 번다**. 3년 연속 저수익 플래그. WACC(추정 6-8%) 대비 가치파괴가 확실하다. + +### 재무정합성 (3-5) + +anomalyScore 55.5. 구성: IS-CF 괴리 30점, 매출채권 갭 18점, Beneish 5점. **재무제표 정합성에 경고 신호**가 있다. + +유효세율 28.9%(법정 24% 대비 +4.9%p). 이연법인세부채 > 이연법인세자산. 미래 세금 부담이 더 큰 상태. + +--- + +## 종합 판단 + +현대자동차는 **외형은 크지만 속이 비어가는 회사**다. + +1. **원가 악화가 모든 것을 설명**: 원가율 79.8%→83.3% 상승이 영업률 5.42%→3.62% 하락으로, ROE 0.93%로, ROIC 0.44%로 이어진다. +2. **현금이 안 돈다**: 영업CF 적자, FCF -5.9조. 매출채권 +45% 급증이 현금을 잡아먹는다. +3. **차입으로 버틴다**: 금융부채 165조, 순차입금 147조. 배당까지 차입으로 지급. +4. **Piotroski 1/9**: 9개 재무 건전성 항목 중 1개만 통과. 이건 단순한 경고가 아니라 구조적 약화 신호. + +스토리 요약: "매출은 정체, 원가는 올라가고, 현금은 안 돌고, 빚으로 배당한다. 전기차 전환 비용이 수익성을 잡아먹는 전형적인 레거시 자동차 회사의 전환기 고통." + +핵심 질문: 원가율 상승이 일시적(전기차 초기 비용)인가, 구조적(경쟁력 약화)인가. 전자라면 견딜 수 있고, 후자라면 위험하다. + +--- + +## 엔진 이슈 + +### 잘 동작한 것 +- 1부→2부→3부 스토리 연결이 강하다: 채권급증(1-3) → 영업CF적자(1-4) → IS-CF괴리(3-1) → anomalyScore 55.5(3-5) +- capitalFlags에서 Piotroski 1/9을 바로 보여줌 +- efficiencyFlags에서 CCC 29일 악화를 잡아냄 +- cashFlowFlags에서 영업CF 적자를 명시 + +### 문제점 + +| 항목 | 문제 | 심각도 | +|------|------|--------| +| growthQuality="균형" | 순이익 CAGR -8.98%인데 "균형" 판정은 오류 | 높 | +| interestBurden 불일치 | 자금조달에서 11.0배 vs 안정성에서 9.78배 | 중 | +| segmentComposition=None | 부문별 영업이익 비중 없음 | 중 | +| otherAssetsPct 71% | 자동차 금융자산이 "기타"로 빠져 자산 구조 해석 불가 | 높 | +| depreciation=0 | 감가상각 데이터 누락으로 CAPEX/감가비율 못 구함 | 중 | +| 영업CF 적자 원인 | 운전자본 변동 분해가 없어서 "왜 적자인지" 추정만 가능 | 높 | +| debtRepaid=0 | FCF usage에서 부채상환이 0인데, 실제로는 사채 상환이 있을 것 | 중 | + +### 가장 큰 아쉬움 +**영업CF 적자의 원인 분해**가 없다. OCF = 순이익 + 비현금비용 +/- 운전자본 변동인데, 현재 analysis는 OCF 총액만 보여주고 "운전자본이 얼마를 잡아먹었는지"를 안 보여준다. 현대차처럼 이익 흑자+영업CF 적자인 경우에 이게 가장 중요한 정보다. + +--- + +*분석일: 2026-03-29* +*dartlab.analysis() 14축 사용* diff --git a/src/dartlab/analysis/qa/2026-03-29_naver.md b/src/dartlab/analysis/qa/2026-03-29_naver.md new file mode 100644 index 0000000000000000000000000000000000000000..8ed6d8eb61d47c2f5d38430583e626c1da6e7dbf --- /dev/null +++ b/src/dartlab/analysis/qa/2026-03-29_naver.md @@ -0,0 +1,154 @@ +# NAVER (035420) 스토리 분석 -- 2026-03-29 + +> dartlab.analysis() 14축 전체를 사용한 스토리 기반 재무제표 분석. + +--- + +## 이 회사는 어떤 회사인가 (1부: 재무제표 읽기) + +### 무엇으로 돈을 버는가 (1-1 수익구조) + +포털 서비스 및 온라인 광고. 2025년 매출 3.2조. 부문 데이터는 2023년만 있음 -- 핀테크 2547억, 클라우드 1733억, 콘텐츠 1355억. growthContribution=None으로 부문별 성장 기여도 산출 불가. + +revenueQuality도 None. **IT 플랫폼은 매출원가/매출총이익이 별도로 잡히지 않아 매출 품질 지표가 빠진다.** + +3년 CAGR 4.66%. + +### 돈을 어디서 조달하는가 (1-2 자금조달) + +총자산 41.1조. 자기자본 29.0조(70%). **내부유보 비중 67.2%**, 금융부채 3.4%. 삼성전자(내부유보 71%, 금융부채 4%)와 비슷한 자기자본 중심 구조. + +이익잉여금 27.6조가 자본의 95%. capitalFlags: "내부유보 비중 95% -- 자기 힘으로 성장". + +부채 12.1조인데 영업부채 10.7조, 금융부채 1.4조. **금융부채가 거의 없다**. 순현금: 현금 6.0조 - 차입금 1.4조 = 순현금 4.6조. + +distressIndicators: Altman Z 5.52(안전), Ohlson 0.1%. 그런데 Springate 0.55(부실 위험). Springate가 위험으로 나오는 이유는 매출/자산 비율이 0.08로 극도로 낮기 때문(플랫폼 비즈니스 특성). + +### 조달한 돈으로 뭘 준비했는가 (1-3 자산구조) + +총자산 41.1조 중 영업자산 8.8조(21%), 비영업자산(현금+투자) 6.0조(15%), **기타 64%**. 기타의 정체: 관계기업투자(LINE, 웹툰 등 해외자회사 지분), 장기금융자산 등. + +유형자산 3.6조(8.8%) -- 데이터센터. 무형자산 3.4조(8.3%) -- 소프트웨어/영업권. 유형과 무형이 거의 같다. + +운전자본: 매출채권 4456억(DSO 51일). 재고 202억(사실상 없음). CCC=None -- 플랫폼 비즈니스는 운전자본 사이클이 의미 없다. + +CAPEX 5667억. CAPEX/매출 17.7%. **매출의 18%를 CAPEX에 쓴다** -- 데이터센터+클라우드 인프라 투자. + +### 실제로 현금은 어떻게 흘렀는가 (1-4 현금흐름) + +영업CF 9445억, FCF 3778억. OCF/순이익 = **579%**. 순이익 1630억 대비 영업CF가 5.8배. 이건 감가상각+비현금비용이 크다는 뜻이기도 하지만, **순이익이 영업외 손실로 크게 깎였기 때문**이기도 하다. + +영업CF 마진 29.6%. 매출의 30%가 현금으로 들어온다. 현금 창출력은 우수. + +--- + +## 얼마나 잘 벌고 있는가 (2부: 비율 분석) + +### 수익성 (2-1) + +**여기서 NAVER의 핵심 스토리가 나온다.** + +영업률 19.1%. 순이익률 5.1%. **14%p 괴리**. + +waterfall: 매출 100% → 영업이익 19.1% → 금융비용 -6.8% → 금융수익 +10.1% → 세전이익 9.4% → 법인세 -4.3% → 순이익 5.1%. + +매출원가/판관비가 잡히지 않아 매출→영업이익 구간은 블랙박스다. 영업이익 이후를 보면: 금융비용 2171억(-6.8%), 금융수익 3213억(+10.1%). 금융 순이익은 +1042억으로 플러스인데, 세전이익이 영업이익보다 낮다? → **영업이익 6106억에서 세전이익 2995억으로 3111억이 사라졌다**. 이게 earningsPersistence의 nonOperatingIncome=-3111억, nonOpRatio=104%. + +영업외 손실이 영업이익의 104%다. waterfall에 안 보이는 항목(지분법 손실, 자산손상 등)이 3111억을 잡아먹고 있다. + +듀퐁 분해: ROE 0.56%. 영업마진 19.1% x 자산회전율 0.08 x 레버리지 1.42 x 세금부담 0.54 x **이자부담 0.49**. interestBurden=0.49 = 영업이익의 51%가 영업외에서 사라진다. 이게 ROE를 0.56%로 끌어내리는 주범. + +### 성장성 (2-2) + +CAGR: 매출 16.1%, 영업이익 13.5%, 순이익 -15.8%. growthQuality="균형"인데 **순이익 CAGR이 -15.8%**. 현대차와 같은 문제 -- 순이익이 역성장인데 "균형"은 맞지 않다. + +SGR=0.56% vs 실제성장=10.7%. gap 10.2%p. ROE가 0.56%이니 SGR도 0.56%. payoutRatio=None(배당 데이터 누락). + +### 안정성 (2-3) + +부채비율 42%(매우 안정). 순부채비율 -15.9%(순현금). **그런데 이자보상배율이 2.8배**. 부채가 적은데 이자보상이 낮은 이유: interestExpenseSource="금융비용"(2171억)인데 여기에 FX손실/파생상품 손실이 포함되어 있어 순수 이자비용보다 크게 잡힌다. stabilityFlags가 "이자보상배율 2.8배 -- 이자 부담 과다"를 띄우는데, **이건 오진**이다. + +### 효율성 (2-4) + +총자산회전율 0.08회. CCC=None. DSO/DIO/DPO 모두 None. 플랫폼 비즈니스에서 효율성 분석은 사실상 무의미. + +--- + +## 이익의 질과 자본 활용 (3부: 심화) + +### 이익품질 (3-1) + +Sloan Accrual Ratio -1.9%(보수적). Beneish M-Score -2.60(조작 가능성 매우 낮음). 이익 자체는 양호. + +**earningsQualityFlags: "영업외손익 비중 104% -- 일회성 이익 의존"**. 이건 정확히 맞다. 다만 "일회성 이익 의존"이라기보다 "영업외 손실이 영업이익을 상쇄"라고 표현해야 더 정확하다. + +IS-CF 괴리: divergence=-479%(보수적 방향). 순이익 1630억 vs OCF 9445억. 영업외 손실이 순이익을 깎았지만 현금은 제대로 들어오고 있다는 뜻. + +anomalyScore=31.9. 구성: IS-CF 괴리 30점. 매출채권/재고 갭은 정상. + +### 비용구조 (3-2) + +**costBreakdown이 전부 0**. costOfSales=0, sga=0. NAVER는 IS에서 매출원가/판관비를 별도 계정으로 보고하지 않는 구조(또는 데이터 매핑 누락). 따라서 비용구조 분석이 완전히 무력화됨. + +breakevenEstimate도 null. 비용구조 분석 전체가 NAVER에서 동작하지 않는다. + +### 재무정합성 (3-5) + +유효세율 45.6%(법정 24% 대비 +21.6%p). **세율이 법정의 거의 2배**. 이연법인세부채 8545억 > 이연법인세자산 3097억. 해외자회사 관련 세금 또는 지분법 손실의 세금 효과일 수 있다. + +이건 중요한 스토리: 영업이익의 104%가 영업외에서 사라지고, 남은 이익에 45.6% 세금이 붙어서 최종 ROE가 0.56%. + +### 자본배분 (3-3) + +배당 0원. 자사주 매입 0원. totalReturn=0. FCF 3778억 전액이 잔여. **NAVER는 주주환원을 하지 않는다**(별도 기준). CAPEX/매출 17.7%로 데이터센터 투자에 집중. + +### 투자효율 (3-4) + +ROIC=1.36%. NOPAT 3323억 / 투하자본 24.4조. 유효세율 45.6%가 NOPAT을 크게 깎는다. 3년 연속 저수익 플래그. + +무형자산비율 8.3%, 유형자산비율 8.8%. 자산의 대부분은 투자자산(관계기업 지분 등)이라 ROIC의 분모가 부풀어 있다. + +--- + +## 종합 판단 + +NAVER는 **영업은 훌륭하지만 영업 밖에서 가치가 파괴되는 회사**다. + +1. **긍정**: 영업률 19%, OCF 마진 30%, 순현금 4.6조, 내부유보 95%. 본업의 현금 창출력은 강하다. +2. **문제**: 영업외 손실 3111억이 영업이익 6106억의 51%를 상쇄. 유효세율 45.6%. 최종 ROE 0.56%. +3. **핵심 질문**: 해외자회사(LINE, 웹툰) 투자가 결국 가치를 만들어낼 것인가. 현재는 지분법 손실+FX 손실로 가치를 파괴하고 있다. + +스토리 요약: "국내 플랫폼 독점으로 영업률 19%를 유지하지만, 글로벌 확장(LINE, 웹툰)의 비용이 이익을 거의 전부 잡아먹고 있다. 현금은 잘 벌지만 주주에게 돌아오는 게 없다." + +--- + +## 엔진 이슈 + +### 잘 동작한 것 +- earningsPersistence의 nonOpRatio=104%가 핵심 스토리를 정확히 짚어냄 +- earningsQualityFlags "영업외손익 비중 104%"가 발동 +- OCF/순이익 579%가 "이익은 적지만 현금은 잘 번다"를 드러냄 +- capitalFlags "내부유보 95%"가 자금 구조를 한줄로 요약 + +### 문제점 + +| 항목 | 문제 | 심각도 | +|------|------|--------| +| costBreakdown 전부 0 | NAVER IS에서 원가/판관비 매핑 안 됨 → 비용구조 분석 전멸 | 치명 | +| marginTrend cogs=None, grossMargin=None | 매출총이익률 시계열 불가 | 높 | +| waterfall 매출원가 단계 누락 | 매출→영업이익 블랙박스 | 높 | +| growthQuality="균형" | 순이익 CAGR -15.8%인데 "균형" | 높 | +| revenueQuality=None | 매출 품질 지표 전부 누락 | 중 | +| segmentTrend 2023년만 | 시계열 불가 | 중 | +| 이자보상배율 2.8배 "이자 부담 과다" | 금융비용에 FX/파생 포함 → 오진 | 중 | +| payoutRatio=None, dividendsPaid=0 | CF에서 배당 누락 (별도 기준이라 연결과 다를 수 있음) | 소 | +| effectiveTaxRate 45.6% | 왜 높은지 설명이 없음 → 스토리가 끊김 | 중 | +| nonOpRatio 표현 | "일회성 이익 의존"이라 했는데 실제로는 "영업외 손실이 이익 상쇄" | 소 | + +### 가장 큰 발견 +**IT 플랫폼은 비용구조 분석이 완전히 깨진다.** costOfSales=0, sga=0, grossProfit=None. 이건 NAVER뿐 아니라 카카오, 넷마블 등 모든 IT 기업에서 같을 것이다. IS 구조가 제조업과 다르기 때문. 단순히 "매출원가"를 못 찾는 게 아니라, IT 기업은 "플랫폼비용"이나 "인건비"가 원가 역할을 하는데 이걸 매핑하는 로직이 없다. + +--- + +*분석일: 2026-03-29* +*dartlab.analysis() 14축 사용* diff --git a/src/dartlab/analysis/qa/2026-03-29_samsung.md b/src/dartlab/analysis/qa/2026-03-29_samsung.md new file mode 100644 index 0000000000000000000000000000000000000000..ea146b979ed1ec1c6cbe40bc9fc4a068d9750459 --- /dev/null +++ b/src/dartlab/analysis/qa/2026-03-29_samsung.md @@ -0,0 +1,169 @@ +# 삼성전자 (005930) 스토리 분석 -- 2026-03-29 + +> dartlab.analysis() 14축 전체를 사용한 스토리 기반 재무제표 분석. +> 데이터: DART 공시 재무제표 (2021~2025, 연간/분기) + +--- + +## 이 회사는 어떤 회사인가 (1부: 재무제표 읽기) + +### 무엇으로 돈을 버는가 (1-1 수익구조) + +삼성전자는 반도체, 스마트폰, 디스플레이, 가전을 만드는 회사다. 2025년 매출 93.8조. + +부문별로 보면 DX(스마트폰+가전) 36.4조, DS(반도체) 11.1조(2024), SDC(디스플레이) 2.9조, Harman 1.4조다. 2025년에는 사업부 재편으로 DX만 잡히고 나머지는 빠져 있지만, 과거 추이를 보면 **반도체(DS)가 수익의 핵심 엔진**이다. DS가 2022년 9.8조 → 2023년 6.7조 → 2024년 11.1조로 사이클을 그대로 탄다. + +매출총이익률 추이: 47.2%(최신) ← 38.9% ← 34.2% ← 35.6%. **바닥(2023)에서 13%p 개선**. 사이클 회복이 마진을 끌어올리고 있다. + +성장 기여도를 보면 DX 부문이 2022→2025 성장의 100%를 차지한다. 이건 DS가 2025년 수치가 빠져서 생긴 데이터 한계이지만, **DX 자체도 36.4조로 2배 가까이 성장**한 것은 사실이다. + +revenueQuality: 매출총이익률 방향 "개선". 3년 CAGR 5.84%. 분기별 매출은 2016Q1 49.8조에서 2025Q4 93.8조까지 꾸준한 우상향. + +### 돈을 어디서 조달하는가 (1-2 자금조달) + +총자산 566.9조. 자기자본 436.3조(77%). **내부유보 비중 92%** -- 이익잉여금 402.1조가 자본의 대부분이다. 외부 자본(유상증자, 차입)에 의존하지 않고 벌어서 쌓은 돈으로 운영하는 회사. + +금융부채 추이가 눈에 띈다: 2021년 4.7조 → 2022년 5.2조 → 2023년 11.4조 → 2024년 17.1조 → 2025년 24.1조. **4년 만에 5배**. 반도체 다운사이클에서도 투자를 멈추지 않았고, 2023~2024년 이익이 줄어든 구간에서 차입으로 투자를 지속한 흔적이다. + +그럼에도 순현금 포지션: 현금 57.9조 - 차입금 24.1조 = **순현금 33.8조**. 차입이 늘었지만 현금이 더 많다. + +cashFlowStructure: 2025년 영업CF 28.8조, 투자CF -31.0조, 재무CF +5.5조. 패턴="확장형 -- 영업 + 외부 조달로 적극 투자". **영업으로 버는 것보다 더 많이 투자**하고, 부족분을 차입으로 메운다. HBM/파운드리 설비투자가 본격화된 시기. + +부실 지표: Altman Z 7.97(안전), Ohlson 부실확률 0.0%, Piotroski F 7/9(재무 건전). 부실과는 거리가 멀다. + +### 조달한 돈으로 뭘 준비했는가 (1-3 자산구조) + +총자산 566.9조 중 영업자산 355.5조(62.7%), 비영업자산(현금+투자) 57.9조(10.2%). + +유형자산 215.3조(38%). 반도체 팹, 디스플레이 라인 등 장치산업의 전형. 무형자산 29.5조(5.2%)는 상대적으로 작다. + +운전자본: 매출채권 51.1조(+17.2%), 재고 52.6조(+1.7%), 매입채무 13.0조. **CCC=490일**. 매출채권 회수 199일 + 재고 보유 388일 - 매입채무 지급 96일. 반도체 장치산업 특성상 높지만, 490일은 "돈이 1.3년간 묶여 있다"는 뜻이다. + +CAPEX 12.8조. CAPEX/매출 13.6%. depreciation 데이터가 빠져(0) CAPEX/감가 비율은 산출 불가 -- 이건 데이터 갭. + +플래그: "CCC 490일 -- 현금 회수 매우 느림" + +### 실제로 현금은 어떻게 흘렀는가 (1-4 현금흐름) + +| 구분 | 2025 | 2024 | 2023 | 2022 | 2021 | +|------|------|------|------|------|------| +| 영업CF | 28.8조 | 22.0조 | 11.7조 | 16.1조 | 24.4조 | +| FCF | 16.0조 | 6.9조 | -4.9조 | -18.5조 | 9.2조 | + +2022~2023년 FCF 음수 -- 반도체 다운사이클에서 이익은 급감했는데 투자는 계속한 구간. 2024년부터 FCF가 다시 양수로 전환, 2025년 16조로 확대. + +현금 품질: OCF/순이익=146.6%. 순이익보다 영업CF가 1.5배 큰 것은 **감가상각 등 비현금 비용이 크다**는 뜻. 장치산업의 전형적 패턴이고, 이익 품질 관점에서 양호하다. + +--- + +## 얼마나 잘 벌고 있는가 (2부: 비율 분석) + +### 수익성 (2-1) + +2025년 영업률 21.4%(2024년 10.9%, 2023년 6.5% 바닥에서 회복). 순이익률 20.9%. 영업률과 순이익률이 거의 같다 -- 영업외 손익이 중립. 이건 NAVER(영업률 19% vs 순이익률 5%)와 정반대. + +waterfall: 매출 100% → 원가 -52.8% → 매출총이익 47.2% → 판관비 -25.8% → 영업이익 21.4% → 금융순이익 +0.5% → 세전 22.4% → 법인세 -1.5% → 순이익 20.9%. + +판관비가 25.8%로 상당히 크다. R&D 비용이 여기에 포함되어 있을 가능성. + +ROE 4.5%. 자기자본이 436조로 워낙 커서 ROE가 낮다. 듀퐁 분해: 영업마진 21.4% x 자산회전율 0.17 x 레버리지 1.30 x 세금부담 0.93 x 이자부담 1.05. **자산회전율(0.17)이 ROE를 깎는 주범**. 566조 자산 대비 94조 매출이니 자산 활용도가 낮다. + +### 성장성 (2-2) + +CAGR: 매출 8.8%, 영업이익 17.3%, 순이익 24.4%. growthQuality="내실 위주"(이익 CAGR > 매출 CAGR). + +영업레버리지: 2025년 8.78배(매출 +1%에 영업이익 +8.78%). 고정비 비중이 높은 반도체 산업 특성. 이건 사이클 상승기에 이익이 폭발적으로 늘고, 하강기에 폭발적으로 줄어든다는 뜻. 실제로 2023년 영업레버리지 9.03(-3.8% 매출 감소에 -34.4% 영업이익 감소). + +SGR=3.94% vs 실제성장=23.8%. gap 19.9%p. 자기자본이익률만으로는 이 성장을 지탱 못한다. 실제로 차입이 4년 만에 5배 늘었다. 이걸 1-2 자금조달에서 이미 봤다 -- **스토리가 연결**된다. + +### 안정성 (2-3) + +부채비율 29.9%(매우 안정). 순부채비율 -7.75%(순현금). 이자보상배율 4.9배(금융비용 기준). Altman Z''=4.36 안전. 부채 3기 연속 증가(+16%) 플래그가 있지만, 순현금 포지션이므로 당장 위험은 아니다. + +부채 만기: 단기 17.6조(73%), 장기 6.5조. 단기 비중이 높지만, 현금 57.9조로 즉시 상환 가능. + +### 효율성 (2-4) + +총자산회전율 0.17회. CCC=490일. 이미 1-3에서 봤듯이 반도체 장치산업의 구조적 한계. 재고회전율 0.94회(연간 1회도 안 돌림). 매출채권회전율 1.84회. 매입채무 13조는 작아서 DPO가 96일에 불과. + +--- + +## 이익의 질과 자본 활용 (3부: 심화) + +### 이익품질 (3-1) + +Sloan Accrual Ratio -1.6%(음수 = OCF > 순이익 = 보수적). Beneish M-Score -2.43(조작 가능성 낮음, -1.78 기준). 이익 변동계수 0.68(변동성 높음 -- 사이클 산업이므로 당연). + +영업이익 20.1조 중 영업외 비율 4.5%. 영업외 손익의 영향이 작다 -- 영업이 이익의 거의 전부. + +### 비용구조 (3-2) + +원가율 52.8%, 판관비율 25.8%. 영업원가율 합계 78.6%. + +DOL(영업레버리지) 8.8배. 손익분기 매출 추정 51.3조. 현재 매출 93.8조 대비 안전마진 45.4%. 매출이 45% 줄어도 BEP 이상. 다만 2023년 실제로 BEP에 근접했었다(영업률 6.5%). + +### 자본배분 (3-3) + +배당 2.5조(성향 12.5%), 연속배당 5년이지만 **3년 연속 감소**. FCF 16.0조 중 배당 2.5조 + 부채상환 0.9조 = 잔여 12.7조(79%). 주주환원/FCF=15.5%. + +자사주 매입은 400억으로 미미. CAPEX/매출 13.6%, 유보율 87.5%. **벌어서 대부분 재투자하고, 현금으로 쌓는 패턴**. + +### 투자효율 (3-4) + +ROIC=4.66%. NOPAT 18.8조 / 투하자본 402.5조(자본 436조 + 차입 24조 - 현금 58조). **ROIC가 WACC(추정 7~8%)보다 낮다면 가치파괴**. 3년 연속 저수익 플래그 발동. + +무형자산 5.2%, 유형자산 38.0%. 자산의 핵심은 유형(팹). CAPEX 12.8조가 유형자산 215조의 6%에 해당 -- 유지보수+확장 수준. + +### 재무정합성 (3-5) + +IS-CF 괴리: 순이익 19.6조 vs OCF 28.8조. 괴리 -46.6%(보수적 방향). 이익보다 현금이 더 들어온다. + +IS-BS 괴리: 매출 +23.8% vs 매출채권 +17.2% vs 재고 +1.7%. 매출이 채권/재고보다 빠르게 늘고 있다 -- 정상(공격적 매출 인식이 아님). + +유효세율 6.6%(법정 24% 대비 17.4%p 차이). 이연법인세자산 18.8조(총자산의 3.3%). 2022~2023년 적자/저이익에서 발생한 이연법인세를 활용 중. + +--- + +## 종합 판단 + +삼성전자는 **사이클 회복 중인 자본 집약형 기업**이다. + +1. **긍정**: 순현금 33.8조, 내부유보 92%, Piotroski 7/9, OCF/순이익 146%. 재무 안전성은 최상위. +2. **우려**: ROIC 4.66%(WACC 미달 가능), CCC 490일, 차입 4년간 5배 증가. 벌어서 쌓는 속도보다 투자하는 속도가 빠르다. +3. **핵심 질문**: HBM/AI 투자가 ROIC를 끌어올릴 수 있는가. 현재 402조 투하자본에서 NOPAT 18.8조는 자본비용을 못 벌고 있다. + +스토리 요약: "막대한 자산을 쌓아놓고 사이클 회복을 기다리는 회사. 사이클이 올라오면서 이익은 폭발하지만, 투하자본 대비 수익률은 아직 부족. 안전하지만 효율적이진 않다." + +--- + +## 분석 과정에서 발견한 엔진 이슈 + +### 데이터가 잘 나온 것 +- 1부→2부→3부 스토리 연결이 자연스럽다 (자금조달에서 본 차입 증가 → 성장성 SGR gap → 자본배분 잔여현금) +- waterfall로 원가→판관비→영업외→법인세 각 단계의 금액과 비율이 한눈에 보임 +- fundingSources의 내부유보/금융부채 비중이 자금 구조를 바로 보여줌 +- cashFlowStructure의 패턴 분류("확장형")가 유용 + +### 빠지거나 깨진 것 + +| 항목 | 문제 | 심각도 | +|------|------|--------| +| segmentComposition | None (부문별 영업이익 구성 없음) | 중 | +| concentration | None (매출 집중도 없음) | 중 | +| interestBurden (1-2) | None | 중 | +| liquidity (1-2) | None | 중 | +| capexPattern.depreciation | 0 (감가상각 데이터 누락) | 중 | +| 2025년 DS부문 | segmentTrend에서 null (사업부 재편 영향) | 소 | +| cashConversion (revenueQuality) | null | 소 | +| cashFlowStructure.pattern | null (패턴은 cashFlowOverview에만 있음) | 소 | + +### 스토리 분석에서 아쉬운 점 +1. **영업외 손익 분해 부재**: 영업이익→세전이익 구간을 금융비용/금융수익/지분법/기타로 쪼개는 데이터가 없음. NAVER 분석 시 치명적일 것. +2. **CAPEX 내역**: 유형자산투자/무형자산투자 분리는 있지만, 어느 사업부에 투자하는지는 안 보임 (이건 공시 데이터 한계일 수도) +3. **R&D 비용**: 판관비 25.8%가 연구개발비를 포함하는지 분리가 안 됨. 반도체 회사에서 R&D는 핵심 투자인데. +4. **배당 감소 vs 자사주**: "배당금 3년 연속 감소" 플래그만 있고, "총 주주환원(배당+자사주)은 어떤가"는 플래그에 안 잡힘 + +--- + +*분석일: 2026-03-29* +*dartlab.analysis() 14축 사용* diff --git a/src/dartlab/analysis/qa/2026-03-29_summary.md b/src/dartlab/analysis/qa/2026-03-29_summary.md new file mode 100644 index 0000000000000000000000000000000000000000..9698bd667575c838d14eef245cc371b8f75b32c2 --- /dev/null +++ b/src/dartlab/analysis/qa/2026-03-29_summary.md @@ -0,0 +1,109 @@ +# 스토리 분석 QA 종합 -- 2026-03-29 + +> 삼성전자(제조/반도체), 현대차(자동차), NAVER(IT/플랫폼) 3개 기업을 +> dartlab.analysis() 14축으로 분석한 결과 종합. + +--- + +## 1. 스토리 분석이 동작하는 것들 + +14축 구조(1부 읽기 → 2부 비율 → 3부 심화)가 의도대로 **스토리를 연결**하는 사례: + +**삼성전자**: 자금조달(차입 4년간 5배 증가) → 성장성(SGR gap +19.9%p) → 자본배분(잔여현금 79%). "차입이 늘지만 내부유보로 안전한 상태에서 사이클 회복을 기다린다"는 서사가 자연스럽게 나옴. + +**현대차**: 자산구조(매출채권 +45%) → 현금흐름(영업CF 적자) → 이익품질(IS-CF 괴리 277%, anomalyScore 55.5). "채권급증 → 현금고갈 → 차입의존"이 데이터로 연결됨. + +**NAVER**: 수익성(영업률 19% vs 순이익률 5%) → 이익품질(nonOpRatio 104%) → 재무정합성(유효세율 45.6%). "영업은 훌륭하지만 영업 밖에서 가치가 파괴된다"를 3개 축이 동시에 드러냄. + +--- + +## 2. 발견된 엔진 이슈 (우선순위순) + +### [치명] 비제조업 비용구조 전멸 + +NAVER에서 costBreakdown이 전부 0. grossMargin=None. waterfall에서 매출→영업이익이 블랙박스. IT 기업은 "매출원가/판관비"가 IS에서 분리 보고되지 않거나 다른 계정명을 사용. + +- 영향: 비용구조(3-2) 축 전체 무력화, marginTrend의 grossMargin 누락, breakevenEstimate 불가 +- 대상: IT/플랫폼 전체 (카카오, 넷마블, 크래프톤 등) +- 필요: IS 계정 매핑에서 비제조업 원가 구조(인건비, 플랫폼비용 등) 인식 + +### [높음] growthQuality 오판 + +현대차(순이익 CAGR -8.98%)와 NAVER(순이익 CAGR -15.75%)에서 모두 growthQuality="균형". 순이익이 역성장인데 "균형"은 분석가로서 납득이 안 됨. + +- 원인: 매출/영업이익 CAGR만 보고 순이익 CAGR을 growthQuality 판정에 반영하지 않는 것으로 추정 +- 필요: 순이익 CAGR이 음수이면 quality를 "이익 역성장" 등으로 별도 분류 + +### [높음] 영업CF 적자 원인 분해 부재 + +현대차에서 가장 아쉬웠던 것. 순이익 1.2조 흑자 + 영업CF -2.1조 적자인데, "왜 적자인지"를 추정만 할 수 있음. OCF = 순이익 + 비현금비용 +/- 운전자본변동인데 운전자본 변동 금액이 없음. + +- 현금흐름(1-4)에 운전자본변동 분해 추가 필요 +- 또는 이익품질(3-1)에서 accrual의 구성요소(채권변동, 재고변동, 채무변동)를 보여줘야 + +### [높음] otherAssetsPct 해석 불가 + +현대차 자산의 71%, NAVER 자산의 64%가 "기타". 이 기타가 뭔지 모르면 자산구조 스토리를 못 쓴다. 현대차는 할부채권/리스자산, NAVER는 관계기업투자/장기금융자산일 것인데, assetStructure가 이걸 분해하지 않음. + +- 필요: 금융자산, 관계기업투자, 기타 분해 (최소 2-3개 항목 추가) + +### [중간] 이자보상배율 오진 (금융비용 vs 이자비용) + +NAVER: 이자보상배율 2.8배 → "이자 부담 과다" 플래그. 실제 차입금 1.4조, 순현금 4.6조인 회사에 "이자 부담 과다"는 거짓 경보. +원인: interestExpenseSource="금융비용"에 FX/파생 손실이 포함됨. + +- 이미 interestExpenseSource 필드로 소스를 표시하고 있지만, 플래그 판정에서 이걸 고려하지 않음 +- 필요: source가 "금융비용"이고 순현금인 경우 플래그 발동 조건 조정 + +### [중간] segmentComposition/concentration 항상 None + +3개 기업 모두 segmentComposition=None, concentration=None. 부문별 영업이익 구성과 매출 집중도가 없으면 수익구조 스토리의 깊이가 얕아짐. + +### [중간] depreciation=0 (3개 기업 모두) + +capexPattern에서 depreciation이 항상 0. CAPEX/감가비율(성장투자 vs 유지투자 판별)을 못 구함. CF에서 감가상각을 select하거나 BS 유형자산 변동에서 추정해야. + +### [소소] 부문 시계열 단절 + +삼성전자: 2025년 사업부 재편으로 DS부문 null. NAVER: 2023년만 부문 데이터 있음. 부문 재편 시 시계열 연결이 안 됨 -- 이건 공시 데이터 한계이기도 함. + +--- + +## 3. 축별 커버리지 평가 + +| 축 | 삼성전자 | 현대차 | NAVER | 판정 | +|---|---------|--------|-------|------| +| 1-1 수익구조 | O (부문 일부) | O (부문 일부) | X (부문 1년만) | 보완 | +| 1-2 자금조달 | O | O | 부분 (interestBurden/liquidity None) | 양호 | +| 1-3 자산구조 | O | 기타 71% | 기타 64% | 보완 | +| 1-4 현금흐름 | O | O (원인분해 부재) | O | 양호 | +| 2-1 수익성 | O | O | X (원가 없음) | 보완 | +| 2-2 성장성 | O | growthQuality 오판 | growthQuality 오판 | 수정 | +| 2-3 안정성 | O | O | 이자보상 오진 | 수정 | +| 2-4 효율성 | O | O | CCC=None | 구조적 | +| 3-1 이익품질 | O | O | O (핵심 발견) | 양호 | +| 3-2 비용구조 | O | O | X (전부 0) | 치명 | +| 3-3 자본배분 | O | O | O | 양호 | +| 3-4 투자효율 | O | O | O | 양호 | +| 3-5 재무정합성 | O | O (anomaly 55.5 유용) | O (세율 발견) | 양호 | + +14축 중 제조업(삼성/현대)은 12~13축 동작, IT(NAVER)는 9~10축만 동작. + +--- + +## 4. 다음 개선 작업 제안 + +1. **growthQuality 판정 로직 수정** -- 순이익 CAGR 음수 시 "균형" 판정 금지 [코드 수정, 즉시 가능] +2. **이자보상배율 플래그 조건 보완** -- source가 금융비용이고 순현금이면 플래그 억제 [코드 수정, 즉시 가능] +3. **비제조업 비용구조** -- IS 매핑에서 매출원가/판관비 대안 계정 인식 [데이터 조사 필요] +4. **영업CF 운전자본 분해** -- CF에서 운전자본변동 항목 select [데이터 조사 필요] +5. **자산구조 기타 분해** -- 금융자산/관계기업투자 분리 [BS select 확장] +6. **depreciation 데이터** -- CF 또는 주석에서 감가상각 추출 [데이터 소스 확인 필요] + +1, 2번은 로직 수정으로 즉시 가능. 3~6번은 데이터 소스 확인이 선행되어야 함. + +--- + +*분석일: 2026-03-29* +*분석 대상: 삼성전자(005930), 현대자동차(005380), NAVER(035420)* +*총 42회 analysis() 호출 (14축 x 3기업)* diff --git a/src/dartlab/analysis/valuation/__init__.py b/src/dartlab/analysis/valuation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/dartlab/analysis/valuation/__pycache__/__init__.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c91dc1d1d7c767ef3fea95c26eeaa9b340a89a6 Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/crossRegression.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/crossRegression.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30973a871a4bad42c3aae96c49464c1a52deeb47 Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/crossRegression.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/fmt.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/fmt.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19f104ffa2f86583c48828ffa60f71f3994e1bef Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/fmt.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/pricetarget.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/pricetarget.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4913b225c7582453a61025537431229501d9fcdc Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/pricetarget.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/residualIncome.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/residualIncome.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86b88e2d028404be36b242fc095721daeb6c953e Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/residualIncome.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/synthesizer.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/synthesizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2989af5711970981e2e2ce6c26344d3dc0e76dfb Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/synthesizer.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/types.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/types.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b834eef4f2b7b8fc97f7dcac33af7df45dce5bac Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/types.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/__pycache__/valuation.cpython-312.pyc b/src/dartlab/analysis/valuation/__pycache__/valuation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..066dc1bd5cab061c71a5901a4a5734f20bdbb946 Binary files /dev/null and b/src/dartlab/analysis/valuation/__pycache__/valuation.cpython-312.pyc differ diff --git a/src/dartlab/analysis/valuation/analyst.py b/src/dartlab/analysis/valuation/analyst.py new file mode 100644 index 0000000000000000000000000000000000000000..23189f6c7492e7bdb51a6d054aa2730994bbced2 --- /dev/null +++ b/src/dartlab/analysis/valuation/analyst.py @@ -0,0 +1,243 @@ +"""Analyst 엔진 — 멀티소스 종합 분석. + +Usage:: + + from dartlab.analysis.valuation.analyst import Analyst + + a = Analyst() + report = a.report(company, current_price=200000) + print(report) +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from dartlab.gather import Gather, MarketSnapshot + +from .synthesizer import synthesize +from .types import AnalystReport, ValuationMethod + +if TYPE_CHECKING: + from dartlab.analysis.forecast.forecast import ( + ForecastResult, + ScenarioResult, + SensitivityResult, + ) + from dartlab.analysis.forecast.revenueForecast import RevenueForecastResult + from dartlab.analysis.forecast.simulation import ( + MonteCarloResult, + SimulationResult, + StressTestResult, + ) + from dartlab.analysis.valuation.valuation import ( + DCFResult, + DDMResult, + RelativeValuationResult, + ValuationSummary, + ) + +log = logging.getLogger(__name__) + + +class Analyst: + """종합 애널리스트 엔진 facade. + + DCF + 시장 데이터 → 가중평균 목표가 + 투자의견. + + Example:: + + a = Analyst() + report = a.report(company, current_price=200000) + print(report.target_price) + print(report.opinion) + """ + + def __init__(self, gather: Gather | None = None) -> None: + self._gather = gather or Gather() + self._owns_gather = gather is None + + def report( + self, + company=None, + *, + stock_code: str = "", + company_name: str = "", + current_price: float = 0.0, + dcf_target: float | None = None, + dcf_confidence: float = 0.5, + shares: int = 0, + company_financials: dict | None = None, + market_snapshot: MarketSnapshot | None = None, + ) -> AnalystReport: + """종합 애널리스트 리포트 생성. + + Args: + company: dartlab Company 객체 (있으면 자동 추출). + stock_code: 종목코드. + company_name: 회사명. + current_price: 현재 주가 (0이면 시장에서 수집). + dcf_target: DCF 목표가 (없으면 DCF skip). + dcf_confidence: DCF 신뢰도. + shares: 발행주식수. + company_financials: EPS/BPS/EBITDA dict. + market_snapshot: 미리 수집한 MarketSnapshot. + + Returns: + AnalystReport. + """ + # Company 객체에서 자동 추출 + if company is not None: + stock_code, company_name, shares, company_financials = _extract_from_company( + company, stock_code, company_name, shares, company_financials + ) + + # 시장 데이터 수집 + if market_snapshot is None and stock_code: + try: + snap = self._gather.collect(stock_code) + market_snapshot = snap.to_market_snapshot() + except OSError as exc: + log.warning("시장 데이터 수집 실패: %s", exc) + + # 현재가 — 시장에서 가져오기 + if current_price <= 0 and market_snapshot and market_snapshot.current_price > 0: + current_price = market_snapshot.current_price + + # 합성 + return synthesize( + dcf_target=dcf_target, + dcf_confidence=dcf_confidence, + market=market_snapshot, + company_financials=company_financials, + shares=shares, + current_price=current_price, + company_name=company_name, + stock_code=stock_code, + ) + + def collect_market(self, stock_code: str) -> MarketSnapshot: + """시장 데이터만 수집.""" + return self._gather.collect(stock_code).to_market_snapshot() + + def close(self) -> None: + """리소스 정리.""" + if self._owns_gather: + self._gather.close() + + def __repr__(self) -> str: + return "Analyst()" + + +def _extract_from_company( + company, + stock_code: str, + company_name: str, + shares: int, + financials: dict | None, +) -> tuple[str, str, int, dict | None]: + """Company 객체에서 필요한 정보 추출.""" + # 종목코드 + if not stock_code: + try: + stock_code = getattr(company, "stockCode", "") or getattr(company, "stock_code", "") + except AttributeError: + pass + + # 회사명 + if not company_name: + try: + company_name = getattr(company, "name", "") or "" + except AttributeError: + pass + + # 발행주식수 + if shares <= 0: + try: + profile = getattr(company, "profile", None) + if profile: + shares_val = getattr(profile, "sharesOutstanding", 0) + if shares_val: + shares = int(shares_val) + except (AttributeError, TypeError, ValueError): + pass + + # 재무 데이터 — EPS, BPS + if financials is None: + financials = {} + try: + ratios = company._finance.ratios + if ratios is not None: + eps = ratios.get("eps") if isinstance(ratios, dict) else getattr(ratios, "eps", None) + bps = ratios.get("bps") if isinstance(ratios, dict) else getattr(ratios, "bps", None) + if eps: + financials["eps"] = float(eps) + if bps: + financials["bps"] = float(bps) + except (AttributeError, TypeError, ValueError): + pass + + return stock_code, company_name, shares, financials or None + + +__all__ = [ + "Analyst", + "AnalystReport", + "ValuationMethod", + # forecast + "ForecastResult", + "ScenarioResult", + "SensitivityResult", + # valuation + "DCFResult", + "DDMResult", + "RelativeValuationResult", + "ValuationSummary", + # simulation + "SimulationResult", + "MonteCarloResult", + "StressTestResult", + # revenue forecast + "RevenueForecastResult", +] + + +# ── lazy re-export (순환 의존 방지) ── + +_LAZY_MAP: dict[str, tuple[str, str]] = { + "ForecastResult": ("dartlab.analysis.forecast.forecast", "ForecastResult"), + "ScenarioResult": ("dartlab.analysis.forecast.forecast", "ScenarioResult"), + "SensitivityResult": ("dartlab.analysis.forecast.forecast", "SensitivityResult"), + "forecastMetric": ("dartlab.analysis.forecast.forecast", "forecastMetric"), + "forecastAll": ("dartlab.analysis.forecast.forecast", "forecastAll"), + "scenarioAnalysis": ("dartlab.analysis.forecast.forecast", "scenarioAnalysis"), + "sensitivityAnalysis": ("dartlab.analysis.forecast.forecast", "sensitivityAnalysis"), + "RevenueForecastResult": ("dartlab.analysis.forecast.revenueForecast", "RevenueForecastResult"), + "forecastRevenue": ("dartlab.analysis.forecast.revenueForecast", "forecastRevenue"), + "SimulationResult": ("dartlab.analysis.forecast.simulation", "SimulationResult"), + "MonteCarloResult": ("dartlab.analysis.forecast.simulation", "MonteCarloResult"), + "StressTestResult": ("dartlab.analysis.forecast.simulation", "StressTestResult"), + "simulateScenario": ("dartlab.analysis.forecast.simulation", "simulateScenario"), + "simulateAllScenarios": ("dartlab.analysis.forecast.simulation", "simulateAllScenarios"), + "monteCarloForecast": ("dartlab.analysis.forecast.simulation", "monteCarloForecast"), + "stressTest": ("dartlab.analysis.forecast.simulation", "stressTest"), + "DCFResult": ("dartlab.core.finance.dcf", "DCFResult"), + "DDMResult": ("dartlab.core.finance.dcf", "DDMResult"), + "RelativeValuationResult": ("dartlab.core.finance.dcf", "RelativeValuationResult"), + "ValuationSummary": ("dartlab.core.finance.dcf", "ValuationSummary"), + "dcfValuation": ("dartlab.core.finance.dcf", "dcfValuation"), + "ddmValuation": ("dartlab.core.finance.dcf", "ddmValuation"), + "fullValuation": ("dartlab.core.finance.dcf", "fullValuation"), + "relativeValuation": ("dartlab.core.finance.dcf", "relativeValuation"), +} + + +def __getattr__(name: str): + if name in _LAZY_MAP: + import importlib + + modPath, attr = _LAZY_MAP[name] + mod = importlib.import_module(modPath) + return getattr(mod, attr) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/dartlab/analysis/valuation/crossRegression.py b/src/dartlab/analysis/valuation/crossRegression.py new file mode 100644 index 0000000000000000000000000000000000000000..d56e3f30a1ecb7775e730b172607b9de659e597c --- /dev/null +++ b/src/dartlab/analysis/valuation/crossRegression.py @@ -0,0 +1,392 @@ +"""횡단면·패널 회귀 매출 예측 엔진. + +횡단면 회귀: 같은 시점에 전 상장사 데이터를 모아 매출 성장률을 설명. +패널 회귀: 여러 연도를 쌓아 기업 고정효과(fixed effect)로 기업 특성 통제. + +모든 구현은 순수 Python (외부 ML 라이브러리 의존 없음). +사전 적합(pre-fit) 후 JSON 캐시 → 개별 기업 예측은 즉시 계산. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from pathlib import Path + +from dartlab.core.finance.ols import olsMulti as _olsMulti + +log = logging.getLogger(__name__) + +_MODEL_CACHE_DIR = Path.home() / ".dartlab" / "models" + +# 횡단면 회귀 피처 목록 (순서 고정) +FEATURES = [ + "per", + "pbr", + "lnMarketCap", + "operatingMargin", + "capexRatio", + "debtRatio", + "foreignHoldingRatio", + "revenueGrowthLag", +] + + +@dataclass +class CrossSectionModel: + """횡단면 회귀 모델 — 사전 적합 결과.""" + + year: int + coefficients: list[float] # [intercept, β1, β2, ..., β_sector1, ...] + featureNames: list[str] # FEATURES + 섹터 더미 이름 + rSquared: float + adjRSquared: float + nObs: int + sectorNames: list[str] # 섹터 더미로 사용된 섹터 목록 (첫 번째 제외 = reference) + warnings: list[str] = field(default_factory=list) + + def predict(self, features: dict[str, float], sector: str = "") -> float | None: + """개별 기업의 매출 성장률 예측 (%). + + features dict 키: FEATURES 목록과 동일. + sector: WICS 업종명 (sectorNames에 있으면 해당 더미 1, 아니면 0). + """ + if not self.coefficients: + return None + + x = [] + for fname in FEATURES: + v = features.get(fname) + if v is None: + return None + x.append(v) + + # 섹터 더미 + for sname in self.sectorNames: + x.append(1.0 if sector == sname else 0.0) + + if len(x) + 1 != len(self.coefficients): + log.debug("피처 수 불일치: x=%d, coeffs=%d", len(x) + 1, len(self.coefficients)) + return None + + # intercept + β·x + pred = self.coefficients[0] + for i, xi in enumerate(x): + pred += self.coefficients[i + 1] * xi + + return pred + + +@dataclass +class PanelModel: + """패널 회귀 모델 (기업 고정효과).""" + + coefficients: list[float] # [β1, β2, ...] (절편 없음 — demeaned) + featureNames: list[str] + rSquared: float + nObs: int + nFirms: int + firmIntercepts: dict[str, float] # 기업별 절편 (고정효과) + grandMean: float # 전체 평균 성장률 + + def predict(self, stockCode: str, features: dict[str, float]) -> float | None: + """기업 고정효과 + 변수 효과 → 매출 성장률 예측.""" + x = [] + for fname in self.featureNames: + v = features.get(fname) + if v is None: + return None + x.append(v) + + if len(x) != len(self.coefficients): + return None + + alpha = self.firmIntercepts.get(stockCode, self.grandMean) + pred = alpha + for i, xi in enumerate(x): + pred += self.coefficients[i] * xi + return pred + + +@dataclass +class CompanyFeatures: + """횡단면/패널 회귀에 사용할 기업별 피처.""" + + stockCode: str + year: int + sector: str + revenueGrowth: float # 종속변수: 매출 성장률 (%) + per: float + pbr: float + lnMarketCap: float + operatingMargin: float + capexRatio: float + debtRatio: float + foreignHoldingRatio: float + revenueGrowthLag: float # 전년 매출 성장률 + + def toFeatureDict(self) -> dict[str, float]: + """FEATURES 순서에 맞는 dict 반환.""" + return { + "per": self.per, + "pbr": self.pbr, + "lnMarketCap": self.lnMarketCap, + "operatingMargin": self.operatingMargin, + "capexRatio": self.capexRatio, + "debtRatio": self.debtRatio, + "foreignHoldingRatio": self.foreignHoldingRatio, + "revenueGrowthLag": self.revenueGrowthLag, + } + + def toFeatureVector(self) -> list[float]: + """FEATURES 순서의 float 리스트.""" + d = self.toFeatureDict() + return [d[f] for f in FEATURES] + + +# ══════════════════════════════════════════════════════════ +# 횡단면 회귀 적합 +# ══════════════════════════════════════════════════════════ + + +def fitCrossSection( + observations: list[CompanyFeatures], + *, + minObs: int = 30, + winsorize: float = 0.02, +) -> CrossSectionModel | None: + """전 상장사 횡단면 회귀 적합. + + Parameters + ---------- + observations : 같은 연도의 CompanyFeatures 리스트 + minObs : 최소 관측치 수 + winsorize : 양쪽 꼬리 절사 비율 (기본 2%) + """ + if len(observations) < minObs: + return None + + year = observations[0].year + + # 이상치 제거 (winsorize) + obs = _winsorizeObs(observations, winsorize) + + # 섹터 더미 구성 + sectors = sorted({o.sector for o in obs if o.sector}) + sectors[0] if sectors else "" + sectorDummies = sectors[1:] if len(sectors) > 1 else [] + + # X, y 구성 + X: list[list[float]] = [] + y: list[float] = [] + for o in obs: + row = o.toFeatureVector() + # 섹터 더미 추가 + for sd in sectorDummies: + row.append(1.0 if o.sector == sd else 0.0) + X.append(row) + y.append(o.revenueGrowth) + + # OLS 적합 + result = _olsMulti(X, y, addIntercept=True) + if result is None: + return None + + featureNames = list(FEATURES) + sectorDummies + warnings: list[str] = [] + if result.rSquared < 0.05: + warnings.append(f"R² 매우 낮음 ({result.rSquared:.3f}) — 예측력 제한적") + + return CrossSectionModel( + year=year, + coefficients=result.coefficients, + featureNames=featureNames, + rSquared=result.rSquared, + adjRSquared=result.adjRSquared, + nObs=result.nObs, + sectorNames=sectorDummies, + warnings=warnings, + ) + + +# ══════════════════════════════════════════════════════════ +# 패널 회귀 적합 (기업 고정효과) +# ══════════════════════════════════════════════════════════ + + +def fitPanel( + observations: list[CompanyFeatures], + *, + minObs: int = 50, + minYears: int = 3, +) -> PanelModel | None: + """패널 회귀 (within estimator — 기업 고정효과). + + 각 변수에서 기업 평균을 빼고(demean) OLS 적합. + 기업별 절편(αi) = 기업 평균 y - β · 기업 평균 X. + """ + if len(observations) < minObs: + return None + + years = {o.year for o in observations} + if len(years) < minYears: + return None + + # 기업별 그룹핑 + firmObs: dict[str, list[CompanyFeatures]] = {} + for o in observations: + firmObs.setdefault(o.stockCode, []).append(o) + + # 기업별 평균 계산 + demeaning + xDemeaned: list[list[float]] = [] + yDemeaned: list[float] = [] + firmMeanY: dict[str, float] = {} + firmMeanX: dict[str, list[float]] = {} + + for code, oList in firmObs.items(): + if len(oList) < 2: + continue + + # 기업 평균 + nF = len(FEATURES) + meanX = [0.0] * nF + meanY = 0.0 + for o in oList: + vec = o.toFeatureVector() + for j in range(nF): + meanX[j] += vec[j] + meanY += o.revenueGrowth + cnt = len(oList) + if cnt == 0: + continue + meanX = [v / cnt for v in meanX] + meanY /= cnt + + firmMeanY[code] = meanY + firmMeanX[code] = meanX + + # demeaning + for o in oList: + vec = o.toFeatureVector() + xDemeaned.append([vec[j] - meanX[j] for j in range(nF)]) + yDemeaned.append(o.revenueGrowth - meanY) + + if len(xDemeaned) < minObs: + return None + + # demeaned OLS (절편 없음) + result = _olsMulti(xDemeaned, yDemeaned, addIntercept=False) + if result is None: + return None + + # 기업별 절편 복원: αi = meanY_i - β · meanX_i + firmIntercepts: dict[str, float] = {} + for code in firmMeanY: + alpha = firmMeanY[code] + for j, beta in enumerate(result.coefficients): + alpha -= beta * firmMeanX[code][j] + firmIntercepts[code] = alpha + + grandMean = sum(firmMeanY.values()) / len(firmMeanY) if firmMeanY else 0.0 + + return PanelModel( + coefficients=result.coefficients, + featureNames=list(FEATURES), + rSquared=result.rSquared, + nObs=result.nObs, + nFirms=len(firmMeanY), + firmIntercepts=firmIntercepts, + grandMean=grandMean, + ) + + +# ══════════════════════════════════════════════════════════ +# 모델 캐시 (일 1회 사전 적합 → JSON → 즉시 로드) +# ══════════════════════════════════════════════════════════ + + +def saveModel(model: CrossSectionModel) -> Path: + """횡단면 모델 JSON 저장.""" + _MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True) + path = _MODEL_CACHE_DIR / f"crossSection_{model.year}.json" + data = { + "year": model.year, + "coefficients": model.coefficients, + "featureNames": model.featureNames, + "rSquared": model.rSquared, + "adjRSquared": model.adjRSquared, + "nObs": model.nObs, + "sectorNames": model.sectorNames, + "warnings": model.warnings, + } + path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8") + log.info("CrossSection 모델 저장: %s (%d obs, R²=%.3f)", path, model.nObs, model.rSquared) + return path + + +def loadModel(year: int) -> CrossSectionModel | None: + """캐시된 횡단면 모델 로드.""" + path = _MODEL_CACHE_DIR / f"crossSection_{year}.json" + if not path.exists(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + return CrossSectionModel(**data) + except (json.JSONDecodeError, TypeError, KeyError) as e: + log.debug("CrossSection 모델 로드 실패: %s — %s", path, e) + return None + + +def savePanelModel(model: PanelModel) -> Path: + """패널 모델 JSON 저장.""" + _MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True) + path = _MODEL_CACHE_DIR / "panel_latest.json" + data = { + "coefficients": model.coefficients, + "featureNames": model.featureNames, + "rSquared": model.rSquared, + "nObs": model.nObs, + "nFirms": model.nFirms, + "firmIntercepts": model.firmIntercepts, + "grandMean": model.grandMean, + } + path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8") + log.info("Panel 모델 저장: %s (%d obs, %d firms)", path, model.nObs, model.nFirms) + return path + + +def loadPanelModel() -> PanelModel | None: + """캐시된 패널 모델 로드.""" + path = _MODEL_CACHE_DIR / "panel_latest.json" + if not path.exists(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + return PanelModel(**data) + except (json.JSONDecodeError, TypeError, KeyError) as e: + log.debug("Panel 모델 로드 실패: %s — %s", path, e) + return None + + +# ══════════════════════════════════════════════════════════ +# 내부 유틸 +# ══════════════════════════════════════════════════════════ + + +def _winsorizeObs( + obs: list[CompanyFeatures], + pct: float, +) -> list[CompanyFeatures]: + """종속변수(revenueGrowth) 양쪽 꼬리 절사.""" + if pct <= 0 or len(obs) < 10: + return obs + + growths = sorted(o.revenueGrowth for o in obs) + n = len(growths) + loIdx = max(int(n * pct), 1) + hiIdx = min(int(n * (1 - pct)), n - 1) + lo = growths[loIdx] + hi = growths[hiIdx] + + return [o for o in obs if lo <= o.revenueGrowth <= hi] diff --git a/src/dartlab/analysis/valuation/fmt.py b/src/dartlab/analysis/valuation/fmt.py new file mode 100644 index 0000000000000000000000000000000000000000..b105524d114945a42ba7ca9d4086fd01e14f21ac --- /dev/null +++ b/src/dartlab/analysis/valuation/fmt.py @@ -0,0 +1,5 @@ +"""하위호환 re-export -- 실제 구현은 core/finance/fmt.py.""" + +from dartlab.core.finance.fmt import fmtBig, fmtPrice, fmtUnit + +__all__ = ["fmtBig", "fmtPrice", "fmtUnit"] diff --git a/src/dartlab/analysis/valuation/priceImplied.py b/src/dartlab/analysis/valuation/priceImplied.py new file mode 100644 index 0000000000000000000000000000000000000000..c892fc21713e36bc17141ed00f11f7dab313c4df --- /dev/null +++ b/src/dartlab/analysis/valuation/priceImplied.py @@ -0,0 +1,3 @@ +"""하위호환 re-export -- 실제 구현은 core/finance/priceImplied.py.""" + +from dartlab.core.finance.priceImplied import * # noqa: F401,F403 diff --git a/src/dartlab/analysis/valuation/pricetarget.py b/src/dartlab/analysis/valuation/pricetarget.py new file mode 100644 index 0000000000000000000000000000000000000000..335feed5919c89a6615f0744c2088d33ef0ffc9b --- /dev/null +++ b/src/dartlab/analysis/valuation/pricetarget.py @@ -0,0 +1,583 @@ +"""확률 가중 주가 목표가 엔진. + +5개 매크로 시나리오 × pro-forma → DCF → Monte Carlo → 투자 신호. + +외부 의존성 제로 (random, math 모듈만 사용). +""" + +from __future__ import annotations + +import math +import random +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from dartlab.analysis.forecast.prediction import adjustProbabilities +from dartlab.core.finance.extract import getLatest, getTTM +from dartlab.core.finance.proforma import ( + build_proforma, + compute_company_wacc, + extract_historical_ratios, +) +from dartlab.core.finance.scenario import ( + PRESET_SCENARIOS, + MacroScenario, + SectorElasticity, + getElasticity, + getNoiseSigma, +) + +if TYPE_CHECKING: + from dartlab.analysis.forecast.prediction import ContextSignals + from dartlab.core.finance.proforma import ProFormaResult + + +# ══════════════════════════════════════ +# Cholesky 분해 (순수 Python -- numpy 의존 없음) +# ══════════════════════════════════════ + + +def _choleskyDecompose(matrix: list[list[float]]) -> list[list[float]]: + """상관행렬 → 하삼각 행렬 L (L × L^T = matrix).""" + n = len(matrix) + L = [[0.0] * n for _ in range(n)] + for i in range(n): + for j in range(i + 1): + s = sum(L[i][k] * L[j][k] for k in range(j)) + if i == j: + val = matrix[i][i] - s + L[i][j] = math.sqrt(max(val, 0.0)) + else: + L[i][j] = (matrix[i][j] - s) / L[j][j] if L[j][j] > 0 else 0.0 + return L + + +def _choleskyMultiply(L: list[list[float]], z: list[float]) -> list[float]: + """L × z = 상관된 난수 벡터.""" + n = len(z) + result = [0.0] * n + for i in range(n): + result[i] = sum(L[i][j] * z[j] for j in range(i + 1)) + return result + + +# ══════════════════════════════════════ +# 데이터 구조 +# ══════════════════════════════════════ + + +@dataclass +class ScenarioPriceTarget: + """단일 시나리오의 주가 목표.""" + + scenario_name: str + probability: float + proforma: ProFormaResult + enterprise_value: float + equity_value: float + per_share_value: float + wacc_used: float + terminal_growth: float + implied_per: float | None + + +@dataclass +class PriceTargetResult: + """확률 가중 주가 목표가 전체 결과.""" + + scenarios: list[ScenarioPriceTarget] + weighted_target: float + percentiles: dict[str, float] # p10, p25, p50, p75, p90 + expected_value: float + current_price: float | None + upside_pct: float | None + probability_above_current: float | None + signal: str # strong_buy / buy / hold / sell / strong_sell + confidence: str # high / medium / low + wacc_details: dict[str, float] + warnings: list[str] = field(default_factory=list) + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + lines = ["[확률 가중 주가 목표가]"] + lines.append(f" 가중 목표가: {self.weighted_target:,.0f}원") + if self.current_price: + lines.append(f" 현재가: {self.current_price:,.0f}원") + if self.upside_pct is not None: + lines.append(f" 업사이드: {self.upside_pct:+.1f}%") + if self.probability_above_current is not None: + lines.append(f" 현재가 상회 확률: {self.probability_above_current:.0f}%") + lines.append(f" 투자 신호: {self.signal}") + lines.append(f" 신뢰도: {self.confidence}") + lines.append("") + + # 시나리오별 + lines.append(" === 시나리오별 목표가 ===") + lines.append(" 시나리오 | 확률 | 목표가 | EV") + lines.append(" -----------------|-------|--------------|----------") + for s in self.scenarios: + lines.append( + f" {s.scenario_name:<16s} | {s.probability * 100:4.0f}% " + f"| {s.per_share_value:>12,.0f}원 | {s.enterprise_value / 1e8:>8,.0f}억" + ) + + # 분포 + lines.append("") + lines.append(" === Monte Carlo 분포 ===") + for k, v in sorted(self.percentiles.items()): + lines.append(f" {k}: {v:,.0f}원") + + if self.warnings: + lines.append("") + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f"\n ※ {self.DISCLAIMER}") + return "\n".join(lines) + + +# ══════════════════════════════════════ +# 시나리오 확률 가중치 +# ══════════════════════════════════════ + +SCENARIO_PROBABILITIES: dict[str, float] = { + "baseline": 0.40, + "rate_hike": 0.20, + "china_slowdown": 0.15, + "semiconductor_down": 0.15, + "adverse": 0.10, +} + + +# ══════════════════════════════════════ +# 핵심 함수 +# ══════════════════════════════════════ + + +def _derive_revenue_path_from_macro( + base_growth: float, + scenario: MacroScenario, + elasticity: SectorElasticity, + years: int = 5, +) -> list[float]: + """매크로 시나리오 + 섹터 감응도 → 5년 매출 성장률 경로. + + baseline GDP 대비 시나리오 GDP 차이에 β를 곱해 성장률 조정. + """ + baseline_gdp = PRESET_SCENARIOS["baseline"].gdpGrowth + path = [] + for i in range(years): + gdp_idx = min(i, len(scenario.gdpGrowth) - 1) + base_gdp_idx = min(i, len(baseline_gdp) - 1) + + gdp_delta = scenario.gdpGrowth[gdp_idx] - baseline_gdp[base_gdp_idx] + fx_delta = 0.0 + if i < len(scenario.krwUsd) and scenario.krwUsd[0] > 0: + fx_pct = (scenario.krwUsd[min(i, len(scenario.krwUsd) - 1)] / scenario.krwUsd[0] - 1) * 100 + fx_delta = fx_pct / 10 * elasticity.revenueToFx # 환율 10% 변화당 + + growth = base_growth + gdp_delta * elasticity.revenueToGdp + fx_delta + # mean reversion: 극단 시나리오는 3~5년차에 baseline 복귀 경향 + if i >= 2: + growth = growth * 0.7 + base_growth * 0.3 + if i >= 4: + growth = growth * 0.5 + base_growth * 0.5 + + path.append(round(growth, 2)) + return path + + +def _dcf_from_proforma( + proforma: ProFormaResult, + wacc: float, + terminal_growth: float = 2.0, + shares: int | None = None, +) -> tuple[float, float, float]: + """Pro-forma FCF → Terminal Value → EV → Equity → 주당가치. + + Returns: + (enterprise_value, equity_value, per_share_value) + """ + if not proforma.projections: + return 0.0, 0.0, 0.0 + + discount_rate = wacc / 100 + tg = terminal_growth / 100 + + # FCF 현가 합 + pv_fcf = 0.0 + for p in proforma.projections: + df = (1 + discount_rate) ** p.year_offset + pv_fcf += p.fcf / df + + last = proforma.projections[-1] + last_year = last.year_offset + + # FCF 양수면 Gordon Growth, 음수면 EBITDA exit multiple fallback + if last.fcf > 0: + if discount_rate > tg: + terminal_value = last.fcf * (1 + tg) / (discount_rate - tg) + else: + terminal_value = last.fcf * 20 + pv_tv = terminal_value / ((1 + discount_rate) ** last_year) + enterprise_value = pv_fcf + pv_tv + else: + # CAPEX 집약 산업 — EBITDA exit multiple로 전체 EV 대체 + # FCF PV가 음수인 경우 누적 FCF를 사용하면 EBITDA TV를 압도하므로, + # EBITDA 기반 순수 EV 계산으로 전환 + ebitda = last.ebitda if last.ebitda > 0 else last.operating_income + if ebitda > 0: + exit_multiple = max(6.0, min(1 / discount_rate, 15.0)) + terminal_value = ebitda * exit_multiple + pv_tv = terminal_value / ((1 + discount_rate) ** last_year) + enterprise_value = pv_tv # FCF PV 무시, EBITDA exit만 사용 + elif last.revenue > 0: + # EBITDA 음수 기업 — EV/Sales fallback (보수적) + # 적자 지속 → 낮은 배수. 시나리오별 매출 차이는 반영 + sales_multiple = max(0.2, min(1 / discount_rate * 0.08, 0.8)) + terminal_value = last.revenue * sales_multiple + pv_tv = terminal_value / ((1 + discount_rate) ** last_year) + enterprise_value = pv_tv + else: + enterprise_value = 0 + + # EV → Equity + total_debt = proforma.base_year.get("total_debt", 0) + cash = proforma.base_year.get("cash", 0) + equity_value = enterprise_value - total_debt + cash + + # 주당가치 + if shares and shares > 0: + per_share = equity_value / shares + else: + per_share = equity_value # 주식수 없으면 총 equity 반환 + + return enterprise_value, equity_value, max(per_share, 0) + + +def _monte_carlo_price_distribution( + series: dict, + base_growth: float, + elasticity: SectorElasticity, + wacc: float, + terminal_growth: float, + shares: int | None, + iterations: int = 5000, + seed: int | None = None, + size_class: str = "Mid", +) -> tuple[dict[str, float], float | None, list[float]]: + """v2 Multi-Noise MC — 5변수 동시 noise + NWC + sizeClass σ. + + 5개 변수: growth, margin, wacc, capex, tax + sizeClass별 σ 차등: Small 1.5x, Mid 1.0x, Large 0.8x + + Returns: + (percentiles, probability_above_current, all_values) + """ + if seed is not None: + random.seed(seed) + + ratios = extract_historical_ratios(series) + base = { + "revenue": getTTM(series, "IS", "sales") or 0, + "cash": getLatest(series, "BS", "cash_and_cash_equivalents") or 0, + "total_debt": ( + (getLatest(series, "BS", "shortterm_borrowings") or 0) + + (getLatest(series, "BS", "longterm_borrowings") or 0) + + (getLatest(series, "BS", "debentures") or 0) + ), + } + if base["revenue"] <= 0: + return {}, None, [] + + discount_rate = wacc / 100 + tg = terminal_growth / 100 + values: list[float] = [] + + # v3: sizeClass별 sigma + Cholesky 상관 + sigma_growth = getNoiseSigma("growth", size_class) + sigma_margin = getNoiseSigma("margin", size_class) + sigma_wacc = getNoiseSigma("wacc", size_class) + sigma_capex = getNoiseSigma("capex", size_class) + sigma_tax = getNoiseSigma("tax", size_class) + sigmas = [sigma_growth, sigma_margin, sigma_wacc, sigma_capex, sigma_tax] + + # 상관행렬: growth-margin 동조, wacc-growth 역상관 + # [growth, margin, wacc, capex, tax] + corr = [ + [1.0, 0.4, -0.3, 0.2, 0.0], # growth + [0.4, 1.0, -0.2, 0.0, 0.0], # margin (경기 좋으면 마진도 개선) + [-0.3, -0.2, 1.0, 0.0, 0.1], # wacc (금리 상승 시 성장 둔화) + [0.2, 0.0, 0.0, 1.0, 0.0], # capex + [0.0, 0.0, 0.1, 0.0, 1.0], # tax + ] + chol = _choleskyDecompose(corr) + + for _ in range(iterations): + # v3: Cholesky 기반 상관 noise + z = [random.gauss(0, 1) for _ in range(5)] + correlated = _choleskyMultiply(chol, z) + growth_noise = correlated[0] * sigmas[0] + margin_noise = correlated[1] * sigmas[1] + wacc_noise = correlated[2] * sigmas[2] + capex_noise = correlated[3] * sigmas[3] + tax_noise = correlated[4] * sigmas[4] + + noisy_growth = base_growth + growth_noise + noisy_margin = max(5.0, min(ratios.gross_margin + margin_noise, 80.0)) + noisy_wacc = max(0.03, min(discount_rate + wacc_noise / 100, 0.25)) + noisy_capex = max(0.5, ratios.capex_to_revenue + capex_noise) + noisy_tax = max(5.0, min(ratios.effective_tax_rate + tax_noise, 50.0)) + + # v2: IS + NWC 반영 5년 DCF + rev = base["revenue"] + fcf_pv = 0.0 + last_fcf = 0.0 + last_ebitda = 0.0 + prev_nwc = ( + rev * ratios.receivables_to_revenue / 100 + + rev * ratios.inventory_to_revenue / 100 + - rev * ratios.payables_to_revenue / 100 + ) + for yr in range(1, 6): + rev = rev * (1 + noisy_growth / 100) + gross = rev * noisy_margin / 100 + dep = rev * ratios.depreciation_ratio / 100 + # v3: IS 구조 분기 — D&A가 SGA에 포함된 경우 별도 차감하지 않음 + if ratios.dep_in_sga: + op_income = gross - rev * ratios.sga_ratio / 100 + else: + op_income = gross - rev * ratios.sga_ratio / 100 - dep + ebitda = op_income + dep + ebt = max(op_income, 0) + ni = ebt * (1 - noisy_tax / 100) + capex = rev * noisy_capex / 100 + # v2: NWC 변동 반영 + nwc = ( + rev * ratios.receivables_to_revenue / 100 + + rev * ratios.inventory_to_revenue / 100 + - rev * ratios.payables_to_revenue / 100 + ) + delta_nwc = nwc - prev_nwc + fcf = ni + dep - delta_nwc - capex + df = (1 + noisy_wacc) ** yr + fcf_pv += fcf / df + last_fcf = fcf + last_ebitda = ebitda + prev_nwc = nwc + + # Terminal Value — FCF 음수 시 EBITDA exit multiple fallback + if last_fcf > 0: + if noisy_wacc > tg: + tv = last_fcf * (1 + tg) / (noisy_wacc - tg) + else: + tv = last_fcf * 20 + elif last_ebitda > 0: + exit_mult = max(6.0, min(1 / noisy_wacc, 15.0)) + tv = last_ebitda * exit_mult + elif rev > 0: + # EBITDA 음수 — EV/Sales fallback (보수적) + sales_mult = max(0.2, min(1 / noisy_wacc * 0.08, 0.8)) + tv = rev * sales_mult + else: + tv = 0 + pv_tv = tv / ((1 + noisy_wacc) ** 5) + + ev = fcf_pv + pv_tv + eq = ev - base["total_debt"] + base["cash"] + per_share = eq / shares if shares and shares > 0 else eq + values.append(max(per_share, 0)) + + if not values: + return {}, None, [] + + values.sort() + n = len(values) + + def pct(p: float) -> float: + idx = int(n * p) + return values[min(idx, n - 1)] + + percentiles = { + "p10": pct(0.10), + "p25": pct(0.25), + "p50": pct(0.50), + "p75": pct(0.75), + "p90": pct(0.90), + } + + return percentiles, None, values + + +def _classify_signal( + upside_pct: float | None, + percentiles: dict[str, float], + current_price: float | None, +) -> str: + """upside와 분포로 투자 신호 판정.""" + if upside_pct is None: + return "hold" + + p10 = percentiles.get("p10", 0) + p90 = percentiles.get("p90", float("inf")) + + if upside_pct > 30 and current_price and p10 > current_price: + return "strong_buy" + if upside_pct > 15: + return "buy" + if upside_pct < -30 and current_price and p90 < current_price: + return "strong_sell" + if upside_pct < -15: + return "sell" + return "hold" + + +def compute_price_target( + series: dict, + sector_key: str | None = None, + current_price: float | None = None, + shares: int | None = None, + market_cap: float | None = None, + terminal_growth: float = 2.0, + mc_iterations: int = 5000, + mc_seed: int | None = None, + scenario_probabilities: dict[str, float] | None = None, + context_signals: ContextSignals | None = None, +) -> PriceTargetResult: + """메인 — 5시나리오 × pro-forma → DCF → Monte Carlo → 확률 분포 → signal. + + v2: context_signals가 있으면 확률 동적 재가중 + sizeClass별 MC σ 차등. + + Args: + series: 시계열 dict (unwrap 완료). + sector_key: 업종 키 (SectorElasticity 조회). + current_price: 현재 주가 (원). + shares: 발행주식수. + market_cap: 시가총액 (WACC의 equity weight 계산용). + terminal_growth: 영구성장률 (%). + mc_iterations: Monte Carlo 반복 횟수. + mc_seed: 난수 시드 (재현용). + scenario_probabilities: 시나리오별 확률 오버라이드. + context_signals: v2 맥락 신호 (확률 재가중 + MC σ 차등). + """ + warnings: list[str] = [] + probs = scenario_probabilities or dict(SCENARIO_PROBABILITIES) + elasticity = getElasticity(sector_key) + + # v2: context_signals가 있으면 확률 재가중 + size_class = "Mid" + if context_signals: + size_class = context_signals.sizeClass + probs = adjustProbabilities(probs, context_signals) + if context_signals.reasoning: + warnings.append(f"맥락 기반 확률 재가중 ({len(context_signals.reasoning)}개 규칙 적용)") + + # WACC 계산 — v2: 업종 β 활용 + wacc, wacc_details = compute_company_wacc( + series, + sector_elasticity=elasticity, + market_cap=market_cap, + ) + + # 기준 매출 성장률 (3년 CAGR) + from dartlab.core.finance.extract import getRevenueGrowth3Y + + base_growth = getRevenueGrowth3Y(series) + if base_growth is None: + base_growth = 3.0 + warnings.append("매출 성장률 데이터 부족 — 기본값 3% 사용") + + # 반도체 아닌 업종은 semiconductor_down 확률을 baseline에 재배분 + if sector_key and sector_key != "반도체" and "semiconductor_down" in probs: + semi_prob = probs.pop("semiconductor_down") + probs["baseline"] = probs.get("baseline", 0.4) + semi_prob + + # 확률 정규화 + total_prob = sum(probs.values()) + if total_prob > 0 and abs(total_prob - 1.0) > 0.001: + probs = {k: v / total_prob for k, v in probs.items()} + + # 시나리오별 pro-forma + DCF + scenario_targets: list[ScenarioPriceTarget] = [] + for name, prob in probs.items(): + scenario = PRESET_SCENARIOS.get(name) + if not scenario: + warnings.append(f"미지원 시나리오: {name}") + continue + + rev_path = _derive_revenue_path_from_macro(base_growth, scenario, elasticity) + pf = build_proforma( + series, + revenue_growth_path=rev_path, + sector_elasticity=elasticity, + market_cap=market_cap, + scenario_name=scenario.label, + ) + ev, eq, per_share = _dcf_from_proforma(pf, wacc, terminal_growth, shares) + + # Implied P/E + last_ni = pf.projections[-1].net_income if pf.projections else 0 + implied_per = eq / last_ni if last_ni and last_ni > 0 else None + + scenario_targets.append( + ScenarioPriceTarget( + scenario_name=name, + probability=prob, + proforma=pf, + enterprise_value=ev, + equity_value=eq, + per_share_value=per_share, + wacc_used=wacc, + terminal_growth=terminal_growth, + implied_per=implied_per, + ) + ) + + # 가중 목표가 + weighted_target = sum(s.per_share_value * s.probability for s in scenario_targets) + + # Monte Carlo 분포 + percentiles, _, mc_values = _monte_carlo_price_distribution( + series, + base_growth, + elasticity, + wacc, + terminal_growth, + shares, + mc_iterations, + mc_seed, + size_class=size_class, + ) + + # upside + upside_pct: float | None = None + prob_above: float | None = None + if current_price and current_price > 0: + upside_pct = (weighted_target / current_price - 1) * 100 + if mc_values: + above_count = sum(1 for v in mc_values if v > current_price) + prob_above = above_count / len(mc_values) * 100 + + # expected value (MC 평균) + expected_value = sum(mc_values) / len(mc_values) if mc_values else weighted_target + + # signal + signal = _classify_signal(upside_pct, percentiles, current_price) + + # 신뢰도 + ratios = extract_historical_ratios(series) + confidence = ratios.confidence + + return PriceTargetResult( + scenarios=scenario_targets, + weighted_target=weighted_target, + percentiles=percentiles, + expected_value=expected_value, + current_price=current_price, + upside_pct=upside_pct, + probability_above_current=prob_above, + signal=signal, + confidence=confidence, + wacc_details=wacc_details, + warnings=warnings, + ) diff --git a/src/dartlab/analysis/valuation/residualIncome.py b/src/dartlab/analysis/valuation/residualIncome.py new file mode 100644 index 0000000000000000000000000000000000000000..8e33e3838e5ebf48aba46a00774f9cbc25031b45 --- /dev/null +++ b/src/dartlab/analysis/valuation/residualIncome.py @@ -0,0 +1,264 @@ +"""잔여이익모델(RIM) 엔진 -- BPS + 초과이익 현가 = 내재가치. + +공식: + RI_t = NI_t - (Equity_{t-1} * CoE) + Intrinsic = BPS + sum(RI_t / (1+CoE)^t) + TV + +CoE 추정: CAPM proxy (무위험이자율 + 프리미엄) 또는 배당수익률 기반. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional + +from dartlab.core.finance.extract import getAnnualValues, getLatest + + +@dataclass +class RIMResult: + """잔여이익모델 결과.""" + + bps: float + coe: float # Cost of Equity (%) + riHistory: list[dict] # [{period, ni, equity, ri}, ...] + intrinsicValue: Optional[float] # 주당 내재가치 + upside: Optional[float] # 현재가 대비 (%) + terminalValue: Optional[float] + warnings: list[str] = field(default_factory=list) + currency: str = "KRW" + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + lines = ["[RIM 밸류에이션]"] + lines.append(f" BPS: {self.bps:,.0f}") + lines.append(f" 자기자본비용(CoE): {self.coe:.1f}%") + if self.intrinsicValue is not None: + lines.append(f" 주당 내재가치: {self.intrinsicValue:,.0f}") + if self.upside is not None: + lines.append(f" 업사이드: {self.upside:+.1f}%") + if self.warnings: + for w in self.warnings: + lines.append(f" -- {w}") + lines.append(f" * {self.DISCLAIMER}") + return "\n".join(lines) + + +def calcResidualIncome( + series: dict, + shares: Optional[int] = None, + currentPrice: Optional[float] = None, + coe: Optional[float] = None, + currency: str = "KRW", + beta: Optional[float] = None, +) -> RIMResult | None: + """잔여이익모델 -- 자기자본 대비 초과이익의 현재가치. + + Args: + series: 시계열 dict (finance.timeseries). + shares: 발행주식수. + currentPrice: 현재 주가. + coe: 자기자본비용 (%). None이면 자동 추정. + currency: 통화. + + Returns: + RIMResult 또는 데이터 부족 시 None. + """ + warnings: list[str] = [] + + # 연간 순이익 + niVals = getAnnualValues(series, "IS", "net_profit") + if not niVals: + niVals = getAnnualValues(series, "IS", "net_income") + if not niVals or len(niVals) < 2: + return None + + # 연간 자기자본 + eqVals = getAnnualValues(series, "BS", "total_stockholders_equity") + if not eqVals: + eqVals = getAnnualValues(series, "BS", "owners_of_parent_equity") + if not eqVals or len(eqVals) < 2: + return None + + # CoE 추정 + if coe is None: + coe = _estimateCoe(series, warnings, currency=currency, beta=beta) + + coeDecimal = coe / 100 + + # 최근 BPS + latestEquity = getLatest(series, "BS", "total_stockholders_equity") + if latestEquity is None: + latestEquity = getLatest(series, "BS", "owners_of_parent_equity") + if latestEquity is None or latestEquity <= 0: + return None + + bps = latestEquity / shares if shares and shares > 0 else 0 + + # RI 시계열 (최소 2년 필요: equity_{t-1} 사용) + n = min(len(niVals), len(eqVals)) + riHistory: list[dict] = [] + riValues: list[float] = [] + + for i in range(1, n): + ni = niVals[i] + prevEq = eqVals[i - 1] + if ni is None or prevEq is None or prevEq <= 0: + continue + ri = ni - prevEq * coeDecimal + riHistory.append( + { + "index": i, + "ni": ni, + "equity": prevEq, + "ri": ri, + } + ) + riValues.append(ri) + + if len(riValues) < 1: + warnings.append("잔여이익 계산 불가 (데이터 부족)") + return RIMResult( + bps=bps, + coe=coe, + riHistory=riHistory, + intrinsicValue=None, + upside=None, + terminalValue=None, + warnings=warnings, + currency=currency, + ) + + # 평균 RI -> 영구가치 추정 + sum(riValues) / len(riValues) + + # fade factor(omega) 동적 추정 -- ROE-CoE spread 지속성 + omega = _estimateOmega(niVals, eqVals, coeDecimal, warnings) + + # Terminal Value: RI_T * omega / (1 + CoE - omega) (CFA 표준) + denominator = 1 + coeDecimal - omega + if denominator > 0 and coeDecimal > 0: + lastRi = riValues[-1] + tv = lastRi * omega / denominator + else: + tv = 0 + + # PV of RI history + pvRi = 0.0 + for idx, ri in enumerate(riValues, 1): + pvRi += ri / (1 + coeDecimal) ** idx + + # PV of TV + pvTv = tv / (1 + coeDecimal) ** len(riValues) if len(riValues) > 0 else 0 + + # 내재 자기자본 = 현재 자기자본 + PV(RI) + PV(TV) + intrinsicEquity = latestEquity + pvRi + pvTv + + intrinsicPerShare = None + upside = None + if shares and shares > 0: + intrinsicPerShare = round(intrinsicEquity / shares, 0) + if currentPrice and currentPrice > 0: + upside = round((intrinsicPerShare - currentPrice) / currentPrice * 100, 1) + + if len(riValues) < 3: + warnings.append("RI 데이터 3년 미만 -- 결과 신뢰도 낮음") + + return RIMResult( + bps=round(bps, 0), + coe=coe, + riHistory=riHistory, + intrinsicValue=intrinsicPerShare, + upside=upside, + terminalValue=round(tv, 0) if tv else None, + warnings=warnings, + currency=currency, + ) + + +def _estimateOmega( + niVals: list, + eqVals: list, + coeDecimal: float, + warnings: list[str], +) -> float: + """초과이익 지속성(omega) 동적 추정. + + ROE-CoE spread가 안정적이면 omega 높음 (경쟁우위 지속), + 급감 추세면 omega 낮음 (평균 회귀). + 범위: 0.2 ~ 0.8 + """ + n = min(len(niVals), len(eqVals)) + if n < 3: + return 0.5 # 데이터 부족 시 기본값 + + # ROE spread = ROE - CoE (각 연도) + spreads: list[float] = [] + for i in range(n): + ni = niVals[i] + eq = eqVals[i] + if ni is not None and eq is not None and eq > 0: + roe = ni / eq + spreads.append(roe - coeDecimal) + + if len(spreads) < 2: + return 0.5 + + # spread의 자기상관(AR1)으로 persistence 추정 + # AR1 계수 = Cov(x_t, x_{t-1}) / Var(x_{t-1}) + mean = sum(spreads) / len(spreads) + demeaned = [s - mean for s in spreads] + + cov01 = sum(demeaned[i] * demeaned[i - 1] for i in range(1, len(demeaned))) + var0 = sum(d * d for d in demeaned[:-1]) + + if var0 > 0: + ar1 = cov01 / var0 + else: + ar1 = 0.5 + + # AR1을 omega 범위로 매핑 (0.2 ~ 0.8) + omega = max(0.2, min(ar1, 0.8)) + + # spread가 전부 음수면 (CoE > ROE) → 초과이익 없음, omega 낮춤 + if all(s < 0 for s in spreads): + omega = min(omega, 0.3) + warnings.append("ROE < CoE -- 초과이익 없음, omega 하향") + + return round(omega, 2) + + +def _estimateCoe( + series: dict, + warnings: list[str], + currency: str = "KRW", + beta: float | None = None, +) -> float: + """자기자본비용 추정 -- CAPM (MarketParams 기반). + + CoE = Rf + beta * (ERP + CRP) + beta 미지정 시 1.0 기본, ROE 기반 가감. + """ + from dartlab.core.sector.types import getMarketParams + + mkt = getMarketParams(currency) + b = beta if beta is not None else 1.0 + baseCoe = mkt.ke(b) + + # ROE 기반 미세조정 (beta 미지정 시에만) + if beta is None: + niVals = getAnnualValues(series, "IS", "net_profit") + eqVals = getAnnualValues(series, "BS", "total_stockholders_equity") + if niVals and eqVals and len(niVals) >= 2 and len(eqVals) >= 2: + recentNi = niVals[-1] + recentEq = eqVals[-1] + if recentNi is not None and recentEq and recentEq > 0: + roe = recentNi / recentEq * 100 + if roe < 5: + baseCoe = min(baseCoe + 2.0, 15.0) + warnings.append("ROE 낮음 -- CoE 상향 조정") + elif roe > 20: + baseCoe = max(baseCoe - 1.0, 7.0) + + return baseCoe diff --git a/src/dartlab/analysis/valuation/synthesizer.py b/src/dartlab/analysis/valuation/synthesizer.py new file mode 100644 index 0000000000000000000000000000000000000000..f3bed62a2447ae95d8f0394e591d940e241cfb08 --- /dev/null +++ b/src/dartlab/analysis/valuation/synthesizer.py @@ -0,0 +1,306 @@ +"""Analyst Synthesizer — 복수 밸류에이션 가중평균 합성. + +DCF + 컨센서스 + 피어 멀티플 + 상대가치 → 가중평균 목표가. +데이터 미가용 시 가용 방법 간 비례 재배분. +""" + +from __future__ import annotations + +import logging +from datetime import datetime, timezone + +from dartlab.gather.types import MarketSnapshot + +from .types import AnalystReport, ValuationMethod, _classify_opinion + +log = logging.getLogger(__name__) + +# 기본 가중치 (모든 데이터 가용 시) +DEFAULT_WEIGHTS: dict[str, float] = { + "dcf": 0.30, + "consensus": 0.35, + "peer_multiple": 0.20, + "relative": 0.15, +} + + +def synthesize( + *, + dcf_target: float | None = None, + dcf_confidence: float = 0.5, + market: MarketSnapshot | None = None, + company_financials: dict | None = None, + shares: int = 0, + current_price: float = 0.0, + company_name: str = "", + stock_code: str = "", + custom_weights: dict[str, float] | None = None, +) -> AnalystReport: + """복수 밸류에이션 → 가중평균 목표가 + 종합 의견. + + Args: + dcf_target: DCF 가중 목표가 (engines/common/finance/pricetarget). + dcf_confidence: DCF 신뢰도 (0~1). + market: MarketSnapshot (engines/market/collector). + company_financials: {"eps": float, "bps": float, "ebitda": float, ...} + shares: 발행주식수. + current_price: 현재 주가. + company_name: 회사명. + stock_code: 종목코드. + custom_weights: 사용자 정의 가중치 (기본값 덮어쓰기). + + Returns: + AnalystReport. + """ + methods: list[ValuationMethod] = [] + reasoning: list[str] = [] + warnings: list[str] = [] + weights = dict(custom_weights or DEFAULT_WEIGHTS) + + # ── 1. DCF 밸류에이션 ── + if dcf_target and dcf_target > 0: + methods.append( + ValuationMethod( + name="dcf", + value=dcf_target, + weight=weights.get("dcf", 0.30), + confidence=dcf_confidence, + reasoning="자체 DCF 엔진 (MC 시뮬레이션 + 시나리오 가중)", + ) + ) + else: + # DCF 미가용 → 가중치 재배분 + removed = weights.pop("dcf", 0) + if removed > 0: + _redistribute(weights, removed) + warnings.append("DCF 결과 미가용 — 가중치 재배분") + + # ── 2. 컨센서스 밸류에이션 ── + consensus_target = _extract_consensus(market, weights, methods, reasoning, warnings) + + # ── 3. 피어 멀티플 밸류에이션 ── + _extract_peer_multiple(market, company_financials, shares, weights, methods, reasoning, warnings) + + # ── 4. 상대가치 (역사적 밴드) ── + _extract_relative(market, company_financials, weights, methods, reasoning, warnings) + + # ── 가중평균 계산 ── + if not methods: + warnings.append("사용 가능한 밸류에이션 방법이 없습니다") + return AnalystReport( + stock_code=stock_code, + company_name=company_name, + current_price=current_price, + warnings=warnings, + generated_at=datetime.now(timezone.utc).isoformat(), + ) + + # 가중치 정규화 + total_weight = sum(m.weight for m in methods) + if total_weight > 0: + for m in methods: + m.weight = m.weight / total_weight + + # 가중평균 목표가 + target_price = sum(m.value * m.weight for m in methods) + + # 종합 신뢰도 = 가중평균 신뢰도 + confidence = sum(m.confidence * m.weight for m in methods) + + # 업사이드 + upside = 0.0 + if current_price > 0: + upside = (target_price - current_price) / current_price + + # 투자의견 + opinion = _classify_opinion(upside) + + # DCF-컨센서스 괴리 체크 + if dcf_target and consensus_target and dcf_target > 0 and consensus_target > 0: + gap = abs(dcf_target - consensus_target) / max(dcf_target, consensus_target) + if gap > 0.5: + reasoning.append( + f"DCF({dcf_target:,.0f})와 컨센서스({consensus_target:,.0f}) 괴리 {gap:.0%} — DCF 가중치 하향 적용" + ) + # DCF 가중치 ×0.7 재조정 + for m in methods: + if m.name == "dcf": + m.weight *= 0.7 + # 재정규화 + total_weight = sum(m.weight for m in methods) + if total_weight > 0: + for m in methods: + m.weight /= total_weight + target_price = sum(m.value * m.weight for m in methods) + if current_price > 0: + upside = (target_price - current_price) / current_price + opinion = _classify_opinion(upside) + + # 판단 근거 생성 + reasoning.append( + f"종합 목표가 {target_price:,.0f}원 = {' + '.join(f'{m.name}({m.value:,.0f}×{m.weight:.0%})' for m in methods)}" + ) + + return AnalystReport( + stock_code=stock_code, + company_name=company_name, + target_price=target_price, + current_price=current_price, + upside=upside, + opinion=opinion, + methods=methods, + confidence=confidence, + reasoning=reasoning, + warnings=warnings, + generated_at=datetime.now(timezone.utc).isoformat(), + ) + + +# ══════════════════════════════════════ +# 내부 헬퍼 +# ══════════════════════════════════════ + + +def _redistribute(weights: dict[str, float], removed: float) -> None: + """제거된 가중치를 나머지에 비례 배분.""" + total = sum(weights.values()) + if total <= 0: + return + for k in weights: + weights[k] += removed * (weights[k] / total) + + +def _extract_consensus( + market: MarketSnapshot | None, + weights: dict[str, float], + methods: list[ValuationMethod], + reasoning: list[str], + warnings: list[str], +) -> float | None: + """컨센서스 목표가 추출.""" + if not market or not market.consensus: + removed = weights.pop("consensus", 0) + if removed > 0: + _redistribute(weights, removed) + warnings.append("컨센서스 데이터 미가용 — 가중치 재배분") + return None + + c = market.consensus + confidence = 0.7 # 기본 신뢰도 + + # 애널리스트 수에 따른 신뢰도 조정 + if c.analyst_count >= 10: + confidence = 0.85 + elif c.analyst_count >= 5: + confidence = 0.75 + elif c.analyst_count < 3: + confidence = 0.5 + # 소수 애널리스트 → 가중치 ×0.5 + w = weights.get("consensus", 0) + weights["consensus"] = w * 0.5 + removed = w * 0.5 + _redistribute({k: v for k, v in weights.items() if k != "consensus"}, removed) + warnings.append(f"애널리스트 {c.analyst_count}명 — 컨센서스 신뢰도 낮음") + + methods.append( + ValuationMethod( + name="consensus", + value=c.target_price, + weight=weights.get("consensus", 0.35), + confidence=confidence, + reasoning=f"시장 컨센서스 (애널리스트 {c.analyst_count}명, 매수비율 {c.buy_ratio:.0%})", + ) + ) + reasoning.append(f"컨센서스 목표가 {c.target_price:,.0f}원 (범위: {c.low:,.0f}~{c.high:,.0f}, {c.analyst_count}명)") + return c.target_price + + +def _extract_peer_multiple( + market: MarketSnapshot | None, + financials: dict | None, + shares: int, + weights: dict[str, float], + methods: list[ValuationMethod], + reasoning: list[str], + warnings: list[str], +) -> None: + """피어 멀티플 → 상대가치 목표가.""" + # MVP에서는 업종 PER만 사용 (Phase 2에서 글로벌 피어 추가) + if not market or not financials or shares <= 0: + removed = weights.pop("peer_multiple", 0) + if removed > 0: + _redistribute(weights, removed) + return + + sector_per = market.multiples.get("sector_per") + eps = financials.get("eps") + + if not sector_per or not eps or eps <= 0: + removed = weights.pop("peer_multiple", 0) + if removed > 0: + _redistribute(weights, removed) + warnings.append("피어 멀티플 계산 불가 (업종 PER 또는 EPS 없음)") + return + + peer_target = sector_per * eps + methods.append( + ValuationMethod( + name="peer_multiple", + value=peer_target, + weight=weights.get("peer_multiple", 0.20), + confidence=0.6, + reasoning=f"업종 PER({sector_per:.1f}) × EPS({eps:,.0f})", + ) + ) + reasoning.append(f"피어 멀티플 목표가 {peer_target:,.0f}원 (업종PER {sector_per:.1f}×EPS {eps:,.0f})") + + +def _extract_relative( + market: MarketSnapshot | None, + financials: dict | None, + weights: dict[str, float], + methods: list[ValuationMethod], + reasoning: list[str], + warnings: list[str], +) -> None: + """상대가치 — 52주 밴드 + PBR 역사 기반.""" + if not market or not market.price_range_52w: + removed = weights.pop("relative", 0) + if removed > 0: + _redistribute(weights, removed) + return + + low_52w, high_52w = market.price_range_52w + if low_52w <= 0 or high_52w <= 0: + removed = weights.pop("relative", 0) + if removed > 0: + _redistribute(weights, removed) + return + + # 52주 중간값을 상대가치 기준 + # PBR이 있으면 PBR 기반 보정 + midpoint = (low_52w + high_52w) / 2 + + bps = (financials or {}).get("bps") + current_pbr = market.multiples.get("pbr") + if bps and current_pbr and bps > 0: + # 적정 PBR = 현재 PBR의 ±20% 밴드 중간 + fair_pbr = current_pbr * 1.0 # 현재 PBR 유지 가정 + pbr_target = bps * fair_pbr + # 52주 중간값과 PBR 기반 평균 + relative_target = (midpoint + pbr_target) / 2 + reasoning_text = f"52주 중간({midpoint:,.0f}) + PBR({fair_pbr:.2f}×BPS {bps:,.0f}) 평균" + else: + relative_target = midpoint + reasoning_text = f"52주 범위 중간값 ({low_52w:,.0f}~{high_52w:,.0f})" + + methods.append( + ValuationMethod( + name="relative", + value=relative_target, + weight=weights.get("relative", 0.15), + confidence=0.5, + reasoning=reasoning_text, + ) + ) + reasoning.append(f"상대가치 목표가 {relative_target:,.0f}원 ({reasoning_text})") diff --git a/src/dartlab/analysis/valuation/types.py b/src/dartlab/analysis/valuation/types.py new file mode 100644 index 0000000000000000000000000000000000000000..43d171f79472ac2d8c4f1010899abffe291aca4c --- /dev/null +++ b/src/dartlab/analysis/valuation/types.py @@ -0,0 +1,96 @@ +"""Analyst 엔진 데이터 타입 — 종합 분석 결과.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from dartlab.core.finance.fmt import fmtPrice + + +@dataclass +class ValuationMethod: + """개별 밸류에이션 방법론 결과.""" + + name: str = "" # "dcf", "consensus", "peer_multiple", "relative" + value: float = 0.0 # 산출 목표가 + weight: float = 0.0 # 가중치 (0~1) + confidence: float = 0.0 # 신뢰도 (0~1) + reasoning: str = "" # 산출 근거 + currency: str = "KRW" + + def __repr__(self) -> str: + return f"{self.name}: {fmtPrice(self.value, self.currency)} (가중치={self.weight:.0%}, 신뢰도={self.confidence:.0%})" + + +# 투자 의견 매핑 +_OPINION_MAP = { + "strong_buy": "강력매수", + "buy": "매수", + "hold": "중립", + "sell": "매도", + "strong_sell": "강력매도", +} + + +def _classify_opinion(upside: float) -> str: + """업사이드 → 투자의견 분류. + + Args: + upside: (target - current) / current 비율. + + Returns: + "강력매수" | "매수" | "중립" | "매도" | "강력매도" + """ + if upside > 0.30: + return "강력매수" + if upside > 0.10: + return "매수" + if upside > -0.10: + return "중립" + if upside > -0.30: + return "매도" + return "강력매도" + + +@dataclass +class AnalystReport: + """종합 애널리스트 리포트.""" + + stock_code: str = "" + company_name: str = "" + target_price: float = 0.0 # 가중평균 목표가 + current_price: float = 0.0 + upside: float = 0.0 # (target - current) / current + opinion: str = "" # "강력매수" | "매수" | "중립" | "매도" | "강력매도" + methods: list[ValuationMethod] = field(default_factory=list) + confidence: float = 0.0 # 종합 신뢰도 (0~1) + reasoning: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + generated_at: str = "" + currency: str = "KRW" + + DISCLAIMER: str = "본 분석은 투자 참고용이며 투자 권유가 아닙니다." + + def __repr__(self) -> str: + lines = [f"[애널리스트 리포트 — {self.company_name or self.stock_code}]"] + lines.append(f" 종합 목표가: {fmtPrice(self.target_price, self.currency)}") + lines.append(f" 현재가: {fmtPrice(self.current_price, self.currency)}") + lines.append(f" 업사이드: {self.upside:+.1%}") + lines.append(f" 투자의견: {self.opinion}") + lines.append(f" 신뢰도: {self.confidence:.0%}") + lines.append("") + lines.append(" [밸류에이션 방법론]") + for m in self.methods: + lines.append(f" {m}") + if self.reasoning: + lines.append("") + lines.append(" [판단 근거]") + for r in self.reasoning: + lines.append(f" - {r}") + if self.warnings: + lines.append("") + lines.append(" [주의사항]") + for w in self.warnings: + lines.append(f" ⚠ {w}") + lines.append(f"\n {self.DISCLAIMER}") + return "\n".join(lines) diff --git a/src/dartlab/analysis/valuation/valuation.py b/src/dartlab/analysis/valuation/valuation.py new file mode 100644 index 0000000000000000000000000000000000000000..8b71d0dbc0de8be46e6d2963ff39baa7f1bc2a16 --- /dev/null +++ b/src/dartlab/analysis/valuation/valuation.py @@ -0,0 +1,3 @@ +"""하위호환 re-export -- 실제 구현은 core/finance/dcf.py.""" + +from dartlab.core.finance.dcf import * # noqa: F401,F403 diff --git a/src/dartlab/audit/__init__.py b/src/dartlab/audit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c1acd7c72b43aac47880c6fd4ea5fe158002b344 --- /dev/null +++ b/src/dartlab/audit/__init__.py @@ -0,0 +1,171 @@ +"""dartlab.audit — 분석 감사 엔진. + +전 기업 순차 분석 + 품질 DB 누적. + +사용법:: + + import dartlab + + # 특정 기업 감사 + dartlab.runAudit(["005930", "035420"]) + + # 전 기업 (getKindList 기반) + dartlab.runAudit() + + # 이어하기 (오늘 날짜 기준 미완료분만) + dartlab.runAudit(resume=True) + + # 결과 조회 + dartlab.queryAudit("005930") + dartlab.queryAudit(issues=True) + dartlab.queryAudit(coverage=True) +""" + +from __future__ import annotations + +from typing import Any + +import polars as pl + + +def runAudit( + codes: list[str] | None = None, + *, + resume: bool = False, + runDate: str = "", + onProgress: Any = None, +) -> list[dict[str, Any]]: + """분석 감사 실행. + + Capabilities: + - 종목코드 리스트 또는 전 기업 순차 분석 + - 15축 analysis + insights + valuation + forecast + ratios + review + - SQLite 메타 + parquet 결과 + JSON review 자동 저장 + - resume=True로 중단된 감사 이어하기 + + Requires: + 데이터: finance (자동 다운로드) + + AIContext: + 전 기업 분석 품질 측정 + 이슈 추적용. + 결과는 ~/.dartlab/data/audit/ 에 누적. + + Guide: + - "삼성전자만 감사" -> runAudit(["005930"]) + - "전 기업 감사" -> runAudit() + - "이어하기" -> runAudit(resume=True) + + SeeAlso: + - queryAudit: 감사 결과 조회 + - analysis: 단일 종목 분석 + + Args: + codes: 종목코드 리스트. None이면 전 기업. + resume: True면 오늘 날짜 기준 미완료분만. + runDate: 감사 날짜 (기본 오늘). + onProgress: 콜백 (stockCode, idx, total, result). + + Returns: + list[dict] — 기업별 감사 결과 요약. + + Example:: + + import dartlab + results = dartlab.runAudit(["005930"]) + print(results[0]["coverageRate"]) # 0.85 + """ + from dartlab.audit.runner import AuditRunner + from dartlab.audit.store import AuditStore + + store = AuditStore() + runner = AuditRunner(store) + try: + return runner.auditBatch( + codes, + resume=resume, + runDate=runDate, + onProgress=onProgress, + ) + finally: + store.close() + + +def queryAudit( + stockCode: str | None = None, + *, + axis: str | None = None, + issues: bool = False, + coverage: bool = False, + runDate: str | None = None, +) -> pl.DataFrame: + """감사 결과 조회. + + Capabilities: + - 종목별 최신 분석 결과 (parquet) + - 축별 크로스 기업 비교 + - 미해결 이슈 목록 + - 축별 coverage 통계 + + Requires: + 감사 실행 이력 (runAudit 이후). + + AIContext: + 엔진 품질 모니터링 + 개선 우선순위 결정. + + Guide: + - "삼성전자 감사 결과" -> queryAudit("005930") + - "수익구조 전체 커버리지" -> queryAudit(axis="수익구조") + - "미해결 이슈" -> queryAudit(issues=True) + - "축별 커버리지" -> queryAudit(coverage=True) + + SeeAlso: + - runAudit: 감사 실행 + + Args: + stockCode: 종목코드. + axis: 분석 축 이름. + issues: True면 미해결 이슈 반환. + coverage: True면 축별 coverage 통계 반환. + runDate: 날짜 필터. + + Returns: + pl.DataFrame — 조회 결과. + + Example:: + + import dartlab + df = dartlab.queryAudit("005930") + print(df) + """ + from dartlab.audit.store import AuditStore + + store = AuditStore() + try: + if coverage: + return store.coverageSummary(runDate) + + if issues: + return store.queryIssues(resolved=False) + + if stockCode: + df = store.queryParquet(stockCode, runDate) + if df is not None and axis: + return df.filter(pl.col("axis") == axis) + if df is not None: + return df + return store.queryRuns(stockCode=stockCode) + + if axis: + # 크로스 기업: 모든 parquet에서 해당 축 필터 + summary = store.coverageSummary(runDate) + if not summary.is_empty(): + return summary.filter(pl.col("axis") == axis) + return pl.DataFrame() + + # 기본: 전체 실행 기록 + return store.queryRuns(runDate=runDate) + finally: + store.close() + + +__all__ = ["runAudit", "queryAudit"] diff --git a/src/dartlab/audit/__pycache__/__init__.cpython-312.pyc b/src/dartlab/audit/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74526fe7183926eb07ea960bfa9abad54c4a6d3a Binary files /dev/null and b/src/dartlab/audit/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/audit/__pycache__/__init__.cpython-313.pyc b/src/dartlab/audit/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1ce4bdaee0a7f4db3536178b4d5a97ece1042fd Binary files /dev/null and b/src/dartlab/audit/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/dartlab/audit/issues.py b/src/dartlab/audit/issues.py new file mode 100644 index 0000000000000000000000000000000000000000..077f058fdfa77228e658bc612d836d58eae6d51b --- /dev/null +++ b/src/dartlab/audit/issues.py @@ -0,0 +1,73 @@ +"""이슈 자동 탐지 휴리스틱 — 감사 결과에서 문제 식별.""" + +from __future__ import annotations + +from typing import Any + + +def detectIssues( + rows: list[dict[str, Any]], + *, + coverageThreshold: float = 0.5, +) -> list[dict[str, Any]]: + """parquet row 리스트에서 이슈를 자동 탐지한다. + + Args: + rows: auditOne()이 생성한 row 리스트. + coverageThreshold: 이 비율 미만이면 coverage 이슈. + + Returns: + 이슈 dict 리스트. + """ + issues: list[dict[str, Any]] = [] + + if not rows: + return issues + + # ── 축별 coverage 계산 ── + axisCounts: dict[str, dict[str, int]] = {} + for row in rows: + axis = row.get("axis", "") + if not axis: + continue + if axis not in axisCounts: + axisCounts[axis] = {"total": 0, "ok": 0, "none": 0, "error": 0} + axisCounts[axis]["total"] += 1 + status = row.get("status", "") + if status in axisCounts[axis]: + axisCounts[axis][status] += 1 + + # ── 축별 coverage 이슈 ── + for axis, counts in axisCounts.items(): + total = counts["total"] + ok = counts["ok"] + if total == 0: + continue + rate = ok / total + if rate < coverageThreshold: + severity = "critical" if rate == 0 else "warning" + issues.append( + { + "category": "quality", + "severity": severity, + "axis": axis, + "blockKey": "", + "description": f"{axis} coverage {rate:.0%} ({ok}/{total})", + } + ) + + # ── 전체 error 비율 ── + totalRows = len(rows) + errorRows = sum(1 for r in rows if r.get("status") == "error") + if totalRows > 0 and errorRows / totalRows > 0.2: + issues.append( + { + "category": "calcError", + "severity": "critical", + "axis": "", + "blockKey": "", + "description": f"전체 error 비율 {errorRows}/{totalRows} ({errorRows / totalRows:.0%})", + } + ) + + return issues diff --git a/src/dartlab/audit/runner.py b/src/dartlab/audit/runner.py new file mode 100644 index 0000000000000000000000000000000000000000..ea9f17796eeb56ef76b8c848e405f793a009d277 --- /dev/null +++ b/src/dartlab/audit/runner.py @@ -0,0 +1,387 @@ +"""AuditRunner — 기업 순회 + 전체 분석 + 저장.""" + +from __future__ import annotations + +import gc +import logging +import time +from datetime import date +from typing import Any + +from dartlab.audit.serializer import serializeCalcResult +from dartlab.audit.store import AuditStore + +logger = logging.getLogger("dartlab.audit") + +# ── 14축 목록 (financial 그룹, analysis/__init__.py와 동기) ── +# 가치평가/매출전망은 별도 그룹(valuation/forecast)으로 아래에서 개별 호출 + +ALL_AXES: tuple[str, ...] = ( + "수익구조", + "자금조달", + "자산구조", + "현금흐름", + "수익성", + "성장성", + "안정성", + "효율성", + "종합평가", + "이익품질", + "비용구조", + "자본배분", + "투자효율", + "재무정합성", +) + +# ── 추가 분석 기능 (axis 외) ── + +_EXTRA_BLOCKS = ( + ("insights", "grades"), + ("insights", "anomalies"), + ("insights", "profile"), + ("insights", "summary"), + ("valuation", "composite"), + ("forecast", "predicted"), + ("ratios", "ratioTable"), +) + + +def _safeGetAttr(obj: Any, attr: str) -> Any: + """AttributeError 안전 접근.""" + try: + return getattr(obj, attr) + except (AttributeError, TypeError, ValueError, KeyError): + return None + + +class AuditRunner: + """단일/다수 기업 감사 실행기.""" + + def __init__(self, store: AuditStore | None = None): + self._store = store or AuditStore() + + def auditOne(self, stockCode: str, *, runDate: str = "") -> dict[str, Any]: + """단일 기업 전체 분석 + 저장.""" + from dartlab.company import Company + + if not runDate: + runDate = date.today().isoformat() + + t0 = time.time() + logger.info("[audit] %s 시작", stockCode) + + c = Company(stockCode) + corpName = getattr(c, "corpName", "") or "" + sector = "" + try: + sectorObj = getattr(c, "sector", None) + if sectorObj is not None: + sector = str(getattr(sectorObj, "sector", "") or "") + except (AttributeError, TypeError): + pass + + engineVersion = "" + try: + from dartlab import __version__ + + engineVersion = __version__ + except ImportError: + pass + + rows: list[dict[str, Any]] = [] + issues: list[dict[str, Any]] = [] + + # ── 1. 14축 financial analysis ── + for axis in ALL_AXES: + axisT0 = time.time() + try: + result = c.analysis("financial", axis) + except (TypeError, ValueError, KeyError, AttributeError, ArithmeticError) as e: + result = None + issues.append( + { + "category": "calcError", + "severity": "critical", + "axis": axis, + "blockKey": "", + "description": f"analysis('financial', '{axis}') 실행 실패: {e}", + } + ) + + if isinstance(result, dict): + for blockKey, val in result.items(): + dMs = int((time.time() - axisT0) * 1000) + ser = serializeCalcResult(blockKey, val) + rows.append( + { + "axis": axis, + "blockKey": ser["blockKey"], + "status": ser["status"], + "resultJson": ser["resultJson"], + "durationMs": dMs, + } + ) + if ser["status"] == "none": + issues.append( + { + "category": "dataMissing", + "severity": "warning", + "axis": axis, + "blockKey": blockKey, + "description": f"{axis}/{blockKey} 결과 None", + } + ) + elif result is None: + rows.append( + { + "axis": axis, + "blockKey": "", + "status": "error", + "resultJson": "null", + "durationMs": int((time.time() - axisT0) * 1000), + } + ) + + # ── 2. insights ── + try: + insights = c.insights + if insights is not None: + for attr in ("grades", "anomalies", "profile", "summary"): + val = _safeGetAttr(insights, attr) + ser = serializeCalcResult(attr, val) + rows.append( + { + "axis": "insights", + "blockKey": ser["blockKey"], + "status": ser["status"], + "resultJson": ser["resultJson"], + "durationMs": 0, + } + ) + else: + rows.append( + { + "axis": "insights", + "blockKey": "", + "status": "none", + "resultJson": "null", + "durationMs": 0, + } + ) + except (TypeError, ValueError, KeyError, AttributeError) as e: + issues.append( + { + "category": "calcError", + "severity": "warning", + "axis": "insights", + "blockKey": "", + "description": f"insights 실패: {e}", + } + ) + + # ── 3. valuation (analysis 가치평가 축) ── + try: + valResult = c.analysis("valuation", "가치평가") + if valResult is not None: + ser = serializeCalcResult("valuationResult", valResult) + rows.append( + { + "axis": "valuation", + "blockKey": "valuationResult", + "status": ser["status"], + "resultJson": ser["resultJson"], + "durationMs": 0, + } + ) + except (TypeError, ValueError, KeyError, AttributeError) as e: + issues.append( + { + "category": "calcError", + "severity": "warning", + "axis": "valuation", + "blockKey": "valuationResult", + "description": f"analysis('valuation', '가치평가') 실패: {e}", + } + ) + + # ── 4. forecast (analysis 매출전망 축) ── + try: + fcResult = c.analysis("forecast", "매출전망") + if fcResult is not None: + ser = serializeCalcResult("forecastResult", fcResult) + rows.append( + { + "axis": "forecast", + "blockKey": "forecastResult", + "status": ser["status"], + "resultJson": ser["resultJson"], + "durationMs": 0, + } + ) + except (TypeError, ValueError, KeyError, AttributeError) as e: + issues.append( + { + "category": "calcError", + "severity": "warning", + "axis": "forecast", + "blockKey": "forecastResult", + "description": f"analysis('forecast', '매출전망') 실패: {e}", + } + ) + + # ── 5. ratios ── + try: + ratios = c.show("ratios") + if ratios is not None: + ser = serializeCalcResult("ratioTable", ratios) + rows.append( + { + "axis": "ratios", + "blockKey": "ratioTable", + "status": ser["status"], + "resultJson": ser["resultJson"], + "durationMs": 0, + } + ) + except (TypeError, ValueError, KeyError, AttributeError) as e: + issues.append( + { + "category": "calcError", + "severity": "warning", + "axis": "ratios", + "blockKey": "ratioTable", + "description": f"ratios 실패: {e}", + } + ) + + # ── 6. review ── + reviewJson = "" + try: + review = c.review() + if review is not None: + reviewJson = review.toJson() + except (TypeError, ValueError, KeyError, AttributeError) as e: + issues.append( + { + "category": "calcError", + "severity": "warning", + "axis": "review", + "blockKey": "", + "description": f"review() 실패: {e}", + } + ) + + # ── 집계 ── + totalCalcs = len(rows) + okCalcs = sum(1 for r in rows if r.get("status") == "ok") + durationSec = time.time() - t0 + + # ── 저장 ── + self._store.saveParquet( + stockCode=stockCode, + corpName=corpName, + runDate=runDate, + rows=rows, + ) + if reviewJson: + self._store.saveReviewJson( + stockCode=stockCode, + runDate=runDate, + reviewJson=reviewJson, + ) + runId = self._store.saveRun( + stockCode=stockCode, + corpName=corpName, + sector=sector, + runDate=runDate, + engineVersion=engineVersion, + totalCalcs=totalCalcs, + okCalcs=okCalcs, + durationSec=durationSec, + ) + if issues: + self._store.saveIssues(runId, issues) + + logger.info( + "[audit] %s 완료 — %d/%d ok (%.1fs)", + stockCode, + okCalcs, + totalCalcs, + durationSec, + ) + + # ── 메모리 해제 (필수) ── + del c + gc.collect() + + return { + "stockCode": stockCode, + "corpName": corpName, + "runId": runId, + "totalCalcs": totalCalcs, + "okCalcs": okCalcs, + "coverageRate": okCalcs / totalCalcs if totalCalcs > 0 else 0.0, + "durationSec": round(durationSec, 1), + "issueCount": len(issues), + } + + def auditBatch( + self, + codes: list[str] | None = None, + *, + resume: bool = False, + runDate: str = "", + onProgress: Any = None, + ) -> list[dict[str, Any]]: + """다수 기업 순차 감사. + + Args: + codes: 종목코드 리스트. None이면 전 기업 (getKindList). + resume: True면 오늘 날짜 기준 미완료분만. + runDate: 감사 날짜 (기본 오늘). + onProgress: 콜백 (stockCode, idx, total, result) -> None. + """ + if not runDate: + runDate = date.today().isoformat() + + if codes is None: + from dartlab.gather.listing import getKindList + + kindDf = getKindList() + codes = kindDf["종목코드"].to_list() + + if resume: + done = self._store.completedCodes(runDate) + codes = [c for c in codes if c not in done] + logger.info("[audit] resume: %d개 미완료 기업", len(codes)) + + results = [] + total = len(codes) + for idx, stockCode in enumerate(codes): + try: + result = self.auditOne(stockCode, runDate=runDate) + results.append(result) + except (OSError, RuntimeError, ValueError) as e: + logger.error("[audit] %s 치명적 오류: %s", stockCode, e) + results.append( + { + "stockCode": stockCode, + "corpName": "", + "runId": -1, + "totalCalcs": 0, + "okCalcs": 0, + "coverageRate": 0.0, + "durationSec": 0.0, + "issueCount": 0, + "error": str(e), + } + ) + gc.collect() + + if onProgress: + try: + onProgress(stockCode, idx + 1, total, results[-1]) + except (TypeError, ValueError): + pass + + return results diff --git a/src/dartlab/audit/serializer.py b/src/dartlab/audit/serializer.py new file mode 100644 index 0000000000000000000000000000000000000000..29ec6c3beac64d4490677312cf1725305f8329b6 --- /dev/null +++ b/src/dartlab/audit/serializer.py @@ -0,0 +1,72 @@ +"""analysis dict -> JSON 변환 (DataFrame/NaN/None 안전 직렬화).""" + +from __future__ import annotations + +import json +import math +from typing import Any + +import polars as pl + + +def serializeValue(value: Any) -> Any: + """단일 값을 JSON-safe로 변환한다.""" + if value is None: + return None + if isinstance(value, float): + if math.isnan(value) or math.isinf(value): + return None + return value + if isinstance(value, (int, str, bool)): + return value + if isinstance(value, pl.DataFrame): + return _serializeDataFrame(value) + if isinstance(value, pl.Series): + return value.to_list() + if isinstance(value, dict): + return {str(k): serializeValue(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [serializeValue(v) for v in value] + # dataclass / namedtuple + if hasattr(value, "__dict__"): + return {str(k): serializeValue(v) for k, v in value.__dict__.items()} + return str(value) + + +def _serializeDataFrame(df: pl.DataFrame) -> list[dict[str, Any]]: + """Polars DataFrame -> list of dicts.""" + rows = df.to_dicts() + return [{k: serializeValue(v) for k, v in row.items()} for row in rows] + + +def toJsonStr(value: Any) -> str: + """값을 JSON 문자열로 직렬화한다.""" + safe = serializeValue(value) + return json.dumps(safe, ensure_ascii=False, default=str) + + +def serializeCalcResult(blockKey: str, result: Any) -> dict[str, Any]: + """단일 calc 결과를 parquet row용 dict로 변환한다. + + Returns: + {"blockKey": str, "status": "ok"|"none"|"error", "resultJson": str} + """ + if result is None: + return { + "blockKey": blockKey, + "status": "none", + "resultJson": "null", + } + try: + jsonStr = toJsonStr(result) + return { + "blockKey": blockKey, + "status": "ok", + "resultJson": jsonStr, + } + except (TypeError, ValueError, OverflowError) as e: + return { + "blockKey": blockKey, + "status": "error", + "resultJson": json.dumps({"error": str(e)}, ensure_ascii=False), + } diff --git a/src/dartlab/audit/store.py b/src/dartlab/audit/store.py new file mode 100644 index 0000000000000000000000000000000000000000..527c10e516efbcff1cba040f079d4aab3991f348 --- /dev/null +++ b/src/dartlab/audit/store.py @@ -0,0 +1,316 @@ +"""감사 결과 저장소 — SQLite 메타 + parquet 결과 + JSON review.""" + +from __future__ import annotations + +import sqlite3 +from datetime import date +from pathlib import Path +from typing import Any + +import polars as pl + + +def _defaultDataDir() -> Path: + """config.dataDir / audit/ — 데이터 루트 설정을 따른다.""" + try: + from dartlab.core.dataLoader import _getDataRoot + + return Path(_getDataRoot()) / "audit" + except (ImportError, RuntimeError): + # fallback: 레포 상대경로 + repoRoot = Path(__file__).resolve().parents[3] + return repoRoot / "data" / "audit" + + +# ── SQLite 스키마 ── + +_CREATE_RUN = """ +CREATE TABLE IF NOT EXISTS auditRun ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + stockCode TEXT NOT NULL, + corpName TEXT DEFAULT '', + sector TEXT DEFAULT '', + runDate TEXT NOT NULL, + engineVersion TEXT DEFAULT '', + totalCalcs INTEGER DEFAULT 0, + okCalcs INTEGER DEFAULT 0, + coverageRate REAL DEFAULT 0.0, + durationSec REAL DEFAULT 0.0, + status TEXT DEFAULT 'complete' +) +""" + +_CREATE_ISSUE = """ +CREATE TABLE IF NOT EXISTS auditIssue ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + runId INTEGER REFERENCES auditRun(id), + category TEXT DEFAULT '', + severity TEXT DEFAULT 'info', + axis TEXT DEFAULT '', + blockKey TEXT DEFAULT '', + description TEXT DEFAULT '', + resolved INTEGER DEFAULT 0, + resolvedNote TEXT DEFAULT '' +) +""" + +_CREATE_INDEXES = [ + "CREATE INDEX IF NOT EXISTS idx_run_stock ON auditRun(stockCode)", + "CREATE INDEX IF NOT EXISTS idx_run_date ON auditRun(runDate)", + "CREATE INDEX IF NOT EXISTS idx_issue_run ON auditIssue(runId)", + "CREATE INDEX IF NOT EXISTS idx_issue_cat ON auditIssue(category)", +] + + +class AuditStore: + """감사 결과 저장소 — SQLite + parquet + JSON.""" + + def __init__(self, dataDir: Path | None = None): + self._dataDir = dataDir or _defaultDataDir() + self._dbPath = self._dataDir / "audit.db" + self._conn: sqlite3.Connection | None = None + + def _ensureDb(self) -> sqlite3.Connection: + """lazy init — 첫 호출 시에만 DB 생성.""" + if self._conn is not None: + return self._conn + self._dataDir.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(self._dbPath)) + conn.execute("PRAGMA journal_mode=WAL") + conn.execute(_CREATE_RUN) + conn.execute(_CREATE_ISSUE) + for idx in _CREATE_INDEXES: + conn.execute(idx) + conn.commit() + self._conn = conn + return conn + + # ── 실행 기록 ── + + def saveRun( + self, + *, + stockCode: str, + corpName: str = "", + sector: str = "", + runDate: str = "", + engineVersion: str = "", + totalCalcs: int = 0, + okCalcs: int = 0, + durationSec: float = 0.0, + status: str = "complete", + ) -> int: + """감사 실행 기록을 저장하고 runId를 반환한다.""" + conn = self._ensureDb() + if not runDate: + runDate = date.today().isoformat() + coverageRate = okCalcs / totalCalcs if totalCalcs > 0 else 0.0 + cursor = conn.execute( + """INSERT INTO auditRun + (stockCode, corpName, sector, runDate, engineVersion, + totalCalcs, okCalcs, coverageRate, durationSec, status) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + stockCode, + corpName, + sector, + runDate, + engineVersion, + totalCalcs, + okCalcs, + coverageRate, + durationSec, + status, + ), + ) + conn.commit() + return cursor.lastrowid # type: ignore[return-value] + + def saveIssues(self, runId: int, issues: list[dict[str, Any]]) -> None: + """감사 이슈 목록을 저장한다.""" + if not issues: + return + conn = self._ensureDb() + conn.executemany( + """INSERT INTO auditIssue + (runId, category, severity, axis, blockKey, description) + VALUES (?, ?, ?, ?, ?, ?)""", + [ + ( + runId, + iss.get("category", ""), + iss.get("severity", "info"), + iss.get("axis", ""), + iss.get("blockKey", ""), + iss.get("description", ""), + ) + for iss in issues + ], + ) + conn.commit() + + # ── parquet 저장 ── + + def saveParquet( + self, + *, + stockCode: str, + corpName: str, + runDate: str, + rows: list[dict[str, Any]], + ) -> Path: + """분석 결과를 parquet으로 저장한다.""" + if not runDate: + runDate = date.today().isoformat() + dayDir = self._dataDir / runDate + dayDir.mkdir(parents=True, exist_ok=True) + + for row in rows: + row.setdefault("stockCode", stockCode) + row.setdefault("corpName", corpName) + row.setdefault("runDate", runDate) + + df = pl.DataFrame(rows) + outPath = dayDir / f"{stockCode}.parquet" + df.write_parquet(outPath) + return outPath + + # ── review JSON 저장 ── + + def saveReviewJson( + self, + *, + stockCode: str, + runDate: str, + reviewJson: str, + ) -> Path: + """review JSON을 저장한다.""" + if not runDate: + runDate = date.today().isoformat() + dayDir = self._dataDir / runDate + dayDir.mkdir(parents=True, exist_ok=True) + outPath = dayDir / f"{stockCode}_review.json" + outPath.write_text(reviewJson, encoding="utf-8") + return outPath + + # ── 조회 ── + + def queryRuns( + self, + stockCode: str | None = None, + runDate: str | None = None, + limit: int = 100, + ) -> pl.DataFrame: + """감사 실행 기록을 조회한다.""" + conn = self._ensureDb() + query = "SELECT * FROM auditRun WHERE 1=1" + params: list[Any] = [] + if stockCode: + query += " AND stockCode = ?" + params.append(stockCode) + if runDate: + query += " AND runDate = ?" + params.append(runDate) + query += " ORDER BY id DESC LIMIT ?" + params.append(limit) + + cursor = conn.execute(query, params) + columns = [desc[0] for desc in cursor.description] + rows = cursor.fetchall() + if not rows: + return pl.DataFrame(schema={c: pl.Utf8 for c in columns}) + return pl.DataFrame([dict(zip(columns, row)) for row in rows]) + + def queryIssues( + self, + runId: int | None = None, + resolved: bool | None = None, + category: str | None = None, + limit: int = 500, + ) -> pl.DataFrame: + """감사 이슈를 조회한다.""" + conn = self._ensureDb() + query = "SELECT * FROM auditIssue WHERE 1=1" + params: list[Any] = [] + if runId is not None: + query += " AND runId = ?" + params.append(runId) + if resolved is not None: + query += " AND resolved = ?" + params.append(1 if resolved else 0) + if category: + query += " AND category = ?" + params.append(category) + query += " ORDER BY id DESC LIMIT ?" + params.append(limit) + + cursor = conn.execute(query, params) + columns = [desc[0] for desc in cursor.description] + rows = cursor.fetchall() + if not rows: + return pl.DataFrame(schema={c: pl.Utf8 for c in columns}) + return pl.DataFrame([dict(zip(columns, row)) for row in rows]) + + def queryParquet( + self, + stockCode: str, + runDate: str | None = None, + ) -> pl.DataFrame | None: + """특정 기업의 parquet 결과를 읽는다.""" + if runDate: + path = self._dataDir / runDate / f"{stockCode}.parquet" + if path.exists(): + return pl.read_parquet(path) + return None + + # 최신 날짜 자동 탐색 + candidates = sorted(self._dataDir.glob(f"*/{stockCode}.parquet"), reverse=True) + if candidates: + return pl.read_parquet(candidates[0]) + return None + + def completedCodes(self, runDate: str | None = None) -> set[str]: + """오늘(또는 지정 날짜) 완료된 종목코드 set.""" + conn = self._ensureDb() + if not runDate: + runDate = date.today().isoformat() + cursor = conn.execute( + "SELECT DISTINCT stockCode FROM auditRun WHERE runDate = ? AND status = 'complete'", + (runDate,), + ) + return {row[0] for row in cursor.fetchall()} + + def coverageSummary(self, runDate: str | None = None) -> pl.DataFrame: + """축별 coverage 통계.""" + if not runDate: + runDate = date.today().isoformat() + dayDir = self._dataDir / runDate + if not dayDir.exists(): + return pl.DataFrame() + + parquets = list(dayDir.glob("*.parquet")) + if not parquets: + return pl.DataFrame() + + dfs = [pl.read_parquet(p) for p in parquets] + combined = pl.concat(dfs) + + return ( + combined.group_by("axis", "blockKey") + .agg( + pl.col("status").count().alias("total"), + (pl.col("status") == "ok").sum().alias("ok"), + (pl.col("status") == "none").sum().alias("none"), + (pl.col("status") == "error").sum().alias("error"), + ) + .with_columns((pl.col("ok") / pl.col("total") * 100).round(1).alias("coveragePct")) + .sort("axis", "blockKey") + ) + + # ── 정리 ── + + def close(self) -> None: + """DB 연결 종료.""" + if self._conn: + self._conn.close() + self._conn = None diff --git a/src/dartlab/channel/__init__.py b/src/dartlab/channel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e177b9eac6c24f4fb56586439cd2ac7a8eecfc0f --- /dev/null +++ b/src/dartlab/channel/__init__.py @@ -0,0 +1,13 @@ +"""DartLab Channel — 외부 공유 엔진. + +dartlab channel 명령으로 PC dartlab을 외부에서 접근 가능한 영구 URL로 공개한다. +Microsoft DevTunnels을 기술 백엔드로 사용 (VS Code Remote Tunnels와 동일 인프라). + +상세: ops/channel.md +""" + +from __future__ import annotations + +from dartlab.channel.devtunnel import DevTunnelSetupError, setup_devtunnel + +__all__ = ["DevTunnelSetupError", "setup_devtunnel"] diff --git a/src/dartlab/channel/__pycache__/__init__.cpython-312.pyc b/src/dartlab/channel/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..674d4309da125710128600926ac562d556e1e438 Binary files /dev/null and b/src/dartlab/channel/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/channel/__pycache__/devtunnel.cpython-312.pyc b/src/dartlab/channel/__pycache__/devtunnel.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62c0d98902fd06136ebc3d643ed1ee507b82da26 Binary files /dev/null and b/src/dartlab/channel/__pycache__/devtunnel.cpython-312.pyc differ diff --git a/src/dartlab/channel/adapters/__init__.py b/src/dartlab/channel/adapters/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..205a4e488f1cd0ab887dea959befefc48bd81fa6 --- /dev/null +++ b/src/dartlab/channel/adapters/__init__.py @@ -0,0 +1,38 @@ +"""DartLab 메시징 채널 어댑터. + +지원 플랫폼: +- telegram (python-telegram-bot) — polling 기반, 공개 URL 불필요 +- slack (slack-bolt) — Socket Mode, 공개 URL 불필요 +- discord (discord.py) — Gateway, slash command +""" + +from __future__ import annotations + +from dartlab.channel.adapters.base import ChannelAdapter + +_ADAPTER_MAP: dict[str, str] = { + "telegram": "dartlab.channel.adapters.telegram", + "slack": "dartlab.channel.adapters.slack", + "discord": "dartlab.channel.adapters.discord", +} + + +def create_adapter(platform: str, **kwargs) -> ChannelAdapter: + """플랫폼별 어댑터를 생성한다. + + Args: + platform: "telegram" | "slack" | "discord" + **kwargs: 플랫폼별 인자 (token 등) + """ + module_path = _ADAPTER_MAP.get(platform) + if module_path is None: + available = ", ".join(_ADAPTER_MAP) + raise ValueError(f"알 수 없는 채널: {platform!r}. 사용 가능: {available}") + + import importlib + + mod = importlib.import_module(module_path) + return mod.create(**kwargs) + + +__all__ = ["ChannelAdapter", "create_adapter"] diff --git a/src/dartlab/channel/adapters/__pycache__/__init__.cpython-312.pyc b/src/dartlab/channel/adapters/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5bb21284885d0c95c729b73477beab6ae6ea808 Binary files /dev/null and b/src/dartlab/channel/adapters/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/dartlab/channel/adapters/__pycache__/base.cpython-312.pyc b/src/dartlab/channel/adapters/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b36525bad8b5c8b312d3be1e2e0c6ac801d54188 Binary files /dev/null and b/src/dartlab/channel/adapters/__pycache__/base.cpython-312.pyc differ diff --git a/src/dartlab/channel/adapters/__pycache__/telegram.cpython-312.pyc b/src/dartlab/channel/adapters/__pycache__/telegram.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00706d7d3c42e7d927311d47bf94c9b6691fbbb2 Binary files /dev/null and b/src/dartlab/channel/adapters/__pycache__/telegram.cpython-312.pyc differ diff --git a/src/dartlab/channel/adapters/base.py b/src/dartlab/channel/adapters/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a364f1b4ac1c0d9227b629b652fe5f1bf4920a75 --- /dev/null +++ b/src/dartlab/channel/adapters/base.py @@ -0,0 +1,103 @@ +"""메시징 채널 어댑터 추상 클래스 + 공통 분석 로직.""" + +from __future__ import annotations + +import asyncio +import logging +from abc import ABC, abstractmethod + +logger = logging.getLogger(__name__) + + +def _chunk_text(text: str, max_len: int) -> list[str]: + """텍스트를 max_len 이하 청크로 분할한다. + + 문단(\\n\\n) → 줄(\\n) → 강제 분할 순으로 시도. + """ + if len(text) <= max_len: + return [text] + + chunks: list[str] = [] + remaining = text + + while remaining: + if len(remaining) <= max_len: + chunks.append(remaining) + break + + # 문단 경계에서 분할 시도 + cut = remaining.rfind("\n\n", 0, max_len) + if cut > 0: + chunks.append(remaining[:cut]) + remaining = remaining[cut + 2 :] + continue + + # 줄 경계에서 분할 시도 + cut = remaining.rfind("\n", 0, max_len) + if cut > 0: + chunks.append(remaining[:cut]) + remaining = remaining[cut + 1 :] + continue + + # 강제 분할 + chunks.append(remaining[:max_len]) + remaining = remaining[max_len:] + + return chunks + + +class ChannelAdapter(ABC): + """메시징 채널 어댑터 추상 클래스.""" + + name: str = "base" + max_message_length: int = 4000 + + @abstractmethod + async def start(self) -> None: + """어댑터를 시작한다 (blocking).""" + ... + + @abstractmethod + async def stop(self) -> None: + """어댑터를 정리한다.""" + ... + + @abstractmethod + async def send_text(self, channel_id: str, text: str) -> None: + """텍스트 메시지를 전송한다.""" + ... + + async def handle_ask(self, channel_id: str, user_text: str) -> None: + """사용자 메시지를 분석하고 응답을 전송한다. + + 모든 어댑터가 공유하는 핵심 로직: + 1. AI에 질문 전달 (종목은 AI가 자율 판단) + 2. 응답 청킹 + 전송 + """ + user_text = user_text.strip() + if not user_text: + await self.send_text(channel_id, "질문을 입력해주세요. 예: 삼성전자 재무분석") + return + + question = user_text + await self.send_text(channel_id, "분석 중...") + + # AI 분석 실행 (blocking → asyncio.to_thread) + try: + answer = await asyncio.to_thread(self._run_analysis, None, question) + except Exception as exc: # noqa: BLE001 + logger.exception("분석 실패: %s", exc) + await self.send_text(channel_id, f"분석 중 오류가 발생했습니다: {exc}") + return + + # 응답 청킹 + 전송 + chunks = _chunk_text(answer, self.max_message_length) + for chunk in chunks: + await self.send_text(channel_id, chunk) + + @staticmethod + def _run_analysis(company, question: str) -> str: + """AI 분석을 실행하고 텍스트를 반환한다 (동기).""" + from dartlab.ai.runtime.standalone import ask + + return ask(question, company=company, stream=False) diff --git a/src/dartlab/channel/adapters/discord.py b/src/dartlab/channel/adapters/discord.py new file mode 100644 index 0000000000000000000000000000000000000000..ca9a0b1206a6ceabc8cce4c7ab91f821d2700ebb --- /dev/null +++ b/src/dartlab/channel/adapters/discord.py @@ -0,0 +1,112 @@ +"""Discord 채널 어댑터 — Gateway + Slash Command. + +사용: + dartlab share --discord $BOT_TOKEN +""" + +from __future__ import annotations + +import logging + +from dartlab.channel.adapters.base import ChannelAdapter + +logger = logging.getLogger(__name__) + + +class DiscordAdapter(ChannelAdapter): + """discord.py 기반 Discord 어댑터.""" + + name = "discord" + max_message_length = 2000 + + def __init__(self, token: str): + self._token = token + self._bot = None + + async def start(self) -> None: + """Discord 봇을 시작하고 이벤트 리스너 등록.""" + try: + import discord + from discord import app_commands + from discord.ext import commands + except ImportError as exc: + raise RuntimeError("Discord 어댑터를 사용하려면:\n uv pip install discord.py") from exc + + intents = discord.Intents.default() + intents.message_content = True + bot = commands.Bot(command_prefix="!", intents=intents) + self._bot = bot + adapter = self + + @bot.event + async def on_ready(): + logger.info("Discord 봇 준비 완료: %s", bot.user) + try: + synced = await bot.tree.sync() + logger.info("Slash commands 동기화: %d개", len(synced)) + except discord.HTTPException: + logger.warning("Slash commands 동기화 실패") + + @bot.tree.command(name="ask", description="DartLab 기업 분석") + @app_commands.describe(query="종목명 + 질문 (예: 삼성전자 배당 분석)") + async def ask_cmd(interaction: discord.Interaction, query: str): + await interaction.response.defer() + channel_id = str(interaction.channel_id) + + # handle_ask가 send_text를 호출하므로, followup으로 대체 + adapter._interaction = interaction + adapter._followup_sent = False + await adapter.handle_ask(channel_id, query) + + @bot.event + async def on_message(message: discord.Message): + if message.author == bot.user: + return + # DM이거나 봇이 멘션된 경우 + is_dm = message.guild is None + is_mentioned = bot.user in message.mentions if bot.user else False + if not is_dm and not is_mentioned: + return + + text = message.content + # 멘션 제거 + if bot.user: + text = text.replace(f"<@{bot.user.id}>", "").strip() + + adapter._interaction = None + adapter._channel_obj = message.channel + await adapter.handle_ask(str(message.channel.id), text) + + logger.info("Discord 봇 시작") + await bot.start(self._token) + + async def stop(self) -> None: + """Discord 봇 연결 종료.""" + if self._bot: + await self._bot.close() + + async def send_text(self, channel_id: str, text: str) -> None: + """Discord 채널에 텍스트 메시지 전송.""" + # slash command의 경우 followup 사용 + interaction = getattr(self, "_interaction", None) + if interaction is not None: + if not getattr(self, "_followup_sent", False): + await interaction.followup.send(text) + self._followup_sent = True + else: + await interaction.followup.send(text) + return + + # 일반 메시지의 경우 + channel = getattr(self, "_channel_obj", None) + if channel: + await channel.send(text) + elif self._bot: + ch = self._bot.get_channel(int(channel_id)) + if ch: + await ch.send(text) + + +def create(*, token: str, **kwargs) -> DiscordAdapter: + """DiscordAdapter 팩토리.""" + return DiscordAdapter(token) diff --git a/src/dartlab/channel/adapters/slack.py b/src/dartlab/channel/adapters/slack.py new file mode 100644 index 0000000000000000000000000000000000000000..21ad017fa8a19cf8330d6719efafae3a5797a8d6 --- /dev/null +++ b/src/dartlab/channel/adapters/slack.py @@ -0,0 +1,106 @@ +"""Slack 채널 어댑터 — Socket Mode. + +사용: + dartlab share --slack $BOT_TOKEN --slack-app-token $APP_TOKEN +""" + +from __future__ import annotations + +import asyncio +import inspect +import logging + +from dartlab.channel.adapters.base import ChannelAdapter + +logger = logging.getLogger(__name__) + + +class SlackAdapter(ChannelAdapter): + """slack-bolt 기반 Slack 어댑터 (Socket Mode).""" + + name = "slack" + max_message_length = 3000 + + def __init__(self, bot_token: str, app_token: str): + self._bot_token = bot_token + self._app_token = app_token + self._app = None + self._client = None + self._handler = None + + async def start(self) -> None: + """Slack 봇을 Socket Mode로 시작.""" + try: + from slack_bolt import App + from slack_bolt.adapter.socket_mode import SocketModeHandler + except ImportError as exc: + raise RuntimeError("Slack 어댑터를 사용하려면:\n uv pip install slack-bolt") from exc + + app = App(token=self._bot_token) + self._app = app + self._client = app.client + adapter = self + + @app.event("app_mention") + def on_mention(event, _say): + text = event.get("text", "") + # @bot 멘션 제거 + import re + + text = re.sub(r"<@[A-Z0-9]+>", "", text).strip() + channel = event["channel"] + + import asyncio + + asyncio.run(adapter.handle_ask(channel, text)) + + @app.event("message") + def on_dm(event, _say): + # DM에서는 멘션 없이 바로 처리 + if event.get("channel_type") != "im": + return + text = event.get("text", "").strip() + channel = event["channel"] + + import asyncio + + asyncio.run(adapter.handle_ask(channel, text)) + + logger.info("Slack 봇 시작 (Socket Mode)") + handler = SocketModeHandler(app, self._app_token) + self._handler = handler + await asyncio.to_thread(handler.start) + + async def stop(self) -> None: + """Slack 봇 연결 종료.""" + if self._handler is None: + return + close = getattr(self._handler, "close", None) + if callable(close): + await asyncio.to_thread(close) + return + + client = getattr(self._handler, "client", None) + disconnect = getattr(client, "disconnect", None) if client else None + if callable(disconnect): + result = disconnect() + if inspect.isawaitable(result): + await result + + async def send_text(self, channel_id: str, text: str) -> None: + """Slack 채널에 텍스트 메시지 전송. + + slack_bolt 동기 클라이언트는 내부 HTTP 호출이라 이벤트 루프 블록. + asyncio.to_thread로 워커 풀에서 실행 (Discord/Telegram과 동일 패턴). + """ + if self._client: + await asyncio.to_thread( + self._client.chat_postMessage, + channel=channel_id, + text=text, + ) + + +def create(*, bot_token: str, app_token: str, **kwargs) -> SlackAdapter: + """SlackAdapter 팩토리.""" + return SlackAdapter(bot_token, app_token) diff --git a/src/dartlab/channel/adapters/telegram.py b/src/dartlab/channel/adapters/telegram.py new file mode 100644 index 0000000000000000000000000000000000000000..759f61a925e5a150c9089e4a50e4ca9f14b47347 --- /dev/null +++ b/src/dartlab/channel/adapters/telegram.py @@ -0,0 +1,104 @@ +"""Telegram 채널 어댑터 — polling 기반. + +사용: + dartlab share --telegram $BOT_TOKEN +""" + +from __future__ import annotations + +import asyncio +import logging + +from dartlab.channel.adapters.base import ChannelAdapter + +logger = logging.getLogger(__name__) + + +class TelegramAdapter(ChannelAdapter): + """python-telegram-bot 기반 Telegram 어댑터.""" + + name = "telegram" + max_message_length = 4096 + + def __init__(self, token: str): + self._token = token + self._app = None + self._stop_event = None + + async def start(self) -> None: + """Telegram 봇을 polling 모드로 시작.""" + try: + from telegram import Update + from telegram.ext import ( + ApplicationBuilder, + CommandHandler, + ContextTypes, + MessageHandler, + filters, + ) + except ImportError as exc: + raise RuntimeError("Telegram 어댑터를 사용하려면:\n uv pip install python-telegram-bot") from exc + + app = ApplicationBuilder().token(self._token).build() + self._app = app + self._stop_event = asyncio.Event() + + adapter = self + + async def on_start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + if update.effective_chat: + await adapter.send_text( + str(update.effective_chat.id), + "DartLab 분석 봇입니다.\n사용법: /ask 삼성전자 배당 분석\n또는 바로 메시지를 보내세요.", + ) + + async def on_ask(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + if update.effective_chat and context.args: + text = " ".join(context.args) + await adapter.handle_ask(str(update.effective_chat.id), text) + elif update.effective_chat: + await adapter.send_text( + str(update.effective_chat.id), + "사용법: /ask 삼성전자 배당 분석", + ) + + async def on_message(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + if update.effective_chat and update.message and update.message.text: + text = update.message.text.strip() + if text.startswith("/"): + return + await adapter.handle_ask(str(update.effective_chat.id), text) + + app.add_handler(CommandHandler("start", on_start)) + app.add_handler(CommandHandler("ask", on_ask)) + app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, on_message)) + + logger.info("Telegram 봇 시작 (polling)") + await app.initialize() + await app.start() + await app.updater.start_polling() + + try: + await self._stop_event.wait() + except asyncio.CancelledError: + pass + + async def stop(self) -> None: + """Telegram 봇 종료 및 updater 정리.""" + if self._stop_event is not None and not self._stop_event.is_set(): + self._stop_event.set() + if self._app: + if self._app.updater: + await self._app.updater.stop() + await self._app.stop() + await self._app.shutdown() + + async def send_text(self, channel_id: str, text: str) -> None: + """Telegram 채팅방에 텍스트 메시지 전송.""" + if self._app: + await self._app.bot.send_message(chat_id=channel_id, text=text) + + +def create(*, token: str, **kwargs) -> TelegramAdapter: + """TelegramAdapter 팩토리.""" + return TelegramAdapter(token) diff --git a/src/dartlab/channel/devtunnel.py b/src/dartlab/channel/devtunnel.py new file mode 100644 index 0000000000000000000000000000000000000000..bf99003743c19f359136688191bcab598b2d540b --- /dev/null +++ b/src/dartlab/channel/devtunnel.py @@ -0,0 +1,546 @@ +"""DevTunnels 모드 — Microsoft DevTunnels을 이용한 영구 외부 공개 URL. + +VS Code Remote Tunnels의 기반 기술과 동일. Cloudflare Quick Tunnel의 모바일 +fetch hang 문제를 우회하기 위한 1순위 백엔드. + +자동화 흐름: + 1. find_devtunnel_binary : devtunnel CLI 자동 탐색 (PATH/winget package) + 2. install_devtunnel : winget으로 자동 설치 (없을 때) + 3. ensure_logged_in : `devtunnel user show` 확인 → 미인증 시 `devtunnel user login -g` + 4. ensure_tunnel : 상태 파일에서 ID 재사용 또는 신규 생성 + 5. start_host : `devtunnel host ` 백그라운드 실행, URL 추출 + +설계: +- Cloudflare 코드와 독립 (channel/tunnel.py 건드리지 않음) +- DARTLAB_TUNNEL 안 켬 → 토큰/미들웨어 시스템 안 씀 +- `--allow-anonymous`로 동작 (1차) — 추후 GitHub access control 옵션 가능 +- 영구 URL: tunnel ID는 `~/.dartlab/devtunnel-state.json`에 저장 +""" + +from __future__ import annotations + +import atexit +import json +import logging +import os +import platform +import re +import shutil +import subprocess +import threading +from pathlib import Path + +logger = logging.getLogger(__name__) + +_DARTLAB_BIN_DIR = Path.home() / ".dartlab" / "bin" +_STATE_FILE = Path.home() / ".dartlab" / "devtunnel-state.json" + +# URL 추출 정규식 (devtunnel host 출력에서 https URL 캡처) +_URL_PATTERN = re.compile(r"https://[a-z0-9-]+(?:-\d+)?\.[a-z0-9-]+\.devtunnels\.ms[/\w-]*") + + +class DevTunnelSetupError(RuntimeError): + """DevTunnels 셋업 실패.""" + + +# ── state ──────────────────────────────────────────────────────────────── + + +def _load_state() -> dict: + if _STATE_FILE.exists(): + try: + return json.loads(_STATE_FILE.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return {} + return {} + + +def _save_state(**kw) -> None: + state = _load_state() + state.update(kw) + _STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + _STATE_FILE.write_text(json.dumps(state, indent=2, ensure_ascii=False), encoding="utf-8") + + +# ── 바이너리 ───────────────────────────────────────────────────────────── + + +def find_devtunnel_binary() -> str | None: + """devtunnel CLI 위치 탐색. winget 설치 후에도 PATH 갱신 없이 찾음.""" + # 1. PATH + p = shutil.which("devtunnel") + if p: + return p + + # 2. dartlab 로컬 다운로드 + ext = ".exe" if platform.system() == "Windows" else "" + local = _DARTLAB_BIN_DIR / f"devtunnel{ext}" + if local.exists(): + return str(local) + + if platform.system() == "Windows": + # 3. Program Files (수동 설치) + for env_key in ("ProgramFiles", "ProgramFiles(x86)"): + base = os.environ.get(env_key) + if base: + exe = Path(base) / "Microsoft" / "DevTunnel" / "devtunnel.exe" + if exe.exists(): + return str(exe) + # 4. winget package dir + local_app = os.environ.get("LOCALAPPDATA") + if local_app: + from glob import glob + + pattern = str( + Path(local_app) / "Microsoft" / "WinGet" / "Packages" / "Microsoft.devtunnel*" / "**" / "devtunnel.exe" + ) + for found in glob(pattern, recursive=True): + return found + else: + # macOS/Linux + for p in ("/usr/local/bin/devtunnel", "/opt/homebrew/bin/devtunnel", "/usr/bin/devtunnel"): + if Path(p).exists(): + return p + return None + + +def install_devtunnel(auto_yes: bool = False) -> str: + """devtunnel CLI 자동 설치.""" + os_name = platform.system() + + if not auto_yes: + try: + ans = input("\n devtunnel 미설치. 자동 설치하시겠습니까? [Y/n] ").strip().lower() + except (EOFError, KeyboardInterrupt): + ans = "n" + if ans not in ("", "y", "yes"): + raise DevTunnelSetupError("devtunnel 설치를 사용자가 취소했습니다.") + + if os_name == "Windows": + # 1차: winget + try: + print(" winget으로 devtunnel 설치 중... (1~2분)") + result = subprocess.run( + [ + "winget", + "install", + "--id", + "Microsoft.devtunnel", + "-e", + "--silent", + "--accept-source-agreements", + "--accept-package-agreements", + ], + capture_output=True, + text=True, + timeout=300, + encoding="utf-8", + errors="replace", + ) + if result.returncode == 0: + print(" devtunnel 설치 완료 (winget)") + bin_path = find_devtunnel_binary() + if bin_path: + return bin_path + else: + print(f" winget 실패 (rc={result.returncode}): {(result.stderr or '').strip()[:200]}") + except (FileNotFoundError, subprocess.SubprocessError) as exc: + print(f" winget 실행 실패: {exc}") + + # 2차: 직접 다운로드 + print(" 직접 다운로드 fallback 시도...") + try: + from urllib.request import urlretrieve + + _DARTLAB_BIN_DIR.mkdir(parents=True, exist_ok=True) + url = "https://aka.ms/TunnelsCliDownload/win-x64" + target = _DARTLAB_BIN_DIR / "devtunnel.exe" + print(f" 다운로드: {url}") + urlretrieve(url, target) + print(f" 설치 완료: {target}") + return str(target) + except OSError as exc: + print(f" 직접 다운로드 실패: {exc}") + + raise DevTunnelSetupError( + "Windows 자동 설치 실패. 수동 설치:\n https://learn.microsoft.com/azure/developer/dev-tunnels/get-started" + ) + + elif os_name == "Darwin": + # brew + try: + result = subprocess.run( + ["brew", "install", "--cask", "devtunnel"], + capture_output=True, + text=True, + timeout=600, + encoding="utf-8", + errors="replace", + ) + if result.returncode == 0: + bin_path = find_devtunnel_binary() + if bin_path: + return bin_path + except (FileNotFoundError, subprocess.SubprocessError): + pass + # 직접 다운로드 fallback + try: + from urllib.request import urlretrieve + + _DARTLAB_BIN_DIR.mkdir(parents=True, exist_ok=True) + url = "https://aka.ms/TunnelsCliDownload/osx-x64-zip" + target = _DARTLAB_BIN_DIR / "devtunnel.zip" + urlretrieve(url, target) + import zipfile + + with zipfile.ZipFile(target) as zf: + zf.extractall(_DARTLAB_BIN_DIR) + target.unlink() + bin_path = _DARTLAB_BIN_DIR / "devtunnel" + bin_path.chmod(0o755) + return str(bin_path) + except OSError as exc: + raise DevTunnelSetupError(f"devtunnel 설치 실패: {exc}") from exc + + elif os_name == "Linux": + # 보안: curl | bash 같은 임의 원격 코드 실행은 사용자 명시 동의 필요. + # 환경변수 DARTLAB_DEVTUNNEL_AUTOINSTALL=1 또는 대화식 prompt 동의 시만 진행. + autoinstall = os.environ.get("DARTLAB_DEVTUNNEL_AUTOINSTALL", "").strip() == "1" + if not autoinstall: + raise DevTunnelSetupError( + "Linux 자동 설치는 'curl ... | bash' 원격 스크립트를 실행합니다.\n" + " 명시 동의가 필요합니다.\n" + " 진행하려면: DARTLAB_DEVTUNNEL_AUTOINSTALL=1 환경변수 설정 후 재시도\n" + " 수동 설치: https://learn.microsoft.com/azure/developer/dev-tunnels/get-started" + ) + try: + print(" curl로 devtunnel 설치 중... (사용자 동의 OK)") + result = subprocess.run( + ["sh", "-c", "curl -sL https://aka.ms/DevTunnelCliInstall | bash"], + capture_output=True, + text=True, + timeout=300, + encoding="utf-8", + errors="replace", + ) + if result.returncode == 0: + bin_path = find_devtunnel_binary() + if bin_path: + return bin_path + except (FileNotFoundError, subprocess.SubprocessError) as exc: + raise DevTunnelSetupError(f"devtunnel 설치 실패: {exc}") from exc + raise DevTunnelSetupError("Linux 자동 설치 실패") + + else: + raise DevTunnelSetupError(f"지원하지 않는 OS: {os_name}") + + +# ── 인증 ────────────────────────────────────────────────────────────────── + + +def is_logged_in(bin_path: str) -> bool: + """devtunnel 사용자 로그인 여부 확인.""" + try: + result = subprocess.run( + [bin_path, "user", "show"], + capture_output=True, + text=True, + timeout=15, + encoding="utf-8", + errors="replace", + ) + if result.returncode != 0: + return False + # "Logged in as" 또는 "Not logged in" 같은 텍스트 매칭 + out = (result.stdout + result.stderr).lower() + if "not logged in" in out or "not authenticated" in out: + return False + return "logged in" in out or "@" in result.stdout # 이메일 형태 포함 + except (subprocess.SubprocessError, FileNotFoundError): + return False + + +def ensure_logged_in(bin_path: str, auto_yes: bool = False) -> None: + """미인증 시 GitHub 로그인 자동 실행.""" + if is_logged_in(bin_path): + print(" ✓ devtunnel 이미 인증됨") + return + + print("\n GitHub 인증 필요. 잠시 후 브라우저가 자동으로 열립니다.") + print(" → GitHub 로그인 → dev tunnel 권한 허용\n") + + if not auto_yes: + try: + ans = input(" 계속하시겠습니까? [Y/n] ").strip().lower() + except (EOFError, KeyboardInterrupt): + ans = "n" + if ans not in ("", "y", "yes"): + raise DevTunnelSetupError("devtunnel 인증을 사용자가 취소했습니다.") + + # devtunnel user login -g (GitHub) — 출력을 그대로 콘솔로 + print("\n devtunnel user login -g 실행 중...\n") + try: + result = subprocess.run( + [bin_path, "user", "login", "-g"], + timeout=600, + # capture 안 함 — 사용자가 진행 상황 직접 봄 + ) + except subprocess.SubprocessError as exc: + raise DevTunnelSetupError(f"devtunnel 로그인 실패: {exc}") from exc + + if result.returncode != 0: + raise DevTunnelSetupError(f"devtunnel 로그인 종료 코드 {result.returncode}") + + if not is_logged_in(bin_path): + raise DevTunnelSetupError("로그인 후에도 인증 상태가 아닙니다.") + + print("\n ✓ devtunnel 인증 완료") + + +# ── tunnel 생성/재사용 ──────────────────────────────────────────────────── + + +def ensure_tunnel(bin_path: str, port: int) -> str: + """tunnel ID 재사용 또는 신규 생성. tunnel_id 반환. + + 포트 매핑 + anonymous 접근 + anti-phishing 우회까지 보장. + """ + state = _load_state() + existing_id = state.get("tunnel_id") + if existing_id: + try: + result = subprocess.run( + [bin_path, "show", existing_id], + capture_output=True, + text=True, + timeout=30, + encoding="utf-8", + errors="replace", + ) + if result.returncode == 0: + print(f" 기존 tunnel 재사용: {existing_id}") + _ensure_port_mapping(bin_path, existing_id, port) + _ensure_anonymous_access(bin_path, existing_id) + return existing_id + except subprocess.SubprocessError: + pass + + # 신규 생성 — conflict 시 기존 tunnel 찾기 또는 timestamp 라벨로 재시도 + sanitized = re.sub(r"[^a-zA-Z0-9-]", "-", platform.node().lower())[:24] or "host" + base_label = f"dartlab-{sanitized}" + tunnel_label = base_label + tunnel_id = None + + for attempt in range(3): + print(f" tunnel 생성: {tunnel_label}") + try: + result = subprocess.run( + [bin_path, "create", tunnel_label, "--allow-anonymous"], + capture_output=True, + text=True, + timeout=60, + encoding="utf-8", + errors="replace", + ) + except subprocess.SubprocessError as exc: + raise DevTunnelSetupError(f"devtunnel create 실패: {exc}") from exc + + out = result.stdout + result.stderr + if result.returncode == 0: + match = re.search(r"Tunnel ID\s*[:=]\s*(\S+)", out) + if not match: + match = re.search(r"\b([a-z0-9]+-[a-z0-9]+\.[a-z0-9]+)\b", out) + if not match: + raise DevTunnelSetupError(f"tunnel ID 파싱 실패. 출력:\n{out[-500:]}") + tunnel_id = match.group(1) + break + + # Conflict 처리 + if "conflict" in out.lower() or "already" in out.lower(): + # 1차: list에서 기존 dartlab-* tunnel 찾기 + try: + list_res = subprocess.run( + [bin_path, "list"], + capture_output=True, + text=True, + timeout=30, + encoding="utf-8", + errors="replace", + ) + if list_res.returncode == 0: + print(" devtunnel list 결과:") + for line in list_res.stdout.splitlines(): + if line.strip(): + print(f" {line}") + # 진짜 ID 형식: