CatPtain committed on
Commit
1b6b94f
·
verified ·
1 Parent(s): b8f2a01

Upload 131 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. openbb_platform/providers/sec/README.md +13 -0
  3. openbb_platform/providers/sec/__init__.py +1 -0
  4. openbb_platform/providers/sec/openbb_sec/__init__.py +50 -0
  5. openbb_platform/providers/sec/openbb_sec/models/__init__.py +1 -0
  6. openbb_platform/providers/sec/openbb_sec/models/cik_map.py +61 -0
  7. openbb_platform/providers/sec/openbb_sec/models/company_filings.py +347 -0
  8. openbb_platform/providers/sec/openbb_sec/models/compare_company_facts.py +190 -0
  9. openbb_platform/providers/sec/openbb_sec/models/equity_ftd.py +104 -0
  10. openbb_platform/providers/sec/openbb_sec/models/equity_search.py +91 -0
  11. openbb_platform/providers/sec/openbb_sec/models/etf_holdings.py +870 -0
  12. openbb_platform/providers/sec/openbb_sec/models/form_13FHR.py +107 -0
  13. openbb_platform/providers/sec/openbb_sec/models/htm_file.py +97 -0
  14. openbb_platform/providers/sec/openbb_sec/models/insider_trading.py +221 -0
  15. openbb_platform/providers/sec/openbb_sec/models/institutions_search.py +75 -0
  16. openbb_platform/providers/sec/openbb_sec/models/latest_financial_reports.py +261 -0
  17. openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py +1394 -0
  18. openbb_platform/providers/sec/openbb_sec/models/py.typed +0 -0
  19. openbb_platform/providers/sec/openbb_sec/models/rss_litigation.py +98 -0
  20. openbb_platform/providers/sec/openbb_sec/models/schema_files.py +64 -0
  21. openbb_platform/providers/sec/openbb_sec/models/sec_filing.py +728 -0
  22. openbb_platform/providers/sec/openbb_sec/models/sic_search.py +111 -0
  23. openbb_platform/providers/sec/openbb_sec/models/symbol_map.py +62 -0
  24. openbb_platform/providers/sec/openbb_sec/py.typed +0 -0
  25. openbb_platform/providers/sec/openbb_sec/utils/__init__.py +1 -0
  26. openbb_platform/providers/sec/openbb_sec/utils/definitions.py +1350 -0
  27. openbb_platform/providers/sec/openbb_sec/utils/form4.py +657 -0
  28. openbb_platform/providers/sec/openbb_sec/utils/frames.py +284 -0
  29. openbb_platform/providers/sec/openbb_sec/utils/helpers.py +362 -0
  30. openbb_platform/providers/sec/openbb_sec/utils/parse_13f.py +231 -0
  31. openbb_platform/providers/sec/openbb_sec/utils/py.typed +0 -0
  32. openbb_platform/providers/sec/poetry.lock +0 -0
  33. openbb_platform/providers/sec/pyproject.toml +26 -0
  34. openbb_platform/providers/sec/tests/__init__.py +1 -0
  35. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v1.yaml +0 -0
  36. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v2.yaml +0 -0
  37. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v1.yaml +0 -0
  38. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v2.yaml +0 -0
  39. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v1.yaml +0 -0
  40. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v2.yaml +0 -0
  41. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v1.yaml +0 -0
  42. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v2.yaml +0 -0
  43. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v1.yaml +0 -0
  44. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v2.yaml +0 -0
  45. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v1.yaml +0 -0
  46. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v2.yaml +0 -0
  47. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v1.yaml +212 -0
  48. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v2.yaml +212 -0
  49. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v1.yaml +0 -0
  50. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v2.yaml +0 -0
.gitattributes CHANGED
@@ -16,3 +16,5 @@ openbb_platform/providers/bls/openbb_bls/assets/tu_series.xz filter=lfs diff=lfs
16
  openbb_platform/providers/bls/openbb_bls/assets/wages_series.xz filter=lfs diff=lfs merge=lfs -text
17
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
18
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
 
 
 
16
  openbb_platform/providers/bls/openbb_bls/assets/wages_series.xz filter=lfs diff=lfs merge=lfs -text
17
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
18
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
19
+ openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_institutions_search_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
20
+ openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_institutions_search_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
openbb_platform/providers/sec/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenBB SEC Provider
2
+
3
+ This extension integrates the [SEC](https://www.sec.gov/edgar) data provider into the OpenBB Platform.
4
+
5
+ ## Installation
6
+
7
+ To install the extension:
8
+
9
+ ```bash
10
+ pip install openbb-sec
11
+ ```
12
+
13
+ Documentation available [here](https://docs.openbb.co/platform/developer_guide/contributing).
openbb_platform/providers/sec/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Provider."""
openbb_platform/providers/sec/openbb_sec/__init__.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC provider module."""
2
+
3
+ from openbb_core.provider.abstract.provider import Provider
4
+ from openbb_sec.models.cik_map import SecCikMapFetcher
5
+ from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
6
+ from openbb_sec.models.compare_company_facts import SecCompareCompanyFactsFetcher
7
+ from openbb_sec.models.equity_ftd import SecEquityFtdFetcher
8
+ from openbb_sec.models.equity_search import SecEquitySearchFetcher
9
+ from openbb_sec.models.etf_holdings import SecEtfHoldingsFetcher
10
+ from openbb_sec.models.form_13FHR import SecForm13FHRFetcher
11
+ from openbb_sec.models.htm_file import SecHtmFileFetcher
12
+ from openbb_sec.models.insider_trading import SecInsiderTradingFetcher
13
+ from openbb_sec.models.institutions_search import SecInstitutionsSearchFetcher
14
+ from openbb_sec.models.latest_financial_reports import SecLatestFinancialReportsFetcher
15
+ from openbb_sec.models.management_discussion_analysis import (
16
+ SecManagementDiscussionAnalysisFetcher,
17
+ )
18
+ from openbb_sec.models.rss_litigation import SecRssLitigationFetcher
19
+ from openbb_sec.models.schema_files import SecSchemaFilesFetcher
20
+ from openbb_sec.models.sec_filing import SecFilingFetcher
21
+ from openbb_sec.models.sic_search import SecSicSearchFetcher
22
+ from openbb_sec.models.symbol_map import SecSymbolMapFetcher
23
+
24
# Provider registration for the SEC data source. Each key in fetcher_dict is
# the standard-model name the fetcher implements. Note that "Filings" and
# "CompanyFilings" intentionally map to the same fetcher class.
sec_provider = Provider(
    name="sec",
    website="https://www.sec.gov/data",
    description="SEC is the public listings regulatory body for the United States.",
    # The SEC's public endpoints require no API key.
    credentials=None,
    fetcher_dict={
        "CikMap": SecCikMapFetcher,
        "CompanyFilings": SecCompanyFilingsFetcher,
        "CompareCompanyFacts": SecCompareCompanyFactsFetcher,
        "EquityFTD": SecEquityFtdFetcher,
        "EquitySearch": SecEquitySearchFetcher,
        "EtfHoldings": SecEtfHoldingsFetcher,
        "Filings": SecCompanyFilingsFetcher,
        "Form13FHR": SecForm13FHRFetcher,
        "SecHtmFile": SecHtmFileFetcher,
        "InsiderTrading": SecInsiderTradingFetcher,
        "InstitutionsSearch": SecInstitutionsSearchFetcher,
        "LatestFinancialReports": SecLatestFinancialReportsFetcher,
        "ManagementDiscussionAnalysis": SecManagementDiscussionAnalysisFetcher,
        "RssLitigation": SecRssLitigationFetcher,
        "SchemaFiles": SecSchemaFilesFetcher,
        "SecFiling": SecFilingFetcher,
        "SicSearch": SecSicSearchFetcher,
        "SymbolMap": SecSymbolMapFetcher,
    },
    repr_name="Securities and Exchange Commission (SEC)",
)
openbb_platform/providers/sec/openbb_sec/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Provider Models."""
openbb_platform/providers/sec/openbb_sec/models/cik_map.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC CIK Mapping Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.cik_map import CikMapData, CikMapQueryParams
9
+ from pydantic import Field
10
+
11
+
12
class SecCikMapQueryParams(CikMapQueryParams):
    """SEC CIK Mapping Query.

    Source: https://sec.gov/
    """

    # Toggles the on-disk cache used by the symbol-to-CIK lookup helper.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache for the request, default is True.",
    )
22
+
23
+
24
class SecCikMapData(CikMapData):
    """SEC CIK Mapping Data."""

    # No provider-specific fields; everything is inherited from the standard model.
26
+
27
+
28
class SecCikMapFetcher(
    Fetcher[
        SecCikMapQueryParams,
        SecCikMapData,
    ]
):
    """SEC CIK Map Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCikMapQueryParams:
        """Transform the query."""
        return SecCikMapQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCikMapQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Returns ``{"cik": <value>}`` on success, or
        ``{"Error": "Symbol not found."}`` when the symbol cannot be mapped.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import symbol_map

        cik = await symbol_map(query.symbol, query.use_cache)
        # BUGFIX: test the looked-up value itself. The previous code wrapped
        # the result in a dict first and tested the dict, which was always
        # truthy, so a failed lookup was never reported.
        if not cik:
            return {"Error": "Symbol not found."}
        return {"cik": cik}

    @staticmethod
    def transform_data(
        query: SecCikMapQueryParams, data: Dict, **kwargs: Any
    ) -> SecCikMapData:
        """Transform the data to the standard format."""
        return SecCikMapData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/company_filings.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Company Filings Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import (
6
+ date as dateType,
7
+ datetime,
8
+ )
9
+ from typing import Any, Dict, List, Optional, Union
10
+ from warnings import warn
11
+
12
+ from openbb_core.app.model.abstract.error import OpenBBError
13
+ from openbb_core.provider.abstract.fetcher import Fetcher
14
+ from openbb_core.provider.standard_models.company_filings import (
15
+ CompanyFilingsData,
16
+ CompanyFilingsQueryParams,
17
+ )
18
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
19
+ from openbb_core.provider.utils.errors import EmptyDataError
20
+ from openbb_sec.utils.definitions import FORM_LIST, HEADERS
21
+ from pydantic import Field, field_validator
22
+
23
+
24
class SecCompanyFilingsQueryParams(CompanyFilingsQueryParams):
    """SEC Company Filings Query.

    Source: https://sec.gov/
    """

    # Advertise in the JSON schema that form_type accepts a comma-separated
    # list of values drawn from FORM_LIST.
    __json_schema_extra__ = {
        "form_type": {
            "multiple_items_allowed": True,
            "choices": FORM_LIST,
        }
    }

    # CIK may be supplied as a string or an integer; any stripped leading
    # zeros are restored downstream before building the request URL.
    cik: Optional[Union[str, int]] = Field(
        description="Lookup filings by Central Index Key (CIK) instead of by symbol.",
        default=None,
    )
    start_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("start_date", ""),
    )
    end_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("end_date", ""),
    )
    # Stored as a comma-joined string after validation (see validate_form_type).
    form_type: Optional[str] = Field(
        description="SEC form type to filter by.",
        default=None,
    )
    limit: Optional[int] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("limit", ""),
    )
    use_cache: bool = Field(
        description="Whether or not to use cache. If True, cache will store for one day.",
        default=True,
    )

    @field_validator("form_type", mode="before", check_fields=False)
    @classmethod
    def validate_form_type(cls, v):
        """Validate form_type.

        Accepts a comma-separated string or a list. Each entry is uppercased
        and checked against FORM_LIST; invalid entries produce a warning,
        and an OpenBBError is raised when no entry is valid.
        """
        if not v:
            return None
        if isinstance(v, str):
            forms = v.split(",")
        elif isinstance(v, list):
            forms = v
        else:
            raise OpenBBError("Unexpected form_type value.")
        new_forms: list = []
        messages: list = []
        for form in forms:
            if form.upper() in FORM_LIST:
                new_forms.append(form.upper())
            else:
                messages.append(f"Invalid form type: {form}")

        if not new_forms:
            raise OpenBBError(
                f"No valid forms provided -> {', '.join(messages)} -> Valid forms: {', '.join(FORM_LIST)}"
            )

        # Some entries were valid; only warn about the invalid ones.
        if new_forms and messages:
            warn("\n ".join(messages))

        # Normalize back to a single comma-joined string (or a lone value).
        return ",".join(new_forms) if len(new_forms) > 1 else new_forms[0]
91
+
92
+
93
class SecCompanyFilingsData(CompanyFilingsData):
    """SEC Company Filings Data."""

    # Maps model field names to the camelCase keys of the SEC JSON payload
    # (including the URL columns synthesized in the fetcher's transform_data).
    __alias_dict__ = {
        "filing_date": "filingDate",
        "accepted_date": "acceptanceDateTime",
        "filing_url": "filingDetailUrl",
        "report_url": "primaryDocumentUrl",
        "report_type": "form",
        "report_date": "reportDate",
        "primary_doc_description": "primaryDocDescription",
        "primary_doc": "primaryDocument",
        "accession_number": "accessionNumber",
        "file_number": "fileNumber",
        "film_number": "filmNumber",
        "is_inline_xbrl": "isInlineXBRL",
        "is_xbrl": "isXBRL",
        "complete_submission_url": "completeSubmissionUrl",
        "filing_detail_url": "filingDetailUrl",
    }

    report_date: Optional[dateType] = Field(
        description="The date of the filing.",
        default=None,
    )
    act: Optional[Union[str, int]] = Field(
        description="The SEC Act number.", default=None
    )
    items: Optional[Union[str, float]] = Field(
        description="The SEC Item numbers.", default=None
    )
    primary_doc_description: Optional[str] = Field(
        description="The description of the primary document.",
        default=None,
    )
    primary_doc: Optional[str] = Field(
        description="The filename of the primary document.",
        default=None,
    )
    accession_number: Optional[Union[str, int]] = Field(
        description="The accession number.",
        default=None,
    )
    file_number: Optional[Union[str, int]] = Field(
        description="The file number.",
        default=None,
    )
    film_number: Optional[Union[str, int]] = Field(
        description="The film number.",
        default=None,
    )
    is_inline_xbrl: Optional[Union[str, int]] = Field(
        description="Whether the filing is an inline XBRL filing.",
        default=None,
    )
    is_xbrl: Optional[Union[str, int]] = Field(
        description="Whether the filing is an XBRL filing.",
        default=None,
    )
    size: Optional[Union[str, int]] = Field(
        description="The size of the filing.", default=None
    )
    complete_submission_url: Optional[str] = Field(
        description="The URL to the complete filing submission.",
        default=None,
    )
    filing_detail_url: Optional[str] = Field(
        description="The URL to the filing details.",
        default=None,
    )

    @field_validator("report_date", mode="before", check_fields=False)
    @classmethod
    def validate_report_date(cls, v: Optional[Union[str, dateType]]):
        """Validate report_date.

        Accepts a date object, an ISO "YYYY-MM-DD" string, or an empty
        string (normalized to None).
        """
        if isinstance(v, dateType):
            return v
        # The SEC payload uses "" for missing report dates.
        v = v if v != "" else None
        return (
            datetime.strptime(v, "%Y-%m-%d").date()
            if v and isinstance(v, str)
            else None
        )
176
+
177
+
178
class SecCompanyFilingsFetcher(
    Fetcher[SecCompanyFilingsQueryParams, List[SecCompanyFilingsData]]
):
    """SEC Company Filings Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCompanyFilingsQueryParams:
        """Transform query params."""
        return SecCompanyFilingsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCompanyFilingsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Extract the data from the SEC endpoint.

        Resolves the CIK (from the symbol when necessary), downloads the
        recent-filings index from data.sec.gov, and follows the pagination
        files when the query needs more than the first page of records.
        """
        # pylint: disable=import-outside-toplevel
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request, amake_requests
        from openbb_sec.utils.helpers import symbol_map

        from pandas import DataFrame

        if query.symbol and not query.cik:
            query.cik = await symbol_map(
                query.symbol.lower(), use_cache=query.use_cache
            )
            if not query.cik:
                raise OpenBBError(f"CIK not found for symbol {query.symbol}")
        if query.cik is None:
            raise OpenBBError("CIK or symbol must be provided.")

        # The endpoint expects a 10-digit, zero-padded CIK. Convert with
        # `str()` first — the query model allows an integer CIK, which would
        # make `len()` raise a TypeError — then left-pad with `zfill` to
        # restore leading zeros stripped by integer storage.
        query.cik = str(query.cik).zfill(10)

        url = f"https://data.sec.gov/submissions/CIK{query.cik}.json"
        data: Union[dict, List[dict]] = []
        if query.use_cache is True:
            cache_dir = f"{get_user_cache_directory()}/http/sec_company_filings"
            async with CachedSession(
                cache=SQLiteBackend(cache_dir, expire_after=3600 * 24)
            ) as session:
                # Purge stale entries so the one-day TTL is honored.
                await session.delete_expired_responses()
                try:
                    data = await amake_request(url, headers=HEADERS, session=session)  # type: ignore
                finally:
                    await session.close()
        else:
            data = await amake_request(url, headers=HEADERS)  # type: ignore

        # The most recent filings are embedded directly in the first response.
        filings = (
            DataFrame.from_records(data["filings"].get("recent"))  # type: ignore
            if "filings" in data
            else DataFrame()
        )
        results = filings.to_dict("records")

        # If there are lots of filings, there will be custom pagination:
        # older records live in separate per-chunk JSON files listed under
        # data["filings"]["files"].
        if (
            (query.limit and len(filings) >= 1000)
            or query.form_type is not None
            or query.limit == 0
        ):

            async def callback(response, session):
                """Response callback for excess company filings."""
                result = await response.json()
                if result:
                    new_data = DataFrame.from_records(result)
                    results.extend(new_data.to_dict("records"))

            urls: List = []
            new_urls = (
                DataFrame(data["filings"].get("files"))  # type: ignore
                if "filings" in data
                else DataFrame()
            )
            for i in new_urls.index:
                new_cik: str = data["filings"]["files"][i]["name"]  # type: ignore
                new_url: str = "https://data.sec.gov/submissions/" + new_cik
                urls.append(new_url)
            if query.use_cache is True:
                cache_dir = f"{get_user_cache_directory()}/http/sec_company_filings"
                async with CachedSession(
                    cache=SQLiteBackend(cache_dir, expire_after=3600 * 24)
                ) as session:
                    try:
                        await amake_requests(urls, headers=HEADERS, session=session, response_callback=callback)  # type: ignore
                    finally:
                        await session.close()
            else:
                await amake_requests(urls, headers=HEADERS, response_callback=callback)  # type: ignore

        return results

    @staticmethod
    def transform_data(
        query: SecCompanyFilingsQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecCompanyFilingsData]:
        """Transform the data.

        Sorts by date, applies the date/form/limit filters, and builds the
        document URLs from the accession number.
        """
        # pylint: disable=import-outside-toplevel
        from numpy import nan
        from pandas import NA, DataFrame, to_datetime

        if not data:
            raise EmptyDataError(
                f"No filings found for CIK {query.cik}, or symbol {query.symbol}"
            )
        cols = [
            "reportDate",
            "filingDate",
            "acceptanceDateTime",
            "act",
            "form",
            "items",
            "primaryDocDescription",
            "primaryDocument",
            "accessionNumber",
            "fileNumber",
            "filmNumber",
            "isInlineXBRL",
            "isXBRL",
            "size",
        ]
        filings = DataFrame(data, columns=cols).astype(str)
        filings["reportDate"] = to_datetime(filings["reportDate"]).dt.date
        filings["filingDate"] = to_datetime(filings["filingDate"]).dt.date
        filings = filings.sort_values(by=["filingDate", "reportDate"], ascending=False)
        if query.start_date:
            filings = filings[filings["filingDate"] >= query.start_date]
        if query.end_date:
            filings = filings[filings["filingDate"] <= query.end_date]
        # int() strips the zero-padding for the Archives path segment.
        base_url = f"https://www.sec.gov/Archives/edgar/data/{str(int(query.cik))}/"  # type: ignore
        filings["primaryDocumentUrl"] = (
            base_url
            + filings["accessionNumber"].str.replace("-", "")
            + "/"
            + filings["primaryDocument"]
        )
        filings["completeSubmissionUrl"] = (
            base_url + filings["accessionNumber"] + ".txt"
        )
        filings["filingDetailUrl"] = (
            base_url + filings["accessionNumber"] + "-index.htm"
        )
        if query.form_type:
            form_types = query.form_type.replace("_", " ").split(",")
            filings = filings[
                filings.form.str.contains("|".join(form_types), case=False, na=False)
            ]
        # A truthy limit is necessarily non-zero, so no extra zero check is
        # needed here (limit == 0 means "no limit" and skips this branch).
        if query.limit:
            filings = filings.head(query.limit)

        if len(filings) == 0:
            raise EmptyDataError("No filings were found using the filters provided.")
        # Normalize pandas missing-value markers to None for the model.
        filings = filings.replace({NA: None, nan: None})

        return [
            SecCompanyFilingsData.model_validate(d) for d in filings.to_dict("records")
        ]
openbb_platform/providers/sec/openbb_sec/models/compare_company_facts.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Compare Company Facts Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+ from warnings import warn
7
+
8
+ from openbb_core.provider.abstract.annotated_result import AnnotatedResult
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.compare_company_facts import (
11
+ CompareCompanyFactsData,
12
+ CompareCompanyFactsQueryParams,
13
+ )
14
+ from openbb_core.provider.utils.descriptions import DATA_DESCRIPTIONS
15
+ from openbb_core.provider.utils.errors import EmptyDataError
16
+ from openbb_sec.utils.definitions import (
17
+ FACT_CHOICES,
18
+ FACTS,
19
+ FISCAL_PERIODS,
20
+ )
21
+ from pydantic import Field, field_validator
22
+
23
+
24
class SecCompareCompanyFactsQueryParams(CompareCompanyFactsQueryParams):
    """SEC Compare Company Facts Query.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful different reporting start and end dates for facts contained in a frame.
    """

    # Schema hints: multiple symbols allowed; a single fact and fiscal period,
    # each constrained to a fixed choice list.
    __json_schema_extra__ = {
        "symbol": {"multiple_items_allowed": True},
        "fact": {"multiple_items_allowed": False, "choices": sorted(FACTS)},
        "fiscal_period": {
            "multiple_items_allowed": False,
            "choices": ["fy", "q1", "q2", "q3", "q4"],
        },
    }

    fact: FACT_CHOICES = Field(
        default="Revenues",
        description="Fact or concept from the SEC taxonomy, in UpperCamelCase. Defaults to, 'Revenues'."
        + " AAPL, MSFT, GOOG, BRK-A currently report revenue as, 'RevenueFromContractWithCustomerExcludingAssessedTax'."
        + " In previous years, they have reported as 'Revenues'.",
    )
    year: Optional[int] = Field(
        default=None,
        description="The year to retrieve the data for. If not provided, the current year is used."
        + " When symbol(s) are provided, excluding the year will return all reported values for the concept.",
    )
    # Only meaningful for frame (no-symbol) requests; see the fetcher.
    fiscal_period: Optional[FISCAL_PERIODS] = Field(
        default=None,
        description="The fiscal period to retrieve the data for."
        + " If not provided, the most recent quarter is used."
        + " This parameter is ignored when a symbol is supplied.",
    )
    # Only meaningful for frame (no-symbol) requests; see the fetcher.
    instantaneous: bool = Field(
        default=False,
        description="Whether to retrieve instantaneous data. See the notes above for more information."
        + " Defaults to False. Some facts are only available as instantaneous data."
        + "\nThe function will automatically attempt the inverse of this parameter"
        + " if the initial fiscal quarter request fails."
        + " This parameter is ignored when a symbol is supplied.",
    )
    use_cache: bool = Field(
        default=True,
        description="Whether to use cache for the request. Defaults to True.",
    )

    @field_validator("fact", mode="before", check_fields=False)
    @classmethod
    def validate_fact(cls, v):
        """Set the default state."""
        # Any falsy value (None, "") falls back to the default fact.
        if not v or v == "":
            return "Revenues"
        return v
83
+
84
+
85
class SecCompareCompanyFactsData(CompareCompanyFactsData):
    """SEC Compare Company Facts Data."""

    # Maps model field names to the abbreviated keys of the SEC JSON payload.
    __alias_dict__ = {
        "reported_date": "filed",
        "period_beginning": "start",
        "period_ending": "end",
        "fiscal_year": "fy",
        "fiscal_period": "fp",
        "name": "entityName",
        "accession": "accn",
        "value": "val",
        "location": "loc",
    }

    cik: Union[str, int] = Field(
        description=DATA_DESCRIPTIONS.get("cik", ""),
    )
    location: Optional[str] = Field(
        default=None,
        description="Geographic location of the reporting entity.",
    )
    form: Optional[str] = Field(
        default=None,
        description="The SEC form associated with the fact or concept.",
    )
    frame: Optional[str] = Field(
        default=None,
        description="The frame ID associated with the fact or concept, if applicable.",
    )
    accession: str = Field(
        description="SEC filing accession number associated with the reported fact or concept.",
    )
    fact: str = Field(
        description="The display name of the fact or concept.",
    )
    # Annotation widened to Optional[str]: the field defaults to None, so a
    # bare `str` annotation mistyped it.
    unit: Optional[str] = Field(
        default=None,
        description="The unit of measurement for the fact or concept.",
    )
125
+
126
+
127
class SecCompareCompanyFactsFetcher(
    Fetcher[SecCompareCompanyFactsQueryParams, List[SecCompareCompanyFactsData]]
):
    """SEC Compare Company Facts Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCompareCompanyFactsQueryParams:
        """Transform the query."""
        return SecCompareCompanyFactsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCompareCompanyFactsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        With symbol(s), queries the per-company concept API; otherwise
        queries the xbrl/frames API for the aggregated cross-company frame.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.frames import get_concept, get_frame

        results: Dict = {}
        # BUGFIX: the two branches are now mutually exclusive. Previously,
        # `if not query.symbol:` ran the frame request and a separate
        # `if query.symbol is not None:` ALSO ran for an empty-string symbol,
        # discarding the frame result and issuing a concept request with "".
        if query.symbol:
            # Frame-only parameters do not apply to a concept request.
            if query.instantaneous is True:
                warn(
                    "The 'instantaneous' parameter is ignored when a symbol is supplied."
                )
            if query.fiscal_period is not None:
                warn(
                    "The 'fiscal_period' parameter is ignored when a symbol is supplied."
                )
            results = await get_concept(
                symbol=query.symbol,
                fact=query.fact,
                year=query.year,
                use_cache=query.use_cache,
            )
        else:
            results = await get_frame(
                fact=query.fact,
                year=query.year,
                fiscal_period=query.fiscal_period,
                instantaneous=query.instantaneous,
                use_cache=query.use_cache,
            )
        if not results:
            raise EmptyDataError("The request was returned empty.")

        return results

    @staticmethod
    def transform_data(
        query: SecCompareCompanyFactsQueryParams,
        data: Dict,
        **kwargs: Any,
    ) -> AnnotatedResult[List[SecCompareCompanyFactsData]]:
        """Transform the data and validate the model.

        The raw payload carries the records under "data" and request
        metadata under "metadata"; both are surfaced via AnnotatedResult.
        """
        if not data:
            raise EmptyDataError("The request was returned empty.")
        metadata = data.get("metadata")
        results_data = data.get("data", [])
        return AnnotatedResult(
            result=[SecCompareCompanyFactsData.model_validate(d) for d in results_data],  # type: ignore
            metadata=metadata,
        )
openbb_platform/providers/sec/openbb_sec/models/equity_ftd.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Equity FTD Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.equity_ftd import (
9
+ EquityFtdData,
10
+ EquityFtdQueryParams,
11
+ )
12
+ from openbb_core.provider.utils.errors import EmptyDataError
13
+ from pydantic import Field
14
+
15
+
16
class SecEquityFtdQueryParams(EquityFtdQueryParams):
    """SEC Equity FTD Query.

    Source: https://sec.gov/
    """

    # Maximum number of bi-monthly reports to download, newest first.
    limit: Optional[int] = Field(
        description="""
        Limit the number of reports to parse, from most recent.
        Approximately 24 reports per year, going back to 2009.
        """,
        default=24,
    )
    # Offset into the report list; a value of 1 skips the newest report.
    skip_reports: Optional[int] = Field(
        description="""
        Skip N number of reports from current. A value of 1 will skip the most recent report.
        """,
        default=0,
    )
    # Published reports are immutable, so caching each period's URL is safe.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache for the request, default is True."
        + " Each reporting period is a separate URL, new reports will be added to the cache.",
    )
40
+
41
+
42
class SecEquityFtdData(EquityFtdData):
    """SEC Equity FTD Data."""

    # The SEC source file labels the settlement-date column simply "date".
    __alias_dict__ = {"settlement_date": "date"}
46
+
47
+
48
class SecEquityFtdFetcher(
    Fetcher[
        SecEquityFtdQueryParams,
        List[SecEquityFtdData],
    ]
):
    """SEC Equity FTD Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecEquityFtdQueryParams:
        """Transform query params."""
        return SecEquityFtdQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecEquityFtdQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Download and combine the FTD report archives from the SEC website."""
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        from openbb_sec.utils.helpers import download_zip_file, get_ftd_urls  # noqa

        urls_data = await get_ftd_urls()
        urls = list(urls_data.values())
        # A non-positive or missing limit means "take every report".
        limit = query.limit if query.limit is not None and query.limit > 0 else 0
        if limit > 0:
            start = query.skip_reports if query.skip_reports else 0
            urls = urls[start : start + limit]  # noqa: E203

        results: List[Dict] = []

        async def fetch_report(url):
            """Download one report archive and accumulate its rows."""
            rows = await download_zip_file(url, query.symbol, query.use_cache)
            results.extend(rows)

        # Reports are independent, so fetch them concurrently; ordering is
        # restored by the sort below.
        await asyncio.gather(*[fetch_report(url) for url in urls])

        if not results:
            raise EmptyDataError(
                "There was an error collecting data, no results were returned."
            )

        return sorted(results, key=lambda d: d["date"], reverse=True)

    @staticmethod
    def transform_data(
        query: SecEquityFtdQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecEquityFtdData]:
        """Validate each record against the data model."""
        return [SecEquityFtdData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/equity_search.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Equity Search Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.equity_search import (
9
+ EquitySearchData,
10
+ EquitySearchQueryParams,
11
+ )
12
+ from pydantic import Field
13
+
14
+
15
class SecEquitySearchQueryParams(EquitySearchQueryParams):
    """SEC Equity Search Query.

    Source: https://sec.gov/
    """

    # The SEC company/fund maps change rarely, so caching defaults to on.
    use_cache: bool = Field(
        default=True,
        description="Whether to use the cache or not.",
    )
    # When True, the mutual fund/ETF map is searched instead of the
    # operating-company list.
    is_fund: bool = Field(
        default=False,
        description="Whether to direct the search to the list of mutual funds and ETFs.",
    )
29
+
30
+
31
class SecEquitySearchData(EquitySearchData):
    """SEC Equity Search Data."""

    # The CIK (Central Index Key) is the SEC's unique filer identifier.
    cik: str = Field(description="Central Index Key")
35
+
36
+
37
class SecEquitySearchFetcher(
    Fetcher[
        SecEquitySearchQueryParams,
        List[SecEquitySearchData],
    ]
):
    """SEC Equity Search Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecEquitySearchQueryParams:
        """Transform the query."""
        return SecEquitySearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecEquitySearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import (
            get_all_companies,
            get_mf_and_etf_map,
        )

        if query.is_fund is True:
            # Funds are matched on their SEC identifiers and ticker.
            companies = await get_mf_and_etf_map(use_cache=query.use_cache)
            mask = (
                companies["cik"].str.contains(query.query, case=False)
                | companies["seriesId"].str.contains(query.query, case=False)
                | companies["classId"].str.contains(query.query, case=False)
                | companies["symbol"].str.contains(query.query, case=False)
            )
        else:
            # Operating companies are matched on name, ticker, or CIK.
            companies = await get_all_companies(use_cache=query.use_cache)
            mask = (
                companies["name"].str.contains(query.query, case=False)
                | companies["symbol"].str.contains(query.query, case=False)
                | companies["cik"].str.contains(query.query, case=False)
            )

        return companies[mask].astype(str).to_dict("records")

    @staticmethod
    def transform_data(
        query: SecEquitySearchQueryParams, data: Dict, **kwargs: Any
    ) -> List[SecEquitySearchData]:
        """Validate each record against the data model."""
        return [SecEquitySearchData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/etf_holdings.py ADDED
@@ -0,0 +1,870 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC ETF Holings Model."""
2
+
3
+ # pylint: disable =[unused-argument,too-many-locals,too-many-branches]
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Dict, List, Optional, Union
7
+ from warnings import warn
8
+
9
+ from openbb_core.app.model.abstract.error import OpenBBError
10
+ from openbb_core.provider.abstract.annotated_result import AnnotatedResult
11
+ from openbb_core.provider.abstract.fetcher import Fetcher
12
+ from openbb_core.provider.standard_models.etf_holdings import (
13
+ EtfHoldingsData,
14
+ EtfHoldingsQueryParams,
15
+ )
16
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
17
+ from openbb_core.provider.utils.errors import EmptyDataError
18
+ from pydantic import Field, field_validator, model_validator
19
+
20
+
21
class SecEtfHoldingsQueryParams(EtfHoldingsQueryParams):
    """SEC ETF Holdings Query.

    Source: https://www.sec.gov/Archives/edgar/data/
    """

    # Accepts a date object or an ISO string; the fetcher matches it to the
    # nearest available N-PORT filing period.
    date: Optional[Union[str, dateType]] = Field(
        description=QUERY_DESCRIPTIONS.get("date", "")
        + " The date represents the period ending."
        + " The date entered will return the closest filing.",
        default=None,
    )
    use_cache: bool = Field(
        description="Whether or not to use cache for the request.",
        default=True,
    )
37
+
38
+
39
class SecEtfHoldingsData(EtfHoldingsData):
    """SEC ETF Holdings Data.

    Field names map to the XML tags/attributes of the SEC N-PORT filing via
    ``__alias_dict__``; most fields are optional because each holding type
    (equity, debt, derivative, repo) populates a different subset.
    """

    # Left side: model field; right side: N-PORT XML tag name.
    __alias_dict__ = {
        "name": "title",
        "weight": "pctVal",
        "value": "valUSD",
        "payoff_profile": "payoffProfile",
        "currency": "curCd",
        "asset_category": "assetCat",
        "issuer_category": "issuerCat",
        "country": "invCountry",
        "is_restricted": "isRestrictedSec",
        "fair_value_level": "fairValLevel",
        "is_cash_collateral": "isCashCollateral",
        "is_non_cash_collateral": "isNonCashCollateral",
        "is_loan_by_fund": "isLoanByFund",
        "loan_value": "loanVal",
    }

    # --- Identifiers and generic holding attributes ---
    lei: Optional[str] = Field(description="The LEI of the holding.", default=None)
    cusip: Optional[str] = Field(description="The CUSIP of the holding.", default=None)
    isin: Optional[str] = Field(description="The ISIN of the holding.", default=None)
    other_id: Optional[str] = Field(
        description="Internal identifier for the holding.", default=None
    )
    balance: Optional[float] = Field(
        description="The balance of the holding.", default=None
    )
    # Normalized to a 0-1 fraction by `normalize_percent` below.
    weight: Optional[float] = Field(
        description="The weight of the holding in ETF in %.",
        default=None,
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
    value: Optional[float] = Field(
        description="The value of the holding in USD.", default=None
    )
    payoff_profile: Optional[str] = Field(
        description="The payoff profile of the holding.",
        default=None,
    )
    units: Optional[Union[float, str]] = Field(
        description="The units of the holding.", default=None
    )
    currency: Optional[str] = Field(
        description="The currency of the holding.", default=None
    )
    asset_category: Optional[str] = Field(
        description="The asset category of the holding.", default=None
    )
    issuer_category: Optional[str] = Field(
        description="The issuer category of the holding.",
        default=None,
    )
    country: Optional[str] = Field(
        description="The country of the holding.", default=None
    )
    is_restricted: Optional[str] = Field(
        description="Whether the holding is restricted.",
        default=None,
    )
    fair_value_level: Optional[int] = Field(
        description="The fair value level of the holding.",
        default=None,
    )
    # --- Securities-lending attributes ---
    is_cash_collateral: Optional[str] = Field(
        description="Whether the holding is cash collateral.",
        default=None,
    )
    is_non_cash_collateral: Optional[str] = Field(
        description="Whether the holding is non-cash collateral.",
        default=None,
    )
    is_loan_by_fund: Optional[str] = Field(
        description="Whether the holding is loan by fund.",
        default=None,
    )
    loan_value: Optional[float] = Field(
        description="The loan value of the holding.",
        default=None,
    )
    issuer_conditional: Optional[str] = Field(
        description="The issuer conditions of the holding.", default=None
    )
    asset_conditional: Optional[str] = Field(
        description="The asset conditions of the holding.", default=None
    )
    # --- Debt-security attributes ---
    maturity_date: Optional[dateType] = Field(
        description="The maturity date of the debt security.", default=None
    )
    coupon_kind: Optional[str] = Field(
        description="The type of coupon for the debt security.", default=None
    )
    rate_type: Optional[str] = Field(
        description="The type of rate for the debt security, floating or fixed.",
        default=None,
    )
    # Normalized to a 0-1 fraction by `normalize_percent` below.
    annualized_return: Optional[float] = Field(
        description="The annualized return on the debt security.",
        default=None,
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
    is_default: Optional[str] = Field(
        description="If the debt security is defaulted.", default=None
    )
    in_arrears: Optional[str] = Field(
        description="If the debt security is in arrears.", default=None
    )
    is_paid_kind: Optional[str] = Field(
        description="If the debt security payments are paid in kind.", default=None
    )
    # --- Derivative attributes (options, futures, forwards) ---
    derivative_category: Optional[str] = Field(
        description="The derivative category of the holding.", default=None
    )
    counterparty: Optional[str] = Field(
        description="The counterparty of the derivative.", default=None
    )
    underlying_name: Optional[str] = Field(
        description="The name of the underlying asset associated with the derivative.",
        default=None,
    )
    option_type: Optional[str] = Field(description="The type of option.", default=None)
    derivative_payoff: Optional[str] = Field(
        description="The payoff profile of the derivative.", default=None
    )
    expiry_date: Optional[dateType] = Field(
        description="The expiry or termination date of the derivative.", default=None
    )
    exercise_price: Optional[float] = Field(
        description="The exercise price of the option.", default=None
    )
    exercise_currency: Optional[str] = Field(
        description="The currency of the option exercise price.", default=None
    )
    shares_per_contract: Optional[float] = Field(
        description="The number of shares per contract.", default=None
    )
    delta: Optional[Union[str, float]] = Field(
        description="The delta of the option.", default=None
    )
    # --- Swap attributes: receivable leg ---
    rate_type_rec: Optional[str] = Field(
        description="The type of rate for receivable portion of the swap.", default=None
    )
    receive_currency: Optional[str] = Field(
        description="The receive currency of the swap.", default=None
    )
    upfront_receive: Optional[float] = Field(
        description="The upfront amount received of the swap.", default=None
    )
    floating_rate_index_rec: Optional[str] = Field(
        description="The floating rate index for receivable portion of the swap.",
        default=None,
    )
    floating_rate_spread_rec: Optional[float] = Field(
        description="The floating rate spread for reveivable portion of the swap.",
        default=None,
    )
    rate_tenor_rec: Optional[str] = Field(
        description="The rate tenor for receivable portion of the swap.", default=None
    )
    rate_tenor_unit_rec: Optional[Union[str, int]] = Field(
        description="The rate tenor unit for receivable portion of the swap.",
        default=None,
    )
    reset_date_rec: Optional[str] = Field(
        description="The reset date for receivable portion of the swap.", default=None
    )
    reset_date_unit_rec: Optional[Union[str, int]] = Field(
        description="The reset date unit for receivable portion of the swap.",
        default=None,
    )
    # --- Swap attributes: payment leg ---
    rate_type_pmnt: Optional[str] = Field(
        description="The type of rate for payment portion of the swap.", default=None
    )
    payment_currency: Optional[str] = Field(
        description="The payment currency of the swap.", default=None
    )
    upfront_payment: Optional[float] = Field(
        description="The upfront amount received of the swap.", default=None
    )
    floating_rate_index_pmnt: Optional[str] = Field(
        description="The floating rate index for payment portion of the swap.",
        default=None,
    )
    floating_rate_spread_pmnt: Optional[float] = Field(
        description="The floating rate spread for payment portion of the swap.",
        default=None,
    )
    rate_tenor_pmnt: Optional[str] = Field(
        description="The rate tenor for payment portion of the swap.", default=None
    )
    rate_tenor_unit_pmnt: Optional[Union[str, int]] = Field(
        description="The rate tenor unit for payment portion of the swap.", default=None
    )
    reset_date_pmnt: Optional[str] = Field(
        description="The reset date for payment portion of the swap.", default=None
    )
    reset_date_unit_pmnt: Optional[Union[str, int]] = Field(
        description="The reset date unit for payment portion of the swap.", default=None
    )
    # --- Repurchase-agreement attributes ---
    repo_type: Optional[str] = Field(description="The type of repo.", default=None)
    is_cleared: Optional[str] = Field(
        description="If the repo is cleared.", default=None
    )
    is_tri_party: Optional[str] = Field(
        description="If the repo is tri party.", default=None
    )
    principal_amount: Optional[float] = Field(
        description="The principal amount of the repo.", default=None
    )
    principal_currency: Optional[str] = Field(
        description="The currency of the principal amount.", default=None
    )
    collateral_type: Optional[str] = Field(
        description="The collateral type of the repo.", default=None
    )
    collateral_amount: Optional[float] = Field(
        description="The collateral amount of the repo.", default=None
    )
    collateral_currency: Optional[str] = Field(
        description="The currency of the collateral amount.", default=None
    )
    # --- Forward / FX attributes ---
    exchange_currency: Optional[str] = Field(
        description="The currency of the exchange rate.", default=None
    )
    exchange_rate: Optional[float] = Field(
        description="The exchange rate.", default=None
    )
    currency_sold: Optional[str] = Field(
        description="The currency sold in a Forward Derivative.",
        default=None,
    )
    currency_amount_sold: Optional[float] = Field(
        description="The amount of currency sold in a Forward Derivative.",
        default=None,
    )
    currency_bought: Optional[str] = Field(
        description="The currency bought in a Forward Derivative.",
        default=None,
    )
    currency_amount_bought: Optional[float] = Field(
        description="The amount of currency bought in a Forward Derivative.",
        default=None,
    )
    notional_amount: Optional[float] = Field(
        description="The notional amount of the derivative.", default=None
    )
    notional_currency: Optional[str] = Field(
        description="The currency of the derivative's notional amount.", default=None
    )
    unrealized_gain: Optional[float] = Field(
        description="The unrealized gain or loss on the derivative.", default=None
    )

    @field_validator("weight", "annualized_return", mode="before", check_fields=False)
    @classmethod
    def normalize_percent(cls, v):
        """Convert percent values from the filing to 0-1 fractions.

        Note: falsy inputs (including 0) become None, matching `replace_zero`.
        """
        return float(v) / 100 if v else None

    @model_validator(mode="before")
    @classmethod
    def replace_zero(cls, values):
        """Replace zero values with None so empty fields are omitted.

        Runs before field validation; non-dict payloads pass through untouched.
        """
        return (
            {k: None if v == 0 else v for k, v in values.items()}
            if isinstance(values, dict)
            else values
        )
308
+
309
+
310
+ class SecEtfHoldingsFetcher(
311
+ Fetcher[
312
+ SecEtfHoldingsQueryParams,
313
+ List[SecEtfHoldingsData],
314
+ ]
315
+ ):
316
+ """SEC ETF Holdings."""
317
+
318
+ @staticmethod
319
+ def transform_query(params: Dict[str, Any]) -> SecEtfHoldingsQueryParams:
320
+ """Transform the query."""
321
+ params["symbol"] = params["symbol"].upper()
322
+ return SecEtfHoldingsQueryParams(**params)
323
+
324
    @staticmethod
    async def aextract_data(
        query: SecEtfHoldingsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Finds the fund's N-PORT filing candidates (with retries), picks the
        filing closest to the requested date (or the most recent one), then
        downloads and XML-parses the primary document.
        """
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        import xmltodict  # noqa
        from aiohttp_client_cache import SQLiteBackend  # noqa
        from aiohttp_client_cache.session import CachedSession  # noqa
        from openbb_core.app.utils import get_user_cache_directory  # noqa
        from openbb_core.provider.utils.helpers import amake_request  # noqa
        from openbb_sec.utils.helpers import HEADERS, get_nport_candidates  # noqa
        from pandas import DataFrame, Series, to_datetime  # noqa

        # Implement a retry mechanism in case of RemoteDisconnected Error.
        retries = 3
        for i in range(retries):
            filings = []
            try:
                filings = await get_nport_candidates(
                    symbol=query.symbol, use_cache=query.use_cache
                )
                if filings:
                    break
            except Exception as e:
                if i < retries - 1:
                    # Transient failure: warn, back off briefly, and retry.
                    warn(f"Error: {e}. Retrying...")
                    await asyncio.sleep(1)
                    continue
                # Final attempt failed; propagate the original error.
                raise e
        filing_candidates = DataFrame.from_records(filings)
        if filing_candidates.empty:
            raise OpenBBError(f"No N-Port records found for {query.symbol}.")
        dates = filing_candidates.period_ending.to_list()
        new_date: str = ""
        if query.date is not None:
            date = query.date
            # Gets the URL for the nearest date to the requested date.
            # Nearest = smallest absolute timedelta between candidate and request.
            __dates = Series(to_datetime(dates))
            __date = to_datetime(date)
            __nearest = DataFrame(__dates - __date)
            __nearest_date = abs(__nearest[0].astype("int64")).idxmin()
            new_date = __dates[__nearest_date].strftime("%Y-%m-%d")
            date = new_date if new_date else date
            warn(f"Closest filing date to, {query.date}, is the period ending: {date}")
            filing_url = filing_candidates[filing_candidates["period_ending"] == date][
                "primary_doc"
            ].values[0]
        else:
            # Candidates are assumed newest-first; take the most recent filing.
            filing_url = filing_candidates["primary_doc"].values[0]

        async def callback(response, session):
            """Response callback for the request."""
            return await response.read()

        response: Union[dict, List[dict]] = []
        if query.use_cache is True:
            # Cache the document body in a per-user SQLite-backed HTTP cache.
            cache_dir = f"{get_user_cache_directory()}/http/sec_etf"
            async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
                try:
                    response = await amake_request(
                        filing_url, headers=HEADERS, session=session, response_callback=callback  # type: ignore
                    )
                finally:
                    await session.close()
        else:
            response = await amake_request(
                filing_url, headers=HEADERS, response_callback=callback  # type: ignore
            )
        results = xmltodict.parse(response)

        return results
399
+
400
+ # pylint: disable=too-many-statements
401
+ @staticmethod
402
+ def transform_data( # noqa: PLR0912
403
+ query: SecEtfHoldingsQueryParams,
404
+ data: Dict,
405
+ **kwargs: Any,
406
+ ) -> AnnotatedResult[List[SecEtfHoldingsData]]:
407
+ """Transform the data."""
408
+ # pylint: disable=import-outside-toplevel
409
+ from pandas import DataFrame, to_datetime
410
+ from pandas.tseries.offsets import MonthEnd
411
+
412
+ if not data:
413
+ raise EmptyDataError(f"No data was returned for the symbol, {query.symbol}")
414
+ results = []
415
+
416
+ response = data
417
+
418
+ # Parse the response if it is a NPORT-P filing.
419
+ if (
420
+ "edgarSubmission" in response
421
+ and "formData" in response["edgarSubmission"]
422
+ and response["edgarSubmission"]["headerData"]["submissionType"] == "NPORT-P"
423
+ and "invstOrSecs" in response["edgarSubmission"]["formData"]
424
+ and "invstOrSec" in response["edgarSubmission"]["formData"]["invstOrSecs"]
425
+ ):
426
+ df = DataFrame.from_records(
427
+ response["edgarSubmission"]["formData"]["invstOrSecs"]["invstOrSec"]
428
+ )
429
+ # Conditionally flatten deeply nested values.
430
+ for i in df.index:
431
+ if "isin" in df.iloc[i]["identifiers"]:
432
+ df.loc[i, "isin"] = df.iloc[i]["identifiers"]["isin"].get("@value")
433
+
434
+ if (
435
+ "other" in df.iloc[i]["identifiers"]
436
+ and "@value" in df.iloc[i]["identifiers"]["other"]
437
+ ):
438
+ df.loc[i, "other_id"] = df.iloc[i]["identifiers"]["other"].get(
439
+ "@value"
440
+ )
441
+
442
+ if "securityLending" in df.iloc[i]:
443
+ security_lending = df.iloc[i]["securityLending"]
444
+ if "loanByFundCondition" in security_lending:
445
+ loan_by_fund_condition = security_lending["loanByFundCondition"]
446
+ df.loc[i, "isLoanByFund"] = loan_by_fund_condition.get(
447
+ "@isLoanByFund"
448
+ )
449
+ df.loc[i, "loanVal"] = loan_by_fund_condition.get("@loanVal")
450
+ if "isCashCollateral" in security_lending:
451
+ df.loc[i, "isCashCollateral"] = security_lending.get(
452
+ "isCashCollateral"
453
+ )
454
+ if "isNonCashCollateral" in security_lending:
455
+ df.loc[i, "isNonCashCollateral"] = security_lending.get(
456
+ "isNonCashCollateral"
457
+ )
458
+
459
+ if "debtSec" in df.iloc[i] and isinstance(df.loc[i]["debtSec"], dict):
460
+ debt_sec = df.iloc[i]["debtSec"]
461
+ df.loc[i, "maturity_date"] = debt_sec.get("maturityDt")
462
+ df.loc[i, "coupon_kind"] = debt_sec.get("couponKind")
463
+ df.loc[i, "annualized_return"] = debt_sec.get("annualizedRt")
464
+ df.loc[i, "is_default"] = debt_sec.get("isDefault")
465
+ df.loc[i, "in_arrears"] = debt_sec.get("areIntrstPmntsInArrs")
466
+ df.loc[i, "is_paid_kind"] = debt_sec.get("isPaidKind")
467
+
468
+ if "issuerConditional" in df.iloc[i] and isinstance(
469
+ df.iloc[i]["issuerConditional"], dict
470
+ ):
471
+ df.loc[i, "issuer_conditional"] = df.iloc[i][
472
+ "issuerConditional"
473
+ ].get("@desc")
474
+
475
+ if "assetConditional" in df.iloc[i] and isinstance(
476
+ df.iloc[i]["assetConditional"], dict
477
+ ):
478
+ df.loc[i, "asset_conditional"] = df.iloc[i]["assetConditional"].get(
479
+ "@desc"
480
+ )
481
+
482
+ if "derivativeInfo" in df.iloc[i] and isinstance(
483
+ df.iloc[i]["derivativeInfo"], dict
484
+ ):
485
+ derivative_info = df.iloc[i]["derivativeInfo"]
486
+
487
+ if "optionSwaptionWarrantDeriv" in derivative_info:
488
+ option_swaption_warrant_deriv = derivative_info[
489
+ "optionSwaptionWarrantDeriv"
490
+ ]
491
+ df.loc[i, "derivative_category"] = (
492
+ option_swaption_warrant_deriv.get("@derivCat")
493
+ )
494
+ df.loc[i, "counterparty"] = option_swaption_warrant_deriv[
495
+ "counterparties"
496
+ ].get("counterpartyName")
497
+ df.loc[i, "lei"] = option_swaption_warrant_deriv[
498
+ "counterparties"
499
+ ].get("counterpartyLei")
500
+ df.loc[i, "underlying_name"] = (
501
+ option_swaption_warrant_deriv["descRefInstrmnt"]
502
+ .get("otherRefInst", {})
503
+ .get("issueTitle")
504
+ )
505
+ df.loc[i, "underlying_name"] = option_swaption_warrant_deriv[
506
+ "descRefInstrmnt"
507
+ ].get("nestedDerivInfo", {}).get("fwdDeriv", {}).get(
508
+ "derivAddlInfo", {}
509
+ ).get(
510
+ "title"
511
+ ) or option_swaption_warrant_deriv[
512
+ "descRefInstrmnt"
513
+ ].get(
514
+ "otherRefInst", {}
515
+ ).get(
516
+ "issueTitle"
517
+ )
518
+ df.loc[i, "option_type"] = option_swaption_warrant_deriv.get(
519
+ "putOrCall"
520
+ )
521
+ df.loc[i, "derivative_payoff"] = (
522
+ option_swaption_warrant_deriv.get("writtenOrPur")
523
+ )
524
+ df.loc[i, "expiry_date"] = option_swaption_warrant_deriv.get(
525
+ "expDt"
526
+ )
527
+ df.loc[i, "exercise_price"] = option_swaption_warrant_deriv.get(
528
+ "exercisePrice"
529
+ )
530
+ df.loc[i, "exercise_currency"] = (
531
+ option_swaption_warrant_deriv.get("exercisePriceCurCd")
532
+ )
533
+ df.loc[i, "shares_per_contract"] = (
534
+ option_swaption_warrant_deriv.get("shareNo")
535
+ )
536
+ if option_swaption_warrant_deriv.get("delta") != "XXXX":
537
+ df.loc[i, "delta"] = option_swaption_warrant_deriv.get(
538
+ "delta"
539
+ )
540
+ df.loc[i, "unrealized_gain"] = float(
541
+ option_swaption_warrant_deriv.get("unrealizedAppr")
542
+ )
543
+
544
+ if "futrDeriv" in derivative_info:
545
+ futr_deriv = derivative_info["futrDeriv"]
546
+ df.loc[i, "derivative_category"] = futr_deriv.get("@derivCat")
547
+ if isinstance(futr_deriv.get("counterparties"), dict):
548
+ df.loc[i, "counterparty"] = futr_deriv[
549
+ "counterparties"
550
+ ].get("counterpartyName")
551
+ df.loc[i, "lei"] = futr_deriv["counterparties"].get(
552
+ "counterpartyLei"
553
+ )
554
+ df.loc[i, "underlying_name"] = (
555
+ futr_deriv["descRefInstrmnt"]
556
+ .get("indexBasketInfo", {})
557
+ .get("indexName")
558
+ )
559
+ df.loc[i, "other_id"] = (
560
+ futr_deriv["descRefInstrmnt"]
561
+ .get("indexBasketInfo", {})
562
+ .get("indexIdentifier")
563
+ )
564
+ df.loc[i, "derivative_payoff"] = futr_deriv.get("payOffProf")
565
+ df.loc[i, "expiry_date"] = futr_deriv.get(
566
+ "expDt"
567
+ ) or futr_deriv.get("expDate")
568
+ df.loc[i, "notional_amount"] = float(
569
+ futr_deriv.get("notionalAmt")
570
+ )
571
+ df.loc[i, "notional_currency"] = futr_deriv.get("curCd")
572
+ df.loc[i, "unrealized_gain"] = float(
573
+ futr_deriv.get("unrealizedAppr")
574
+ )
575
+
576
+ if "fwdDeriv" in derivative_info:
577
+ fwd_deriv = derivative_info["fwdDeriv"]
578
+ df.loc[i, "derivative_category"] = fwd_deriv.get("@derivCat")
579
+ df.loc[i, "counterparty"] = fwd_deriv["counterparties"].get(
580
+ "counterpartyName"
581
+ )
582
+ df.loc[i, "currency_sold"] = fwd_deriv.get("curSold")
583
+ df.loc[i, "currency_amount_sold"] = float(
584
+ fwd_deriv.get("amtCurSold")
585
+ )
586
+ df.loc[i, "currency_bought"] = fwd_deriv.get("curPur")
587
+ df.loc[i, "currency_amount_bought"] = float(
588
+ fwd_deriv.get("amtCurPur")
589
+ )
590
+ df.loc[i, "expiry_date"] = fwd_deriv.get("settlementDt")
591
+ df.loc[i, "unrealized_gain"] = float(
592
+ fwd_deriv.get("unrealizedAppr")
593
+ )
594
+
595
+ if "swapDeriv" in df.iloc[i]["derivativeInfo"]:
596
+ swap_deriv = df.iloc[i]["derivativeInfo"]["swapDeriv"]
597
+ df.loc[i, "derivative_category"] = swap_deriv.get("@derivCat")
598
+ df.loc[i, "counterparty"] = swap_deriv["counterparties"].get(
599
+ "counterpartyName"
600
+ )
601
+ df.loc[i, "lei"] = swap_deriv["counterparties"].get(
602
+ "counterpartyLei"
603
+ )
604
+ if "otherRefInst" in swap_deriv["descRefInstrmnt"]:
605
+ df.loc[i, "underlying_name"] = swap_deriv[
606
+ "descRefInstrmnt"
607
+ ]["otherRefInst"].get("issueTitle")
608
+ if "indexBasketInfo" in swap_deriv["descRefInstrmnt"]:
609
+ df.loc[i, "underlying_name"] = swap_deriv[
610
+ "descRefInstrmnt"
611
+ ]["indexBasketInfo"].get("indexName")
612
+ df.loc[i, "other_id"] = swap_deriv["descRefInstrmnt"][
613
+ "indexBasketInfo"
614
+ ].get("indexIdentifier")
615
+ df.loc[i, "swap_description"] = (
616
+ swap_deriv["otherRecDesc"].get("#text")
617
+ if "otherRecDesc" in swap_deriv["descRefInstrmnt"]
618
+ else None
619
+ )
620
+ if "floatingRecDesc" in swap_deriv:
621
+ df.loc[i, "rate_type_rec"] = swap_deriv[
622
+ "floatingRecDesc"
623
+ ].get("@fixedOrFloating")
624
+ df.loc[i, "floating_rate_index_rec"] = swap_deriv[
625
+ "floatingRecDesc"
626
+ ].get("@floatingRtIndex")
627
+ df.loc[i, "floating_rate_spread_rec"] = float(
628
+ swap_deriv["floatingRecDesc"].get("@floatingRtSpread")
629
+ )
630
+ df.loc[i, "payment_amount_rec"] = float(
631
+ swap_deriv["floatingRecDesc"].get("@pmntAmt")
632
+ )
633
+ df.loc[i, "rate_tenor_rec"] = swap_deriv["floatingRecDesc"][
634
+ "rtResetTenors"
635
+ ]["rtResetTenor"].get("@rateTenor")
636
+ df.loc[i, "rate_tenor_unit_rec"] = swap_deriv[
637
+ "floatingRecDesc"
638
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenorUnit")
639
+ df.loc[i, "reset_date_rec"] = swap_deriv["floatingRecDesc"][
640
+ "rtResetTenors"
641
+ ]["rtResetTenor"].get("@resetDt")
642
+ df.loc[i, "reset_date_unit_rec"] = swap_deriv[
643
+ "floatingRecDesc"
644
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDtUnit")
645
+ if "floatingPmntDesc" in swap_deriv:
646
+ df.loc[i, "rate_type_pmnt"] = swap_deriv[
647
+ "floatingPmntDesc"
648
+ ].get("@fixedOrFloating")
649
+ df.loc[i, "floating_rate_index_pmnt"] = swap_deriv[
650
+ "floatingPmntDesc"
651
+ ].get("@floatingRtIndex")
652
+ df.loc[i, "floating_rate_spread_pmnt"] = float(
653
+ swap_deriv["floatingPmntDesc"].get("@floatingRtSpread")
654
+ )
655
+ df.loc[i, "payment_amount_pmnt"] = float(
656
+ swap_deriv["floatingPmntDesc"].get("@pmntAmt")
657
+ )
658
+ df.loc[i, "rate_tenor_pmnt"] = swap_deriv[
659
+ "floatingPmntDesc"
660
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenor")
661
+ df.loc[i, "rate_tenor_unit_pmnt"] = swap_deriv[
662
+ "floatingPmntDesc"
663
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenorUnit")
664
+ df.loc[i, "reset_date_pmnt"] = swap_deriv[
665
+ "floatingPmntDesc"
666
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDt")
667
+ df.loc[i, "reset_date_unit_rec"] = swap_deriv[
668
+ "floatingPmntDesc"
669
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDtUnit")
670
+ df.loc[i, "expiry_date"] = swap_deriv.get("terminationDt")
671
+ df.loc[i, "upfront_payment"] = float(
672
+ swap_deriv.get("upfrontPmnt")
673
+ )
674
+ df.loc[i, "payment_currency"] = swap_deriv.get("pmntCurCd")
675
+ df.loc[i, "upfront_receive"] = float(
676
+ swap_deriv.get("upfrontRcpt")
677
+ )
678
+ df.loc[i, "receive_currency"] = swap_deriv.get("rcptCurCd")
679
+ df.loc[i, "notional_amount"] = float(
680
+ swap_deriv.get("notionalAmt")
681
+ )
682
+ df.loc[i, "notional_currency"] = swap_deriv.get("curCd")
683
+ df.loc[i, "unrealized_gain"] = float(
684
+ swap_deriv.get("unrealizedAppr")
685
+ )
686
+
687
+ if "repurchaseAgrmt" in df.iloc[i] and isinstance(
688
+ df.iloc[i]["repurchaseAgrmt"], dict
689
+ ):
690
+ repurchase_agrmt = df.iloc[i]["repurchaseAgrmt"]
691
+ df.loc[i, "repo_type"] = repurchase_agrmt.get("transCat")
692
+
693
+ if "clearedCentCparty" in repurchase_agrmt and isinstance(
694
+ repurchase_agrmt["clearedCentCparty"], dict
695
+ ):
696
+ cleared_cent_cparty = repurchase_agrmt["clearedCentCparty"]
697
+ df.loc[i, "is_cleared"] = cleared_cent_cparty.get("@isCleared")
698
+ df.loc[i, "counterparty"] = cleared_cent_cparty.get(
699
+ "@centralCounterparty"
700
+ )
701
+ df.loc[i, "is_tri_party"] = repurchase_agrmt.get("isTriParty")
702
+ df.loc[i, "annualized_return"] = repurchase_agrmt.get(
703
+ "repurchaseRt"
704
+ )
705
+ df.loc[i, "maturity_date"] = repurchase_agrmt.get("maturityDt")
706
+
707
+ if (
708
+ "repurchaseCollaterals" in repurchase_agrmt
709
+ and "repurchaseCollateral"
710
+ in repurchase_agrmt["repurchaseCollaterals"]
711
+ ):
712
+ repurchase_collateral = repurchase_agrmt[
713
+ "repurchaseCollaterals"
714
+ ]["repurchaseCollateral"]
715
+ df.loc[i, "principal_amount"] = float(
716
+ repurchase_collateral.get("principalAmt")
717
+ )
718
+ df.loc[i, "principal_currency"] = repurchase_collateral.get(
719
+ "@principalCd"
720
+ )
721
+ df.loc[i, "collateral_amount"] = float(
722
+ repurchase_collateral.get("collateralVal")
723
+ )
724
+ df.loc[i, "collateral_currency"] = repurchase_collateral.get(
725
+ "@collateralCd"
726
+ )
727
+ df.loc[i, "collateral_type"] = repurchase_collateral.get(
728
+ "@invstCat"
729
+ )
730
+
731
+ if "currencyConditional" in df.iloc[i] and isinstance(
732
+ df.iloc[i]["currencyConditional"], dict
733
+ ):
734
+ currency_conditional = df.iloc[i]["currencyConditional"]
735
+ df.loc[i, "exchange_currency"] = currency_conditional.get("@curCd")
736
+ df.loc[i, "exchange_rate"] = currency_conditional.get("@exchangeRt")
737
+
738
+ # Drop the flattened columns
739
+ to_drop = [
740
+ "identifiers",
741
+ "securityLending",
742
+ "issuerConditional",
743
+ "assetConditional",
744
+ "debtSec",
745
+ "currencyConditional",
746
+ "derivativeInfo",
747
+ "repurchaseAgrmt",
748
+ ]
749
+ for col in to_drop:
750
+ if col in df.columns:
751
+ df = df.drop(col, axis=1)
752
+
753
+ df["pctVal"] = df["pctVal"].astype(float)
754
+ results = (
755
+ df.fillna("N/A")
756
+ .replace("N/A", None)
757
+ .sort_values(by="pctVal", ascending=False)
758
+ .to_dict(orient="records")
759
+ )
760
+ # Extract additional information from the form that doesn't belong in the holdings table.
761
+ metadata = {}
762
+ month_1: str = ""
763
+ month_2: str = ""
764
+ month_3: str = ""
765
+ try:
766
+ gen_info = response["edgarSubmission"]["formData"].get("genInfo", {}) # type: ignore
767
+ if gen_info:
768
+ metadata["fund_name"] = gen_info.get("seriesName")
769
+ metadata["series_id"] = gen_info.get("seriesId")
770
+ metadata["lei"] = gen_info.get("seriesLei")
771
+ metadata["period_ending"] = gen_info.get("repPdDate")
772
+ metadata["fiscal_year_end"] = gen_info.get("repPdEnd")
773
+ current_month = to_datetime(metadata["period_ending"])
774
+ month_1 = (current_month - MonthEnd(2)).date().strftime("%Y-%m-%d")
775
+ month_2 = (current_month - MonthEnd(1)).date().strftime("%Y-%m-%d")
776
+ month_3 = current_month.strftime("%Y-%m-%d")
777
+ fund_info = response["edgarSubmission"]["formData"].get("fundInfo", {}) # type: ignore
778
+ if fund_info:
779
+ metadata["total_assets"] = float(fund_info.pop("totAssets", None))
780
+ metadata["total_liabilities"] = float(fund_info.pop("totLiabs", None))
781
+ metadata["net_assets"] = float(fund_info.pop("netAssets", None))
782
+ metadata["cash_and_equivalents"] = fund_info.pop(
783
+ "cshNotRptdInCorD", None
784
+ )
785
+ return_info = fund_info["returnInfo"]["monthlyTotReturns"].get(
786
+ "monthlyTotReturn", {}
787
+ )
788
+ returns = {
789
+ month_1: float(return_info.get("@rtn1")) / 100,
790
+ month_2: float(return_info.get("@rtn2")) / 100,
791
+ month_3: float(return_info.get("@rtn3")) / 100,
792
+ }
793
+ metadata["returns"] = returns
794
+ flow = {
795
+ month_1: {
796
+ "creation": float(fund_info["mon1Flow"].get("@sales", None)),
797
+ "redemption": float(
798
+ fund_info["mon1Flow"].get("@redemption", None)
799
+ ),
800
+ },
801
+ month_2: {
802
+ "creation": float(fund_info["mon2Flow"].get("@sales", None)),
803
+ "redemption": float(
804
+ fund_info["mon2Flow"].get("@redemption", None)
805
+ ),
806
+ },
807
+ month_3: {
808
+ "creation": float(fund_info["mon3Flow"].get("@sales")),
809
+ "redemption": float(
810
+ fund_info["mon3Flow"].get("@redemption", None)
811
+ ),
812
+ },
813
+ }
814
+ metadata["flow"] = flow
815
+ gains = {
816
+ month_1: {
817
+ "realized": float(
818
+ fund_info["returnInfo"]["othMon1"].get(
819
+ "@netRealizedGain", None
820
+ )
821
+ ),
822
+ "unrealized": float(
823
+ fund_info["returnInfo"]["othMon1"].get(
824
+ "@netUnrealizedAppr", None
825
+ )
826
+ ),
827
+ },
828
+ month_2: {
829
+ "realized": float(
830
+ fund_info["returnInfo"]["othMon2"].get(
831
+ "@netRealizedGain", None
832
+ )
833
+ ),
834
+ "unrealized": float(
835
+ fund_info["returnInfo"]["othMon2"].get(
836
+ "@netUnrealizedAppr", None
837
+ )
838
+ ),
839
+ },
840
+ month_3: {
841
+ "realized": float(
842
+ fund_info["returnInfo"]["othMon3"].get(
843
+ "@netRealizedGain", None
844
+ )
845
+ ),
846
+ "unrealized": float(
847
+ fund_info["returnInfo"]["othMon3"].get(
848
+ "@netUnrealizedAppr", None
849
+ )
850
+ ),
851
+ },
852
+ }
853
+ metadata["gains"] = gains
854
+ _borrowers = fund_info["borrowers"].get("borrower", [])
855
+ if _borrowers:
856
+ borrowers = [
857
+ {
858
+ "name": d["@name"],
859
+ "lei": d["@lei"],
860
+ "value": float(d["@aggrVal"]),
861
+ }
862
+ for d in _borrowers
863
+ ]
864
+ metadata["borrowers"] = borrowers
865
+ except Exception as e: # pylint: disable=W0718
866
+ warn(f"Error extracting metadata: {e}")
867
+ return AnnotatedResult(
868
+ result=[SecEtfHoldingsData.model_validate(d) for d in results],
869
+ metadata=metadata,
870
+ )
openbb_platform/providers/sec/openbb_sec/models/form_13FHR.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Form 13F-HR Model."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from typing import Any, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.form_13FHR import (
9
+ Form13FHRData,
10
+ Form13FHRQueryParams,
11
+ )
12
+ from pydantic import Field
13
+
14
+
15
class SecForm13FHRQueryParams(Form13FHRQueryParams):
    """Query parameters for SEC Form 13F-HR filings.

    Source: https://www.sec.gov/Archives/edgar/data/
    """
20
+
21
+
22
class SecForm13FHRData(Form13FHRData):
    """SEC Form 13F-HR Data."""

    # Map standard-model field names to the raw XML tag names in the filing.
    __alias_dict__ = {
        "issuer": "nameOfIssuer",
        "asset_class": "titleOfClass",
        "option_type": "putCall",
    }

    # Position weight within the filing, normalized to a fraction of total market value.
    weight: float = Field(
        description=(
            "The weight of the security relative to the market value of all securities in the filing"
            " , as a normalized percent."
        ),
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
36
+
37
+
38
class SecForm13FHRFetcher(Fetcher[SecForm13FHRQueryParams, list[SecForm13FHRData]]):
    """SEC Form 13F-HR Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecForm13FHRQueryParams:
        """Transform the query."""
        return SecForm13FHRQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecForm13FHRQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Return the raw data from the SEC endpoint.

        Raises
        ------
        EmptyDataError
            If no holdings were parsed for the given parameters.
        """
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        from openbb_core.app.model.abstract.error import OpenBBError
        from openbb_core.provider.utils.errors import EmptyDataError
        from openbb_sec.utils import parse_13f

        symbol = query.symbol
        urls: list = []
        # A purely numeric "symbol" is treated as a CIK number.
        is_cik = symbol.isnumeric()
        try:
            filings = (
                await parse_13f.get_13f_candidates(cik=symbol)
                if is_cik
                else await parse_13f.get_13f_candidates(symbol=symbol)
            )
            # An explicit date takes precedence over the limit.
            if query.date is not None:
                date = parse_13f.date_to_quarter_end(query.date.strftime("%Y-%m-%d"))
                filings.index = filings.index.astype(str)
                urls = [filings.loc[date]]
            elif query.limit:
                urls = filings.iloc[: query.limit].to_list()

            results: list = []

            async def get_filing(url):
                """Get a single 13F-HR filing and parse it."""
                data = await parse_13f.parse_13f_hr(url)
                if len(data) > 0:
                    results.extend(data)

            # Download and parse all candidate filings concurrently.
            await asyncio.gather(*[get_filing(url) for url in urls])

            if not results:
                raise EmptyDataError("No data was returned with the given parameters.")

            return results
        except OpenBBError:
            # Re-raise provider errors unchanged (EmptyDataError included).
            raise

    @staticmethod
    def transform_data(
        query: SecForm13FHRQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecForm13FHRData]:
        """Transform the data, sorted by period then weight, descending."""
        return [
            SecForm13FHRData.model_validate(d)
            for d in sorted(
                data,
                key=lambda d: [d["period_ending"], d["weight"]],
                reverse=True,
            )
        ]
openbb_platform/providers/sec/openbb_sec/models/htm_file.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC HTM/HTML File Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.abstract.query_params import QueryParams
11
+ from pydantic import Field
12
+
13
+
14
class SecHtmFileQueryParams(QueryParams):
    """SEC HTM File Query Parameters."""

    # Direct sec.gov link to the filing document.
    url: str = Field(default="", description="URL for the SEC filing.")
    # Persist the downloaded file locally for re-use.
    use_cache: bool = Field(
        default=True,
        description="Cache the file for use later. Default is True.",
    )
25
+
26
+
27
class SecHtmFileData(Data):
    """SEC HTM File Data."""

    # Source URL; excluded from widget display configuration.
    url: str = Field(
        description="URL of the downloaded file.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    # Full text of the downloaded document.
    content: str = Field(description="Raw content of the HTM/HTML file.")
35
+
36
+
37
class SecHtmFileFetcher(Fetcher[SecHtmFileQueryParams, SecHtmFileData]):
    """SEC HTM File Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecHtmFileQueryParams:
        """Transform the query.

        Raises
        ------
        OpenBBError
            If no URL is supplied, or the URL is not a sec.gov link to an HTM/HTML file.
        """
        if not params.get("url"):
            raise OpenBBError(ValueError("Please enter a URL."))

        url = params.get("url", "")

        # Accept only fully-qualified sec.gov links pointing directly at an HTM(L) document.
        if (
            not url.startswith("http")
            or "sec.gov" not in url
            or not url.endswith((".htm", ".html"))
        ):
            raise OpenBBError(
                ValueError(
                    "Invalid URL. Please provide a SEC URL that directs specifically to a HTM or HTML file."
                )
            )
        return SecHtmFileQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecHtmFileQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> dict:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.models.sec_filing import SecBaseFiling

        return {
            "url": query.url,
            "content": SecBaseFiling.download_file(query.url, False, query.use_cache),
        }

    @staticmethod
    def transform_data(
        query: SecHtmFileQueryParams, data: dict, **kwargs: Any
    ) -> SecHtmFileData:
        """Transform the data to the standard format.

        Raises
        ------
        OpenBBError
            If no content was downloaded, or no <html> element could be parsed.
        """
        # pylint: disable=import-outside-toplevel
        from bs4 import BeautifulSoup  # noqa

        if not data or not data.get("content"):
            raise OpenBBError("Failed to extract HTM file data.")

        content = data.pop("content", "")
        soup = BeautifulSoup(content, "html.parser").find("html")

        # find() returns None when the document has no <html> element;
        # guard before iterating to avoid an AttributeError.
        if soup is None:
            raise OpenBBError("Failed to parse an HTML document from the file content.")

        # Remove style elements that add background color to table rows.
        for row in soup.find_all("tr"):
            if "background-color" in row.get("style", ""):
                del row["style"]
            for attr in ["class", "bgcolor"]:
                if attr in row.attrs:
                    del row[attr]

        return SecHtmFileData(content=str(soup), url=data["url"])
openbb_platform/providers/sec/openbb_sec/models/insider_trading.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Insider Trading Model."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional, Union
7
+
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.insider_trading import (
10
+ InsiderTradingData,
11
+ InsiderTradingQueryParams,
12
+ )
13
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
14
+ from pydantic import Field, field_validator
15
+
16
# Maps SEC Form 4/5 single-letter transaction codes to their full descriptions.
TRANSACTION_CODE_MAP = {
    "A": "Grant, award or other acquisition pursuant to Rule 16b-3(d)",
    "C": "Conversion of derivative security",
    "D": "Disposition to the issuer of issuer equity securities pursuant to Rule 16b-3(e)",
    "E": "Expiration of short derivative position",
    "F": (
        "Payment of exercise price or tax liability by delivering or withholding securities incident to the receipt, "
        "exercise or vesting of a security issued in accordance with Rule 16b-3"
    ),
    "G": "Bona fide gift",
    "H": "Expiration (or cancellation) of long derivative position with value received",
    "I": (
        "Discretionary transaction in accordance with Rule 16b-3(f) "
        "resulting in acquisition or disposition of issuer securities"
    ),
    "J": "Other acquisition or disposition (describe transaction)",
    "L": "Small acquisition under Rule 16a-6",
    "M": "Exercise or conversion of derivative security exempted pursuant to Rule 16b-3",
    "O": "Exercise of out-of-the-money derivative security",
    "P": "Open market or private purchase of non-derivative or derivative security",
    "S": "Open market or private sale of non-derivative or derivative security",
    "U": "Disposition pursuant to a tender of shares in a change of control transaction",
    "W": "Acquisition or disposition by will or the laws of descent and distribution",
    "X": "Exercise of in-the-money or at-the-money derivative security",
    "Z": "Deposit into or withdrawal from voting trust",
}

# Maps the filing-timeliness flag to a human-readable label.
TIMELINESS_MAP = {
    "E": "Early",
    "L": "Late",
    "Empty": "On-time",
}
48
+
49
+
50
class SecInsiderTradingQueryParams(InsiderTradingQueryParams):
    """SEC Insider Trading Query Params.

    Source: https://www.sec.gov/Archives/edgar/data/
    """

    start_date: Optional[dateType] = Field(
        default=None,
        description=(
            QUERY_DESCRIPTIONS.get("start_date", "")
            + " Wide date ranges can result in long download times."
            " Recommended to use a smaller date range, default is 120 days ago."
        ),
    )
    end_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("end_date", "") + " Default is today.",
    )
    use_cache: bool = Field(
        default=True,
        description=(
            "Persist the data locally for future use. Default is True."
            " Each form submission is an individual download and the SEC limits the number of concurrent downloads."
            " This prevents the same file from being downloaded multiple times."
        ),
    )
72
+
73
+
74
class SecInsiderTradingData(InsiderTradingData):
    """SEC Insider Trading Data."""

    company_name: Optional[str] = Field(default=None, description="Name of the company.")
    form: Optional[Union[str, int]] = Field(default=None, description="Form type.")
    director: Optional[bool] = Field(
        default=None, description="Whether the owner is a director."
    )
    officer: Optional[bool] = Field(
        default=None, description="Whether the owner is an officer."
    )
    ten_percent_owner: Optional[bool] = Field(
        default=None, description="Whether the owner is a 10% owner."
    )
    other: Optional[bool] = Field(
        default=None, description="Whether the owner is classified as other."
    )
    other_text: Optional[str] = Field(
        default=None, description="Text for other classification."
    )
    transaction_timeliness: Optional[str] = Field(
        default=None, description="Timeliness of the transaction."
    )
    ownership_type: Optional[str] = Field(
        default=None, description="Type of ownership, direct or indirect."
    )
    nature_of_ownership: Optional[str] = Field(
        default=None, description="Nature of the ownership."
    )
    exercise_date: Optional[dateType] = Field(
        default=None, description="Date of exercise."
    )
    expiration_date: Optional[dateType] = Field(
        default=None, description="Date of expiration for the derivative."
    )
    deemed_execution_date: Optional[dateType] = Field(
        default=None, description="Deemed execution date."
    )
    underlying_security_title: Optional[str] = Field(
        default=None, description="Title of the underlying security."
    )
    underlying_security_shares: Optional[float] = Field(
        default=None,
        description="Number of underlying shares associated with the derivative.",
    )
    underlying_security_value: Optional[float] = Field(
        default=None, description="Value of the underlying security."
    )
    conversion_exercise_price: Optional[float] = Field(
        default=None, description="Price of conversion or exercise of the securities."
    )
    transaction_value: Optional[float] = Field(
        default=None, description="Total value of the transaction."
    )
    value_owned: Optional[float] = Field(
        default=None, description="Value of the securities owned after the transaction."
    )
    footnote: Optional[str] = Field(
        default=None, description="Footnote for the transaction."
    )

    @field_validator("symbol", mode="before", check_fields=False)
    @classmethod
    def _to_upper(cls, v):
        """Normalize the symbol to uppercase."""
        return v.upper() if v else None

    @field_validator("ownership_type", mode="before", check_fields=False)
    @classmethod
    def _map_ownership_type(cls, v):
        """Expand the one-letter ownership code; unknown codes pass through."""
        if not v:
            return None
        return {"D": "Direct", "I": "Indirect"}.get(v.strip(), v)

    @field_validator("acquisition_or_disposition", mode="before", check_fields=False)
    @classmethod
    def _map_acquisition_disposition(cls, v):
        """Expand the one-letter A/D code; unknown codes pass through."""
        if not v:
            return None
        return {"A": "Acquisition", "D": "Disposition"}.get(v.strip(), v)

    @field_validator("transaction_type", mode="before", check_fields=False)
    @classmethod
    def _map_transaction_code(cls, v):
        """Translate the transaction code via TRANSACTION_CODE_MAP."""
        return TRANSACTION_CODE_MAP.get(v, v) if v else None

    @field_validator("transaction_timeliness", mode="before", check_fields=False)
    @classmethod
    def _map_timeliness(cls, v):
        """Translate the timeliness flag via TIMELINESS_MAP."""
        return TIMELINESS_MAP.get(v, v) if v else None
174
+
175
+
176
class SecInsiderTradingFetcher(
    Fetcher[SecInsiderTradingQueryParams, list[SecInsiderTradingData]]
):
    """SEC Insider Trading Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecInsiderTradingQueryParams:
        """Transform query parameters, defaulting to the trailing 120 days."""
        # pylint: disable=import-outside-toplevel
        from datetime import datetime, timedelta

        # Apply defaults only when neither bound of the date range is given.
        if not params.get("start_date") and not params.get("end_date"):
            today = datetime.now().date()
            params["start_date"] = today - timedelta(days=120)
            params["end_date"] = today

        return SecInsiderTradingQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecInsiderTradingQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Extract the data from the SEC archives."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.form4 import get_form_4

        return await get_form_4(
            query.symbol,
            query.start_date,
            query.end_date,
            query.limit,
            query.use_cache,
        )

    @staticmethod
    def transform_data(
        query: SecInsiderTradingQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecInsiderTradingData]:
        """Validate each record into the data model."""
        return [SecInsiderTradingData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/institutions_search.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Institutions Search Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from openbb_core.provider.abstract.data import Data
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
10
+ from pydantic import Field
11
+
12
+
13
class SecInstitutionsSearchQueryParams(CotSearchQueryParams):
    """SEC Institutions Search Query.

    Source: https://sec.gov/
    """

    # Persist downloaded CIK data locally to avoid repeated fetches.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
23
+
24
+
25
class SecInstitutionsSearchData(Data):
    """SEC Institutions Search Data."""

    # Map model field names to the column names in the SEC source table.
    __alias_dict__ = {
        "name": "Institution",
        "cik": "CIK Number",
    }

    name: Optional[str] = Field(
        default=None,
        description="The name of the institution.",
    )
    # May arrive as a string or an integer from the source data.
    cik: Optional[Union[str, int]] = Field(
        default=None,
        description="Central Index Key (CIK)",
    )
41
+
42
+
43
class SecInstitutionsSearchFetcher(
    Fetcher[
        SecInstitutionsSearchQueryParams,
        List[SecInstitutionsSearchData],
    ]
):
    """SEC Institutions Search Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecInstitutionsSearchQueryParams:
        """Transform the query."""
        return SecInstitutionsSearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecInstitutionsSearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import get_all_ciks

        institutions = await get_all_ciks(use_cache=query.use_cache)
        # Match as a literal substring (regex=False): user input containing regex
        # metacharacters (e.g. "(", "+") would otherwise raise re.error.
        hp = institutions["Institution"].str.contains(
            query.query, case=False, regex=False
        )
        return institutions[hp].astype(str).to_dict("records")

    @staticmethod
    def transform_data(
        query: SecInstitutionsSearchQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecInstitutionsSearchData]:
        """Transform the data to the standard format."""
        return [SecInstitutionsSearchData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/latest_financial_reports.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RSS Latest Financials Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.latest_financial_reports import (
11
+ LatestFinancialReportsData,
12
+ LatestFinancialReportsQueryParams,
13
+ )
14
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
15
+ from openbb_core.provider.utils.errors import EmptyDataError
16
+ from pydantic import Field, field_validator
17
+
18
# Form types accepted by the report_type query parameter.
report_type_choices = [
    "1-K", "1-SA", "1-U",
    "10-D", "10-K", "10-KT", "10-Q", "10-QT",
    "20-F", "40-F",
    "6-K", "8-K",
]
32
+
33
+
34
class SecLatestFinancialReportsQueryParams(LatestFinancialReportsQueryParams):
    """SEC Latest Financial Reports Query.

    source: https://www.sec.gov/edgar/search/
    """

    __json_schema_extra__ = {
        "report_type": {"multiple_items_allowed": True, "choices": report_type_choices}
    }

    date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("date", "") + " Defaults to today.",
    )
    report_type: Optional[str] = Field(
        default=None,
        description="Return only a specific form type. Default is all quarterly, annual, and current reports."
        + f" Choices: {', '.join(report_type_choices)}.",
    )

    @field_validator("report_type", mode="before", check_fields=False)
    @classmethod
    def validate_report_type(cls, v):
        """Validate and normalize the comma-separated report type list.

        Raises
        ------
        ValueError
            If any entry is not one of `report_type_choices`.
        """
        if v is None:
            return v
        # Strip surrounding whitespace so "10-K, 10-Q" is accepted as well as "10-K,10-Q".
        rpts = [rpt.strip() for rpt in v.split(",")]
        for rpt in rpts:
            if rpt not in report_type_choices:
                raise ValueError(
                    f"Invalid report type: {rpt}. Choices: {', '.join(report_type_choices)}"
                )
        # Re-join the normalized tokens so downstream consumers see a clean list.
        return ",".join(rpts)
67
+
68
+
69
class SecLatestFinancialReportsData(LatestFinancialReportsData):
    """SEC Latest Financial Reports Data."""

    items: Optional[str] = Field(
        default=None, description="Item codes associated with the filing."
    )
    index_headers: str = Field(description="URL to the index headers file.")
    complete_submission: str = Field(
        description="URL to the complete submission text file."
    )
    # Not every filing publishes these two companion files.
    metadata: Optional[str] = Field(
        default=None, description="URL to the MetaLinks.json file, if available."
    )
    financial_report: Optional[str] = Field(
        default=None, description="URL to the Financial_Report.xlsx file, if available."
    )
85
+
86
+
87
class SecLatestFinancialReportsFetcher(
    Fetcher[SecLatestFinancialReportsQueryParams, list[SecLatestFinancialReportsData]]
):
    """SEC Latest Financial Reports Fetcher.

    Queries the SEC EDGAR full-text search endpoint (efts.sec.gov) for all
    financial-report filings on a single date, paging through results 100 at
    a time, then flattens each search hit into a row with document URLs.
    """

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecLatestFinancialReportsQueryParams:
        """Transform the query params."""
        return SecLatestFinancialReportsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecLatestFinancialReportsQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Extract the raw data from the SEC.

        Returns the raw list of search "hits" from the full-text search API.

        Raises
        ------
        OpenBBError
            If the first request fails, returns a non-dict payload, or no
            data exists for the date (with the default form set).
        EmptyDataError
            If no data exists for an explicitly requested form type.
        """
        # pylint: disable=import-outside-toplevel
        from datetime import timedelta  # noqa
        from openbb_core.provider.utils.helpers import amake_request
        from warnings import warn

        today = dateType.today()
        query_date = query.date if query.date is not None else today

        # Filings are not accepted on weekends; roll back to the prior Friday.
        if query_date.weekday() > 4:
            query_date -= timedelta(days=query_date.weekday() - 4)

        date = query_date.strftime("%Y-%m-%d")

        # SEC requires a descriptive User-Agent identifying the requester.
        SEARCH_HEADERS = {
            "User-Agent": "my real company name definitelynot@fakecompany.com",
            "Accept-Encoding": "gzip, deflate",
        }

        # Default to the full set of financial-report form types.
        # The string is URL-encoded: %2C is a comma, %20 a space.
        forms = (
            query.report_type
            if query.report_type is not None
            else (
                "1-K%2C1-SA%2C1-U%2C1-Z%2C1-Z-W%2C10-D%2C10-K%2C10-KT%2C10-Q%2C10-QT%2C11-K%2C11-KT%2C15-12B%2C15-12G%2C"
                "15-15D%2C15F-12B%2C15F-12G%2C15F-15D%2C18-K%2C20-F%2C24F-2NT%2C25%2C25-NSE%2C40-17F2%2C40-17G%2C40-F%2C"
                "6-K%2C8-K%2C8-K12G3%2C8-K15D5%2CABS-15G%2CABS-EE%2CANNLRPT%2CDSTRBRPT%2CN-30B-2%2CN-30D%2CN-CEN%2CN-CSR%2C"
                "N-CSRS%2CN-MFP%2CN-MFP1%2CN-MFP2%2CN-PX%2CN-Q%2CNSAR-A%2CNSAR-B%2CNSAR-U%2CNT%2010-D%2CNT%2010-K%2C"
                "NT%2010-Q%2CNT%2011-K%2CNT%2020-F%2CQRTLYRPT%2CSD%2CSP%2015D2"
            )
        )

        def get_url(date, offset):
            """Build the search URL for one page of up to 100 results."""
            return (
                "https://efts.sec.gov/LATEST/search-index?dateRange=custom"
                f"&category=form-cat1&startdt={date}&enddt={date}&forms={forms}&count=100&from={offset}"
            )

        n_hits = 0
        results: list = []
        url = get_url(date, n_hits)
        try:
            response = await amake_request(url, headers=SEARCH_HEADERS)
        except OpenBBError as e:
            raise OpenBBError(f"Failed to get SEC data: {e}") from e

        if not isinstance(response, dict):
            raise OpenBBError(
                f"Unexpected data response. Expected dictionary, got {response.__class__.__name__}"
            )

        hits = response.get("hits", {})
        # Guard against a missing/None total so the paging comparison below
        # cannot raise a TypeError when the response has no "total" field.
        total_hits = hits.get("total", {}).get("value") or 0

        if hits.get("hits"):
            results.extend(hits["hits"])

        n_hits += len(results)

        # Page through the remaining results; the API returns 100 per page.
        while n_hits < total_hits:
            offset = n_hits
            url = get_url(date, offset)
            try:
                response = await amake_request(url, headers=SEARCH_HEADERS)
            except Exception as e:
                # Partial results are better than none; stop paging on error.
                warn(f"Failed to get the next page of SEC data: {e}")
                break

            if not isinstance(response, dict):
                warn("Unexpected response while paging SEC data; stopping early.")
                break

            hits = response.get("hits", {})
            new_results = hits.get("hits", [])

            if not new_results:
                break

            results.extend(new_results)
            n_hits += len(new_results)

        if not results and query.report_type is None:
            raise OpenBBError("No data was returned.")

        if not results and query.report_type is not None:
            raise EmptyDataError(
                f"No data was returned for form type {query.report_type}."
            )

        return results

    @staticmethod
    def transform_data(
        query: SecLatestFinancialReportsQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecLatestFinancialReportsData]:
        """Transform the raw data.

        Flattens each search hit into a validated model instance and builds
        the EDGAR archive URLs from the CIK, accession number, and hit ID.
        """
        results: list[SecLatestFinancialReportsData] = []

        def parse_entry(entry):
            """Parse each entry in the response."""
            source = entry.get("_source", {})
            ciks = ",".join(source["ciks"]) if source.get("ciks") else None
            display_names = source.get("display_names", [])
            names: list = []
            tickers: list = []
            sics = ",".join(source.get("sics", []))
            for name in display_names:
                # Display names look like "Company Name (TICKER) (CIK ...)".
                # Not every filer has a parenthesized ticker, so only parse
                # one when present to avoid an IndexError.
                if "(" in name and ")" in name:
                    tickers.append(name.split("(")[1].split(")")[0].strip())
                names.append(name.split("(")[0].strip())

            output: dict = {}
            output["filing_date"] = source.get("file_date")
            output["period_ending"] = source.get("period_ending")
            output["symbol"] = ",".join(tickers).replace(" ", "")
            output["name"] = ",".join(names)
            output["cik"] = ciks
            output["sic"] = sics
            output["report_type"] = source.get("form")
            output["description"] = source.get("file_description")

            # _id has the form "<accession>.txt:<document file name>".
            _id = entry.get("_id")
            root_url = (
                "https://www.sec.gov/Archives/edgar/data/"
                + source["ciks"][0]
                + "/"
                + source["adsh"].replace("-", "")
                + "/"
            )
            output["items"] = ",".join(source["items"]) if source.get("items") else None
            output["url"] = root_url + _id.split(":")[1]
            output["index_headers"] = (
                root_url + _id.split(":")[0] + "-index-headers.html"
            )
            output["complete_submission"] = root_url + _id.split(":")[0] + ".txt"
            # report_type may be None; normalize before the prefix checks.
            report_type = output["report_type"] or ""
            output["metadata"] = (
                root_url + "MetaLinks.json"
                if report_type.startswith(("10-", "8-"))
                else None
            )
            output["financial_report"] = (
                root_url + "Financial_Report.xlsx"
                if report_type.startswith(("10-", "8-"))
                or report_type in ["N-CSR", "QRTLYRPT", "ANNLRPT"]
                else None
            )
            return output

        # Some duplicates may exist in the data; deduplicate on document URL.
        seen = set()
        for entry in data:
            parsed_entry = parse_entry(entry)
            if parsed_entry["url"] not in seen:
                seen.add(parsed_entry["url"])
                results.append(
                    SecLatestFinancialReportsData.model_validate(parsed_entry)
                )

        return results
openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py ADDED
@@ -0,0 +1,1394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Management & Discussion Model."""
2
+
3
+ # pylint: disable=unused-argument,too-many-branches,too-many-locals,too-many-statements,too-many-nested-blocks,too-many-boolean-expressions,too-many-lines
4
+
5
+ from typing import Any, Literal, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.management_discussion_analysis import (
10
+ ManagementDiscussionAnalysisData,
11
+ ManagementDiscussionAnalysisQueryParams,
12
+ )
13
+ from openbb_core.provider.utils.errors import EmptyDataError
14
+ from pydantic import Field
15
+
16
+
17
class SecManagementDiscussionAnalysisQueryParams(
    ManagementDiscussionAnalysisQueryParams
):
    """SEC Management & Discussion Query."""

    # Extraction backend; 'inscriptis' is used as a fallback when
    # trafilatura fails to extract any content.
    strategy: Literal["inscriptis", "trafilatura"] = Field(
        default="trafilatura",
        description="The strategy to use for extracting the text. Default is 'trafilatura'.",
    )
    # Soft-wrap width for paragraphs; tables are never wrapped.
    wrap_length: int = Field(
        default=120,
        description="The length to wrap the extracted text, excluding tables. Default is 120.",
    )
    include_tables: bool = Field(
        default=False,
        description=(
            "Return tables formatted as markdown in the text. Default is False."
            " Tables may reveal 'missing' content,"
            " but will likely need some level of manual cleaning, post-request, to display properly."
            " In some cases, tables may not be recoverable due to the nature of the document."
        ),
    )
    # Controls the SQLite-backed HTTP cache used when downloading the filing.
    use_cache: bool = Field(
        default=True,
        description="When True, the file will be cached for use later. Default is True.",
    )
    # When set, transform_data short-circuits and returns the raw document.
    raw_html: bool = Field(
        default=False,
        description=(
            "When True, the raw HTML content of the entire filing will be returned. Default is False."
            " Use this option to parse the document manually."
        ),
    )
46
+
47
+
48
class SecManagementDiscussionAnalysisData(ManagementDiscussionAnalysisData):
    """SEC Management & Discussion Data."""

    # Source document URL; set by the fetcher from the filing's report_url.
    url: str = Field(
        description="The URL of the filing from which the data was extracted."
    )
54
+
55
+
56
+ class SecManagementDiscussionAnalysisFetcher(
57
+ Fetcher[
58
+ SecManagementDiscussionAnalysisQueryParams, SecManagementDiscussionAnalysisData
59
+ ]
60
+ ):
61
+ """SEC Management & Discussion Fetcher."""
62
+
63
    @staticmethod
    def transform_query(
        params: dict[str, Any]
    ) -> SecManagementDiscussionAnalysisQueryParams:
        """Transform the query.

        Validates the raw keyword arguments by constructing the
        SecManagementDiscussionAnalysisQueryParams model.
        """
        return SecManagementDiscussionAnalysisQueryParams(**params)
69
+
70
+ @staticmethod
71
+ async def aextract_data(
72
+ query: SecManagementDiscussionAnalysisQueryParams,
73
+ credentials: Optional[dict[str, Any]],
74
+ **kwargs: Any,
75
+ ) -> dict:
76
+ """Extract the data."""
77
+ # pylint: disable=import-outside-toplevel
78
+ from aiohttp_client_cache import SQLiteBackend
79
+ from aiohttp_client_cache.session import CachedSession
80
+ from openbb_core.app.utils import get_user_cache_directory
81
+ from openbb_core.provider.utils.helpers import amake_request
82
+ from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
83
+ from openbb_sec.utils.helpers import SEC_HEADERS, sec_callback
84
+ from pandas import offsets, to_datetime
85
+
86
+ # Get the company filings to find the URL.
87
+
88
+ if query.symbol == "BLK" or query.symbol.isnumeric():
89
+ filings = await SecCompanyFilingsFetcher.fetch_data(
90
+ {
91
+ "cik": "0001364742" if query.symbol == "BLK" else query.symbol,
92
+ "form_type": "10-K,10-Q",
93
+ "use_cache": query.use_cache,
94
+ },
95
+ {},
96
+ )
97
+
98
+ else:
99
+ filings = await SecCompanyFilingsFetcher.fetch_data(
100
+ {
101
+ "symbol": query.symbol,
102
+ "form_type": "10-K,10-Q",
103
+ "use_cache": query.use_cache,
104
+ },
105
+ {},
106
+ )
107
+
108
+ if not filings:
109
+ raise OpenBBError(
110
+ f"Could not find any 10-K or 10-Q filings for the symbol. -> {query.symbol}"
111
+ )
112
+
113
+ # If no calendar year or period is provided, get the most recent filing.
114
+
115
+ target_filing: Any = None
116
+ calendar_year: Any = None
117
+ calendar_period: Any = None
118
+
119
+ if query.calendar_year is None and query.calendar_period is None:
120
+ target_filing = (
121
+ filings[0]
122
+ if not query.calendar_year and not query.calendar_period
123
+ else None
124
+ )
125
+
126
+ if not target_filing:
127
+ if query.calendar_period and not query.calendar_year:
128
+ calendar_year = to_datetime("today").year
129
+ calendar_period = to_datetime("today").quarter
130
+ elif query.calendar_year and query.calendar_period:
131
+ calendar_year = query.calendar_year
132
+ calendar_period = int(query.calendar_period[1])
133
+ elif query.calendar_year:
134
+ calendar_year = query.calendar_year
135
+ calendar_period = 1
136
+
137
+ if query.calendar_year and not query.calendar_period:
138
+ target_filing = [
139
+ f
140
+ for f in filings
141
+ if f.report_type == "10-K"
142
+ and f.filing_date.year == query.calendar_year
143
+ ]
144
+ if not target_filing:
145
+ target_filing = [
146
+ f for f in filings if f.filing_date.year == query.calendar_year
147
+ ]
148
+ if target_filing:
149
+ target_filing = target_filing[0]
150
+
151
+ elif calendar_year and calendar_period:
152
+ start = to_datetime(f"{calendar_year}Q{calendar_period}")
153
+ start_date = (
154
+ start - offsets.QuarterBegin(1) + offsets.MonthBegin(1)
155
+ ).date()
156
+ end_date = (
157
+ start_date + offsets.QuarterEnd(0) - offsets.MonthEnd(0)
158
+ ).date()
159
+
160
+ for filing in filings:
161
+ if start_date < filing.filing_date < end_date:
162
+ target_filing = filing
163
+ break
164
+
165
+ if not target_filing:
166
+ raise OpenBBError(
167
+ f"Could not find a filing for the symbol -> {query.symbol}"
168
+ )
169
+
170
+ url = target_filing.report_url
171
+ response = ""
172
+
173
+ if query.use_cache is True:
174
+ cache_dir = f"{get_user_cache_directory()}/http/sec_financials"
175
+ async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
176
+ try:
177
+ await session.delete_expired_responses()
178
+ response = await amake_request(
179
+ url,
180
+ headers=SEC_HEADERS,
181
+ response_callback=sec_callback,
182
+ session=session,
183
+ ) # type: ignore
184
+ finally:
185
+ await session.close()
186
+ else:
187
+ response = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback) # type: ignore
188
+
189
+ if response and not isinstance(response, str):
190
+ raise OpenBBError(
191
+ f"Unexpected response received. Expected string and got -> {response.__class__.__name__}"
192
+ f" -> {response[:100]}"
193
+ )
194
+
195
+ if isinstance(response, str):
196
+ return {
197
+ "symbol": query.symbol,
198
+ "calendar_year": (
199
+ calendar_year if calendar_year else target_filing.report_date.year
200
+ ),
201
+ "calendar_period": (
202
+ calendar_period
203
+ if calendar_period
204
+ else to_datetime(target_filing.report_date).quarter
205
+ ),
206
+ "period_ending": target_filing.report_date,
207
+ "report_type": target_filing.report_type,
208
+ "url": url,
209
+ "content": response,
210
+ }
211
+
212
+ @staticmethod
213
+ def transform_data( # noqa: PLR0912
214
+ query: SecManagementDiscussionAnalysisQueryParams,
215
+ data: dict,
216
+ **kwargs: Any,
217
+ ) -> SecManagementDiscussionAnalysisData:
218
+ """Transform the data."""
219
+ # pylint: disable=import-outside-toplevel
220
+ import re # noqa
221
+ from inscriptis import get_text
222
+ from inscriptis.model.config import ParserConfig
223
+ from textwrap import wrap
224
+ from trafilatura import extract
225
+ from warnings import warn
226
+
227
+ if query.raw_html is True:
228
+ return SecManagementDiscussionAnalysisData(**data)
229
+
230
+ is_quarterly = data.get("report_type", "").endswith("Q")
231
+ is_inscriptis = query.strategy == "inscriptis"
232
+
233
+ def is_table_header(line: str) -> bool:
234
+ """Check if line is a table header"""
235
+ return (
236
+ (
237
+ all(
238
+ not char.isnumeric()
239
+ for char in line.replace("(", "")
240
+ .replace(")", "")
241
+ .replace(",", "")
242
+ .replace(" ", "")
243
+ .replace("|", "")
244
+ )
245
+ and line.replace("|", "").replace("-", "").strip() != ""
246
+ and "/" not in line
247
+ )
248
+ or all(
249
+ len(str(word).strip()) == 4 and str(word).strip().startswith("20")
250
+ for word in line.split("|")
251
+ if word
252
+ )
253
+ or line.replace("|", "").replace(" ", "").endswith(":")
254
+ or "of dollars" in line.lower()
255
+ )
256
+
257
+ def insert_cell_dividers(line):
258
+ cells = line.strip().split("|")
259
+ new_cells: list = []
260
+ for cell in cells:
261
+ cell = cell.replace("$", "").replace(" % ", "").replace("%", "") # noqa
262
+ if (
263
+ "par value" in cell.lower()
264
+ or "shares" in cell.lower()
265
+ or (" %-" in cell and "notes" in cell.lower())
266
+ or "as of" in cell.lower()
267
+ or "of dollars" in cell.lower()
268
+ or "year" in cell.lower()
269
+ or "scenario" in cell.lower()
270
+ or " to " in cell.lower()
271
+ or "section" in cell.lower()
272
+ or "title" in cell.lower()
273
+ or "adverse currency fluctuation" in cell.lower()
274
+ or "vs" in cell.lower()
275
+ or cell.strip().endswith(",")
276
+ ):
277
+ new_cells.append(cell)
278
+ continue
279
+ if "Form 10-" in cell:
280
+ continue
281
+ new_cell = cell.strip()
282
+ if new_cell.endswith(("-", "—", "–")) and any(
283
+ c.isalpha() for c in new_cell
284
+ ):
285
+ # Remove the dash and insert a divider before it
286
+ new_cell = re.sub(r"[—\-–]+$", "", new_cell).strip() + " | —"
287
+ elif (
288
+ re.search("[A-Za-z]", new_cell)
289
+ and re.search("[0-9]", new_cell)
290
+ and re.search(r"[A-Za-z]\s+[0-9]", new_cell)
291
+ and "thru" not in new_cell.lower()
292
+ and "through" not in new_cell.lower()
293
+ and "outstanding" not in new_cell.lower()
294
+ and "Tier" not in new_cell
295
+ and "%" not in new_cell
296
+ and "$" not in new_cell
297
+ and "in" not in new_cell
298
+ and "year" not in new_cell
299
+ and "scenario" not in new_cell
300
+ ):
301
+ # Handle cases with spaces between letters and numbers
302
+ new_cell = re.sub(
303
+ r"(?<=[A-Za-z])\s+(?=[0-9])(?!\([a-zA-Z])", " |", new_cell
304
+ )
305
+ new_cell = re.sub(
306
+ r"(?<=[A-Za-z])(?=[0-9])(?!\([a-zA-Z])", "|", new_cell
307
+ )
308
+ # Insert divider between consecutive numbers, excluding number(letter)
309
+ if (
310
+ re.search(
311
+ r"(\(\d+\.?\d*\)|\d+\.?\d*)\s+(\(\d+\.?\d*\)|\d+\.?\d*)",
312
+ new_cell,
313
+ )
314
+ and "versus" not in new_cell.lower()
315
+ and "thru" not in new_cell.lower()
316
+ and "through" not in new_cell.lower()
317
+ and not re.search(r"\d+\.?\d*\([a-zA-Z]\)", new_cell)
318
+ ):
319
+ new_cell = re.sub(
320
+ r"(\(\d+\)|\d+(?:\.\d+)?)\s+(?=\(|\d)(?!\([a-zA-Z])",
321
+ r"\1|",
322
+ new_cell,
323
+ )
324
+ new_cells.append(new_cell)
325
+ return "|".join(new_cells)
326
+
327
+ def process_extracted_text( # noqa: PLR0912
328
+ extracted_text: str, is_inscriptis: bool
329
+ ) -> list:
330
+ """Process extracted text"""
331
+
332
+ new_lines: list = []
333
+ starting_line = "Item 2."
334
+ annual_start = "Item 7."
335
+ ending_line = "Item 6"
336
+ annual_end = "Item 8. "
337
+ found_start = False
338
+ at_end = False
339
+ previous_line = ""
340
+ start_line_text = ""
341
+ line_i = 0
342
+ extracted_lines = extracted_text.splitlines()
343
+
344
+ for line in extracted_lines:
345
+ line_i += 1
346
+ if (
347
+ not line.strip()
348
+ or line.replace("|", "")
349
+ .strip()
350
+ .startswith(("Page ", "Table of Contents"))
351
+ or line.strip() in ("|", start_line_text)
352
+ or (len(line) < 3 and line.isnumeric())
353
+ or line.strip().replace("_", "").replace("**", "") == ""
354
+ ):
355
+ continue
356
+
357
+ if (
358
+ "Discussion and Analysis of Financial Condition and Results of Operations is presented in".lower()
359
+ in line.lower()
360
+ ):
361
+ annual_end = "PART IV"
362
+ elif (
363
+ "see the information under" in line.lower()
364
+ and "discussion and analysis" in line.lower()
365
+ ) and (
366
+ (is_quarterly and "10-K" not in line)
367
+ or (not is_quarterly and "10-Q" not in line)
368
+ ):
369
+ annual_end = "statements of consolidated"
370
+ ending_line = "statements of conslidated"
371
+
372
+ if (
373
+ (
374
+ line.strip()
375
+ .lower()
376
+ .startswith(
377
+ (
378
+ starting_line.lower(),
379
+ annual_start.lower(),
380
+ )
381
+ )
382
+ and "management" in line.lower()
383
+ )
384
+ or (
385
+ line.replace("|", "")
386
+ .lstrip(" ")
387
+ .lower()
388
+ .startswith("the following is management")
389
+ and "discussion and analysis of" in line.lower()
390
+ )
391
+ or (
392
+ line.endswith(
393
+ " “Management’s Discussion and Analysis of Financial Condition and Results of Operations” "
394
+ "below."
395
+ )
396
+ )
397
+ or (
398
+ line.replace("*", "").strip().lower().startswith("item")
399
+ and line.replace("*", "")
400
+ .replace(".", "")
401
+ .strip()
402
+ .lower()
403
+ .endswith(
404
+ "discussion and analysis of financial condition and results of operations"
405
+ )
406
+ )
407
+ # Section may be in a nested table.
408
+ or (
409
+ line.replace("*", "")
410
+ .replace("|", "")
411
+ .strip()
412
+ .lower()
413
+ .startswith("item")
414
+ and line.replace("*", "")
415
+ .replace("|", "")
416
+ .replace(".", "")
417
+ .rstrip(" ")
418
+ .lower()
419
+ .endswith(
420
+ "discussion and analysis of financial condition and results of operations"
421
+ )
422
+ and line_i > 200
423
+ )
424
+ or (
425
+ line.replace("*", "").replace("|", "").strip().lower()
426
+ == "financial review"
427
+ and line_i > 200
428
+ )
429
+ or (
430
+ line.replace("*", "")
431
+ .replace("|", "")
432
+ .replace(".", "")
433
+ .strip()
434
+ .lower()
435
+ .endswith(
436
+ (
437
+ "discussion and analysis",
438
+ "discussion and analysis of",
439
+ "analysis of financial",
440
+ "of financial condition",
441
+ )
442
+ )
443
+ and extracted_lines[line_i + 1]
444
+ .replace("|", "")
445
+ .replace(".", "")
446
+ .strip()
447
+ .lower()
448
+ .endswith(("financial condition", "results of operations"))
449
+ )
450
+ or (
451
+ line.replace("|", "").replace(".", "").strip()
452
+ == "Management’s Discussion and Analysis of Financial Condition and Results of Operations"
453
+ )
454
+ or (
455
+ line
456
+ in [
457
+ "2. MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
458
+ "7. MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
459
+ "Items 2. and 3. Management’s Discussion and Analysis of Financial Condition and "
460
+ "Results of Operations; Quantitative and Qualitative Disclosures about Market Risk",
461
+ "MANAGEMENT'S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS |",
462
+ "Item 2. Management’s Discussion and Analysis of Financial Condition and Results of Operations.", # noqa
463
+ "Item 7. Management’s Discussion and Analysis of Financial Condition and Results of Operations.", # noqa
464
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
465
+ "Management's Discussion and Analysis of Financial Condition and Results of Operations",
466
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF THE FINANCIAL CONDITION AND RESULTS OF",
467
+ "MANAGEMENT'S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
468
+ "Part I. Item 2. Management’s Discussion and Analysis of Financial Condition and Results of Operations", # noqa
469
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (“MD&A”)", # noqa
470
+ "ITEM 7 – MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)", # noqa
471
+ "ITEM 2 – MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)", # noqa
472
+ "Part II. Item 7. Management’s Discussion and Analysis of Financial Condition and Results of Operations", # noqa # pylint: disable=line-too-long
473
+ "| Item 2. | |",
474
+ "| Item 7. | |",
475
+ ]
476
+ )
477
+ or line.startswith(
478
+ "Item 7—Management's Discussion and Analysis of Financial Conditions"
479
+ )
480
+ or (
481
+ line.startswith(
482
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)"
483
+ )
484
+ and line_i > 200
485
+ )
486
+ or (
487
+ line.replace("|", "").strip()
488
+ == "Management's Discussion and Analysis"
489
+ and line_i > 300
490
+ )
491
+ or (
492
+ line.replace("|", "")
493
+ .strip()
494
+ .startswith(
495
+ "The following discussion and analysis of the financial condition and results of operations"
496
+ )
497
+ )
498
+ ):
499
+ line = line.replace("|", "").replace("*", "") # noqa
500
+ if line.strip(" ")[-1].isnumeric():
501
+ continue
502
+
503
+ if (
504
+ extracted_lines[line_i + 1]
505
+ .replace("*", "")
506
+ .replace(".", "")
507
+ .strip()
508
+ .lower()
509
+ .endswith(("financial condition", "results of operations"))
510
+ ):
511
+ line = "Management’s Discussion and Analysis of Financial Condition and Results of Operations" # noqa
512
+ _ = extracted_lines.pop(line_i + 1)
513
+ found_start = True
514
+ at_end = False
515
+ start_line_text = line
516
+ new_lines.append(
517
+ "# **MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)"
518
+ "**\n\n"
519
+ )
520
+ continue
521
+
522
+ if (
523
+ found_start
524
+ and (
525
+ line.replace("|", "")
526
+ .strip()
527
+ .lower()
528
+ .startswith(ending_line.lower())
529
+ and is_quarterly
530
+ )
531
+ or (
532
+ annual_end.lower() in line.lower()
533
+ and not is_quarterly
534
+ and len(new_lines) > 20
535
+ )
536
+ or line.replace("|", "").strip().lower().startswith("signatures")
537
+ or line.strip().startswith(
538
+ "Item 8—Financial Statements and Supplementary Data"
539
+ )
540
+ or line.strip().startswith("MANAGEMENT AND AUDITOR’S REPORTS")
541
+ or line == "EXHIBIT INDEX"
542
+ ):
543
+ at_end = True
544
+ line = line.replace("|", " ").replace(" ", " ") # noqa
545
+
546
+ if found_start and not at_end:
547
+ if (
548
+ line[0].isdigit()
549
+ or line[0] == "•"
550
+ or line[0] == "●"
551
+ and line[1] not in [".", " ", "\u0020"]
552
+ and line[1].isalpha()
553
+ ):
554
+ word = line.split(" ")[0]
555
+ if not word.replace(" ", "").isnumeric():
556
+ line = line[0] + " " + line[1:] # noqa
557
+
558
+ if "▪" in line:
559
+ line = line.replace("▪", "").replace("|", "").strip() # noqa
560
+ line = "- " + line # noqa
561
+
562
+ if "●" in line or "•" in line or "◦" in line:
563
+ line = ( # noqa
564
+ line.replace("|", "")
565
+ .replace("●", "-")
566
+ .replace("•", "-")
567
+ .replace("◦", "-")
568
+ )
569
+
570
+ if (
571
+ line.replace("|", "").strip().startswith("-")
572
+ and len(line.strip()) > 1
573
+ and line.strip()[1] != " "
574
+ ):
575
+ line = "- " + line[1:] # noqa
576
+
577
+ if "the following table" in line.lower():
578
+ line = ( # noqa
579
+ line.replace("|", "").replace(" ", " ").strip() + "\n"
580
+ )
581
+
582
+ if (
583
+ line.replace("|", "").replace(" ", "").strip().startswith("(")
584
+ and (
585
+ line.replace("|", "").replace(" ", "").strip().endswith(")")
586
+ )
587
+ and line.count("|") < 3
588
+ ):
589
+ line = line.replace("|", "").replace(" ", "").strip() # noqa
590
+ next_line = (
591
+ extracted_lines[line_i + 1]
592
+ if line_i + 1 < len(extracted_lines)
593
+ else ""
594
+ )
595
+ if not next_line.replace("|", "").replace(" ", "").strip():
596
+ next_line = (
597
+ extracted_lines[line_i + 2]
598
+ if line_i + 2 < len(extracted_lines)
599
+ else ""
600
+ )
601
+ if line_i + 1 < len(extracted_lines):
602
+ _ = extracted_lines.pop(line_i + 1)
603
+ if (
604
+ next_line.replace("|", "")
605
+ .replace(" ", "")
606
+ .strip()
607
+ .endswith((",", ";", "."))
608
+ ):
609
+ line = ( # noqa
610
+ line.replace("|", "").replace(" ", "").strip()
611
+ + " "
612
+ + next_line.replace("|", "").strip()
613
+ )
614
+ _ = extracted_lines.pop(line_i + 1)
615
+
616
+ if "|" in line:
617
+ first_word = line.split("|")[0].strip()
618
+ if first_word.isupper() or "item" in first_word.lower():
619
+ line = ( # noqa
620
+ line.replace("|", " ").replace(" ", " ").strip()
621
+ )
622
+
623
+ if (
624
+ line.endswith("|")
625
+ and not line.startswith("|")
626
+ and len(line) > 1
627
+ ):
628
+ line = ( # noqa
629
+ "| " + line
630
+ if len(line.split("|")) > 1
631
+ else line.replace("|", "").strip()
632
+ )
633
+ elif (
634
+ line.startswith("|")
635
+ and not line.endswith("|")
636
+ and len(line) > 1
637
+ and len(line.split("|"))
638
+ ):
639
+ line = ( # noqa
640
+ line + " |"
641
+ if len(line.split("|")) > 1
642
+ else line.replace("|", "").strip()
643
+ )
644
+
645
+ if query.include_tables is False and "|" in line:
646
+ continue
647
+
648
+ if (
649
+ "page" in line.replace("|", "").lower()
650
+ or "form 10-" in line.lower()
651
+ ):
652
+ continue
653
+
654
+ if "$" in line:
655
+ line = line.replace("$ |", "").replace("| |", "|") # noqa
656
+ elif "%" in line:
657
+ line = line.replace("% |", "").replace("| |", "|") # noqa
658
+
659
+ if "|" not in previous_line and all(
660
+ char == "|" for char in line.replace(" ", "")
661
+ ):
662
+ line = ( # noqa
663
+ line
664
+ + "\n"
665
+ + line.replace(" ", "")
666
+ .replace(" ", "")
667
+ .replace(" ", "")
668
+ .replace(" ", ":------:")
669
+ )
670
+
671
+ else:
672
+ is_header = is_table_header(line)
673
+ is_multi_header = (
674
+ "months ended" in line.lower()
675
+ or "year ended" in line.lower()
676
+ or "quarter ended" in line.lower()
677
+ or "change" in line.lower()
678
+ or line.strip().endswith(",")
679
+ )
680
+ is_date = (
681
+ ", 20" in line
682
+ and "through" not in line.lower()
683
+ and "thru" not in line.lower()
684
+ and "from" not in line.lower()
685
+ ) or (
686
+ "20" in line
687
+ and all(
688
+ len(d.strip()) == 4 for d in line.split("|") if d
689
+ )
690
+ )
691
+ if is_header or is_date or is_multi_header:
692
+ line = ( # noqa
693
+ line.replace(" | | ", " | ")
694
+ .replace(" | |", " | ")
695
+ .replace("| % |", "")
696
+ .replace("| $ |", "")
697
+ .replace("|$ |", "")
698
+ )
699
+ if is_header:
700
+ line = "| " + line # noqa
701
+ else:
702
+ line = ( # noqa
703
+ line.replace("| $ | ", "")
704
+ .replace("| % |", "")
705
+ .replace(" ", "|")
706
+ .replace("|$ |", "")
707
+ )
708
+ if not line.strip().startswith("|"):
709
+ line = "| " + line # noqa
710
+ line = insert_cell_dividers(line) # noqa
711
+ line = ( # noqa
712
+ line.replace(" | | ", " | ")
713
+ .replace(" | |", " |")
714
+ .replace("||", "|")
715
+ .replace("||", "|")
716
+ .replace(" | | | ", " | ")
717
+ .replace(" | | |", "|")
718
+ )
719
+ if line[-1] != "|":
720
+ line = line + "|" # noqa
721
+
722
+ previous_line = new_lines[-1]
723
+ next_line = extracted_lines[line_i + 1]
724
+
725
+ if "|" in previous_line and not line.strip():
726
+ continue
727
+
728
+ if (
729
+ "|" in previous_line
730
+ and "|" in next_line
731
+ and not line.strip("\n").replace(" ", "")
732
+ ):
733
+ continue
734
+
735
+ if (
736
+ "|" in previous_line
737
+ and "|" not in next_line
738
+ and "|" in extracted_lines[line_i + 2]
739
+ and not line.strip()
740
+ ):
741
+ line_i += 1
742
+ continue
743
+
744
+ if (
745
+ "|" in previous_line
746
+ and "|" in next_line
747
+ and not line.strip("\n").replace(" ", "")
748
+ ):
749
+ continue
750
+ if (
751
+ "|" in previous_line
752
+ and "|" not in next_line
753
+ and "|" in extracted_lines[line_i + 2]
754
+ and not line.strip()
755
+ ):
756
+ line_i += 1
757
+ continue
758
+
759
+ if is_inscriptis is True:
760
+ if (
761
+ "|:-" in previous_line
762
+ and "|" in extracted_lines[line_i + 1]
763
+ and line.strip()
764
+ and not line.strip().startswith("|")
765
+ ):
766
+ line = "|" + line # noqa
767
+ if not line.strip().endswith("|"):
768
+ line = line + "|" # noqa
769
+
770
+ line = ( # noqa
771
+ line.replace("||||", "|")
772
+ .replace("|||", "|")
773
+ .replace("| |", "")
774
+ .replace("| | |", "|")
775
+ .replace("| |", "|")
776
+ .replace(" ", "")
777
+ .replace("||", "|")
778
+ .replace("|%|", "")
779
+ .replace("|% |", "")
780
+ .replace("|$|", "")
781
+ .replace("|$ |", "")
782
+ .replace("|)", ")")
783
+ .replace(" )", ")")
784
+ .replace(" )", ")")
785
+ .replace("| | |", "|")
786
+ .replace("| |", "|")
787
+ .replace(" | | ", "|")
788
+ .replace("| |", "|")
789
+ )
790
+ if (
791
+ "months ended" in line.lower()
792
+ or "year ended" in line.lower()
793
+ or "quarter ended" in line.lower()
794
+ or "weeks ended" in line.lower()
795
+ and "|" not in line
796
+ and "|" in previous_line
797
+ ):
798
+ line = "|" + line # noqa
799
+
800
+ if line not in ["||", "| |"]:
801
+ new_lines.append(line)
802
+ previous_line = line
803
+ else:
804
+ if (
805
+ "|" in previous_line
806
+ and "|" in extracted_lines[line_i + 1]
807
+ and not line.strip()
808
+ ):
809
+ continue
810
+
811
+ if is_inscriptis is True and ". " in line:
812
+ line = line.replace(". ", ".\n\n") # noqa
813
+ elif is_inscriptis is True and ". " in line:
814
+ line = line.replace(". ", ".\n\n") # noqa
815
+
816
+ if " ." in line:
817
+ line = line.replace(" .", ".") # noqa
818
+
819
+ if "|" in previous_line:
820
+ new_lines.extend(
821
+ ["\n"] + wrap(line, width=query.wrap_length) + ["\n"]
822
+ )
823
+ elif line.strip().startswith("-"):
824
+ new_lines.extend([line] + ["\n"])
825
+ else:
826
+ new_lines.extend(
827
+ wrap(line, width=query.wrap_length) + ["\n"]
828
+ )
829
+ previous_line = line
830
+
831
+ return new_lines
832
+
833
+ # Do a first pass, and if extraction fails we can identify where the problem originates.
834
+
835
def try_inscriptis(filing_str):
    """Fallback HTML-to-text extraction using Inscriptis.

    Renders the filing HTML to plain text with "|" as the table-cell
    separator, drops blank lines, repairs common spacing artifacts left
    by the renderer, and then feeds the result through the shared
    `process_extracted_text` pass with ``is_inscriptis=True``.
    """
    extracted_text = get_text(
        filing_str,
        config=ParserConfig(
            table_cell_separator="|",
        ),
    )
    extracted_lines = []
    for line in extracted_text.splitlines():
        # Blank lines carry no content; table structure is rebuilt later.
        if not line.strip():
            continue
        # Undo stray spaces Inscriptis inserts around punctuation/quotes,
        # plus a few split-word artifacts seen in real filings.
        # NOTE(review): the "o f" -> "of" and "a n" -> "an" replacements are
        # aggressive and can corrupt legitimate text spanning a word boundary
        # (e.g. "two files" contains "o f") — confirm against sample filings.
        extracted_lines.append(
            line.strip()
            .replace(" , ", ", ")
            .replace(" . ", ". ")
            .replace(" .", ".")
            .replace(" ’ ", "'")
            .replace(" ' ", "'")
            .replace("“ ", "“")
            .replace(" ”", "”")
            .replace("o f", "of")
            .replace("a n", "an")
            .replace("in crease", "increase")
        )

    # True flags the Inscriptis-specific clean-up branches downstream.
    return process_extracted_text("\n".join(extracted_lines), True)
862
+
863
+ filing_str = data.get("content", "")
864
+
865
+ if query.strategy == "trafilatura":
866
+ extracted_text = extract(
867
+ filing_str,
868
+ include_tables=True,
869
+ include_comments=True,
870
+ include_formatting=True,
871
+ include_images=True,
872
+ include_links=False,
873
+ )
874
+ new_lines = process_extracted_text(extracted_text, False)
875
+
876
+ if not new_lines:
877
+ warn("Trafilatura extraction failed, trying Inscriptis.")
878
+ new_lines = try_inscriptis(filing_str)
879
+ is_inscriptis = True
880
+
881
+ else:
882
+ new_lines = try_inscriptis(filing_str)
883
+
884
+ if not new_lines:
885
+ raise EmptyDataError(
886
+ "No content was found in the filing, likely a parsing error from unreachable content."
887
+ f" -> {data['url']}"
888
+ " -> The content can be analyzed by inspecting"
889
+ " the output of `SecManagementDiscussionAnalysisFetcher.aextract_data`,"
890
+ " or by setting `raw_html=True` in the query."
891
+ )
892
+
893
+ # Second pass - clean up document
894
+
895
def is_title_case(line: str) -> bool:
    """Check if a line follows financial-document title-case patterns.

    Returns True for lines that look like section headings: strict
    title-case lines, all-caps headings, "Item"/"ITEM" section leads, or
    lines that become title-case once common lowercase connector words
    (and/of/the/...) are normalized. Bullets, sentence fragments, quoted
    text and "... ended" date leaders are rejected.
    """
    stripped = line.strip()

    # Bullets, sentence endings, quotes, hyphen-trailing lines and
    # period-ended date leaders ("Three Months Ended") are never titles.
    if (
        stripped.startswith("-")
        or stripped.endswith(".")
        or stripped.endswith(",")
        or "“" in line
        or line.endswith("-")
        or line.lower().endswith("ended")
    ):
        return False

    # Strict title case. The guard above already rejected trailing "." and
    # leading "-", so the original extra checks here were redundant; a
    # second if-block repeating the guard's conditions was dead code and
    # has been removed.
    if line.istitle():
        return True

    # All-caps headings (e.g. "RESULTS OF OPERATIONS") or "Item 7." style
    # section leads. Parentheses make the original and/or precedence
    # explicit: the all-caps test is one alternative, the Item/ITEM
    # prefixes stand on their own.
    if (
        (
            "|" not in line
            and stripped.isupper()
            and len(stripped) > 1
            and line[-1].isalpha()
        )
        or stripped.startswith("Item")
        or stripped.startswith("ITEM")
    ):
        return True

    # Title case after upper-casing common lowercase connector words.
    return (
        line.replace("(", "")
        .replace(")", "")
        .replace(",", "")
        .replace(" and ", " And ")
        .replace(" of ", " Of ")
        .replace(" the ", " The ")
        .replace(" vs ", " VS ")
        .replace(" in ", " In ")
        .replace(" to ", " To ")
        .replace(" for ", " For ")
        .replace(" with ", " With ")
        .replace(" on ", " On ")
        .replace(" at ", " At ")
        .replace(" from ", " From ")
        .replace(" by ", " By ")
    ).istitle()
944
+
945
def count_columns_in_data_row(data_row: str) -> int:
    """Count actual columns from a markdown-style table data row.

    A row shaped ``| a | b | c |`` has one more "|" than columns, so the
    count is ``pipes - 1``. Equivalent to the original
    ``len(list(data_row.split("|"))) - 2`` without the redundant ``list()``
    wrapper and intermediate split list.
    """
    return data_row.count("|") - 1
948
+
949
def pad_row_columns(row: str, target_cols: int) -> str:
    """Pad a table row with empty cells to match the target column count.

    Header-like rows (per `is_table_header`) ending in ":" — or short
    alphabetic rows ending in ")" that are not year columns — get empty
    cells appended on the right; every other short row is padded on the
    left so numeric data stays right-aligned.

    NOTE(review): the mixed ``and``/``or`` below binds as
    ``(A and B) or (C and not D)`` — the "not a year row" guard applies
    only to the second alternative. Confirm that is intentional.
    NOTE(review): ``target_cols - current_cols - 2`` pads two fewer cells
    than the shortfall — verify against rendered output.
    """
    cells = row.split("|")
    current_cols = len(cells) - 2  # Exclude outer pipes

    if current_cols < target_cols:
        # Add empty cells
        if (
            is_table_header(row)
            and row.replace("|", "").replace(" ", "").endswith(":")
            or (
                row.replace("|", "").replace(" ", "").endswith(")")
                and row.replace("|", "").replace(" ", "")[0].isalpha()
                and len(row.split("|")) < 3
            )
            and not (
                "20" in row and all(len(d) == 4 for d in row.split("|") if d)
            )
        ):
            # Right-pad: keep the non-empty cells, append blanks.
            cells = [c for c in cells if c.strip()] + [
                " " for _ in range(target_cols - current_cols - 2)
            ]
            return "|" + "|".join(cells)
        # Left-pad: prepend blanks so trailing data cells keep alignment.
        cells = [" " for _ in range(target_cols - current_cols - 2)] + cells

    return "|".join(cells)
975
+
976
def process_document(  # noqa: PLR0912
    document: list[str], is_inscriptis: bool
) -> list[str]:
    """Clean up document lines.

    Second-pass, order-dependent heuristic clean-up over the extracted
    markdown lines: drops empty/separator junk rows, repairs table
    header/divider rows, rewrites image links to absolute URLs, merges
    broken bullet continuations, and pads short table rows. `document`
    is mutated in place (pop/insert) while being index-walked, so the
    statement order here is load-bearing — do not reorder.
    """
    cleaned_lines: list = []
    i = 0
    max_cols = 0

    while i < len(document):
        current_line = document[i]
        # Drop blank/pipe-only lines sandwiched between two table rows.
        # NOTE(review): at i == 0, document[i - 1] wraps to the last line.
        if (
            "|" in document[i - 1]
            and i - 1 > 1
            and i + 1 <= len(document)
            and i + 1 < len(document)
            and "|" in document[i + 1]
        ) and (
            current_line == "" or current_line.replace("|", "").strip() == ""
        ):
            i += 1
            continue

        if is_inscriptis is True and "|" not in current_line:
            current_line = current_line.replace(" ", " ")

        # Strip leftover Inscriptis alignment markers.
        if is_inscriptis is True and "-::-" in current_line:
            current_line = (
                current_line.replace(":------::", "")
                .replace("::------:", "")
                .replace("::------::", "")
                .replace(" ", "")
            ).strip()
        # A divider row with no header above it: synthesize a blank header.
        if (
            is_inscriptis is True
            and "|:-" in current_line
            and "|" not in document[i - 1]
        ):
            cleaned_lines.append("| " * current_line.count("|"))

        if is_inscriptis is True and "|" in document[i - 1]:
            # NOTE(review): pops from `document` while index-walking it —
            # works because `i` is intentionally not advanced, but fragile.
            if current_line.strip() in [
                '""',
                "",
                " ",
                "\n",
                "|",
                "| | | | |",
                "| | |",
            ]:
                _ = document.pop(i)
                continue

            current_line = current_line.replace(" ", " ")

            # Units captions belong inside the table.
            if (
                current_line.strip().startswith("(inmillions")
                and "|" not in current_line
            ):
                current_line = "|" + current_line

            if (
                current_line.strip().startswith("|:-")
                and current_line[-1] != "|"
            ):
                current_line = current_line + "|"

            # Narrative sentences referring to a table: detach from the table.
            if (
                "in the preceding table" in current_line.lower()
                or "in the table above" in current_line.lower()
                or "the following tables present" in current_line.lower()
                and "|" in document[i - 1]
            ):
                cleaned_lines.append("\n")
                current_line = "\n" + current_line.replace("|", "").strip()

            # A markdown heading immediately after a table row is treated
            # as a bolded in-table label.
            if (
                current_line.startswith("# ")
                and "|" not in current_line
                and "|" in document[i - 1]
            ):
                current_line = "|" + current_line.replace("# ", " *")
                cleaned_lines.append(current_line)
                i += 1
                continue

            # A bare numeric continuation after a table row: wrap in pipes.
            if (
                "|" in document[i - 1]
                and len(current_line) > 1
                and "|" not in current_line
                and current_line.replace(")", "")[-1].isnumeric()
            ):
                current_line = "|" + current_line + " |"

        # Page-number artifacts like "- 12 -".
        if (
            current_line.strip()
            and current_line.strip().startswith("-")
            and current_line.strip().endswith("-")
            and len(current_line.strip().replace("-", "").replace(" ", "")) < 4
            and current_line.strip()
            .replace("-", "")
            .replace(" ", "")
            .isnumeric()
        ):
            i += 1
            continue
        # Rewrite relative image links against the filing's base URL.
        if "![" in current_line:
            image_file = (
                current_line.split("]")[1].replace("(", "").replace(")", "")
            )
            base_url = data["url"].rsplit("/", 1)[0]
            image_url = f"{base_url}/{image_file}"
            cleaned_lines.append(f"![Graphic]({image_url})")
            i += 1
            continue

        # "o" bullet glyph rendered as a one-cell table row.
        if current_line.strip() == "| | o |":
            i += 1
            current_line = "- " + document[i].replace("|", "").strip()
            cleaned_lines.append(current_line)
            i += 1
            continue
        if current_line.strip() == ":------:":
            i += 1
            continue
        # Fewer than 3 pipes: not a real table row — flatten it.
        if current_line.count("|") < 3:
            current_line = (
                current_line.replace("|", "").replace(":------:", "").strip()
            )
            cleaned_lines.append(current_line)
            i += 1
            continue

        next_line = document[i + 1] if i + 1 < len(document) else ""

        # Re-join the 10-K MD&A title split across two lines.
        if next_line.replace("**", "").strip() == "AND RESULTS OF OPERATIONS":
            current_line = (
                "**"
                + current_line.replace("**", "").replace("\n", "").strip()
                + " "
                + "AND RESULTS OF OPERATIONS"
                + "**"
            )
            _ = document.pop(i + 1)
            cleaned_lines.append(current_line)
            i += 1
            continue

        previous_line = document[i - 1] if i > 0 else ""

        # Orphaned separator fragments.
        if current_line.strip() in (
            "--",
            "-",
            "|:------:|",
            "||",
            "| |",
            ":------:",
        ):
            if not next_line.strip() or next_line == current_line:
                i += 2
                continue
            i += 1
            continue

        if "| :-" in current_line:
            current_line = current_line.replace(" :- ", ":-")

        if "|:-" in current_line and not current_line.strip().endswith("|"):
            current_line = current_line + "|"

        # NOTE(review): this branch does not advance `i` before `continue`,
        # so if it is ever reached the loop spins forever on the same line.
        # It may be unreachable given the earlier empty-row handling —
        # confirm, or add `i += 1`.
        if (
            not current_line.strip()
            and "|" in document[i - 1]
            and "|" in document[i + 1]
        ):
            continue

        if (
            query.include_tables is False
            and "|" in current_line
            and "|" not in document[i - 1]
        ):
            current_line = current_line.replace("|", "")

        if current_line.startswith(" -"):
            current_line = "- " + current_line[2:]

        # Footnote markers like "(1)" / "[a]": glue them to the next
        # non-empty line.
        if (
            current_line.startswith(("(", "["))
            and current_line.endswith((")", "]"))
            and len(current_line) < 4
        ):
            current_line = current_line.replace("[", "(").replace("]", ")")
            dead_line = True
            new_i = i
            # NOTE(review): unguarded document[new_i] — raises IndexError
            # if the marker is the last non-empty line. Confirm inputs.
            while dead_line is True:
                new_i += 1
                next_line = document[new_i]
                if next_line.replace("|", "").strip():
                    dead_line = False
                    break

            next_line = next_line.replace("|", "").rstrip()

            # Skip a duplicated continuation line.
            if document[new_i + 1].replace("|", "").rstrip() == next_line:
                new_i += 1

            current_line = (
                current_line
                + " "
                + next_line.replace("|", "").strip().rstrip(" ")
            ).strip()
            i = new_i
            previous_line = document[i - 1]

        # Normalize "-item" bullets to "- item".
        if (
            current_line.replace("|", "").strip().startswith("-")
            and current_line[1] != " "
        ):
            current_line = current_line.replace("|", "").replace("-", "- ")

        # Empty pipe row strictly inside a table body.
        if (
            "|" in current_line
            and "|" in previous_line
            and "|" in next_line
            and "|:-" not in next_line
            and current_line.replace(" ", "").replace("|", "") == ""
        ):
            i += 1
            continue

        if query.include_tables is False and "|" in current_line:
            i += 1
            continue

        # Fix table header rows with missing dividers.
        # We can't fix all tables, but this helps with some.

        if (
            "|" in current_line
            and "|" not in previous_line
            and "|:-" not in next_line
        ) and current_line.count("|") > 2:
            n_bars = current_line.replace(" | | ", "|").count("|")
            inserted_line = ("|:------:" * (n_bars - 2)) + "|"

            document.insert(
                i + 1,
                inserted_line.replace(":------:", " ").strip()[1:-2],
            )
            document.insert(i + 2, inserted_line)
            current_line = current_line.replace("|", "").lstrip(" ") + "\n"

        elif (
            "|:-" in current_line
            and "|" not in previous_line
            and "|" in next_line
        ):
            inserted_line = current_line.replace("-", "").replace("::", " ")

            if previous_line.strip():
                inserted_line = "\n" + inserted_line

            document.insert(i - 1, inserted_line)
            cleaned_lines.append(inserted_line)

        if current_line.startswith("|:-") and not current_line.strip().endswith(
            "|"
        ):
            current_line = current_line + "|"

        # Detect table by empty header pattern
        if (
            i + 2 < len(document)
            and "|" in current_line
            and all(not cell.strip() for cell in current_line.split("|")[1:-1])
            and ":---" in document[i + 1]
        ):
            table_i = i + 2
            max_cols = 0
            # First pass - find max columns
            while table_i < len(document):
                if "|" not in document[table_i]:
                    break
                row = document[table_i].strip()
                if row and row != "|":
                    cols = count_columns_in_data_row(row)
                    max_cols = max(max_cols, cols)
                table_i += 1

            # Fix empty header row
            header_line = (
                "| " + " | ".join([" " for _ in range(max_cols)]) + " |"
            )
            cleaned_lines.append(header_line)

            # Fix separator row
            separator_line = (
                "|" + "|".join([":------:" for _ in range(max_cols)]) + "|"
            )
            cleaned_lines.append(separator_line)

            i += 2  # Skip original header and separator
        else:
            if current_line.strip().startswith("-"):
                current_line = current_line.replace("|", "")
                # Merge a bullet with its lowercase sentence continuation.
                if current_line.strip()[-1] not in (".", ";", ":") and (
                    (
                        next_line.replace("|", "").strip().islower()
                        and next_line.replace("|", "").strip().endswith(".")
                    )
                    or not next_line.strip()
                    and i + 2 < len(document)
                    and document[i + 2].replace("|", "").strip().endswith(".")
                ):
                    if not next_line.strip() and i + 2 <= len(document):
                        next_line = document[i + 2].strip()

                    current_line = (
                        current_line + " " + next_line.replace("|", "").strip()
                    )
                    cleaned_lines.append(current_line)
                    i += 2
                    continue
            # Check if this is a table row that needs padding
            current_line = current_line.replace(") (", ")|(")
            # Last bullet of a ", and"-joined list: split off the sentence.
            if (
                current_line.strip().startswith("-")
                and "|" not in current_line
                and "." in current_line
                and (
                    document[i - 1].strip().endswith(", and")
                    or document[i - 1].strip().endswith(" and")
                )
            ):
                clean_line = current_line.split(".")[0] + ".\n\n"
                if len(current_line.split(".")) > 1:
                    remaining = ". ".join(current_line.split(".")[1:])
                    clean_line += remaining + "\n"
                cleaned_lines.append(clean_line)
                i += 1
                continue

            # A lowercase "- ..." fragment continuing the previous line:
            # splice it onto the last emitted line.
            if current_line.strip().startswith("-") and (
                "|" not in current_line
                and not previous_line.replace("|", "")
                .strip()
                .endswith((";", ".", ":"))
                and current_line.strip()
                .replace("-", "")
                .replace(" ", "")
                .islower()
            ):
                old_line = cleaned_lines.pop(-1)
                if not old_line.strip("\n"):
                    old_line = cleaned_lines.pop(-2)

                cleaned_lines.append(
                    old_line.strip("\n")
                    + " "
                    + current_line.replace("-", "").strip()
                )

            elif "|" in current_line:
                current_line = current_line.replace("|)|", ")|").replace(
                    "| | (Dollars in ", "| (Dollars in "
                )
                if (
                    current_line in ("| |", "| |", "|")
                    or "form 10-k" in current_line.replace("|", "").lower()
                ):
                    i += 1
                    continue
                current_cols = count_columns_in_data_row(current_line)
                # max_cols persists from the most recent detected table.
                if max_cols and max_cols > 0 and current_cols != max_cols:
                    padded_line = pad_row_columns(current_line, max_cols)
                    cleaned_lines.append(padded_line.strip())
                else:
                    cleaned_lines.append(current_line)

            # Not a table row, keep unchanged
            else:
                cleaned_lines.append(current_line)
            i += 1

    return cleaned_lines
1361
+
1362
+ document = "\n".join(new_lines)
1363
+
1364
+ cleaned_lines = process_document(document.splitlines(), is_inscriptis)
1365
+
1366
+ finished_lines: list = []
1367
+
1368
+ i = 0
1369
+ for line in cleaned_lines:
1370
+ i += 1
1371
+ line = line.replace( # noqa
1372
+ "(amountsinmillions,exceptpershare,share,percentagesandwarehousecountdata) ",
1373
+ "",
1374
+ )
1375
+ if (
1376
+ "|" not in line
1377
+ and "#" not in line
1378
+ and is_title_case(line)
1379
+ and "|" not in cleaned_lines[i - 1]
1380
+ ):
1381
+ if "." in line and " " not in line:
1382
+ continue
1383
+ if len(finished_lines) > 1 and "|" not in finished_lines[-1]:
1384
+ finished_lines.append(
1385
+ f"## **{line.strip().replace('*', '').rstrip()}**"
1386
+ if line.strip().startswith("Item") or line.strip().isupper()
1387
+ else f"### **{line.strip().replace('*', '').rstrip()}**"
1388
+ )
1389
+ else:
1390
+ finished_lines.append(line)
1391
+
1392
+ data["content"] = "\n".join(finished_lines)
1393
+
1394
+ return SecManagementDiscussionAnalysisData(**data)
openbb_platform/providers/sec/openbb_sec/models/py.typed ADDED
File without changes
openbb_platform/providers/sec/openbb_sec/models/rss_litigation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Litigation RSS Feed Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import datetime
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.data import Data
10
+ from openbb_core.provider.abstract.fetcher import Fetcher
11
+ from openbb_core.provider.abstract.query_params import QueryParams
12
+ from openbb_sec.utils.definitions import HEADERS
13
+ from pydantic import Field
14
+
15
+
16
class SecRssLitigationQueryParams(QueryParams):
    """SEC Litigation RSS Feed Query.

    Source: https://sec.gov/

    The feed takes no parameters; this empty model exists so the fetcher
    conforms to the standard QueryParams interface.
    """
21
+
22
+
23
class SecRssLitigationData(Data):
    """SEC Litigation RSS Feed Data."""

    # Map the model field "published" to the "date" column produced by the
    # fetcher's DataFrame transformation of the RSS feed.
    __alias_dict__ = {
        "published": "date",
    }

    published: datetime = Field(description="The date of publication.")
    title: str = Field(description="The title of the release.")
    summary: str = Field(description="Short summary of the release.")
    id: str = Field(description="The identifier associated with the release.")
    link: str = Field(description="URL to the release.")
35
+
36
+
37
class SecRssLitigationFetcher(
    Fetcher[SecRssLitigationQueryParams, List[SecRssLitigationData]]
):
    """SEC RSS Litigation Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecRssLitigationQueryParams:
        """Transform the query."""
        return SecRssLitigationQueryParams(**params)

    @staticmethod
    def extract_data(
        query: SecRssLitigationQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint.

        Downloads the litigation-releases RSS feed, escapes bare
        ampersands so the XML parses, and flattens the feed items into
        records with columns title/link/summary/date/id.
        """
        # pylint: disable=import-outside-toplevel
        import re  # noqa
        import xmltodict
        from openbb_core.provider.utils.helpers import make_request
        from pandas import DataFrame, to_datetime

        results: List = []
        url = "https://www.sec.gov/enforcement-litigation/litigation-releases/rss"
        r = make_request(url, headers=HEADERS)

        if r.status_code != 200:
            raise OpenBBError(f"Status code {r.status_code} returned.")

        def clean_xml(xml_content):
            """Clean the XML content before parsing."""
            # Escape "&" characters that are not already part of a valid
            # XML entity; the SEC feed ships unescaped ampersands.
            xml_content = re.sub(r"&(?!amp;|lt;|gt;|quot;|apos;)", "&amp;", xml_content)
            return xml_content

        cleaned_content = clean_xml(r.text)
        data = xmltodict.parse(cleaned_content)
        # Rename source fields: description -> summary, pubDate -> date,
        # dc:creator -> id (the release identifier in this feed).
        # NOTE(review): xmltodict returns a dict (not a list) for "item"
        # when the feed holds a single entry — confirm DataFrame handling.
        cols = ["title", "link", "summary", "date", "id"]
        feed = DataFrame.from_records(data["rss"]["channel"]["item"])[
            ["title", "link", "description", "pubDate", "dc:creator"]
        ]
        feed.columns = cols
        feed["date"] = to_datetime(feed["date"], format="mixed")
        feed = feed.set_index("date")
        # Remove special characters
        for column in ["title", "summary"]:
            feed[column] = (
                feed[column]
                .replace(r"[^\w\s]|_", "", regex=True)
                .replace(r"\n", "", regex=True)
            )

        results = feed.reset_index().to_dict(orient="records")

        return results

    @staticmethod
    def transform_data(
        query: SecRssLitigationQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecRssLitigationData]:
        """Transform the data to the standard format."""
        return [SecRssLitigationData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/schema_files.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Schema Files List Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
11
+ from pydantic import Field
12
+
13
+
14
class SecSchemaFilesQueryParams(CotSearchQueryParams):
    """SEC Schema Files List Query.

    Source: https://sec.gov/

    Inherits the free-text `query` field from CotSearchQueryParams and
    adds navigation/caching controls for browsing the schema file tree.
    """

    # When set, the fetcher lists the next directory level under this URL
    # instead of the schema root.
    url: Optional[str] = Field(
        description="Enter an optional URL path to fetch the next level.", default=None
    )
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
+
28
+
29
class SecSchemaFilesData(Data):
    """SEC Schema Files List Data."""

    # The field holds a flat list of URL strings; the previous description
    # incorrectly called it a "Dictionary".
    files: List[str] = Field(description="List of URLs to SEC Schema Files")
33
+
34
+
35
class SecSchemaFilesFetcher(Fetcher[SecSchemaFilesQueryParams, SecSchemaFilesData]):
    """SEC Schema Files Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecSchemaFilesQueryParams:
        """Transform the query."""
        return SecSchemaFilesQueryParams(**params)

    @staticmethod
    def extract_data(
        query: SecSchemaFilesQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Lists the schema files at the requested level of the SEC schema
        tree. Raises OpenBBError when the URL points at an individual
        .xsd/.xml file, since this endpoint only lists directories and
        does not parse file contents.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import get_schema_filelist

        # Explicit grouping of the original `A and B or A and C` condition:
        # reject URLs that target a specific schema file.
        if query.url and (".xsd" in query.url or ".xml" in query.url):
            raise OpenBBError("Invalid URL. This endpoint does not parse the files.")
        results = get_schema_filelist(query.query, query.url)

        return {"files": results}

    @staticmethod
    def transform_data(
        query: SecSchemaFilesQueryParams, data: Dict, **kwargs: Any
    ) -> SecSchemaFilesData:
        """Transform the data to the standard format."""
        return SecSchemaFilesData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/sec_filing.py ADDED
@@ -0,0 +1,728 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Filing Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional, Union
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.data import Data
10
+ from openbb_core.provider.abstract.fetcher import Fetcher
11
+ from openbb_core.provider.abstract.query_params import QueryParams
12
+ from pydantic import ConfigDict, Field, PrivateAttr, computed_field
13
+
14
+
15
class SecFilingQueryParams(QueryParams):
    """SEC Filing Query Parameters."""

    # Workspace widget metadata: relabels the "url" field in generated UIs.
    __json_schema_extra__ = {
        "url": {
            "x-widget_config": {
                "label": "Filing URL",
            }
        }
    }

    # Any URL under the filing's accession directory is accepted; only the
    # base directory is derived from it.
    url: str = Field(
        default="",
        description="URL for the SEC filing."
        + " The specific URL is not directly used or downloaded,"
        + " but is used to generate the base URL for the filing."
        + " e.g. https://www.sec.gov/Archives/edgar/data/317540/000031754024000045/coke-20240731.htm"
        + " and https://www.sec.gov/Archives/edgar/data/317540/000031754024000045/"
        + " are both valid URLs for the same filing.",
    )
    use_cache: bool = Field(
        default=True,
        description="Use cache for the index headers and cover page. Default is True.",
    )
39
+
40
+
41
class SecFilingData(Data):
    """SEC Filing Data."""

    # For Workspace, ConfigDict is used to enter the widget configuration at the "$.data" level.
    # Here, we are using a subset of the data - the document URLs with direct links - to avoid nested data.
    # This creates column definitions for the target output while preserving the structure of the model.
    model_config = ConfigDict(
        json_schema_extra={
            "x-widget_config": {
                "dataKey": "results.document_urls",
                "table": {
                    "columnsDefs": [
                        {
                            "field": "sequence",
                            "headerName": "Sequence",
                            "headerTooltip": "The sequence of the document.",
                            "type": "number",
                            "pinned": "left",
                            "maxWidth": 105,
                        },
                        {
                            "field": "type",
                            "headerName": "Document Type",
                            "headerTooltip": "The type of document.",
                            "type": "text",
                            "maxWidth": 150,
                        },
                        {
                            "field": "filename",
                            "headerName": "Filename",
                            "headerTooltip": "The filename of the document.",
                            "type": "text",
                            "maxWidth": 250,
                        },
                        {
                            "field": "content_description",
                            "headerName": "Description",
                            "headerTooltip": "Description of the document.",
                            "type": "text",
                            "minWidth": 600,
                        },
                        {
                            "field": "url",
                            "headerName": "URL",
                            "headerTooltip": "The URL of the document.",
                            "type": "text",
                            "maxWidth": 75,
                        },
                    ],
                },
            }
        }
    )

    # Every scalar field below is excluded from the widget table via
    # x-widget_config (the table renders document_urls only); exclusion has
    # no effect on the API response itself.
    base_url: str = Field(
        title="Base URL",
        description="Base URL of the filing.",
        json_schema_extra={
            "x-widget_config": {
                "exclude": True
            }  # Tells the widget factory to exclude this field. Has no effect on endpoint.
        },
    )
    name: str = Field(
        title="Entity Name",
        description="Name of the entity filing.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    cik: str = Field(
        title="CIK",
        description="Central Index Key.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    trading_symbols: Optional[list] = Field(
        default=None,
        title="Trading Symbols",
        description="Trading symbols, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    sic: str = Field(
        title="SIC",
        description="Standard Industrial Classification.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    sic_organization_name: str = Field(
        title="SIC Organization",
        description="SIC Organization Name.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    filing_date: dateType = Field(
        title="Filing Date",
        description="Filing date.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    period_ending: Optional[dateType] = Field(
        default=None,
        title="Period Ending",
        description="Date of the ending period for the filing, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    fiscal_year_end: Optional[str] = Field(
        default=None,
        title="Fiscal Year End",
        description="Fiscal year end of the entity, if available. Format: MM-DD",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    document_type: str = Field(
        title="Document Type",
        description="Specific SEC filing type.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    has_cover_page: bool = Field(
        title="Has Cover Page",
        description="True if the filing has a cover page.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    description: Optional[str] = Field(
        default=None,
        title="Content Description",
        description="Description of attached content, mostly applicable to 8-K filings.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    cover_page: Optional[dict] = Field(
        default=None,
        title="Cover Page",
        description="Cover page information, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    # The only field surfaced in the Workspace table (see dataKey above).
    document_urls: list = Field(
        title="Document URLs",
        description="List of files associated with the filing.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
174
+
175
+
176
class SecBaseFiling(Data):  # pylint: disable=too-many-instance-attributes
    """Base SEC Filing model.

    Given a SEC EDGAR archive filing URL, downloads the filing's
    "-index-headers.htm" document, parses the header metadata (entity name,
    CIK, SIC, form type, dates, attached documents), and — when an inline-XBRL
    cover page report ("R1.htm") is among the documents — downloads and parses
    it for fiscal year/period, shares outstanding, and 12(b) securities.

    All parsed state is stored in private attributes and exposed read-only
    through pydantic computed fields.
    """

    # Normalized base archive URL; always ends with "/".
    _url: str = PrivateAttr(default="")
    # URL of the "-index-headers.htm" document derived from the accession number.
    _index_headers_url: str = PrivateAttr(default="")
    # Raw HTML of the index headers page, cached after the first download.
    _index_headers_download: str = PrivateAttr(default="")
    # List of dicts describing each attached document (type/sequence/filename/url).
    _document_urls: list = PrivateAttr(default=None)
    # Dates are stored as ISO "YYYY-MM-DD" strings; converted in the properties.
    _filing_date: str = PrivateAttr(default="")
    _period_ending: str = PrivateAttr(default="")
    _document_type: str = PrivateAttr(default="")
    _name: str = PrivateAttr(default="")
    _cik: str = PrivateAttr(default="")
    _sic: str = PrivateAttr(default="")
    _sic_organization_name: Optional[str] = PrivateAttr(default="")
    _description: Optional[str] = PrivateAttr(default=None)
    _cover_page_url: Optional[str] = PrivateAttr(default=None)
    # "MM-DD" string, parsed from the header's "FISCAL YEAR END" line.
    _fiscal_year_end: str = PrivateAttr(default="")
    _fiscal_period: str = PrivateAttr(default="")
    _cover_page: dict = PrivateAttr(default=None)
    _trading_symbols: list = PrivateAttr(default=None)
    _use_cache: bool = PrivateAttr(default=True)

    @computed_field(title="Base URL", description="Base URL of the filing.")  # type: ignore
    @property
    def base_url(self) -> str:
        """Base URL of the filing."""
        return self._url

    @computed_field(title="Entity Name", description="Name of the entity filing.")  # type: ignore
    @property
    def name(self) -> str:
        """Entity name."""
        return self._name

    @computed_field(title="CIK", description="Central Index Key.")  # type: ignore
    @property
    def cik(self) -> str:
        """Central Index Key."""
        return self._cik

    @computed_field(  # type: ignore
        title="Trading Symbols", description="Trading symbols, if available."
    )
    @property
    def trading_symbols(self) -> Optional[list]:
        """Trading symbols, if available."""
        return self._trading_symbols

    @computed_field(title="SIC", description="Standard Industrial Classification.")  # type: ignore
    @property
    def sic(self) -> str:
        """Standard Industrial Classification."""
        return self._sic

    @computed_field(title="SIC Organization", description="SIC Organization Name.")  # type: ignore
    @property
    def sic_organization_name(self) -> Optional[str]:
        """Standard Industrial Classification Organization Name."""
        return self._sic_organization_name

    @computed_field(title="Filing Date", description="Filing date.")  # type: ignore
    @property
    def filing_date(self) -> dateType:
        """Filing date."""
        return dateType.fromisoformat(self._filing_date)

    @computed_field(  # type: ignore
        title="Period Ending",
        description="Date of the ending period for the filing, if available.",
    )
    @property
    def period_ending(self) -> Optional[dateType]:
        """Date of the ending period for the filing."""
        if self._period_ending:
            return dateType.fromisoformat(self._period_ending)
        return None

    @computed_field(  # type: ignore
        title="Fiscal Year End",
        description="Fiscal year end of the entity, if available. Format: MM-DD",
    )
    @property
    def fiscal_year_end(self) -> Optional[str]:
        """Fiscal year end date of the entity."""
        return self._fiscal_year_end

    @computed_field(title="Document Type", description="Specific SEC filing type.")  # type: ignore
    @property
    def document_type(self) -> str:
        """Document type."""
        return self._document_type

    @computed_field(  # type: ignore
        title="Has Cover Page", description="True if the filing has a cover page."
    )
    @property
    def has_cover_page(self) -> bool:
        """True if the filing has a cover page."""
        # The cover page URL is set in __init__ when an "R1.htm" document exists.
        return bool(self._cover_page_url)

    @computed_field(  # type: ignore
        title="Cover Page", description="Cover page information, if available."
    )
    @property
    def cover_page(self) -> Optional[dict]:
        """Cover page information, if available."""
        return self._cover_page

    @computed_field(  # type: ignore
        title="Content Description",
        description="Description of attached content, mostly applicable to 8-K filings.",
    )
    @property
    def description(self) -> Optional[str]:
        """Document description, if available."""
        return self._description

    @computed_field(  # type: ignore
        title="Document URLs", description="List of files associated with the filing."
    )
    @property
    def document_urls(self) -> list:
        """List of document URLs."""
        return self._document_urls

    def __init__(self, url: str, use_cache: bool = True):
        """Initialize the Filing class.

        Parameters
        ----------
        url : str
            A SEC EDGAR archive filing URL containing "/data/<cik>/<accession>".
        use_cache : bool
            Whether to use the local SQLite HTTP cache for downloads.

        Raises
        ------
        ValueError
            If the URL is empty or does not look like an EDGAR filing URL.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_core.provider.utils.helpers import run_async
        from openbb_sec.utils.helpers import cik_map

        super().__init__()

        if not url:
            raise ValueError("Please enter a URL.")

        if "/data/" not in url:
            raise ValueError("Invalid SEC URL supplied, must be a filing URL.")

        # Second path segment after "/data/" is the accession number with the
        # dashes removed — always 18 characters.
        check_val: str = url.split("/data/")[1].split("/")[1]

        if len(check_val) != 18:
            raise ValueError("Invalid SEC URL supplied, must be a filing URL.")

        # Truncate anything after the accession segment and ensure a trailing "/".
        new_url = url.split(check_val)[0] + check_val + "/"

        # With the trailing "/", index -3 is the CIK segment; EDGAR archive
        # paths use the CIK without leading zeros.
        cik_check = new_url.split("/")[-3]
        new_url = new_url.replace(f"/{cik_check}/", f"/{cik_check.lstrip('0')}/")
        self._url = new_url
        self._use_cache = use_cache
        # Re-insert the dashes of the accession number:
        # "000119312524000001" -> "0001193125-24-000001-index-headers.htm"
        index_headers = (
            check_val[:-8]
            + "-"
            + check_val[-8:-6]
            + "-"
            + check_val[-6:]
            + "-index-headers.htm"
        )
        self._index_headers_url = self._url + index_headers
        self._download_index_headers()

        if self._document_urls:
            # "R1.htm" is the first inline-XBRL rendered report — the cover page.
            for doc in self._document_urls:
                if doc.get("url", "").endswith("R1.htm"):
                    self._cover_page_url = doc.get("url")
                    break

        if self.has_cover_page and not self._cover_page:
            self._download_cover_page()

        # Fall back to a CIK->symbol lookup when the cover page did not
        # provide any trading symbols.
        if not self._trading_symbols:
            symbol = run_async(cik_map, self._cik)
            if symbol:
                self._trading_symbols = [symbol]

    @staticmethod
    async def _adownload_file(url, use_cache: bool = True):
        """Download a file asynchronously from a SEC URL.

        When `use_cache` is True, responses are served through a SQLite-backed
        HTTP cache under the user cache directory.
        """
        # pylint: disable=import-outside-toplevel
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request
        from openbb_sec.utils.definitions import SEC_HEADERS
        from openbb_sec.utils.helpers import sec_callback

        response: Union[dict, list, str, None] = None
        if use_cache is True:
            cache_dir = f"{get_user_cache_directory()}/http/sec_filings"
            async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
                try:
                    # Prune stale cache entries before requesting.
                    await session.delete_expired_responses()
                    response = await amake_request(
                        url,
                        headers=SEC_HEADERS,
                        session=session,
                        response_callback=sec_callback,
                        raise_for_status=True,
                    )  # type: ignore
                finally:
                    await session.close()
        else:
            response = await amake_request(
                url,
                headers=SEC_HEADERS,
                response_callback=sec_callback,
                raise_for_status=True,
            )  # type: ignore

        return response

    @staticmethod
    def download_file(url, read_html_table: bool = False, use_cache: bool = True):
        """Download a file from a SEC URL.

        Parameters
        ----------
        url : str
            The file URL to download.
        read_html_table : bool
            If True and the URL is an HTML file, parse and return the tables
            via `try_html_table`; otherwise return the raw response.
        use_cache : bool
            Whether to use the local HTTP cache.

        Raises
        ------
        RuntimeError
            If the download (or table parsing) fails.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_core.provider.utils.helpers import run_async  # noqa
        from warnings import warn

        try:
            response = run_async(SecBaseFiling._adownload_file, url, use_cache)

            if read_html_table is True:
                if not url.endswith(".htm") and not url.endswith(".html"):
                    # Not an HTML file: warn and fall through to the raw response.
                    warn(f"File is not a HTML file: {url}")
                    return response

                return SecBaseFiling.try_html_table(response)

            return response

        except Exception as e:
            raise RuntimeError(f"Failed to download file: {e} -> {e.args}") from e

    @staticmethod
    def try_html_table(text: str, **kwargs) -> list:
        """Attempt to parse tables from a HTML string. All keyword arguments passed to `pandas.read_html`"""
        # pylint: disable=import-outside-toplevel
        from io import StringIO  # noqa
        from pandas import read_html

        try:
            # Returns a list of DataFrames, one per <table> found.
            return read_html(StringIO(text), **kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to parse table: {e}") from e

    def _download_index_headers(
        self,
    ):  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        """Download the index headers table.

        Parses the <pre> section of the "-index-headers.htm" page: the
        <DOCUMENT> tags become `_document_urls`, and the "KEY: value" header
        lines populate the filing metadata private attributes.
        """
        # pylint: disable=import-outside-toplevel
        import re  # noqa
        from bs4 import BeautifulSoup

        try:
            # Reuse the raw page if it was already downloaded.
            if not self._index_headers_download:
                response = self.download_file(
                    self._index_headers_url, False, self._use_cache
                )
                self._index_headers_download = response
            else:
                response = self._index_headers_download

            soup = BeautifulSoup(response, "html.parser")
            text = soup.find("pre").text

            def document_to_dict(doc):
                """Convert the document section to a dictionary."""
                # NOTE(review): assumes <TYPE>, <SEQUENCE>, and <FILENAME> are
                # always present — a missing tag raises AttributeError here,
                # which is caught and re-raised as RuntimeError below.
                doc_dict: dict = {}
                doc_dict["type"] = re.search(r"<TYPE>(.*?)\n", doc).group(1).strip()  # type: ignore
                doc_dict["sequence"] = (
                    re.search(r"<SEQUENCE>(.*?)\n", doc).group(1).strip()  # type: ignore
                )
                doc_dict["filename"] = (
                    re.search(r"<FILENAME>(.*?)\n", doc).group(1).strip()  # type: ignore
                )
                description_match = re.search(r"<DESCRIPTION>(.*?)\n", doc)

                if description_match:
                    doc_dict["description"] = description_match.group(1).strip()

                url = self.base_url + doc_dict["filename"]
                doc_dict["url"] = url

                return doc_dict

            # Isolate each document by tag
            documents = re.findall(r"<DOCUMENT>.*?</DOCUMENT>", text, re.DOTALL)
            # Convert each document to a dictionary
            document_dicts = [document_to_dict(doc) for doc in documents]

            if document_dicts:
                self._document_urls = document_dicts

            lines = text.split("\n")
            n_items = 0

            for line in lines:

                if ":" not in line:
                    continue

                # NOTE(review): only the text between the first and second
                # colon is kept — values containing a colon are truncated.
                value = line.split(":")[1].strip()

                # Stop early once the expected number of header items is seen.
                # (The period-of-report branch intentionally does not bump the
                # counter, matching the original behavior.)
                if n_items == 9:
                    break

                if "CONFORMED PERIOD OF REPORT" in line:
                    # Dates arrive as "YYYYMMDD"; store as ISO "YYYY-MM-DD".
                    as_of_date = value
                    self._period_ending = (
                        as_of_date[:4] + "-" + as_of_date[4:6] + "-" + as_of_date[6:]
                    )
                elif "FILED AS OF DATE" in line:
                    filing_date = value
                    self._filing_date = (
                        filing_date[:4] + "-" + filing_date[4:6] + "-" + filing_date[6:]
                    )
                    n_items += 1
                elif "COMPANY CONFORMED NAME" in line:
                    self._name = value
                    n_items += 1
                elif "CONFORMED SUBMISSION TYPE" in line:
                    self._document_type = value
                    n_items += 1
                elif "CENTRAL INDEX KEY" in line:
                    self._cik = value
                    n_items += 1
                elif "STANDARD INDUSTRIAL CLASSIFICATION" in line:
                    self._sic = value
                    n_items += 1
                elif "ORGANIZATION NAME" in line:
                    self._sic_organization_name = value
                    n_items += 1
                elif "FISCAL YEAR END" in line:
                    # "MMDD" -> "MM-DD"
                    fy = value
                    self._fiscal_year_end = fy[:2] + "-" + fy[2:]
                    n_items += 1
                # There might be two lines of ITEM INFORMATION
                elif "ITEM INFORMATION" in line:
                    info = value
                    self._description = (
                        self._description + "; " + info if self._description else info
                    )
                    n_items += 1
                continue

        except Exception as e:
            raise RuntimeError(
                f"Failed to download and read the index headers table: {e}"
            ) from e

    @staticmethod
    def _multiplier_map(string) -> int:  # pylint: disable=too-many-return-statements
        """Map a string to a multiplier.

        Converts unit labels found in cover-page titles (e.g. "shares in
        Millions") to numeric multipliers; unknown labels map to 1.
        """
        if string.lower() == "millions":
            return 1000000
        if string.lower() == "hundreds of thousands":
            return 100000
        if string.lower() == "tens of thousands":
            return 10000
        if string.lower() == "thousands":
            return 1000
        if string.lower() == "hundreds":
            return 100
        if string.lower() == "tens":
            return 10
        return 1

    def _download_cover_page(
        self,
    ):  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        """Download the cover page table.

        Parses the "R1.htm" inline-XBRL cover page for fiscal year/period,
        shares outstanding, 12(b) securities, and a flat key/value summary
        stored in `_cover_page`.
        """
        # pylint: disable=import-outside-toplevel
        from pandas import MultiIndex, to_datetime

        symbols_list: list = []
        try:
            response = self.download_file(self._cover_page_url, True, self._use_cache)
            if not response:
                raise RuntimeError("Failed to download cover page table")
            df = response[0]
            if isinstance(df.columns, MultiIndex):
                # Drop the top header level (the document title row).
                df = df.droplevel(0, axis=1)

            if df.empty or len(df) < 1:
                raise RuntimeError("Failed to read cover page table")

            fiscal_year = df[df.iloc[:, 0] == "Document Fiscal Year Focus"]

            if not fiscal_year.empty:
                fiscal_year = fiscal_year.iloc[:, 1].values[0]
            elif fiscal_year.empty:
                fiscal_year = None

            if fiscal_year:
                # NOTE(review): `_fiscal_year` is not declared as a PrivateAttr
                # on this class — confirm pydantic accepts this assignment.
                self._fiscal_year = fiscal_year

            fiscal_period = df[df.iloc[:, 0] == "Document Fiscal Period Focus"]

            if not fiscal_period.empty:
                fiscal_period = fiscal_period.iloc[:, 1].values[0]
            elif fiscal_period.empty:
                fiscal_period = None

            if fiscal_period:
                self._fiscal_period = fiscal_period

            title = (
                df.columns[0][0]
                if isinstance(df.columns, MultiIndex)
                else df.columns[0]
            )

            # Titles like "... - shares in Thousands" carry a unit label for
            # the shares-outstanding column.
            if title and "- shares" in title:
                shares_multiplier = title.split(" shares in ")[-1]
                multiplier = self._multiplier_map(shares_multiplier)
                shares_outstanding = (
                    df[df.iloc[:, 0].str.contains("Shares Outstanding")]
                    .iloc[:, 2]
                    .values[0]
                )
                as_of_date = (
                    df.columns[2][1]
                    if isinstance(df.columns, MultiIndex)
                    else df.columns[2]
                )

                if as_of_date and shares_outstanding:
                    # NOTE(review): `_shares_outstanding` is also not declared
                    # as a PrivateAttr — confirm it is surfaced elsewhere.
                    self._shares_outstanding = {
                        to_datetime(as_of_date).strftime("%Y-%m-%d"): int(
                            shares_outstanding * multiplier
                        )
                    }

            if not df.empty:
                # Rows describing listed securities: symbol, title, exchange.
                trading_symbols_df = df[
                    df.iloc[:, 0]
                    .astype(str)
                    .str.lower()
                    .isin(["trading symbol", "no trading symbol flag"])
                ]
                symbols_dict: dict = {}
                trading_symbols = (
                    trading_symbols_df.iloc[:, 1]
                    .str.strip()
                    .str.replace("true", "No Trading Symbol")
                    .tolist()
                )
                symbol_names = (
                    df[
                        df.iloc[:, 0].astype(str).str.strip()
                        == "Title of 12(b) Security"
                    ]
                    .iloc[:, 1]
                    .tolist()
                )
                exchange_names = (
                    df[
                        df.iloc[:, 0].astype(str).str.strip()
                        == "Security Exchange Name"
                    ]
                    .iloc[:, 1]
                    .fillna("No Exchange")
                    .tolist()
                )
                if trading_symbols:
                    self._trading_symbols = sorted(
                        [d for d in trading_symbols if d and d != "No Trading Symbol"]
                    )
                    symbols_dict = dict(zip(symbol_names, trading_symbols))
                    exchanges_dict = dict(zip(symbol_names, exchange_names))

                    for k, v in symbols_dict.items():
                        symbols_list.append(
                            {
                                "Title": k,
                                "Symbol": v,
                                "Exchange": exchanges_dict.get(k, "No Exchange"),
                            }
                        )

                # Flatten the remaining rows into a key/value dict.
                df.columns = [d[1] if isinstance(d, tuple) else d for d in df.columns]
                df = df.iloc[:, :2].dropna(how="any")
                df.columns = ["key", "value"]
                output = df.set_index("key").to_dict()["value"]

                # Fall back to the SIC parsed from the index headers.
                if not output.get("SIC") and self._sic:
                    output["SIC"] = self._sic
                    output["SIC Organization Name"] = self.sic_organization_name

                # Remove per-security rows already captured in symbols_list.
                for k, v in output.copy().items():
                    if k in [
                        "Title of 12(b) Security",
                        "Trading Symbol",
                        "Security Exchange Name",
                        "No Trading Symbol Flag",
                    ]:
                        del output[k]

                if symbols_list:
                    output["12(b) Securities"] = symbols_list

                self._cover_page = output

        except IndexError:
            # Cover page tables vary by form type; missing cells are tolerated.
            pass

        except Exception as e:
            raise RuntimeError(
                f"Failed to download and read the cover page table: {e}"
            ) from e

    def __repr__(self):
        """Return the string representation of the class."""
        repr_str = "SEC Filing(\n"

        # Describe each computed field: name, return type, and description.
        for k, v in self.model_computed_fields.items():
            if not v:
                continue
            repr_str += f"    {k} : {v.return_type.__name__} - {v.description}\n"

        repr_str += ")"

        return repr_str
699
+
700
+
701
class SecFilingFetcher(Fetcher[SecFilingQueryParams, SecFilingData]):
    """SEC Filing Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecFilingQueryParams:
        """Validate the raw parameters against the query model."""
        return SecFilingQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecFilingQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> dict:
        """Build the filing object from the supplied URL and dump it to a dict.

        Any failure while downloading or parsing is re-raised as OpenBBError.
        """
        try:
            filing = SecBaseFiling(query.url, query.use_cache)
        except Exception as exc:  # pylint: disable=broad-except
            raise OpenBBError(exc) from exc

        return filing.model_dump(exclude_none=True)

    @staticmethod
    def transform_data(
        query: SecFilingQueryParams, data: dict, **kwargs: Any
    ) -> SecFilingData:
        """Validate the raw dictionary against the data model."""
        return SecFilingData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/sic_search.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Standard Industrial Classification Code (SIC) Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from openbb_core.provider.abstract.data import Data
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
10
+ from pydantic import Field
11
+
12
+
13
class SecSicSearchQueryParams(CotSearchQueryParams):
    """SEC Standard Industrial Classification Code (SIC) Query.

    Source: https://sec.gov/
    """

    # The free-text search string itself is presumably inherited as `query`
    # from CotSearchQueryParams — the fetcher reads `query.query`.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
23
+
24
+
25
class SecSicSearchData(Data):
    """SEC Standard Industrial Classification Code (SIC) Data."""

    # Maps model field names to the column headers of the scraped SEC table.
    __alias_dict__ = {
        "sic": "SIC Code",
        "industry": "Industry Title",
        "office": "Office",
    }

    sic: int = Field(description="Sector Industrial Code (SIC)")
    industry: str = Field(description="Industry title.")
    office: str = Field(
        description="Reporting office within the Corporate Finance Office"
    )
39
+
40
+
41
class SecSicSearchFetcher(
    Fetcher[
        SecSicSearchQueryParams,
        List[SecSicSearchData],
    ]
):
    """SEC SIC Search Fetcher.

    Scrapes the SIC code table from the SEC Division of Corporation Finance
    page and filters the rows against the query string.
    """

    @staticmethod
    def transform_query(
        params: Dict[str, Any], **kwargs: Any
    ) -> SecSicSearchQueryParams:
        """Transform the query."""
        return SecSicSearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecSicSearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Extract data from the SEC website table.

        Downloads the page (optionally through a 30-day SQLite HTTP cache),
        parses the first HTML table, and filters on SIC code, office, or
        industry title.
        """
        # pylint: disable=import-outside-toplevel
        from io import StringIO  # noqa
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request
        from openbb_sec.utils.helpers import SEC_HEADERS, sec_callback
        from pandas import read_html

        results: List[Dict] = []
        url = (
            "https://www.sec.gov/corpfin/"
            "division-of-corporation-finance-standard-industrial-classification-sic-code-list"
        )
        response: Union[dict, List[dict], str] = {}
        if query.use_cache is True:
            # The SIC list changes rarely; cache the page for 30 days.
            cache_dir = f"{get_user_cache_directory()}/http/sec_sic"
            async with CachedSession(
                cache=SQLiteBackend(cache_dir, expire_after=3600 * 24 * 30)
            ) as session:
                try:
                    response = await amake_request(
                        url, headers=SEC_HEADERS, session=session, response_callback=sec_callback  # type: ignore
                    )
                finally:
                    await session.close()
        else:
            response = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback)  # type: ignore

        # Wrap the HTML text in StringIO: passing a literal HTML string to
        # read_html is deprecated since pandas 2.1.
        data = read_html(StringIO(response))[0].astype(str)
        if len(data) == 0:
            return results
        # Fix: the original tested `if query:`, which is always True for a
        # model instance (and raised TypeError when `query.query` was None).
        # An empty pattern matches every row, so skipping the filter for a
        # falsy query string preserves the effective behavior.
        if query.query:
            data = data[
                data["SIC Code"].str.contains(query.query, case=False)
                | data["Office"].str.contains(query.query, case=False)
                | data["Industry Title"].str.contains(query.query, case=False)
            ]
        data["SIC Code"] = data["SIC Code"].astype(int)
        results = data.to_dict("records")

        return results

    @staticmethod
    def transform_data(
        query: SecSicSearchQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecSicSearchData]:
        """Transform the data."""
        return [SecSicSearchData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/symbol_map.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Symbol Mapping Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.symbol_map import SymbolMapQueryParams
11
+ from openbb_core.provider.utils.descriptions import DATA_DESCRIPTIONS
12
+ from pydantic import Field
13
+
14
+
15
class SecSymbolMapQueryParams(SymbolMapQueryParams):
    """SEC Symbol Mapping Query.

    Source: https://sec.gov/
    """

    # No provider-specific fields; the standard model supplies `query`
    # (a CIK string) and, presumably, `use_cache` — both read by the fetcher.
+ """
20
+
21
+
22
class SecSymbolMapData(Data):
    """SEC symbol map Data."""

    # Ticker symbol resolved from the CIK supplied in the query.
    symbol: str = Field(description=DATA_DESCRIPTIONS.get("symbol", ""))
26
+
27
+
28
class SecSymbolMapFetcher(
    Fetcher[
        SecSymbolMapQueryParams,
        SecSymbolMapData,
    ]
):
    """Transform the query, extract and transform the data from the SEC endpoints."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecSymbolMapQueryParams:
        """Validate the raw parameters against the query model."""
        return SecSymbolMapQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecSymbolMapQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Resolve the ticker symbol for the CIK given in the query."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import cik_map

        cik = query.query
        # The query must be a purely numeric CIK string.
        if not cik.isdigit():
            raise OpenBBError("Query is required and must be a valid CIK.")
        return {"symbol": await cik_map(int(cik), query.use_cache)}

    @staticmethod
    def transform_data(
        query: SecSymbolMapQueryParams, data: Dict, **kwargs: Any
    ) -> SecSymbolMapData:
        """Validate the raw data against the data model."""
        return SecSymbolMapData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/py.typed ADDED
File without changes
openbb_platform/providers/sec/openbb_sec/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Utils."""
openbb_platform/providers/sec/openbb_sec/utils/definitions.py ADDED
@@ -0,0 +1,1350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Definitions and Models."""
2
+
3
+ # pylint: disable=too-many-lines
4
+
5
+ from typing import Dict, Literal
6
+
7
+ QUARTERS = Literal[1, 2, 3, 4]
8
+
9
+ SEC_HEADERS: Dict[str, str] = {
10
+ "User-Agent": "my real company name definitelynot@fakecompany.com",
11
+ "Accept-Encoding": "gzip, deflate",
12
+ "Host": "www.sec.gov",
13
+ }
14
+
15
+ # Some endpoints don't like the Host header.
16
+
17
+ HEADERS: Dict[str, str] = {
18
+ "User-Agent": "my real company name definitelynot@fakecompany.com",
19
+ "Accept-Encoding": "gzip, deflate",
20
+ }
21
+
22
+
23
# Closed set of SEC submission form types accepted by the filings endpoints.
# NOTE(review): names appear to mirror EDGAR form identifiers with spaces
# replaced by underscores (e.g. "DEF_14A") — confirm against the consumer
# that maps these back to EDGAR form names.
FORM_TYPES = Literal[
    "1", "1-A", "1-A_POS", "1-A-W", "1-E", "1-E_AD", "1-K", "1-SA",
    "1-U", "1-Z", "1-Z-W", "10-12B", "10-12G", "10-D", "10-K", "10-KT",
    "10-Q", "10-QT", "11-K", "11-KT", "13F-HR", "13F-NT", "13FCONP", "144",
    "15-12B", "15-12G", "15-15D", "15F-12B", "15F-12G", "15F-15D", "18-12B", "18-K",
    "19B-4E", "2-A", "2-AF", "2-E", "20-F", "20FR12B", "20FR12G", "24F-2NT",
    "25", "25-NSE", "253G1", "253G2", "253G3", "253G4", "3", "305B2",
    "34-12H", "4", "40-17F1", "40-17F2", "40-17G", "40-17GCS", "40-202A", "40-203A",
    "40-206A", "40-24B2", "40-33", "40-6B", "40-8B25", "40-8F-2", "40-APP", "40-F",
    "40-OIP", "40FR12B", "40FR12G", "424A", "424B1", "424B2", "424B3", "424B4",
    "424B5", "424B7", "424B8", "424H", "425", "485APOS", "485BPOS", "485BXT",
    "486APOS", "486BPOS", "486BXT", "487", "497", "497AD", "497H2", "497J",
    "497K", "497VPI", "497VPU", "5", "6-K", "6B_NTC", "6B_ORDR", "8-A12B",
    "8-A12G", "8-K", "8-K12B", "8-K12G3", "8-K15D5", "8-M", "8F-2_NTC", "8F-2_ORDR",
    "9-M", "ABS-15G", "ABS-EE", "ADN-MTL", "ADV-E", "ADV-H-C", "ADV-H-T", "ADV-NR",
    "ANNLRPT", "APP_NTC", "APP_ORDR", "APP_WD", "APP_WDG", "ARS", "ATS-N", "ATS-N-C",
    "ATS-N/UA", "AW", "AW_WD", "C", "C-AR", "C-AR-W", "C-TR", "C-TR-W",
    "C-U", "C-U-W", "C-W", "CB", "CERT", "CERTARCA", "CERTBATS", "CERTCBO",
    "CERTNAS", "CERTNYS", "CERTPAC", "CFPORTAL", "CFPORTAL-W", "CORRESP", "CT_ORDER", "D",
    "DEF_14A", "DEF_14C", "DEFA14A", "DEFA14C", "DEFC14A", "DEFC14C", "DEFM14A", "DEFM14C",
    "DEFN14A", "DEFR14A", "DEFR14C", "DEL_AM", "DFAN14A", "DFRN14A", "DOS", "DOSLTR",
    "DRS", "DRSLTR", "DSTRBRPT", "EFFECT", "F-1", "F-10", "F-10EF", "F-10POS",
    "F-1MEF", "F-3", "F-3ASR", "F-3D", "F-3DPOS", "F-3MEF", "F-4", "F-4_POS",
    "F-4MEF", "F-6", "F-6_POS", "F-6EF", "F-7", "F-7_POS", "F-8", "F-8_POS",
    "F-80", "F-80POS", "F-9", "F-9_POS", "F-N", "F-X", "FOCUSN", "FWP",
    "G-405", "G-405N", "G-FIN", "G-FINW", "IRANNOTICE", "MA", "MA-A", "MA-I",
    "MA-W", "MSD", "MSDCO", "MSDW", "N-1", "N-14", "N-14_8C", "N-14MEF",
    "N-18F1", "N-1A", "N-2", "N-2_POSASR", "N-23C-2", "N-23C3A", "N-23C3B", "N-23C3C",
    "N-2ASR", "N-2MEF", "N-30B-2", "N-30D", "N-4", "N-5", "N-54A", "N-54C",
    "N-6", "N-6F", "N-8A", "N-8B-2", "N-8F", "N-8F_NTC", "N-8F_ORDR", "N-CEN",
    "N-CR", "N-CSR", "N-CSRS", "N-MFP", "N-MFP1", "N-MFP2", "N-PX", "N-Q",
    "N-VP", "N-VPFS", "NO_ACT", "NPORT-EX", "NPORT-NP", "NPORT-P", "NRSRO-CE", "NRSRO-UPD",
    "NSAR-A", "NSAR-AT", "NSAR-B", "NSAR-BT", "NSAR-U", "NT_10-D", "NT_10-K", "NT_10-Q",
    "NT_11-K", "NT_20-F", "NT_N-CEN", "NT_N-MFP", "NT_N-MFP1", "NT_N-MFP2", "NT_NPORT-EX", "NT_NPORT-P",
    "NT-NCEN", "NT-NCSR", "NT-NSAR", "NTFNCEN", "NTFNCSR", "NTFNSAR", "NTN_10D", "NTN_10K",
    "NTN_10Q", "NTN_20F", "OIP_NTC", "OIP_ORDR", "POS_8C", "POS_AM", "POS_AMI", "POS_EX",
    "POS462B", "POS462C", "POSASR", "PRE_14A", "PRE_14C", "PREC14A", "PREC14C", "PREM14A",
    "PREM14C", "PREN14A", "PRER14A", "PRER14C", "PRRN14A", "PX14A6G", "PX14A6N", "QRTLYRPT",
    "QUALIF", "REG-NR", "REVOKED", "RW", "RW_WD", "S-1", "S-11", "S-11MEF",
    "S-1MEF", "S-20", "S-3", "S-3ASR", "S-3D", "S-3DPOS", "S-3MEF", "S-4",
    "S-4_POS", "S-4EF", "S-4MEF", "S-6", "S-8", "S-8_POS", "S-B", "S-BMEF",
    "SBSE", "SBSE-A", "SBSE-BD", "SBSE-C", "SBSE-W", "SC_13D", "SC_13E1", "SC_13E3",
    "SC_13G", "SC_14D9", "SC_14F1", "SC_14N", "SC_TO-C", "SC_TO-I", "SC_TO-T", "SC13E4F",
    "SC14D1F", "SC14D9C", "SC14D9F", "SD", "SDR", "SE", "SEC_ACTION", "SEC_STAFF_ACTION",
    "SEC_STAFF_LETTER", "SF-1", "SF-3", "SL", "SP_15D2", "STOP_ORDER", "SUPPL", "T-3",
    "TA-1", "TA-2", "TA-W", "TACO", "TH", "TTW", "UNDER", "UPLOAD",
    "WDL-REQ", "X-17A-5",
]
387
+
388
+ FORM_LIST = [
389
+ "1",
390
+ "1-A",
391
+ "1-A_POS",
392
+ "1-A-W",
393
+ "1-E",
394
+ "1-E_AD",
395
+ "1-K",
396
+ "1-SA",
397
+ "1-U",
398
+ "1-Z",
399
+ "1-Z-W",
400
+ "10-12B",
401
+ "10-12G",
402
+ "10-D",
403
+ "10-K",
404
+ "10-KT",
405
+ "10-Q",
406
+ "10-QT",
407
+ "11-K",
408
+ "11-KT",
409
+ "13F-HR",
410
+ "13F-NT",
411
+ "13FCONP",
412
+ "144",
413
+ "15-12B",
414
+ "15-12G",
415
+ "15-15D",
416
+ "15F-12B",
417
+ "15F-12G",
418
+ "15F-15D",
419
+ "18-12B",
420
+ "18-K",
421
+ "19B-4E",
422
+ "2-A",
423
+ "2-AF",
424
+ "2-E",
425
+ "20-F",
426
+ "20FR12B",
427
+ "20FR12G",
428
+ "24F-2NT",
429
+ "25",
430
+ "25-NSE",
431
+ "253G1",
432
+ "253G2",
433
+ "253G3",
434
+ "253G4",
435
+ "3",
436
+ "305B2",
437
+ "34-12H",
438
+ "4",
439
+ "40-17F1",
440
+ "40-17F2",
441
+ "40-17G",
442
+ "40-17GCS",
443
+ "40-202A",
444
+ "40-203A",
445
+ "40-206A",
446
+ "40-24B2",
447
+ "40-33",
448
+ "40-6B",
449
+ "40-8B25",
450
+ "40-8F-2",
451
+ "40-APP",
452
+ "40-F",
453
+ "40-OIP",
454
+ "40FR12B",
455
+ "40FR12G",
456
+ "424A",
457
+ "424B1",
458
+ "424B2",
459
+ "424B3",
460
+ "424B4",
461
+ "424B5",
462
+ "424B7",
463
+ "424B8",
464
+ "424H",
465
+ "425",
466
+ "485APOS",
467
+ "485BPOS",
468
+ "485BXT",
469
+ "486APOS",
470
+ "486BPOS",
471
+ "486BXT",
472
+ "487",
473
+ "497",
474
+ "497AD",
475
+ "497H2",
476
+ "497J",
477
+ "497K",
478
+ "497VPI",
479
+ "497VPU",
480
+ "5",
481
+ "6-K",
482
+ "6B_NTC",
483
+ "6B_ORDR",
484
+ "8-A12B",
485
+ "8-A12G",
486
+ "8-K",
487
+ "8-K12B",
488
+ "8-K12G3",
489
+ "8-K15D5",
490
+ "8-M",
491
+ "8F-2_NTC",
492
+ "8F-2_ORDR",
493
+ "9-M",
494
+ "ABS-15G",
495
+ "ABS-EE",
496
+ "ADN-MTL",
497
+ "ADV-E",
498
+ "ADV-H-C",
499
+ "ADV-H-T",
500
+ "ADV-NR",
501
+ "ANNLRPT",
502
+ "APP_NTC",
503
+ "APP_ORDR",
504
+ "APP_WD",
505
+ "APP_WDG",
506
+ "ARS",
507
+ "ATS-N",
508
+ "ATS-N-C",
509
+ "ATS-N/UA",
510
+ "AW",
511
+ "AW_WD",
512
+ "C",
513
+ "C-AR",
514
+ "C-AR-W",
515
+ "C-TR",
516
+ "C-TR-W",
517
+ "C-U",
518
+ "C-U-W",
519
+ "C-W",
520
+ "CB",
521
+ "CERT",
522
+ "CERTARCA",
523
+ "CERTBATS",
524
+ "CERTCBO",
525
+ "CERTNAS",
526
+ "CERTNYS",
527
+ "CERTPAC",
528
+ "CFPORTAL",
529
+ "CFPORTAL-W",
530
+ "CORRESP",
531
+ "CT_ORDER",
532
+ "D",
533
+ "DEF_14A",
534
+ "DEF_14C",
535
+ "DEFA14A",
536
+ "DEFA14C",
537
+ "DEFC14A",
538
+ "DEFC14C",
539
+ "DEFM14A",
540
+ "DEFM14C",
541
+ "DEFN14A",
542
+ "DEFR14A",
543
+ "DEFR14C",
544
+ "DEL_AM",
545
+ "DFAN14A",
546
+ "DFRN14A",
547
+ "DOS",
548
+ "DOSLTR",
549
+ "DRS",
550
+ "DRSLTR",
551
+ "DSTRBRPT",
552
+ "EFFECT",
553
+ "F-1",
554
+ "F-10",
555
+ "F-10EF",
556
+ "F-10POS",
557
+ "F-1MEF",
558
+ "F-3",
559
+ "F-3ASR",
560
+ "F-3D",
561
+ "F-3DPOS",
562
+ "F-3MEF",
563
+ "F-4",
564
+ "F-4_POS",
565
+ "F-4MEF",
566
+ "F-6",
567
+ "F-6_POS",
568
+ "F-6EF",
569
+ "F-7",
570
+ "F-7_POS",
571
+ "F-8",
572
+ "F-8_POS",
573
+ "F-80",
574
+ "F-80POS",
575
+ "F-9",
576
+ "F-9_POS",
577
+ "F-N",
578
+ "F-X",
579
+ "FOCUSN",
580
+ "FWP",
581
+ "G-405",
582
+ "G-405N",
583
+ "G-FIN",
584
+ "G-FINW",
585
+ "IRANNOTICE",
586
+ "MA",
587
+ "MA-A",
588
+ "MA-I",
589
+ "MA-W",
590
+ "MSD",
591
+ "MSDCO",
592
+ "MSDW",
593
+ "N-1",
594
+ "N-14",
595
+ "N-14_8C",
596
+ "N-14MEF",
597
+ "N-18F1",
598
+ "N-1A",
599
+ "N-2",
600
+ "N-2_POSASR",
601
+ "N-23C-2",
602
+ "N-23C3A",
603
+ "N-23C3B",
604
+ "N-23C3C",
605
+ "N-2ASR",
606
+ "N-2MEF",
607
+ "N-30B-2",
608
+ "N-30D",
609
+ "N-4",
610
+ "N-5",
611
+ "N-54A",
612
+ "N-54C",
613
+ "N-6",
614
+ "N-6F",
615
+ "N-8A",
616
+ "N-8B-2",
617
+ "N-8F",
618
+ "N-8F_NTC",
619
+ "N-8F_ORDR",
620
+ "N-CEN",
621
+ "N-CR",
622
+ "N-CSR",
623
+ "N-CSRS",
624
+ "N-MFP",
625
+ "N-MFP1",
626
+ "N-MFP2",
627
+ "N-PX",
628
+ "N-Q",
629
+ "N-VP",
630
+ "N-VPFS",
631
+ "NO_ACT",
632
+ "NPORT-EX",
633
+ "NPORT-NP",
634
+ "NPORT-P",
635
+ "NRSRO-CE",
636
+ "NRSRO-UPD",
637
+ "NSAR-A",
638
+ "NSAR-AT",
639
+ "NSAR-B",
640
+ "NSAR-BT",
641
+ "NSAR-U",
642
+ "NT_10-D",
643
+ "NT_10-K",
644
+ "NT_10-Q",
645
+ "NT_11-K",
646
+ "NT_20-F",
647
+ "NT_N-CEN",
648
+ "NT_N-MFP",
649
+ "NT_N-MFP1",
650
+ "NT_N-MFP2",
651
+ "NT_NPORT-EX",
652
+ "NT_NPORT-P",
653
+ "NT-NCEN",
654
+ "NT-NCSR",
655
+ "NT-NSAR",
656
+ "NTFNCEN",
657
+ "NTFNCSR",
658
+ "NTFNSAR",
659
+ "NTN_10D",
660
+ "NTN_10K",
661
+ "NTN_10Q",
662
+ "NTN_20F",
663
+ "OIP_NTC",
664
+ "OIP_ORDR",
665
+ "POS_8C",
666
+ "POS_AM",
667
+ "POS_AMI",
668
+ "POS_EX",
669
+ "POS462B",
670
+ "POS462C",
671
+ "POSASR",
672
+ "PRE_14A",
673
+ "PRE_14C",
674
+ "PREC14A",
675
+ "PREC14C",
676
+ "PREM14A",
677
+ "PREM14C",
678
+ "PREN14A",
679
+ "PRER14A",
680
+ "PRER14C",
681
+ "PRRN14A",
682
+ "PX14A6G",
683
+ "PX14A6N",
684
+ "QRTLYRPT",
685
+ "QUALIF",
686
+ "REG-NR",
687
+ "REVOKED",
688
+ "RW",
689
+ "RW_WD",
690
+ "S-1",
691
+ "S-11",
692
+ "S-11MEF",
693
+ "S-1MEF",
694
+ "S-20",
695
+ "S-3",
696
+ "S-3ASR",
697
+ "S-3D",
698
+ "S-3DPOS",
699
+ "S-3MEF",
700
+ "S-4",
701
+ "S-4_POS",
702
+ "S-4EF",
703
+ "S-4MEF",
704
+ "S-6",
705
+ "S-8",
706
+ "S-8_POS",
707
+ "S-B",
708
+ "S-BMEF",
709
+ "SBSE",
710
+ "SBSE-A",
711
+ "SBSE-BD",
712
+ "SBSE-C",
713
+ "SBSE-W",
714
+ "SC_13D",
715
+ "SC_13E1",
716
+ "SC_13E3",
717
+ "SC_13G",
718
+ "SC_14D9",
719
+ "SC_14F1",
720
+ "SC_14N",
721
+ "SC_TO-C",
722
+ "SC_TO-I",
723
+ "SC_TO-T",
724
+ "SC13E4F",
725
+ "SC14D1F",
726
+ "SC14D9C",
727
+ "SC14D9F",
728
+ "SD",
729
+ "SDR",
730
+ "SE",
731
+ "SEC_ACTION",
732
+ "SEC_STAFF_ACTION",
733
+ "SEC_STAFF_LETTER",
734
+ "SF-1",
735
+ "SF-3",
736
+ "SL",
737
+ "SP_15D2",
738
+ "STOP_ORDER",
739
+ "SUPPL",
740
+ "T-3",
741
+ "TA-1",
742
+ "TA-2",
743
+ "TA-W",
744
+ "TACO",
745
+ "TH",
746
+ "TTW",
747
+ "UNDER",
748
+ "UPLOAD",
749
+ "WDL-REQ",
750
+ "X-17A-5",
751
+ ]
752
+
753
+ TAXONOMIES = Literal["us-gaap", "dei", "ifrs-full", "srt"]
754
+
755
+ FACTS = [
756
+ "AccountsPayableCurrent",
757
+ "AccountsReceivableNet",
758
+ "AccountsReceivableNetCurrent",
759
+ "AccrualForTaxesOtherThanIncomeTaxesCurrent",
760
+ "AccrualForTaxesOtherThanIncomeTaxesCurrentAndNoncurrent",
761
+ "AccruedIncomeTaxesCurrent",
762
+ "AccruedIncomeTaxesNoncurrent",
763
+ "AccruedInsuranceCurrent",
764
+ "AccruedLiabilitiesCurrent",
765
+ "AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment",
766
+ "AccumulatedOtherComprehensiveIncomeLossNetOfTax",
767
+ "AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets",
768
+ "AdvertisingExpense",
769
+ "AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue",
770
+ "AllocatedShareBasedCompensationExpense",
771
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
772
+ "Assets",
773
+ "AssetsCurrent",
774
+ "AssetsNoncurrent",
775
+ "NoncurrentAssets",
776
+ "AssetImpairmentCharges",
777
+ "BuildingsAndImprovementsGross",
778
+ "CapitalLeaseObligationsCurrent",
779
+ "CapitalLeaseObligationsNoncurrent",
780
+ "Cash",
781
+ "CashAndCashEquivalentsAtCarryingValue",
782
+ "CashCashEquivalentsAndShortTermInvestments",
783
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
784
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
785
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsPeriodIncreaseDecreaseIncludingExchangeRateEffect",
786
+ "CommitmentsAndContingencies",
787
+ "CommercialPaper",
788
+ "CommonStockDividendsPerShareDeclared",
789
+ "CommonStockDividendsPerShareCashPaid",
790
+ "CommonStocksIncludingAdditionalPaidInCapital",
791
+ "ComprehensiveIncomeNetOfTax",
792
+ "ComprehensiveIncomeNetOfTaxAttributableToNoncontrollingInterest",
793
+ "ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest",
794
+ "ConstructionInProgressGross",
795
+ "ContractWithCustomerAssetNet",
796
+ "ContractWithCustomerLiability",
797
+ "ContractWithCustomerLiabilityCurrent",
798
+ "ContractWithCustomerLiabilityNoncurrent",
799
+ "CostOfRevenue",
800
+ "CostOfGoodsAndServicesSold",
801
+ "CurrentFederalTaxExpenseBenefit",
802
+ "CurrentForeignTaxExpenseBenefit",
803
+ "CurrentIncomeTaxExpenseBenefit",
804
+ "CurrentStateAndLocalTaxExpenseBenefit",
805
+ "DebtInstrumentFaceAmount",
806
+ "DebtInstrumentFairValue",
807
+ "DebtLongtermAndShorttermCombinedAmount",
808
+ "DeferredFederalIncomeTaxExpenseBenefit",
809
+ "DeferredForeignIncomeTaxExpenseBenefit",
810
+ "DeferredIncomeTaxExpenseBenefit",
811
+ "DeferredIncomeTaxesAndTaxCredits",
812
+ "DeferredIncomeTaxLiabilities",
813
+ "DeferredIncomeTaxLiabilitiesNet",
814
+ "DeferredRevenue",
815
+ "DeferredTaxAssetsGross",
816
+ "DeferredTaxAssetsLiabilitiesNet",
817
+ "DeferredTaxAssetsNet",
818
+ "DeferredTaxLiabilities",
819
+ "DefinedContributionPlanCostRecognized",
820
+ "Depreciation",
821
+ "DepreciationAmortizationAndAccretionNet",
822
+ "DepreciationAmortizationAndOther",
823
+ "DepreciationAndAmortization",
824
+ "DepreciationDepletionAndAmortization",
825
+ "DerivativeCollateralObligationToReturnCash",
826
+ "DerivativeCollateralRightToReclaimCash",
827
+ "DerivativeFairValueOfDerivativeNet",
828
+ "DerivativeLiabilityCollateralRightToReclaimCashOffset",
829
+ "DerivativeNotionalAmount",
830
+ "Dividends",
831
+ "DividendsCash",
832
+ "DividendsPayableAmountPerShare",
833
+ "DividendsPayableCurrent",
834
+ "DistributedEarnings",
835
+ "EarningsPerShareBasic",
836
+ "EarningsPerShareDiluted",
837
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
838
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
839
+ "EmployeeRelatedLiabilitiesCurrent",
840
+ "EmployeeRelatedLiabilitiesCurrentAndNoncurrent",
841
+ "EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense",
842
+ "FinanceLeaseInterestExpense",
843
+ "FinanceLeaseInterestPaymentOnLiability",
844
+ "FinanceLeaseLiability",
845
+ "FinanceLeaseLiabilityCurrent",
846
+ "FinanceLeaseLiabilityNoncurrent",
847
+ "FinanceLeaseLiabilityPaymentsDue",
848
+ "FinanceLeaseLiabilityPaymentsDueAfterYearFive",
849
+ "FinanceLeaseLiabilityPaymentsDueNextTwelveMonths",
850
+ "FinanceLeaseLiabilityPaymentsDueYearFive",
851
+ "FinanceLeaseLiabilityPaymentsDueYearFour",
852
+ "FinanceLeaseLiabilityPaymentsDueYearThree",
853
+ "FinanceLeaseLiabilityPaymentsDueYearTwo",
854
+ "FinanceLeaseLiabilityPaymentsRemainderOfFiscalYear",
855
+ "FinanceLeaseLiabilityUndiscountedExcessAmount",
856
+ "FinanceLeasePrincipalPayments",
857
+ "FinanceLeaseRightOfUseAsset",
858
+ "FinancingReceivableAllowanceForCreditLosses",
859
+ "FiniteLivedIntangibleAssetsNet",
860
+ "FixturesAndEquipmentGross",
861
+ "GainLossOnInvestments",
862
+ "GainLossOnInvestmentsAndDerivativeInstruments",
863
+ "GainLossOnSaleOfBusiness",
864
+ "GainsLossesOnExtinguishmentOfDebt",
865
+ "GeneralAndAdministrativeExpense",
866
+ "Goodwill",
867
+ "GrossProfit",
868
+ "ImpairmentOfIntangibleAssetsExcludingGoodwill",
869
+ "ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill",
870
+ "IncomeLossFromContinuingOperations",
871
+ "IncomeLossFromContinuingOperationsAttributableToNoncontrollingEntity",
872
+ "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
873
+ "IncomeLossFromContinuingOperationsPerBasicShare",
874
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
875
+ "InterestAndDebtExpense",
876
+ "IncomeTaxExpenseBenefit",
877
+ "IncomeTaxesPaid",
878
+ "IncomeTaxesPaidNet",
879
+ "IncreaseDecreaseInAccountsAndOtherReceivables",
880
+ "IncreaseDecreaseInAccountsPayable",
881
+ "IncreaseDecreaseInAccountsReceivable",
882
+ "IncreaseDecreaseInAccruedLiabilities",
883
+ "IncreaseDecreaseInAccruedIncomeTaxesPayable",
884
+ "IncreaseDecreaseInAccruedTaxesPayable",
885
+ "IncreaseDecreaseInContractWithCustomerLiability",
886
+ "IncreaseDecreaseInDeferredIncomeTaxes",
887
+ "IncreaseDecreaseInInventories",
888
+ "IncreaseDecreaseInOtherCurrentAssets",
889
+ "IncreaseDecreaseInOtherCurrentLiabilities",
890
+ "IncreaseDecreaseInOtherNoncurrentAssets",
891
+ "IncreaseDecreaseInOtherNoncurrentLiabilities",
892
+ "IncreaseDecreaseInPensionPlanObligations",
893
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
894
+ "InterestExpenseDebt",
895
+ "InterestIncomeExpenseNet",
896
+ "InterestPaid",
897
+ "InterestPaidNet",
898
+ "InventoryNet",
899
+ "InvestmentIncomeInterest",
900
+ "Land",
901
+ "LeaseAndRentalExpense",
902
+ "LesseeOperatingLeaseLiabilityPaymentsDue",
903
+ "LesseeOperatingLeaseLiabilityPaymentsDueAfterYearFive",
904
+ "LesseeOperatingLeaseLiabilityPaymentsDueNextTwelveMonths",
905
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFive",
906
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFour",
907
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearThree",
908
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearTwo",
909
+ "LesseeOperatingLeaseLiabilityPaymentsRemainderOfFiscalYear",
910
+ "LettersOfCreditOutstandingAmount",
911
+ "Liabilities",
912
+ "LiabilitiesAndStockholdersEquity",
913
+ "LiabilitiesCurrent",
914
+ "LineOfCredit",
915
+ "LineOfCreditFacilityMaximumBorrowingCapacity",
916
+ "LongTermDebt",
917
+ "LongTermDebtCurrent",
918
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalAfterYearFive",
919
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInNextTwelveMonths", # pragma: allowlist secret
920
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFive",
921
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFour",
922
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearThree",
923
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearTwo",
924
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalRemainderOfFiscalYear",
925
+ "LongTermDebtNoncurrent",
926
+ "LongTermInvestments",
927
+ "LossContingencyEstimateOfPossibleLoss",
928
+ "MachineryAndEquipmentGross",
929
+ "MarketableSecuritiesCurrent",
930
+ "MarketableSecuritiesNoncurrent",
931
+ "MinorityInterest",
932
+ "NetCashProvidedByUsedInFinancingActivities",
933
+ "NetCashProvidedByUsedInInvestingActivities",
934
+ "NetCashProvidedByUsedInOperatingActivities",
935
+ "NetIncomeLoss",
936
+ "NetIncomeLossAttributableToNoncontrollingInterest",
937
+ "NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
938
+ "NetIncomeLossAttributableToRedeemableNoncontrollingInterest",
939
+ "NoncurrentAssets",
940
+ "NonoperatingIncomeExpense",
941
+ "NoninterestIncome",
942
+ "NotesReceivableNet",
943
+ "OperatingExpenses",
944
+ "OperatingIncomeLoss",
945
+ "OperatingLeaseCost",
946
+ "OperatingLeaseLiability",
947
+ "OperatingLeaseLiabilityCurrent",
948
+ "OperatingLeaseLiabilityNoncurrent",
949
+ "OperatingLeaseRightOfUseAsset",
950
+ "OtherAccruedLiabilitiesCurrent",
951
+ "OtherAssetsCurrent",
952
+ "OtherAssetsNoncurrent",
953
+ "OtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax",
954
+ "OtherComprehensiveIncomeLossCashFlowHedgeGainLossAfterReclassificationAndTax",
955
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossafterReclassificationandTax",
956
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossbeforeReclassificationafterTax",
957
+ "OtherComprehensiveIncomeLossForeignCurrencyTransactionAndTranslationAdjustmentNetOfTax",
958
+ "OtherComprehensiveIncomeLossNetOfTax",
959
+ "OtherComprehensiveIncomeLossNetOfTaxPortionAttributableToParent",
960
+ "OtherComprehensiveIncomeUnrealizedHoldingGainLossOnSecuritiesArisingDuringPeriodNetOfTax",
961
+ "OtherIncome",
962
+ "OtherLiabilitiesCurrent",
963
+ "OtherLiabilitiesNoncurrent",
964
+ "OtherLongTermDebt",
965
+ "OtherNoncashIncomeExpense",
966
+ "PaymentsForCapitalImprovements",
967
+ "PaymentsOfDividends",
968
+ "PaymentsOfDividendsMinorityInterest",
969
+ "PaymentsForProceedsFromBusinessesAndInterestInAffiliates",
970
+ "PaymentsForProceedsFromOtherInvestingActivities",
971
+ "PaymentsForRent",
972
+ "PaymentsForRepurchaseOfCommonStock",
973
+ "PaymentsOfDebtExtinguishmentCosts",
974
+ "PaymentsToAcquireInvestments",
975
+ "PaymentsToAcquirePropertyPlantAndEquipment",
976
+ "PreferredStockSharesOutstanding",
977
+ "PreferredStockValue",
978
+ "PrepaidExpenseAndOtherAssetsCurrent",
979
+ "PrepaidExpenseCurrent",
980
+ "ProceedsFromDebtMaturingInMoreThanThreeMonths",
981
+ "ProceedsFromDebtNetOfIssuanceCosts",
982
+ "ProceedsFromDivestitureOfBusinesses",
983
+ "ProceedsFromInvestments",
984
+ "ProceedsFromIssuanceOfCommonStock",
985
+ "ProceedsFromIssuanceOfDebt",
986
+ "ProceedsFromIssuanceOfLongTermDebt",
987
+ "ProceedsFromIssuanceOfUnsecuredDebt",
988
+ "ProceedsFromIssuanceOrSaleOfEquity",
989
+ "ProceedsFromMaturitiesPrepaymentsAndCallsOfAvailableForSaleSecurities",
990
+ "ProceedsFromPaymentsForOtherFinancingActivities",
991
+ "ProceedsFromPaymentsToMinorityShareholders",
992
+ "ProceedsFromRepaymentsOfShortTermDebt",
993
+ "ProceedsFromRepaymentsOfShortTermDebtMaturingInThreeMonthsOrLess",
994
+ "ProceedsFromSaleOfPropertyPlantAndEquipment",
995
+ "ProceedsFromStockOptionsExercised",
996
+ "ProfitLoss",
997
+ "PropertyPlantAndEquipmentGross",
998
+ "PropertyPlantAndEquipmentNet",
999
+ "ReceivablesNetCurrent",
1000
+ "RedeemableNoncontrollingInterestEquityCarryingAmount",
1001
+ "RepaymentsOfDebtMaturingInMoreThanThreeMonths",
1002
+ "RepaymentsOfLongTermDebt",
1003
+ "ResearchAndDevelopmentExpense",
1004
+ "RestrictedCash",
1005
+ "RestrictedCashAndCashEquivalents",
1006
+ "RestrictedStockExpense",
1007
+ "RestructuringCharges",
1008
+ "RetainedEarningsAccumulatedDeficit",
1009
+ "Revenues",
1010
+ "RevenueFromContractWithCustomerExcludingAssessedTax",
1011
+ "SecuredLongTermDebt",
1012
+ "SellingAndMarketingExpense",
1013
+ "SellingGeneralAndAdministrativeExpense",
1014
+ "ShareBasedCompensation",
1015
+ "ShortTermBorrowings",
1016
+ "ShortTermInvestments",
1017
+ "StockholdersEquity",
1018
+ "StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest",
1019
+ "StockholdersEquityOther",
1020
+ "StockIssuedDuringPeriodValueNewIssues",
1021
+ "StockOptionPlanExpense",
1022
+ "StockRedeemedOrCalledDuringPeriodValue",
1023
+ "StockRepurchasedDuringPeriodValue",
1024
+ "StockRepurchasedAndRetiredDuringPeriodValue",
1025
+ "TaxesPayableCurrent",
1026
+ "TradingSecuritiesDebt",
1027
+ "TreasuryStockAcquiredAverageCostPerShare",
1028
+ "TreasuryStockSharesAcquired",
1029
+ "UnrealizedGainLossOnInvestments",
1030
+ "UnrecognizedTaxBenefits",
1031
+ "UnsecuredDebt",
1032
+ "VariableLeaseCost",
1033
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1034
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1035
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1036
+ ]
1037
+
1038
+ USD_PER_SHARE_FACTS = [
1039
+ "EarningsPerShareBasic",
1040
+ "EarningsPerShareDiluted",
1041
+ "TreasuryStockAcquiredAverageCostPerShare",
1042
+ "CommonStockDividendsPerShareDeclared",
1043
+ "CommonStockDividendsPerShareCashPaid",
1044
+ "DividendsPayableAmountPerShare",
1045
+ "IncomeLossFromContinuingOperationsPerBasicShare",
1046
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
1047
+ ]
1048
+
1049
+ SHARES_FACTS = [
1050
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1051
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1052
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1053
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
1054
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
1055
+ "TreasuryStockSharesAcquired",
1056
+ "PreferredStockSharesOutstanding",
1057
+ ]
1058
+
1059
+ FACT_CHOICES = Literal[ # pylint: disable=C0103
1060
+ "AccountsPayableCurrent",
1061
+ "AccountsReceivableNet",
1062
+ "AccountsReceivableNetCurrent",
1063
+ "AccrualForTaxesOtherThanIncomeTaxesCurrent",
1064
+ "AccrualForTaxesOtherThanIncomeTaxesCurrentAndNoncurrent",
1065
+ "AccruedIncomeTaxesCurrent",
1066
+ "AccruedIncomeTaxesNoncurrent",
1067
+ "AccruedInsuranceCurrent",
1068
+ "AccruedLiabilitiesCurrent",
1069
+ "AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment",
1070
+ "AccumulatedOtherComprehensiveIncomeLossNetOfTax",
1071
+ "AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets",
1072
+ "AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue",
1073
+ "AdvertisingExpense",
1074
+ "AllocatedShareBasedCompensationExpense",
1075
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
1076
+ "Assets",
1077
+ "AssetsCurrent",
1078
+ "AssetsNoncurrent",
1079
+ "NoncurrentAssets",
1080
+ "AssetImpairmentCharges",
1081
+ "BuildingsAndImprovementsGross",
1082
+ "CapitalLeaseObligationsCurrent",
1083
+ "CapitalLeaseObligationsNoncurrent",
1084
+ "Cash",
1085
+ "CashAndCashEquivalentsAtCarryingValue",
1086
+ "CashCashEquivalentsAndShortTermInvestments",
1087
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
1088
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
1089
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsPeriodIncreaseDecreaseIncludingExchangeRateEffect",
1090
+ "CommitmentsAndContingencies",
1091
+ "CommercialPaper",
1092
+ "CommonStockDividendsPerShareDeclared",
1093
+ "CommonStockDividendsPerShareCashPaid",
1094
+ "CommonStocksIncludingAdditionalPaidInCapital",
1095
+ "ComprehensiveIncomeNetOfTax",
1096
+ "ComprehensiveIncomeNetOfTaxAttributableToNoncontrollingInterest",
1097
+ "ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest",
1098
+ "ConstructionInProgressGross",
1099
+ "ContractWithCustomerAssetNet",
1100
+ "ContractWithCustomerLiability",
1101
+ "ContractWithCustomerLiabilityCurrent",
1102
+ "ContractWithCustomerLiabilityNoncurrent",
1103
+ "CostOfRevenue",
1104
+ "CostOfGoodsAndServicesSold",
1105
+ "CurrentFederalTaxExpenseBenefit",
1106
+ "CurrentForeignTaxExpenseBenefit",
1107
+ "CurrentIncomeTaxExpenseBenefit",
1108
+ "CurrentStateAndLocalTaxExpenseBenefit",
1109
+ "DebtInstrumentFaceAmount",
1110
+ "DebtInstrumentFairValue",
1111
+ "DebtLongtermAndShorttermCombinedAmount",
1112
+ "DeferredFederalIncomeTaxExpenseBenefit",
1113
+ "DeferredForeignIncomeTaxExpenseBenefit",
1114
+ "DeferredIncomeTaxExpenseBenefit",
1115
+ "DeferredIncomeTaxesAndTaxCredits",
1116
+ "DeferredIncomeTaxLiabilities",
1117
+ "DeferredIncomeTaxLiabilitiesNet",
1118
+ "DeferredRevenue",
1119
+ "DeferredTaxAssetsGross",
1120
+ "DeferredTaxAssetsLiabilitiesNet",
1121
+ "DeferredTaxAssetsNet",
1122
+ "DeferredTaxLiabilities",
1123
+ "DefinedContributionPlanCostRecognized",
1124
+ "Depreciation",
1125
+ "DepreciationAmortizationAndAccretionNet",
1126
+ "DepreciationAmortizationAndOther",
1127
+ "DepreciationAndAmortization",
1128
+ "DepreciationDepletionAndAmortization",
1129
+ "DerivativeCollateralObligationToReturnCash",
1130
+ "DerivativeCollateralRightToReclaimCash",
1131
+ "DerivativeFairValueOfDerivativeNet",
1132
+ "DerivativeLiabilityCollateralRightToReclaimCashOffset",
1133
+ "DerivativeNotionalAmount",
1134
+ "Dividends",
1135
+ "DividendsCash",
1136
+ "DividendsPayableAmountPerShare",
1137
+ "DividendsPayableCurrent",
1138
+ "DistributedEarnings",
1139
+ "EarningsPerShareBasic",
1140
+ "EarningsPerShareDiluted",
1141
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
1142
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
1143
+ "EmployeeRelatedLiabilitiesCurrent",
1144
+ "EmployeeRelatedLiabilitiesCurrentAndNoncurrent",
1145
+ "EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense",
1146
+ "FinanceLeaseInterestExpense",
1147
+ "FinanceLeaseInterestPaymentOnLiability",
1148
+ "FinanceLeaseLiability",
1149
+ "FinanceLeaseLiabilityCurrent",
1150
+ "FinanceLeaseLiabilityNoncurrent",
1151
+ "FinanceLeaseLiabilityPaymentsDue",
1152
+ "FinanceLeaseLiabilityPaymentsDueAfterYearFive",
1153
+ "FinanceLeaseLiabilityPaymentsDueNextTwelveMonths",
1154
+ "FinanceLeaseLiabilityPaymentsDueYearFive",
1155
+ "FinanceLeaseLiabilityPaymentsDueYearFour",
1156
+ "FinanceLeaseLiabilityPaymentsDueYearThree",
1157
+ "FinanceLeaseLiabilityPaymentsDueYearTwo",
1158
+ "FinanceLeaseLiabilityPaymentsRemainderOfFiscalYear",
1159
+ "FinanceLeaseLiabilityUndiscountedExcessAmount",
1160
+ "FinanceLeasePrincipalPayments",
1161
+ "FinanceLeaseRightOfUseAsset",
1162
+ "FinancingReceivableAllowanceForCreditLosses",
1163
+ "FiniteLivedIntangibleAssetsNet",
1164
+ "FixturesAndEquipmentGross",
1165
+ "GainLossOnInvestments",
1166
+ "GainLossOnInvestmentsAndDerivativeInstruments",
1167
+ "GainLossOnSaleOfBusiness",
1168
+ "GainsLossesOnExtinguishmentOfDebt",
1169
+ "GeneralAndAdministrativeExpense",
1170
+ "Goodwill",
1171
+ "GrossProfit",
1172
+ "ImpairmentOfIntangibleAssetsExcludingGoodwill",
1173
+ "ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill",
1174
+ "IncomeLossFromContinuingOperations",
1175
+ "IncomeLossFromContinuingOperationsAttributableToNoncontrollingEntity",
1176
+ "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
1177
+ "IncomeLossFromContinuingOperationsPerBasicShare",
1178
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
1179
+ "InterestAndDebtExpense",
1180
+ "IncomeTaxExpenseBenefit",
1181
+ "IncomeTaxesPaid",
1182
+ "IncomeTaxesPaidNet",
1183
+ "IncreaseDecreaseInAccountsAndOtherReceivables",
1184
+ "IncreaseDecreaseInAccountsPayable",
1185
+ "IncreaseDecreaseInAccountsReceivable",
1186
+ "IncreaseDecreaseInAccruedLiabilities",
1187
+ "IncreaseDecreaseInAccruedIncomeTaxesPayable",
1188
+ "IncreaseDecreaseInAccruedTaxesPayable",
1189
+ "IncreaseDecreaseInContractWithCustomerLiability",
1190
+ "IncreaseDecreaseInDeferredIncomeTaxes",
1191
+ "IncreaseDecreaseInInventories",
1192
+ "IncreaseDecreaseInOtherCurrentAssets",
1193
+ "IncreaseDecreaseInOtherCurrentLiabilities",
1194
+ "IncreaseDecreaseInOtherNoncurrentAssets",
1195
+ "IncreaseDecreaseInOtherNoncurrentLiabilities",
1196
+ "IncreaseDecreaseInPensionPlanObligations",
1197
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
1198
+ "InterestExpenseDebt",
1199
+ "InterestIncomeExpenseNet",
1200
+ "InterestPaid",
1201
+ "InterestPaidNet",
1202
+ "InventoryNet",
1203
+ "InvestmentIncomeInterest",
1204
+ "Land",
1205
+ "LeaseAndRentalExpense",
1206
+ "LesseeOperatingLeaseLiabilityPaymentsDue",
1207
+ "LesseeOperatingLeaseLiabilityPaymentsDueAfterYearFive",
1208
+ "LesseeOperatingLeaseLiabilityPaymentsDueNextTwelveMonths",
1209
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFive",
1210
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFour",
1211
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearThree",
1212
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearTwo",
1213
+ "LesseeOperatingLeaseLiabilityPaymentsRemainderOfFiscalYear",
1214
+ "LettersOfCreditOutstandingAmount",
1215
+ "Liabilities",
1216
+ "LiabilitiesAndStockholdersEquity",
1217
+ "LiabilitiesCurrent",
1218
+ "LineOfCredit",
1219
+ "LineOfCreditFacilityMaximumBorrowingCapacity",
1220
+ "LongTermDebt",
1221
+ "LongTermDebtCurrent",
1222
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalAfterYearFive",
1223
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInNextTwelveMonths",
1224
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFive",
1225
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFour",
1226
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearThree",
1227
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearTwo",
1228
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalRemainderOfFiscalYear",
1229
+ "LongTermDebtNoncurrent",
1230
+ "LongTermInvestments",
1231
+ "LossContingencyEstimateOfPossibleLoss",
1232
+ "MachineryAndEquipmentGross",
1233
+ "MarketableSecuritiesCurrent",
1234
+ "MarketableSecuritiesNoncurrent",
1235
+ "MinorityInterest",
1236
+ "NetCashProvidedByUsedInFinancingActivities",
1237
+ "NetCashProvidedByUsedInInvestingActivities",
1238
+ "NetCashProvidedByUsedInOperatingActivities",
1239
+ "NetIncomeLoss",
1240
+ "NetIncomeLossAttributableToNoncontrollingInterest",
1241
+ "NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
1242
+ "NetIncomeLossAttributableToRedeemableNoncontrollingInterest",
1243
+ "NoncurrentAssets",
1244
+ "NonoperatingIncomeExpense",
1245
+ "NoninterestIncome",
1246
+ "NotesReceivableNet",
1247
+ "OperatingExpenses",
1248
+ "OperatingIncomeLoss",
1249
+ "OperatingLeaseCost",
1250
+ "OperatingLeaseLiability",
1251
+ "OperatingLeaseLiabilityCurrent",
1252
+ "OperatingLeaseLiabilityNoncurrent",
1253
+ "OperatingLeaseRightOfUseAsset",
1254
+ "OtherAccruedLiabilitiesCurrent",
1255
+ "OtherAssetsCurrent",
1256
+ "OtherAssetsNoncurrent",
1257
+ "OtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax",
1258
+ "OtherComprehensiveIncomeLossCashFlowHedgeGainLossAfterReclassificationAndTax",
1259
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossafterReclassificationandTax",
1260
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossbeforeReclassificationafterTax",
1261
+ "OtherComprehensiveIncomeLossForeignCurrencyTransactionAndTranslationAdjustmentNetOfTax",
1262
+ "OtherComprehensiveIncomeLossNetOfTax",
1263
+ "OtherComprehensiveIncomeLossNetOfTaxPortionAttributableToParent",
1264
+ "OtherComprehensiveIncomeUnrealizedHoldingGainLossOnSecuritiesArisingDuringPeriodNetOfTax",
1265
+ "OtherIncome",
1266
+ "OtherLiabilitiesCurrent",
1267
+ "OtherLiabilitiesNoncurrent",
1268
+ "OtherLongTermDebt",
1269
+ "OtherNoncashIncomeExpense",
1270
+ "PaymentsForCapitalImprovements",
1271
+ "PaymentsOfDividends",
1272
+ "PaymentsOfDividendsMinorityInterest",
1273
+ "PaymentsForProceedsFromBusinessesAndInterestInAffiliates",
1274
+ "PaymentsForProceedsFromOtherInvestingActivities",
1275
+ "PaymentsForRent",
1276
+ "PaymentsForRepurchaseOfCommonStock",
1277
+ "PaymentsOfDebtExtinguishmentCosts",
1278
+ "PaymentsToAcquireInvestments",
1279
+ "PaymentsToAcquirePropertyPlantAndEquipment",
1280
+ "PreferredStockSharesOutstanding",
1281
+ "PreferredStockValue",
1282
+ "PrepaidExpenseAndOtherAssetsCurrent",
1283
+ "PrepaidExpenseCurrent",
1284
+ "ProceedsFromDebtMaturingInMoreThanThreeMonths",
1285
+ "ProceedsFromDebtNetOfIssuanceCosts",
1286
+ "ProceedsFromDivestitureOfBusinesses",
1287
+ "ProceedsFromInvestments",
1288
+ "ProceedsFromIssuanceOfCommonStock",
1289
+ "ProceedsFromIssuanceOfDebt",
1290
+ "ProceedsFromIssuanceOfLongTermDebt",
1291
+ "ProceedsFromIssuanceOfUnsecuredDebt",
1292
+ "ProceedsFromIssuanceOrSaleOfEquity",
1293
+ "ProceedsFromMaturitiesPrepaymentsAndCallsOfAvailableForSaleSecurities",
1294
+ "ProceedsFromPaymentsForOtherFinancingActivities",
1295
+ "ProceedsFromPaymentsToMinorityShareholders",
1296
+ "ProceedsFromRepaymentsOfShortTermDebt",
1297
+ "ProceedsFromRepaymentsOfShortTermDebtMaturingInThreeMonthsOrLess",
1298
+ "ProceedsFromSaleOfPropertyPlantAndEquipment",
1299
+ "ProceedsFromStockOptionsExercised",
1300
+ "ProfitLoss",
1301
+ "PropertyPlantAndEquipmentGross",
1302
+ "PropertyPlantAndEquipmentNet",
1303
+ "ReceivablesNetCurrent",
1304
+ "RedeemableNoncontrollingInterestEquityCarryingAmount",
1305
+ "RepaymentsOfDebtMaturingInMoreThanThreeMonths",
1306
+ "RepaymentsOfLongTermDebt",
1307
+ "ResearchAndDevelopmentExpense",
1308
+ "RestrictedCash",
1309
+ "RestrictedCashAndCashEquivalents",
1310
+ "RestrictedStockExpense",
1311
+ "RestructuringCharges",
1312
+ "RetainedEarningsAccumulatedDeficit",
1313
+ "Revenues",
1314
+ "RevenueFromContractWithCustomerExcludingAssessedTax",
1315
+ "SecuredLongTermDebt",
1316
+ "SellingAndMarketingExpense",
1317
+ "SellingGeneralAndAdministrativeExpense",
1318
+ "ShareBasedCompensation",
1319
+ "ShortTermBorrowings",
1320
+ "ShortTermInvestments",
1321
+ "StockholdersEquity",
1322
+ "StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest",
1323
+ "StockholdersEquityOther",
1324
+ "StockIssuedDuringPeriodValueNewIssues",
1325
+ "StockOptionPlanExpense",
1326
+ "StockRedeemedOrCalledDuringPeriodValue",
1327
+ "StockRepurchasedDuringPeriodValue",
1328
+ "StockRepurchasedAndRetiredDuringPeriodValue",
1329
+ "TaxesPayableCurrent",
1330
+ "TradingSecuritiesDebt",
1331
+ "TreasuryStockAcquiredAverageCostPerShare",
1332
+ "TreasuryStockSharesAcquired",
1333
+ "UnrealizedGainLossOnInvestments",
1334
+ "UnrecognizedTaxBenefits",
1335
+ "UnsecuredDebt",
1336
+ "VariableLeaseCost",
1337
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1338
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1339
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1340
+ ]
1341
+
1342
+ FISCAL_PERIODS = Literal["fy", "q1", "q2", "q3", "q4"] # pylint: disable=C0103
1343
+
1344
+ FISCAL_PERIODS_DICT = {
1345
+ "fy": None,
1346
+ "q1": 1,
1347
+ "q2": 2,
1348
+ "q3": 3,
1349
+ "q4": 4,
1350
+ }
openbb_platform/providers/sec/openbb_sec/utils/form4.py ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for handling Form 4 data, by company, from the SEC."""
2
+
3
+ import logging
4
+ from datetime import date as dateType
5
+ from typing import Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+
9
# Request headers for www.sec.gov; EDGAR requires a descriptive User-Agent.
SEC_HEADERS: dict[str, str] = {
    "User-Agent": "Jesus Window Washing jesus@stainedglass.com",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov",
}

# Maps Form 4 XML tag names to the snake_case column names used in the
# form4_data cache table and the returned records.
field_map = {
    "filing_date": "filing_date",
    "symbol": "symbol",
    "form": "form",
    "owner": "owner_name",
    "owner_cik": "owner_cik",
    "issuer": "company_name",
    "issuer_cik": "company_cik",
    "isDirector": "director",
    "isOfficer": "officer",
    "isTenPercentOwner": "ten_percent_owner",
    "isOther": "other",
    "otherText": "other_text",
    "officerTitle": "owner_title",
    "securityTitle": "security_type",
    "transactionDate": "transaction_date",
    "footnote": "footnote",
    "transactionShares": "securities_transacted",
    "transactionPricePerShare": "transaction_price",
    "transactionTotalValue": "transaction_value",
    "transactionCode": "transaction_type",
    "transactionAcquiredDisposedCode": "acquisition_or_disposition",
    "sharesOwnedFollowingTransaction": "securities_owned",
    "valueOwnedFollowingTransaction": "value_owned",
    "transactionTimeliness": "transaction_timeliness",
    "directOrIndirectOwnership": "ownership_type",
    "natureOfOwnership": "nature_of_ownership",
    "conversionOrExercisePrice": "conversion_exercise_price",
    "exerciseDate": "exercise_date",
    "expirationDate": "expiration_date",
    "deemedExecutionDate": "deemed_execution_date",
    "underlyingSecurityTitle": "underlying_security_title",
    "underlyingSecurityShares": "underlying_security_shares",
    "underlyingSecurityValue": "underlying_security_value",
}

# Human-readable meaning of the transactionTimeliness code.
timeliness_map = {
    "E": "Early",
    "L": "Late",
    "Empty": "On-time",
}

# SEC Form 4 transaction codes mapped to their official descriptions.
transaction_code_map = {
    "A": "Grant, award or other acquisition pursuant to Rule 16b-3(d)",
    "C": "Conversion of derivative security",
    "D": "Disposition to the issuer of issuer equity securities pursuant to Rule 16b-3(e)",
    "E": "Expiration of short derivative position",
    "F": (
        "Payment of exercise price or tax liability by delivering or withholding securities incident to the receipt, "
        "exercise or vesting of a security issued in accordance with Rule 16b-3"
    ),
    "G": "Bona fide gift",
    "H": "Expiration (or cancellation) of long derivative position with value received",
    "I": (
        "Discretionary transaction in accordance with Rule 16b-3(f) "
        "resulting in acquisition or disposition of issuer securities"
    ),
    "J": "Other acquisition or disposition (describe transaction)",
    "L": "Small acquisition under Rule 16a-6",
    "M": "Exercise or conversion of derivative security exempted pursuant to Rule 16b-3",
    "O": "Exercise of out-of-the-money derivative security",
    "P": "Open market or private purchase of non-derivative or derivative security",
    "S": "Open market or private sale of non-derivative or derivative security",
    "U": "Disposition pursuant to a tender of shares in a change of control transaction",
    "W": "Acquisition or disposition by will or the laws of descent and distribution",
    "X": "Exercise of in-the-money or at-the-money derivative security",
    "Z": "Deposit into or withdrawal from voting trust",
}
83
+
84
+
85
def get_logger():
    """Return the "openbb.sec" logger configured with a single stream handler.

    Fix: the original appended a new ``StreamHandler`` on every call, so
    calling this function more than once duplicated every log line. The
    handler is now attached only when the logger has none yet, making the
    function idempotent.

    Returns
    -------
    logging.Logger
        The shared "openbb.sec" logger at INFO level.
    """
    logger_instance = logging.getLogger("openbb.sec")
    if not logger_instance.handlers:
        handler = logging.StreamHandler()
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter("\n%(message)s\n")
        handler.setFormatter(formatter)
        logger_instance.addHandler(handler)
    logger_instance.setLevel(logging.INFO)

    return logger_instance


# Module-level logger shared by the download helpers below.
logger = get_logger()
99
+
100
+
101
def setup_database(conn):
    """Ensure the ``form4_data`` caching table exists on the given connection.

    Parameters
    ----------
    conn : sqlite3.Connection
        An open connection to the Form 4 cache database.
    """
    ddl = """
    CREATE TABLE IF NOT EXISTS form4_data (
        filing_date DATE,
        symbol TEXT,
        form TEXT,
        owner_name TEXT,
        owner_cik TEXT,
        company_name TEXT,
        company_cik TEXT,
        director BOOLEAN,
        officer BOOLEAN,
        ten_percent_owner BOOLEAN,
        other BOOLEAN,
        other_text TEXT,
        owner_title TEXT,
        security_type TEXT,
        transaction_date DATE,
        transaction_type TEXT,
        acquisition_or_disposition TEXT,
        footnote TEXT,
        securities_transacted REAL,
        transaction_price MONEY,
        transaction_value MONEY,
        securities_owned REAL,
        value_owned MONEY,
        transaction_timeliness TEXT,
        ownership_type TEXT,
        nature_of_ownership TEXT,
        conversion_exercise_price MONEY,
        exercise_date DATE,
        expiration_date DATE,
        deemed_execution_date DATE,
        underlying_security_title TEXT,
        underlying_security_shares REAL,
        underlying_security_value MONEY,
        filing_url TEXT NOT NULL
    );
    """
    conn.execute(ddl)
    conn.commit()
143
+
144
+
145
def add_missing_column(conn, column_name):
    """Add a missing column to ``form4_data``, inferring its type from the name.

    Type inference precedence mirrors the original nested conditional:
    price/value -> MONEY, shares -> REAL, is_ -> BOOLEAN, date -> DATE,
    otherwise TEXT.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection to the cache database.
    column_name : str
        Name of the column to add.
    """
    if "price" in column_name or "value" in column_name:
        inferred_type = "MONEY"
    elif "shares" in column_name:
        inferred_type = "REAL"
    elif "is_" in column_name:
        inferred_type = "BOOLEAN"
    elif "date" in column_name:
        inferred_type = "DATE"
    else:
        inferred_type = "TEXT"
    cursor = conn.cursor()
    cursor.execute(f"ALTER TABLE form4_data ADD COLUMN {column_name} {inferred_type}")
    conn.commit()
163
+
164
+
165
def compress_db(db_path):
    """Write a gzip-compressed copy of the database file to ``<db_path>.gz``."""
    # pylint: disable=import-outside-toplevel
    import gzip
    import shutil

    with open(db_path, "rb") as source:
        with gzip.open(f"{db_path}.gz", "wb") as target:
            shutil.copyfileobj(source, target)
173
+
174
+
175
def decompress_db(db_path):
    """Restore the raw database file from its ``<db_path>.gz`` gzip copy."""
    # pylint: disable=import-outside-toplevel
    import gzip
    import shutil

    with gzip.open(f"{db_path}.gz", "rb") as source:
        with open(db_path, "wb") as target:
            shutil.copyfileobj(source, target)
183
+
184
+
185
def close_db(conn, db_path):
    """Sort the table by "date" before closing the connection and compressing the database.

    Rebuilds ``form4_data`` ordered by ``filing_date``, closes the
    connection, writes a gzip snapshot via ``compress_db``, and removes the
    raw ``.db`` file so only the compressed copy persists on disk.
    """
    # pylint: disable=import-outside-toplevel
    import os

    # Rebuild the table sorted by filing_date: copy into a sorted table,
    # drop the original, then rename. Order of these statements matters.
    conn.execute(
        "CREATE TABLE IF NOT EXISTS form4_data_sorted AS SELECT * FROM form4_data ORDER BY filing_date"
    )
    conn.execute("DROP TABLE form4_data")
    conn.execute("ALTER TABLE form4_data_sorted RENAME TO form4_data")
    conn.commit()
    conn.close()
    # Keep only the gzip snapshot; the raw file is recreated on next use
    # by decompress_db.
    compress_db(db_path)
    os.remove(db_path)
199
+
200
+
201
async def get_form_4_urls(
    symbol,
    start_date: Optional[dateType] = None,
    end_date: Optional[dateType] = None,
    use_cache: bool = True,
):
    """Get the Form 4 filing URLs for a symbol.

    Fix: the original date filter,
    ``(not start_date or not item.filing_date) or start_date and ...``,
    evaluated truthy for every item whenever ``start_date`` (resp.
    ``end_date``) was None — the default — so the function returned an
    empty list unless both dates were supplied. The filter now only skips
    a filing when a bound is given and the filing falls outside it (or
    lacks the corresponding date field).

    Parameters
    ----------
    symbol : str
        Ticker symbol or CIK to query.
    start_date : Optional[date]
        Keep filings with ``filing_date`` on or after this date.
    end_date : Optional[date]
        Keep filings with ``report_date`` on or before this date.
    use_cache : bool
        Whether the underlying filings fetcher may use its cache.

    Returns
    -------
    list
        XML document URLs for the matching Form 4 filings.
    """
    # pylint: disable=import-outside-toplevel
    from openbb_sec.models.company_filings import SecCompanyFilingsFetcher

    fetcher = SecCompanyFilingsFetcher()
    form_4 = await fetcher.fetch_data(
        dict(
            symbol=symbol,
            form_type="4",
            provider="sec",
            limit=0,
            use_cache=use_cache,
        ),
        {},
    )
    # Normalize ISO-format strings to date objects; anything else becomes None.
    if not isinstance(start_date, dateType):
        start_date = (
            dateType.fromisoformat(start_date)  # type: ignore
            if start_date and isinstance(start_date, str)
            else None
        )
    if not isinstance(end_date, dateType):
        end_date = (
            dateType.fromisoformat(end_date)  # type: ignore
            if end_date and isinstance(end_date, str)
            else None
        )
    urls: list = []
    for item in form_4:
        # Apply a bound only when it was supplied; filings missing the
        # relevant date are skipped only when that bound is active.
        if start_date and (not item.filing_date or item.filing_date < start_date):
            continue
        if end_date and (not item.report_date or item.report_date > end_date):
            continue
        # Strip the accession-number directory prefix to get the raw XML URL.
        to_replace = f"{item.primary_doc.split('/')[0]}/"
        form_url = item.report_url.replace(to_replace, "")
        if form_url.endswith(".xml"):
            urls.append(form_url)

    return urls
260
+
261
+
262
def clean_xml(xml_content):
    """Strip artifacts that break XML parsing of SEC Form 4 documents.

    Removes backslashes and "/s/ " signature markers, then escapes any
    bare ampersand that is not already part of a known XML entity.
    """
    # pylint: disable=import-outside-toplevel
    import re

    without_backslashes = re.sub(r"\\", "", xml_content)
    without_signature_marks = without_backslashes.replace("/s/ ", "")
    # Escape only ampersands not already starting a recognized entity.
    return re.sub(
        r"&(?!amp;|lt;|gt;|quot;|apos;)", "&amp;", without_signature_marks
    )
271
+
272
+
273
async def get_form_4_data(url) -> dict:
    """Get the form 4 data.

    Downloads the raw XML at ``url``, checks for the SEC traffic-limit
    page, cleans the XML, and parses it with ``xmltodict``.

    Returns
    -------
    dict
        The parsed ``ownershipDocument`` element, or ``{}`` if the XML
        could not be parsed or the element is absent.

    Raises
    ------
    OpenBBError
        If the SEC traffic limit has been exceeded.
    """
    # pylint: disable=import-outside-toplevel
    from warnings import warn  # noqa
    from xmltodict import parse
    from openbb_core.provider.utils.helpers import amake_request

    async def response_callback(response, _):
        """Response callback function."""
        # Return raw bytes; decoding happens below.
        return await response.read()

    response = await amake_request(
        url,
        headers=SEC_HEADERS,
        response_callback=response_callback,
        timeout=30,
    )  # type: ignore
    response_text = response.decode("utf-8")

    # The SEC serves an HTML rate-limit page instead of XML when throttled.
    if "Traffic Limit" in response_text:
        raise OpenBBError(
            "You've exceeded the SEC's traffic limit. Access will be limited for 10 minutes."
            " Reduce the number of requests by using a more specific date range."
        )

    cleaned_response = clean_xml(response_text)

    try:
        xml_data = parse(cleaned_response)
    except Exception as e:
        # Best-effort: a single unparseable filing is skipped, not fatal.
        warn(f"Error parsing XML from {url}: {e}")
        return {}

    return (
        xml_data.get("ownershipDocument") if xml_data.get("ownershipDocument") else {}
    )
309
+
310
+
311
async def parse_form_4_data(  # noqa: PLR0915, PLR0912 # pylint: disable=too-many-branches
    data,
):
    """Parse the Form 4 data.

    Flattens a parsed ``ownershipDocument`` dict (from ``get_form_4_data``)
    into a list of transaction/holding row dicts. Shared filing-level
    fields (owner, issuer, relationship flags) are merged into every row;
    non-derivative and derivative tables are processed separately.

    Parameters
    ----------
    data : dict
        The ``ownershipDocument`` element of a Form 4 filing.

    Returns
    -------
    list
        One dict per reported transaction or holding.
    """

    # A filing may report one owner (dict) or several (list); multiple
    # owners/CIKs are joined with ";".
    owner = data.get("reportingOwner", {})
    owners = ""
    ciks = ""
    if isinstance(owner, list):
        owners = ";".join(
            [d.get("reportingOwnerId", {}).get("rptOwnerName") for d in owner]
        )
        ciks = ";".join(
            [d.get("reportingOwnerId", {}).get("rptOwnerCik") for d in owner]
        )

    issuer = data.get("issuer", {})
    # For multiple owners, only the first owner's relationship flags are used.
    owner_relationship = (
        owner.get("reportingOwnerRelationship", {})
        if isinstance(owner, dict)
        else (
            owner[0].get("reportingOwnerRelationship", {})
            if isinstance(owner, list)
            else {}
        )
    )
    signature_data = data.get("ownerSignature")

    if signature_data and isinstance(signature_data, dict):
        signature_date = signature_data.get("signatureDate")
    elif signature_data and isinstance(signature_data, list):
        signature_date = signature_data[0].get("signatureDate")
    else:
        signature_date = None

    # Build an id -> text lookup for footnotes. NOTE(review): below,
    # `footnotes` is sometimes reassigned to a joined string inside the
    # table loops, and later branches isinstance-check for that — the
    # mutation is intentional but fragile.
    footnotes = data.get("footnotes", {})
    if footnotes:
        footnote_items = footnotes.get("footnote")
        if isinstance(footnote_items, dict):
            footnote_items = [footnote_items]
        footnotes = {item["@id"]: item["#text"] for item in footnote_items}

    # Filing-level fields copied into every output row. The signature date
    # is preferred over periodOfReport as the filing date.
    metadata = {
        "filing_date": signature_date or data.get("periodOfReport"),
        "symbol": issuer.get("issuerTradingSymbol", "").upper(),
        "form": data.get("documentType"),
        "owner": (
            owners if owners else owner.get("reportingOwnerId", {}).get("rptOwnerName")
        ),
        "owner_cik": (
            ciks if ciks else owner.get("reportingOwnerId", {}).get("rptOwnerCik")
        ),
        "issuer": issuer.get("issuerName"),
        "issuer_cik": issuer.get("issuerCik"),
        **owner_relationship,
    }
    results: list = []

    # --- Table I: non-derivative transactions/holdings -------------------
    if data.get("nonDerivativeTable") and (
        data["nonDerivativeTable"].get("nonDerivativeTransaction")
        or data["nonDerivativeTable"].get("nonDerivativeHolding")
    ):
        temp_table = data["nonDerivativeTable"]
        tables = (
            temp_table["nonDerivativeTransaction"]
            if temp_table.get("nonDerivativeTransaction")
            else temp_table["nonDerivativeHolding"]
        )
        parsed_table1: list = []
        if isinstance(tables, dict):
            tables = [tables]
        for table in tables:
            if isinstance(table, str):
                continue
            new_row = {**metadata}
            for key, value in table.items():
                if key == "transactionCoding":
                    new_row["transaction_type"] = value.get("transactionCode")
                    new_row["form"] = (
                        value.get("transactionFormType") or metadata["form"]
                    )
                elif isinstance(value, dict):
                    # Footnote references can be a single id or a list.
                    if "footnoteId" in value:
                        if isinstance(value["footnoteId"], list):
                            ids = [item["@id"] for item in value["footnoteId"]]
                            footnotes = (
                                "; ".join(
                                    [
                                        footnotes.get(footnote_id, "")
                                        for footnote_id in ids
                                    ]
                                )
                                if isinstance(footnotes, dict)
                                else footnotes
                            )
                            new_row["footnote"] = footnotes
                        else:
                            footnote_id = value["footnoteId"]["@id"]
                            new_row["footnote"] = (
                                (
                                    footnotes
                                    if isinstance(footnotes, str)
                                    else footnotes.get(footnote_id)
                                )
                                if footnotes
                                else None
                            )
                    # xmltodict wraps leaf values as {"value": ...}; unwrap
                    # one or two levels of nesting.
                    for k, v in value.items():
                        if k == "value":
                            new_row[key] = v
                        if isinstance(v, dict):
                            if "footnoteId" in v:
                                if isinstance(v["footnoteId"], list):
                                    ids = [item["@id"] for item in v["footnoteId"]]
                                    footnotes = (
                                        footnotes
                                        if isinstance(footnotes, str)
                                        else (
                                            "; ".join(
                                                [
                                                    footnotes.get(footnote_id)
                                                    for footnote_id in ids
                                                ]
                                            )
                                            if footnotes
                                            else None
                                        )
                                    )
                                    new_row["footnote"] = footnotes
                                else:
                                    footnote_id = v["footnoteId"]["@id"]
                                    new_row["footnote"] = (
                                        (
                                            footnotes
                                            if isinstance(footnotes, str)
                                            else footnotes.get(footnote_id)
                                        )
                                        if footnotes
                                        else None
                                    )
                            for k1, v1 in v.items():
                                if k1 == "value":
                                    new_row[k] = v1
            if new_row:
                parsed_table1.append(new_row)

        results.extend(parsed_table1)

    # --- Table II: derivative transactions --------------------------------
    if (
        data.get("derivativeTable")
        and data["derivativeTable"].get("derivativeTransaction")
    ) or data.get("derivativeSecurity"):
        parsed_table2: list = []
        tables = (
            data["derivativeSecurity"]
            if data.get("derivativeSecurity")
            else data["derivativeTable"]["derivativeTransaction"]
        )
        if isinstance(tables, dict):
            tables = [tables]
        for table in tables:
            if isinstance(table, str):
                continue
            new_row = {**metadata}
            for key, value in table.items():
                if key == "transactionCoding":
                    new_row["transaction_type"] = value.get("transactionCode")
                    new_row["form"] = (
                        value.get("transactionFormType") or metadata["form"]
                    )
                elif isinstance(value, dict):
                    for k, v in value.items():
                        if k == "value":
                            new_row[key] = v
                        if isinstance(v, dict):
                            for k1, v1 in v.items():
                                if k1 == "value":
                                    new_row[k] = v1
            # Derivative rows report "transactionValue"; rename it to match
            # the non-derivative "transactionTotalValue" key.
            t_value = new_row.pop("transactionValue", None)
            if t_value:
                new_row["transactionTotalValue"] = t_value
            parsed_table2.append(new_row)

        results.extend(parsed_table2)

    return results
497
+
498
+
499
async def download_data(urls, use_cache: bool = True):  # noqa: PLR0915
    """Get the Form 4 data from a list of URLs.

    When caching is enabled, previously-downloaded filings are read from a
    SQLite database under the user cache directory (stored compressed as
    ``sec_form4.db.gz``); only uncached URLs are fetched from the SEC, in
    chunks of 8 with a pause between chunks to respect rate limits.

    Parameters
    ----------
    urls : list
        Form 4 XML URLs, typically from ``get_form_4_urls``.
    use_cache : bool
        Whether to read/write the SQLite cache. Defaults to True.

    Returns
    -------
    list
        Row dicts sorted by ``filing_date`` descending; rows without a
        filing date are dropped.

    Raises
    ------
    OpenBBError
        On database errors the user declines to recover from, or any
        unexpected failure during download/processing.
    """
    # pylint: disable=import-outside-toplevel
    import asyncio  # noqa
    import os
    import sqlite3
    from numpy import nan
    from openbb_core.app.utils import get_user_cache_directory
    from pandas import DataFrame

    results: list = []
    non_cached_urls: list = []

    try:
        if use_cache is True:
            db_dir = f"{get_user_cache_directory()}/sql"
            db_path = f"{db_dir}/sec_form4.db"
            # Decompress the database file
            if os.path.exists(f"{db_path}.gz"):
                decompress_db(db_path)

            os.makedirs(db_dir, exist_ok=True)

            try:
                conn = sqlite3.connect(db_path)
                setup_database(conn)
                cached_data = get_cached_data(urls, conn)
                cached_urls = {entry["filing_url"] for entry in cached_data}
                for url in urls:
                    if url not in cached_urls:
                        non_cached_urls.append(url)
            except sqlite3.DatabaseError as e:
                # NOTE(review): interactive input() here blocks
                # non-interactive callers; also if sqlite3.connect itself
                # raised, `conn` may be unbound in the outer except below —
                # confirm intended behavior.
                logger.info("Error connecting to the database.")
                retry_input = input(
                    "Would you like to retry with a new database? (y/n): "
                )
                if retry_input.lower() == "y":
                    # Sideline the corrupt file and start a fresh database.
                    faulty_db_path = f"{db_path}.faulty"
                    os.rename(db_path, faulty_db_path)
                    logger.info("Renamed faulty database to %s", faulty_db_path)
                    db_path = f"{db_dir}/sec_form4.db"
                    conn = sqlite3.connect(db_path)
                    setup_database(conn)
                    cached_data = get_cached_data(urls, conn)
                    cached_urls = {entry["filing_url"] for entry in cached_data}
                    for url in urls:
                        if url not in cached_urls:
                            non_cached_urls.append(url)
                else:
                    raise OpenBBError(e) from e

            results.extend(cached_data)
        elif use_cache is False:
            non_cached_urls = urls

        async def get_one(url):
            """Get the data for one URL."""
            data = await get_form_4_data(url)
            result = await parse_form_4_data(data)
            # Cache empty results too, so the URL is not re-fetched next run.
            if not result and use_cache is True:
                df = DataFrame([{"filing_url": url}])
                df.to_sql("form4_data", conn, if_exists="append", index=False)

            if result:
                df = DataFrame(result)
                df.loc[:, "filing_url"] = url
                df = df.replace({nan: None}).rename(columns=field_map)
                try:
                    if use_cache is True:
                        df.to_sql("form4_data", conn, if_exists="append", index=False)
                except sqlite3.DatabaseError as e:
                    # Schema drift: new XML fields surface as unknown
                    # columns; add them on the fly and retry once.
                    if "no column named" in str(e):
                        missing_column = (
                            str(e).split("no column named ")[1].split(" ")[0]
                        )
                        missing_column = field_map.get(missing_column, missing_column)
                        add_missing_column(conn, missing_column)
                        df.to_sql("form4_data", conn, if_exists="append", index=False)
                    else:
                        raise OpenBBError(e) from e
                results.extend(df.replace({nan: None}).to_dict(orient="records"))

        # Rough ETA: ~7 requests per 1.8 seconds given the chunk/sleep pacing.
        time_estimate = (len(non_cached_urls) / 7) * 1.8
        logger.info(
            "Found %d total filings and %d"
            " uncached entries to download, estimated download time: %d seconds.",
            len(urls),
            len(non_cached_urls),
            round(time_estimate),
        )
        min_warn_time = 10
        if time_estimate > min_warn_time:
            logger.info(
                "Warning: This function is not intended for mass data collection."
                " Long download times are due to limitations with concurrent downloads from the SEC."
                "\n\nReduce the number of requests by using a more specific date range."
            )

        if len(non_cached_urls) > 0:
            # Download in chunks of 8 with a pause between chunks to stay
            # under the SEC's request-rate limit.
            async with asyncio.Semaphore(8):
                for url_chunk in [
                    non_cached_urls[i : i + 8]
                    for i in range(0, len(non_cached_urls), 8)
                ]:
                    await asyncio.gather(*[get_one(url) for url in url_chunk])
                    await asyncio.sleep(1.125)

        if use_cache is True:
            close_db(conn, db_path)

        # Drop rows with no filing_date before sorting on it.
        results = [entry for entry in results if entry.get("filing_date")]

        return sorted(results, key=lambda x: x["filing_date"], reverse=True)

    except Exception as e:  # pylint: disable=broad-except
        if use_cache is True:
            close_db(conn, db_path)
        raise OpenBBError(
            f"Unexpected error while downloading and processing data -> {e.__class__.__name__}: {e}"
        ) from e
619
+
620
+
621
def get_cached_data(urls, conn):
    """Retrieve cached Form 4 rows for a list of filing URLs.

    Parameters
    ----------
    urls : list
        Filing URLs to look up in the ``form4_data`` table.
    conn : sqlite3.Connection
        Open connection to the cache database.

    Returns
    -------
    list
        Matching rows as dicts (NaN replaced with None); empty if none.
    """
    # pylint: disable=import-outside-toplevel
    from numpy import nan
    from pandas import read_sql

    marks = ", ".join("?" for _ in urls)
    sql = f"SELECT * FROM form4_data WHERE filing_url IN ({marks})"  # noqa
    frame = read_sql(sql, conn, params=urls)
    if frame.empty:
        return []
    return frame.replace({nan: None}).to_dict(orient="records")
631
+
632
+
633
async def get_form_4(
    symbol,
    start_date: Optional[dateType] = None,
    end_date: Optional[dateType] = None,
    limit: Optional[int] = None,
    use_cache: bool = True,
) -> list[dict]:
    """Get the Form 4 data by ticker symbol or CIK number.

    Resolves the filing URLs, optionally truncates to ``limit``, and
    downloads/parses the filings.

    Raises
    ------
    OpenBBError
        On download timeout, or when no data is returned for the symbol.
    """
    # pylint: disable=import-outside-toplevel
    import asyncio

    try:
        filing_urls = await get_form_4_urls(symbol, start_date, end_date, use_cache)
        if limit is not None:
            filing_urls = filing_urls[:limit]
        data = await download_data(filing_urls, use_cache)
    except asyncio.TimeoutError as e:
        raise OpenBBError(
            "A timeout error occurred while downloading the data. Please try again."
        ) from e

    if not data:
        raise OpenBBError(f"No Form 4 data was returned for {symbol}.")

    return data
openbb_platform/providers/sec/openbb_sec/utils/frames.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Frames Utilities."""
2
+
3
+ # pylint: disable=line-too-long
4
+
5
+ import asyncio
6
+ from datetime import datetime
7
+ from typing import Dict, List, Optional, Union
8
+ from warnings import warn
9
+
10
+ from aiohttp_client_cache import SQLiteBackend
11
+ from aiohttp_client_cache.session import CachedSession
12
+ from openbb_core.app.model.abstract.error import OpenBBError
13
+ from openbb_core.app.utils import get_user_cache_directory
14
+ from openbb_core.provider.utils.errors import EmptyDataError
15
+ from openbb_core.provider.utils.helpers import amake_request
16
+ from openbb_sec.utils.definitions import (
17
+ FISCAL_PERIODS,
18
+ FISCAL_PERIODS_DICT,
19
+ HEADERS,
20
+ SHARES_FACTS,
21
+ TAXONOMIES,
22
+ USD_PER_SHARE_FACTS,
23
+ )
24
+ from openbb_sec.utils.helpers import get_all_companies, symbol_map
25
+ from pandas import DataFrame
26
+
27
+
28
async def fetch_data(url, use_cache, persist) -> Union[Dict, List[Dict]]:
    """Fetch the data from the constructed URL.

    Parameters
    ----------
    url : str
        Fully-constructed SEC XBRL API URL.
    use_cache : bool
        Whether to route the request through a SQLite-backed HTTP cache.
    persist : bool
        When caching: False expires cached entries after 24 hours;
        True keeps them with no expiry.

    Returns
    -------
    Union[Dict, List[Dict]]
        The decoded JSON response.
    """
    response: Union[Dict, List[Dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_frames"
        async with CachedSession(
            cache=(
                # 24-hour expiry for current-year data; permanent otherwise.
                SQLiteBackend(cache_dir, expire_after=3600 * 24)
                if persist is False
                else SQLiteBackend(cache_dir)
            )
        ) as session:
            try:
                response = await amake_request(url, headers=HEADERS, session=session)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, headers=HEADERS)  # type: ignore
    return response
47
+
48
+
49
async def get_frame(  # pylint: disable =too-many-arguments,too-many-locals, too-many-statements
    fact: str = "Revenues",
    year: Optional[int] = None,
    fiscal_period: Optional[FISCAL_PERIODS] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    units: Optional[str] = "USD",
    instantaneous: bool = False,
    use_cache: bool = True,
) -> Dict:
    """Get a frame of data for a given fact.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    This API supports for annual, quarterly and instantaneous data:

    https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2019Q1I.json

    Where the units of measure specified in the XBRL contains a numerator and a denominator,
    these are separated by “-per-” such as “USD-per-shares”. Note that the default unit in XBRL is “pure”.

    The period format is CY#### for annual data (duration 365 days +/- 30 days),
    CY####Q# for quarterly data (duration 91 days +/- 30 days).

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful different reporting start and end dates for facts contained in a frame.

    Parameters
    ----------
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, the current year is used.
    fiscal_period: Literal["fy", "q1", "q2", "q3", "q4"], optional
        The fiscal period to retrieve the data for. If not provided, the most recent quarter is used.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    units : str, optional
        The units to use. Defaults to "USD". This should be a valid unit from the SEC taxonomy, see the notes above.
        The most common units are "USD", "shares", and "USD-per-shares". EPS and outstanding shares facts will
        automatically set.
    instantaneous: bool
        Whether to retrieve instantaneous data. See the notes above for more information. Defaults to False.
        Some facts are only available as instantaneous data.
        The function will automatically attempt to retrieve the data if the initial fiscal quarter request fails.
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the frame.
    """
    current_date = datetime.now().date()
    quarter = FISCAL_PERIODS_DICT.get(fiscal_period) if fiscal_period else None
    if year is None and quarter is None:
        # NOTE(review): (month - 1) // 3 yields 0 in Jan-Mar, which is
        # falsy below, so no "Q#" suffix is appended and the request falls
        # back to the current-year annual frame — confirm this is intended.
        quarter = (current_date.month - 1) // 3
        year = current_date.year

    if year is None:
        year = current_date.year

    # Only current-year requests use a persistent (non-expiring) cache.
    persist = current_date.year == year

    # Facts with known unit types override any user-supplied `units`.
    if fact in SHARES_FACTS:
        units = "shares"

    if fact in USD_PER_SHARE_FACTS:
        units = "USD-per-shares"

    url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{fact}/{units}/CY{year}"

    if quarter:
        url = url + f"Q{quarter}"

    # "I" suffix requests an instantaneous (point-in-time) frame.
    if instantaneous:
        url = url + "I"

    url = url + ".json"
    response: Union[Dict, List[Dict]] = {}
    try:
        response = await fetch_data(url, use_cache, persist)
    except Exception as e:  # pylint: disable=W0718
        message = (
            "No frame was found with the combination of parameters supplied."
            + " Try adjusting the period."
            + " Not all GAAP measures have frames available."
        )
        # Fallbacks: toggle between instantaneous and calendar-period
        # frames before giving up, since availability varies by fact.
        if url.endswith("I.json"):
            warn("No instantaneous frame was found, trying calendar period data.")
            url = url.replace("I.json", ".json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        elif "Q" in url and not url.endswith("I.json"):
            warn(
                "No frame was found for the requested quarter, trying instantaneous data."
            )
            url = url.replace(".json", "I.json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        else:
            raise OpenBBError(message) from e

    # Sort observations by reported value, largest first.
    data = sorted(response.get("data", {}), key=lambda x: x["val"], reverse=True)  # type: ignore
    metadata = {
        "frame": response.get("ccp", ""),  # type: ignore
        "tag": response.get("tag", ""),  # type: ignore
        "label": response.get("label", ""),  # type: ignore
        "description": response.get("description", ""),  # type: ignore
        "taxonomy": response.get("taxonomy", ""),  # type: ignore
        "unit": response.get("uom", ""),  # type: ignore
        "count": response.get("pts", ""),  # type: ignore
    }
    df = DataFrame(data)
    # Map each CIK to its ticker symbol for readability.
    companies = await get_all_companies(use_cache=use_cache)
    cik_to_symbol = companies.set_index("cik")["symbol"].to_dict()
    df["symbol"] = df["cik"].astype(str).map(cik_to_symbol)
    df["unit"] = metadata.get("unit")
    df["fact"] = metadata.get("label")
    df["frame"] = metadata.get("frame")
    df = df.fillna("N/A").replace("N/A", None)
    results = {"metadata": metadata, "data": df.to_dict("records")}

    return results
184
+
185
+
186
async def get_concept(
    symbol: str,
    fact: str = "Revenues",
    year: Optional[int] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    use_cache: bool = True,
) -> Dict:
    """Return all the XBRL disclosures from a single company (CIK) Concept (a taxonomy and tag) into a single JSON file.

    Each entry contains a separate array of facts for each units of measure that the company has chosen to disclose
    (e.g. net profits reported in U.S. dollars and in Canadian dollars).

    Parameters
    ----------
    symbol: str
        The ticker symbol to look up. A comma-separated list queries
        multiple symbols concurrently.
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, all reported values will be returned.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the company concept.

    Raises
    ------
    EmptyDataError
        If no data is returned for any of the requested symbols.
    """
    symbols = symbol.split(",")
    results: List[Dict] = []
    messages: List = []
    metadata: Dict = {}

    async def get_one(ticker):
        """Get data for one symbol."""
        ticker = ticker.upper()
        message = f"Symbol Error: No data was found for, {ticker} and {fact}"
        cik = await symbol_map(ticker)
        if cik == "":
            # Unknown ticker: record the problem and move on — per-symbol
            # failures do not abort the batch.
            message = f"Symbol Error: No CIK was found for, {ticker}"
            warn(message)
            messages.append(message)
        else:
            url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{fact}.json"
            response: Union[Dict, List[Dict]] = {}
            try:
                response = await fetch_data(url, use_cache, False)
            except Exception as _:  # pylint: disable=W0718
                warn(message)
                messages.append(message)
            if response:
                units = response.get("units", {})  # type: ignore
                metadata[ticker] = {
                    "cik": response.get("cik", ""),  # type: ignore
                    "taxonomy": response.get("taxonomy", ""),  # type: ignore
                    "tag": response.get("tag", ""),  # type: ignore
                    "label": response.get("label", ""),  # type: ignore
                    "description": response.get("description", ""),  # type: ignore
                    "name": response.get("entityName", ""),  # type: ignore
                    "units": (
                        # Single-unit responses are flattened to a scalar.
                        list(units) if units and len(units) > 1 else list(units)[0]
                    ),
                }
                # Flatten the per-unit fact arrays, tagging each record
                # with its unit and company identifiers.
                for k, v in units.items():
                    unit = k
                    values = v
                    for item in values:
                        item["unit"] = unit
                        item["symbol"] = ticker
                        item["cik"] = metadata[ticker]["cik"]
                        item["name"] = metadata[ticker]["name"]
                        item["fact"] = metadata[ticker]["label"]
                    results.extend(values)

    await asyncio.gather(*[get_one(ticker) for ticker in symbols])

    if not results:
        raise EmptyDataError(f"{messages}")

    # Year filter is best-effort: fall back to all entries when the
    # requested fiscal year has no matches.
    if year is not None:
        filtered_results = [d for d in results if str(year) == str(d.get("fy"))]
        if len(filtered_results) > 0:
            results = filtered_results
        if len(filtered_results) == 0:
            warn(
                f"No results were found for {fact} in the year, {year}."
                " Returning all entries instead. Concept and fact names may differ by company and year."
            )

    return {
        "metadata": metadata,
        "data": sorted(results, key=lambda x: (x["filed"], x["end"]), reverse=True),
    }
openbb_platform/providers/sec/openbb_sec/utils/helpers.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Helpers module."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from typing import Dict, List, Optional, Union
6
+
7
+ from aiohttp_client_cache import SQLiteBackend
8
+ from aiohttp_client_cache.session import CachedSession
9
+ from openbb_core.app.model.abstract.error import OpenBBError
10
+ from openbb_core.app.utils import get_user_cache_directory
11
+ from openbb_core.provider.utils.helpers import amake_request, make_request
12
+ from openbb_sec.utils.definitions import HEADERS, SEC_HEADERS
13
+ from pandas import DataFrame
14
+
15
+
16
async def sec_callback(response, session):
    """Dispatch an SEC response to the right decoder based on its Content-Type."""
    mime = response.headers.get("Content-Type", "")
    if "application/json" in mime:
        return await response.json()
    if "text/html" in mime:
        # SEC HTML archive pages are served as latin-1, not utf-8.
        return await response.text(encoding="latin-1")
    return await response.text()
24
+
25
+
26
async def get_all_companies(use_cache: bool = True) -> DataFrame:
    """Get all company names, tickers, and CIK numbers registered with the SEC.

    Companies are sorted by market cap.

    Returns
    -------
    DataFrame: Pandas DataFrame with columns for Symbol, Company Name, and CIK Number.

    Example
    -------
    >>> tickers = get_all_companies()
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    data: Union[dict, List[dict]] = {}
    if use_cache is True:
        # Cache the company map for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_companies",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS)  # type: ignore

    # The JSON is keyed by row index; transpose so each row is one company.
    frame = DataFrame(data).transpose()
    frame.columns = ["cik", "symbol", "name"]
    return frame.astype(str)
58
+
59
+
60
async def get_all_ciks(use_cache: bool = True) -> DataFrame:
    """Get a list of entity names and their CIK number."""
    url = "https://www.sec.gov/Archives/edgar/cik-lookup-data.txt"

    async def callback(response, session):
        """Decode the CIK lookup file, which is served as latin-1 text."""
        return await response.text(encoding="latin-1")

    data: Union[dict, List[dict], str] = {}
    if use_cache is True:
        # Cache the lookup file for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_ciks",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session, response_callback=callback)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS, response_callback=callback)  # type: ignore

    # Each line is colon-delimited; keep only the first two fields
    # (institution name and CIK number).
    rows = [line.split(":") for line in data.split("\n")]  # type: ignore
    frame = DataFrame(rows).iloc[:, 0:2]
    frame.columns = ["Institution", "CIK Number"]
    return frame.dropna().astype(str)
95
+
96
+
97
async def get_mf_and_etf_map(use_cache: bool = True) -> DataFrame:
    """Return the CIK number of a ticker symbol for querying the SEC API."""
    url = "https://www.sec.gov/files/company_tickers_mf.json"
    data: Union[dict, List[dict]] = {}
    if use_cache is True:
        # Cache the fund map for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_mf_etf_map",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session, response_callback=sec_callback)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback)  # type: ignore

    # The payload is columnar: {"fields": [...], "data": [[...], ...]}.
    return DataFrame(data=data["data"], columns=data["fields"]).astype(str)  # type: ignore
119
+
120
+
121
async def search_institutions(keyword: str, use_cache: bool = True) -> DataFrame:
    """Search for an institution by name. It is case-insensitive."""
    ciks = await get_all_ciks(use_cache=use_cache)
    matches = ciks["Institution"].str.contains(keyword, case=False)
    return ciks[matches]
126
+
127
+
128
async def symbol_map(symbol: str, use_cache: bool = True) -> str:
    """Return the CIK number of a ticker symbol for querying the SEC API.

    Parameters
    ----------
    symbol : str
        The ticker symbol. Dots are translated to dashes per SEC convention.
    use_cache : bool
        Whether to use the cached company/fund maps.

    Returns
    -------
    str
        The ten-digit, zero-padded CIK number, or an empty string when the
        symbol is not found in either the company or the fund map.
    """
    symbol = symbol.upper().replace(".", "-")
    symbols = await get_all_companies(use_cache=use_cache)

    if symbol not in symbols["symbol"].to_list():
        # Fall back to the mutual fund / ETF map before giving up.
        symbols = await get_mf_and_etf_map(use_cache=use_cache)
        if symbol not in symbols["symbol"].to_list():
            return ""
    cik = symbols[symbols["symbol"] == symbol]["cik"].iloc[0]
    # The SEC API expects the CIK left-padded with zeros to 10 digits;
    # str.zfill replaces the original hand-rolled padding loop.
    return str(cik).zfill(10)
144
+
145
+
146
async def cik_map(cik: Union[str, int], use_cache: bool = True) -> str:
    """Convert a CIK number to a ticker symbol. Enter CIK as an integer with no leading zeros.

    Function is not meant for funds.

    Parameters
    ----------
    cik : int
        The CIK number to convert to a ticker symbol.

    Returns
    -------
    str: The ticker symbol associated with the CIK number.
    """
    # String input may carry leading zeros; the company map stores bare CIKs.
    _cik = cik.lstrip("0") if isinstance(cik, str) else str(cik)
    companies = await get_all_companies(use_cache=use_cache)
    if _cik not in companies["cik"].to_list():
        return f"Error: CIK, {_cik}, does not have a unique ticker."
    return companies[companies["cik"] == _cik]["symbol"].iloc[0]
169
+
170
+
171
def get_schema_filelist(query: str = "", url: str = "", use_cache: bool = True) -> List:
    """Get a list of schema files from the SEC website."""
    from pandas import read_html  # pylint: disable=import-outside-toplevel

    base_url = url if url else f"https://xbrl.fasb.org/us-gaap/{query}"
    # Directory listings need a trailing slash when a sub-path was queried.
    fetch_url = base_url + "/" if query else base_url
    response = make_request(fetch_url)
    names = read_html(response.content)[0]["Name"].dropna()
    if len(names) == 0:
        return []
    # The first row of the listing is the parent-directory link; replace it
    # with the resolved URL so callers can build absolute paths.
    names.iloc[0] = base_url if not query else base_url + "/"
    return names.to_list()
186
+
187
+
188
async def download_zip_file(
    url, symbol: Optional[str] = None, use_cache: bool = True
) -> List[Dict]:
    """Download a list of files from URLs.

    Parameters
    ----------
    url : str
        URL of an SEC fails-to-deliver ZIP archive.
    symbol : Optional[str]
        When provided, parsed rows are filtered to this ticker symbol.
    use_cache : bool
        Whether to use the SQLite-backed HTTP cache for the raw download.

    Returns
    -------
    List[Dict]
        One record per data row of the pipe-delimited file(s) in the archive.
    """
    # pylint: disable=import-outside-toplevel
    from io import BytesIO
    from zipfile import ZipFile

    from pandas import concat, read_csv, to_datetime

    results = DataFrame()

    async def callback(response, session):
        """Response callback for ZIP file downloads."""
        # Return the raw bytes; the ZIP payload is decoded below.
        return await response.read()

    response: Union[dict, List[dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_ftd"
        async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
            try:
                response = await amake_request(url, session=session, headers=HEADERS, response_callback=callback)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, headers=HEADERS, response_callback=callback)  # type: ignore

    try:
        # Fast path: a single-member archive pandas can read directly.
        data = read_csv(BytesIO(response), compression="zip", sep="|")  # type: ignore
        # The last two rows are a trailer, not data.
        results = data.iloc[:-2]
    except ValueError:
        # Multi-member archives: unpack each member and concatenate.
        zip_file = ZipFile(BytesIO(response))  # type: ignore
        file_list = [d.filename for d in zip_file.infolist()]
        for item in file_list:
            with zip_file.open(item) as _item:
                _file = read_csv(
                    _item,
                    encoding="ISO-8859-1",
                    sep="|",
                    low_memory=False,
                    on_bad_lines="skip",
                )
                results = concat([results, _file.iloc[:-2]])

    # Normalize column names only when the expected FTD header is present.
    if "SETTLEMENT DATE" in results.columns:
        results = results.rename(
            columns={
                "SETTLEMENT DATE": "date",
                "SYMBOL": "symbol",
                "CUSIP": "cusip",
                "QUANTITY (FAILS)": "quantity",
                "PRICE": "price",
                "DESCRIPTION": "description",
            }
        )
        if symbol:
            results = results[results["symbol"] == symbol]
        results["date"] = to_datetime(results["date"], format="%Y%m%d").dt.date
        # Replace invalid decimal values with None
        results["price"] = results["price"].mask(
            ~results["price"].str.contains(r"^\d+(?:\.\d+)?$", regex=True), None
        )
        results["price"] = results["price"].astype(float)

    return results.reset_index(drop=True).to_dict("records")
253
+
254
+
255
async def get_ftd_urls() -> Dict:
    """Get Fails-to-Deliver Data URLs."""
    from pandas import Series  # pylint: disable=import-outside-toplevel

    results: Dict = {}
    r = await amake_request("https://www.sec.gov/data.json", headers=SEC_HEADERS)
    datasets = r.get("dataset", {})  # type: ignore

    # Locate the Fails-to-Deliver dataset entry in the SEC data catalog.
    fails_entry = None
    for entry in datasets:
        if "title" in entry and entry["title"] == "Fails-to-Deliver Data":
            fails_entry = entry
            break

    if fails_entry is not None:
        distributions = fails_entry["distribution"]
        urls = [d["downloadURL"] for d in distributions if "downloadURL" in d]
        # The file date is encoded in the last characters of each URL.
        dates = [u[-11:-4] for u in urls]
        ftd_urls = Series(index=dates, data=urls)
        ftd_urls.index = ftd_urls.index.str.replace("_", "")
        results = ftd_urls.to_dict()

    return results
281
+
282
+
283
async def get_series_id(
    symbol: Optional[str] = None, cik: Optional[str] = None, use_cache: bool = True
):
    """Map the fund to the series and class IDs for validating the correct filing.

    For an exact match, use a symbol.

    Parameters
    ----------
    symbol : Optional[str]
        The fund's ticker symbol. Takes precedence over `cik` when both are given.
    cik : Optional[str]
        The fund's CIK number.
    use_cache : bool
        Whether to use the cached SEC mutual fund / ETF map.

    Returns
    -------
    DataFrame
        Rows from the fund map matching the target; exact matches when found.

    Raises
    ------
    OpenBBError
        If neither symbol nor cik is provided.
    """
    symbol = symbol if symbol else ""
    cik = cik if cik else ""

    if not symbol and not cik:
        raise OpenBBError("Either symbol or cik must be provided.")

    target = symbol if symbol else cik
    choice = "cik" if not symbol else "symbol"
    funds = await get_mf_and_etf_map(use_cache=use_cache)

    # Fuzzy match across all identifier columns first.
    results = funds[
        funds["cik"].str.contains(target, case=False)
        | funds["seriesId"].str.contains(target, case=False)
        | funds["classId"].str.contains(target, case=False)
        | funds["symbol"].str.contains(target, case=False)
    ]

    if len(results) > 0:
        # Narrow the fuzzy matches to exact matches on the chosen column.
        # (The original `choice if not symbol else choice` always evaluated
        # to `choice`; simplified.)
        results = results[results[choice] == target]

    return results
312
+
313
+
314
async def get_nport_candidates(symbol: str, use_cache: bool = True) -> List[Dict]:
    """Get a list of all NPORT-P filings for a given fund's symbol.

    Parameters
    ----------
    symbol : str
        The fund's ticker symbol.
    use_cache : bool
        Whether to use the SQLite-backed HTTP cache for the full-text search.

    Returns
    -------
    List[Dict]
        Filing candidates sorted by file date, newest first.

    Raises
    ------
    OpenBBError
        If no fund can be resolved for the symbol.
    """
    results = []
    _series_id = await get_series_id(symbol, use_cache=use_cache)
    try:
        # Fall back to symbol_map when no series ID is mapped.
        # NOTE(review): that fallback returns a CIK, so the full-text search
        # below then queries by CIK instead of series ID — confirm intended.
        series_id = (
            await symbol_map(symbol, use_cache)
            if _series_id is None or len(_series_id) == 0
            else _series_id["seriesId"].iloc[0]
        )
    except IndexError as e:
        raise OpenBBError("Fund not found for, the symbol: " + symbol) from e
    if series_id == "" or series_id is None:
        raise OpenBBError("Fund not found for, the symbol: " + symbol)

    url = f"https://efts.sec.gov/LATEST/search-index?q={series_id}&dateRange=all&forms=NPORT-P"
    response: Union[dict, List[dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_etf"
        async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
            try:
                await session.delete_expired_responses()
                response = await amake_request(url, session=session, headers=HEADERS, response_callback=sec_callback)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, response_callback=sec_callback)  # type: ignore

    # Flatten the Elasticsearch-style hits into filing records.
    if "hits" in response and len(response["hits"].get("hits")) > 0:  # type: ignore
        hits = response["hits"]["hits"]  # type: ignore
        results = [
            {
                "name": d["_source"]["display_names"][0],
                "cik": d["_source"]["ciks"][0],
                "file_date": d["_source"]["file_date"],
                "period_ending": d["_source"]["period_ending"],
                "form_type": d["_source"]["form"],
                # Build the archive URL from the accession number in `_id`.
                "primary_doc": (
                    f"https://www.sec.gov/Archives/edgar/data/{int(d['_source']['ciks'][0])}"  # noqa
                    + f"/{d['_id'].replace('-', '').replace(':', '/')}"  # noqa
                ),
            }
            for d in hits
        ]
    return (
        sorted(results, key=lambda d: d["file_date"], reverse=True)
        if len(results) > 0
        else results
    )
openbb_platform/providers/sec/openbb_sec/utils/parse_13f.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for parsing SEC Form 13F-HR."""
2
+
3
+ from typing import Any, Optional
4
+
5
+ from openbb_core.app.model.abstract.error import OpenBBError
6
+
7
+
8
def date_to_quarter_end(date: str) -> str:
    """Convert a date to the end of the calendar quarter."""
    # pylint: disable=import-outside-toplevel
    from pandas import to_datetime
    from pandas.tseries.offsets import QuarterEnd

    # Snap to the first day of the quarter, then roll forward to its end.
    quarter_start = to_datetime(date).to_period("Q").to_timestamp("D")
    quarter_end = quarter_start + QuarterEnd()
    return quarter_end.date().strftime("%Y-%m-%d")
19
+
20
+
21
async def get_13f_candidates(symbol: Optional[str] = None, cik: Optional[str] = None):
    """Get the 13F-HR filings for a given symbol or CIK.

    Parameters
    ----------
    symbol : Optional[str]
        Ticker symbol of the filer.
    cik : Optional[str]
        CIK number of the filer.

    Returns
    -------
    Series
        Complete-submission URLs indexed by report date, for structured
        filings (on or after 2013-06-30) only.

    Raises
    ------
    OpenBBError
        If neither identifier is provided, or no 13F-HR filings are found.
    """
    # pylint: disable=import-outside-toplevel
    from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
    from pandas import DataFrame, to_datetime

    fetcher = SecCompanyFilingsFetcher()
    params: dict[str, Any] = {}
    if cik is not None:
        params["cik"] = str(cik)
    if symbol is not None:
        params["symbol"] = symbol
    if cik is None and symbol is None:
        raise OpenBBError("Either symbol or cik must be provided.")

    params["use_cache"] = False
    params["form_type"] = "13F-HR"
    filings = await fetcher.fetch_data(params, {})
    filings = [d.model_dump() for d in filings]
    if len(filings) == 0:
        raise OpenBBError(f"No 13F-HR filings found for {symbol if symbol else cik}.")

    # Filings before June 30, 2013 are non-structured and are not supported by downstream parsers.
    up_to = to_datetime("2013-06-30").date()  # pylint: disable=unused-variable # noqa
    # Missing URLs are normalized to None via the fillna/replace round-trip.
    return (
        DataFrame(data=filings)
        .query("`report_date` >= @up_to")
        .set_index("report_date")["complete_submission_url"]
        .fillna("N/A")
        .replace("N/A", None)
    )
52
+
53
+
54
async def complete_submission_callback(response, _):
    """Use callback function for processing the response object."""
    # Guard clause: anything other than a 200 is treated as a hard failure.
    if response.status != 200:
        raise OpenBBError(f"Request failed with status code {response.status}")
    return await response.text()
59
+
60
+
61
async def get_complete_submission(url: str):
    """Get the Complete Submission TXT file string from the SEC API."""
    # pylint: disable=import-outside-toplevel
    from openbb_core.provider.utils.helpers import amake_request
    from openbb_sec.utils.definitions import HEADERS

    # Delegate status handling and decoding to the shared callback.
    response = await amake_request(
        url,
        headers=HEADERS,
        response_callback=complete_submission_callback,
    )
    return response
70
+
71
+
72
def parse_header(filing_str: str) -> dict:
    """Parse the header of a Complete Submission TXT file string.

    Parameters
    ----------
    filing_str : str
        The complete-submission text, or an already-built BeautifulSoup object.

    Returns
    -------
    dict
        The parsed header data.

    Raises
    ------
    OpenBBError
        If no header could be parsed from the document.
    """
    # pylint: disable=import-outside-toplevel
    import xmltodict
    from bs4 import BeautifulSoup

    header_dict: dict = {}
    # Accept either a raw string or a pre-parsed soup to avoid re-parsing.
    soup = (
        filing_str
        if filing_str.__class__.__name__ == "BeautifulSoup"
        else BeautifulSoup(filing_str, "xml")
    )
    try:
        header_xml = soup.find("headerData")
        header_dict = xmltodict.parse(str(header_xml))["headerData"]
    except KeyError:
        # Filings without a 'headerData' tag fall back to the 'type' tag.
        header_xml = soup.find("type")
        header_dict = xmltodict.parse(str(header_xml)).get("type")
    if header_dict:
        return header_dict  # type: ignore
    raise OpenBBError(
        "Failed to parse the form header."
        + " Check the `filing_str` to for the tag, 'headerData'."
    )
96
+
97
+
98
def get_submission_type(filing_str: str):
    """Get the submission type of a Complete Submission TXT file string."""
    header = parse_header(filing_str)
    if not header:
        raise OpenBBError(
            "Failed to get the submission type from the form header."
            + " Check the response from `parse_header`."
        )
    # Structured headers expose 'submissionType'; legacy ones nest it in '#text'.
    if "submissionType" in header:
        return header["submissionType"]
    return header["#text"]
112
+
113
+
114
def get_period_ending(filing_str: str):
    """Get the report date from a Complete Submission TXT file string."""
    header = parse_header(filing_str)
    filer_info = header.get("filerInfo")
    if not filer_info:
        raise OpenBBError(
            "Failed to get the period of report from the form header."
            + " Check the response from `parse_header`."
        )
    return filer_info.get("periodOfReport")
123
+
124
+
125
async def parse_13f_hr(filing: str):
    """Parse a 13F-HR filing from the Complete Submission TXT file string.

    Parameters
    ----------
    filing : str
        Either the complete-submission text, or an `https://` URL to fetch it from.

    Returns
    -------
    List[Dict]
        Aggregated holdings records sorted by portfolio weight, descending.

    Raises
    ------
    OpenBBError
        If the information table cannot be parsed (e.g. pre-Q2-2013 filings).
    """
    # pylint: disable=import-outside-toplevel
    import xmltodict
    from bs4 import BeautifulSoup
    from numpy import nan
    from pandas import DataFrame, to_datetime

    # Check if the input string is a URL
    if filing.startswith("https://"):
        filing = await get_complete_submission(filing)  # type: ignore

    soup = BeautifulSoup(filing, "xml")

    info_table = soup.find_all("informationTable")

    # Some filings nest the data in a generic 'table'; use the last one.
    if not info_table:
        info_table = soup.find_all("table")[-1]

    # Strip the XML namespace prefixes before converting to a dict.
    parsed_xml = xmltodict.parse(
        str(info_table[0]).replace("ns1:", "").replace("n1:", "")
    )["informationTable"]["infoTable"]

    if parsed_xml is None:
        raise OpenBBError(
            "Failed to parse the 13F-HR information table."
            + " Check the `filing_str` to make sure it is valid and contains the tag 'informationTable'."
            + " Documents filed before Q2 2013 are not supported."
        )

    period_ending = get_period_ending(soup)
    # A single holding parses as a dict rather than a list; normalize.
    data = (
        DataFrame(parsed_xml)
        if isinstance(parsed_xml, list)
        else DataFrame([parsed_xml])
    )
    data.columns = data.columns.str.replace("ns1:", "")
    data.loc[:, "value"] = data["value"].astype(int)
    security_type: list = []
    principal_amount: list = []

    # Unpack the nested objects
    try:
        security_type = [d.get("sshPrnamtType") for d in data["shrsOrPrnAmt"]]
        data.loc[:, "security_type"] = security_type
        principal_amount = [int(d.get("sshPrnamt", 0)) for d in data["shrsOrPrnAmt"]]
        data.loc[:, "principal_amount"] = principal_amount
        _ = data.pop("shrsOrPrnAmt")
    except ValueError:
        pass
    try:
        sole = [d.get("Sole") for d in data["votingAuthority"]]
        shared = [d.get("Shared") for d in data["votingAuthority"]]
        none = [d.get("None") for d in data["votingAuthority"]]
        data.loc[:, "voting_authority_sole"] = [int(s) if s else 0 for s in sole]
        data.loc[:, "voting_authority_shared"] = [int(s) if s else 0 for s in shared]
        data.loc[:, "voting_authority_none"] = [int(s) if s else 0 for s in none]
        _ = data.pop("votingAuthority")
    except ValueError:
        pass

    # Placeholder so puts/calls group distinctly; mapped back to None below.
    if "putCall" in data.columns:
        data.loc[:, "putCall"] = data["putCall"].fillna("--")

    # Add the period ending so that the filing is identified when multiple are requested.
    data["period_ending"] = to_datetime(period_ending, yearfirst=False).date()
    df = DataFrame(data)
    # Aggregate the data because there are multiple entries for each security and we need the totals.
    # We break it down by CUSIP, security type, and option type.
    agg_index = [
        "period_ending",
        "nameOfIssuer",
        "cusip",
        "titleOfClass",
        "security_type",
        "putCall",
        "investmentDiscretion",
    ]
    agg_columns = {
        "value": "sum",
        "principal_amount": "sum",
        "voting_authority_sole": "sum",
        "voting_authority_shared": "sum",
        "voting_authority_none": "sum",
    }
    # Only aggregate columns that exist in the DataFrame
    agg_columns = {k: v for k, v in agg_columns.items() if k in df.columns}
    agg_index = [k for k in agg_index if k in df.columns]
    df = df.groupby([*agg_index]).agg(agg_columns)

    # Drop voting-authority columns that are all zeros (no information).
    for col in [
        "voting_authority_sole",
        "voting_authority_shared",
        "voting_authority_none",
    ]:
        if col in df.columns and all(df[col] == 0):
            df.drop(columns=col, inplace=True)

    # Portfolio weight = each position's share of the total reported value.
    total_value = df["value"].sum()
    df["weight"] = round(df["value"] / total_value, 6)

    return (
        df.reset_index()
        .replace({nan: None, "--": None})
        .sort_values(by="weight", ascending=False)
        .to_dict("records")
    )
openbb_platform/providers/sec/openbb_sec/utils/py.typed ADDED
File without changes
openbb_platform/providers/sec/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "openbb-sec"
3
+ version = "1.4.3"
4
+ description = "SEC extension for OpenBB"
5
+ authors = ["OpenBB Team <hello@openbb.co>"]
6
+ license = "AGPL-3.0-only"
7
+ readme = "README.md"
8
+ packages = [{ include = "openbb_sec" }]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = ">=3.9.21,<3.13"
12
+ openbb-core = "^1.4.6"
13
+ aiohttp-client-cache = "^0.11.0"
14
+ aiosqlite = "^0.20.0"
15
+ xmltodict = "^0.13.0"
16
+ beautifulsoup4 = "^4.12"
17
+ lxml = "^5.2.1"
18
+ trafilatura = "^2.0"
19
+ inscriptis = "^2.5.3"
20
+
21
+ [build-system]
22
+ requires = ["poetry-core"]
23
+ build-backend = "poetry.core.masonry.api"
24
+
25
+ [tool.poetry.plugins."openbb_provider_extension"]
26
+ sec = "openbb_sec:sec_provider"
openbb_platform/providers/sec/tests/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC tests."""
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v1.yaml ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept-Encoding:
6
+ - gzip, deflate
7
+ Host:
8
+ - www.sec.gov
9
+ method: GET
10
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/0001552781-24-000634-index-headers.htm
11
+ response:
12
+ body:
13
+ string: !!binary |
14
+ H4sIAAAAAAAAA7WY33OiSBDHn+GvmNuHfRsFRaOGo2qE0XDLDw9wL9mrqxRJSMLGqIWmzr2//roB
15
+ BQXdvTW3D1mZ6enpT0/3dxT1KrAtTb3izNDUwAwsrvlcJ9wYM4/4b3ev8WoVL+ZEkiS502ld9GTa
16
+ Uig8dduK2swWiOovlIoqrKPoh3tanXXj+SFprJ5eZ2RAWlJLkVtyV1SZrvNJwBydU4MFPDBtrm1n
17
+ ZaXdbncyG983XYc6U3t4xL2oBjcTrvXoJ1GdTIeWqVPD1ac2dwKqu1Mn0HC7CfdM19ju0BZVM+C2
18
+ r3UaUmv7ud+QZFEdmZbpjNOYtCJafKTuiJZmqX7FnHHZCCYhRlHVXXvCnBu0YumjM3I9mxvUYQCp
19
+ uzojumvhH5g0PyGVJEFQCrAw4B07YOubOrjuSaLqemPmmF9YkCYCXUgKscP522N4v35L4vkTEHj+
20
+ NkedntRt9ZQu5M8P8rBNR3e9ieulPjSDY6y+zix6w5lHuWNocqsN8M2D0HPcz8yach+fgYMW6WZ6
21
+ oOEBIHhxRDKVWi35Ih22t8OQo05Hbvf7sMmh1+HUNx04aMoMw4P/NYzc4zyQNdfhpEjYxGJfGOYs
22
+ uNFYYDEnYDmkNoZPX8yJ1pbaeLqTK1ipKZJCuxdd2oLzgX1r9rGZab3jts0Df81tSTRLPSJSiiNZ
23
+ 5w1d4waeJh7XPs7Wl2i2rd6PT+vLuoKn8fwh2jSe1/sdtV2e7XJscW0z4tKafkQfBy2560iSnetA
24
+ EGp7clfzxJ8ObTNbgoUzELBysj4lW1KS9in4gliKlVnLEndEPA61C/O77sWGJWZqmJY0LDWiZZhA
25
+ O0Rk8UiMOInu14tkRRYJ0aNkHcZz4j4+xvdRsrokfAazKG5l00vClstFPF+/RvM1zhyuGxB98bqM
26
+ 5qsQzL8RliTh/ClC61WdeV2Qo3gezu/jcEb8dbjO14bzB8I3z/FdvF6JWDAGYT5i4znAot0x4XM+
27
+ lWlPeTJd6Q1EUch7GJezgSAKu4EitagisFjY0yJ4hLPwmAVBG/yafOI3aFNSJ0GAqncM5hloMoVu
28
+ McFat1C0RqaeUw75Z+6xMffJnyhgf8GysoRlm5Pin1CRM0EAPSsqTBAKTUtDgDRADvZEDc1A1yDF
29
+ qbARFDYCwobjqbaJQiY7JJOdAZpCMvKiTKsSfMMNCKKGldhvK4Tdr/NBTO4uoELicDvLLiItVE4U
30
+ tmpDcjlIjyITGCKDbZ3GwBGAyGDMW53JeXFojE8gNvg50xuh2CQVPJjZkzxRQD36HwNA4Wjui046
31
+ VpawdACzjA+Y5kyofp9y0Bock9MRzDBWBo5ELUW56Ny+LGjvBXUuc8GvM39qSJ6T6PHXD4dmHzRj
32
+ cf+Wtq9MKHmMZ9GAHBqpzVBT7xIti70S6NHI+TXt9xtyJfrWseijTb9P5e+HX9iV4m8dxl9Y/SzA
33
+ 2GOTK1OvALQrAC+L29niadH4unzKXHJf98wJttmhoypVaXGJp73jKc2fcRSyJDd8/aoCo9TA0O21
34
+ 0disHuqAroeeRcAbt1na6EfJ9jyV6JQS3Z7NmYQGH1UIO6cIbx+ix8bmdXaUEjyajpnK8I+Rbj2W
35
+ aDt1tFu7M4ktNqwQd08Sz8K7k8TgkVs/Cps7K8F2a2FzuzNh4UtfBfbiJOwyiU7CgkcfNmT/5YBz
36
+ nyXmi1rm3O5nma9tqwLbr8B6hWAe8JkGZwOSFXG+xxE471BM+zse7ywF5dc6r0LIUoVi9zXv1ouW
37
+ i2Td2MxWm/Ogal2WLzxpB1lr+p7HJldvbP958Xfj6+o8xtxJmaq4x/PJd+Wo3t1JlrH71ZkohZ8y
38
+ TXGrF/PvClS9/kbxDL5R+2+vr2Hy7Zh2/DjXZjVrVlyWEZVSGZbN8NfqDrVU1d9x5bE/CJAeejtD
39
+ h37zXaeauOodY0fr0IrnLysousX8vKzt+yozFtfLvs3P0sG38ypc9U6pfa2wuUtmjX/i5XmsJ12X
40
+ 0Ytb5uSSd+2P3smfGrdQpO/TInVey+y92p8nqWV9o/yIw12v1Bl/J43NwxdPahNfSanN9P2U2kxf
41
+ Fov/AnSFX/M0FgAA
42
+ headers:
43
+ Accept-Ranges:
44
+ - bytes
45
+ Connection:
46
+ - keep-alive
47
+ Content-Encoding:
48
+ - gzip
49
+ Content-Length:
50
+ - '1551'
51
+ Content-Type:
52
+ - text/html
53
+ Date:
54
+ - Mon, 17 Feb 2025 16:56:19 GMT
55
+ Last-Modified:
56
+ - Mon, 16 Dec 2024 19:34:14 GMT
57
+ Strict-Transport-Security:
58
+ - max-age=31536000 ; includeSubDomains ; preload
59
+ Vary:
60
+ - Accept-Encoding
61
+ X-Content-Type-Options:
62
+ - nosniff
63
+ X-Frame-Options:
64
+ - SAMEORIGIN
65
+ X-XSS-Protection:
66
+ - 1; mode=block
67
+ x-amz-id-2:
68
+ - 36KuPmr3UWIQ8x31DyNQbm1OVx9hB40N0dw451XQLYA4IrIsvW2q02kCDDjAMIZPKkuwZB1/wS0=
69
+ x-amz-meta-mode:
70
+ - '33188'
71
+ x-amz-replication-status:
72
+ - COMPLETED
73
+ x-amz-request-id:
74
+ - 5S9961E5DEA95VYH
75
+ x-amz-server-side-encryption:
76
+ - AES256
77
+ x-amz-version-id:
78
+ - Em.xpqQlD0bTs0G3LJTAM3AKc2Z7jmV.
79
+ status:
80
+ code: 200
81
+ message: OK
82
+ - request:
83
+ body: null
84
+ headers:
85
+ Accept-Encoding:
86
+ - gzip, deflate
87
+ Host:
88
+ - www.sec.gov
89
+ method: GET
90
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/R1.htm
91
+ response:
92
+ body:
93
+ string: !!binary |
94
+ H4sIAAAAAAAAA+xaa3PaRhT9XH7FHbWN7YwBSQb8EpohQskwsYEa0ibtdDyLtIBioWVWC5hm8t97
95
+ Vw/b2EQmnXQGJfABYe25y96z5x7tLjaaHevdpd3umwWj/6Frm+8vLwpGz/7tnd22bPO0YLxuXdjt
96
+ xqVtXmmlsZgUjKbds65a3X6r0zZbTbtxBu9fXV1A2hH2Y7+X3SHYlxdKXLwIT/jUNMrxtWD4XnAD
97
+ nPp1JRRLn4ZjSoUCYjmldUXQW1F2wlCBMafDuuIFjj9zaZnTKeOiJFuwh9Dh3lQ8DPlI5iS+q0DI
98
+ nbrSG7NF6SOiyy+hyaDNBFzRCZtT6I+9ECw2mdBAwMuyUY7jzOxezcJP8Ws4CxzhsQAEG4182kZc
99
+ zxtgTiPYpwfwKcV5Q/y7FNw3l6JsS64XTn2yrNf3AhbQvQcRWei9gc+cm73zFPsZqB9S+ASZQdE3
100
+ nMPnu6i7ZAtGOZmdAXOXktIpCSAKrytJ/BnI+HPFnB+V9EqpgsEIMg1BBj4FxydhWFfiiVFgwLhL
101
+ eV1RFXCo7yPSwQHVFV0Bz8V5dCeV6oleramntcppVVflNAou38ZpV8LHWCZDg7qiKcDZIv1sGq43
102
+ T4e38FwxPgNdVae3ODojFJwFI9PCyeXGgKPSkjtGGaOk8Mar3zOO+zOb1CmBdnSIXemVFXA5Hhq/
103
+ zzIar5v+PcWhJqOJEy8OmBBscgbRmGBOfG+EIxdsit9F0jCSyvpeWGdz5rn76gEGMdS659wk2g3x
104
+ rXG1DwLlegh7Lh1i5LVLvesmc2ZSvH0U6t4hLLzAZQvAHsy0BWSTUSYyHXdl4FLWinlSfGvEcxlP
105
+ aSHBPUmbbVHaDczMldm99sloNe+7JpBtGYkPCVbNZqlv44x3KfeYawduk4gvTX2MAQSBRGWQcSf/
106
+ WP+5E4Qd4BNl+dpDE55NBpSvEhK3gmyGuD2DClXViqqua8f500ac5xUdeeh6JBBtMqFrmbiHgMRk
107
+ sGF1rAZYnQv5lldZWFgLnPitwKW3b+lyLSMJBiIQICpTIfjStaPKhoWyfRLpk9uWixl7Q88hcgWT
108
+ UTWIhVXw8zVUPSmqNf2kUqvmVTOtwGEclzNRwj2B7mmxGUpkaTF3fU2tRBxCFAOMQxIHMjDLge28
109
+ iqnhupyGYXK58AKqrSUoARymH0BCoRNksYKtBcCXxRxStJhPoOuTf0heVZVkbuHHDu+zRZBNlMRJ
110
+ CUlkBkkN4aOTb0jK1uonKpgO73I2x53e+hq7Y+auulJ4Bj1vGjmXS5eFgvh/etMves8dLzEUEPuc
111
+ 3xypR9pR/iQjS6LBKXlKRVQssum5zPcrauUgf5K4QAv0u2Pchq97WEetEDU//3iuHdeKuqZr+Zv+
112
+ P7gnBA3kkc0sSJYj4SoTCQRWMd9mI7hNeugxhHnCC0aX6IXcI/4qD/ftkAK+v91wl9P4+M6h0TEI
113
+ 7v0p7wyHj8sDcUXnARBiJETQ708bj2hpheGM8s3JifH/A0fbJJ340WlPKB9hjbzhbCHGSBqOf/0O
114
+ MUVCDIUE+22IGW8JMbOwOCJkeh2tr6QWLNlzZ9gTzLlp3Hph/YZdS2nJTRHeUkt6tUv478Sf0Uv6
115
+ 9LEUQyHCHsIvEg6Ihyggg7oXP2s19Tx/oupRZ4bPn6WmD/ryJ45VNqJbwIag6fuDA0jBWccvEX/R
116
+ FugLHObOm/qcuFhGveVkwB49spImiNsyaHm74VnUNkrDvnXGJBjRp4dzKQJSyHOHc+0PvQ1PDXLl
117
+ L8iDVjo5rraZoGFzRnVVr60zlwj0K0QwQJw8u67tTOVZU4l5i0zlEXc/qJlsmvk2imJnJxvYiVo6
118
+ rqrP2UkE2tnJV9tJzNvOTh7YiZVfVez8ZKPliaY/XJ4cr1+eIOiRnxzv/GSD5Ynk7amfbPiD/Hfo
119
+ Jzn8V4SdnXzV8mTVTk7XL0+e2snpzk42WJ6st5PTH9ZOTjf8QXQbVbHzk//iJ9o6Q0EgqCUspOqj
120
+ wtDgrxj/985edvby9fbyKr+q2GJ7+RcAAP//7J1Lc9s2EIDv+RWYXtJORb1IuY6a8YwfScbjSeLG
121
+ PbRHiIQszFAkS4CWlV/fXYCSSQqKoSSTSCJ8sCUKz+Xi8y6w4u4TXoJ+ZTfF75vNFShUN1f8vuOJ
122
+ BU9Qbhs8Adm1lCd+3/HkuHky6A6ru7O+0VpRhRo8GTieWOymDE27s75l8NcR8uQAw94cTnbAiV/3
123
+ fvyhCSf+5m6KP3Q4eRYnZrm1FSWWM99HhXAosfJ0/GoYiu+bPR2/GYbi+w4lFp6ObwpD8S2/lXCE
124
+ ODnA72M4nOyEk1Ft42QLTkYbGycOJzY4GRk3TlqME3fOc9w8GXRPap7OyLxxcrJhsY8cTyw2Tk5M
125
+ Bzv+AX6l/TvxxHLm+6gUDid2UW0182RL0P1gwzxxUbJWUW1G86S1UbK2M99HpXA4sfJ2Xo2exYkq
126
+ 5HDyFd7OK+OxTotx4ryd4+aJ39iMNQbd+4bNWBd0b3OuY5JbW1niAu6PGyX97mnV0wm2RLCdNj2d
127
+ wEWw2ZgmpyZPJ2htBFvgItiOnCeDbr/Gky0RbP0NnrgINpudk76RJ62NYLOd+T4qhcOJladTOycO
128
+ ArOns3FOHAQOJxaejklubUXJAT7C2qFkJ5TUnnwyMoac+JtPPhm5kBMblJjk1lKU2M58HxXix6Gk
129
+ pzLmwItKEptmjp0XZVqdRh6djEaRyqPTzKuD70upFHL2iU2vqKS/bGldpeDZlkpF9Z2fVSY14xFT
130
+ ci9bC9M4zcdkMeMSG7O+B9gO3AMU6T9r6a2VA3vUeX3KbjET0XoG5cTHZKiS/LyA0Vj3+5SZSd9/
131
+ dVM9csWmPOH4ZGA1GJXuJzu7SNOY0YRMQRRQmkoCFWQOC3sxYwlcYTrJVZgmEp+TSlF2gmQ5e+Bp
132
+ IeKlN+Uxi/C55DQMWSbhtSgmcy4E9NR93cvOdGqhb5/C7wTuM8vxka1iNYVt+gQz+5CSaD1lAn3y
133
+ GFXsOw/piklo+Pnx7KreWkdeKCUp8zmpZTiupHdaLcZVYihUUA/bYOMkXeQ0g4439L3BqFoL5Xi8
134
+ nN/P5JgEoHvGRmsjUp+RW5AjfzSMTo3A1OlTK7h2Vc4mU/XHSR7z8USr6TX849fJnb7U3gWNKSjJ
135
+ 1iYT+uX6ZQ6hbdWjQie8MEFupVirbFrrBb+G4A/kXPXB8S3HnHpIfghyQGMnVCaOA9NPB1NVQQ+T
136
+ S0maz8Fa+8yiO5nDEB2gdgGUMbFby0n1FiwptLreXL07/1QxpVRiToH4ghJzcurdjFW5SGWFmarX
137
+ OhtlZ+M6o6CrTEjCHtCG08VY9CeZ2nX2wRuc697A2EPfCBvXtWkckxQ+yTdq6wpgedRHh9Wx15wm
138
+ QhMwU3e/S/7G9nE9yVUNNQeQ17/w471/711dOYNyH7htXLeHCXBUMQftr4E2TqTlrEZiIeuQV9Eq
139
+ A+mEIeIyTNIVgTP8qyjCGaGCDPreTQd//9Uhweno4vbjXYcwGf6mybeurxqEjmM+5+hNy1SxUMCK
140
+ wJ0h8MyhMXWlyFZAvXtz2eRvh5RsX4BSkJcfkdEvHT73CZ/bmLPP1ITq4ydNw1KOnrvQc2v+yJaj
141
+ tJYgc0BW0DyXMumQi4LHak8eFRwzIeaMrdL4Op79dJ5t1elDg5vz6b8z4J6yvrYcb6i8K6+2nuLW
142
+ 8Wu/+PWkso5eLadXLQlxywGGIlhvF2Y65TK8+8wzdaziOLZvHKspr0NZy1HWyDTfcphVrTEh1UFJ
143
+ rnfsQDZuj2zvWNbQ3kOjmdozq0/BoWx3lF2yROY0vk4i9njDli1n2DkpEv5fwfBEIeL3XOIhgMcx
144
+ jXlUnhPIFAQJQuPTJcE/kjOhY/xm9EGd5UJREH0Yp6KAdUYWXM4UFKGpLrlWsYChSsccLwmdTDDo
145
+ j+KZBBXk8vrmZ5JyXZgMxmQmZTbu9RaLRRcth26a3/eG/b7fy9OY9TKYGkxfqTpUez3Jz4h3W0xi
146
+ LvDcGuaqL6n/CuuY1/NQlpcLjB4nw6Cv39+xUNF5MCzfFxNRXpp46lpbad1YoIeI6bA+BYfp3TH9
147
+ Zs7ye2jsXZ4u5OwynWc0aTusQZ94qIJZpiRn91xg+Iskc8akPstlpdDIvZIaUhfFRmAMErSDOtQ6
148
+ 1FZRa1xkhwZcF9r9jah9CxacXjMt5yssgFXsC1q1JFFCKUMKOYsjjFNMF4IUGVrFgz8IoCenIbBV
149
+ 6FKZUnIyp0v1VRfKgTqeT5RlLXREpWBgbqPy6dYbZU9rZdMMh0hjIorpZrtBpfsOoUlUxlbCQAWh
150
+ OXaVwedoaSuTHMS2zGbMfZ3mGwn8PwAAAP//7J3fT6NAEID/FR41uRIWWj1ffDE+NLkfxmvunhGo
151
+ NlehgWu0//3N7C64S8Faa67szbxaassO39dhdnb5SAO/sOeadjHPnTffnr27v3eneVKUq0J9Z1mc
152
+ uirWcOOw4Ymiy9lTMWrsphvL5cKbMtPZIaa5VuU1UYOHFdmFObKsu+Ho7tVL3kUDZul9XJonwiLc
153
+ X4S3zb0sRo26+fA+/hm85+XG/JIsvG7qhTMv62CwhlqtsmQBaV8K3vMgd5RpIWjzE45Q8oDl1xKS
154
+ zkUJR9xt5OKYSpdtrVItFwe4OHDZR6Vrcua5/4+y8yx+nqr5Hyw+wvfmWoHWNIyMZw+Np/VyMpt+
155
+ O4V78mVVeL/h+s/R03HuXT+ulsUGDuh527V8G3x0XM/LXehpuXrNTrW4z5XHUdvT2x+sbda2qe0e
156
+ XF3zt0yuNS3TlL29j7e/FEm8vHmA11jVYCk5HN4Kx6MuumILqkqpuTowBHW1r1jXbMXZ5uHWguDg
157
+ BAzCg8tap9iDVM6yHD75+3xO3mJv2GKt3tKjLhPAAQvcby1V678ruA6q+cbcgaOA/OtepaDN9hpN
158
+ X0EsN2gbJUZQZB8XZKYqs4DX12W1xmPh39+u4UoRUTYanySnkL5C3NSuIEYyRzxZjbKtbHWc0ExW
159
+ d9Hu5i8AdyEcLn5WvmPKH6cjue8vK79D+eN0S/nhHSvf5pxlT0b27Z3EiSu+2To91punKxlnOFdX
160
+ 6aEibtCOCi9Nf7bRcc2acgGbPgd5AmzO95jT3HKfuD3NFbiNUUAQqu8hbgyqWiBqsbJPWz5NxUjQ
161
+ VqrJlItalW1o9UlgLx2rdS+1FnA5L7Cl9GssF0otiYv1KJWHqomC96jD0FVtiEci5FpDZ60h3lK7
162
+ INoLsU20a1rnGsO7dW49lou4yevnkFVyMGSlIQdRQ3DVPtExbhFd4RoxbEHIvUzrhvsihqAx60p2
163
+ zWCYmP4xT4Atto/FfsGNCeRTV9b0C3GbHSUvfVKB2DkRNg4nRl6qby1xs5z/NDO1z9DOTaNWbgpj
164
+ QzMR7aTYNZNzLvoui7/pUcNXcmcs+afADyc3cfkTW/3V04YJ6d5C+XjMHh6zfw73AWDvonAXxQcx
165
+ 7D678Osm/M/nk5fHhIdnDO6Awe0PGFNLidoAnz/P1LpDbU/AmFpK1ApfhKa6z5naYVPbEzCmlhK1
166
+ QesiuGBqh01tT8CYWtLUCsbWMWwFc0uR23Fg3ChFAWM7dGw7A8bUUqJW+KFZ3oj4x3bg1PYEjKml
167
+ RG1kZ1xRyNQOm9qegDG1lKgN/MicAIwipnbY1PYEjKmlRe3Euk1iagdPbWfAmFpK1Ar/zEq4Jkzt
168
+ sKntCRhTS4taYambu6MGT21nwJhaStQG/sWEqXWI2p6AMbWUqI1axQ3uaTwKtX8BAAD//yNlDBlr
169
+ hI3m2pGUaw30LJAbXCajqywGea7FEWGjuXYk5VpDPQOURDC6ymKQ51ocETaaa0dSrjVGnUgwMRnN
170
+ tYM71+KIsNFcO7JyLcqGTdPR+dpBn2uxRthorqVproUpACVHEJ1RkpsDokNcI0JAtIu/c6ivqx+Q
171
+ DQAQ6ApYaiQBAA==
172
+ headers:
173
+ Accept-Ranges:
174
+ - bytes
175
+ Connection:
176
+ - keep-alive
177
+ Content-Encoding:
178
+ - gzip
179
+ Content-Length:
180
+ - '4399'
181
+ Content-Type:
182
+ - text/html
183
+ Date:
184
+ - Mon, 17 Feb 2025 16:56:19 GMT
185
+ Last-Modified:
186
+ - Mon, 16 Dec 2024 19:34:14 GMT
187
+ Strict-Transport-Security:
188
+ - max-age=31536000 ; includeSubDomains ; preload
189
+ Vary:
190
+ - Accept-Encoding
191
+ X-Content-Type-Options:
192
+ - nosniff
193
+ X-Frame-Options:
194
+ - SAMEORIGIN
195
+ X-XSS-Protection:
196
+ - 1; mode=block
197
+ x-amz-id-2:
198
+ - E26geXEJKqcpKRmxQRW43mwPREtnu0++BGRkV1iaIm+Z/8RtQCrvg0B8lqyUjeoAu3wjcZfwbe8=
199
+ x-amz-meta-mode:
200
+ - '33188'
201
+ x-amz-replication-status:
202
+ - COMPLETED
203
+ x-amz-request-id:
204
+ - 5S93Q8BMTVEG3CVQ
205
+ x-amz-server-side-encryption:
206
+ - AES256
207
+ x-amz-version-id:
208
+ - poa5Dfjus0LpfUlH7sTIt4TIBlhRg.Ab
209
+ status:
210
+ code: 200
211
+ message: OK
212
+ version: 1
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v2.yaml ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept-Encoding:
6
+ - gzip, deflate
7
+ Host:
8
+ - www.sec.gov
9
+ method: GET
10
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/0001552781-24-000634-index-headers.htm
11
+ response:
12
+ body:
13
+ string: !!binary |
14
+ H4sIAAAAAAAAA7WY33OiSBDHn+GvmNuHfRsFRaOGo2qE0XDLDw9wL9mrqxRJSMLGqIWmzr2//roB
15
+ BQXdvTW3D1mZ6enpT0/3dxT1KrAtTb3izNDUwAwsrvlcJ9wYM4/4b3ev8WoVL+ZEkiS502ld9GTa
16
+ Uig8dduK2swWiOovlIoqrKPoh3tanXXj+SFprJ5eZ2RAWlJLkVtyV1SZrvNJwBydU4MFPDBtrm1n
17
+ ZaXdbncyG983XYc6U3t4xL2oBjcTrvXoJ1GdTIeWqVPD1ac2dwKqu1Mn0HC7CfdM19ju0BZVM+C2
18
+ r3UaUmv7ud+QZFEdmZbpjNOYtCJafKTuiJZmqX7FnHHZCCYhRlHVXXvCnBu0YumjM3I9mxvUYQCp
19
+ uzojumvhH5g0PyGVJEFQCrAw4B07YOubOrjuSaLqemPmmF9YkCYCXUgKscP522N4v35L4vkTEHj+
20
+ NkedntRt9ZQu5M8P8rBNR3e9ieulPjSDY6y+zix6w5lHuWNocqsN8M2D0HPcz8yach+fgYMW6WZ6
21
+ oOEBIHhxRDKVWi35Ih22t8OQo05Hbvf7sMmh1+HUNx04aMoMw4P/NYzc4zyQNdfhpEjYxGJfGOYs
22
+ uNFYYDEnYDmkNoZPX8yJ1pbaeLqTK1ipKZJCuxdd2oLzgX1r9rGZab3jts0Df81tSTRLPSJSiiNZ
23
+ 5w1d4waeJh7XPs7Wl2i2rd6PT+vLuoKn8fwh2jSe1/sdtV2e7XJscW0z4tKafkQfBy2560iSnetA
24
+ EGp7clfzxJ8ObTNbgoUzELBysj4lW1KS9in4gliKlVnLEndEPA61C/O77sWGJWZqmJY0LDWiZZhA
25
+ O0Rk8UiMOInu14tkRRYJ0aNkHcZz4j4+xvdRsrokfAazKG5l00vClstFPF+/RvM1zhyuGxB98bqM
26
+ 5qsQzL8RliTh/ClC61WdeV2Qo3gezu/jcEb8dbjO14bzB8I3z/FdvF6JWDAGYT5i4znAot0x4XM+
27
+ lWlPeTJd6Q1EUch7GJezgSAKu4EitagisFjY0yJ4hLPwmAVBG/yafOI3aFNSJ0GAqncM5hloMoVu
28
+ McFat1C0RqaeUw75Z+6xMffJnyhgf8GysoRlm5Pin1CRM0EAPSsqTBAKTUtDgDRADvZEDc1A1yDF
29
+ qbARFDYCwobjqbaJQiY7JJOdAZpCMvKiTKsSfMMNCKKGldhvK4Tdr/NBTO4uoELicDvLLiItVE4U
30
+ tmpDcjlIjyITGCKDbZ3GwBGAyGDMW53JeXFojE8gNvg50xuh2CQVPJjZkzxRQD36HwNA4Wjui046
31
+ VpawdACzjA+Y5kyofp9y0Bock9MRzDBWBo5ELUW56Ny+LGjvBXUuc8GvM39qSJ6T6PHXD4dmHzRj
32
+ cf+Wtq9MKHmMZ9GAHBqpzVBT7xIti70S6NHI+TXt9xtyJfrWseijTb9P5e+HX9iV4m8dxl9Y/SzA
33
+ 2GOTK1OvALQrAC+L29niadH4unzKXHJf98wJttmhoypVaXGJp73jKc2fcRSyJDd8/aoCo9TA0O21
34
+ 0disHuqAroeeRcAbt1na6EfJ9jyV6JQS3Z7NmYQGH1UIO6cIbx+ix8bmdXaUEjyajpnK8I+Rbj2W
35
+ aDt1tFu7M4ktNqwQd08Sz8K7k8TgkVs/Cps7K8F2a2FzuzNh4UtfBfbiJOwyiU7CgkcfNmT/5YBz
36
+ nyXmi1rm3O5nma9tqwLbr8B6hWAe8JkGZwOSFXG+xxE471BM+zse7ywF5dc6r0LIUoVi9zXv1ouW
37
+ i2Td2MxWm/Ogal2WLzxpB1lr+p7HJldvbP958Xfj6+o8xtxJmaq4x/PJd+Wo3t1JlrH71ZkohZ8y
38
+ TXGrF/PvClS9/kbxDL5R+2+vr2Hy7Zh2/DjXZjVrVlyWEZVSGZbN8NfqDrVU1d9x5bE/CJAeejtD
39
+ h37zXaeauOodY0fr0IrnLysousX8vKzt+yozFtfLvs3P0sG38ypc9U6pfa2wuUtmjX/i5XmsJ12X
40
+ 0Ytb5uSSd+2P3smfGrdQpO/TInVey+y92p8nqWV9o/yIw12v1Bl/J43NwxdPahNfSanN9P2U2kxf
41
+ Fov/AnSFX/M0FgAA
42
+ headers:
43
+ Accept-Ranges:
44
+ - bytes
45
+ Connection:
46
+ - keep-alive
47
+ Content-Encoding:
48
+ - gzip
49
+ Content-Length:
50
+ - '1551'
51
+ Content-Type:
52
+ - text/html
53
+ Date:
54
+ - Mon, 17 Feb 2025 16:55:12 GMT
55
+ Last-Modified:
56
+ - Mon, 16 Dec 2024 19:34:14 GMT
57
+ Strict-Transport-Security:
58
+ - max-age=31536000 ; includeSubDomains ; preload
59
+ Vary:
60
+ - Accept-Encoding
61
+ X-Content-Type-Options:
62
+ - nosniff
63
+ X-Frame-Options:
64
+ - SAMEORIGIN
65
+ X-XSS-Protection:
66
+ - 1; mode=block
67
+ x-amz-id-2:
68
+ - 36KuPmr3UWIQ8x31DyNQbm1OVx9hB40N0dw451XQLYA4IrIsvW2q02kCDDjAMIZPKkuwZB1/wS0=
69
+ x-amz-meta-mode:
70
+ - '33188'
71
+ x-amz-replication-status:
72
+ - COMPLETED
73
+ x-amz-request-id:
74
+ - 5S9961E5DEA95VYH
75
+ x-amz-server-side-encryption:
76
+ - AES256
77
+ x-amz-version-id:
78
+ - Em.xpqQlD0bTs0G3LJTAM3AKc2Z7jmV.
79
+ status:
80
+ code: 200
81
+ message: OK
82
+ - request:
83
+ body: null
84
+ headers:
85
+ Accept-Encoding:
86
+ - gzip, deflate
87
+ Host:
88
+ - www.sec.gov
89
+ method: GET
90
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/R1.htm
91
+ response:
92
+ body:
93
+ string: !!binary |
94
+ H4sIAAAAAAAAA+xaa3PaRhT9XH7FHbWN7YwBSQb8EpohQskwsYEa0ibtdDyLtIBioWVWC5hm8t97
95
+ Vw/b2EQmnXQGJfABYe25y96z5x7tLjaaHevdpd3umwWj/6Frm+8vLwpGz/7tnd22bPO0YLxuXdjt
96
+ xqVtXmmlsZgUjKbds65a3X6r0zZbTbtxBu9fXV1A2hH2Y7+X3SHYlxdKXLwIT/jUNMrxtWD4XnAD
97
+ nPp1JRRLn4ZjSoUCYjmldUXQW1F2wlCBMafDuuIFjj9zaZnTKeOiJFuwh9Dh3lQ8DPlI5iS+q0DI
98
+ nbrSG7NF6SOiyy+hyaDNBFzRCZtT6I+9ECw2mdBAwMuyUY7jzOxezcJP8Ws4CxzhsQAEG4182kZc
99
+ zxtgTiPYpwfwKcV5Q/y7FNw3l6JsS64XTn2yrNf3AhbQvQcRWei9gc+cm73zFPsZqB9S+ASZQdE3
100
+ nMPnu6i7ZAtGOZmdAXOXktIpCSAKrytJ/BnI+HPFnB+V9EqpgsEIMg1BBj4FxydhWFfiiVFgwLhL
101
+ eV1RFXCo7yPSwQHVFV0Bz8V5dCeV6oleramntcppVVflNAou38ZpV8LHWCZDg7qiKcDZIv1sGq43
102
+ T4e38FwxPgNdVae3ODojFJwFI9PCyeXGgKPSkjtGGaOk8Mar3zOO+zOb1CmBdnSIXemVFXA5Hhq/
103
+ zzIar5v+PcWhJqOJEy8OmBBscgbRmGBOfG+EIxdsit9F0jCSyvpeWGdz5rn76gEGMdS659wk2g3x
104
+ rXG1DwLlegh7Lh1i5LVLvesmc2ZSvH0U6t4hLLzAZQvAHsy0BWSTUSYyHXdl4FLWinlSfGvEcxlP
105
+ aSHBPUmbbVHaDczMldm99sloNe+7JpBtGYkPCVbNZqlv44x3KfeYawduk4gvTX2MAQSBRGWQcSf/
106
+ WP+5E4Qd4BNl+dpDE55NBpSvEhK3gmyGuD2DClXViqqua8f500ac5xUdeeh6JBBtMqFrmbiHgMRk
107
+ sGF1rAZYnQv5lldZWFgLnPitwKW3b+lyLSMJBiIQICpTIfjStaPKhoWyfRLpk9uWixl7Q88hcgWT
108
+ UTWIhVXw8zVUPSmqNf2kUqvmVTOtwGEclzNRwj2B7mmxGUpkaTF3fU2tRBxCFAOMQxIHMjDLge28
109
+ iqnhupyGYXK58AKqrSUoARymH0BCoRNksYKtBcCXxRxStJhPoOuTf0heVZVkbuHHDu+zRZBNlMRJ
110
+ CUlkBkkN4aOTb0jK1uonKpgO73I2x53e+hq7Y+auulJ4Bj1vGjmXS5eFgvh/etMves8dLzEUEPuc
111
+ 3xypR9pR/iQjS6LBKXlKRVQssum5zPcrauUgf5K4QAv0u2Pchq97WEetEDU//3iuHdeKuqZr+Zv+
112
+ P7gnBA3kkc0sSJYj4SoTCQRWMd9mI7hNeugxhHnCC0aX6IXcI/4qD/ftkAK+v91wl9P4+M6h0TEI
113
+ 7v0p7wyHj8sDcUXnARBiJETQ708bj2hpheGM8s3JifH/A0fbJJ340WlPKB9hjbzhbCHGSBqOf/0O
114
+ MUVCDIUE+22IGW8JMbOwOCJkeh2tr6QWLNlzZ9gTzLlp3Hph/YZdS2nJTRHeUkt6tUv478Sf0Uv6
115
+ 9LEUQyHCHsIvEg6Ihyggg7oXP2s19Tx/oupRZ4bPn6WmD/ryJ45VNqJbwIag6fuDA0jBWccvEX/R
116
+ FugLHObOm/qcuFhGveVkwB49spImiNsyaHm74VnUNkrDvnXGJBjRp4dzKQJSyHOHc+0PvQ1PDXLl
117
+ L8iDVjo5rraZoGFzRnVVr60zlwj0K0QwQJw8u67tTOVZU4l5i0zlEXc/qJlsmvk2imJnJxvYiVo6
118
+ rqrP2UkE2tnJV9tJzNvOTh7YiZVfVez8ZKPliaY/XJ4cr1+eIOiRnxzv/GSD5Ynk7amfbPiD/Hfo
119
+ Jzn8V4SdnXzV8mTVTk7XL0+e2snpzk42WJ6st5PTH9ZOTjf8QXQbVbHzk//iJ9o6Q0EgqCUspOqj
120
+ wtDgrxj/985edvby9fbyKr+q2GJ7+RcAAP//7J1Lc9s2EIDv+RWYXtJORb1IuY6a8YwfScbjSeLG
121
+ PbRHiIQszFAkS4CWlV/fXYCSSQqKoSSTSCJ8sCUKz+Xi8y6w4u4TXoJ+ZTfF75vNFShUN1f8vuOJ
122
+ BU9Qbhs8Adm1lCd+3/HkuHky6A6ru7O+0VpRhRo8GTieWOymDE27s75l8NcR8uQAw94cTnbAiV/3
123
+ fvyhCSf+5m6KP3Q4eRYnZrm1FSWWM99HhXAosfJ0/GoYiu+bPR2/GYbi+w4lFp6ObwpD8S2/lXCE
124
+ ODnA72M4nOyEk1Ft42QLTkYbGycOJzY4GRk3TlqME3fOc9w8GXRPap7OyLxxcrJhsY8cTyw2Tk5M
125
+ Bzv+AX6l/TvxxHLm+6gUDid2UW0182RL0P1gwzxxUbJWUW1G86S1UbK2M99HpXA4sfJ2Xo2exYkq
126
+ 5HDyFd7OK+OxTotx4ryd4+aJ39iMNQbd+4bNWBd0b3OuY5JbW1niAu6PGyX97mnV0wm2RLCdNj2d
127
+ wEWw2ZgmpyZPJ2htBFvgItiOnCeDbr/Gky0RbP0NnrgINpudk76RJ62NYLOd+T4qhcOJladTOycO
128
+ ArOns3FOHAQOJxaejklubUXJAT7C2qFkJ5TUnnwyMoac+JtPPhm5kBMblJjk1lKU2M58HxXix6Gk
129
+ pzLmwItKEptmjp0XZVqdRh6djEaRyqPTzKuD70upFHL2iU2vqKS/bGldpeDZlkpF9Z2fVSY14xFT
130
+ ci9bC9M4zcdkMeMSG7O+B9gO3AMU6T9r6a2VA3vUeX3KbjET0XoG5cTHZKiS/LyA0Vj3+5SZSd9/
131
+ dVM9csWmPOH4ZGA1GJXuJzu7SNOY0YRMQRRQmkoCFWQOC3sxYwlcYTrJVZgmEp+TSlF2gmQ5e+Bp
132
+ IeKlN+Uxi/C55DQMWSbhtSgmcy4E9NR93cvOdGqhb5/C7wTuM8vxka1iNYVt+gQz+5CSaD1lAn3y
133
+ GFXsOw/piklo+Pnx7KreWkdeKCUp8zmpZTiupHdaLcZVYihUUA/bYOMkXeQ0g4439L3BqFoL5Xi8
134
+ nN/P5JgEoHvGRmsjUp+RW5AjfzSMTo3A1OlTK7h2Vc4mU/XHSR7z8USr6TX849fJnb7U3gWNKSjJ
135
+ 1iYT+uX6ZQ6hbdWjQie8MEFupVirbFrrBb+G4A/kXPXB8S3HnHpIfghyQGMnVCaOA9NPB1NVQQ+T
136
+ S0maz8Fa+8yiO5nDEB2gdgGUMbFby0n1FiwptLreXL07/1QxpVRiToH4ghJzcurdjFW5SGWFmarX
137
+ OhtlZ+M6o6CrTEjCHtCG08VY9CeZ2nX2wRuc697A2EPfCBvXtWkckxQ+yTdq6wpgedRHh9Wx15wm
138
+ QhMwU3e/S/7G9nE9yVUNNQeQ17/w471/711dOYNyH7htXLeHCXBUMQftr4E2TqTlrEZiIeuQV9Eq
139
+ A+mEIeIyTNIVgTP8qyjCGaGCDPreTQd//9Uhweno4vbjXYcwGf6mybeurxqEjmM+5+hNy1SxUMCK
140
+ wJ0h8MyhMXWlyFZAvXtz2eRvh5RsX4BSkJcfkdEvHT73CZ/bmLPP1ITq4ydNw1KOnrvQc2v+yJaj
141
+ tJYgc0BW0DyXMumQi4LHak8eFRwzIeaMrdL4Op79dJ5t1elDg5vz6b8z4J6yvrYcb6i8K6+2nuLW
142
+ 8Wu/+PWkso5eLadXLQlxywGGIlhvF2Y65TK8+8wzdaziOLZvHKspr0NZy1HWyDTfcphVrTEh1UFJ
143
+ rnfsQDZuj2zvWNbQ3kOjmdozq0/BoWx3lF2yROY0vk4i9njDli1n2DkpEv5fwfBEIeL3XOIhgMcx
144
+ jXlUnhPIFAQJQuPTJcE/kjOhY/xm9EGd5UJREH0Yp6KAdUYWXM4UFKGpLrlWsYChSsccLwmdTDDo
145
+ j+KZBBXk8vrmZ5JyXZgMxmQmZTbu9RaLRRcth26a3/eG/b7fy9OY9TKYGkxfqTpUez3Jz4h3W0xi
146
+ LvDcGuaqL6n/CuuY1/NQlpcLjB4nw6Cv39+xUNF5MCzfFxNRXpp46lpbad1YoIeI6bA+BYfp3TH9
147
+ Zs7ye2jsXZ4u5OwynWc0aTusQZ94qIJZpiRn91xg+Iskc8akPstlpdDIvZIaUhfFRmAMErSDOtQ6
148
+ 1FZRa1xkhwZcF9r9jah9CxacXjMt5yssgFXsC1q1JFFCKUMKOYsjjFNMF4IUGVrFgz8IoCenIbBV
149
+ 6FKZUnIyp0v1VRfKgTqeT5RlLXREpWBgbqPy6dYbZU9rZdMMh0hjIorpZrtBpfsOoUlUxlbCQAWh
150
+ OXaVwedoaSuTHMS2zGbMfZ3mGwn8PwAAAP//7J3fT6NAEID/FR41uRIWWj1ffDE+NLkfxmvunhGo
151
+ NlehgWu0//3N7C64S8Faa67szbxaassO39dhdnb5SAO/sOeadjHPnTffnr27v3eneVKUq0J9Z1mc
152
+ uirWcOOw4Ymiy9lTMWrsphvL5cKbMtPZIaa5VuU1UYOHFdmFObKsu+Ho7tVL3kUDZul9XJonwiLc
153
+ X4S3zb0sRo26+fA+/hm85+XG/JIsvG7qhTMv62CwhlqtsmQBaV8K3vMgd5RpIWjzE45Q8oDl1xKS
154
+ zkUJR9xt5OKYSpdtrVItFwe4OHDZR6Vrcua5/4+y8yx+nqr5Hyw+wvfmWoHWNIyMZw+Np/VyMpt+
155
+ O4V78mVVeL/h+s/R03HuXT+ulsUGDuh527V8G3x0XM/LXehpuXrNTrW4z5XHUdvT2x+sbda2qe0e
156
+ XF3zt0yuNS3TlL29j7e/FEm8vHmA11jVYCk5HN4Kx6MuumILqkqpuTowBHW1r1jXbMXZ5uHWguDg
157
+ BAzCg8tap9iDVM6yHD75+3xO3mJv2GKt3tKjLhPAAQvcby1V678ruA6q+cbcgaOA/OtepaDN9hpN
158
+ X0EsN2gbJUZQZB8XZKYqs4DX12W1xmPh39+u4UoRUTYanySnkL5C3NSuIEYyRzxZjbKtbHWc0ExW
159
+ d9Hu5i8AdyEcLn5WvmPKH6cjue8vK79D+eN0S/nhHSvf5pxlT0b27Z3EiSu+2To91punKxlnOFdX
160
+ 6aEibtCOCi9Nf7bRcc2acgGbPgd5AmzO95jT3HKfuD3NFbiNUUAQqu8hbgyqWiBqsbJPWz5NxUjQ
161
+ VqrJlItalW1o9UlgLx2rdS+1FnA5L7Cl9GssF0otiYv1KJWHqomC96jD0FVtiEci5FpDZ60h3lK7
162
+ INoLsU20a1rnGsO7dW49lou4yevnkFVyMGSlIQdRQ3DVPtExbhFd4RoxbEHIvUzrhvsihqAx60p2
163
+ zWCYmP4xT4Atto/FfsGNCeRTV9b0C3GbHSUvfVKB2DkRNg4nRl6qby1xs5z/NDO1z9DOTaNWbgpj
164
+ QzMR7aTYNZNzLvoui7/pUcNXcmcs+afADyc3cfkTW/3V04YJ6d5C+XjMHh6zfw73AWDvonAXxQcx
165
+ 7D678Osm/M/nk5fHhIdnDO6Awe0PGFNLidoAnz/P1LpDbU/AmFpK1ApfhKa6z5naYVPbEzCmlhK1
166
+ QesiuGBqh01tT8CYWtLUCsbWMWwFc0uR23Fg3ChFAWM7dGw7A8bUUqJW+KFZ3oj4x3bg1PYEjKml
167
+ RG1kZ1xRyNQOm9qegDG1lKgN/MicAIwipnbY1PYEjKmlRe3Euk1iagdPbWfAmFpK1Ar/zEq4Jkzt
168
+ sKntCRhTS4taYambu6MGT21nwJhaStQG/sWEqXWI2p6AMbWUqI1axQ3uaTwKtX8BAAD//yNlDBlr
169
+ hI3m2pGUaw30LJAbXCajqywGea7FEWGjuXYk5VpDPQOURDC6ymKQ51ocETaaa0dSrjVGnUgwMRnN
170
+ tYM71+KIsNFcO7JyLcqGTdPR+dpBn2uxRthorqVproUpACVHEJ1RkpsDokNcI0JAtIu/c6ivqx+Q
171
+ DQAQ6ApYaiQBAA==
172
+ headers:
173
+ Accept-Ranges:
174
+ - bytes
175
+ Connection:
176
+ - keep-alive
177
+ Content-Encoding:
178
+ - gzip
179
+ Content-Length:
180
+ - '4399'
181
+ Content-Type:
182
+ - text/html
183
+ Date:
184
+ - Mon, 17 Feb 2025 16:55:13 GMT
185
+ Last-Modified:
186
+ - Mon, 16 Dec 2024 19:34:14 GMT
187
+ Strict-Transport-Security:
188
+ - max-age=31536000 ; includeSubDomains ; preload
189
+ Vary:
190
+ - Accept-Encoding
191
+ X-Content-Type-Options:
192
+ - nosniff
193
+ X-Frame-Options:
194
+ - SAMEORIGIN
195
+ X-XSS-Protection:
196
+ - 1; mode=block
197
+ x-amz-id-2:
198
+ - E26geXEJKqcpKRmxQRW43mwPREtnu0++BGRkV1iaIm+Z/8RtQCrvg0B8lqyUjeoAu3wjcZfwbe8=
199
+ x-amz-meta-mode:
200
+ - '33188'
201
+ x-amz-replication-status:
202
+ - COMPLETED
203
+ x-amz-request-id:
204
+ - 5S93Q8BMTVEG3CVQ
205
+ x-amz-server-side-encryption:
206
+ - AES256
207
+ x-amz-version-id:
208
+ - poa5Dfjus0LpfUlH7sTIt4TIBlhRg.Ab
209
+ status:
210
+ code: 200
211
+ message: OK
212
+ version: 1
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff