CatPtain committed on
Commit
1b6b94f
·
verified ·
1 Parent(s): b8f2a01

Upload 131 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. openbb_platform/providers/sec/README.md +13 -0
  3. openbb_platform/providers/sec/__init__.py +1 -0
  4. openbb_platform/providers/sec/openbb_sec/__init__.py +50 -0
  5. openbb_platform/providers/sec/openbb_sec/models/__init__.py +1 -0
  6. openbb_platform/providers/sec/openbb_sec/models/cik_map.py +61 -0
  7. openbb_platform/providers/sec/openbb_sec/models/company_filings.py +347 -0
  8. openbb_platform/providers/sec/openbb_sec/models/compare_company_facts.py +190 -0
  9. openbb_platform/providers/sec/openbb_sec/models/equity_ftd.py +104 -0
  10. openbb_platform/providers/sec/openbb_sec/models/equity_search.py +91 -0
  11. openbb_platform/providers/sec/openbb_sec/models/etf_holdings.py +870 -0
  12. openbb_platform/providers/sec/openbb_sec/models/form_13FHR.py +107 -0
  13. openbb_platform/providers/sec/openbb_sec/models/htm_file.py +97 -0
  14. openbb_platform/providers/sec/openbb_sec/models/insider_trading.py +221 -0
  15. openbb_platform/providers/sec/openbb_sec/models/institutions_search.py +75 -0
  16. openbb_platform/providers/sec/openbb_sec/models/latest_financial_reports.py +261 -0
  17. openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py +1394 -0
  18. openbb_platform/providers/sec/openbb_sec/models/py.typed +0 -0
  19. openbb_platform/providers/sec/openbb_sec/models/rss_litigation.py +98 -0
  20. openbb_platform/providers/sec/openbb_sec/models/schema_files.py +64 -0
  21. openbb_platform/providers/sec/openbb_sec/models/sec_filing.py +728 -0
  22. openbb_platform/providers/sec/openbb_sec/models/sic_search.py +111 -0
  23. openbb_platform/providers/sec/openbb_sec/models/symbol_map.py +62 -0
  24. openbb_platform/providers/sec/openbb_sec/py.typed +0 -0
  25. openbb_platform/providers/sec/openbb_sec/utils/__init__.py +1 -0
  26. openbb_platform/providers/sec/openbb_sec/utils/definitions.py +1350 -0
  27. openbb_platform/providers/sec/openbb_sec/utils/form4.py +657 -0
  28. openbb_platform/providers/sec/openbb_sec/utils/frames.py +284 -0
  29. openbb_platform/providers/sec/openbb_sec/utils/helpers.py +362 -0
  30. openbb_platform/providers/sec/openbb_sec/utils/parse_13f.py +231 -0
  31. openbb_platform/providers/sec/openbb_sec/utils/py.typed +0 -0
  32. openbb_platform/providers/sec/poetry.lock +0 -0
  33. openbb_platform/providers/sec/pyproject.toml +26 -0
  34. openbb_platform/providers/sec/tests/__init__.py +1 -0
  35. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v1.yaml +0 -0
  36. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v2.yaml +0 -0
  37. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v1.yaml +0 -0
  38. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v2.yaml +0 -0
  39. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v1.yaml +0 -0
  40. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v2.yaml +0 -0
  41. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v1.yaml +0 -0
  42. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v2.yaml +0 -0
  43. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v1.yaml +0 -0
  44. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v2.yaml +0 -0
  45. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v1.yaml +0 -0
  46. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v2.yaml +0 -0
  47. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v1.yaml +212 -0
  48. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v2.yaml +212 -0
  49. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v1.yaml +0 -0
  50. openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v2.yaml +0 -0
.gitattributes CHANGED
@@ -16,3 +16,5 @@ openbb_platform/providers/bls/openbb_bls/assets/tu_series.xz filter=lfs diff=lfs
16
  openbb_platform/providers/bls/openbb_bls/assets/wages_series.xz filter=lfs diff=lfs merge=lfs -text
17
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
18
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
 
 
 
16
  openbb_platform/providers/bls/openbb_bls/assets/wages_series.xz filter=lfs diff=lfs merge=lfs -text
17
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
18
  openbb_platform/providers/finra/tests/record/http/test_finra_fetchers/test_finra_short_interest_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
19
+ openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_institutions_search_fetcher_urllib3_v1.yaml filter=lfs diff=lfs merge=lfs -text
20
+ openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_institutions_search_fetcher_urllib3_v2.yaml filter=lfs diff=lfs merge=lfs -text
openbb_platform/providers/sec/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenBB SEC Provider
2
+
3
+ This extension integrates the [SEC](https://www.sec.gov/edgar) data provider into the OpenBB Platform.
4
+
5
+ ## Installation
6
+
7
+ To install the extension:
8
+
9
+ ```bash
10
+ pip install openbb-sec
11
+ ```
12
+
13
+ Documentation available [here](https://docs.openbb.co/platform/developer_guide/contributing).
openbb_platform/providers/sec/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Provider."""
openbb_platform/providers/sec/openbb_sec/__init__.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC provider module."""
2
+
3
+ from openbb_core.provider.abstract.provider import Provider
4
+ from openbb_sec.models.cik_map import SecCikMapFetcher
5
+ from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
6
+ from openbb_sec.models.compare_company_facts import SecCompareCompanyFactsFetcher
7
+ from openbb_sec.models.equity_ftd import SecEquityFtdFetcher
8
+ from openbb_sec.models.equity_search import SecEquitySearchFetcher
9
+ from openbb_sec.models.etf_holdings import SecEtfHoldingsFetcher
10
+ from openbb_sec.models.form_13FHR import SecForm13FHRFetcher
11
+ from openbb_sec.models.htm_file import SecHtmFileFetcher
12
+ from openbb_sec.models.insider_trading import SecInsiderTradingFetcher
13
+ from openbb_sec.models.institutions_search import SecInstitutionsSearchFetcher
14
+ from openbb_sec.models.latest_financial_reports import SecLatestFinancialReportsFetcher
15
+ from openbb_sec.models.management_discussion_analysis import (
16
+ SecManagementDiscussionAnalysisFetcher,
17
+ )
18
+ from openbb_sec.models.rss_litigation import SecRssLitigationFetcher
19
+ from openbb_sec.models.schema_files import SecSchemaFilesFetcher
20
+ from openbb_sec.models.sec_filing import SecFilingFetcher
21
+ from openbb_sec.models.sic_search import SecSicSearchFetcher
22
+ from openbb_sec.models.symbol_map import SecSymbolMapFetcher
23
+
24
# Provider registration for the SEC data source. Each key in fetcher_dict is
# the standard-model name the fetcher implements. Note that "Filings" and
# "CompanyFilings" intentionally map to the same fetcher class.
sec_provider = Provider(
    name="sec",
    website="https://www.sec.gov/data",
    description="SEC is the public listings regulatory body for the United States.",
    # The SEC's public endpoints require no API key.
    credentials=None,
    fetcher_dict={
        "CikMap": SecCikMapFetcher,
        "CompanyFilings": SecCompanyFilingsFetcher,
        "CompareCompanyFacts": SecCompareCompanyFactsFetcher,
        "EquityFTD": SecEquityFtdFetcher,
        "EquitySearch": SecEquitySearchFetcher,
        "EtfHoldings": SecEtfHoldingsFetcher,
        "Filings": SecCompanyFilingsFetcher,
        "Form13FHR": SecForm13FHRFetcher,
        "SecHtmFile": SecHtmFileFetcher,
        "InsiderTrading": SecInsiderTradingFetcher,
        "InstitutionsSearch": SecInstitutionsSearchFetcher,
        "LatestFinancialReports": SecLatestFinancialReportsFetcher,
        "ManagementDiscussionAnalysis": SecManagementDiscussionAnalysisFetcher,
        "RssLitigation": SecRssLitigationFetcher,
        "SchemaFiles": SecSchemaFilesFetcher,
        "SecFiling": SecFilingFetcher,
        "SicSearch": SecSicSearchFetcher,
        "SymbolMap": SecSymbolMapFetcher,
    },
    repr_name="Securities and Exchange Commission (SEC)",
)
openbb_platform/providers/sec/openbb_sec/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Provider Models."""
openbb_platform/providers/sec/openbb_sec/models/cik_map.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC CIK Mapping Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.cik_map import CikMapData, CikMapQueryParams
9
+ from pydantic import Field
10
+
11
+
12
class SecCikMapQueryParams(CikMapQueryParams):
    """SEC CIK Mapping Query.

    Source: https://sec.gov/
    """

    # Toggles the on-disk cache used by the symbol-to-CIK lookup helper.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache for the request, default is True.",
    )
22
+
23
+
24
class SecCikMapData(CikMapData):
    """SEC CIK Mapping Data."""

    # No provider-specific fields; everything is inherited from the standard model.
26
+
27
+
28
class SecCikMapFetcher(
    Fetcher[
        SecCikMapQueryParams,
        SecCikMapData,
    ]
):
    """SEC CIK Map Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCikMapQueryParams:
        """Transform the query."""
        return SecCikMapQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCikMapQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Returns ``{"cik": <value>}`` on success, or
        ``{"Error": "Symbol not found."}`` when the symbol cannot be mapped.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import symbol_map

        cik = await symbol_map(query.symbol, query.use_cache)
        # BUGFIX: test the looked-up value itself. The previous code wrapped
        # the result in a dict first and tested the dict, which was always
        # truthy, so a failed lookup was never reported.
        if not cik:
            return {"Error": "Symbol not found."}
        return {"cik": cik}

    @staticmethod
    def transform_data(
        query: SecCikMapQueryParams, data: Dict, **kwargs: Any
    ) -> SecCikMapData:
        """Transform the data to the standard format."""
        return SecCikMapData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/company_filings.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Company Filings Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import (
6
+ date as dateType,
7
+ datetime,
8
+ )
9
+ from typing import Any, Dict, List, Optional, Union
10
+ from warnings import warn
11
+
12
+ from openbb_core.app.model.abstract.error import OpenBBError
13
+ from openbb_core.provider.abstract.fetcher import Fetcher
14
+ from openbb_core.provider.standard_models.company_filings import (
15
+ CompanyFilingsData,
16
+ CompanyFilingsQueryParams,
17
+ )
18
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
19
+ from openbb_core.provider.utils.errors import EmptyDataError
20
+ from openbb_sec.utils.definitions import FORM_LIST, HEADERS
21
+ from pydantic import Field, field_validator
22
+
23
+
24
class SecCompanyFilingsQueryParams(CompanyFilingsQueryParams):
    """SEC Company Filings Query.

    Source: https://sec.gov/
    """

    # Advertise in the JSON schema that form_type accepts a comma-separated
    # list of values drawn from FORM_LIST.
    __json_schema_extra__ = {
        "form_type": {
            "multiple_items_allowed": True,
            "choices": FORM_LIST,
        }
    }

    # CIK may be supplied as a string or an integer; any stripped leading
    # zeros are restored downstream before building the request URL.
    cik: Optional[Union[str, int]] = Field(
        description="Lookup filings by Central Index Key (CIK) instead of by symbol.",
        default=None,
    )
    start_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("start_date", ""),
    )
    end_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("end_date", ""),
    )
    # Stored as a comma-joined string after validation (see validate_form_type).
    form_type: Optional[str] = Field(
        description="SEC form type to filter by.",
        default=None,
    )
    limit: Optional[int] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("limit", ""),
    )
    use_cache: bool = Field(
        description="Whether or not to use cache. If True, cache will store for one day.",
        default=True,
    )

    @field_validator("form_type", mode="before", check_fields=False)
    @classmethod
    def validate_form_type(cls, v):
        """Validate form_type.

        Accepts a comma-separated string or a list. Each entry is uppercased
        and checked against FORM_LIST; invalid entries produce a warning,
        and an OpenBBError is raised when no entry is valid.
        """
        if not v:
            return None
        if isinstance(v, str):
            forms = v.split(",")
        elif isinstance(v, list):
            forms = v
        else:
            raise OpenBBError("Unexpected form_type value.")
        new_forms: list = []
        messages: list = []
        for form in forms:
            if form.upper() in FORM_LIST:
                new_forms.append(form.upper())
            else:
                messages.append(f"Invalid form type: {form}")

        if not new_forms:
            raise OpenBBError(
                f"No valid forms provided -> {', '.join(messages)} -> Valid forms: {', '.join(FORM_LIST)}"
            )

        # Some entries were valid; only warn about the invalid ones.
        if new_forms and messages:
            warn("\n ".join(messages))

        # Normalize back to a single comma-joined string (or a lone value).
        return ",".join(new_forms) if len(new_forms) > 1 else new_forms[0]
91
+
92
+
93
class SecCompanyFilingsData(CompanyFilingsData):
    """SEC Company Filings Data."""

    # Maps model field names to the camelCase keys of the SEC JSON payload
    # (including the URL columns synthesized in the fetcher's transform_data).
    __alias_dict__ = {
        "filing_date": "filingDate",
        "accepted_date": "acceptanceDateTime",
        "filing_url": "filingDetailUrl",
        "report_url": "primaryDocumentUrl",
        "report_type": "form",
        "report_date": "reportDate",
        "primary_doc_description": "primaryDocDescription",
        "primary_doc": "primaryDocument",
        "accession_number": "accessionNumber",
        "file_number": "fileNumber",
        "film_number": "filmNumber",
        "is_inline_xbrl": "isInlineXBRL",
        "is_xbrl": "isXBRL",
        "complete_submission_url": "completeSubmissionUrl",
        "filing_detail_url": "filingDetailUrl",
    }

    report_date: Optional[dateType] = Field(
        description="The date of the filing.",
        default=None,
    )
    act: Optional[Union[str, int]] = Field(
        description="The SEC Act number.", default=None
    )
    items: Optional[Union[str, float]] = Field(
        description="The SEC Item numbers.", default=None
    )
    primary_doc_description: Optional[str] = Field(
        description="The description of the primary document.",
        default=None,
    )
    primary_doc: Optional[str] = Field(
        description="The filename of the primary document.",
        default=None,
    )
    accession_number: Optional[Union[str, int]] = Field(
        description="The accession number.",
        default=None,
    )
    file_number: Optional[Union[str, int]] = Field(
        description="The file number.",
        default=None,
    )
    film_number: Optional[Union[str, int]] = Field(
        description="The film number.",
        default=None,
    )
    is_inline_xbrl: Optional[Union[str, int]] = Field(
        description="Whether the filing is an inline XBRL filing.",
        default=None,
    )
    is_xbrl: Optional[Union[str, int]] = Field(
        description="Whether the filing is an XBRL filing.",
        default=None,
    )
    size: Optional[Union[str, int]] = Field(
        description="The size of the filing.", default=None
    )
    complete_submission_url: Optional[str] = Field(
        description="The URL to the complete filing submission.",
        default=None,
    )
    filing_detail_url: Optional[str] = Field(
        description="The URL to the filing details.",
        default=None,
    )

    @field_validator("report_date", mode="before", check_fields=False)
    @classmethod
    def validate_report_date(cls, v: Optional[Union[str, dateType]]):
        """Validate report_date.

        Accepts a date object, an ISO "YYYY-MM-DD" string, or an empty
        string (normalized to None).
        """
        if isinstance(v, dateType):
            return v
        # The SEC payload uses "" for missing report dates.
        v = v if v != "" else None
        return (
            datetime.strptime(v, "%Y-%m-%d").date()
            if v and isinstance(v, str)
            else None
        )
176
+
177
+
178
class SecCompanyFilingsFetcher(
    Fetcher[SecCompanyFilingsQueryParams, List[SecCompanyFilingsData]]
):
    """SEC Company Filings Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCompanyFilingsQueryParams:
        """Transform query params."""
        return SecCompanyFilingsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCompanyFilingsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Extract the data from the SEC endpoint.

        Resolves the CIK (from the symbol when necessary), downloads the
        recent-filings index from data.sec.gov, and follows the pagination
        files when the query needs more than the first page of records.
        """
        # pylint: disable=import-outside-toplevel
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request, amake_requests
        from openbb_sec.utils.helpers import symbol_map

        from pandas import DataFrame

        if query.symbol and not query.cik:
            query.cik = await symbol_map(
                query.symbol.lower(), use_cache=query.use_cache
            )
            if not query.cik:
                raise OpenBBError(f"CIK not found for symbol {query.symbol}")
        if query.cik is None:
            raise OpenBBError("CIK or symbol must be provided.")

        # The endpoint expects a 10-digit, zero-padded CIK. Convert with
        # `str()` first — the query model allows an integer CIK, which would
        # make `len()` raise a TypeError — then left-pad with `zfill` to
        # restore leading zeros stripped by integer storage.
        query.cik = str(query.cik).zfill(10)

        url = f"https://data.sec.gov/submissions/CIK{query.cik}.json"
        data: Union[dict, List[dict]] = []
        if query.use_cache is True:
            cache_dir = f"{get_user_cache_directory()}/http/sec_company_filings"
            async with CachedSession(
                cache=SQLiteBackend(cache_dir, expire_after=3600 * 24)
            ) as session:
                # Purge stale entries so the one-day TTL is honored.
                await session.delete_expired_responses()
                try:
                    data = await amake_request(url, headers=HEADERS, session=session)  # type: ignore
                finally:
                    await session.close()
        else:
            data = await amake_request(url, headers=HEADERS)  # type: ignore

        # The most recent filings are embedded directly in the first response.
        filings = (
            DataFrame.from_records(data["filings"].get("recent"))  # type: ignore
            if "filings" in data
            else DataFrame()
        )
        results = filings.to_dict("records")

        # If there are lots of filings, there will be custom pagination:
        # older records live in separate per-chunk JSON files listed under
        # data["filings"]["files"].
        if (
            (query.limit and len(filings) >= 1000)
            or query.form_type is not None
            or query.limit == 0
        ):

            async def callback(response, session):
                """Response callback for excess company filings."""
                result = await response.json()
                if result:
                    new_data = DataFrame.from_records(result)
                    results.extend(new_data.to_dict("records"))

            urls: List = []
            new_urls = (
                DataFrame(data["filings"].get("files"))  # type: ignore
                if "filings" in data
                else DataFrame()
            )
            for i in new_urls.index:
                new_cik: str = data["filings"]["files"][i]["name"]  # type: ignore
                new_url: str = "https://data.sec.gov/submissions/" + new_cik
                urls.append(new_url)
            if query.use_cache is True:
                cache_dir = f"{get_user_cache_directory()}/http/sec_company_filings"
                async with CachedSession(
                    cache=SQLiteBackend(cache_dir, expire_after=3600 * 24)
                ) as session:
                    try:
                        await amake_requests(urls, headers=HEADERS, session=session, response_callback=callback)  # type: ignore
                    finally:
                        await session.close()
            else:
                await amake_requests(urls, headers=HEADERS, response_callback=callback)  # type: ignore

        return results

    @staticmethod
    def transform_data(
        query: SecCompanyFilingsQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecCompanyFilingsData]:
        """Transform the data.

        Sorts by date, applies the date/form/limit filters, and builds the
        document URLs from the accession number.
        """
        # pylint: disable=import-outside-toplevel
        from numpy import nan
        from pandas import NA, DataFrame, to_datetime

        if not data:
            raise EmptyDataError(
                f"No filings found for CIK {query.cik}, or symbol {query.symbol}"
            )
        cols = [
            "reportDate",
            "filingDate",
            "acceptanceDateTime",
            "act",
            "form",
            "items",
            "primaryDocDescription",
            "primaryDocument",
            "accessionNumber",
            "fileNumber",
            "filmNumber",
            "isInlineXBRL",
            "isXBRL",
            "size",
        ]
        filings = DataFrame(data, columns=cols).astype(str)
        filings["reportDate"] = to_datetime(filings["reportDate"]).dt.date
        filings["filingDate"] = to_datetime(filings["filingDate"]).dt.date
        filings = filings.sort_values(by=["filingDate", "reportDate"], ascending=False)
        if query.start_date:
            filings = filings[filings["filingDate"] >= query.start_date]
        if query.end_date:
            filings = filings[filings["filingDate"] <= query.end_date]
        # int() strips the zero-padding for the Archives path segment.
        base_url = f"https://www.sec.gov/Archives/edgar/data/{str(int(query.cik))}/"  # type: ignore
        filings["primaryDocumentUrl"] = (
            base_url
            + filings["accessionNumber"].str.replace("-", "")
            + "/"
            + filings["primaryDocument"]
        )
        filings["completeSubmissionUrl"] = (
            base_url + filings["accessionNumber"] + ".txt"
        )
        filings["filingDetailUrl"] = (
            base_url + filings["accessionNumber"] + "-index.htm"
        )
        if query.form_type:
            form_types = query.form_type.replace("_", " ").split(",")
            filings = filings[
                filings.form.str.contains("|".join(form_types), case=False, na=False)
            ]
        # A truthy limit is necessarily non-zero, so no extra zero check is
        # needed here (limit == 0 means "no limit" and skips this branch).
        if query.limit:
            filings = filings.head(query.limit)

        if len(filings) == 0:
            raise EmptyDataError("No filings were found using the filters provided.")
        # Normalize pandas missing-value markers to None for the model.
        filings = filings.replace({NA: None, nan: None})

        return [
            SecCompanyFilingsData.model_validate(d) for d in filings.to_dict("records")
        ]
openbb_platform/providers/sec/openbb_sec/models/compare_company_facts.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Compare Company Facts Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+ from warnings import warn
7
+
8
+ from openbb_core.provider.abstract.annotated_result import AnnotatedResult
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.compare_company_facts import (
11
+ CompareCompanyFactsData,
12
+ CompareCompanyFactsQueryParams,
13
+ )
14
+ from openbb_core.provider.utils.descriptions import DATA_DESCRIPTIONS
15
+ from openbb_core.provider.utils.errors import EmptyDataError
16
+ from openbb_sec.utils.definitions import (
17
+ FACT_CHOICES,
18
+ FACTS,
19
+ FISCAL_PERIODS,
20
+ )
21
+ from pydantic import Field, field_validator
22
+
23
+
24
class SecCompareCompanyFactsQueryParams(CompareCompanyFactsQueryParams):
    """SEC Compare Company Facts Query.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful different reporting start and end dates for facts contained in a frame.
    """

    # Schema hints: multiple symbols allowed; a single fact and fiscal period,
    # each constrained to a fixed choice list.
    __json_schema_extra__ = {
        "symbol": {"multiple_items_allowed": True},
        "fact": {"multiple_items_allowed": False, "choices": sorted(FACTS)},
        "fiscal_period": {
            "multiple_items_allowed": False,
            "choices": ["fy", "q1", "q2", "q3", "q4"],
        },
    }

    fact: FACT_CHOICES = Field(
        default="Revenues",
        description="Fact or concept from the SEC taxonomy, in UpperCamelCase. Defaults to, 'Revenues'."
        + " AAPL, MSFT, GOOG, BRK-A currently report revenue as, 'RevenueFromContractWithCustomerExcludingAssessedTax'."
        + " In previous years, they have reported as 'Revenues'.",
    )
    year: Optional[int] = Field(
        default=None,
        description="The year to retrieve the data for. If not provided, the current year is used."
        + " When symbol(s) are provided, excluding the year will return all reported values for the concept.",
    )
    # Only meaningful for frame (no-symbol) requests; see the fetcher.
    fiscal_period: Optional[FISCAL_PERIODS] = Field(
        default=None,
        description="The fiscal period to retrieve the data for."
        + " If not provided, the most recent quarter is used."
        + " This parameter is ignored when a symbol is supplied.",
    )
    # Only meaningful for frame (no-symbol) requests; see the fetcher.
    instantaneous: bool = Field(
        default=False,
        description="Whether to retrieve instantaneous data. See the notes above for more information."
        + " Defaults to False. Some facts are only available as instantaneous data."
        + "\nThe function will automatically attempt the inverse of this parameter"
        + " if the initial fiscal quarter request fails."
        + " This parameter is ignored when a symbol is supplied.",
    )
    use_cache: bool = Field(
        default=True,
        description="Whether to use cache for the request. Defaults to True.",
    )

    @field_validator("fact", mode="before", check_fields=False)
    @classmethod
    def validate_fact(cls, v):
        """Set the default state."""
        # Any falsy value (None, "") falls back to the default fact.
        if not v or v == "":
            return "Revenues"
        return v
83
+
84
+
85
class SecCompareCompanyFactsData(CompareCompanyFactsData):
    """SEC Compare Company Facts Data."""

    # Maps model field names to the abbreviated keys of the SEC JSON payload.
    __alias_dict__ = {
        "reported_date": "filed",
        "period_beginning": "start",
        "period_ending": "end",
        "fiscal_year": "fy",
        "fiscal_period": "fp",
        "name": "entityName",
        "accession": "accn",
        "value": "val",
        "location": "loc",
    }

    cik: Union[str, int] = Field(
        description=DATA_DESCRIPTIONS.get("cik", ""),
    )
    location: Optional[str] = Field(
        default=None,
        description="Geographic location of the reporting entity.",
    )
    form: Optional[str] = Field(
        default=None,
        description="The SEC form associated with the fact or concept.",
    )
    frame: Optional[str] = Field(
        default=None,
        description="The frame ID associated with the fact or concept, if applicable.",
    )
    accession: str = Field(
        description="SEC filing accession number associated with the reported fact or concept.",
    )
    fact: str = Field(
        description="The display name of the fact or concept.",
    )
    # Annotation widened to Optional[str]: the field defaults to None, so a
    # bare `str` annotation mistyped it.
    unit: Optional[str] = Field(
        default=None,
        description="The unit of measurement for the fact or concept.",
    )
125
+
126
+
127
class SecCompareCompanyFactsFetcher(
    Fetcher[SecCompareCompanyFactsQueryParams, List[SecCompareCompanyFactsData]]
):
    """SEC Compare Company Facts Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecCompareCompanyFactsQueryParams:
        """Transform the query."""
        return SecCompareCompanyFactsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecCompareCompanyFactsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        With symbol(s), queries the per-company concept API; otherwise
        queries the xbrl/frames API for the aggregated cross-company frame.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.frames import get_concept, get_frame

        results: Dict = {}
        # BUGFIX: the two branches are now mutually exclusive. Previously,
        # `if not query.symbol:` ran the frame request and a separate
        # `if query.symbol is not None:` ALSO ran for an empty-string symbol,
        # discarding the frame result and issuing a concept request with "".
        if query.symbol:
            # Frame-only parameters do not apply to a concept request.
            if query.instantaneous is True:
                warn(
                    "The 'instantaneous' parameter is ignored when a symbol is supplied."
                )
            if query.fiscal_period is not None:
                warn(
                    "The 'fiscal_period' parameter is ignored when a symbol is supplied."
                )
            results = await get_concept(
                symbol=query.symbol,
                fact=query.fact,
                year=query.year,
                use_cache=query.use_cache,
            )
        else:
            results = await get_frame(
                fact=query.fact,
                year=query.year,
                fiscal_period=query.fiscal_period,
                instantaneous=query.instantaneous,
                use_cache=query.use_cache,
            )
        if not results:
            raise EmptyDataError("The request was returned empty.")

        return results

    @staticmethod
    def transform_data(
        query: SecCompareCompanyFactsQueryParams,
        data: Dict,
        **kwargs: Any,
    ) -> AnnotatedResult[List[SecCompareCompanyFactsData]]:
        """Transform the data and validate the model.

        The raw payload carries the records under "data" and request
        metadata under "metadata"; both are surfaced via AnnotatedResult.
        """
        if not data:
            raise EmptyDataError("The request was returned empty.")
        metadata = data.get("metadata")
        results_data = data.get("data", [])
        return AnnotatedResult(
            result=[SecCompareCompanyFactsData.model_validate(d) for d in results_data],  # type: ignore
            metadata=metadata,
        )
openbb_platform/providers/sec/openbb_sec/models/equity_ftd.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Equity FTD Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.equity_ftd import (
9
+ EquityFtdData,
10
+ EquityFtdQueryParams,
11
+ )
12
+ from openbb_core.provider.utils.errors import EmptyDataError
13
+ from pydantic import Field
14
+
15
+
16
class SecEquityFtdQueryParams(EquityFtdQueryParams):
    """SEC Equity FTD Query.

    Source: https://sec.gov/
    """

    # Maximum number of bi-monthly reports to download, newest first.
    limit: Optional[int] = Field(
        description="""
        Limit the number of reports to parse, from most recent.
        Approximately 24 reports per year, going back to 2009.
        """,
        default=24,
    )
    # Offset into the report list; a value of 1 skips the newest report.
    skip_reports: Optional[int] = Field(
        description="""
        Skip N number of reports from current. A value of 1 will skip the most recent report.
        """,
        default=0,
    )
    # Published reports are immutable, so caching each period's URL is safe.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache for the request, default is True."
        + " Each reporting period is a separate URL, new reports will be added to the cache.",
    )
40
+
41
+
42
class SecEquityFtdData(EquityFtdData):
    """SEC Equity FTD Data."""

    # The SEC source file labels the settlement-date column simply "date".
    __alias_dict__ = {"settlement_date": "date"}
46
+
47
+
48
class SecEquityFtdFetcher(
    Fetcher[
        SecEquityFtdQueryParams,
        List[SecEquityFtdData],
    ]
):
    """SEC Equity FTD Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecEquityFtdQueryParams:
        """Transform query params."""
        return SecEquityFtdQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecEquityFtdQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Download and combine the FTD report archives from the SEC website."""
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        from openbb_sec.utils.helpers import download_zip_file, get_ftd_urls  # noqa

        urls_data = await get_ftd_urls()
        urls = list(urls_data.values())
        # A non-positive or missing limit means "take every report".
        limit = query.limit if query.limit is not None and query.limit > 0 else 0
        if limit > 0:
            start = query.skip_reports if query.skip_reports else 0
            urls = urls[start : start + limit]  # noqa: E203

        results: List[Dict] = []

        async def fetch_report(url):
            """Download one report archive and accumulate its rows."""
            rows = await download_zip_file(url, query.symbol, query.use_cache)
            results.extend(rows)

        # Reports are independent, so fetch them concurrently; ordering is
        # restored by the sort below.
        await asyncio.gather(*[fetch_report(url) for url in urls])

        if not results:
            raise EmptyDataError(
                "There was an error collecting data, no results were returned."
            )

        return sorted(results, key=lambda d: d["date"], reverse=True)

    @staticmethod
    def transform_data(
        query: SecEquityFtdQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecEquityFtdData]:
        """Validate each record against the data model."""
        return [SecEquityFtdData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/equity_search.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Equity Search Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.equity_search import (
9
+ EquitySearchData,
10
+ EquitySearchQueryParams,
11
+ )
12
+ from pydantic import Field
13
+
14
+
15
class SecEquitySearchQueryParams(EquitySearchQueryParams):
    """SEC Equity Search Query.

    Source: https://sec.gov/
    """

    # The SEC company/fund maps change rarely, so caching defaults to on.
    use_cache: bool = Field(
        default=True,
        description="Whether to use the cache or not.",
    )
    # When True, the mutual fund/ETF map is searched instead of the
    # operating-company list.
    is_fund: bool = Field(
        default=False,
        description="Whether to direct the search to the list of mutual funds and ETFs.",
    )
29
+
30
+
31
class SecEquitySearchData(EquitySearchData):
    """SEC Equity Search Data."""

    # The CIK (Central Index Key) is the SEC's unique filer identifier.
    cik: str = Field(description="Central Index Key")
35
+
36
+
37
class SecEquitySearchFetcher(
    Fetcher[
        SecEquitySearchQueryParams,
        List[SecEquitySearchData],
    ]
):
    """SEC Equity Search Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecEquitySearchQueryParams:
        """Transform the query."""
        return SecEquitySearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecEquitySearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import (
            get_all_companies,
            get_mf_and_etf_map,
        )

        if query.is_fund is True:
            # Funds are matched on their SEC identifiers and ticker.
            companies = await get_mf_and_etf_map(use_cache=query.use_cache)
            mask = (
                companies["cik"].str.contains(query.query, case=False)
                | companies["seriesId"].str.contains(query.query, case=False)
                | companies["classId"].str.contains(query.query, case=False)
                | companies["symbol"].str.contains(query.query, case=False)
            )
        else:
            # Operating companies are matched on name, ticker, or CIK.
            companies = await get_all_companies(use_cache=query.use_cache)
            mask = (
                companies["name"].str.contains(query.query, case=False)
                | companies["symbol"].str.contains(query.query, case=False)
                | companies["cik"].str.contains(query.query, case=False)
            )

        return companies[mask].astype(str).to_dict("records")

    @staticmethod
    def transform_data(
        query: SecEquitySearchQueryParams, data: Dict, **kwargs: Any
    ) -> List[SecEquitySearchData]:
        """Validate each record against the data model."""
        return [SecEquitySearchData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/etf_holdings.py ADDED
@@ -0,0 +1,870 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC ETF Holings Model."""
2
+
3
+ # pylint: disable =[unused-argument,too-many-locals,too-many-branches]
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Dict, List, Optional, Union
7
+ from warnings import warn
8
+
9
+ from openbb_core.app.model.abstract.error import OpenBBError
10
+ from openbb_core.provider.abstract.annotated_result import AnnotatedResult
11
+ from openbb_core.provider.abstract.fetcher import Fetcher
12
+ from openbb_core.provider.standard_models.etf_holdings import (
13
+ EtfHoldingsData,
14
+ EtfHoldingsQueryParams,
15
+ )
16
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
17
+ from openbb_core.provider.utils.errors import EmptyDataError
18
+ from pydantic import Field, field_validator, model_validator
19
+
20
+
21
class SecEtfHoldingsQueryParams(EtfHoldingsQueryParams):
    """SEC ETF Holdings Query.

    Source: https://www.sec.gov/Archives/edgar/data/
    """

    # Accepts a date object or an ISO string; the fetcher matches it to the
    # nearest available N-PORT filing period.
    date: Optional[Union[str, dateType]] = Field(
        description=QUERY_DESCRIPTIONS.get("date", "")
        + " The date represents the period ending."
        + " The date entered will return the closest filing.",
        default=None,
    )
    use_cache: bool = Field(
        description="Whether or not to use cache for the request.",
        default=True,
    )
37
+
38
+
39
class SecEtfHoldingsData(EtfHoldingsData):
    """SEC ETF Holdings Data.

    Field names map to the XML tags/attributes of the SEC N-PORT filing via
    ``__alias_dict__``; most fields are optional because each holding type
    (equity, debt, derivative, repo) populates a different subset.
    """

    # Left side: model field; right side: N-PORT XML tag name.
    __alias_dict__ = {
        "name": "title",
        "weight": "pctVal",
        "value": "valUSD",
        "payoff_profile": "payoffProfile",
        "currency": "curCd",
        "asset_category": "assetCat",
        "issuer_category": "issuerCat",
        "country": "invCountry",
        "is_restricted": "isRestrictedSec",
        "fair_value_level": "fairValLevel",
        "is_cash_collateral": "isCashCollateral",
        "is_non_cash_collateral": "isNonCashCollateral",
        "is_loan_by_fund": "isLoanByFund",
        "loan_value": "loanVal",
    }

    # --- Identifiers and generic holding attributes ---
    lei: Optional[str] = Field(description="The LEI of the holding.", default=None)
    cusip: Optional[str] = Field(description="The CUSIP of the holding.", default=None)
    isin: Optional[str] = Field(description="The ISIN of the holding.", default=None)
    other_id: Optional[str] = Field(
        description="Internal identifier for the holding.", default=None
    )
    balance: Optional[float] = Field(
        description="The balance of the holding.", default=None
    )
    # Normalized to a 0-1 fraction by `normalize_percent` below.
    weight: Optional[float] = Field(
        description="The weight of the holding in ETF in %.",
        default=None,
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
    value: Optional[float] = Field(
        description="The value of the holding in USD.", default=None
    )
    payoff_profile: Optional[str] = Field(
        description="The payoff profile of the holding.",
        default=None,
    )
    units: Optional[Union[float, str]] = Field(
        description="The units of the holding.", default=None
    )
    currency: Optional[str] = Field(
        description="The currency of the holding.", default=None
    )
    asset_category: Optional[str] = Field(
        description="The asset category of the holding.", default=None
    )
    issuer_category: Optional[str] = Field(
        description="The issuer category of the holding.",
        default=None,
    )
    country: Optional[str] = Field(
        description="The country of the holding.", default=None
    )
    is_restricted: Optional[str] = Field(
        description="Whether the holding is restricted.",
        default=None,
    )
    fair_value_level: Optional[int] = Field(
        description="The fair value level of the holding.",
        default=None,
    )
    # --- Securities-lending attributes ---
    is_cash_collateral: Optional[str] = Field(
        description="Whether the holding is cash collateral.",
        default=None,
    )
    is_non_cash_collateral: Optional[str] = Field(
        description="Whether the holding is non-cash collateral.",
        default=None,
    )
    is_loan_by_fund: Optional[str] = Field(
        description="Whether the holding is loan by fund.",
        default=None,
    )
    loan_value: Optional[float] = Field(
        description="The loan value of the holding.",
        default=None,
    )
    issuer_conditional: Optional[str] = Field(
        description="The issuer conditions of the holding.", default=None
    )
    asset_conditional: Optional[str] = Field(
        description="The asset conditions of the holding.", default=None
    )
    # --- Debt-security attributes ---
    maturity_date: Optional[dateType] = Field(
        description="The maturity date of the debt security.", default=None
    )
    coupon_kind: Optional[str] = Field(
        description="The type of coupon for the debt security.", default=None
    )
    rate_type: Optional[str] = Field(
        description="The type of rate for the debt security, floating or fixed.",
        default=None,
    )
    # Normalized to a 0-1 fraction by `normalize_percent` below.
    annualized_return: Optional[float] = Field(
        description="The annualized return on the debt security.",
        default=None,
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
    is_default: Optional[str] = Field(
        description="If the debt security is defaulted.", default=None
    )
    in_arrears: Optional[str] = Field(
        description="If the debt security is in arrears.", default=None
    )
    is_paid_kind: Optional[str] = Field(
        description="If the debt security payments are paid in kind.", default=None
    )
    # --- Derivative attributes (options, futures, forwards) ---
    derivative_category: Optional[str] = Field(
        description="The derivative category of the holding.", default=None
    )
    counterparty: Optional[str] = Field(
        description="The counterparty of the derivative.", default=None
    )
    underlying_name: Optional[str] = Field(
        description="The name of the underlying asset associated with the derivative.",
        default=None,
    )
    option_type: Optional[str] = Field(description="The type of option.", default=None)
    derivative_payoff: Optional[str] = Field(
        description="The payoff profile of the derivative.", default=None
    )
    expiry_date: Optional[dateType] = Field(
        description="The expiry or termination date of the derivative.", default=None
    )
    exercise_price: Optional[float] = Field(
        description="The exercise price of the option.", default=None
    )
    exercise_currency: Optional[str] = Field(
        description="The currency of the option exercise price.", default=None
    )
    shares_per_contract: Optional[float] = Field(
        description="The number of shares per contract.", default=None
    )
    delta: Optional[Union[str, float]] = Field(
        description="The delta of the option.", default=None
    )
    # --- Swap attributes: receivable leg ---
    rate_type_rec: Optional[str] = Field(
        description="The type of rate for receivable portion of the swap.", default=None
    )
    receive_currency: Optional[str] = Field(
        description="The receive currency of the swap.", default=None
    )
    upfront_receive: Optional[float] = Field(
        description="The upfront amount received of the swap.", default=None
    )
    floating_rate_index_rec: Optional[str] = Field(
        description="The floating rate index for receivable portion of the swap.",
        default=None,
    )
    floating_rate_spread_rec: Optional[float] = Field(
        description="The floating rate spread for reveivable portion of the swap.",
        default=None,
    )
    rate_tenor_rec: Optional[str] = Field(
        description="The rate tenor for receivable portion of the swap.", default=None
    )
    rate_tenor_unit_rec: Optional[Union[str, int]] = Field(
        description="The rate tenor unit for receivable portion of the swap.",
        default=None,
    )
    reset_date_rec: Optional[str] = Field(
        description="The reset date for receivable portion of the swap.", default=None
    )
    reset_date_unit_rec: Optional[Union[str, int]] = Field(
        description="The reset date unit for receivable portion of the swap.",
        default=None,
    )
    # --- Swap attributes: payment leg ---
    rate_type_pmnt: Optional[str] = Field(
        description="The type of rate for payment portion of the swap.", default=None
    )
    payment_currency: Optional[str] = Field(
        description="The payment currency of the swap.", default=None
    )
    upfront_payment: Optional[float] = Field(
        description="The upfront amount received of the swap.", default=None
    )
    floating_rate_index_pmnt: Optional[str] = Field(
        description="The floating rate index for payment portion of the swap.",
        default=None,
    )
    floating_rate_spread_pmnt: Optional[float] = Field(
        description="The floating rate spread for payment portion of the swap.",
        default=None,
    )
    rate_tenor_pmnt: Optional[str] = Field(
        description="The rate tenor for payment portion of the swap.", default=None
    )
    rate_tenor_unit_pmnt: Optional[Union[str, int]] = Field(
        description="The rate tenor unit for payment portion of the swap.", default=None
    )
    reset_date_pmnt: Optional[str] = Field(
        description="The reset date for payment portion of the swap.", default=None
    )
    reset_date_unit_pmnt: Optional[Union[str, int]] = Field(
        description="The reset date unit for payment portion of the swap.", default=None
    )
    # --- Repurchase-agreement attributes ---
    repo_type: Optional[str] = Field(description="The type of repo.", default=None)
    is_cleared: Optional[str] = Field(
        description="If the repo is cleared.", default=None
    )
    is_tri_party: Optional[str] = Field(
        description="If the repo is tri party.", default=None
    )
    principal_amount: Optional[float] = Field(
        description="The principal amount of the repo.", default=None
    )
    principal_currency: Optional[str] = Field(
        description="The currency of the principal amount.", default=None
    )
    collateral_type: Optional[str] = Field(
        description="The collateral type of the repo.", default=None
    )
    collateral_amount: Optional[float] = Field(
        description="The collateral amount of the repo.", default=None
    )
    collateral_currency: Optional[str] = Field(
        description="The currency of the collateral amount.", default=None
    )
    # --- Forward / FX attributes ---
    exchange_currency: Optional[str] = Field(
        description="The currency of the exchange rate.", default=None
    )
    exchange_rate: Optional[float] = Field(
        description="The exchange rate.", default=None
    )
    currency_sold: Optional[str] = Field(
        description="The currency sold in a Forward Derivative.",
        default=None,
    )
    currency_amount_sold: Optional[float] = Field(
        description="The amount of currency sold in a Forward Derivative.",
        default=None,
    )
    currency_bought: Optional[str] = Field(
        description="The currency bought in a Forward Derivative.",
        default=None,
    )
    currency_amount_bought: Optional[float] = Field(
        description="The amount of currency bought in a Forward Derivative.",
        default=None,
    )
    notional_amount: Optional[float] = Field(
        description="The notional amount of the derivative.", default=None
    )
    notional_currency: Optional[str] = Field(
        description="The currency of the derivative's notional amount.", default=None
    )
    unrealized_gain: Optional[float] = Field(
        description="The unrealized gain or loss on the derivative.", default=None
    )

    @field_validator("weight", "annualized_return", mode="before", check_fields=False)
    @classmethod
    def normalize_percent(cls, v):
        """Convert percent values from the filing to 0-1 fractions.

        Note: falsy inputs (including 0) become None, matching `replace_zero`.
        """
        return float(v) / 100 if v else None

    @model_validator(mode="before")
    @classmethod
    def replace_zero(cls, values):
        """Replace zero values with None so empty fields are omitted.

        Runs before field validation; non-dict payloads pass through untouched.
        """
        return (
            {k: None if v == 0 else v for k, v in values.items()}
            if isinstance(values, dict)
            else values
        )
308
+
309
+
310
+ class SecEtfHoldingsFetcher(
311
+ Fetcher[
312
+ SecEtfHoldingsQueryParams,
313
+ List[SecEtfHoldingsData],
314
+ ]
315
+ ):
316
+ """SEC ETF Holdings."""
317
+
318
+ @staticmethod
319
+ def transform_query(params: Dict[str, Any]) -> SecEtfHoldingsQueryParams:
320
+ """Transform the query."""
321
+ params["symbol"] = params["symbol"].upper()
322
+ return SecEtfHoldingsQueryParams(**params)
323
+
324
    @staticmethod
    async def aextract_data(
        query: SecEtfHoldingsQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Finds the fund's N-PORT filing candidates (with retries), picks the
        filing closest to the requested date (or the most recent one), then
        downloads and XML-parses the primary document.
        """
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        import xmltodict  # noqa
        from aiohttp_client_cache import SQLiteBackend  # noqa
        from aiohttp_client_cache.session import CachedSession  # noqa
        from openbb_core.app.utils import get_user_cache_directory  # noqa
        from openbb_core.provider.utils.helpers import amake_request  # noqa
        from openbb_sec.utils.helpers import HEADERS, get_nport_candidates  # noqa
        from pandas import DataFrame, Series, to_datetime  # noqa

        # Implement a retry mechanism in case of RemoteDisconnected Error.
        retries = 3
        for i in range(retries):
            filings = []
            try:
                filings = await get_nport_candidates(
                    symbol=query.symbol, use_cache=query.use_cache
                )
                if filings:
                    break
            except Exception as e:
                if i < retries - 1:
                    # Transient failure: warn, back off briefly, and retry.
                    warn(f"Error: {e}. Retrying...")
                    await asyncio.sleep(1)
                    continue
                # Final attempt failed; propagate the original error.
                raise e
        filing_candidates = DataFrame.from_records(filings)
        if filing_candidates.empty:
            raise OpenBBError(f"No N-Port records found for {query.symbol}.")
        dates = filing_candidates.period_ending.to_list()
        new_date: str = ""
        if query.date is not None:
            date = query.date
            # Gets the URL for the nearest date to the requested date.
            # Nearest = smallest absolute timedelta between candidate and request.
            __dates = Series(to_datetime(dates))
            __date = to_datetime(date)
            __nearest = DataFrame(__dates - __date)
            __nearest_date = abs(__nearest[0].astype("int64")).idxmin()
            new_date = __dates[__nearest_date].strftime("%Y-%m-%d")
            date = new_date if new_date else date
            warn(f"Closest filing date to, {query.date}, is the period ending: {date}")
            filing_url = filing_candidates[filing_candidates["period_ending"] == date][
                "primary_doc"
            ].values[0]
        else:
            # Candidates are assumed newest-first; take the most recent filing.
            filing_url = filing_candidates["primary_doc"].values[0]

        async def callback(response, session):
            """Response callback for the request."""
            return await response.read()

        response: Union[dict, List[dict]] = []
        if query.use_cache is True:
            # Cache the document body in a per-user SQLite-backed HTTP cache.
            cache_dir = f"{get_user_cache_directory()}/http/sec_etf"
            async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
                try:
                    response = await amake_request(
                        filing_url, headers=HEADERS, session=session, response_callback=callback  # type: ignore
                    )
                finally:
                    await session.close()
        else:
            response = await amake_request(
                filing_url, headers=HEADERS, response_callback=callback  # type: ignore
            )
        results = xmltodict.parse(response)

        return results
399
+
400
+ # pylint: disable=too-many-statements
401
+ @staticmethod
402
+ def transform_data( # noqa: PLR0912
403
+ query: SecEtfHoldingsQueryParams,
404
+ data: Dict,
405
+ **kwargs: Any,
406
+ ) -> AnnotatedResult[List[SecEtfHoldingsData]]:
407
+ """Transform the data."""
408
+ # pylint: disable=import-outside-toplevel
409
+ from pandas import DataFrame, to_datetime
410
+ from pandas.tseries.offsets import MonthEnd
411
+
412
+ if not data:
413
+ raise EmptyDataError(f"No data was returned for the symbol, {query.symbol}")
414
+ results = []
415
+
416
+ response = data
417
+
418
+ # Parse the response if it is a NPORT-P filing.
419
+ if (
420
+ "edgarSubmission" in response
421
+ and "formData" in response["edgarSubmission"]
422
+ and response["edgarSubmission"]["headerData"]["submissionType"] == "NPORT-P"
423
+ and "invstOrSecs" in response["edgarSubmission"]["formData"]
424
+ and "invstOrSec" in response["edgarSubmission"]["formData"]["invstOrSecs"]
425
+ ):
426
+ df = DataFrame.from_records(
427
+ response["edgarSubmission"]["formData"]["invstOrSecs"]["invstOrSec"]
428
+ )
429
+ # Conditionally flatten deeply nested values.
430
+ for i in df.index:
431
+ if "isin" in df.iloc[i]["identifiers"]:
432
+ df.loc[i, "isin"] = df.iloc[i]["identifiers"]["isin"].get("@value")
433
+
434
+ if (
435
+ "other" in df.iloc[i]["identifiers"]
436
+ and "@value" in df.iloc[i]["identifiers"]["other"]
437
+ ):
438
+ df.loc[i, "other_id"] = df.iloc[i]["identifiers"]["other"].get(
439
+ "@value"
440
+ )
441
+
442
+ if "securityLending" in df.iloc[i]:
443
+ security_lending = df.iloc[i]["securityLending"]
444
+ if "loanByFundCondition" in security_lending:
445
+ loan_by_fund_condition = security_lending["loanByFundCondition"]
446
+ df.loc[i, "isLoanByFund"] = loan_by_fund_condition.get(
447
+ "@isLoanByFund"
448
+ )
449
+ df.loc[i, "loanVal"] = loan_by_fund_condition.get("@loanVal")
450
+ if "isCashCollateral" in security_lending:
451
+ df.loc[i, "isCashCollateral"] = security_lending.get(
452
+ "isCashCollateral"
453
+ )
454
+ if "isNonCashCollateral" in security_lending:
455
+ df.loc[i, "isNonCashCollateral"] = security_lending.get(
456
+ "isNonCashCollateral"
457
+ )
458
+
459
+ if "debtSec" in df.iloc[i] and isinstance(df.loc[i]["debtSec"], dict):
460
+ debt_sec = df.iloc[i]["debtSec"]
461
+ df.loc[i, "maturity_date"] = debt_sec.get("maturityDt")
462
+ df.loc[i, "coupon_kind"] = debt_sec.get("couponKind")
463
+ df.loc[i, "annualized_return"] = debt_sec.get("annualizedRt")
464
+ df.loc[i, "is_default"] = debt_sec.get("isDefault")
465
+ df.loc[i, "in_arrears"] = debt_sec.get("areIntrstPmntsInArrs")
466
+ df.loc[i, "is_paid_kind"] = debt_sec.get("isPaidKind")
467
+
468
+ if "issuerConditional" in df.iloc[i] and isinstance(
469
+ df.iloc[i]["issuerConditional"], dict
470
+ ):
471
+ df.loc[i, "issuer_conditional"] = df.iloc[i][
472
+ "issuerConditional"
473
+ ].get("@desc")
474
+
475
+ if "assetConditional" in df.iloc[i] and isinstance(
476
+ df.iloc[i]["assetConditional"], dict
477
+ ):
478
+ df.loc[i, "asset_conditional"] = df.iloc[i]["assetConditional"].get(
479
+ "@desc"
480
+ )
481
+
482
+ if "derivativeInfo" in df.iloc[i] and isinstance(
483
+ df.iloc[i]["derivativeInfo"], dict
484
+ ):
485
+ derivative_info = df.iloc[i]["derivativeInfo"]
486
+
487
+ if "optionSwaptionWarrantDeriv" in derivative_info:
488
+ option_swaption_warrant_deriv = derivative_info[
489
+ "optionSwaptionWarrantDeriv"
490
+ ]
491
+ df.loc[i, "derivative_category"] = (
492
+ option_swaption_warrant_deriv.get("@derivCat")
493
+ )
494
+ df.loc[i, "counterparty"] = option_swaption_warrant_deriv[
495
+ "counterparties"
496
+ ].get("counterpartyName")
497
+ df.loc[i, "lei"] = option_swaption_warrant_deriv[
498
+ "counterparties"
499
+ ].get("counterpartyLei")
500
+ df.loc[i, "underlying_name"] = (
501
+ option_swaption_warrant_deriv["descRefInstrmnt"]
502
+ .get("otherRefInst", {})
503
+ .get("issueTitle")
504
+ )
505
+ df.loc[i, "underlying_name"] = option_swaption_warrant_deriv[
506
+ "descRefInstrmnt"
507
+ ].get("nestedDerivInfo", {}).get("fwdDeriv", {}).get(
508
+ "derivAddlInfo", {}
509
+ ).get(
510
+ "title"
511
+ ) or option_swaption_warrant_deriv[
512
+ "descRefInstrmnt"
513
+ ].get(
514
+ "otherRefInst", {}
515
+ ).get(
516
+ "issueTitle"
517
+ )
518
+ df.loc[i, "option_type"] = option_swaption_warrant_deriv.get(
519
+ "putOrCall"
520
+ )
521
+ df.loc[i, "derivative_payoff"] = (
522
+ option_swaption_warrant_deriv.get("writtenOrPur")
523
+ )
524
+ df.loc[i, "expiry_date"] = option_swaption_warrant_deriv.get(
525
+ "expDt"
526
+ )
527
+ df.loc[i, "exercise_price"] = option_swaption_warrant_deriv.get(
528
+ "exercisePrice"
529
+ )
530
+ df.loc[i, "exercise_currency"] = (
531
+ option_swaption_warrant_deriv.get("exercisePriceCurCd")
532
+ )
533
+ df.loc[i, "shares_per_contract"] = (
534
+ option_swaption_warrant_deriv.get("shareNo")
535
+ )
536
+ if option_swaption_warrant_deriv.get("delta") != "XXXX":
537
+ df.loc[i, "delta"] = option_swaption_warrant_deriv.get(
538
+ "delta"
539
+ )
540
+ df.loc[i, "unrealized_gain"] = float(
541
+ option_swaption_warrant_deriv.get("unrealizedAppr")
542
+ )
543
+
544
+ if "futrDeriv" in derivative_info:
545
+ futr_deriv = derivative_info["futrDeriv"]
546
+ df.loc[i, "derivative_category"] = futr_deriv.get("@derivCat")
547
+ if isinstance(futr_deriv.get("counterparties"), dict):
548
+ df.loc[i, "counterparty"] = futr_deriv[
549
+ "counterparties"
550
+ ].get("counterpartyName")
551
+ df.loc[i, "lei"] = futr_deriv["counterparties"].get(
552
+ "counterpartyLei"
553
+ )
554
+ df.loc[i, "underlying_name"] = (
555
+ futr_deriv["descRefInstrmnt"]
556
+ .get("indexBasketInfo", {})
557
+ .get("indexName")
558
+ )
559
+ df.loc[i, "other_id"] = (
560
+ futr_deriv["descRefInstrmnt"]
561
+ .get("indexBasketInfo", {})
562
+ .get("indexIdentifier")
563
+ )
564
+ df.loc[i, "derivative_payoff"] = futr_deriv.get("payOffProf")
565
+ df.loc[i, "expiry_date"] = futr_deriv.get(
566
+ "expDt"
567
+ ) or futr_deriv.get("expDate")
568
+ df.loc[i, "notional_amount"] = float(
569
+ futr_deriv.get("notionalAmt")
570
+ )
571
+ df.loc[i, "notional_currency"] = futr_deriv.get("curCd")
572
+ df.loc[i, "unrealized_gain"] = float(
573
+ futr_deriv.get("unrealizedAppr")
574
+ )
575
+
576
+ if "fwdDeriv" in derivative_info:
577
+ fwd_deriv = derivative_info["fwdDeriv"]
578
+ df.loc[i, "derivative_category"] = fwd_deriv.get("@derivCat")
579
+ df.loc[i, "counterparty"] = fwd_deriv["counterparties"].get(
580
+ "counterpartyName"
581
+ )
582
+ df.loc[i, "currency_sold"] = fwd_deriv.get("curSold")
583
+ df.loc[i, "currency_amount_sold"] = float(
584
+ fwd_deriv.get("amtCurSold")
585
+ )
586
+ df.loc[i, "currency_bought"] = fwd_deriv.get("curPur")
587
+ df.loc[i, "currency_amount_bought"] = float(
588
+ fwd_deriv.get("amtCurPur")
589
+ )
590
+ df.loc[i, "expiry_date"] = fwd_deriv.get("settlementDt")
591
+ df.loc[i, "unrealized_gain"] = float(
592
+ fwd_deriv.get("unrealizedAppr")
593
+ )
594
+
595
+ if "swapDeriv" in df.iloc[i]["derivativeInfo"]:
596
+ swap_deriv = df.iloc[i]["derivativeInfo"]["swapDeriv"]
597
+ df.loc[i, "derivative_category"] = swap_deriv.get("@derivCat")
598
+ df.loc[i, "counterparty"] = swap_deriv["counterparties"].get(
599
+ "counterpartyName"
600
+ )
601
+ df.loc[i, "lei"] = swap_deriv["counterparties"].get(
602
+ "counterpartyLei"
603
+ )
604
+ if "otherRefInst" in swap_deriv["descRefInstrmnt"]:
605
+ df.loc[i, "underlying_name"] = swap_deriv[
606
+ "descRefInstrmnt"
607
+ ]["otherRefInst"].get("issueTitle")
608
+ if "indexBasketInfo" in swap_deriv["descRefInstrmnt"]:
609
+ df.loc[i, "underlying_name"] = swap_deriv[
610
+ "descRefInstrmnt"
611
+ ]["indexBasketInfo"].get("indexName")
612
+ df.loc[i, "other_id"] = swap_deriv["descRefInstrmnt"][
613
+ "indexBasketInfo"
614
+ ].get("indexIdentifier")
615
+ df.loc[i, "swap_description"] = (
616
+ swap_deriv["otherRecDesc"].get("#text")
617
+ if "otherRecDesc" in swap_deriv["descRefInstrmnt"]
618
+ else None
619
+ )
620
+ if "floatingRecDesc" in swap_deriv:
621
+ df.loc[i, "rate_type_rec"] = swap_deriv[
622
+ "floatingRecDesc"
623
+ ].get("@fixedOrFloating")
624
+ df.loc[i, "floating_rate_index_rec"] = swap_deriv[
625
+ "floatingRecDesc"
626
+ ].get("@floatingRtIndex")
627
+ df.loc[i, "floating_rate_spread_rec"] = float(
628
+ swap_deriv["floatingRecDesc"].get("@floatingRtSpread")
629
+ )
630
+ df.loc[i, "payment_amount_rec"] = float(
631
+ swap_deriv["floatingRecDesc"].get("@pmntAmt")
632
+ )
633
+ df.loc[i, "rate_tenor_rec"] = swap_deriv["floatingRecDesc"][
634
+ "rtResetTenors"
635
+ ]["rtResetTenor"].get("@rateTenor")
636
+ df.loc[i, "rate_tenor_unit_rec"] = swap_deriv[
637
+ "floatingRecDesc"
638
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenorUnit")
639
+ df.loc[i, "reset_date_rec"] = swap_deriv["floatingRecDesc"][
640
+ "rtResetTenors"
641
+ ]["rtResetTenor"].get("@resetDt")
642
+ df.loc[i, "reset_date_unit_rec"] = swap_deriv[
643
+ "floatingRecDesc"
644
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDtUnit")
645
+ if "floatingPmntDesc" in swap_deriv:
646
+ df.loc[i, "rate_type_pmnt"] = swap_deriv[
647
+ "floatingPmntDesc"
648
+ ].get("@fixedOrFloating")
649
+ df.loc[i, "floating_rate_index_pmnt"] = swap_deriv[
650
+ "floatingPmntDesc"
651
+ ].get("@floatingRtIndex")
652
+ df.loc[i, "floating_rate_spread_pmnt"] = float(
653
+ swap_deriv["floatingPmntDesc"].get("@floatingRtSpread")
654
+ )
655
+ df.loc[i, "payment_amount_pmnt"] = float(
656
+ swap_deriv["floatingPmntDesc"].get("@pmntAmt")
657
+ )
658
+ df.loc[i, "rate_tenor_pmnt"] = swap_deriv[
659
+ "floatingPmntDesc"
660
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenor")
661
+ df.loc[i, "rate_tenor_unit_pmnt"] = swap_deriv[
662
+ "floatingPmntDesc"
663
+ ]["rtResetTenors"]["rtResetTenor"].get("@rateTenorUnit")
664
+ df.loc[i, "reset_date_pmnt"] = swap_deriv[
665
+ "floatingPmntDesc"
666
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDt")
667
+ df.loc[i, "reset_date_unit_rec"] = swap_deriv[
668
+ "floatingPmntDesc"
669
+ ]["rtResetTenors"]["rtResetTenor"].get("@resetDtUnit")
670
+ df.loc[i, "expiry_date"] = swap_deriv.get("terminationDt")
671
+ df.loc[i, "upfront_payment"] = float(
672
+ swap_deriv.get("upfrontPmnt")
673
+ )
674
+ df.loc[i, "payment_currency"] = swap_deriv.get("pmntCurCd")
675
+ df.loc[i, "upfront_receive"] = float(
676
+ swap_deriv.get("upfrontRcpt")
677
+ )
678
+ df.loc[i, "receive_currency"] = swap_deriv.get("rcptCurCd")
679
+ df.loc[i, "notional_amount"] = float(
680
+ swap_deriv.get("notionalAmt")
681
+ )
682
+ df.loc[i, "notional_currency"] = swap_deriv.get("curCd")
683
+ df.loc[i, "unrealized_gain"] = float(
684
+ swap_deriv.get("unrealizedAppr")
685
+ )
686
+
687
+ if "repurchaseAgrmt" in df.iloc[i] and isinstance(
688
+ df.iloc[i]["repurchaseAgrmt"], dict
689
+ ):
690
+ repurchase_agrmt = df.iloc[i]["repurchaseAgrmt"]
691
+ df.loc[i, "repo_type"] = repurchase_agrmt.get("transCat")
692
+
693
+ if "clearedCentCparty" in repurchase_agrmt and isinstance(
694
+ repurchase_agrmt["clearedCentCparty"], dict
695
+ ):
696
+ cleared_cent_cparty = repurchase_agrmt["clearedCentCparty"]
697
+ df.loc[i, "is_cleared"] = cleared_cent_cparty.get("@isCleared")
698
+ df.loc[i, "counterparty"] = cleared_cent_cparty.get(
699
+ "@centralCounterparty"
700
+ )
701
+ df.loc[i, "is_tri_party"] = repurchase_agrmt.get("isTriParty")
702
+ df.loc[i, "annualized_return"] = repurchase_agrmt.get(
703
+ "repurchaseRt"
704
+ )
705
+ df.loc[i, "maturity_date"] = repurchase_agrmt.get("maturityDt")
706
+
707
+ if (
708
+ "repurchaseCollaterals" in repurchase_agrmt
709
+ and "repurchaseCollateral"
710
+ in repurchase_agrmt["repurchaseCollaterals"]
711
+ ):
712
+ repurchase_collateral = repurchase_agrmt[
713
+ "repurchaseCollaterals"
714
+ ]["repurchaseCollateral"]
715
+ df.loc[i, "principal_amount"] = float(
716
+ repurchase_collateral.get("principalAmt")
717
+ )
718
+ df.loc[i, "principal_currency"] = repurchase_collateral.get(
719
+ "@principalCd"
720
+ )
721
+ df.loc[i, "collateral_amount"] = float(
722
+ repurchase_collateral.get("collateralVal")
723
+ )
724
+ df.loc[i, "collateral_currency"] = repurchase_collateral.get(
725
+ "@collateralCd"
726
+ )
727
+ df.loc[i, "collateral_type"] = repurchase_collateral.get(
728
+ "@invstCat"
729
+ )
730
+
731
+ if "currencyConditional" in df.iloc[i] and isinstance(
732
+ df.iloc[i]["currencyConditional"], dict
733
+ ):
734
+ currency_conditional = df.iloc[i]["currencyConditional"]
735
+ df.loc[i, "exchange_currency"] = currency_conditional.get("@curCd")
736
+ df.loc[i, "exchange_rate"] = currency_conditional.get("@exchangeRt")
737
+
738
+ # Drop the flattened columns
739
+ to_drop = [
740
+ "identifiers",
741
+ "securityLending",
742
+ "issuerConditional",
743
+ "assetConditional",
744
+ "debtSec",
745
+ "currencyConditional",
746
+ "derivativeInfo",
747
+ "repurchaseAgrmt",
748
+ ]
749
+ for col in to_drop:
750
+ if col in df.columns:
751
+ df = df.drop(col, axis=1)
752
+
753
+ df["pctVal"] = df["pctVal"].astype(float)
754
+ results = (
755
+ df.fillna("N/A")
756
+ .replace("N/A", None)
757
+ .sort_values(by="pctVal", ascending=False)
758
+ .to_dict(orient="records")
759
+ )
760
+ # Extract additional information from the form that doesn't belong in the holdings table.
761
+ metadata = {}
762
+ month_1: str = ""
763
+ month_2: str = ""
764
+ month_3: str = ""
765
+ try:
766
+ gen_info = response["edgarSubmission"]["formData"].get("genInfo", {}) # type: ignore
767
+ if gen_info:
768
+ metadata["fund_name"] = gen_info.get("seriesName")
769
+ metadata["series_id"] = gen_info.get("seriesId")
770
+ metadata["lei"] = gen_info.get("seriesLei")
771
+ metadata["period_ending"] = gen_info.get("repPdDate")
772
+ metadata["fiscal_year_end"] = gen_info.get("repPdEnd")
773
+ current_month = to_datetime(metadata["period_ending"])
774
+ month_1 = (current_month - MonthEnd(2)).date().strftime("%Y-%m-%d")
775
+ month_2 = (current_month - MonthEnd(1)).date().strftime("%Y-%m-%d")
776
+ month_3 = current_month.strftime("%Y-%m-%d")
777
+ fund_info = response["edgarSubmission"]["formData"].get("fundInfo", {}) # type: ignore
778
+ if fund_info:
779
+ metadata["total_assets"] = float(fund_info.pop("totAssets", None))
780
+ metadata["total_liabilities"] = float(fund_info.pop("totLiabs", None))
781
+ metadata["net_assets"] = float(fund_info.pop("netAssets", None))
782
+ metadata["cash_and_equivalents"] = fund_info.pop(
783
+ "cshNotRptdInCorD", None
784
+ )
785
+ return_info = fund_info["returnInfo"]["monthlyTotReturns"].get(
786
+ "monthlyTotReturn", {}
787
+ )
788
+ returns = {
789
+ month_1: float(return_info.get("@rtn1")) / 100,
790
+ month_2: float(return_info.get("@rtn2")) / 100,
791
+ month_3: float(return_info.get("@rtn3")) / 100,
792
+ }
793
+ metadata["returns"] = returns
794
+ flow = {
795
+ month_1: {
796
+ "creation": float(fund_info["mon1Flow"].get("@sales", None)),
797
+ "redemption": float(
798
+ fund_info["mon1Flow"].get("@redemption", None)
799
+ ),
800
+ },
801
+ month_2: {
802
+ "creation": float(fund_info["mon2Flow"].get("@sales", None)),
803
+ "redemption": float(
804
+ fund_info["mon2Flow"].get("@redemption", None)
805
+ ),
806
+ },
807
+ month_3: {
808
+ "creation": float(fund_info["mon3Flow"].get("@sales")),
809
+ "redemption": float(
810
+ fund_info["mon3Flow"].get("@redemption", None)
811
+ ),
812
+ },
813
+ }
814
+ metadata["flow"] = flow
815
+ gains = {
816
+ month_1: {
817
+ "realized": float(
818
+ fund_info["returnInfo"]["othMon1"].get(
819
+ "@netRealizedGain", None
820
+ )
821
+ ),
822
+ "unrealized": float(
823
+ fund_info["returnInfo"]["othMon1"].get(
824
+ "@netUnrealizedAppr", None
825
+ )
826
+ ),
827
+ },
828
+ month_2: {
829
+ "realized": float(
830
+ fund_info["returnInfo"]["othMon2"].get(
831
+ "@netRealizedGain", None
832
+ )
833
+ ),
834
+ "unrealized": float(
835
+ fund_info["returnInfo"]["othMon2"].get(
836
+ "@netUnrealizedAppr", None
837
+ )
838
+ ),
839
+ },
840
+ month_3: {
841
+ "realized": float(
842
+ fund_info["returnInfo"]["othMon3"].get(
843
+ "@netRealizedGain", None
844
+ )
845
+ ),
846
+ "unrealized": float(
847
+ fund_info["returnInfo"]["othMon3"].get(
848
+ "@netUnrealizedAppr", None
849
+ )
850
+ ),
851
+ },
852
+ }
853
+ metadata["gains"] = gains
854
+ _borrowers = fund_info["borrowers"].get("borrower", [])
855
+ if _borrowers:
856
+ borrowers = [
857
+ {
858
+ "name": d["@name"],
859
+ "lei": d["@lei"],
860
+ "value": float(d["@aggrVal"]),
861
+ }
862
+ for d in _borrowers
863
+ ]
864
+ metadata["borrowers"] = borrowers
865
+ except Exception as e: # pylint: disable=W0718
866
+ warn(f"Error extracting metadata: {e}")
867
+ return AnnotatedResult(
868
+ result=[SecEtfHoldingsData.model_validate(d) for d in results],
869
+ metadata=metadata,
870
+ )
openbb_platform/providers/sec/openbb_sec/models/form_13FHR.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Form 13F-HR Model."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from typing import Any, Optional
6
+
7
+ from openbb_core.provider.abstract.fetcher import Fetcher
8
+ from openbb_core.provider.standard_models.form_13FHR import (
9
+ Form13FHRData,
10
+ Form13FHRQueryParams,
11
+ )
12
+ from pydantic import Field
13
+
14
+
15
class SecForm13FHRQueryParams(Form13FHRQueryParams):
    """Query parameters for SEC Form 13F-HR filings.

    Source: https://www.sec.gov/Archives/edgar/data/
    """
20
+
21
+
22
class SecForm13FHRData(Form13FHRData):
    """SEC Form 13F-HR Data."""

    # Map standard-model field names to the raw XML tag names in the filing.
    __alias_dict__ = {
        "issuer": "nameOfIssuer",
        "asset_class": "titleOfClass",
        "option_type": "putCall",
    }

    # Position weight within the filing, normalized to a fraction of total market value.
    weight: float = Field(
        description=(
            "The weight of the security relative to the market value of all securities in the filing"
            " , as a normalized percent."
        ),
        json_schema_extra={"x-unit_measurement": "percent", "x-frontend_multiply": 100},
    )
36
+
37
+
38
class SecForm13FHRFetcher(Fetcher[SecForm13FHRQueryParams, list[SecForm13FHRData]]):
    """SEC Form 13F-HR Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecForm13FHRQueryParams:
        """Transform the query."""
        return SecForm13FHRQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecForm13FHRQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Return the raw data from the SEC endpoint.

        Raises
        ------
        EmptyDataError
            If no holdings were parsed for the given parameters.
        """
        # pylint: disable=import-outside-toplevel
        import asyncio  # noqa
        from openbb_core.app.model.abstract.error import OpenBBError
        from openbb_core.provider.utils.errors import EmptyDataError
        from openbb_sec.utils import parse_13f

        symbol = query.symbol
        urls: list = []
        # A purely numeric "symbol" is treated as a CIK number.
        is_cik = symbol.isnumeric()
        try:
            filings = (
                await parse_13f.get_13f_candidates(cik=symbol)
                if is_cik
                else await parse_13f.get_13f_candidates(symbol=symbol)
            )
            # An explicit date takes precedence over the limit.
            if query.date is not None:
                date = parse_13f.date_to_quarter_end(query.date.strftime("%Y-%m-%d"))
                filings.index = filings.index.astype(str)
                urls = [filings.loc[date]]
            elif query.limit:
                urls = filings.iloc[: query.limit].to_list()

            results: list = []

            async def get_filing(url):
                """Get a single 13F-HR filing and parse it."""
                data = await parse_13f.parse_13f_hr(url)
                if len(data) > 0:
                    results.extend(data)

            # Download and parse all candidate filings concurrently.
            await asyncio.gather(*[get_filing(url) for url in urls])

            if not results:
                raise EmptyDataError("No data was returned with the given parameters.")

            return results
        except OpenBBError:
            # Re-raise provider errors unchanged (EmptyDataError included).
            raise

    @staticmethod
    def transform_data(
        query: SecForm13FHRQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecForm13FHRData]:
        """Transform the data, sorted by period then weight, descending."""
        return [
            SecForm13FHRData.model_validate(d)
            for d in sorted(
                data,
                key=lambda d: [d["period_ending"], d["weight"]],
                reverse=True,
            )
        ]
openbb_platform/providers/sec/openbb_sec/models/htm_file.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC HTM/HTML File Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.abstract.query_params import QueryParams
11
+ from pydantic import Field
12
+
13
+
14
class SecHtmFileQueryParams(QueryParams):
    """SEC HTM File Query Parameters."""

    # Direct sec.gov link to the filing document.
    url: str = Field(default="", description="URL for the SEC filing.")
    # Persist the downloaded file locally for re-use.
    use_cache: bool = Field(
        default=True,
        description="Cache the file for use later. Default is True.",
    )
25
+
26
+
27
class SecHtmFileData(Data):
    """SEC HTM File Data."""

    # Source URL; excluded from widget display configuration.
    url: str = Field(
        description="URL of the downloaded file.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    # Full text of the downloaded document.
    content: str = Field(description="Raw content of the HTM/HTML file.")
35
+
36
+
37
class SecHtmFileFetcher(Fetcher[SecHtmFileQueryParams, SecHtmFileData]):
    """SEC HTM File Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecHtmFileQueryParams:
        """Transform the query.

        Raises
        ------
        OpenBBError
            If no URL is supplied, or the URL is not a sec.gov link to an HTM/HTML file.
        """
        if not params.get("url"):
            raise OpenBBError(ValueError("Please enter a URL."))

        url = params.get("url", "")

        # Accept only fully-qualified sec.gov links pointing directly at an HTM(L) document.
        if (
            not url.startswith("http")
            or "sec.gov" not in url
            or not url.endswith((".htm", ".html"))
        ):
            raise OpenBBError(
                ValueError(
                    "Invalid URL. Please provide a SEC URL that directs specifically to a HTM or HTML file."
                )
            )
        return SecHtmFileQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecHtmFileQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> dict:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.models.sec_filing import SecBaseFiling

        return {
            "url": query.url,
            "content": SecBaseFiling.download_file(query.url, False, query.use_cache),
        }

    @staticmethod
    def transform_data(
        query: SecHtmFileQueryParams, data: dict, **kwargs: Any
    ) -> SecHtmFileData:
        """Transform the data to the standard format.

        Raises
        ------
        OpenBBError
            If no content was downloaded, or no <html> element could be parsed.
        """
        # pylint: disable=import-outside-toplevel
        from bs4 import BeautifulSoup  # noqa

        if not data or not data.get("content"):
            raise OpenBBError("Failed to extract HTM file data.")

        content = data.pop("content", "")
        soup = BeautifulSoup(content, "html.parser").find("html")

        # find() returns None when the document has no <html> element;
        # guard before iterating to avoid an AttributeError.
        if soup is None:
            raise OpenBBError("Failed to parse an HTML document from the file content.")

        # Remove style elements that add background color to table rows.
        for row in soup.find_all("tr"):
            if "background-color" in row.get("style", ""):
                del row["style"]
            for attr in ["class", "bgcolor"]:
                if attr in row.attrs:
                    del row[attr]

        return SecHtmFileData(content=str(soup), url=data["url"])
openbb_platform/providers/sec/openbb_sec/models/insider_trading.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Insider Trading Model."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional, Union
7
+
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.insider_trading import (
10
+ InsiderTradingData,
11
+ InsiderTradingQueryParams,
12
+ )
13
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
14
+ from pydantic import Field, field_validator
15
+
16
# Maps SEC Form 4/5 single-letter transaction codes to their full descriptions.
TRANSACTION_CODE_MAP = {
    "A": "Grant, award or other acquisition pursuant to Rule 16b-3(d)",
    "C": "Conversion of derivative security",
    "D": "Disposition to the issuer of issuer equity securities pursuant to Rule 16b-3(e)",
    "E": "Expiration of short derivative position",
    "F": (
        "Payment of exercise price or tax liability by delivering or withholding securities incident to the receipt, "
        "exercise or vesting of a security issued in accordance with Rule 16b-3"
    ),
    "G": "Bona fide gift",
    "H": "Expiration (or cancellation) of long derivative position with value received",
    "I": (
        "Discretionary transaction in accordance with Rule 16b-3(f) "
        "resulting in acquisition or disposition of issuer securities"
    ),
    "J": "Other acquisition or disposition (describe transaction)",
    "L": "Small acquisition under Rule 16a-6",
    "M": "Exercise or conversion of derivative security exempted pursuant to Rule 16b-3",
    "O": "Exercise of out-of-the-money derivative security",
    "P": "Open market or private purchase of non-derivative or derivative security",
    "S": "Open market or private sale of non-derivative or derivative security",
    "U": "Disposition pursuant to a tender of shares in a change of control transaction",
    "W": "Acquisition or disposition by will or the laws of descent and distribution",
    "X": "Exercise of in-the-money or at-the-money derivative security",
    "Z": "Deposit into or withdrawal from voting trust",
}

# Maps the filing-timeliness flag to a human-readable label.
TIMELINESS_MAP = {
    "E": "Early",
    "L": "Late",
    "Empty": "On-time",
}
48
+
49
+
50
class SecInsiderTradingQueryParams(InsiderTradingQueryParams):
    """SEC Insider Trading Query Params.

    Source: https://www.sec.gov/Archives/edgar/data/
    """

    start_date: Optional[dateType] = Field(
        default=None,
        description=(
            QUERY_DESCRIPTIONS.get("start_date", "")
            + " Wide date ranges can result in long download times."
            " Recommended to use a smaller date range, default is 120 days ago."
        ),
    )
    end_date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("end_date", "") + " Default is today.",
    )
    use_cache: bool = Field(
        default=True,
        description=(
            "Persist the data locally for future use. Default is True."
            " Each form submission is an individual download and the SEC limits the number of concurrent downloads."
            " This prevents the same file from being downloaded multiple times."
        ),
    )
72
+
73
+
74
class SecInsiderTradingData(InsiderTradingData):
    """SEC Insider Trading Data."""

    company_name: Optional[str] = Field(default=None, description="Name of the company.")
    form: Optional[Union[str, int]] = Field(default=None, description="Form type.")
    director: Optional[bool] = Field(
        default=None, description="Whether the owner is a director."
    )
    officer: Optional[bool] = Field(
        default=None, description="Whether the owner is an officer."
    )
    ten_percent_owner: Optional[bool] = Field(
        default=None, description="Whether the owner is a 10% owner."
    )
    other: Optional[bool] = Field(
        default=None, description="Whether the owner is classified as other."
    )
    other_text: Optional[str] = Field(
        default=None, description="Text for other classification."
    )
    transaction_timeliness: Optional[str] = Field(
        default=None, description="Timeliness of the transaction."
    )
    ownership_type: Optional[str] = Field(
        default=None, description="Type of ownership, direct or indirect."
    )
    nature_of_ownership: Optional[str] = Field(
        default=None, description="Nature of the ownership."
    )
    exercise_date: Optional[dateType] = Field(
        default=None, description="Date of exercise."
    )
    expiration_date: Optional[dateType] = Field(
        default=None, description="Date of expiration for the derivative."
    )
    deemed_execution_date: Optional[dateType] = Field(
        default=None, description="Deemed execution date."
    )
    underlying_security_title: Optional[str] = Field(
        default=None, description="Title of the underlying security."
    )
    underlying_security_shares: Optional[float] = Field(
        default=None,
        description="Number of underlying shares associated with the derivative.",
    )
    underlying_security_value: Optional[float] = Field(
        default=None, description="Value of the underlying security."
    )
    conversion_exercise_price: Optional[float] = Field(
        default=None, description="Price of conversion or exercise of the securities."
    )
    transaction_value: Optional[float] = Field(
        default=None, description="Total value of the transaction."
    )
    value_owned: Optional[float] = Field(
        default=None, description="Value of the securities owned after the transaction."
    )
    footnote: Optional[str] = Field(
        default=None, description="Footnote for the transaction."
    )

    @field_validator("symbol", mode="before", check_fields=False)
    @classmethod
    def _to_upper(cls, v):
        """Normalize the symbol to uppercase."""
        return v.upper() if v else None

    @field_validator("ownership_type", mode="before", check_fields=False)
    @classmethod
    def _map_ownership_type(cls, v):
        """Expand the one-letter ownership code; unknown codes pass through."""
        if not v:
            return None
        return {"D": "Direct", "I": "Indirect"}.get(v.strip(), v)

    @field_validator("acquisition_or_disposition", mode="before", check_fields=False)
    @classmethod
    def _map_acquisition_disposition(cls, v):
        """Expand the one-letter A/D code; unknown codes pass through."""
        if not v:
            return None
        return {"A": "Acquisition", "D": "Disposition"}.get(v.strip(), v)

    @field_validator("transaction_type", mode="before", check_fields=False)
    @classmethod
    def _map_transaction_code(cls, v):
        """Translate the transaction code via TRANSACTION_CODE_MAP."""
        return TRANSACTION_CODE_MAP.get(v, v) if v else None

    @field_validator("transaction_timeliness", mode="before", check_fields=False)
    @classmethod
    def _map_timeliness(cls, v):
        """Translate the timeliness flag via TIMELINESS_MAP."""
        return TIMELINESS_MAP.get(v, v) if v else None
174
+
175
+
176
class SecInsiderTradingFetcher(
    Fetcher[SecInsiderTradingQueryParams, list[SecInsiderTradingData]]
):
    """SEC Insider Trading Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecInsiderTradingQueryParams:
        """Transform query parameters, defaulting to the trailing 120 days."""
        # pylint: disable=import-outside-toplevel
        from datetime import datetime, timedelta

        # Apply defaults only when neither bound of the date range is given.
        if not params.get("start_date") and not params.get("end_date"):
            today = datetime.now().date()
            params["start_date"] = today - timedelta(days=120)
            params["end_date"] = today

        return SecInsiderTradingQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecInsiderTradingQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Extract the data from the SEC archives."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.form4 import get_form_4

        return await get_form_4(
            query.symbol,
            query.start_date,
            query.end_date,
            query.limit,
            query.use_cache,
        )

    @staticmethod
    def transform_data(
        query: SecInsiderTradingQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecInsiderTradingData]:
        """Validate each record into the data model."""
        return [SecInsiderTradingData.model_validate(record) for record in data]
openbb_platform/providers/sec/openbb_sec/models/institutions_search.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Institutions Search Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from openbb_core.provider.abstract.data import Data
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
10
+ from pydantic import Field
11
+
12
+
13
class SecInstitutionsSearchQueryParams(CotSearchQueryParams):
    """SEC Institutions Search Query.

    Source: https://sec.gov/
    """

    # Persist downloaded CIK data locally to avoid repeated fetches.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
23
+
24
+
25
class SecInstitutionsSearchData(Data):
    """SEC Institutions Search Data."""

    # Map model field names to the column names in the SEC source table.
    __alias_dict__ = {
        "name": "Institution",
        "cik": "CIK Number",
    }

    name: Optional[str] = Field(
        default=None,
        description="The name of the institution.",
    )
    # May arrive as a string or an integer from the source data.
    cik: Optional[Union[str, int]] = Field(
        default=None,
        description="Central Index Key (CIK)",
    )
41
+
42
+
43
class SecInstitutionsSearchFetcher(
    Fetcher[
        SecInstitutionsSearchQueryParams,
        List[SecInstitutionsSearchData],
    ]
):
    """SEC Institutions Search Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecInstitutionsSearchQueryParams:
        """Transform the query."""
        return SecInstitutionsSearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecInstitutionsSearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import get_all_ciks

        institutions = await get_all_ciks(use_cache=query.use_cache)
        # Match as a literal substring (regex=False): user input containing regex
        # metacharacters (e.g. "(", "+") would otherwise raise re.error.
        hp = institutions["Institution"].str.contains(
            query.query, case=False, regex=False
        )
        return institutions[hp].astype(str).to_dict("records")

    @staticmethod
    def transform_data(
        query: SecInstitutionsSearchQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecInstitutionsSearchData]:
        """Transform the data to the standard format."""
        return [SecInstitutionsSearchData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/latest_financial_reports.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RSS Latest Financials Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.latest_financial_reports import (
11
+ LatestFinancialReportsData,
12
+ LatestFinancialReportsQueryParams,
13
+ )
14
+ from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS
15
+ from openbb_core.provider.utils.errors import EmptyDataError
16
+ from pydantic import Field, field_validator
17
+
18
# Form types accepted by the report_type query parameter.
report_type_choices = [
    "1-K", "1-SA", "1-U",
    "10-D", "10-K", "10-KT", "10-Q", "10-QT",
    "20-F", "40-F",
    "6-K", "8-K",
]
32
+
33
+
34
class SecLatestFinancialReportsQueryParams(LatestFinancialReportsQueryParams):
    """SEC Latest Financial Reports Query.

    source: https://www.sec.gov/edgar/search/
    """

    __json_schema_extra__ = {
        "report_type": {"multiple_items_allowed": True, "choices": report_type_choices}
    }

    date: Optional[dateType] = Field(
        default=None,
        description=QUERY_DESCRIPTIONS.get("date", "") + " Defaults to today.",
    )
    report_type: Optional[str] = Field(
        default=None,
        description="Return only a specific form type. Default is all quarterly, annual, and current reports."
        + f" Choices: {', '.join(report_type_choices)}.",
    )

    @field_validator("report_type", mode="before", check_fields=False)
    @classmethod
    def validate_report_type(cls, v):
        """Validate and normalize the comma-separated report type list.

        Raises
        ------
        ValueError
            If any entry is not one of `report_type_choices`.
        """
        if v is None:
            return v
        # Strip surrounding whitespace so "10-K, 10-Q" is accepted as well as "10-K,10-Q".
        rpts = [rpt.strip() for rpt in v.split(",")]
        for rpt in rpts:
            if rpt not in report_type_choices:
                raise ValueError(
                    f"Invalid report type: {rpt}. Choices: {', '.join(report_type_choices)}"
                )
        # Re-join the normalized tokens so downstream consumers see a clean list.
        return ",".join(rpts)
67
+
68
+
69
class SecLatestFinancialReportsData(LatestFinancialReportsData):
    """SEC Latest Financial Reports Data."""

    items: Optional[str] = Field(
        default=None, description="Item codes associated with the filing."
    )
    index_headers: str = Field(description="URL to the index headers file.")
    complete_submission: str = Field(
        description="URL to the complete submission text file."
    )
    # Not every filing publishes these two companion files.
    metadata: Optional[str] = Field(
        default=None, description="URL to the MetaLinks.json file, if available."
    )
    financial_report: Optional[str] = Field(
        default=None, description="URL to the Financial_Report.xlsx file, if available."
    )
85
+
86
+
87
class SecLatestFinancialReportsFetcher(
    Fetcher[SecLatestFinancialReportsQueryParams, list[SecLatestFinancialReportsData]]
):
    """SEC Latest Financial Reports Fetcher.

    Queries the SEC EDGAR full-text search endpoint (efts.sec.gov) for all
    financial-report filings on a single date, paging through results 100 at
    a time, then flattens each search hit into a row with document URLs.
    """

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecLatestFinancialReportsQueryParams:
        """Transform the query params."""
        return SecLatestFinancialReportsQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecLatestFinancialReportsQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> list[dict]:
        """Extract the raw data from the SEC.

        Returns the raw list of search "hits" from the full-text search API.

        Raises
        ------
        OpenBBError
            If the first request fails, returns a non-dict payload, or no
            data exists for the date (with the default form set).
        EmptyDataError
            If no data exists for an explicitly requested form type.
        """
        # pylint: disable=import-outside-toplevel
        from datetime import timedelta  # noqa
        from openbb_core.provider.utils.helpers import amake_request
        from warnings import warn

        today = dateType.today()
        query_date = query.date if query.date is not None else today

        # Filings are not accepted on weekends; roll back to the prior Friday.
        if query_date.weekday() > 4:
            query_date -= timedelta(days=query_date.weekday() - 4)

        date = query_date.strftime("%Y-%m-%d")

        # SEC requires a descriptive User-Agent identifying the requester.
        SEARCH_HEADERS = {
            "User-Agent": "my real company name definitelynot@fakecompany.com",
            "Accept-Encoding": "gzip, deflate",
        }

        # Default to the full set of financial-report form types.
        # The string is URL-encoded: %2C is a comma, %20 a space.
        forms = (
            query.report_type
            if query.report_type is not None
            else (
                "1-K%2C1-SA%2C1-U%2C1-Z%2C1-Z-W%2C10-D%2C10-K%2C10-KT%2C10-Q%2C10-QT%2C11-K%2C11-KT%2C15-12B%2C15-12G%2C"
                "15-15D%2C15F-12B%2C15F-12G%2C15F-15D%2C18-K%2C20-F%2C24F-2NT%2C25%2C25-NSE%2C40-17F2%2C40-17G%2C40-F%2C"
                "6-K%2C8-K%2C8-K12G3%2C8-K15D5%2CABS-15G%2CABS-EE%2CANNLRPT%2CDSTRBRPT%2CN-30B-2%2CN-30D%2CN-CEN%2CN-CSR%2C"
                "N-CSRS%2CN-MFP%2CN-MFP1%2CN-MFP2%2CN-PX%2CN-Q%2CNSAR-A%2CNSAR-B%2CNSAR-U%2CNT%2010-D%2CNT%2010-K%2C"
                "NT%2010-Q%2CNT%2011-K%2CNT%2020-F%2CQRTLYRPT%2CSD%2CSP%2015D2"
            )
        )

        def get_url(date, offset):
            """Build the search URL for one page of up to 100 results."""
            return (
                "https://efts.sec.gov/LATEST/search-index?dateRange=custom"
                f"&category=form-cat1&startdt={date}&enddt={date}&forms={forms}&count=100&from={offset}"
            )

        n_hits = 0
        results: list = []
        url = get_url(date, n_hits)
        try:
            response = await amake_request(url, headers=SEARCH_HEADERS)
        except OpenBBError as e:
            raise OpenBBError(f"Failed to get SEC data: {e}") from e

        if not isinstance(response, dict):
            raise OpenBBError(
                f"Unexpected data response. Expected dictionary, got {response.__class__.__name__}"
            )

        hits = response.get("hits", {})
        # Guard against a missing/None total so the paging comparison below
        # cannot raise a TypeError when the response has no "total" field.
        total_hits = hits.get("total", {}).get("value") or 0

        if hits.get("hits"):
            results.extend(hits["hits"])

        n_hits += len(results)

        # Page through the remaining results; the API returns 100 per page.
        while n_hits < total_hits:
            offset = n_hits
            url = get_url(date, offset)
            try:
                response = await amake_request(url, headers=SEARCH_HEADERS)
            except Exception as e:
                # Partial results are better than none; stop paging on error.
                warn(f"Failed to get the next page of SEC data: {e}")
                break

            if not isinstance(response, dict):
                warn("Unexpected response while paging SEC data; stopping early.")
                break

            hits = response.get("hits", {})
            new_results = hits.get("hits", [])

            if not new_results:
                break

            results.extend(new_results)
            n_hits += len(new_results)

        if not results and query.report_type is None:
            raise OpenBBError("No data was returned.")

        if not results and query.report_type is not None:
            raise EmptyDataError(
                f"No data was returned for form type {query.report_type}."
            )

        return results

    @staticmethod
    def transform_data(
        query: SecLatestFinancialReportsQueryParams,
        data: list[dict],
        **kwargs: Any,
    ) -> list[SecLatestFinancialReportsData]:
        """Transform the raw data.

        Flattens each search hit into a validated model instance and builds
        the EDGAR archive URLs from the CIK, accession number, and hit ID.
        """
        results: list[SecLatestFinancialReportsData] = []

        def parse_entry(entry):
            """Parse each entry in the response."""
            source = entry.get("_source", {})
            ciks = ",".join(source["ciks"]) if source.get("ciks") else None
            display_names = source.get("display_names", [])
            names: list = []
            tickers: list = []
            sics = ",".join(source.get("sics", []))
            for name in display_names:
                # Display names look like "Company Name (TICKER) (CIK ...)".
                # Not every filer has a parenthesized ticker, so only parse
                # one when present to avoid an IndexError.
                if "(" in name and ")" in name:
                    tickers.append(name.split("(")[1].split(")")[0].strip())
                names.append(name.split("(")[0].strip())

            output: dict = {}
            output["filing_date"] = source.get("file_date")
            output["period_ending"] = source.get("period_ending")
            output["symbol"] = ",".join(tickers).replace(" ", "")
            output["name"] = ",".join(names)
            output["cik"] = ciks
            output["sic"] = sics
            output["report_type"] = source.get("form")
            output["description"] = source.get("file_description")

            # _id has the form "<accession>.txt:<document file name>".
            _id = entry.get("_id")
            root_url = (
                "https://www.sec.gov/Archives/edgar/data/"
                + source["ciks"][0]
                + "/"
                + source["adsh"].replace("-", "")
                + "/"
            )
            output["items"] = ",".join(source["items"]) if source.get("items") else None
            output["url"] = root_url + _id.split(":")[1]
            output["index_headers"] = (
                root_url + _id.split(":")[0] + "-index-headers.html"
            )
            output["complete_submission"] = root_url + _id.split(":")[0] + ".txt"
            # report_type may be None; normalize before the prefix checks.
            report_type = output["report_type"] or ""
            output["metadata"] = (
                root_url + "MetaLinks.json"
                if report_type.startswith(("10-", "8-"))
                else None
            )
            output["financial_report"] = (
                root_url + "Financial_Report.xlsx"
                if report_type.startswith(("10-", "8-"))
                or report_type in ["N-CSR", "QRTLYRPT", "ANNLRPT"]
                else None
            )
            return output

        # Some duplicates may exist in the data; deduplicate on document URL.
        seen = set()
        for entry in data:
            parsed_entry = parse_entry(entry)
            if parsed_entry["url"] not in seen:
                seen.add(parsed_entry["url"])
                results.append(
                    SecLatestFinancialReportsData.model_validate(parsed_entry)
                )

        return results
openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py ADDED
@@ -0,0 +1,1394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Management & Discussion Model."""
2
+
3
+ # pylint: disable=unused-argument,too-many-branches,too-many-locals,too-many-statements,too-many-nested-blocks,too-many-boolean-expressions,too-many-lines
4
+
5
+ from typing import Any, Literal, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.management_discussion_analysis import (
10
+ ManagementDiscussionAnalysisData,
11
+ ManagementDiscussionAnalysisQueryParams,
12
+ )
13
+ from openbb_core.provider.utils.errors import EmptyDataError
14
+ from pydantic import Field
15
+
16
+
17
class SecManagementDiscussionAnalysisQueryParams(
    ManagementDiscussionAnalysisQueryParams
):
    """SEC Management & Discussion Query."""

    # Extraction backend; 'inscriptis' is used as a fallback when
    # trafilatura fails to extract any content.
    strategy: Literal["inscriptis", "trafilatura"] = Field(
        default="trafilatura",
        description="The strategy to use for extracting the text. Default is 'trafilatura'.",
    )
    # Soft-wrap width for paragraphs; tables are never wrapped.
    wrap_length: int = Field(
        default=120,
        description="The length to wrap the extracted text, excluding tables. Default is 120.",
    )
    include_tables: bool = Field(
        default=False,
        description=(
            "Return tables formatted as markdown in the text. Default is False."
            " Tables may reveal 'missing' content,"
            " but will likely need some level of manual cleaning, post-request, to display properly."
            " In some cases, tables may not be recoverable due to the nature of the document."
        ),
    )
    # Controls the SQLite-backed HTTP cache used when downloading the filing.
    use_cache: bool = Field(
        default=True,
        description="When True, the file will be cached for use later. Default is True.",
    )
    # When set, transform_data short-circuits and returns the raw document.
    raw_html: bool = Field(
        default=False,
        description=(
            "When True, the raw HTML content of the entire filing will be returned. Default is False."
            " Use this option to parse the document manually."
        ),
    )
46
+
47
+
48
class SecManagementDiscussionAnalysisData(ManagementDiscussionAnalysisData):
    """SEC Management & Discussion Data."""

    # Source document URL; set by the fetcher from the filing's report_url.
    url: str = Field(
        description="The URL of the filing from which the data was extracted."
    )
54
+
55
+
56
+ class SecManagementDiscussionAnalysisFetcher(
57
+ Fetcher[
58
+ SecManagementDiscussionAnalysisQueryParams, SecManagementDiscussionAnalysisData
59
+ ]
60
+ ):
61
+ """SEC Management & Discussion Fetcher."""
62
+
63
    @staticmethod
    def transform_query(
        params: dict[str, Any]
    ) -> SecManagementDiscussionAnalysisQueryParams:
        """Transform the query.

        Validates the raw keyword arguments by constructing the
        SecManagementDiscussionAnalysisQueryParams model.
        """
        return SecManagementDiscussionAnalysisQueryParams(**params)
69
+
70
+ @staticmethod
71
+ async def aextract_data(
72
+ query: SecManagementDiscussionAnalysisQueryParams,
73
+ credentials: Optional[dict[str, Any]],
74
+ **kwargs: Any,
75
+ ) -> dict:
76
+ """Extract the data."""
77
+ # pylint: disable=import-outside-toplevel
78
+ from aiohttp_client_cache import SQLiteBackend
79
+ from aiohttp_client_cache.session import CachedSession
80
+ from openbb_core.app.utils import get_user_cache_directory
81
+ from openbb_core.provider.utils.helpers import amake_request
82
+ from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
83
+ from openbb_sec.utils.helpers import SEC_HEADERS, sec_callback
84
+ from pandas import offsets, to_datetime
85
+
86
+ # Get the company filings to find the URL.
87
+
88
+ if query.symbol == "BLK" or query.symbol.isnumeric():
89
+ filings = await SecCompanyFilingsFetcher.fetch_data(
90
+ {
91
+ "cik": "0001364742" if query.symbol == "BLK" else query.symbol,
92
+ "form_type": "10-K,10-Q",
93
+ "use_cache": query.use_cache,
94
+ },
95
+ {},
96
+ )
97
+
98
+ else:
99
+ filings = await SecCompanyFilingsFetcher.fetch_data(
100
+ {
101
+ "symbol": query.symbol,
102
+ "form_type": "10-K,10-Q",
103
+ "use_cache": query.use_cache,
104
+ },
105
+ {},
106
+ )
107
+
108
+ if not filings:
109
+ raise OpenBBError(
110
+ f"Could not find any 10-K or 10-Q filings for the symbol. -> {query.symbol}"
111
+ )
112
+
113
+ # If no calendar year or period is provided, get the most recent filing.
114
+
115
+ target_filing: Any = None
116
+ calendar_year: Any = None
117
+ calendar_period: Any = None
118
+
119
+ if query.calendar_year is None and query.calendar_period is None:
120
+ target_filing = (
121
+ filings[0]
122
+ if not query.calendar_year and not query.calendar_period
123
+ else None
124
+ )
125
+
126
+ if not target_filing:
127
+ if query.calendar_period and not query.calendar_year:
128
+ calendar_year = to_datetime("today").year
129
+ calendar_period = to_datetime("today").quarter
130
+ elif query.calendar_year and query.calendar_period:
131
+ calendar_year = query.calendar_year
132
+ calendar_period = int(query.calendar_period[1])
133
+ elif query.calendar_year:
134
+ calendar_year = query.calendar_year
135
+ calendar_period = 1
136
+
137
+ if query.calendar_year and not query.calendar_period:
138
+ target_filing = [
139
+ f
140
+ for f in filings
141
+ if f.report_type == "10-K"
142
+ and f.filing_date.year == query.calendar_year
143
+ ]
144
+ if not target_filing:
145
+ target_filing = [
146
+ f for f in filings if f.filing_date.year == query.calendar_year
147
+ ]
148
+ if target_filing:
149
+ target_filing = target_filing[0]
150
+
151
+ elif calendar_year and calendar_period:
152
+ start = to_datetime(f"{calendar_year}Q{calendar_period}")
153
+ start_date = (
154
+ start - offsets.QuarterBegin(1) + offsets.MonthBegin(1)
155
+ ).date()
156
+ end_date = (
157
+ start_date + offsets.QuarterEnd(0) - offsets.MonthEnd(0)
158
+ ).date()
159
+
160
+ for filing in filings:
161
+ if start_date < filing.filing_date < end_date:
162
+ target_filing = filing
163
+ break
164
+
165
+ if not target_filing:
166
+ raise OpenBBError(
167
+ f"Could not find a filing for the symbol -> {query.symbol}"
168
+ )
169
+
170
+ url = target_filing.report_url
171
+ response = ""
172
+
173
+ if query.use_cache is True:
174
+ cache_dir = f"{get_user_cache_directory()}/http/sec_financials"
175
+ async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
176
+ try:
177
+ await session.delete_expired_responses()
178
+ response = await amake_request(
179
+ url,
180
+ headers=SEC_HEADERS,
181
+ response_callback=sec_callback,
182
+ session=session,
183
+ ) # type: ignore
184
+ finally:
185
+ await session.close()
186
+ else:
187
+ response = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback) # type: ignore
188
+
189
+ if response and not isinstance(response, str):
190
+ raise OpenBBError(
191
+ f"Unexpected response received. Expected string and got -> {response.__class__.__name__}"
192
+ f" -> {response[:100]}"
193
+ )
194
+
195
+ if isinstance(response, str):
196
+ return {
197
+ "symbol": query.symbol,
198
+ "calendar_year": (
199
+ calendar_year if calendar_year else target_filing.report_date.year
200
+ ),
201
+ "calendar_period": (
202
+ calendar_period
203
+ if calendar_period
204
+ else to_datetime(target_filing.report_date).quarter
205
+ ),
206
+ "period_ending": target_filing.report_date,
207
+ "report_type": target_filing.report_type,
208
+ "url": url,
209
+ "content": response,
210
+ }
211
+
212
+ @staticmethod
213
+ def transform_data( # noqa: PLR0912
214
+ query: SecManagementDiscussionAnalysisQueryParams,
215
+ data: dict,
216
+ **kwargs: Any,
217
+ ) -> SecManagementDiscussionAnalysisData:
218
+ """Transform the data."""
219
+ # pylint: disable=import-outside-toplevel
220
+ import re # noqa
221
+ from inscriptis import get_text
222
+ from inscriptis.model.config import ParserConfig
223
+ from textwrap import wrap
224
+ from trafilatura import extract
225
+ from warnings import warn
226
+
227
+ if query.raw_html is True:
228
+ return SecManagementDiscussionAnalysisData(**data)
229
+
230
+ is_quarterly = data.get("report_type", "").endswith("Q")
231
+ is_inscriptis = query.strategy == "inscriptis"
232
+
233
+ def is_table_header(line: str) -> bool:
234
+ """Check if line is a table header"""
235
+ return (
236
+ (
237
+ all(
238
+ not char.isnumeric()
239
+ for char in line.replace("(", "")
240
+ .replace(")", "")
241
+ .replace(",", "")
242
+ .replace(" ", "")
243
+ .replace("|", "")
244
+ )
245
+ and line.replace("|", "").replace("-", "").strip() != ""
246
+ and "/" not in line
247
+ )
248
+ or all(
249
+ len(str(word).strip()) == 4 and str(word).strip().startswith("20")
250
+ for word in line.split("|")
251
+ if word
252
+ )
253
+ or line.replace("|", "").replace(" ", "").endswith(":")
254
+ or "of dollars" in line.lower()
255
+ )
256
+
257
+ def insert_cell_dividers(line):
258
+ cells = line.strip().split("|")
259
+ new_cells: list = []
260
+ for cell in cells:
261
+ cell = cell.replace("$", "").replace(" % ", "").replace("%", "") # noqa
262
+ if (
263
+ "par value" in cell.lower()
264
+ or "shares" in cell.lower()
265
+ or (" %-" in cell and "notes" in cell.lower())
266
+ or "as of" in cell.lower()
267
+ or "of dollars" in cell.lower()
268
+ or "year" in cell.lower()
269
+ or "scenario" in cell.lower()
270
+ or " to " in cell.lower()
271
+ or "section" in cell.lower()
272
+ or "title" in cell.lower()
273
+ or "adverse currency fluctuation" in cell.lower()
274
+ or "vs" in cell.lower()
275
+ or cell.strip().endswith(",")
276
+ ):
277
+ new_cells.append(cell)
278
+ continue
279
+ if "Form 10-" in cell:
280
+ continue
281
+ new_cell = cell.strip()
282
+ if new_cell.endswith(("-", "—", "–")) and any(
283
+ c.isalpha() for c in new_cell
284
+ ):
285
+ # Remove the dash and insert a divider before it
286
+ new_cell = re.sub(r"[—\-–]+$", "", new_cell).strip() + " | —"
287
+ elif (
288
+ re.search("[A-Za-z]", new_cell)
289
+ and re.search("[0-9]", new_cell)
290
+ and re.search(r"[A-Za-z]\s+[0-9]", new_cell)
291
+ and "thru" not in new_cell.lower()
292
+ and "through" not in new_cell.lower()
293
+ and "outstanding" not in new_cell.lower()
294
+ and "Tier" not in new_cell
295
+ and "%" not in new_cell
296
+ and "$" not in new_cell
297
+ and "in" not in new_cell
298
+ and "year" not in new_cell
299
+ and "scenario" not in new_cell
300
+ ):
301
+ # Handle cases with spaces between letters and numbers
302
+ new_cell = re.sub(
303
+ r"(?<=[A-Za-z])\s+(?=[0-9])(?!\([a-zA-Z])", " |", new_cell
304
+ )
305
+ new_cell = re.sub(
306
+ r"(?<=[A-Za-z])(?=[0-9])(?!\([a-zA-Z])", "|", new_cell
307
+ )
308
+ # Insert divider between consecutive numbers, excluding number(letter)
309
+ if (
310
+ re.search(
311
+ r"(\(\d+\.?\d*\)|\d+\.?\d*)\s+(\(\d+\.?\d*\)|\d+\.?\d*)",
312
+ new_cell,
313
+ )
314
+ and "versus" not in new_cell.lower()
315
+ and "thru" not in new_cell.lower()
316
+ and "through" not in new_cell.lower()
317
+ and not re.search(r"\d+\.?\d*\([a-zA-Z]\)", new_cell)
318
+ ):
319
+ new_cell = re.sub(
320
+ r"(\(\d+\)|\d+(?:\.\d+)?)\s+(?=\(|\d)(?!\([a-zA-Z])",
321
+ r"\1|",
322
+ new_cell,
323
+ )
324
+ new_cells.append(new_cell)
325
+ return "|".join(new_cells)
326
+
327
+ def process_extracted_text( # noqa: PLR0912
328
+ extracted_text: str, is_inscriptis: bool
329
+ ) -> list:
330
+ """Process extracted text"""
331
+
332
+ new_lines: list = []
333
+ starting_line = "Item 2."
334
+ annual_start = "Item 7."
335
+ ending_line = "Item 6"
336
+ annual_end = "Item 8. "
337
+ found_start = False
338
+ at_end = False
339
+ previous_line = ""
340
+ start_line_text = ""
341
+ line_i = 0
342
+ extracted_lines = extracted_text.splitlines()
343
+
344
+ for line in extracted_lines:
345
+ line_i += 1
346
+ if (
347
+ not line.strip()
348
+ or line.replace("|", "")
349
+ .strip()
350
+ .startswith(("Page ", "Table of Contents"))
351
+ or line.strip() in ("|", start_line_text)
352
+ or (len(line) < 3 and line.isnumeric())
353
+ or line.strip().replace("_", "").replace("**", "") == ""
354
+ ):
355
+ continue
356
+
357
+ if (
358
+ "Discussion and Analysis of Financial Condition and Results of Operations is presented in".lower()
359
+ in line.lower()
360
+ ):
361
+ annual_end = "PART IV"
362
+ elif (
363
+ "see the information under" in line.lower()
364
+ and "discussion and analysis" in line.lower()
365
+ ) and (
366
+ (is_quarterly and "10-K" not in line)
367
+ or (not is_quarterly and "10-Q" not in line)
368
+ ):
369
+ annual_end = "statements of consolidated"
370
+ ending_line = "statements of conslidated"
371
+
372
+ if (
373
+ (
374
+ line.strip()
375
+ .lower()
376
+ .startswith(
377
+ (
378
+ starting_line.lower(),
379
+ annual_start.lower(),
380
+ )
381
+ )
382
+ and "management" in line.lower()
383
+ )
384
+ or (
385
+ line.replace("|", "")
386
+ .lstrip(" ")
387
+ .lower()
388
+ .startswith("the following is management")
389
+ and "discussion and analysis of" in line.lower()
390
+ )
391
+ or (
392
+ line.endswith(
393
+ " “Management’s Discussion and Analysis of Financial Condition and Results of Operations” "
394
+ "below."
395
+ )
396
+ )
397
+ or (
398
+ line.replace("*", "").strip().lower().startswith("item")
399
+ and line.replace("*", "")
400
+ .replace(".", "")
401
+ .strip()
402
+ .lower()
403
+ .endswith(
404
+ "discussion and analysis of financial condition and results of operations"
405
+ )
406
+ )
407
+ # Section may be in a nested table.
408
+ or (
409
+ line.replace("*", "")
410
+ .replace("|", "")
411
+ .strip()
412
+ .lower()
413
+ .startswith("item")
414
+ and line.replace("*", "")
415
+ .replace("|", "")
416
+ .replace(".", "")
417
+ .rstrip(" ")
418
+ .lower()
419
+ .endswith(
420
+ "discussion and analysis of financial condition and results of operations"
421
+ )
422
+ and line_i > 200
423
+ )
424
+ or (
425
+ line.replace("*", "").replace("|", "").strip().lower()
426
+ == "financial review"
427
+ and line_i > 200
428
+ )
429
+ or (
430
+ line.replace("*", "")
431
+ .replace("|", "")
432
+ .replace(".", "")
433
+ .strip()
434
+ .lower()
435
+ .endswith(
436
+ (
437
+ "discussion and analysis",
438
+ "discussion and analysis of",
439
+ "analysis of financial",
440
+ "of financial condition",
441
+ )
442
+ )
443
+ and extracted_lines[line_i + 1]
444
+ .replace("|", "")
445
+ .replace(".", "")
446
+ .strip()
447
+ .lower()
448
+ .endswith(("financial condition", "results of operations"))
449
+ )
450
+ or (
451
+ line.replace("|", "").replace(".", "").strip()
452
+ == "Management’s Discussion and Analysis of Financial Condition and Results of Operations"
453
+ )
454
+ or (
455
+ line
456
+ in [
457
+ "2. MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
458
+ "7. MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
459
+ "Items 2. and 3. Management’s Discussion and Analysis of Financial Condition and "
460
+ "Results of Operations; Quantitative and Qualitative Disclosures about Market Risk",
461
+ "MANAGEMENT'S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS |",
462
+ "Item 2. Management’s Discussion and Analysis of Financial Condition and Results of Operations.", # noqa
463
+ "Item 7. Management’s Discussion and Analysis of Financial Condition and Results of Operations.", # noqa
464
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
465
+ "Management's Discussion and Analysis of Financial Condition and Results of Operations",
466
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF THE FINANCIAL CONDITION AND RESULTS OF",
467
+ "MANAGEMENT'S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS",
468
+ "Part I. Item 2. Management’s Discussion and Analysis of Financial Condition and Results of Operations", # noqa
469
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (“MD&A”)", # noqa
470
+ "ITEM 7 – MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)", # noqa
471
+ "ITEM 2 – MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)", # noqa
472
+ "Part II. Item 7. Management’s Discussion and Analysis of Financial Condition and Results of Operations", # noqa # pylint: disable=line-too-long
473
+ "| Item 2. | |",
474
+ "| Item 7. | |",
475
+ ]
476
+ )
477
+ or line.startswith(
478
+ "Item 7—Management's Discussion and Analysis of Financial Conditions"
479
+ )
480
+ or (
481
+ line.startswith(
482
+ "MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)"
483
+ )
484
+ and line_i > 200
485
+ )
486
+ or (
487
+ line.replace("|", "").strip()
488
+ == "Management's Discussion and Analysis"
489
+ and line_i > 300
490
+ )
491
+ or (
492
+ line.replace("|", "")
493
+ .strip()
494
+ .startswith(
495
+ "The following discussion and analysis of the financial condition and results of operations"
496
+ )
497
+ )
498
+ ):
499
+ line = line.replace("|", "").replace("*", "") # noqa
500
+ if line.strip(" ")[-1].isnumeric():
501
+ continue
502
+
503
+ if (
504
+ extracted_lines[line_i + 1]
505
+ .replace("*", "")
506
+ .replace(".", "")
507
+ .strip()
508
+ .lower()
509
+ .endswith(("financial condition", "results of operations"))
510
+ ):
511
+ line = "Management’s Discussion and Analysis of Financial Condition and Results of Operations" # noqa
512
+ _ = extracted_lines.pop(line_i + 1)
513
+ found_start = True
514
+ at_end = False
515
+ start_line_text = line
516
+ new_lines.append(
517
+ "# **MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS (MD&A)"
518
+ "**\n\n"
519
+ )
520
+ continue
521
+
522
+ if (
523
+ found_start
524
+ and (
525
+ line.replace("|", "")
526
+ .strip()
527
+ .lower()
528
+ .startswith(ending_line.lower())
529
+ and is_quarterly
530
+ )
531
+ or (
532
+ annual_end.lower() in line.lower()
533
+ and not is_quarterly
534
+ and len(new_lines) > 20
535
+ )
536
+ or line.replace("|", "").strip().lower().startswith("signatures")
537
+ or line.strip().startswith(
538
+ "Item 8—Financial Statements and Supplementary Data"
539
+ )
540
+ or line.strip().startswith("MANAGEMENT AND AUDITOR’S REPORTS")
541
+ or line == "EXHIBIT INDEX"
542
+ ):
543
+ at_end = True
544
+ line = line.replace("|", " ").replace(" ", " ") # noqa
545
+
546
+ if found_start and not at_end:
547
+ if (
548
+ line[0].isdigit()
549
+ or line[0] == "•"
550
+ or line[0] == "●"
551
+ and line[1] not in [".", " ", "\u0020"]
552
+ and line[1].isalpha()
553
+ ):
554
+ word = line.split(" ")[0]
555
+ if not word.replace(" ", "").isnumeric():
556
+ line = line[0] + " " + line[1:] # noqa
557
+
558
+ if "▪" in line:
559
+ line = line.replace("▪", "").replace("|", "").strip() # noqa
560
+ line = "- " + line # noqa
561
+
562
+ if "●" in line or "•" in line or "◦" in line:
563
+ line = ( # noqa
564
+ line.replace("|", "")
565
+ .replace("●", "-")
566
+ .replace("•", "-")
567
+ .replace("◦", "-")
568
+ )
569
+
570
+ if (
571
+ line.replace("|", "").strip().startswith("-")
572
+ and len(line.strip()) > 1
573
+ and line.strip()[1] != " "
574
+ ):
575
+ line = "- " + line[1:] # noqa
576
+
577
+ if "the following table" in line.lower():
578
+ line = ( # noqa
579
+ line.replace("|", "").replace(" ", " ").strip() + "\n"
580
+ )
581
+
582
+ if (
583
+ line.replace("|", "").replace(" ", "").strip().startswith("(")
584
+ and (
585
+ line.replace("|", "").replace(" ", "").strip().endswith(")")
586
+ )
587
+ and line.count("|") < 3
588
+ ):
589
+ line = line.replace("|", "").replace(" ", "").strip() # noqa
590
+ next_line = (
591
+ extracted_lines[line_i + 1]
592
+ if line_i + 1 < len(extracted_lines)
593
+ else ""
594
+ )
595
+ if not next_line.replace("|", "").replace(" ", "").strip():
596
+ next_line = (
597
+ extracted_lines[line_i + 2]
598
+ if line_i + 2 < len(extracted_lines)
599
+ else ""
600
+ )
601
+ if line_i + 1 < len(extracted_lines):
602
+ _ = extracted_lines.pop(line_i + 1)
603
+ if (
604
+ next_line.replace("|", "")
605
+ .replace(" ", "")
606
+ .strip()
607
+ .endswith((",", ";", "."))
608
+ ):
609
+ line = ( # noqa
610
+ line.replace("|", "").replace(" ", "").strip()
611
+ + " "
612
+ + next_line.replace("|", "").strip()
613
+ )
614
+ _ = extracted_lines.pop(line_i + 1)
615
+
616
+ if "|" in line:
617
+ first_word = line.split("|")[0].strip()
618
+ if first_word.isupper() or "item" in first_word.lower():
619
+ line = ( # noqa
620
+ line.replace("|", " ").replace(" ", " ").strip()
621
+ )
622
+
623
+ if (
624
+ line.endswith("|")
625
+ and not line.startswith("|")
626
+ and len(line) > 1
627
+ ):
628
+ line = ( # noqa
629
+ "| " + line
630
+ if len(line.split("|")) > 1
631
+ else line.replace("|", "").strip()
632
+ )
633
+ elif (
634
+ line.startswith("|")
635
+ and not line.endswith("|")
636
+ and len(line) > 1
637
+ and len(line.split("|"))
638
+ ):
639
+ line = ( # noqa
640
+ line + " |"
641
+ if len(line.split("|")) > 1
642
+ else line.replace("|", "").strip()
643
+ )
644
+
645
+ if query.include_tables is False and "|" in line:
646
+ continue
647
+
648
+ if (
649
+ "page" in line.replace("|", "").lower()
650
+ or "form 10-" in line.lower()
651
+ ):
652
+ continue
653
+
654
+ if "$" in line:
655
+ line = line.replace("$ |", "").replace("| |", "|") # noqa
656
+ elif "%" in line:
657
+ line = line.replace("% |", "").replace("| |", "|") # noqa
658
+
659
+ if "|" not in previous_line and all(
660
+ char == "|" for char in line.replace(" ", "")
661
+ ):
662
+ line = ( # noqa
663
+ line
664
+ + "\n"
665
+ + line.replace(" ", "")
666
+ .replace(" ", "")
667
+ .replace(" ", "")
668
+ .replace(" ", ":------:")
669
+ )
670
+
671
+ else:
672
+ is_header = is_table_header(line)
673
+ is_multi_header = (
674
+ "months ended" in line.lower()
675
+ or "year ended" in line.lower()
676
+ or "quarter ended" in line.lower()
677
+ or "change" in line.lower()
678
+ or line.strip().endswith(",")
679
+ )
680
+ is_date = (
681
+ ", 20" in line
682
+ and "through" not in line.lower()
683
+ and "thru" not in line.lower()
684
+ and "from" not in line.lower()
685
+ ) or (
686
+ "20" in line
687
+ and all(
688
+ len(d.strip()) == 4 for d in line.split("|") if d
689
+ )
690
+ )
691
+ if is_header or is_date or is_multi_header:
692
+ line = ( # noqa
693
+ line.replace(" | | ", " | ")
694
+ .replace(" | |", " | ")
695
+ .replace("| % |", "")
696
+ .replace("| $ |", "")
697
+ .replace("|$ |", "")
698
+ )
699
+ if is_header:
700
+ line = "| " + line # noqa
701
+ else:
702
+ line = ( # noqa
703
+ line.replace("| $ | ", "")
704
+ .replace("| % |", "")
705
+ .replace(" ", "|")
706
+ .replace("|$ |", "")
707
+ )
708
+ if not line.strip().startswith("|"):
709
+ line = "| " + line # noqa
710
+ line = insert_cell_dividers(line) # noqa
711
+ line = ( # noqa
712
+ line.replace(" | | ", " | ")
713
+ .replace(" | |", " |")
714
+ .replace("||", "|")
715
+ .replace("||", "|")
716
+ .replace(" | | | ", " | ")
717
+ .replace(" | | |", "|")
718
+ )
719
+ if line[-1] != "|":
720
+ line = line + "|" # noqa
721
+
722
+ previous_line = new_lines[-1]
723
+ next_line = extracted_lines[line_i + 1]
724
+
725
+ if "|" in previous_line and not line.strip():
726
+ continue
727
+
728
+ if (
729
+ "|" in previous_line
730
+ and "|" in next_line
731
+ and not line.strip("\n").replace(" ", "")
732
+ ):
733
+ continue
734
+
735
+ if (
736
+ "|" in previous_line
737
+ and "|" not in next_line
738
+ and "|" in extracted_lines[line_i + 2]
739
+ and not line.strip()
740
+ ):
741
+ line_i += 1
742
+ continue
743
+
744
+ if (
745
+ "|" in previous_line
746
+ and "|" in next_line
747
+ and not line.strip("\n").replace(" ", "")
748
+ ):
749
+ continue
750
+ if (
751
+ "|" in previous_line
752
+ and "|" not in next_line
753
+ and "|" in extracted_lines[line_i + 2]
754
+ and not line.strip()
755
+ ):
756
+ line_i += 1
757
+ continue
758
+
759
+ if is_inscriptis is True:
760
+ if (
761
+ "|:-" in previous_line
762
+ and "|" in extracted_lines[line_i + 1]
763
+ and line.strip()
764
+ and not line.strip().startswith("|")
765
+ ):
766
+ line = "|" + line # noqa
767
+ if not line.strip().endswith("|"):
768
+ line = line + "|" # noqa
769
+
770
+ line = ( # noqa
771
+ line.replace("||||", "|")
772
+ .replace("|||", "|")
773
+ .replace("| |", "")
774
+ .replace("| | |", "|")
775
+ .replace("| |", "|")
776
+ .replace(" ", "")
777
+ .replace("||", "|")
778
+ .replace("|%|", "")
779
+ .replace("|% |", "")
780
+ .replace("|$|", "")
781
+ .replace("|$ |", "")
782
+ .replace("|)", ")")
783
+ .replace(" )", ")")
784
+ .replace(" )", ")")
785
+ .replace("| | |", "|")
786
+ .replace("| |", "|")
787
+ .replace(" | | ", "|")
788
+ .replace("| |", "|")
789
+ )
790
+ if (
791
+ "months ended" in line.lower()
792
+ or "year ended" in line.lower()
793
+ or "quarter ended" in line.lower()
794
+ or "weeks ended" in line.lower()
795
+ and "|" not in line
796
+ and "|" in previous_line
797
+ ):
798
+ line = "|" + line # noqa
799
+
800
+ if line not in ["||", "| |"]:
801
+ new_lines.append(line)
802
+ previous_line = line
803
+ else:
804
+ if (
805
+ "|" in previous_line
806
+ and "|" in extracted_lines[line_i + 1]
807
+ and not line.strip()
808
+ ):
809
+ continue
810
+
811
+ if is_inscriptis is True and ". " in line:
812
+ line = line.replace(". ", ".\n\n") # noqa
813
+ elif is_inscriptis is True and ". " in line:
814
+ line = line.replace(". ", ".\n\n") # noqa
815
+
816
+ if " ." in line:
817
+ line = line.replace(" .", ".") # noqa
818
+
819
+ if "|" in previous_line:
820
+ new_lines.extend(
821
+ ["\n"] + wrap(line, width=query.wrap_length) + ["\n"]
822
+ )
823
+ elif line.strip().startswith("-"):
824
+ new_lines.extend([line] + ["\n"])
825
+ else:
826
+ new_lines.extend(
827
+ wrap(line, width=query.wrap_length) + ["\n"]
828
+ )
829
+ previous_line = line
830
+
831
+ return new_lines
832
+
833
+ # Do a first pass, and if extraction fails we can identify where the problem originates.
834
+
835
def try_inscriptis(filing_str):
    """Fallback HTML-to-text extraction using Inscriptis.

    Renders the filing HTML to plain text with "|" as the table-cell
    separator, drops blank lines, repairs common spacing artifacts left
    by the renderer, and then feeds the result through the shared
    `process_extracted_text` pass with ``is_inscriptis=True``.
    """
    extracted_text = get_text(
        filing_str,
        config=ParserConfig(
            table_cell_separator="|",
        ),
    )
    extracted_lines = []
    for line in extracted_text.splitlines():
        # Blank lines carry no content; table structure is rebuilt later.
        if not line.strip():
            continue
        # Undo stray spaces Inscriptis inserts around punctuation/quotes,
        # plus a few split-word artifacts seen in real filings.
        # NOTE(review): the "o f" -> "of" and "a n" -> "an" replacements are
        # aggressive and can corrupt legitimate text spanning a word boundary
        # (e.g. "two files" contains "o f") — confirm against sample filings.
        extracted_lines.append(
            line.strip()
            .replace(" , ", ", ")
            .replace(" . ", ". ")
            .replace(" .", ".")
            .replace(" ’ ", "'")
            .replace(" ' ", "'")
            .replace("“ ", "“")
            .replace(" ”", "”")
            .replace("o f", "of")
            .replace("a n", "an")
            .replace("in crease", "increase")
        )

    # True flags the Inscriptis-specific clean-up branches downstream.
    return process_extracted_text("\n".join(extracted_lines), True)
862
+
863
+ filing_str = data.get("content", "")
864
+
865
+ if query.strategy == "trafilatura":
866
+ extracted_text = extract(
867
+ filing_str,
868
+ include_tables=True,
869
+ include_comments=True,
870
+ include_formatting=True,
871
+ include_images=True,
872
+ include_links=False,
873
+ )
874
+ new_lines = process_extracted_text(extracted_text, False)
875
+
876
+ if not new_lines:
877
+ warn("Trafilatura extraction failed, trying Inscriptis.")
878
+ new_lines = try_inscriptis(filing_str)
879
+ is_inscriptis = True
880
+
881
+ else:
882
+ new_lines = try_inscriptis(filing_str)
883
+
884
+ if not new_lines:
885
+ raise EmptyDataError(
886
+ "No content was found in the filing, likely a parsing error from unreachable content."
887
+ f" -> {data['url']}"
888
+ " -> The content can be analyzed by inspecting"
889
+ " the output of `SecManagementDiscussionAnalysisFetcher.aextract_data`,"
890
+ " or by setting `raw_html=True` in the query."
891
+ )
892
+
893
+ # Second pass - clean up document
894
+
895
def is_title_case(line: str) -> bool:
    """Check if a line follows financial-document title-case patterns.

    Returns True for lines that look like section headings: strict
    title-case lines, all-caps headings, "Item"/"ITEM" section leads, or
    lines that become title-case once common lowercase connector words
    (and/of/the/...) are normalized. Bullets, sentence fragments, quoted
    text and "... ended" date leaders are rejected.
    """
    stripped = line.strip()

    # Bullets, sentence endings, quotes, hyphen-trailing lines and
    # period-ended date leaders ("Three Months Ended") are never titles.
    if (
        stripped.startswith("-")
        or stripped.endswith(".")
        or stripped.endswith(",")
        or "“" in line
        or line.endswith("-")
        or line.lower().endswith("ended")
    ):
        return False

    # Strict title case. The guard above already rejected trailing "." and
    # leading "-", so the original extra checks here were redundant; a
    # second if-block repeating the guard's conditions was dead code and
    # has been removed.
    if line.istitle():
        return True

    # All-caps headings (e.g. "RESULTS OF OPERATIONS") or "Item 7." style
    # section leads. Parentheses make the original and/or precedence
    # explicit: the all-caps test is one alternative, the Item/ITEM
    # prefixes stand on their own.
    if (
        (
            "|" not in line
            and stripped.isupper()
            and len(stripped) > 1
            and line[-1].isalpha()
        )
        or stripped.startswith("Item")
        or stripped.startswith("ITEM")
    ):
        return True

    # Title case after upper-casing common lowercase connector words.
    return (
        line.replace("(", "")
        .replace(")", "")
        .replace(",", "")
        .replace(" and ", " And ")
        .replace(" of ", " Of ")
        .replace(" the ", " The ")
        .replace(" vs ", " VS ")
        .replace(" in ", " In ")
        .replace(" to ", " To ")
        .replace(" for ", " For ")
        .replace(" with ", " With ")
        .replace(" on ", " On ")
        .replace(" at ", " At ")
        .replace(" from ", " From ")
        .replace(" by ", " By ")
    ).istitle()
944
+
945
def count_columns_in_data_row(data_row: str) -> int:
    """Count actual columns from a markdown-style table data row.

    A row shaped ``| a | b | c |`` has one more "|" than columns, so the
    count is ``pipes - 1``. Equivalent to the original
    ``len(list(data_row.split("|"))) - 2`` without the redundant ``list()``
    wrapper and intermediate split list.
    """
    return data_row.count("|") - 1
948
+
949
def pad_row_columns(row: str, target_cols: int) -> str:
    """Pad a table row with empty cells to match the target column count.

    Header-like rows (per `is_table_header`) ending in ":" — or short
    alphabetic rows ending in ")" that are not year columns — get empty
    cells appended on the right; every other short row is padded on the
    left so numeric data stays right-aligned.

    NOTE(review): the mixed ``and``/``or`` below binds as
    ``(A and B) or (C and not D)`` — the "not a year row" guard applies
    only to the second alternative. Confirm that is intentional.
    NOTE(review): ``target_cols - current_cols - 2`` pads two fewer cells
    than the shortfall — verify against rendered output.
    """
    cells = row.split("|")
    current_cols = len(cells) - 2  # Exclude outer pipes

    if current_cols < target_cols:
        # Add empty cells
        if (
            is_table_header(row)
            and row.replace("|", "").replace(" ", "").endswith(":")
            or (
                row.replace("|", "").replace(" ", "").endswith(")")
                and row.replace("|", "").replace(" ", "")[0].isalpha()
                and len(row.split("|")) < 3
            )
            and not (
                "20" in row and all(len(d) == 4 for d in row.split("|") if d)
            )
        ):
            # Right-pad: keep the non-empty cells, append blanks.
            cells = [c for c in cells if c.strip()] + [
                " " for _ in range(target_cols - current_cols - 2)
            ]
            return "|" + "|".join(cells)
        # Left-pad: prepend blanks so trailing data cells keep alignment.
        cells = [" " for _ in range(target_cols - current_cols - 2)] + cells

    return "|".join(cells)
975
+
976
def process_document(  # noqa: PLR0912
    document: list[str], is_inscriptis: bool
) -> list[str]:
    """Clean up document lines.

    Second-pass, order-dependent heuristic clean-up over the extracted
    markdown lines: drops empty/separator junk rows, repairs table
    header/divider rows, rewrites image links to absolute URLs, merges
    broken bullet continuations, and pads short table rows. `document`
    is mutated in place (pop/insert) while being index-walked, so the
    statement order here is load-bearing — do not reorder.
    """
    cleaned_lines: list = []
    i = 0
    max_cols = 0

    while i < len(document):
        current_line = document[i]
        # Drop blank/pipe-only lines sandwiched between two table rows.
        # NOTE(review): at i == 0, document[i - 1] wraps to the last line.
        if (
            "|" in document[i - 1]
            and i - 1 > 1
            and i + 1 <= len(document)
            and i + 1 < len(document)
            and "|" in document[i + 1]
        ) and (
            current_line == "" or current_line.replace("|", "").strip() == ""
        ):
            i += 1
            continue

        if is_inscriptis is True and "|" not in current_line:
            current_line = current_line.replace(" ", " ")

        # Strip leftover Inscriptis alignment markers.
        if is_inscriptis is True and "-::-" in current_line:
            current_line = (
                current_line.replace(":------::", "")
                .replace("::------:", "")
                .replace("::------::", "")
                .replace(" ", "")
            ).strip()
        # A divider row with no header above it: synthesize a blank header.
        if (
            is_inscriptis is True
            and "|:-" in current_line
            and "|" not in document[i - 1]
        ):
            cleaned_lines.append("| " * current_line.count("|"))

        if is_inscriptis is True and "|" in document[i - 1]:
            # NOTE(review): pops from `document` while index-walking it —
            # works because `i` is intentionally not advanced, but fragile.
            if current_line.strip() in [
                '""',
                "",
                " ",
                "\n",
                "|",
                "| | | | |",
                "| | |",
            ]:
                _ = document.pop(i)
                continue

            current_line = current_line.replace(" ", " ")

            # Units captions belong inside the table.
            if (
                current_line.strip().startswith("(inmillions")
                and "|" not in current_line
            ):
                current_line = "|" + current_line

            if (
                current_line.strip().startswith("|:-")
                and current_line[-1] != "|"
            ):
                current_line = current_line + "|"

            # Narrative sentences referring to a table: detach from the table.
            if (
                "in the preceding table" in current_line.lower()
                or "in the table above" in current_line.lower()
                or "the following tables present" in current_line.lower()
                and "|" in document[i - 1]
            ):
                cleaned_lines.append("\n")
                current_line = "\n" + current_line.replace("|", "").strip()

            # A markdown heading immediately after a table row is treated
            # as a bolded in-table label.
            if (
                current_line.startswith("# ")
                and "|" not in current_line
                and "|" in document[i - 1]
            ):
                current_line = "|" + current_line.replace("# ", " *")
                cleaned_lines.append(current_line)
                i += 1
                continue

            # A bare numeric continuation after a table row: wrap in pipes.
            if (
                "|" in document[i - 1]
                and len(current_line) > 1
                and "|" not in current_line
                and current_line.replace(")", "")[-1].isnumeric()
            ):
                current_line = "|" + current_line + " |"

        # Page-number artifacts like "- 12 -".
        if (
            current_line.strip()
            and current_line.strip().startswith("-")
            and current_line.strip().endswith("-")
            and len(current_line.strip().replace("-", "").replace(" ", "")) < 4
            and current_line.strip()
            .replace("-", "")
            .replace(" ", "")
            .isnumeric()
        ):
            i += 1
            continue
        # Rewrite relative image links against the filing's base URL.
        if "![" in current_line:
            image_file = (
                current_line.split("]")[1].replace("(", "").replace(")", "")
            )
            base_url = data["url"].rsplit("/", 1)[0]
            image_url = f"{base_url}/{image_file}"
            cleaned_lines.append(f"![Graphic]({image_url})")
            i += 1
            continue

        # "o" bullet glyph rendered as a one-cell table row.
        if current_line.strip() == "| | o |":
            i += 1
            current_line = "- " + document[i].replace("|", "").strip()
            cleaned_lines.append(current_line)
            i += 1
            continue
        if current_line.strip() == ":------:":
            i += 1
            continue
        # Fewer than 3 pipes: not a real table row — flatten it.
        if current_line.count("|") < 3:
            current_line = (
                current_line.replace("|", "").replace(":------:", "").strip()
            )
            cleaned_lines.append(current_line)
            i += 1
            continue

        next_line = document[i + 1] if i + 1 < len(document) else ""

        # Re-join the 10-K MD&A title split across two lines.
        if next_line.replace("**", "").strip() == "AND RESULTS OF OPERATIONS":
            current_line = (
                "**"
                + current_line.replace("**", "").replace("\n", "").strip()
                + " "
                + "AND RESULTS OF OPERATIONS"
                + "**"
            )
            _ = document.pop(i + 1)
            cleaned_lines.append(current_line)
            i += 1
            continue

        previous_line = document[i - 1] if i > 0 else ""

        # Orphaned separator fragments.
        if current_line.strip() in (
            "--",
            "-",
            "|:------:|",
            "||",
            "| |",
            ":------:",
        ):
            if not next_line.strip() or next_line == current_line:
                i += 2
                continue
            i += 1
            continue

        if "| :-" in current_line:
            current_line = current_line.replace(" :- ", ":-")

        if "|:-" in current_line and not current_line.strip().endswith("|"):
            current_line = current_line + "|"

        # NOTE(review): this branch does not advance `i` before `continue`,
        # so if it is ever reached the loop spins forever on the same line.
        # It may be unreachable given the earlier empty-row handling —
        # confirm, or add `i += 1`.
        if (
            not current_line.strip()
            and "|" in document[i - 1]
            and "|" in document[i + 1]
        ):
            continue

        if (
            query.include_tables is False
            and "|" in current_line
            and "|" not in document[i - 1]
        ):
            current_line = current_line.replace("|", "")

        if current_line.startswith(" -"):
            current_line = "- " + current_line[2:]

        # Footnote markers like "(1)" / "[a]": glue them to the next
        # non-empty line.
        if (
            current_line.startswith(("(", "["))
            and current_line.endswith((")", "]"))
            and len(current_line) < 4
        ):
            current_line = current_line.replace("[", "(").replace("]", ")")
            dead_line = True
            new_i = i
            # NOTE(review): unguarded document[new_i] — raises IndexError
            # if the marker is the last non-empty line. Confirm inputs.
            while dead_line is True:
                new_i += 1
                next_line = document[new_i]
                if next_line.replace("|", "").strip():
                    dead_line = False
                    break

            next_line = next_line.replace("|", "").rstrip()

            # Skip a duplicated continuation line.
            if document[new_i + 1].replace("|", "").rstrip() == next_line:
                new_i += 1

            current_line = (
                current_line
                + " "
                + next_line.replace("|", "").strip().rstrip(" ")
            ).strip()
            i = new_i
            previous_line = document[i - 1]

        # Normalize "-item" bullets to "- item".
        if (
            current_line.replace("|", "").strip().startswith("-")
            and current_line[1] != " "
        ):
            current_line = current_line.replace("|", "").replace("-", "- ")

        # Empty pipe row strictly inside a table body.
        if (
            "|" in current_line
            and "|" in previous_line
            and "|" in next_line
            and "|:-" not in next_line
            and current_line.replace(" ", "").replace("|", "") == ""
        ):
            i += 1
            continue

        if query.include_tables is False and "|" in current_line:
            i += 1
            continue

        # Fix table header rows with missing dividers.
        # We can't fix all tables, but this helps with some.

        if (
            "|" in current_line
            and "|" not in previous_line
            and "|:-" not in next_line
        ) and current_line.count("|") > 2:
            n_bars = current_line.replace(" | | ", "|").count("|")
            inserted_line = ("|:------:" * (n_bars - 2)) + "|"

            document.insert(
                i + 1,
                inserted_line.replace(":------:", " ").strip()[1:-2],
            )
            document.insert(i + 2, inserted_line)
            current_line = current_line.replace("|", "").lstrip(" ") + "\n"

        elif (
            "|:-" in current_line
            and "|" not in previous_line
            and "|" in next_line
        ):
            inserted_line = current_line.replace("-", "").replace("::", " ")

            if previous_line.strip():
                inserted_line = "\n" + inserted_line

            document.insert(i - 1, inserted_line)
            cleaned_lines.append(inserted_line)

        if current_line.startswith("|:-") and not current_line.strip().endswith(
            "|"
        ):
            current_line = current_line + "|"

        # Detect table by empty header pattern
        if (
            i + 2 < len(document)
            and "|" in current_line
            and all(not cell.strip() for cell in current_line.split("|")[1:-1])
            and ":---" in document[i + 1]
        ):
            table_i = i + 2
            max_cols = 0
            # First pass - find max columns
            while table_i < len(document):
                if "|" not in document[table_i]:
                    break
                row = document[table_i].strip()
                if row and row != "|":
                    cols = count_columns_in_data_row(row)
                    max_cols = max(max_cols, cols)
                table_i += 1

            # Fix empty header row
            header_line = (
                "| " + " | ".join([" " for _ in range(max_cols)]) + " |"
            )
            cleaned_lines.append(header_line)

            # Fix separator row
            separator_line = (
                "|" + "|".join([":------:" for _ in range(max_cols)]) + "|"
            )
            cleaned_lines.append(separator_line)

            i += 2  # Skip original header and separator
        else:
            if current_line.strip().startswith("-"):
                current_line = current_line.replace("|", "")
                # Merge a bullet with its lowercase sentence continuation.
                if current_line.strip()[-1] not in (".", ";", ":") and (
                    (
                        next_line.replace("|", "").strip().islower()
                        and next_line.replace("|", "").strip().endswith(".")
                    )
                    or not next_line.strip()
                    and i + 2 < len(document)
                    and document[i + 2].replace("|", "").strip().endswith(".")
                ):
                    if not next_line.strip() and i + 2 <= len(document):
                        next_line = document[i + 2].strip()

                    current_line = (
                        current_line + " " + next_line.replace("|", "").strip()
                    )
                    cleaned_lines.append(current_line)
                    i += 2
                    continue
            # Check if this is a table row that needs padding
            current_line = current_line.replace(") (", ")|(")
            # Last bullet of a ", and"-joined list: split off the sentence.
            if (
                current_line.strip().startswith("-")
                and "|" not in current_line
                and "." in current_line
                and (
                    document[i - 1].strip().endswith(", and")
                    or document[i - 1].strip().endswith(" and")
                )
            ):
                clean_line = current_line.split(".")[0] + ".\n\n"
                if len(current_line.split(".")) > 1:
                    remaining = ". ".join(current_line.split(".")[1:])
                    clean_line += remaining + "\n"
                cleaned_lines.append(clean_line)
                i += 1
                continue

            # A lowercase "- ..." fragment continuing the previous line:
            # splice it onto the last emitted line.
            if current_line.strip().startswith("-") and (
                "|" not in current_line
                and not previous_line.replace("|", "")
                .strip()
                .endswith((";", ".", ":"))
                and current_line.strip()
                .replace("-", "")
                .replace(" ", "")
                .islower()
            ):
                old_line = cleaned_lines.pop(-1)
                if not old_line.strip("\n"):
                    old_line = cleaned_lines.pop(-2)

                cleaned_lines.append(
                    old_line.strip("\n")
                    + " "
                    + current_line.replace("-", "").strip()
                )

            elif "|" in current_line:
                current_line = current_line.replace("|)|", ")|").replace(
                    "| | (Dollars in ", "| (Dollars in "
                )
                if (
                    current_line in ("| |", "| |", "|")
                    or "form 10-k" in current_line.replace("|", "").lower()
                ):
                    i += 1
                    continue
                current_cols = count_columns_in_data_row(current_line)
                # max_cols persists from the most recent detected table.
                if max_cols and max_cols > 0 and current_cols != max_cols:
                    padded_line = pad_row_columns(current_line, max_cols)
                    cleaned_lines.append(padded_line.strip())
                else:
                    cleaned_lines.append(current_line)

            # Not a table row, keep unchanged
            else:
                cleaned_lines.append(current_line)
            i += 1

    return cleaned_lines
1361
+
1362
+ document = "\n".join(new_lines)
1363
+
1364
+ cleaned_lines = process_document(document.splitlines(), is_inscriptis)
1365
+
1366
+ finished_lines: list = []
1367
+
1368
+ i = 0
1369
+ for line in cleaned_lines:
1370
+ i += 1
1371
+ line = line.replace( # noqa
1372
+ "(amountsinmillions,exceptpershare,share,percentagesandwarehousecountdata) ",
1373
+ "",
1374
+ )
1375
+ if (
1376
+ "|" not in line
1377
+ and "#" not in line
1378
+ and is_title_case(line)
1379
+ and "|" not in cleaned_lines[i - 1]
1380
+ ):
1381
+ if "." in line and " " not in line:
1382
+ continue
1383
+ if len(finished_lines) > 1 and "|" not in finished_lines[-1]:
1384
+ finished_lines.append(
1385
+ f"## **{line.strip().replace('*', '').rstrip()}**"
1386
+ if line.strip().startswith("Item") or line.strip().isupper()
1387
+ else f"### **{line.strip().replace('*', '').rstrip()}**"
1388
+ )
1389
+ else:
1390
+ finished_lines.append(line)
1391
+
1392
+ data["content"] = "\n".join(finished_lines)
1393
+
1394
+ return SecManagementDiscussionAnalysisData(**data)
openbb_platform/providers/sec/openbb_sec/models/py.typed ADDED
File without changes
openbb_platform/providers/sec/openbb_sec/models/rss_litigation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Litigation RSS Feed Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import datetime
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.data import Data
10
+ from openbb_core.provider.abstract.fetcher import Fetcher
11
+ from openbb_core.provider.abstract.query_params import QueryParams
12
+ from openbb_sec.utils.definitions import HEADERS
13
+ from pydantic import Field
14
+
15
+
16
class SecRssLitigationQueryParams(QueryParams):
    """SEC Litigation RSS Feed Query.

    Source: https://sec.gov/

    The feed takes no parameters; this empty model exists so the fetcher
    conforms to the standard QueryParams interface.
    """
21
+
22
+
23
class SecRssLitigationData(Data):
    """SEC Litigation RSS Feed Data."""

    # Map the model field "published" to the "date" column produced by the
    # fetcher's DataFrame transformation of the RSS feed.
    __alias_dict__ = {
        "published": "date",
    }

    published: datetime = Field(description="The date of publication.")
    title: str = Field(description="The title of the release.")
    summary: str = Field(description="Short summary of the release.")
    id: str = Field(description="The identifier associated with the release.")
    link: str = Field(description="URL to the release.")
35
+
36
+
37
class SecRssLitigationFetcher(
    Fetcher[SecRssLitigationQueryParams, List[SecRssLitigationData]]
):
    """SEC RSS Litigation Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecRssLitigationQueryParams:
        """Transform the query."""
        return SecRssLitigationQueryParams(**params)

    @staticmethod
    def extract_data(
        query: SecRssLitigationQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Return the raw data from the SEC endpoint.

        Downloads the litigation-releases RSS feed, escapes bare
        ampersands so the XML parses, and flattens the feed items into
        records with columns title/link/summary/date/id.
        """
        # pylint: disable=import-outside-toplevel
        import re  # noqa
        import xmltodict
        from openbb_core.provider.utils.helpers import make_request
        from pandas import DataFrame, to_datetime

        results: List = []
        url = "https://www.sec.gov/enforcement-litigation/litigation-releases/rss"
        r = make_request(url, headers=HEADERS)

        if r.status_code != 200:
            raise OpenBBError(f"Status code {r.status_code} returned.")

        def clean_xml(xml_content):
            """Clean the XML content before parsing."""
            # Escape "&" characters that are not already part of a valid
            # XML entity; the SEC feed ships unescaped ampersands.
            xml_content = re.sub(r"&(?!amp;|lt;|gt;|quot;|apos;)", "&amp;", xml_content)
            return xml_content

        cleaned_content = clean_xml(r.text)
        data = xmltodict.parse(cleaned_content)
        # Rename source fields: description -> summary, pubDate -> date,
        # dc:creator -> id (the release identifier in this feed).
        # NOTE(review): xmltodict returns a dict (not a list) for "item"
        # when the feed holds a single entry — confirm DataFrame handling.
        cols = ["title", "link", "summary", "date", "id"]
        feed = DataFrame.from_records(data["rss"]["channel"]["item"])[
            ["title", "link", "description", "pubDate", "dc:creator"]
        ]
        feed.columns = cols
        feed["date"] = to_datetime(feed["date"], format="mixed")
        feed = feed.set_index("date")
        # Remove special characters
        for column in ["title", "summary"]:
            feed[column] = (
                feed[column]
                .replace(r"[^\w\s]|_", "", regex=True)
                .replace(r"\n", "", regex=True)
            )

        results = feed.reset_index().to_dict(orient="records")

        return results

    @staticmethod
    def transform_data(
        query: SecRssLitigationQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecRssLitigationData]:
        """Transform the data to the standard format."""
        return [SecRssLitigationData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/schema_files.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Schema Files List Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
11
+ from pydantic import Field
12
+
13
+
14
class SecSchemaFilesQueryParams(CotSearchQueryParams):
    """SEC Schema Files List Query.

    Source: https://sec.gov/

    Inherits the free-text `query` field from CotSearchQueryParams and
    adds navigation/caching controls for browsing the schema file tree.
    """

    # When set, the fetcher lists the next directory level under this URL
    # instead of the schema root.
    url: Optional[str] = Field(
        description="Enter an optional URL path to fetch the next level.", default=None
    )
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
+
28
+
29
class SecSchemaFilesData(Data):
    """SEC Schema Files List Data."""

    # The field holds a flat list of URL strings; the previous description
    # incorrectly called it a "Dictionary".
    files: List[str] = Field(description="List of URLs to SEC Schema Files")
33
+
34
+
35
class SecSchemaFilesFetcher(Fetcher[SecSchemaFilesQueryParams, SecSchemaFilesData]):
    """SEC Schema Files Fetcher."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecSchemaFilesQueryParams:
        """Transform the query."""
        return SecSchemaFilesQueryParams(**params)

    @staticmethod
    def extract_data(
        query: SecSchemaFilesQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Return the raw data from the SEC endpoint.

        Lists the schema files at the requested level of the SEC schema
        tree. Raises OpenBBError when the URL points at an individual
        .xsd/.xml file, since this endpoint only lists directories and
        does not parse file contents.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import get_schema_filelist

        # Explicit grouping of the original `A and B or A and C` condition:
        # reject URLs that target a specific schema file.
        if query.url and (".xsd" in query.url or ".xml" in query.url):
            raise OpenBBError("Invalid URL. This endpoint does not parse the files.")
        results = get_schema_filelist(query.query, query.url)

        return {"files": results}

    @staticmethod
    def transform_data(
        query: SecSchemaFilesQueryParams, data: Dict, **kwargs: Any
    ) -> SecSchemaFilesData:
        """Transform the data to the standard format."""
        return SecSchemaFilesData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/sec_filing.py ADDED
@@ -0,0 +1,728 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Filing Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from datetime import date as dateType
6
+ from typing import Any, Optional, Union
7
+
8
+ from openbb_core.app.model.abstract.error import OpenBBError
9
+ from openbb_core.provider.abstract.data import Data
10
+ from openbb_core.provider.abstract.fetcher import Fetcher
11
+ from openbb_core.provider.abstract.query_params import QueryParams
12
+ from pydantic import ConfigDict, Field, PrivateAttr, computed_field
13
+
14
+
15
class SecFilingQueryParams(QueryParams):
    """SEC Filing Query Parameters."""

    # Workspace widget metadata: relabels the "url" field in generated UIs.
    __json_schema_extra__ = {
        "url": {
            "x-widget_config": {
                "label": "Filing URL",
            }
        }
    }

    # Any URL under the filing's accession directory is accepted; only the
    # base directory is derived from it.
    url: str = Field(
        default="",
        description="URL for the SEC filing."
        + " The specific URL is not directly used or downloaded,"
        + " but is used to generate the base URL for the filing."
        + " e.g. https://www.sec.gov/Archives/edgar/data/317540/000031754024000045/coke-20240731.htm"
        + " and https://www.sec.gov/Archives/edgar/data/317540/000031754024000045/"
        + " are both valid URLs for the same filing.",
    )
    use_cache: bool = Field(
        default=True,
        description="Use cache for the index headers and cover page. Default is True.",
    )
39
+
40
+
41
class SecFilingData(Data):
    """SEC Filing Data."""

    # For Workspace, ConfigDict is used to enter the widget configuration at the "$.data" level.
    # Here, we are using a subset of the data - the document URLs with direct links - to avoid nested data.
    # This creates column definitions for the target output while preserving the structure of the model.
    model_config = ConfigDict(
        json_schema_extra={
            "x-widget_config": {
                "dataKey": "results.document_urls",
                "table": {
                    "columnsDefs": [
                        {
                            "field": "sequence",
                            "headerName": "Sequence",
                            "headerTooltip": "The sequence of the document.",
                            "type": "number",
                            "pinned": "left",
                            "maxWidth": 105,
                        },
                        {
                            "field": "type",
                            "headerName": "Document Type",
                            "headerTooltip": "The type of document.",
                            "type": "text",
                            "maxWidth": 150,
                        },
                        {
                            "field": "filename",
                            "headerName": "Filename",
                            "headerTooltip": "The filename of the document.",
                            "type": "text",
                            "maxWidth": 250,
                        },
                        {
                            "field": "content_description",
                            "headerName": "Description",
                            "headerTooltip": "Description of the document.",
                            "type": "text",
                            "minWidth": 600,
                        },
                        {
                            "field": "url",
                            "headerName": "URL",
                            "headerTooltip": "The URL of the document.",
                            "type": "text",
                            "maxWidth": 75,
                        },
                    ],
                },
            }
        }
    )

    # Every scalar field below is excluded from the widget table via
    # x-widget_config (the table renders document_urls only); exclusion has
    # no effect on the API response itself.
    base_url: str = Field(
        title="Base URL",
        description="Base URL of the filing.",
        json_schema_extra={
            "x-widget_config": {
                "exclude": True
            }  # Tells the widget factory to exclude this field. Has no effect on endpoint.
        },
    )
    name: str = Field(
        title="Entity Name",
        description="Name of the entity filing.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    cik: str = Field(
        title="CIK",
        description="Central Index Key.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    trading_symbols: Optional[list] = Field(
        default=None,
        title="Trading Symbols",
        description="Trading symbols, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    sic: str = Field(
        title="SIC",
        description="Standard Industrial Classification.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    sic_organization_name: str = Field(
        title="SIC Organization",
        description="SIC Organization Name.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    filing_date: dateType = Field(
        title="Filing Date",
        description="Filing date.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    period_ending: Optional[dateType] = Field(
        default=None,
        title="Period Ending",
        description="Date of the ending period for the filing, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    fiscal_year_end: Optional[str] = Field(
        default=None,
        title="Fiscal Year End",
        description="Fiscal year end of the entity, if available. Format: MM-DD",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    document_type: str = Field(
        title="Document Type",
        description="Specific SEC filing type.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    has_cover_page: bool = Field(
        title="Has Cover Page",
        description="True if the filing has a cover page.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    description: Optional[str] = Field(
        default=None,
        title="Content Description",
        description="Description of attached content, mostly applicable to 8-K filings.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    cover_page: Optional[dict] = Field(
        default=None,
        title="Cover Page",
        description="Cover page information, if available.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
    # The only field surfaced in the Workspace table (see dataKey above).
    document_urls: list = Field(
        title="Document URLs",
        description="List of files associated with the filing.",
        json_schema_extra={"x-widget_config": {"exclude": True}},
    )
174
+
175
+
176
class SecBaseFiling(Data):  # pylint: disable=too-many-instance-attributes
    """Base SEC Filing model.

    Given a SEC EDGAR archive filing URL, downloads the filing's
    "-index-headers.htm" document, parses the header metadata (entity name,
    CIK, SIC, form type, dates, attached documents), and — when an inline-XBRL
    cover page report ("R1.htm") is among the documents — downloads and parses
    it for fiscal year/period, shares outstanding, and 12(b) securities.

    All parsed state is stored in private attributes and exposed read-only
    through pydantic computed fields.
    """

    # Normalized base archive URL; always ends with "/".
    _url: str = PrivateAttr(default="")
    # URL of the "-index-headers.htm" document derived from the accession number.
    _index_headers_url: str = PrivateAttr(default="")
    # Raw HTML of the index headers page, cached after the first download.
    _index_headers_download: str = PrivateAttr(default="")
    # List of dicts describing each attached document (type/sequence/filename/url).
    _document_urls: list = PrivateAttr(default=None)
    # Dates are stored as ISO "YYYY-MM-DD" strings; converted in the properties.
    _filing_date: str = PrivateAttr(default="")
    _period_ending: str = PrivateAttr(default="")
    _document_type: str = PrivateAttr(default="")
    _name: str = PrivateAttr(default="")
    _cik: str = PrivateAttr(default="")
    _sic: str = PrivateAttr(default="")
    _sic_organization_name: Optional[str] = PrivateAttr(default="")
    _description: Optional[str] = PrivateAttr(default=None)
    _cover_page_url: Optional[str] = PrivateAttr(default=None)
    # "MM-DD" string, parsed from the header's "FISCAL YEAR END" line.
    _fiscal_year_end: str = PrivateAttr(default="")
    _fiscal_period: str = PrivateAttr(default="")
    _cover_page: dict = PrivateAttr(default=None)
    _trading_symbols: list = PrivateAttr(default=None)
    _use_cache: bool = PrivateAttr(default=True)

    @computed_field(title="Base URL", description="Base URL of the filing.")  # type: ignore
    @property
    def base_url(self) -> str:
        """Base URL of the filing."""
        return self._url

    @computed_field(title="Entity Name", description="Name of the entity filing.")  # type: ignore
    @property
    def name(self) -> str:
        """Entity name."""
        return self._name

    @computed_field(title="CIK", description="Central Index Key.")  # type: ignore
    @property
    def cik(self) -> str:
        """Central Index Key."""
        return self._cik

    @computed_field(  # type: ignore
        title="Trading Symbols", description="Trading symbols, if available."
    )
    @property
    def trading_symbols(self) -> Optional[list]:
        """Trading symbols, if available."""
        return self._trading_symbols

    @computed_field(title="SIC", description="Standard Industrial Classification.")  # type: ignore
    @property
    def sic(self) -> str:
        """Standard Industrial Classification."""
        return self._sic

    @computed_field(title="SIC Organization", description="SIC Organization Name.")  # type: ignore
    @property
    def sic_organization_name(self) -> Optional[str]:
        """Standard Industrial Classification Organization Name."""
        return self._sic_organization_name

    @computed_field(title="Filing Date", description="Filing date.")  # type: ignore
    @property
    def filing_date(self) -> dateType:
        """Filing date."""
        return dateType.fromisoformat(self._filing_date)

    @computed_field(  # type: ignore
        title="Period Ending",
        description="Date of the ending period for the filing, if available.",
    )
    @property
    def period_ending(self) -> Optional[dateType]:
        """Date of the ending period for the filing."""
        if self._period_ending:
            return dateType.fromisoformat(self._period_ending)
        return None

    @computed_field(  # type: ignore
        title="Fiscal Year End",
        description="Fiscal year end of the entity, if available. Format: MM-DD",
    )
    @property
    def fiscal_year_end(self) -> Optional[str]:
        """Fiscal year end date of the entity."""
        return self._fiscal_year_end

    @computed_field(title="Document Type", description="Specific SEC filing type.")  # type: ignore
    @property
    def document_type(self) -> str:
        """Document type."""
        return self._document_type

    @computed_field(  # type: ignore
        title="Has Cover Page", description="True if the filing has a cover page."
    )
    @property
    def has_cover_page(self) -> bool:
        """True if the filing has a cover page."""
        # The cover page URL is set in __init__ when an "R1.htm" document exists.
        return bool(self._cover_page_url)

    @computed_field(  # type: ignore
        title="Cover Page", description="Cover page information, if available."
    )
    @property
    def cover_page(self) -> Optional[dict]:
        """Cover page information, if available."""
        return self._cover_page

    @computed_field(  # type: ignore
        title="Content Description",
        description="Description of attached content, mostly applicable to 8-K filings.",
    )
    @property
    def description(self) -> Optional[str]:
        """Document description, if available."""
        return self._description

    @computed_field(  # type: ignore
        title="Document URLs", description="List of files associated with the filing."
    )
    @property
    def document_urls(self) -> list:
        """List of document URLs."""
        return self._document_urls

    def __init__(self, url: str, use_cache: bool = True):
        """Initialize the Filing class.

        Parameters
        ----------
        url : str
            A SEC EDGAR archive filing URL containing "/data/<cik>/<accession>".
        use_cache : bool
            Whether to use the local SQLite HTTP cache for downloads.

        Raises
        ------
        ValueError
            If the URL is empty or does not look like an EDGAR filing URL.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_core.provider.utils.helpers import run_async
        from openbb_sec.utils.helpers import cik_map

        super().__init__()

        if not url:
            raise ValueError("Please enter a URL.")

        if "/data/" not in url:
            raise ValueError("Invalid SEC URL supplied, must be a filing URL.")

        # Second path segment after "/data/" is the accession number with the
        # dashes removed — always 18 characters.
        check_val: str = url.split("/data/")[1].split("/")[1]

        if len(check_val) != 18:
            raise ValueError("Invalid SEC URL supplied, must be a filing URL.")

        # Truncate anything after the accession segment and ensure a trailing "/".
        new_url = url.split(check_val)[0] + check_val + "/"

        # With the trailing "/", index -3 is the CIK segment; EDGAR archive
        # paths use the CIK without leading zeros.
        cik_check = new_url.split("/")[-3]
        new_url = new_url.replace(f"/{cik_check}/", f"/{cik_check.lstrip('0')}/")
        self._url = new_url
        self._use_cache = use_cache
        # Re-insert the dashes of the accession number:
        # "000119312524000001" -> "0001193125-24-000001-index-headers.htm"
        index_headers = (
            check_val[:-8]
            + "-"
            + check_val[-8:-6]
            + "-"
            + check_val[-6:]
            + "-index-headers.htm"
        )
        self._index_headers_url = self._url + index_headers
        self._download_index_headers()

        if self._document_urls:
            # "R1.htm" is the first inline-XBRL rendered report — the cover page.
            for doc in self._document_urls:
                if doc.get("url", "").endswith("R1.htm"):
                    self._cover_page_url = doc.get("url")
                    break

        if self.has_cover_page and not self._cover_page:
            self._download_cover_page()

        # Fall back to a CIK->symbol lookup when the cover page did not
        # provide any trading symbols.
        if not self._trading_symbols:
            symbol = run_async(cik_map, self._cik)
            if symbol:
                self._trading_symbols = [symbol]

    @staticmethod
    async def _adownload_file(url, use_cache: bool = True):
        """Download a file asynchronously from a SEC URL.

        When `use_cache` is True, responses are served through a SQLite-backed
        HTTP cache under the user cache directory.
        """
        # pylint: disable=import-outside-toplevel
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request
        from openbb_sec.utils.definitions import SEC_HEADERS
        from openbb_sec.utils.helpers import sec_callback

        response: Union[dict, list, str, None] = None
        if use_cache is True:
            cache_dir = f"{get_user_cache_directory()}/http/sec_filings"
            async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
                try:
                    # Prune stale cache entries before requesting.
                    await session.delete_expired_responses()
                    response = await amake_request(
                        url,
                        headers=SEC_HEADERS,
                        session=session,
                        response_callback=sec_callback,
                        raise_for_status=True,
                    )  # type: ignore
                finally:
                    await session.close()
        else:
            response = await amake_request(
                url,
                headers=SEC_HEADERS,
                response_callback=sec_callback,
                raise_for_status=True,
            )  # type: ignore

        return response

    @staticmethod
    def download_file(url, read_html_table: bool = False, use_cache: bool = True):
        """Download a file from a SEC URL.

        Parameters
        ----------
        url : str
            The file URL to download.
        read_html_table : bool
            If True and the URL is an HTML file, parse and return the tables
            via `try_html_table`; otherwise return the raw response.
        use_cache : bool
            Whether to use the local HTTP cache.

        Raises
        ------
        RuntimeError
            If the download (or table parsing) fails.
        """
        # pylint: disable=import-outside-toplevel
        from openbb_core.provider.utils.helpers import run_async  # noqa
        from warnings import warn

        try:
            response = run_async(SecBaseFiling._adownload_file, url, use_cache)

            if read_html_table is True:
                if not url.endswith(".htm") and not url.endswith(".html"):
                    # Not an HTML file: warn and fall through to the raw response.
                    warn(f"File is not a HTML file: {url}")
                    return response

                return SecBaseFiling.try_html_table(response)

            return response

        except Exception as e:
            raise RuntimeError(f"Failed to download file: {e} -> {e.args}") from e

    @staticmethod
    def try_html_table(text: str, **kwargs) -> list:
        """Attempt to parse tables from a HTML string. All keyword arguments passed to `pandas.read_html`"""
        # pylint: disable=import-outside-toplevel
        from io import StringIO  # noqa
        from pandas import read_html

        try:
            # Returns a list of DataFrames, one per <table> found.
            return read_html(StringIO(text), **kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to parse table: {e}") from e

    def _download_index_headers(
        self,
    ):  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        """Download the index headers table.

        Parses the <pre> section of the "-index-headers.htm" page: the
        <DOCUMENT> tags become `_document_urls`, and the "KEY: value" header
        lines populate the filing metadata private attributes.
        """
        # pylint: disable=import-outside-toplevel
        import re  # noqa
        from bs4 import BeautifulSoup

        try:
            # Reuse the raw page if it was already downloaded.
            if not self._index_headers_download:
                response = self.download_file(
                    self._index_headers_url, False, self._use_cache
                )
                self._index_headers_download = response
            else:
                response = self._index_headers_download

            soup = BeautifulSoup(response, "html.parser")
            text = soup.find("pre").text

            def document_to_dict(doc):
                """Convert the document section to a dictionary."""
                # NOTE(review): assumes <TYPE>, <SEQUENCE>, and <FILENAME> are
                # always present — a missing tag raises AttributeError here,
                # which is caught and re-raised as RuntimeError below.
                doc_dict: dict = {}
                doc_dict["type"] = re.search(r"<TYPE>(.*?)\n", doc).group(1).strip()  # type: ignore
                doc_dict["sequence"] = (
                    re.search(r"<SEQUENCE>(.*?)\n", doc).group(1).strip()  # type: ignore
                )
                doc_dict["filename"] = (
                    re.search(r"<FILENAME>(.*?)\n", doc).group(1).strip()  # type: ignore
                )
                description_match = re.search(r"<DESCRIPTION>(.*?)\n", doc)

                if description_match:
                    doc_dict["description"] = description_match.group(1).strip()

                url = self.base_url + doc_dict["filename"]
                doc_dict["url"] = url

                return doc_dict

            # Isolate each document by tag
            documents = re.findall(r"<DOCUMENT>.*?</DOCUMENT>", text, re.DOTALL)
            # Convert each document to a dictionary
            document_dicts = [document_to_dict(doc) for doc in documents]

            if document_dicts:
                self._document_urls = document_dicts

            lines = text.split("\n")
            n_items = 0

            for line in lines:

                if ":" not in line:
                    continue

                # NOTE(review): only the text between the first and second
                # colon is kept — values containing a colon are truncated.
                value = line.split(":")[1].strip()

                # Stop early once the expected number of header items is seen.
                # (The period-of-report branch intentionally does not bump the
                # counter, matching the original behavior.)
                if n_items == 9:
                    break

                if "CONFORMED PERIOD OF REPORT" in line:
                    # Dates arrive as "YYYYMMDD"; store as ISO "YYYY-MM-DD".
                    as_of_date = value
                    self._period_ending = (
                        as_of_date[:4] + "-" + as_of_date[4:6] + "-" + as_of_date[6:]
                    )
                elif "FILED AS OF DATE" in line:
                    filing_date = value
                    self._filing_date = (
                        filing_date[:4] + "-" + filing_date[4:6] + "-" + filing_date[6:]
                    )
                    n_items += 1
                elif "COMPANY CONFORMED NAME" in line:
                    self._name = value
                    n_items += 1
                elif "CONFORMED SUBMISSION TYPE" in line:
                    self._document_type = value
                    n_items += 1
                elif "CENTRAL INDEX KEY" in line:
                    self._cik = value
                    n_items += 1
                elif "STANDARD INDUSTRIAL CLASSIFICATION" in line:
                    self._sic = value
                    n_items += 1
                elif "ORGANIZATION NAME" in line:
                    self._sic_organization_name = value
                    n_items += 1
                elif "FISCAL YEAR END" in line:
                    # "MMDD" -> "MM-DD"
                    fy = value
                    self._fiscal_year_end = fy[:2] + "-" + fy[2:]
                    n_items += 1
                # There might be two lines of ITEM INFORMATION
                elif "ITEM INFORMATION" in line:
                    info = value
                    self._description = (
                        self._description + "; " + info if self._description else info
                    )
                    n_items += 1
                continue

        except Exception as e:
            raise RuntimeError(
                f"Failed to download and read the index headers table: {e}"
            ) from e

    @staticmethod
    def _multiplier_map(string) -> int:  # pylint: disable=too-many-return-statements
        """Map a string to a multiplier.

        Converts unit labels found in cover-page titles (e.g. "shares in
        Millions") to numeric multipliers; unknown labels map to 1.
        """
        if string.lower() == "millions":
            return 1000000
        if string.lower() == "hundreds of thousands":
            return 100000
        if string.lower() == "tens of thousands":
            return 10000
        if string.lower() == "thousands":
            return 1000
        if string.lower() == "hundreds":
            return 100
        if string.lower() == "tens":
            return 10
        return 1

    def _download_cover_page(
        self,
    ):  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        """Download the cover page table.

        Parses the "R1.htm" inline-XBRL cover page for fiscal year/period,
        shares outstanding, 12(b) securities, and a flat key/value summary
        stored in `_cover_page`.
        """
        # pylint: disable=import-outside-toplevel
        from pandas import MultiIndex, to_datetime

        symbols_list: list = []
        try:
            response = self.download_file(self._cover_page_url, True, self._use_cache)
            if not response:
                raise RuntimeError("Failed to download cover page table")
            df = response[0]
            if isinstance(df.columns, MultiIndex):
                # Drop the top header level (the document title row).
                df = df.droplevel(0, axis=1)

            if df.empty or len(df) < 1:
                raise RuntimeError("Failed to read cover page table")

            fiscal_year = df[df.iloc[:, 0] == "Document Fiscal Year Focus"]

            if not fiscal_year.empty:
                fiscal_year = fiscal_year.iloc[:, 1].values[0]
            elif fiscal_year.empty:
                fiscal_year = None

            if fiscal_year:
                # NOTE(review): `_fiscal_year` is not declared as a PrivateAttr
                # on this class — confirm pydantic accepts this assignment.
                self._fiscal_year = fiscal_year

            fiscal_period = df[df.iloc[:, 0] == "Document Fiscal Period Focus"]

            if not fiscal_period.empty:
                fiscal_period = fiscal_period.iloc[:, 1].values[0]
            elif fiscal_period.empty:
                fiscal_period = None

            if fiscal_period:
                self._fiscal_period = fiscal_period

            title = (
                df.columns[0][0]
                if isinstance(df.columns, MultiIndex)
                else df.columns[0]
            )

            # Titles like "... - shares in Thousands" carry a unit label for
            # the shares-outstanding column.
            if title and "- shares" in title:
                shares_multiplier = title.split(" shares in ")[-1]
                multiplier = self._multiplier_map(shares_multiplier)
                shares_outstanding = (
                    df[df.iloc[:, 0].str.contains("Shares Outstanding")]
                    .iloc[:, 2]
                    .values[0]
                )
                as_of_date = (
                    df.columns[2][1]
                    if isinstance(df.columns, MultiIndex)
                    else df.columns[2]
                )

                if as_of_date and shares_outstanding:
                    # NOTE(review): `_shares_outstanding` is also not declared
                    # as a PrivateAttr — confirm it is surfaced elsewhere.
                    self._shares_outstanding = {
                        to_datetime(as_of_date).strftime("%Y-%m-%d"): int(
                            shares_outstanding * multiplier
                        )
                    }

            if not df.empty:
                # Rows describing listed securities: symbol, title, exchange.
                trading_symbols_df = df[
                    df.iloc[:, 0]
                    .astype(str)
                    .str.lower()
                    .isin(["trading symbol", "no trading symbol flag"])
                ]
                symbols_dict: dict = {}
                trading_symbols = (
                    trading_symbols_df.iloc[:, 1]
                    .str.strip()
                    .str.replace("true", "No Trading Symbol")
                    .tolist()
                )
                symbol_names = (
                    df[
                        df.iloc[:, 0].astype(str).str.strip()
                        == "Title of 12(b) Security"
                    ]
                    .iloc[:, 1]
                    .tolist()
                )
                exchange_names = (
                    df[
                        df.iloc[:, 0].astype(str).str.strip()
                        == "Security Exchange Name"
                    ]
                    .iloc[:, 1]
                    .fillna("No Exchange")
                    .tolist()
                )
                if trading_symbols:
                    self._trading_symbols = sorted(
                        [d for d in trading_symbols if d and d != "No Trading Symbol"]
                    )
                    symbols_dict = dict(zip(symbol_names, trading_symbols))
                    exchanges_dict = dict(zip(symbol_names, exchange_names))

                    for k, v in symbols_dict.items():
                        symbols_list.append(
                            {
                                "Title": k,
                                "Symbol": v,
                                "Exchange": exchanges_dict.get(k, "No Exchange"),
                            }
                        )

                # Flatten the remaining rows into a key/value dict.
                df.columns = [d[1] if isinstance(d, tuple) else d for d in df.columns]
                df = df.iloc[:, :2].dropna(how="any")
                df.columns = ["key", "value"]
                output = df.set_index("key").to_dict()["value"]

                # Fall back to the SIC parsed from the index headers.
                if not output.get("SIC") and self._sic:
                    output["SIC"] = self._sic
                    output["SIC Organization Name"] = self.sic_organization_name

                # Remove per-security rows already captured in symbols_list.
                for k, v in output.copy().items():
                    if k in [
                        "Title of 12(b) Security",
                        "Trading Symbol",
                        "Security Exchange Name",
                        "No Trading Symbol Flag",
                    ]:
                        del output[k]

                if symbols_list:
                    output["12(b) Securities"] = symbols_list

                self._cover_page = output

        except IndexError:
            # Cover page tables vary by form type; missing cells are tolerated.
            pass

        except Exception as e:
            raise RuntimeError(
                f"Failed to download and read the cover page table: {e}"
            ) from e

    def __repr__(self):
        """Return the string representation of the class."""
        repr_str = "SEC Filing(\n"

        # Describe each computed field: name, return type, and description.
        for k, v in self.model_computed_fields.items():
            if not v:
                continue
            repr_str += f"    {k} : {v.return_type.__name__} - {v.description}\n"

        repr_str += ")"

        return repr_str
699
+
700
+
701
class SecFilingFetcher(Fetcher[SecFilingQueryParams, SecFilingData]):
    """SEC Filing Fetcher."""

    @staticmethod
    def transform_query(params: dict[str, Any]) -> SecFilingQueryParams:
        """Validate the raw parameters against the query model."""
        return SecFilingQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecFilingQueryParams,
        credentials: Optional[dict[str, str]],
        **kwargs: Any,
    ) -> dict:
        """Build the filing object from the supplied URL and dump it to a dict.

        Any failure while downloading or parsing is re-raised as OpenBBError.
        """
        try:
            filing = SecBaseFiling(query.url, query.use_cache)
        except Exception as exc:  # pylint: disable=broad-except
            raise OpenBBError(exc) from exc

        return filing.model_dump(exclude_none=True)

    @staticmethod
    def transform_data(
        query: SecFilingQueryParams, data: dict, **kwargs: Any
    ) -> SecFilingData:
        """Validate the raw dictionary against the data model."""
        return SecFilingData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/models/sic_search.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Standard Industrial Classification Code (SIC) Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from openbb_core.provider.abstract.data import Data
8
+ from openbb_core.provider.abstract.fetcher import Fetcher
9
+ from openbb_core.provider.standard_models.cot_search import CotSearchQueryParams
10
+ from pydantic import Field
11
+
12
+
13
class SecSicSearchQueryParams(CotSearchQueryParams):
    """SEC Standard Industrial Classification Code (SIC) Query.

    Source: https://sec.gov/
    """

    # The free-text search string itself is presumably inherited as `query`
    # from CotSearchQueryParams — the fetcher reads `query.query`.
    use_cache: Optional[bool] = Field(
        default=True,
        description="Whether or not to use cache.",
    )
23
+
24
+
25
class SecSicSearchData(Data):
    """SEC Standard Industrial Classification Code (SIC) Data."""

    # Maps model field names to the column headers of the scraped SEC table.
    __alias_dict__ = {
        "sic": "SIC Code",
        "industry": "Industry Title",
        "office": "Office",
    }

    sic: int = Field(description="Sector Industrial Code (SIC)")
    industry: str = Field(description="Industry title.")
    office: str = Field(
        description="Reporting office within the Corporate Finance Office"
    )
39
+
40
+
41
class SecSicSearchFetcher(
    Fetcher[
        SecSicSearchQueryParams,
        List[SecSicSearchData],
    ]
):
    """SEC SIC Search Fetcher.

    Scrapes the SIC code table from the SEC Division of Corporation Finance
    page and filters the rows against the query string.
    """

    @staticmethod
    def transform_query(
        params: Dict[str, Any], **kwargs: Any
    ) -> SecSicSearchQueryParams:
        """Transform the query."""
        return SecSicSearchQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecSicSearchQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> List[Dict]:
        """Extract data from the SEC website table.

        Downloads the page (optionally through a 30-day SQLite HTTP cache),
        parses the first HTML table, and filters on SIC code, office, or
        industry title.
        """
        # pylint: disable=import-outside-toplevel
        from io import StringIO  # noqa
        from aiohttp_client_cache import SQLiteBackend
        from aiohttp_client_cache.session import CachedSession
        from openbb_core.app.utils import get_user_cache_directory
        from openbb_core.provider.utils.helpers import amake_request
        from openbb_sec.utils.helpers import SEC_HEADERS, sec_callback
        from pandas import read_html

        results: List[Dict] = []
        url = (
            "https://www.sec.gov/corpfin/"
            "division-of-corporation-finance-standard-industrial-classification-sic-code-list"
        )
        response: Union[dict, List[dict], str] = {}
        if query.use_cache is True:
            # The SIC list changes rarely; cache the page for 30 days.
            cache_dir = f"{get_user_cache_directory()}/http/sec_sic"
            async with CachedSession(
                cache=SQLiteBackend(cache_dir, expire_after=3600 * 24 * 30)
            ) as session:
                try:
                    response = await amake_request(
                        url, headers=SEC_HEADERS, session=session, response_callback=sec_callback  # type: ignore
                    )
                finally:
                    await session.close()
        else:
            response = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback)  # type: ignore

        # Wrap the HTML text in StringIO: passing a literal HTML string to
        # read_html is deprecated since pandas 2.1.
        data = read_html(StringIO(response))[0].astype(str)
        if len(data) == 0:
            return results
        # Fix: the original tested `if query:`, which is always True for a
        # model instance (and raised TypeError when `query.query` was None).
        # An empty pattern matches every row, so skipping the filter for a
        # falsy query string preserves the effective behavior.
        if query.query:
            data = data[
                data["SIC Code"].str.contains(query.query, case=False)
                | data["Office"].str.contains(query.query, case=False)
                | data["Industry Title"].str.contains(query.query, case=False)
            ]
        data["SIC Code"] = data["SIC Code"].astype(int)
        results = data.to_dict("records")

        return results

    @staticmethod
    def transform_data(
        query: SecSicSearchQueryParams, data: List[Dict], **kwargs: Any
    ) -> List[SecSicSearchData]:
        """Transform the data."""
        return [SecSicSearchData.model_validate(d) for d in data]
openbb_platform/providers/sec/openbb_sec/models/symbol_map.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Symbol Mapping Model."""
2
+
3
+ # pylint: disable=unused-argument
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+ from openbb_core.provider.abstract.data import Data
9
+ from openbb_core.provider.abstract.fetcher import Fetcher
10
+ from openbb_core.provider.standard_models.symbol_map import SymbolMapQueryParams
11
+ from openbb_core.provider.utils.descriptions import DATA_DESCRIPTIONS
12
+ from pydantic import Field
13
+
14
+
15
class SecSymbolMapQueryParams(SymbolMapQueryParams):
    """SEC Symbol Mapping Query.

    Source: https://sec.gov/
    """

    # No provider-specific fields; the standard model supplies `query`
    # (a CIK string) and, presumably, `use_cache` — both read by the fetcher.
+ """
20
+
21
+
22
class SecSymbolMapData(Data):
    """SEC symbol map Data."""

    # Ticker symbol resolved from the CIK supplied in the query.
    symbol: str = Field(description=DATA_DESCRIPTIONS.get("symbol", ""))
26
+
27
+
28
class SecSymbolMapFetcher(
    Fetcher[
        SecSymbolMapQueryParams,
        SecSymbolMapData,
    ]
):
    """Transform the query, extract and transform the data from the SEC endpoints."""

    @staticmethod
    def transform_query(params: Dict[str, Any]) -> SecSymbolMapQueryParams:
        """Validate the raw parameters against the query model."""
        return SecSymbolMapQueryParams(**params)

    @staticmethod
    async def aextract_data(
        query: SecSymbolMapQueryParams,
        credentials: Optional[Dict[str, str]],
        **kwargs: Any,
    ) -> Dict:
        """Resolve the ticker symbol for the CIK given in the query."""
        # pylint: disable=import-outside-toplevel
        from openbb_sec.utils.helpers import cik_map

        cik = query.query
        # The query must be a purely numeric CIK string.
        if not cik.isdigit():
            raise OpenBBError("Query is required and must be a valid CIK.")
        return {"symbol": await cik_map(int(cik), query.use_cache)}

    @staticmethod
    def transform_data(
        query: SecSymbolMapQueryParams, data: Dict, **kwargs: Any
    ) -> SecSymbolMapData:
        """Validate the raw data against the data model."""
        return SecSymbolMapData.model_validate(data)
openbb_platform/providers/sec/openbb_sec/py.typed ADDED
File without changes
openbb_platform/providers/sec/openbb_sec/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC Utils."""
openbb_platform/providers/sec/openbb_sec/utils/definitions.py ADDED
@@ -0,0 +1,1350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Definitions and Models."""
2
+
3
+ # pylint: disable=too-many-lines
4
+
5
+ from typing import Dict, Literal
6
+
7
+ QUARTERS = Literal[1, 2, 3, 4]
8
+
9
+ SEC_HEADERS: Dict[str, str] = {
10
+ "User-Agent": "my real company name definitelynot@fakecompany.com",
11
+ "Accept-Encoding": "gzip, deflate",
12
+ "Host": "www.sec.gov",
13
+ }
14
+
15
+ # Some endpoints don't like the Host header.
16
+
17
+ HEADERS: Dict[str, str] = {
18
+ "User-Agent": "my real company name definitelynot@fakecompany.com",
19
+ "Accept-Encoding": "gzip, deflate",
20
+ }
21
+
22
+
23
# Closed set of SEC submission form types accepted by the filings endpoints.
# NOTE(review): names appear to mirror EDGAR form identifiers with spaces
# replaced by underscores (e.g. "DEF_14A") — confirm against the consumer
# that maps these back to EDGAR form names.
FORM_TYPES = Literal[
    "1", "1-A", "1-A_POS", "1-A-W", "1-E", "1-E_AD", "1-K", "1-SA",
    "1-U", "1-Z", "1-Z-W", "10-12B", "10-12G", "10-D", "10-K", "10-KT",
    "10-Q", "10-QT", "11-K", "11-KT", "13F-HR", "13F-NT", "13FCONP", "144",
    "15-12B", "15-12G", "15-15D", "15F-12B", "15F-12G", "15F-15D", "18-12B", "18-K",
    "19B-4E", "2-A", "2-AF", "2-E", "20-F", "20FR12B", "20FR12G", "24F-2NT",
    "25", "25-NSE", "253G1", "253G2", "253G3", "253G4", "3", "305B2",
    "34-12H", "4", "40-17F1", "40-17F2", "40-17G", "40-17GCS", "40-202A", "40-203A",
    "40-206A", "40-24B2", "40-33", "40-6B", "40-8B25", "40-8F-2", "40-APP", "40-F",
    "40-OIP", "40FR12B", "40FR12G", "424A", "424B1", "424B2", "424B3", "424B4",
    "424B5", "424B7", "424B8", "424H", "425", "485APOS", "485BPOS", "485BXT",
    "486APOS", "486BPOS", "486BXT", "487", "497", "497AD", "497H2", "497J",
    "497K", "497VPI", "497VPU", "5", "6-K", "6B_NTC", "6B_ORDR", "8-A12B",
    "8-A12G", "8-K", "8-K12B", "8-K12G3", "8-K15D5", "8-M", "8F-2_NTC", "8F-2_ORDR",
    "9-M", "ABS-15G", "ABS-EE", "ADN-MTL", "ADV-E", "ADV-H-C", "ADV-H-T", "ADV-NR",
    "ANNLRPT", "APP_NTC", "APP_ORDR", "APP_WD", "APP_WDG", "ARS", "ATS-N", "ATS-N-C",
    "ATS-N/UA", "AW", "AW_WD", "C", "C-AR", "C-AR-W", "C-TR", "C-TR-W",
    "C-U", "C-U-W", "C-W", "CB", "CERT", "CERTARCA", "CERTBATS", "CERTCBO",
    "CERTNAS", "CERTNYS", "CERTPAC", "CFPORTAL", "CFPORTAL-W", "CORRESP", "CT_ORDER", "D",
    "DEF_14A", "DEF_14C", "DEFA14A", "DEFA14C", "DEFC14A", "DEFC14C", "DEFM14A", "DEFM14C",
    "DEFN14A", "DEFR14A", "DEFR14C", "DEL_AM", "DFAN14A", "DFRN14A", "DOS", "DOSLTR",
    "DRS", "DRSLTR", "DSTRBRPT", "EFFECT", "F-1", "F-10", "F-10EF", "F-10POS",
    "F-1MEF", "F-3", "F-3ASR", "F-3D", "F-3DPOS", "F-3MEF", "F-4", "F-4_POS",
    "F-4MEF", "F-6", "F-6_POS", "F-6EF", "F-7", "F-7_POS", "F-8", "F-8_POS",
    "F-80", "F-80POS", "F-9", "F-9_POS", "F-N", "F-X", "FOCUSN", "FWP",
    "G-405", "G-405N", "G-FIN", "G-FINW", "IRANNOTICE", "MA", "MA-A", "MA-I",
    "MA-W", "MSD", "MSDCO", "MSDW", "N-1", "N-14", "N-14_8C", "N-14MEF",
    "N-18F1", "N-1A", "N-2", "N-2_POSASR", "N-23C-2", "N-23C3A", "N-23C3B", "N-23C3C",
    "N-2ASR", "N-2MEF", "N-30B-2", "N-30D", "N-4", "N-5", "N-54A", "N-54C",
    "N-6", "N-6F", "N-8A", "N-8B-2", "N-8F", "N-8F_NTC", "N-8F_ORDR", "N-CEN",
    "N-CR", "N-CSR", "N-CSRS", "N-MFP", "N-MFP1", "N-MFP2", "N-PX", "N-Q",
    "N-VP", "N-VPFS", "NO_ACT", "NPORT-EX", "NPORT-NP", "NPORT-P", "NRSRO-CE", "NRSRO-UPD",
    "NSAR-A", "NSAR-AT", "NSAR-B", "NSAR-BT", "NSAR-U", "NT_10-D", "NT_10-K", "NT_10-Q",
    "NT_11-K", "NT_20-F", "NT_N-CEN", "NT_N-MFP", "NT_N-MFP1", "NT_N-MFP2", "NT_NPORT-EX", "NT_NPORT-P",
    "NT-NCEN", "NT-NCSR", "NT-NSAR", "NTFNCEN", "NTFNCSR", "NTFNSAR", "NTN_10D", "NTN_10K",
    "NTN_10Q", "NTN_20F", "OIP_NTC", "OIP_ORDR", "POS_8C", "POS_AM", "POS_AMI", "POS_EX",
    "POS462B", "POS462C", "POSASR", "PRE_14A", "PRE_14C", "PREC14A", "PREC14C", "PREM14A",
    "PREM14C", "PREN14A", "PRER14A", "PRER14C", "PRRN14A", "PX14A6G", "PX14A6N", "QRTLYRPT",
    "QUALIF", "REG-NR", "REVOKED", "RW", "RW_WD", "S-1", "S-11", "S-11MEF",
    "S-1MEF", "S-20", "S-3", "S-3ASR", "S-3D", "S-3DPOS", "S-3MEF", "S-4",
    "S-4_POS", "S-4EF", "S-4MEF", "S-6", "S-8", "S-8_POS", "S-B", "S-BMEF",
    "SBSE", "SBSE-A", "SBSE-BD", "SBSE-C", "SBSE-W", "SC_13D", "SC_13E1", "SC_13E3",
    "SC_13G", "SC_14D9", "SC_14F1", "SC_14N", "SC_TO-C", "SC_TO-I", "SC_TO-T", "SC13E4F",
    "SC14D1F", "SC14D9C", "SC14D9F", "SD", "SDR", "SE", "SEC_ACTION", "SEC_STAFF_ACTION",
    "SEC_STAFF_LETTER", "SF-1", "SF-3", "SL", "SP_15D2", "STOP_ORDER", "SUPPL", "T-3",
    "TA-1", "TA-2", "TA-W", "TACO", "TH", "TTW", "UNDER", "UPLOAD",
    "WDL-REQ", "X-17A-5",
]
387
+
388
+ FORM_LIST = [
389
+ "1",
390
+ "1-A",
391
+ "1-A_POS",
392
+ "1-A-W",
393
+ "1-E",
394
+ "1-E_AD",
395
+ "1-K",
396
+ "1-SA",
397
+ "1-U",
398
+ "1-Z",
399
+ "1-Z-W",
400
+ "10-12B",
401
+ "10-12G",
402
+ "10-D",
403
+ "10-K",
404
+ "10-KT",
405
+ "10-Q",
406
+ "10-QT",
407
+ "11-K",
408
+ "11-KT",
409
+ "13F-HR",
410
+ "13F-NT",
411
+ "13FCONP",
412
+ "144",
413
+ "15-12B",
414
+ "15-12G",
415
+ "15-15D",
416
+ "15F-12B",
417
+ "15F-12G",
418
+ "15F-15D",
419
+ "18-12B",
420
+ "18-K",
421
+ "19B-4E",
422
+ "2-A",
423
+ "2-AF",
424
+ "2-E",
425
+ "20-F",
426
+ "20FR12B",
427
+ "20FR12G",
428
+ "24F-2NT",
429
+ "25",
430
+ "25-NSE",
431
+ "253G1",
432
+ "253G2",
433
+ "253G3",
434
+ "253G4",
435
+ "3",
436
+ "305B2",
437
+ "34-12H",
438
+ "4",
439
+ "40-17F1",
440
+ "40-17F2",
441
+ "40-17G",
442
+ "40-17GCS",
443
+ "40-202A",
444
+ "40-203A",
445
+ "40-206A",
446
+ "40-24B2",
447
+ "40-33",
448
+ "40-6B",
449
+ "40-8B25",
450
+ "40-8F-2",
451
+ "40-APP",
452
+ "40-F",
453
+ "40-OIP",
454
+ "40FR12B",
455
+ "40FR12G",
456
+ "424A",
457
+ "424B1",
458
+ "424B2",
459
+ "424B3",
460
+ "424B4",
461
+ "424B5",
462
+ "424B7",
463
+ "424B8",
464
+ "424H",
465
+ "425",
466
+ "485APOS",
467
+ "485BPOS",
468
+ "485BXT",
469
+ "486APOS",
470
+ "486BPOS",
471
+ "486BXT",
472
+ "487",
473
+ "497",
474
+ "497AD",
475
+ "497H2",
476
+ "497J",
477
+ "497K",
478
+ "497VPI",
479
+ "497VPU",
480
+ "5",
481
+ "6-K",
482
+ "6B_NTC",
483
+ "6B_ORDR",
484
+ "8-A12B",
485
+ "8-A12G",
486
+ "8-K",
487
+ "8-K12B",
488
+ "8-K12G3",
489
+ "8-K15D5",
490
+ "8-M",
491
+ "8F-2_NTC",
492
+ "8F-2_ORDR",
493
+ "9-M",
494
+ "ABS-15G",
495
+ "ABS-EE",
496
+ "ADN-MTL",
497
+ "ADV-E",
498
+ "ADV-H-C",
499
+ "ADV-H-T",
500
+ "ADV-NR",
501
+ "ANNLRPT",
502
+ "APP_NTC",
503
+ "APP_ORDR",
504
+ "APP_WD",
505
+ "APP_WDG",
506
+ "ARS",
507
+ "ATS-N",
508
+ "ATS-N-C",
509
+ "ATS-N/UA",
510
+ "AW",
511
+ "AW_WD",
512
+ "C",
513
+ "C-AR",
514
+ "C-AR-W",
515
+ "C-TR",
516
+ "C-TR-W",
517
+ "C-U",
518
+ "C-U-W",
519
+ "C-W",
520
+ "CB",
521
+ "CERT",
522
+ "CERTARCA",
523
+ "CERTBATS",
524
+ "CERTCBO",
525
+ "CERTNAS",
526
+ "CERTNYS",
527
+ "CERTPAC",
528
+ "CFPORTAL",
529
+ "CFPORTAL-W",
530
+ "CORRESP",
531
+ "CT_ORDER",
532
+ "D",
533
+ "DEF_14A",
534
+ "DEF_14C",
535
+ "DEFA14A",
536
+ "DEFA14C",
537
+ "DEFC14A",
538
+ "DEFC14C",
539
+ "DEFM14A",
540
+ "DEFM14C",
541
+ "DEFN14A",
542
+ "DEFR14A",
543
+ "DEFR14C",
544
+ "DEL_AM",
545
+ "DFAN14A",
546
+ "DFRN14A",
547
+ "DOS",
548
+ "DOSLTR",
549
+ "DRS",
550
+ "DRSLTR",
551
+ "DSTRBRPT",
552
+ "EFFECT",
553
+ "F-1",
554
+ "F-10",
555
+ "F-10EF",
556
+ "F-10POS",
557
+ "F-1MEF",
558
+ "F-3",
559
+ "F-3ASR",
560
+ "F-3D",
561
+ "F-3DPOS",
562
+ "F-3MEF",
563
+ "F-4",
564
+ "F-4_POS",
565
+ "F-4MEF",
566
+ "F-6",
567
+ "F-6_POS",
568
+ "F-6EF",
569
+ "F-7",
570
+ "F-7_POS",
571
+ "F-8",
572
+ "F-8_POS",
573
+ "F-80",
574
+ "F-80POS",
575
+ "F-9",
576
+ "F-9_POS",
577
+ "F-N",
578
+ "F-X",
579
+ "FOCUSN",
580
+ "FWP",
581
+ "G-405",
582
+ "G-405N",
583
+ "G-FIN",
584
+ "G-FINW",
585
+ "IRANNOTICE",
586
+ "MA",
587
+ "MA-A",
588
+ "MA-I",
589
+ "MA-W",
590
+ "MSD",
591
+ "MSDCO",
592
+ "MSDW",
593
+ "N-1",
594
+ "N-14",
595
+ "N-14_8C",
596
+ "N-14MEF",
597
+ "N-18F1",
598
+ "N-1A",
599
+ "N-2",
600
+ "N-2_POSASR",
601
+ "N-23C-2",
602
+ "N-23C3A",
603
+ "N-23C3B",
604
+ "N-23C3C",
605
+ "N-2ASR",
606
+ "N-2MEF",
607
+ "N-30B-2",
608
+ "N-30D",
609
+ "N-4",
610
+ "N-5",
611
+ "N-54A",
612
+ "N-54C",
613
+ "N-6",
614
+ "N-6F",
615
+ "N-8A",
616
+ "N-8B-2",
617
+ "N-8F",
618
+ "N-8F_NTC",
619
+ "N-8F_ORDR",
620
+ "N-CEN",
621
+ "N-CR",
622
+ "N-CSR",
623
+ "N-CSRS",
624
+ "N-MFP",
625
+ "N-MFP1",
626
+ "N-MFP2",
627
+ "N-PX",
628
+ "N-Q",
629
+ "N-VP",
630
+ "N-VPFS",
631
+ "NO_ACT",
632
+ "NPORT-EX",
633
+ "NPORT-NP",
634
+ "NPORT-P",
635
+ "NRSRO-CE",
636
+ "NRSRO-UPD",
637
+ "NSAR-A",
638
+ "NSAR-AT",
639
+ "NSAR-B",
640
+ "NSAR-BT",
641
+ "NSAR-U",
642
+ "NT_10-D",
643
+ "NT_10-K",
644
+ "NT_10-Q",
645
+ "NT_11-K",
646
+ "NT_20-F",
647
+ "NT_N-CEN",
648
+ "NT_N-MFP",
649
+ "NT_N-MFP1",
650
+ "NT_N-MFP2",
651
+ "NT_NPORT-EX",
652
+ "NT_NPORT-P",
653
+ "NT-NCEN",
654
+ "NT-NCSR",
655
+ "NT-NSAR",
656
+ "NTFNCEN",
657
+ "NTFNCSR",
658
+ "NTFNSAR",
659
+ "NTN_10D",
660
+ "NTN_10K",
661
+ "NTN_10Q",
662
+ "NTN_20F",
663
+ "OIP_NTC",
664
+ "OIP_ORDR",
665
+ "POS_8C",
666
+ "POS_AM",
667
+ "POS_AMI",
668
+ "POS_EX",
669
+ "POS462B",
670
+ "POS462C",
671
+ "POSASR",
672
+ "PRE_14A",
673
+ "PRE_14C",
674
+ "PREC14A",
675
+ "PREC14C",
676
+ "PREM14A",
677
+ "PREM14C",
678
+ "PREN14A",
679
+ "PRER14A",
680
+ "PRER14C",
681
+ "PRRN14A",
682
+ "PX14A6G",
683
+ "PX14A6N",
684
+ "QRTLYRPT",
685
+ "QUALIF",
686
+ "REG-NR",
687
+ "REVOKED",
688
+ "RW",
689
+ "RW_WD",
690
+ "S-1",
691
+ "S-11",
692
+ "S-11MEF",
693
+ "S-1MEF",
694
+ "S-20",
695
+ "S-3",
696
+ "S-3ASR",
697
+ "S-3D",
698
+ "S-3DPOS",
699
+ "S-3MEF",
700
+ "S-4",
701
+ "S-4_POS",
702
+ "S-4EF",
703
+ "S-4MEF",
704
+ "S-6",
705
+ "S-8",
706
+ "S-8_POS",
707
+ "S-B",
708
+ "S-BMEF",
709
+ "SBSE",
710
+ "SBSE-A",
711
+ "SBSE-BD",
712
+ "SBSE-C",
713
+ "SBSE-W",
714
+ "SC_13D",
715
+ "SC_13E1",
716
+ "SC_13E3",
717
+ "SC_13G",
718
+ "SC_14D9",
719
+ "SC_14F1",
720
+ "SC_14N",
721
+ "SC_TO-C",
722
+ "SC_TO-I",
723
+ "SC_TO-T",
724
+ "SC13E4F",
725
+ "SC14D1F",
726
+ "SC14D9C",
727
+ "SC14D9F",
728
+ "SD",
729
+ "SDR",
730
+ "SE",
731
+ "SEC_ACTION",
732
+ "SEC_STAFF_ACTION",
733
+ "SEC_STAFF_LETTER",
734
+ "SF-1",
735
+ "SF-3",
736
+ "SL",
737
+ "SP_15D2",
738
+ "STOP_ORDER",
739
+ "SUPPL",
740
+ "T-3",
741
+ "TA-1",
742
+ "TA-2",
743
+ "TA-W",
744
+ "TACO",
745
+ "TH",
746
+ "TTW",
747
+ "UNDER",
748
+ "UPLOAD",
749
+ "WDL-REQ",
750
+ "X-17A-5",
751
+ ]
752
+
753
+ TAXONOMIES = Literal["us-gaap", "dei", "ifrs-full", "srt"]
754
+
755
+ FACTS = [
756
+ "AccountsPayableCurrent",
757
+ "AccountsReceivableNet",
758
+ "AccountsReceivableNetCurrent",
759
+ "AccrualForTaxesOtherThanIncomeTaxesCurrent",
760
+ "AccrualForTaxesOtherThanIncomeTaxesCurrentAndNoncurrent",
761
+ "AccruedIncomeTaxesCurrent",
762
+ "AccruedIncomeTaxesNoncurrent",
763
+ "AccruedInsuranceCurrent",
764
+ "AccruedLiabilitiesCurrent",
765
+ "AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment",
766
+ "AccumulatedOtherComprehensiveIncomeLossNetOfTax",
767
+ "AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets",
768
+ "AdvertisingExpense",
769
+ "AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue",
770
+ "AllocatedShareBasedCompensationExpense",
771
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
772
+ "Assets",
773
+ "AssetsCurrent",
774
+ "AssetsNoncurrent",
775
+ "NoncurrentAssets",
776
+ "AssetImpairmentCharges",
777
+ "BuildingsAndImprovementsGross",
778
+ "CapitalLeaseObligationsCurrent",
779
+ "CapitalLeaseObligationsNoncurrent",
780
+ "Cash",
781
+ "CashAndCashEquivalentsAtCarryingValue",
782
+ "CashCashEquivalentsAndShortTermInvestments",
783
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
784
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
785
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsPeriodIncreaseDecreaseIncludingExchangeRateEffect",
786
+ "CommitmentsAndContingencies",
787
+ "CommercialPaper",
788
+ "CommonStockDividendsPerShareDeclared",
789
+ "CommonStockDividendsPerShareCashPaid",
790
+ "CommonStocksIncludingAdditionalPaidInCapital",
791
+ "ComprehensiveIncomeNetOfTax",
792
+ "ComprehensiveIncomeNetOfTaxAttributableToNoncontrollingInterest",
793
+ "ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest",
794
+ "ConstructionInProgressGross",
795
+ "ContractWithCustomerAssetNet",
796
+ "ContractWithCustomerLiability",
797
+ "ContractWithCustomerLiabilityCurrent",
798
+ "ContractWithCustomerLiabilityNoncurrent",
799
+ "CostOfRevenue",
800
+ "CostOfGoodsAndServicesSold",
801
+ "CurrentFederalTaxExpenseBenefit",
802
+ "CurrentForeignTaxExpenseBenefit",
803
+ "CurrentIncomeTaxExpenseBenefit",
804
+ "CurrentStateAndLocalTaxExpenseBenefit",
805
+ "DebtInstrumentFaceAmount",
806
+ "DebtInstrumentFairValue",
807
+ "DebtLongtermAndShorttermCombinedAmount",
808
+ "DeferredFederalIncomeTaxExpenseBenefit",
809
+ "DeferredForeignIncomeTaxExpenseBenefit",
810
+ "DeferredIncomeTaxExpenseBenefit",
811
+ "DeferredIncomeTaxesAndTaxCredits",
812
+ "DeferredIncomeTaxLiabilities",
813
+ "DeferredIncomeTaxLiabilitiesNet",
814
+ "DeferredRevenue",
815
+ "DeferredTaxAssetsGross",
816
+ "DeferredTaxAssetsLiabilitiesNet",
817
+ "DeferredTaxAssetsNet",
818
+ "DeferredTaxLiabilities",
819
+ "DefinedContributionPlanCostRecognized",
820
+ "Depreciation",
821
+ "DepreciationAmortizationAndAccretionNet",
822
+ "DepreciationAmortizationAndOther",
823
+ "DepreciationAndAmortization",
824
+ "DepreciationDepletionAndAmortization",
825
+ "DerivativeCollateralObligationToReturnCash",
826
+ "DerivativeCollateralRightToReclaimCash",
827
+ "DerivativeFairValueOfDerivativeNet",
828
+ "DerivativeLiabilityCollateralRightToReclaimCashOffset",
829
+ "DerivativeNotionalAmount",
830
+ "Dividends",
831
+ "DividendsCash",
832
+ "DividendsPayableAmountPerShare",
833
+ "DividendsPayableCurrent",
834
+ "DistributedEarnings",
835
+ "EarningsPerShareBasic",
836
+ "EarningsPerShareDiluted",
837
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
838
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
839
+ "EmployeeRelatedLiabilitiesCurrent",
840
+ "EmployeeRelatedLiabilitiesCurrentAndNoncurrent",
841
+ "EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense",
842
+ "FinanceLeaseInterestExpense",
843
+ "FinanceLeaseInterestPaymentOnLiability",
844
+ "FinanceLeaseLiability",
845
+ "FinanceLeaseLiabilityCurrent",
846
+ "FinanceLeaseLiabilityNoncurrent",
847
+ "FinanceLeaseLiabilityPaymentsDue",
848
+ "FinanceLeaseLiabilityPaymentsDueAfterYearFive",
849
+ "FinanceLeaseLiabilityPaymentsDueNextTwelveMonths",
850
+ "FinanceLeaseLiabilityPaymentsDueYearFive",
851
+ "FinanceLeaseLiabilityPaymentsDueYearFour",
852
+ "FinanceLeaseLiabilityPaymentsDueYearThree",
853
+ "FinanceLeaseLiabilityPaymentsDueYearTwo",
854
+ "FinanceLeaseLiabilityPaymentsRemainderOfFiscalYear",
855
+ "FinanceLeaseLiabilityUndiscountedExcessAmount",
856
+ "FinanceLeasePrincipalPayments",
857
+ "FinanceLeaseRightOfUseAsset",
858
+ "FinancingReceivableAllowanceForCreditLosses",
859
+ "FiniteLivedIntangibleAssetsNet",
860
+ "FixturesAndEquipmentGross",
861
+ "GainLossOnInvestments",
862
+ "GainLossOnInvestmentsAndDerivativeInstruments",
863
+ "GainLossOnSaleOfBusiness",
864
+ "GainsLossesOnExtinguishmentOfDebt",
865
+ "GeneralAndAdministrativeExpense",
866
+ "Goodwill",
867
+ "GrossProfit",
868
+ "ImpairmentOfIntangibleAssetsExcludingGoodwill",
869
+ "ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill",
870
+ "IncomeLossFromContinuingOperations",
871
+ "IncomeLossFromContinuingOperationsAttributableToNoncontrollingEntity",
872
+ "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
873
+ "IncomeLossFromContinuingOperationsPerBasicShare",
874
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
875
+ "InterestAndDebtExpense",
876
+ "IncomeTaxExpenseBenefit",
877
+ "IncomeTaxesPaid",
878
+ "IncomeTaxesPaidNet",
879
+ "IncreaseDecreaseInAccountsAndOtherReceivables",
880
+ "IncreaseDecreaseInAccountsPayable",
881
+ "IncreaseDecreaseInAccountsReceivable",
882
+ "IncreaseDecreaseInAccruedLiabilities",
883
+ "IncreaseDecreaseInAccruedIncomeTaxesPayable",
884
+ "IncreaseDecreaseInAccruedTaxesPayable",
885
+ "IncreaseDecreaseInContractWithCustomerLiability",
886
+ "IncreaseDecreaseInDeferredIncomeTaxes",
887
+ "IncreaseDecreaseInInventories",
888
+ "IncreaseDecreaseInOtherCurrentAssets",
889
+ "IncreaseDecreaseInOtherCurrentLiabilities",
890
+ "IncreaseDecreaseInOtherNoncurrentAssets",
891
+ "IncreaseDecreaseInOtherNoncurrentLiabilities",
892
+ "IncreaseDecreaseInPensionPlanObligations",
893
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
894
+ "InterestExpenseDebt",
895
+ "InterestIncomeExpenseNet",
896
+ "InterestPaid",
897
+ "InterestPaidNet",
898
+ "InventoryNet",
899
+ "InvestmentIncomeInterest",
900
+ "Land",
901
+ "LeaseAndRentalExpense",
902
+ "LesseeOperatingLeaseLiabilityPaymentsDue",
903
+ "LesseeOperatingLeaseLiabilityPaymentsDueAfterYearFive",
904
+ "LesseeOperatingLeaseLiabilityPaymentsDueNextTwelveMonths",
905
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFive",
906
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFour",
907
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearThree",
908
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearTwo",
909
+ "LesseeOperatingLeaseLiabilityPaymentsRemainderOfFiscalYear",
910
+ "LettersOfCreditOutstandingAmount",
911
+ "Liabilities",
912
+ "LiabilitiesAndStockholdersEquity",
913
+ "LiabilitiesCurrent",
914
+ "LineOfCredit",
915
+ "LineOfCreditFacilityMaximumBorrowingCapacity",
916
+ "LongTermDebt",
917
+ "LongTermDebtCurrent",
918
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalAfterYearFive",
919
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInNextTwelveMonths", # pragma: allowlist secret
920
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFive",
921
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFour",
922
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearThree",
923
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearTwo",
924
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalRemainderOfFiscalYear",
925
+ "LongTermDebtNoncurrent",
926
+ "LongTermInvestments",
927
+ "LossContingencyEstimateOfPossibleLoss",
928
+ "MachineryAndEquipmentGross",
929
+ "MarketableSecuritiesCurrent",
930
+ "MarketableSecuritiesNoncurrent",
931
+ "MinorityInterest",
932
+ "NetCashProvidedByUsedInFinancingActivities",
933
+ "NetCashProvidedByUsedInInvestingActivities",
934
+ "NetCashProvidedByUsedInOperatingActivities",
935
+ "NetIncomeLoss",
936
+ "NetIncomeLossAttributableToNoncontrollingInterest",
937
+ "NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
938
+ "NetIncomeLossAttributableToRedeemableNoncontrollingInterest",
939
+ "NoncurrentAssets",
940
+ "NonoperatingIncomeExpense",
941
+ "NoninterestIncome",
942
+ "NotesReceivableNet",
943
+ "OperatingExpenses",
944
+ "OperatingIncomeLoss",
945
+ "OperatingLeaseCost",
946
+ "OperatingLeaseLiability",
947
+ "OperatingLeaseLiabilityCurrent",
948
+ "OperatingLeaseLiabilityNoncurrent",
949
+ "OperatingLeaseRightOfUseAsset",
950
+ "OtherAccruedLiabilitiesCurrent",
951
+ "OtherAssetsCurrent",
952
+ "OtherAssetsNoncurrent",
953
+ "OtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax",
954
+ "OtherComprehensiveIncomeLossCashFlowHedgeGainLossAfterReclassificationAndTax",
955
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossafterReclassificationandTax",
956
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossbeforeReclassificationafterTax",
957
+ "OtherComprehensiveIncomeLossForeignCurrencyTransactionAndTranslationAdjustmentNetOfTax",
958
+ "OtherComprehensiveIncomeLossNetOfTax",
959
+ "OtherComprehensiveIncomeLossNetOfTaxPortionAttributableToParent",
960
+ "OtherComprehensiveIncomeUnrealizedHoldingGainLossOnSecuritiesArisingDuringPeriodNetOfTax",
961
+ "OtherIncome",
962
+ "OtherLiabilitiesCurrent",
963
+ "OtherLiabilitiesNoncurrent",
964
+ "OtherLongTermDebt",
965
+ "OtherNoncashIncomeExpense",
966
+ "PaymentsForCapitalImprovements",
967
+ "PaymentsOfDividends",
968
+ "PaymentsOfDividendsMinorityInterest",
969
+ "PaymentsForProceedsFromBusinessesAndInterestInAffiliates",
970
+ "PaymentsForProceedsFromOtherInvestingActivities",
971
+ "PaymentsForRent",
972
+ "PaymentsForRepurchaseOfCommonStock",
973
+ "PaymentsOfDebtExtinguishmentCosts",
974
+ "PaymentsToAcquireInvestments",
975
+ "PaymentsToAcquirePropertyPlantAndEquipment",
976
+ "PreferredStockSharesOutstanding",
977
+ "PreferredStockValue",
978
+ "PrepaidExpenseAndOtherAssetsCurrent",
979
+ "PrepaidExpenseCurrent",
980
+ "ProceedsFromDebtMaturingInMoreThanThreeMonths",
981
+ "ProceedsFromDebtNetOfIssuanceCosts",
982
+ "ProceedsFromDivestitureOfBusinesses",
983
+ "ProceedsFromInvestments",
984
+ "ProceedsFromIssuanceOfCommonStock",
985
+ "ProceedsFromIssuanceOfDebt",
986
+ "ProceedsFromIssuanceOfLongTermDebt",
987
+ "ProceedsFromIssuanceOfUnsecuredDebt",
988
+ "ProceedsFromIssuanceOrSaleOfEquity",
989
+ "ProceedsFromMaturitiesPrepaymentsAndCallsOfAvailableForSaleSecurities",
990
+ "ProceedsFromPaymentsForOtherFinancingActivities",
991
+ "ProceedsFromPaymentsToMinorityShareholders",
992
+ "ProceedsFromRepaymentsOfShortTermDebt",
993
+ "ProceedsFromRepaymentsOfShortTermDebtMaturingInThreeMonthsOrLess",
994
+ "ProceedsFromSaleOfPropertyPlantAndEquipment",
995
+ "ProceedsFromStockOptionsExercised",
996
+ "ProfitLoss",
997
+ "PropertyPlantAndEquipmentGross",
998
+ "PropertyPlantAndEquipmentNet",
999
+ "ReceivablesNetCurrent",
1000
+ "RedeemableNoncontrollingInterestEquityCarryingAmount",
1001
+ "RepaymentsOfDebtMaturingInMoreThanThreeMonths",
1002
+ "RepaymentsOfLongTermDebt",
1003
+ "ResearchAndDevelopmentExpense",
1004
+ "RestrictedCash",
1005
+ "RestrictedCashAndCashEquivalents",
1006
+ "RestrictedStockExpense",
1007
+ "RestructuringCharges",
1008
+ "RetainedEarningsAccumulatedDeficit",
1009
+ "Revenues",
1010
+ "RevenueFromContractWithCustomerExcludingAssessedTax",
1011
+ "SecuredLongTermDebt",
1012
+ "SellingAndMarketingExpense",
1013
+ "SellingGeneralAndAdministrativeExpense",
1014
+ "ShareBasedCompensation",
1015
+ "ShortTermBorrowings",
1016
+ "ShortTermInvestments",
1017
+ "StockholdersEquity",
1018
+ "StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest",
1019
+ "StockholdersEquityOther",
1020
+ "StockIssuedDuringPeriodValueNewIssues",
1021
+ "StockOptionPlanExpense",
1022
+ "StockRedeemedOrCalledDuringPeriodValue",
1023
+ "StockRepurchasedDuringPeriodValue",
1024
+ "StockRepurchasedAndRetiredDuringPeriodValue",
1025
+ "TaxesPayableCurrent",
1026
+ "TradingSecuritiesDebt",
1027
+ "TreasuryStockAcquiredAverageCostPerShare",
1028
+ "TreasuryStockSharesAcquired",
1029
+ "UnrealizedGainLossOnInvestments",
1030
+ "UnrecognizedTaxBenefits",
1031
+ "UnsecuredDebt",
1032
+ "VariableLeaseCost",
1033
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1034
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1035
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1036
+ ]
1037
+
1038
+ USD_PER_SHARE_FACTS = [
1039
+ "EarningsPerShareBasic",
1040
+ "EarningsPerShareDiluted",
1041
+ "TreasuryStockAcquiredAverageCostPerShare",
1042
+ "CommonStockDividendsPerShareDeclared",
1043
+ "CommonStockDividendsPerShareCashPaid",
1044
+ "DividendsPayableAmountPerShare",
1045
+ "IncomeLossFromContinuingOperationsPerBasicShare",
1046
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
1047
+ ]
1048
+
1049
+ SHARES_FACTS = [
1050
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1051
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1052
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1053
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
1054
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
1055
+ "TreasuryStockSharesAcquired",
1056
+ "PreferredStockSharesOutstanding",
1057
+ ]
1058
+
1059
+ FACT_CHOICES = Literal[ # pylint: disable=C0103
1060
+ "AccountsPayableCurrent",
1061
+ "AccountsReceivableNet",
1062
+ "AccountsReceivableNetCurrent",
1063
+ "AccrualForTaxesOtherThanIncomeTaxesCurrent",
1064
+ "AccrualForTaxesOtherThanIncomeTaxesCurrentAndNoncurrent",
1065
+ "AccruedIncomeTaxesCurrent",
1066
+ "AccruedIncomeTaxesNoncurrent",
1067
+ "AccruedInsuranceCurrent",
1068
+ "AccruedLiabilitiesCurrent",
1069
+ "AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment",
1070
+ "AccumulatedOtherComprehensiveIncomeLossNetOfTax",
1071
+ "AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets",
1072
+ "AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue",
1073
+ "AdvertisingExpense",
1074
+ "AllocatedShareBasedCompensationExpense",
1075
+ "AntidilutiveSecuritiesExcludedFromComputationOfEarningsPerShareAmount",
1076
+ "Assets",
1077
+ "AssetsCurrent",
1078
+ "AssetsNoncurrent",
1079
+ "NoncurrentAssets",
1080
+ "AssetImpairmentCharges",
1081
+ "BuildingsAndImprovementsGross",
1082
+ "CapitalLeaseObligationsCurrent",
1083
+ "CapitalLeaseObligationsNoncurrent",
1084
+ "Cash",
1085
+ "CashAndCashEquivalentsAtCarryingValue",
1086
+ "CashCashEquivalentsAndShortTermInvestments",
1087
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
1088
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
1089
+ "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsPeriodIncreaseDecreaseIncludingExchangeRateEffect",
1090
+ "CommitmentsAndContingencies",
1091
+ "CommercialPaper",
1092
+ "CommonStockDividendsPerShareDeclared",
1093
+ "CommonStockDividendsPerShareCashPaid",
1094
+ "CommonStocksIncludingAdditionalPaidInCapital",
1095
+ "ComprehensiveIncomeNetOfTax",
1096
+ "ComprehensiveIncomeNetOfTaxAttributableToNoncontrollingInterest",
1097
+ "ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest",
1098
+ "ConstructionInProgressGross",
1099
+ "ContractWithCustomerAssetNet",
1100
+ "ContractWithCustomerLiability",
1101
+ "ContractWithCustomerLiabilityCurrent",
1102
+ "ContractWithCustomerLiabilityNoncurrent",
1103
+ "CostOfRevenue",
1104
+ "CostOfGoodsAndServicesSold",
1105
+ "CurrentFederalTaxExpenseBenefit",
1106
+ "CurrentForeignTaxExpenseBenefit",
1107
+ "CurrentIncomeTaxExpenseBenefit",
1108
+ "CurrentStateAndLocalTaxExpenseBenefit",
1109
+ "DebtInstrumentFaceAmount",
1110
+ "DebtInstrumentFairValue",
1111
+ "DebtLongtermAndShorttermCombinedAmount",
1112
+ "DeferredFederalIncomeTaxExpenseBenefit",
1113
+ "DeferredForeignIncomeTaxExpenseBenefit",
1114
+ "DeferredIncomeTaxExpenseBenefit",
1115
+ "DeferredIncomeTaxesAndTaxCredits",
1116
+ "DeferredIncomeTaxLiabilities",
1117
+ "DeferredIncomeTaxLiabilitiesNet",
1118
+ "DeferredRevenue",
1119
+ "DeferredTaxAssetsGross",
1120
+ "DeferredTaxAssetsLiabilitiesNet",
1121
+ "DeferredTaxAssetsNet",
1122
+ "DeferredTaxLiabilities",
1123
+ "DefinedContributionPlanCostRecognized",
1124
+ "Depreciation",
1125
+ "DepreciationAmortizationAndAccretionNet",
1126
+ "DepreciationAmortizationAndOther",
1127
+ "DepreciationAndAmortization",
1128
+ "DepreciationDepletionAndAmortization",
1129
+ "DerivativeCollateralObligationToReturnCash",
1130
+ "DerivativeCollateralRightToReclaimCash",
1131
+ "DerivativeFairValueOfDerivativeNet",
1132
+ "DerivativeLiabilityCollateralRightToReclaimCashOffset",
1133
+ "DerivativeNotionalAmount",
1134
+ "Dividends",
1135
+ "DividendsCash",
1136
+ "DividendsPayableAmountPerShare",
1137
+ "DividendsPayableCurrent",
1138
+ "DistributedEarnings",
1139
+ "EarningsPerShareBasic",
1140
+ "EarningsPerShareDiluted",
1141
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
1142
+ "EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsIncludingDisposalGroupAndDiscontinuedOperations",
1143
+ "EmployeeRelatedLiabilitiesCurrent",
1144
+ "EmployeeRelatedLiabilitiesCurrentAndNoncurrent",
1145
+ "EmployeeServiceShareBasedCompensationTaxBenefitFromCompensationExpense",
1146
+ "FinanceLeaseInterestExpense",
1147
+ "FinanceLeaseInterestPaymentOnLiability",
1148
+ "FinanceLeaseLiability",
1149
+ "FinanceLeaseLiabilityCurrent",
1150
+ "FinanceLeaseLiabilityNoncurrent",
1151
+ "FinanceLeaseLiabilityPaymentsDue",
1152
+ "FinanceLeaseLiabilityPaymentsDueAfterYearFive",
1153
+ "FinanceLeaseLiabilityPaymentsDueNextTwelveMonths",
1154
+ "FinanceLeaseLiabilityPaymentsDueYearFive",
1155
+ "FinanceLeaseLiabilityPaymentsDueYearFour",
1156
+ "FinanceLeaseLiabilityPaymentsDueYearThree",
1157
+ "FinanceLeaseLiabilityPaymentsDueYearTwo",
1158
+ "FinanceLeaseLiabilityPaymentsRemainderOfFiscalYear",
1159
+ "FinanceLeaseLiabilityUndiscountedExcessAmount",
1160
+ "FinanceLeasePrincipalPayments",
1161
+ "FinanceLeaseRightOfUseAsset",
1162
+ "FinancingReceivableAllowanceForCreditLosses",
1163
+ "FiniteLivedIntangibleAssetsNet",
1164
+ "FixturesAndEquipmentGross",
1165
+ "GainLossOnInvestments",
1166
+ "GainLossOnInvestmentsAndDerivativeInstruments",
1167
+ "GainLossOnSaleOfBusiness",
1168
+ "GainsLossesOnExtinguishmentOfDebt",
1169
+ "GeneralAndAdministrativeExpense",
1170
+ "Goodwill",
1171
+ "GrossProfit",
1172
+ "ImpairmentOfIntangibleAssetsExcludingGoodwill",
1173
+ "ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill",
1174
+ "IncomeLossFromContinuingOperations",
1175
+ "IncomeLossFromContinuingOperationsAttributableToNoncontrollingEntity",
1176
+ "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
1177
+ "IncomeLossFromContinuingOperationsPerBasicShare",
1178
+ "IncomeLossFromContinuingOperationsPerDilutedShare",
1179
+ "InterestAndDebtExpense",
1180
+ "IncomeTaxExpenseBenefit",
1181
+ "IncomeTaxesPaid",
1182
+ "IncomeTaxesPaidNet",
1183
+ "IncreaseDecreaseInAccountsAndOtherReceivables",
1184
+ "IncreaseDecreaseInAccountsPayable",
1185
+ "IncreaseDecreaseInAccountsReceivable",
1186
+ "IncreaseDecreaseInAccruedLiabilities",
1187
+ "IncreaseDecreaseInAccruedIncomeTaxesPayable",
1188
+ "IncreaseDecreaseInAccruedTaxesPayable",
1189
+ "IncreaseDecreaseInContractWithCustomerLiability",
1190
+ "IncreaseDecreaseInDeferredIncomeTaxes",
1191
+ "IncreaseDecreaseInInventories",
1192
+ "IncreaseDecreaseInOtherCurrentAssets",
1193
+ "IncreaseDecreaseInOtherCurrentLiabilities",
1194
+ "IncreaseDecreaseInOtherNoncurrentAssets",
1195
+ "IncreaseDecreaseInOtherNoncurrentLiabilities",
1196
+ "IncreaseDecreaseInPensionPlanObligations",
1197
+ "IncrementalCommonSharesAttributableToShareBasedPaymentArrangements",
1198
+ "InterestExpenseDebt",
1199
+ "InterestIncomeExpenseNet",
1200
+ "InterestPaid",
1201
+ "InterestPaidNet",
1202
+ "InventoryNet",
1203
+ "InvestmentIncomeInterest",
1204
+ "Land",
1205
+ "LeaseAndRentalExpense",
1206
+ "LesseeOperatingLeaseLiabilityPaymentsDue",
1207
+ "LesseeOperatingLeaseLiabilityPaymentsDueAfterYearFive",
1208
+ "LesseeOperatingLeaseLiabilityPaymentsDueNextTwelveMonths",
1209
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFive",
1210
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearFour",
1211
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearThree",
1212
+ "LesseeOperatingLeaseLiabilityPaymentsDueYearTwo",
1213
+ "LesseeOperatingLeaseLiabilityPaymentsRemainderOfFiscalYear",
1214
+ "LettersOfCreditOutstandingAmount",
1215
+ "Liabilities",
1216
+ "LiabilitiesAndStockholdersEquity",
1217
+ "LiabilitiesCurrent",
1218
+ "LineOfCredit",
1219
+ "LineOfCreditFacilityMaximumBorrowingCapacity",
1220
+ "LongTermDebt",
1221
+ "LongTermDebtCurrent",
1222
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalAfterYearFive",
1223
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInNextTwelveMonths",
1224
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFive",
1225
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearFour",
1226
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearThree",
1227
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalInYearTwo",
1228
+ "LongTermDebtMaturitiesRepaymentsOfPrincipalRemainderOfFiscalYear",
1229
+ "LongTermDebtNoncurrent",
1230
+ "LongTermInvestments",
1231
+ "LossContingencyEstimateOfPossibleLoss",
1232
+ "MachineryAndEquipmentGross",
1233
+ "MarketableSecuritiesCurrent",
1234
+ "MarketableSecuritiesNoncurrent",
1235
+ "MinorityInterest",
1236
+ "NetCashProvidedByUsedInFinancingActivities",
1237
+ "NetCashProvidedByUsedInInvestingActivities",
1238
+ "NetCashProvidedByUsedInOperatingActivities",
1239
+ "NetIncomeLoss",
1240
+ "NetIncomeLossAttributableToNoncontrollingInterest",
1241
+ "NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
1242
+ "NetIncomeLossAttributableToRedeemableNoncontrollingInterest",
1243
+ "NoncurrentAssets",
1244
+ "NonoperatingIncomeExpense",
1245
+ "NoninterestIncome",
1246
+ "NotesReceivableNet",
1247
+ "OperatingExpenses",
1248
+ "OperatingIncomeLoss",
1249
+ "OperatingLeaseCost",
1250
+ "OperatingLeaseLiability",
1251
+ "OperatingLeaseLiabilityCurrent",
1252
+ "OperatingLeaseLiabilityNoncurrent",
1253
+ "OperatingLeaseRightOfUseAsset",
1254
+ "OtherAccruedLiabilitiesCurrent",
1255
+ "OtherAssetsCurrent",
1256
+ "OtherAssetsNoncurrent",
1257
+ "OtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax",
1258
+ "OtherComprehensiveIncomeLossCashFlowHedgeGainLossAfterReclassificationAndTax",
1259
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossafterReclassificationandTax",
1260
+ "OtherComprehensiveIncomeLossDerivativeInstrumentGainLossbeforeReclassificationafterTax",
1261
+ "OtherComprehensiveIncomeLossForeignCurrencyTransactionAndTranslationAdjustmentNetOfTax",
1262
+ "OtherComprehensiveIncomeLossNetOfTax",
1263
+ "OtherComprehensiveIncomeLossNetOfTaxPortionAttributableToParent",
1264
+ "OtherComprehensiveIncomeUnrealizedHoldingGainLossOnSecuritiesArisingDuringPeriodNetOfTax",
1265
+ "OtherIncome",
1266
+ "OtherLiabilitiesCurrent",
1267
+ "OtherLiabilitiesNoncurrent",
1268
+ "OtherLongTermDebt",
1269
+ "OtherNoncashIncomeExpense",
1270
+ "PaymentsForCapitalImprovements",
1271
+ "PaymentsOfDividends",
1272
+ "PaymentsOfDividendsMinorityInterest",
1273
+ "PaymentsForProceedsFromBusinessesAndInterestInAffiliates",
1274
+ "PaymentsForProceedsFromOtherInvestingActivities",
1275
+ "PaymentsForRent",
1276
+ "PaymentsForRepurchaseOfCommonStock",
1277
+ "PaymentsOfDebtExtinguishmentCosts",
1278
+ "PaymentsToAcquireInvestments",
1279
+ "PaymentsToAcquirePropertyPlantAndEquipment",
1280
+ "PreferredStockSharesOutstanding",
1281
+ "PreferredStockValue",
1282
+ "PrepaidExpenseAndOtherAssetsCurrent",
1283
+ "PrepaidExpenseCurrent",
1284
+ "ProceedsFromDebtMaturingInMoreThanThreeMonths",
1285
+ "ProceedsFromDebtNetOfIssuanceCosts",
1286
+ "ProceedsFromDivestitureOfBusinesses",
1287
+ "ProceedsFromInvestments",
1288
+ "ProceedsFromIssuanceOfCommonStock",
1289
+ "ProceedsFromIssuanceOfDebt",
1290
+ "ProceedsFromIssuanceOfLongTermDebt",
1291
+ "ProceedsFromIssuanceOfUnsecuredDebt",
1292
+ "ProceedsFromIssuanceOrSaleOfEquity",
1293
+ "ProceedsFromMaturitiesPrepaymentsAndCallsOfAvailableForSaleSecurities",
1294
+ "ProceedsFromPaymentsForOtherFinancingActivities",
1295
+ "ProceedsFromPaymentsToMinorityShareholders",
1296
+ "ProceedsFromRepaymentsOfShortTermDebt",
1297
+ "ProceedsFromRepaymentsOfShortTermDebtMaturingInThreeMonthsOrLess",
1298
+ "ProceedsFromSaleOfPropertyPlantAndEquipment",
1299
+ "ProceedsFromStockOptionsExercised",
1300
+ "ProfitLoss",
1301
+ "PropertyPlantAndEquipmentGross",
1302
+ "PropertyPlantAndEquipmentNet",
1303
+ "ReceivablesNetCurrent",
1304
+ "RedeemableNoncontrollingInterestEquityCarryingAmount",
1305
+ "RepaymentsOfDebtMaturingInMoreThanThreeMonths",
1306
+ "RepaymentsOfLongTermDebt",
1307
+ "ResearchAndDevelopmentExpense",
1308
+ "RestrictedCash",
1309
+ "RestrictedCashAndCashEquivalents",
1310
+ "RestrictedStockExpense",
1311
+ "RestructuringCharges",
1312
+ "RetainedEarningsAccumulatedDeficit",
1313
+ "Revenues",
1314
+ "RevenueFromContractWithCustomerExcludingAssessedTax",
1315
+ "SecuredLongTermDebt",
1316
+ "SellingAndMarketingExpense",
1317
+ "SellingGeneralAndAdministrativeExpense",
1318
+ "ShareBasedCompensation",
1319
+ "ShortTermBorrowings",
1320
+ "ShortTermInvestments",
1321
+ "StockholdersEquity",
1322
+ "StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest",
1323
+ "StockholdersEquityOther",
1324
+ "StockIssuedDuringPeriodValueNewIssues",
1325
+ "StockOptionPlanExpense",
1326
+ "StockRedeemedOrCalledDuringPeriodValue",
1327
+ "StockRepurchasedDuringPeriodValue",
1328
+ "StockRepurchasedAndRetiredDuringPeriodValue",
1329
+ "TaxesPayableCurrent",
1330
+ "TradingSecuritiesDebt",
1331
+ "TreasuryStockAcquiredAverageCostPerShare",
1332
+ "TreasuryStockSharesAcquired",
1333
+ "UnrealizedGainLossOnInvestments",
1334
+ "UnrecognizedTaxBenefits",
1335
+ "UnsecuredDebt",
1336
+ "VariableLeaseCost",
1337
+ "WeightedAverageNumberOfDilutedSharesOutstanding",
1338
+ "WeightedAverageNumberOfSharesOutstandingBasic",
1339
+ "WeightedAverageNumberDilutedSharesOutstandingAdjustment",
1340
+ ]
1341
+
1342
+ FISCAL_PERIODS = Literal["fy", "q1", "q2", "q3", "q4"] # pylint: disable=C0103
1343
+
1344
+ FISCAL_PERIODS_DICT = {
1345
+ "fy": None,
1346
+ "q1": 1,
1347
+ "q2": 2,
1348
+ "q3": 3,
1349
+ "q4": 4,
1350
+ }
openbb_platform/providers/sec/openbb_sec/utils/form4.py ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for handling Form 4 data, by company, from the SEC."""
2
+
3
+ import logging
4
+ from datetime import date as dateType
5
+ from typing import Optional
6
+
7
+ from openbb_core.app.model.abstract.error import OpenBBError
8
+
9
# Request headers for www.sec.gov; EDGAR requires a descriptive User-Agent.
SEC_HEADERS: dict[str, str] = {
    "User-Agent": "Jesus Window Washing jesus@stainedglass.com",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov",
}

# Maps Form 4 XML tag names to the snake_case column names used in the
# form4_data cache table and the returned records.
field_map = {
    "filing_date": "filing_date",
    "symbol": "symbol",
    "form": "form",
    "owner": "owner_name",
    "owner_cik": "owner_cik",
    "issuer": "company_name",
    "issuer_cik": "company_cik",
    "isDirector": "director",
    "isOfficer": "officer",
    "isTenPercentOwner": "ten_percent_owner",
    "isOther": "other",
    "otherText": "other_text",
    "officerTitle": "owner_title",
    "securityTitle": "security_type",
    "transactionDate": "transaction_date",
    "footnote": "footnote",
    "transactionShares": "securities_transacted",
    "transactionPricePerShare": "transaction_price",
    "transactionTotalValue": "transaction_value",
    "transactionCode": "transaction_type",
    "transactionAcquiredDisposedCode": "acquisition_or_disposition",
    "sharesOwnedFollowingTransaction": "securities_owned",
    "valueOwnedFollowingTransaction": "value_owned",
    "transactionTimeliness": "transaction_timeliness",
    "directOrIndirectOwnership": "ownership_type",
    "natureOfOwnership": "nature_of_ownership",
    "conversionOrExercisePrice": "conversion_exercise_price",
    "exerciseDate": "exercise_date",
    "expirationDate": "expiration_date",
    "deemedExecutionDate": "deemed_execution_date",
    "underlyingSecurityTitle": "underlying_security_title",
    "underlyingSecurityShares": "underlying_security_shares",
    "underlyingSecurityValue": "underlying_security_value",
}

# Human-readable meaning of the transactionTimeliness code.
timeliness_map = {
    "E": "Early",
    "L": "Late",
    "Empty": "On-time",
}

# SEC Form 4 transaction codes mapped to their official descriptions.
transaction_code_map = {
    "A": "Grant, award or other acquisition pursuant to Rule 16b-3(d)",
    "C": "Conversion of derivative security",
    "D": "Disposition to the issuer of issuer equity securities pursuant to Rule 16b-3(e)",
    "E": "Expiration of short derivative position",
    "F": (
        "Payment of exercise price or tax liability by delivering or withholding securities incident to the receipt, "
        "exercise or vesting of a security issued in accordance with Rule 16b-3"
    ),
    "G": "Bona fide gift",
    "H": "Expiration (or cancellation) of long derivative position with value received",
    "I": (
        "Discretionary transaction in accordance with Rule 16b-3(f) "
        "resulting in acquisition or disposition of issuer securities"
    ),
    "J": "Other acquisition or disposition (describe transaction)",
    "L": "Small acquisition under Rule 16a-6",
    "M": "Exercise or conversion of derivative security exempted pursuant to Rule 16b-3",
    "O": "Exercise of out-of-the-money derivative security",
    "P": "Open market or private purchase of non-derivative or derivative security",
    "S": "Open market or private sale of non-derivative or derivative security",
    "U": "Disposition pursuant to a tender of shares in a change of control transaction",
    "W": "Acquisition or disposition by will or the laws of descent and distribution",
    "X": "Exercise of in-the-money or at-the-money derivative security",
    "Z": "Deposit into or withdrawal from voting trust",
}
83
+
84
+
85
def get_logger():
    """Return the "openbb.sec" logger configured with a single stream handler.

    Fix: the original appended a new ``StreamHandler`` on every call, so
    calling this function more than once duplicated every log line. The
    handler is now attached only when the logger has none yet, making the
    function idempotent.

    Returns
    -------
    logging.Logger
        The shared "openbb.sec" logger at INFO level.
    """
    logger_instance = logging.getLogger("openbb.sec")
    if not logger_instance.handlers:
        handler = logging.StreamHandler()
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter("\n%(message)s\n")
        handler.setFormatter(formatter)
        logger_instance.addHandler(handler)
    logger_instance.setLevel(logging.INFO)

    return logger_instance


# Module-level logger shared by the download helpers below.
logger = get_logger()
99
+
100
+
101
def setup_database(conn):
    """Ensure the ``form4_data`` caching table exists on the given connection.

    Parameters
    ----------
    conn : sqlite3.Connection
        An open connection to the Form 4 cache database.
    """
    ddl = """
    CREATE TABLE IF NOT EXISTS form4_data (
        filing_date DATE,
        symbol TEXT,
        form TEXT,
        owner_name TEXT,
        owner_cik TEXT,
        company_name TEXT,
        company_cik TEXT,
        director BOOLEAN,
        officer BOOLEAN,
        ten_percent_owner BOOLEAN,
        other BOOLEAN,
        other_text TEXT,
        owner_title TEXT,
        security_type TEXT,
        transaction_date DATE,
        transaction_type TEXT,
        acquisition_or_disposition TEXT,
        footnote TEXT,
        securities_transacted REAL,
        transaction_price MONEY,
        transaction_value MONEY,
        securities_owned REAL,
        value_owned MONEY,
        transaction_timeliness TEXT,
        ownership_type TEXT,
        nature_of_ownership TEXT,
        conversion_exercise_price MONEY,
        exercise_date DATE,
        expiration_date DATE,
        deemed_execution_date DATE,
        underlying_security_title TEXT,
        underlying_security_shares REAL,
        underlying_security_value MONEY,
        filing_url TEXT NOT NULL
    );
    """
    conn.execute(ddl)
    conn.commit()
143
+
144
+
145
def add_missing_column(conn, column_name):
    """Add a missing column to ``form4_data``, inferring its type from the name.

    Type inference precedence mirrors the original nested conditional:
    price/value -> MONEY, shares -> REAL, is_ -> BOOLEAN, date -> DATE,
    otherwise TEXT.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection to the cache database.
    column_name : str
        Name of the column to add.
    """
    if "price" in column_name or "value" in column_name:
        inferred_type = "MONEY"
    elif "shares" in column_name:
        inferred_type = "REAL"
    elif "is_" in column_name:
        inferred_type = "BOOLEAN"
    elif "date" in column_name:
        inferred_type = "DATE"
    else:
        inferred_type = "TEXT"
    cursor = conn.cursor()
    cursor.execute(f"ALTER TABLE form4_data ADD COLUMN {column_name} {inferred_type}")
    conn.commit()
163
+
164
+
165
def compress_db(db_path):
    """Write a gzip-compressed copy of the database file to ``<db_path>.gz``."""
    # pylint: disable=import-outside-toplevel
    import gzip
    import shutil

    with open(db_path, "rb") as source:
        with gzip.open(f"{db_path}.gz", "wb") as target:
            shutil.copyfileobj(source, target)
173
+
174
+
175
def decompress_db(db_path):
    """Restore the raw database file from its ``<db_path>.gz`` gzip copy."""
    # pylint: disable=import-outside-toplevel
    import gzip
    import shutil

    with gzip.open(f"{db_path}.gz", "rb") as source:
        with open(db_path, "wb") as target:
            shutil.copyfileobj(source, target)
183
+
184
+
185
def close_db(conn, db_path):
    """Sort the table by "date" before closing the connection and compressing the database.

    Rebuilds ``form4_data`` ordered by ``filing_date``, closes the
    connection, writes a gzip snapshot via ``compress_db``, and removes the
    raw ``.db`` file so only the compressed copy persists on disk.
    """
    # pylint: disable=import-outside-toplevel
    import os

    # Rebuild the table sorted by filing_date: copy into a sorted table,
    # drop the original, then rename. Order of these statements matters.
    conn.execute(
        "CREATE TABLE IF NOT EXISTS form4_data_sorted AS SELECT * FROM form4_data ORDER BY filing_date"
    )
    conn.execute("DROP TABLE form4_data")
    conn.execute("ALTER TABLE form4_data_sorted RENAME TO form4_data")
    conn.commit()
    conn.close()
    # Keep only the gzip snapshot; the raw file is recreated on next use
    # by decompress_db.
    compress_db(db_path)
    os.remove(db_path)
199
+
200
+
201
async def get_form_4_urls(
    symbol,
    start_date: Optional[dateType] = None,
    end_date: Optional[dateType] = None,
    use_cache: bool = True,
):
    """Get the Form 4 filing URLs for a symbol.

    Fix: the original date filter,
    ``(not start_date or not item.filing_date) or start_date and ...``,
    evaluated truthy for every item whenever ``start_date`` (resp.
    ``end_date``) was None — the default — so the function returned an
    empty list unless both dates were supplied. The filter now only skips
    a filing when a bound is given and the filing falls outside it (or
    lacks the corresponding date field).

    Parameters
    ----------
    symbol : str
        Ticker symbol or CIK to query.
    start_date : Optional[date]
        Keep filings with ``filing_date`` on or after this date.
    end_date : Optional[date]
        Keep filings with ``report_date`` on or before this date.
    use_cache : bool
        Whether the underlying filings fetcher may use its cache.

    Returns
    -------
    list
        XML document URLs for the matching Form 4 filings.
    """
    # pylint: disable=import-outside-toplevel
    from openbb_sec.models.company_filings import SecCompanyFilingsFetcher

    fetcher = SecCompanyFilingsFetcher()
    form_4 = await fetcher.fetch_data(
        dict(
            symbol=symbol,
            form_type="4",
            provider="sec",
            limit=0,
            use_cache=use_cache,
        ),
        {},
    )
    # Normalize ISO-format strings to date objects; anything else becomes None.
    if not isinstance(start_date, dateType):
        start_date = (
            dateType.fromisoformat(start_date)  # type: ignore
            if start_date and isinstance(start_date, str)
            else None
        )
    if not isinstance(end_date, dateType):
        end_date = (
            dateType.fromisoformat(end_date)  # type: ignore
            if end_date and isinstance(end_date, str)
            else None
        )
    urls: list = []
    for item in form_4:
        # Apply a bound only when it was supplied; filings missing the
        # relevant date are skipped only when that bound is active.
        if start_date and (not item.filing_date or item.filing_date < start_date):
            continue
        if end_date and (not item.report_date or item.report_date > end_date):
            continue
        # Strip the accession-number directory prefix to get the raw XML URL.
        to_replace = f"{item.primary_doc.split('/')[0]}/"
        form_url = item.report_url.replace(to_replace, "")
        if form_url.endswith(".xml"):
            urls.append(form_url)

    return urls
260
+
261
+
262
def clean_xml(xml_content):
    """Strip artifacts that break XML parsing of SEC Form 4 documents.

    Removes backslashes and "/s/ " signature markers, then escapes any
    bare ampersand that is not already part of a known XML entity.
    """
    # pylint: disable=import-outside-toplevel
    import re

    without_backslashes = re.sub(r"\\", "", xml_content)
    without_signature_marks = without_backslashes.replace("/s/ ", "")
    # Escape only ampersands not already starting a recognized entity.
    return re.sub(
        r"&(?!amp;|lt;|gt;|quot;|apos;)", "&amp;", without_signature_marks
    )
271
+
272
+
273
async def get_form_4_data(url) -> dict:
    """Get the form 4 data.

    Downloads the raw XML at ``url``, checks for the SEC traffic-limit
    page, cleans the XML, and parses it with ``xmltodict``.

    Returns
    -------
    dict
        The parsed ``ownershipDocument`` element, or ``{}`` if the XML
        could not be parsed or the element is absent.

    Raises
    ------
    OpenBBError
        If the SEC traffic limit has been exceeded.
    """
    # pylint: disable=import-outside-toplevel
    from warnings import warn  # noqa
    from xmltodict import parse
    from openbb_core.provider.utils.helpers import amake_request

    async def response_callback(response, _):
        """Response callback function."""
        # Return raw bytes; decoding happens below.
        return await response.read()

    response = await amake_request(
        url,
        headers=SEC_HEADERS,
        response_callback=response_callback,
        timeout=30,
    )  # type: ignore
    response_text = response.decode("utf-8")

    # The SEC serves an HTML rate-limit page instead of XML when throttled.
    if "Traffic Limit" in response_text:
        raise OpenBBError(
            "You've exceeded the SEC's traffic limit. Access will be limited for 10 minutes."
            " Reduce the number of requests by using a more specific date range."
        )

    cleaned_response = clean_xml(response_text)

    try:
        xml_data = parse(cleaned_response)
    except Exception as e:
        # Best-effort: a single unparseable filing is skipped, not fatal.
        warn(f"Error parsing XML from {url}: {e}")
        return {}

    return (
        xml_data.get("ownershipDocument") if xml_data.get("ownershipDocument") else {}
    )
309
+
310
+
311
async def parse_form_4_data(  # noqa: PLR0915, PLR0912 # pylint: disable=too-many-branches
    data,
):
    """Parse the Form 4 data.

    Flattens a parsed ``ownershipDocument`` dict (from ``get_form_4_data``)
    into a list of transaction/holding row dicts. Shared filing-level
    fields (owner, issuer, relationship flags) are merged into every row;
    non-derivative and derivative tables are processed separately.

    Parameters
    ----------
    data : dict
        The ``ownershipDocument`` element of a Form 4 filing.

    Returns
    -------
    list
        One dict per reported transaction or holding.
    """

    # A filing may report one owner (dict) or several (list); multiple
    # owners/CIKs are joined with ";".
    owner = data.get("reportingOwner", {})
    owners = ""
    ciks = ""
    if isinstance(owner, list):
        owners = ";".join(
            [d.get("reportingOwnerId", {}).get("rptOwnerName") for d in owner]
        )
        ciks = ";".join(
            [d.get("reportingOwnerId", {}).get("rptOwnerCik") for d in owner]
        )

    issuer = data.get("issuer", {})
    # For multiple owners, only the first owner's relationship flags are used.
    owner_relationship = (
        owner.get("reportingOwnerRelationship", {})
        if isinstance(owner, dict)
        else (
            owner[0].get("reportingOwnerRelationship", {})
            if isinstance(owner, list)
            else {}
        )
    )
    signature_data = data.get("ownerSignature")

    if signature_data and isinstance(signature_data, dict):
        signature_date = signature_data.get("signatureDate")
    elif signature_data and isinstance(signature_data, list):
        signature_date = signature_data[0].get("signatureDate")
    else:
        signature_date = None

    # Build an id -> text lookup for footnotes. NOTE(review): below,
    # `footnotes` is sometimes reassigned to a joined string inside the
    # table loops, and later branches isinstance-check for that — the
    # mutation is intentional but fragile.
    footnotes = data.get("footnotes", {})
    if footnotes:
        footnote_items = footnotes.get("footnote")
        if isinstance(footnote_items, dict):
            footnote_items = [footnote_items]
        footnotes = {item["@id"]: item["#text"] for item in footnote_items}

    # Filing-level fields copied into every output row. The signature date
    # is preferred over periodOfReport as the filing date.
    metadata = {
        "filing_date": signature_date or data.get("periodOfReport"),
        "symbol": issuer.get("issuerTradingSymbol", "").upper(),
        "form": data.get("documentType"),
        "owner": (
            owners if owners else owner.get("reportingOwnerId", {}).get("rptOwnerName")
        ),
        "owner_cik": (
            ciks if ciks else owner.get("reportingOwnerId", {}).get("rptOwnerCik")
        ),
        "issuer": issuer.get("issuerName"),
        "issuer_cik": issuer.get("issuerCik"),
        **owner_relationship,
    }
    results: list = []

    # --- Table I: non-derivative transactions/holdings -------------------
    if data.get("nonDerivativeTable") and (
        data["nonDerivativeTable"].get("nonDerivativeTransaction")
        or data["nonDerivativeTable"].get("nonDerivativeHolding")
    ):
        temp_table = data["nonDerivativeTable"]
        tables = (
            temp_table["nonDerivativeTransaction"]
            if temp_table.get("nonDerivativeTransaction")
            else temp_table["nonDerivativeHolding"]
        )
        parsed_table1: list = []
        if isinstance(tables, dict):
            tables = [tables]
        for table in tables:
            if isinstance(table, str):
                continue
            new_row = {**metadata}
            for key, value in table.items():
                if key == "transactionCoding":
                    new_row["transaction_type"] = value.get("transactionCode")
                    new_row["form"] = (
                        value.get("transactionFormType") or metadata["form"]
                    )
                elif isinstance(value, dict):
                    # Footnote references can be a single id or a list.
                    if "footnoteId" in value:
                        if isinstance(value["footnoteId"], list):
                            ids = [item["@id"] for item in value["footnoteId"]]
                            footnotes = (
                                "; ".join(
                                    [
                                        footnotes.get(footnote_id, "")
                                        for footnote_id in ids
                                    ]
                                )
                                if isinstance(footnotes, dict)
                                else footnotes
                            )
                            new_row["footnote"] = footnotes
                        else:
                            footnote_id = value["footnoteId"]["@id"]
                            new_row["footnote"] = (
                                (
                                    footnotes
                                    if isinstance(footnotes, str)
                                    else footnotes.get(footnote_id)
                                )
                                if footnotes
                                else None
                            )
                    # xmltodict wraps leaf values as {"value": ...}; unwrap
                    # one or two levels of nesting.
                    for k, v in value.items():
                        if k == "value":
                            new_row[key] = v
                        if isinstance(v, dict):
                            if "footnoteId" in v:
                                if isinstance(v["footnoteId"], list):
                                    ids = [item["@id"] for item in v["footnoteId"]]
                                    footnotes = (
                                        footnotes
                                        if isinstance(footnotes, str)
                                        else (
                                            "; ".join(
                                                [
                                                    footnotes.get(footnote_id)
                                                    for footnote_id in ids
                                                ]
                                            )
                                            if footnotes
                                            else None
                                        )
                                    )
                                    new_row["footnote"] = footnotes
                                else:
                                    footnote_id = v["footnoteId"]["@id"]
                                    new_row["footnote"] = (
                                        (
                                            footnotes
                                            if isinstance(footnotes, str)
                                            else footnotes.get(footnote_id)
                                        )
                                        if footnotes
                                        else None
                                    )
                            for k1, v1 in v.items():
                                if k1 == "value":
                                    new_row[k] = v1
            if new_row:
                parsed_table1.append(new_row)

        results.extend(parsed_table1)

    # --- Table II: derivative transactions --------------------------------
    if (
        data.get("derivativeTable")
        and data["derivativeTable"].get("derivativeTransaction")
    ) or data.get("derivativeSecurity"):
        parsed_table2: list = []
        tables = (
            data["derivativeSecurity"]
            if data.get("derivativeSecurity")
            else data["derivativeTable"]["derivativeTransaction"]
        )
        if isinstance(tables, dict):
            tables = [tables]
        for table in tables:
            if isinstance(table, str):
                continue
            new_row = {**metadata}
            for key, value in table.items():
                if key == "transactionCoding":
                    new_row["transaction_type"] = value.get("transactionCode")
                    new_row["form"] = (
                        value.get("transactionFormType") or metadata["form"]
                    )
                elif isinstance(value, dict):
                    for k, v in value.items():
                        if k == "value":
                            new_row[key] = v
                        if isinstance(v, dict):
                            for k1, v1 in v.items():
                                if k1 == "value":
                                    new_row[k] = v1
            # Derivative rows report "transactionValue"; rename it to match
            # the non-derivative "transactionTotalValue" key.
            t_value = new_row.pop("transactionValue", None)
            if t_value:
                new_row["transactionTotalValue"] = t_value
            parsed_table2.append(new_row)

        results.extend(parsed_table2)

    return results
497
+
498
+
499
async def download_data(urls, use_cache: bool = True):  # noqa: PLR0915
    """Get the Form 4 data from a list of URLs.

    When caching is enabled, previously-downloaded filings are read from a
    SQLite database under the user cache directory (stored compressed as
    ``sec_form4.db.gz``); only uncached URLs are fetched from the SEC, in
    chunks of 8 with a pause between chunks to respect rate limits.

    Parameters
    ----------
    urls : list
        Form 4 XML URLs, typically from ``get_form_4_urls``.
    use_cache : bool
        Whether to read/write the SQLite cache. Defaults to True.

    Returns
    -------
    list
        Row dicts sorted by ``filing_date`` descending; rows without a
        filing date are dropped.

    Raises
    ------
    OpenBBError
        On database errors the user declines to recover from, or any
        unexpected failure during download/processing.
    """
    # pylint: disable=import-outside-toplevel
    import asyncio  # noqa
    import os
    import sqlite3
    from numpy import nan
    from openbb_core.app.utils import get_user_cache_directory
    from pandas import DataFrame

    results: list = []
    non_cached_urls: list = []

    try:
        if use_cache is True:
            db_dir = f"{get_user_cache_directory()}/sql"
            db_path = f"{db_dir}/sec_form4.db"
            # Decompress the database file
            if os.path.exists(f"{db_path}.gz"):
                decompress_db(db_path)

            os.makedirs(db_dir, exist_ok=True)

            try:
                conn = sqlite3.connect(db_path)
                setup_database(conn)
                cached_data = get_cached_data(urls, conn)
                cached_urls = {entry["filing_url"] for entry in cached_data}
                for url in urls:
                    if url not in cached_urls:
                        non_cached_urls.append(url)
            except sqlite3.DatabaseError as e:
                # NOTE(review): interactive input() here blocks
                # non-interactive callers; also if sqlite3.connect itself
                # raised, `conn` may be unbound in the outer except below —
                # confirm intended behavior.
                logger.info("Error connecting to the database.")
                retry_input = input(
                    "Would you like to retry with a new database? (y/n): "
                )
                if retry_input.lower() == "y":
                    # Sideline the corrupt file and start a fresh database.
                    faulty_db_path = f"{db_path}.faulty"
                    os.rename(db_path, faulty_db_path)
                    logger.info("Renamed faulty database to %s", faulty_db_path)
                    db_path = f"{db_dir}/sec_form4.db"
                    conn = sqlite3.connect(db_path)
                    setup_database(conn)
                    cached_data = get_cached_data(urls, conn)
                    cached_urls = {entry["filing_url"] for entry in cached_data}
                    for url in urls:
                        if url not in cached_urls:
                            non_cached_urls.append(url)
                else:
                    raise OpenBBError(e) from e

            results.extend(cached_data)
        elif use_cache is False:
            non_cached_urls = urls

        async def get_one(url):
            """Get the data for one URL."""
            data = await get_form_4_data(url)
            result = await parse_form_4_data(data)
            # Cache empty results too, so the URL is not re-fetched next run.
            if not result and use_cache is True:
                df = DataFrame([{"filing_url": url}])
                df.to_sql("form4_data", conn, if_exists="append", index=False)

            if result:
                df = DataFrame(result)
                df.loc[:, "filing_url"] = url
                df = df.replace({nan: None}).rename(columns=field_map)
                try:
                    if use_cache is True:
                        df.to_sql("form4_data", conn, if_exists="append", index=False)
                except sqlite3.DatabaseError as e:
                    # Schema drift: new XML fields surface as unknown
                    # columns; add them on the fly and retry once.
                    if "no column named" in str(e):
                        missing_column = (
                            str(e).split("no column named ")[1].split(" ")[0]
                        )
                        missing_column = field_map.get(missing_column, missing_column)
                        add_missing_column(conn, missing_column)
                        df.to_sql("form4_data", conn, if_exists="append", index=False)
                    else:
                        raise OpenBBError(e) from e
                results.extend(df.replace({nan: None}).to_dict(orient="records"))

        # Rough ETA: ~7 requests per 1.8 seconds given the chunk/sleep pacing.
        time_estimate = (len(non_cached_urls) / 7) * 1.8
        logger.info(
            "Found %d total filings and %d"
            " uncached entries to download, estimated download time: %d seconds.",
            len(urls),
            len(non_cached_urls),
            round(time_estimate),
        )
        min_warn_time = 10
        if time_estimate > min_warn_time:
            logger.info(
                "Warning: This function is not intended for mass data collection."
                " Long download times are due to limitations with concurrent downloads from the SEC."
                "\n\nReduce the number of requests by using a more specific date range."
            )

        if len(non_cached_urls) > 0:
            # Download in chunks of 8 with a pause between chunks to stay
            # under the SEC's request-rate limit.
            async with asyncio.Semaphore(8):
                for url_chunk in [
                    non_cached_urls[i : i + 8]
                    for i in range(0, len(non_cached_urls), 8)
                ]:
                    await asyncio.gather(*[get_one(url) for url in url_chunk])
                    await asyncio.sleep(1.125)

        if use_cache is True:
            close_db(conn, db_path)

        # Drop rows with no filing_date before sorting on it.
        results = [entry for entry in results if entry.get("filing_date")]

        return sorted(results, key=lambda x: x["filing_date"], reverse=True)

    except Exception as e:  # pylint: disable=broad-except
        if use_cache is True:
            close_db(conn, db_path)
        raise OpenBBError(
            f"Unexpected error while downloading and processing data -> {e.__class__.__name__}: {e}"
        ) from e
619
+
620
+
621
def get_cached_data(urls, conn):
    """Retrieve cached Form 4 rows for a list of filing URLs.

    Parameters
    ----------
    urls : list
        Filing URLs to look up in the ``form4_data`` table.
    conn : sqlite3.Connection
        Open connection to the cache database.

    Returns
    -------
    list
        Matching rows as dicts (NaN replaced with None); empty if none.
    """
    # pylint: disable=import-outside-toplevel
    from numpy import nan
    from pandas import read_sql

    marks = ", ".join("?" for _ in urls)
    sql = f"SELECT * FROM form4_data WHERE filing_url IN ({marks})"  # noqa
    frame = read_sql(sql, conn, params=urls)
    if frame.empty:
        return []
    return frame.replace({nan: None}).to_dict(orient="records")
631
+
632
+
633
async def get_form_4(
    symbol,
    start_date: Optional[dateType] = None,
    end_date: Optional[dateType] = None,
    limit: Optional[int] = None,
    use_cache: bool = True,
) -> list[dict]:
    """Get the Form 4 data by ticker symbol or CIK number.

    Resolves the filing URLs, optionally truncates to ``limit``, and
    downloads/parses the filings.

    Raises
    ------
    OpenBBError
        On download timeout, or when no data is returned for the symbol.
    """
    # pylint: disable=import-outside-toplevel
    import asyncio

    try:
        filing_urls = await get_form_4_urls(symbol, start_date, end_date, use_cache)
        if limit is not None:
            filing_urls = filing_urls[:limit]
        data = await download_data(filing_urls, use_cache)
    except asyncio.TimeoutError as e:
        raise OpenBBError(
            "A timeout error occurred while downloading the data. Please try again."
        ) from e

    if not data:
        raise OpenBBError(f"No Form 4 data was returned for {symbol}.")

    return data
openbb_platform/providers/sec/openbb_sec/utils/frames.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Frames Utilities."""
2
+
3
+ # pylint: disable=line-too-long
4
+
5
+ import asyncio
6
+ from datetime import datetime
7
+ from typing import Dict, List, Optional, Union
8
+ from warnings import warn
9
+
10
+ from aiohttp_client_cache import SQLiteBackend
11
+ from aiohttp_client_cache.session import CachedSession
12
+ from openbb_core.app.model.abstract.error import OpenBBError
13
+ from openbb_core.app.utils import get_user_cache_directory
14
+ from openbb_core.provider.utils.errors import EmptyDataError
15
+ from openbb_core.provider.utils.helpers import amake_request
16
+ from openbb_sec.utils.definitions import (
17
+ FISCAL_PERIODS,
18
+ FISCAL_PERIODS_DICT,
19
+ HEADERS,
20
+ SHARES_FACTS,
21
+ TAXONOMIES,
22
+ USD_PER_SHARE_FACTS,
23
+ )
24
+ from openbb_sec.utils.helpers import get_all_companies, symbol_map
25
+ from pandas import DataFrame
26
+
27
+
28
async def fetch_data(url, use_cache, persist) -> Union[Dict, List[Dict]]:
    """Fetch the data from the constructed URL.

    Parameters
    ----------
    url : str
        Fully-constructed SEC XBRL API URL.
    use_cache : bool
        Whether to route the request through a SQLite-backed HTTP cache.
    persist : bool
        When caching: False expires cached entries after 24 hours;
        True keeps them with no expiry.

    Returns
    -------
    Union[Dict, List[Dict]]
        The decoded JSON response.
    """
    response: Union[Dict, List[Dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_frames"
        async with CachedSession(
            cache=(
                # 24-hour expiry for current-year data; permanent otherwise.
                SQLiteBackend(cache_dir, expire_after=3600 * 24)
                if persist is False
                else SQLiteBackend(cache_dir)
            )
        ) as session:
            try:
                response = await amake_request(url, headers=HEADERS, session=session)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, headers=HEADERS)  # type: ignore
    return response
47
+
48
+
49
async def get_frame(  # pylint: disable =too-many-arguments,too-many-locals, too-many-statements
    fact: str = "Revenues",
    year: Optional[int] = None,
    fiscal_period: Optional[FISCAL_PERIODS] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    units: Optional[str] = "USD",
    instantaneous: bool = False,
    use_cache: bool = True,
) -> Dict:
    """Get a frame of data for a given fact.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    This API supports for annual, quarterly and instantaneous data:

    https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2019Q1I.json

    Where the units of measure specified in the XBRL contains a numerator and a denominator,
    these are separated by “-per-” such as “USD-per-shares”. Note that the default unit in XBRL is “pure”.

    The period format is CY#### for annual data (duration 365 days +/- 30 days),
    CY####Q# for quarterly data (duration 91 days +/- 30 days).

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful different reporting start and end dates for facts contained in a frame.

    Parameters
    ----------
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, the current year is used.
    fiscal_period: Literal["fy", "q1", "q2", "q3", "q4"], optional
        The fiscal period to retrieve the data for. If not provided, the most recent quarter is used.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    units : str, optional
        The units to use. Defaults to "USD". This should be a valid unit from the SEC taxonomy, see the notes above.
        The most common units are "USD", "shares", and "USD-per-shares". EPS and outstanding shares facts will
        automatically set.
    instantaneous: bool
        Whether to retrieve instantaneous data. See the notes above for more information. Defaults to False.
        Some facts are only available as instantaneous data.
        The function will automatically attempt to retrieve the data if the initial fiscal quarter request fails.
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the frame.
    """
    current_date = datetime.now().date()
    quarter = FISCAL_PERIODS_DICT.get(fiscal_period) if fiscal_period else None
    if year is None and quarter is None:
        # NOTE(review): (month - 1) // 3 yields 0 in Jan-Mar, which is
        # falsy below, so no "Q#" suffix is appended and the request falls
        # back to the current-year annual frame — confirm this is intended.
        quarter = (current_date.month - 1) // 3
        year = current_date.year

    if year is None:
        year = current_date.year

    # Only current-year requests use a persistent (non-expiring) cache.
    persist = current_date.year == year

    # Facts with known unit types override any user-supplied `units`.
    if fact in SHARES_FACTS:
        units = "shares"

    if fact in USD_PER_SHARE_FACTS:
        units = "USD-per-shares"

    url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{fact}/{units}/CY{year}"

    if quarter:
        url = url + f"Q{quarter}"

    # "I" suffix requests an instantaneous (point-in-time) frame.
    if instantaneous:
        url = url + "I"

    url = url + ".json"
    response: Union[Dict, List[Dict]] = {}
    try:
        response = await fetch_data(url, use_cache, persist)
    except Exception as e:  # pylint: disable=W0718
        message = (
            "No frame was found with the combination of parameters supplied."
            + " Try adjusting the period."
            + " Not all GAAP measures have frames available."
        )
        # Fallbacks: toggle between instantaneous and calendar-period
        # frames before giving up, since availability varies by fact.
        if url.endswith("I.json"):
            warn("No instantaneous frame was found, trying calendar period data.")
            url = url.replace("I.json", ".json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        elif "Q" in url and not url.endswith("I.json"):
            warn(
                "No frame was found for the requested quarter, trying instantaneous data."
            )
            url = url.replace(".json", "I.json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        else:
            raise OpenBBError(message) from e

    # Sort observations by reported value, largest first.
    data = sorted(response.get("data", {}), key=lambda x: x["val"], reverse=True)  # type: ignore
    metadata = {
        "frame": response.get("ccp", ""),  # type: ignore
        "tag": response.get("tag", ""),  # type: ignore
        "label": response.get("label", ""),  # type: ignore
        "description": response.get("description", ""),  # type: ignore
        "taxonomy": response.get("taxonomy", ""),  # type: ignore
        "unit": response.get("uom", ""),  # type: ignore
        "count": response.get("pts", ""),  # type: ignore
    }
    df = DataFrame(data)
    # Map each CIK to its ticker symbol for readability.
    companies = await get_all_companies(use_cache=use_cache)
    cik_to_symbol = companies.set_index("cik")["symbol"].to_dict()
    df["symbol"] = df["cik"].astype(str).map(cik_to_symbol)
    df["unit"] = metadata.get("unit")
    df["fact"] = metadata.get("label")
    df["frame"] = metadata.get("frame")
    df = df.fillna("N/A").replace("N/A", None)
    results = {"metadata": metadata, "data": df.to_dict("records")}

    return results
184
+
185
+
186
async def get_concept(
    symbol: str,
    fact: str = "Revenues",
    year: Optional[int] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    use_cache: bool = True,
) -> Dict:
    """Return all the XBRL disclosures from a single company (CIK) Concept (a taxonomy and tag) into a single JSON file.

    Each entry contains a separate array of facts for each units of measure that the company has chosen to disclose
    (e.g. net profits reported in U.S. dollars and in Canadian dollars).

    Parameters
    ----------
    symbol: str
        The ticker symbol to look up. A comma-separated list queries
        multiple symbols concurrently.
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, all reported values will be returned.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the company concept.

    Raises
    ------
    EmptyDataError
        If no data is returned for any of the requested symbols.
    """
    symbols = symbol.split(",")
    results: List[Dict] = []
    messages: List = []
    metadata: Dict = {}

    async def get_one(ticker):
        """Get data for one symbol."""
        ticker = ticker.upper()
        message = f"Symbol Error: No data was found for, {ticker} and {fact}"
        cik = await symbol_map(ticker)
        if cik == "":
            # Unknown ticker: record the problem and move on — per-symbol
            # failures do not abort the batch.
            message = f"Symbol Error: No CIK was found for, {ticker}"
            warn(message)
            messages.append(message)
        else:
            url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{fact}.json"
            response: Union[Dict, List[Dict]] = {}
            try:
                response = await fetch_data(url, use_cache, False)
            except Exception as _:  # pylint: disable=W0718
                warn(message)
                messages.append(message)
            if response:
                units = response.get("units", {})  # type: ignore
                metadata[ticker] = {
                    "cik": response.get("cik", ""),  # type: ignore
                    "taxonomy": response.get("taxonomy", ""),  # type: ignore
                    "tag": response.get("tag", ""),  # type: ignore
                    "label": response.get("label", ""),  # type: ignore
                    "description": response.get("description", ""),  # type: ignore
                    "name": response.get("entityName", ""),  # type: ignore
                    "units": (
                        # Single-unit responses are flattened to a scalar.
                        list(units) if units and len(units) > 1 else list(units)[0]
                    ),
                }
                # Flatten the per-unit fact arrays, tagging each record
                # with its unit and company identifiers.
                for k, v in units.items():
                    unit = k
                    values = v
                    for item in values:
                        item["unit"] = unit
                        item["symbol"] = ticker
                        item["cik"] = metadata[ticker]["cik"]
                        item["name"] = metadata[ticker]["name"]
                        item["fact"] = metadata[ticker]["label"]
                    results.extend(values)

    await asyncio.gather(*[get_one(ticker) for ticker in symbols])

    if not results:
        raise EmptyDataError(f"{messages}")

    # Year filter is best-effort: fall back to all entries when the
    # requested fiscal year has no matches.
    if year is not None:
        filtered_results = [d for d in results if str(year) == str(d.get("fy"))]
        if len(filtered_results) > 0:
            results = filtered_results
        if len(filtered_results) == 0:
            warn(
                f"No results were found for {fact} in the year, {year}."
                " Returning all entries instead. Concept and fact names may differ by company and year."
            )

    return {
        "metadata": metadata,
        "data": sorted(results, key=lambda x: (x["filed"], x["end"]), reverse=True),
    }
openbb_platform/providers/sec/openbb_sec/utils/helpers.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SEC Helpers module."""
2
+
3
+ # pylint: disable =unused-argument
4
+
5
+ from typing import Dict, List, Optional, Union
6
+
7
+ from aiohttp_client_cache import SQLiteBackend
8
+ from aiohttp_client_cache.session import CachedSession
9
+ from openbb_core.app.model.abstract.error import OpenBBError
10
+ from openbb_core.app.utils import get_user_cache_directory
11
+ from openbb_core.provider.utils.helpers import amake_request, make_request
12
+ from openbb_sec.utils.definitions import HEADERS, SEC_HEADERS
13
+ from pandas import DataFrame
14
+
15
+
16
async def sec_callback(response, session):
    """Dispatch an SEC response to the right decoder based on its Content-Type."""
    mime = response.headers.get("Content-Type", "")
    if "application/json" in mime:
        return await response.json()
    if "text/html" in mime:
        # SEC HTML archive pages are served as latin-1, not utf-8.
        return await response.text(encoding="latin-1")
    return await response.text()
24
+
25
+
26
async def get_all_companies(use_cache: bool = True) -> DataFrame:
    """Get all company names, tickers, and CIK numbers registered with the SEC.

    Companies are sorted by market cap.

    Returns
    -------
    DataFrame: Pandas DataFrame with columns for Symbol, Company Name, and CIK Number.

    Example
    -------
    >>> tickers = get_all_companies()
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    data: Union[dict, List[dict]] = {}
    if use_cache is True:
        # Cache the company map for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_companies",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS)  # type: ignore

    # The JSON is keyed by row index; transpose so each row is one company.
    frame = DataFrame(data).transpose()
    frame.columns = ["cik", "symbol", "name"]
    return frame.astype(str)
58
+
59
+
60
async def get_all_ciks(use_cache: bool = True) -> DataFrame:
    """Get a list of entity names and their CIK number."""
    url = "https://www.sec.gov/Archives/edgar/cik-lookup-data.txt"

    async def callback(response, session):
        """Decode the CIK lookup file, which is served as latin-1 text."""
        return await response.text(encoding="latin-1")

    data: Union[dict, List[dict], str] = {}
    if use_cache is True:
        # Cache the lookup file for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_ciks",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session, response_callback=callback)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS, response_callback=callback)  # type: ignore

    # Each line is colon-delimited; keep only the first two fields
    # (institution name and CIK number).
    rows = [line.split(":") for line in data.split("\n")]  # type: ignore
    frame = DataFrame(rows).iloc[:, 0:2]
    frame.columns = ["Institution", "CIK Number"]
    return frame.dropna().astype(str)
95
+
96
+
97
async def get_mf_and_etf_map(use_cache: bool = True) -> DataFrame:
    """Return the CIK number of a ticker symbol for querying the SEC API."""
    url = "https://www.sec.gov/files/company_tickers_mf.json"
    data: Union[dict, List[dict]] = {}
    if use_cache is True:
        # Cache the fund map for two days in the user's SQLite HTTP cache.
        backend = SQLiteBackend(
            f"{get_user_cache_directory()}/http/sec_mf_etf_map",
            expire_after=3600 * 24 * 2,
        )
        async with CachedSession(cache=backend) as session:
            try:
                await session.delete_expired_responses()
                data = await amake_request(url, headers=SEC_HEADERS, session=session, response_callback=sec_callback)  # type: ignore
            finally:
                await session.close()
    else:
        data = await amake_request(url, headers=SEC_HEADERS, response_callback=sec_callback)  # type: ignore

    # The payload is columnar: {"fields": [...], "data": [[...], ...]}.
    return DataFrame(data=data["data"], columns=data["fields"]).astype(str)  # type: ignore
119
+
120
+
121
async def search_institutions(keyword: str, use_cache: bool = True) -> DataFrame:
    """Search for an institution by name. It is case-insensitive."""
    ciks = await get_all_ciks(use_cache=use_cache)
    matches = ciks["Institution"].str.contains(keyword, case=False)
    return ciks[matches]
126
+
127
+
128
async def symbol_map(symbol: str, use_cache: bool = True) -> str:
    """Return the CIK number of a ticker symbol for querying the SEC API.

    Parameters
    ----------
    symbol : str
        The ticker symbol. Dots are translated to dashes per SEC convention.
    use_cache : bool
        Whether to use the cached company/fund maps.

    Returns
    -------
    str
        The ten-digit, zero-padded CIK number, or an empty string when the
        symbol is not found in either the company or the fund map.
    """
    symbol = symbol.upper().replace(".", "-")
    symbols = await get_all_companies(use_cache=use_cache)

    if symbol not in symbols["symbol"].to_list():
        # Fall back to the mutual fund / ETF map before giving up.
        symbols = await get_mf_and_etf_map(use_cache=use_cache)
        if symbol not in symbols["symbol"].to_list():
            return ""
    cik = symbols[symbols["symbol"] == symbol]["cik"].iloc[0]
    # The SEC API expects the CIK left-padded with zeros to 10 digits;
    # str.zfill replaces the original hand-rolled padding loop.
    return str(cik).zfill(10)
144
+
145
+
146
async def cik_map(cik: Union[str, int], use_cache: bool = True) -> str:
    """Convert a CIK number to a ticker symbol. Enter CIK as an integer with no leading zeros.

    Function is not meant for funds.

    Parameters
    ----------
    cik : int
        The CIK number to convert to a ticker symbol.

    Returns
    -------
    str: The ticker symbol associated with the CIK number.
    """
    # String input may carry leading zeros; the company map stores bare CIKs.
    _cik = cik.lstrip("0") if isinstance(cik, str) else str(cik)
    companies = await get_all_companies(use_cache=use_cache)
    if _cik not in companies["cik"].to_list():
        return f"Error: CIK, {_cik}, does not have a unique ticker."
    return companies[companies["cik"] == _cik]["symbol"].iloc[0]
169
+
170
+
171
def get_schema_filelist(query: str = "", url: str = "", use_cache: bool = True) -> List:
    """Get a list of schema files from the SEC website."""
    from pandas import read_html  # pylint: disable=import-outside-toplevel

    base_url = url if url else f"https://xbrl.fasb.org/us-gaap/{query}"
    # Directory listings need a trailing slash when a sub-path was queried.
    fetch_url = base_url + "/" if query else base_url
    response = make_request(fetch_url)
    names = read_html(response.content)[0]["Name"].dropna()
    if len(names) == 0:
        return []
    # The first row of the listing is the parent-directory link; replace it
    # with the resolved URL so callers can build absolute paths.
    names.iloc[0] = base_url if not query else base_url + "/"
    return names.to_list()
186
+
187
+
188
async def download_zip_file(
    url, symbol: Optional[str] = None, use_cache: bool = True
) -> List[Dict]:
    """Download a list of files from URLs.

    Parameters
    ----------
    url : str
        URL of an SEC fails-to-deliver ZIP archive.
    symbol : Optional[str]
        When provided, parsed rows are filtered to this ticker symbol.
    use_cache : bool
        Whether to use the SQLite-backed HTTP cache for the raw download.

    Returns
    -------
    List[Dict]
        One record per data row of the pipe-delimited file(s) in the archive.
    """
    # pylint: disable=import-outside-toplevel
    from io import BytesIO
    from zipfile import ZipFile

    from pandas import concat, read_csv, to_datetime

    results = DataFrame()

    async def callback(response, session):
        """Response callback for ZIP file downloads."""
        # Return the raw bytes; the ZIP payload is decoded below.
        return await response.read()

    response: Union[dict, List[dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_ftd"
        async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
            try:
                response = await amake_request(url, session=session, headers=HEADERS, response_callback=callback)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, headers=HEADERS, response_callback=callback)  # type: ignore

    try:
        # Fast path: a single-member archive pandas can read directly.
        data = read_csv(BytesIO(response), compression="zip", sep="|")  # type: ignore
        # The last two rows are a trailer, not data.
        results = data.iloc[:-2]
    except ValueError:
        # Multi-member archives: unpack each member and concatenate.
        zip_file = ZipFile(BytesIO(response))  # type: ignore
        file_list = [d.filename for d in zip_file.infolist()]
        for item in file_list:
            with zip_file.open(item) as _item:
                _file = read_csv(
                    _item,
                    encoding="ISO-8859-1",
                    sep="|",
                    low_memory=False,
                    on_bad_lines="skip",
                )
                results = concat([results, _file.iloc[:-2]])

    # Normalize column names only when the expected FTD header is present.
    if "SETTLEMENT DATE" in results.columns:
        results = results.rename(
            columns={
                "SETTLEMENT DATE": "date",
                "SYMBOL": "symbol",
                "CUSIP": "cusip",
                "QUANTITY (FAILS)": "quantity",
                "PRICE": "price",
                "DESCRIPTION": "description",
            }
        )
        if symbol:
            results = results[results["symbol"] == symbol]
        results["date"] = to_datetime(results["date"], format="%Y%m%d").dt.date
        # Replace invalid decimal values with None
        results["price"] = results["price"].mask(
            ~results["price"].str.contains(r"^\d+(?:\.\d+)?$", regex=True), None
        )
        results["price"] = results["price"].astype(float)

    return results.reset_index(drop=True).to_dict("records")
253
+
254
+
255
async def get_ftd_urls() -> Dict:
    """Get Fails-to-Deliver Data URLs."""
    from pandas import Series  # pylint: disable=import-outside-toplevel

    results: Dict = {}
    r = await amake_request("https://www.sec.gov/data.json", headers=SEC_HEADERS)
    datasets = r.get("dataset", {})  # type: ignore

    # Locate the Fails-to-Deliver dataset entry in the SEC data catalog.
    fails_entry = None
    for entry in datasets:
        if "title" in entry and entry["title"] == "Fails-to-Deliver Data":
            fails_entry = entry
            break

    if fails_entry is not None:
        distributions = fails_entry["distribution"]
        urls = [d["downloadURL"] for d in distributions if "downloadURL" in d]
        # The file date is encoded in the last characters of each URL.
        dates = [u[-11:-4] for u in urls]
        ftd_urls = Series(index=dates, data=urls)
        ftd_urls.index = ftd_urls.index.str.replace("_", "")
        results = ftd_urls.to_dict()

    return results
281
+
282
+
283
async def get_series_id(
    symbol: Optional[str] = None, cik: Optional[str] = None, use_cache: bool = True
):
    """Map the fund to the series and class IDs for validating the correct filing.

    For an exact match, use a symbol.

    Parameters
    ----------
    symbol : Optional[str]
        The fund's ticker symbol. Takes precedence over `cik` when both are given.
    cik : Optional[str]
        The fund's CIK number.
    use_cache : bool
        Whether to use the cached SEC mutual fund / ETF map.

    Returns
    -------
    DataFrame
        Rows from the fund map matching the target; exact matches when found.

    Raises
    ------
    OpenBBError
        If neither symbol nor cik is provided.
    """
    symbol = symbol if symbol else ""
    cik = cik if cik else ""

    if not symbol and not cik:
        raise OpenBBError("Either symbol or cik must be provided.")

    target = symbol if symbol else cik
    choice = "cik" if not symbol else "symbol"
    funds = await get_mf_and_etf_map(use_cache=use_cache)

    # Fuzzy match across all identifier columns first.
    results = funds[
        funds["cik"].str.contains(target, case=False)
        | funds["seriesId"].str.contains(target, case=False)
        | funds["classId"].str.contains(target, case=False)
        | funds["symbol"].str.contains(target, case=False)
    ]

    if len(results) > 0:
        # Narrow the fuzzy matches to exact matches on the chosen column.
        # (The original `choice if not symbol else choice` always evaluated
        # to `choice`; simplified.)
        results = results[results[choice] == target]

    return results
312
+
313
+
314
async def get_nport_candidates(symbol: str, use_cache: bool = True) -> List[Dict]:
    """Get a list of all NPORT-P filings for a given fund's symbol.

    Parameters
    ----------
    symbol : str
        The fund's ticker symbol.
    use_cache : bool
        Whether to use the SQLite-backed HTTP cache for the full-text search.

    Returns
    -------
    List[Dict]
        Filing candidates sorted by file date, newest first.

    Raises
    ------
    OpenBBError
        If no fund can be resolved for the symbol.
    """
    results = []
    _series_id = await get_series_id(symbol, use_cache=use_cache)
    try:
        # Fall back to symbol_map when no series ID is mapped.
        # NOTE(review): that fallback returns a CIK, so the full-text search
        # below then queries by CIK instead of series ID — confirm intended.
        series_id = (
            await symbol_map(symbol, use_cache)
            if _series_id is None or len(_series_id) == 0
            else _series_id["seriesId"].iloc[0]
        )
    except IndexError as e:
        raise OpenBBError("Fund not found for, the symbol: " + symbol) from e
    if series_id == "" or series_id is None:
        raise OpenBBError("Fund not found for, the symbol: " + symbol)

    url = f"https://efts.sec.gov/LATEST/search-index?q={series_id}&dateRange=all&forms=NPORT-P"
    response: Union[dict, List[dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_etf"
        async with CachedSession(cache=SQLiteBackend(cache_dir)) as session:
            try:
                await session.delete_expired_responses()
                response = await amake_request(url, session=session, headers=HEADERS, response_callback=sec_callback)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, response_callback=sec_callback)  # type: ignore

    # Flatten the Elasticsearch-style hits into filing records.
    if "hits" in response and len(response["hits"].get("hits")) > 0:  # type: ignore
        hits = response["hits"]["hits"]  # type: ignore
        results = [
            {
                "name": d["_source"]["display_names"][0],
                "cik": d["_source"]["ciks"][0],
                "file_date": d["_source"]["file_date"],
                "period_ending": d["_source"]["period_ending"],
                "form_type": d["_source"]["form"],
                # Build the archive URL from the accession number in `_id`.
                "primary_doc": (
                    f"https://www.sec.gov/Archives/edgar/data/{int(d['_source']['ciks'][0])}"  # noqa
                    + f"/{d['_id'].replace('-', '').replace(':', '/')}"  # noqa
                ),
            }
            for d in hits
        ]
    return (
        sorted(results, key=lambda d: d["file_date"], reverse=True)
        if len(results) > 0
        else results
    )
openbb_platform/providers/sec/openbb_sec/utils/parse_13f.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for parsing SEC Form 13F-HR."""
2
+
3
+ from typing import Any, Optional
4
+
5
+ from openbb_core.app.model.abstract.error import OpenBBError
6
+
7
+
8
def date_to_quarter_end(date: str) -> str:
    """Convert a date to the end of the calendar quarter."""
    # pylint: disable=import-outside-toplevel
    from pandas import to_datetime
    from pandas.tseries.offsets import QuarterEnd

    # Snap to the first day of the quarter, then roll forward to its end.
    quarter_start = to_datetime(date).to_period("Q").to_timestamp("D")
    quarter_end = quarter_start + QuarterEnd()
    return quarter_end.date().strftime("%Y-%m-%d")
19
+
20
+
21
async def get_13f_candidates(symbol: Optional[str] = None, cik: Optional[str] = None):
    """Get the 13F-HR filings for a given symbol or CIK.

    Parameters
    ----------
    symbol : Optional[str]
        Ticker symbol of the filer.
    cik : Optional[str]
        CIK number of the filer.

    Returns
    -------
    Series
        Complete-submission URLs indexed by report date, for structured
        filings (on or after 2013-06-30) only.

    Raises
    ------
    OpenBBError
        If neither identifier is provided, or no 13F-HR filings are found.
    """
    # pylint: disable=import-outside-toplevel
    from openbb_sec.models.company_filings import SecCompanyFilingsFetcher
    from pandas import DataFrame, to_datetime

    fetcher = SecCompanyFilingsFetcher()
    params: dict[str, Any] = {}
    if cik is not None:
        params["cik"] = str(cik)
    if symbol is not None:
        params["symbol"] = symbol
    if cik is None and symbol is None:
        raise OpenBBError("Either symbol or cik must be provided.")

    params["use_cache"] = False
    params["form_type"] = "13F-HR"
    filings = await fetcher.fetch_data(params, {})
    filings = [d.model_dump() for d in filings]
    if len(filings) == 0:
        raise OpenBBError(f"No 13F-HR filings found for {symbol if symbol else cik}.")

    # Filings before June 30, 2013 are non-structured and are not supported by downstream parsers.
    up_to = to_datetime("2013-06-30").date()  # pylint: disable=unused-variable # noqa
    # Missing URLs are normalized to None via the fillna/replace round-trip.
    return (
        DataFrame(data=filings)
        .query("`report_date` >= @up_to")
        .set_index("report_date")["complete_submission_url"]
        .fillna("N/A")
        .replace("N/A", None)
    )
52
+
53
+
54
async def complete_submission_callback(response, _):
    """Use callback function for processing the response object."""
    # Guard clause: anything other than a 200 is treated as a hard failure.
    if response.status != 200:
        raise OpenBBError(f"Request failed with status code {response.status}")
    return await response.text()
59
+
60
+
61
async def get_complete_submission(url: str):
    """Get the Complete Submission TXT file string from the SEC API."""
    # pylint: disable=import-outside-toplevel
    from openbb_core.provider.utils.helpers import amake_request
    from openbb_sec.utils.definitions import HEADERS

    # Delegate status handling and decoding to the shared callback.
    response = await amake_request(
        url,
        headers=HEADERS,
        response_callback=complete_submission_callback,
    )
    return response
70
+
71
+
72
def parse_header(filing_str: str) -> dict:
    """Parse the header of a Complete Submission TXT file string.

    Parameters
    ----------
    filing_str : str
        The complete-submission text, or an already-built BeautifulSoup object.

    Returns
    -------
    dict
        The parsed header data.

    Raises
    ------
    OpenBBError
        If no header could be parsed from the document.
    """
    # pylint: disable=import-outside-toplevel
    import xmltodict
    from bs4 import BeautifulSoup

    header_dict: dict = {}
    # Accept either a raw string or a pre-parsed soup to avoid re-parsing.
    soup = (
        filing_str
        if filing_str.__class__.__name__ == "BeautifulSoup"
        else BeautifulSoup(filing_str, "xml")
    )
    try:
        header_xml = soup.find("headerData")
        header_dict = xmltodict.parse(str(header_xml))["headerData"]
    except KeyError:
        # Filings without a 'headerData' tag fall back to the 'type' tag.
        header_xml = soup.find("type")
        header_dict = xmltodict.parse(str(header_xml)).get("type")
    if header_dict:
        return header_dict  # type: ignore
    raise OpenBBError(
        "Failed to parse the form header."
        + " Check the `filing_str` to for the tag, 'headerData'."
    )
96
+
97
+
98
def get_submission_type(filing_str: str):
    """Get the submission type of a Complete Submission TXT file string."""
    header = parse_header(filing_str)
    if not header:
        raise OpenBBError(
            "Failed to get the submission type from the form header."
            + " Check the response from `parse_header`."
        )
    # Structured headers expose 'submissionType'; legacy ones nest it in '#text'.
    if "submissionType" in header:
        return header["submissionType"]
    return header["#text"]
112
+
113
+
114
def get_period_ending(filing_str: str):
    """Get the report date from a Complete Submission TXT file string."""
    header = parse_header(filing_str)
    filer_info = header.get("filerInfo")
    if not filer_info:
        raise OpenBBError(
            "Failed to get the period of report from the form header."
            + " Check the response from `parse_header`."
        )
    return filer_info.get("periodOfReport")
123
+
124
+
125
async def parse_13f_hr(filing: str):
    """Parse a 13F-HR filing from the Complete Submission TXT file string.

    Parameters
    ----------
    filing : str
        Either the complete-submission text, or an `https://` URL to fetch it from.

    Returns
    -------
    List[Dict]
        Aggregated holdings records sorted by portfolio weight, descending.

    Raises
    ------
    OpenBBError
        If the information table cannot be parsed (e.g. pre-Q2-2013 filings).
    """
    # pylint: disable=import-outside-toplevel
    import xmltodict
    from bs4 import BeautifulSoup
    from numpy import nan
    from pandas import DataFrame, to_datetime

    # Check if the input string is a URL
    if filing.startswith("https://"):
        filing = await get_complete_submission(filing)  # type: ignore

    soup = BeautifulSoup(filing, "xml")

    info_table = soup.find_all("informationTable")

    # Some filings nest the data in a generic 'table'; use the last one.
    if not info_table:
        info_table = soup.find_all("table")[-1]

    # Strip the XML namespace prefixes before converting to a dict.
    parsed_xml = xmltodict.parse(
        str(info_table[0]).replace("ns1:", "").replace("n1:", "")
    )["informationTable"]["infoTable"]

    if parsed_xml is None:
        raise OpenBBError(
            "Failed to parse the 13F-HR information table."
            + " Check the `filing_str` to make sure it is valid and contains the tag 'informationTable'."
            + " Documents filed before Q2 2013 are not supported."
        )

    period_ending = get_period_ending(soup)
    # A single holding parses as a dict rather than a list; normalize.
    data = (
        DataFrame(parsed_xml)
        if isinstance(parsed_xml, list)
        else DataFrame([parsed_xml])
    )
    data.columns = data.columns.str.replace("ns1:", "")
    data.loc[:, "value"] = data["value"].astype(int)
    security_type: list = []
    principal_amount: list = []

    # Unpack the nested objects
    try:
        security_type = [d.get("sshPrnamtType") for d in data["shrsOrPrnAmt"]]
        data.loc[:, "security_type"] = security_type
        principal_amount = [int(d.get("sshPrnamt", 0)) for d in data["shrsOrPrnAmt"]]
        data.loc[:, "principal_amount"] = principal_amount
        _ = data.pop("shrsOrPrnAmt")
    except ValueError:
        pass
    try:
        sole = [d.get("Sole") for d in data["votingAuthority"]]
        shared = [d.get("Shared") for d in data["votingAuthority"]]
        none = [d.get("None") for d in data["votingAuthority"]]
        data.loc[:, "voting_authority_sole"] = [int(s) if s else 0 for s in sole]
        data.loc[:, "voting_authority_shared"] = [int(s) if s else 0 for s in shared]
        data.loc[:, "voting_authority_none"] = [int(s) if s else 0 for s in none]
        _ = data.pop("votingAuthority")
    except ValueError:
        pass

    # Placeholder so puts/calls group distinctly; mapped back to None below.
    if "putCall" in data.columns:
        data.loc[:, "putCall"] = data["putCall"].fillna("--")

    # Add the period ending so that the filing is identified when multiple are requested.
    data["period_ending"] = to_datetime(period_ending, yearfirst=False).date()
    df = DataFrame(data)
    # Aggregate the data because there are multiple entries for each security and we need the totals.
    # We break it down by CUSIP, security type, and option type.
    agg_index = [
        "period_ending",
        "nameOfIssuer",
        "cusip",
        "titleOfClass",
        "security_type",
        "putCall",
        "investmentDiscretion",
    ]
    agg_columns = {
        "value": "sum",
        "principal_amount": "sum",
        "voting_authority_sole": "sum",
        "voting_authority_shared": "sum",
        "voting_authority_none": "sum",
    }
    # Only aggregate columns that exist in the DataFrame
    agg_columns = {k: v for k, v in agg_columns.items() if k in df.columns}
    agg_index = [k for k in agg_index if k in df.columns]
    df = df.groupby([*agg_index]).agg(agg_columns)

    # Drop voting-authority columns that are all zeros (no information).
    for col in [
        "voting_authority_sole",
        "voting_authority_shared",
        "voting_authority_none",
    ]:
        if col in df.columns and all(df[col] == 0):
            df.drop(columns=col, inplace=True)

    # Portfolio weight = each position's share of the total reported value.
    total_value = df["value"].sum()
    df["weight"] = round(df["value"] / total_value, 6)

    return (
        df.reset_index()
        .replace({nan: None, "--": None})
        .sort_values(by="weight", ascending=False)
        .to_dict("records")
    )
openbb_platform/providers/sec/openbb_sec/utils/py.typed ADDED
File without changes
openbb_platform/providers/sec/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "openbb-sec"
3
+ version = "1.4.3"
4
+ description = "SEC extension for OpenBB"
5
+ authors = ["OpenBB Team <hello@openbb.co>"]
6
+ license = "AGPL-3.0-only"
7
+ readme = "README.md"
8
+ packages = [{ include = "openbb_sec" }]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = ">=3.9.21,<3.13"
12
+ openbb-core = "^1.4.6"
13
+ aiohttp-client-cache = "^0.11.0"
14
+ aiosqlite = "^0.20.0"
15
+ xmltodict = "^0.13.0"
16
+ beautifulsoup4 = "^4.12"
17
+ lxml = "^5.2.1"
18
+ trafilatura = "^2.0"
19
+ inscriptis = "^2.5.3"
20
+
21
+ [build-system]
22
+ requires = ["poetry-core"]
23
+ build-backend = "poetry.core.masonry.api"
24
+
25
+ [tool.poetry.plugins."openbb_provider_extension"]
26
+ sec = "openbb_sec:sec_provider"
openbb_platform/providers/sec/tests/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SEC tests."""
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_cik_map_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_company_filings_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_compare_company_facts_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_ftd_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_equity_search_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_etf_holdings_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v1.yaml ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept-Encoding:
6
+ - gzip, deflate
7
+ Host:
8
+ - www.sec.gov
9
+ method: GET
10
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/0001552781-24-000634-index-headers.htm
11
+ response:
12
+ body:
13
+ string: !!binary |
14
+ H4sIAAAAAAAAA7WY33OiSBDHn+GvmNuHfRsFRaOGo2qE0XDLDw9wL9mrqxRJSMLGqIWmzr2//roB
15
+ BQXdvTW3D1mZ6enpT0/3dxT1KrAtTb3izNDUwAwsrvlcJ9wYM4/4b3ev8WoVL+ZEkiS502ld9GTa
16
+ Uig8dduK2swWiOovlIoqrKPoh3tanXXj+SFprJ5eZ2RAWlJLkVtyV1SZrvNJwBydU4MFPDBtrm1n
17
+ ZaXdbncyG983XYc6U3t4xL2oBjcTrvXoJ1GdTIeWqVPD1ac2dwKqu1Mn0HC7CfdM19ju0BZVM+C2
18
+ r3UaUmv7ud+QZFEdmZbpjNOYtCJafKTuiJZmqX7FnHHZCCYhRlHVXXvCnBu0YumjM3I9mxvUYQCp
19
+ uzojumvhH5g0PyGVJEFQCrAw4B07YOubOrjuSaLqemPmmF9YkCYCXUgKscP522N4v35L4vkTEHj+
20
+ NkedntRt9ZQu5M8P8rBNR3e9ieulPjSDY6y+zix6w5lHuWNocqsN8M2D0HPcz8yach+fgYMW6WZ6
21
+ oOEBIHhxRDKVWi35Ih22t8OQo05Hbvf7sMmh1+HUNx04aMoMw4P/NYzc4zyQNdfhpEjYxGJfGOYs
22
+ uNFYYDEnYDmkNoZPX8yJ1pbaeLqTK1ipKZJCuxdd2oLzgX1r9rGZab3jts0Df81tSTRLPSJSiiNZ
23
+ 5w1d4waeJh7XPs7Wl2i2rd6PT+vLuoKn8fwh2jSe1/sdtV2e7XJscW0z4tKafkQfBy2560iSnetA
24
+ EGp7clfzxJ8ObTNbgoUzELBysj4lW1KS9in4gliKlVnLEndEPA61C/O77sWGJWZqmJY0LDWiZZhA
25
+ O0Rk8UiMOInu14tkRRYJ0aNkHcZz4j4+xvdRsrokfAazKG5l00vClstFPF+/RvM1zhyuGxB98bqM
26
+ 5qsQzL8RliTh/ClC61WdeV2Qo3gezu/jcEb8dbjO14bzB8I3z/FdvF6JWDAGYT5i4znAot0x4XM+
27
+ lWlPeTJd6Q1EUch7GJezgSAKu4EitagisFjY0yJ4hLPwmAVBG/yafOI3aFNSJ0GAqncM5hloMoVu
28
+ McFat1C0RqaeUw75Z+6xMffJnyhgf8GysoRlm5Pin1CRM0EAPSsqTBAKTUtDgDRADvZEDc1A1yDF
29
+ qbARFDYCwobjqbaJQiY7JJOdAZpCMvKiTKsSfMMNCKKGldhvK4Tdr/NBTO4uoELicDvLLiItVE4U
30
+ tmpDcjlIjyITGCKDbZ3GwBGAyGDMW53JeXFojE8gNvg50xuh2CQVPJjZkzxRQD36HwNA4Wjui046
31
+ VpawdACzjA+Y5kyofp9y0Bock9MRzDBWBo5ELUW56Ny+LGjvBXUuc8GvM39qSJ6T6PHXD4dmHzRj
32
+ cf+Wtq9MKHmMZ9GAHBqpzVBT7xIti70S6NHI+TXt9xtyJfrWseijTb9P5e+HX9iV4m8dxl9Y/SzA
33
+ 2GOTK1OvALQrAC+L29niadH4unzKXHJf98wJttmhoypVaXGJp73jKc2fcRSyJDd8/aoCo9TA0O21
34
+ 0disHuqAroeeRcAbt1na6EfJ9jyV6JQS3Z7NmYQGH1UIO6cIbx+ix8bmdXaUEjyajpnK8I+Rbj2W
35
+ aDt1tFu7M4ktNqwQd08Sz8K7k8TgkVs/Cps7K8F2a2FzuzNh4UtfBfbiJOwyiU7CgkcfNmT/5YBz
36
+ nyXmi1rm3O5nma9tqwLbr8B6hWAe8JkGZwOSFXG+xxE471BM+zse7ywF5dc6r0LIUoVi9zXv1ouW
37
+ i2Td2MxWm/Ogal2WLzxpB1lr+p7HJldvbP958Xfj6+o8xtxJmaq4x/PJd+Wo3t1JlrH71ZkohZ8y
38
+ TXGrF/PvClS9/kbxDL5R+2+vr2Hy7Zh2/DjXZjVrVlyWEZVSGZbN8NfqDrVU1d9x5bE/CJAeejtD
39
+ h37zXaeauOodY0fr0IrnLysousX8vKzt+yozFtfLvs3P0sG38ypc9U6pfa2wuUtmjX/i5XmsJ12X
40
+ 0Ytb5uSSd+2P3smfGrdQpO/TInVey+y92p8nqWV9o/yIw12v1Bl/J43NwxdPahNfSanN9P2U2kxf
41
+ Fov/AnSFX/M0FgAA
42
+ headers:
43
+ Accept-Ranges:
44
+ - bytes
45
+ Connection:
46
+ - keep-alive
47
+ Content-Encoding:
48
+ - gzip
49
+ Content-Length:
50
+ - '1551'
51
+ Content-Type:
52
+ - text/html
53
+ Date:
54
+ - Mon, 17 Feb 2025 16:56:19 GMT
55
+ Last-Modified:
56
+ - Mon, 16 Dec 2024 19:34:14 GMT
57
+ Strict-Transport-Security:
58
+ - max-age=31536000 ; includeSubDomains ; preload
59
+ Vary:
60
+ - Accept-Encoding
61
+ X-Content-Type-Options:
62
+ - nosniff
63
+ X-Frame-Options:
64
+ - SAMEORIGIN
65
+ X-XSS-Protection:
66
+ - 1; mode=block
67
+ x-amz-id-2:
68
+ - 36KuPmr3UWIQ8x31DyNQbm1OVx9hB40N0dw451XQLYA4IrIsvW2q02kCDDjAMIZPKkuwZB1/wS0=
69
+ x-amz-meta-mode:
70
+ - '33188'
71
+ x-amz-replication-status:
72
+ - COMPLETED
73
+ x-amz-request-id:
74
+ - 5S9961E5DEA95VYH
75
+ x-amz-server-side-encryption:
76
+ - AES256
77
+ x-amz-version-id:
78
+ - Em.xpqQlD0bTs0G3LJTAM3AKc2Z7jmV.
79
+ status:
80
+ code: 200
81
+ message: OK
82
+ - request:
83
+ body: null
84
+ headers:
85
+ Accept-Encoding:
86
+ - gzip, deflate
87
+ Host:
88
+ - www.sec.gov
89
+ method: GET
90
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/R1.htm
91
+ response:
92
+ body:
93
+ string: !!binary |
94
+ H4sIAAAAAAAAA+xaa3PaRhT9XH7FHbWN7YwBSQb8EpohQskwsYEa0ibtdDyLtIBioWVWC5hm8t97
95
+ Vw/b2EQmnXQGJfABYe25y96z5x7tLjaaHevdpd3umwWj/6Frm+8vLwpGz/7tnd22bPO0YLxuXdjt
96
+ xqVtXmmlsZgUjKbds65a3X6r0zZbTbtxBu9fXV1A2hH2Y7+X3SHYlxdKXLwIT/jUNMrxtWD4XnAD
97
+ nPp1JRRLn4ZjSoUCYjmldUXQW1F2wlCBMafDuuIFjj9zaZnTKeOiJFuwh9Dh3lQ8DPlI5iS+q0DI
98
+ nbrSG7NF6SOiyy+hyaDNBFzRCZtT6I+9ECw2mdBAwMuyUY7jzOxezcJP8Ws4CxzhsQAEG4182kZc
99
+ zxtgTiPYpwfwKcV5Q/y7FNw3l6JsS64XTn2yrNf3AhbQvQcRWei9gc+cm73zFPsZqB9S+ASZQdE3
100
+ nMPnu6i7ZAtGOZmdAXOXktIpCSAKrytJ/BnI+HPFnB+V9EqpgsEIMg1BBj4FxydhWFfiiVFgwLhL
101
+ eV1RFXCo7yPSwQHVFV0Bz8V5dCeV6oleramntcppVVflNAou38ZpV8LHWCZDg7qiKcDZIv1sGq43
102
+ T4e38FwxPgNdVae3ODojFJwFI9PCyeXGgKPSkjtGGaOk8Mar3zOO+zOb1CmBdnSIXemVFXA5Hhq/
103
+ zzIar5v+PcWhJqOJEy8OmBBscgbRmGBOfG+EIxdsit9F0jCSyvpeWGdz5rn76gEGMdS659wk2g3x
104
+ rXG1DwLlegh7Lh1i5LVLvesmc2ZSvH0U6t4hLLzAZQvAHsy0BWSTUSYyHXdl4FLWinlSfGvEcxlP
105
+ aSHBPUmbbVHaDczMldm99sloNe+7JpBtGYkPCVbNZqlv44x3KfeYawduk4gvTX2MAQSBRGWQcSf/
106
+ WP+5E4Qd4BNl+dpDE55NBpSvEhK3gmyGuD2DClXViqqua8f500ac5xUdeeh6JBBtMqFrmbiHgMRk
107
+ sGF1rAZYnQv5lldZWFgLnPitwKW3b+lyLSMJBiIQICpTIfjStaPKhoWyfRLpk9uWixl7Q88hcgWT
108
+ UTWIhVXw8zVUPSmqNf2kUqvmVTOtwGEclzNRwj2B7mmxGUpkaTF3fU2tRBxCFAOMQxIHMjDLge28
109
+ iqnhupyGYXK58AKqrSUoARymH0BCoRNksYKtBcCXxRxStJhPoOuTf0heVZVkbuHHDu+zRZBNlMRJ
110
+ CUlkBkkN4aOTb0jK1uonKpgO73I2x53e+hq7Y+auulJ4Bj1vGjmXS5eFgvh/etMves8dLzEUEPuc
111
+ 3xypR9pR/iQjS6LBKXlKRVQssum5zPcrauUgf5K4QAv0u2Pchq97WEetEDU//3iuHdeKuqZr+Zv+
112
+ P7gnBA3kkc0sSJYj4SoTCQRWMd9mI7hNeugxhHnCC0aX6IXcI/4qD/ftkAK+v91wl9P4+M6h0TEI
113
+ 7v0p7wyHj8sDcUXnARBiJETQ708bj2hpheGM8s3JifH/A0fbJJ340WlPKB9hjbzhbCHGSBqOf/0O
114
+ MUVCDIUE+22IGW8JMbOwOCJkeh2tr6QWLNlzZ9gTzLlp3Hph/YZdS2nJTRHeUkt6tUv478Sf0Uv6
115
+ 9LEUQyHCHsIvEg6Ihyggg7oXP2s19Tx/oupRZ4bPn6WmD/ryJ45VNqJbwIag6fuDA0jBWccvEX/R
116
+ FugLHObOm/qcuFhGveVkwB49spImiNsyaHm74VnUNkrDvnXGJBjRp4dzKQJSyHOHc+0PvQ1PDXLl
117
+ L8iDVjo5rraZoGFzRnVVr60zlwj0K0QwQJw8u67tTOVZU4l5i0zlEXc/qJlsmvk2imJnJxvYiVo6
118
+ rqrP2UkE2tnJV9tJzNvOTh7YiZVfVez8ZKPliaY/XJ4cr1+eIOiRnxzv/GSD5Ynk7amfbPiD/Hfo
119
+ Jzn8V4SdnXzV8mTVTk7XL0+e2snpzk42WJ6st5PTH9ZOTjf8QXQbVbHzk//iJ9o6Q0EgqCUspOqj
120
+ wtDgrxj/985edvby9fbyKr+q2GJ7+RcAAP//7J1Lc9s2EIDv+RWYXtJORb1IuY6a8YwfScbjSeLG
121
+ PbRHiIQszFAkS4CWlV/fXYCSSQqKoSSTSCJ8sCUKz+Xi8y6w4u4TXoJ+ZTfF75vNFShUN1f8vuOJ
122
+ BU9Qbhs8Adm1lCd+3/HkuHky6A6ru7O+0VpRhRo8GTieWOymDE27s75l8NcR8uQAw94cTnbAiV/3
123
+ fvyhCSf+5m6KP3Q4eRYnZrm1FSWWM99HhXAosfJ0/GoYiu+bPR2/GYbi+w4lFp6ObwpD8S2/lXCE
124
+ ODnA72M4nOyEk1Ft42QLTkYbGycOJzY4GRk3TlqME3fOc9w8GXRPap7OyLxxcrJhsY8cTyw2Tk5M
125
+ Bzv+AX6l/TvxxHLm+6gUDid2UW0182RL0P1gwzxxUbJWUW1G86S1UbK2M99HpXA4sfJ2Xo2exYkq
126
+ 5HDyFd7OK+OxTotx4ryd4+aJ39iMNQbd+4bNWBd0b3OuY5JbW1niAu6PGyX97mnV0wm2RLCdNj2d
127
+ wEWw2ZgmpyZPJ2htBFvgItiOnCeDbr/Gky0RbP0NnrgINpudk76RJ62NYLOd+T4qhcOJladTOycO
128
+ ArOns3FOHAQOJxaejklubUXJAT7C2qFkJ5TUnnwyMoac+JtPPhm5kBMblJjk1lKU2M58HxXix6Gk
129
+ pzLmwItKEptmjp0XZVqdRh6djEaRyqPTzKuD70upFHL2iU2vqKS/bGldpeDZlkpF9Z2fVSY14xFT
130
+ ci9bC9M4zcdkMeMSG7O+B9gO3AMU6T9r6a2VA3vUeX3KbjET0XoG5cTHZKiS/LyA0Vj3+5SZSd9/
131
+ dVM9csWmPOH4ZGA1GJXuJzu7SNOY0YRMQRRQmkoCFWQOC3sxYwlcYTrJVZgmEp+TSlF2gmQ5e+Bp
132
+ IeKlN+Uxi/C55DQMWSbhtSgmcy4E9NR93cvOdGqhb5/C7wTuM8vxka1iNYVt+gQz+5CSaD1lAn3y
133
+ GFXsOw/piklo+Pnx7KreWkdeKCUp8zmpZTiupHdaLcZVYihUUA/bYOMkXeQ0g4439L3BqFoL5Xi8
134
+ nN/P5JgEoHvGRmsjUp+RW5AjfzSMTo3A1OlTK7h2Vc4mU/XHSR7z8USr6TX849fJnb7U3gWNKSjJ
135
+ 1iYT+uX6ZQ6hbdWjQie8MEFupVirbFrrBb+G4A/kXPXB8S3HnHpIfghyQGMnVCaOA9NPB1NVQQ+T
136
+ S0maz8Fa+8yiO5nDEB2gdgGUMbFby0n1FiwptLreXL07/1QxpVRiToH4ghJzcurdjFW5SGWFmarX
137
+ OhtlZ+M6o6CrTEjCHtCG08VY9CeZ2nX2wRuc697A2EPfCBvXtWkckxQ+yTdq6wpgedRHh9Wx15wm
138
+ QhMwU3e/S/7G9nE9yVUNNQeQ17/w471/711dOYNyH7htXLeHCXBUMQftr4E2TqTlrEZiIeuQV9Eq
139
+ A+mEIeIyTNIVgTP8qyjCGaGCDPreTQd//9Uhweno4vbjXYcwGf6mybeurxqEjmM+5+hNy1SxUMCK
140
+ wJ0h8MyhMXWlyFZAvXtz2eRvh5RsX4BSkJcfkdEvHT73CZ/bmLPP1ITq4ydNw1KOnrvQc2v+yJaj
141
+ tJYgc0BW0DyXMumQi4LHak8eFRwzIeaMrdL4Op79dJ5t1elDg5vz6b8z4J6yvrYcb6i8K6+2nuLW
142
+ 8Wu/+PWkso5eLadXLQlxywGGIlhvF2Y65TK8+8wzdaziOLZvHKspr0NZy1HWyDTfcphVrTEh1UFJ
143
+ rnfsQDZuj2zvWNbQ3kOjmdozq0/BoWx3lF2yROY0vk4i9njDli1n2DkpEv5fwfBEIeL3XOIhgMcx
144
+ jXlUnhPIFAQJQuPTJcE/kjOhY/xm9EGd5UJREH0Yp6KAdUYWXM4UFKGpLrlWsYChSsccLwmdTDDo
145
+ j+KZBBXk8vrmZ5JyXZgMxmQmZTbu9RaLRRcth26a3/eG/b7fy9OY9TKYGkxfqTpUez3Jz4h3W0xi
146
+ LvDcGuaqL6n/CuuY1/NQlpcLjB4nw6Cv39+xUNF5MCzfFxNRXpp46lpbad1YoIeI6bA+BYfp3TH9
147
+ Zs7ye2jsXZ4u5OwynWc0aTusQZ94qIJZpiRn91xg+Iskc8akPstlpdDIvZIaUhfFRmAMErSDOtQ6
148
+ 1FZRa1xkhwZcF9r9jah9CxacXjMt5yssgFXsC1q1JFFCKUMKOYsjjFNMF4IUGVrFgz8IoCenIbBV
149
+ 6FKZUnIyp0v1VRfKgTqeT5RlLXREpWBgbqPy6dYbZU9rZdMMh0hjIorpZrtBpfsOoUlUxlbCQAWh
150
+ OXaVwedoaSuTHMS2zGbMfZ3mGwn8PwAAAP//7J3fT6NAEID/FR41uRIWWj1ffDE+NLkfxmvunhGo
151
+ NlehgWu0//3N7C64S8Faa67szbxaassO39dhdnb5SAO/sOeadjHPnTffnr27v3eneVKUq0J9Z1mc
152
+ uirWcOOw4Ymiy9lTMWrsphvL5cKbMtPZIaa5VuU1UYOHFdmFObKsu+Ho7tVL3kUDZul9XJonwiLc
153
+ X4S3zb0sRo26+fA+/hm85+XG/JIsvG7qhTMv62CwhlqtsmQBaV8K3vMgd5RpIWjzE45Q8oDl1xKS
154
+ zkUJR9xt5OKYSpdtrVItFwe4OHDZR6Vrcua5/4+y8yx+nqr5Hyw+wvfmWoHWNIyMZw+Np/VyMpt+
155
+ O4V78mVVeL/h+s/R03HuXT+ulsUGDuh527V8G3x0XM/LXehpuXrNTrW4z5XHUdvT2x+sbda2qe0e
156
+ XF3zt0yuNS3TlL29j7e/FEm8vHmA11jVYCk5HN4Kx6MuumILqkqpuTowBHW1r1jXbMXZ5uHWguDg
157
+ BAzCg8tap9iDVM6yHD75+3xO3mJv2GKt3tKjLhPAAQvcby1V678ruA6q+cbcgaOA/OtepaDN9hpN
158
+ X0EsN2gbJUZQZB8XZKYqs4DX12W1xmPh39+u4UoRUTYanySnkL5C3NSuIEYyRzxZjbKtbHWc0ExW
159
+ d9Hu5i8AdyEcLn5WvmPKH6cjue8vK79D+eN0S/nhHSvf5pxlT0b27Z3EiSu+2To91punKxlnOFdX
160
+ 6aEibtCOCi9Nf7bRcc2acgGbPgd5AmzO95jT3HKfuD3NFbiNUUAQqu8hbgyqWiBqsbJPWz5NxUjQ
161
+ VqrJlItalW1o9UlgLx2rdS+1FnA5L7Cl9GssF0otiYv1KJWHqomC96jD0FVtiEci5FpDZ60h3lK7
162
+ INoLsU20a1rnGsO7dW49lou4yevnkFVyMGSlIQdRQ3DVPtExbhFd4RoxbEHIvUzrhvsihqAx60p2
163
+ zWCYmP4xT4Atto/FfsGNCeRTV9b0C3GbHSUvfVKB2DkRNg4nRl6qby1xs5z/NDO1z9DOTaNWbgpj
164
+ QzMR7aTYNZNzLvoui7/pUcNXcmcs+afADyc3cfkTW/3V04YJ6d5C+XjMHh6zfw73AWDvonAXxQcx
165
+ 7D678Osm/M/nk5fHhIdnDO6Awe0PGFNLidoAnz/P1LpDbU/AmFpK1ApfhKa6z5naYVPbEzCmlhK1
166
+ QesiuGBqh01tT8CYWtLUCsbWMWwFc0uR23Fg3ChFAWM7dGw7A8bUUqJW+KFZ3oj4x3bg1PYEjKml
167
+ RG1kZ1xRyNQOm9qegDG1lKgN/MicAIwipnbY1PYEjKmlRe3Euk1iagdPbWfAmFpK1Ar/zEq4Jkzt
168
+ sKntCRhTS4taYambu6MGT21nwJhaStQG/sWEqXWI2p6AMbWUqI1axQ3uaTwKtX8BAAD//yNlDBlr
169
+ hI3m2pGUaw30LJAbXCajqywGea7FEWGjuXYk5VpDPQOURDC6ymKQ51ocETaaa0dSrjVGnUgwMRnN
170
+ tYM71+KIsNFcO7JyLcqGTdPR+dpBn2uxRthorqVproUpACVHEJ1RkpsDokNcI0JAtIu/c6ivqx+Q
171
+ DQAQ6ApYaiQBAA==
172
+ headers:
173
+ Accept-Ranges:
174
+ - bytes
175
+ Connection:
176
+ - keep-alive
177
+ Content-Encoding:
178
+ - gzip
179
+ Content-Length:
180
+ - '4399'
181
+ Content-Type:
182
+ - text/html
183
+ Date:
184
+ - Mon, 17 Feb 2025 16:56:19 GMT
185
+ Last-Modified:
186
+ - Mon, 16 Dec 2024 19:34:14 GMT
187
+ Strict-Transport-Security:
188
+ - max-age=31536000 ; includeSubDomains ; preload
189
+ Vary:
190
+ - Accept-Encoding
191
+ X-Content-Type-Options:
192
+ - nosniff
193
+ X-Frame-Options:
194
+ - SAMEORIGIN
195
+ X-XSS-Protection:
196
+ - 1; mode=block
197
+ x-amz-id-2:
198
+ - E26geXEJKqcpKRmxQRW43mwPREtnu0++BGRkV1iaIm+Z/8RtQCrvg0B8lqyUjeoAu3wjcZfwbe8=
199
+ x-amz-meta-mode:
200
+ - '33188'
201
+ x-amz-replication-status:
202
+ - COMPLETED
203
+ x-amz-request-id:
204
+ - 5S93Q8BMTVEG3CVQ
205
+ x-amz-server-side-encryption:
206
+ - AES256
207
+ x-amz-version-id:
208
+ - poa5Dfjus0LpfUlH7sTIt4TIBlhRg.Ab
209
+ status:
210
+ code: 200
211
+ message: OK
212
+ version: 1
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_filing_fetcher_urllib3_v2.yaml ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept-Encoding:
6
+ - gzip, deflate
7
+ Host:
8
+ - www.sec.gov
9
+ method: GET
10
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/0001552781-24-000634-index-headers.htm
11
+ response:
12
+ body:
13
+ string: !!binary |
14
+ H4sIAAAAAAAAA7WY33OiSBDHn+GvmNuHfRsFRaOGo2qE0XDLDw9wL9mrqxRJSMLGqIWmzr2//roB
15
+ BQXdvTW3D1mZ6enpT0/3dxT1KrAtTb3izNDUwAwsrvlcJ9wYM4/4b3ev8WoVL+ZEkiS502ld9GTa
16
+ Uig8dduK2swWiOovlIoqrKPoh3tanXXj+SFprJ5eZ2RAWlJLkVtyV1SZrvNJwBydU4MFPDBtrm1n
17
+ ZaXdbncyG983XYc6U3t4xL2oBjcTrvXoJ1GdTIeWqVPD1ac2dwKqu1Mn0HC7CfdM19ju0BZVM+C2
18
+ r3UaUmv7ud+QZFEdmZbpjNOYtCJafKTuiJZmqX7FnHHZCCYhRlHVXXvCnBu0YumjM3I9mxvUYQCp
19
+ uzojumvhH5g0PyGVJEFQCrAw4B07YOubOrjuSaLqemPmmF9YkCYCXUgKscP522N4v35L4vkTEHj+
20
+ NkedntRt9ZQu5M8P8rBNR3e9ieulPjSDY6y+zix6w5lHuWNocqsN8M2D0HPcz8yach+fgYMW6WZ6
21
+ oOEBIHhxRDKVWi35Ih22t8OQo05Hbvf7sMmh1+HUNx04aMoMw4P/NYzc4zyQNdfhpEjYxGJfGOYs
22
+ uNFYYDEnYDmkNoZPX8yJ1pbaeLqTK1ipKZJCuxdd2oLzgX1r9rGZab3jts0Df81tSTRLPSJSiiNZ
23
+ 5w1d4waeJh7XPs7Wl2i2rd6PT+vLuoKn8fwh2jSe1/sdtV2e7XJscW0z4tKafkQfBy2560iSnetA
24
+ EGp7clfzxJ8ObTNbgoUzELBysj4lW1KS9in4gliKlVnLEndEPA61C/O77sWGJWZqmJY0LDWiZZhA
25
+ O0Rk8UiMOInu14tkRRYJ0aNkHcZz4j4+xvdRsrokfAazKG5l00vClstFPF+/RvM1zhyuGxB98bqM
26
+ 5qsQzL8RliTh/ClC61WdeV2Qo3gezu/jcEb8dbjO14bzB8I3z/FdvF6JWDAGYT5i4znAot0x4XM+
27
+ lWlPeTJd6Q1EUch7GJezgSAKu4EitagisFjY0yJ4hLPwmAVBG/yafOI3aFNSJ0GAqncM5hloMoVu
28
+ McFat1C0RqaeUw75Z+6xMffJnyhgf8GysoRlm5Pin1CRM0EAPSsqTBAKTUtDgDRADvZEDc1A1yDF
29
+ qbARFDYCwobjqbaJQiY7JJOdAZpCMvKiTKsSfMMNCKKGldhvK4Tdr/NBTO4uoELicDvLLiItVE4U
30
+ tmpDcjlIjyITGCKDbZ3GwBGAyGDMW53JeXFojE8gNvg50xuh2CQVPJjZkzxRQD36HwNA4Wjui046
31
+ VpawdACzjA+Y5kyofp9y0Bock9MRzDBWBo5ELUW56Ny+LGjvBXUuc8GvM39qSJ6T6PHXD4dmHzRj
32
+ cf+Wtq9MKHmMZ9GAHBqpzVBT7xIti70S6NHI+TXt9xtyJfrWseijTb9P5e+HX9iV4m8dxl9Y/SzA
33
+ 2GOTK1OvALQrAC+L29niadH4unzKXHJf98wJttmhoypVaXGJp73jKc2fcRSyJDd8/aoCo9TA0O21
34
+ 0disHuqAroeeRcAbt1na6EfJ9jyV6JQS3Z7NmYQGH1UIO6cIbx+ix8bmdXaUEjyajpnK8I+Rbj2W
35
+ aDt1tFu7M4ktNqwQd08Sz8K7k8TgkVs/Cps7K8F2a2FzuzNh4UtfBfbiJOwyiU7CgkcfNmT/5YBz
36
+ nyXmi1rm3O5nma9tqwLbr8B6hWAe8JkGZwOSFXG+xxE471BM+zse7ywF5dc6r0LIUoVi9zXv1ouW
37
+ i2Td2MxWm/Ogal2WLzxpB1lr+p7HJldvbP958Xfj6+o8xtxJmaq4x/PJd+Wo3t1JlrH71ZkohZ8y
38
+ TXGrF/PvClS9/kbxDL5R+2+vr2Hy7Zh2/DjXZjVrVlyWEZVSGZbN8NfqDrVU1d9x5bE/CJAeejtD
39
+ h37zXaeauOodY0fr0IrnLysousX8vKzt+yozFtfLvs3P0sG38ypc9U6pfa2wuUtmjX/i5XmsJ12X
40
+ 0Ytb5uSSd+2P3smfGrdQpO/TInVey+y92p8nqWV9o/yIw12v1Bl/J43NwxdPahNfSanN9P2U2kxf
41
+ Fov/AnSFX/M0FgAA
42
+ headers:
43
+ Accept-Ranges:
44
+ - bytes
45
+ Connection:
46
+ - keep-alive
47
+ Content-Encoding:
48
+ - gzip
49
+ Content-Length:
50
+ - '1551'
51
+ Content-Type:
52
+ - text/html
53
+ Date:
54
+ - Mon, 17 Feb 2025 16:55:12 GMT
55
+ Last-Modified:
56
+ - Mon, 16 Dec 2024 19:34:14 GMT
57
+ Strict-Transport-Security:
58
+ - max-age=31536000 ; includeSubDomains ; preload
59
+ Vary:
60
+ - Accept-Encoding
61
+ X-Content-Type-Options:
62
+ - nosniff
63
+ X-Frame-Options:
64
+ - SAMEORIGIN
65
+ X-XSS-Protection:
66
+ - 1; mode=block
67
+ x-amz-id-2:
68
+ - 36KuPmr3UWIQ8x31DyNQbm1OVx9hB40N0dw451XQLYA4IrIsvW2q02kCDDjAMIZPKkuwZB1/wS0=
69
+ x-amz-meta-mode:
70
+ - '33188'
71
+ x-amz-replication-status:
72
+ - COMPLETED
73
+ x-amz-request-id:
74
+ - 5S9961E5DEA95VYH
75
+ x-amz-server-side-encryption:
76
+ - AES256
77
+ x-amz-version-id:
78
+ - Em.xpqQlD0bTs0G3LJTAM3AKc2Z7jmV.
79
+ status:
80
+ code: 200
81
+ message: OK
82
+ - request:
83
+ body: null
84
+ headers:
85
+ Accept-Encoding:
86
+ - gzip, deflate
87
+ Host:
88
+ - www.sec.gov
89
+ method: GET
90
+ uri: https://www.sec.gov/Archives/edgar/data/21344/000155278124000634/R1.htm
91
+ response:
92
+ body:
93
+ string: !!binary |
94
+ H4sIAAAAAAAAA+xaa3PaRhT9XH7FHbWN7YwBSQb8EpohQskwsYEa0ibtdDyLtIBioWVWC5hm8t97
95
+ Vw/b2EQmnXQGJfABYe25y96z5x7tLjaaHevdpd3umwWj/6Frm+8vLwpGz/7tnd22bPO0YLxuXdjt
96
+ xqVtXmmlsZgUjKbds65a3X6r0zZbTbtxBu9fXV1A2hH2Y7+X3SHYlxdKXLwIT/jUNMrxtWD4XnAD
97
+ nPp1JRRLn4ZjSoUCYjmldUXQW1F2wlCBMafDuuIFjj9zaZnTKeOiJFuwh9Dh3lQ8DPlI5iS+q0DI
98
+ nbrSG7NF6SOiyy+hyaDNBFzRCZtT6I+9ECw2mdBAwMuyUY7jzOxezcJP8Ws4CxzhsQAEG4182kZc
99
+ zxtgTiPYpwfwKcV5Q/y7FNw3l6JsS64XTn2yrNf3AhbQvQcRWei9gc+cm73zFPsZqB9S+ASZQdE3
100
+ nMPnu6i7ZAtGOZmdAXOXktIpCSAKrytJ/BnI+HPFnB+V9EqpgsEIMg1BBj4FxydhWFfiiVFgwLhL
101
+ eV1RFXCo7yPSwQHVFV0Bz8V5dCeV6oleramntcppVVflNAou38ZpV8LHWCZDg7qiKcDZIv1sGq43
102
+ T4e38FwxPgNdVae3ODojFJwFI9PCyeXGgKPSkjtGGaOk8Mar3zOO+zOb1CmBdnSIXemVFXA5Hhq/
103
+ zzIar5v+PcWhJqOJEy8OmBBscgbRmGBOfG+EIxdsit9F0jCSyvpeWGdz5rn76gEGMdS659wk2g3x
104
+ rXG1DwLlegh7Lh1i5LVLvesmc2ZSvH0U6t4hLLzAZQvAHsy0BWSTUSYyHXdl4FLWinlSfGvEcxlP
105
+ aSHBPUmbbVHaDczMldm99sloNe+7JpBtGYkPCVbNZqlv44x3KfeYawduk4gvTX2MAQSBRGWQcSf/
106
+ WP+5E4Qd4BNl+dpDE55NBpSvEhK3gmyGuD2DClXViqqua8f500ac5xUdeeh6JBBtMqFrmbiHgMRk
107
+ sGF1rAZYnQv5lldZWFgLnPitwKW3b+lyLSMJBiIQICpTIfjStaPKhoWyfRLpk9uWixl7Q88hcgWT
108
+ UTWIhVXw8zVUPSmqNf2kUqvmVTOtwGEclzNRwj2B7mmxGUpkaTF3fU2tRBxCFAOMQxIHMjDLge28
109
+ iqnhupyGYXK58AKqrSUoARymH0BCoRNksYKtBcCXxRxStJhPoOuTf0heVZVkbuHHDu+zRZBNlMRJ
110
+ CUlkBkkN4aOTb0jK1uonKpgO73I2x53e+hq7Y+auulJ4Bj1vGjmXS5eFgvh/etMves8dLzEUEPuc
111
+ 3xypR9pR/iQjS6LBKXlKRVQssum5zPcrauUgf5K4QAv0u2Pchq97WEetEDU//3iuHdeKuqZr+Zv+
112
+ P7gnBA3kkc0sSJYj4SoTCQRWMd9mI7hNeugxhHnCC0aX6IXcI/4qD/ftkAK+v91wl9P4+M6h0TEI
113
+ 7v0p7wyHj8sDcUXnARBiJETQ708bj2hpheGM8s3JifH/A0fbJJ340WlPKB9hjbzhbCHGSBqOf/0O
114
+ MUVCDIUE+22IGW8JMbOwOCJkeh2tr6QWLNlzZ9gTzLlp3Hph/YZdS2nJTRHeUkt6tUv478Sf0Uv6
115
+ 9LEUQyHCHsIvEg6Ihyggg7oXP2s19Tx/oupRZ4bPn6WmD/ryJ45VNqJbwIag6fuDA0jBWccvEX/R
116
+ FugLHObOm/qcuFhGveVkwB49spImiNsyaHm74VnUNkrDvnXGJBjRp4dzKQJSyHOHc+0PvQ1PDXLl
117
+ L8iDVjo5rraZoGFzRnVVr60zlwj0K0QwQJw8u67tTOVZU4l5i0zlEXc/qJlsmvk2imJnJxvYiVo6
118
+ rqrP2UkE2tnJV9tJzNvOTh7YiZVfVez8ZKPliaY/XJ4cr1+eIOiRnxzv/GSD5Ynk7amfbPiD/Hfo
119
+ Jzn8V4SdnXzV8mTVTk7XL0+e2snpzk42WJ6st5PTH9ZOTjf8QXQbVbHzk//iJ9o6Q0EgqCUspOqj
120
+ wtDgrxj/985edvby9fbyKr+q2GJ7+RcAAP//7J1Lc9s2EIDv+RWYXtJORb1IuY6a8YwfScbjSeLG
121
+ PbRHiIQszFAkS4CWlV/fXYCSSQqKoSSTSCJ8sCUKz+Xi8y6w4u4TXoJ+ZTfF75vNFShUN1f8vuOJ
122
+ BU9Qbhs8Adm1lCd+3/HkuHky6A6ru7O+0VpRhRo8GTieWOymDE27s75l8NcR8uQAw94cTnbAiV/3
123
+ fvyhCSf+5m6KP3Q4eRYnZrm1FSWWM99HhXAosfJ0/GoYiu+bPR2/GYbi+w4lFp6ObwpD8S2/lXCE
124
+ ODnA72M4nOyEk1Ft42QLTkYbGycOJzY4GRk3TlqME3fOc9w8GXRPap7OyLxxcrJhsY8cTyw2Tk5M
125
+ Bzv+AX6l/TvxxHLm+6gUDid2UW0182RL0P1gwzxxUbJWUW1G86S1UbK2M99HpXA4sfJ2Xo2exYkq
126
+ 5HDyFd7OK+OxTotx4ryd4+aJ39iMNQbd+4bNWBd0b3OuY5JbW1niAu6PGyX97mnV0wm2RLCdNj2d
127
+ wEWw2ZgmpyZPJ2htBFvgItiOnCeDbr/Gky0RbP0NnrgINpudk76RJ62NYLOd+T4qhcOJladTOycO
128
+ ArOns3FOHAQOJxaejklubUXJAT7C2qFkJ5TUnnwyMoac+JtPPhm5kBMblJjk1lKU2M58HxXix6Gk
129
+ pzLmwItKEptmjp0XZVqdRh6djEaRyqPTzKuD70upFHL2iU2vqKS/bGldpeDZlkpF9Z2fVSY14xFT
130
+ ci9bC9M4zcdkMeMSG7O+B9gO3AMU6T9r6a2VA3vUeX3KbjET0XoG5cTHZKiS/LyA0Vj3+5SZSd9/
131
+ dVM9csWmPOH4ZGA1GJXuJzu7SNOY0YRMQRRQmkoCFWQOC3sxYwlcYTrJVZgmEp+TSlF2gmQ5e+Bp
132
+ IeKlN+Uxi/C55DQMWSbhtSgmcy4E9NR93cvOdGqhb5/C7wTuM8vxka1iNYVt+gQz+5CSaD1lAn3y
133
+ GFXsOw/piklo+Pnx7KreWkdeKCUp8zmpZTiupHdaLcZVYihUUA/bYOMkXeQ0g4439L3BqFoL5Xi8
134
+ nN/P5JgEoHvGRmsjUp+RW5AjfzSMTo3A1OlTK7h2Vc4mU/XHSR7z8USr6TX849fJnb7U3gWNKSjJ
135
+ 1iYT+uX6ZQ6hbdWjQie8MEFupVirbFrrBb+G4A/kXPXB8S3HnHpIfghyQGMnVCaOA9NPB1NVQQ+T
136
+ S0maz8Fa+8yiO5nDEB2gdgGUMbFby0n1FiwptLreXL07/1QxpVRiToH4ghJzcurdjFW5SGWFmarX
137
+ OhtlZ+M6o6CrTEjCHtCG08VY9CeZ2nX2wRuc697A2EPfCBvXtWkckxQ+yTdq6wpgedRHh9Wx15wm
138
+ QhMwU3e/S/7G9nE9yVUNNQeQ17/w471/711dOYNyH7htXLeHCXBUMQftr4E2TqTlrEZiIeuQV9Eq
139
+ A+mEIeIyTNIVgTP8qyjCGaGCDPreTQd//9Uhweno4vbjXYcwGf6mybeurxqEjmM+5+hNy1SxUMCK
140
+ wJ0h8MyhMXWlyFZAvXtz2eRvh5RsX4BSkJcfkdEvHT73CZ/bmLPP1ITq4ydNw1KOnrvQc2v+yJaj
141
+ tJYgc0BW0DyXMumQi4LHak8eFRwzIeaMrdL4Op79dJ5t1elDg5vz6b8z4J6yvrYcb6i8K6+2nuLW
142
+ 8Wu/+PWkso5eLadXLQlxywGGIlhvF2Y65TK8+8wzdaziOLZvHKspr0NZy1HWyDTfcphVrTEh1UFJ
143
+ rnfsQDZuj2zvWNbQ3kOjmdozq0/BoWx3lF2yROY0vk4i9njDli1n2DkpEv5fwfBEIeL3XOIhgMcx
144
+ jXlUnhPIFAQJQuPTJcE/kjOhY/xm9EGd5UJREH0Yp6KAdUYWXM4UFKGpLrlWsYChSsccLwmdTDDo
145
+ j+KZBBXk8vrmZ5JyXZgMxmQmZTbu9RaLRRcth26a3/eG/b7fy9OY9TKYGkxfqTpUez3Jz4h3W0xi
146
+ LvDcGuaqL6n/CuuY1/NQlpcLjB4nw6Cv39+xUNF5MCzfFxNRXpp46lpbad1YoIeI6bA+BYfp3TH9
147
+ Zs7ye2jsXZ4u5OwynWc0aTusQZ94qIJZpiRn91xg+Iskc8akPstlpdDIvZIaUhfFRmAMErSDOtQ6
148
+ 1FZRa1xkhwZcF9r9jah9CxacXjMt5yssgFXsC1q1JFFCKUMKOYsjjFNMF4IUGVrFgz8IoCenIbBV
149
+ 6FKZUnIyp0v1VRfKgTqeT5RlLXREpWBgbqPy6dYbZU9rZdMMh0hjIorpZrtBpfsOoUlUxlbCQAWh
150
+ OXaVwedoaSuTHMS2zGbMfZ3mGwn8PwAAAP//7J3fT6NAEID/FR41uRIWWj1ffDE+NLkfxmvunhGo
151
+ NlehgWu0//3N7C64S8Faa67szbxaassO39dhdnb5SAO/sOeadjHPnTffnr27v3eneVKUq0J9Z1mc
152
+ uirWcOOw4Ymiy9lTMWrsphvL5cKbMtPZIaa5VuU1UYOHFdmFObKsu+Ho7tVL3kUDZul9XJonwiLc
153
+ X4S3zb0sRo26+fA+/hm85+XG/JIsvG7qhTMv62CwhlqtsmQBaV8K3vMgd5RpIWjzE45Q8oDl1xKS
154
+ zkUJR9xt5OKYSpdtrVItFwe4OHDZR6Vrcua5/4+y8yx+nqr5Hyw+wvfmWoHWNIyMZw+Np/VyMpt+
155
+ O4V78mVVeL/h+s/R03HuXT+ulsUGDuh527V8G3x0XM/LXehpuXrNTrW4z5XHUdvT2x+sbda2qe0e
156
+ XF3zt0yuNS3TlL29j7e/FEm8vHmA11jVYCk5HN4Kx6MuumILqkqpuTowBHW1r1jXbMXZ5uHWguDg
157
+ BAzCg8tap9iDVM6yHD75+3xO3mJv2GKt3tKjLhPAAQvcby1V678ruA6q+cbcgaOA/OtepaDN9hpN
158
+ X0EsN2gbJUZQZB8XZKYqs4DX12W1xmPh39+u4UoRUTYanySnkL5C3NSuIEYyRzxZjbKtbHWc0ExW
159
+ d9Hu5i8AdyEcLn5WvmPKH6cjue8vK79D+eN0S/nhHSvf5pxlT0b27Z3EiSu+2To91punKxlnOFdX
160
+ 6aEibtCOCi9Nf7bRcc2acgGbPgd5AmzO95jT3HKfuD3NFbiNUUAQqu8hbgyqWiBqsbJPWz5NxUjQ
161
+ VqrJlItalW1o9UlgLx2rdS+1FnA5L7Cl9GssF0otiYv1KJWHqomC96jD0FVtiEci5FpDZ60h3lK7
162
+ INoLsU20a1rnGsO7dW49lou4yevnkFVyMGSlIQdRQ3DVPtExbhFd4RoxbEHIvUzrhvsihqAx60p2
163
+ zWCYmP4xT4Atto/FfsGNCeRTV9b0C3GbHSUvfVKB2DkRNg4nRl6qby1xs5z/NDO1z9DOTaNWbgpj
164
+ QzMR7aTYNZNzLvoui7/pUcNXcmcs+afADyc3cfkTW/3V04YJ6d5C+XjMHh6zfw73AWDvonAXxQcx
165
+ 7D678Osm/M/nk5fHhIdnDO6Awe0PGFNLidoAnz/P1LpDbU/AmFpK1ApfhKa6z5naYVPbEzCmlhK1
166
+ QesiuGBqh01tT8CYWtLUCsbWMWwFc0uR23Fg3ChFAWM7dGw7A8bUUqJW+KFZ3oj4x3bg1PYEjKml
167
+ RG1kZ1xRyNQOm9qegDG1lKgN/MicAIwipnbY1PYEjKmlRe3Euk1iagdPbWfAmFpK1Ar/zEq4Jkzt
168
+ sKntCRhTS4taYambu6MGT21nwJhaStQG/sWEqXWI2p6AMbWUqI1axQ3uaTwKtX8BAAD//yNlDBlr
169
+ hI3m2pGUaw30LJAbXCajqywGea7FEWGjuXYk5VpDPQOURDC6ymKQ51ocETaaa0dSrjVGnUgwMRnN
170
+ tYM71+KIsNFcO7JyLcqGTdPR+dpBn2uxRthorqVproUpACVHEJ1RkpsDokNcI0JAtIu/c6ivqx+Q
171
+ DQAQ6ApYaiQBAA==
172
+ headers:
173
+ Accept-Ranges:
174
+ - bytes
175
+ Connection:
176
+ - keep-alive
177
+ Content-Encoding:
178
+ - gzip
179
+ Content-Length:
180
+ - '4399'
181
+ Content-Type:
182
+ - text/html
183
+ Date:
184
+ - Mon, 17 Feb 2025 16:55:13 GMT
185
+ Last-Modified:
186
+ - Mon, 16 Dec 2024 19:34:14 GMT
187
+ Strict-Transport-Security:
188
+ - max-age=31536000 ; includeSubDomains ; preload
189
+ Vary:
190
+ - Accept-Encoding
191
+ X-Content-Type-Options:
192
+ - nosniff
193
+ X-Frame-Options:
194
+ - SAMEORIGIN
195
+ X-XSS-Protection:
196
+ - 1; mode=block
197
+ x-amz-id-2:
198
+ - E26geXEJKqcpKRmxQRW43mwPREtnu0++BGRkV1iaIm+Z/8RtQCrvg0B8lqyUjeoAu3wjcZfwbe8=
199
+ x-amz-meta-mode:
200
+ - '33188'
201
+ x-amz-replication-status:
202
+ - COMPLETED
203
+ x-amz-request-id:
204
+ - 5S93Q8BMTVEG3CVQ
205
+ x-amz-server-side-encryption:
206
+ - AES256
207
+ x-amz-version-id:
208
+ - poa5Dfjus0LpfUlH7sTIt4TIBlhRg.Ab
209
+ status:
210
+ code: 200
211
+ message: OK
212
+ version: 1
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v1.yaml ADDED
The diff for this file is too large to render. See raw diff
 
openbb_platform/providers/sec/tests/record/http/test_sec_fetchers/test_sec_form_13FHR_fetcher_urllib3_v2.yaml ADDED
The diff for this file is too large to render. See raw diff