# policyengine-us-data-pipeline / test /target_config.yaml
# anth-volk's picture
# Upload test/target_config.yaml with huggingface_hub
# 743c45d verified
# Targets listed under `include`. Each list entry is a mapping with:
#   variable        - name of the target variable
#   geo_level       - one of: district, state, national (the values used below)
#   domain_variable - optional; present only on some entries
# "REMOVED" comments record targets that were deliberately dropped and why.
# NOTE(review): nesting was reconstructed from the repeated
# `- variable:` / `geo_level:` / `domain_variable:` pattern — confirm against
# the consumer's expected schema.
include:
  # === DISTRICT — age demographics ===
  - variable: person_count
    geo_level: district
    domain_variable: age

  # === DISTRICT — count targets ===
  # REMOVED: person_count by AGI — filer-gated, all AGI bins 100% underestimated
  - variable: household_count
    geo_level: district

  # === DISTRICT — dollar targets (all <8% mean error, restored) ===
  - variable: real_estate_taxes
    geo_level: district
  - variable: self_employment_income
    geo_level: district
  - variable: taxable_pension_income
    geo_level: district
  - variable: refundable_ctc
    geo_level: district
  - variable: unemployment_compensation
    geo_level: district

  # === DISTRICT — ACA PTC (2% mean error, restored) ===
  - variable: aca_ptc
    geo_level: district
  - variable: tax_unit_count
    geo_level: district
    domain_variable: aca_ptc

  # === STATE ===
  - variable: person_count
    geo_level: state
    domain_variable: medicaid_enrolled
  # REMOVED: is_pregnant — 100% unachievable across all 51 state geos
  - variable: snap
    geo_level: state

  # === NATIONAL — aggregate dollar targets ===
  # REMOVED: adjusted_gross_income — filer-gated
  - variable: child_support_expense
    geo_level: national
  - variable: child_support_received
    geo_level: national
  # REMOVED: eitc — filer-gated
  - variable: health_insurance_premiums_without_medicare_part_b
    geo_level: national
  - variable: medicaid
    geo_level: national
  - variable: medicare_part_b_premiums
    geo_level: national
  - variable: other_medical_expenses
    geo_level: national
  - variable: over_the_counter_health_expenses
    geo_level: national
  # REMOVED: qualified_business_income_deduction — filer-gated
  - variable: rent
    geo_level: national
  # REMOVED: salt_deduction — 11.3x overestimate, worst variable in model
  - variable: snap
    geo_level: national
  - variable: social_security
    geo_level: national
  - variable: social_security_disability
    geo_level: national
  - variable: social_security_retirement
    geo_level: national
  - variable: spm_unit_capped_housing_subsidy
    geo_level: national
  - variable: spm_unit_capped_work_childcare_expenses
    geo_level: national
  - variable: ssi
    geo_level: national
  - variable: tanf
    geo_level: national
  # REMOVED: tip_income — filer-gated
  - variable: unemployment_compensation
    geo_level: national

  # === NATIONAL — IRS SOI domain-constrained dollar targets (restored: |rel_err| < 15%) ===
  - variable: aca_ptc
    geo_level: national
    domain_variable: aca_ptc
  - variable: net_capital_gains
    geo_level: national
    domain_variable: net_capital_gains
  - variable: refundable_ctc
    geo_level: national
    domain_variable: refundable_ctc
  - variable: self_employment_income
    geo_level: national
    domain_variable: self_employment_income
  - variable: tax_unit_partnership_s_corp_income
    geo_level: national
    domain_variable: tax_unit_partnership_s_corp_income
  - variable: taxable_pension_income
    geo_level: national
    domain_variable: taxable_pension_income
  - variable: unemployment_compensation
    geo_level: national
    domain_variable: unemployment_compensation
  # REMOVED (|rel_err| > 15% or tension with counts):
  #   adjusted_gross_income (28%), dividend_income (26%, tension), eitc (23%),
  #   eitc by child_count (14-77%, tension), income_tax_before_credits (21%),
  #   income_tax_positive (22%), qualified_business_income_deduction (55-63%),
  #   qualified_dividend_income (29%, tension), rental_income (20%),
  #   salt (102%), salt_deduction (1130%), tax_exempt_interest_income (61%),
  #   taxable_interest_income (61%), taxable_ira_distributions (68%),
  #   taxable_social_security (55%)

  # === NATIONAL — IRS SOI filer count targets (restored: |rel_err| < 10%) ===
  - variable: tax_unit_count
    geo_level: national
    domain_variable: aca_ptc
  - variable: tax_unit_count
    geo_level: national
    domain_variable: refundable_ctc
  # REMOVED (|rel_err| > 10%): all other filer count targets (22-706% error)