Asish22 committed on
Commit
83c4a31
·
verified ·
1 Parent(s): 714bc30

Fix: applymap deprecation and CSV transaction ledger parsing

Browse files
Files changed (1) hide show
  1. features/portfolio_analyzer.py +46 -7
features/portfolio_analyzer.py CHANGED
@@ -33,10 +33,10 @@ COLUMN_ALIASES = {
33
  "ticker": ["ticker", "symbol", "stock", "instrument", "security"],
34
  "shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
35
  "avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
36
- "average_price", "purchase_price", "cost_per_share", "buy_average"],
37
- "current_price": ["current_price", "market_price", "price", "last_price",
38
  "current_value_per_share", "mark"],
39
- "description": ["description", "action", "activity", "type", "transaction", "details"]
40
  }
41
 
42
 
@@ -73,8 +73,18 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
73
  # Flag to check if this is an activity log (has tickers/instruments but no shares)
74
  is_activity_log = "shares" not in df.columns
75
 
76
- # Keep only mapped + extra columns
 
 
 
 
 
 
 
77
  available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
 
 
 
78
  if len(available) < 2:
79
  return None
80
 
@@ -88,13 +98,42 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
88
  df = df.drop_duplicates(subset=["ticker"]).copy()
89
 
90
  # Ensure numeric columns are forced to float to prevent missing data errors
 
91
  for col in ["shares", "avg_cost", "current_price"]:
92
  if col in df.columns:
 
 
93
  df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- # Final filter: remove rows with 0 shares (closed positions)
96
  if "shares" in df.columns:
97
- df = df[df["shares"] > 0]
98
 
99
  # Cleanup empty tickers which might be generated from summary rows
100
  df = df[df["ticker"].notna()]
@@ -449,7 +488,7 @@ def render_portfolio_analyzer():
449
  display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
450
  "market_value", "unrealized_pnl", "pnl_pct",
451
  "weight_pct", "sector"] if c in holdings.columns]
452
- styled = holdings[display_cols].style.applymap(
453
  _color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
454
  ).format({
455
  c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",
 
33
  "ticker": ["ticker", "symbol", "stock", "instrument", "security"],
34
  "shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
35
  "avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
36
+ "average_price", "purchase_price", "cost_per_share", "buy_average", "price"],
37
+ "current_price": ["current_price", "market_price", "last_price",
38
  "current_value_per_share", "mark"],
39
+ "description": ["description", "action", "activity", "type", "transaction", "details", "trans_code"]
40
  }
41
 
42
 
 
73
  # Flag to check if this is an activity log (has tickers/instruments but no shares)
74
  is_activity_log = "shares" not in df.columns
75
 
76
+ # Map original 'trans_code', 'action' or 'type' column for ledger detection
77
+ trans_col = None
78
+ for c in ["trans_code", "action", "type", "transaction"]:
79
+ if c in df.columns:
80
+ trans_col = c
81
+ break
82
+
83
+ # Keep only mapped + extra columns (including the transaction column if found)
84
  available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
85
+ if trans_col and trans_col not in available:
86
+ available.append(trans_col)
87
+
88
  if len(available) < 2:
89
  return None
90
 
 
98
  df = df.drop_duplicates(subset=["ticker"]).copy()
99
 
100
  # Ensure numeric columns are forced to float to prevent missing data errors
101
+ # Also clean up currency symbols and parentheses for negative numbers (e.g. ($100) -> -100)
102
  for col in ["shares", "avg_cost", "current_price"]:
103
  if col in df.columns:
104
+ df[col] = df[col].astype(str).str.replace(r'[\$,]', '', regex=True)
105
+ df[col] = df[col].str.replace(r'^\((.*)\)$', r'-\1', regex=True)
106
  df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
107
+
108
+ # If this is a transaction ledger (Robinhood), aggregate buys and sells
109
+ if trans_col and "shares" in df.columns:
110
+ df[trans_col] = df[trans_col].astype(str).str.lower()
111
+ # Make Sell quantities negative
112
+ df.loc[df[trans_col].str.contains("sell"), "shares"] = -df["shares"]
113
+
114
+ if "avg_cost" in df.columns:
115
+ # We only track cost basis for positive buys to get the average buy cost
116
+ df["total_cost"] = 0.0
117
+ buy_mask = df[trans_col].str.contains("buy")
118
+ df.loc[buy_mask, "total_cost"] = df.loc[buy_mask, "shares"] * df.loc[buy_mask, "avg_cost"]
119
+ else:
120
+ df["total_cost"] = 0.0
121
+
122
+ # Group by ticker to get net holdings
123
+ grouped = df.groupby("ticker").agg({
124
+ "shares": "sum",
125
+ "total_cost": "sum"
126
+ }).reset_index()
127
+
128
+ grouped["avg_cost"] = 0.0
129
+ # Only compute avg cost if we bought some shares
130
+ grouped.loc[grouped["total_cost"] > 0, "avg_cost"] = grouped["total_cost"] / grouped["shares"]
131
+ grouped.loc[grouped["shares"] <= 0, "avg_cost"] = 0
132
+ df = grouped.drop(columns=["total_cost"])
133
 
134
+ # Final filter: remove rows with 0 or negative shares (closed/short positions not supported yet)
135
  if "shares" in df.columns:
136
+ df = df[df["shares"] > 0.0001]
137
 
138
  # Cleanup empty tickers which might be generated from summary rows
139
  df = df[df["ticker"].notna()]
 
488
  display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
489
  "market_value", "unrealized_pnl", "pnl_pct",
490
  "weight_pct", "sector"] if c in holdings.columns]
491
+ styled = holdings[display_cols].style.map(
492
  _color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
493
  ).format({
494
  c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",