Spaces:
Running
Running
Fix: applymap deprecation and CSV transaction ledger parsing
Browse files
features/portfolio_analyzer.py
CHANGED
|
@@ -33,10 +33,10 @@ COLUMN_ALIASES = {
|
|
| 33 |
"ticker": ["ticker", "symbol", "stock", "instrument", "security"],
|
| 34 |
"shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
|
| 35 |
"avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
|
| 36 |
-
"average_price", "purchase_price", "cost_per_share", "buy_average"],
|
| 37 |
-
"current_price": ["current_price", "market_price", "
|
| 38 |
"current_value_per_share", "mark"],
|
| 39 |
-
"description": ["description", "action", "activity", "type", "transaction", "details"]
|
| 40 |
}
|
| 41 |
|
| 42 |
|
|
@@ -73,8 +73,18 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
|
|
| 73 |
# Flag to check if this is an activity log (has tickers/instruments but no shares)
|
| 74 |
is_activity_log = "shares" not in df.columns
|
| 75 |
|
| 76 |
-
# Keep only mapped + extra columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
|
|
|
|
|
|
|
|
|
|
| 78 |
if len(available) < 2:
|
| 79 |
return None
|
| 80 |
|
|
@@ -88,13 +98,42 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
|
|
| 88 |
df = df.drop_duplicates(subset=["ticker"]).copy()
|
| 89 |
|
| 90 |
# Ensure numeric columns are forced to float to prevent missing data errors
|
|
|
|
| 91 |
for col in ["shares", "avg_cost", "current_price"]:
|
| 92 |
if col in df.columns:
|
|
|
|
|
|
|
| 93 |
df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
# Final filter: remove rows with 0 shares (closed positions)
|
| 96 |
if "shares" in df.columns:
|
| 97 |
-
df = df[df["shares"] > 0]
|
| 98 |
|
| 99 |
# Cleanup empty tickers which might be generated from summary rows
|
| 100 |
df = df[df["ticker"].notna()]
|
|
@@ -449,7 +488,7 @@ def render_portfolio_analyzer():
|
|
| 449 |
display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
|
| 450 |
"market_value", "unrealized_pnl", "pnl_pct",
|
| 451 |
"weight_pct", "sector"] if c in holdings.columns]
|
| 452 |
-
styled = holdings[display_cols].style.applymap(
|
| 453 |
_color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
|
| 454 |
).format({
|
| 455 |
c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",
|
|
|
|
| 33 |
"ticker": ["ticker", "symbol", "stock", "instrument", "security"],
|
| 34 |
"shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
|
| 35 |
"avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
|
| 36 |
+
"average_price", "purchase_price", "cost_per_share", "buy_average", "price"],
|
| 37 |
+
"current_price": ["current_price", "market_price", "last_price",
|
| 38 |
"current_value_per_share", "mark"],
|
| 39 |
+
"description": ["description", "action", "activity", "type", "transaction", "details", "trans_code"]
|
| 40 |
}
|
| 41 |
|
| 42 |
|
|
|
|
| 73 |
# Flag to check if this is an activity log (has tickers/instruments but no shares)
|
| 74 |
is_activity_log = "shares" not in df.columns
|
| 75 |
|
| 76 |
+
# Map original 'trans_code', 'action' or 'type' column for ledger detection
|
| 77 |
+
trans_col = None
|
| 78 |
+
for c in ["trans_code", "action", "type", "transaction"]:
|
| 79 |
+
if c in df.columns:
|
| 80 |
+
trans_col = c
|
| 81 |
+
break
|
| 82 |
+
|
| 83 |
+
# Keep only mapped + extra columns (including the transaction column if found)
|
| 84 |
available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
|
| 85 |
+
if trans_col and trans_col not in available:
|
| 86 |
+
available.append(trans_col)
|
| 87 |
+
|
| 88 |
if len(available) < 2:
|
| 89 |
return None
|
| 90 |
|
|
|
|
| 98 |
df = df.drop_duplicates(subset=["ticker"]).copy()
|
| 99 |
|
| 100 |
# Ensure numeric columns are forced to float to prevent missing data errors
|
| 101 |
+
# Also clean up currency symbols and parentheses for negative numbers (e.g. ($100) -> -100)
|
| 102 |
for col in ["shares", "avg_cost", "current_price"]:
|
| 103 |
if col in df.columns:
|
| 104 |
+
df[col] = df[col].astype(str).str.replace(r'[\$,]', '', regex=True)
|
| 105 |
+
df[col] = df[col].str.replace(r'^\((.*)\)$', r'-\1', regex=True)
|
| 106 |
df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
|
| 107 |
+
|
| 108 |
+
# If this is a transaction ledger (Robinhood), aggregate buys and sells
|
| 109 |
+
if trans_col and "shares" in df.columns:
|
| 110 |
+
df[trans_col] = df[trans_col].astype(str).str.lower()
|
| 111 |
+
# Make Sell quantities negative
|
| 112 |
+
df.loc[df[trans_col].str.contains("sell"), "shares"] = -df["shares"]
|
| 113 |
+
|
| 114 |
+
if "avg_cost" in df.columns:
|
| 115 |
+
# We only track cost basis for positive buys to get the average buy cost
|
| 116 |
+
df["total_cost"] = 0.0
|
| 117 |
+
buy_mask = df[trans_col].str.contains("buy")
|
| 118 |
+
df.loc[buy_mask, "total_cost"] = df.loc[buy_mask, "shares"] * df.loc[buy_mask, "avg_cost"]
|
| 119 |
+
else:
|
| 120 |
+
df["total_cost"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Group by ticker to get net holdings
|
| 123 |
+
grouped = df.groupby("ticker").agg({
|
| 124 |
+
"shares": "sum",
|
| 125 |
+
"total_cost": "sum"
|
| 126 |
+
}).reset_index()
|
| 127 |
+
|
| 128 |
+
grouped["avg_cost"] = 0.0
|
| 129 |
+
# Only compute avg cost if we bought some shares
|
| 130 |
+
grouped.loc[grouped["total_cost"] > 0, "avg_cost"] = grouped["total_cost"] / grouped["shares"]
|
| 131 |
+
grouped.loc[grouped["shares"] <= 0, "avg_cost"] = 0
|
| 132 |
+
df = grouped.drop(columns=["total_cost"])
|
| 133 |
|
| 134 |
+
# Final filter: remove rows with 0 or negative shares (closed/short positions not supported yet)
|
| 135 |
if "shares" in df.columns:
|
| 136 |
+
df = df[df["shares"] > 0.0001]
|
| 137 |
|
| 138 |
# Cleanup empty tickers which might be generated from summary rows
|
| 139 |
df = df[df["ticker"].notna()]
|
|
|
|
| 488 |
display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
|
| 489 |
"market_value", "unrealized_pnl", "pnl_pct",
|
| 490 |
"weight_pct", "sector"] if c in holdings.columns]
|
| 491 |
+
styled = holdings[display_cols].style.map(
|
| 492 |
_color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
|
| 493 |
).format({
|
| 494 |
c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",
|