Spaces:

Asish22
/

Sentinel_V2

Running

App Files Files Community

Asish22 committed 11 days ago

Commit

83c4a31

verified ·

1 Parent(s): 714bc30

Fix: applymap deprecation and CSV transaction ledger parsing

Browse files

Files changed (1) hide show

features/portfolio_analyzer.py +46 -7

features/portfolio_analyzer.py CHANGED Viewed

@@ -33,10 +33,10 @@ COLUMN_ALIASES = {
     "ticker": ["ticker", "symbol", "stock", "instrument", "security"],
     "shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
     "avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
-                 "average_price", "purchase_price", "cost_per_share", "buy_average"],
-    "current_price": ["current_price", "market_price", "price", "last_price",
                       "current_value_per_share", "mark"],
-    "description": ["description", "action", "activity", "type", "transaction", "details"]
 }
@@ -73,8 +73,18 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
     # Flag to check if this is an activity log (has tickers/instruments but no shares)
     is_activity_log = "shares" not in df.columns
-    # Keep only mapped + extra columns
     available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
     if len(available) < 2:
         return None
@@ -88,13 +98,42 @@ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame | None:
         df = df.drop_duplicates(subset=["ticker"]).copy()
     # Ensure numeric columns are forced to float to prevent missing data errors
     for col in ["shares", "avg_cost", "current_price"]:
         if col in df.columns:
             df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
-    # Final filter: remove rows with 0 shares (closed positions)
     if "shares" in df.columns:
-        df = df[df["shares"] > 0]
     # Cleanup empty tickers which might be generated from summary rows
     df = df[df["ticker"].notna()]
@@ -449,7 +488,7 @@ def render_portfolio_analyzer():
         display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
                                      "market_value", "unrealized_pnl", "pnl_pct",
                                      "weight_pct", "sector"] if c in holdings.columns]
-        styled = holdings[display_cols].style.applymap(
             _color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
         ).format({
             c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",

     "ticker": ["ticker", "symbol", "stock", "instrument", "security"],
     "shares": ["shares", "quantity", "qty", "units", "amount", "open_quantity", "net_quantity", "quantity_available"],
     "avg_cost": ["avg_cost", "average_cost", "cost_basis", "avg_price",
+                 "average_price", "purchase_price", "cost_per_share", "buy_average", "price"],
+    "current_price": ["current_price", "market_price", "last_price",
                       "current_value_per_share", "mark"],
+    "description": ["description", "action", "activity", "type", "transaction", "details", "trans_code"]
 }
     # Flag to check if this is an activity log (has tickers/instruments but no shares)
     is_activity_log = "shares" not in df.columns
+    # Map original 'trans_code', 'action' or 'type' column for ledger detection
+    trans_col = None
+    for c in ["trans_code", "action", "type", "transaction"]:
+        if c in df.columns:
+            trans_col = c
+            break
+    # Keep only mapped + extra columns (including the transaction column if found)
     available = [c for c in ["ticker", "shares", "avg_cost", "current_price", "description"] if c in df.columns]
+    if trans_col and trans_col not in available:
+        available.append(trans_col)
     if len(available) < 2:
         return None
         df = df.drop_duplicates(subset=["ticker"]).copy()
     # Ensure numeric columns are forced to float to prevent missing data errors
+    # Also clean up currency symbols and parentheses for negative numbers (e.g. ($100) -> -100)
     for col in ["shares", "avg_cost", "current_price"]:
         if col in df.columns:
+            df[col] = df[col].astype(str).str.replace(r'[\$,]', '', regex=True)
+            df[col] = df[col].str.replace(r'^\((.*)\)$', r'-\1', regex=True)
             df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
+    # If this is a transaction ledger (Robinhood), aggregate buys and sells
+    if trans_col and "shares" in df.columns:
+        df[trans_col] = df[trans_col].astype(str).str.lower()
+        # Make Sell quantities negative
+        df.loc[df[trans_col].str.contains("sell"), "shares"] = -df["shares"]
+        if "avg_cost" in df.columns:
+            # We only track cost basis for positive buys to get the average buy cost
+            df["total_cost"] = 0.0
+            buy_mask = df[trans_col].str.contains("buy")
+            df.loc[buy_mask, "total_cost"] = df.loc[buy_mask, "shares"] * df.loc[buy_mask, "avg_cost"]
+        else:
+            df["total_cost"] = 0.0
+        # Group by ticker to get net holdings
+        grouped = df.groupby("ticker").agg({
+            "shares": "sum",
+            "total_cost": "sum"
+        }).reset_index()
+        grouped["avg_cost"] = 0.0
+        # Only compute avg cost if we bought some shares
+        grouped.loc[grouped["total_cost"] > 0, "avg_cost"] = grouped["total_cost"] / grouped["shares"]
+        grouped.loc[grouped["shares"] <= 0, "avg_cost"] = 0
+        df = grouped.drop(columns=["total_cost"])
+    # Final filter: remove rows with 0 or negative shares (closed/short positions not supported yet)
     if "shares" in df.columns:
+        df = df[df["shares"] > 0.0001]
     # Cleanup empty tickers which might be generated from summary rows
     df = df[df["ticker"].notna()]
         display_cols = [c for c in ["ticker", "shares", "avg_cost", "current_price",
                                      "market_value", "unrealized_pnl", "pnl_pct",
                                      "weight_pct", "sector"] if c in holdings.columns]
+        styled = holdings[display_cols].style.map(
             _color_pnl, subset=[c for c in ["unrealized_pnl", "pnl_pct"] if c in display_cols]
         ).format({
             c: "${:,.2f}" for c in ["avg_cost", "current_price", "market_value",