Upload alpha_factory/cleanup.py with huggingface_hub
Browse files — alpha_factory/cleanup.py (+11 −3)
alpha_factory/cleanup.py (CHANGED)
@@ -107,11 +107,19 @@ def cleanup_orphans():

Removed (old lines 107–117; "-" marks deleted lines; the content of removed
lines 110 and 112 was lost in the page extraction and is noted, not guessed):

    107          if not expression:
    108              continue
    109          # Extract all word-like tokens that could be field names
    110 -        <token-extraction line — content lost in page extraction>
    111          # Filter out operators and known keywords
    112 -        skip = {  <original set contents lost in page extraction>
    113          for t in tokens:
    114 -            if t.startswith("ts_") or t.startswith("group_") or t.startswith("vec_")
    115                  continue
    116              if t in skip:
    117                  continue

Added (new lines 107–125; "+" marks inserted lines):

    107          if not expression:
    108              continue
    109          # Extract all word-like tokens that could be field names
    110 +        # Require at least 10 chars to avoid matching common words like "backfill"
    111 +        tokens = re.findall(r"\b([a-z][a-z0-9_]{10,})\b", expression.lower())
    112          # Filter out operators and known keywords
    113 +        skip = {
    114 +            "subindustry", "industry", "sector", "market",
    115 +            "close", "high", "low", "open", "volume", "vwap",
    116 +            # Common English words that might match length filter
    117 +            "backfill", "neutralize", "expression",
    118 +        }
    119          for t in tokens:
    120 +            if t.startswith("ts_") or t.startswith("group_") or t.startswith("vec_"):
    121 +                continue
    122 +            if t.startswith("pv13_") or t.startswith("mdl") or t.startswith("snt") or t.startswith("scl"):
    123                  continue
    124              if t in skip:
    125                  continue