gaurv007 committed on
Commit
ee996e3
·
verified ·
1 Parent(s): 69fdb51

Upload alpha_factory/cleanup.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. alpha_factory/cleanup.py +11 -3
alpha_factory/cleanup.py CHANGED
@@ -107,11 +107,19 @@ def cleanup_orphans():
107
  if not expression:
108
  continue
109
  # Extract all word-like tokens that could be field names
110
- tokens = re.findall(r"\b([a-z][a-z0-9_]{8,})\b", expression.lower())
 
111
  # Filter out operators and known keywords
112
- skip = {"subindustry", "industry", "sector", "market", "close", "high", "low", "open", "volume", "vwap"}
 
 
 
 
 
113
  for t in tokens:
114
- if t.startswith("ts_") or t.startswith("group_") or t.startswith("vec_") or t.startswith("pv13_") or t.startswith("mdl") or t.startswith("snt") or t.startswith("scl"):
 
 
115
  continue
116
  if t in skip:
117
  continue
 
107
  if not expression:
108
  continue
109
  # Extract all word-like tokens that could be field names
110
+ # Require at least 11 chars (a leading letter plus 10 more) to avoid matching common words like "backfill"
111
+ tokens = re.findall(r"\b([a-z][a-z0-9_]{10,})\b", expression.lower())
112
  # Filter out operators and known keywords
113
+ skip = {
114
+ "subindustry", "industry", "sector", "market",
115
+ "close", "high", "low", "open", "volume", "vwap",
116
+ # Common English words that might match length filter
117
+ "backfill", "neutralize", "expression",
118
+ }
119
  for t in tokens:
120
+ if t.startswith("ts_") or t.startswith("group_") or t.startswith("vec_"):
121
+ continue
122
+ if t.startswith("pv13_") or t.startswith("mdl") or t.startswith("snt") or t.startswith("scl"):
123
  continue
124
  if t in skip:
125
  continue