gaurv007 committed on
Commit
d5121aa
·
verified ·
1 Parent(s): 5dd23cd

Upload alpha_factory/cleanup.py

Browse files
Files changed (1) hide show
  1. alpha_factory/cleanup.py +18 -14
alpha_factory/cleanup.py CHANGED
@@ -5,8 +5,11 @@ Run: uv run python -m alpha_factory.cleanup
5
  Also provides batch-level dedup to prevent theme repetition.
6
  """
7
  import duckdb
 
8
  from pathlib import Path
9
 
 
 
10
  DB_PATH = Path("factor_store/alphas.duckdb")
11
 
12
  # Fields that genuinely do NOT exist on BRAIN — any alpha using these is invalid.
@@ -19,15 +22,15 @@ FAKE_FIELDS = {
19
  "placeholder",
20
  "dummy_field",
21
  "test_field",
22
- "my_field",
23
  "sample_field",
 
24
  }
25
 
26
 
27
  def cleanup_fake_alphas():
28
  """Remove alphas that used placeholder field names."""
29
  if not DB_PATH.exists():
30
- print("No database found.")
31
  return
32
 
33
  conn = duckdb.connect(str(DB_PATH))
@@ -57,15 +60,15 @@ def cleanup_fake_alphas():
57
  if to_delete:
58
  placeholders = ",".join(["?" for _ in to_delete])
59
  conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
60
- print(f"Deleted {len(to_delete)} alphas with fake/placeholder fields:")
61
  for aid in to_delete:
62
- print(f" - {aid}")
63
  else:
64
- print("No fake alphas found. Database is clean.")
65
 
66
  # Show remaining
67
  count = conn.execute("SELECT COUNT(*) FROM alphas").fetchone()[0]
68
- print(f"\nRemaining alphas in store: {count}")
69
  conn.close()
70
 
71
 
@@ -86,9 +89,9 @@ def cleanup_quoted_expressions():
86
  fixed += 1
87
 
88
  if fixed:
89
- print(f"Fixed {fixed} expressions with quoted field names.")
90
  else:
91
- print("No quoted field names found.")
92
  conn.close()
93
 
94
 
@@ -130,17 +133,18 @@ def cleanup_orphans():
130
  if to_delete:
131
  placeholders = ",".join(["?" for _ in to_delete])
132
  conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
133
- print(f"Deleted {len(to_delete)} alphas with unknown field references.")
134
 
135
  conn.close()
136
 
137
 
138
  if __name__ == "__main__":
139
- print("=== Alpha Factory Cleanup v0.2.0 ===\n")
140
- print("1. Removing alphas with fake/placeholder fields...")
 
141
  cleanup_fake_alphas()
142
- print("\n2. Fixing quoted field names in expressions...")
143
  cleanup_quoted_expressions()
144
- print("\n3. Removing alphas with unknown field references...")
145
  cleanup_orphans()
146
- print("\nDone!")
 
5
  Also provides batch-level dedup to prevent theme repetition.
6
  """
7
  import duckdb
8
+ import logging
9
  from pathlib import Path
10
 
11
+ logger = logging.getLogger(__name__)
12
+
13
  DB_PATH = Path("factor_store/alphas.duckdb")
14
 
15
  # Fields that genuinely do NOT exist on BRAIN — any alpha using these is invalid.
 
22
  "placeholder",
23
  "dummy_field",
24
  "test_field",
 
25
  "sample_field",
26
+ "my_field",
27
  }
28
 
29
 
30
  def cleanup_fake_alphas():
31
  """Remove alphas that used placeholder field names."""
32
  if not DB_PATH.exists():
33
+ logger.info("No database found.")
34
  return
35
 
36
  conn = duckdb.connect(str(DB_PATH))
 
60
  if to_delete:
61
  placeholders = ",".join(["?" for _ in to_delete])
62
  conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
63
+ logger.info(f"Deleted {len(to_delete)} alphas with fake/placeholder fields")
64
  for aid in to_delete:
65
+ logger.info(f" - {aid}")
66
  else:
67
+ logger.info("No fake alphas found. Database is clean.")
68
 
69
  # Show remaining
70
  count = conn.execute("SELECT COUNT(*) FROM alphas").fetchone()[0]
71
+ logger.info(f"Remaining alphas in store: {count}")
72
  conn.close()
73
 
74
 
 
89
  fixed += 1
90
 
91
  if fixed:
92
+ logger.info(f"Fixed {fixed} expressions with quoted field names.")
93
  else:
94
+ logger.info("No quoted field names found.")
95
  conn.close()
96
 
97
 
 
133
  if to_delete:
134
  placeholders = ",".join(["?" for _ in to_delete])
135
  conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
136
+ logger.info(f"Deleted {len(to_delete)} alphas with unknown field references.")
137
 
138
  conn.close()
139
 
140
 
141
  if __name__ == "__main__":
142
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
143
+ logger.info("=== Alpha Factory Cleanup v0.2.0 ===")
144
+ logger.info("1. Removing alphas with fake/placeholder fields...")
145
  cleanup_fake_alphas()
146
+ logger.info("2. Fixing quoted field names in expressions...")
147
  cleanup_quoted_expressions()
148
+ logger.info("3. Removing alphas with unknown field references...")
149
  cleanup_orphans()
150
+ logger.info("Done!")