Upload alpha_factory/cleanup.py
Browse files
- alpha_factory/cleanup.py +18 -14
alpha_factory/cleanup.py
CHANGED
|
@@ -5,8 +5,11 @@ Run: uv run python -m alpha_factory.cleanup
|
|
| 5 |
Also provides batch-level dedup to prevent theme repetition.
|
| 6 |
"""
|
| 7 |
import duckdb
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
|
|
|
|
|
|
|
| 10 |
DB_PATH = Path("factor_store/alphas.duckdb")
|
| 11 |
|
| 12 |
# Fields that genuinely do NOT exist on BRAIN — any alpha using these is invalid.
|
|
@@ -19,15 +22,15 @@ FAKE_FIELDS = {
|
|
| 19 |
"placeholder",
|
| 20 |
"dummy_field",
|
| 21 |
"test_field",
|
| 22 |
-
"my_field",
|
| 23 |
"sample_field",
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
|
| 27 |
def cleanup_fake_alphas():
|
| 28 |
"""Remove alphas that used placeholder field names."""
|
| 29 |
if not DB_PATH.exists():
|
| 30 |
-
|
| 31 |
return
|
| 32 |
|
| 33 |
conn = duckdb.connect(str(DB_PATH))
|
|
@@ -57,15 +60,15 @@ def cleanup_fake_alphas():
|
|
| 57 |
if to_delete:
|
| 58 |
placeholders = ",".join(["?" for _ in to_delete])
|
| 59 |
conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
|
| 60 |
-
|
| 61 |
for aid in to_delete:
|
| 62 |
-
|
| 63 |
else:
|
| 64 |
-
|
| 65 |
|
| 66 |
# Show remaining
|
| 67 |
count = conn.execute("SELECT COUNT(*) FROM alphas").fetchone()[0]
|
| 68 |
-
|
| 69 |
conn.close()
|
| 70 |
|
| 71 |
|
|
@@ -86,9 +89,9 @@ def cleanup_quoted_expressions():
|
|
| 86 |
fixed += 1
|
| 87 |
|
| 88 |
if fixed:
|
| 89 |
-
|
| 90 |
else:
|
| 91 |
-
|
| 92 |
conn.close()
|
| 93 |
|
| 94 |
|
|
@@ -130,17 +133,18 @@ def cleanup_orphans():
|
|
| 130 |
if to_delete:
|
| 131 |
placeholders = ",".join(["?" for _ in to_delete])
|
| 132 |
conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
|
| 133 |
-
|
| 134 |
|
| 135 |
conn.close()
|
| 136 |
|
| 137 |
|
| 138 |
if __name__ == "__main__":
|
| 139 |
-
|
| 140 |
-
|
|
|
|
| 141 |
cleanup_fake_alphas()
|
| 142 |
-
|
| 143 |
cleanup_quoted_expressions()
|
| 144 |
-
|
| 145 |
cleanup_orphans()
|
| 146 |
-
|
|
|
|
| 5 |
Also provides batch-level dedup to prevent theme repetition.
|
| 6 |
"""
|
| 7 |
import duckdb
|
| 8 |
+
import logging
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
DB_PATH = Path("factor_store/alphas.duckdb")
|
| 14 |
|
| 15 |
# Fields that genuinely do NOT exist on BRAIN — any alpha using these is invalid.
|
|
|
|
| 22 |
"placeholder",
|
| 23 |
"dummy_field",
|
| 24 |
"test_field",
|
|
|
|
| 25 |
"sample_field",
|
| 26 |
+
"my_field",
|
| 27 |
}
|
| 28 |
|
| 29 |
|
| 30 |
def cleanup_fake_alphas():
|
| 31 |
"""Remove alphas that used placeholder field names."""
|
| 32 |
if not DB_PATH.exists():
|
| 33 |
+
logger.info("No database found.")
|
| 34 |
return
|
| 35 |
|
| 36 |
conn = duckdb.connect(str(DB_PATH))
|
|
|
|
| 60 |
if to_delete:
|
| 61 |
placeholders = ",".join(["?" for _ in to_delete])
|
| 62 |
conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
|
| 63 |
+
logger.info(f"Deleted {len(to_delete)} alphas with fake/placeholder fields")
|
| 64 |
for aid in to_delete:
|
| 65 |
+
logger.info(f" - {aid}")
|
| 66 |
else:
|
| 67 |
+
logger.info("No fake alphas found. Database is clean.")
|
| 68 |
|
| 69 |
# Show remaining
|
| 70 |
count = conn.execute("SELECT COUNT(*) FROM alphas").fetchone()[0]
|
| 71 |
+
logger.info(f"Remaining alphas in store: {count}")
|
| 72 |
conn.close()
|
| 73 |
|
| 74 |
|
|
|
|
| 89 |
fixed += 1
|
| 90 |
|
| 91 |
if fixed:
|
| 92 |
+
logger.info(f"Fixed {fixed} expressions with quoted field names.")
|
| 93 |
else:
|
| 94 |
+
logger.info("No quoted field names found.")
|
| 95 |
conn.close()
|
| 96 |
|
| 97 |
|
|
|
|
| 133 |
if to_delete:
|
| 134 |
placeholders = ",".join(["?" for _ in to_delete])
|
| 135 |
conn.execute(f"DELETE FROM alphas WHERE alpha_id IN ({placeholders})", to_delete)
|
| 136 |
+
logger.info(f"Deleted {len(to_delete)} alphas with unknown field references.")
|
| 137 |
|
| 138 |
conn.close()
|
| 139 |
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
| 142 |
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
| 143 |
+
logger.info("=== Alpha Factory Cleanup v0.2.0 ===")
|
| 144 |
+
logger.info("1. Removing alphas with fake/placeholder fields...")
|
| 145 |
cleanup_fake_alphas()
|
| 146 |
+
logger.info("2. Fixing quoted field names in expressions...")
|
| 147 |
cleanup_quoted_expressions()
|
| 148 |
+
logger.info("3. Removing alphas with unknown field references...")
|
| 149 |
cleanup_orphans()
|
| 150 |
+
logger.info("Done!")
|