bobbypaton committed on
Commit ·
899f17e
1
Parent(s): 99ae44b
Log predictions to patonlab/analytics dataset
Browse files- requirements.txt +3 -0
- worker.py +44 -5
requirements.txt
CHANGED
|
@@ -21,4 +21,7 @@ scipy==1.3.3
|
|
| 21 |
scikit-learn==0.21.3
|
| 22 |
tqdm==4.64.1
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
# nfp is vendored locally — no PyPI entry needed
|
|
|
|
| 21 |
scikit-learn==0.21.3
|
| 22 |
tqdm==4.64.1
|
| 23 |
|
| 24 |
+
# HF dataset logging
|
| 25 |
+
huggingface_hub==0.16.4
|
| 26 |
+
|
| 27 |
# nfp is vendored locally — no PyPI entry needed
|
worker.py
CHANGED
|
@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore", category=FutureWarning)
|
|
| 12 |
import json
|
| 13 |
import math
|
| 14 |
import pickle
|
|
|
|
| 15 |
from io import StringIO
|
| 16 |
|
| 17 |
import redis
|
|
@@ -68,6 +69,46 @@ redis_client = redis.StrictRedis(
|
|
| 68 |
host="localhost", port=6379, db=0, decode_responses=True
|
| 69 |
)
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
def _mol_to_sdf(mol, conf_id=0):
|
| 73 |
sio = StringIO()
|
|
@@ -140,7 +181,6 @@ def _fmt_relative_E(spread_df, energy_order):
|
|
| 140 |
def run_job(task_id, smiles, type_):
|
| 141 |
result_key = f"task_result_{task_id}"
|
| 142 |
try:
|
| 143 |
-
# Prepare mol — preprocess_C/H expects a mol without Hs (it adds them internally)
|
| 144 |
mol = Chem.MolFromSmiles(smiles)
|
| 145 |
AllChem.EmbedMolecule(mol, useRandomCoords=True)
|
| 146 |
mol_with_h = Chem.AddHs(mol, addCoords=True)
|
|
@@ -149,12 +189,8 @@ def run_job(task_id, smiles, type_):
|
|
| 149 |
mol_with_confs, ids, nr = genConf(mol_with_h, rms=-1, nc=200, efilter=10.0, rmspost=0.5)
|
| 150 |
print(f"genConf: {len(ids)} conformers", flush=True)
|
| 151 |
|
| 152 |
-
# Build SDFs directly from genConf mol (energy order, lowest first)
|
| 153 |
conf_sdfs, energy_order = _build_sdfs_from_genconf(mol_with_confs, ids)
|
| 154 |
|
| 155 |
-
# Run GNN preprocessing and inference using the same mol_with_confs
|
| 156 |
-
# Pass mol_with_confs as a list — preprocess will use its existing conformers
|
| 157 |
-
# by calling genConf internally, but we suppress that output
|
| 158 |
mols = [Chem.MolFromSmiles(smiles)]
|
| 159 |
for m in mols:
|
| 160 |
AllChem.EmbedMolecule(m, useRandomCoords=True)
|
|
@@ -215,6 +251,9 @@ def run_job(task_id, smiles, type_):
|
|
| 215 |
redis_client.set(result_key, json.dumps(result), ex=3600)
|
| 216 |
print(f"Task {task_id} complete β {len(conf_sdfs)} conformers", flush=True)
|
| 217 |
|
|
|
|
|
|
|
|
|
|
| 218 |
except Exception as e:
|
| 219 |
import traceback; traceback.print_exc()
|
| 220 |
redis_client.set(result_key, json.dumps({"errMessage": str(e)}), ex=3600)
|
|
|
|
| 12 |
import json
|
| 13 |
import math
|
| 14 |
import pickle
|
| 15 |
+
import datetime
|
| 16 |
from io import StringIO
|
| 17 |
|
| 18 |
import redis
|
|
|
|
| 69 |
host="localhost", port=6379, db=0, decode_responses=True
|
| 70 |
)
|
| 71 |
|
| 72 |
+
# ── Analytics logging to HF Dataset ──────────────────────────────────────────
|
| 73 |
+
_HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 74 |
+
_ANALYTICS_REPO = "patonlab/analytics"
|
| 75 |
+
_ANALYTICS_FILE = "data.csv"
|
| 76 |
+
|
| 77 |
+
def _log_prediction():
|
| 78 |
+
"""Append one row to the existing patonlab/analytics data.csv.
|
| 79 |
+
Format matches the alfabet log: space,timestamp
|
| 80 |
+
"""
|
| 81 |
+
if not _HF_TOKEN:
|
| 82 |
+
return
|
| 83 |
+
try:
|
| 84 |
+
from huggingface_hub import HfApi
|
| 85 |
+
import tempfile
|
| 86 |
+
|
| 87 |
+
api = HfApi(token=_HF_TOKEN)
|
| 88 |
+
timestamp = datetime.datetime.utcnow().isoformat()
|
| 89 |
+
|
| 90 |
+
# Download the current CSV, append a row, re-upload
|
| 91 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
| 92 |
+
local_path = os.path.join(tmpdir, "data.csv")
|
| 93 |
+
api.hf_hub_download(
|
| 94 |
+
repo_id=_ANALYTICS_REPO,
|
| 95 |
+
filename=_ANALYTICS_FILE,
|
| 96 |
+
repo_type="dataset",
|
| 97 |
+
local_dir=tmpdir,
|
| 98 |
+
)
|
| 99 |
+
with open(local_path, "a") as f:
|
| 100 |
+
f.write(f"patonlab/cascade,{timestamp}\n")
|
| 101 |
+
|
| 102 |
+
api.upload_file(
|
| 103 |
+
path_or_fileobj=local_path,
|
| 104 |
+
path_in_repo=_ANALYTICS_FILE,
|
| 105 |
+
repo_id=_ANALYTICS_REPO,
|
| 106 |
+
repo_type="dataset",
|
| 107 |
+
commit_message=f"log: cascade prediction {timestamp[:10]}",
|
| 108 |
+
)
|
| 109 |
+
except Exception as e:
|
| 110 |
+
print(f"Analytics logging failed (non-fatal): {e}", flush=True)
|
| 111 |
+
|
| 112 |
|
| 113 |
def _mol_to_sdf(mol, conf_id=0):
|
| 114 |
sio = StringIO()
|
|
|
|
| 181 |
def run_job(task_id, smiles, type_):
|
| 182 |
result_key = f"task_result_{task_id}"
|
| 183 |
try:
|
|
|
|
| 184 |
mol = Chem.MolFromSmiles(smiles)
|
| 185 |
AllChem.EmbedMolecule(mol, useRandomCoords=True)
|
| 186 |
mol_with_h = Chem.AddHs(mol, addCoords=True)
|
|
|
|
| 189 |
mol_with_confs, ids, nr = genConf(mol_with_h, rms=-1, nc=200, efilter=10.0, rmspost=0.5)
|
| 190 |
print(f"genConf: {len(ids)} conformers", flush=True)
|
| 191 |
|
|
|
|
| 192 |
conf_sdfs, energy_order = _build_sdfs_from_genconf(mol_with_confs, ids)
|
| 193 |
|
|
|
|
|
|
|
|
|
|
| 194 |
mols = [Chem.MolFromSmiles(smiles)]
|
| 195 |
for m in mols:
|
| 196 |
AllChem.EmbedMolecule(m, useRandomCoords=True)
|
|
|
|
| 251 |
redis_client.set(result_key, json.dumps(result), ex=3600)
|
| 252 |
print(f"Task {task_id} complete β {len(conf_sdfs)} conformers", flush=True)
|
| 253 |
|
| 254 |
+
# Log to analytics dataset (best-effort: runs synchronously, but failures are non-fatal)
|
| 255 |
+
_log_prediction()
|
| 256 |
+
|
| 257 |
except Exception as e:
|
| 258 |
import traceback; traceback.print_exc()
|
| 259 |
redis_client.set(result_key, json.dumps({"errMessage": str(e)}), ex=3600)
|