Spaces:

ChatterjeeLab
/

PeptiVerse

Running

App Files Community

yinuozhang committed on 7 days ago

Commit

c0948c4

1 Parent(s): 7cf9dfd

update model

Browse files

Files changed (1) hide show

app.py +12 -15

app.py CHANGED Viewed

@@ -73,6 +73,15 @@ ASSETS_DATA   = ASSETS / "training_data_cleaned"; ASSETS_DATA.mkdir(parents=True
 MODEL_REPO = "ChatterjeeLab/PeptiVerse"       # model repo
 DATASET_REPO = "ChatterjeeLab/PeptiVerse"        # dataset repo
 def canon_model(parsed) -> Optional[str]:
     """Return the bare lowercase model name from a parsed (model, emb_tag) tuple or raw string."""
@@ -88,9 +97,6 @@ def get_required_patterns(manifest_path: Path) -> List[str]:
     manifest = read_best_manifest_csv(manifest_path)
     patterns = set()
-    patterns.add("tokenizer/new_vocab.txt")
-    patterns.add("tokenizer/new_splits.txt")
     patterns.add("training_data_cleaned/**/*.csv")
     for prop_key, row in manifest.items():
@@ -159,18 +165,9 @@ def fetch_models_and_data():
             "training_data_cleaned/**/*.csv",
         ],
     )
-fetch_models_and_data()
 """
-BEST_TXT = Path("basic_models.txt")
-TRAINING_ROOT = ASSETS_MODELS / "training_classifiers"
-TOKENIZER_DIR = ASSETS_MODELS / "tokenizer"
-# Banned models that should fall back to XGB
-BANNED_MODELS = {"svm", "enet", "svm_gpu", "enet_gpu"}
-# "lower is better" exceptions for classification labeling
-LOWER_BETTER = {"hemolysis", "toxicity"}
 # Property display names and descriptions
 PROPERTY_INFO = {
@@ -313,8 +310,8 @@ class AppContext:
             classifier_weight_root=ASSETS_MODELS,
             esm_name="facebook/esm2_t33_650M_UR50D",
             clm_name="aaronfeller/PeptideCLM-23M-all",
-            smiles_vocab=str(TOKENIZER_DIR / "new_vocab.txt"),
-            smiles_splits=str(TOKENIZER_DIR / "new_splits.txt"),
             device=str(self.device),
         )

 MODEL_REPO = "ChatterjeeLab/PeptiVerse"       # model repo
 DATASET_REPO = "ChatterjeeLab/PeptiVerse"        # dataset repo
+BEST_TXT = Path("basic_models.txt")
+TRAINING_ROOT = ASSETS_MODELS / "training_classifiers"
+#TOKENIZER_DIR = ASSETS_MODELS / "tokenizer"
+# Banned models that should fall back to XGB
+BANNED_MODELS = {"svm", "enet", "svm_gpu", "enet_gpu"}
+# "lower is better" exceptions for classification labeling
+LOWER_BETTER = {"hemolysis", "toxicity"}
 def canon_model(parsed) -> Optional[str]:
     """Return the bare lowercase model name from a parsed (model, emb_tag) tuple or raw string."""
     manifest = read_best_manifest_csv(manifest_path)
     patterns = set()
     patterns.add("training_data_cleaned/**/*.csv")
     for prop_key, row in manifest.items():
             "training_data_cleaned/**/*.csv",
         ],
     )
 """
+fetch_models_and_data()
 # Property display names and descriptions
 PROPERTY_INFO = {
             classifier_weight_root=ASSETS_MODELS,
             esm_name="facebook/esm2_t33_650M_UR50D",
             clm_name="aaronfeller/PeptideCLM-23M-all",
+            smiles_vocab=str(Path(__file__).parent / "tokenizer" / "new_vocab.txt"),
+            smiles_splits=str(Path(__file__).parent / "tokenizer" / "new_splits.txt"),
             device=str(self.device),
         )