yinuozhang committed on
Commit
c0948c4
·
1 Parent(s): 7cf9dfd

update model

Browse files
Files changed (1) hide show
  1. app.py +12 -15
app.py CHANGED
@@ -73,6 +73,15 @@ ASSETS_DATA = ASSETS / "training_data_cleaned"; ASSETS_DATA.mkdir(parents=True
73
 
74
  MODEL_REPO = "ChatterjeeLab/PeptiVerse" # model repo
75
  DATASET_REPO = "ChatterjeeLab/PeptiVerse" # dataset repo
 
 
 
 
 
 
 
 
 
76
 
77
  def canon_model(parsed) -> Optional[str]:
78
  """Return the bare lowercase model name from a parsed (model, emb_tag) tuple or raw string."""
@@ -88,9 +97,6 @@ def get_required_patterns(manifest_path: Path) -> List[str]:
88
 
89
  manifest = read_best_manifest_csv(manifest_path)
90
  patterns = set()
91
-
92
- patterns.add("tokenizer/new_vocab.txt")
93
- patterns.add("tokenizer/new_splits.txt")
94
  patterns.add("training_data_cleaned/**/*.csv")
95
 
96
  for prop_key, row in manifest.items():
@@ -159,18 +165,9 @@ def fetch_models_and_data():
159
  "training_data_cleaned/**/*.csv",
160
  ],
161
  )
162
-
163
- fetch_models_and_data()
164
  """
165
- BEST_TXT = Path("basic_models.txt")
166
- TRAINING_ROOT = ASSETS_MODELS / "training_classifiers"
167
- TOKENIZER_DIR = ASSETS_MODELS / "tokenizer"
168
-
169
- # Banned models that should fall back to XGB
170
- BANNED_MODELS = {"svm", "enet", "svm_gpu", "enet_gpu"}
171
 
172
- # "lower is better" exceptions for classification labeling
173
- LOWER_BETTER = {"hemolysis", "toxicity"}
174
 
175
  # Property display names and descriptions
176
  PROPERTY_INFO = {
@@ -313,8 +310,8 @@ class AppContext:
313
  classifier_weight_root=ASSETS_MODELS,
314
  esm_name="facebook/esm2_t33_650M_UR50D",
315
  clm_name="aaronfeller/PeptideCLM-23M-all",
316
- smiles_vocab=str(TOKENIZER_DIR / "new_vocab.txt"),
317
- smiles_splits=str(TOKENIZER_DIR / "new_splits.txt"),
318
  device=str(self.device),
319
  )
320
 
 
73
 
74
  MODEL_REPO = "ChatterjeeLab/PeptiVerse" # model repo
75
  DATASET_REPO = "ChatterjeeLab/PeptiVerse" # dataset repo
76
+ BEST_TXT = Path("basic_models.txt")
77
+ TRAINING_ROOT = ASSETS_MODELS / "training_classifiers"
78
+ #TOKENIZER_DIR = ASSETS_MODELS / "tokenizer"
79
+
80
+ # Banned models that should fall back to XGB
81
+ BANNED_MODELS = {"svm", "enet", "svm_gpu", "enet_gpu"}
82
+
83
+ # "lower is better" exceptions for classification labeling
84
+ LOWER_BETTER = {"hemolysis", "toxicity"}
85
 
86
  def canon_model(parsed) -> Optional[str]:
87
  """Return the bare lowercase model name from a parsed (model, emb_tag) tuple or raw string."""
 
97
 
98
  manifest = read_best_manifest_csv(manifest_path)
99
  patterns = set()
 
 
 
100
  patterns.add("training_data_cleaned/**/*.csv")
101
 
102
  for prop_key, row in manifest.items():
 
165
  "training_data_cleaned/**/*.csv",
166
  ],
167
  )
 
 
168
  """
169
+ fetch_models_and_data()
 
 
 
 
 
170
 
 
 
171
 
172
  # Property display names and descriptions
173
  PROPERTY_INFO = {
 
310
  classifier_weight_root=ASSETS_MODELS,
311
  esm_name="facebook/esm2_t33_650M_UR50D",
312
  clm_name="aaronfeller/PeptideCLM-23M-all",
313
+ smiles_vocab=str(Path(__file__).parent / "tokenizer" / "new_vocab.txt"),
314
+ smiles_splits=str(Path(__file__).parent / "tokenizer" / "new_splits.txt"),
315
  device=str(self.device),
316
  )
317