ntranoslab
/

vesm

English

Model card Files and versions

xet

Community

tuandinh committed on Oct 14, 2025

Commit

946749e

verified ·

1 Parent(s): c350f8f

Update README.md

Browse files

Files changed (1) hide show

README.md +21 -24

README.md CHANGED Viewed

@@ -81,7 +81,7 @@ def load_vesm(model_name="VESM_3B", local_dir="vesm", device='cuda'):
 ```py
 # scoring functions
 import torch.nn.functional as F
-# calcualte log-likelihood ratio from the logits
 def get_llrs(sequence_logits, input_ids):
     token_probs = torch.log_softmax(sequence_logits, dim=-1)
     wt_positions = F.one_hot(input_ids, num_classes=token_probs.shape[-1])
@@ -91,23 +91,23 @@ def get_llrs(sequence_logits, input_ids):
     llrs = token_probs - wt_probs.expand(token_probs.shape)
     return llrs
-# compute mutant score
-def score_mutant(llrs, mutant, sequence_vocabs):
-    mutant_score = 0
-    for mut in mutant.split(":"):
         _, idx, mt = mut[0], int(mut[1:-1]), mut[-1]
         pred = llrs[idx, sequence_vocabs[mt]]
-        mutant_score += pred.item()
-    return mutant_score
 ```
 #### Sequence-only Models
-Here, we provide sample scripts to compute mutant scores with VESM models
 ```py
-# sequence and mutant
 sequence = "MVNSTHRGMHTSLHLWNRSSYRLHSNASESLGKGYSDGGCYEQLFVSPEVFVTLGVISLLENILV"
-mutant = "M1Y:V2T"
 ```
 ```py
@@ -123,21 +123,18 @@ def inference(model, tokenizer, sequence, device):
         outputs = model(**tokens)
     logits = outputs['logits'][0]
     input_ids = tokens['input_ids'][0]
-    # calcualte log-likelihood ratio from the logits
     llrs = get_llrs(logits, input_ids)
     return llrs
-"""
-    Prediction with VESM models
-"""
-# load vesm models
 model_name = 'VESM_3B'
 model, tokenizer = load_vesm(model_name, local_dir=local_dir, device=device)
 sequence_vocabs = tokenizer.get_vocab()
-# inference
 llrs = inference(model, tokenizer, sequence, device)
-mutant_score = score_mutant(llrs, mutant, sequence_vocabs)
-print(f"Predicted score by {model_name}: ", mutant_score)
 ```
@@ -149,13 +146,13 @@ from esm.sdk.api import ESMProtein
 # !wget https://alphafold.ebi.ac.uk/files/AF-P32245-F1-model_v6.pdb
 pdb_file = "AF-P32245-F1-model_v6.pdb"
 protein = ESMProtein.from_pdb(pdb_file)
-mutant = "M1Y:V2T"
 ```
 ```py
 # load model
 model, tokenizer = load_vesm('VESM3', local_dir=local_dir, device=device)
-sequence_vocabs = model.tokenizers.sequence.vocab
 # inference
 tokens = model.encode(protein)
@@ -168,13 +165,13 @@ with torch.no_grad():
 # calculate log-likelihood ratio from the logits
 llrs = get_llrs(logits, input_ids)
-# compute mutant score
-mutant_score = score_mutant(llrs, mutant, sequence_vocabs)
-print("Mutant score: ", mutant_score)
 ```
 ## License  <a name="license"></a>
-The source code and model weights for VESM1 and VESM2 are distributed under the MIT License.
 The VESM3 model is a fine-tuned version of ESM3-Open (EvolutionaryScale) and is available under a [non-commercial license agreement](https://www.evolutionaryscale.ai/policies/cambrian-open-license-agreement).

 ```py
 # scoring functions
 import torch.nn.functional as F
+# calculate log-likelihood ratio from the logits
 def get_llrs(sequence_logits, input_ids):
     token_probs = torch.log_softmax(sequence_logits, dim=-1)
     wt_positions = F.one_hot(input_ids, num_classes=token_probs.shape[-1])
     llrs = token_probs - wt_probs.expand(token_probs.shape)
     return llrs
+# compute mutation score
+def score_mutation(llrs, mutation, sequence_vocabs):
+    mutation_score = 0
+    for mut in mutation.split(":"):
         _, idx, mt = mut[0], int(mut[1:-1]), mut[-1]
         pred = llrs[idx, sequence_vocabs[mt]]
+        mutation_score += pred.item()
+    return mutation_score
 ```
 #### Sequence-only Models
+Here, we provide sample scripts to compute mutation scores.
 ```py
+# sequence and mutation
 sequence = "MVNSTHRGMHTSLHLWNRSSYRLHSNASESLGKGYSDGGCYEQLFVSPEVFVTLGVISLLENILV"
+mutation = "M1Y:V2T"
 ```
 ```py
         outputs = model(**tokens)
     logits = outputs['logits'][0]
     input_ids = tokens['input_ids'][0]
+    # calculate log-likelihood ratio from the logits
     llrs = get_llrs(logits, input_ids)
     return llrs
+# Prediction with VESM
 model_name = 'VESM_3B'
 model, tokenizer = load_vesm(model_name, local_dir=local_dir, device=device)
 sequence_vocabs = tokenizer.get_vocab()
+# compute mutation score
 llrs = inference(model, tokenizer, sequence, device)
+mutation_score = score_mutation(llrs, mutation, sequence_vocabs)
+print(f"Predicted score by {model_name}: ", mutation_score)
 ```
 # !wget https://alphafold.ebi.ac.uk/files/AF-P32245-F1-model_v6.pdb
 pdb_file = "AF-P32245-F1-model_v6.pdb"
 protein = ESMProtein.from_pdb(pdb_file)
+mutation = "M1Y:V2T"
 ```
 ```py
 # load model
 model, tokenizer = load_vesm('VESM3', local_dir=local_dir, device=device)
+sequence_vocabs = tokenizer.get_vocab()
 # inference
 tokens = model.encode(protein)
 # calculate log-likelihood ratio from the logits
 llrs = get_llrs(logits, input_ids)
+# compute mutation score
+mutation_score = score_mutation(llrs, mutation, sequence_vocabs)
+print("mutation score: ", mutation_score)
 ```
 ## License  <a name="license"></a>
+The source code and model weights for VESM models are distributed under the MIT License.
 The VESM3 model is a fine-tuned version of ESM3-Open (EvolutionaryScale) and is available under a [non-commercial license agreement](https://www.evolutionaryscale.ai/policies/cambrian-open-license-agreement).