"""Gradio app for Caliby sequence design."""
import base64
from pathlib import Path
import gradio as gr
# Eagerly import so the wandb/pydantic init runs in the main thread
# (where sys.modules['__main__'] exists), not in a Gradio worker thread.
import caliby.data.preprocessing.atomworks.clean_pdbs # noqa: F401
from design import design_sequences
from file_utils import _get_file_path, _write_zip_from_paths
from viewers import (
_csv_download_output,
_file_output,
_format_results_display,
_get_best_sc_sample,
_render_af2_viewer,
_update_viewers,
)
def _get_upload_instructions(mode: str) -> str:
if mode == "none":
return "Upload a single PDB or CIF file."
elif mode == "synthetic":
return "Upload a single PDB or CIF file. Conformers will be generated automatically."
else:
return "Upload all PDB files — primary conformer first, then additional conformers."
def _clean_uploaded_pdbs(pdb_files: list | None):
    """Clean the uploaded structure files and build the follow-up UI updates.

    Args:
        pdb_files: Uploaded file objects from the file picker, or ``None`` / an
            empty list when nothing has been uploaded.

    Returns:
        A 4-tuple, in the order the upload handler wires its outputs:
        the cleaned file paths (``None`` when there was no upload), an update
        for the notification text, an update for the cleaned-files download,
        and an update toggling whether the design button is interactive.
    """
    if not pdb_files:
        # Nothing to clean: clear the stored paths, hide both widgets, and
        # keep the design button disabled.
        hide_notice = gr.update(visible=False)
        hide_download = gr.update(visible=False)
        lock_submit = gr.update(interactive=False)
        return None, hide_notice, hide_download, lock_submit

    from caliby import clean_pdbs

    input_paths = []
    for uploaded in pdb_files:
        input_paths.append(str(_get_file_path(uploaded)))

    cleaned_paths = clean_pdbs(input_paths)
    # Bundle the cleaned files into one archive for the download widget.
    archive_path = _write_zip_from_paths(cleaned_paths, "cleaned_pdbs", ".zip")

    notice_text = (
        "**Note:** Your files have been cleaned and standardized to mmCIF format "
        "to avoid downstream parsing and alignment issues. "
        "If you plan to use positional constraints, please download the cleaned files and double "
        "check the new residue indices."
    )
    show_notice = gr.update(value=notice_text, visible=True)
    offer_download = gr.update(value=archive_path, visible=True)
    unlock_submit = gr.update(interactive=True)
    return cleaned_paths, show_notice, offer_download, unlock_submit
def _reset_cleaned_state():
    """Return the updates that discard any previously cleaned upload.

    The 4-tuple is: ``None`` for the stored cleaned paths, a "hide" update for
    the notification, a "hide" update for the download widget, and an update
    that makes the design button non-interactive.
    """
    hide_notice = gr.update(visible=False)
    hide_download = gr.update(visible=False)
    lock_submit = gr.update(interactive=False)
    return None, hide_notice, hide_download, lock_submit
def submit_design_sequences(
    cleaned_files: list[str] | None,
    ensemble_mode: str,
    model_variant: str,
    num_seqs: int,
    omit_aas: list[str] | None,
    temperature: float,
    fixed_pos_seq: str,
    fixed_pos_scn: str,
    fixed_pos_override_seq: str,
    pos_restrict_aatype: str,
    symmetry_pos: str,
    num_protpardelle_conformers: int,
    run_af2_eval: bool = False,
):
    """Run ``design_sequences`` and turn its outputs into Gradio updates.

    Every argument is forwarded unchanged to ``design_sequences``
    (``cleaned_files`` is passed as ``pdb_files``).

    Returns:
        A 14-tuple, positionally matched to the outputs of the submit handler:
        results header (shown), results table update, the raw results
        dataframe, FASTA textbox update, design-archive file output,
        self-consistency-archive file output, AF2 structure data, input
        structure data, best sample identifier, viewer-section visibility,
        AF2 viewer HTML, a cleared/hidden reference viewer, a hidden reference
        section, and a hidden results placeholder.
    """
    pipeline_outputs = design_sequences(
        pdb_files=cleaned_files,
        ensemble_mode=ensemble_mode,
        model_variant=model_variant,
        num_seqs=num_seqs,
        omit_aas=omit_aas,
        temperature=temperature,
        fixed_pos_seq=fixed_pos_seq,
        fixed_pos_scn=fixed_pos_scn,
        fixed_pos_override_seq=fixed_pos_override_seq,
        pos_restrict_aatype=pos_restrict_aatype,
        symmetry_pos=symmetry_pos,
        num_protpardelle_conformers=num_protpardelle_conformers,
        run_af2_eval=run_af2_eval,
    )
    df, fasta_text, out_zip_path, sc_zip_path, af2_pdb_data, input_pdb_data = pipeline_outputs

    # The AF2 viewer is only populated when the pipeline returned AF2 structure data.
    if af2_pdb_data:
        has_af2 = True
        best_sample = _get_best_sc_sample(df)
        af2_html = _render_af2_viewer(best_sample, af2_pdb_data)
    else:
        has_af2 = False
        best_sample = ""
        af2_html = ""

    show_header = gr.update(visible=True)
    results_table = gr.update(value=_format_results_display(df), visible=True)
    fasta_box = gr.update(value=fasta_text, visible=True)
    design_archive = _file_output(out_zip_path)
    sc_archive = _file_output(sc_zip_path)
    viewer_visibility = gr.update(visible=has_af2)
    # The reference viewer starts out empty and hidden after every run.
    cleared_reference_viewer = gr.update(value="", visible=False)
    hidden_reference_section = gr.update(visible=False)
    hidden_placeholder = gr.update(visible=False)

    return (
        show_header,
        results_table,
        df,
        fasta_box,
        design_archive,
        sc_archive,
        af2_pdb_data,
        input_pdb_data,
        best_sample,
        viewer_visibility,
        af2_html,
        cleared_reference_viewer,
        hidden_reference_section,
        hidden_placeholder,
    )
# App-wide theme: Gradio's Base theme with amber/orange hues, large corner
# radii, and the Instrument Sans web font (falling back to system sans-serif
# fonts), followed by a handful of variable overrides.
theme = gr.themes.Base(
    primary_hue="amber",
    secondary_hue="orange",
    radius_size="lg",
    font=[
        gr.themes.GoogleFont("Instrument Sans"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
).set(
    # Text colors.
    body_text_color="*neutral_700",
    body_text_color_dark="*neutral_300",
    body_text_color_subdued="*neutral_500",
    block_title_text_color="*neutral_700",
    block_info_text_color="*neutral_500",
    checkbox_label_text_color_dark="*neutral_100",
    # Block and block-label borders/padding.
    block_border_width_dark="0px",
    block_label_border_width_dark="0px",
    block_padding="*spacing_xl calc(*spacing_xl + 3px)",
    block_label_padding="*spacing_md *spacing_lg",
    # Secondary button fill (dark variant).
    button_secondary_background_fill_dark="*neutral_600",
)
# Custom stylesheet passed to ``demo.launch(css=...)`` at the bottom of this file.
# It defines the ``loading-pulse`` opacity animation, caps the height of lists
# under ``.omit-aa-dropdown``, lowers the minimum height of ``.compact-file``
# widgets, pins the second column of ``#results-table`` to 28rem with horizontal
# scrolling inside the cell, and centers the contents of ``#af2-viewer`` and
# ``#ref-viewer``.
css = """
.loading-pulse { animation: pulse 2.5s ease-in-out infinite; }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }
.omit-aa-dropdown ul { max-height: 200px !important; overflow-y: auto; }
.compact-file .large { min-height: 50px !important; }
#results-table th:nth-child(2),
#results-table td:nth-child(2) {
max-width: 28rem;
width: 28rem;
}
#results-table td:nth-child(2) {
overflow: hidden;
}
#results-table td:nth-child(2) > div {
display: block;
max-width: 100%;
overflow-x: auto;
overflow-y: hidden;
white-space: nowrap !important;
scrollbar-width: thin;
}
#af2-viewer, #ref-viewer {
display: flex;
justify-content: center;
}
#af2-viewer iframe, #ref-viewer iframe {
max-width: 100%;
}
"""
# Base64 text of the logo PNG that sits next to this file; read once at import time.
_LOGO_B64 = base64.b64encode(
    (Path(__file__).parent / "caliby_transparent.png").read_bytes()
).decode()
with gr.Blocks(title="Caliby - Protein Sequence Design") as demo:
    # NOTE(review): this block was rebuilt from a flattened copy of the file in
    # which indentation and inline HTML had been stripped. The container nesting
    # and the two HTML snippets (page header, loading indicator) are
    # reconstructions; confirm them against the intended design.

    # Page header: the logo embedded as a base64 data URI next to the app title.
    gr.HTML(
        '<div style="display: flex; align-items: center; gap: 12px;">'
        f'<img src="data:image/png;base64,{_LOGO_B64}" alt="Caliby logo" style="height: 48px;">'
        '<h1 style="margin: 0;">Caliby - Protein Sequence Design</h1>'
        '</div>'
    )

    with gr.Row():
        # ---- Left column: model choice, upload, sampling options, constraints ----
        with gr.Column(scale=1):
            model_variant = gr.Radio(
                choices=[
                    ("Caliby", "caliby"),
                    ("SolubleCaliby v1", "soluble_caliby_v1"),
                ],
                value="caliby",
                label="Model",
            )
            ensemble_mode = gr.Radio(
                choices=[
                    ("Fixed backbone", "none"),
                    ("Synthetic ensemble", "synthetic"),
                    ("Upload your own ensemble", "user"),
                ],
                value="synthetic",
                label="Ensemble mode",
            )
            run_af2_eval = gr.Checkbox(
                label="Run AF2 self-consistency evaluation",
                value=False,
                info="Refold designed sequences with AlphaFold2 and compute scRMSD, pLDDT, and TM-score",
            )
            # Initial text matches the default ensemble mode selected above.
            upload_instructions = gr.Markdown(
                _get_upload_instructions("synthetic"),
            )
            pdb_input = gr.File(
                file_count="multiple",
                label="PDB/CIF file(s)",
                file_types=[".pdb", ".cif"],
            )
            finish_upload_btn = gr.Button("Upload", variant="secondary")
            # Paths of the cleaned files; filled by the upload handler and fed to the design handler.
            cleaned_files_state = gr.State(None)
            clean_notification = gr.Markdown(visible=False)
            clean_download = gr.File(
                label="Download cleaned files", visible=False, elem_classes=["compact-file"]
            )
            num_seqs = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of sequences",
            )
            omit_aas = gr.Dropdown(
                # One-letter amino-acid codes, one choice per letter.
                choices=list("ACDEFGHIKLMNPQRSTVWY"),
                multiselect=True,
                label="Amino acids to omit",
                elem_classes=["omit-aa-dropdown"],
            )
            temperature = gr.Slider(
                minimum=0.01,
                maximum=1,
                value=0.01,
                step=0.01,
                label="Sampling temperature",
            )
            # Starts disabled; the upload handler enables it once files have been cleaned.
            submit_btn = gr.Button("Design sequences", variant="primary", interactive=False)

            with gr.Accordion("Advanced constraints", open=False):
                fixed_pos_seq = gr.Textbox(
                    label="Fixed positions",
                    info="Format: A1-100,B1-100 \nSequence positions in the input PDB to condition on so that they"
                    " remain fixed during design. For ensemble-conditioned design, fixed_pos_seq is applied using"
                    " the primary conformer's sequence.",
                    placeholder="e.g. A1-100,B1-100",
                )
                fixed_pos_scn = gr.Textbox(
                    label="Fixed sidechain positions",
                    info="Format: A1-10,A12,A15-20 \nSidechain positions in the input PDB to condition on so that they"
                    " remain fixed during design. Note that fixed sidechain positions must be a subset of fixed"
                    " sequence positions, since it does not make sense to condition on a sidechain without also"
                    " conditioning on its sequence identity.",
                    placeholder="e.g. A1-10,A12,A15-20",
                )
                fixed_pos_override_seq = gr.Textbox(
                    label="Override sequence at positions",
                    info="Format: A26:A,A27:L \nSequence positions in the input PDB to first override the sequence at,"
                    " and then condition on. The colon separates the position and the desired amino acid.",
                    placeholder="e.g. A26:A,A27:L",
                )
                pos_restrict_aatype = gr.Textbox(
                    label="Position restrictions",
                    info="Format: A26:AVG,A27:VG \nAllowed amino acids for certain positions in the input PDB. The"
                    " colon separates the position and the allowed amino acids.",
                    placeholder="e.g. A26:AVG,A27:VG",
                )
                symmetry_pos = gr.Textbox(
                    label="Symmetry positions",
                    info="Format: A10,B10,C10|A11,B11,C11 \nSymmetry positions for tying sampling across residue"
                    " positions. The pipe separates groups of positions to sample symmetrically. In the example,"
                    " A10, B10, and C10 are tied together, and A11, B11, and C11 are tied together.",
                    placeholder="e.g. A10,B10,C10|A11,B11,C11",
                )
                # Visible by default because the default ensemble mode is "synthetic";
                # the ensemble_mode.change handler below toggles it.
                num_protpardelle_conformers = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=15,
                    step=1,
                    label="Number of conformers to generate",
                    visible=True,
                )

        # ---- Right column: results, downloads, structure viewers ----
        with gr.Column(scale=2):
            # Per-session state shared between the design handler and the viewer handlers.
            raw_results_df = gr.State(None)
            af2_pdb_state = gr.State({})
            input_pdb_state = gr.State({})
            best_sample_state = gr.State("")

            results_placeholder = gr.Markdown(
                "Results will appear here after designing sequences.",
            )
            results_header = gr.Markdown("### Results", visible=False)
            results_df = gr.Dataframe(
                show_label=False,
                interactive=False,
                wrap=False,
                column_widths=[160, 448],
                elem_id="results-table",
                visible=False,
            )
            fasta_output = gr.Textbox(
                label="Sequences (FASTA)",
                lines=10,
                visible=False,
            )
            with gr.Row():
                csv_download = gr.File(label="Download results CSV", elem_classes=["compact-file"], visible=False)
                output_files = gr.File(label="Download CIF files", elem_classes=["compact-file"], visible=False)
                sc_output_files = gr.File(
                    label="Download AF2 self-consistency outputs",
                    elem_classes=["compact-file"],
                    visible=False,
                )

            # Hidden until a run returns AF2 structure data.
            with gr.Column(visible=False) as viewer_section:
                gr.Markdown("---")
                with gr.Row():
                    gr.Markdown("### AF2 Prediction")
                    af2_color_mode = gr.Dropdown(
                        choices=[
                            ("pLDDT", "plddt"),
                            ("Chain", "chain"),
                            ("Rainbow", "rainbow"),
                            ("Secondary structure", "secondary"),
                        ],
                        value="plddt",
                        label="Color by",
                        scale=0,
                    )
                af2_viewer = gr.HTML(elem_id="af2-viewer")
                show_overlay = gr.Checkbox(label="Show reference structure", value=False)

                # Visibility of this section is driven by the viewer handlers below.
                with gr.Column(visible=False) as ref_section:
                    with gr.Row():
                        gr.Markdown("### Reference Structure")
                        ref_color_mode = gr.Dropdown(
                            choices=[
                                ("Chain", "chain"),
                                ("pLDDT", "plddt"),
                                ("Rainbow", "rainbow"),
                                ("Secondary structure", "secondary"),
                            ],
                            value="chain",
                            label="Color by",
                            scale=0,
                        )
                    reference_viewer = gr.HTML(elem_id="ref-viewer")

    # ---- Event wiring ----

    # Design: first swap the placeholder for a pulsing status line (styled by the
    # ``.loading-pulse`` rule in ``css``), then run the pipeline. The outputs list
    # must stay in the same order as the tuple returned by submit_design_sequences.
    submit_btn.click(
        fn=lambda: gr.update(
            value='<div class="loading-pulse">Running design pipeline\u2026</div>',
            visible=True,
        ),
        outputs=[results_placeholder],
    ).then(
        fn=submit_design_sequences,
        inputs=[
            cleaned_files_state,
            ensemble_mode,
            model_variant,
            num_seqs,
            omit_aas,
            temperature,
            fixed_pos_seq,
            fixed_pos_scn,
            fixed_pos_override_seq,
            pos_restrict_aatype,
            symmetry_pos,
            num_protpardelle_conformers,
            run_af2_eval,
        ],
        outputs=[
            results_header,
            results_df,
            raw_results_df,
            fasta_output,
            output_files,
            sc_output_files,
            af2_pdb_state,
            input_pdb_state,
            best_sample_state,
            viewer_section,
            af2_viewer,
            reference_viewer,
            ref_section,
            results_placeholder,
        ],
    )

    # Refresh the CSV download whenever the stored results dataframe changes.
    raw_results_df.change(fn=_csv_download_output, inputs=[raw_results_df], outputs=[csv_download])

    # Upload: disable the button while cleaning runs, clean the files, then restore the button.
    finish_upload_btn.click(
        fn=lambda: gr.update(value="Processing\u2026", interactive=False),
        outputs=[finish_upload_btn],
    ).then(
        fn=_clean_uploaded_pdbs,
        inputs=[pdb_input],
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    ).then(
        fn=lambda: gr.update(value="Upload", interactive=True),
        outputs=[finish_upload_btn],
    )

    # Any change to the selected files drops the previously cleaned set and
    # disables the design button until the files are cleaned again.
    pdb_input.change(
        fn=_reset_cleaned_state,
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    )

    # Show the conformer-count slider only in synthetic mode and refresh the upload hint.
    ensemble_mode.change(
        fn=lambda mode: (gr.update(visible=(mode == "synthetic")), _get_upload_instructions(mode)),
        inputs=[ensemble_mode],
        outputs=[num_protpardelle_conformers, upload_instructions],
    )

    # All three viewer controls re-render through the same handler with the same inputs/outputs.
    viewer_inputs = [
        best_sample_state,
        af2_pdb_state,
        input_pdb_state,
        show_overlay,
        af2_color_mode,
        ref_color_mode,
    ]
    viewer_outputs = [af2_viewer, reference_viewer, ref_section]
    show_overlay.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    af2_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    ref_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
if __name__ == "__main__":
    # The theme and stylesheet defined above are supplied at launch time;
    # ``ssr_mode`` is explicitly switched off.
    launch_options = {"theme": theme, "css": css, "ssr_mode": False}
    demo.launch(**launch_options)