"""Gradio app for Caliby sequence design.

Defines the upload/clean and design callbacks, the theme and CSS, and the
``gr.Blocks`` layout with its event wiring. Running the module as a script
launches the app.
"""

import base64
from pathlib import Path

import gradio as gr

# Eagerly import so the wandb/pydantic init runs in the main thread
# (where sys.modules['__main__'] exists), not in a Gradio worker thread.
import caliby.data.preprocessing.atomworks.clean_pdbs  # noqa: F401
from design import design_sequences
from file_utils import _get_file_path, _write_zip_from_paths
from viewers import (
    _csv_download_output,
    _file_output,
    _format_results_display,
    _get_best_sc_sample,
    _render_af2_viewer,
    _update_viewers,
)

# NOTE(review): the copy of this file under review had no indentation and several
# string literals were split across physical lines. The `with` nesting below is
# reconstructed from component order, and every split inside a literal is kept as
# an explicit "\n". `_LOGO_B64` and the `.loading-pulse` CSS rule are not referenced
# by any string visible in that copy, which suggests HTML markup was lost from the
# page header and the loading message - confirm against the intended version.


def _get_upload_instructions(mode: str) -> str:
    """Return the upload hint shown above the file picker for an ensemble mode.

    Args:
        mode: Ensemble mode value; "none" and "synthetic" have dedicated messages,
            any other value gets the multi-file (user ensemble) message.
    """
    if mode == "none":
        return "Upload a single PDB or CIF file."
    elif mode == "synthetic":
        return "Upload a single PDB or CIF file. Conformers will be generated automatically."
    else:
        return "Upload all PDB files — primary conformer first, then additional conformers."


def _clean_uploaded_pdbs(pdb_files: list | None) -> tuple:
    """Clean the uploaded structure files and expose them for download.

    Args:
        pdb_files: Files from the upload component, or None/empty when nothing
            has been uploaded.

    Returns:
        A 4-tuple matching the outputs wired to this callback: the cleaned file
        paths, an update for the notification text, an update for the download
        component (a zip of the cleaned files), and an update that enables the
        design button. With no files, the same "reset" values as
        ``_reset_cleaned_state`` are returned.
    """
    if not pdb_files:
        # Nothing to clean: identical outcome to clearing the upload.
        return _reset_cleaned_state()

    # Imported here rather than at module top, as in the original code.
    from caliby import clean_pdbs

    pdb_paths = [str(_get_file_path(f)) for f in pdb_files]
    cleaned_paths = clean_pdbs(pdb_paths)
    zip_path = _write_zip_from_paths(cleaned_paths, "cleaned_pdbs", ".zip")
    return (
        cleaned_paths,
        gr.update(
            value="**Note:** Your files have been cleaned and standardized to mmCIF format "
            "to avoid downstream parsing and alignment issues. \n"
            "If you plan to use positional constraints, please download the cleaned files and double "
            "check the new residue indices.",
            visible=True,
        ),
        gr.update(value=zip_path, visible=True),
        gr.update(interactive=True),
    )


def _reset_cleaned_state() -> tuple:
    """Return the "nothing cleaned yet" values for the upload-related outputs.

    The tuple clears the cleaned-files state, hides the notification and the
    download component, and disables the design button.
    """
    return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=False)


def submit_design_sequences(
    cleaned_files: list[str] | None,
    ensemble_mode: str,
    model_variant: str,
    num_seqs: int,
    omit_aas: list[str] | None,
    temperature: float,
    fixed_pos_seq: str,
    fixed_pos_scn: str,
    fixed_pos_override_seq: str,
    pos_restrict_aatype: str,
    symmetry_pos: str,
    num_protpardelle_conformers: int,
    run_af2_eval: bool = False,
) -> tuple:
    """Run ``design_sequences`` and adapt its result to the results-panel outputs.

    All arguments are forwarded unchanged to ``design_sequences`` (``cleaned_files``
    is passed as ``pdb_files``).

    Returns:
        A 14-tuple whose order must match the ``outputs`` list of the
        ``submit_btn`` click chain: results header, results table, raw results
        state, FASTA text, CIF download, self-consistency download, AF2 PDB state,
        input PDB state, best-sample state, viewer section, AF2 viewer HTML,
        reference viewer, reference section, results placeholder.
    """
    df, fasta_text, out_zip_path, sc_zip_path, af2_pdb_data, input_pdb_data = design_sequences(
        pdb_files=cleaned_files,
        ensemble_mode=ensemble_mode,
        model_variant=model_variant,
        num_seqs=num_seqs,
        omit_aas=omit_aas,
        temperature=temperature,
        fixed_pos_seq=fixed_pos_seq,
        fixed_pos_scn=fixed_pos_scn,
        fixed_pos_override_seq=fixed_pos_override_seq,
        pos_restrict_aatype=pos_restrict_aatype,
        symmetry_pos=symmetry_pos,
        num_protpardelle_conformers=num_protpardelle_conformers,
        run_af2_eval=run_af2_eval,
    )

    # The viewer is only populated (and shown) when AF2 data came back.
    has_af2 = bool(af2_pdb_data)
    best_sample = _get_best_sc_sample(df) if has_af2 else ""
    af2_html = _render_af2_viewer(best_sample, af2_pdb_data) if has_af2 else ""

    return (
        gr.update(visible=True),
        gr.update(value=_format_results_display(df), visible=True),
        df,
        gr.update(value=fasta_text, visible=True),
        _file_output(out_zip_path),
        _file_output(sc_zip_path),
        af2_pdb_data,
        input_pdb_data,
        best_sample,
        gr.update(visible=has_af2),
        af2_html,
        gr.update(value="", visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )


theme = gr.themes.Base(
    primary_hue="amber",
    secondary_hue="orange",
    radius_size="lg",
    font=[gr.themes.GoogleFont('Instrument Sans'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
).set(
    body_text_color='*neutral_700',
    body_text_color_dark='*neutral_300',
    body_text_color_subdued='*neutral_500',
    block_title_text_color='*neutral_700',
    block_info_text_color='*neutral_500',
    block_border_width_dark='0px',
    block_padding='*spacing_xl calc(*spacing_xl + 3px)',
    block_label_border_width_dark='0px',
    block_label_padding='*spacing_md *spacing_lg',
    button_secondary_background_fill_dark='*neutral_600',
    checkbox_label_text_color_dark='*neutral_100',
)

# Custom CSS, one rule per literal; adjacent literals concatenate to the same text.
css = (
    " .loading-pulse { animation: pulse 2.5s ease-in-out infinite; }"
    " @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }"
    " .omit-aa-dropdown ul { max-height: 200px !important; overflow-y: auto; }"
    " .compact-file .large { min-height: 50px !important; }"
    " #results-table th:nth-child(2), #results-table td:nth-child(2) { max-width: 28rem; width: 28rem; }"
    " #results-table td:nth-child(2) { overflow: hidden; }"
    " #results-table td:nth-child(2) > div { display: block; max-width: 100%; overflow-x: auto;"
    " overflow-y: hidden; white-space: nowrap !important; scrollbar-width: thin; }"
    " #af2-viewer, #ref-viewer { display: flex; justify-content: center; }"
    " #af2-viewer iframe, #ref-viewer iframe { max-width: 100%; } "
)

# Logo image next to this file, base64-encoded at import time.
_LOGO_B64 = base64.b64encode(Path(__file__).with_name("caliby_transparent.png").read_bytes()).decode()

with gr.Blocks(title="Caliby - Protein Sequence Design") as demo:
    # Page header.
    gr.HTML(
        '\n'
        f'Caliby logo'
        '\n\nCaliby - Protein Sequence Design\n\n'
        '\n'
    )

    with gr.Row():
        # Left column: inputs and design settings.
        with gr.Column(scale=1):
            model_variant = gr.Radio(
                choices=[
                    ("Caliby", "caliby"),
                    ("SolubleCaliby v1", "soluble_caliby_v1"),
                ],
                value="caliby",
                label="Model",
            )
            ensemble_mode = gr.Radio(
                choices=[
                    ("Fixed backbone", "none"),
                    ("Synthetic ensemble", "synthetic"),
                    ("Upload your own ensemble", "user"),
                ],
                value="synthetic",
                label="Ensemble mode",
            )
            run_af2_eval = gr.Checkbox(
                label="Run AF2 self-consistency evaluation",
                value=False,
                info="Refold designed sequences with AlphaFold2 and compute scRMSD, pLDDT, and TM-score",
            )
            # Initial text matches the default ensemble mode above.
            upload_instructions = gr.Markdown(
                _get_upload_instructions("synthetic"),
            )
            pdb_input = gr.File(
                file_count="multiple",
                label="PDB/CIF file(s)",
                file_types=[".pdb", ".cif"],
            )
            finish_upload_btn = gr.Button("Upload", variant="secondary")
            cleaned_files_state = gr.State(None)
            clean_notification = gr.Markdown(visible=False)
            clean_download = gr.File(
                label="Download cleaned files", visible=False, elem_classes=["compact-file"]
            )
            num_seqs = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of sequences",
            )
            omit_aas = gr.Dropdown(
                choices=[
                    "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y",
                ],
                multiselect=True,
                label="Amino acids to omit",
                elem_classes=["omit-aa-dropdown"],
            )
            temperature = gr.Slider(
                minimum=0.01,
                maximum=1,
                value=0.01,
                step=0.01,
                label="Sampling temperature",
            )
            # Disabled until the upload callback reports cleaned files.
            submit_btn = gr.Button("Design sequences", variant="primary", interactive=False)

            with gr.Accordion("Advanced constraints", open=False):
                fixed_pos_seq = gr.Textbox(
                    label="Fixed positions",
                    info="Format: A1-100,B1-100 \nSequence positions in the input PDB to condition on so that they"
                    " remain fixed during design. For ensemble-conditioned design, fixed_pos_seq is applied using"
                    " the primary conformer's sequence.",
                    placeholder="e.g. \nA1-100,B1-100",
                )
                fixed_pos_scn = gr.Textbox(
                    label="Fixed sidechain positions",
                    info="Format: A1-10,A12,A15-20 \nSidechain positions in the input PDB to condition on so that they"
                    " remain fixed during design. Note that fixed sidechain positions must be a subset of fixed"
                    " sequence positions, since it does not make sense to condition on a sidechain without also"
                    " conditioning on its sequence identity.",
                    placeholder="e.g. A1-10,A12,A15-20",
                )
                fixed_pos_override_seq = gr.Textbox(
                    label="Override sequence at positions",
                    info="Format: A26:A,A27:L \nSequence positions in the input PDB to first override the sequence at,"
                    " and then condition on. The colon separates the position and the desired amino acid.",
                    placeholder="e.g. A26:A,A27:L",
                )
                pos_restrict_aatype = gr.Textbox(
                    label="Position restrictions",
                    info="Format: A26:AVG,A27:VG \nAllowed amino acids for certain positions in the input PDB. The"
                    " colon separates the position and the allowed amino acids.",
                    placeholder="e.g. A26:AVG,A27:VG",
                )
                symmetry_pos = gr.Textbox(
                    label="Symmetry positions",
                    info="Format: A10,B10,C10|A11,B11,C11 \nSymmetry positions for tying sampling across residue"
                    " positions. The pipe separates groups of positions to sample symmetrically. In the example,"
                    " A10, B10, and C10 are tied together, and A11, B11, and C11 are tied together.",
                    placeholder="e.g. \nA10,B10,C10|A11,B11,C11",
                )
                # NOTE(review): placed inside the accordion because it follows the
                # constraint fields; confirm it does not belong directly in the column.
                num_protpardelle_conformers = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=15,
                    step=1,
                    label="Number of conformers to generate",
                    visible=True,
                )

        # Right column: results and structure viewers.
        with gr.Column(scale=2):
            raw_results_df = gr.State(None)
            af2_pdb_state = gr.State({})
            input_pdb_state = gr.State({})
            best_sample_state = gr.State("")

            results_placeholder = gr.Markdown(
                "Results will appear here after designing sequences.",
            )
            results_header = gr.Markdown("### Results", visible=False)
            results_df = gr.Dataframe(
                show_label=False,
                interactive=False,
                wrap=False,
                column_widths=[160, 448],
                elem_id="results-table",
                visible=False,
            )
            fasta_output = gr.Textbox(
                label="Sequences (FASTA)",
                lines=10,
                visible=False,
            )
            with gr.Row():
                csv_download = gr.File(label="Download results CSV", elem_classes=["compact-file"], visible=False)
                output_files = gr.File(label="Download CIF files", elem_classes=["compact-file"], visible=False)
                sc_output_files = gr.File(
                    label="Download AF2 self-consistency outputs",
                    elem_classes=["compact-file"],
                    visible=False,
                )

            with gr.Column(visible=False) as viewer_section:
                gr.Markdown("---")
                with gr.Row():
                    gr.Markdown("### AF2 Prediction")
                    af2_color_mode = gr.Dropdown(
                        choices=[
                            ("pLDDT", "plddt"),
                            ("Chain", "chain"),
                            ("Rainbow", "rainbow"),
                            ("Secondary structure", "secondary"),
                        ],
                        value="plddt",
                        label="Color by",
                        scale=0,
                    )
                af2_viewer = gr.HTML(elem_id="af2-viewer")
                show_overlay = gr.Checkbox(label="Show reference structure", value=False)

                # NOTE(review): nested under viewer_section by assumption; it may
                # instead be a sibling column - confirm.
                with gr.Column(visible=False) as ref_section:
                    with gr.Row():
                        gr.Markdown("### Reference Structure")
                        ref_color_mode = gr.Dropdown(
                            choices=[
                                ("Chain", "chain"),
                                ("pLDDT", "plddt"),
                                ("Rainbow", "rainbow"),
                                ("Secondary structure", "secondary"),
                            ],
                            value="chain",
                            label="Color by",
                            scale=0,
                        )
                    reference_viewer = gr.HTML(elem_id="ref-viewer")

    # Design: first swap the placeholder for a progress message, then run the pipeline.
    submit_btn.click(
        fn=lambda: gr.update(value='\nRunning design pipeline\u2026\n', visible=True),
        outputs=[results_placeholder],
    ).then(
        fn=submit_design_sequences,
        inputs=[
            cleaned_files_state,
            ensemble_mode,
            model_variant,
            num_seqs,
            omit_aas,
            temperature,
            fixed_pos_seq,
            fixed_pos_scn,
            fixed_pos_override_seq,
            pos_restrict_aatype,
            symmetry_pos,
            num_protpardelle_conformers,
            run_af2_eval,
        ],
        # Order must match the tuple returned by submit_design_sequences.
        outputs=[
            results_header,
            results_df,
            raw_results_df,
            fasta_output,
            output_files,
            sc_output_files,
            af2_pdb_state,
            input_pdb_state,
            best_sample_state,
            viewer_section,
            af2_viewer,
            reference_viewer,
            ref_section,
            results_placeholder,
        ],
    )

    # Refresh the CSV download whenever the raw results state changes.
    raw_results_df.change(fn=_csv_download_output, inputs=[raw_results_df], outputs=[csv_download])

    # Upload: mark the button busy, clean the files, then restore the button.
    finish_upload_btn.click(
        fn=lambda: gr.update(value="Processing\u2026", interactive=False),
        outputs=[finish_upload_btn],
    ).then(
        fn=_clean_uploaded_pdbs,
        inputs=[pdb_input],
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    ).then(
        fn=lambda: gr.update(value="Upload", interactive=True),
        outputs=[finish_upload_btn],
    )

    # Any change to the selected files invalidates previously cleaned output.
    pdb_input.change(
        fn=_reset_cleaned_state,
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    )

    # The conformer-count slider only applies to the synthetic ensemble mode.
    ensemble_mode.change(
        fn=lambda mode: (gr.update(visible=(mode == "synthetic")), _get_upload_instructions(mode)),
        inputs=[ensemble_mode],
        outputs=[num_protpardelle_conformers, upload_instructions],
    )

    # All three viewer controls re-render through the same callback.
    viewer_inputs = [best_sample_state, af2_pdb_state, input_pdb_state, show_overlay, af2_color_mode, ref_color_mode]
    viewer_outputs = [af2_viewer, reference_viewer, ref_section]
    show_overlay.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    af2_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    ref_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)


if __name__ == "__main__":
    demo.launch(theme=theme, css=css, ssr_mode=False)