| #pragma once |
|
|
| #include "llama.h" |
| #include "common.h" |
|
|
// Forward declaration only: the members of common_speculative are not visible
// in this header, so code that sees just this header can only hold and pass
// pointers to it.
| struct common_speculative; |
|
|
// Parameter bundle that common_speculative_gen_draft() receives by value.
// Every field has a default member initializer.
| struct common_speculative_params { |
// NOTE(review): presumably the upper bound on the number of tokens drafted per
// call -- confirm against the implementation.
| int n_draft = 16; |
// NOTE(review): meaning is not visible in this header; presumably a threshold
// for reusing previously processed prompt tokens -- confirm.
| int n_reuse = 256; |
|
|
// NOTE(review): presumably the minimum probability a drafted token must reach
// to be kept in the draft -- confirm against the implementation.
| float p_min = 0.9f; |
| }; |
|
|
// Takes a llama_context pointer and returns a pointer to the opaque
// common_speculative type.
//   ctx_dft - NOTE(review): presumably the context of the draft model; confirm.
// NOTE(review): ownership of the returned pointer and the value returned on
// failure are not visible here; presumably the result is handed back to
// common_speculative_free() when no longer needed -- confirm.
| struct common_speculative * common_speculative_init(struct llama_context * ctx_dft); |
|
|
// Takes a common_speculative pointer and returns nothing.
//   spec - NOTE(review): presumably a pointer previously obtained from
//          common_speculative_init(); whether a null pointer is accepted is
//          not visible in this header -- confirm.
| void common_speculative_free(struct common_speculative * spec); |
|
|
// Takes two llama_context pointers, both pointer-to-const, and returns a bool.
//   ctx_tgt - NOTE(review): presumably the target (main) model context; confirm.
//   ctx_dft - NOTE(review): presumably the draft model context; confirm.
// NOTE(review): presumably returns true when the draft context can be used for
// speculation against the target context; the actual criteria live in the
// implementation and are not visible here.
| bool common_speculative_are_compatible( |
| const struct llama_context * ctx_tgt, |
| const struct llama_context * ctx_dft); |
|
|
| |
// Returns a llama_tokens object by value.
//   spec    - pointer to the opaque common_speculative state (non-const).
//   params  - common_speculative_params, passed by value (the callee gets a copy).
//   prompt  - llama_tokens passed by const reference.
//   id_last - a single llama_token.
// NOTE(review): presumably produces draft tokens that continue `prompt`
// followed by `id_last`, limited by params.n_draft -- confirm against the
// implementation; the header alone does not establish this.
| llama_tokens common_speculative_gen_draft( |
| struct common_speculative * spec, |
| struct common_speculative_params params, |
| const llama_tokens & prompt, |
| llama_token id_last); |
|
|