import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
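
# Fetch the quantized GGUF weights from the Hugging Face Hub. hf_hub_download
# caches the file under ./models, so repeated runs reuse the local copy
# instead of re-downloading the ~4 GB file.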
print("Starting model download...")

try:
    model_path = hf_hub_download(
        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",
        filename="codellama-7b-python.Q4_K_M.gguf",
        cache_dir="./models",
    )
    print(f"✅ Model downloaded to: {model_path}")
except Exception as e:
    print(f"Error downloading model: {e}")
    raise
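
# Load the model into RAM. n_ctx sets the context window in tokens, n_threads
# defaults to 2 CPU threads (override via the N_THREADS env var), and n_batch
# sets how many prompt tokens are processed per batch.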
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=int(os.getenv("N_THREADS", "2")),
    n_batch=512,
    verbose=True,
)
print("✅ Model loaded successfully!")
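
# generate_code() wraps a plain completion call; e.g. generate_code("def fib(n):",
# max_tokens=64, temperature=0.2) returns the model's continuation as text.
# The stop sequences end generation at an end-of-sequence token, a "###"
# marker, or a run of blank lines, keeping completions scoped to the snippet.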
def generate_code(prompt, max_tokens=500, temperature=0.7):
    """Run a completion for the prompt and return the generated text."""
    try:
        response = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["</s>", "###", "\n\n\n"],
            echo=False,
        )
        return response["choices"][0]["text"]
    except Exception as e:
        return f"Error generating code: {str(e)}"
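
# Build the Gradio UI: prompt box and sampling sliders on the left, generated
# code on the right, with example prompts and usage tips below.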
with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🦙 CodeLlama-7B Python Assistant")
    gr.Markdown("AI-powered code generation using CodeLlama-7B (4 GB GGUF model)")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your coding question or task",
                placeholder="Write a Python function to...",
                lines=5,
            )
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=500,
                    step=50,
                    label="Max Tokens",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                )
            submit_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", size="sm")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=15,
                show_copy_button=True,
            )
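
    # Wire the buttons: run generate_code on submit; Clear empties both boxes.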
    submit_btn.click(
        fn=generate_code,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=output,
    )

    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[prompt_input, output],
    )
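
    # Clickable example prompts that populate the input box.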
    gr.Examples(
        examples=[
            ["Write a Python function to calculate Fibonacci numbers"],
            ["Create a binary search tree class with insert and search methods"],
            ["Write a function to reverse a linked list"],
            ["Implement the quicksort algorithm in Python"],
            ["Create a decorator to measure function execution time"],
        ],
        inputs=prompt_input,
    )

    gr.Markdown("""
    ### 💡 Tips:
    - Be specific in your prompts for better results
    - Use a lower temperature (0.3-0.5) for more focused, deterministic code
    - Use a higher temperature (0.7-0.9) for more varied, creative solutions
    - The model works best at Python code generation
    """)
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)