"""
Amazon SageMaker Deployment Script for Legion Coder 8M

This script demonstrates how to deploy the Legion Coder model to Amazon SageMaker
for production inference.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""
|
|
import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel
|
|
| |
# Deployment configuration. Edit these values before running the script.

# IAM role passed to HuggingFaceModel as ``role``. The value below is a
# template placeholder and must be replaced with a real role ARN.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub repository id, forwarded to the container as HF_MODEL_ID.
MODEL_ID = "dineth554/legion-coder-8m"
# Instance type and number of instances used when deploying the endpoint.
INSTANCE_TYPE = "ml.m5.large"
INSTANCE_COUNT = 1
|
|
|
|
def deploy_to_sagemaker(endpoint_name="legion-coder-8m-endpoint"):
    """Deploy Legion Coder 8M to an Amazon SageMaker endpoint.

    No model archive is uploaded by this script. The container is configured
    through the ``HF_MODEL_ID`` / ``HF_TASK`` environment variables so that
    the model identified by ``MODEL_ID`` is loaded from the Hugging Face Hub.

    Args:
        endpoint_name: Name of the SageMaker endpoint to create. Defaults to
            the name this script has always used.

    Returns:
        The predictor object returned by ``HuggingFaceModel.deploy``.

    Raises:
        ValueError: If ``ROLE_ARN`` still contains the template placeholders.
    """
    # Fail fast with a clear message instead of an opaque AWS validation
    # error after the session has already been created.
    placeholders = ("YOUR_ACCOUNT_ID", "YOUR_SAGEMAKER_ROLE")
    if any(marker in ROLE_ARN for marker in placeholders):
        raise ValueError(
            "ROLE_ARN still contains placeholder text; set it to the ARN of "
            "a real SageMaker execution role before deploying."
        )

    sess = sagemaker.Session()

    # ``model_data`` is deliberately not passed: it expects the S3 location of
    # a model.tar.gz archive, not an HTTPS link to a .safetensors file on the
    # Hub. HF_MODEL_ID below already tells the container which model to load.
    # NOTE(review): confirm this transformers/pytorch/python version
    # combination is available as a Hugging Face inference container in the
    # target region.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,
            "HF_TASK": "text-generation",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            # NOTE(review): no entry_point/source_dir is passed here, so this
            # script ships no custom inference.py -- confirm this variable is
            # still wanted.
            "SAGEMAKER_PROGRAM": "inference.py",
        },
    )

    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name=endpoint_name,
    )

    print("Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor
|
|
|
|
def test_endpoint(predictor):
    """Send one sample code-generation prompt to the endpoint and print the reply.

    Args:
        predictor: Object exposing ``predict(payload)``; in this script it is
            the predictor returned by ``deploy_to_sagemaker``.

    Returns:
        Whatever ``predictor.predict`` returns for the sample payload.
    """
    test_payload = {
        "inputs": "Write a Python function to calculate fibonacci numbers:",
        "parameters": {
            # temperature / top_p / top_k are sampling controls; without
            # do_sample the generation is greedy and they have no effect.
            "do_sample": True,
            "temperature": 0.8,
            "top_p": 0.95,
            "top_k": 50,
            "max_new_tokens": 200,
        },
    }

    response = predictor.predict(test_payload)
    print("Test response:", response)
    return response
|
|
|
|
def cleanup_endpoint(predictor, *, delete_model=False):
    """Delete the SageMaker endpoint behind ``predictor``.

    Args:
        predictor: Object exposing ``delete_endpoint()`` (and
            ``delete_model()`` when ``delete_model`` is true); in this script
            it is the predictor returned by ``deploy_to_sagemaker``.
        delete_model: When true, also delete the model backing the endpoint.
            Defaults to ``False``, which deletes only the endpoint.
    """
    if delete_model:
        # Order follows the SageMaker examples: model first, then endpoint.
        predictor.delete_model()
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")
|
|
|
|
if __name__ == "__main__":
    # Step 1: create the endpoint.
    print("Deploying Legion Coder 8M to SageMaker...")
    predictor = deploy_to_sagemaker()

    # Step 2: smoke-test it with a single sample prompt.
    print("\nTesting endpoint...")
    test_endpoint(predictor)

    # The endpoint is left running after the smoke test; call
    # cleanup_endpoint(predictor) once it is no longer needed.
| |
| |
| |
|
|