Upload sagemaker_deploy.py with huggingface_hub
Browse files- sagemaker_deploy.py +101 -0
sagemaker_deploy.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Amazon SageMaker Deployment Script for Legion Coder 8M

This script demonstrates how to deploy the Legion Coder model to Amazon SageMaker
for production inference.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""

import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import boto3

# Configuration
# NOTE(review): replace the account id / role name placeholders with a real
# IAM role ARN that has SageMaker execution permissions before running.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub repository id of the model to serve.
MODEL_ID = "dineth554/legion-coder-8m"
# CPU instance type; presumably sufficient for an 8M-parameter model — confirm.
INSTANCE_TYPE = "ml.m5.large"
INSTANCE_COUNT = 1
def deploy_to_sagemaker():
    """
    Deploy Legion Coder 8M to Amazon SageMaker.

    Creates a Hugging Face DLC-backed SageMaker model and deploys it to a
    real-time inference endpoint named ``legion-coder-8m-endpoint``. The
    call blocks until the endpoint is InService (typically several minutes).

    Returns:
        The predictor object bound to the newly created endpoint.
    """
    # Initialize SageMaker session (uses default boto3 credentials/region).
    sess = sagemaker.Session()

    # Create the Hugging Face model. The container downloads the weights
    # from the Hugging Face Hub via HF_MODEL_ID, so no `model_data` S3
    # artifact is passed. NOTE: `model_data` must be an s3:// URI to a
    # model.tar.gz when used — an https://huggingface.co/... URL to a raw
    # safetensors file is rejected by SageMaker, so it was removed here.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,           # model repo on the Hub
            "HF_TASK": "text-generation",      # selects the default pipeline
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # logging.INFO
            # SAGEMAKER_PROGRAM="inference.py" was dropped: no custom
            # inference script is packaged with this deployment, so the
            # DLC's built-in HF_TASK handler is used instead.
        },
    )

    # Deploy to a real-time endpoint.
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name="legion-coder-8m-endpoint",
    )

    print("Model deployed successfully!")
    print("Endpoint name: legion-coder-8m-endpoint")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor
+
|
| 64 |
+
def test_endpoint(predictor):
|
| 65 |
+
"""
|
| 66 |
+
Test the deployed endpoint with a sample prompt.
|
| 67 |
+
"""
|
| 68 |
+
test_payload = {
|
| 69 |
+
"inputs": "Write a Python function to calculate fibonacci numbers:",
|
| 70 |
+
"parameters": {
|
| 71 |
+
"temperature": 0.8,
|
| 72 |
+
"top_p": 0.95,
|
| 73 |
+
"top_k": 50,
|
| 74 |
+
"max_new_tokens": 200
|
| 75 |
+
}
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
response = predictor.predict(test_payload)
|
| 79 |
+
print("Test response:", response)
|
| 80 |
+
return response
|
| 81 |
+
def cleanup_endpoint(predictor):
    """
    Tear down the SageMaker endpoint to stop incurring instance charges.
    """
    # Deletes the endpoint behind the predictor; the endpoint config and
    # model objects are managed separately by SageMaker.
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")
def _main():
    """Deploy the model, then run a quick smoke test against the endpoint."""
    print("Deploying Legion Coder 8M to SageMaker...")
    predictor = deploy_to_sagemaker()

    print("\nTesting endpoint...")
    test_endpoint(predictor)

    # Uncomment to clean up
    # cleanup_endpoint(predictor)


if __name__ == "__main__":
    _main()