Upload sagemaker_deploy.py with huggingface_hub
Browse files- sagemaker_deploy.py +101 -0
sagemaker_deploy.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Amazon SageMaker Deployment Script for Legion Coder 8M

This script demonstrates how to deploy the Legion Coder model to Amazon SageMaker
for production inference.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""

import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import boto3

# Configuration
# NOTE(review): replace the account id / role name placeholders with a real
# IAM role ARN that has SageMaker execution permissions before running.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub repository id of the model to serve.
MODEL_ID = "dineth554/legion-coder-8m"
# CPU instance type; presumably sufficient for an 8M-parameter model — confirm.
INSTANCE_TYPE = "ml.m5.large"
INSTANCE_COUNT = 1
def deploy_to_sagemaker():
    """
    Deploy Legion Coder 8M to Amazon SageMaker.

    Creates a Hugging Face DLC-backed SageMaker model and deploys it to a
    real-time inference endpoint named ``legion-coder-8m-endpoint``. The
    call blocks until the endpoint is InService (typically several minutes).

    Returns:
        The predictor object bound to the newly created endpoint.
    """
    # Initialize SageMaker session (uses default boto3 credentials/region).
    sess = sagemaker.Session()

    # Create the Hugging Face model. The container downloads the weights
    # from the Hugging Face Hub via HF_MODEL_ID, so no `model_data` S3
    # artifact is passed. NOTE: `model_data` must be an s3:// URI to a
    # model.tar.gz when used — an https://huggingface.co/... URL to a raw
    # safetensors file is rejected by SageMaker, so it was removed here.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,           # model repo on the Hub
            "HF_TASK": "text-generation",      # selects the default pipeline
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # logging.INFO
            # SAGEMAKER_PROGRAM="inference.py" was dropped: no custom
            # inference script is packaged with this deployment, so the
            # DLC's built-in HF_TASK handler is used instead.
        },
    )

    # Deploy to a real-time endpoint.
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name="legion-coder-8m-endpoint",
    )

    print("Model deployed successfully!")
    print("Endpoint name: legion-coder-8m-endpoint")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor
+
|
| 64 |
+
def test_endpoint(predictor):
|
| 65 |
+
"""
|
| 66 |
+
Test the deployed endpoint with a sample prompt.
|
| 67 |
+
"""
|
| 68 |
+
test_payload = {
|
| 69 |
+
"inputs": "Write a Python function to calculate fibonacci numbers:",
|
| 70 |
+
"parameters": {
|
| 71 |
+
"temperature": 0.8,
|
| 72 |
+
"top_p": 0.95,
|
| 73 |
+
"top_k": 50,
|
| 74 |
+
"max_new_tokens": 200
|
| 75 |
+
}
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
response = predictor.predict(test_payload)
|
| 79 |
+
print("Test response:", response)
|
| 80 |
+
return response
|
| 81 |
+
def cleanup_endpoint(predictor):
    """
    Tear down the SageMaker endpoint to stop incurring instance charges.
    """
    # Deletes the endpoint behind the predictor; the endpoint config and
    # model objects are managed separately by SageMaker.
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")
def _main():
    """Deploy the model, then run a quick smoke test against the endpoint."""
    print("Deploying Legion Coder 8M to SageMaker...")
    predictor = deploy_to_sagemaker()

    print("\nTesting endpoint...")
    test_endpoint(predictor)

    # Uncomment to clean up
    # cleanup_endpoint(predictor)


if __name__ == "__main__":
    _main()