dineth554 commited on
Commit
97cda6b
·
verified ·
1 Parent(s): d9ab257

Upload sagemaker_deploy.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. sagemaker_deploy.py +101 -0
sagemaker_deploy.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Amazon SageMaker Deployment Script for Legion Coder 8M
3
+
4
+ This script demonstrates how to deploy the Legion Coder model to Amazon SageMaker
5
+ for production inference.
6
+
7
+ Requirements:
8
+ pip install sagemaker boto3
9
+
10
+ Usage:
11
+ python sagemaker_deploy.py
12
+ """
13
+
14
+ import sagemaker
15
+ from sagemaker.huggingface import HuggingFaceModel
16
+ import boto3
17
+
18
# Configuration
# IAM execution role SageMaker assumes for the endpoint; replace the
# YOUR_ACCOUNT_ID / YOUR_SAGEMAKER_ROLE placeholders before running.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub repository the container will serve.
MODEL_ID = "dineth554/legion-coder-8m"
# Endpoint instance type (CPU — presumably sufficient for an 8M-param model).
INSTANCE_TYPE = "ml.m5.large"
# Number of instances behind the endpoint.
INSTANCE_COUNT = 1
23
+
24
+
25
def deploy_to_sagemaker(endpoint_name="legion-coder-8m-endpoint"):
    """
    Deploy Legion Coder 8M to Amazon SageMaker.

    Creates a SageMaker real-time endpoint serving the model through the
    Hugging Face inference container. Blocks until the endpoint is in
    service.

    Args:
        endpoint_name: Name of the SageMaker endpoint to create.

    Returns:
        A predictor bound to the newly created endpoint.
    """
    # Initialize SageMaker session (resolves region/credentials from the
    # environment).
    sess = sagemaker.Session()

    # Create the Hugging Face Model.
    # FIX: `model_data` is deliberately NOT passed. It must be an S3 URI of a
    # model.tar.gz archive; the previous code pointed it at a raw
    # .safetensors URL on the Hub, which SageMaker cannot consume. Setting
    # HF_MODEL_ID in `env` makes the container download the model from the
    # Hugging Face Hub at startup instead.
    # FIX: SAGEMAKER_PROGRAM="inference.py" was also removed — it named a
    # custom inference script that was never bundled, so the container would
    # fail looking for it; the built-in text-generation handler is used.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,
            "HF_TASK": "text-generation",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
    )

    # Deploy to a real-time endpoint.
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name=endpoint_name,
    )

    print("Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor
62
+
63
+
64
+ def test_endpoint(predictor):
65
+ """
66
+ Test the deployed endpoint with a sample prompt.
67
+ """
68
+ test_payload = {
69
+ "inputs": "Write a Python function to calculate fibonacci numbers:",
70
+ "parameters": {
71
+ "temperature": 0.8,
72
+ "top_p": 0.95,
73
+ "top_k": 50,
74
+ "max_new_tokens": 200
75
+ }
76
+ }
77
+
78
+ response = predictor.predict(test_payload)
79
+ print("Test response:", response)
80
+ return response
81
+
82
+
83
def cleanup_endpoint(predictor):
    """
    Tear down the deployed SageMaker endpoint.

    Call this once you are finished to stop incurring instance charges.

    Args:
        predictor: Predictor bound to the endpoint to delete.
    """
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")
89
+
90
+
91
if __name__ == "__main__":
    # Stand up the endpoint, then fire one smoke-test request at it.
    print("Deploying Legion Coder 8M to SageMaker...")
    endpoint_predictor = deploy_to_sagemaker()

    print("\nTesting endpoint...")
    test_endpoint(endpoint_predictor)

    # Left commented out on purpose: deleting the endpoint is destructive,
    # so opt in explicitly when you are done.
    # cleanup_endpoint(endpoint_predictor)