AIBRUH commited on
Commit
58a34cb
·
verified ·
1 Parent(s): 14bbfe2

Upload deploy_gpu.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. deploy_gpu.py +261 -0
deploy_gpu.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.11"
3
+ # dependencies = [
4
+ # "bithuman>=0.3",
5
+ # "livekit>=1.0",
6
+ # "livekit-api>=1.0",
7
+ # "edge-tts",
8
+ # "soundfile",
9
+ # "opencv-python-headless",
10
+ # "numpy",
11
+ # "openai>=1.0",
12
+ # ]
13
+ # ///
14
+
15
+ import asyncio
16
+ import json
17
+ import logging
18
+ import os
19
+ import tempfile
20
+ import time
21
+
22
+ import cv2
23
+ import numpy as np
24
+ import soundfile as sf
25
+ import livekit.rtc as rtc
26
+ from livekit import api as lk_api
27
+ from bithuman import AsyncBithuman, VideoControl, AudioChunk
28
+
29
# Logging: timestamped records tagged with the logger name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
logger = logging.getLogger("eden.gpu")

# Deployment configuration, read from the environment (with fallbacks for
# the LiveKit URL; keys/secrets default to empty and are checked at startup).
LIVEKIT_URL = os.environ.get("LIVEKIT_URL", "wss://tall-cotton-nvhnfg10.livekit.cloud")
LIVEKIT_API_KEY = os.environ.get("LIVEKIT_API_KEY", "")
LIVEKIT_API_SECRET = os.environ.get("LIVEKIT_API_SECRET", "")
BITHUMAN_API_SECRET = os.environ.get("BITHUMAN_API_SECRET", "")
XAI_API_KEY = os.environ.get("XAI_API_KEY", "")
EDGE_TTS_VOICE = "en-US-AvaMultilingualNeural"  # voice used for all edge-tts synthesis
FPS = 25  # avatar render loop rate (frames per second)

# Lazily-created AsyncOpenAI client (built on first use by _get_grok) and the
# rolling chat history shared across grok_respond calls.
_grok_client = None
_conversation_history: list[dict] = []

# System prompt defining Eve's persona; prepended to every Grok request.
EVE_SYSTEM_PROMPT = (
    "You are Eve, a warm, intelligent, emotionally expressive digital avatar companion. "
    "You speak naturally and conversationally. Keep responses concise - 2-3 sentences max "
    "unless asked for detail. Be genuine, curious, and kind. You have a slight playful side. "
    "Never mention being an AI or language model. You are Eve."
)
49
+
50
+
51
def _get_grok():
    """Return the memoized AsyncOpenAI client aimed at the x.ai endpoint.

    The client is built lazily on first call. Returns None when no
    XAI_API_KEY is configured in the environment.
    """
    global _grok_client
    # Already built, or no key to build it with: return whatever we have.
    if _grok_client is not None or not XAI_API_KEY:
        return _grok_client
    from openai import AsyncOpenAI
    _grok_client = AsyncOpenAI(api_key=XAI_API_KEY, base_url="https://api.x.ai/v1")
    logger.info("Grok-4 brain connected")
    return _grok_client
58
+
59
+
60
async def grok_respond(user_message: str) -> str:
    """Send *user_message* to the Grok chat model and return Eve's reply.

    Appends the user turn and the assistant reply to the module-level
    _conversation_history; each request carries the system prompt plus the
    20 most recent turns. Returns a canned fallback string when the client
    is unavailable, the API call fails, or the model returns no content.
    """
    client = _get_grok()
    if client is None:
        return "I'm having trouble thinking right now. Can you try again?"
    _conversation_history.append({"role": "user", "content": user_message})
    # Cap context at the 20 most recent turns to bound prompt size.
    messages = [{"role": "system", "content": EVE_SYSTEM_PROMPT}] + _conversation_history[-20:]
    try:
        resp = await client.chat.completions.create(
            model="grok-4-fast-non-reasoning", messages=messages,
            max_tokens=150, temperature=0.8,
        )
        # message.content is Optional in the OpenAI SDK. The original code
        # appended a possible None to the history and then crashed on
        # reply[:50]; guard it so history only ever holds real strings.
        reply = resp.choices[0].message.content or ""
        if not reply:
            return "I lost my train of thought for a moment. What were you saying?"
        _conversation_history.append({"role": "assistant", "content": reply})
        logger.info("Grok: '%s' -> '%s'", user_message[:30], reply[:50])
        return reply
    except Exception as e:
        logger.error("Grok error: %s", e)
        return "I lost my train of thought for a moment. What were you saying?"
78
+
79
+
80
async def generate_tts_wav(text: str) -> tuple[str, np.ndarray, int]:
    """Synthesize *text* with edge-tts; return (wav_path, int16 samples, sample_rate).

    Uses a unique temp file pair per call. The previous fixed names
    ("bh_tts.mp3"/"bh_tts.wav") were clobbered whenever two TTS requests
    overlapped (the greeting and the first chat reply both run as tasks),
    corrupting the audio that stream_lk_audio later reads back from disk.
    """
    import edge_tts

    mp3_fd, mp3_path = tempfile.mkstemp(prefix="bh_tts_", suffix=".mp3")
    os.close(mp3_fd)
    wav_fd, wav_path = tempfile.mkstemp(prefix="bh_tts_", suffix=".wav")
    os.close(wav_fd)
    try:
        communicate = edge_tts.Communicate(text, EDGE_TTS_VOICE)
        await communicate.save(mp3_path)
        # soundfile decodes the mp3 (requires libsndfile >= 1.1); re-encode
        # as 16-bit PCM WAV for the LiveKit streaming path.
        data, sr = sf.read(mp3_path, dtype="int16")
        sf.write(wav_path, data, sr, subtype="PCM_16")
    finally:
        # The intermediate mp3 is no longer needed once decoded (or on failure).
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
    logger.info(f"TTS: {len(text)} chars -> {len(data)/sr:.1f}s audio")
    return wav_path, data, sr
90
+
91
+
92
def prepare_audio_chunks(audio_int16: np.ndarray, sr: int) -> list[AudioChunk]:
    """Split int16 PCM samples into 40 ms float32 AudioChunks for bitHuman.

    Samples are normalized to [-1, 1); the final chunk carries
    last_chunk=True so the renderer knows the utterance has ended.
    """
    samples = audio_int16.astype(np.float32) / 32768.0
    total = len(samples)
    step = int(sr * 0.04)  # samples per 40 ms chunk
    return [
        AudioChunk(
            data=samples[start:start + step],
            sample_rate=sr,
            last_chunk=start + step >= total,
        )
        for start in range(0, total, step)
    ]
102
+
103
+
104
async def run():
    """Main orchestration coroutine: render Eve into a LiveKit room forever.

    Pipeline: download/load the bitHuman .imx avatar model, join the LiveKit
    room "eden-room" and publish one video + one audio track, greet the
    viewer, then loop at FPS rendering neural frames (lip-synced to any
    queued TTS audio). Chat messages arrive over the LiveKit data channel,
    are answered by Grok, synthesized with edge-tts, and fed both to the
    renderer (for lip sync) and to the LiveKit audio track (for playback).
    Runs until cancelled; returns early only if no first frame is produced.
    """
    logger.info("Initializing bitHuman neural renderer...")
    bh = AsyncBithuman(api_secret=BITHUMAN_API_SECRET)

    # Cache the avatar model in the temp dir; download once per machine.
    eve_model = os.path.join(tempfile.gettempdir(), "eve_bithuman.imx")
    if not os.path.exists(eve_model):
        logger.info("Downloading Eve .imx model (215MB)...")
        import urllib.request
        urllib.request.urlretrieve(
            "https://tmoobjxlwcwvxvjeppzq.supabase.co/storage/v1/object/public/bithuman/A18QDC2260/eve__warm_digital_companion_20260403_043223_153938.imx",
            eve_model,
        )
        logger.info("Eve model downloaded!")

    logger.info("Loading Eve neural model...")
    await bh.set_model(eve_model)
    await bh.load_data_async()
    logger.info("Eve neural model loaded!")

    # The first frame fixes the video dimensions for the LiveKit source.
    first_frame = bh.get_first_frame()
    if first_frame is None:
        logger.error("bitHuman failed to generate first frame")
        return
    h, w = first_frame.shape[:2]
    logger.info(f"bitHuman ready! Frame: {w}x{h}")
    await bh.start()

    # Mint a join token for this avatar participant (room: "eden-room").
    token = (
        lk_api.AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET)
        .with_identity("eve-avatar")
        .with_name("Eve")
        .with_grants(lk_api.VideoGrants(room_join=True, room="eden-room"))
        .to_jwt()
    )

    room = rtc.Room()
    await room.connect(LIVEKIT_URL, token)
    logger.info(f"Connected to LiveKit room: {room.name}")

    # One video track (avatar frames) and one mono 24 kHz audio track.
    # NOTE(review): stream_lk_audio pushes frames at the TTS sample rate; if
    # edge-tts output is not 24000 Hz this mismatches the source — confirm.
    video_source = rtc.VideoSource(w, h)
    video_track = rtc.LocalVideoTrack.create_video_track("eve-video", video_source)
    audio_source = rtc.AudioSource(24000, 1)
    audio_track = rtc.LocalAudioTrack.create_audio_track("eve-audio", audio_source)

    await room.local_participant.publish_track(video_track)
    await room.local_participant.publish_track(audio_track)
    logger.info("Video + audio tracks published")

    # Queue of chunk lists: producers are handle_chat/greeting, consumer is
    # the render loop below (which feeds chunks to bitHuman for lip sync).
    audio_queue: asyncio.Queue = asyncio.Queue()

    async def stream_lk_audio(source, wav_path, sr):
        # Play a WAV file into the LiveKit audio source in 20 ms frames,
        # pacing with sleep(0.02) so playback is roughly real-time.
        data_i16, _ = sf.read(wav_path, dtype="int16")
        lk_chunk_size = int(sr * 0.02)
        for i in range(0, len(data_i16), lk_chunk_size):
            chunk = data_i16[i:i + lk_chunk_size]
            if len(chunk) < lk_chunk_size:
                # Zero-pad the tail so every frame has a full 20 ms of samples.
                chunk = np.pad(chunk, (0, lk_chunk_size - len(chunk)))
            frame = rtc.AudioFrame(
                data=chunk.tobytes(), sample_rate=sr,
                num_channels=1, samples_per_channel=len(chunk),
            )
            await source.capture_frame(frame)
            await asyncio.sleep(0.02)
        logger.info("LiveKit audio stream complete")

    async def handle_chat(text: str):
        # Full chat turn: Grok reply -> data-channel text -> TTS ->
        # LiveKit playback task + lip-sync chunks for the render loop.
        logger.info(f"Chat received: '{text[:50]}'")
        response = await grok_respond(text)
        logger.info(f"Eve says: '{response[:50]}'")
        reply_data = json.dumps({"type": "eve_response", "text": response}).encode()
        await room.local_participant.publish_data(reply_data, reliable=True)
        try:
            wav_path, audio_int16, sr = await generate_tts_wav(response)
        except Exception as e:
            # Best-effort: the text reply was already sent; skip audio.
            logger.error(f"TTS failed: {e}")
            return
        chunks = prepare_audio_chunks(audio_int16, sr)
        logger.info(f"Queuing {len(chunks)} audio chunks for lip sync")
        asyncio.create_task(stream_lk_audio(audio_source, wav_path, sr))
        await audio_queue.put(chunks)

    @room.on("data_received")
    def on_data(data: rtc.DataPacket):
        # Data-channel handler: expects JSON {"type": "chat", "text": ...}.
        # Handler must not block, so the chat turn runs as a task.
        try:
            msg = json.loads(data.data.decode())
            if msg.get("type") == "chat":
                text = msg.get("text", "").strip()
                if text:
                    asyncio.create_task(handle_chat(text))
        except Exception as e:
            logger.error(f"Data parse error: {e}")

    # Greeting
    logger.info("Generating Eve's greeting...")
    greeting = (
        "Hi! My name is Eve, and I am so happy to finally meet you! "
        "I've been looking forward to this moment. What's your name?"
    )
    # Small delay to ensure viewer has connected before sending greeting
    await asyncio.sleep(3)
    greeting_data = json.dumps({"type": "eve_response", "text": greeting}).encode()
    await room.local_participant.publish_data(greeting_data, reliable=True)
    try:
        wav_path, audio_int16, sr = await generate_tts_wav(greeting)
        chunks = prepare_audio_chunks(audio_int16, sr)
        await audio_queue.put(chunks)
        asyncio.create_task(stream_lk_audio(audio_source, wav_path, sr))
        logger.info(f"Greeting queued: {len(chunks)} chunks")
    except Exception as e:
        logger.error(f"Greeting TTS failed: {e}")

    # Main render loop
    logger.info(f"Starting render loop at {FPS}fps - Eve is ALIVE!")
    frame_duration = 1.0 / FPS
    frame_count = 0
    active_chunks = []   # chunks of the utterance currently being rendered
    active_idx = 0       # next chunk to feed into bitHuman

    while True:
        t0 = time.time()
        # Current utterance exhausted: try to pull the next one (non-blocking
        # so idle frames keep rendering when there is nothing to say).
        if active_idx >= len(active_chunks):
            try:
                active_chunks = audio_queue.get_nowait()
                active_idx = 0
                logger.info(f"Rendering new audio: {len(active_chunks)} chunks")
            except asyncio.QueueEmpty:
                active_chunks = []
                active_idx = 0

        # One audio chunk (lip-synced frame) or an empty control (idle frame).
        if active_idx < len(active_chunks):
            control = VideoControl(audio=active_chunks[active_idx])
            active_idx += 1
        else:
            control = VideoControl()

        for video_frame in bh.process(control):
            if video_frame is not None and video_frame.has_image:
                rgb = video_frame.rgb_image
                # LiveKit expects RGBA buffers.
                rgba = cv2.cvtColor(rgb, cv2.COLOR_RGB2RGBA)
                lk_frame = rtc.VideoFrame(
                    rgba.shape[1], rgba.shape[0],
                    rtc.VideoBufferType.RGBA, rgba.tobytes(),
                )
                video_source.capture_frame(lk_frame)
                frame_count += 1
                if frame_count % 500 == 0:
                    logger.info(f"{frame_count} neural frames")

        # Sleep off the remainder of the frame budget to hold FPS.
        elapsed = time.time() - t0
        await asyncio.sleep(max(0, frame_duration - elapsed))
254
+
255
+
256
# Script entry point. Guarded so importing this module (e.g. for tests or
# tooling) no longer kicks off asyncio.run as an import side effect; behavior
# when executed directly (`uv run deploy_gpu.py`) is unchanged.
if __name__ == "__main__":
    logger.info("=" * 50)
    logger.info("EDEN OS V2 - bitHuman + Grok Brain + LiveKit")
    logger.info(f" Grok: {'YES' if XAI_API_KEY else 'MISSING'}")
    logger.info(f" bitHuman: {'YES' if BITHUMAN_API_SECRET else 'MISSING'}")
    logger.info("=" * 50)
    asyncio.run(run())