Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,20 +7,20 @@ import logging
|
|
| 7 |
import os
|
| 8 |
from typing import Optional, Dict
|
| 9 |
import re
|
| 10 |
-
from functools import lru_cache
|
| 11 |
import asyncio
|
|
|
|
| 12 |
|
| 13 |
-
# --- 1. Konfigurasi Awal
|
| 14 |
-
# Create necessary directories
|
| 15 |
# Working directories: one for downloaded model files, one for log output.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache locations at the local cache directory.
os.environ.update({"HF_HOME": "./cache", "TRANSFORMERS_CACHE": "./cache"})

# Runtime settings.
DEVICE = -1  # always run on CPU
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "5000"))
|
| 25 |
|
| 26 |
# Configure logging
|
|
@@ -44,7 +44,7 @@ PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]
|
|
| 44 |
# Cache untuk translator (pipeline)
|
| 45 |
translators: Dict[str, pipeline] = {}
|
| 46 |
|
| 47 |
-
# --- Pydantic Models
|
| 48 |
class TranslationRequest(BaseModel):
|
| 49 |
text: str
|
| 50 |
source_lang_override: Optional[str] = None
|
|
@@ -53,13 +53,10 @@ class TranslationResponse(BaseModel):
|
|
| 53 |
translated_text: str
|
| 54 |
source_language: Optional[str] = None
|
| 55 |
|
| 56 |
-
# ---
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
@app.on_event("startup")
|
| 61 |
-
async def startup_event():
|
| 62 |
-
"""Memuat semua model translasi saat aplikasi dimulai."""
|
| 63 |
logger.info("Memulai prapemuatan model translasi...")
|
| 64 |
for lang, model_name in MODEL_MAP.items():
|
| 65 |
try:
|
|
@@ -69,7 +66,12 @@ async def startup_event():
|
|
| 69 |
except Exception as e:
|
| 70 |
logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
|
| 71 |
logger.info("Semua model telah dimuat.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
| 73 |
def get_translator(lang: str) -> pipeline:
|
| 74 |
"""Mengambil translator yang sudah dimuat dari cache."""
|
| 75 |
translator = translators.get(lang)
|
|
@@ -78,12 +80,10 @@ def get_translator(lang: str) -> pipeline:
|
|
| 78 |
raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
|
| 79 |
return translator
|
| 80 |
|
| 81 |
-
|
| 82 |
-
@lru_cache(maxsize=128) # Cache lebih besar jika perlu
|
| 83 |
def detect_language(text: str) -> str:
|
| 84 |
"""Deteksi bahasa dengan cache."""
|
| 85 |
try:
|
| 86 |
-
# Potong teks untuk deteksi yang lebih cepat jika teks sangat panjang
|
| 87 |
preview_text = text[:500]
|
| 88 |
detected_lang = langdetect.detect(preview_text)
|
| 89 |
if detected_lang.startswith('zh'):
|
|
@@ -94,100 +94,77 @@ def detect_language(text: str) -> str:
|
|
| 94 |
return "en"
|
| 95 |
|
| 96 |
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Swap protected terms for opaque placeholders before translation.

    Every occurrence of a term in ``text`` (matched case-insensitively and
    only when not embedded in a longer word) is replaced by
    ``__PROTECTED_<index>__``, where ``<index>`` is the term's position in
    ``protected_terms``.

    Args:
        text: The text about to be translated.
        protected_terms: Terms that must survive translation unchanged.

    Returns:
        A ``(masked_text, replacements)`` tuple. ``replacements`` maps each
        placeholder that was actually inserted to the term as spelled in
        ``protected_terms`` (not as it was spelled in ``text``).
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty pattern matches at every boundary and would flood the
            # text with placeholders.
            continue
        placeholder = f"__PROTECTED_{i}__"
        # Look-arounds behave like \b for ordinary words but also work for
        # terms whose first or last character is not a word character.
        pattern = r"(?<!\w)" + re.escape(term) + r"(?!\w)"
        text, hits = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hits:
            # Only remember placeholders that are really present in the text.
            replacements[placeholder] = term
    return text, replacements
|
| 108 |
|
| 109 |
def restore_terms(text: str, replacements: dict) -> str:
    """Put protected terms back in place of their placeholders.

    Args:
        text: Text that may contain placeholders.
        replacements: Mapping of placeholder -> original term.

    Returns:
        ``text`` with every placeholder occurrence replaced by its term.
    """
    restored = text
    for placeholder in replacements:
        restored = restored.replace(placeholder, replacements[placeholder])
    return restored
|
| 114 |
|
| 115 |
-
# ---
|
| 116 |
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` and report which source language was used.

    The source language is ``source_lang_override`` when that value is a key
    of ``MODEL_MAP``; otherwise it is detected from the text. Text identified
    as English ("en") is returned as-is. Terms listed in ``PROTECTED_TERMS``
    are masked before the model runs and restored afterwards.

    Args:
        text: The text to translate.
        source_lang_override: Optional language code to use instead of
            detection; ignored when it is not a key of ``MODEL_MAP``.

    Returns:
        A ``TranslationResponse`` with the translated text and source language.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace-only, 413 when
            it is longer than ``MAX_TEXT_LENGTH``, 500 when translation fails.
            An ``HTTPException`` raised by a helper propagates unchanged.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")

    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )

    try:
        # Decide on the source language.
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        # English input needs no translation.
        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)

        # Mask protected terms so the model cannot alter them.
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        def _translate_task():
            return translator(modified_text, max_length=512, num_beams=4)

        # Run the blocking pipeline call in a worker thread so it does not
        # stall the event loop.
        result = await asyncio.to_thread(_translate_task)
        translated_text = result[0]["translation_text"]

        final_text = restore_terms(translated_text, replacements)

        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed HTTP error; re-raise with its traceback intact.
        raise
    except Exception as e:
        # logger.exception records the traceback as well as the message.
        logger.exception("Terjadi kesalahan saat translasi: %s", e)
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
|
| 162 |
|
| 163 |
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """HTTP endpoint: translate the text carried in the request body."""
    response = await perform_translation(request.text, request.source_lang_override)
    return response
|
| 167 |
|
| 168 |
@app.get("/health")
async def health_check():
    """Report service status and the keys of the loaded translator cache."""
    loaded = list(translators)
    return {"status": "healthy", "loaded_models": loaded}
|
| 171 |
|
| 172 |
-
|
| 173 |
-
# --- 5. OPTIMASI: Handler Gradio menjadi Asynchronous ---
|
| 174 |
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback that translates ``text`` and never raises.

    Args:
        text: Text typed into the UI.
        source_lang: Language code, or "auto" to let the service detect it.

    Returns:
        A ``(translation, language)`` pair of display strings. Failures are
        reported as an "Error: ..." message paired with "Error".
    """
    if not (text and text.strip()):
        return "Masukkan teks untuk diterjemahkan.", "N/A"

    # "auto" means no override: the translation routine picks the language.
    override = None if source_lang == "auto" else source_lang
    try:
        outcome = await perform_translation(text, override)
        return outcome.translated_text, outcome.source_language or "Unknown"
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"
|
| 188 |
|
| 189 |
-
# ---
|
| 190 |
-
# Fungsi untuk membuat UI Gradio tetap sama
|
| 191 |
def create_gradio_interface():
|
| 192 |
with gr.Blocks(
|
| 193 |
title="Multi-Language Translation Service",
|
|
@@ -199,7 +176,6 @@ def create_gradio_interface():
|
|
| 199 |
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
|
| 200 |
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
|
| 201 |
""")
|
| 202 |
-
|
| 203 |
with gr.Row():
|
| 204 |
with gr.Column(scale=1):
|
| 205 |
text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
|
|
@@ -212,11 +188,9 @@ def create_gradio_interface():
|
|
| 212 |
value="auto", label="Source Language"
|
| 213 |
)
|
| 214 |
translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
|
| 215 |
-
|
| 216 |
with gr.Column(scale=1):
|
| 217 |
output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
|
| 218 |
detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
|
| 219 |
-
|
| 220 |
gr.Examples(
|
| 221 |
examples=[
|
| 222 |
["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
|
|
@@ -226,24 +200,13 @@ def create_gradio_interface():
|
|
| 226 |
],
|
| 227 |
inputs=[text_input, lang_dropdown],
|
| 228 |
outputs=[output_text, detected_lang],
|
| 229 |
-
fn=
|
| 230 |
cache_examples=False
|
| 231 |
)
|
| 232 |
-
|
| 233 |
-
# Event handlers sekarang bisa langsung memanggil fungsi async
|
| 234 |
translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
| 235 |
text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
| 236 |
-
|
| 237 |
return interface
|
| 238 |
|
| 239 |
-
#
|
| 240 |
gradio_app = create_gradio_interface()
|
| 241 |
-
|
| 242 |
-
# Mount Gradio app ke FastAPI di path "/"
|
| 243 |
-
# Ini adalah cara yang benar untuk mengintegrasikan keduanya
|
| 244 |
-
app = gr.mount_gradio_app(app, gradio_app, path="/")
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
# Untuk menjalankan:
|
| 248 |
-
# Simpan file ini sebagai app.py dan jalankan dengan uvicorn
|
| 249 |
-
# > uvicorn app:app --reload --port 7860
|
|
|
|
| 7 |
import os
|
| 8 |
from typing import Optional, Dict
|
| 9 |
import re
|
| 10 |
+
from functools import lru_cache, partial
|
| 11 |
import asyncio
|
| 12 |
+
from contextlib import asynccontextmanager
|
| 13 |
|
| 14 |
+
# --- 1. Konfigurasi Awal ---
|
|
|
|
| 15 |
# Working directories: one for downloaded model files, one for log output.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache locations at the local cache directory.
os.environ.update({"HF_HOME": "./cache", "TRANSFORMERS_CACHE": "./cache"})

# Runtime settings.
DEVICE = -1  # always run on CPU, for compatibility
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "5000"))
|
| 25 |
|
| 26 |
# Configure logging
|
|
|
|
| 44 |
# Cache untuk translator (pipeline)
|
| 45 |
translators: Dict[str, pipeline] = {}
|
| 46 |
|
| 47 |
+
# --- Pydantic Models ---
|
| 48 |
class TranslationRequest(BaseModel):
|
| 49 |
text: str
|
| 50 |
source_lang_override: Optional[str] = None
|
|
|
|
| 53 |
translated_text: str
|
| 54 |
source_language: Optional[str] = None
|
| 55 |
|
| 56 |
+
# --- Lifespan Event Handler ---
|
| 57 |
+
@asynccontextmanager
|
| 58 |
+
async def lifespan(app: FastAPI):
|
| 59 |
+
"""Handler lifecycle aplikasi menggunakan lifespan"""
|
|
|
|
|
|
|
|
|
|
| 60 |
logger.info("Memulai prapemuatan model translasi...")
|
| 61 |
for lang, model_name in MODEL_MAP.items():
|
| 62 |
try:
|
|
|
|
| 66 |
except Exception as e:
|
| 67 |
logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
|
| 68 |
logger.info("Semua model telah dimuat.")
|
| 69 |
+
yield # Aplikasi berjalan di sini
|
| 70 |
+
|
| 71 |
+
# --- Inisialisasi Aplikasi FastAPI dengan Lifespan ---
|
| 72 |
+
app = FastAPI(title="Translation Service API", lifespan=lifespan)
|
| 73 |
|
| 74 |
+
# --- Fungsi Utility ---
|
| 75 |
def get_translator(lang: str) -> pipeline:
|
| 76 |
"""Mengambil translator yang sudah dimuat dari cache."""
|
| 77 |
translator = translators.get(lang)
|
|
|
|
| 80 |
raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
|
| 81 |
return translator
|
| 82 |
|
| 83 |
+
@lru_cache(maxsize=128)
|
|
|
|
| 84 |
def detect_language(text: str) -> str:
|
| 85 |
"""Deteksi bahasa dengan cache."""
|
| 86 |
try:
|
|
|
|
| 87 |
preview_text = text[:500]
|
| 88 |
detected_lang = langdetect.detect(preview_text)
|
| 89 |
if detected_lang.startswith('zh'):
|
|
|
|
| 94 |
return "en"
|
| 95 |
|
| 96 |
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Swap protected terms for opaque placeholders before translation.

    Every occurrence of a term in ``text`` (matched case-insensitively and
    only when not embedded in a longer word) is replaced by
    ``__PROTECTED_<index>__``, where ``<index>`` is the term's position in
    ``protected_terms``.

    Args:
        text: The text about to be translated.
        protected_terms: Terms that must survive translation unchanged.

    Returns:
        A ``(masked_text, replacements)`` tuple. ``replacements`` maps each
        placeholder that was actually inserted to the term as spelled in
        ``protected_terms`` (not as it was spelled in ``text``).
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty pattern matches at every boundary and would flood the
            # text with placeholders.
            continue
        placeholder = f"__PROTECTED_{i}__"
        # Look-arounds behave like \b for ordinary words but also work for
        # terms whose first or last character is not a word character.
        pattern = r"(?<!\w)" + re.escape(term) + r"(?!\w)"
        text, hits = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hits:
            # Only remember placeholders that are really present in the text.
            replacements[placeholder] = term
    return text, replacements
|
| 105 |
|
| 106 |
def restore_terms(text: str, replacements: dict) -> str:
    """Put protected terms back in place of their placeholders.

    Args:
        text: Text that may contain placeholders.
        replacements: Mapping of placeholder -> original term.

    Returns:
        ``text`` with every placeholder occurrence replaced by its term.
    """
    restored = text
    for placeholder in replacements:
        restored = restored.replace(placeholder, replacements[placeholder])
    return restored
|
| 110 |
|
| 111 |
+
# --- Fungsi Inti dan Endpoint API ---
|
| 112 |
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` and report which source language was used.

    The source language is ``source_lang_override`` when that value is a key
    of ``MODEL_MAP``; otherwise it is detected from the text. Text identified
    as English ("en") is returned as-is. Terms listed in ``PROTECTED_TERMS``
    are masked before the model runs and restored afterwards.

    Args:
        text: The text to translate.
        source_lang_override: Optional language code to use instead of
            detection; ignored when it is not a key of ``MODEL_MAP``.

    Returns:
        A ``TranslationResponse`` with the translated text and source language.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace-only, 413 when
            it is longer than ``MAX_TEXT_LENGTH``, 500 when translation fails.
            An ``HTTPException`` raised by a helper propagates unchanged.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")

    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )

    try:
        # Decide on the source language.
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        # English input needs no translation.
        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)

        # Mask protected terms so the model cannot alter them.
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        def _translate_task():
            return translator(modified_text, max_length=512, num_beams=4)

        # Run the blocking pipeline call in a worker thread so it does not
        # stall the event loop.
        result = await asyncio.to_thread(_translate_task)
        translated_text = result[0]["translation_text"]

        final_text = restore_terms(translated_text, replacements)

        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed HTTP error; re-raise with its traceback intact.
        raise
    except Exception as e:
        # logger.exception records the traceback as well as the message.
        logger.exception("Terjadi kesalahan saat translasi: %s", e)
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
|
| 145 |
|
| 146 |
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """HTTP endpoint: translate the text carried in the request body."""
    response = await perform_translation(request.text, request.source_lang_override)
    return response
|
| 149 |
|
| 150 |
@app.get("/health")
async def health_check():
    """Report service status and the keys of the loaded translator cache."""
    loaded = list(translators)
    return {"status": "healthy", "loaded_models": loaded}
|
| 153 |
|
| 154 |
+
# --- Handler Gradio Async ---
|
|
|
|
| 155 |
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback that translates ``text`` and never raises.

    Args:
        text: Text typed into the UI.
        source_lang: Language code, or "auto" to let the service detect it.

    Returns:
        A ``(translation, language)`` pair of display strings. Failures are
        reported as an "Error: ..." message paired with "Error".
    """
    if not (text and text.strip()):
        return "Masukkan teks untuk diterjemahkan.", "N/A"

    # "auto" means no override: the translation routine picks the language.
    override = None if source_lang == "auto" else source_lang
    try:
        outcome = await perform_translation(text, override)
        return outcome.translated_text, outcome.source_language or "Unknown"
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"
|
| 166 |
|
| 167 |
+
# --- UI Gradio ---
|
|
|
|
| 168 |
def create_gradio_interface():
|
| 169 |
with gr.Blocks(
|
| 170 |
title="Multi-Language Translation Service",
|
|
|
|
| 176 |
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
|
| 177 |
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
|
| 178 |
""")
|
|
|
|
| 179 |
with gr.Row():
|
| 180 |
with gr.Column(scale=1):
|
| 181 |
text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
|
|
|
|
| 188 |
value="auto", label="Source Language"
|
| 189 |
)
|
| 190 |
translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
|
|
|
|
| 191 |
with gr.Column(scale=1):
|
| 192 |
output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
|
| 193 |
detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
|
|
|
|
| 194 |
gr.Examples(
|
| 195 |
examples=[
|
| 196 |
["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
|
|
|
|
| 200 |
],
|
| 201 |
inputs=[text_input, lang_dropdown],
|
| 202 |
outputs=[output_text, detected_lang],
|
| 203 |
+
fn=partial(asyncio.run, translate_gradio), # Agar bisa dipakai di contoh
|
| 204 |
cache_examples=False
|
| 205 |
)
|
|
|
|
|
|
|
| 206 |
translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
| 207 |
text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
|
|
|
| 208 |
return interface
|
| 209 |
|
| 210 |
+
# Mount Gradio ke FastAPI
|
| 211 |
gradio_app = create_gradio_interface()
|
| 212 |
+
app = gr.mount_gradio_app(app, gradio_app, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|