Spaces:

boffire
/

LID-test

Running

boffire committed on Feb 17

Commit

41a4e04

verified ·

1 Parent(s): 2181426

Added MAX_INPUT_LENGTH

Added MAX_INPUT_LENGTH = 10000

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import regex
 import gradio as gr
 import os
 # Preprocessing patterns
 NONWORD_REPLACE_STR = r"[^\p{Word}\p{Zs}]|\d"
 NONWORD_REPLACE_PATTERN = regex.compile(NONWORD_REPLACE_STR)
@@ -34,6 +37,10 @@ def predict_language(text, top_k=3, threshold=0.5):
         top_k: Number of top predictions to return (1-10)
         threshold: Confidence threshold (0.0-1.0)
     """
     if not text or not text.strip():
         return "Please enter some text to analyze."
@@ -79,7 +86,8 @@ with gr.Blocks(title="OpenLID-v3 Language Identification") as demo:
                 label="Input Text",
                 placeholder="Enter text to identify its language...",
                 lines=5,
-                max_lines=10
             )
             with gr.Row():
                 top_k = gr.Slider(
@@ -117,12 +125,13 @@ with gr.Blocks(title="OpenLID-v3 Language Identification") as demo:
         label="Try these examples (Kabyle and Occitan featured)"
     )
-    gr.Markdown("""
     ### Tips for best results:
     - Text is automatically preprocessed (lowercased, normalized)
     - Longer texts generally give more accurate predictions
     - The model supports 194+ language varieties
     - Use higher thresholds to filter out uncertain predictions
     """)
     # Event handlers

 import gradio as gr
 import os
+# Constants
+MAX_INPUT_LENGTH = 10000  # Maximum characters allowed
 # Preprocessing patterns
 NONWORD_REPLACE_STR = r"[^\p{Word}\p{Zs}]|\d"
 NONWORD_REPLACE_PATTERN = regex.compile(NONWORD_REPLACE_STR)
         top_k: Number of top predictions to return (1-10)
         threshold: Confidence threshold (0.0-1.0)
     """
+    # Check input length first
+    if len(text) > MAX_INPUT_LENGTH:
+        return f"**Error**: Input too long ({len(text):,} characters). Maximum allowed is {MAX_INPUT_LENGTH:,} characters."
     if not text or not text.strip():
         return "Please enter some text to analyze."
                 label="Input Text",
                 placeholder="Enter text to identify its language...",
                 lines=5,
+                max_lines=10,
+                max_length=MAX_INPUT_LENGTH  # Also enforce in UI
             )
             with gr.Row():
                 top_k = gr.Slider(
         label="Try these examples (Kabyle and Occitan featured)"
     )
+    gr.Markdown(f"""
     ### Tips for best results:
     - Text is automatically preprocessed (lowercased, normalized)
     - Longer texts generally give more accurate predictions
     - The model supports 194+ language varieties
     - Use higher thresholds to filter out uncertain predictions
+    - **Maximum input length: {MAX_INPUT_LENGTH:,} characters**
     """)
     # Event handlers