boffire committed on
Commit
41a4e04
·
verified ·
1 Parent(s): 2181426

Added MAX_INPUT_LENGTH

Browse files

Added MAX_INPUT_LENGTH = 10000

Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -4,6 +4,9 @@ import regex
4
  import gradio as gr
5
  import os
6
 
 
 
 
7
  # Preprocessing patterns
8
  NONWORD_REPLACE_STR = r"[^\p{Word}\p{Zs}]|\d"
9
  NONWORD_REPLACE_PATTERN = regex.compile(NONWORD_REPLACE_STR)
@@ -34,6 +37,10 @@ def predict_language(text, top_k=3, threshold=0.5):
34
  top_k: Number of top predictions to return (1-10)
35
  threshold: Confidence threshold (0.0-1.0)
36
  """
 
 
 
 
37
  if not text or not text.strip():
38
  return "Please enter some text to analyze."
39
 
@@ -79,7 +86,8 @@ with gr.Blocks(title="OpenLID-v3 Language Identification") as demo:
79
  label="Input Text",
80
  placeholder="Enter text to identify its language...",
81
  lines=5,
82
- max_lines=10
 
83
  )
84
  with gr.Row():
85
  top_k = gr.Slider(
@@ -117,12 +125,13 @@ with gr.Blocks(title="OpenLID-v3 Language Identification") as demo:
117
  label="Try these examples (Kabyle and Occitan featured)"
118
  )
119
 
120
- gr.Markdown("""
121
  ### Tips for best results:
122
  - Text is automatically preprocessed (lowercased, normalized)
123
  - Longer texts generally give more accurate predictions
124
  - The model supports 194+ language varieties
125
  - Use higher thresholds to filter out uncertain predictions
 
126
  """)
127
 
128
  # Event handlers
 
4
  import gradio as gr
5
  import os
6
 
7
+ # Constants
8
+ MAX_INPUT_LENGTH = 10000 # Maximum characters allowed
9
+
10
  # Preprocessing patterns
11
  NONWORD_REPLACE_STR = r"[^\p{Word}\p{Zs}]|\d"
12
  NONWORD_REPLACE_PATTERN = regex.compile(NONWORD_REPLACE_STR)
 
37
  top_k: Number of top predictions to return (1-10)
38
  threshold: Confidence threshold (0.0-1.0)
39
  """
40
+ # Check input length first
41
+ if len(text) > MAX_INPUT_LENGTH:
42
+ return f"**Error**: Input too long ({len(text):,} characters). Maximum allowed is {MAX_INPUT_LENGTH:,} characters."
43
+
44
  if not text or not text.strip():
45
  return "Please enter some text to analyze."
46
 
 
86
  label="Input Text",
87
  placeholder="Enter text to identify its language...",
88
  lines=5,
89
+ max_lines=10,
90
+ max_length=MAX_INPUT_LENGTH # Also enforce in UI
91
  )
92
  with gr.Row():
93
  top_k = gr.Slider(
 
125
  label="Try these examples (Kabyle and Occitan featured)"
126
  )
127
 
128
+ gr.Markdown(f"""
129
  ### Tips for best results:
130
  - Text is automatically preprocessed (lowercased, normalized)
131
  - Longer texts generally give more accurate predictions
132
  - The model supports 194+ language varieties
133
  - Use higher thresholds to filter out uncertain predictions
134
+ - **Maximum input length: {MAX_INPUT_LENGTH:,} characters**
135
  """)
136
 
137
  # Event handlers