boffire committed on
Commit
1e8cb8a
·
verified ·
1 Parent(s): 4e24c99

Update src/gradio_app.py

Browse files
Files changed (1) hide show
  1. src/gradio_app.py +7 -4
src/gradio_app.py CHANGED
@@ -25,6 +25,9 @@ DICT_DIR = os.path.join(os.path.dirname(__file__), "dicts")
25
  AFF_PATH = os.path.join(DICT_DIR, "kab.aff")
26
  DIC_PATH = os.path.join(DICT_DIR, "kab.dic")
27
 
 
 
 
28
  _hunspell_dict = None
29
 
30
  def get_hunspell():
@@ -45,7 +48,7 @@ def correct_word(word: str) -> str:
45
  dic = get_hunspell()
46
 
47
  # Nettoyage: séparer ponctuation
48
- stripped = word.strip(".,!?;:\"'()[]{}«»—–-").lower()
49
  if not stripped:
50
  return word
51
 
@@ -71,8 +74,8 @@ def correct_word(word: str) -> str:
71
  best = best[0].upper() + best[1:]
72
 
73
  # Restaurer la ponctuation attachée
74
- prefix_len = len(word) - len(word.lstrip(".,!?;:"'()[]{}«»—–-"))
75
- suffix_len = len(word) - len(word.rstrip(".,!?;:"'()[]{}«»—–-"))
76
  prefix = word[:prefix_len]
77
  suffix = word[-suffix_len:] if suffix_len > 0 else ""
78
 
@@ -97,7 +100,7 @@ def spellcheck_transcript(text: str, auto_correct: bool = True) -> tuple[str, li
97
  else:
98
  # Mode suggestion seule: on ne corrige pas, on signale juste
99
  dic = get_hunspell()
100
- stripped = word.strip(".,!?;:\"'()[]{}«»—–-").lower()
101
  corrected = word if (not stripped or dic.lookup(stripped)) else word + " [?]"
102
 
103
  corrected_words.append(corrected)
 
25
  AFF_PATH = os.path.join(DICT_DIR, "kab.aff")
26
  DIC_PATH = os.path.join(DICT_DIR, "kab.dic")
27
 
28
+ # Caractères de ponctuation à stripper (centralisés dans une seule constante pour éviter de dupliquer le littéral et ses problèmes d'échappement de guillemets)
29
+ PUNCTUATION_CHARS = '.,!?;:"\'()[]{}«»—–-'
30
+
31
  _hunspell_dict = None
32
 
33
  def get_hunspell():
 
48
  dic = get_hunspell()
49
 
50
  # Nettoyage: séparer ponctuation
51
+ stripped = word.strip(PUNCTUATION_CHARS).lower()
52
  if not stripped:
53
  return word
54
 
 
74
  best = best[0].upper() + best[1:]
75
 
76
  # Restaurer la ponctuation attachée
77
+ prefix_len = len(word) - len(word.lstrip(PUNCTUATION_CHARS))
78
+ suffix_len = len(word) - len(word.rstrip(PUNCTUATION_CHARS))
79
  prefix = word[:prefix_len]
80
  suffix = word[-suffix_len:] if suffix_len > 0 else ""
81
 
 
100
  else:
101
  # Mode suggestion seule: on ne corrige pas, on signale juste
102
  dic = get_hunspell()
103
+ stripped = word.strip(PUNCTUATION_CHARS).lower()
104
  corrected = word if (not stripped or dic.lookup(stripped)) else word + " [?]"
105
 
106
  corrected_words.append(corrected)