JakgritB committed on
Commit
6eb98ab
·
1 Parent(s): caa2f69

fix(backend): localize demo highlight text

Browse files
backend/app/services/highlight.py CHANGED
@@ -138,7 +138,7 @@ Transcript:
138
  start_seconds=start,
139
  end_seconds=end,
140
  title=self._title_for(segment.text),
141
- reason=f"Matches the {profile.clip_style} style for a {niche} audience.",
142
  score=round(score, 1),
143
  subtitle_text=segment.text,
144
  metadata={"model": "heuristic-fallback"},
@@ -147,12 +147,128 @@ Transcript:
147
  return sorted(clips, key=lambda clip: clip.start_seconds)
148
 
149
  def _title_for(self, text: str) -> str:
150
- words = re.sub(r"[^A-Za-z0-9 ]+", "", text).split()
151
- title = " ".join(words[:7])
152
- return title or "Highlight"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
 
155
  def _effective_niche(profile: ChannelProfile) -> str:
156
  if profile.niche.lower() == "other" and profile.niche_custom:
157
  return profile.niche_custom
158
  return profile.niche
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  start_seconds=start,
139
  end_seconds=end,
140
  title=self._title_for(segment.text),
141
+ reason=self._reason_for(profile, niche),
142
  score=round(score, 1),
143
  subtitle_text=segment.text,
144
  metadata={"model": "heuristic-fallback"},
 
147
  return sorted(clips, key=lambda clip: clip.start_seconds)
148
 
149
  def _title_for(self, text: str) -> str:
150
+ clean = re.sub(r"\s+", " ", text).strip(" \t\r\n.,!?;:()[]{}\"'")
151
+ words = clean.split()
152
+ if len(words) > 1:
153
+ title = " ".join(words[:7])
154
+ else:
155
+ title = clean[:48]
156
+ return title[:72].rstrip() or "Highlight"
157
+
158
def _reason_for(self, profile: ChannelProfile, niche: str) -> str:
    """Return the one-sentence explanation attached to a fallback clip.

    The sentence is written in the language named by
    ``profile.primary_language`` (lower-cased, matched by substring against
    "thai", "japanese", "chinese" and "korean", in that order), using the
    localized labels for the clip style and niche. Any other language gets
    the English sentence built from the raw ``profile.clip_style`` and
    *niche* values.
    """
    lang = profile.primary_language.lower()
    style_label = _localized_profile_word(profile.clip_style, lang, "style")
    niche_text = _localized_profile_word(niche, lang, "niche")

    # Ordered (marker, sentence) pairs; the first marker found in the
    # language name decides the wording.
    localized_reasons = (
        ("thai", f"ตรงกับสไตล์ {style_label} สำหรับผู้ชมช่องแนว {niche_text}"),
        ("japanese", f"{niche_text} の視聴者に合う {style_label} スタイルの候補です。"),
        ("chinese", f"符合 {niche_text} 受众期待的 {style_label} 风格。"),
        ("korean", f"{niche_text} 시청자에게 맞는 {style_label} 스타일의 후보입니다."),
    )
    for marker, sentence in localized_reasons:
        if marker in lang:
            return sentence
    return f"Matches the {profile.clip_style} style for a {niche} audience."
171
 
172
 
173
  def _effective_niche(profile: ChannelProfile) -> str:
174
  if profile.niche.lower() == "other" and profile.niche_custom:
175
  return profile.niche_custom
176
  return profile.niche
177
+
178
+
179
+ def _localized_profile_word(value: str, language: str, group: str) -> str:
180
+ key = value.lower().replace(" ", "_")
181
+ localized = {
182
+ "thai": {
183
+ "niche": {
184
+ "education": "การศึกษา",
185
+ "gaming": "เกม",
186
+ "podcast": "พอดแคสต์",
187
+ "commentary": "เล่า/วิเคราะห์",
188
+ "cars": "รถยนต์",
189
+ "beauty": "บิวตี้",
190
+ "fitness": "ฟิตเนส",
191
+ "finance": "การเงิน",
192
+ "tech": "เทคโนโลยี",
193
+ "lifestyle": "ไลฟ์สไตล์",
194
+ "music": "ดนตรี",
195
+ },
196
+ "style": {
197
+ "informative": "ให้ข้อมูล",
198
+ "funny": "ตลก",
199
+ "dramatic": "ดราม่า",
200
+ "educational": "สอนเข้าใจง่าย",
201
+ "commentary": "วิเคราะห์",
202
+ },
203
+ },
204
+ "japanese": {
205
+ "niche": {
206
+ "education": "教育",
207
+ "gaming": "ゲーム",
208
+ "podcast": "ポッドキャスト",
209
+ "commentary": "解説",
210
+ "cars": "車",
211
+ "beauty": "美容",
212
+ "fitness": "フィットネス",
213
+ "finance": "金融",
214
+ "tech": "テック",
215
+ "lifestyle": "ライフスタイル",
216
+ "music": "音楽",
217
+ },
218
+ "style": {
219
+ "informative": "情報性の高い",
220
+ "funny": "ユーモアのある",
221
+ "dramatic": "ドラマチックな",
222
+ "educational": "学びやすい",
223
+ "commentary": "解説型の",
224
+ },
225
+ },
226
+ "chinese": {
227
+ "niche": {
228
+ "education": "教育",
229
+ "gaming": "游戏",
230
+ "podcast": "播客",
231
+ "commentary": "解说",
232
+ "cars": "汽车",
233
+ "beauty": "美妆",
234
+ "fitness": "健身",
235
+ "finance": "金融",
236
+ "tech": "科技",
237
+ "lifestyle": "生活方式",
238
+ "music": "音乐",
239
+ },
240
+ "style": {
241
+ "informative": "信息量高",
242
+ "funny": "有趣",
243
+ "dramatic": "戏剧化",
244
+ "educational": "教学型",
245
+ "commentary": "评论型",
246
+ },
247
+ },
248
+ "korean": {
249
+ "niche": {
250
+ "education": "교육",
251
+ "gaming": "게임",
252
+ "podcast": "팟캐스트",
253
+ "commentary": "해설",
254
+ "cars": "자동차",
255
+ "beauty": "뷰티",
256
+ "fitness": "피트니스",
257
+ "finance": "금융",
258
+ "tech": "테크",
259
+ "lifestyle": "라이프스타일",
260
+ "music": "음악",
261
+ },
262
+ "style": {
263
+ "informative": "정보형",
264
+ "funny": "재미있는",
265
+ "dramatic": "극적인",
266
+ "educational": "교육형",
267
+ "commentary": "해설형",
268
+ },
269
+ },
270
+ }
271
+ for language_key, groups in localized.items():
272
+ if language_key in language:
273
+ return groups.get(group, {}).get(key, value)
274
+ return value
backend/app/services/transcription.py CHANGED
@@ -67,6 +67,7 @@ class WhisperTranscriber:
67
 
68
  def _demo_transcript(self, profile: ChannelProfile) -> list[TranscriptSegment]:
69
  style = profile.clip_style.lower()
 
70
  niche_value = (
71
  profile.niche_custom
72
  if profile.niche.lower() == "other" and profile.niche_custom
@@ -77,17 +78,12 @@ class WhisperTranscriber:
77
  profile.channel_description
78
  or "The creator wants clips that feel useful and easy to share."
79
  )
80
- lines = [
81
- "This opening sets up the main problem creators face when a long video hides the best moments.",
82
- "Here is the surprising mistake most teams make when they choose clips only by view count.",
83
- "The important question is simple: which moment would make someone stop scrolling right now?",
84
- f"For a {niche} channel, the answer changes because the audience expects a {style} rhythm.",
85
- f"The channel context is simple: {creator_context}",
86
- "This section has the clearest explanation and a strong before-and-after contrast.",
87
- "Then the guest reacts with a punchy line that works well as a short hook.",
88
- "A practical takeaway lands here, with enough context to stand alone as a sixty second clip.",
89
- "The final segment wraps the idea with a direct callout that is easy to subtitle.",
90
- ]
91
  segments: list[TranscriptSegment] = []
92
  cursor = 0.0
93
  for line in lines:
@@ -103,3 +99,163 @@ class WhisperTranscriber:
103
  )
104
  cursor = end
105
  return segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def _demo_transcript(self, profile: ChannelProfile) -> list[TranscriptSegment]:
69
  style = profile.clip_style.lower()
70
+ language = profile.primary_language.lower()
71
  niche_value = (
72
  profile.niche_custom
73
  if profile.niche.lower() == "other" and profile.niche_custom
 
78
  profile.channel_description
79
  or "The creator wants clips that feel useful and easy to share."
80
  )
81
+ lines = _demo_lines(
82
+ language,
83
+ _localized_profile_word(niche, language, "niche"),
84
+ _localized_profile_word(style, language, "style"),
85
+ creator_context,
86
+ )
 
 
 
 
 
87
  segments: list[TranscriptSegment] = []
88
  cursor = 0.0
89
  for line in lines:
 
99
  )
100
  cursor = end
101
  return segments
102
+
103
+
104
+ def _demo_lines(language: str, niche: str, style: str, creator_context: str) -> list[str]:
105
+ if "thai" in language:
106
+ return [
107
+ "ช่วงเปิดนี้วางปัญหาหลักของครีเอเตอร์ เวลาวิดีโอยาวซ่อนช่วงที่ดีที่สุดไว้",
108
+ "นี่คือความผิดพลาดที่หลายทีมทำ คือเลือกคลิปจากยอดวิวอย่างเดียว",
109
+ "คำถามสำคัญคือ ช่วงไหนที่จะทำให้คนหยุดเลื่อนหน้าจอได้ทันที",
110
+ f"สำหรับช่องแนว {niche} คำตอบจะเปลี่ยน เพราะผู้ชมคาดหวังจังหวะที่ {style}",
111
+ f"บริบทของช่องคือ {creator_context}",
112
+ "ช่วงนี้อธิบายได้ชัดที่สุด และมีภาพเปรียบเทียบก่อนกับหลังที่แรง",
113
+ "จากนั้นแขกรับเชิญตอบสนองด้วยประโยคสั้นที่เหมาะมากสำหรับ hook",
114
+ "ตรงนี้มีข้อคิดที่เอาไปใช้ได้ทันที และยืนเป็นคลิปสั้นได้ด้วยตัวเอง",
115
+ "ช่วงท้ายสรุปไอเดียด้วยประโยคชัด ๆ ที่ทำซับได้ง่าย",
116
+ ]
117
+ if "japanese" in language:
118
+ return [
119
+ "この冒頭では、長い動画に最高の瞬間が埋もれてしまう問題を示しています。",
120
+ "多くのチームが再生数だけでクリップを選ぶという意外なミスをしています。",
121
+ "大事な問いは、この瞬間が今すぐスクロールを止めさせるかどうかです。",
122
+ f"{niche} チャンネルでは、視聴者が {style} なテンポを期待するため答えが変わります。",
123
+ f"チャンネルの文脈はこうです。{creator_context}",
124
+ "この部分は説明が最も明確で、ビフォーアフターの対比も強いです。",
125
+ "その後、ゲストが短いフックとして使いやすい一言で反応します。",
126
+ "ここには単独の短尺クリップとして成立する実用的な学びがあります。",
127
+ "最後の部分は字幕にしやすい明確な一言でアイデアをまとめます。",
128
+ ]
129
+ if "chinese" in language:
130
+ return [
131
+ "这个开头点出了创作者常遇到的问题:长视频里藏着最好的瞬间。",
132
+ "很多团队都会犯一个意外错误,只根据播放量来选择剪辑片段。",
133
+ "关键问题很简单:哪个瞬间能让观众立刻停下滑动?",
134
+ f"对于 {niche} 频道,答案会不同,因为观众期待 {style} 的节奏。",
135
+ f"频道背景是:{creator_context}",
136
+ "这一段解释最清楚,并且有很强的前后对比。",
137
+ "接着嘉宾给出一句有冲击力的回应,很适合作为短视频 hook。",
138
+ "这里有一个实用结论,足够独立成为一个短视频片段。",
139
+ "最后一段用一句清晰的话收束观点,也很适合做字幕。",
140
+ ]
141
+ if "korean" in language:
142
+ return [
143
+ "이 오프닝은 긴 영상 속 좋은 순간이 묻히는 문제를 보여줍니다.",
144
+ "많은 팀이 조회수만 보고 클립을 고르는 의외의 실수를 합니다.",
145
+ "핵심 질문은 간단합니다. 어떤 순간이 시청자의 스크롤을 멈추게 할까요?",
146
+ f"{niche} 채널에서는 시청자가 {style} 리듬을 기대하기 때문에 답이 달라집니다.",
147
+ f"채널 맥락은 다음과 같습니다. {creator_context}",
148
+ "이 부분은 설명이 가장 명확하고 전후 대비도 강합니다.",
149
+ "그다음 게스트가 짧은 훅으로 쓰기 좋은 강한 한마디를 합니다.",
150
+ "여기에는 단독 숏폼 클립으로도 충분한 실용적인 takeaway가 있습니다.",
151
+ "마지막 부분은 자막으로 만들기 쉬운 명확한 문장으로 아이디어를 정리합니다.",
152
+ ]
153
+ return [
154
+ "This opening sets up the main problem creators face when a long video hides the best moments.",
155
+ "Here is the surprising mistake most teams make when they choose clips only by view count.",
156
+ "The important question is simple: which moment would make someone stop scrolling right now?",
157
+ f"For a {niche} channel, the answer changes because the audience expects a {style} rhythm.",
158
+ f"The channel context is simple: {creator_context}",
159
+ "This section has the clearest explanation and a strong before-and-after contrast.",
160
+ "Then the guest reacts with a punchy line that works well as a short hook.",
161
+ "A practical takeaway lands here, with enough context to stand alone as a sixty second clip.",
162
+ "The final segment wraps the idea with a direct callout that is easy to subtitle.",
163
+ ]
164
+
165
+
166
+ def _localized_profile_word(value: str, language: str, group: str) -> str:
167
+ key = value.lower().replace(" ", "_")
168
+ localized = {
169
+ "thai": {
170
+ "niche": {
171
+ "education": "การศึกษา",
172
+ "gaming": "เกม",
173
+ "podcast": "พอดแคสต์",
174
+ "commentary": "เล่า/วิเคราะห์",
175
+ "cars": "รถยนต์",
176
+ "beauty": "บิวตี้",
177
+ "fitness": "ฟิตเนส",
178
+ "finance": "การเงิน",
179
+ "tech": "เทคโนโลยี",
180
+ "lifestyle": "ไลฟ์สไตล์",
181
+ "music": "ดนตรี",
182
+ },
183
+ "style": {
184
+ "informative": "ให้ข้อมูล",
185
+ "funny": "ตลก",
186
+ "dramatic": "ดราม่า",
187
+ "educational": "สอนเข้าใจง่าย",
188
+ "commentary": "วิเคราะห์",
189
+ },
190
+ },
191
+ "japanese": {
192
+ "niche": {
193
+ "education": "教育",
194
+ "gaming": "ゲーム",
195
+ "podcast": "ポッドキャスト",
196
+ "commentary": "解説",
197
+ "cars": "車",
198
+ "beauty": "美容",
199
+ "fitness": "フィットネス",
200
+ "finance": "金融",
201
+ "tech": "テック",
202
+ "lifestyle": "ライフスタイル",
203
+ "music": "音楽",
204
+ },
205
+ "style": {
206
+ "informative": "情報性の高い",
207
+ "funny": "ユーモアのある",
208
+ "dramatic": "ドラマチックな",
209
+ "educational": "学びやすい",
210
+ "commentary": "解説型の",
211
+ },
212
+ },
213
+ "chinese": {
214
+ "niche": {
215
+ "education": "教育",
216
+ "gaming": "游戏",
217
+ "podcast": "播客",
218
+ "commentary": "解说",
219
+ "cars": "汽车",
220
+ "beauty": "美妆",
221
+ "fitness": "健身",
222
+ "finance": "金融",
223
+ "tech": "科技",
224
+ "lifestyle": "生活方式",
225
+ "music": "音乐",
226
+ },
227
+ "style": {
228
+ "informative": "信息量高",
229
+ "funny": "有趣",
230
+ "dramatic": "戏剧化",
231
+ "educational": "教学型",
232
+ "commentary": "评论型",
233
+ },
234
+ },
235
+ "korean": {
236
+ "niche": {
237
+ "education": "교육",
238
+ "gaming": "게임",
239
+ "podcast": "팟캐스트",
240
+ "commentary": "해설",
241
+ "cars": "자동차",
242
+ "beauty": "뷰티",
243
+ "fitness": "피트니스",
244
+ "finance": "금융",
245
+ "tech": "테크",
246
+ "lifestyle": "라이프스타일",
247
+ "music": "음악",
248
+ },
249
+ "style": {
250
+ "informative": "정보형",
251
+ "funny": "재미있는",
252
+ "dramatic": "극적인",
253
+ "educational": "교육형",
254
+ "commentary": "해설형",
255
+ },
256
+ },
257
+ }
258
+ for language_key, groups in localized.items():
259
+ if language_key in language:
260
+ return groups.get(group, {}).get(key, value)
261
+ return value