Spaces:

himahande45
/

indicvox-hindi-tamil-codeswitching-tts

Running

App Files Files Community

indicvox-hindi-tamil-codeswitching-tts / code_switch_prompts.json

himahande45

Add IndicVox paper demo Space

402a61f verified 11 days ago

raw

history blame contribute delete

7.72 kB

	{
	"hi_en": [
	{
	"id": "hi_en_001",
	"text": "आज morning standup में हमने Hindi और English prompts पर ASR output compare किया।"
	},
	{
	"id": "hi_en_002",
	"text": "कल client demo से पहले तुम latest checkpoint का audio sample एक बार verify कर लो।"
	},
	{
	"id": "hi_en_003",
	"text": "अगर final report ready है तो उसे shared drive में upload कर दो।"
	},
	{
	"id": "hi_en_004",
	"text": "ये model normal sentences अच्छा बोलता है, लेकिन code-switch parts में अभी भी थोड़ा hesitation आता है।"
	},
	{
	"id": "hi_en_005",
	"text": "मुझे लगता है कि speaker similarity के लिए हमें clean reference clip use करना चाहिए।"
	},
	{
	"id": "hi_en_006",
	"text": "तुमने meeting notes में Tamil section add किया या वो अभी pending है?"
	},
	{
	"id": "hi_en_007",
	"text": "आज lab में GPU free है, इसलिए full evaluation run अभी start कर देते हैं।"
	},
	{
	"id": "hi_en_008",
	"text": "अगर transcript में punctuation ज्यादा हो तो Whisper कभी कभी extra words insert कर देता है।"
	},
	{
	"id": "hi_en_009",
	"text": "इस experiment के लिए मैंने short reference audio चुना ताकि cloning stable रहे।"
	},
	{
	"id": "hi_en_010",
	"text": "हम paper में monolingual results और code-switch results अलग tables में दिखाएँगे।"
	},
	{
	"id": "hi_en_011",
	"text": "please final plots save कर लेना, वरना thesis draft फिर से update करना पड़ेगा।"
	},
	{
	"id": "hi_en_012",
	"text": "आज के test set में proper nouns, news style और casual conversation तीनों mix किए गए हैं।"
	},
	{
	"id": "hi_en_013",
	"text": "अगर base model Tamil शब्द गलत बोलता है तो LoRA adaptation का effect तुरंत दिख जाएगा।"
	},
	{
	"id": "hi_en_014",
	"text": "मैंने summary sheet में WER, CER, switch-WER और speaker similarity सब add कर दिया है।"
	},
	{
	"id": "hi_en_015",
	"text": "आज evening तक तुम generated audio folders को model-wise sort कर दो।"
	},
	{
	"id": "hi_en_016",
	"text": "meeting के बाद हम ASR transcripts manually spot-check भी करेंगे ताकि obvious errors miss न हों।"
	},
	{
	"id": "hi_en_017",
	"text": "ये checkpoint short prompts पर ठीक है, पर long mixed sentences में इसकी rhythm थोड़ी uneven लगती है।"
	},
	{
	"id": "hi_en_018",
	"text": "अगर inference time ज्यादा हुआ तो पहले pilot run करेंगे और फिर full batch launch करेंगे।"
	},
	{
	"id": "hi_en_019",
	"text": "reference speaker clean है, लेकिन generated output में English words का stress अभी consistent नहीं है।"
	},
	{
	"id": "hi_en_020",
	"text": "इस बार final appendix में example prompts, transcripts और metric formulas तीनों include करना।"
	}
	],
	"ta_en": [
	{
	"id": "ta_en_001",
	"text": "நேத்து team meetingல புதிய checkpoint பற்றி detailedஆ பேசினோம்."
	},
	{
	"id": "ta_en_002",
	"text": "இந்த experimentக்கு clean reference audio use பண்ணணும், இல்லனா output quality drop ஆகும்."
	},
	{
	"id": "ta_en_003",
	"text": "final report ready ஆனதும் அதை shared folderல upload பண்ணிடு."
	},
	{
	"id": "ta_en_004",
	"text": "இந்த model Tamil words நல்லா பேசுது, ஆனா English switch வரும் இடங்களில் இன்னும் slight hesitation இருக்கு."
	},
	{
	"id": "ta_en_005",
	"text": "speaker similarity score stable ஆகணும்னா same voice reference தொடர்ந்து use பண்ணணும்."
	},
	{
	"id": "ta_en_006",
	"text": "இன்று full evaluation run start பண்ணலாம், ஏன்னா GPU slot இப்போ free இருக்கு."
	},
	{
	"id": "ta_en_007",
	"text": "Whisper transcriptல punctuation இல்லாதப்போ சில code-switch words betterஆ capture ஆகுது."
	},
	{
	"id": "ta_en_008",
	"text": "paper tableல monolingual Tamil resultsவும் Tamil-English resultsவும் separateஆ காட்டணும்."
	},
	{
	"id": "ta_en_009",
	"text": "இந்த promptல proper noun, news style, casual speech மூன்றும் mixedஆ இருக்கு."
	},
	{
	"id": "ta_en_010",
	"text": "latest checkpoint load பண்ணதுக்குப் பிறகு ஒரு short sanity test முதலில் run பண்ணலாம்."
	},
	{
	"id": "ta_en_011",
	"text": "please generated audio files எல்லாம் model-wise sort பண்ணி metrics folderக்குள் move பண்ணு."
	},
	{
	"id": "ta_en_012",
	"text": "இந்த setupல base modelக்கு Tamil pronunciation கொஞ்சம் weakஆ இருந்தா LoRA gain clearஆ தெரியும்."
	},
	{
	"id": "ta_en_013",
	"text": "summary sheetல WER, CER, switch-WER, speaker similarity எல்லாமே சேர்க்கணும்."
	},
	{
	"id": "ta_en_014",
	"text": "meeting முடிஞ்சதும் manual spot-check பண்ணி obvious ASR mistakes இருக்கா என்று பார்க்கலாம்."
	},
	{
	"id": "ta_en_015",
	"text": "short promptsல output cleanஆ இருக்கு, ஆனா long mixed sentenceல rhythm கொஞ்சம் unevenஆ இருக்கு."
	},
	{
	"id": "ta_en_016",
	"text": "if the plots look clean, appendixல example promptsமும் generated transcriptsமும் add பண்ணலாம்."
	},
	{
	"id": "ta_en_017",
	"text": "இந்த reference clip calmஆ இருக்குது, அதனால் generated voiceவும் naturalஆ வர வாய்ப்பு அதிகம்."
	},
	{
	"id": "ta_en_018",
	"text": "tonightக்குள் full batch finish ஆயிடுச்சுனா நாளைக்கு paper draftல numbers insert பண்ணலாம்."
	},
	{
	"id": "ta_en_019",
	"text": "speaker clone நல்லா இருக்கு, ஆனால் English stress pattern இன்னும் fully consistent இல்ல."
	},
	{
	"id": "ta_en_020",
	"text": "இந்த round முடிஞ்சதும் next stepஆ human listening test plan பண்ணலாம்."
	}
	]
	}