{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "# NOTE(review): star import hides where upload()/detect_audio() come from --\n",
    "# presumably updated_api; switch to explicit names once confirmed.\n",
    "from updated_api import *\n",
    "from typing_extensions import Annotated\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import WordNetLemmatizer\n",
    "import string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nltk.download('punkt')\n",
    "nltk.download('stopwords')\n",
    "nltk.download('wordnet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pattern name -> regex. nlp_bat() reads this module-level dict, so whichever\n",
    "# `patterns` cell ran last decides what gets counted.\n",
    "patterns = {\n",
    "    'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
    "    'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
    "    'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e))\",\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def nlp_bat(text):\n",
    "    \"\"\"Count case-insensitive matches of each regex in the global `patterns` dict.\n",
    "\n",
    "    Prints a dict mapping every pattern name to its raw `re.findall` result\n",
    "    (tuples of groups for patterns that contain capture groups) and returns a\n",
    "    dict mapping every pattern name to the number of matches found in `text`.\n",
    "    \"\"\"\n",
    "    results = {}\n",
    "    all_match = {}\n",
    "    for name, pattern in patterns.items():\n",
    "        # re.findall(pattern, string, flags) performs the search. The previous\n",
    "        # re.compile(pattern, text, flags) call raised TypeError because\n",
    "        # re.compile() only accepts (pattern, flags).\n",
    "        matches = re.findall(pattern, text, re.IGNORECASE)\n",
    "        all_match[name] = matches\n",
    "        results[name] = len(matches)\n",
    "\n",
    "    print(all_match)\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = input(\"Give Audio Name: \")\n",
    "audio_url = upload(filename)\n",
    "\n",
    "detect_audio(audio_url, 'file_title')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant: alternations of literal spellings. Rebinds the global `patterns`.\n",
    "patterns = {\n",
    "    'Unique Capsule': r\"unique capsul|unit capsul|uniq...capsul|uni..capsul\\b\",\n",
    "    'Refreshing Taste and Smell': r\"refreshing taste smell|refreshing taste milk|refreshing test smell|ripe singh taste|repressing taste smell\\b\",\n",
    "    'Benson & Hadges Breeze': r\"benson.hage.bree|benson.hage..bree|banson.hage.bree|banson.hage..bree|benson he.es breez|benson hess breez|benson he..e breez|benson haze breez|benson hezes bee|banson breez|banson hedge breathe|banson hedge bridge|benson hedge bre|benson hedge bridge| benson haze brie|banson haze breeze|banson hedge breez\\b\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant: the literal spellings folded into grouped regexes. Rebinds `patterns`.\n",
    "patterns = {\n",
    "    'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
    "    'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
    "    'Benson & Hadges Breeze':r\"\\b(?:benson\\s+h(?:ess|aze|ezes|edge)\\s+breez|banson\\s+(?:haze\\s+breez|hedge\\s+(?:breez|bre))|benson\\s+h(?:aze\\s+brie|edge\\s+bridge))\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant: same as the previous cell except for a looser brand-name regex.\n",
    "patterns = {\n",
    "    'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
    "    'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
    "    'Benson & Hadges Breeze':r\"\\b(?:((b|p|v|f)(a|e).*?son)\\s+(h(?:.*?))\\s+(br))\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def nlp_bat(text):\n",
    "    \"\"\"Count case-insensitive matches of each regex in the global `patterns` dict.\n",
    "\n",
    "    Prints a dict mapping every pattern name to its raw `re.findall` result\n",
    "    (tuples of groups for patterns that contain capture groups) and returns a\n",
    "    dict mapping every pattern name to the number of matches found in `text`.\n",
    "    \"\"\"\n",
    "    results = {}\n",
    "    all_match = {}\n",
    "    for name, pattern in patterns.items():\n",
    "        matches = re.findall(pattern, text, re.IGNORECASE)\n",
    "        all_match[name] = matches\n",
    "        results[name] = len(matches)\n",
    "\n",
    "    print(all_match)\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"Clean text : apnea janet kushihaban banson hages niyashe ekti unique capsule offer panson hages bridge panson hages breeze air capsule atom agnoton tharna refreshing taste smell darn offer tea trial cora jonu apnea ekti trial kit nitaparin thunobat\"\n",
    "\n",
    "nlp_bat(text)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "old_patterns = {\n",
    "    'Unique Capsule': r\"\\b(?:uni(?:que)?|unit|uniq\\.+|uni\\.+)\\s*capsul\",\n",
    "    'Refreshing Taste and Smell': r\"\\b(?:refreshing|ripe|repressing)\\s+(?:taste\\s+(?:smell|milk)|test\\s+smell)\",\n",
    "    'Benson & Hadges Breeze': r\"\\b(?:((b|p|v|f)(a|e).*?son)\\s+(h(?:.*?))\\s+(br))\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "newPattern = {\n",
    "    'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
    "    'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
    "    'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e|ri))\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip installs into the environment of the running kernel (unlike !pip).\n",
    "%pip install assemblyai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import assemblyai as aai\n",
    "from updated_api import *\n",
    "\n",
    "# Read the API key from the environment instead of committing it to the notebook.\n",
    "# The key that used to be hardcoded here is exposed in history and should be rotated.\n",
    "aai.settings.api_key = os.environ[\"ASSEMBLYAI_API_KEY\"]\n",
    "\n",
    "# URL of the file to transcribe\n",
    "FILE_URL = \"https://form.hedigital.online/file-1702199439520-529630625.mp4\"\n",
    "\n",
    "# You can also transcribe a local file by passing in a file path\n",
    "# FILE_URL = './path/to/file.mp3'\n",
    "\n",
    "transcriber = aai.Transcriber()\n",
    "transcript = transcriber.transcribe(FILE_URL)\n",
    "\n",
    "# Show the transcription (or the failure reason) rather than the object's repr.\n",
    "if transcript.status == aai.TranscriptStatus.error:\n",
    "    print(transcript.error)\n",
    "else:\n",
    "    print(transcript.text)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing Audio\n",
      "town saw revival new business opening\n",
      "{'Unique Capsule': [], 'Refreshing Taste and Smell': [], 'Benson & Hadges Breeze': []}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'Unique Capsule': 0,\n",
       " 'Refreshing Taste and Smell': 0,\n",
       " 'Benson & Hadges Breeze': 0}"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "from nlp_api import *\n",
    "from typing_extensions import Annotated\n",
    "import string\n",
    "\n",
    "# Explicit imports for the names lemmatize_and_clean() uses, so this cell does\n",
    "# not depend on the star import above or on an earlier cell having been run.\n",
    "import asyncio\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import WordNetLemmatizer\n",
    "\n",
    "patterns = {\n",
    "    'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
    "    'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
    "    'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e|ri))\",\n",
    "}\n",
    "\n",
    "\n",
    "# Find and count matches for each pattern\n",
    "def nlp_bat(text):\n",
    "    \"\"\"Count case-insensitive matches of each regex in the global `patterns` dict.\n",
    "\n",
    "    Prints a dict mapping every pattern name to its raw `re.findall` result\n",
    "    (tuples of groups for patterns that contain capture groups) and returns a\n",
    "    dict mapping every pattern name to the number of matches found in `text`.\n",
    "    \"\"\"\n",
    "    results = {}\n",
    "    all_match = {}\n",
    "    for name, pattern in patterns.items():\n",
    "        matches = re.findall(pattern, text, re.IGNORECASE)\n",
    "        all_match[name] = matches\n",
    "        results[name] = len(matches)\n",
    "\n",
    "    print(all_match)\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "async def lemmatize_and_clean(text):\n",
    "    \"\"\"Return `text` lowercased, tokenized, filtered and lemmatized.\n",
    "\n",
    "    Tokens come from `nltk.word_tokenize`; only purely alphabetic tokens that\n",
    "    are not English stopwords are kept. Each kept token is lemmatized with\n",
    "    `WordNetLemmatizer` in a worker thread via `asyncio.to_thread`, and the\n",
    "    results are joined with single spaces.\n",
    "    \"\"\"\n",
    "    # Build the stopword set once; it used to be rebuilt for every token.\n",
    "    english_stopwords = set(stopwords.words('english'))\n",
    "    words = nltk.word_tokenize(text.lower())\n",
    "    words = [word for word in words if word.isalpha() and word not in english_stopwords]\n",
    "    lemmatizer = WordNetLemmatizer()\n",
    "    words = [await asyncio.to_thread(lemmatizer.lemmatize, word) for word in words]\n",
    "    return ' '.join(words)\n",
    "\n",
    "\n",
    "# # input\n",
    "filename = input(\"Give Audio Name: \")\n",
    "audio_url = upload(filename)\n",
    "\n",
    "\n",
    "# # transcribe\n",
    "detect_audio(audio_url, 'file_title')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebinds the global `patterns` to a different keyword set; nlp_bat() will use\n",
    "# these once this cell has run.\n",
    "# NOTE(review): `(:?` in 'Clear' matches an optional literal ':' -- possibly\n",
    "# meant to be the non-capturing `(?:`. Left unchanged pending confirmation.\n",
    "patterns = {\n",
    "    \"Clear\":r\"\\b((c|k)(:?l..r))\",\n",
    "    \"Confidence\":r\"\\b(((f|t|th)(u|i))(?:(|c|q|k|ck)(?:(a|e|o))))|(((f|t|th)(u|i))(?:(|c|q|k|ck)(?:a|.a)))\",\n",
    "    \"Revival\":r\"((a)(?:(sh|yush|rch)))\",\n",
    "    \"Anti-Dandruff\":r\"((al)(?:.*?(k|q)(?:a|i|o|u)(?:(s|sh))))|((k|q)(?:a|i|o|u)(?:(s|sh)((?:a|o))))|((k|q)(?:a|i|o|u)(?:(s|sh)(r(?:a|o|u))))\",\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nlpBat",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}