rakib72642 committed on
Commit
493d4f4
·
1 Parent(s): d2cc651

updated pattern

Browse files
Files changed (4) hide show
  1. README.md +12 -1
  2. get_text.ipynb +98 -74
  3. textData.md +49 -0
  4. updated_api.py +4 -4
README.md CHANGED
@@ -1,4 +1,15 @@
1
- # BAT NLP Campaign Audio Data
 
 
 
 
 
 
 
 
 
 
 
2
  config the ngrok auth: ngrok config add-authtoken 2Qm8hS1zPhVXiLjEdlI4738tLzF_2QJwGJMK5oTbQD33QSVXS
3
 
4
  ngrok http --domain=batnlp.ngrok.app 1111
 
1
+ # Arabic NLP
2
+ HuggingFace: https://huggingface.co/rakib72642/Arabic_NLP
3
+
4
+ sudo apt install iproute2 && sudo apt install wget && sudo apt install unzip && sudo apt install nvtop && sudo apt-get install git-lfs && sudo apt-get update && sudo apt-get install libgl1 && curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null && echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | sudo tee /etc/apt/sources.list.d/ngrok.list && sudo apt update && sudo apt install ngrok && ngrok config add-authtoken 2Qm8hS1zPhVXiLjEdlI4738tLzF_2QJwGJMK5oTbQD33QSVXS && sudo apt update && sudo apt upgrade && ngrok http --domain=hawkeyes.ngrok.app 8000
5
+
6
+ git clone https://huggingface.co/rakib72642/Arabic_NLP && cd Arabic_NLP && sudo apt update && sudo apt upgrade && python updated_api.py
7
+
8
+ cd Arabic_NLP && python updated_api.py
9
+
10
+ hypercorn updated_api:app --bind 127.0.0.1:8020 --workers 4
11
+
12
+
13
  config the ngrok auth: ngrok config add-authtoken 2Qm8hS1zPhVXiLjEdlI4738tLzF_2QJwGJMK5oTbQD33QSVXS
14
 
15
  ngrok http --domain=batnlp.ngrok.app 1111
get_text.ipynb CHANGED
@@ -2,26 +2,9 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
- "outputs": [
8
- {
9
- "ename": "ModuleNotFoundError",
10
- "evalue": "No module named 'certifi'",
11
- "output_type": "error",
12
- "traceback": [
13
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
14
- "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
15
- "\u001b[1;32md:\\Projects\\BAT\\BAT_NLP_Campaign\\get_text.ipynb Cell 1\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mre\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mupdated_api\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtyping_extensions\u001b[39;00m \u001b[39mimport\u001b[39;00m Annotated\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Projects/BAT/BAT_NLP_Campaign/get_text.ipynb#W0sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnltk\u001b[39;00m\n",
16
- "File \u001b[1;32md:\\Projects\\BAT\\BAT_NLP_Campaign\\updated_api.py:9\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39muvicorn\u001b[39;00m\n\u001b[0;32m 8\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnltk\u001b[39;00m\n\u001b[1;32m----> 9\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mhttpx\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mfastapi\u001b[39;00m \u001b[39mimport\u001b[39;00m FastAPI\n\u001b[0;32m 11\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpydantic\u001b[39;00m \u001b[39mimport\u001b[39;00m BaseModel\n",
17
- "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\__init__.py:2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m__version__\u001b[39;00m \u001b[39mimport\u001b[39;00m __description__, __title__, __version__\n\u001b[1;32m----> 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_api\u001b[39;00m \u001b[39mimport\u001b[39;00m delete, get, head, options, patch, post, put, request, stream\n\u001b[0;32m 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_auth\u001b[39;00m \u001b[39mimport\u001b[39;00m Auth, BasicAuth, DigestAuth, NetRCAuth\n\u001b[0;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_client\u001b[39;00m \u001b[39mimport\u001b[39;00m USE_CLIENT_DEFAULT, AsyncClient, Client\n",
18
- "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_api.py:4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtyping\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcontextlib\u001b[39;00m \u001b[39mimport\u001b[39;00m contextmanager\n\u001b[1;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_client\u001b[39;00m \u001b[39mimport\u001b[39;00m Client\n\u001b[0;32m 5\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_config\u001b[39;00m \u001b[39mimport\u001b[39;00m DEFAULT_TIMEOUT_CONFIG\n\u001b[0;32m 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_models\u001b[39;00m \u001b[39mimport\u001b[39;00m Response\n",
19
- "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_client.py:11\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m__version__\u001b[39;00m \u001b[39mimport\u001b[39;00m __version__\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_auth\u001b[39;00m \u001b[39mimport\u001b[39;00m Auth, BasicAuth, FunctionAuth\n\u001b[1;32m---> 11\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_config\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 12\u001b[0m DEFAULT_LIMITS,\n\u001b[0;32m 13\u001b[0m DEFAULT_MAX_REDIRECTS,\n\u001b[0;32m 14\u001b[0m DEFAULT_TIMEOUT_CONFIG,\n\u001b[0;32m 15\u001b[0m Limits,\n\u001b[0;32m 16\u001b[0m Proxy,\n\u001b[0;32m 17\u001b[0m Timeout,\n\u001b[0;32m 18\u001b[0m )\n\u001b[0;32m 19\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_decoders\u001b[39;00m \u001b[39mimport\u001b[39;00m SUPPORTED_DECODERS\n\u001b[0;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_exceptions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 21\u001b[0m InvalidURL,\n\u001b[0;32m 22\u001b[0m RemoteProtocolError,\n\u001b[0;32m 23\u001b[0m TooManyRedirects,\n\u001b[0;32m 24\u001b[0m request_context,\n\u001b[0;32m 25\u001b[0m )\n",
20
- "File \u001b[1;32mc:\\Users\\naymm\\miniconda3\\envs\\nlpBat\\lib\\site-packages\\httpx\\_config.py:7\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtyping\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpathlib\u001b[39;00m \u001b[39mimport\u001b[39;00m Path\n\u001b[1;32m----> 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mcertifi\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_compat\u001b[39;00m \u001b[39mimport\u001b[39;00m set_minimum_tls_version_1_2\n\u001b[0;32m 10\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m_models\u001b[39;00m \u001b[39mimport\u001b[39;00m Headers\n",
21
- "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'certifi'"
22
- ]
23
- }
24
- ],
25
  "source": [
26
  "import re\n",
27
  "from updated_api import *\n",
@@ -45,7 +28,7 @@
45
  },
46
  {
47
  "cell_type": "code",
48
- "execution_count": 15,
49
  "metadata": {},
50
  "outputs": [],
51
  "source": [
@@ -58,7 +41,7 @@
58
  },
59
  {
60
  "cell_type": "code",
61
- "execution_count": 16,
62
  "metadata": {},
63
  "outputs": [],
64
  "source": [
@@ -80,20 +63,9 @@
80
  },
81
  {
82
  "cell_type": "code",
83
- "execution_count": 17,
84
  "metadata": {},
85
- "outputs": [
86
- {
87
- "data": {
88
- "text/plain": [
89
- "<coroutine object detect_audio at 0x00000255D1384900>"
90
- ]
91
- },
92
- "execution_count": 17,
93
- "metadata": {},
94
- "output_type": "execute_result"
95
- }
96
- ],
97
  "source": [
98
  "filename = input(\"Give Audio Name: \")\n",
99
  "audio_url = upload(filename)\n",
@@ -103,7 +75,7 @@
103
  },
104
  {
105
  "cell_type": "code",
106
- "execution_count": 6,
107
  "metadata": {},
108
  "outputs": [],
109
  "source": [
@@ -116,7 +88,7 @@
116
  },
117
  {
118
  "cell_type": "code",
119
- "execution_count": 7,
120
  "metadata": {},
121
  "outputs": [],
122
  "source": [
@@ -129,7 +101,7 @@
129
  },
130
  {
131
  "cell_type": "code",
132
- "execution_count": 8,
133
  "metadata": {},
134
  "outputs": [],
135
  "source": [
@@ -149,7 +121,7 @@
149
  },
150
  {
151
  "cell_type": "code",
152
- "execution_count": 9,
153
  "metadata": {},
154
  "outputs": [],
155
  "source": [
@@ -171,29 +143,9 @@
171
  },
172
  {
173
  "cell_type": "code",
174
- "execution_count": 10,
175
  "metadata": {},
176
- "outputs": [
177
- {
178
- "name": "stdout",
179
- "output_type": "stream",
180
- "text": [
181
- "{'Unique Capsule': ['unique capsul'], 'Refreshing Taste and Smell': ['refreshing taste smell'], 'Benson & Hadges Breeze': [('banson', 'b', 'a', 'hages niyashe ekti unique capsule offer panson hages', 'br'), ('panson', 'p', 'a', 'hages', 'br')]}\n"
182
- ]
183
- },
184
- {
185
- "data": {
186
- "text/plain": [
187
- "{'Unique Capsule': 1,\n",
188
- " 'Refreshing Taste and Smell': 1,\n",
189
- " 'Benson & Hadges Breeze': 2}"
190
- ]
191
- },
192
- "execution_count": 10,
193
- "metadata": {},
194
- "output_type": "execute_result"
195
- }
196
- ],
197
  "source": [
198
  "text = \"Clean text : apnea janet kushihaban banson hages niyashe ekti unique capsule offer panson hages bridge panson hages breeze air capsule atom agnoton tharna refreshing taste smell darn offer tea trial cora jonu apnea ekti trial kit nitaparin thunobat\"\n",
199
  "\n",
@@ -202,7 +154,7 @@
202
  },
203
  {
204
  "cell_type": "code",
205
- "execution_count": 11,
206
  "metadata": {},
207
  "outputs": [],
208
  "source": [
@@ -215,7 +167,7 @@
215
  },
216
  {
217
  "cell_type": "code",
218
- "execution_count": 14,
219
  "metadata": {},
220
  "outputs": [],
221
  "source": [
@@ -237,17 +189,9 @@
237
  },
238
  {
239
  "cell_type": "code",
240
- "execution_count": 9,
241
  "metadata": {},
242
- "outputs": [
243
- {
244
- "name": "stdout",
245
- "output_type": "stream",
246
- "text": [
247
- "<assemblyai.transcriber.Transcript object at 0x0000029377EFD480>\n"
248
- ]
249
- }
250
- ],
251
  "source": [
252
  "import assemblyai as aai\n",
253
  "from updated_api import *\n",
@@ -266,12 +210,92 @@
266
  "print(transcript)\n"
267
  ]
268
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  {
270
  "cell_type": "code",
271
  "execution_count": null,
272
  "metadata": {},
273
  "outputs": [],
274
- "source": []
 
 
 
 
 
 
 
275
  }
276
  ],
277
  "metadata": {
@@ -290,7 +314,7 @@
290
  "name": "python",
291
  "nbconvert_exporter": "python",
292
  "pygments_lexer": "ipython3",
293
- "version": "3.10.13"
294
  }
295
  },
296
  "nbformat": 4,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import re\n",
10
  "from updated_api import *\n",
 
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": null,
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
 
41
  },
42
  {
43
  "cell_type": "code",
44
+ "execution_count": null,
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
 
63
  },
64
  {
65
  "cell_type": "code",
66
+ "execution_count": null,
67
  "metadata": {},
68
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
69
  "source": [
70
  "filename = input(\"Give Audio Name: \")\n",
71
  "audio_url = upload(filename)\n",
 
75
  },
76
  {
77
  "cell_type": "code",
78
+ "execution_count": null,
79
  "metadata": {},
80
  "outputs": [],
81
  "source": [
 
88
  },
89
  {
90
  "cell_type": "code",
91
+ "execution_count": null,
92
  "metadata": {},
93
  "outputs": [],
94
  "source": [
 
101
  },
102
  {
103
  "cell_type": "code",
104
+ "execution_count": null,
105
  "metadata": {},
106
  "outputs": [],
107
  "source": [
 
121
  },
122
  {
123
  "cell_type": "code",
124
+ "execution_count": null,
125
  "metadata": {},
126
  "outputs": [],
127
  "source": [
 
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": null,
147
  "metadata": {},
148
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "source": [
150
  "text = \"Clean text : apnea janet kushihaban banson hages niyashe ekti unique capsule offer panson hages bridge panson hages breeze air capsule atom agnoton tharna refreshing taste smell darn offer tea trial cora jonu apnea ekti trial kit nitaparin thunobat\"\n",
151
  "\n",
 
154
  },
155
  {
156
  "cell_type": "code",
157
+ "execution_count": null,
158
  "metadata": {},
159
  "outputs": [],
160
  "source": [
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": null,
171
  "metadata": {},
172
  "outputs": [],
173
  "source": [
 
189
  },
190
  {
191
  "cell_type": "code",
192
+ "execution_count": null,
193
  "metadata": {},
194
+ "outputs": [],
 
 
 
 
 
 
 
 
195
  "source": [
196
  "import assemblyai as aai\n",
197
  "from updated_api import *\n",
 
210
  "print(transcript)\n"
211
  ]
212
  },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": 57,
216
+ "metadata": {},
217
+ "outputs": [
218
+ {
219
+ "name": "stdout",
220
+ "output_type": "stream",
221
+ "text": [
222
+ "Processing Audio\n",
223
+ "town saw revival new business opening\n",
224
+ "{'Unique Capsule': [], 'Refreshing Taste and Smell': [], 'Benson & Hadges Breeze': []}\n"
225
+ ]
226
+ },
227
+ {
228
+ "data": {
229
+ "text/plain": [
230
+ "{'Unique Capsule': 0,\n",
231
+ " 'Refreshing Taste and Smell': 0,\n",
232
+ " 'Benson & Hadges Breeze': 0}"
233
+ ]
234
+ },
235
+ "execution_count": 57,
236
+ "metadata": {},
237
+ "output_type": "execute_result"
238
+ }
239
+ ],
240
+ "source": [
241
+ "import re\n",
242
+ "from nlp_api import *\n",
243
+ "from typing_extensions import Annotated\n",
244
+ "import string\n",
245
+ "\n",
246
+ "patterns = {\n",
247
+ " 'Unique Capsule': r\"\\b(((u(?:nit|niq).*?)\\s+(?:capsul))|(?:.*?uni.*?capsul))\",\n",
248
+ " 'Refreshing Taste and Smell': r\"\\b((((ref|rif|rip|rep|ep|pre).*?)\\s+t(?:a|e|i|y)s(.*?)\\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\\s+t(?:a|e|i|y)s.*?\\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))\",\n",
249
+ " 'Benson & Hadges Breeze':r\"\\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\\s+h(?:.*?)\\s+(b|p|v|f)(?:re|ee|e|ri))\",\n",
250
+ "}\n",
251
+ "\n",
252
+ "\n",
253
+ " # Find and count matches for each pattern\n",
254
+ "def nlp_bat(text):\n",
255
+ " results = {}\n",
256
+ " all_match = {}\n",
257
+ " for name, pattern in patterns.items():\n",
258
+ " matches = re.findall(pattern, text, re.IGNORECASE)\n",
259
+ " m = {name:matches}\n",
260
+ " all_match.update(m)\n",
261
+ " count = len(matches)\n",
262
+ " results[name] = count\n",
263
+ " \n",
264
+ " \n",
265
+ " print(all_match) \n",
266
+ "\n",
267
+ " return results\n",
268
+ "\n",
269
+ "async def lemmatize_and_clean(text):\n",
270
+ " words = nltk.word_tokenize(text.lower())\n",
271
+ " words = [word for word in words if word.isalpha() and word not in set(stopwords.words('english'))]\n",
272
+ " lemmatizer = WordNetLemmatizer()\n",
273
+ " words = [await asyncio.to_thread(lemmatizer.lemmatize, word) for word in words]\n",
274
+ " return ' '.join(words)\n",
275
+ "\n",
276
+ "\n",
277
+ "# # input\n",
278
+ "filename = input(\"Give Audio Name: \")\n",
279
+ "audio_url = upload(filename)\n",
280
+ "\n",
281
+ "\n",
282
+ "# # transcribe\n",
283
+ "detect_audio(audio_url, 'file_title')"
284
+ ]
285
+ },
286
  {
287
  "cell_type": "code",
288
  "execution_count": null,
289
  "metadata": {},
290
  "outputs": [],
291
+ "source": [
292
+ "patterns = {\n",
293
+ " \"Clear\":r\"\\b((c|k)(:?l..r))\",\n",
294
+ " \"Confidence\":r\"\\b(((f|t|th)(u|i))(?:(|c|q|k|ck)(?:(a|e|o))))|(((f|t|th)(u|i))(?:(|c|q|k|ck)(?:a|.a)))\",\n",
295
+ " \"Revival\":r\"((a)(?:(sh|yush|rch)))\",\n",
296
+ " \"Anti-Dandruff\":r\"((al)(?:.*?(k|q)(?:a|i|o|u)(?:(s|sh))))|((k|q)(?:a|i|o|u)(?:(s|sh)((?:a|o))))|((k|q)(?:a|i|o|u)(?:(s|sh)(r(?:a|o|u))))\",\n",
297
+ "}"
298
+ ]
299
  }
300
  ],
301
  "metadata": {
 
314
  "name": "python",
315
  "nbconvert_exporter": "python",
316
  "pygments_lexer": "ipython3",
317
+ "version": "3.12.2"
318
  }
319
  },
320
  "nbformat": 4,
textData.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1. clear fikoh ash al qasra
2
+ 2. clear fika ash al qaeda
3
+ 3. clear fikah ash al kishra
4
+ 4. ash alokosra vika clear
5
+ 5. clear al kashara
6
+ 6. clear fuqah ayush vidal kashakti
7
+ 7. clear fuqah ayush vidal kashakti
8
+ 8. ash alokosra vika clear
9
+ 9. clear fik india loch
10
+ 10. al koshro clear fikah asha
11
+ 11. clear vika asha al koshuru
12
+ 12. clear fik ash vid al khushra
13
+ 13. asha al ghoshuru clear fikah
14
+ 14. asha al kosharo clear
15
+ 15. lehrer fik ash dudul kusha
16
+ 16. clear fika taj vit al khashoggi
17
+ 17. fikah clear asha al kosru
18
+ 18. clear fikar ash vid al khusho
19
+ 19. clear fika inash bidal kushu may
20
+ 20. clear fika ash mid al kushu
21
+ 21. clear fico bush
22
+ 22. clear ar vital kosho
23
+ 23. clear fico vidal kusho
24
+ 24. clear fikah ash al kishra
25
+ 25. clear thickah ash al kishra
26
+ 26. fika vidal kishra ash fika
27
+ 27. clear fik ayesh vidal kashir
28
+ 28. player fika faith al kish ash
29
+ 30. fid alakishra ash fika clear ash fika alakashra
30
+ 31. clear fikach ash vidal kosher
31
+ 32. clear fica arch alcohol
32
+ 33. clear fikach ash vidal kosher
33
+ 34. clear fika ash al kosher
34
+ 35. clear thicker arch alcohol
35
+ # ###################################################
36
+ 1. yunkenulil belsamil mudadil il kashrati yuhadiya farwata rasi waemna al jafa
37
+ 2. yusaidu shampul mudadul il kushrati ala muharrabatil kushrati wal hakati
38
+ 4. intarshal edabul kilesi kiyo fishabiyetihi
39
+ 6. alistair mul mustamirula jatil mudodati lil kushrati yunkinu yuaziza sahata faruatiratsi
40
+ 7. hatidanal fashali kafur satin lita lumiyua zizuthiqata
41
+ 8. artini zojajatan mina sham ko shafi
42
+ 10. alistair mul muntadimul muntajatil mudadati lil kashrati yumkinu yukala minta yuji faruatirat
43
+ 11. tahdida ehdefin kabilatin litikata
44
+ 12. tahdida ehdefin kabilatin litikata
45
+ 13. ihatota nafsibi afraid
46
+ 14. shahid al mokaotari khiyu inti ashan ladazuari badata tashdidi
47
+ 15. shahid al fari kuriyad yuntyashan mahmuda ribin jadedan
48
+ 16. town saw revival new business opening
49
+
updated_api.py CHANGED
@@ -62,12 +62,12 @@ async def lemmatize_and_clean(text):
62
 
63
 
64
  patterns = {
65
- 'Unique Capsule': r"\b(((u(?:nit|niq).*?)\s+(?:capsul))|(?:.*?uni.*?capsul))",
66
- 'Refreshing Taste and Smell': r"\b((((ref|rif|rip|rep|ep|pre).*?)\s+t(?:a|e|i|y)s(.*?)\s+(sm|(?:.*?(sm|m)))(?:el|il|al|ol|.*?))|((?:in.*?)\s+t(?:a|e|i|y)s.*?\s+(.*?)(sm|m)(?:el|il|al|ol|ail|eal)))",
67
- 'Benson & Hadges Breeze':r"\b((b|p|v|f)(?:(an|en|a|e)(?:s|ch|t)(?:on|an|en).*?)\s+h(?:.*?)\s+(b|p|v|f)(?:re|ee|e|ri))",
 
68
  }
69
 
70
-
71
  async def nlp_bat(text):
72
  results = {}
73
  all_match = {}
 
62
 
63
 
64
  patterns = {
65
+ "Clear":r"\b(((c|k)(?:(l..r|lir|lar|il))))",
66
+ "Confidence":r"\b((((f|t|th)(u|i))(?:(|c|q|k|ck)(?:(a|e|o))))|(((f|t|th)(u|i))(?:(|c|q|k|ck)(?:a|.a))))",
67
+ "Revival":r"\b(((a)(?:(sh|yush|rch))))",
68
+ "Anti-Dandruff":r"\b(((al)(?:.*?(k|q|kh)(?:a|i|o|u)(?:(s|sh))))|((k|q|kh)(?:a|i|o|u)(?:(s|sh)((?:a|o))))|((k|q|kh)(?:a|i|o|u)(?:(s|sh)(r(?:a|o|u)))))",
69
  }
70
 
 
71
  async def nlp_bat(text):
72
  results = {}
73
  all_match = {}