IsmatS committed on
Commit
ebad198
·
1 Parent(s): 01b4019
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12.0
main.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Form
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.templating import Jinja2Templates
5
+ from pydantic import BaseModel
6
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
7
+ import numpy as np
8
+
9
# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Expose the local "static" directory (CSS, JavaScript) under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")

# Jinja2 template loader rooted at the local "templates" directory.
templates = Jinja2Templates(directory="templates")

# Hugging Face checkpoint used for token classification. The tokenizer and
# model are loaded once at import time and shared by every request.
model_name = "IsmatS/xlm-roberta-az-ner"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# NER pipeline; the /predict/ handler reads "entity_group" and "word" from
# each item it returns.
nlp_ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
22
+
23
# Maps the model's generic "LABEL_<i>" ids to human-readable entity names.
# The position of each name in the tuple is its label index.
label_mapping = {
    f"LABEL_{index}": name
    for index, name in enumerate((
        "Other",
        "Person",
        "Location",
        "Organization",
        "Date",
        "Time",
        "Money",
        "Percentage",
        "Facility",
        "Product",
        "Event",
        "Art",
        "Law",
        "Language",
        "Government",
        "Nationality or Religion",
        "Ordinal",
        "Cardinal",
        "Disease",
        "Contact",
        "Proverb or Saying",
        "Quantity",
        "Miscellaneous",
        "Position",
        "Project",
    ))
}
50
+
51
def convert_numpy_types(obj):
    """Recursively replace NumPy values with plain Python equivalents.

    Lists and dicts are rebuilt with their items converted; dict keys are
    kept as they are. Any NumPy scalar (float32, float64, int32, int64,
    bool_, ...) becomes the matching built-in Python value, and NumPy
    arrays become nested lists. Every other object is returned unchanged.

    Args:
        obj: Any value, possibly a nested list/dict structure holding
            NumPy scalars or arrays.

    Returns:
        A structure of the same shape containing only built-in Python
        types wherever NumPy values were found.
    """
    # np.generic is the common base class of all NumPy scalar types, so this
    # covers more than the float32/int32 pair alone.
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    return obj
62
+
63
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Serve the landing page that contains the NER input form.

    Args:
        request: The incoming request, needed by the template renderer.

    Returns:
        The rendered ``index.html`` template as an HTML response.
    """
    # Pass the request as the first argument: the older
    # TemplateResponse(name, {"request": request}) form is deprecated in
    # current Starlette releases, which add the request to the template
    # context themselves.
    return templates.TemplateResponse(request, "index.html")
66
+
67
@app.post("/predict/")
async def predict_ner(text: str = Form(...)):
    """Run NER on the submitted form text and group the hits by entity type.

    Args:
        text: Required form field holding the text to analyse.

    Returns:
        ``{"entities": {<type name>: [<entity text>, ...]}}`` where the type
        name is looked up in ``label_mapping`` (falling back to the raw
        label) and anything mapped to "Other" is left out.
    """
    # NOTE(review): the pipeline call is synchronous and runs directly inside
    # this coroutine.
    detections = nlp_ner(text)

    grouped = {}
    for span in detections:
        raw_label = span["entity_group"]
        readable = label_mapping.get(raw_label, raw_label)

        # "Other" marks non-entity tokens; everything else is collected
        # under its readable type name in detection order.
        if readable != "Other":
            grouped.setdefault(readable, []).append(span["word"])

    return {"entities": grouped}
91
+
92
+
93
+ # Run with uvicorn main:app --reload
94
+ # curl -X POST "http://127.0.0.1:8000/predict/" \
95
+ # -H "Content-Type: application/x-www-form-urlencoded" \
96
+ # --data-urlencode "text=Bakı şəhərində Azərbaycan Respublikasının prezidenti İlham Əliyev."
97
+
98
+ # 2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub.
XLM-RoBERTa.ipynb → models/XLM-RoBERTa.ipynb RENAMED
File without changes
mBERT.ipynb → models/mBERT.ipynb RENAMED
File without changes
push_to_HF.py → models/push_to_HF.py RENAMED
File without changes
requirements.txt CHANGED
@@ -1,13 +1,35 @@
 
 
1
  certifi==2024.8.30
2
  charset-normalizer==3.4.0
 
 
3
  filelock==3.16.1
4
  fsspec==2024.10.0
 
5
  huggingface-hub==0.26.2
6
  idna==3.10
 
 
 
 
 
7
  packaging==24.1
8
- python-dotenv==1.0.1
 
 
9
  PyYAML==6.0.2
 
10
  requests==2.32.3
 
 
 
 
 
 
 
11
  tqdm==4.66.6
 
12
  typing_extensions==4.12.2
13
  urllib3==2.2.3
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.6.2.post1
3
  certifi==2024.8.30
4
  charset-normalizer==3.4.0
5
+ click==8.1.7
6
+ fastapi==0.115.4
7
  filelock==3.16.1
8
  fsspec==2024.10.0
9
+ h11==0.14.0
10
  huggingface-hub==0.26.2
11
  idna==3.10
12
+ Jinja2==3.1.4
13
+ MarkupSafe==3.0.2
14
+ mpmath==1.3.0
15
+ networkx==3.4.2
16
+ numpy==2.1.3
17
  packaging==24.1
18
+ pydantic==2.9.2
19
+ pydantic_core==2.23.4
20
+ python-multipart==0.0.17
21
  PyYAML==6.0.2
22
+ regex==2024.9.11
23
  requests==2.32.3
24
+ safetensors==0.4.5
25
+ setuptools==75.3.0
26
+ sniffio==1.3.1
27
+ starlette==0.41.2
28
+ sympy==1.13.1
29
+ tokenizers==0.20.1
30
+ torch==2.5.1
31
  tqdm==4.66.6
32
+ transformers==4.46.1
33
  typing_extensions==4.12.2
34
  urllib3==2.2.3
35
+ uvicorn==0.32.0
static/app.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Submit the form text to /predict/ without reloading the page and render
// the entities the server returns.
document.getElementById("nerForm").addEventListener("submit", async function (e) {
    e.preventDefault();

    const text = document.getElementById("textInput").value;
    const resultsDiv = document.getElementById("resultsContent");

    try {
        // The endpoint reads a form field named "text", so the body is sent
        // URL-encoded rather than as JSON.
        const response = await fetch("/predict/", {
            method: "POST",
            headers: { "Content-Type": "application/x-www-form-urlencoded" },
            body: new URLSearchParams({ text })
        });

        // Error responses (e.g. validation failures) carry no "entities"
        // key, so stop here instead of trying to render them.
        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status}`);
        }

        const result = await response.json();

        // Display results in a structured format
        displayResults(result.entities);
    } catch (err) {
        // Network failure, non-2xx status, or an unparseable body.
        console.error(err);
        resultsDiv.textContent = "Could not analyze the text. Please try again.";
    }
});
14
+
15
/**
 * Render detected entities into #resultsContent, one box per entity type.
 *
 * @param {Object<string, string[]>|null|undefined} entities
 *   Map of entity type name to the list of entity texts. A missing value is
 *   treated the same as an empty map.
 */
function displayResults(entities) {
    const resultsDiv = document.getElementById("resultsContent");
    resultsDiv.replaceChildren(); // Clear previous results

    // Tolerate null/undefined so a response without "entities" shows the
    // empty-state message instead of throwing.
    const groups = Object.entries(entities ?? {});

    if (groups.length === 0) {
        const emptyMessage = document.createElement("p");
        emptyMessage.textContent = "No high-confidence entities found.";
        resultsDiv.appendChild(emptyMessage);
        return;
    }

    // One .entity-group box per type: a heading followed by each entity.
    for (const [entityType, words] of groups) {
        const entityGroup = document.createElement("div");
        entityGroup.classList.add("entity-group");

        const title = document.createElement("h3");
        title.textContent = entityType; // Entity type (e.g., Date, Government)
        entityGroup.appendChild(title);

        // textContent keeps the model output from being parsed as HTML.
        for (const word of words) {
            const entityEl = document.createElement("p");
            entityEl.textContent = word;
            entityGroup.appendChild(entityEl);
        }

        resultsDiv.appendChild(entityGroup);
    }
}
static/style.css ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Basic Reset */
2
+ * {
3
+ box-sizing: border-box;
4
+ margin: 0;
5
+ padding: 0;
6
+ }
7
+
8
+ /* Body Styling */
9
+ body {
10
+ font-family: Arial, sans-serif;
11
+ display: flex;
12
+ justify-content: center;
13
+ align-items: center;
14
+ min-height: 100vh;
15
+ background-color: #f4f4f9;
16
+ margin: 0;
17
+ padding: 20px;
18
+ }
19
+
20
+ /* Container Styling */
21
+ .container {
22
+ width: 100%;
23
+ max-width: 600px;
24
+ text-align: center;
25
+ background: white;
26
+ border-radius: 8px;
27
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
28
+ padding: 20px;
29
+ }
30
+
31
+ /* Title and Description */
32
+ h1 {
33
+ font-size: 24px;
34
+ color: #333;
35
+ margin-bottom: 10px;
36
+ }
37
+
38
+ p {
39
+ font-size: 16px;
40
+ color: #666;
41
+ margin-bottom: 20px;
42
+ }
43
+
44
+ /* Form and Button Styling */
45
+ textarea {
46
+ width: 100%;
47
+ height: 100px;
48
+ padding: 10px;
49
+ font-size: 16px;
50
+ border: 1px solid #ddd;
51
+ border-radius: 5px;
52
+ margin-bottom: 10px;
53
+ resize: vertical;
54
+ }
55
+
56
+ button {
57
+ padding: 10px 20px;
58
+ font-size: 16px;
59
+ color: #fff;
60
+ background-color: #007bff;
61
+ border: none;
62
+ border-radius: 5px;
63
+ cursor: pointer;
64
+ transition: background-color 0.3s;
65
+ }
66
+
67
+ button:hover {
68
+ background-color: #0056b3;
69
+ }
70
+
71
+ /* Results Section Styling */
72
+ .results-section {
73
+ margin-top: 20px;
74
+ text-align: left;
75
+ }
76
+
77
+ .results-section h2 {
78
+ font-size: 20px;
79
+ color: #333;
80
+ margin-bottom: 10px;
81
+ }
82
+
83
+ /* Entity Group Styling */
84
+ .entity-group {
85
+ margin-bottom: 15px;
86
+ padding: 10px;
87
+ border: 1px solid #ddd;
88
+ border-radius: 5px;
89
+ background-color: #fafafa;
90
+ }
91
+
92
+ .entity-group h3 {
93
+ font-size: 18px;
94
+ color: #007bff;
95
+ margin-bottom: 8px;
96
+ }
97
+
98
+ .entity-group p {
99
+ font-size: 16px;
100
+ color: #555;
101
+ }
102
+
103
+ /* Responsive Design */
104
+ @media (max-width: 600px) {
105
+ .container {
106
+ width: 90%;
107
+ }
108
+
109
+ h1 {
110
+ font-size: 20px;
111
+ }
112
+
113
+ button {
114
+ font-size: 14px;
115
+ }
116
+
117
+ .results-section h2, .entity-group h3 {
118
+ font-size: 18px;
119
+ }
120
+
121
+ textarea {
122
+ height: 80px;
123
+ }
124
+ }
templates/index.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Named Entity Recognition</title>
7
+ <link rel="stylesheet" href="/static/style.css">
8
+ </head>
9
+ <body>
10
+ <div class="container">
11
+ <h1>Named Entity Recognition</h1>
12
+ <p>Enter your text below to analyze entities and see their types, such as Date, Person, or Location.</p>
13
+
14
+ <form id="nerForm">
15
+ <!-- Add the default sentence as a value in the textarea -->
16
+ <textarea id="textInput" name="text">2014 - cu ilde Azərbaycan Respublikasının prezidenti İlham Əliyev Salyanda olub.</textarea>
17
+ <button type="submit">Analyze</button>
18
+ </form>
19
+
20
+ <div id="results" class="results-section">
21
+ <h2>Results</h2>
22
+ <div id="resultsContent"></div> <!-- Filled by static/app.js with the detected entities grouped by type -->
23
+ </div>
24
+ </div>
25
+ <script src="/static/app.js"></script>
26
+ </body>
27
+ </html>