| import torch |
| import orjson |
| from transformers import pipeline |
| from transformers import BertTokenizerFast, AutoTokenizer |
|
|
# The tokenizer is loaded by the "dbmdz/bert-base-turkish-cased" identifier,
# independently of the model weights used below.
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")


# NER pipeline over the model stored in ./model.  Device index 0 is used when
# CUDA is available; otherwise -1 is passed (CPU in the transformers API).
nlp = pipeline(
    task="ner",
    model="./model",
    tokenizer=tokenizer,
    device=-1 if not torch.cuda.is_available() else 0,
)
|
|
def get_entities(tokens):
    """Merge B-/I- tagged token predictions into entity spans.

    Each item of ``tokens`` is a mapping with the keys ``"entity"`` (a tag
    such as ``"B-X"`` or ``"I-X"``), ``"start"``, ``"end"`` and ``"score"``.

    Returns a list of dicts with the keys ``"label"`` (the tag without its
    two-character prefix), ``"ranges"`` (``[start, end]`` of the merged span),
    ``"score"`` (mean of the member token scores) and ``"token_count"``.

    Grouping rules:
    * a ``B-`` token closes any open entity and opens a new one;
    * an ``I-`` token extends the open entity; when its label differs from
      the open entity's label it is still absorbed, and the entity is then
      closed;
    * an ``I-`` token with no open entity is ignored;
    * any other tag closes the open entity.
    """
    finished = []

    def _close(span):
        # Turn the accumulated score sum into a mean and store the span.
        span["score"] /= span["token_count"]
        finished.append(span)

    current = None
    for tok in tokens:
        tag = tok["entity"]
        prefix, label = tag[:2], tag[2:]

        if prefix == "B-":
            if current:
                _close(current)
            current = {
                "label": label,
                "ranges": [tok["start"], tok["end"]],
                "score": tok["score"],
                "token_count": 1,
            }
            continue

        if prefix != "I-":
            # Neither a begin nor an inside tag: ends whatever is open.
            if current:
                _close(current)
            current = None
            continue

        if not current:
            # Continuation tag without a preceding begin tag: dropped.
            continue

        # Continuation token: always folded into the open entity ...
        current["ranges"][1] = tok["end"]
        current["token_count"] += 1
        current["score"] += tok["score"]
        # ... but a label mismatch terminates the entity right after.
        if current["label"] != label:
            _close(current)
            current = None

    if current:
        _close(current)
    return finished
|
|
def process(text):
    """Run the NER pipeline on ``text`` and print a per-entity report.

    For every entity returned by ``get_entities`` one line is printed with
    the matched substring of ``text``, the entity label and its score as a
    percentage.  A summary follows: the mean score over all tokens returned
    by the pipeline, the number of entities, and a separator line.

    Returns ``None``; all output goes to stdout.
    """
    nlp_output = nlp(text)
    entities = get_entities(nlp_output)
    for entity in entities:
        start, end = entity["ranges"]
        print(f"{text[start:end]:<35} {entity['label']:>15} {entity['score'] * 100:.2f}%")
    # The pipeline may return no tokens at all (e.g. whitespace-only input or
    # text in which nothing was labelled); avoid dividing by zero in that case.
    if nlp_output:
        average = sum(token["score"] for token in nlp_output) / len(nlp_output)
    else:
        average = "n/a"
    print("Average Score: ", average)
    print("Labels Found: ", len(entities))
    print("-" * 70)
|
|
if __name__ == "__main__":
    # Built-in sample addresses, processed before the interactive prompt.
    examples = [
        "Osmangazi Mahallesi, Hoca Ahmet Yesevi Cd. No:34, 16050 Osmangazi/Bursa",
        "Karşıyaka Mahallesi, Mavişehir Caddesi No: 91, Daire 4, 35540 Karşıyaka/İzmir",
        "Selçuklu Mahallesi, Atatürk Bulvarı No: 55, 42050 Selçuklu/Konya",
        "Alsancak Mahallesi, 1475. Sk. No:3, 35220 Konak/İzmir",
        "Kocatepe Mahallesi, Yaşam Caddesi 3. Sokak No:4, 06420 Bayrampaşa/İstanbul",
    ]
    for example in examples:
        print(example)
        process(example)

    # Interactive loop: an empty line ends the session.
    while True:
        try:
            text = input("Enter text: ")
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input); finish the
            # prompt line and exit the same way an empty line would.
            print()
            break
        if not text:
            break
        process(text)