Ludovic Moncla
Update app.py
0ca79f5
import gradio as gr
from transformers import pipeline
import geopy
import plotly.graph_objects as go
import torch
# Select the compute device: Apple MPS when available, otherwise CUDA,
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Text-classification pipeline whose 'Positive' label is used by
# detect_coordinates() to decide whether a text contains coordinates.
binary_classifier = pipeline(
    "text-classification",
    model="GEODE/bert-base-multilingual-cased-binary-classifier-edda-coords",
    truncation=True,
    device=device,
)

# Token-classification pipeline; extract_coordinates() keeps the spans it
# tags with the 'Latlong' entity group.
ner_pipeline = pipeline(
    "token-classification",
    model="GEODE/camembert-base-edda-span-classification",
    aggregation_strategy="simple",
    device=device,
)

# Text-to-text pipeline used by norm_coordinates() to generate a normalized
# coordinate string from the input text.
generator = pipeline(
    "text2text-generation",
    model="GEODE/mt5-small-coords-norm",
    truncation=True,
    device=device,
)
def create_map(lat, long):
    """Build a Plotly map figure showing a single marker.

    Args:
        lat: Latitude of the marker; also used as the latitude of the map
            centre.
        long: Longitude of the marker; also used as the longitude of the map
            centre.

    Returns:
        A ``plotly.graph_objects.Figure`` containing one ``Scattermapbox``
        marker trace on the "open-street-map" style, centred on the point
        at zoom level 3, with all figure margins set to zero.
    """
    point_marker = go.scattermapbox.Marker(size=10)
    point_trace = go.Scattermapbox(
        lat=[lat],
        lon=[long],
        mode='markers',
        marker=point_marker,
    )

    # Camera settings: flat, north-up view centred on the marker.
    map_view = dict(
        bearing=0,
        center=go.layout.mapbox.Center(lat=lat, lon=long),
        pitch=0,
        zoom=3,
    )

    fig = go.Figure(point_trace)
    fig.update_layout(
        mapbox_style="open-street-map",
        mapbox=map_view,
        # Zero margins so the map fills the whole plot area.
        margin=dict(l=0, r=0, t=0, b=0),
    )
    return fig
def dms_to_dd(dms, lon_offset=17.66):
    """Parse a coordinate string into ``[latitude, longitude]`` values.

    The string is parsed with ``geopy.Point``; ``lon_offset`` is then
    subtracted from the parsed longitude.

    NOTE(review): the default of 17.66 presumably converts longitudes
    measured from the Ferro (El Hierro) prime meridian to Greenwich-based
    longitudes -- confirm against the dataset documentation.

    Args:
        dms: Coordinate string in a format accepted by ``geopy.Point``.
        lon_offset: Value subtracted from the parsed longitude. Defaults to
            17.66, the value that was previously hard-coded.

    Returns:
        A two-element list ``[latitude, longitude]``, or ``None`` when the
        string cannot be parsed into a point.
    """
    try:
        point = geopy.Point(dms)
    except (ValueError, TypeError):
        # Unparseable or out-of-range input: report "no coordinates" to the
        # caller instead of raising, as before, but without hiding unrelated
        # errors (or KeyboardInterrupt/SystemExit) like a bare except would.
        return None

    latitude = point[0]
    longitude = point[1] - lon_offset

    # Shifting can push the longitude outside [-180, 180]; wrap it back so
    # the value stays a valid map coordinate. In-range values are untouched.
    if longitude < -180.0:
        longitude += 360.0
    elif longitude > 180.0:
        longitude -= 360.0

    return [latitude, longitude]
def detect_coordinates(text):
    """Report whether the binary classifier finds coordinates in *text*.

    Args:
        text: Input text passed to ``binary_classifier``.

    Returns:
        ``"Coordinates found"`` when the first prediction's label is
        ``'Positive'``, otherwise ``"No coordinates found"``.
    """
    top_prediction = binary_classifier(text)[0]
    is_positive = top_prediction['label'] == 'Positive'
    return "Coordinates found" if is_positive else "No coordinates found"
def extract_coordinates(text):
    """Return the coordinate spans the NER pipeline finds in *text*.

    The binary classifier is consulted first; the NER pipeline only runs
    when the classifier does not report "No coordinates found".

    Args:
        text: Input text to analyse.

    Returns:
        The ``'word'`` of every entity whose ``'entity_group'`` is
        ``'Latlong'``, joined with newlines, or ``"No coordinates found"``
        when the classifier is negative or no such entity exists.
    """
    not_found = "No coordinates found"
    if detect_coordinates(text) == not_found:
        return not_found

    latlong_spans = []
    for entity in ner_pipeline(text):
        if entity['entity_group'] == 'Latlong':
            latlong_spans.append(entity['word'])

    if not latlong_spans:
        return not_found
    return "\n".join(latlong_spans)
# bert-base-multilingual-cased-binary-classifier-edda-coords
def norm_coordinates(text):
    """Generate a normalized coordinate string for *text* and map it.

    The binary classifier is consulted first. When it reports no
    coordinates the function returns immediately, matching
    ``extract_coordinates``; previously the negative result was assigned
    and then overwritten by the generator output.

    Args:
        text: Input text to analyse.

    Returns:
        A ``(result_text, figure)`` tuple. ``result_text`` is either
        ``"No coordinates found"`` or the generator's ``'generated_text'``.
        ``figure`` is the value returned by ``create_map`` for the parsed
        position, or ``None`` when the classifier is negative or
        ``dms_to_dd`` cannot parse the generated string.
    """
    if detect_coordinates(text) == "No coordinates found":
        return "No coordinates found", None

    # The generator is prompted with this fixed task prefix.
    input_text = "extract_coordinates: " + text
    predictions = generator(input_text, max_length=128)
    result_text = predictions[0]['generated_text']

    coords = dms_to_dd(result_text)
    if not coords:
        # Generated string could not be parsed: show the text, no map.
        return result_text, None
    return result_text, create_map(coords[0], coords[1])
# Sample input texts offered as selectable examples in the interface
# (passed to gr.Examples and filled into the input textbox when chosen).
examples = [
    "* AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
    "* ARCALU (Principauté d') petit état des Tartares-Monguls, sur la riviere d'Hoamko, où commence la grande muraille de la Chine, sous le 122e degré de longitude & le 42e de latitude septentrionale.",
    "* ARÉQUIPE, ou ARIQUIPA, (Géog.) ville de l'Amérique méridion. dans le Pérou, sur une riviere, dans un terrein fertile. Long. 308. lat. mérid. 16. 40.",
    "* AUTAN-KELURAN, (Géog.) ville du Turquestan. Long. 110d. & lat. 46. 45. selon Uluhbeg ; & long. 116. & lat. 45. selon Nassiredden.",
    "Boston ; c'est le nom qu'on a donné à la ville capitale de la nouvelle Angleterre, dans l'Amérique septentrionale ; elle est grande & a un très-bon port. Lat. 42 degrés, 20 minutes ; long. 306 degrés, 50 & quelques minutes.",
    "CABEÇA-DE-VIDE, (Géog.) petite ville avec château, en Portugal, dans l'Alentéjo, à cinq lieues de Port-Alegre. Longitude 10. 48. latitude 39."
]
# Markdown text rendered at the top of the interface via gr.Markdown.
description = """
# 🌍 Geographic Coordinate Extractor
This Space demonstrates the performance of AI models trained to automate the extraction of geographical coordinates from the 18th-century **Encyclopédie of Diderot and d'Alembert**.
### How it Works:
1. **Classification:** A BERT-like model identifies whether a given encyclopedia entry contains geographic coordinates.
2. **Extraction & Normalization:** An mT5-based model retrieves raw coordinate strings from the text and normalizes them into a standard DMS format.
You can test the models by entering a custom text or choosing from the examples provided.
- **Authors:** Ludovic Moncla, Pierre Nugues, Thierry Joliveau, and Katherine McDonough.
- **Project:** [GEODE](https://geode-project.github.io)
### Reference:
> Moncla, L., Nugues, P., Joliveau, T., & McDonough, K. (2026). **EDDA-Coordinata: An Annotated Dataset of Historical Geographic Coordinates**. *arXiv preprint [arXiv:2602.23941](https://arxiv.org/abs/2602.23941). (accepted at LREC 2026)*
---
"""
# Gradio interface: input textbox, button and examples on the left; one
# output per model plus a map on the right.
# NOTE(review): the nesting of the Row/Column contexts was reconstructed
# from a source whose indentation was lost -- confirm the intended layout
# (in particular whether gr.Examples belongs inside the input column).
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="Enter text",
                placeholder="e.g. * AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
                lines=3,
            )
            run_btn = gr.Button("Extract & Show")
            gr.Examples(
                examples=examples,
                inputs=inp,
                label="Examples",
            )
        with gr.Column():
            # Each output gets its own name instead of rebinding a single
            # `out_text`, and the plot no longer shadows the builtin `map`.
            detect_out = gr.Textbox(label="Detect coordinates (fine-tuned BERT binary classifier)")
            run_btn.click(fn=detect_coordinates, inputs=inp, outputs=detect_out)

            extract_out = gr.Textbox(label="Extract coordinates (fine-tuned CamemBERT NER)")
            run_btn.click(fn=extract_coordinates, inputs=inp, outputs=extract_out)

            norm_out = gr.Textbox(label="Extract and normalize DMS coordinates (fine-tuned mT5)")
            map_plot = gr.Plot(label="Coordinates on Map")
            run_btn.click(fn=norm_coordinates, inputs=inp, outputs=[norm_out, map_plot])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()