# IndicBertology / src / extract_sentences.py
# JagritiRawat's picture
# Add files using upload-large-folder tool
# b123f1a verified
import pandas as pd
# Default locations: sentences come from one of the probing CSV files
# (subnum.csv) and are written to the gold sentence list.
INPUT_CSV = "./probingData/hindi/subnum.csv"
OUTPUT_TXT = "./gold/hindi/sentences.txt"


def extract_sentences(csv_path=INPUT_CSV, out_path=OUTPUT_TXT, column="sentences"):
    """Copy the non-missing values of one CSV column to a text file.

    Each value is stripped of surrounding whitespace and written on its own
    line, encoded as UTF-8. Rows whose value is missing (NaN) are skipped.

    Args:
        csv_path: Path of the CSV file to read.
        out_path: Path of the text file to write (overwritten if present).
        column: Name of the CSV column holding the sentences.

    Returns:
        The number of sentences actually written to ``out_path``.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If ``column`` is not a column of the CSV file.
    """
    import os

    df = pd.read_csv(csv_path)

    # Create the destination directory up front so a fresh checkout does not
    # fail on open(); dirname is "" when out_path has no directory part.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    written = 0
    with open(out_path, "w", encoding="utf-8") as f:
        for sentence in df[column]:
            if pd.isna(sentence):  # Skip empty/NaN values
                continue
            # str() guards against pandas inferring a non-string dtype for
            # the column, in which case .strip() would not exist.
            f.write(str(sentence).strip() + "\n")
            written += 1
    return written


if __name__ == "__main__":
    total = extract_sentences()
    print("Sentences extracted successfully!")
    # Report what was written, not len(df): NaN rows are skipped above.
    print(f"Total sentences: {total}")