IndicBertology / src /probing /getDepth.py
JagritiRawat's picture
Add files using upload-large-folder tool
b123f1a verified
import os
from argparse import ArgumentParser
from collections import deque
from re import search
from sys import argv

import pandas as pd

import ssfAPI as ssf
def find_file_list(folder_path):
    """Return whatever ``ssf.folderWalk`` yields for *folder_path*.

    This is a thin convenience wrapper; the traversal itself is delegated
    entirely to the ``ssfAPI`` helper.
    """
    return ssf.folderWalk(folder_path)
def write_lines_to_file(lines, file_path):
    """Save *lines* to *file_path* as UTF-8 text, one item per line.

    The items are joined with ``'\\n'``; no trailing newline is appended and
    any existing file at *file_path* is overwritten.
    """
    with open(file_path, mode='w', encoding='utf-8') as out_handle:
        joined_text = '\n'.join(lines)
        out_handle.write(joined_text)
def getDepth(sentence):
    """Return the depth of the dependency tree stored in *sentence*.

    Every node in ``sentence.nodeList`` is first registered through
    ``sentence.addEdge(node.parent, node.name)``; the resulting
    ``sentence.edges`` mapping (parent name -> list of child names) is then
    walked breadth-first starting from the virtual root ``"0"``.
    Nodes attached directly to ``"0"`` have depth 1, their children depth 2,
    and so on.
    # NOTE(review): assumes ``addEdge`` stores each child under its parent key
    # in ``sentence.edges`` -- confirm against ssfAPI.

    Args:
        sentence: object exposing ``nodeList``, ``addEdge`` and ``edges``.

    Returns:
        The largest depth reached from ``"0"``, or 0 when nothing is
        attached to the virtual root.
    """
    for chunknode in sentence.nodeList:
        sentence.addEdge(chunknode.parent, chunknode.name)

    edges = sentence.edges
    # The virtual root sits at depth 0 so that its children start at 1.
    depths = {"0": 0}
    # Seeding with "0" itself visits *every* root-level node, not just the
    # first one, so a sentence with several top-level chunks is measured
    # over all of its subtrees.
    frontier = deque(["0"])
    while frontier:
        current = frontier.popleft()
        if current not in edges:
            continue
        for child in edges[current]:
            # A name that was already reached is skipped; this guarantees
            # termination even if malformed annotation introduces a cycle.
            if child in depths:
                continue
            depths[child] = depths[current] + 1
            frontier.append(child)
    return max(depths.values())
def main():
    """Compute the tree depth of every sentence and write the result as CSV.

    Command-line options:
        -i  Path to a single SSF file, or to a directory; for a directory
            the files returned by ``find_file_list`` are processed.
        -o  Path of the CSV file to write, with the columns ``depth`` and
            ``sentences``.

    A sentence whose depth cannot be computed is recorded with depth 0
    rather than aborting the whole run.
    """
    parser = ArgumentParser()
    # Both paths are mandatory: without -i there is nothing to read, and
    # without -o the result would be silently discarded.
    parser.add_argument('-i', dest='inp', required=True,
                        help='SSF file or directory of SSF files')
    parser.add_argument('-o', dest='out', required=True,
                        help='output CSV path')
    args = parser.parse_args()

    # Treat a single file as a one-element list so both cases share one loop.
    if os.path.isdir(args.inp):
        input_files = find_file_list(args.inp)
    else:
        input_files = [args.inp]

    depth = []
    sentences = []
    for input_file in input_files:
        ssf_document = ssf.Document(input_file)
        for sentence in ssf_document.nodeList:
            # Only the depth computation is best-effort; Exception (not a
            # bare except) keeps Ctrl-C and SystemExit working.
            try:
                sentence_depth = getDepth(sentence)
            except Exception:
                sentence_depth = 0
            depth.append(sentence_depth)
            sentences.append(sentence.generateSentence())

    df = pd.DataFrame({"depth": depth, "sentences": sentences})
    df.to_csv(args.out, index=False)
if __name__ == '__main__':
    # Run the command-line entry point only when executed as a script,
    # not when this module is imported.
    main()