"""Dump one (number, sentence) row per sentence of SSF input to a CSV file.

Usage: ``-i`` names a single SSF file or a folder of SSF files, ``-o`` names
the CSV file to write.
"""
from argparse import ArgumentParser
from re import search
from sys import argv
import os

import pandas as pd

import ssfAPI as ssf


def find_file_list(folder_path):
    """Return the file list that ``ssf.folderWalk`` finds for ``folder_path``."""
    return ssf.folderWalk(folder_path)


def write_lines_to_file(lines, file_path):
    """Write ``lines`` to ``file_path`` as UTF-8 text, joined by newlines.

    No trailing newline is added after the last line.
    """
    with open(file_path, 'w', encoding='utf-8') as file_write:
        file_write.write('\n'.join(lines))


def getnumber(sentence):
    """Return the fourth ``af`` field of the first verb node in a root chunk.

    Chunks of ``sentence.nodeList`` are scanned in order; only chunks whose
    ``parentRelation`` is ``"root"`` are considered. Within such a chunk the
    first node whose ``type`` starts with ``"V"`` is used: its ``af``
    attribute is split on commas and the item at index 3 is returned (the
    value this script stores in its ``number`` column).

    Returns ``None`` when no root chunk contains a ``V``-typed node.

    Raises whatever the attribute access, ``split`` or indexing raises when a
    node is malformed (for example an ``af`` value with fewer than four
    fields); ``main`` turns such failures into the placeholder ``0``.
    """
    for chunkNode in sentence.nodeList:
        if chunkNode.parentRelation != "root":
            continue
        for node in chunkNode.nodeList:
            if node.type[0] == "V":
                features_list = node.getAttribute("af").split(",")
                return features_list[3]
    return None


def _collect_rows(document_path, number, sentences):
    """Append one number/text pair per sentence of an SSF document.

    ``number`` and ``sentences`` are extended in lockstep, so they always
    have the same length afterwards.
    """
    for sentence in ssf.Document(document_path).nodeList:
        # Only the feature lookup is best-effort. ``Exception`` (rather than
        # a bare ``except``) keeps Ctrl-C and SystemExit working, while any
        # malformed node still degrades to the placeholder 0.
        try:
            value = getnumber(sentence)
        except Exception:
            value = 0
        # Generate the text once, outside the try, and append both values
        # together so the two columns can never get out of step.
        text = sentence.generateSentence()
        number.append(value)
        sentences.append(text)


def main():
    """Parse the command line, read the SSF input and write the CSV file.

    The CSV has the columns ``number`` and ``sentences`` and no index column.
    """
    parser = ArgumentParser(
        description="Export the number feature and text of SSF sentences to CSV."
    )
    # Both options are mandatory: without -i there is nothing to read, and
    # without -o the CSV would be built and silently thrown away.
    parser.add_argument('-i', dest='inp', required=True,
                        help="SSF file, or folder containing SSF files")
    parser.add_argument('-o', dest='out', required=True,
                        help="path of the CSV file to write")
    args = parser.parse_args()

    # A single file is handled as a one-element list so both input kinds
    # share the same processing loop.
    if os.path.isdir(args.inp):
        document_paths = find_file_list(args.inp)
    else:
        document_paths = [args.inp]

    number = []
    sentences = []
    for document_path in document_paths:
        _collect_rows(document_path, number, sentences)

    df = pd.DataFrame({"number": number, "sentences": sentences})
    df.to_csv(args.out, index=False)


if __name__ == '__main__':
    main()