import os
import shutil
import sys

from joblib import Parallel, delayed
from tqdm import tqdm

from uparser import wordparse


# NOTE(review): num_jobs is not referenced anywhere in the visible script;
# kept in case other code reads it.
num_jobs = 20
infolder = 'Original'   # folder holding the tab-separated input files
outfolder = 'Words'     # folder that receives one '<stem>.words' file per input


def _reset_folder(folder):
    """Delete *folder* with all its contents if it exists, then recreate it empty."""
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.mkdir(folder)


def _output_stem(fname):
    """Return the output base name derived from *fname*, or None.

    The stem is the second '_'-separated piece of the file name, cut at its
    first '.', e.g. 'dict_en.txt' -> 'en'.  Names containing no underscore
    have no such piece and yield None instead of raising IndexError.
    """
    pieces = fname.split('_')
    if len(pieces) < 2:
        return None
    return pieces[1].split('.')[0]


def _first_column(line):
    """Return the first tab-separated field of *line* after stripping whitespace."""
    # maxsplit=1: only the first field is needed, so stop at the first tab.
    return line.strip().split('\t', 1)[0]


def extract_words(src_dir, dst_dir, encoding=None):
    """Write the first column of every file in *src_dir* to *dst_dir*.

    For each regular file in *src_dir* whose name contains an underscore,
    the first tab-separated field of every line (blank lines produce an
    empty word) is written, one per line, to '<dst_dir>/<stem>.words'.
    Each stem is printed to stdout as it is processed.  Entries that are
    not regular files, or whose name has no underscore, are reported on
    stderr and skipped.

    Args:
        src_dir: Folder containing the input files.
        dst_dir: Existing folder that receives the '.words' files.
        encoding: Text encoding for reading and writing; None keeps the
            platform default used by open().

    Returns:
        The list of stems written, in processing order.
    """
    written = []
    # Sorted so the processing order (and which file wins if two inputs map
    # to the same stem) does not depend on the directory listing order.
    for fname in sorted(os.listdir(src_dir)):
        src_path = os.path.join(src_dir, fname)
        stem = _output_stem(fname)
        if stem is None or not os.path.isfile(src_path):
            print(f'skipping {fname!r}: not a regular file named <prefix>_<stem>...',
                  file=sys.stderr)
            continue

        print(stem)
        dst_path = os.path.join(dst_dir, f'{stem}.words')
        # Stream line by line instead of holding the whole file in memory.
        with open(src_path, 'r', encoding=encoding) as src, \
                open(dst_path, 'w', encoding=encoding) as dst:
            dst.writelines(f'{_first_column(line)}\n' for line in src)
        written.append(stem)
    return written


def main():
    """Recreate the output folder from scratch and extract all word lists."""
    _reset_folder(outfolder)
    extract_words(infolder, outfolder)


if __name__ == '__main__':
    main()