| """ |
| formats output text file from llampia |
| into markdown tables |
| """ |
|
|
| import os |
|
|
def format_edu(turn_str):
    """Format one ``<number> <Speaker> text`` line as a markdown snippet.

    The part before the first ``<`` is taken as the turn/EDU number, the part
    after the first ``>`` as the utterance text. The speaker label is
    ``ARCH`` when the speaker tag contains ``Arch`` and ``BUIL`` otherwise.

    Args:
        turn_str: A line such as ``'2 <Arch> place a red block'``.

    Returns:
        A string of the form ``' **<no> <SPEAKER>** <text>'`` (note the
        leading space, which lets callers concatenate several snippets).

    Raises:
        IndexError: If ``turn_str`` contains no ``>``.
    """
    # Split only on the FIRST '>' so that a '>' inside the utterance itself
    # (e.g. "a -> b") does not truncate the text.
    parts = turn_str.split('>', 1)
    head = parts[0]
    text = parts[1].strip()
    no = turn_str.split('<', 1)[0].strip()
    speaker = 'ARCH' if 'Arch' in head else 'BUIL'
    return ' **' + no + ' ' + speaker + '** ' + text
|
|
# The markdown file is written into the directory the script is run from.
current_folder = os.getcwd()

# output_path is the text file to convert; table_path is the markdown result.
output_path = '/path/to/test-output-file.txt'
table_path = '{}/test_predictions.md'.format(current_folder)

# Load the whole file and keep it as a list of lines. Splitting on '\n' only
# means a trailing newline produces a final empty string in the list.
with open(output_path, 'r') as txt:
    raw_contents = txt.read()
text = raw_contents.split('\n')
|
|
# Collect the lines belonging to each turn. A group opens on a line starting
# with 'New Turn' and is closed (and stored in `pairs`) by the next line
# starting with ' ### DS:'. Lines outside an open group are ignored, except
# that a ' ### DS:' line always closes and stores whatever has been gathered.
group = 0
pairs = []
pair = []
for t in text:
    opens_group = t.startswith('New Turn')
    closes_group = t.startswith(' ### DS:')
    if not (opens_group or closes_group or group == 1):
        continue
    pair.append(t)
    if opens_group:
        group = 1
    elif closes_group:
        pairs.append(pair)
        pair = []
        group = 0
|
|
# Literal prefixes of the two line kinds handled below. They are removed by
# slicing: str.lstrip() takes a *set of characters*, so lstrip(' ### DS:')
# would also eat a leading 'S', 'D', '#' or ':' of the structure itself.
TURN_PREFIX = 'New Turn: '
STRUCT_PREFIX = ' ### DS:'

# Build one table (a list of [dialogue, structure] rows) per dialogue.
# A group whose header is turn 1 starts a new dialogue/table.
tables = []
rows = []
dial_str = ''
struct_str = ''
for pair in pairs:
    header = pair[0]
    if not header.startswith(TURN_PREFIX):
        # Not a recognised turn header: nothing to render for this group.
        continue

    first_edu = format_edu(header[len(TURN_PREFIX):])
    if header.startswith('New Turn: 1 <'):
        # First turn of a new dialogue: store the previous table, if any,
        # and open the new one with the fixed "mission started" line.
        if rows:
            tables.append(rows)
        rows = []
        dial_str = '**0 BUIL** Mission has started.' + first_edu
    else:
        dial_str = first_edu

    for p in pair[1:]:
        if p.startswith(' ###'):
            if p.startswith(STRUCT_PREFIX):
                struct_str = p[len(STRUCT_PREFIX):].lstrip(' ')
            else:
                # Some other ' ###' marker line: drop only the marker chars.
                struct_str = p.lstrip(' #')
            rows.append([dial_str, struct_str])
        else:
            dial_str += format_edu(p)

# Store the rows of the last dialogue; without this the final table would
# never reach `tables`, because tables are otherwise only stored when the
# NEXT dialogue begins.
if rows:
    tables.append(rows)
|
|
def _escape_cell(cell):
    """Escape literal pipes so they do not end a markdown table cell early."""
    return cell.replace('|', '\\|')


# Render every table as markdown lines: a fixed two-column header, one line
# per [dialogue, structure] row, then a ' ' separator line after the table.
all_md_tables = []
for table in tables:
    all_md_tables.append('| Dialogue | Structure |')
    all_md_tables.append('| ----- | ----- |')
    for dialogue, structure in table:
        all_md_tables.append(
            '| ' + _escape_cell(dialogue) + ' | ' + _escape_cell(structure) + ' |'
        )
    # append, not extend: extend(' ') iterates the string character by
    # character and only worked because the string has a single character.
    all_md_tables.append(' ')


md_tables_string = '\n'.join(all_md_tables)
|
|
|
|
# Echo every markdown line to stdout and write it to table_path. The context
# manager guarantees the file is flushed and closed, which the previous bare
# open() never did.
with open(table_path, 'w') as f:
    for r in all_md_tables:
        print(r)
        print(r, file=f)
print("markdown printed")
|
|
| |