| import os |
def apply_RL_prompt(chunk, args, budget):
    """Dispatch ``chunk`` to the prompt rewriter selected by the ``tip`` env var.

    Args:
        chunk: List of prompt strings; passed through to the selected rewriter.
        args: Object exposing a ``prompt_type`` attribute.
        budget: Token budget forwarded to the selected rewriter.

    Returns:
        The value returned by the selected rewriter, or ``chunk`` itself when
        ``args.prompt_type`` is not ``"deepseek3"`` or the ``tip`` environment
        variable is unset or not recognised.
    """
    if args.prompt_type != "deepseek3":
        return chunk

    # Read the variable once; a missing ``tip`` is treated like any other
    # unrecognised value instead of raising KeyError.
    tip = os.environ.get("tip", "")

    if tip == "withoutremaining":
        return withoutremaining_prompt(chunk, budget)
    if tip == "8ratio":
        return _8ratio_prompt(chunk, budget)
    # The v1/v2 variants are selected by substring match, checked in this order.
    if "prompt_v1" in tip:
        return prompt_v1_prompt(chunk, budget)
    if "prompt_v2" in tip:
        return prompt_v2_prompt(chunk, budget)
    return chunk
|
|
def withoutremaining_prompt(chunk, budget):
    """Insert a token-budget hint before the first ``<|Assistant|>`` marker.

    Each string in ``chunk`` is rewritten in place: the hint is placed
    immediately before the first occurrence of the marker and all text after
    that marker is kept intact, including any further markers. Strings that
    do not contain the marker are left unchanged.

    Args:
        chunk: List of prompt strings; modified in place.
        budget: Value interpolated into the hint text.

    Returns:
        The same ``chunk`` list.
    """
    marker = "<|Assistant|>"
    hint = f"\n(Complete thinking within {budget} tokens or fewer.)"

    for idx, text in enumerate(chunk):
        # partition splits on the first marker only, so nothing after it is
        # lost; ``sep`` is empty when the marker is absent.
        head, sep, tail = text.partition(marker)
        if sep:
            chunk[idx] = head + hint + sep + tail

    return chunk
|
|
|
|
def _8ratio_prompt(chunk, budget):
    """Insert the 8-part ``<remaining>`` budget hint before ``<|Assistant|>``.

    Side effects: stores ``str(budget)`` in ``os.environ['budget']`` and
    prints the budget. Each string in ``chunk`` is rewritten in place: the
    hint is placed immediately before the first occurrence of the marker and
    all text after that marker is kept intact. Strings that do not contain
    the marker are left unchanged.

    Args:
        chunk: List of prompt strings; modified in place.
        budget: Value interpolated into the hint text.

    Returns:
        The same ``chunk`` list.
    """
    os.environ['budget'] = str(budget)
    print(f"budget = {budget}")

    marker = "<|Assistant|>"
    hint = f"\n(Complete thinking within {budget} tokens or fewer, 7 special tokens ( \n<remaining>7/8</remaining>\n , \n<remaining>6/8</remaining>\n , \n<remaining>5/8</remaining>\n , \n<remaining>4/8</remaining>\n , \n<remaining>3/8</remaining>\n , \n<remaining>2/8</remaining>\n , \n<remaining>1/8</remaining>\n ) will split the thinking process into 8 parts.)"

    for idx, text in enumerate(chunk):
        # partition splits on the first marker only, so nothing after it is
        # lost; ``sep`` is empty when the marker is absent.
        head, sep, tail = text.partition(marker)
        if sep:
            chunk[idx] = head + hint + sep + tail

    return chunk
|
|
|
|
def prompt_v1_prompt(chunk, budget):
    """Insert the "report remaining tokens" budget hint before ``<|Assistant|>``.

    Side effects: stores ``str(budget)`` in ``os.environ['budget']`` and
    prints the budget. Each string in ``chunk`` is rewritten in place: the
    hint is placed immediately before the first occurrence of the marker and
    all text after that marker is kept intact. Strings that do not contain
    the marker are left unchanged.

    Args:
        chunk: List of prompt strings; modified in place.
        budget: Value interpolated into the hint text.

    Returns:
        The same ``chunk`` list.
    """
    os.environ['budget'] = str(budget)
    print(f"budget = {budget}")

    marker = "<|Assistant|>"
    hint = f"\n(Complete thinking within {budget} tokens or fewer, please output the remaining number of tokens every 200 tokens to facilitate control of the remaining length of the thinking process, here is a template: 'now remaining tokens: xxx', xxx is the real remaining number of tokens.)"

    for idx, text in enumerate(chunk):
        # partition splits on the first marker only, so nothing after it is
        # lost; ``sep`` is empty when the marker is absent.
        head, sep, tail = text.partition(marker)
        if sep:
            chunk[idx] = head + hint + sep + tail

    return chunk
|
|
|
|
def prompt_v2_prompt(chunk, budget):
    """Insert the short token-budget hint before ``<|Assistant|>``.

    Side effects: stores ``str(budget)`` in ``os.environ['budget']`` and
    prints the budget. Each string in ``chunk`` is rewritten in place: the
    hint is placed immediately before the first occurrence of the marker and
    all text after that marker is kept intact. Strings that do not contain
    the marker are left unchanged.

    Args:
        chunk: List of prompt strings; modified in place.
        budget: Value interpolated into the hint text.

    Returns:
        The same ``chunk`` list.
    """
    os.environ['budget'] = str(budget)
    print(f"budget = {budget}")

    marker = "<|Assistant|>"
    hint = f"\n(Complete thinking within {budget} tokens or fewer)"

    for idx, text in enumerate(chunk):
        # partition splits on the first marker only, so nothing after it is
        # lost; ``sep`` is empty when the marker is absent.
        head, sep, tail = text.partition(marker)
        if sep:
            chunk[idx] = head + hint + sep + tail

    return chunk
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
def is_balanced(s: str) -> bool:
    """Return True when every curly brace in ``s`` is paired and properly nested."""
    step = {"{": 1, "}": -1}
    depth = 0
    for ch in s:
        depth += step.get(ch, 0)
        # A closing brace with no open partner can never be repaired later.
        if depth < 0:
            return False
    return depth == 0
|
|
def solve_final_answer(chunk: list) -> tuple:
    """Split ``chunk`` by whether each string holds a closed final boxed answer.

    A string counts as finished when it contains ``**Final Answer**\\boxed{``
    and the brace opened by that first occurrence is closed later in the
    string, with nested braces allowed. Finished strings get
    ``<|end▁of▁sentence|>`` appended, in place in ``chunk``.

    Args:
        chunk: List of strings; finished entries are modified in place.

    Returns:
        A 3-tuple ``(chunk, end_chunk, open_chunk)``: the same input list,
        the finished strings (with the suffix appended), and the remaining
        strings, each in input order.
    """
    pattern = "**Final Answer**\\boxed{"
    eos = "<|end▁of▁sentence|>"

    end_chunk = []
    open_chunk = []

    for idx, line in enumerate(chunk):
        start = line.find(pattern)
        if start == -1:
            open_chunk.append(line)
            continue

        # The pattern itself opens one brace; scan forward until it closes.
        # Depth never goes negative because the scan stops at zero, so no
        # separate balance check is needed on the matched span.
        depth = 1
        for ch in line[start + len(pattern):]:
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    break

        if depth == 0:
            chunk[idx] = line + eos
            end_chunk.append(chunk[idx])
        else:
            open_chunk.append(line)

    print(f"### Find {len(end_chunk)} anwsers have final answer!")
    return chunk, end_chunk, open_chunk