| import random
|
|
|
def generate_v1_data(seed=None):
    """Generate every single-digit arithmetic problem with a single-digit answer.

    For all operand pairs ``a, b`` in ``0..9`` and each operator in
    ``+ - * /``, a problem string of the form ``"a op b = result<EOS>"`` is
    produced when the result is an integer in ``0..9``.  Division is included
    only when ``b`` is non-zero and divides ``a`` exactly.

    Args:
        seed: Optional seed for a private random generator used to shuffle
            the problems.  When ``None`` (the default) the module-level
            ``random`` state is used, so the order follows whatever global
            seeding the caller has done.

    Returns:
        A shuffled list of problem strings, each ending with ``"<EOS>"``.
    """
    # Insertion order of this dict fixes the pre-shuffle order of the
    # problems (+, -, *, / for each operand pair).
    ops = {
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
        # Only exact divisions are valid problems; None means "skip".
        # Integer division avoids a float round trip for the quotient.
        '/': lambda a, b: a // b if b != 0 and a % b == 0 else None,
    }

    data = []
    for a in range(10):
        for b in range(10):
            for op_char, op_func in ops.items():
                result = op_func(a, b)
                # Keep only problems whose answer is itself a single digit;
                # the lower bound filters out negative differences.
                if result is not None and 0 <= result <= 9:
                    data.append(f"{a} {op_char} {b} = {result}")

    if seed is None:
        random.shuffle(data)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(data)

    return [problem + "<EOS>" for problem in data]
|
|
|
class CharacterTokenizer:
    """A simple character-level tokenizer for the math problems.

    The vocabulary is every distinct character found in the training
    strings, in sorted order, followed by one extra ``'<PAD>'`` entry.

    Attributes are set in ``__init__``:
        stoi: mapping from vocabulary entry to integer id.
        itos: mapping from integer id to vocabulary entry.
        vocab_size: number of vocabulary entries, including ``'<PAD>'``.
        pad_token_id: id of the ``'<PAD>'`` entry (always the last id).
    """

    def __init__(self, data):
        """Build the vocabulary from an iterable of strings.

        Args:
            data: iterable of strings whose characters form the vocabulary.
        """
        # NOTE(review): multi-character markers present in ``data`` (such as
        # a literal "<EOS>") are split into their individual characters
        # here; confirm this is what downstream code expects.
        chars = sorted(set("".join(data)))

        # Every entry so far is a single character, so the multi-character
        # '<PAD>' entry can never already be present.
        chars.append('<PAD>')

        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = {i: ch for i, ch in enumerate(chars)}
        self.vocab_size = len(chars)
        self.pad_token_id = self.stoi['<PAD>']

    def encode(self, s):
        """Encode a string into a list of integer ids, one per character.

        Raises:
            KeyError: if ``s`` contains a character not in the vocabulary.
        """
        return [self.stoi[c] for c in s]

    def decode(self, l):
        """Decode a list of integer ids back into a string.

        The pad id decodes to the literal text ``'<PAD>'``.

        Raises:
            KeyError: if an id is not in the vocabulary.
        """
        return "".join(self.itos[i] for i in l)