| |
|
|
| from datasets import Dataset |
| from text_generation import YourTextGenerationModel |
|
|
def generate_text_to_text_data(
    prompts=None,
    model=None,
    *,
    vocab_size=10000,
    embedding_dim=128,
    hidden_dim=256,
):
    """Build prompt/completion pairs for a text-to-text dataset.

    Every prompt is passed to ``model.generate_text`` and the results are
    returned in column-oriented form, ready for ``Dataset.from_dict``.

    Calling the function with no arguments reproduces the original
    behaviour: three built-in example prompts and a freshly constructed
    ``YourTextGenerationModel(10000, 128, 256)``.

    Args:
        prompts: Iterable of prompt strings, or a single string (treated as
            one prompt). ``None`` selects the three built-in example prompts.
        model: Any object exposing ``generate_text(prompt)``. ``None`` means
            a ``YourTextGenerationModel`` is constructed from the three
            keyword arguments below.
        vocab_size: First positional argument forwarded to
            ``YourTextGenerationModel``; ignored when ``model`` is given.
        embedding_dim: Second positional argument forwarded to
            ``YourTextGenerationModel``; ignored when ``model`` is given.
        hidden_dim: Third positional argument forwarded to
            ``YourTextGenerationModel``; ignored when ``model`` is given.

    Returns:
        A dict with two equally long lists: ``"input_text"`` (the prompts)
        and ``"target_text"`` (one generated text per prompt, same order).
    """
    if prompts is None:
        prompt_list = [
            "Tell a story about",
            "Describe a scene with",
            "Explain the concept of",
        ]
    elif isinstance(prompts, str):
        # A bare string would otherwise be iterated character by character.
        prompt_list = [prompts]
    else:
        # Materialize once: the prompts are both iterated for generation and
        # stored in the result, which a one-shot iterator could not survive.
        prompt_list = list(prompts)

    if model is None:
        # Only build the (potentially expensive) default model when the
        # caller did not supply one.
        model = YourTextGenerationModel(vocab_size, embedding_dim, hidden_dim)

    generated_texts = [model.generate_text(prompt) for prompt in prompt_list]

    return {
        "input_text": prompt_list,
        "target_text": generated_texts,
    }
|
|
def create_text_to_text_huggingface_dataset():
    """Return a ``Dataset`` built from the generated text-to-text data.

    The column dict produced by ``generate_text_to_text_data()`` is handed
    directly to ``Dataset.from_dict``.
    """
    return Dataset.from_dict(generate_text_to_text_data())
|
|
if __name__ == "__main__":
    # Script entry point: build the dataset, then write it to the
    # "my_text_to_text_dataset" path.
    dataset = create_text_to_text_huggingface_dataset()
    dataset.save_to_disk("my_text_to_text_dataset")