| """ |
| Simple test to verify dataset integration setup. |
| This test doesn't require external libraries to be installed. |
| """ |
|
|
| import json |
| import os |
|
|
# Status glyphs are written as escapes so the source stays pure ASCII and
# cannot be damaged by a wrong-encoding round trip again.
# NOTE(review): the previous markers had degraded to the same mojibake
# character for both pass and fail; check/cross is the presumed original.
_PASS_MARK = "\u2713"  # check mark
_FAIL_MARK = "\u2717"  # ballot X

_DEFAULT_DATASET_PATH = "~/huggingface.co/datasets/chiedo/hello-world"
_REQUIRED_FILES = (
    "train.jsonl",
    "validation.jsonl",
    "test.jsonl",
    "README.md",
    "hello_world.py",
)
_SPLITS = ("train", "validation", "test")

# (substring searched for in the model source, human-readable label)
_MODEL_MARKERS = (
    ("load_dataset", "load_dataset method"),
    ("prepare_dataset_batch", "prepare_dataset_batch method"),
    ("from datasets import load_dataset", "datasets import"),
)


def _check_required_files(dataset_path):
    """Print whether each required file exists; return how many are missing."""
    missing = 0
    for name in _REQUIRED_FILES:
        if os.path.exists(os.path.join(dataset_path, name)):
            print(f" {_PASS_MARK} {name} exists")
        else:
            print(f" {_FAIL_MARK} {name} missing")
            missing += 1
    return missing


def _check_split(dataset_path, split):
    """Print a summary of one JSONL split; return True when it looks valid."""
    file_path = os.path.join(dataset_path, f"{split}.jsonl")
    try:
        with open(file_path, encoding="utf-8") as f:
            # Blank lines (e.g. a trailing newline) are not examples.
            examples = [line for line in f if line.strip()]
    except (OSError, ValueError) as e:  # ValueError covers UnicodeDecodeError
        print(f" Error reading {split}: {e}")
        return False

    print(f"\n {split} split:")
    print(f" - Examples: {len(examples)}")

    if not examples:
        # Report an empty split explicitly instead of failing on examples[0].
        print(f" - Structure: {_FAIL_MARK} Invalid (no examples)")
        return False

    try:
        first_example = json.loads(examples[0])
    except ValueError as e:  # json.JSONDecodeError subclasses ValueError
        print(f" Error reading {split}: {e}")
        return False
    print(f" - First example: {first_example}")

    # Require a JSON object: a bare string or list would otherwise satisfy
    # the `in` checks by substring/element match.
    is_valid = (
        isinstance(first_example, dict)
        and "text" in first_example
        and "label" in first_example
    )
    if is_valid:
        print(f" - Structure: {_PASS_MARK} Valid")
    else:
        print(f" - Structure: {_FAIL_MARK} Invalid")
    return is_valid


def _check_model_integration(model_file):
    """Print which dataset hooks the model source mentions; return the number of problems."""
    try:
        with open(model_file, encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # Surface an absent/unreadable model file rather than skipping silently.
        print(f" {_FAIL_MARK} Could not read {model_file}: {e}")
        return 1

    model_name = os.path.basename(model_file) or model_file
    problems = 0
    for needle, label in _MODEL_MARKERS:
        if needle in content:
            print(f" {_PASS_MARK} {label} found in {model_name}")
        else:
            print(f" {_FAIL_MARK} {label} not found")
            problems += 1
    return problems


def test_dataset_files(dataset_path=_DEFAULT_DATASET_PATH, model_file="model.py"):
    """Print a best-effort report on the dataset integration setup.

    The report covers four sections: presence of the required dataset files,
    a sanity check of the first record of each JSONL split, a textual scan of
    the model source for the dataset hooks, and the model/dataset URLs.

    Args:
        dataset_path: Directory holding the dataset files. ``~`` is expanded.
            Defaults to the local hello-world dataset checkout.
        model_file: Path of the model source file to scan.

    Returns:
        None. Nothing is raised for missing or malformed files; every problem
        is printed and counted, and the closing banner states whether all
        checks passed.
    """
    dataset_path = os.path.expanduser(dataset_path)

    print("Testing Dataset Integration Setup")
    print("=" * 50)

    print("\n1. Checking dataset files:")
    problems = _check_required_files(dataset_path)

    print("\n2. Validating dataset content:")
    for split in _SPLITS:
        if not _check_split(dataset_path, split):
            problems += 1

    print("\n3. Checking model integration:")
    problems += _check_model_integration(model_file)

    print("\n4. Dataset URLs:")
    print(" Model: https://huggingface.co/chiedo/hello-world")
    print(" Dataset: https://huggingface.co/datasets/chiedo/hello-world")

    print("\n" + "=" * 50)
    if problems:
        # Do not claim success when any check above reported a failure.
        print(f"Dataset integration setup incomplete: {problems} check(s) failed.")
    else:
        print("Dataset integration setup complete!")
    print("\nTo use the dataset with the model, install dependencies:")
    print(" pip install torch transformers datasets")
    print("\nThen run:")
    print(" python example_with_dataset.py")
|
|
# Script entry point: produce the report only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    test_dataset_files()