| import json |
| import os |
| from pathlib import Path |
| import requests |
| from tqdm import tqdm |
|
|
|
|
def download_eurorad_figures(metadata_path: str, output_dir: str) -> None:
    """
    Download figures from Eurorad dataset and save them organized by case_id.

    The metadata file is expected to be a JSON object mapping keys to case
    records. Each case record is read for its "case_id" and "figures"
    entries; each figure for its "subfigures" list; and each subfigure for
    its "number" and "url".

    Args:
        metadata_path: Path to the eurorad_metadata.json file
        output_dir: Base directory where figures will be saved (created,
            including missing parent directories, if it does not exist)

    Raises:
        OSError: If the metadata file cannot be opened or a directory
            cannot be created.
        json.JSONDecodeError: If the metadata file is not valid JSON.
        KeyError: If a case, figure or subfigure lacks an expected field.

    The figures will be saved as:
        {output_dir}/{case_id}/{figure_number}.jpg
    where figure_number is the subfigure's "number" stripped of surrounding
    whitespace, with spaces replaced by underscores, and lowercased.
    Example:
        figures/189/figure_1a.jpg
    """
    output_path = Path(output_dir)
    # parents=True so a nested output directory works on a fresh checkout.
    output_path.mkdir(parents=True, exist_ok=True)

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)

    for case in tqdm(metadata.values(), desc="Downloading cases", unit="case"):
        # The directory is named after the record's own "case_id" field,
        # not after the key it is stored under in the metadata mapping.
        case_dir = output_path / str(case["case_id"])
        case_dir.mkdir(exist_ok=True)

        for figure in case["figures"]:
            for subfig in figure["subfigures"]:
                # e.g. "Figure 1a" -> "figure_1a.jpg"
                subfig_name = f"{subfig['number'].strip().replace(' ', '_').lower()}.jpg"
                save_figure(
                    url=subfig["url"],
                    output_path=case_dir / subfig_name,
                )
|
|
|
|
def save_figure(url: str, output_path: Path) -> None:
    """
    Download and save a single figure.

    If output_path already exists the download is skipped. Otherwise the
    response body is written to a temporary "<name>.part" file next to the
    target and renamed into place only after the write has finished, so an
    interrupted run never leaves a truncated file that a later run would
    mistake for a completed download.

    Any error raised while downloading or saving is reported on stdout and
    swallowed, so one failed figure does not abort the caller.

    Args:
        url: URL of the figure to download
        output_path: Path where the figure should be saved
    """
    if output_path.exists():
        return

    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        partial_path.write_bytes(response.content)
        # Publish the file under its final name only once it is complete.
        partial_path.replace(output_path)
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        # Best-effort cleanup of a half-written temporary file; it may not
        # exist at all if the failure happened before the write started.
        try:
            partial_path.unlink()
        except OSError:
            pass
|
|
|
|
if __name__ == "__main__":
    # Resolve both the metadata file and the output folder relative to the
    # directory containing this script, independent of the working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    metadata_file = os.path.join(script_dir, "eurorad_metadata.json")
    figures_dir = os.path.join(script_dir, "figures")
    download_eurorad_figures(metadata_path=metadata_file, output_dir=figures_dir)
|
|