| |
| import os |
| import shutil |
| import json |
| from pathlib import Path |
| from huggingface_hub import HfApi, create_repo |
| import tarfile |
| import tempfile |
|
|
class HFStorageSync:
    """Mirror a local data directory to a Hugging Face dataset repository.

    The directory is stored remotely as a single gzip-compressed tarball
    (``data.tar.gz``). ``download_data`` restores it into ``data_dir`` and
    ``upload_data`` replaces the remote archive with the current contents.

    All public methods are best-effort: failures are reported on stdout and
    never raised to the caller.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/name"``.
        token: Hugging Face access token. Without it every remote operation
            is skipped.
        data_dir: Local directory to synchronise.
    """

    # Name of the archive inside the dataset repository.
    ARCHIVE_NAME = "data.tar.gz"

    # Scratch/placeholder entries that must never be archived.
    EXCLUDED_NAMES = frozenset({"test_write", ".gitkeep"})

    # README committed once, right after the repository is created.
    _README_CONTENT = (
        "# Open WebUI Storage\n"
        "\n"
        "This dataset stores persistent data for Open WebUI deployment.\n"
        "\n"
        "## Contents\n"
        "\n"
        "- `data.tar.gz`: Compressed archive containing all Open WebUI data including:\n"
        "  - User configurations\n"
        "  - Chat histories\n"
        "  - Uploaded files\n"
        "  - Database files\n"
        "\n"
        "This repository is automatically managed by the Open WebUI sync system.\n"
    )

    def __init__(self, repo_id, token=None, data_dir="/tmp/open-webui-data"):
        self.repo_id = repo_id
        self.data_dir = Path(data_dir)
        self.token = token
        self.api = HfApi(token=token) if token else HfApi()

    def ensure_repo_exists(self):
        """Make sure the dataset repository exists, creating it if needed.

        Returns:
            True if the repository is reachable or was created, False if no
            token is configured or creation failed.
        """
        if not self.token:
            print("No token provided, cannot create repository")
            return False

        try:
            self.api.repo_info(repo_id=self.repo_id, repo_type="dataset")
        except Exception:
            # NOTE: any lookup failure (not only "not found") lands here;
            # create_repo(exist_ok=True) below makes the retry harmless when
            # the repository actually exists.
            print(f"Repository {self.repo_id} not found, attempting to create...")
        else:
            print(f"Repository {self.repo_id} exists")
            return True

        try:
            create_repo(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
                private=True,
                exist_ok=True,
            )
            print(f"Created repository {self.repo_id}")

            # Upload the README from memory: no temporary file is created,
            # so nothing can be left behind when the upload fails.
            self.api.upload_file(
                path_or_fileobj=self._README_CONTENT.encode("utf-8"),
                path_in_repo="README.md",
                repo_id=self.repo_id,
                repo_type="dataset",
                commit_message="Initial repository setup",
                token=self.token,
            )
        except Exception as create_error:
            print(f"Failed to create repository: {create_error}")
            return False
        return True

    def download_data(self):
        """Download ``data.tar.gz`` and extract it into ``data_dir``.

        The data directory is created (and probed for writability) even when
        no token is configured. A missing remote archive is treated as a
        normal first run. Errors are printed, never raised.
        """
        try:
            print("Downloading data from Hugging Face...")

            self.data_dir.mkdir(parents=True, exist_ok=True)
            if not self._data_dir_is_writable():
                return

            if not self.token:
                print("No HF_TOKEN provided, skipping download")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            try:
                archive_path = self.api.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.ARCHIVE_NAME,
                    repo_type="dataset",
                    token=self.token,
                )
            except Exception as e:
                print(f"No existing data found (this is normal for first run): {e}")
                return

            # Extraction happens outside the "no existing data" handler so a
            # corrupt or unsafe archive is reported as an error instead of
            # being mistaken for a first run.
            with tarfile.open(archive_path, "r:gz") as tar:
                self._extract_archive(tar)

            print(f"Data extracted to {self.data_dir}")

        except Exception as e:
            print(f"Error during download: {e}")

    def upload_data(self):
        """Archive ``data_dir`` and upload it as ``data.tar.gz``.

        Nothing is uploaded when no token is configured, when the directory
        is missing, or when it holds nothing besides the excluded placeholder
        entries (uploading then would overwrite the remote backup with an
        empty archive). Errors are printed, never raised.
        """
        try:
            if not self.token:
                print("No HF_TOKEN provided, skipping upload")
                return

            print("Uploading data to Hugging Face...")

            items = self._archivable_items()
            if not items:
                print("No data to upload")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            fd, archive_path = tempfile.mkstemp(suffix=".tar.gz")
            os.close(fd)  # tarfile reopens the path itself
            try:
                with tarfile.open(archive_path, "w:gz") as tar:
                    for item in items:
                        tar.add(item, arcname=item.name)

                self.api.upload_file(
                    path_or_fileobj=archive_path,
                    path_in_repo=self.ARCHIVE_NAME,
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Update Open WebUI data",
                    token=self.token,
                )
            finally:
                # Remove the temporary archive whether or not the upload worked.
                os.unlink(archive_path)

            print("Data uploaded successfully")

        except Exception as e:
            print(f"Error uploading data: {e}")

    def _data_dir_is_writable(self):
        """Return True if a scratch file can be created and removed in ``data_dir``."""
        probe = self.data_dir / "test_write"
        try:
            probe.touch()
            probe.unlink()
        except Exception as e:
            print(f"Warning: Data directory may not be writable: {e}")
            return False
        print(f"Data directory {self.data_dir} is writable")
        return True

    def _archivable_items(self):
        """Return the top-level entries of ``data_dir`` that belong in the archive."""
        if not self.data_dir.exists():
            return []
        return sorted(
            entry
            for entry in self.data_dir.iterdir()
            if entry.name not in self.EXCLUDED_NAMES
        )

    def _extract_archive(self, tar):
        """Extract ``tar`` into ``data_dir`` without letting members escape it."""
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: reject absolute paths,
            # "../" traversal, links leaving the destination and device files.
            tar.extractall(self.data_dir, filter="data")
            return

        # Older interpreters: at least refuse member names that resolve
        # outside the destination (links are not inspected here).
        root = self.data_dir.resolve()
        for member in tar.getmembers():
            target = (root / member.name).resolve()
            if target != root and root not in target.parents:
                raise tarfile.ExtractError(
                    f"Refusing to extract {member.name!r} outside {root}"
                )
        tar.extractall(self.data_dir)
|
|
def main():
    """Command-line entry point: run ``download`` or ``upload``.

    The repository id, access token and data directory are read from the
    ``HF_STORAGE_REPO``, ``HF_TOKEN`` and ``DATA_DIR`` environment variables.
    The first command-line argument selects the action; a missing or
    unrecognised argument prints the usage line instead.
    """
    import sys

    usage = "Usage: sync_storage.py [download|upload]"

    sync = HFStorageSync(
        os.getenv("HF_STORAGE_REPO", "nxdev-org/open-webui-storage"),
        os.getenv("HF_TOKEN"),
        os.getenv("DATA_DIR", "/tmp/open-webui-data"),
    )

    # Map each sub-command to the bound method that implements it.
    actions = {
        "download": sync.download_data,
        "upload": sync.upload_data,
    }
    action = actions.get(sys.argv[1]) if len(sys.argv) > 1 else None

    if action is None:
        print(usage)
    else:
        action()


if __name__ == "__main__":
    main()
|
|