File size: 3,243 Bytes
523358f
b124a22
9a91666
064c454
9a91666
6221aa6
064c454
9a91666
 
77c4d7d
9a91666
 
064c454
 
 
 
 
 
 
 
523358f
9a91666
 
064c454
9a91666
064c454
 
 
 
 
 
 
523358f
064c454
 
 
b124a22
9a91666
 
b124a22
9a91666
064c454
9a91666
 
 
 
 
064c454
 
9a91666
064c454
9a91666
 
 
 
 
 
064c454
 
 
9a91666
77c4d7d
064c454
9a91666
523358f
9a91666
064c454
 
 
9a91666
064c454
 
 
9a91666
 
064c454
 
 
 
 
 
 
9a91666
 
 
 
 
 
 
064c454
9a91666
064c454
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
import requests
import gradio as gr
from huggingface_hub import HfApi
from pathlib import Path

# Get token from Space Secrets
# HF_TOKEN is None when the secret is unset; download_and_upload checks
# for that and reports a user-facing error instead of crashing.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Single shared Hub client used for every upload in this app.
api = HfApi(token=HF_TOKEN)

def get_repo_info(url):
    """Extract ``(repo_id, repo_type)`` from a standard Hugging Face URL.

    Args:
        url: A URL such as ``https://huggingface.co/datasets/user/name``
            (dataset) or ``https://huggingface.co/user/name`` (model).

    Returns:
        ``("user/name", "dataset"|"model")`` on success, or
        ``(None, None)`` when the URL cannot be parsed.
    """
    try:
        parts = url.strip().split("huggingface.co/")[1].split("/")
        if parts[0] == "datasets":
            return f"{parts[1]}/{parts[2]}", "dataset"
        else:
            return f"{parts[0]}/{parts[1]}", "model"
    # Narrowed from a bare `except:` — only the failures a malformed URL
    # can actually produce: too few path segments (IndexError) or a
    # non-string input (AttributeError). Anything else should surface.
    except (IndexError, AttributeError):
        return None, None

def download_and_upload(dataset_url, download_url, progress=gr.Progress()):
    """Stream a file from *download_url* to local disk, then upload it to
    the Hugging Face repo identified by *dataset_url*.

    Args:
        dataset_url: URL of the target HF repo (model or dataset page).
        download_url: Direct link to the file to mirror.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        A human-readable status string, prefixed with ✅ on success or
        ❌ on any failure.
    """
    if not HF_TOKEN:
        return "❌ Error: HF_TOKEN not found in Secrets."

    repo_id, repo_type = get_repo_info(dataset_url)
    if not repo_id:
        return "❌ Error: Invalid Dataset/Repo URL format."

    # Destination filename = last path segment of the URL, query dropped.
    filename = download_url.split("/")[-1].split("?")[0]
    if not filename:
        # A URL ending in "/" would make local_path == Path("") (i.e. "."),
        # and the cleanup in `finally` would then try to remove the CWD.
        return "❌ Error: Could not determine a filename from the Download URL."
    local_path = Path(filename)

    try:
        # Step 1: Download with status updates
        progress(0, desc="Initializing stream...")
        with requests.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            total_size = int(r.headers.get('content-length', 0))
            downloaded = 0

            with open(local_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024*1024):  # 1MB chunks
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            done = downloaded / total_size
                            # Download occupies the first half of the bar;
                            # frequent updates also keep the connection alive.
                            progress(done * 0.5, desc=f"Downloading: {downloaded/(1024**3):.2f}GB / {total_size/(1024**3):.2f}GB")

        # Step 2: Upload to HF
        progress(0.6, desc="Uploading to Hugging Face...")
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type=repo_type,
            # Fix: interpolate the real filename — previously the literal
            # placeholder "(unknown)" was committed.
            commit_message=f"Uploaded {filename} via UpDownUrl",
            # This is key for large files:
            run_as_future=False
        )

        return f"✅ Done! '{filename}' is now in {repo_id}"

    except Exception as e:
        # UI boundary: surface any failure as a status line for the Textbox.
        return f"❌ Error: {str(e)}"
    finally:
        # Best-effort cleanup of the temporary local copy.
        if local_path.is_file():
            os.remove(local_path)

# --- UI Setup ---
# Builds the Gradio Blocks layout: a two-column row with inputs + button
# on the left and a read-only status log on the right.
with gr.Blocks() as demo:
    gr.Markdown("# πŸš€ UpDownUrl v1.1")
    
    with gr.Row():
        with gr.Column():
            # Target repo page (model or dataset URL, parsed by get_repo_info).
            dataset_link = gr.Textbox(label="Target Repo Link", placeholder="https://huggingface.co/datasets/AIDev07/AIModelsLoaded")
            # Direct URL of the file to mirror into the repo.
            download_link = gr.Textbox(label="Download URL", placeholder="Direct link to file")
            upload_btn = gr.Button("Boom! Upload", variant="primary")
        
        with gr.Column():
            # Receives the ✅/❌ status string returned by download_and_upload.
            output_log = gr.Textbox(label="Status", interactive=False)

    # Wire the button to the worker; Gradio injects the progress tracker.
    upload_btn.click(
        fn=download_and_upload,
        inputs=[dataset_link, download_link],
        outputs=output_log
    )

# Fixed Gradio 6.0 syntax: Theme goes here
# NOTE(review): the claim above that `theme=` belongs on launch() cannot be
# verified from this file — in several Gradio releases `theme` is a
# parameter of gr.Blocks()/gr.Interface(), not launch(). Confirm against
# the pinned Gradio version before relying on the theme being applied.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())