ismdrobiul489 committed on
Commit
3a25ef1
·
1 Parent(s): 28bbf81

chore: Remove YouTube uploader (not working in cloud Docker)

Browse files
modules/shared/__init__.py CHANGED
@@ -1,23 +1,3 @@
1
  """
2
  Shared utilities and services for NCAkit modules
3
  """
4
-
5
- import logging
6
- from fastapi import FastAPI
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
- # Module metadata
11
- MODULE_NAME = "shared"
12
- MODULE_PREFIX = "/api/utils"
13
- MODULE_DESCRIPTION = "Shared utilities including YouTube to HF uploader"
14
-
15
-
16
- def register(app: FastAPI, config=None):
17
- """Register the shared module with FastAPI."""
18
- from .router import router
19
-
20
- # Include router
21
- app.include_router(router)
22
-
23
- logger.info("shared module registered (utilities endpoints available)")
 
1
  """
2
  Shared utilities and services for NCAkit modules
3
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/shared/router.py DELETED
@@ -1,117 +0,0 @@
1
- """
2
- Shared Utilities Router.
3
- Provides common API endpoints for media management.
4
- """
5
-
6
- import logging
7
- from fastapi import APIRouter, HTTPException
8
- from pydantic import BaseModel, Field
9
- from typing import Optional, List
10
-
11
- from .services.youtube_uploader import get_uploader
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
- router = APIRouter(prefix="/api/utils", tags=["Utilities"])
16
-
17
-
18
- # ============================================
19
- # SCHEMAS
20
- # ============================================
21
-
22
- class FolderCreateRequest(BaseModel):
23
- """Request to create a new folder."""
24
- name: str = Field(..., min_length=1, max_length=50, description="Folder name")
25
-
26
-
27
- class YouTubeUploadRequest(BaseModel):
28
- """Request to download and upload YouTube content."""
29
- url: str = Field(..., description="YouTube video or playlist URL")
30
- folder: str = Field(..., description="Target folder in HF Dataset")
31
- format: str = Field(default="mp4", description="Format: mp4 (video) or mp3 (audio)")
32
- category: Optional[str] = Field(
33
- default=None,
34
- description="Category name for audio files (e.g., 'emotional', 'energetic')"
35
- )
36
-
37
-
38
- # ============================================
39
- # ENDPOINTS
40
- # ============================================
41
-
42
- @router.get("/hf-folders", summary="List HF Dataset folders")
43
- async def list_hf_folders():
44
- """Get all folders in HuggingFace Dataset."""
45
- uploader = get_uploader()
46
-
47
- if not uploader.enabled:
48
- raise HTTPException(
49
- status_code=503,
50
- detail="Uploader not available. Check HF_REPO and HF_TOKEN."
51
- )
52
-
53
- folders = uploader.list_folders()
54
- return {"folders": folders}
55
-
56
-
57
- @router.post("/hf-folders", summary="Create new folder")
58
- async def create_hf_folder(request: FolderCreateRequest):
59
- """Create a new folder in HuggingFace Dataset."""
60
- uploader = get_uploader()
61
-
62
- if not uploader.enabled:
63
- raise HTTPException(status_code=503, detail="Uploader not available")
64
-
65
- success = uploader.create_folder(request.name)
66
-
67
- if not success:
68
- raise HTTPException(status_code=500, detail="Failed to create folder")
69
-
70
- return {"success": True, "folder": request.name}
71
-
72
-
73
- @router.post("/youtube-upload", summary="Upload YouTube content to HF")
74
- async def upload_youtube_content(request: YouTubeUploadRequest):
75
- """
76
- Download YouTube video/playlist and upload to HF Dataset.
77
-
78
- - **url**: YouTube video or playlist URL
79
- - **folder**: Target folder (e.g., 'gameplay_backgrounds', 'music')
80
- - **format**: 'mp4' for video, 'mp3' for audio
81
- - **category**: For audio, category name like 'emotional', 'energetic'
82
- """
83
- uploader = get_uploader()
84
-
85
- if not uploader.enabled:
86
- raise HTTPException(status_code=503, detail="Uploader not available")
87
-
88
- if request.format == "mp3":
89
- # Audio mode
90
- category = request.category or "music"
91
- result = uploader.download_audio(
92
- url=request.url,
93
- folder=request.folder,
94
- category=category
95
- )
96
- else:
97
- # Video mode
98
- result = uploader.download_video(
99
- url=request.url,
100
- folder=request.folder
101
- )
102
-
103
- if not result.get("success", False):
104
- raise HTTPException(status_code=500, detail=result.get("error", "Upload failed"))
105
-
106
- return result
107
-
108
-
109
- @router.get("/health", summary="Check uploader status")
110
- async def uploader_health():
111
- """Check if YouTube uploader is available."""
112
- uploader = get_uploader()
113
-
114
- return {
115
- "enabled": uploader.enabled,
116
- "repo_id": uploader.repo_id if uploader.enabled else None
117
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/shared/services/youtube_uploader.py DELETED
@@ -1,340 +0,0 @@
1
- """
2
- YouTube to HuggingFace Media Uploader.
3
- Downloads YouTube videos/playlists and uploads to HF Dataset.
4
- No local storage - temp files deleted immediately.
5
- """
6
-
7
- import os
8
- import logging
9
- import tempfile
10
- import shutil
11
- from typing import List, Optional, Dict
12
- from pathlib import Path
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- # Try to import dependencies
17
- try:
18
- import yt_dlp
19
- YTDLP_AVAILABLE = True
20
- except ImportError:
21
- YTDLP_AVAILABLE = False
22
- logger.warning("yt-dlp not installed. YouTube uploads disabled.")
23
-
24
- try:
25
- from huggingface_hub import HfApi, list_repo_files
26
- HF_HUB_AVAILABLE = True
27
- except ImportError:
28
- HF_HUB_AVAILABLE = False
29
- logger.warning("huggingface_hub not installed.")
30
-
31
-
32
- class YouTubeToHFUploader:
33
- """
34
- Downloads YouTube videos/audio and uploads to HuggingFace Dataset.
35
-
36
- Features:
37
- - Single video or playlist download
38
- - MP4 (video) or MP3 (audio) format
39
- - Custom category naming for audio
40
- - Direct upload to HF, no local storage
41
- """
42
-
43
- def __init__(self, repo_id: str = None, token: str = None):
44
- """
45
- Initialize uploader.
46
-
47
- Args:
48
- repo_id: HF repo ID (e.g., "username/dataset")
49
- token: HF token with write access
50
- """
51
- self.repo_id = repo_id or os.getenv("HF_REPO", "")
52
- self.token = token or os.getenv("HF_TOKEN", "")
53
- self.api = None
54
-
55
- if not self.repo_id or not self.token:
56
- logger.warning("YouTubeUploader: HF_REPO or HF_TOKEN not set")
57
- self.enabled = False
58
- return
59
-
60
- if not YTDLP_AVAILABLE or not HF_HUB_AVAILABLE:
61
- self.enabled = False
62
- return
63
-
64
- self.enabled = True
65
- self.api = HfApi(token=self.token)
66
- logger.info(f"YouTubeUploader: Initialized for {self.repo_id}")
67
-
68
- def list_folders(self) -> List[str]:
69
- """List all folders in HF Dataset."""
70
- if not self.enabled:
71
- return []
72
-
73
- try:
74
- all_files = list_repo_files(
75
- repo_id=self.repo_id,
76
- repo_type="dataset"
77
- )
78
-
79
- # Extract unique folder names
80
- folders = set()
81
- for f in all_files:
82
- if "/" in f:
83
- folder = f.split("/")[0]
84
- folders.add(folder)
85
-
86
- return sorted(list(folders))
87
-
88
- except Exception as e:
89
- logger.error(f"Failed to list folders: {e}")
90
- return []
91
-
92
- def create_folder(self, folder_name: str) -> bool:
93
- """
94
- Create a new folder in HF Dataset.
95
-
96
- Args:
97
- folder_name: Name of folder to create
98
-
99
- Returns:
100
- True if successful
101
- """
102
- if not self.enabled:
103
- return False
104
-
105
- try:
106
- # Create placeholder file to create folder
107
- placeholder = f"# {folder_name}\n\nFolder for media files."
108
-
109
- with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
110
- f.write(placeholder)
111
- temp_path = f.name
112
-
113
- self.api.upload_file(
114
- path_or_fileobj=temp_path,
115
- path_in_repo=f"{folder_name}/README.md",
116
- repo_id=self.repo_id,
117
- repo_type="dataset"
118
- )
119
-
120
- os.remove(temp_path)
121
- logger.info(f"Created folder: {folder_name}")
122
- return True
123
-
124
- except Exception as e:
125
- logger.error(f"Failed to create folder: {e}")
126
- return False
127
-
128
- def _get_next_index(self, folder: str, category: str) -> int:
129
- """Get next available index for category files."""
130
- try:
131
- all_files = list_repo_files(
132
- repo_id=self.repo_id,
133
- repo_type="dataset"
134
- )
135
-
136
- # Find existing files with this category
137
- existing = [
138
- f for f in all_files
139
- if f.startswith(f"{folder}/{category}_") and f.endswith(".mp3")
140
- ]
141
-
142
- if not existing:
143
- return 1
144
-
145
- # Extract highest index
146
- indices = []
147
- for f in existing:
148
- try:
149
- # Extract number from filename like "emotional_003.mp3"
150
- name = f.split("/")[-1]
151
- num = int(name.replace(f"{category}_", "").replace(".mp3", ""))
152
- indices.append(num)
153
- except:
154
- pass
155
-
156
- return max(indices) + 1 if indices else 1
157
-
158
- except:
159
- return 1
160
-
161
- def download_video(self, url: str, folder: str) -> Dict:
162
- """
163
- Download YouTube video and upload to HF.
164
-
165
- Args:
166
- url: YouTube video or playlist URL
167
- folder: Target folder in HF Dataset
168
-
169
- Returns:
170
- Dict with results
171
- """
172
- if not self.enabled:
173
- return {"success": False, "error": "Uploader not enabled"}
174
-
175
- temp_dir = tempfile.mkdtemp(prefix="yt_video_")
176
- results = {"success": True, "uploaded": [], "errors": []}
177
-
178
- try:
179
- ydl_opts = {
180
- 'format': 'best[height<=720][ext=mp4]/best[height<=720]/best',
181
- 'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
182
- 'quiet': True,
183
- 'no_warnings': True,
184
- }
185
-
186
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
187
- # Get video info
188
- info = ydl.extract_info(url, download=True)
189
-
190
- # Handle playlist
191
- entries = info.get('entries', [info])
192
-
193
- for entry in entries:
194
- if not entry:
195
- continue
196
-
197
- video_id = entry.get('id', 'unknown')
198
-
199
- # Find downloaded file
200
- for ext in ['mp4', 'webm', 'mkv']:
201
- local_path = os.path.join(temp_dir, f"{video_id}.{ext}")
202
- if os.path.exists(local_path):
203
- break
204
- else:
205
- results["errors"].append(f"File not found for {video_id}")
206
- continue
207
-
208
- # Upload to HF
209
- try:
210
- path_in_repo = f"{folder}/{video_id}.mp4"
211
- self.api.upload_file(
212
- path_or_fileobj=local_path,
213
- path_in_repo=path_in_repo,
214
- repo_id=self.repo_id,
215
- repo_type="dataset"
216
- )
217
-
218
- cloud_url = f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{path_in_repo}"
219
- results["uploaded"].append({
220
- "id": video_id,
221
- "title": entry.get('title', video_id),
222
- "url": cloud_url
223
- })
224
-
225
- # Delete local file immediately
226
- os.remove(local_path)
227
- logger.info(f"Uploaded: {video_id}")
228
-
229
- except Exception as e:
230
- results["errors"].append(f"{video_id}: {str(e)}")
231
-
232
- except Exception as e:
233
- results["success"] = False
234
- results["error"] = str(e)
235
- logger.error(f"Download failed: {e}")
236
-
237
- finally:
238
- # Cleanup temp directory
239
- shutil.rmtree(temp_dir, ignore_errors=True)
240
-
241
- return results
242
-
243
- def download_audio(self, url: str, folder: str, category: str = "music") -> Dict:
244
- """
245
- Download YouTube audio as MP3 and upload to HF.
246
-
247
- Args:
248
- url: YouTube video or playlist URL
249
- folder: Target folder in HF Dataset
250
- category: Category name for files (e.g., "emotional", "energetic")
251
-
252
- Returns:
253
- Dict with results
254
- """
255
- if not self.enabled:
256
- return {"success": False, "error": "Uploader not enabled"}
257
-
258
- temp_dir = tempfile.mkdtemp(prefix="yt_audio_")
259
- results = {"success": True, "uploaded": [], "errors": []}
260
-
261
- try:
262
- ydl_opts = {
263
- 'format': 'bestaudio/best',
264
- 'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
265
- 'quiet': True,
266
- 'no_warnings': True,
267
- 'postprocessors': [{
268
- 'key': 'FFmpegExtractAudio',
269
- 'preferredcodec': 'mp3',
270
- 'preferredquality': '192',
271
- }],
272
- }
273
-
274
- # Get starting index for category
275
- next_index = self._get_next_index(folder, category)
276
-
277
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
278
- info = ydl.extract_info(url, download=True)
279
- entries = info.get('entries', [info])
280
-
281
- for entry in entries:
282
- if not entry:
283
- continue
284
-
285
- video_id = entry.get('id', 'unknown')
286
- local_path = os.path.join(temp_dir, f"{video_id}.mp3")
287
-
288
- if not os.path.exists(local_path):
289
- results["errors"].append(f"MP3 not found for {video_id}")
290
- continue
291
-
292
- # Upload with category naming
293
- try:
294
- filename = f"{category}_{next_index:03d}.mp3"
295
- path_in_repo = f"{folder}/{filename}"
296
-
297
- self.api.upload_file(
298
- path_or_fileobj=local_path,
299
- path_in_repo=path_in_repo,
300
- repo_id=self.repo_id,
301
- repo_type="dataset"
302
- )
303
-
304
- cloud_url = f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{path_in_repo}"
305
- results["uploaded"].append({
306
- "id": video_id,
307
- "title": entry.get('title', video_id),
308
- "filename": filename,
309
- "url": cloud_url
310
- })
311
-
312
- # Delete local file immediately
313
- os.remove(local_path)
314
- logger.info(f"Uploaded: {filename}")
315
- next_index += 1
316
-
317
- except Exception as e:
318
- results["errors"].append(f"{video_id}: {str(e)}")
319
-
320
- except Exception as e:
321
- results["success"] = False
322
- results["error"] = str(e)
323
- logger.error(f"Audio download failed: {e}")
324
-
325
- finally:
326
- shutil.rmtree(temp_dir, ignore_errors=True)
327
-
328
- return results
329
-
330
-
331
- # Singleton instance
332
- _uploader: Optional[YouTubeToHFUploader] = None
333
-
334
-
335
- def get_uploader() -> YouTubeToHFUploader:
336
- """Get or create uploader instance."""
337
- global _uploader
338
- if _uploader is None:
339
- _uploader = YouTubeToHFUploader()
340
- return _uploader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -29,6 +29,3 @@ imageio-ffmpeg>=0.4.9
29
  # Trends Analysis
30
  pytrends
31
  pandas
32
-
33
- # YouTube Downloads
34
- yt-dlp
 
29
  # Trends Analysis
30
  pytrends
31
  pandas