SuveenE committed on
Commit
8a891dd
·
1 Parent(s): e47c689

Update App Ui

Browse files
Files changed (1) hide show
  1. app.py +74 -53
app.py CHANGED
@@ -4,11 +4,7 @@ from typing import List
4
  import gradio as gr
5
  from huggingface_hub import HfApi
6
 
7
- from get_dataset_stats import (
8
- get_dataset_stats,
9
- format_stats_display,
10
- compare_metadata_with_actual,
11
- )
12
 
13
 
14
  def search_datasets_fn(query: str) -> List[str]:
@@ -31,48 +27,71 @@ def search_datasets_fn(query: str) -> List[str]:
31
  return []
32
 
33
 
34
def fetch_stats_fn(repo_id: str, progress=gr.Progress()):
    """Build the Markdown statistics report for a single dataset repo.

    Args:
        repo_id: Repo id of the dataset; a falsy value short-circuits to "".
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        The formatted stats, followed by the metadata comparison (when the
        stats carry "info_metadata") and the episode-number listing (when
        any are present). On any exception, the error message with the full
        traceback is printed and returned instead.
    """
    if not repo_id:
        return ""

    try:
        progress(0.3, desc="Fetching dataset info...")
        hf_token = os.environ.get("HF_TOKEN")

        progress(0.5, desc="Analyzing files...")
        stats = get_dataset_stats(repo_id, hf_token=hf_token)

        progress(0.8, desc="Formatting results...")

        # Main stats section
        stats_display = format_stats_display(stats)

        # Metadata comparison, only when metadata is present
        comparison_display = (
            "\n\n" + compare_metadata_with_actual(stats)
            if stats.get("info_metadata")
            else ""
        )

        # Episode listing: everything up to 100 entries, otherwise the first 50
        episodes_list = ""
        episode_numbers = stats["episode_numbers"]
        if episode_numbers:
            if len(episode_numbers) > 100:
                shown = ', '.join(map(str, episode_numbers[:50]))
                episodes_list = f"\n\n**Episode Numbers:** {shown}... (showing first 50 of {len(episode_numbers)})"
            else:
                shown = ', '.join(map(str, episode_numbers))
                episodes_list = f"\n\n**Episode Numbers:** {shown}"

        progress(1.0, desc="Complete!")

        # All sections are concatenated into one Markdown string
        return stats_display + comparison_display + episodes_list

    except Exception as exc:
        import traceback
        failure = f"❌ Error fetching stats: {str(exc)}\n\n{traceback.format_exc()}"
        print(failure)
        return failure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
  # Build the Gradio interface
@@ -90,31 +109,33 @@ with gr.Blocks(title="LeRobot Dataset Stats Viewer") as demo:
90
  )
91
  load_btn = gr.Button("Load Datasets")
92
 
93
- dataset_dropdown = gr.Dropdown(
94
- label="Select dataset",
95
  choices=_initial_choices,
96
  interactive=True,
97
  )
98
 
 
 
99
  stats_output = gr.Markdown(
100
  label="Dataset Statistics",
101
- value="Select a dataset to view statistics"
102
  )
103
 
104
  # Event handlers
105
  def load_datasets_from_org(org_name):
106
  results = search_datasets_fn(org_name)
107
- return gr.update(choices=results, value=None)
108
 
109
  load_btn.click(
110
  load_datasets_from_org,
111
  inputs=org_input,
112
- outputs=dataset_dropdown,
113
  )
114
 
115
- dataset_dropdown.change(
116
- fetch_stats_fn,
117
- inputs=dataset_dropdown,
118
  outputs=stats_output,
119
  )
120
 
 
4
  import gradio as gr
5
  from huggingface_hub import HfApi
6
 
7
+ from get_dataset_stats import get_dataset_stats
 
 
 
 
8
 
9
 
10
  def search_datasets_fn(query: str) -> List[str]:
 
27
  return []
28
 
29
 
30
def fetch_stats_for_selected(selected_datasets: List[str], progress=gr.Progress()):
    """Fetch statistics for the selected datasets and render a Markdown report.

    Args:
        selected_datasets: Repo ids chosen in the UI.
        progress: Gradio progress tracker, called with a completed fraction
            and a description.

    Returns:
        A prompt string when nothing is selected; otherwise Markdown with one
        section per dataset (stats or an error line) followed by totals over
        the datasets that were fetched successfully.
    """
    if not selected_datasets:
        return "Please select at least one dataset"

    # None when HF_TOKEN is not set in the environment.
    token = os.environ.get("HF_TOKEN")
    dataset_count = len(selected_datasets)

    results = [
        f"**Fetching stats for {dataset_count} dataset(s)...**\n",
        "=" * 80 + "\n",
    ]
    total_episodes = 0
    total_parquet_files = 0
    total_video_files = 0

    for i, repo_id in enumerate(selected_datasets):
        # The heading is emitted exactly once, before anything can fail, so an
        # exception raised mid-way never produces a duplicated heading.
        results.append(f"\n### {i+1}. {repo_id}")

        try:
            # Report the fraction already finished; reporting (i + 1) / n here
            # would show 100% while the last dataset is still being fetched.
            progress(i / dataset_count, desc=f"Processing {repo_id}...")
            stats = get_dataset_stats(repo_id, hf_token=token)

            if stats.get("error"):
                results.append(f"❌ **Error:** {stats['error']}")
                results.append("")
                continue

            episodes = stats['total_episodes']
            parquet = stats['total_parquet_files']
            videos = stats['total_video_files']

            # Collect this dataset's lines separately so a failure below does
            # not leave a half-written section in the report.
            section = [
                f"- **Episodes:** {episodes}",
                f"- **Parquet files:** {parquet}",
                f"- **Video files:** {videos}",
            ]

            if stats.get("codebase_version"):
                section.append(f"- **Version:** {stats['codebase_version']}")

            # Show episode range if available
            episode_nums = stats.get("episode_numbers")
            if episode_nums:
                # min/max instead of first/last: do not rely on the list being
                # sorted. Assumes the entries are integers - TODO confirm.
                first, last = min(episode_nums), max(episode_nums)
                section.append(f"- **Episode range:** {first} to {last}")

                # Check for gaps
                missing = set(range(first, last + 1)) - set(episode_nums)
                if missing:
                    section.append(f"- **⚠️ Missing episodes:** {sorted(missing)}")

            # Compute the new totals before committing anything, so a bad
            # value cannot leave the report and the totals out of sync.
            new_total_episodes = total_episodes + episodes
            new_total_parquet_files = total_parquet_files + parquet
            new_total_video_files = total_video_files + videos

        except Exception as e:
            # Best effort: one failing dataset must not abort the whole report.
            results.append(f"❌ **Error:** {str(e)}\n")
            continue

        results.extend(section)
        results.append("")
        total_episodes = new_total_episodes
        total_parquet_files = new_total_parquet_files
        total_video_files = new_total_video_files

    progress(1.0, desc="Complete!")

    # Summary
    results.append("=" * 80)
    results.append("\n## 📊 **Total Summary**")
    results.append(f"- **Total Episodes:** {total_episodes}")
    results.append(f"- **Total Parquet Files:** {total_parquet_files}")
    results.append(f"- **Total Video Files:** {total_video_files}")
    results.append(f"- **Datasets Processed:** {dataset_count}")

    return "\n".join(results)
95
 
96
 
97
  # Build the Gradio interface
 
109
  )
110
  load_btn = gr.Button("Load Datasets")
111
 
112
+ dataset_checkboxes = gr.CheckboxGroup(
113
+ label="Select datasets",
114
  choices=_initial_choices,
115
  interactive=True,
116
  )
117
 
118
+ fetch_btn = gr.Button("Fetch Statistics", variant="primary")
119
+
120
  stats_output = gr.Markdown(
121
  label="Dataset Statistics",
122
+ value="Select datasets and click 'Fetch Statistics'"
123
  )
124
 
125
  # Event handlers
126
  def load_datasets_from_org(org_name):
127
  results = search_datasets_fn(org_name)
128
+ return gr.update(choices=results, value=[])
129
 
130
  load_btn.click(
131
  load_datasets_from_org,
132
  inputs=org_input,
133
+ outputs=dataset_checkboxes,
134
  )
135
 
136
+ fetch_btn.click(
137
+ fetch_stats_for_selected,
138
+ inputs=dataset_checkboxes,
139
  outputs=stats_output,
140
  )
141