SuveenE committed on
Commit
ca1cd0c
·
1 Parent(s): bd2a0c4

Update UI

Browse files
Files changed (1) hide show
  1. app.py +101 -18
app.py CHANGED
@@ -58,29 +58,113 @@ def fetch_stats_for_selected(selected_datasets: List[str], progress=gr.Progress(
58
  return "Please select at least one dataset"
59
 
60
  token = os.environ.get("HF_TOKEN")
61
- results = []
62
  total_episodes = 0
63
 
 
 
 
 
 
 
 
 
 
64
  for i, repo_id in enumerate(selected_datasets):
65
  try:
66
  progress((i + 1) / len(selected_datasets), desc=f"Processing {repo_id}...")
67
  stats = get_dataset_stats(repo_id, hf_token=token)
68
 
69
  if stats.get("error"):
70
- results.append(f"❌ {repo_id}: Error - {stats['error']}")
71
  else:
72
  episodes = stats['total_episodes']
73
  total_episodes += episodes
74
- results.append(f"{repo_id}: **{episodes}** episodes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
- results.append(f"❌ {repo_id}: Error - {str(e)}")
78
 
79
- # Build output with total at top (larger font)
80
  output = [f"## Total Episodes: {total_episodes}\n"]
81
- output.extend(results)
82
 
83
- return "\n\n".join(output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  # Build the Gradio interface
@@ -91,6 +175,9 @@ with gr.Blocks(title="LeRobot Dataset Stats Viewer") as demo:
91
  _user_orgs = get_user_organizations()
92
  _initial_datasets = search_datasets_fn(_user_orgs[0]) if _user_orgs else []
93
 
 
 
 
94
  with gr.Row():
95
  org_dropdown = gr.Dropdown(
96
  label="Select Organization",
@@ -120,32 +207,28 @@ with gr.Blocks(title="LeRobot Dataset Stats Viewer") as demo:
120
  # Event handlers
121
  def load_datasets_from_org(org_name):
122
  results = search_datasets_fn(org_name)
123
- return gr.update(choices=results, value=[])
124
-
125
- def select_all_datasets(current_choices):
126
- # Get all available choices from the checkbox group
127
- return current_choices
128
-
129
- def deselect_all_datasets():
130
- return []
131
 
132
  # Load datasets on button click or dropdown change
133
  load_btn.click(
134
  load_datasets_from_org,
135
  inputs=org_dropdown,
136
- outputs=dataset_checkboxes,
137
  )
138
 
139
  org_dropdown.change(
140
  load_datasets_from_org,
141
  inputs=org_dropdown,
142
- outputs=dataset_checkboxes,
143
  )
144
 
145
  # Select/Deselect all buttons
146
  select_all_btn.click(
147
  lambda choices: gr.update(value=choices),
148
- inputs=dataset_checkboxes,
149
  outputs=dataset_checkboxes,
150
  )
151
 
 
58
  return "Please select at least one dataset"
59
 
60
  token = os.environ.get("HF_TOKEN")
 
61
  total_episodes = 0
62
 
63
+ # Separate v3 and non-v3 datasets, organize by date
64
+ from collections import defaultdict
65
+ import re
66
+ from datetime import datetime
67
+
68
+ v3_by_date = defaultdict(list) # date -> list of (repo_id, episodes, stats)
69
+ non_v3_results = []
70
+ errors = []
71
+
72
  for i, repo_id in enumerate(selected_datasets):
73
  try:
74
  progress((i + 1) / len(selected_datasets), desc=f"Processing {repo_id}...")
75
  stats = get_dataset_stats(repo_id, hf_token=token)
76
 
77
  if stats.get("error"):
78
+ errors.append(f"❌ {repo_id}: Error - {stats['error']}")
79
  else:
80
  episodes = stats['total_episodes']
81
  total_episodes += episodes
82
+
83
+ # Check if v3 format
84
+ is_v3 = stats.get("format_version") == "v3.0"
85
+
86
+ if is_v3:
87
+ # Try to extract date from repo_id (format: org/DDMMYYYY-name)
88
+ date_match = re.search(r'/(\d{8})', repo_id)
89
+ if date_match:
90
+ date_str = date_match.group(1)
91
+ try:
92
+ # Parse as DDMMYYYY
93
+ date_obj = datetime.strptime(date_str, '%d%m%Y')
94
+ date_key = date_obj.strftime('%Y-%m-%d') # ISO format for sorting
95
+ date_display = date_obj.strftime('%B %d, %Y') # Nice display format
96
+ except ValueError:
97
+ date_key = date_str
98
+ date_display = date_str
99
+
100
+ v3_by_date[date_key].append({
101
+ 'repo_id': repo_id,
102
+ 'episodes': episodes,
103
+ 'date_display': date_display,
104
+ 'stats': stats
105
+ })
106
+ else:
107
+ # v3 but no date in name
108
+ v3_by_date['unknown'].append({
109
+ 'repo_id': repo_id,
110
+ 'episodes': episodes,
111
+ 'date_display': 'Unknown Date',
112
+ 'stats': stats
113
+ })
114
+ else:
115
+ non_v3_results.append(f"{repo_id}: **{episodes}** episodes")
116
 
117
  except Exception as e:
118
+ errors.append(f"❌ {repo_id}: Error - {str(e)}")
119
 
120
+ # Build output
121
  output = [f"## Total Episodes: {total_episodes}\n"]
 
122
 
123
+ # Display v3 datasets grouped by date
124
+ if v3_by_date:
125
+ output.append("### πŸ“… v3.0 Datasets by Date\n")
126
+
127
+ # Sort dates (most recent first)
128
+ sorted_dates = sorted([k for k in v3_by_date.keys() if k != 'unknown'], reverse=True)
129
+ if 'unknown' in v3_by_date:
130
+ sorted_dates.append('unknown')
131
+
132
+ for date_key in sorted_dates:
133
+ datasets = v3_by_date[date_key]
134
+ date_display = datasets[0]['date_display']
135
+ date_total = sum(d['episodes'] for d in datasets)
136
+
137
+ output.append(f"**{date_display}** β€” Total: **{date_total} episodes**")
138
+
139
+ for dataset in sorted(datasets, key=lambda x: x['repo_id']):
140
+ repo_name = dataset['repo_id'].split('/')[-1] # Just the dataset name
141
+ episodes = dataset['episodes']
142
+
143
+ # Add metadata if available
144
+ info_meta = dataset['stats'].get('info_metadata', {})
145
+ extra_info = []
146
+ if info_meta.get('total_frames'):
147
+ extra_info.append(f"{info_meta['total_frames']:,} frames")
148
+ if info_meta.get('robot_type'):
149
+ extra_info.append(f"{info_meta['robot_type']}")
150
+
151
+ extra_str = f" ({', '.join(extra_info)})" if extra_info else ""
152
+ output.append(f" β€’ `{repo_name}`: {episodes} episodes{extra_str}")
153
+
154
+ output.append("") # Empty line between dates
155
+
156
+ # Display non-v3 datasets
157
+ if non_v3_results:
158
+ output.append("### πŸ“¦ v2.1 Datasets\n")
159
+ output.extend(non_v3_results)
160
+ output.append("")
161
+
162
+ # Display errors at the end
163
+ if errors:
164
+ output.append("### ⚠️ Errors\n")
165
+ output.extend(errors)
166
+
167
+ return "\n".join(output)
168
 
169
 
170
  # Build the Gradio interface
 
175
  _user_orgs = get_user_organizations()
176
  _initial_datasets = search_datasets_fn(_user_orgs[0]) if _user_orgs else []
177
 
178
+ # State to track current dataset choices
179
+ current_choices = gr.State(_initial_datasets)
180
+
181
  with gr.Row():
182
  org_dropdown = gr.Dropdown(
183
  label="Select Organization",
 
207
  # Event handlers
208
  def load_datasets_from_org(org_name):
209
  results = search_datasets_fn(org_name)
210
+ return [
211
+ gr.update(choices=results, value=[]), # Update checkboxes
212
+ results # Update state
213
+ ]
 
 
 
 
214
 
215
  # Load datasets on button click or dropdown change
216
  load_btn.click(
217
  load_datasets_from_org,
218
  inputs=org_dropdown,
219
+ outputs=[dataset_checkboxes, current_choices],
220
  )
221
 
222
  org_dropdown.change(
223
  load_datasets_from_org,
224
  inputs=org_dropdown,
225
+ outputs=[dataset_checkboxes, current_choices],
226
  )
227
 
228
  # Select/Deselect all buttons
229
  select_all_btn.click(
230
  lambda choices: gr.update(value=choices),
231
+ inputs=current_choices,
232
  outputs=dataset_checkboxes,
233
  )
234