Spaces:
Running
Running
Update UI
Browse files
app.py
CHANGED
|
@@ -58,29 +58,113 @@ def fetch_stats_for_selected(selected_datasets: List[str], progress=gr.Progress(
|
|
| 58 |
return "Please select at least one dataset"
|
| 59 |
|
| 60 |
token = os.environ.get("HF_TOKEN")
|
| 61 |
-
results = []
|
| 62 |
total_episodes = 0
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
for i, repo_id in enumerate(selected_datasets):
|
| 65 |
try:
|
| 66 |
progress((i + 1) / len(selected_datasets), desc=f"Processing {repo_id}...")
|
| 67 |
stats = get_dataset_stats(repo_id, hf_token=token)
|
| 68 |
|
| 69 |
if stats.get("error"):
|
| 70 |
-
|
| 71 |
else:
|
| 72 |
episodes = stats['total_episodes']
|
| 73 |
total_episodes += episodes
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
except Exception as e:
|
| 77 |
-
|
| 78 |
|
| 79 |
-
# Build output
|
| 80 |
output = [f"## Total Episodes: {total_episodes}\n"]
|
| 81 |
-
output.extend(results)
|
| 82 |
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
# Build the Gradio interface
|
|
@@ -91,6 +175,9 @@ with gr.Blocks(title="LeRobot Dataset Stats Viewer") as demo:
|
|
| 91 |
_user_orgs = get_user_organizations()
|
| 92 |
_initial_datasets = search_datasets_fn(_user_orgs[0]) if _user_orgs else []
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
with gr.Row():
|
| 95 |
org_dropdown = gr.Dropdown(
|
| 96 |
label="Select Organization",
|
|
@@ -120,32 +207,28 @@ with gr.Blocks(title="LeRobot Dataset Stats Viewer") as demo:
|
|
| 120 |
# Event handlers
|
| 121 |
def load_datasets_from_org(org_name):
|
| 122 |
results = search_datasets_fn(org_name)
|
| 123 |
-
return
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
return current_choices
|
| 128 |
-
|
| 129 |
-
def deselect_all_datasets():
|
| 130 |
-
return []
|
| 131 |
|
| 132 |
# Load datasets on button click or dropdown change
|
| 133 |
load_btn.click(
|
| 134 |
load_datasets_from_org,
|
| 135 |
inputs=org_dropdown,
|
| 136 |
-
outputs=dataset_checkboxes,
|
| 137 |
)
|
| 138 |
|
| 139 |
org_dropdown.change(
|
| 140 |
load_datasets_from_org,
|
| 141 |
inputs=org_dropdown,
|
| 142 |
-
outputs=dataset_checkboxes,
|
| 143 |
)
|
| 144 |
|
| 145 |
# Select/Deselect all buttons
|
| 146 |
select_all_btn.click(
|
| 147 |
lambda choices: gr.update(value=choices),
|
| 148 |
-
inputs=
|
| 149 |
outputs=dataset_checkboxes,
|
| 150 |
)
|
| 151 |
|
|
|
|
| 58 |
return "Please select at least one dataset"
|
| 59 |
|
| 60 |
token = os.environ.get("HF_TOKEN")
|
|
|
|
| 61 |
total_episodes = 0
|
| 62 |
|
| 63 |
+
# Separate v3 and non-v3 datasets, organize by date
|
| 64 |
+
from collections import defaultdict
|
| 65 |
+
import re
|
| 66 |
+
from datetime import datetime
|
| 67 |
+
|
| 68 |
+
v3_by_date = defaultdict(list) # date -> list of (repo_id, episodes, stats)
|
| 69 |
+
non_v3_results = []
|
| 70 |
+
errors = []
|
| 71 |
+
|
| 72 |
for i, repo_id in enumerate(selected_datasets):
|
| 73 |
try:
|
| 74 |
progress((i + 1) / len(selected_datasets), desc=f"Processing {repo_id}...")
|
| 75 |
stats = get_dataset_stats(repo_id, hf_token=token)
|
| 76 |
|
| 77 |
if stats.get("error"):
|
| 78 |
+
errors.append(f"❌ {repo_id}: Error - {stats['error']}")
|
| 79 |
else:
|
| 80 |
episodes = stats['total_episodes']
|
| 81 |
total_episodes += episodes
|
| 82 |
+
|
| 83 |
+
# Check if v3 format
|
| 84 |
+
is_v3 = stats.get("format_version") == "v3.0"
|
| 85 |
+
|
| 86 |
+
if is_v3:
|
| 87 |
+
# Try to extract date from repo_id (format: org/DDMMYYYY-name)
|
| 88 |
+
date_match = re.search(r'/(\d{8})', repo_id)
|
| 89 |
+
if date_match:
|
| 90 |
+
date_str = date_match.group(1)
|
| 91 |
+
try:
|
| 92 |
+
# Parse as DDMMYYYY
|
| 93 |
+
date_obj = datetime.strptime(date_str, '%d%m%Y')
|
| 94 |
+
date_key = date_obj.strftime('%Y-%m-%d') # ISO format for sorting
|
| 95 |
+
date_display = date_obj.strftime('%B %d, %Y') # Nice display format
|
| 96 |
+
except ValueError:
|
| 97 |
+
date_key = date_str
|
| 98 |
+
date_display = date_str
|
| 99 |
+
|
| 100 |
+
v3_by_date[date_key].append({
|
| 101 |
+
'repo_id': repo_id,
|
| 102 |
+
'episodes': episodes,
|
| 103 |
+
'date_display': date_display,
|
| 104 |
+
'stats': stats
|
| 105 |
+
})
|
| 106 |
+
else:
|
| 107 |
+
# v3 but no date in name
|
| 108 |
+
v3_by_date['unknown'].append({
|
| 109 |
+
'repo_id': repo_id,
|
| 110 |
+
'episodes': episodes,
|
| 111 |
+
'date_display': 'Unknown Date',
|
| 112 |
+
'stats': stats
|
| 113 |
+
})
|
| 114 |
+
else:
|
| 115 |
+
non_v3_results.append(f"{repo_id}: **{episodes}** episodes")
|
| 116 |
|
| 117 |
except Exception as e:
|
| 118 |
+
errors.append(f"❌ {repo_id}: Error - {str(e)}")
|
| 119 |
|
| 120 |
+
# Build output
|
| 121 |
output = [f"## Total Episodes: {total_episodes}\n"]
|
|
|
|
| 122 |
|
| 123 |
+
# Display v3 datasets grouped by date
|
| 124 |
+
if v3_by_date:
|
| 125 |
+
output.append("### 📅 v3.0 Datasets by Date\n")
|
| 126 |
+
|
| 127 |
+
# Sort dates (most recent first)
|
| 128 |
+
sorted_dates = sorted([k for k in v3_by_date.keys() if k != 'unknown'], reverse=True)
|
| 129 |
+
if 'unknown' in v3_by_date:
|
| 130 |
+
sorted_dates.append('unknown')
|
| 131 |
+
|
| 132 |
+
for date_key in sorted_dates:
|
| 133 |
+
datasets = v3_by_date[date_key]
|
| 134 |
+
date_display = datasets[0]['date_display']
|
| 135 |
+
date_total = sum(d['episodes'] for d in datasets)
|
| 136 |
+
|
| 137 |
+
output.append(f"**{date_display}** — Total: **{date_total} episodes**")
|
| 138 |
+
|
| 139 |
+
for dataset in sorted(datasets, key=lambda x: x['repo_id']):
|
| 140 |
+
repo_name = dataset['repo_id'].split('/')[-1] # Just the dataset name
|
| 141 |
+
episodes = dataset['episodes']
|
| 142 |
+
|
| 143 |
+
# Add metadata if available
|
| 144 |
+
info_meta = dataset['stats'].get('info_metadata', {})
|
| 145 |
+
extra_info = []
|
| 146 |
+
if info_meta.get('total_frames'):
|
| 147 |
+
extra_info.append(f"{info_meta['total_frames']:,} frames")
|
| 148 |
+
if info_meta.get('robot_type'):
|
| 149 |
+
extra_info.append(f"{info_meta['robot_type']}")
|
| 150 |
+
|
| 151 |
+
extra_str = f" ({', '.join(extra_info)})" if extra_info else ""
|
| 152 |
+
output.append(f" • `{repo_name}`: {episodes} episodes{extra_str}")
|
| 153 |
+
|
| 154 |
+
output.append("") # Empty line between dates
|
| 155 |
+
|
| 156 |
+
# Display non-v3 datasets
|
| 157 |
+
if non_v3_results:
|
| 158 |
+
output.append("### 📦 v2.1 Datasets\n")
|
| 159 |
+
output.extend(non_v3_results)
|
| 160 |
+
output.append("")
|
| 161 |
+
|
| 162 |
+
# Display errors at the end
|
| 163 |
+
if errors:
|
| 164 |
+
output.append("### ⚠️ Errors\n")
|
| 165 |
+
output.extend(errors)
|
| 166 |
+
|
| 167 |
+
return "\n".join(output)
|
| 168 |
|
| 169 |
|
| 170 |
# Build the Gradio interface
|
|
|
|
| 175 |
_user_orgs = get_user_organizations()
|
| 176 |
_initial_datasets = search_datasets_fn(_user_orgs[0]) if _user_orgs else []
|
| 177 |
|
| 178 |
+
# State to track current dataset choices
|
| 179 |
+
current_choices = gr.State(_initial_datasets)
|
| 180 |
+
|
| 181 |
with gr.Row():
|
| 182 |
org_dropdown = gr.Dropdown(
|
| 183 |
label="Select Organization",
|
|
|
|
| 207 |
# Event handlers
|
| 208 |
def load_datasets_from_org(org_name):
|
| 209 |
results = search_datasets_fn(org_name)
|
| 210 |
+
return [
|
| 211 |
+
gr.update(choices=results, value=[]), # Update checkboxes
|
| 212 |
+
results # Update state
|
| 213 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
# Load datasets on button click or dropdown change
|
| 216 |
load_btn.click(
|
| 217 |
load_datasets_from_org,
|
| 218 |
inputs=org_dropdown,
|
| 219 |
+
outputs=[dataset_checkboxes, current_choices],
|
| 220 |
)
|
| 221 |
|
| 222 |
org_dropdown.change(
|
| 223 |
load_datasets_from_org,
|
| 224 |
inputs=org_dropdown,
|
| 225 |
+
outputs=[dataset_checkboxes, current_choices],
|
| 226 |
)
|
| 227 |
|
| 228 |
# Select/Deselect all buttons
|
| 229 |
select_all_btn.click(
|
| 230 |
lambda choices: gr.update(value=choices),
|
| 231 |
+
inputs=current_choices,
|
| 232 |
outputs=dataset_checkboxes,
|
| 233 |
)
|
| 234 |
|