CrispStrobe committed on
Commit
aea9373
·
1 Parent(s): 2d8f720

feat: implement canonical model IDs for improved UI grouping

Browse files
Files changed (3) hide show
  1. data/providers.json +0 -0
  2. scripts/fetch-providers.js +23 -2
  3. src/App.tsx +6 -5
data/providers.json CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/fetch-providers.js CHANGED
@@ -87,6 +87,22 @@ function updateProviderModels(providers, providerName, models) {
87
  const normName = (s) =>
88
  s.toLowerCase().replace(/[-_.:]/g, ' ').replace(/[^a-z0-9 ]/g, '').replace(/\s+/g, ' ').trim();
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  // Estimate parameters from config.json (vLLM style fallback)
91
  function estimateParams(config, hfId) {
92
  if (!config) return null;
@@ -305,6 +321,7 @@ async function propagateExtraData(data) {
305
 
306
  // 1. Initial manual and fuzzy mapping
307
  data.providers.forEach(p => p.models.forEach(model => {
 
308
  const n = normName(model.name);
309
  for (const [key, val] of Object.entries(MANUAL_HF_ID_MAP)) {
310
  const nk = normName(key);
@@ -367,9 +384,11 @@ async function propagateExtraData(data) {
367
  hf_id: m.hf_id,
368
  ollama_id: m.ollama_id,
369
  hf_private: m.hf_private,
370
- capabilities: m.capabilities
 
371
  };
372
  if (m.hf_id) technicalPool.set('id:' + m.hf_id.toLowerCase(), meta);
 
373
  technicalPool.set('name:' + baseName, meta);
374
  }
375
  }));
@@ -378,12 +397,14 @@ async function propagateExtraData(data) {
378
  const baseName = m.name.split('/').pop().replace(/:free$/, '').toLowerCase();
379
  const metaByName = technicalPool.get('name:' + baseName);
380
  const metaById = m.hf_id ? technicalPool.get('id:' + m.hf_id.toLowerCase()) : null;
381
- const best = metaById || metaByName;
 
382
  if (best) {
383
  m.size_b = m.size_b || best.size_b;
384
  m.size_source = m.size_source || best.size_source;
385
  m.hf_id = m.hf_id || best.hf_id;
386
  m.ollama_id = m.ollama_id || best.ollama_id;
 
387
  if (best.capabilities && (!m.capabilities || m.capabilities.length === 0)) {
388
  m.capabilities = best.capabilities;
389
  }
 
87
  const normName = (s) =>
88
  s.toLowerCase().replace(/[-_.:]/g, ' ').replace(/[^a-z0-9 ]/g, '').replace(/\s+/g, ' ').trim();
89
 
90
+ // Generates a stable ID for grouping models that lack an HF ID.
91
+ // Strips dates (2507, 2411), provider prefixes, and common suffixes.
92
+ function getCanonicalId(name) {
93
+ let id = name.toLowerCase()
94
+ .split('/').pop() // Strip provider prefixes (e.g. "openai/")
95
+ .replace(/[:@].*$/, '') // Strip tags/versions (e.g. ":free", "@latest")
96
+ .replace(/[-_.]/g, ' ') // Standardize separators
97
+ .replace(/\b(instruct|it|chat|thinking|latest|preview|vision|experimental|exp|v\d+(\.\d+)*)\b/g, '') // Strip common suffixes
98
+ .replace(/\b\d{4}\b/g, '') // Strip 4-digit dates (e.g. 2507, 2411)
99
+ .replace(/\s+/g, '-') // Collapse to kebab-case
100
+ .trim()
101
+ .replace(/^-+|-+$/g, ''); // Trim leading/trailing dashes
102
+
103
+ return id || name.toLowerCase();
104
+ }
105
+
106
  // Estimate parameters from config.json (vLLM style fallback)
107
  function estimateParams(config, hfId) {
108
  if (!config) return null;
 
321
 
322
  // 1. Initial manual and fuzzy mapping
323
  data.providers.forEach(p => p.models.forEach(model => {
324
+ model.canonical_id = getCanonicalId(model.name);
325
  const n = normName(model.name);
326
  for (const [key, val] of Object.entries(MANUAL_HF_ID_MAP)) {
327
  const nk = normName(key);
 
384
  hf_id: m.hf_id,
385
  ollama_id: m.ollama_id,
386
  hf_private: m.hf_private,
387
+ capabilities: m.capabilities,
388
+ canonical_id: m.canonical_id
389
  };
390
  if (m.hf_id) technicalPool.set('id:' + m.hf_id.toLowerCase(), meta);
391
+ if (m.canonical_id) technicalPool.set('canon:' + m.canonical_id.toLowerCase(), meta);
392
  technicalPool.set('name:' + baseName, meta);
393
  }
394
  }));
 
397
  const baseName = m.name.split('/').pop().replace(/:free$/, '').toLowerCase();
398
  const metaByName = technicalPool.get('name:' + baseName);
399
  const metaById = m.hf_id ? technicalPool.get('id:' + m.hf_id.toLowerCase()) : null;
400
+ const metaByCanon = m.canonical_id ? technicalPool.get('canon:' + m.canonical_id.toLowerCase()) : null;
401
+ const best = metaById || metaByCanon || metaByName;
402
  if (best) {
403
  m.size_b = m.size_b || best.size_b;
404
  m.size_source = m.size_source || best.size_source;
405
  m.hf_id = m.hf_id || best.hf_id;
406
  m.ollama_id = m.ollama_id || best.ollama_id;
407
+ m.canonical_id = m.canonical_id || best.canonical_id;
408
  if (best.capabilities && (!m.capabilities || m.capabilities.length === 0)) {
409
  m.capabilities = best.capabilities;
410
  }
src/App.tsx CHANGED
@@ -16,9 +16,10 @@ interface Model {
16
  price_per_1m_tokens_30d?: number
17
  currency: string
18
  capabilities?: string[]
19
- display_name?: string
20
- hf_id?: string
21
- ollama_id?: string
 
22
  hf_private?: boolean;
23
  size_source?: 'hf-total' | 'hf-config-estimate' | 'hf-card' | 'ollama' | 'manual' | 'benchmark' | 'openrouter';
24
  provider?: Provider;
@@ -399,8 +400,8 @@ function App() {
399
 
400
  const groups: Record<string, typeof sortedModels> = {};
401
  sortedModels.forEach(m => {
402
- // Prioritize hf_id for grouping key
403
- const key = (m.hf_id || m.name || '').toLowerCase();
404
  if (!groups[key]) groups[key] = [];
405
  groups[key].push(m);
406
  });
 
16
  price_per_1m_tokens_30d?: number
17
  currency: string
18
  capabilities?: string[]
19
+ display_name?: string;
20
+ hf_id?: string;
21
+ canonical_id?: string;
22
+ ollama_id?: string;
23
  hf_private?: boolean;
24
  size_source?: 'hf-total' | 'hf-config-estimate' | 'hf-card' | 'ollama' | 'manual' | 'benchmark' | 'openrouter';
25
  provider?: Provider;
 
400
 
401
  const groups: Record<string, typeof sortedModels> = {};
402
  sortedModels.forEach(m => {
403
+ // Grouping priority: HF ID > Canonical ID > Name
404
+ const key = (m.hf_id || m.canonical_id || m.name || '').toLowerCase();
405
  if (!groups[key]) groups[key] = [];
406
  groups[key].push(m);
407
  });