Spaces:
Running
Running
| ; | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| const { fetchRobust } = require('./fetch-utils'); | |
| const PROVIDERS_FILE = path.join(__dirname, '..', 'data', 'providers.json'); | |
| async function checkHfId(hfId) { | |
| if (!hfId) return { valid: true, status: 'N/A' }; | |
| const url = `https://huggingface.co/${hfId}`; | |
| try { | |
| const res = await fetchRobust(url, { method: 'HEAD', retries: 1 }); | |
| if (res.status === 200 || res.status === 302) { | |
| return { valid: true, status: res.status }; | |
| } | |
| return { valid: false, status: res.status }; | |
| } catch (e) { | |
| if (e.message.includes('404')) return { valid: false, status: 404 }; | |
| return { valid: true, status: 'Error (Assume valid)' }; | |
| } | |
| } | |
| async function main() { | |
| const force = process.argv.includes('--force'); | |
| console.log('Starting Hugging Face Repository Validation...'); | |
| if (force) console.log(' [!] Force mode enabled: checking all IDs regardless of cache.\n'); | |
| else console.log(' [i] Using cache: only checking IDs not validated in the last 30 days.\n'); | |
| const data = JSON.parse(fs.readFileSync(PROVIDERS_FILE, 'utf8')); | |
| const hfIdToModels = new Map(); | |
| const hfIdMeta = new Map(); // Store metadata (validated_at, status) | |
| data.providers.forEach(p => { | |
| p.models.forEach(m => { | |
| if (m.hf_id) { | |
| if (!hfIdToModels.has(m.hf_id)) hfIdToModels.set(m.hf_id, []); | |
| hfIdToModels.get(m.hf_id).push(`${p.name}: ${m.name}`); | |
| // Cache metadata if present | |
| if (m.hf_validated_at && m.hf_status === 200) { | |
| const existing = hfIdMeta.get(m.hf_id); | |
| if (!existing || new Date(m.hf_validated_at) > new Date(existing.at)) { | |
| hfIdMeta.set(m.hf_id, { at: m.hf_validated_at, status: m.hf_status }); | |
| } | |
| } | |
| } | |
| }); | |
| }); | |
| const ids = Array.from(hfIdToModels.keys()); | |
| console.log(`Found ${ids.length} unique HF IDs to validate.\n`); | |
| const invalidIds = new Set(); | |
| const now = new Date(); | |
| const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000; | |
| const validationResults = new Map(); // id -> { status, at } | |
| for (let i = 0; i < ids.length; i++) { | |
| const id = ids[i]; | |
| const progress = `[${i + 1}/${ids.length}]`.padEnd(10); | |
| const cached = hfIdMeta.get(id); | |
| const isRecent = cached && (now - new Date(cached.at) < THIRTY_DAYS_MS); | |
| if (isRecent && !force) { | |
| console.log(`${progress} ≈ CACHED (${cached.status}) ${id} (last checked ${new Date(cached.at).toLocaleDateString()})`); | |
| validationResults.set(id, { status: cached.status, at: cached.at }); | |
| continue; | |
| } | |
| const check = await checkHfId(id); | |
| validationResults.set(id, { status: typeof check.status === 'number' ? check.status : 200, at: now.toISOString() }); | |
| if (check.valid) { | |
| console.log(`${progress} ✓ VALID (${check.status}) ${id}`); | |
| } else { | |
| console.log(`${progress} ✗ INVALID (${check.status}) ${id}`); | |
| console.log(` Used by: ${hfIdToModels.get(id).join(', ')}`); | |
| invalidIds.add(id); | |
| } | |
| // Small delay to prevent rate limiting | |
| await new Promise(r => setTimeout(r, 50)); | |
| } | |
| console.log('\nUpdating providers.json with validation results...'); | |
| let updatedCount = 0; | |
| let removalCount = 0; | |
| data.providers.forEach(p => { | |
| p.models.forEach(m => { | |
| if (m.hf_id) { | |
| const res = validationResults.get(m.hf_id); | |
| if (invalidIds.has(m.hf_id)) { | |
| delete m.hf_id; | |
| delete m.hf_validated_at; | |
| delete m.hf_status; | |
| removalCount++; | |
| } else if (res) { | |
| m.hf_validated_at = res.at; | |
| m.hf_status = res.status; | |
| updatedCount++; | |
| } | |
| } | |
| }); | |
| }); | |
| fs.writeFileSync(PROVIDERS_FILE, JSON.stringify(data, null, 2)); | |
| console.log(`Done. Updated ${updatedCount} models, removed ${removalCount} invalid IDs.`); | |
| } | |
| main().catch(err => { | |
| console.error('\nFatal error during validation:', err); | |
| process.exit(1); | |
| }); | |