// Best-effort icon resolver for radio stations.
// Order:
//   1. Radio-Browser favicon by exact-ish name (only if station.source !== 'radiobrowser',
//      else we already have it).
//   2. Scrape <link rel="icon">, <link rel="apple-touch-icon"> and
//      <meta property="og:image"> / <meta name="twitter:image"> from the homepage HTML.
//   3. HEAD-probe /favicon.ico at the homepage origin.
// Returns the best absolute URL found, or null.

const UA = 'OnlineRadioExplorer/0.1 (+icon-scraper)';
const FETCH_TIMEOUT_MS = 8000;
const MAX_HTML_BYTES = 256 * 1024;
const RB_BASE = 'https://de1.api.radio-browser.info';

// Tag scanners. Used only through String.prototype.matchAll, which works on a
// clone of the regex, so the shared /g state is never mutated between calls.
const LINK_TAG_RE = /<link\b([^>]*?)\/?>/gi;
const META_TAG_RE = /<meta\b([^>]*?)\/?>/gi;

// Attribute extractors (quoted values only).
const REL_ATTR_RE = /\brel\s*=\s*["']([^"']+)["']/i;
const HREF_ATTR_RE = /\bhref\s*=\s*["']([^"']+)["']/i;
const SIZES_ATTR_RE = /\bsizes\s*=\s*["']([^"']+)["']/i;
const META_KEY_ATTR_RE = /\b(?:property|name)\s*=\s*["']([^"']+)["']/i;
const CONTENT_ATTR_RE = /\bcontent\s*=\s*["']([^"']+)["']/i;

// Only the meta properties whose content is an image URL. A bare prefix test
// would also accept og:image:width / og:image:alt / og:image:type, whose
// content ("1200", alt text, ...) is not a URL.
const IMAGE_META_RE = /^(?:og:image(?::(?:secure_)?url)?|twitter:image(?::src)?)$/i;

/**
 * Create an abort signal that fires after `ms` milliseconds.
 * @param {number} ms - Timeout in milliseconds.
 * @returns {{ signal: AbortSignal, done: () => void }} The signal to pass to
 *   fetch, and `done()` which cancels the pending timer. Always call `done()`
 *   (ideally in `finally`) so the timer does not outlive the request.
 */
function withTimeout(ms) {
  const ctl = new AbortController();
  const timer = setTimeout(() => ctl.abort(), ms);
  return { signal: ctl.signal, done: () => clearTimeout(timer) };
}

/**
 * GET `url` and return roughly the first MAX_HTML_BYTES of the body as UTF-8 text.
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string|null>} Body text, or null on a non-2xx status,
 *   a missing body, a timeout, or any network error.
 */
async function fetchText(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': UA, 'Accept': 'text/html,application/xhtml+xml' },
      redirect: 'follow',
      signal: t.signal,
    });
    if (!res.ok) return null;
    const reader = res.body?.getReader();
    if (!reader) return null;

    let received = 0;
    const chunks = [];
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.length;
      chunks.push(value);
      if (received >= MAX_HTML_BYTES) {
        // Icon tags live in <head>; stop downloading once the cap is reached.
        try {
          await reader.cancel();
        } catch {
          // Cancelling is a courtesy; the bytes already read are still usable.
        }
        break;
      }
    }
    // Buffer.concat accepts Uint8Array chunks directly; no per-chunk copy needed.
    return Buffer.concat(chunks).toString('utf8');
  } catch {
    // Best-effort: any failure just means "no HTML available".
    return null;
  } finally {
    t.done();
  }
}

/**
 * HEAD-probe a URL.
 * @param {string} url - Absolute URL to probe.
 * @returns {Promise<boolean>} True when the (redirect-followed) response is 2xx;
 *   false on any other status, timeout, or network error.
 */
async function head(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      method: 'HEAD',
      headers: { 'User-Agent': UA },
      signal: t.signal,
      redirect: 'follow',
    });
    return res.ok;
  } catch {
    return false;
  } finally {
    t.done();
  }
}

/**
 * Resolve `href` against `base`.
 * @param {string} base - Base URL.
 * @param {string|null|undefined} href - Possibly relative reference.
 * @returns {string|null} Absolute URL string, or null if `href` is empty or
 *   the pair cannot be parsed as a URL.
 */
function abs(base, href) {
  if (!href) return null;
  try {
    return new URL(href, base).toString();
  } catch {
    return null;
  }
}

// Return capture group 1 of `re` applied to `text`, or undefined when there is no match.
function firstGroup(re, text) {
  return re.exec(text)?.[1];
}

// Attribute values are HTML-escaped in the markup; un-escape the ampersand so
// query strings such as "?w=1&amp;h=2" resolve to the URL the page intended.
function decodeAttr(value) {
  return value ? value.replaceAll('&amp;', '&') : value;
}

/**
 * Extract candidate icon URLs from raw HTML.
 *
 * Scoring: a <link rel="...icon..."> scores the width from its `sizes`
 * attribute (0 if absent), plus 64 for apple-touch-icons, which are usually
 * larger PNGs. og:image / twitter:image meta tags score a flat 200.
 *
 * @param {string} html - Raw page markup.
 * @param {string} baseUrl - URL the markup was fetched from, used to resolve
 *   relative references.
 * @returns {Array<{ href: string, score: number }>} Candidates sorted
 *   best-first (ties keep document order), de-duplicated by `href`.
 */
function parseIconCandidates(html, baseUrl) {
  const out = [];

  // <link rel="icon" href="..." sizes="WxH">
  for (const [, attrs] of html.matchAll(LINK_TAG_RE)) {
    const rel = firstGroup(REL_ATTR_RE, attrs) || '';
    if (!/icon/i.test(rel)) continue;
    const href = decodeAttr(firstGroup(HREF_ATTR_RE, attrs));
    if (!href) continue;
    const sizes = firstGroup(SIZES_ATTR_RE, attrs) || '';
    const width = Number.parseInt(firstGroup(/(\d+)x\d+/i, sizes) || '0', 10);
    const appleBias = /apple-touch-icon/i.test(rel) ? 64 : 0;
    const url = abs(baseUrl, href);
    if (url) out.push({ href: url, score: width + appleBias });
  }

  // <meta property="og:image" content="..."> / <meta name="twitter:image" content="...">
  for (const [, attrs] of html.matchAll(META_TAG_RE)) {
    const key = firstGroup(META_KEY_ATTR_RE, attrs) || '';
    if (!IMAGE_META_RE.test(key)) continue;
    const url = abs(baseUrl, decodeAttr(firstGroup(CONTENT_ATTR_RE, attrs)));
    if (url) out.push({ href: url, score: 200 }); // social images beat unsized favicons
  }

  out.sort((a, b) => b.score - a.score);

  // De-dupe, keeping the first (highest-scored) occurrence of each URL.
  const seen = new Set();
  return out.filter((cand) => {
    if (seen.has(cand.href)) return false;
    seen.add(cand.href);
    return true;
  });
}

/**
 * Look a station up by name on Radio-Browser and return its favicon.
 * Prefers a case-insensitive exact name match, otherwise the first
 * (most-clicked) search result.
 * @param {string|undefined} name - Station name.
 * @returns {Promise<string|null>} Favicon URL, or null when nothing usable
 *   was found or the lookup failed.
 */
async function fromRadioBrowserByName(name) {
  if (!name) return null;
  const url = `${RB_BASE}/json/stations/search?name=${encodeURIComponent(name)}&limit=3&hidebroken=true&order=clickcount&reverse=true`;
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA }, signal: t.signal });
    if (!res.ok) return null;
    const list = await res.json();
    if (!Array.isArray(list)) return null;
    const target = name.toLowerCase().trim();
    const exact = list.find((s) => (s?.name || '').toLowerCase().trim() === target);
    const pick = exact || list[0];
    return pick?.favicon || null;
  } catch {
    // Best-effort lookup: network/parse failures fall through to homepage scraping.
    return null;
  } finally {
    // In `finally` so the timer is cleared even when fetch rejects, and so the
    // timeout also bounds reading the JSON body.
    t.done();
  }
}

/**
 * Find an icon by scraping the station homepage, falling back to /favicon.ico.
 * @param {string|null|undefined} homepage - Station homepage URL.
 * @returns {Promise<string|null>} First candidate that answers a HEAD probe
 *   with 2xx, or null.
 */
async function fromHomepage(homepage) {
  if (!homepage) return null;
  let base;
  try {
    base = new URL(homepage);
  } catch {
    return null;
  }

  const html = await fetchText(base.toString());
  if (html) {
    // Probed sequentially on purpose: candidates are ordered best-first and
    // the first reachable one wins.
    for (const cand of parseIconCandidates(html, base.toString())) {
      if (await head(cand.href)) return cand.href;
    }
  }

  // Last resort: /favicon.ico
  const ico = `${base.origin}/favicon.ico`;
  return (await head(ico)) ? ico : null;
}

/**
 * Try to find an icon URL for a station.
 * @param {{ name?: string, homepage?: string|null, source?: string }} station
 * @returns {Promise<string|null>} Absolute icon URL, or null if none was found.
 */
export async function scrapeIcon(station) {
  if (!station) return null;
  // For non-RB stations, RB often still has an entry → cheap win.
  if (station.source !== 'radiobrowser') {
    const rb = await fromRadioBrowserByName(station.name);
    if (rb) return rb;
  }
  return fromHomepage(station.homepage);
}