// Best-effort icon resolver for radio stations.
// Order:
//   1. Radio-Browser favicon by exact-ish name (only if station.source !== 'radiobrowser',
//      else we already have it).
//   2. Scrape <link rel="icon">, <link rel="apple-touch-icon"> and
//      <meta property="og:image"> / <meta name="twitter:image"> from the homepage HTML.
//   3. Probe /favicon.ico at the homepage origin (small ranged GET, see head()).
//   4. Fall back to Google's public favicon service.
// Returns the best absolute URL found, or null.

// Browser-like UA: many station homepages (Cloudflare, Wikimedia) block opaque bots.
const UA = process.env.IMAGE_FETCH_UA || 'Mozilla/5.0 (compatible; OnlineRadioExplorer/0.1; +https://github.com/marcoheine/onlineRadioExplorer)';
const FETCH_TIMEOUT_MS = 8000;
const MAX_HTML_BYTES = 256 * 1024;
const RB_BASE = 'https://de1.api.radio-browser.info';

// How many leading bytes of a response are inspected when sniffing for markup.
const SNIFF_BYTES = 256;

/**
 * Create an abort signal that fires after `ms` milliseconds.
 * @param {number} ms
 * @returns {{ signal: AbortSignal, done: () => void }} `done()` clears the pending timer
 *   and must be called once the guarded work has finished.
 */
function withTimeout(ms) {
  const ctl = new AbortController();
  const timer = setTimeout(() => ctl.abort(), ms);
  return { signal: ctl.signal, done: () => clearTimeout(timer) };
}

/**
 * Fetch `url` and return at most roughly MAX_HTML_BYTES of the body as UTF-8 text.
 * @param {string} url
 * @returns {Promise<string|null>} the (possibly truncated) body, or null on any
 *   non-2xx status, missing body, network error or timeout.
 */
async function fetchText(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': UA, 'Accept': 'text/html,application/xhtml+xml' },
      redirect: 'follow',
      signal: t.signal,
    });
    if (!res.ok) return null;
    const reader = res.body?.getReader();
    if (!reader) return null;

    let received = 0;
    const chunks = [];
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.length;
      chunks.push(Buffer.from(value));
      if (received >= MAX_HTML_BYTES) {
        // Icon tags live in <head>; stop downloading once the cap is reached.
        try { await reader.cancel(); } catch { /* cancel is best-effort */ }
        break;
      }
    }
    return Buffer.concat(chunks).toString('utf8');
  } catch {
    // Best-effort resolver: any failure just means "no HTML available".
    return null;
  } finally {
    t.done();
  }
}

/**
 * Decode the first SNIFF_BYTES of `buf` as lower-cased text without leading whitespace.
 * @param {Buffer} buf
 * @returns {string}
 */
function textPrefix(buf) {
  return buf.subarray(0, SNIFF_BYTES).toString('utf8').trimStart().toLowerCase();
}

/**
 * Whether `buf` starts like an HTML document.
 * NOTE(review): the original prefix list was lost when the source was mangled;
 * this list is a reconstruction — confirm against the upstream file.
 * @param {Buffer} buf
 * @returns {boolean}
 */
function looksLikeHtml(buf) {
  const prefix = textPrefix(buf);
  return ['<!doctype html', '<html', '<head', '<body'].some((p) => prefix.startsWith(p));
}

/**
 * Probe whether `url` serves an actual image.
 *
 * We can't trust real HEAD: many CDNs/SPAs return 200 for *every* path with
 * HTML. So we issue a small ranged GET and check the response is actually
 * an image (content-type AND/OR magic bytes).
 * @param {string} url
 * @returns {Promise<boolean>} false on any error, timeout or non-image response.
 */
async function head(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': UA, 'Accept': 'image/*', 'Range': 'bytes=0-1023' },
      signal: t.signal,
      redirect: 'follow',
    });
    // res.ok covers every 2xx status, including 206 Partial Content.
    if (!res.ok) return false;

    const ct = (res.headers.get('content-type') || '').toLowerCase().split(';')[0].trim();
    if (ct.startsWith('text/') || ct.includes('html')) return false;

    // Sniff the first chunk to make sure it's not HTML masquerading as image/*.
    const reader = res.body?.getReader();
    if (!reader) return ct.startsWith('image/');
    const { value } = await reader.read();
    try { await reader.cancel(); } catch { /* cancel is best-effort */ }

    const buf = value ? Buffer.from(value) : Buffer.alloc(0);
    if (looksLikeHtml(buf)) return false;
    // Anything that is not HTML still has to carry a recognised image signature.
    return isImageMagic(buf);
  } catch {
    return false;
  } finally {
    t.done();
  }
}

/**
 * Check the leading bytes of `buf` for a known image signature
 * (PNG, JPEG, GIF, WEBP, ICO) or SVG markup.
 * @param {Buffer} buf
 * @returns {boolean}
 */
function isImageMagic(buf) {
  if (buf.length < 4) return false;
  if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) return true; // PNG
  if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) return true; // JPEG
  if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) return true; // GIF
  if (buf.length >= 12
    && buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46
    && buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) return true; // WEBP
  if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) return true; // ICO

  // NOTE(review): the SVG condition was lost when the source was mangled; this is a
  // reconstruction. An XML declaration alone is not enough — require an svg marker too.
  const prefix = textPrefix(buf);
  if (prefix.startsWith('<svg')) return true;
  return prefix.startsWith('<?xml') && /<svg\b|<!doctype svg/.test(prefix);
}

/**
 * Resolve `href` against `base`.
 * NOTE(review): this helper's body was lost when the source was mangled; only its
 * call shape `abs(baseUrl, href)` is visible. Reconstructed — confirm upstream.
 * @param {string} base
 * @param {string|null|undefined} href
 * @returns {string|null} absolute URL, or null when `href` is empty or unparsable.
 */
function abs(base, href) {
  if (!href) return null;
  try {
    return new URL(href, base).toString();
  } catch {
    return null;
  }
}

/**
 * Read one attribute value out of the raw attribute text of a tag.
 * Accepts double-quoted, single-quoted and unquoted values and decodes `&amp;`.
 * @param {string} attrs raw text between the tag name and the closing `>`
 * @param {string} name attribute name (a fixed identifier, never user input)
 * @returns {string|null} trimmed value, or null when the attribute is absent.
 */
function attr(attrs, name) {
  // Require start-of-text or whitespace before the name so `data-href` is not read as `href`.
  const re = new RegExp(`(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
  const m = re.exec(attrs);
  if (!m) return null;
  const raw = m[1] ?? m[2] ?? m[3] ?? '';
  return raw.replaceAll('&amp;', '&').trim();
}

// Meta properties whose content is an image URL. Structured sub-properties such as
// og:image:width / og:image:alt carry non-URL content and must not become candidates.
const IMAGE_META_RE = /^(?:og:image(?::(?:secure_)?url)?|twitter:image(?::src)?)$/i;

/**
 * Extract icon candidates from homepage HTML.
 * @param {string} html
 * @param {string} baseUrl URL that relative references are resolved against
 * @returns {{ href: string, score: number }[]} unique absolute URLs, highest score
 *   first; equal scores keep document order.
 */
function parseIconCandidates(html, baseUrl) {
  const out = [];

  // <link rel="...icon..." href="..." sizes="WxH ...">
  // NOTE(review): tag regex reconstructed from its visible tail `]*?)\/?>/gi`.
  for (const [, attrs] of html.matchAll(/<link\b([^>]*?)\/?>/gi)) {
    const rel = attr(attrs, 'rel') || '';
    if (!/icon/i.test(rel)) continue;
    const url = abs(baseUrl, attr(attrs, 'href'));
    if (!url) continue;
    // `sizes` may list several entries; score by the largest declared width.
    const widths = [...(attr(attrs, 'sizes') || '').matchAll(/(\d+)x\d+/gi)]
      .map((m) => Number.parseInt(m[1], 10));
    const size = Math.max(0, ...widths);
    const appleBias = /apple-touch-icon/i.test(rel) ? 64 : 0; // bias: apple-touch-icons usually larger PNGs
    out.push({ href: url, score: size + appleBias });
  }

  // <meta property="og:image" content="..."> / <meta name="twitter:image" content="...">
  for (const [, attrs] of html.matchAll(/<meta\b([^>]*?)\/?>/gi)) {
    const prop = attr(attrs, 'property') ?? attr(attrs, 'name') ?? '';
    if (!IMAGE_META_RE.test(prop)) continue;
    const url = abs(baseUrl, attr(attrs, 'content'));
    if (url) out.push({ href: url, score: 200 }); // og:image preferred
  }

  out.sort((a, b) => b.score - a.score);

  // De-dupe, keeping the first (highest-scored) occurrence of each URL.
  const seen = new Set();
  return out.filter(({ href }) => {
    if (seen.has(href)) return false;
    seen.add(href);
    return true;
  });
}

/**
 * Look a station up by name on Radio-Browser and return its favicon URL.
 * Prefers a case-insensitive exact name match, otherwise the first search result.
 * @param {string|undefined|null} name
 * @returns {Promise<string|null>} favicon URL, or null when not found or on any error.
 */
async function fromRadioBrowserByName(name) {
  if (!name) return null;
  const q = String(name).trim();
  if (!q) return null;

  const url = `${RB_BASE}/json/stations/search?name=${encodeURIComponent(q)}&limit=3&hidebroken=true&order=clickcount&reverse=true`;
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA }, signal: t.signal });
    if (!res.ok) return null;
    const list = await res.json();
    if (!Array.isArray(list) || list.length === 0) return null;

    const target = q.toLowerCase();
    const exact = list.find((s) => String(s?.name || '').toLowerCase().trim() === target);
    const favicon = (exact || list[0])?.favicon;
    return typeof favicon === 'string' && favicon ? favicon : null;
  } catch {
    // Best-effort lookup: network/JSON failures simply mean "no favicon from RB".
    return null;
  } finally {
    // Keep the timeout armed until the body is parsed, and always clear it afterwards.
    t.done();
  }
}

/**
 * Find an icon by scraping the station homepage, then probing /favicon.ico.
 * Candidates are probed one at a time, best score first.
 * @param {string|null|undefined} homepage
 * @returns {Promise<string|null>} first candidate URL that head() accepts, or null.
 */
async function fromHomepage(homepage) {
  if (!homepage) return null;
  let base;
  try {
    base = new URL(homepage);
  } catch {
    return null;
  }
  // Only web origins can be scraped; other schemes have no usable origin for /favicon.ico.
  if (base.protocol !== 'http:' && base.protocol !== 'https:') return null;

  const html = await fetchText(base.toString());
  if (html) {
    for (const candidate of parseIconCandidates(html, base.toString())) {
      if (await head(candidate.href)) return candidate.href;
    }
  }

  // Last resort on this host: /favicon.ico
  const ico = `${base.origin}/favicon.ico`;
  if (await head(ico)) return ico;
  return null;
}

// Final fallback: Google's public favicon service. Returns a real PNG (the
// browser-side favicon Google has on file) for virtually any homepage, so
// even SPA/JS-only sites end up with *some* artwork.
/**
 * Build the Google favicon-service URL for a homepage's hostname.
 * @param {string|null|undefined} homepage
 * @param {number} [size=128] requested icon size, passed as the `sz` query parameter
 * @returns {string|null} service URL, or null when `homepage` has no parsable hostname.
 */
function fromGoogleFavicon(homepage, size = 128) {
  if (!homepage) return null;
  let host;
  try {
    host = new URL(homepage).hostname;
  } catch {
    return null;
  }
  if (!host) return null;
  return `https://www.google.com/s2/favicons?sz=${size}&domain=${encodeURIComponent(host)}`;
}

/**
 * Try to find an icon URL for a station.
 * @param {{ name?: string, homepage?: string|null, source?: string }} station
 * @returns {Promise<string|null>}
 */
export async function scrapeIcon(station) {
  if (!station) return null;

  // For non-RB stations, RB often still has an entry → cheap win.
  if (station.source !== 'radiobrowser') {
    const rb = await fromRadioBrowserByName(station.name);
    if (rb && await head(rb)) return rb;
  }

  const fromPage = await fromHomepage(station.homepage);
  if (fromPage) return fromPage;

  // Last-ditch: ask Google's favicon service. It almost always returns a
  // 128×128 PNG, even for SPA-only homepages where direct scraping fails.
  const g = fromGoogleFavicon(station.homepage, 128);
  if (g && await head(g)) return g;
  return null;
}