// Best-effort icon resolver for radio stations.
// Order:
// 1. Radio-Browser favicon by exact-ish name (only if station.source !== 'radiobrowser', else
// we already have it).
// 2. Scrape <link rel="icon">, <link rel="apple-touch-icon">, <meta property="og:image">
// from the homepage HTML.
// 3. Probe /favicon.ico at the homepage origin (a small ranged GET, not a real HEAD).
// 4. Fall back to Google's public favicon service.
// Returns the best absolute URL found, or null.
// Browser-like UA: many station homepages (Cloudflare, Wikimedia) block opaque bots.
// The IMAGE_FETCH_UA environment variable overrides the default when it is non-empty.
const DEFAULT_UA = 'Mozilla/5.0 (compatible; OnlineRadioExplorer/0.1; +https://github.com/marcoheine/onlineRadioExplorer)';
const UA = process.env.IMAGE_FETCH_UA || DEFAULT_UA;

// Per-request time budget, in milliseconds.
const FETCH_TIMEOUT_MS = 8 * 1000;

// Stop reading a homepage once this many bytes have been received.
const MAX_HTML_BYTES = 256 * 1024;

// Radio-Browser API host used for the name lookup.
const RB_BASE = 'https://de1.api.radio-browser.info';
/**
 * Create an abort signal that fires after `ms` milliseconds.
 * @param {number} ms - delay before the signal is aborted.
 * @returns {{ signal: AbortSignal, done: () => void }} the signal to pass to
 *   `fetch`, and a `done` callback that cancels the pending timer.
 */
function withTimeout(ms) {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, ms);
  return {
    signal: controller.signal,
    done() {
      clearTimeout(timer);
    },
  };
}
/**
 * Download a page as UTF-8 text, reading at most roughly MAX_HTML_BYTES.
 *
 * Reading stops after the chunk that crosses the cap, so the result can
 * overshoot MAX_HTML_BYTES by up to one chunk. Best-effort: any failure
 * (network error, timeout, non-2xx status, missing body) yields null.
 *
 * @param {string} url - absolute URL to fetch.
 * @returns {Promise<string|null>} the decoded text, or null on failure.
 */
async function fetchText(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': UA, 'Accept': 'text/html,application/xhtml+xml' },
      redirect: 'follow',
      signal: t.signal,
    });
    if (!res.ok) {
      // Release the unread body so the underlying connection is not held open.
      try { await res.body?.cancel(); } catch { }
      return null;
    }
    const reader = res.body?.getReader();
    if (!reader) return null;
    let received = 0;
    const chunks = [];
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.length;
      chunks.push(value);
      if (received >= MAX_HTML_BYTES) {
        // Enough HTML collected; stop the download early.
        try { await reader.cancel(); } catch { }
        break;
      }
    }
    // Buffer.concat accepts Uint8Array chunks directly; no per-chunk copy needed.
    return Buffer.concat(chunks).toString('utf8');
  } catch {
    // Deliberately silent: icon resolution is best-effort.
    return null;
  } finally {
    // Runs after the body has been read, so the timeout covers the whole download.
    t.done();
  }
}
/**
 * Probe a URL and decide whether it really serves an image.
 *
 * We can't trust a real HEAD: many CDNs/SPAs return 200 for *every* path with
 * HTML. So this issues a small ranged GET and checks that the response is
 * actually an image (declared content-type and/or magic bytes).
 *
 * NOTE(review): the statement after the HTML sniff was truncated in the
 * reviewed source. The "reject HTML, accept image/*, otherwise check magic
 * bytes" sequence below was restored from the surrounding comments — confirm
 * against version control.
 *
 * @param {string} url - absolute URL to probe.
 * @returns {Promise<boolean>} true when the response looks like an image;
 *   false otherwise, including on any network error or timeout.
 */
async function head(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': UA, 'Accept': 'image/*', 'Range': 'bytes=0-1023' },
      signal: t.signal,
      redirect: 'follow',
    });
    const ct = (res.headers.get('content-type') || '').toLowerCase().split(';')[0].trim();
    // res.ok already covers 206 Partial Content (any 2xx status).
    if (!res.ok || ct.startsWith('text/') || ct.includes('html')) {
      // Release the unread body so the connection is not held open.
      try { await res.body?.cancel(); } catch { }
      return false;
    }
    const reader = res.body?.getReader();
    if (!reader) return ct.startsWith('image/');
    // Only the first chunk is needed; servers that ignore Range are cut off here.
    const { value } = await reader.read();
    try { await reader.cancel(); } catch { }
    const buf = value ? Buffer.from(value) : Buffer.alloc(0);
    // Sniff the first bytes to make sure it's not HTML masquerading as image/*.
    const prefix = buf.subarray(0, 256).toString('utf8').trimStart().toLowerCase();
    if (prefix.startsWith('<!doctype html') || prefix.startsWith('<html')) return false;
    // A declared image type with a non-empty body is accepted as-is.
    if (ct.startsWith('image/') && buf.length > 0) return true;
    // Unknown or generic content-type: fall back to signature detection.
    return isImageMagic(buf);
  } catch {
    // Deliberately silent: a failed probe just means "not usable".
    return false;
  } finally {
    t.done();
  }
}
// NOTE(review): in the reviewed source the end of isImageMagic, the whole
// `abs` helper and the header of parseIconCandidates were lost (text between
// '<' and '>' had been stripped). They are restored here from how the
// remaining code uses them — confirm against version control.

/**
 * Check whether a buffer starts with the signature of a known image format
 * (PNG, JPEG, GIF, WEBP, ICO) or looks like an SVG document.
 * @param {Buffer} buf - leading bytes of the response body.
 * @returns {boolean} true when a known image signature is found.
 */
function isImageMagic(buf) {
  if (buf.length < 4) return false;
  if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) return true; // PNG
  if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) return true; // JPEG
  if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) return true; // GIF
  if (buf.length >= 12 && buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46
    && buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) return true; // WEBP
  if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) return true; // ICO
  // SVG is text: accept a leading <svg, or an XML prolog followed by <svg.
  const prefix = buf.subarray(0, 256).toString('utf8').trimStart().toLowerCase();
  if (prefix.startsWith('<svg')) return true;
  return prefix.startsWith('<?xml') && prefix.includes('<svg');
}

/**
 * Resolve `href` against `base`.
 * @param {string} base - absolute URL of the page the reference came from.
 * @param {string|undefined|null} href - possibly relative reference.
 * @returns {string|null} the absolute URL, or null when `href` is empty or
 *   cannot be resolved.
 */
function abs(base, href) {
  // Guard first: new URL(undefined, base) would resolve the literal "undefined".
  if (!href) return null;
  try {
    return new URL(href, base).toString();
  } catch {
    return null;
  }
}

/**
 * Read one quoted attribute value out of a tag's raw attribute text.
 * @param {string} attrs - text between the tag name and the closing '>'.
 * @param {string} namePattern - regex source matching the attribute name
 *   (internal literals only, never user input).
 * @returns {string} the trimmed value with `&amp;` decoded, or '' if absent.
 */
function attrValue(attrs, namePattern) {
  // (?:^|\s) keeps `href` from matching inside `data-href`.
  const re = new RegExp(`(?:^|\\s)${namePattern}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
  const m = re.exec(attrs);
  if (!m) return '';
  const raw = m[1] ?? m[2] ?? '';
  return raw.replace(/&amp;/gi, '&').trim();
}

/**
 * Extract icon/artwork candidates from homepage HTML.
 *
 * Collects <link rel="...icon..."> entries (scored by the declared `sizes`
 * width, plus a 64-point bonus for apple-touch-icon) and og:image /
 * twitter:image <meta> entries (fixed score 200).
 *
 * @param {string} html - raw page markup.
 * @param {string} baseUrl - page URL used to resolve relative references.
 * @returns {{ href: string, score: number }[]} candidates with absolute URLs,
 *   highest score first, duplicates removed.
 */
function parseIconCandidates(html, baseUrl) {
  if (typeof html !== 'string' || html === '') return [];
  const out = [];

  // <link rel="icon" href="..." sizes="...">
  const linkRe = /<link\b([^>]*?)\/?>/gi;
  for (const m of html.matchAll(linkRe)) {
    const attrs = m[1];
    const rel = attrValue(attrs, 'rel');
    if (!/icon/i.test(rel)) continue;
    const href = attrValue(attrs, 'href');
    if (!href) continue;
    const sizes = attrValue(attrs, 'sizes');
    // First declared WxH pair; "any" or a missing attribute scores 0.
    const sz = Number.parseInt((/(\d+)x\d+/i.exec(sizes) || [])[1] || '0', 10);
    const apple = /apple-touch-icon/i.test(rel) ? 64 : 0; // bias: apple-touch-icons usually larger PNGs
    const u = abs(baseUrl, href);
    if (u) out.push({ href: u, score: sz + apple });
  }

  // <meta property="og:image" content="...">
  const metaRe = /<meta\b([^>]*?)\/?>/gi;
  // Exact property names only: og:image:width/height/type carry numbers or
  // MIME types, not URLs, and must not become candidates.
  const imagePropRe = /^(?:og:image(?::(?:secure_)?url)?|twitter:image(?::src)?)$/i;
  for (const m of html.matchAll(metaRe)) {
    const attrs = m[1];
    const prop = attrValue(attrs, '(?:property|name)');
    if (!imagePropRe.test(prop)) continue;
    const u = abs(baseUrl, attrValue(attrs, 'content'));
    if (u) out.push({ href: u, score: 200 }); // og:image preferred
  }

  // Array.prototype.sort is stable, so equal scores keep document order.
  out.sort((a, b) => b.score - a.score);
  // de-dupe preserving order
  const seen = new Set();
  return out.filter((c) => (seen.has(c.href) ? false : (seen.add(c.href), true)));
}
/**
 * Look a station up by name on Radio-Browser and return its favicon URL.
 *
 * Requests the three most-clicked matches, prefers one whose name equals the
 * query (case-insensitive, trimmed), and otherwise falls back to the first
 * result. Best-effort: any failure yields null.
 *
 * @param {string|undefined|null} name - station name to search for.
 * @returns {Promise<string|null>} the favicon URL of the chosen entry, or
 *   null when nothing usable was found.
 */
async function fromRadioBrowserByName(name) {
  if (!name) return null;
  const q = String(name).trim();
  if (!q) return null;
  const url = `${RB_BASE}/json/stations/search?name=${encodeURIComponent(q)}&limit=3&hidebroken=true&order=clickcount&reverse=true`;
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA }, signal: t.signal });
    if (!res.ok) {
      // Release the unread body so the connection is not held open.
      try { await res.body?.cancel(); } catch { }
      return null;
    }
    const list = await res.json();
    // Error payloads may be objects rather than a result array.
    if (!Array.isArray(list)) return null;
    const target = q.toLowerCase();
    const exact = list.find((s) => (s?.name || '').toLowerCase().trim() === target);
    const pick = exact || list[0];
    return pick?.favicon || null;
  } catch {
    // Deliberately silent: this lookup is an optional shortcut.
    return null;
  } finally {
    // Always clear the timer, including when fetch rejects, and only after
    // the body has been read so the timeout covers res.json() too.
    t.done();
  }
}
/**
 * Find an icon by inspecting a station's homepage.
 *
 * Downloads the page, probes each parsed candidate in ranked order and
 * returns the first one that passes the image check; if none does (or the
 * page could not be fetched) it tries `/favicon.ico` at the page's origin.
 *
 * @param {string|undefined|null} homepage - homepage URL of the station.
 * @returns {Promise<string|null>} the first verified icon URL, or null.
 */
async function fromHomepage(homepage) {
  if (!homepage) return null;

  let pageUrl;
  try {
    pageUrl = new URL(homepage);
  } catch {
    return null;
  }

  const pageHref = pageUrl.toString();
  const markup = await fetchText(pageHref);
  const candidates = markup ? parseIconCandidates(markup, pageHref) : [];
  // Probe sequentially so the highest-ranked working candidate wins.
  for (const { href } of candidates) {
    if (await head(href)) return href;
  }

  // last resort on this host: /favicon.ico
  const fallback = `${pageUrl.origin}/favicon.ico`;
  return (await head(fallback)) ? fallback : null;
}
/**
 * Final fallback: build the URL of Google's public favicon service for the
 * homepage's host, so that even SPA/JS-only sites end up with *some* artwork.
 * This only builds the URL; no request is made here.
 *
 * @param {string|undefined|null} homepage - homepage URL of the station.
 * @param {number} [size=128] - requested icon size, passed as the `sz` parameter.
 * @returns {string|null} the service URL, or null when the homepage is
 *   missing, unparsable, or has no hostname.
 */
function fromGoogleFavicon(homepage, size = 128) {
  if (!homepage) return null;

  let hostname = '';
  try {
    ({ hostname } = new URL(homepage));
  } catch {
    return null;
  }

  return hostname
    ? `https://www.google.com/s2/favicons?sz=${size}&domain=${encodeURIComponent(hostname)}`
    : null;
}
/**
 * Try to find an icon URL for a station.
 *
 * Sources are tried in order and the first verified hit wins:
 *   1. Radio-Browser lookup by name (skipped when the station already comes
 *      from Radio-Browser),
 *   2. the station's homepage,
 *   3. Google's favicon service.
 *
 * @param {{ name?: string, homepage?: string|null, source?: string }} station
 * @returns {Promise<string|null>} an absolute icon URL, or null if none was found.
 */
export async function scrapeIcon(station) {
  if (!station) return null;
  const { name, homepage, source } = station;

  // For non-RB stations, RB often still has an entry → cheap win.
  if (source !== 'radiobrowser') {
    const rbIcon = await fromRadioBrowserByName(name);
    if (rbIcon && (await head(rbIcon))) return rbIcon;
  }

  const pageIcon = await fromHomepage(homepage);
  if (pageIcon) return pageIcon;

  // Last-ditch: ask Google's favicon service for a 128px icon; the result is
  // still probed before it is returned.
  const googleIcon = fromGoogleFavicon(homepage, 128);
  return googleIcon && (await head(googleIcon)) ? googleIcon : null;
}