Files
radio-explorer/server/sources/iconScraper.js
Marco Mooren b86dcfbb8d Add master display UI with audio output management and styling
- Implement main.js for the master display functionality, including WebSocket connection, audio output management, and state handling.
- Create style.css for the master display's visual design, ensuring a cohesive look and feel with a dark theme and responsive layout.
- Integrate device management with a fallback for non-Electron environments, allowing users to select audio outputs.
- Add features for managing favorites, including toggling favorites and filtering by genre.
- Enhance user experience with a responsive favorites grid and drag-to-scroll functionality.
2026-05-11 17:55:09 +02:00

197 lines
8.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Best-effort icon resolver for radio stations.
// Order:
//   1. Radio-Browser favicon by exact-ish name (only if station.source !== 'radiobrowser';
//      otherwise we already have it).
//   2. Scrape <link rel="icon">, <link rel="apple-touch-icon">, <meta property="og:image">
//      from the homepage HTML.
//   3. Probe /favicon.ico at the homepage origin (a small ranged GET, not a real HEAD).
//   4. Fall back to Google's public favicon service for the homepage host.
// Returns the first absolute URL that passes the image probe, or null.
// User-Agent sent with every outgoing request. A browser-like value is used
// because many station homepages (Cloudflare, Wikimedia) block opaque bots.
// Can be overridden through the IMAGE_FETCH_UA environment variable.
const UA =
  process.env.IMAGE_FETCH_UA ||
  'Mozilla/5.0 (compatible; OnlineRadioExplorer/0.1; +https://github.com/marcoheine/onlineRadioExplorer)';

// Abort any single HTTP request after this many milliseconds.
const FETCH_TIMEOUT_MS = 8 * 1000;

// Stop reading a homepage once this many bytes of HTML have been received.
const MAX_HTML_BYTES = 1024 * 256;

// Radio-Browser API server used for favicon lookups by station name.
const RB_BASE = 'https://de1.api.radio-browser.info';
/**
 * Create an abort signal that fires after `ms` milliseconds.
 *
 * @param {number} ms - Delay before the signal is aborted.
 * @returns {{ signal: AbortSignal, done: () => void }} The signal to hand to
 *   `fetch`, plus a `done` callback that cancels the pending timer.
 */
function withTimeout(ms) {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, ms);
  const { signal } = controller;
  return {
    signal,
    done() {
      clearTimeout(timer);
    },
  };
}
/**
 * Download a page and return its body decoded as UTF-8 text.
 *
 * Reading stops once at least MAX_HTML_BYTES have been received, so the result
 * may be a truncated prefix of the document (the last chunk is kept whole, so
 * it can slightly exceed the limit). This is best-effort: any network error,
 * timeout, non-2xx status or missing body yields `null` instead of throwing.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<string|null>} The (possibly truncated) text, or null.
 */
async function fetchText(url) {
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': UA, 'Accept': 'text/html,application/xhtml+xml' },
      redirect: 'follow',
      signal: t.signal
    });
    if (!res.ok) {
      // An unread body keeps the underlying connection busy until it is
      // garbage collected, so release it explicitly before bailing out.
      try { await res.body?.cancel(); } catch { /* best-effort cleanup */ }
      return null;
    }
    const reader = res.body?.getReader();
    if (!reader) return null;
    let received = 0;
    const chunks = [];
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.length;
      chunks.push(value);
      if (received >= MAX_HTML_BYTES) {
        // Enough HTML to find the <head> section; drop the rest of the stream.
        try { await reader.cancel(); } catch { /* best-effort cleanup */ }
        break;
      }
    }
    // Buffer.concat accepts Uint8Array chunks directly; no per-chunk copy needed.
    return Buffer.concat(chunks).toString('utf8');
  } catch {
    // Deliberately swallowed: the caller treats null as "no HTML available".
    return null;
  } finally { t.done(); }
}
/**
 * Probe a URL and decide whether it really serves an image.
 *
 * We can't trust a real HEAD request: many CDNs/SPAs return 200 for *every*
 * path with HTML. So we issue a small ranged GET and check that the response
 * is actually an image (content-type AND/OR magic bytes).
 *
 * Best-effort: network errors and timeouts yield `false` instead of throwing.
 *
 * @param {string} url - Absolute URL of the candidate image.
 * @returns {Promise<boolean>} True when the response looks like an image.
 */
async function head(url) {
  // An unread body keeps the underlying connection busy until it is garbage
  // collected, so every early exit releases it explicitly.
  const discardBody = async (response) => {
    try { await response.body?.cancel(); } catch { /* best-effort cleanup */ }
  };
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      method: 'GET',
      headers: { 'User-Agent': UA, 'Accept': 'image/*', 'Range': 'bytes=0-1023' },
      signal: t.signal,
      redirect: 'follow'
    });
    // res.ok spans 200-299, which already includes 206 Partial Content.
    if (!res.ok) {
      await discardBody(res);
      return false;
    }
    const ct = (res.headers.get('content-type') || '').toLowerCase().split(';')[0].trim();
    if (ct.startsWith('text/') || ct.includes('html')) {
      await discardBody(res);
      return false;
    }
    // Sniff the first chunk to make sure it's not HTML masquerading as image/*.
    const reader = res.body?.getReader();
    if (!reader) return ct.startsWith('image/');
    const { value } = await reader.read();
    try { await reader.cancel(); } catch { /* best-effort cleanup */ }
    const buf = value ? Buffer.from(value) : Buffer.alloc(0);
    const prefix = buf.subarray(0, 256).toString('utf8').trimStart().toLowerCase();
    // Only an *HTML* doctype disqualifies the response; an SVG file may
    // legitimately begin with `<!DOCTYPE svg ...>`.
    if (/^<!doctype\s+html/.test(prefix) || prefix.startsWith('<html')) return false;
    if (ct.startsWith('image/')) return true;
    // No image content-type but bytes look like a known image format -> accept.
    return isImageMagic(buf);
  } catch {
    // Deliberately swallowed: an unreachable candidate is simply "not an image".
    return false;
  } finally { t.done(); }
}
/**
 * Decide from the leading bytes whether a payload is a known image format.
 *
 * Recognises PNG, JPEG, GIF, WEBP and ICO by their binary signatures, and SVG
 * by its textual prologue. An `<?xml` declaration alone is not enough for SVG:
 * RSS feeds, sitemaps and XML error pages start the same way, so an `<svg` tag
 * or an SVG doctype must also be present in the sniffed bytes.
 *
 * @param {Buffer} buf - The first bytes of the response body.
 * @returns {boolean} True when the bytes look like a supported image.
 */
function isImageMagic(buf) {
  if (!buf || buf.length < 4) return false;
  if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) return true; // PNG
  if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) return true; // JPEG
  if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) return true; // GIF
  if (buf.length >= 12 && buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46
    && buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) return true; // WEBP
  if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) return true; // ICO
  // Textual sniff for SVG. 1024 bytes leaves room for an XML declaration and a
  // doctype in front of the root element.
  const text = buf.subarray(0, 1024).toString('utf8').trimStart().toLowerCase();
  if (text.startsWith('<svg')) return true;
  const hasSvgMarker = text.includes('<svg') || /<!doctype\s+svg\b/.test(text);
  if (text.startsWith('<?xml') || text.startsWith('<!doctype')) return hasSvgMarker;
  return false;
}
/**
 * Resolve a possibly relative reference against a base URL.
 *
 * @param {string} base - URL the reference is relative to.
 * @param {string|null|undefined} href - The reference found in the document.
 * @returns {string|null} The absolute URL, or null when `href` is empty or
 *   the combination cannot be parsed.
 */
function abs(base, href) {
  if (href) {
    try {
      const resolved = new URL(href, base);
      return resolved.toString();
    } catch {
      // Unparseable reference: fall through to the null result below.
    }
  }
  return null;
}
/**
 * Extract candidate icon URLs from raw HTML.
 *
 * Candidates come from `<link rel="...icon...">` tags and from Open Graph /
 * Twitter image `<meta>` tags. Each candidate gets a score:
 *   - link icons: the largest width listed in `sizes` (0 when absent), plus 64
 *     for apple-touch-icons, which are usually larger PNGs;
 *   - og:image / twitter:image: a fixed 200.
 *
 * @param {string} html - Raw HTML of the homepage.
 * @param {string} baseUrl - URL the HTML was loaded from, used to resolve
 *   relative references.
 * @returns {Array<{ href: string, score: number }>} Absolute URLs sorted
 *   best-first (highest score first), with duplicate URLs removed.
 */
function parseIconCandidates(html, baseUrl) {
  // Attribute values in HTML source are entity-encoded; URLs with query strings
  // commonly contain "&amp;", which must become "&" before the URL is resolved.
  const decodeAmp = (value) => value.replace(/&(?:amp|#38|#x26);/gi, '&');
  const readAttr = (attrs, re) => {
    const hit = re.exec(attrs);
    return hit ? decodeAmp(hit[1]) : undefined;
  };
  const source = String(html ?? '');
  const out = [];

  // <link rel="...icon..." href="..." sizes="...">
  for (const [, attrs] of source.matchAll(/<link\b([^>]*?)\/?>/gi)) {
    const rel = readAttr(attrs, /\brel\s*=\s*["']([^"']+)["']/i) || '';
    if (!/icon/i.test(rel)) continue;
    const href = readAttr(attrs, /\bhref\s*=\s*["']([^"']+)["']/i);
    if (!href) continue;
    const sizes = readAttr(attrs, /\bsizes\s*=\s*["']([^"']+)["']/i) || '';
    // `sizes` may list several dimensions ("16x16 32x32"); rank by the largest.
    let sz = 0;
    for (const [, width] of sizes.matchAll(/(\d+)x\d+/gi)) {
      sz = Math.max(sz, Number.parseInt(width, 10));
    }
    const apple = /apple-touch-icon/i.test(rel) ? 64 : 0; // bias: apple-touch-icons usually larger PNGs
    const u = abs(baseUrl, href);
    if (u) out.push({ href: u, score: sz + apple });
  }

  // <meta property="og:image" content="...">
  // Only the properties that carry an image URL are accepted. Structured
  // siblings such as og:image:width / :height / :type / :alt hold numbers or
  // text that would otherwise be resolved into bogus URLs.
  const imageProp = /^(?:og:image(?::(?:url|secure_url))?|twitter:image(?::src)?)$/i;
  for (const [, attrs] of source.matchAll(/<meta\b([^>]*?)\/?>/gi)) {
    const prop = readAttr(attrs, /\b(?:property|name)\s*=\s*["']([^"']+)["']/i) || '';
    if (!imageProp.test(prop.trim())) continue;
    const content = readAttr(attrs, /\bcontent\s*=\s*["']([^"']+)["']/i);
    const u = abs(baseUrl, content);
    if (u) out.push({ href: u, score: 200 }); // og:image preferred
  }

  // Array.prototype.sort is stable, so equal scores keep document order.
  out.sort((a, b) => b.score - a.score);
  // de-dupe preserving order
  const seen = new Set();
  return out.filter((c) => {
    if (seen.has(c.href)) return false;
    seen.add(c.href);
    return true;
  });
}
/**
 * Look up a station favicon on Radio-Browser by station name.
 *
 * Queries the station search endpoint for up to three matches ordered by click
 * count, prefers an entry whose name equals the query case-insensitively, and
 * otherwise falls back to the first result.
 *
 * Best-effort: network errors, timeouts, non-2xx responses and malformed JSON
 * all yield `null` instead of throwing.
 *
 * @param {string|null|undefined} name - Station name to search for.
 * @returns {Promise<string|null>} The favicon URL reported by Radio-Browser,
 *   or null when nothing usable was found.
 */
async function fromRadioBrowserByName(name) {
  if (!name) return null;
  const q = String(name).trim();
  if (!q) return null;
  const url = `${RB_BASE}/json/stations/search?name=${encodeURIComponent(q)}&limit=3&hidebroken=true&order=clickcount&reverse=true`;
  const t = withTimeout(FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA }, signal: t.signal });
    if (!res.ok) {
      // Release the connection instead of leaving the body unread.
      try { await res.body?.cancel(); } catch { /* best-effort cleanup */ }
      return null;
    }
    // The timeout stays armed while the body is read, so a stalled response
    // cannot hang the lookup.
    const list = await res.json();
    if (!Array.isArray(list) || list.length === 0) return null;
    const target = q.toLowerCase();
    const exact = list.find((s) => (s.name || '').toLowerCase().trim() === target);
    const pick = exact || list[0];
    return pick?.favicon || null;
  } catch {
    // Deliberately swallowed: this lookup is an optional shortcut.
    return null;
  } finally {
    // Always clear the timer, including when fetch itself rejects.
    t.done();
  }
}
/**
 * Find an icon by inspecting a station's homepage.
 *
 * Downloads the page, probes each icon candidate found in its HTML in ranked
 * order and returns the first one that passes the image probe. When none does
 * (or the page could not be downloaded), `/favicon.ico` at the homepage origin
 * is probed as a last resort on that host.
 *
 * @param {string|null|undefined} homepage - The station's homepage URL.
 * @returns {Promise<string|null>} A verified icon URL, or null.
 */
async function fromHomepage(homepage) {
  if (!homepage) return null;

  let pageUrl;
  let origin;
  try {
    const parsed = new URL(homepage);
    pageUrl = parsed.toString();
    origin = parsed.origin;
  } catch {
    return null;
  }

  const markup = await fetchText(pageUrl);
  const candidates = markup ? parseIconCandidates(markup, pageUrl) : [];
  for (const { href } of candidates) {
    const usable = await head(href);
    if (usable) return href;
  }

  // last resort on this host: /favicon.ico
  const fallback = `${origin}/favicon.ico`;
  return (await head(fallback)) ? fallback : null;
}
/**
 * Final fallback: build a URL for Google's public favicon service.
 *
 * The service serves the favicon Google has on file for a host, so even
 * SPA/JS-only sites end up with *some* artwork. This function only builds the
 * URL; it performs no network request.
 *
 * @param {string|null|undefined} homepage - The station's homepage URL.
 * @param {number} [size=128] - Requested icon size, passed as `sz`.
 * @returns {string|null} The favicon service URL, or null when the homepage is
 *   missing, unparseable or has no hostname.
 */
function fromGoogleFavicon(homepage, size = 128) {
  if (!homepage) return null;

  let hostname = '';
  try {
    ({ hostname } = new URL(homepage));
  } catch {
    return null;
  }

  if (!hostname) return null;
  const domain = encodeURIComponent(hostname);
  return `https://www.google.com/s2/favicons?sz=${size}&domain=${domain}`;
}
/**
 * Resolve an icon URL for a station, trying progressively cheaper guesses.
 *
 * Sources are tried in order and the first URL that passes the image probe
 * wins: the Radio-Browser favicon looked up by name (skipped for stations that
 * already come from Radio-Browser), icons found via the homepage, and finally
 * Google's favicon service for the homepage host.
 *
 * @param {{ name?: string, homepage?: string|null, source?: string }} station
 * @returns {Promise<string|null>} A verified icon URL, or null when every
 *   source failed or no station was given.
 */
export async function scrapeIcon(station) {
  if (!station) return null;

  // For non-RB stations, RB often still has an entry → cheap win.
  const alreadyFromRadioBrowser = station.source === 'radiobrowser';
  if (!alreadyFromRadioBrowser) {
    const rbIcon = await fromRadioBrowserByName(station.name);
    if (rbIcon) {
      const usable = await head(rbIcon);
      if (usable) return rbIcon;
    }
  }

  const pageIcon = await fromHomepage(station.homepage);
  if (pageIcon) return pageIcon;

  // Last-ditch: ask Google's favicon service. It almost always returns a
  // 128×128 PNG, even for SPA-only homepages where direct scraping fails.
  const googleIcon = fromGoogleFavicon(station.homepage, 128);
  if (!googleIcon) return null;
  return (await head(googleIcon)) ? googleIcon : null;
}