From 2b67f652b806e85c91fd0950796c3b53b70d2335 Mon Sep 17 00:00:00 2001 From: rmens Date: Sat, 6 Jul 2024 15:21:43 +0200 Subject: [PATCH] Remove custom scraper --- README.md | 1 - rds-rucphen.js | 112 ------------------------------------------------- 2 files changed, 113 deletions(-) delete mode 100644 rds-rucphen.js diff --git a/README.md b/README.md index a90e332..7d82d76 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ This repository contains a metadata management solution specifically designed fo These services operate on https://rds.zuidwestfm.nl/ and https://rds-rucphen.zuidwestfm.nl/. - **rds.js**: Generates RDS data. Use it with `?ps` to display the programme service or `?rt` to display the radio text. Defaults to radio text for legacy integrations. -- **rds-rucphen.js**: Generates RDS data for Rucphen RTV. Custom implementation based on html parsing of the website rucphenrtv.nl. - **push.js**: Pushes metadata to Icecast servers. Use this with the scheduler of Cloudflare Workers to update the metadata as often as you want. All scripts run serverless as Cloudflare Worker. \ No newline at end of file diff --git a/rds-rucphen.js b/rds-rucphen.js deleted file mode 100644 index e536de6..0000000 --- a/rds-rucphen.js +++ /dev/null @@ -1,112 +0,0 @@ -addEventListener('fetch', event => { - event.respondWith(handleRequest(event.request)) -}) - -async function handleRequest (request) { - const url = 'https://rucphenrtv.nl/programmering/' - - try { - // Fetch the HTML content from the URL - const response = await fetch(url) - const html = await response.text() - - // Decode HTML entities in the fetched HTML - const decodedHtml = decodeHtmlEntities(html) - - // Get the current time in Europe/Amsterdam timezone - const now = new Date().toLocaleString('en-US', { timeZone: 'Europe/Amsterdam' }) - const currentDate = new Date(now) - const currentDayIndex = currentDate.getDay() - const daysOfWeek = ['Zondag', 'Maandag', 'Dinsdag', 'Woensdag', 'Donderdag', 'Vrijdag', 'Zaterdag'] - const currentDay = daysOfWeek[currentDayIndex] - const currentHour = currentDate.toTimeString().slice(0, 5) - - // Log the current day and time for debugging - console.log(`Current day: ${currentDay}`) - console.log(`Current time: ${currentHour}`) - - // Regular expression to capture the current day's section - const dayRegex = new RegExp(`]*class="elementor-accordion-title"[^>]*>\\s*${currentDay}\\s*[\\s\\S]*?]*class="elementor-tab-content[^>]*"[^>]*>([\\s\\S]*?)`, 'i') - const dayMatch = decodedHtml.match(dayRegex) - - if (!dayMatch) { - console.log('Schedule section not found for today.') - return new Response('Error: Unable to find the schedule for today.', { status: 500 }) - } - - // Extract the schedule for the current day - const scheduleHtml = dayMatch[1] - - // Log the extracted HTML for debugging - console.log(`Extracted schedule HTML: ${scheduleHtml}`) - - // Decode HTML entities in the extracted schedule - const decodedScheduleHtml = decodeHtmlEntities(scheduleHtml) - - // Regular expression to find program entries - const programRegex = /(\d{2}:\d{2})\s*–\s*(\d{2}:\d{2})\s*\|\s*([^<\n\r]*)/g - let match - let currentProgramName = 'No program scheduled for the current time.' - - while ((match = programRegex.exec(decodedScheduleHtml)) !== null) { - const startTime = match[1] - const endTime = match[2] - const programName = match[3].trim() - - // Convert times to Date objects for comparison - const startTimeParts = startTime.split(':').map(Number) - const endTimeParts = endTime.split(':').map(Number) - - const startTimeDate = new Date(currentDate) - startTimeDate.setHours(startTimeParts[0], startTimeParts[1], 0, 0) - - const endTimeDate = new Date(currentDate) - endTimeDate.setHours(endTimeParts[0], endTimeParts[1], 0, 0) - - const currentHourParts = currentHour.split(':').map(Number) - const currentHourDate = new Date(currentDate) - currentHourDate.setHours(currentHourParts[0], currentHourParts[1], 0, 0) - - // Adjust for overnight programs that might end past midnight - if (endTimeDate < startTimeDate) { - endTimeDate.setDate(endTimeDate.getDate() + 1) - } - - // Log the times for debugging - console.log(`Comparing ${currentHourDate} with ${startTimeDate} - ${endTimeDate}`) - - if (currentHourDate >= startTimeDate && currentHourDate < endTimeDate) { - currentProgramName = programName - break - } - } - - return new Response(currentProgramName, { - status: 200, - headers: { - 'Content-Type': 'text/plain; charset=UTF-8' - } - }) - } catch (error) { - console.log(`Error: ${error.message}`) - return new Response(`Error: ${error.message}`, { status: 500 }) - } -} - -// Function to decode HTML entities -function decodeHtmlEntities (str) { - const entities = { - '–': '–', - '’': "'", - '“': '“', - '”': '”', - '&': '&', - '<': '<', - '>': '>', - '"': '"', - ' ': ' ' - // Add more entities if needed - } - - return str.replace(/&#?\w+;/g, match => entities[match] || match) -}