#!/usr/bin/env node
/**
 * Capture workflow:
 *  1. Extract a page-id from the URL given on the command line.
 *  2. Open the page in headless Chrome with a mobile viewport, recording
 *     media/image/font network requests and taking a screenshot.
 *  3. Hand the collected media URLs to download.mjs for downloading.
 */
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { spawn } from 'child_process';
import process from 'process';
import puppeteer from 'puppeteer';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// iPhone-class mobile viewport used for both page rendering and the screenshot.
const VIEWPORT = {
  width: 390,
  height: 844,
  deviceScaleFactor: 3,
  isMobile: true,
  hasTouch: true,
};

const MEDIA_MIME_PREFIXES = ['audio/', 'video/', 'image/'];
const MEDIA_EXTENSIONS = [
  '.mp3', '.wav', '.ogg', '.aac', '.m4a', '.flac',
  '.mp4', '.webm', '.ogv', '.avi', '.mov',
  '.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico',
];

/**
 * Current local date as a compact YYYYMMDD string (used in folder names).
 * @returns {string}
 */
function getCurrentDate() {
  const now = new Date();
  const year = now.getFullYear();
  const month = String(now.getMonth() + 1).padStart(2, '0'); // getMonth() is 0-based
  const day = String(now.getDate()).padStart(2, '0');
  return `${year}${month}${day}`;
}

/**
 * Extract the page-id from a URL of the form .../storeview/<id>... or .../v/<id>...
 * @param {string} url
 * @returns {string|null} the id, or null when no pattern matches
 */
function extractPageId(url) {
  const patterns = [/storeview\/([a-z0-9]+)/i, /\/v\/([a-z0-9]+)/i];
  for (const pattern of patterns) {
    const match = url.match(pattern);
    if (match && match[1]) {
      return match[1];
    }
  }
  return null;
}

/**
 * Parse CLI arguments; exits the process with code 1 on missing/invalid input.
 * @returns {{ pageId: string, pageUrl: string }}
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const pageUrl = args[0];
  if (!pageUrl) {
    // Fix: the <page-url> placeholder had been stripped from the usage message.
    console.error('Usage: node run.mjs <page-url>');
    process.exit(1);
  }
  const pageId = extractPageId(pageUrl);
  if (!pageId) {
    console.error('Error: Could not extract page-id from the URL');
    process.exit(1);
  }
  return { pageId, pageUrl };
}

/**
 * Decide whether a URL/response looks like downloadable media.
 * @param {string} url
 * @param {string|null} mimeType - Content-Type header value, when known
 * @returns {boolean}
 */
function isMediaUrl(url, mimeType) {
  if (mimeType && MEDIA_MIME_PREFIXES.some((prefix) => mimeType.toLowerCase().startsWith(prefix))) {
    return true;
  }
  // Fix: match extensions as a suffix of the URL *path* only. The original
  // substring-matched the whole URL, so hosts or query strings containing
  // ".png" etc. produced false positives.
  let pathname;
  try {
    pathname = new URL(url).pathname.toLowerCase();
  } catch {
    pathname = url.toLowerCase(); // relative/opaque URL: best-effort fallback
  }
  return MEDIA_EXTENSIONS.some((ext) => pathname.endsWith(ext));
}

/**
 * Build the canonical capture URL for a page.
 * @param {string} pageUrl
 * @returns {string}
 */
function buildCaptureUrl(pageUrl) {
  const pageId = extractPageId(pageUrl);
  return `https://ca.rrx.cn/v/${pageId}?rrxsrc=2&iframe=1&tpl=1`;
}

/**
 * Open the capture URL, record media/image/font request URLs, and save a
 * screenshot under downloads/<date>-<title>-<pageId>/index.png.
 *
 * @param {import('puppeteer').Browser} browser
 * @param {string} captureUrl
 * @param {string} pageId
 * @param {string} today - YYYYMMDD date string for the folder name
 * @returns {Promise<{ page: import('puppeteer').Page, mediaUrls: string[], pageTitle: string }>}
 *          caller is responsible for closing the returned page
 */
async function captureNetworkAndScreenshot(browser, captureUrl, pageId, today) {
  const page = await browser.newPage();

  // Set dedups URLs and keeps request/response handlers O(1) per event
  // (the original did O(n) requests.includes() on every response).
  const requests = new Set();
  page.on('request', (request) => {
    if (['image', 'media', 'font'].includes(request.resourceType())) {
      requests.add(request.url());
    }
  });
  page.on('response', (response) => {
    const url = response.url();
    const mimeType = response.headers()['content-type'] || '';
    if (isMediaUrl(url, mimeType)) {
      requests.add(url);
    }
  });

  // Fix: the original also called page.emulate(VIEWPORT), but puppeteer's
  // page.emulate() expects a device descriptor ({ viewport, userAgent }),
  // not a bare viewport object. setViewport() alone is correct here.
  await page.setViewport(VIEWPORT);

  console.log('Navigating to:', captureUrl);
  await page.goto(captureUrl, { waitUntil: 'networkidle2', timeout: 30000 });
  // Give late-loading media (players, lazy images) a moment to fire requests.
  await new Promise((resolve) => setTimeout(resolve, 3000));

  let pageTitle = 'page';
  try {
    pageTitle = await page.title();
    // Strip filesystem-unsafe characters; cap length for the folder name.
    pageTitle = pageTitle.replace(/[<>:"/\\|?*]/g, '').trim().slice(0, 100) || 'page';
  } catch {
    // best-effort: fall back to the default title
  }

  const destRoot = path.join('downloads', `${today}-${pageTitle}-${pageId}`);
  await fs.mkdir(destRoot, { recursive: true });

  const screenshotPath = path.join(destRoot, 'index.png');
  console.log('Taking screenshot...');
  await page.screenshot({ path: screenshotPath, fullPage: false });
  console.log('Screenshot saved:', screenshotPath);

  // Re-filter by URL shape only (mimeType unknown at this point): drops fonts
  // and anything captured solely by resourceType that has no media extension.
  const mediaUrls = [...requests].filter((url) => isMediaUrl(url, null));
  return { page, mediaUrls, pageTitle };
}

/**
 * Run download.mjs as a child process.
 * @param {string} pageId
 * @param {string} title
 * @param {string} urlsFile - path to the newline-separated URL list
 * @param {string} sourcesFile - path to the source-URL list
 * @returns {Promise<number>} child exit code, or -1 when spawning failed
 */
function runDownloadScript(pageId, title, urlsFile, sourcesFile) {
  return new Promise((resolve) => {
    const args = [
      'download.mjs',
      '--page-id', pageId,
      '--title', title,
      '--urls', urlsFile,
      '--sources', sourcesFile,
    ];
    const proc = spawn('node', args, { stdio: 'inherit' });
    // Fix: without an 'error' handler, a spawn failure (e.g. node missing)
    // left this promise pending forever. Resolve (never reject) so a failed
    // download does not abort the rest of the workflow.
    proc.on('error', (err) => {
      console.error('Failed to start download.mjs:', err.message);
      resolve(-1);
    });
    proc.on('close', resolve);
  });
}

/** Top-level workflow: parse args, capture, download, clean up. */
async function main() {
  const { pageId, pageUrl } = parseArgs();
  console.log('Extracted page-id:', pageId);

  const today = getCurrentDate();
  const tempDir = 'temp';
  await fs.mkdir(tempDir, { recursive: true });
  const urlsFile = path.join(tempDir, 'urls.txt');
  const sourcesFile = path.join(tempDir, 'sources.txt');
  await fs.writeFile(sourcesFile, pageUrl + '\n', 'utf-8');

  console.log('Launching browser...');
  const browser = await puppeteer.launch({ headless: 'new' });
  const captureUrl = buildCaptureUrl(pageUrl);
  console.log('Capture URL:', captureUrl);
  try {
    const { page, mediaUrls, pageTitle } = await captureNetworkAndScreenshot(
      browser, captureUrl, pageId, today,
    );
    console.log('Found', mediaUrls.length, 'media URLs');
    if (mediaUrls.length > 0) {
      await fs.writeFile(urlsFile, mediaUrls.join('\n') + '\n', 'utf-8');
      await runDownloadScript(pageId, pageTitle, urlsFile, sourcesFile);
    } else {
      console.log('No media URLs found, skipping download');
    }
    console.log('Closing captured page...');
    await page.close();
  } catch (error) {
    // Best-effort workflow: log and fall through so the browser still closes.
    console.error('Error during capture:', error.message);
  } finally {
    console.log('Closing browser...');
    await browser.close();
  }
  console.log('Workflow completed.');
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});