Scraper Command Center
Generate and run browser console scrapers, then verify results in one place.
Wohu Product Scraper
/**
 * Wohu Product Scraper — paste into the browser DevTools console while logged
 * in to the CRM listing page.
 *
 * Flow:
 *   1. Resolve the commodity IDs to scrape (checked rows queued in
 *      sessionStorage, or every row on every listing page for the current
 *      filters).
 *   2. Fetch each commodity detail page and read the form fields and the
 *      SKU/variant table out of the returned HTML.
 *   3. POST the product list to `${DASHBOARD_URL}/api/scrape/products`.
 *   4. In image modes, download every variant image, base64-encode it and POST
 *      the images to `${DASHBOARD_URL}/api/scrape/images` in small batches.
 *
 * The script never throws to the console on purpose: every failure is logged
 * with the `[Wohu Scraper]` prefix and the run either continues (per-item
 * failures) or stops early (configuration / upload failures).
 */
(async function wohuProductScraper() {
  // ============ CONFIGURATION ============
  const DASHBOARD_URL = 'YOUR_DASHBOARD_URL';
  const API_KEY = 'YOUR_SCRAPE_API_KEY';
  // Mode options:
  //   'selected'          — checked boxes: products + images
  //   'all'               — all Not Listed: products + images
  //   'products-selected' — checked boxes: products only (no images)
  //   'products-all'      — all Not Listed: products only (no images)
  //
  // "selected" modes use sessionStorage to accumulate picks across pages.
  // Run the script on each page to collect, then run once more with none
  // checked (or on any page) — it will scrape all collected IDs.
  const SCRAPE_MODE = 'products-selected';
  const DELAY_MS = 500;
  const MAX_PAGES = 0; // 0 = all pages, or set a number to limit (e.g., 5 = first 5 pages)
  // ========================================

  const STORAGE_KEY = 'wohu_scraper_collected_ids';
  const CRM_ORIGIN = 'https://crm.wohuoptical.com';
  const DETAIL_PATH = '/admin_dcyy/flat/show_commodity';
  const VALID_MODES = ['selected', 'all', 'products-selected', 'products-all'];
  const IMAGE_DELAY_MS = 100;
  const BATCH_DELAY_MS = 200;
  const BATCH_SIZE = 5;

  const includeImages = !SCRAPE_MODE.startsWith('products-');
  const selectionMode = SCRAPE_MODE.includes('selected') ? 'selected' : 'all';

  const log = (msg) => console.log(`[Wohu Scraper] ${msg}`);
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // A typo in SCRAPE_MODE would otherwise silently fall through to "all + images".
  if (!VALID_MODES.includes(SCRAPE_MODE)) {
    log(`❌ Unknown SCRAPE_MODE "${SCRAPE_MODE}". Use one of: ${VALID_MODES.join(', ')}`);
    return;
  }

  // Fail fast on unset configuration. A non-absolute DASHBOARD_URL would be
  // resolved against the CRM origin and send the API key header there.
  // (Checks are shape-based rather than comparing against the full placeholder
  // text, so they keep working if the placeholders are substituted textually.)
  if (!/^https?:\/\//i.test(DASHBOARD_URL) || !API_KEY || API_KEY.startsWith('YOUR_')) {
    log('❌ Set DASHBOARD_URL (absolute http(s) URL) and API_KEY at the top of the script before running.');
    return;
  }
  const apiBase = DASHBOARD_URL.replace(/\/+$/, '');

  log(`Mode: ${SCRAPE_MODE} (images: ${includeImages ? 'yes' : 'no'}, selection: ${selectionMode})`);

  /** Fetches `url` and parses the body as HTML; throws on a non-2xx response. */
  const fetchDocument = async (url) => {
    const resp = await fetch(url);
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status} for ${url}`);
    }
    const html = await resp.text();
    return new DOMParser().parseFromString(html, 'text/html');
  };

  /** POSTs `payload` as JSON to the dashboard; throws on a non-2xx response. */
  const postJson = async (path, payload) => {
    const resp = await fetch(`${apiBase}${path}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Scrape-API-Key': API_KEY
      },
      body: JSON.stringify(payload)
    });
    let result = null;
    try {
      result = await resp.json();
    } catch {
      // Body was empty or not JSON; the HTTP status below decides success.
      result = null;
    }
    if (!resp.ok) {
      const detail = result && result.message ? `: ${result.message}` : '';
      throw new Error(`HTTP ${resp.status}${detail}`);
    }
    return result || {};
  };

  /** Reads the queued commodity IDs from sessionStorage ([] if missing/corrupt). */
  const readQueuedIds = () => {
    try {
      const parsed = JSON.parse(sessionStorage.getItem(STORAGE_KEY) || '[]');
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  };

  /** Collects the unique detail-button IDs from every listing page for the current filters. */
  const collectAllIds = async () => {
    log('Scraping all products matching current filters...');
    // Build the listing URL from the current page, preserving all filter params.
    const listUrl = new URL(window.location.href);
    listUrl.searchParams.delete('page');
    listUrl.hash = '';

    // Find total pages from pagination links. Reading the actual `page` query
    // parameter avoids mistaking e.g. `per_page=50` for a page number.
    let totalPages = 1;
    document.querySelectorAll('a[href*="page="]').forEach((link) => {
      let pageParam = null;
      try {
        pageParam = new URL(link.href, window.location.href).searchParams.get('page');
      } catch {
        return; // unparsable href — not a pagination link
      }
      const pageNumber = Number.parseInt(pageParam, 10);
      if (Number.isInteger(pageNumber)) {
        totalPages = Math.max(totalPages, pageNumber);
      }
    });

    const pagesToScrape = MAX_PAGES > 0 ? Math.min(totalPages, MAX_PAGES) : totalPages;
    log(`Total pages: ${totalPages}, scraping: ${pagesToScrape} (URL: ${listUrl.pathname}${listUrl.search})`);

    const ids = new Set();
    for (let page = 1; page <= pagesToScrape; page++) {
      log(`Fetching page ${page}/${pagesToScrape}...`);
      listUrl.searchParams.set('page', String(page));
      try {
        const doc = await fetchDocument(listUrl.href);
        // Use data-id from Detail buttons (same as selected mode) — NOT checkbox values
        doc.querySelectorAll('.updateModal[data-id]').forEach((btn) => {
          if (btn.dataset.id) ids.add(btn.dataset.id);
        });
      } catch (err) {
        log(`⚠️ Failed to fetch page ${page}: ${err.message}`);
      }
      await sleep(DELAY_MS);
    }
    return [...ids];
  };

  /** Splits a comma-separated path list into trimmed, non-empty entries. */
  const splitPaths = (raw) => raw.split(',').map((p) => p.trim()).filter(Boolean);

  /** Resolves an image path against the CRM origin; returns null if it cannot be parsed. */
  const toImageUrl = (path) => {
    try {
      return new URL(path, CRM_ORIGIN).href;
    } catch {
      return null;
    }
  };

  /**
   * Reads one commodity detail document.
   * Returns `{ product, images }` where `images` is the flat list of image
   * descriptors for the product's enabled variants (empty when images are off).
   */
  const extractProduct = (doc, commodityId) => {
    const getVal = (name) => {
      const el = doc.querySelector(`[name="${name}"]`);
      if (!el) return '';
      if (el.type === 'checkbox' || el.type === 'radio') {
        const checked = doc.querySelectorAll(`[name="${name}"]:checked`);
        return Array.from(checked, (c) => c.value).join(', ');
      }
      return (el.value || '').trim();
    };
    const getMultiVal = (name) => {
      const els = doc.querySelectorAll(`[name="${name}"]:checked, [name="${name}"] option:checked`);
      return Array.from(els, (e) => e.value || e.textContent).filter(Boolean).join(', ');
    };

    const product = {
      commodityId,
      spu: getVal('spu'),
      commodityName: getVal('commodity_name'),
      goodsType: getVal('goods_type'),
      gender: getMultiVal('gender[]'),
      frameMaterial: getMultiVal('frame_material[]'),
      frameShape: getMultiVal('frame_shape[]'),
      rim: getVal('rim'),
      weight: getVal('weight'),
      lensWidth: getVal('lens_width'),
      bridge: getVal('bridge'),
      lensHeight: getVal('lens_height'),
      templeLength: getVal('temple_length'),
      frameWidth: getVal('frame_width'),
      size: getVal('size'),
      springHinge: getVal('spring_hinge'),
      nosePad: getVal('nose_pad'),
      clipOns: getVal('clip_ons'),
      pdRange: getVal('goodfor_pd'),
      isFitReading: getVal('is_fit_reading'),
      isFitBlue: getVal('is_fit_blue'),
      isFitSport: getVal('is_fit_sport'),
      highRx: getVal('high_rx'),
      customEngraving: getVal('custom_engraving'),
      availableProgressiveBifocal: getVal('available_progressive_bifocal'),
      variants: []
    };
    const images = [];

    // Extract SKU/variant table rows
    const table = doc.querySelector('table.price-stock');
    const rows = table ? table.querySelectorAll('tbody tr') : [];
    rows.forEach((row) => {
      const firstCell = row.querySelector('td');
      const getByClass = (className) => {
        const input = row.querySelector(`input.${className}, select.${className}`);
        return input ? (input.value || '').trim() : '';
      };
      const getInputVal = (namePattern) => {
        const input = row.querySelector(`[name*="${namePattern}"]`);
        return input ? (input.value || '').trim() : '';
      };

      const isEnable = getInputVal('is_enable');
      // Skip disabled (out of stock) variants
      if (isEnable === '0') return;

      const skuCode = getByClass('sku_code');
      const variant = {
        frameColor: firstCell ? firstCell.textContent.trim() : '',
        skuCode,
        costPrice: getByClass('cost_price'),
        skuStock: getByClass('sku_stock'),
        skuColor: getByClass('sku_color'),
        isEnable
      };

      // Only extract image paths when images are needed
      if (includeImages) {
        variant.images = {
          ratio21: splitPaths(getInputVal('sku_image21')),
          ratio11: splitPaths(getInputVal('sku_image11')),
          tryOn: splitPaths(getInputVal('sku_image_try'))
        };
        // Collect image URLs with type tagging
        const addImgs = (paths, type) => {
          paths.forEach((imgPath, idx) => {
            const url = toImageUrl(imgPath);
            if (!url) {
              log(`⚠️ Skipping unparsable image path "${imgPath}" (commodity_id=${commodityId})`);
              return;
            }
            images.push({
              spu: product.spu,
              sku: `${product.spu}-${skuCode}`,
              type,
              isTryOn: type === 'tryon',
              index: idx + 1,
              url
            });
          });
        };
        addImgs(variant.images.ratio21, '2_1');
        addImgs(variant.images.ratio11, '1_1');
        addImgs(variant.images.tryOn, 'tryon');
      }
      product.variants.push(variant);
    });

    return { product, images };
  };

  /** Encodes a Blob as a data: URL; rejects if the read fails. */
  const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error || new Error('FileReader failed'));
    reader.readAsDataURL(blob);
  });

  // Step 1: Get commodity IDs to scrape
  let commodityIds = [];
  if (selectionMode === 'selected') {
    // Read any previously collected IDs from sessionStorage
    const collected = readQueuedIds();
    // Collect checked IDs from current page
    const checkedBoxes = document.querySelectorAll('input[type="checkbox"][name="chk_ids"]:checked');
    const newIds = Array.from(checkedBoxes, (cb) => {
      const row = cb.closest('tr');
      const detailBtn = row ? row.querySelector('.updateModal[data-id]') : null;
      return detailBtn ? detailBtn.dataset.id : null;
    }).filter(Boolean);

    if (newIds.length > 0) {
      // Merge new IDs with existing (deduplicate)
      const merged = [...new Set([...collected, ...newIds])];
      sessionStorage.setItem(STORAGE_KEY, JSON.stringify(merged));
      const added = merged.length - collected.length;
      log(`✅ Collected ${newIds.length} from this page (${added} new). Total queued: ${merged.length}`);
      log(`Navigate to another page and run again to add more, or uncheck all and run to start scraping.`);
      return;
    }

    // No checkboxes checked — scrape everything we've collected
    commodityIds = collected;
    if (commodityIds.length === 0) {
      log('❌ No products collected. Check some boxes and run the script to collect, then run again with none checked to scrape.');
      return;
    }
    log(`Found ${commodityIds.length} collected products across pages. Starting scrape...`);
    // The queue is intentionally NOT cleared here: it is removed only after
    // the product upload succeeds, so a failed run can simply be re-run.
  } else {
    commodityIds = await collectAllIds();
    log(`Found ${commodityIds.length} total products`);
  }

  // Step 2: Fetch detail page for each product and extract data
  const products = [];
  const allImages = [];
  const failedIds = [];
  for (const [index, commodityId] of commodityIds.entries()) {
    log(`Fetching detail ${index + 1}/${commodityIds.length} (commodity_id=${commodityId})...`);
    try {
      const doc = await fetchDocument(`${DETAIL_PATH}?commodity_id=${encodeURIComponent(commodityId)}`);
      const extracted = extractProduct(doc, commodityId);
      products.push(extracted.product);
      allImages.push(...extracted.images);
    } catch (err) {
      failedIds.push(commodityId);
      log(`❌ Error fetching commodity_id=${commodityId}: ${err.message}`);
    }
    await sleep(DELAY_MS);
  }
  log(`Scraped ${products.length} products` + (includeImages ? ` with ${allImages.length} total images` : ''));
  if (failedIds.length > 0) {
    log(`⚠️ ${failedIds.length} product(s) could not be scraped: ${failedIds.join(', ')}`);
  }
  if (products.length === 0) {
    log('❌ Nothing was scraped, so nothing was sent.');
    return;
  }

  // Step 3: Send product data to Candye Hub
  log('Sending product data to Candye Hub...');
  try {
    const productResult = await postJson('/api/scrape/products', { products });
    log(`✅ Product data sent: ${productResult.message || 'OK'}`);
  } catch (err) {
    log(`❌ Failed to send product data: ${err.message}`);
    if (selectionMode === 'selected') {
      log('Collected IDs were kept. Fix the problem and run again with none checked to retry.');
    }
    return;
  }
  if (selectionMode === 'selected') {
    // Clear the collection now that the products have been delivered
    sessionStorage.removeItem(STORAGE_KEY);
  }

  // Step 4: Download and send images (only in image modes)
  if (!includeImages || allImages.length === 0) {
    log(`🎉 Product scrape complete! ${products.length} products sent (no images)`);
    return;
  }

  log('Downloading images...');
  const imageData = [];
  for (const [index, img] of allImages.entries()) {
    try {
      const resp = await fetch(img.url);
      if (resp.ok) {
        const blob = await resp.blob();
        const ct = resp.headers.get('content-type') || '';
        let ext = '.jpg';
        if (ct.includes('png')) ext = '.png';
        else if (ct.includes('webp')) ext = '.webp';
        const base64 = await blobToDataUrl(blob);
        imageData.push({
          spu: img.spu,
          sku: img.sku,
          type: img.type,
          isTryOn: img.isTryOn,
          index: img.index,
          filename: `F${img.sku}-${img.type}-${img.index}${ext}`,
          mimeType: ct || 'image/jpeg',
          base64
        });
      } else {
        log(`⚠️ Image ${img.url} returned ${resp.status}`);
      }
    } catch (err) {
      log(`⚠️ Failed to download ${img.url}: ${err.message}`);
    }
    const processed = index + 1;
    if (processed % 10 === 0 && processed < allImages.length) {
      log(`Downloaded ${imageData.length}/${allImages.length} images...`);
    }
    await sleep(IMAGE_DELAY_MS);
  }
  log(`Downloaded ${imageData.length}/${allImages.length} images`);

  // Send images in batches
  let sentCount = 0;
  for (let start = 0; start < imageData.length; start += BATCH_SIZE) {
    const batch = imageData.slice(start, start + BATCH_SIZE);
    const batchNumber = Math.floor(start / BATCH_SIZE) + 1;
    try {
      const imgResult = await postJson('/api/scrape/images', { images: batch });
      sentCount += batch.length;
      log(`✅ Image batch ${batchNumber} sent: ${imgResult.message || 'OK'}`);
    } catch (err) {
      log(`❌ Failed to send image batch ${batchNumber}: ${err.message}`);
    }
    await sleep(BATCH_DELAY_MS);
  }
  log(`🎉 Scraping complete! ${products.length} products, ${sentCount}/${imageData.length} images sent`);
})();
How to run
- 1. Log into crm.wohuoptical.com
- 2. Go to the Listing Support page and apply any filters
- 3. Select products (if using a 'Selected' mode)
- 4. Open Chrome DevTools (F12), then the Console tab
- 5. Paste the script and press Enter
- 6. Wait for completion, then check the Scraped Products page