Scraper Command Center

Generate and run browser console scrapers, then verify results in one place.

Wohu Product Scraper

/**
 * Wohu product scraper (browser console script).
 *
 * Paste into the DevTools console of the Wohu CRM listing page. The script
 *   1. gathers commodity IDs (checked rows queued in sessionStorage, or every
 *      row on every result page for the current filters),
 *   2. fetches each product's detail page and reads its form fields and
 *      price/stock variant rows,
 *   3. POSTs the products to `${DASHBOARD_URL}/api/scrape/products`, and
 *   4. in image modes, downloads every variant image and POSTs them as base64
 *      data URLs to `${DASHBOARD_URL}/api/scrape/images` in small batches.
 *
 * Progress and failures are reported through console.log; nothing is returned.
 */
(async function wohuProductScraper() {
  // ============ CONFIGURATION ============
  const DASHBOARD_URL = 'YOUR_DASHBOARD_URL';
  const API_KEY = 'YOUR_SCRAPE_API_KEY';

  // Mode options:
  //   'selected'          — checked boxes: products + images
  //   'all'               — all Not Listed: products + images
  //   'products-selected' — checked boxes: products only (no images)
  //   'products-all'      — all Not Listed: products only (no images)
  //
  // "selected" modes use sessionStorage to accumulate picks across pages.
  // Run the script on each page to collect, then run once more with none
  // checked (or on any page) — it will scrape all collected IDs. The queue is
  // cleared only after the product data has been delivered successfully.
  const SCRAPE_MODE = 'products-selected';
  const DELAY_MS = 500;
  const MAX_PAGES = 0; // 0 = all pages, or set a number to limit (e.g., 5 = first 5 pages)
  // ========================================

  const STORAGE_KEY = 'wohu_scraper_collected_ids';
  const IMAGE_ORIGIN = 'https://crm.wohuoptical.com';
  const DETAIL_PATH = '/admin_dcyy/flat/show_commodity';
  const IMAGE_BATCH_SIZE = 5;
  const includeImages = !SCRAPE_MODE.startsWith('products-');
  const selectionMode = SCRAPE_MODE.includes('selected') ? 'selected' : 'all';
  // Tolerate a trailing slash in the configured URL so paths never contain "//".
  const dashboardBase = DASHBOARD_URL.replace(/\/+$/, '');

  const log = (msg) => console.log(`[Wohu Scraper] ${msg}`);
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Fail fast, before any scraping work, when the script cannot possibly succeed.
  if (typeof document === 'undefined' || typeof DOMParser === 'undefined') {
    log('❌ This script must be run from the browser DevTools console.');
    return;
  }
  if (!/^https?:\/\//i.test(DASHBOARD_URL)) {
    log('❌ DASHBOARD_URL is not configured. Set it to an absolute http(s) URL and run again.');
    return;
  }

  // ---------- helpers ----------

  // Fetches a same-site page and parses it as HTML; throws on a non-2xx status.
  const fetchDocument = async (url) => {
    const resp = await fetch(url);
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const html = await resp.text();
    return new DOMParser().parseFromString(html, 'text/html');
  };

  // POSTs JSON to the dashboard; throws on a non-2xx status, returns the parsed body.
  const postJson = async (path, payload) => {
    const resp = await fetch(`${dashboardBase}${path}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Scrape-API-Key': API_KEY
      },
      body: JSON.stringify(payload)
    });
    let body = {};
    try {
      const parsed = await resp.json();
      if (parsed && typeof parsed === 'object') body = parsed;
    } catch {
      // A non-JSON body is acceptable; the HTTP status below decides success.
    }
    if (!resp.ok) throw new Error(body.message || `HTTP ${resp.status}`);
    return body;
  };

  // Reads the queued IDs; anything unreadable or not an array counts as empty.
  const readCollected = () => {
    try {
      const parsed = JSON.parse(sessionStorage.getItem(STORAGE_KEY) || '[]');
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  };

  // Splits a comma-separated path list into trimmed, non-empty entries.
  const splitPaths = (csv) => csv.split(',').map((p) => p.trim()).filter((p) => p);

  // Resolves an image path against the CRM origin unless it is already absolute.
  const toImageUrl = (path) => {
    if (/^https?:\/\//i.test(path)) return path;
    return `${IMAGE_ORIGIN}${path.startsWith('/') ? '' : '/'}${path}`;
  };

  // Encodes a Blob as a data URL; rejects instead of resolving null on read errors.
  const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error || new Error('FileReader failed'));
    reader.readAsDataURL(blob);
  });

  // Reads one detail page into a product record plus its tagged image list.
  const extractProduct = (doc, commodityId) => {
    const getVal = (name) => {
      const el = doc.querySelector(`[name="${name}"]`);
      if (!el) return '';
      if (el.type === 'checkbox' || el.type === 'radio') {
        const checked = doc.querySelectorAll(`[name="${name}"]:checked`);
        return Array.from(checked).map((c) => c.value).join(', ');
      }
      return (el.value || '').trim();
    };

    const getMultiVal = (name) => {
      const els = doc.querySelectorAll(`[name="${name}"]:checked, [name="${name}"] option:checked`);
      return Array.from(els).map((e) => e.value || e.textContent).filter((v) => v).join(', ');
    };

    const product = {
      commodityId,
      spu: getVal('spu'),
      commodityName: getVal('commodity_name'),
      goodsType: getVal('goods_type'),
      gender: getMultiVal('gender[]'),
      frameMaterial: getMultiVal('frame_material[]'),
      frameShape: getMultiVal('frame_shape[]'),
      rim: getVal('rim'),
      weight: getVal('weight'),
      lensWidth: getVal('lens_width'),
      bridge: getVal('bridge'),
      lensHeight: getVal('lens_height'),
      templeLength: getVal('temple_length'),
      frameWidth: getVal('frame_width'),
      size: getVal('size'),
      springHinge: getVal('spring_hinge'),
      nosePad: getVal('nose_pad'),
      clipOns: getVal('clip_ons'),
      pdRange: getVal('goodfor_pd'),
      isFitReading: getVal('is_fit_reading'),
      isFitBlue: getVal('is_fit_blue'),
      isFitSport: getVal('is_fit_sport'),
      highRx: getVal('high_rx'),
      customEngraving: getVal('custom_engraving'),
      availableProgressiveBifocal: getVal('available_progressive_bifocal'),
      variants: []
    };
    const images = [];

    // SKU/variant rows live in the price-stock table, one <tr> per variant.
    const table = doc.querySelector('table.price-stock');
    const rows = table ? Array.from(table.querySelectorAll('tbody tr')) : [];

    rows.forEach((row) => {
      const getByClass = (className) => {
        const input = row.querySelector(`input.${className}, select.${className}`);
        return input ? (input.value || '').trim() : '';
      };
      const getInputVal = (namePattern) => {
        const input = row.querySelector(`[name*="${namePattern}"]`);
        return input ? (input.value || '').trim() : '';
      };

      const isEnable = getInputVal('is_enable');
      // Skip disabled (out of stock) variants
      if (isEnable === '0') return;

      const firstCell = row.querySelector('td');
      const skuCode = getByClass('sku_code');
      const variant = {
        frameColor: firstCell ? firstCell.textContent.trim() : '',
        skuCode,
        costPrice: getByClass('cost_price'),
        skuStock: getByClass('sku_stock'),
        skuColor: getByClass('sku_color'),
        isEnable
      };

      // Only extract image paths when images are needed
      if (includeImages) {
        variant.images = {
          ratio21: splitPaths(getInputVal('sku_image21')),
          ratio11: splitPaths(getInputVal('sku_image11')),
          tryOn: splitPaths(getInputVal('sku_image_try'))
        };

        // Tag every image with its SKU and type so the receiver can name/file it.
        const addImgs = (paths, type) => {
          paths.forEach((imgPath, idx) => {
            images.push({
              spu: product.spu,
              sku: `${product.spu}-${skuCode}`,
              type,
              isTryOn: type === 'tryon',
              index: idx + 1,
              url: toImageUrl(imgPath)
            });
          });
        };
        addImgs(variant.images.ratio21, '2_1');
        addImgs(variant.images.ratio11, '1_1');
        addImgs(variant.images.tryOn, 'tryon');
      }

      product.variants.push(variant);
    });

    return { product, images };
  };

  // ---------- main flow ----------

  log(`Mode: ${SCRAPE_MODE} (images: ${includeImages ? 'yes' : 'no'}, selection: ${selectionMode})`);

  // Step 1: Get commodity IDs to scrape
  let commodityIds = [];

  if (selectionMode === 'selected') {
    const collected = readCollected();

    // Collect checked IDs from current page (ID comes from the row's Detail button)
    const checkboxes = document.querySelectorAll('input[type="checkbox"][name="chk_ids"]:checked');
    const newIds = Array.from(checkboxes).map((cb) => {
      const row = cb.closest('tr');
      const detailBtn = row ? row.querySelector('.updateModal[data-id]') : null;
      return detailBtn ? detailBtn.dataset.id : null;
    }).filter((id) => id);

    if (newIds.length > 0) {
      // Merge new IDs with existing (deduplicate)
      const merged = [...new Set([...collected, ...newIds])];
      sessionStorage.setItem(STORAGE_KEY, JSON.stringify(merged));
      const added = merged.length - collected.length;
      log(`✅ Collected ${newIds.length} from this page (${added} new). Total queued: ${merged.length}`);
      log(`Navigate to another page and run again to add more, or uncheck all and run to start scraping.`);
      return;
    }

    // No checkboxes checked — scrape everything we've collected
    commodityIds = collected;

    if (commodityIds.length === 0) {
      log('❌ No products collected. Check some boxes and run the script to collect, then run again with none checked to scrape.');
      return;
    }

    log(`Found ${commodityIds.length} collected products across pages. Starting scrape...`);
  } else {
    log('Scraping all products matching current filters...');

    // Build base URL from current page, preserving all filter params
    const listUrl = new URL(window.location.href);
    listUrl.searchParams.delete('page');
    const basePath = listUrl.pathname + listUrl.search;

    // Find total pages from pagination links. The [?&] anchor keeps parameters
    // such as "per_page=50" from being mistaken for the page number.
    let totalPages = 1;
    document.querySelectorAll('a[href*="page="]').forEach((a) => {
      const m = a.href.match(/[?&]page=(\d+)/);
      if (m) totalPages = Math.max(totalPages, Number.parseInt(m[1], 10));
    });
    const pagesToScrape = MAX_PAGES > 0 ? Math.min(totalPages, MAX_PAGES) : totalPages;
    log(`Total pages: ${totalPages}, scraping: ${pagesToScrape} (URL: ${basePath})`);

    const seen = new Set();
    for (let page = 1; page <= pagesToScrape; page++) {
      log(`Fetching page ${page}/${pagesToScrape}...`);
      const pageUrl = new URL(listUrl.href);
      pageUrl.searchParams.set('page', String(page));
      try {
        const doc = await fetchDocument(pageUrl.href);
        // Use data-id from Detail buttons (same as selected mode) — NOT checkbox values
        doc.querySelectorAll('.updateModal[data-id]').forEach((btn) => {
          if (btn.dataset.id) seen.add(btn.dataset.id);
        });
      } catch (err) {
        log(`⚠️ Failed to fetch page ${page}: ${err.message}`);
      }
      await sleep(DELAY_MS);
    }
    commodityIds = [...seen];
    log(`Found ${commodityIds.length} total products`);
  }

  // Step 2: Fetch detail page for each product and extract data
  const products = [];
  const allImages = [];

  for (let i = 0; i < commodityIds.length; i++) {
    const commodityId = commodityIds[i];
    log(`Fetching detail ${i + 1}/${commodityIds.length} (commodity_id=${commodityId})...`);

    try {
      const doc = await fetchDocument(`${DETAIL_PATH}?commodity_id=${encodeURIComponent(commodityId)}`);
      const { product, images } = extractProduct(doc, commodityId);
      products.push(product);
      allImages.push(...images);
    } catch (err) {
      log(`❌ Error fetching commodity_id=${commodityId}: ${err.message}`);
    }

    await sleep(DELAY_MS);
  }

  log(`Scraped ${products.length} products` + (includeImages ? ` with ${allImages.length} total images` : ''));

  if (products.length === 0) {
    log('❌ No product details could be scraped; nothing was sent.');
    return;
  }

  // Step 3: Send product data to Candye Hub
  log('Sending product data to Candye Hub...');

  try {
    const productResult = await postJson('/api/scrape/products', { products });
    log(`✅ Product data sent: ${productResult.message || 'OK'}`);
  } catch (err) {
    log(`❌ Failed to send product data: ${err.message}`);
    if (selectionMode === 'selected') {
      log('Collected IDs were kept — fix the problem and run again with none checked to retry.');
    }
    return;
  }

  // The products are delivered, so the queued selection is no longer needed.
  if (selectionMode === 'selected') {
    sessionStorage.removeItem(STORAGE_KEY);
  }

  // Step 4: Download and send images (only in image modes)
  if (!includeImages || allImages.length === 0) {
    log(`🎉 Product scrape complete! ${products.length} products sent (no images)`);
    return;
  }

  log('Downloading images...');
  const imageData = [];

  for (let i = 0; i < allImages.length; i++) {
    const img = allImages[i];
    try {
      const resp = await fetch(img.url);
      if (resp.ok) {
        const blob = await resp.blob();
        const ct = resp.headers.get('content-type') || '';
        let ext = '.jpg';
        if (ct.includes('png')) ext = '.png';
        else if (ct.includes('webp')) ext = '.webp';

        const base64 = await blobToDataUrl(blob);

        imageData.push({
          spu: img.spu,
          sku: img.sku,
          type: img.type,
          isTryOn: img.isTryOn,
          index: img.index,
          filename: `F${img.sku}-${img.type}-${img.index}${ext}`,
          mimeType: ct || 'image/jpeg',
          base64
        });
      } else {
        log(`⚠️ Image ${img.url} returned ${resp.status}`);
      }
    } catch (err) {
      log(`⚠️ Failed to download ${img.url}: ${err.message}`);
    }

    // i + 1 images have been attempted at this point.
    if ((i + 1) % 10 === 0) {
      log(`Downloaded ${i + 1}/${allImages.length} images...`);
    }
    await sleep(100);
  }

  log(`Downloaded ${imageData.length}/${allImages.length} images`);

  // Send images in batches to keep each request body small
  let sentImages = 0;
  for (let i = 0; i < imageData.length; i += IMAGE_BATCH_SIZE) {
    const batch = imageData.slice(i, i + IMAGE_BATCH_SIZE);
    const batchNo = Math.floor(i / IMAGE_BATCH_SIZE) + 1;
    try {
      const imgResult = await postJson('/api/scrape/images', { images: batch });
      sentImages += batch.length;
      log(`✅ Image batch ${batchNo} sent: ${imgResult.message || 'OK'}`);
    } catch (err) {
      log(`❌ Failed to send image batch ${batchNo}: ${err.message}`);
    }
    await sleep(200);
  }

  log(`🎉 Scraping complete! ${products.length} products, ${sentImages}/${imageData.length} images sent`);
})();

How to run

  1. Log into crm.wohuoptical.com
  2. Go to the Listing Support page and apply any filters
  3. Select products (if using a 'selected' mode)
  4. Open Chrome DevTools (F12), then the Console tab
  5. Paste the script and press Enter
  6. Wait for completion, then check the Scraped Products page