0% found this document useful (0 votes)
10 views4 pages

Script

This document contains JavaScript code that uses Puppeteer and Cheerio to scrape information about hotels, cafes, clubs, and rental services for specified places in West Bengal, India. It automates scrolling through Google Maps search results, collects details such as ratings, phone numbers, websites, and images, and organizes the data into a structured JSON format. The script handles errors and logs the scraping progress for each place name provided in the input array.

Uploaded by

abdhaka394
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
10 views4 pages

Script

This document contains JavaScript code that uses Puppeteer and Cheerio to scrape information about hotels, cafes, clubs, and rental services for specified places in West Bengal, India. It automates scrolling through Google Maps search results, collects details such as ratings, phone numbers, websites, and images, and organizes the data into a structured JSON format. The script handles errors and logs the scraping progress for each place name provided in the input array.

Uploaded by

abdhaka394
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 4

import puppeteer from 'puppeteer';

import * as cheerio from 'cheerio';

// Places to scrape. Each entry is interpolated verbatim into the search
// queries built by scrapePlace(), so the exact wording matters.
const placeNames = [
  'Phalut, Darjeeling West Bengal',
  'Alipurduar West Bengal',
  'Jalpaiguri West Bengal',
  'Kalimpong, West Bengal',
];

/**
 * Scrolls the `div[role="feed"]` element of the given page downwards in
 * fixed steps until the accumulated scroll distance reaches the feed's
 * scrollable height. Does nothing when the page has no such element.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback uses
 *   `document`, so it is expected to run in a browser page context.
 * @returns {Promise<void>} Settles once scrolling has stopped.
 */
async function autoScroll(page) {
  await page.evaluate(async () => {
    // Everything the callback needs is declared inside it: it only relies
    // on `document` and timers, never on variables from the outer module.
    const STEP_PX = 300;
    const TICK_MS = 200;

    const resultsFeed = document.querySelector('div[role="feed"]');
    if (!resultsFeed) {
      return;
    }

    let scrolledSoFar = 0;
    await new Promise((done) => {
      const tick = () => {
        // Height is sampled before scrolling; the limit is re-evaluated on
        // every tick because the feed may grow as more results load.
        const heightBeforeScroll = resultsFeed.scrollHeight;
        resultsFeed.scrollBy(0, STEP_PX);
        scrolledSoFar += STEP_PX;

        const scrollableDistance = heightBeforeScroll - resultsFeed.clientHeight;
        if (scrolledSoFar >= scrollableDistance) {
          clearInterval(intervalId);
          done();
        }
      };
      const intervalId = setInterval(tick, TICK_MS);
    });
  });
}

/**
 * Pauses the calling async flow for the given duration.
 *
 * @param {number} ms - Time to wait, in milliseconds (passed to setTimeout).
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}

/** Placeholder used when a field cannot be located on a detail page. */
const NOT_FOUND = 'Not found';

/**
 * Converts scraped rating text to a number when it parses as one.
 *
 * @param {string} ratingText - Rating text, or the "Not found" placeholder.
 * @returns {number|string} The parsed number, or the original text otherwise.
 */
function parseRating(ratingText) {
  const value = Number.parseFloat(ratingText);
  return Number.isNaN(value) ? ratingText : value;
}

/**
 * Pulls rating, phone, website and main image out of a detail page's HTML.
 *
 * @param {string} html - Full HTML of a place detail page.
 * @returns {{rating: string, phone: string, website: string, mainImage: string}}
 *   Each field falls back to the "Not found" placeholder when absent.
 */
function extractPlaceDetails(html) {
  const $ = cheerio.load(html);
  return {
    rating:
      $('div.F7nice > span > span[aria-hidden="true"]').first().text().trim() ||
      NOT_FOUND,
    phone: $('button[data-item-id*="phone:tel:"]').text().trim() || NOT_FOUND,
    website: $('a[data-item-id="authority"]').attr('href') || NOT_FOUND,
    mainImage:
      $('button[jsaction*="heroHeaderImage"] img').attr('src') || NOT_FOUND,
  };
}

/**
 * Builds the output record for one place, using category-specific key names.
 *
 * @param {string} categoryKey - One of the search keys used by scrapePlace.
 * @param {{name: string, href: string}} item - Listing name and detail URL.
 * @param {{rating: string, phone: string, website: string, mainImage: string}} details
 *   Fields extracted from the detail page.
 * @returns {object} Record keyed as `<prefix>Name`, `<prefix>Rating`, etc.
 */
function buildResultData(categoryKey, item, details) {
  const isRental = categoryKey === 'rantel';

  let prefix;
  if (isRental) {
    // Rental listings are split into bikes and cars based on the listing name.
    const lowerCaseName = item.name.toLowerCase();
    const looksLikeBike =
      lowerCaseName.includes('bike') || lowerCaseName.includes('scooter');
    prefix = looksLikeBike ? 'rentBike' : 'rentCar';
  } else {
    // 'hotels' -> 'hotel', 'clubs' -> 'club', 'cafe' stays 'cafe'.
    prefix = categoryKey.replace(/s$/, '');
  }

  const resultData = {
    [`${prefix}Name`]: item.name,
    [`${prefix}Rating`]: parseRating(details.rating),
    [`${prefix}Number`]: details.phone,
    [`${prefix}Location`]: item.href,
  };
  if (details.website !== NOT_FOUND) {
    resultData[`${prefix}Website`] = details.website;
  }
  // Rental records never carry an image.
  if (!isRental && details.mainImage !== NOT_FOUND) {
    resultData[`${prefix}Image`] = [details.mainImage];
  }
  return resultData;
}

/**
 * Scrapes Google Maps search results for one place across four categories
 * (hotels, cafe, clubs, rentals) and returns the details of each listing.
 *
 * For every category it opens the search results, scrolls the feed, collects
 * up to 50 listing links, then visits each link to read its details. A
 * listing that fails is skipped; a category that fails keeps whatever was
 * collected before the failure.
 *
 * @param {string} placeName - Place text interpolated into each search query.
 * @returns {Promise<object>} Object keyed by category ('hotels', 'cafe',
 *   'clubs', 'rantel'), each holding an array of result records.
 */
async function scrapePlace(placeName) {
  // The 'rantel' spelling is kept as-is: it is an output key read by callers.
  const searches = [
    { key: 'hotels', query: `${placeName} nearby hotels` },
    { key: 'cafe', query: `${placeName} nearby cafe` },
    { key: 'clubs', query: `${placeName} nearby bar or night club or wine shop` },
    { key: 'rantel', query: `${placeName} nearby car and bike rental` },
  ];

  const browser = await puppeteer.launch({
    headless: false,
    args: ['--lang=en-US', '--accept-lang=en-US', '--no-sandbox'],
  });

  const finalResults = {};

  try {
    for (const search of searches) {
      console.log(
        `\n-> Scraping category: "${search.key}" for place: "${placeName}"`,
      );
      const categoryResults = [];
      let listPage;
      try {
        listPage = await browser.newPage();
        await listPage.setViewport({ width: 1440, height: 900 });
        const listUrl = `https://www.google.com/maps/search/${encodeURIComponent(search.query)}`;
        await listPage.goto(listUrl, {
          waitUntil: 'domcontentloaded',
          timeout: 10000,
        });
        const feedSelector = 'div[role="feed"]';
        await listPage.waitForSelector(feedSelector, { timeout: 10000 });
        console.log(' ... Scrolling to load all place listings...');
        await autoScroll(listPage);
        await delay(2000);

        // This callback runs in the page, so it must not reference outer variables.
        const itemLinks = await listPage.$$eval('div.Nv2PK', (els) => {
          return els
            .slice(0, 50)
            .map((el) => {
              const linkElement = el.querySelector('a.hfpxzc');
              return linkElement
                ? {
                    name: linkElement.getAttribute('aria-label'),
                    href: linkElement.href,
                  }
                : null;
            })
            .filter((item) => item !== null);
        });
        // The list page is no longer needed once the links are collected.
        await listPage.close();

        if (itemLinks.length === 0) {
          console.log(
            ` ... Found 0 items. The page structure may have changed. Skipping category.`,
          );
          continue;
        }
        console.log(
          ` ... Found ${itemLinks.length} items. Now visiting each URL for details.`,
        );

        for (const [index, item] of itemLinks.entries()) {
          if (!item.name || !item.href) continue;
          console.log(
            ` - Processing (${index + 1}/${itemLinks.length}): ${item.name}`,
          );
          const detailPage = await browser.newPage();
          try {
            await detailPage.goto(item.href, {
              waitUntil: 'domcontentloaded',
              timeout: 10000,
            });
            await delay(1500);
            await detailPage.waitForSelector('h1.DUwDvf', { timeout: 7000 });
            const details = extractPlaceDetails(await detailPage.content());
            categoryResults.push(buildResultData(search.key, item, details));
          } catch (detailError) {
            // One bad listing must not abort the whole category.
            console.error(
              ` - SKIPPING "${item.name}" due to an error: ${detailError.message.split('\n')[0]}`,
            );
          } finally {
            await detailPage.close();
          }
        }
        console.log(
          ` ... Finished category "${search.key}", successfully parsed ${categoryResults.length} items.`,
        );
      } catch (criticalError) {
        console.error(`\n--- SCRIPT INTERRUPTED BY A CRITICAL ERROR ---`);
        console.error(
          `Failed during category "${search.key}": ${criticalError.message}`,
        );
        if (listPage && !listPage.isClosed()) await listPage.close();
      } finally {
        // Always record the category, including partial results gathered
        // before a critical error (this also runs on the `continue` above).
        finalResults[search.key] = categoryResults;
      }
    }
  } finally {
    // Close the browser even if something unexpected escapes the loop.
    await browser.close();
  }
  return finalResults;
}

// Entry point: scrapes every configured place in sequence and prints the
// combined result as pretty-printed JSON.
(async () => {
  console.log(`Starting the definitive detailed scrape...`);
  const finalOutput = [];

  for (const placeName of placeNames) {
    console.log(`\nProcessing place: "${placeName}"`);

    // A failure for one place (e.g. the browser cannot be launched) must not
    // discard the results already collected for the other places.
    let results = {};
    try {
      results = await scrapePlace(placeName);
    } catch (placeError) {
      const reason =
        placeError instanceof Error ? placeError.message : String(placeError);
      console.error(`Failed to scrape place "${placeName}": ${reason}`);
    }

    finalOutput.push({
      placeName: placeName,
      hotels: results.hotels || [],
      cafe: results.cafe || [],
      clubs: results.clubs || [],
      rantel: results.rantel || [],
    });
  }

  console.log("\n\n--- SCRAPING COMPLETE ---");
  console.log("\nFinal JSON Output:\n");
  console.log(JSON.stringify(finalOutput, null, 2));
})().catch((fatalError) => {
  // Report anything unexpected explicitly rather than leaving a floating promise.
  console.error(fatalError);
  process.exitCode = 1;
});

You might also like