import puppeteer from 'puppeteer';
import * as cheerio from 'cheerio';
// Free-text place names to scrape, listed in the order they should be processed.
const placeNames = [
  'Phalut, Darjeeling West Bengal',
  'Alipurduar West Bengal',
  'Jalpaiguri West Bengal',
  'Kalimpong, West Bengal',
];
/**
 * Scrolls the `div[role="feed"]` element of the given page downwards in fixed
 * steps until the accumulated scroll distance covers the feed's scrollable
 * height. Does nothing when the page has no such element.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`, which runs `fn` in the page context.
 * @returns {Promise<void>} Resolves once scrolling has finished.
 */
async function autoScroll(page) {
  const STEP_PX = 300;
  const TICK_MS = 200;

  // The callback executes inside the page, so it cannot close over outer
  // variables; the step size and tick interval are handed over as arguments.
  await page.evaluate(
    async (stepPx, tickMs) => {
      const panel = document.querySelector('div[role="feed"]');
      if (!panel) {
        return;
      }

      let scrolledSoFar = 0;
      await new Promise((finish) => {
        const handle = setInterval(function tick() {
          // Read the height before scrolling: the comparison below uses the
          // value the feed had at the start of this tick.
          const heightBeforeScroll = panel.scrollHeight;
          panel.scrollBy(0, stepPx);
          scrolledSoFar += stepPx;

          const reachedEnd = scrolledSoFar >= heightBeforeScroll - panel.clientHeight;
          if (!reachedEnd) {
            return;
          }
          clearInterval(handle);
          finish();
        }, tickMs);
      });
    },
    STEP_PX,
    TICK_MS,
  );
}
/**
 * Pauses the calling async flow for a number of milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) after the timer fires.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Reads rating, phone, website and hero image from a place-detail page.
 * Each field falls back to the string 'Not found' when its selector matches nothing.
 *
 * NOTE(review): the selectors are obfuscated Google Maps class names and are
 * presumably unstable across Maps releases — verify when results come back empty.
 *
 * @param {string} html - Full HTML of the detail page.
 * @returns {{rating: string, phone: string, website: string, mainImage: string}}
 */
function extractPlaceDetails(html) {
  const $ = cheerio.load(html);
  return {
    rating: $('div.F7nice > span > span[aria-hidden="true"]').first().text().trim() || 'Not found',
    phone: $('button[data-item-id*="phone:tel:"]').text().trim() || 'Not found',
    website: $('a[data-item-id="authority"]').attr('href') || 'Not found',
    mainImage: $('button[jsaction*="heroHeaderImage"] img').attr('src') || 'Not found',
  };
}

/**
 * Builds the output record for one listing, with keys prefixed per category.
 *
 * For the 'rantel' category the prefix is 'rentBike' when the listing name
 * mentions "bike" or "scooter", otherwise 'rentCar', and no image is attached.
 * For every other category the prefix is the category key without a trailing
 * "s" (hotels -> hotel, clubs -> club, cafe -> cafe).
 *
 * @param {string} categoryKey - Search category key ('hotels', 'cafe', 'clubs', 'rantel').
 * @param {{name: string, href: string}} item - Listing name and Maps URL.
 * @param {{rating: string, phone: string, website: string, mainImage: string}} details
 * @returns {Object} Record with `<prefix>Name`, `<prefix>Rating`, `<prefix>Number`,
 *   `<prefix>Location` and, when available, `<prefix>Website` / `<prefix>Image`.
 */
function buildPlaceRecord(categoryKey, item, details) {
  const isRental = categoryKey === 'rantel';
  let prefix;
  if (isRental) {
    const lowerCaseName = item.name.toLowerCase();
    const isBike = lowerCaseName.includes('bike') || lowerCaseName.includes('scooter');
    prefix = isBike ? 'rentBike' : 'rentCar';
  } else {
    prefix = categoryKey.replace(/s$/, '');
  }

  const record = {
    [`${prefix}Name`]: item.name,
    // Numeric when the text parses to a non-zero number, otherwise the raw text.
    [`${prefix}Rating`]: Number.parseFloat(details.rating) || details.rating,
    [`${prefix}Number`]: details.phone,
    [`${prefix}Location`]: item.href,
  };
  if (details.website !== 'Not found') {
    record[`${prefix}Website`] = details.website;
  }
  if (!isRental && details.mainImage !== 'Not found') {
    record[`${prefix}Image`] = [details.mainImage];
  }
  return record;
}

/**
 * Scrapes Google Maps search results around a place for four categories
 * (hotels, cafes, bars/clubs, vehicle rentals) and visits each listing's
 * detail page to collect rating, phone, website and image.
 *
 * A failure on a single listing is logged and skipped. A failure of a whole
 * category is logged, and whatever was collected for it so far is kept. The
 * browser is always closed before returning or throwing.
 *
 * @param {string} placeName - Free-text place name used as the prefix of each search query.
 * @returns {Promise<Object<string, Array<Object>>>} Map from category key
 *   ('hotels', 'cafe', 'clubs', 'rantel') to the records scraped for it.
 */
async function scrapePlace(placeName) {
  const searches = [
    { key: 'hotels', query: `${placeName} nearby hotels` },
    { key: 'cafe', query: `${placeName} nearby cafe` },
    { key: 'clubs', query: `${placeName} nearby bar or night club or wine shop` },
    // The 'rantel' spelling is part of the returned shape, so it is left as-is.
    { key: 'rantel', query: `${placeName} nearby car and bike rental` },
  ];
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--lang=en-US', '--accept-lang=en-US', '--no-sandbox'],
  });
  const finalResults = {};

  try {
    for (const search of searches) {
      console.log(`\n-> Scraping category: "${search.key}" for place: "${placeName}"`);
      const categoryResults = [];
      let listPage;
      try {
        listPage = await browser.newPage();
        await listPage.setViewport({ width: 1440, height: 900 });
        const listUrl = `https://www.google.com/maps/search/${encodeURIComponent(search.query)}`;
        await listPage.goto(listUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });
        const feedSelector = 'div[role="feed"]';
        await listPage.waitForSelector(feedSelector, { timeout: 10000 });
        console.log(' ... Scrolling to load all place listings...');
        await autoScroll(listPage);
        // Give the feed a moment to render the last batch of cards.
        await delay(2000);

        // Runs in the page: take at most 50 result cards and keep those with a link.
        const itemLinks = await listPage.$$eval('div.Nv2PK', (els) => {
          return els.slice(0, 50).map((el) => {
            const linkElement = el.querySelector('a.hfpxzc');
            return linkElement ? {
              name: linkElement.getAttribute('aria-label'),
              href: linkElement.href,
            } : null;
          }).filter((item) => item !== null);
        });
        await listPage.close();

        if (itemLinks.length === 0) {
          console.log(' ... Found 0 items. The page structure may have changed. Skipping category.');
          finalResults[search.key] = [];
          continue;
        }
        console.log(` ... Found ${itemLinks.length} items. Now visiting each URL for details.`);

        for (const [index, item] of itemLinks.entries()) {
          if (!item.name || !item.href) continue;
          console.log(` - Processing (${index + 1}/${itemLinks.length}): ${item.name}`);
          const detailPage = await browser.newPage();
          try {
            await detailPage.goto(item.href, { waitUntil: 'domcontentloaded', timeout: 10000 });
            await delay(1500);
            await detailPage.waitForSelector('h1.DUwDvf', { timeout: 7000 });
            const details = extractPlaceDetails(await detailPage.content());
            categoryResults.push(buildPlaceRecord(search.key, item, details));
          } catch (detailError) {
            // Only the first line of the message is logged; the rest is typically a long trace.
            console.error(` - SKIPPING "${item.name}" due to an error: ${detailError.message.split('\n')[0]}`);
          } finally {
            await detailPage.close();
          }
        }

        finalResults[search.key] = categoryResults;
        console.log(` ... Finished category "${search.key}", successfully parsed ${categoryResults.length} items.`);
      } catch (criticalError) {
        console.error(`\n--- SCRIPT INTERRUPTED BY A CRITICAL ERROR ---`);
        console.error(`Failed during category "${search.key}": ${criticalError.message}`);
        // Keep whatever was collected before the failure instead of dropping it.
        finalResults[search.key] = categoryResults;
        if (listPage && !listPage.isClosed()) await listPage.close();
      }
    }
  } finally {
    // Always release the browser, even if cleanup in a catch handler throws.
    await browser.close();
  }
  return finalResults;
}
// Entry point: scrape every configured place one after another and print the
// combined result as pretty-printed JSON. A place whose scrape throws is logged
// and reported with empty categories, so results already collected for other
// places are still printed; the process exit code is set to 1 in that case.
(async () => {
  console.log(`Starting the definitive detailed scrape...`);
  const finalOutput = [];
  for (const placeName of placeNames) {
    console.log(`\nProcessing place: "${placeName}"`);
    let results = {};
    try {
      results = await scrapePlace(placeName);
    } catch (placeError) {
      const reason = placeError instanceof Error ? placeError.message : String(placeError);
      console.error(`Failed to scrape place "${placeName}": ${reason}`);
      process.exitCode = 1;
    }
    finalOutput.push({
      placeName,
      hotels: results.hotels || [],
      cafe: results.cafe || [],
      clubs: results.clubs || [],
      rantel: results.rantel || [],
    });
  }
  console.log("\n\n--- SCRAPING COMPLETE ---");
  console.log("\nFinal JSON Output:\n");
  console.log(JSON.stringify(finalOutput, null, 2));
})().catch((fatalError) => {
  // Last-resort handler so the top-level promise is never left unhandled.
  console.error(fatalError);
  process.exitCode = 1;
});