Skip to content

Commit 1fb672e

Browse files
committed
web scraping
1 parent 7c89733 commit 1fb672e

File tree

5 files changed

+107
-12
lines changed

5 files changed

+107
-12
lines changed

package-lock.json

Lines changed: 61 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"dependencies": {
1717
"@types/axios": "^0.14.0",
1818
"@types/body-parser": "^1.19.0",
19+
"@types/cheerio": "^0.22.22",
1920
"@types/express": "^4.17.8",
2021
"@types/joi": "^14.3.4",
2122
"@types/jsonwebtoken": "^8.5.0",
@@ -24,9 +25,11 @@
2425
"@types/node": "^14.14.2",
2526
"@types/node-schedule": "^1.3.1",
2627
"@types/nodemailer": "^6.4.0",
28+
"@types/request": "^2.48.5",
2729
"axios": "^0.21.0",
2830
"bcrypt": "^5.0.0",
2931
"body-parser": "^1.19.0",
32+
"cheerio": "^1.0.0-rc.3",
3033
"emailjs": "^3.3.0",
3134
"emailjs-com": "^2.6.3",
3235
"express": "^4.17.1",
@@ -40,7 +43,8 @@
4043
"nodemailer": "^6.4.14",
4144
"nodemailer-sendgrid-transport": "^0.2.0",
4245
"nodemon": "^2.0.6",
46+
"request": "^2.88.2",
4347
"ts-node": "^9.0.0",
4448
"typescript": "^4.0.3"
4549
}
46-
}
50+
}

readme.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,17 @@ npm install --save @types/joi
137137
- npm i loadtest
138138
-loadtest [-n requests] [-c concurrency] [-k] URL
139139
-loadtest -n 100 -c 20 -k "https://node-ts-blog.herokuapp.com/api/user/login"
140+
141+
142+
## For Web Scraping
143+
144+
- To fetch the HTML of a web page, use "Request - Simplified HTTP client"
145+
- npm i request
146+
- npm i @types/request
147+
148+
- To extract specific data from the fetched HTML, use "cheerio"
149+
- npm i cheerio
150+
- npm i @types/cheerio
140151

141152

142153

src/controllers/UserControllers.ts

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import { getEnvironmentVariable } from '../environments/env';
55
import { Emailjs } from '../utils/Emailjs';
66
import { EmailTemplate } from '../utils/TemplateEmailjs';
77

8+
import * as Cheerio from 'cheerio';
9+
import * as Request from 'request';
10+
811
export class UserController {
912

1013
static async signup(req, res, next) {
@@ -178,10 +181,30 @@ export class UserController {
178181

179182
// Multi Field Indexing
180183
static async test(req, res, next) {
181-
const user = await User.find({ email: 'rahul.k@mailinator.com',
182-
password:'$2b$10$5mRQr1IU5SjQfDR6WeKNJOoLcZB9IiFg0i81tdgSWReXRKV7Cm5VG"' })
183-
.setOptions({explain:'executionStats'});
184-
res.send(user)
184+
const user = await User.find({
185+
email: 'rahul.k@mailinator.com',
186+
password: '$2b$10$5mRQr1IU5SjQfDR6WeKNJOoLcZB9IiFg0i81tdgSWReXRKV7Cm5VG"'
187+
})
188+
.setOptions({ explain: 'executionStats' });
189+
res.send(user)
190+
}
191+
192+
193+
/**
 * Scrapes the product cards from the webscraper.io e-commerce test page and
 * responds with an array of `{ title, image, description, price }` objects,
 * one per `.thumbnail` element found in the page.
 *
 * Every failure path (network error, non-200 upstream status, or an exception
 * while parsing) is forwarded to `next`, so the incoming request is always
 * answered instead of being left hanging.
 *
 * @param req  Incoming Express request (not read).
 * @param res  Express response; receives the scraped array on success.
 * @param next Express continuation; receives the error on failure.
 */
static async webScraping(req, res, next) {
    Request('https://webscraper.io/test-sites/e-commerce/allinone', (error, response, html) => {
        // Transport-level failure (DNS, connection refused, timeout, ...).
        if (error) {
            return next(error);
        }

        // The upstream site answered, but not with the page we expected.
        if (response.statusCode !== 200) {
            return next(new Error('Web scraping failed: upstream responded with status ' + response.statusCode));
        }

        // This callback runs outside the async function's promise, so a throw
        // here would be an uncaught exception; catch it and hand it to Express.
        try {
            const $ = Cheerio.load(html);
            const data = [];
            $('.thumbnail').each((index, element) => {
                const card = $(element);
                data.push({
                    title: card.find('.title').text(),
                    image: card.find('.img-responsive').attr('src'),
                    description: card.find('.description').text(),
                    price: card.find('.price').text()
                });
            });
            res.send(data);
        } catch (parseError) {
            next(parseError);
        }
    });
}
186209

187210

src/routers/UserRouter.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ class UserRouter {
2929

3030
//To check the index
3131
this.router.get('/test',UserController.test)
32+
33+
//To check web-scraping
34+
this.router.get('/web-scraping',UserController.webScraping)
3235

3336
}
3437
postRoutes() {

0 commit comments

Comments
 (0)