Skip to content

Commit d6b455a

Browse files
committed
Error handling improved
1 parent 12328b4 commit d6b455a

File tree

8 files changed

+97
-30
lines changed

8 files changed

+97
-30
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package com.olegshan.exception;
2+
3+
/**
 * Checked exception raised when a job site cannot be reached or when an
 * expected piece of data cannot be extracted from a page.
 */
public class ParserException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that carries only a description of the failure.
     *
     * @param message description of what went wrong
     */
    public ParserException(String message) {
        super(message);
    }

    /**
     * Creates an exception that keeps the original failure attached, so the
     * root cause stays visible in stack traces.
     *
     * @param message description of what went wrong
     * @param cause   the underlying exception that triggered this one
     */
    public ParserException(String message, Throwable cause) {
        super(message, cause);
    }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package com.olegshan.notifier;
2+
3+
import org.slf4j.Logger;
4+
import org.slf4j.LoggerFactory;
5+
import org.springframework.stereotype.Component;
6+
7+
@Component
8+
public class Notifier {
9+
10+
private static final Logger LOGGER = LoggerFactory.getLogger(Notifier.class);
11+
12+
public void notifyAdmin(String message) {
13+
//TODO implement the method
14+
LOGGER.error("Admin was notified about following issue: " + message);
15+
}
16+
}

src/main/java/com/olegshan/parser/impl/ParserImpl.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package com.olegshan.parser.impl;
22

33
import com.olegshan.entity.Job;
4+
import com.olegshan.exception.ParserException;
5+
import com.olegshan.notifier.Notifier;
46
import com.olegshan.parser.Parser;
57
import com.olegshan.parser.siteparsers.JobParser;
68
import com.olegshan.service.JobService;
@@ -18,24 +20,27 @@
1820
@Component
1921
public class ParserImpl implements Parser {
2022

21-
2223
private static final Logger LOGGER = LoggerFactory.getLogger(ParserImpl.class);
2324

2425
private JobService jobService;
26+
private Notifier notifier;
2527

2628
@Autowired
27-
public ParserImpl(JobService jobService) {
29+
public ParserImpl(JobService jobService, Notifier notifier) {
    // Collaborators are supplied by the container: the notifier reports
    // parsing failures, the job service stores the parsed jobs.
    this.notifier = notifier;
    this.jobService = jobService;
}
3033

3134
public void parse(JobSite jobSite) {
3235

3336
JobParser jobParser = jobSite.getParser();
34-
Document doc = jobParser.getDoc(jobSite.getSiteUrl());
3537

36-
if (doc != null) {
38+
try {
39+
Document doc = jobParser.getDoc(jobSite.getSiteUrl());
3740
Elements jobBlocks = jobParser.getJobBlocks(doc);
41+
3842
for (Element job : jobBlocks) {
43+
3944
Elements titleBlock = jobParser.getTitleBlock(job);
4045
String url = jobSite.getUrlPrefix() + titleBlock.attr("href");
4146
String title = jobParser.getTitle(titleBlock);
@@ -46,7 +51,11 @@ public void parse(JobSite jobSite) {
4651
Job parsedJob = new Job(title, description, company, jobSite.getSiteName(), url, date);
4752
jobService.save(parsedJob);
4853
}
49-
LOGGER.info("Parsing of {} completed", jobSite.getSiteName());
54+
LOGGER.info("Parsing of {} completed\n", jobSite.getSiteName());
55+
} catch (ParserException e) {
56+
notifier.notifyAdmin(e.getMessage());
5057
}
5158
}
59+
60+
5261
}

src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.olegshan.parser.siteparsers;
22

3+
import com.olegshan.exception.ParserException;
34
import com.olegshan.sites.JobSite;
45
import com.olegshan.tools.MonthsTools;
56
import org.jsoup.nodes.Document;
@@ -18,12 +19,14 @@ public DouUaJobParser(JobSite jobSite) {
1819
}
1920

2021
@Override
21-
public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
22+
public LocalDateTime getDate(Element job, String url, Elements titleBlock) throws ParserException {
2223

2324
Document dateDoc = getDoc(url);
25+
2426
String dateLine = dateDoc.getElementsByAttributeValue(
2527
jobSite.getDateData()[0],
2628
jobSite.getDateData()[1]).text();
29+
check(dateLine, "date line");
2730
String[] dateParts = dateLine.split(jobSite.getSplit());
2831
MonthsTools.removeZero(dateParts);
2932

src/main/java/com/olegshan/parser/siteparsers/JobParser.java

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.olegshan.parser.siteparsers;
22

3+
import com.olegshan.exception.ParserException;
34
import com.olegshan.parser.Parser;
45
import com.olegshan.sites.JobSite;
56
import com.olegshan.tools.MonthsTools;
@@ -16,7 +17,6 @@
1617
import java.time.LocalTime;
1718
import java.time.ZoneId;
1819

19-
import static java.lang.Integer.decode;
2020
import static java.lang.Integer.parseInt;
2121

2222
public class JobParser {
@@ -29,22 +29,27 @@ public JobParser(JobSite jobSite) {
2929
this.jobSite = jobSite;
3030
}
3131

32-
public Document getDoc(String siteUrl) {
33-
Document doc = null;
32+
public Document getDoc(String siteUrl) throws ParserException {
33+
Document doc;
3434
try {
3535
doc = Jsoup.connect(siteUrl).userAgent("Mozilla").timeout(0).get();
3636
} catch (IOException e) {
3737
LOGGER.error("Connecting to {} failed", siteUrl);
38+
throw new ParserException("Failed connecting to " + siteUrl);
3839
}
3940
return doc;
4041
}
4142

42-
public Elements getJobBlocks(Document doc) {
43-
return doc.getElementsByAttributeValue(jobSite.getJobBox()[0], jobSite.getJobBox()[1]);
43+
public Elements getJobBlocks(Document doc) throws ParserException {
44+
Elements jobBlocks = doc.getElementsByAttributeValue(jobSite.getJobBox()[0], jobSite.getJobBox()[1]);
45+
check(jobBlocks, "job blocks");
46+
return jobBlocks;
4447
}
4548

46-
public Elements getTitleBlock(Element job) {
47-
return job.getElementsByAttributeValue(jobSite.getTitleBox()[0], jobSite.getTitleBox()[1]);
49+
public Elements getTitleBlock(Element job) throws ParserException {
50+
Elements titleBlock = job.getElementsByAttributeValue(jobSite.getTitleBox()[0], jobSite.getTitleBox()[1]);
51+
check(titleBlock, "title blocks");
52+
return titleBlock;
4853
}
4954

5055
public String getTitle(Elements titleBlock) {
@@ -53,15 +58,22 @@ public String getTitle(Elements titleBlock) {
5358

5459
public String getDescription(Element job) {
5560
String[] descriptionData = jobSite.getDescriptionData();
56-
return job.getElementsByAttributeValue(descriptionData[0], descriptionData[1]).text();
61+
String description = job.getElementsByAttributeValue(descriptionData[0], descriptionData[1]).text();
62+
return description;
5763
}
5864

59-
public String getCompany(Element job, String url) {
60-
return job.getElementsByAttributeValue(jobSite.getCompanyData()[0], jobSite.getCompanyData()[1]).text();
65+
public String getCompany(Element job, String url) throws ParserException {
66+
String company = job.getElementsByAttributeValue(jobSite.getCompanyData()[0], jobSite.getCompanyData()[1]).text();
67+
check(company, "company");
68+
return company;
6169
}
6270

63-
public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
64-
return getDateByLine(job.getElementsByAttributeValue(jobSite.getDateData()[0], jobSite.getDateData()[1]).text());
71+
public LocalDateTime getDate(Element job, String url, Elements titleBlock) throws ParserException {
72+
String dateLine = job.getElementsByAttributeValue(jobSite.getDateData()[0],
73+
jobSite.getDateData()[1]).text();
74+
check(dateLine, "date");
75+
return getDateByLine(job.getElementsByAttributeValue(jobSite.getDateData()[0],
76+
jobSite.getDateData()[1]).text());
6577
}
6678

6779
protected LocalDateTime getDateByLine(String dateLine) {
@@ -73,4 +85,11 @@ protected LocalDateTime getDateByLine(String dateLine) {
7385
protected LocalTime getTime() {
7486
return LocalTime.now(ZoneId.of("Europe/Athens"));
7587
}
88+
89+
protected void check(Object o, String data) throws ParserException {
90+
if (o == null || o.toString().length() == 0) {
91+
LOGGER.error("Error getting {} from {}", data, jobSite.getSiteName());
92+
throw new ParserException("Error getting " + data + " from " + jobSite.getSiteName());
93+
}
94+
}
7695
}

src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.olegshan.parser.siteparsers;
22

3+
import com.olegshan.exception.ParserException;
34
import com.olegshan.sites.JobSite;
45
import com.olegshan.tools.MonthsTools;
56
import org.jsoup.nodes.Document;
@@ -25,10 +26,13 @@ protected LocalDateTime getDateByLine(String dateLine) {
2526
}
2627

2728
@Override
28-
public String getCompany(Element job, String url) {
29+
public String getCompany(Element job, String url) throws ParserException {
2930
String[] companyData = jobSite.getCompanyData();
3031
Document jobDoc = getDoc(url);
3132
Elements companyBlock = jobDoc.getElementsByAttributeValue(companyData[0], companyData[1]);
32-
return companyBlock.get(0).getElementsByTag("a").first().text();
33+
34+
String company = companyBlock.get(0).getElementsByTag("a").first().text();
35+
check(company, "company");
36+
return company;
3337
}
3438
}

src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.olegshan.parser.siteparsers;
22

3+
import com.olegshan.exception.ParserException;
34
import com.olegshan.sites.JobSite;
45
import com.olegshan.tools.MonthsTools;
56
import org.jsoup.nodes.Document;
@@ -11,6 +12,7 @@
1112
import java.time.LocalDate;
1213
import java.time.LocalDateTime;
1314
import java.time.ZoneId;
15+
import java.util.regex.Pattern;
1416

1517
public class RabotaUaJobParser extends JobParser {
1618

@@ -21,14 +23,15 @@ public RabotaUaJobParser(JobSite jobSite) {
2123
}
2224

2325
@Override
24-
public Elements getJobBlocks(Document doc) {
26+
public Elements getJobBlocks(Document doc) throws ParserException {
2527
Elements jobBlocks = new Elements();
2628
for (int i = 1; i < jobSite.getJobBox().length; i++) {
2729
Elements jobElements = doc.getElementsByAttributeValue(jobSite.getJobBox()[0], jobSite.getJobBox()[i]);
2830
if (jobElements != null && !jobElements.isEmpty()) {
2931
jobBlocks.addAll(jobElements);
3032
}
3133
}
34+
check(jobBlocks, "job blocks");
3235
return jobBlocks;
3336
}
3437

@@ -42,7 +45,7 @@ public String getTitle(Elements titleBlock) {
4245
}
4346

4447
@Override
45-
public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
48+
public LocalDateTime getDate(Element job, String url, Elements titleBlock) throws ParserException {
4649
/*
4750
* There are several problems here.
4851
* First: rabota.ua uses two types of date tags on different pages: "d-date" and "datePosted".
@@ -64,19 +67,19 @@ public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
6467
if (dateLine.length() == 0) {
6568
//no date at all, sometimes it happens
6669
LocalDateTime ldt = LocalDateTime.now(ZoneId.of("Europe/Athens"));
67-
LOGGER.debug("There was no date on Rabota.ua, return {}", ldt);
70+
LOGGER.warn("There was no date on Rabota.ua, return {}", ldt);
6871
return ldt;
6972
}
7073
}
71-
try {
72-
//for format dd.mm.yyyy
74+
75+
if (Pattern.matches("\\d{2}\\.\\d{2}\\.\\d{4}", dateLine)) {
7376
dateParts = dateLine.split("\\.");
7477
MonthsTools.removeZero(dateParts);
7578
year = Integer.parseInt(dateParts[2]);
7679
month = Integer.parseInt(dateParts[1]);
7780
day = Integer.parseInt(dateParts[0]);
7881

79-
} catch (ArrayIndexOutOfBoundsException e) {
82+
} else {
8083
//for format yyyy-mm-dd
8184
dateParts = dateLine.split("-");
8285
MonthsTools.removeZero(dateParts);

src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.olegshan.parser.siteparsers;
22

3+
import com.olegshan.exception.ParserException;
34
import com.olegshan.sites.JobSite;
45
import org.jsoup.nodes.Document;
56
import org.jsoup.nodes.Element;
@@ -17,8 +18,10 @@ public WorkUaJobParser(JobSite jobSite) {
1718
}
1819

1920
@Override
20-
public Elements getJobBlocks(Document doc) {
21-
return doc.getElementsByAttributeValueStarting(jobSite.getJobBox()[0], jobSite.getJobBox()[1]);
21+
public Elements getJobBlocks(Document doc) throws ParserException {
22+
Elements jobBlocks = doc.getElementsByAttributeValueStarting(jobSite.getJobBox()[0], jobSite.getJobBox()[1]);
23+
check(jobBlocks, "job blocks");
24+
return jobBlocks;
2225
}
2326

2427
@Override
@@ -27,9 +30,10 @@ public Elements getTitleBlock(Element job) {
2730
}
2831

2932
@Override
30-
public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
33+
public LocalDateTime getDate(Element job, String url, Elements titleBlock) throws ParserException {
3134
String dateLine = titleBlock.attr("title");
3235
String[] dateParts = dateLine.substring(dateLine.length() - 8).split(jobSite.getSplit());
36+
check(dateParts, "date parts");
3337

3438
int year = parseInt(dateParts[2]) + 2000;
3539
int month = parseInt(dateParts[1]);
@@ -39,10 +43,11 @@ public LocalDateTime getDate(Element job, String url, Elements titleBlock) {
3943
}
4044

4145
@Override
42-
public String getCompany(Element job, String url) {
46+
public String getCompany(Element job, String url) throws ParserException {
4347
String[] companyData = jobSite.getCompanyData();
4448
Document jobDoc = getDoc(url);
4549
Elements companyBlock = jobDoc.getElementsByAttributeValue(companyData[0], companyData[1]);
50+
check(companyBlock, "company block");
4651
return companyBlock.get(0).getElementsByTag("a").text();
4752
}
4853
}

0 commit comments

Comments
 (0)