diff --git a/README.md b/README.md index 1b09987..47dee1c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,3 @@ This app parses Java developer vacancies in Kyiv, Ukraine on four main Ukrainian Thanks to @Antrakos for help with implementation of Strategy pattern and common improvements. Please run it locally with following VM-option: `-Dspring.profiles.active="dev"` and set Maven profile in your IDE to `dev`. - -Live: http://www.jparser.info - -Twitter: https://twitter.com/jParser_info diff --git a/pom.xml b/pom.xml index b20568a..d8d8ce1 100644 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ org.springframework.boot spring-boot-starter-parent - 1.4.1.RELEASE + 1.5.7.RELEASE @@ -103,6 +103,18 @@ 1.9.2 + + io.prometheus + simpleclient + 0.5.0 + + + + io.prometheus + simpleclient_common + 0.5.0 + + diff --git a/src/main/java/com/olegshan/controllers/ParseController.java b/src/main/java/com/olegshan/controllers/ParseController.java index e39c01d..d3f7cdc 100644 --- a/src/main/java/com/olegshan/controllers/ParseController.java +++ b/src/main/java/com/olegshan/controllers/ParseController.java @@ -2,10 +2,13 @@ import com.olegshan.entity.Job; import com.olegshan.service.JobService; -import com.olegshan.tools.PageBox; +import com.olegshan.util.PageBox; +import io.prometheus.client.CollectorRegistry; +import io.prometheus.client.exporter.common.TextFormat; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.data.domain.Sort; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; @@ -13,11 +16,14 @@ import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.servlet.ModelAndView; +import java.io.IOException; +import java.io.Writer; + @Controller public class ParseController { - private static final int PAGE_SIZE = 40; - private JobService jobService; + private static final int PAGE_SIZE = 40; + private JobService jobService; @Autowired public ParseController(JobService jobService) { @@ -30,7 +36,8 @@ public ModelAndView showJobs(@RequestParam(value = "page", required = false) Int ModelAndView modelAndView = new ModelAndView("index"); int currentPageNumber = (page == null || page < 1) ? 0 : page - 1; - Page jobs = jobService.getJobs(new PageRequest(currentPageNumber, PAGE_SIZE, Sort.Direction.DESC, "date")); + Pageable request = new PageRequest(currentPageNumber, PAGE_SIZE, Sort.Direction.DESC, "date"); + Page jobs = jobService.getJobs(request); PageBox pageBox = new PageBox(jobs.getTotalPages(), jobs.getNumber()); modelAndView.addObject("jobs", jobs); @@ -43,4 +50,10 @@ public ModelAndView showJobs(@RequestParam(value = "page", required = false) Int public String about() { return "about"; } + + @RequestMapping(path = "/metrics") + public void metrics(Writer responseWriter) throws IOException { + TextFormat.write004(responseWriter, CollectorRegistry.defaultRegistry.metricFamilySamples()); + responseWriter.close(); + } } \ No newline at end of file diff --git a/src/main/java/com/olegshan/entity/Job.java b/src/main/java/com/olegshan/entity/Job.java index 122b9c2..fcb2d24 100644 --- a/src/main/java/com/olegshan/entity/Job.java +++ b/src/main/java/com/olegshan/entity/Job.java @@ -14,15 +14,15 @@ public class Job { @Id - private String url; - private String title; + private String url; + private String title; // Max value for PostgreSQL @Column(length = 10485760) - private String description; - private String company; - private String source; + private String description; + private String company; + private String source; private LocalDateTime date; - private String dateToDisplay; + private String dateToDisplay; public Job() { } diff --git a/src/main/java/com/olegshan/exception/ParserException.java b/src/main/java/com/olegshan/exception/ParserException.java index 4183a10..cf0293c 100644 --- a/src/main/java/com/olegshan/exception/ParserException.java +++ b/src/main/java/com/olegshan/exception/ParserException.java @@ -1,6 +1,6 @@ package com.olegshan.exception; -public class ParserException extends Exception{ +public class ParserException extends Exception { public ParserException(String message) { super(message); diff --git a/src/main/java/com/olegshan/notifier/impl/NotifierImpl.java b/src/main/java/com/olegshan/notifier/impl/NotifierImpl.java index d50432a..19f3ef5 100644 --- a/src/main/java/com/olegshan/notifier/impl/NotifierImpl.java +++ b/src/main/java/com/olegshan/notifier/impl/NotifierImpl.java @@ -12,10 +12,8 @@ @Service public class NotifierImpl implements Notifier { - private static final Logger LOGGER = LoggerFactory.getLogger(NotifierImpl.class); - - @Value("${spring.mail.username}") - private String recipient; + @Value("${mail.recipient}") + private String recipient; private MailSender mailSender; @Autowired @@ -31,6 +29,8 @@ public void notifyAdmin(String issue) { message.setText(issue + "\n\nhttp://www.jparser.info"); mailSender.send(message); - LOGGER.info("Admin was notified about following issue: " + issue + "\n"); + log.info("Admin was notified about following issue: " + issue + "\n"); } + + private static final Logger log = LoggerFactory.getLogger(NotifierImpl.class); } \ No newline at end of file diff --git a/src/main/java/com/olegshan/parser/Performer.java b/src/main/java/com/olegshan/parser/Performer.java index 6131055..3a8fc5a 100644 --- a/src/main/java/com/olegshan/parser/Performer.java +++ b/src/main/java/com/olegshan/parser/Performer.java @@ -1,17 +1,26 @@ package com.olegshan.parser; import com.olegshan.sites.JobSite; +import io.prometheus.client.Gauge; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; import java.util.List; +import static com.olegshan.util.TimeUtil.LOCAL_TIME_ZONE; + @Component public class Performer { private List sites; - private Parser parser; + private Parser parser; + private boolean isParsingRunning; + + private static final Gauge lastRun = Gauge.build() + .name("last_run") + .help("Last run.") + .register(); @Autowired public Performer(List sites, Parser parser) { @@ -19,9 +28,15 @@ public Performer(List sites, Parser parser) { this.parser = parser; } - @Scheduled(cron = "0 1 7-23 * * *", zone = "Europe/Athens") + @Scheduled(cron = "0 1 7-23 * * *", zone = LOCAL_TIME_ZONE) public void perform() { - for (JobSite jobSite : sites) + if (isParsingRunning) + return; + isParsingRunning = true; + for (JobSite jobSite : sites) { parser.parse(jobSite); + } + isParsingRunning = false; + lastRun.setToCurrentTime(); } } diff --git a/src/main/java/com/olegshan/parser/impl/ParserImpl.java b/src/main/java/com/olegshan/parser/impl/ParserImpl.java index 398cd57..9480f9d 100644 --- a/src/main/java/com/olegshan/parser/impl/ParserImpl.java +++ b/src/main/java/com/olegshan/parser/impl/ParserImpl.java @@ -1,11 +1,11 @@ package com.olegshan.parser.impl; import com.olegshan.entity.Job; -import com.olegshan.exception.ParserException; import com.olegshan.notifier.Notifier; import com.olegshan.parser.Parser; import com.olegshan.parser.siteparsers.JobParser; import com.olegshan.service.JobService; +import com.olegshan.service.StatisticsService; import com.olegshan.sites.JobSite; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -17,43 +17,55 @@ import java.time.LocalDateTime; +import static java.time.temporal.ChronoUnit.MINUTES; + @Component public class ParserImpl implements Parser { - private static final Logger LOGGER = LoggerFactory.getLogger(ParserImpl.class); - - private JobService jobService; - private Notifier notifier; + private JobService jobService; + private Notifier notifier; + private StatisticsService statisticsService; @Autowired - public ParserImpl(JobService jobService, Notifier notifier) { + public ParserImpl(JobService jobService, Notifier notifier, StatisticsService statisticsService) { this.jobService = jobService; this.notifier = notifier; + this.statisticsService = statisticsService; } public void parse(JobSite jobSite) { JobParser jobParser = jobSite.getParser(); + String url = ""; try { - Document doc = jobParser.getDoc(jobSite.getSiteUrl()); - Elements jobBlocks = jobParser.getJobBlocks(doc); + Document doc = jobParser.getDoc(jobSite.url()); - for (Element job : jobBlocks) { + for (Element job : jobParser.getJobBlocks(doc)) { Elements titleBlock = jobParser.getTitleBlock(job); - String url = jobParser.getUrl(titleBlock); + url = jobParser.getUrl(titleBlock); + LocalDateTime date = jobParser.getDate(job, url).truncatedTo(MINUTES); + if (isJobTooOld(date)) continue; + String title = jobParser.getTitle(titleBlock); String description = jobParser.getDescription(job, url); String company = jobParser.getCompany(job, url); - LocalDateTime date = jobParser.getDate(job, url); - Job parsedJob = new Job(title, description, company, jobSite.getSiteName(), url, date); + Job parsedJob = new Job(title, description, company, jobSite.name(), url, date); jobService.save(parsedJob); } - LOGGER.info("Parsing of {} completed\n", jobSite.getSiteName()); - } catch (ParserException e) { - notifier.notifyAdmin(e.getMessage()); + + statisticsService.saveStatistics(jobSite.name()); + } catch (Exception e) { + log.error("Error while parsing", e); + notifier.notifyAdmin("Error while parsing " + url + "\nError message is: " + e.getMessage()); } } + + private boolean isJobTooOld(LocalDateTime date) { + return LocalDateTime.now().minusMonths(2).isAfter(date); + } + + private static final Logger log = LoggerFactory.getLogger(ParserImpl.class); } \ No newline at end of file diff --git a/src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java b/src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java index a9c4950..80c307e 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/DouUaJobParser.java @@ -2,10 +2,9 @@ import com.olegshan.exception.ParserException; import com.olegshan.sites.JobSite; -import com.olegshan.tools.MonthsTools; +import com.olegshan.util.TimeUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import java.time.LocalDate; import java.time.LocalDateTime; @@ -23,15 +22,13 @@ public LocalDateTime getDate(Element job, String url) throws ParserException { Document dateDoc = getDoc(url); - String dateLine = dateDoc.getElementsByAttributeValue( - jobSite.getDateData()[0], - jobSite.getDateData()[1]).text(); + String dateLine = getElements(dateDoc, jobSite.date()).text(); check(dateLine, "date line", url); - String[] dateParts = dateLine.split(jobSite.getSplit()); - MonthsTools.removeZero(dateParts); + String[] dateParts = dateLine.split(jobSite.split()); + TimeUtil.removeZero(dateParts); int year = parseInt(dateParts[2]); - int month = MonthsTools.MONTHS.get(dateParts[1].toLowerCase()); + int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); int day = parseInt(dateParts[0]); return LocalDate.of(year, month, day).atTime(getTime()); diff --git a/src/main/java/com/olegshan/parser/siteparsers/HeadHunterUaJobParser.java b/src/main/java/com/olegshan/parser/siteparsers/HeadHunterUaJobParser.java index 6b00e99..e69f562 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/HeadHunterUaJobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/HeadHunterUaJobParser.java @@ -1,11 +1,13 @@ package com.olegshan.parser.siteparsers; +import com.olegshan.exception.ParserException; import com.olegshan.sites.JobSite; -import com.olegshan.tools.MonthsTools; +import com.olegshan.util.TimeUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import java.time.LocalDate; import java.time.LocalDateTime; -import java.time.ZoneId; import static java.lang.Integer.parseInt; @@ -17,13 +19,19 @@ public HeadHunterUaJobParser(JobSite jobSite) { @Override protected LocalDateTime getDateByLine(String dateLine) { - String[] dateParts = dateLine.split(jobSite.getSplit()); - MonthsTools.removeZero(dateParts); + String[] dateParts = dateLine.split(jobSite.split()); + TimeUtil.removeZero(dateParts); int day = parseInt(dateParts[0]); - int month = MonthsTools.MONTHS.get(dateParts[1].toLowerCase()); + int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); int year = getYear(month); return LocalDate.of(year, month, day).atTime(getTime()); } + + @Override + public String getCompany(Element job, String url) throws ParserException { + Document innerJob = getDoc(url); + return super.getCompany(innerJob, url); + } } diff --git a/src/main/java/com/olegshan/parser/siteparsers/JobParser.java b/src/main/java/com/olegshan/parser/siteparsers/JobParser.java index 6666b1a..b4cc052 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/JobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/JobParser.java @@ -3,7 +3,7 @@ import com.olegshan.exception.ParserException; import com.olegshan.parser.Parser; import com.olegshan.sites.JobSite; -import com.olegshan.tools.MonthsTools; +import com.olegshan.util.TimeUtil; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -15,44 +15,41 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; -import java.time.ZoneId; +import static com.olegshan.util.TimeUtil.localTimeZone; import static java.lang.Integer.parseInt; public class JobParser { - private static final Logger LOGGER = LoggerFactory.getLogger(Parser.class); + public static final String NBSP = "\u00a0"; - protected JobSite jobSite; + JobSite jobSite; public JobParser(JobSite jobSite) { this.jobSite = jobSite; } public Document getDoc(String siteUrl) throws ParserException { - Document doc; try { - doc = Jsoup.connect(siteUrl).userAgent("Mozilla").timeout(0).get(); + return Jsoup.connect(siteUrl).userAgent("Mozilla").timeout(0).get(); } catch (IOException e) { - LOGGER.error("Connecting to {} failed", siteUrl); throw new ParserException("Failed connecting to " + siteUrl + "\n" + e.getMessage()); } - return doc; } public String getUrl(Elements titleBlock) { - return jobSite.getUrlPrefix() + titleBlock.attr("href"); + return jobSite.urlPrefix() + titleBlock.attr("href"); } public Elements getJobBlocks(Document doc) throws ParserException { - Elements jobBlocks = doc.getElementsByAttributeValue(jobSite.getJobBox()[0], jobSite.getJobBox()[1]); - check(jobBlocks, "job blocks", null); + Elements jobBlocks = getElements(doc, jobSite.jobBox()); + check(jobBlocks, "job blocks"); return jobBlocks; } public Elements getTitleBlock(Element job) throws ParserException { - Elements titleBlock = job.getElementsByAttributeValue(jobSite.getTitleBox()[0], jobSite.getTitleBox()[1]); - check(titleBlock, "title blocks", null); + Elements titleBlock = getElements(job, jobSite.titleBox()); + check(titleBlock, "title blocks"); return titleBlock; } @@ -60,51 +57,64 @@ public String getTitle(Elements titleBlock) { return titleBlock.text(); } - public String getDescription(Element job, String url) throws ParserException { - String[] descriptionData = jobSite.getDescriptionData(); - return job.getElementsByAttributeValue(descriptionData[0], descriptionData[1]).text(); + public String getDescription(Element job, String url) { + return getElements(job, jobSite.description()).text(); } public String getCompany(Element job, String url) throws ParserException { - String company = job.getElementsByAttributeValue(jobSite.getCompanyData()[0], jobSite.getCompanyData()[1]).text(); + String company = removeNbsp(getElements(job, jobSite.company()).text()); check(company, "company", url); return company; } - public LocalDateTime getDate(Element job, String url) throws ParserException { - String dateLine = job.getElementsByAttributeValue(jobSite.getDateData()[0], - jobSite.getDateData()[1]).text(); + public LocalDateTime getDate(Element job, String url) throws Exception { + String dateLine = getElements(job, jobSite.date()).text(); check(dateLine, "date", url); - return getDateByLine(job.getElementsByAttributeValue(jobSite.getDateData()[0], - jobSite.getDateData()[1]).text()); + return getDateByLine(dateLine); } protected LocalDateTime getDateByLine(String dateLine) { - String[] dateParts = dateLine.split(jobSite.getSplit()); - MonthsTools.removeZero(dateParts); + String[] dateParts = dateLine.split(jobSite.split()); + TimeUtil.removeZero(dateParts); return LocalDate.of(parseInt(dateParts[2]), parseInt(dateParts[1]), parseInt(dateParts[0])).atTime(getTime()); } protected LocalTime getTime() { - return LocalTime.now(ZoneId.of("Europe/Athens")); + return LocalTime.now(localTimeZone()); } //in case we parse in January jobs of last December. Needed for jobs.ua and hh.ua - protected int getYear(int month) { - int year; - if (month > LocalDate.now(ZoneId.of("Europe/Athens")).getMonthValue()) { - year = LocalDate.now().getYear() - 1; - } else { - year = LocalDate.now(ZoneId.of("Europe/Athens")).getYear(); - } - return year; + int getYear(int month) { + if (month > LocalDate.now(localTimeZone()).getMonthValue()) + return LocalDate.now().getYear() - 1; + return LocalDate.now(localTimeZone()).getYear(); + } + + Elements getElements(Element element, JobSite.Holder holder) { + return getElements(element, holder, false); + } + + Elements getElements(Element element, JobSite.Holder holder, boolean starting) { + if (starting) + return element.getElementsByAttributeValueStarting(holder.key, holder.value); + return element.getElementsByAttributeValue(holder.key, holder.value); } - protected void check(Object o, String data, String url) throws ParserException { + String removeNbsp(String text) { + return text.replaceAll(NBSP, ""); + } + + void check(Object o, String data) throws ParserException { + check(o, data, null); + } + + void check(Object o, String data, String url) throws ParserException { String jobUrl = url == null ? "" : url; - if (o == null || o.toString().length() == 0) { - LOGGER.error("Error getting {} from {}, {}", data, jobSite.getSiteName(), jobUrl); - throw new ParserException("Error getting " + data + " from " + jobSite.getSiteName() + "\n" + jobUrl); + if (o == null || o.toString().trim().length() == 0) { + log.error("Error getting {} from {}, {}", data, jobSite.name(), jobUrl); + throw new ParserException("Error getting " + data + " from " + jobSite.name() + "\n" + jobUrl); } } + + private static final Logger log = LoggerFactory.getLogger(Parser.class); } diff --git a/src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java b/src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java index 70a6e57..dd09789 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/JobsUaJobParser.java @@ -2,7 +2,8 @@ import com.olegshan.exception.ParserException; import com.olegshan.sites.JobSite; -import com.olegshan.tools.MonthsTools; +import com.olegshan.sites.JobSite.Holder; +import com.olegshan.util.TimeUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -20,44 +21,55 @@ public JobsUaJobParser(JobSite jobSite) { @Override public Elements getJobBlocks(Document doc) throws ParserException { - Elements jobBlocks = doc.getElementsByAttributeValue(jobSite.getJobBox()[0], jobSite.getJobBox()[1]); - check(jobBlocks, "job blocks", null); + Elements jobBlocks = getElements(doc, jobSite.jobBox()); + check(jobBlocks, "job blocks"); + removeAd(jobBlocks); + + return jobBlocks; + } + + private void removeAd(Elements jobBlocks) { // ad block on jobs.ua has the same tags as the job blocks, so it should be removed for (int i = 0; i < jobBlocks.size(); i++) { - if (jobBlocks.get(i).getElementsByAttributeValue("class", "b-city__title b-city__companies-title") - .text().contains("VIP компании в Украине:")) { + + String jobBlock = getElements( + jobBlocks.get(i), + Holder.of("class", "b-city__title b-city__companies-title"), + true + ) + .text(); + + if (jobBlock.contains("VIP компании в Украине:")) jobBlocks.remove(i); - } } - return jobBlocks; } @Override - public String getDescription(Element job, String url) throws ParserException { - String[] descriptionData = jobSite.getDescriptionData(); - Document descDoc = getDoc(url); - String description = descDoc.getElementsByAttributeValue(descriptionData[0], descriptionData[1]).text(); - return description.length() > 250 ? description.substring(0, 250) + ("...") : description; + public LocalDateTime getDate(Element job, String url) throws ParserException { + Document dateDoc = getDoc(url); + String dateLine = getElements(dateDoc, jobSite.date()).text(); + + check(dateLine, "date line", url); + return getDateByLine(dateLine); } @Override protected LocalDateTime getDateByLine(String dateLine) { - dateLine = dateLine.replaceAll("\u00a0", "").trim(); - String[] dateParts = dateLine.trim().split(jobSite.getSplit()); - MonthsTools.removeZero(dateParts); + dateLine = dateLine.substring(dateLine.indexOf(NBSP) + 1, dateLine.lastIndexOf(NBSP)).trim(); + String[] dateParts = dateLine.split(jobSite.split()); + TimeUtil.removeZero(dateParts); int day = parseInt(dateParts[0]); - int month = MonthsTools.MONTHS.get(dateParts[1].toLowerCase()); - int year = getYear(month); + int month = TimeUtil.MONTHS.get(dateParts[1].toLowerCase()); + int year = dateParts.length > 2 ? Integer.parseInt(dateParts[2]) : getYear(month); return LocalDate.of(year, month, day).atTime(getTime()); } @Override public String getCompany(Element job, String url) throws ParserException { - String company = job.getElementsByAttributeValue(jobSite.getCompanyData()[0], jobSite.getCompanyData()[1]) - .first().text(); + String company = removeNbsp(getElements(job, jobSite.company()).first().text()); check(company, "company", url); return company; } diff --git a/src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java b/src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java index 0ab3026..a09ab11 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/RabotaUaJobParser.java @@ -1,22 +1,21 @@ package com.olegshan.parser.siteparsers; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.olegshan.exception.ParserException; import com.olegshan.sites.JobSite; -import com.olegshan.tools.MonthsTools; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.util.StringUtils; -import java.time.LocalDate; import java.time.LocalDateTime; -import java.time.ZoneId; -import java.util.regex.Pattern; -public class RabotaUaJobParser extends JobParser { +import static com.olegshan.util.TimeUtil.localTimeZone; - private static final Logger LOGGER = LoggerFactory.getLogger(RabotaUaJobParser.class); +public class RabotaUaJobParser extends JobParser { public RabotaUaJobParser(JobSite jobSite) { super(jobSite); @@ -24,83 +23,57 @@ public RabotaUaJobParser(JobSite jobSite) { @Override public String getUrl(Elements titleBlock) { - return jobSite.getUrlPrefix() + titleBlock - .get(0) - .getElementsByTag("a") - .attr("href"); + return jobSite.urlPrefix() + titleBlock + .get(0) + .getElementsByTag("a") + .attr("href"); } + @Override public Elements getTitleBlock(Element job) throws ParserException { - Elements titleBlock = job.getElementsByAttributeValueStarting(jobSite.getTitleBox()[0], jobSite.getTitleBox()[1]); - check(titleBlock, "title blocks", null); + Elements titleBlock = getElements(job, jobSite.titleBox(), true); + check(titleBlock, "title blocks"); return titleBlock; } @Override - public String getDescription(Element job, String url) throws ParserException { - String[] descriptionData = jobSite.getDescriptionData(); - return job.getElementsByAttributeValueStarting(descriptionData[0], descriptionData[1]).text(); + public String getDescription(Element job, String url) { + return getElements(job, jobSite.description(), true).text(); } @Override - public String getCompany(Element job, String url) throws ParserException { - String company = job.getElementsByAttributeValueStarting(jobSite.getCompanyData()[0], jobSite.getCompanyData()[1]).text(); - if (company.length() == 0) { - company = "Анонимный работодатель"; - } + public String getCompany(Element job, String url) { + String company = removeNbsp(getElements(job, jobSite.company(), true).text()); + if (company.length() == 0) + company = "Anonymous employer"; return company; } - public String getDescription(Element job) { - String[] descriptionData = jobSite.getDescriptionData(); - return job.getElementsByAttributeValueStarting(descriptionData[0], descriptionData[1]).text(); - } - @Override - public LocalDateTime getDate(Element job, String url) throws ParserException { - /* - * There are several problems here. - * First: there are different types of date tags, used on rabota.ua on different pages - * Second: sometimes date format is dd.mm.yyyy and sometimes — yyyy-mm-dd. - * Third: sometimes there is no date at all. - */ + public LocalDateTime getDate(Element job, String url) throws Exception { Document dateDoc = getDoc(url); - String dateLine; - String[] dateParts; - int year; - int month; - int day; - - Elements dateElements = dateDoc.getElementsByAttributeValue("id", "d-date"); - if (!dateElements.isEmpty()) { - dateLine = dateElements.get(0).getElementsByAttributeValue("class", "d-ph-value").text(); - } else { - dateLine = dateDoc.getElementsByAttributeValue("itemprop", "datePosted").text(); - if (dateLine.length() == 0) { - dateLine = dateDoc.getElementsByAttributeValueStarting("class", "f-date-holder").text(); - } - if (dateLine.length() == 0) { - //no date at all, sometimes it happens - LocalDateTime ldt = LocalDateTime.now(ZoneId.of("Europe/Athens")); - LOGGER.warn("There was no date on Rabota.ua, return {}", ldt); - return ldt; - } + + Elements scriptElements = dateDoc.getElementsByTag("script"); + + String varScript = null; + + for (Element scriptElement : scriptElements) { + if (scriptElement.data().contains("var ruavars")) + varScript = scriptElement.data(); } - if (Pattern.matches("\\d{2}\\.\\d{2}\\.\\d{4}", dateLine)) { - dateParts = dateLine.split("\\."); - MonthsTools.removeZero(dateParts); - year = Integer.parseInt(dateParts[2]); - month = Integer.parseInt(dateParts[1]); - day = Integer.parseInt(dateParts[0]); - } else { - //for format yyyy-mm-dd - dateParts = dateLine.split("-"); - MonthsTools.removeZero(dateParts); - year = Integer.parseInt(dateParts[0]); - month = Integer.parseInt(dateParts[1]); - day = Integer.parseInt(dateParts[2]); + if (StringUtils.isEmpty(varScript)) { + LocalDateTime ldt = LocalDateTime.now(localTimeZone()); + log.warn("There was no date for job {}, return current date {}", url, ldt); + return ldt; } - return LocalDate.of(year, month, day).atTime(getTime()); + + String json = varScript.substring(varScript.indexOf("{"), varScript.lastIndexOf("}") + 1); + JsonNode jsonNode = new ObjectMapper().readTree(json); + String vacancyDate = jsonNode.get("vacancy_VacancyDate").toString().replaceAll("\\\"", ""); + + return LocalDateTime.parse(vacancyDate); } + + private static final Logger log = LoggerFactory.getLogger(RabotaUaJobParser.class); } diff --git a/src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java b/src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java index fd5d2dd..cc3ee8b 100644 --- a/src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java +++ b/src/main/java/com/olegshan/parser/siteparsers/WorkUaJobParser.java @@ -2,6 +2,7 @@ import com.olegshan.exception.ParserException; import com.olegshan.sites.JobSite; +import com.olegshan.util.TimeUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -19,8 +20,8 @@ public WorkUaJobParser(JobSite jobSite) { @Override public Elements getJobBlocks(Document doc) throws ParserException { - Elements jobBlocks = doc.getElementsByAttributeValueStarting(jobSite.getJobBox()[0], jobSite.getJobBox()[1]); - check(jobBlocks, "job blocks", null); + Elements jobBlocks = getElements(doc, jobSite.jobBox(), true); + check(jobBlocks, "job blocks"); return jobBlocks; } @@ -29,14 +30,24 @@ public Elements getTitleBlock(Element job) { return job.getElementsByTag("a"); } + @Override + public String getTitle(Elements titleBlock) { + return titleBlock.first().text(); + } + + @Override + public String getDescription(Element job, String url) { + return getElements(job, jobSite.description(), true).text(); + } + @Override public LocalDateTime getDate(Element job, String url) throws ParserException { - String dateLine = getTitleBlock(job).attr("title"); - String[] dateParts = dateLine.substring(dateLine.length() - 8).split(jobSite.getSplit()); + String title = getTitleBlock(job).attr("title"); + String[] dateParts = title.substring(title.indexOf("вакансія від ") + "вакансія від ".length()).split(jobSite.split()); check(dateParts, "date parts", url); - int year = parseInt(dateParts[2]) + 2000; - int month = parseInt(dateParts[1]); + int year = parseInt(dateParts[2]); + int month = TimeUtil.MONTHS.get(dateParts[1]); int day = parseInt(dateParts[0]); return LocalDate.of(year, month, day).atTime(getTime()); @@ -44,10 +55,9 @@ public LocalDateTime getDate(Element job, String url) throws ParserException { @Override public String getCompany(Element job, String url) throws ParserException { - String[] companyData = jobSite.getCompanyData(); - Document jobDoc = getDoc(url); - Elements companyBlock = jobDoc.getElementsByAttributeValue(companyData[0], companyData[1]); - check(companyBlock, "company block", url); - return companyBlock.get(0).getElementsByTag("a").text(); + Elements company = job.getElementsByTag("b"); + check(company, "company", url); + + return (company != null && !company.isEmpty()) ? removeNbsp(company.get(0).text()) : "Anonymous company"; } } diff --git a/src/main/java/com/olegshan/service/JobService.java b/src/main/java/com/olegshan/service/JobService.java index 878d8e3..ad82b68 100644 --- a/src/main/java/com/olegshan/service/JobService.java +++ b/src/main/java/com/olegshan/service/JobService.java @@ -2,11 +2,11 @@ import com.olegshan.entity.Job; import org.springframework.data.domain.Page; -import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; public interface JobService { void save(Job job); - Page getJobs(PageRequest request); + Page getJobs(Pageable request); } diff --git a/src/main/java/com/olegshan/service/StatisticsService.java b/src/main/java/com/olegshan/service/StatisticsService.java new file mode 100644 index 0000000..c4e0804 --- /dev/null +++ b/src/main/java/com/olegshan/service/StatisticsService.java @@ -0,0 +1,10 @@ +package com.olegshan.service; + +import com.olegshan.entity.Job; + +public interface StatisticsService { + + void saveStatistics(String siteName); + + void updateStatistics(Job job, boolean isNew); +} diff --git a/src/main/java/com/olegshan/service/impl/JobServiceImpl.java b/src/main/java/com/olegshan/service/impl/JobServiceImpl.java index 7f2be7f..1b0ce8b 100644 --- a/src/main/java/com/olegshan/service/impl/JobServiceImpl.java +++ b/src/main/java/com/olegshan/service/impl/JobServiceImpl.java @@ -4,12 +4,13 @@ import com.olegshan.notifier.Notifier; import com.olegshan.repository.JobRepository; import com.olegshan.service.JobService; +import com.olegshan.service.StatisticsService; import com.olegshan.social.JTwitter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; -import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Service; import java.time.LocalDate; @@ -17,45 +18,54 @@ @Service public class JobServiceImpl implements JobService { - private static final Logger LOGGER = LoggerFactory.getLogger(JobServiceImpl.class); - - private JobRepository jobRepository; - private JTwitter twitter; - private Notifier notifier; + private JobRepository jobRepository; + private StatisticsService statisticsService; + private JTwitter twitter; + private Notifier notifier; @Autowired - public JobServiceImpl(JobRepository jobRepository, JTwitter twitter, Notifier notifier) { + public JobServiceImpl( + JobRepository jobRepository, + StatisticsService statisticsService, + JTwitter twitter, + Notifier notifier + ) { this.jobRepository = jobRepository; + this.statisticsService = statisticsService; this.twitter = twitter; this.notifier = notifier; } public void save(Job job) { - if (jobExists(job)) { - update(job); + if (jobRepository.exists(job.getUrl())) { + updateIfNeeded(job); } else { - saveJob(job); - twitter.tweet(job); - LOGGER.info("New job '{}' on {} found", job.getTitle(), job.getSource()); + saveAndTweet(job); + updateStatistics(job, true); + log.info("New job '{}' on {} found", job.getTitle(), job.getSource()); } } - private boolean jobExists(Job job) { - return jobRepository.findOne(job.getUrl()) != null; - } - - private void update(Job job) { + private void updateIfNeeded(Job job) { Job jobFromDb = jobRepository.findOne(job.getUrl()); LocalDate jobFromDbDate = jobFromDb.getDate().toLocalDate(); LocalDate jobDate = job.getDate().toLocalDate(); if (!jobFromDbDate.equals(jobDate)) { - saveJob(job); - twitter.tweet(job); - LOGGER.info("Job '{}', {}, was updated", job.getTitle(), job.getUrl()); + saveAndTweet(job); + updateStatistics(job, false); } } - public Page getJobs(PageRequest request) { + private void saveAndTweet(Job job) { + saveJob(job); + twitter.tweet(job); + } + + private void updateStatistics(Job job, boolean isNew) { + statisticsService.updateStatistics(job, isNew); + } + + public Page getJobs(Pageable request) { return jobRepository.findAll(request); } @@ -63,9 +73,11 @@ private void saveJob(Job job) { try { jobRepository.save(job); } catch (Exception e) { - LOGGER.error("Error while saving job '{}', {} into database", job.getTitle(), job.getUrl()); + log.error("Error while saving job '{}', {} into database", job.getTitle(), job.getUrl(), e); notifier.notifyAdmin("Error while saving following job into database: '" + - job.getTitle() + "', " + job.getUrl() + "\n\n" + e.getMessage()); + job.getTitle() + "', " + job.getUrl() + "\n\n" + e.getMessage()); } } + + private static final Logger log = LoggerFactory.getLogger(JobServiceImpl.class); } diff --git a/src/main/java/com/olegshan/service/impl/StatisticsServiceImpl.java b/src/main/java/com/olegshan/service/impl/StatisticsServiceImpl.java new file mode 100644 index 0000000..03ab924 --- /dev/null +++ b/src/main/java/com/olegshan/service/impl/StatisticsServiceImpl.java @@ -0,0 +1,61 @@ +package com.olegshan.service.impl; + +import com.olegshan.entity.Job; +import com.olegshan.service.StatisticsService; +import io.prometheus.client.Counter; +import io.prometheus.client.Gauge; +import org.springframework.stereotype.Service; + +import java.util.concurrent.atomic.AtomicInteger; + +@Service +public class StatisticsServiceImpl implements StatisticsService { + + private final AtomicInteger newJobs = new AtomicInteger(); + private final AtomicInteger updatedJobs = new AtomicInteger(); + + private static final Gauge newJobsFoundPerRun = Gauge.build() + .name("new_jobs_per_run") + .help("New jobs per run.") + .labelNames("site_name") + .register(); + + private static final Gauge updatedJobsFoundPerRun = Gauge.build() + .name("updated_jobs_per_run") + .help("Updated jobs per run.") + .labelNames("site_name") + .register(); + + private static final Counter totalJobsCount = Counter.build() + .name("total_jobs_count") + .help("Total jobs count.") + .labelNames("site_name") + .register(); + + @Override + public void updateStatistics(Job job, boolean isNew) { + if (isNew) { + newJobs.incrementAndGet(); + totalJobsCount + .labels(job.getSource()) + .inc(); + } else { + updatedJobs.incrementAndGet(); + } + } + + @Override + public void saveStatistics(String siteName) { + + newJobsFoundPerRun + .labels(siteName) + .set(newJobs.get()); + + updatedJobsFoundPerRun + .labels(siteName) + .set(updatedJobs.get()); + + newJobs.set(0); + updatedJobs.set(0); + } +} \ No newline at end of file diff --git a/src/main/java/com/olegshan/sites/DouUa.java b/src/main/java/com/olegshan/sites/DouUa.java index 75327b0..941a733 100644 --- a/src/main/java/com/olegshan/sites/DouUa.java +++ b/src/main/java/com/olegshan/sites/DouUa.java @@ -4,54 +4,47 @@ import com.olegshan.parser.siteparsers.JobParser; import org.springframework.stereotype.Component; - @Component -public class DouUa implements JobSite { - - private static final String SITE_NAME = "Dou.ua"; - private static final String SITE_URL = "https://jobs.dou.ua/vacancies/?city=%D0%9A%D0%B8%D1%97%D0%B2&category=Java"; - private static final String URL_PREFIX = ""; - private static final String[] JOB_BOX = {"class", "vacancy"}; - private static final String[] TITLE_BOX = {"class", "vt"}; - private static final String[] COMPANY_DATA = {"class", "company"}; - private static final String[] DESCRIPTION_DATA = {"class", "sh-info"}; - private static final String[] DATE_DATA = {"class", "date"}; - private static final String SPLIT = " "; - - public String getSiteName() { - return SITE_NAME; - } +public class DouUa extends JobSite { - public String getSiteUrl() { - return SITE_URL; + @Override + public String name() { + return "Dou.ua"; } - public String getUrlPrefix() { - return URL_PREFIX; + @Override + public String url() { + return "https://jobs.dou.ua/vacancies/?city=%D0%9A%D0%B8%D1%97%D0%B2&category=Java"; } - public String[] getJobBox() { - return JOB_BOX; + @Override + public String split() { + return " "; } - public String[] getTitleBox() { - return TITLE_BOX; + @Override + public Holder jobBox() { + return Holder.of("class", "vacancy"); } - public String[] getCompanyData() { - return COMPANY_DATA; + @Override + public Holder titleBox() { + return Holder.of("class", "vt"); } - public String[] getDescriptionData() { - return DESCRIPTION_DATA; + @Override + public Holder company() { + return Holder.of("class", "company"); } - public String[] getDateData() { - return DATE_DATA; + @Override + public Holder description() { + return Holder.of("class", "sh-info"); } - public String getSplit() { - return SPLIT; + @Override + public Holder date() { + return Holder.of("class", "date"); } @Override diff --git a/src/main/java/com/olegshan/sites/HeadHunterUa.java b/src/main/java/com/olegshan/sites/HeadHunterUa.java index c6b7152..5287fa3 100644 --- a/src/main/java/com/olegshan/sites/HeadHunterUa.java +++ b/src/main/java/com/olegshan/sites/HeadHunterUa.java @@ -4,53 +4,49 @@ import com.olegshan.parser.siteparsers.JobParser; import org.springframework.stereotype.Component; -@Component -public class HeadHunterUa implements JobSite { - - private static final String SITE_NAME = "HeadHunter.ua"; - private static final String SITE_URL = "https://hh.ua/search/vacancy?text=java&area=115"; - private static final String URL_PREFIX = ""; - private static final String[] JOB_BOX = {"class", "search-result-description"}; - private static final String[] TITLE_BOX = {"data-qa", "vacancy-serp__vacancy-title"}; - private static final String[] COMPANY_DATA = {"data-qa", "vacancy-serp__vacancy-employer"}; - private static final String[] DESCRIPTION_DATA = {"data-qa", "vacancy-serp__vacancy_snippet_requirement"}; - private static final String[] DATE_DATA = {"data-qa", "vacancy-serp__vacancy-date"}; - private static final String SPLIT = "\u00a0"; +import static com.olegshan.parser.siteparsers.JobParser.NBSP; - public String getSiteName() { - return SITE_NAME; - } +@Component +public class HeadHunterUa extends JobSite { - public String getSiteUrl() { - return SITE_URL; + @Override + public String name() { + return "HeadHunter.ua"; } - public String getUrlPrefix() { - return URL_PREFIX; + @Override + public String url() { + return "https://hh.ua/search/vacancy?text=java&area=115"; } - public String[] getJobBox() { - return JOB_BOX; + @Override + public String split() { + return NBSP; } - public String[] getTitleBox() { - return TITLE_BOX; + @Override + public Holder jobBox() { + return Holder.of("data-qa", "vacancy-serp__vacancy"); } - public String[] getCompanyData() { - return COMPANY_DATA; + @Override + public Holder titleBox() { + return Holder.of("data-qa", "vacancy-serp__vacancy-title"); } - public String[] getDescriptionData() { - return DESCRIPTION_DATA; + @Override + public Holder company() { + return Holder.of("class", "vacancy-company-name-wrapper"); } - public String[] getDateData() { - return DATE_DATA; + @Override + public Holder description() { + return Holder.of("data-qa", "vacancy-serp__vacancy_snippet_requirement"); } - public String getSplit() { - return SPLIT; + @Override + public Holder date() { + return Holder.of("class", "vacancy-serp-item__publication-date"); } @Override diff --git a/src/main/java/com/olegshan/sites/JobSite.java b/src/main/java/com/olegshan/sites/JobSite.java index ac33a89..0c157b3 100644 --- a/src/main/java/com/olegshan/sites/JobSite.java +++ b/src/main/java/com/olegshan/sites/JobSite.java @@ -2,27 +2,58 @@ import com.olegshan.parser.siteparsers.JobParser; -public interface JobSite { +public abstract class JobSite { - String getSiteName(); + public abstract String name(); - String getSiteUrl(); + public abstract String url(); - String getUrlPrefix(); + public String urlPrefix() { + return ""; + } - String[] getJobBox(); + public String split() { + return ""; + } - String[] getTitleBox(); + public Holder jobBox() { + return Holder.empty(); + } - String[] getCompanyData(); + public Holder titleBox() { + return Holder.empty(); + } - String[] getDescriptionData(); + public Holder company() { + return Holder.empty(); + } - String[] getDateData(); + public Holder description() { + return Holder.empty(); + } - String getSplit(); + public Holder date() { + return Holder.empty(); + } - default JobParser getParser() { + public JobParser getParser() { return new JobParser(this); } + + public static class Holder { + public String key; + public String value; + + public static Holder of(String key, String value) { + Holder holder = new Holder(); + holder.key = key; + holder.value = value; + + return holder; + } + + public static Holder empty() { + return Holder.of("", ""); + } + } } diff --git a/src/main/java/com/olegshan/sites/JobsUa.java b/src/main/java/com/olegshan/sites/JobsUa.java deleted file mode 100644 index c5e9838..0000000 --- a/src/main/java/com/olegshan/sites/JobsUa.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.olegshan.sites; - -import com.olegshan.parser.siteparsers.JobParser; -import com.olegshan.parser.siteparsers.JobsUaJobParser; -import org.springframework.stereotype.Component; - -@Component -public class JobsUa implements JobSite { - - private static final String SITE_NAME = "Jobs.ua"; - private static final String SITE_URL = "https://jobs.ua/vacancy/kiev/rabota-java"; - private static final String URL_PREFIX = ""; - private static final String[] JOB_BOX = {"class", "b-vacancy__item js-item_list"}; - private static final String[] TITLE_BOX = {"class", "b-vacancy__top__title js-item_title"}; - private static final String[] COMPANY_DATA = {"class", "b-vacancy__tech__item"}; - private static final String[] DESCRIPTION_DATA = {"class", "b-vacancy-full__block b-text"}; - private static final String[] DATE_DATA = {"class", "b-vacancy__tech__item b-vacancy__tech__item-top"}; - private static final String SPLIT = " "; - - public String getSiteName() { - return SITE_NAME; - } - - public String getSiteUrl() { - return SITE_URL; - } - - public String getUrlPrefix() { - return URL_PREFIX; - } - - public String[] getJobBox() { - return JOB_BOX; - } - - public String[] getTitleBox() { - return TITLE_BOX; - } - - public String[] getCompanyData() { - return COMPANY_DATA; - } - - public String[] getDescriptionData() { - return DESCRIPTION_DATA; - } - - public String[] getDateData() { - return DATE_DATA; - } - - public String getSplit() { - return SPLIT; - } - - @Override - public JobParser getParser() { - return new JobsUaJobParser(this); - } -} diff --git a/src/main/java/com/olegshan/sites/RabotaUa.java b/src/main/java/com/olegshan/sites/RabotaUa.java index 7a795ee..5c9da29 100644 --- a/src/main/java/com/olegshan/sites/RabotaUa.java +++ b/src/main/java/com/olegshan/sites/RabotaUa.java @@ -5,52 +5,41 @@ import org.springframework.stereotype.Component; @Component -public class RabotaUa implements JobSite { +public class RabotaUa extends JobSite { - private static final String SITE_NAME = "Rabota.ua"; - private static final String SITE_URL = "https://rabota.ua/zapros/java/%D0%BA%D0%B8%D0%B5%D0%B2"; - private static final String URL_PREFIX = "http://rabota.ua"; - private static final String[] JOB_BOX = {"class", "f-vacancylist-vacancyblock"}; - private static final String[] TITLE_BOX = {"class", "fd-beefy-gunso"}; - private static final String[] COMPANY_DATA = {"class", "f-vacancylist-companyname"}; - private static final String[] DESCRIPTION_DATA = {"class", "f-vacancylist-shortdescr"}; - private static final String[] DATE_DATA = {"", ""}; - private static final String SPLIT = ""; - - public String getSiteName() { - return SITE_NAME; - } - - public String getSiteUrl() { - return SITE_URL; - } - - public String getUrlPrefix() { - return URL_PREFIX; + @Override + public String name() { + return "Rabota.ua"; } - public String[] getJobBox() { - return JOB_BOX; + @Override + public String url() { + return "https://rabota.ua/jobsearch/vacancy_list?regionId=1&keyWords=java"; } - public String[] getTitleBox() { - return TITLE_BOX; + @Override + public String urlPrefix() { + return "https://rabota.ua"; } - public String[] getCompanyData() { - return COMPANY_DATA; + @Override + public Holder jobBox() { + return Holder.of("class", "card-body"); } - public String[] getDescriptionData() { - return DESCRIPTION_DATA; + @Override + public Holder titleBox() { + return Holder.of("class", "card-title"); } - public String[] getDateData() { - return DATE_DATA; + @Override + public Holder company() { + return Holder.of("class", "company-profile-name"); } - public String getSplit() { - return SPLIT; + @Override + public Holder description() { + return Holder.of("class", "card-description"); } @Override diff --git a/src/main/java/com/olegshan/sites/WorkUa.java b/src/main/java/com/olegshan/sites/WorkUa.java index 9594251..7e106c3 100644 --- a/src/main/java/com/olegshan/sites/WorkUa.java +++ b/src/main/java/com/olegshan/sites/WorkUa.java @@ -5,52 +5,36 @@ import org.springframework.stereotype.Component; @Component -public class WorkUa implements JobSite { - - private static final String SITE_NAME = "Work.ua"; - private static final String SITE_URL = "https://www.work.ua/jobs-kyiv-java/"; - private static final String URL_PREFIX = "https://work.ua"; - private static final String[] JOB_BOX = {"class", "card card-hover card-visited job-link"}; - private static final String[] TITLE_BOX = {"", ""}; - private static final String[] COMPANY_DATA = {"class", "dl-horizontal"}; - private static final String[] DESCRIPTION_DATA = {"class", "text-muted overflow"}; - private static final String[] DATE_DATA = {"", ""}; - private static final String SPLIT = "\\."; - - public String getSiteName() { - return SITE_NAME; - } - - public String getSiteUrl() { - return SITE_URL; - } +public class WorkUa extends JobSite { - public String getUrlPrefix() { - return URL_PREFIX; - } - - public String[] getJobBox() { - return JOB_BOX; + @Override + public String name() { + return "Work.ua"; } - public String[] getTitleBox() { - return TITLE_BOX; + @Override + public String url() { + return "https://www.work.ua/jobs-kyiv-java/"; } - public String[] getCompanyData() { - return COMPANY_DATA; + @Override + public String urlPrefix() { + return "https://work.ua"; } - public String[] getDescriptionData() { - return DESCRIPTION_DATA; + @Override + public String split() { + return " "; } - public String[] getDateData() { - return DATE_DATA; + @Override + public Holder jobBox() { + return Holder.of("class", "card card-hover card-visited wordwrap job-link"); } - public String getSplit() { - return SPLIT; + @Override + public Holder description() { + return Holder.of("class", "overflow"); } @Override diff --git a/src/main/java/com/olegshan/social/JTwitter.java b/src/main/java/com/olegshan/social/JTwitter.java index 934ccdb..c0159c5 100644 --- a/src/main/java/com/olegshan/social/JTwitter.java +++ b/src/main/java/com/olegshan/social/JTwitter.java @@ -1,26 +1,57 @@ package com.olegshan.social; import com.olegshan.entity.Job; +import com.olegshan.notifier.Notifier; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; import org.springframework.social.twitter.api.Twitter; import org.springframework.social.twitter.api.impl.TwitterTemplate; import org.springframework.stereotype.Component; +import java.util.Arrays; + @Component public class JTwitter { - private static final String CONSUMER_KEY = System.getProperty("CKjP"); - private static final String CONSUMER_SECRET = System.getProperty("CSjP"); - private static final String ACCESS_TOKEN = System.getProperty("ATjP"); - private static final String ACCESS_TOKEN_SECRET = System.getProperty("ATSjP"); - - private Twitter twitter; + private Twitter twitter; + private Environment environment; + private Notifier notifier; - public JTwitter() { - twitter = new TwitterTemplate(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET); + @Autowired + public JTwitter(Environment environment, Notifier notifier) { + this.environment = environment; + this.notifier = notifier; + initTwitter(); } public void tweet(Job job) { - twitter.timelineOperations().updateStatus(job.getTitle() + " " + job.getUrl() - + " More jobs here: http://jparser.info"); + if (twitter == null) return; + + String tweet = String.format("%s %s More jobs here: http://jparser.info", job.getTitle(), job.getUrl()); + try { + twitter.timelineOperations().updateStatus(tweet); + } catch (Exception e) { + if (!"Status is a duplicate.".equals(e.getMessage())) + notifier.notifyAdmin( + "Error while twitting following tweet:\n " + tweet + + "\nException was:\n" + e.getMessage() + ); + } + } + + private void initTwitter() { + if (isDevEnv()) return; + + String consumerKey = System.getProperty("CKjP"); + String consumerSecret = System.getProperty("CSjP"); + String accessToken = System.getProperty("ATjP"); + String accessTokenSecret = System.getProperty("ATSjP"); + + twitter = new TwitterTemplate(consumerKey, consumerSecret, accessToken, accessTokenSecret); + } + + private boolean isDevEnv() { + return Arrays.stream(environment.getActiveProfiles()) + .anyMatch(env -> env.equalsIgnoreCase("dev")); } } diff --git a/src/main/java/com/olegshan/statistics/Statistics.java b/src/main/java/com/olegshan/statistics/Statistics.java new file mode 100644 index 0000000..f1fc683 --- /dev/null +++ b/src/main/java/com/olegshan/statistics/Statistics.java @@ -0,0 +1,31 @@ +package com.olegshan.statistics; + +import lombok.Data; + +import javax.persistence.Entity; +import javax.persistence.Id; +import java.time.LocalDateTime; + +@Entity +@Data +public class Statistics { + + @Id + private String id; + private String siteName; + private LocalDateTime run; + private int newJobsFoundByRun; + private int updatedJobsByRun; + + public void setId(String siteName) { + id = siteName + run.toString(); + } + + public void incrementNewJobsCount() { + newJobsFoundByRun = newJobsFoundByRun + 1; + } + + public void incrementUpdatedJobsCount() { + updatedJobsByRun = updatedJobsByRun + 1; + } +} diff --git a/src/main/java/com/olegshan/tools/MonthsTools.java b/src/main/java/com/olegshan/tools/MonthsTools.java deleted file mode 100644 index 00a363c..0000000 --- a/src/main/java/com/olegshan/tools/MonthsTools.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.olegshan.tools; - -import java.util.HashMap; -import java.util.Map; - -public class MonthsTools { - - public static final Map MONTHS = new HashMap<>(); - - static { - - MONTHS.put("січня", 1); - MONTHS.put("лютого", 2); - MONTHS.put("березня", 3); - MONTHS.put("квітня", 4); - MONTHS.put("травня", 5); - MONTHS.put("червня", 6); - MONTHS.put("липня", 7); - MONTHS.put("серпня", 8); - MONTHS.put("вересня", 9); - MONTHS.put("жовтня", 10); - MONTHS.put("листопада", 11); - MONTHS.put("грудня", 12); - - MONTHS.put("января", 1); - MONTHS.put("февраля", 2); - MONTHS.put("марта", 3); - MONTHS.put("апреля", 4); - MONTHS.put("мая", 5); - MONTHS.put("июня", 6); - MONTHS.put("июля", 7); - MONTHS.put("августа", 8); - MONTHS.put("сентября", 9); - MONTHS.put("октября", 10); - MONTHS.put("ноября", 11); - MONTHS.put("декабря", 12); - - MONTHS.put("january", 1); - MONTHS.put("february", 2); - MONTHS.put("march", 3); - MONTHS.put("april", 4); - MONTHS.put("may", 5); - MONTHS.put("june", 6); - MONTHS.put("july", 7); - MONTHS.put("august", 8); - MONTHS.put("september", 9); - MONTHS.put("october", 10); - MONTHS.put("november", 11); - MONTHS.put("december", 12); - - } - - //if day or month starts with '0' - public static void removeZero(String[] dateParts) { - for (int i = 0; i < dateParts.length; i++) { - if (dateParts[i].startsWith("0")) { - dateParts[i] = dateParts[i].substring(1); - } - } - } -} diff --git a/src/main/java/com/olegshan/tools/PageBox.java b/src/main/java/com/olegshan/util/PageBox.java similarity index 98% rename from src/main/java/com/olegshan/tools/PageBox.java rename to src/main/java/com/olegshan/util/PageBox.java index 1088b5a..95d6b60 100644 --- a/src/main/java/com/olegshan/tools/PageBox.java +++ b/src/main/java/com/olegshan/util/PageBox.java @@ -1,4 +1,4 @@ -package com.olegshan.tools; +package com.olegshan.util; public class PageBox { diff --git a/src/main/java/com/olegshan/util/TimeUtil.java b/src/main/java/com/olegshan/util/TimeUtil.java new file mode 100644 index 0000000..a4a8d55 --- /dev/null +++ b/src/main/java/com/olegshan/util/TimeUtil.java @@ -0,0 +1,77 @@ +package com.olegshan.util; + +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Map; + +public class TimeUtil { + + public static final String LOCAL_TIME_ZONE = "Europe/Athens"; + public static final Map MONTHS = new HashMap() {{ + + put("січня", 1); + put("лютого", 2); + put("березня", 3); + put("квітня", 4); + put("травня", 5); + put("червня", 6); + put("липня", 7); + put("серпня", 8); + put("вересня", 9); + put("жовтня", 10); + put("листопада", 11); + put("грудня", 12); + + put("января", 1); + put("февраля", 2); + put("марта", 3); + put("апреля", 4); + put("мая", 5); + put("июня", 6); + put("июля", 7); + put("августа", 8); + put("сентября", 9); + put("октября", 10); + put("ноября", 11); + put("декабря", 12); + + put("янв", 1); + put("фев", 2); + put("мар", 3); + put("апр", 4); + put("май", 5); + put("июн", 6); + put("июл", 7); + put("авг", 8); + put("сен", 9); + put("окт", 10); + put("ноя", 11); + put("дек", 12); + + put("january", 1); + put("february", 2); + put("march", 3); + put("april", 4); + put("may", 5); + put("june", 6); + put("july", 7); + put("august", 8); + put("september", 9); + put("october", 10); + put("november", 11); + put("december", 12); + }}; + + public static ZoneId localTimeZone() { + return ZoneId.of(LOCAL_TIME_ZONE); + } + + //if day or month starts with '0' + public static void removeZero(String[] dateParts) { + for (int i = 0; i < dateParts.length; i++) { + if (dateParts[i].startsWith("0")) { + dateParts[i] = dateParts[i].substring(1); + } + } + } +} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 0aa8c7c..3eefd28 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -3,7 +3,8 @@ spring.mvc.throw-exception-if-no-handler-found = true spring.mail.host = smtp.gmail.com spring.mail.port = 465 -spring.mail.username = *** -spring.mail.password = *** +spring.mail.username = ${jMailSender} +mail.recipient = ${jMailRecipient} +spring.mail.password = ${jMailPassword} spring.mail.properties.smtp.auth = true spring.mail.properties.mail.smtp.ssl.enable = true \ No newline at end of file diff --git a/src/main/resources/static/favicon.ico b/src/main/resources/static/favicon.ico index 1e264a3..9abc741 100644 Binary files a/src/main/resources/static/favicon.ico and b/src/main/resources/static/favicon.ico differ diff --git a/src/main/resources/static/style.css b/src/main/resources/static/style.css index 081bfa8..af8fd26 100644 --- a/src/main/resources/static/style.css +++ b/src/main/resources/static/style.css @@ -13,10 +13,6 @@ font-size: 14px; } -.logo small, .logo span, .logo h2 { - color: cadetblue; -} - .logo a:hover { text-decoration: none; } @@ -26,11 +22,57 @@ float: none; } +.logo small, .logo span { + color: #ffc66d; +} + +.logo h2, .title small:hover, .under, .footer { + color: #cc7832; +} + +.title small, .about small { + color: #ffc66d; +} + +.description, .about p { + color: #6a8759 +} + +.company { + color: #bbb529 +} + +.footer a, .about a { + color: #6796a3 +} + +.pagination>li>a { + background-color: transparent; + color: #cc7832; + border: none; +} + +.pagination>li>a:hover { + background-color: #ffc66d; +} + +.pagination>li.active>a { + background-color: #cc7832; +} + +.pagination>li.active>a:hover { + background-color: #cc7832; +} + +body { + background-color: #2b2b2b; +} + .twitter { vertical-align: super; } -.under { +.under, .statistics { font-size: 12px; } @@ -48,4 +90,4 @@ .footer { margin: 30px 0 30px 0; -} +} \ No newline at end of file diff --git a/src/main/resources/static/twitter.png b/src/main/resources/static/twitter.png index 438f3ef..d5db21a 100644 Binary files a/src/main/resources/static/twitter.png and b/src/main/resources/static/twitter.png differ diff --git a/src/main/resources/templates/about.html b/src/main/resources/templates/about.html index 263f719..a747d1d 100644 --- a/src/main/resources/templates/about.html +++ b/src/main/resources/templates/about.html @@ -6,12 +6,11 @@

About jParser

- jParser helps Java developers to find a job in Kyiv. Every hour it parses vacancies on four main Ukrainian + jParser helps Java developers to find a job in Kyiv. Every hour it parses vacancies on three main Ukrainian job sites: Rabota.ua, - Work.ua, - Headhunter.ua and - Jobs.ua + Work.ua and + Headhunter.ua and on main Ukrainian site for developers — Dou.ua.

@@ -20,10 +19,11 @@

Twitter account automatically.

- jParser was created by Java developer and journalist Oleg Shankovskyi. + jParser was created by Java developer + Oleg Shankovskyi. +

- If you have any ideas to improve this project, feel free to join. Source code is on Github.


diff --git a/src/main/resources/templates/index.html b/src/main/resources/templates/index.html index 39d65ae..4e8d5de 100644 --- a/src/main/resources/templates/index.html +++ b/src/main/resources/templates/index.html @@ -5,48 +5,46 @@
-

+

-

-
- - - -
+

+ + +
diff --git a/src/main/resources/templates/layout.html b/src/main/resources/templates/layout.html index f33652b..6b46529 100644 --- a/src/main/resources/templates/layout.html +++ b/src/main/resources/templates/layout.html @@ -65,7 +65,6 @@

jParser

- β

Jobs for Java developers in Kyiv

diff --git a/src/test/java/com/olegshan/controllers/ErrorHandlerTest.java b/src/test/java/com/olegshan/controllers/ErrorHandlerTest.java index ca6382c..8f2516c 100644 --- a/src/test/java/com/olegshan/controllers/ErrorHandlerTest.java +++ b/src/test/java/com/olegshan/controllers/ErrorHandlerTest.java @@ -16,15 +16,14 @@ public class ErrorHandlerTest { @Mock - ParseController parseController; - - private MockMvc mockMvc; + private ParseController parseController; + private MockMvc mockMvc; @Before public void setUp() throws Exception { mockMvc = MockMvcBuilders.standaloneSetup(parseController) - .setControllerAdvice(new ErrorHandler()) - .build(); + .setControllerAdvice(new ErrorHandler()) + .build(); } @Test @@ -33,8 +32,8 @@ public void unexpectedExceptionsAreCaught() throws Exception { when(parseController.about()).thenThrow(new RuntimeException("Unexpected exception")); mockMvc.perform(get("/about")) - .andExpect(status().isOk()) - .andExpect(view().name("exception")) - .andExpect(model().attribute("errorMessage", "Unexpected exception")); + .andExpect(status().isOk()) + .andExpect(view().name("exception")) + .andExpect(model().attribute("errorMessage", "Unexpected exception")); } } \ No newline at end of file diff --git a/src/test/java/com/olegshan/controllers/ParseControllerTest.java b/src/test/java/com/olegshan/controllers/ParseControllerTest.java index 96163d6..5742144 100644 --- a/src/test/java/com/olegshan/controllers/ParseControllerTest.java +++ b/src/test/java/com/olegshan/controllers/ParseControllerTest.java @@ -13,12 +13,12 @@ public class ParseControllerTest extends AbstractTest { - private MockMvc mockMvc; + private MockMvc mockMvc; @Autowired private WebApplicationContext webApplicationContext; @Before - public void setUp() throws Exception { + public void setUp() { mockMvc = MockMvcBuilders.webAppContextSetup(webApplicationContext).build(); } @@ -26,16 +26,16 @@ public void setUp() throws Exception { public void showJobsReturnsCorrectModelAndView() throws Exception { mockMvc.perform(get("/")) - .andExpect(status().isOk()) - .andExpect(view().name("index")) - .andExpect(model().attributeExists("jobs")) - .andExpect(model().attributeExists("pageBox")); + .andExpect(status().isOk()) + .andExpect(view().name("index")) + .andExpect(model().attributeExists("jobs")) + .andExpect(model().attributeExists("pageBox")); } @Test public void aboutPageTest() throws Exception { mockMvc.perform(get("/about")) - .andExpect(status().isOk()) - .andExpect(view().name("about")); + .andExpect(status().isOk()) + .andExpect(view().name("about")); } } \ No newline at end of file diff --git a/src/test/java/com/olegshan/service/JobServiceTest.java b/src/test/java/com/olegshan/service/JobServiceTest.java index 8ee4f84..1c95b6b 100644 --- a/src/test/java/com/olegshan/service/JobServiceTest.java +++ b/src/test/java/com/olegshan/service/JobServiceTest.java @@ -6,23 +6,20 @@ import com.olegshan.social.JTwitter; import org.junit.After; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; import org.mockito.InjectMocks; import org.mockito.Mock; -import org.mockito.junit.MockitoJUnit; -import org.mockito.junit.MockitoRule; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Sort; import java.time.LocalDateTime; -import java.time.ZoneId; import java.util.List; import java.util.Random; import java.util.stream.IntStream; +import static com.olegshan.util.TimeUtil.localTimeZone; import static java.time.LocalDateTime.now; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -31,38 +28,38 @@ public class JobServiceTest extends AbstractTest { - private static final String JOB_URL = "http://somesite.ua/company/vacancy"; - private static final int CURRENT_PAGE = 1; - private static final int PAGE_SIZE = 5; + private static final String JOB_URL = "http://somesite.ua/company/vacancy"; + private static final int CURRENT_PAGE = 1; + private static final int PAGE_SIZE = 5; @Mock private JTwitter mockTwitter; @InjectMocks @Autowired - private JobService jobService; + private JobService jobService; @Autowired private JobRepository jobRepository; @Before - public void setUp() throws Exception { + public void setUp() { Job job; Random random = new Random(); for (int i = 0; i < 10; i++) { //jobs are saved into database with random dates job = new Job("Title" + i, "Description" + i, "Company" + i, "Site" + i, JOB_URL + i, - now(ZoneId.of("Europe/Athens")).minusDays(random.nextInt(20))); + now(localTimeZone()).minusDays(random.nextInt(20))); jobService.save(job); } } @Test - public void jobsInSetUpMethodWereSaved() throws Exception { + public void jobsInSetUpMethodWereSaved() { assertEquals("There should be 10 elements in the database", jobRepository.findAll().size(), 10); } @Test - public void savingOfNewJobWithTheSameUrlAndDifferentDateUpdatesExistingJob() throws Exception { + public void savingOfNewJobWithTheSameUrlAndDifferentDateUpdatesExistingJob() { Job job = jobRepository.findOne(JOB_URL + 5); assertEquals("Title5", job.getTitle()); LocalDateTime newDate = job.getDate().minusDays(1); @@ -75,11 +72,11 @@ public void savingOfNewJobWithTheSameUrlAndDifferentDateUpdatesExistingJob() thr assertEquals("New title", job.getTitle()); assertEquals(newDate, job.getDate()); assertEquals("There should be still 10 elements in the database after updating", - jobRepository.findAll().size(), 10); + jobRepository.findAll().size(), 10); } @Test - public void savingOfJobWithTheSameUrlAndSameDateDoesNotUpdateExistingJob() throws Exception { + public void savingOfJobWithTheSameUrlAndSameDateDoesNotUpdateExistingJob() { Job job = jobRepository.findOne(JOB_URL + 7); assertEquals("Title7", job.getTitle()); job.setTitle("New title"); @@ -92,7 +89,7 @@ public void savingOfJobWithTheSameUrlAndSameDateDoesNotUpdateExistingJob() throw } @Test - public void jobsAreRetrievedFromDatabaseSortedByDateInDescendingOrder() throws Exception { + public void jobsAreRetrievedFromDatabaseSortedByDateInDescendingOrder() { Page jobs = jobService.getJobs(new PageRequest(CURRENT_PAGE, PAGE_SIZE, Sort.Direction.DESC, "date")); assertEquals(PAGE_SIZE + " elements should be retrieved", PAGE_SIZE, jobs.getContent().size()); assertTrue("The jobs should be sorted from new to old", isSortedDescending(jobs)); @@ -101,11 +98,11 @@ public void jobsAreRetrievedFromDatabaseSortedByDateInDescendingOrder() throws E private boolean isSortedDescending(Page page) { List list = page.getContent(); return IntStream.range(0, PAGE_SIZE - 1).allMatch(i -> list.get(i).getDate() - .compareTo(list.get(i + 1).getDate()) > 0); + .compareTo(list.get(i + 1).getDate()) >= 0); } @After - public void tearDown() throws Exception { + public void tearDown() { jobRepository.deleteAll(); } } \ No newline at end of file