X-Git-Url: https://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=55898a479a898713e94a3b57bc261fc14b5e89bd;hp=5efea15c67db941278f05d30b3da62519744ebb8;hb=aff83c8798602b535d13edeaffdb8f4238e2bbf5;hpb=c6f5722d9572158f0d561819f080f8ccf4e3c7d9 diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 5efea15..55898a4 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -3,6 +3,8 @@ package pnews.servlet; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -25,11 +27,13 @@ import com.rometools.rome.io.FeedException; import com.rometools.rome.io.SyndFeedInput; import com.rometools.rome.io.XmlReader; -import pnews.Article; -import pnews.Category; -import pnews.EntityStat; -import pnews.Feed; -import pnews.NER; +import net.wpitchoune.pnews.Article; +import net.wpitchoune.pnews.ArticleStore; +import net.wpitchoune.pnews.Category; +import net.wpitchoune.pnews.Config; +import net.wpitchoune.pnews.EntityStat; +import net.wpitchoune.pnews.Feed; +import net.wpitchoune.pnews.classifier.NamedEntityRecognizer; public class ArticleProvider { private static final String CLASS_NAME = ArticleProvider.class.getName(); @@ -68,15 +72,27 @@ public class ArticleProvider { private boolean exists(String articleLink, List
articles) { synchronized (articles) { for (Article a: articles) - if (a.link.equals(articleLink)) + if (a.getLink().equals(articleLink)) return true; } return false; } - private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) { - String desc, title, thumbnail, feedTitle, str; + private Instant getArticleInstant(SyndEntry entry) { Date date; + + date = entry.getUpdatedDate(); + if (date == null) + date = entry.getPublishedDate(); + + if (date == null) + return Instant.now(); + + return date.toInstant(); + } + + private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) { + String desc, title, thumbnail, feedTitle, str; List entities; feedTitle = feed.getTitle(); @@ -90,11 +106,7 @@ public class ArticleProvider { thumbnail = e.getUrl(); break; } - - if (thumbnail == null && feed.getImage() != null) - thumbnail = feed.getImage().getUrl(); - - + title = entry.getTitle().trim(); if (entry.getDescription() != null) { @@ -104,24 +116,18 @@ public class ArticleProvider { desc = null; LOG.severe("No description for " + feedTitle + " - " + title); } - - date = entry.getPublishedDate(); - if (date == null) - date = entry.getUpdatedDate(); - if (date == null) - LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - - + entities = new ArrayList<>(); - if (desc != null && lang.equals("en")) + if (lang.equals("en")) try { - NER.classify(title, entities); - NER.classify(desc, entities); + NamedEntityRecognizer.classify(title, entities, config); + if (desc != null) + NamedEntityRecognizer.classify(desc, entities, config); } catch (ClassCastException | ClassNotFoundException | IOException e1) { LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); } - return new Article(link, title, desc, thumbnail, date, feedTitle, entities.toArray(new String[0])); + return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0])); } private void addArticles(Category cat, SyndFeed feed) { @@ -141,7 +147,12 @@ public class ArticleProvider { continue ; } - a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage())); + final Instant instant = getArticleInstant(entry); + + if (config.isObsolete(instant)) + continue ; + + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant)); synchronized (articles) { articles.add(a); @@ -149,13 +160,13 @@ public class ArticleProvider { Collections.sort(articles, new Comparator
() { @Override public int compare(Article o1, Article o2) { - if (o1.publicationDate == o2.publicationDate) + if (o1.getPublicationDate() == o2.getPublicationDate()) return 0; - if (o1.publicationDate == null) + if (o1.getPublicationDate() == null) return 1; - if (o2.publicationDate == null) + if (o2.getPublicationDate() == null) return -1; - return o2.publicationDate.compareTo(o1.publicationDate); + return o2.getPublicationDate().compareTo(o1.getPublicationDate()); } }); } @@ -185,16 +196,24 @@ public class ArticleProvider { /** * Returns a copy. */ - public List
getArticles(Category cat) + public List
getArticles(Category cat, String entity) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - List
articles; + List
articles, result; synchronized (articlesByCategory) { articles = getArticlesForUpdate(cat); } - synchronized (articles) { - return new ArrayList<>(articles); + synchronized (articles) { + if (entity == null) + return new ArrayList<>(articles); + + result = new ArrayList<>(articles.size()); + for (Article a: articles) + if (a.hasEntity(entity)) + result.add(a); + + return result; } } @@ -204,14 +223,17 @@ public class ArticleProvider { final String FUNCTION_NAME = "getEntities"; EntityStat s; List stats; + Instant minInstant; LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); - articles = getArticles(cat); + articles = getArticles(cat, null); + + minInstant = Instant.now().minus(15, ChronoUnit.DAYS); entities = new HashMap<>(); - for (Article a: articles) - if (a.getEntities() != null) { + for (Article a: articles) + if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null) for (String e: a.getEntities()) { s = entities.get(e); if (s == null) { @@ -220,8 +242,7 @@ public class ArticleProvider { } s.increment(); } - } - + stats = new ArrayList<>(entities.values()); stats.sort(new Comparator() {