X-Git-Url: http://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=ba96fe39856c1aff444dc4dc2aabe0c9be5e5f8a;hb=8372537c3503ab0ce4681fac13ccd65023fbcd84;hp=3bef20e712f1216025e89fbc98d22241328e5775;hpb=180bf164e8751ae680b27be5302c681b955eba76;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 3bef20e..ba96fe3 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -3,6 +3,8 @@ package pnews.servlet; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -27,12 +29,15 @@ import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; +import pnews.EntityStat; import pnews.Feed; +import pnews.NER; public class ArticleProvider { - private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName()); + private static final String CLASS_NAME = ArticleProvider.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); private final Map> articlesByCategory = new HashMap<>(); - private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2); + private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()); private final Config config; public ArticleProvider(Config config) { @@ -71,9 +76,23 @@ public class ArticleProvider { return false; } - private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + private Instant getArticleInstant(SyndEntry entry) { + Date date; + + date = entry.getUpdatedDate(); + if (date == null) + date = entry.getPublishedDate(); + + if (date == null) + return Instant.now(); + + return date.toInstant(); + } + + private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) { String desc, title, thumbnail, feedTitle, str; Date date; + List entities; feedTitle = feed.getTitle(); if (feedTitle != null) { @@ -86,11 +105,7 @@ public class ArticleProvider { thumbnail = e.getUrl(); break; } - - if (thumbnail == null && feed.getImage() != null) - thumbnail = feed.getImage().getUrl(); - - + title = entry.getTitle().trim(); if (entry.getDescription() != null) { @@ -100,14 +115,17 @@ public class ArticleProvider { desc = null; LOG.severe("No description for " + feedTitle + " - " + title); } + + entities = new ArrayList<>(); + if (desc != null && lang.equals("en")) + try { + NER.classify(title, entities, config); + NER.classify(desc, entities, config); + } catch (ClassCastException | ClassNotFoundException | IOException e1) { + LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); + } - date = entry.getPublishedDate(); - if (date == null) - date = entry.getUpdatedDate(); - if (date == null) - LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - - return new Article(link, title, desc, thumbnail, date, feedTitle); + return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0])); } private void addArticles(Category cat, SyndFeed feed) { @@ -127,7 +145,12 @@ public class ArticleProvider { continue ; } - a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); + final Instant instant = getArticleInstant(entry); + + if (config.isObsolete(instant)) + continue ; + + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant)); synchronized (articles) { articles.add(a); @@ -171,19 +194,68 @@ public class ArticleProvider { /** * Returns a copy. */ - public List
getArticles(Category cat) + public List
getArticles(Category cat, String entity) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - List
articles; + List
articles, result; synchronized (articlesByCategory) { articles = getArticlesForUpdate(cat); } - synchronized (articles) { - return new ArrayList<>(articles); + synchronized (articles) { + if (entity == null) + return new ArrayList<>(articles); + + result = new ArrayList<>(articles.size()); + for (Article a: articles) + if (a.hasEntity(entity)) + result.add(a); + + return result; } } + public List getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + Map entities; + final String FUNCTION_NAME = "getEntities"; + EntityStat s; + List stats; + Instant minInstant; + + LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); + + articles = getArticles(cat, null); + + minInstant = Instant.now().minus(15, ChronoUnit.DAYS); + + entities = new HashMap<>(); + for (Article a: articles) + if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null) + for (String e: a.getEntities()) { + s = entities.get(e); + if (s == null) { + s = new EntityStat(e); + entities.put(e, s); + } + s.increment(); + } + + stats = new ArrayList<>(entities.values()); + stats.sort(new Comparator() { + + @Override + public int compare(EntityStat o1, EntityStat o2) { + return Integer.compare(o2.getCount(), o1.getCount()); + } + + }); + + LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats); + + return stats; + } + private class Refresher implements Runnable { private final Category category;