X-Git-Url: https://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=05ddeddcff753d120eb655318a49136588f96945;hp=3bef20e712f1216025e89fbc98d22241328e5775;hb=56c07f5de3319eb61182b7100855801644538e6f;hpb=47bc9d3dc6b3cf77957f11bceea1bcda492f8818 diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 3bef20e..05ddedd 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -27,10 +27,13 @@ import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; +import pnews.EntityStat; import pnews.Feed; +import pnews.NER; public class ArticleProvider { - private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName()); + private static final String CLASS_NAME = ArticleProvider.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); private final Map> articlesByCategory = new HashMap<>(); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2); private final Config config; @@ -71,9 +74,10 @@ public class ArticleProvider { return false; } - private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) { String desc, title, thumbnail, feedTitle, str; Date date; + String[] entities; feedTitle = feed.getTitle(); if (feedTitle != null) { @@ -107,7 +111,16 @@ public class ArticleProvider { if (date == null) LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - return new Article(link, title, desc, thumbnail, date, feedTitle); + + entities = null; + if (desc != null && lang.equals("en")) + try { + entities = NER.classify(desc); + } catch (ClassCastException | ClassNotFoundException | IOException e1) { + LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); + } + + return new Article(link, title, desc, thumbnail, date, feedTitle, entities); } private void addArticles(Category cat, SyndFeed feed) { @@ -127,7 +140,7 @@ public class ArticleProvider { continue ; } - a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage())); synchronized (articles) { articles.add(a); @@ -184,6 +197,44 @@ public class ArticleProvider { } } + public List getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + Map entities; + final String FUNCTION_NAME = "getEntities"; + EntityStat s; + List stats; + + LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); + + articles = getArticles(cat); + + entities = new HashMap<>(); + for (Article a: articles) + if (a.getEntities() != null) + for (String e: a.getEntities()) { + s = entities.get(e); + if (s == null) { + s = new EntityStat(e); + entities.put(e, s); + } + s.increment(); + } + + stats = new ArrayList<>(entities.values()); + stats.sort(new Comparator() { + + @Override + public int compare(EntityStat o1, EntityStat o2) { + return Integer.compare(o2.getCount(), o1.getCount()); + } + + }); + + LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats); + + return stats; + } + private class Refresher implements Runnable { private final Category category;