X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=5efea15c67db941278f05d30b3da62519744ebb8;hb=c6f5722d9572158f0d561819f080f8ccf4e3c7d9;hp=3e90d11320ae4024a3d1124c340ce9a6df0dec81;hpb=386f46525e32212ac5f3653135a6539c1b2639eb;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 3e90d11..5efea15 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -27,12 +27,15 @@ import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; +import pnews.EntityStat; import pnews.Feed; +import pnews.NER; public class ArticleProvider { - private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName()); + private static final String CLASS_NAME = ArticleProvider.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); private final Map> articlesByCategory = new HashMap<>(); - private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2); + private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()); private final Config config; public ArticleProvider(Config config) { @@ -71,9 +74,10 @@ public class ArticleProvider { return false; } - private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) { String desc, title, thumbnail, feedTitle, str; Date date; + List entities; feedTitle = feed.getTitle(); if (feedTitle != null) { @@ -107,7 +111,17 @@ public class ArticleProvider { if (date == null) LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - return new Article(link, title, desc, thumbnail, date, feedTitle); + + entities = new ArrayList<>(); + if (desc != null && lang.equals("en")) + try { + NER.classify(title, entities); + NER.classify(desc, entities); + } catch (ClassCastException | ClassNotFoundException | IOException e1) { + LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); + } + + return new Article(link, title, desc, thumbnail, date, feedTitle, entities.toArray(new String[0])); } private void addArticles(Category cat, SyndFeed feed) { @@ -117,7 +131,7 @@ public class ArticleProvider { feedTitle = feed.getTitle().trim(); - LOG.info("addArticles " + cat.getName() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); + LOG.info("addArticles " + cat.getLabel() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); for (SyndEntry entry: feed.getEntries()) { String link = entry.getLink().trim(); @@ -127,7 +141,7 @@ public class ArticleProvider { continue ; } - a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage())); synchronized (articles) { articles.add(a); @@ -147,7 +161,7 @@ public class ArticleProvider { } } - LOG.info("addArticles done " + cat.getName()); + LOG.info("addArticles done " + cat.getLabel()); } private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { @@ -161,7 +175,7 @@ public class ArticleProvider { addArticles(cat, getSyndFeed(f.getURL())); } catch (Throwable e) { LOG.log(Level.SEVERE, - "retrieveArticles failure " + cat.getName() + " " + f.toString(), + "retrieveArticles failure " + cat.getLabel() + " " + f.toString(), e); } else @@ -184,6 +198,45 @@ public class ArticleProvider { } } + public List getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + Map entities; + final String FUNCTION_NAME = "getEntities"; + EntityStat s; + List stats; + + LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); + + articles = getArticles(cat); + + entities = new HashMap<>(); + for (Article a: articles) + if (a.getEntities() != null) { + for (String e: a.getEntities()) { + s = entities.get(e); + if (s == null) { + s = new EntityStat(e); + entities.put(e, s); + } + s.increment(); + } + } + + stats = new ArrayList<>(entities.values()); + stats.sort(new Comparator() { + + @Override + public int compare(EntityStat o1, EntityStat o2) { + return Integer.compare(o2.getCount(), o1.getCount()); + } + + }); + + LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats); + + return stats; + } + private class Refresher implements Runnable { private final Category category; @@ -193,7 +246,7 @@ public class ArticleProvider { @Override public void run() { - LOG.info("refresher "+ category.getName()); + LOG.info("refresher "+ category.getLabel()); try { retrieveArticles(category); @@ -201,7 +254,7 @@ public class ArticleProvider { LOG.log(Level.SEVERE, "refresher failure", e); } - LOG.info("refresher "+ category.getName() + " done"); + LOG.info("refresher "+ category.getLabel() + " done"); } } }