find hot named entities using stanford ner
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
index 3bef20e..05ddedd 100644 (file)
@@ -27,10 +27,13 @@ import com.rometools.rome.io.XmlReader;
 
 import pnews.Article;
 import pnews.Category;
+import pnews.EntityStat;
 import pnews.Feed;
+import pnews.NER;
 
 public class ArticleProvider {
-        private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName());
+        private static final String CLASS_NAME = ArticleProvider.class.getName();
+        private static final Logger LOG = Logger.getLogger(CLASS_NAME);
         private final Map<Category, List<Article>> articlesByCategory = new HashMap<>();
         private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
         private final Config config;
@@ -71,9 +74,10 @@ public class ArticleProvider {
                 return false;
         }
         
-        private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) {
+        private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) {
                 String desc, title, thumbnail, feedTitle, str;
                 Date date;
+                String[] entities;
                 
                 feedTitle = feed.getTitle();
                 if (feedTitle != null) {
@@ -107,7 +111,16 @@ public class ArticleProvider {
                 if (date == null)
                         LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
                                      
-                return new Article(link, title, desc, thumbnail, date, feedTitle);
+                
+                entities = null;
+                if (desc != null && lang.equals("en"))
+                        try {
+                                entities = NER.classify(desc);
+                        } catch (ClassCastException | ClassNotFoundException | IOException e1) {
+                                LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);                         
+                        }
+                
+                return new Article(link, title, desc, thumbnail, date, feedTitle, entities);
         }
         
         private void addArticles(Category cat, SyndFeed feed) {
@@ -127,7 +140,7 @@ public class ArticleProvider {
                                 continue ;
                         }
                         
-                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed));
+                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage()));
                         
                         synchronized (articles) {
                                 articles.add(a);
@@ -184,6 +197,44 @@ public class ArticleProvider {
                 }
         }
         
+        public List<EntityStat> getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
+                List<Article> articles;
+                Map<String, EntityStat> entities;
+                final String FUNCTION_NAME = "getEntities";
+                EntityStat s;
+                List<EntityStat> stats;
+                
+                LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);
+                
+                articles = getArticles(cat);
+                
+                entities = new HashMap<>();
+                for (Article a: articles) 
+                        if (a.getEntities() != null)
+                                for (String e: a.getEntities()) {
+                                        s = entities.get(e);
+                                        if (s == null) {
+                                                s = new EntityStat(e);
+                                                entities.put(e,  s);
+                                        }
+                                        s.increment();
+                                }                
+                
+                stats = new ArrayList<>(entities.values());
+                stats.sort(new Comparator<EntityStat>() {
+
+                        @Override
+                        public int compare(EntityStat o1, EntityStat o2) {
+                                return Integer.compare(o2.getCount(), o1.getCount());
+                        }
+                        
+                });
+                
+                LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats);
+                
+                return stats;
+        }
+        
         private class Refresher implements Runnable {
                 private final Category category;