blacklisted entities are now in the configuration file
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
index 05ddedd..ec74123 100644 (file)
@@ -35,7 +35,7 @@ public class ArticleProvider {
         private static final String CLASS_NAME = ArticleProvider.class.getName();
         private static final Logger LOG = Logger.getLogger(CLASS_NAME);
         private final Map<Category, List<Article>> articlesByCategory = new HashMap<>();
-        private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
+        private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors());
         private final Config config;
         
         public ArticleProvider(Config config) {
@@ -74,10 +74,10 @@ public class ArticleProvider {
                 return false;
         }
         
-        private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) {
+        private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) {
                 String desc, title, thumbnail, feedTitle, str;
                 Date date;
-                String[] entities;
+                List<String> entities;
                 
                 feedTitle = feed.getTitle();
                 if (feedTitle != null) {
@@ -90,11 +90,7 @@ public class ArticleProvider {
                                 thumbnail = e.getUrl();    
                         break;
                 }
-                
-                if (thumbnail == null && feed.getImage() != null)
-                        thumbnail = feed.getImage().getUrl();
-                             
-                
+                                
                 title = entry.getTitle().trim();
                 
                 if (entry.getDescription() != null) {
@@ -112,15 +108,16 @@ public class ArticleProvider {
                         LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
                                      
                 
-                entities = null;
+                entities = new ArrayList<>();
                 if (desc != null && lang.equals("en"))
                         try {
-                                entities = NER.classify(desc);
+                                NER.classify(title, entities, config);
+                                NER.classify(desc, entities, config);
                         } catch (ClassCastException | ClassNotFoundException | IOException e1) {
                                 LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);                         
                         }
                 
-                return new Article(link, title, desc, thumbnail, date, feedTitle, entities);
+                return new Article(link, title, desc, thumbnail, date, feedTitle, entities.toArray(new String[0]));
         }
         
         private void addArticles(Category cat, SyndFeed feed) {
@@ -184,16 +181,24 @@ public class ArticleProvider {
         /**
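          * Returns a copy of the category's articles; when entity is not null, only the articles having that entity are included.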
          */
-        public List<Article> getArticles(Category cat)
+        public List<Article> getArticles(Category cat, String entity)
                         throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
-                List<Article> articles;
+                List<Article> articles, result;                
                 
                 synchronized (articlesByCategory) {
                         articles = getArticlesForUpdate(cat);
                 }
                 
-                synchronized (articles) {
-                        return new ArrayList<>(articles);
+                synchronized (articles) {                       
+                        if (entity == null)
+                                return new ArrayList<>(articles);
+                        
+                        result = new ArrayList<>(articles.size());
+                        for (Article a: articles)
+                                if (a.hasEntity(entity))
+                                        result.add(a);
+                        
+                        return result;
                 }
         }
         
@@ -206,11 +211,11 @@ public class ArticleProvider {
                 
                 LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);
                 
-                articles = getArticles(cat);
+                articles = getArticles(cat, null);
                 
                 entities = new HashMap<>();
                 for (Article a: articles) 
-                        if (a.getEntities() != null)
+                        if (a.getEntities() != null) {
                                 for (String e: a.getEntities()) {
                                         s = entities.get(e);
                                         if (s == null) {
@@ -219,6 +224,7 @@ public class ArticleProvider {
                                         }
                                         s.increment();
                                 }                
+                        }
                 
                 stats = new ArrayList<>(entities.values());
                 stats.sort(new Comparator<EntityStat>() {