Cleaned up and refactored to move to the net.wpitchoune package
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
index c2d8f59..55898a4 100644 (file)
@@ -3,6 +3,8 @@ package pnews.servlet;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -25,11 +27,13 @@ import com.rometools.rome.io.FeedException;
 import com.rometools.rome.io.SyndFeedInput;
 import com.rometools.rome.io.XmlReader;
 
-import pnews.Article;
-import pnews.Category;
-import pnews.EntityStat;
-import pnews.Feed;
-import pnews.NER;
+import net.wpitchoune.pnews.Article;
+import net.wpitchoune.pnews.ArticleStore;
+import net.wpitchoune.pnews.Category;
+import net.wpitchoune.pnews.Config;
+import net.wpitchoune.pnews.EntityStat;
+import net.wpitchoune.pnews.Feed;
+import net.wpitchoune.pnews.classifier.NamedEntityRecognizer;
 
 public class ArticleProvider {
         private static final String CLASS_NAME = ArticleProvider.class.getName();
@@ -68,15 +72,27 @@ public class ArticleProvider {
         private boolean exists(String articleLink, List<Article> articles) {
                 synchronized (articles) {
                         for (Article a: articles)
-                                if (a.link.equals(articleLink))
+                                if (a.getLink().equals(articleLink))
                                         return true;
                 }
                 return false;
         }
         
-        private static Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang) {
-                String desc, title, thumbnail, feedTitle, str;
+        private Instant getArticleInstant(SyndEntry entry) {
                 Date date;
+                
+                date = entry.getUpdatedDate();       
+                if (date == null)
+                        date = entry.getPublishedDate();
+
+                if (date == null)
+                        return Instant.now();
+                
+                return date.toInstant();
+        }
+        
+        private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) {
+                String desc, title, thumbnail, feedTitle, str;
                 List<String> entities;
                 
                 feedTitle = feed.getTitle();
@@ -100,24 +116,18 @@ public class ArticleProvider {
                         desc = null;
                         LOG.severe("No description for " + feedTitle + " - " + title);
                 }
-                
-                date = entry.getPublishedDate();
-                if (date == null)
-                        date = entry.getUpdatedDate();
-                if (date == null)
-                        LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
-                                     
-                
+                                
                 entities = new ArrayList<>();
-                if (desc != null && lang.equals("en"))
+                if (lang.equals("en"))
                         try {
-                                NER.classify(title, entities);
-                                NER.classify(desc, entities);
+                                NamedEntityRecognizer.classify(title, entities, config);
+                                if (desc != null)
+                                        NamedEntityRecognizer.classify(desc, entities, config);
                         } catch (ClassCastException | ClassNotFoundException | IOException e1) {
                                 LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);                         
                         }
                 
-                return new Article(link, title, desc, thumbnail, date, feedTitle, entities.toArray(new String[0]));
+                return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0]));
         }
         
         private void addArticles(Category cat, SyndFeed feed) {
@@ -137,7 +147,12 @@ public class ArticleProvider {
                                 continue ;
                         }
                         
-                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage()));
+                        final Instant instant = getArticleInstant(entry);
+                        
+                        if (config.isObsolete(instant))
+                                continue ;
+                        
+                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant));
                         
                         synchronized (articles) {
                                 articles.add(a);
@@ -145,13 +160,13 @@ public class ArticleProvider {
                                 Collections.sort(articles, new Comparator<Article>() {
                                         @Override
                                         public int compare(Article o1, Article o2) {
-                                                if (o1.publicationDate == o2.publicationDate)
+                                                if (o1.getPublicationDate() == o2.getPublicationDate())
                                                         return 0;
-                                                if (o1.publicationDate == null)
+                                                if (o1.getPublicationDate() == null)
                                                         return 1;
-                                                if (o2.publicationDate == null)
+                                                if (o2.getPublicationDate() == null)
                                                         return -1;
-                                                return o2.publicationDate.compareTo(o1.publicationDate);
+                                                return o2.getPublicationDate().compareTo(o1.getPublicationDate());
                                         }
                                 });
                         }
@@ -208,14 +223,17 @@ public class ArticleProvider {
                 final String FUNCTION_NAME = "getEntities";
                 EntityStat s;
                 List<EntityStat> stats;
+                Instant minInstant;
                 
                 LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);
                 
                 articles = getArticles(cat, null);
                 
+                minInstant = Instant.now().minus(15, ChronoUnit.DAYS);
+                
                 entities = new HashMap<>();
-                for (Article a: articles) 
-                        if (a.getEntities() != null) {
+                for (Article a: articles)
+                        if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null)
                                 for (String e: a.getEntities()) {
                                         s = entities.get(e);
                                         if (s == null) {
@@ -224,8 +242,7 @@ public class ArticleProvider {
                                         }
                                         s.increment();
                                 }                
-                        }
-                
+               
                 stats = new ArrayList<>(entities.values());
                 stats.sort(new Comparator<EntityStat>() {