cleanup, moved to net.wpitchoune package
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java
deleted file mode 100644 (file)
index 55898a4..0000000
+++ /dev/null
@@ -1,281 +0,0 @@
-package pnews.servlet;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.jsoup.Jsoup;
-
-import com.rometools.rome.feed.synd.SyndEnclosure;
-import com.rometools.rome.feed.synd.SyndEntry;
-import com.rometools.rome.feed.synd.SyndFeed;
-import com.rometools.rome.io.FeedException;
-import com.rometools.rome.io.SyndFeedInput;
-import com.rometools.rome.io.XmlReader;
-
-import net.wpitchoune.pnews.Article;
-import net.wpitchoune.pnews.ArticleStore;
-import net.wpitchoune.pnews.Category;
-import net.wpitchoune.pnews.Config;
-import net.wpitchoune.pnews.EntityStat;
-import net.wpitchoune.pnews.Feed;
-import net.wpitchoune.pnews.classifier.NamedEntityRecognizer;
-
-public class ArticleProvider {
-        private static final String CLASS_NAME = ArticleProvider.class.getName();
-        private static final Logger LOG = Logger.getLogger(CLASS_NAME);
-        private final Map<Category, List<Article>> articlesByCategory = new HashMap<>();
-        private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors());
-        private final Config config;
-        
-        public ArticleProvider(Config config) {
-                this.config = config;
-                for (Category cat: config.getCategories())
-                        scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS);
-        }
-        
-        private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException {
-                XmlReader r;
-                
-                r = new XmlReader(new URL(u));
-                
-                return new SyndFeedInput().build(r);                
-        }
-        
-        private List<Article> getArticlesForUpdate(Category cat) {
-                List<Article> result;
-                
-                synchronized (articlesByCategory) {
-                        result = articlesByCategory.get(cat);
-                        if (result == null) {
-                                result = new ArrayList<>();
-                                articlesByCategory.put(cat, result);
-                        }
-                        return result;
-                }                
-        }
-        
-        private boolean exists(String articleLink, List<Article> articles) {
-                synchronized (articles) {
-                        for (Article a: articles)
-                                if (a.getLink().equals(articleLink))
-                                        return true;
-                }
-                return false;
-        }
-        
-        private Instant getArticleInstant(SyndEntry entry) {
-                Date date;
-                
-                date = entry.getUpdatedDate();       
-                if (date == null)
-                        date = entry.getPublishedDate();
-
-                if (date == null)
-                        return Instant.now();
-                
-                return date.toInstant();
-        }
-        
-        private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) {
-                String desc, title, thumbnail, feedTitle, str;
-                List<String> entities;
-                
-                feedTitle = feed.getTitle();
-                if (feedTitle != null) {
-                        feedTitle = feedTitle.trim();
-                }
-                
-                thumbnail = null;
-                for (SyndEnclosure e: entry.getEnclosures()) {
-                        if (e.getType().startsWith("image/"))
-                                thumbnail = e.getUrl();    
-                        break;
-                }
-                                
-                title = entry.getTitle().trim();
-                
-                if (entry.getDescription() != null) {
-                        str = entry.getDescription().getValue();
-                        desc = Jsoup.parse(str).text();
-                } else {       
-                        desc = null;
-                        LOG.severe("No description for " + feedTitle + " - " + title);
-                }
-                                
-                entities = new ArrayList<>();
-                if (lang.equals("en"))
-                        try {
-                                NamedEntityRecognizer.classify(title, entities, config);
-                                if (desc != null)
-                                        NamedEntityRecognizer.classify(desc, entities, config);
-                        } catch (ClassCastException | ClassNotFoundException | IOException e1) {
-                                LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);                         
-                        }
-                
-                return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0]));
-        }
-        
-        private void addArticles(Category cat, SyndFeed feed) {
-                String feedTitle;
-                List<Article> articles;
-                Article a;
-                
-                feedTitle = feed.getTitle().trim();
-                
-                LOG.info("addArticles " + cat.getLabel() + " " + feedTitle + " number of articles: " + feed.getEntries().size());
-                
-                for (SyndEntry entry: feed.getEntries()) {
-                        String link = entry.getLink().trim();
-                        articles = getArticlesForUpdate(cat);
-                        if (exists(link, articles)) {
-                                LOG.fine("addArticles " + link + " is already present");
-                                continue ;
-                        }
-                        
-                        final Instant instant = getArticleInstant(entry);
-                        
-                        if (config.isObsolete(instant))
-                                continue ;
-                        
-                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant));
-                        
-                        synchronized (articles) {
-                                articles.add(a);
-
-                                Collections.sort(articles, new Comparator<Article>() {
-                                        @Override
-                                        public int compare(Article o1, Article o2) {
-                                                if (o1.getPublicationDate() == o2.getPublicationDate())
-                                                        return 0;
-                                                if (o1.getPublicationDate() == null)
-                                                        return 1;
-                                                if (o2.getPublicationDate() == null)
-                                                        return -1;
-                                                return o2.getPublicationDate().compareTo(o1.getPublicationDate());
-                                        }
-                                });
-                        }
-                }          
-                
-                LOG.info("addArticles done " + cat.getLabel());
-        }
-             
-        private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
-                List<Feed> feeds;
-                
-                feeds = config.getFeedsByCategory().get(cat);
-                
-                if (feeds != null)
-                        for (Feed f: feeds)
-                                try {
-                                        addArticles(cat, getSyndFeed(f.getURL()));
-                                } catch (Throwable e) {
-                                        LOG.log(Level.SEVERE,
-                                                "retrieveArticles failure " + cat.getLabel() + " " + f.toString(),
-                                                e);
-                                }
-                else
-                        LOG.severe("No feed for category " + cat);
-        }
-        
-        /**
-         * Returns a copy.
-         */
-        public List<Article> getArticles(Category cat, String entity)
-                        throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
-                List<Article> articles, result;                
-                
-                synchronized (articlesByCategory) {
-                        articles = getArticlesForUpdate(cat);
-                }
-                
-                synchronized (articles) {                       
-                        if (entity == null)
-                                return new ArrayList<>(articles);
-                        
-                        result = new ArrayList<>(articles.size());
-                        for (Article a: articles)
-                                if (a.hasEntity(entity))
-                                        result.add(a);
-                        
-                        return result;
-                }
-        }
-        
-        public List<EntityStat> getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
-                List<Article> articles;
-                Map<String, EntityStat> entities;
-                final String FUNCTION_NAME = "getEntities";
-                EntityStat s;
-                List<EntityStat> stats;
-                Instant minInstant;
-                
-                LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);
-                
-                articles = getArticles(cat, null);
-                
-                minInstant = Instant.now().minus(15, ChronoUnit.DAYS);
-                
-                entities = new HashMap<>();
-                for (Article a: articles)
-                        if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null)
-                                for (String e: a.getEntities()) {
-                                        s = entities.get(e);
-                                        if (s == null) {
-                                                s = new EntityStat(e);
-                                                entities.put(e,  s);
-                                        }
-                                        s.increment();
-                                }                
-               
-                stats = new ArrayList<>(entities.values());
-                stats.sort(new Comparator<EntityStat>() {
-
-                        @Override
-                        public int compare(EntityStat o1, EntityStat o2) {
-                                return Integer.compare(o2.getCount(), o1.getCount());
-                        }
-                        
-                });
-                
-                LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats);
-                
-                return stats;
-        }
-        
-        private class Refresher implements Runnable {
-                private final Category category;
-                
-                public Refresher(Category category) {
-                        this.category = category;
-                }
-                
-                @Override
-                public void run() {                       
-                        LOG.info("refresher "+ category.getLabel());
-                        
-                        try {
-                                retrieveArticles(category);
-                        } catch (IllegalArgumentException | FeedException | IOException e) {
-                                LOG.log(Level.SEVERE, "refresher failure", e);
-                        }                        
-                        
-                        LOG.info("refresher "+ category.getLabel() + " done");
-                }                
-        }
-}