/*
 * Provenance of this text (gitweb blobdiff header it was extracted from):
 *   X-Git-Url: https://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;fp=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=0000000000000000000000000000000000000000;hp=55898a479a898713e94a3b57bc261fc14b5e89bd;hb=a0c6addfd9ac6ac45f37b4202e787602c40e6bf7;hpb=aff83c8798602b535d13edeaffdb8f4238e2bbf5
 *   diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java
 *   deleted file mode 100644
 *   index 55898a4..0000000
 *
 * NOTE(review): the extracted text had lost its generic type arguments; they
 * are reconstructed below from how each variable is used - confirm against
 * the repository history.
 */
package pnews.servlet;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jsoup.Jsoup;

import com.rometools.rome.feed.synd.SyndEnclosure;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;

import net.wpitchoune.pnews.Article;
import net.wpitchoune.pnews.ArticleStore;
import net.wpitchoune.pnews.Category;
import net.wpitchoune.pnews.Config;
import net.wpitchoune.pnews.EntityStat;
import net.wpitchoune.pnews.Feed;
import net.wpitchoune.pnews.classifier.NamedEntityRecognizer;

/**
 * Keeps an in-memory list of articles per category, refreshed periodically
 * from the feeds declared in the {@link Config}.
 *
 * <p>One {@link Refresher} task per category is scheduled at construction
 * time. Each per-category list is kept sorted newest first and is guarded by
 * synchronizing on the list itself; the category map is guarded by
 * synchronizing on the map.
 */
public class ArticleProvider {
    private static final String CLASS_NAME = ArticleProvider.class.getName();
    private static final Logger LOG = Logger.getLogger(CLASS_NAME);

    /** Delay before the first refresh of a category, in seconds. */
    private static final long REFRESH_INITIAL_DELAY_SECONDS = 2;
    /** Period between two refreshes of a category, in seconds. */
    private static final long REFRESH_PERIOD_SECONDS = 600;
    /** Only articles younger than this many days count in entity statistics. */
    private static final long ENTITY_STATS_WINDOW_DAYS = 15;

    /** Orders articles newest first; articles without a date go last. */
    private static final Comparator<Article> NEWEST_FIRST =
            Comparator.comparing(Article::getPublicationDate,
                                 Comparator.nullsLast(Comparator.<Instant>reverseOrder()));

    private final Map<Category, List<Article>> articlesByCategory = new HashMap<>();
    private final ScheduledExecutorService scheduler =
            Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors());
    private final Config config;

    /**
     * Creates the provider and schedules a periodic refresh for every
     * category returned by {@code config.getCategories()}.
     *
     * @param config the configuration providing categories and feeds
     */
    public ArticleProvider(Config config) {
        this.config = config;
        for (Category cat : config.getCategories()) {
            scheduler.scheduleAtFixedRate(new Refresher(cat),
                                          REFRESH_INITIAL_DELAY_SECONDS,
                                          REFRESH_PERIOD_SECONDS,
                                          TimeUnit.SECONDS);
        }
    }

    /**
     * Downloads and parses the feed located at the given URL.
     *
     * @param u the feed URL
     * @return the parsed feed
     * @throws MalformedURLException if {@code u} is not a valid URL
     * @throws FeedException if the feed cannot be parsed
     * @throws IOException if the feed cannot be read
     */
    private static SyndFeed getSyndFeed(String u)
            throws IllegalArgumentException, FeedException, MalformedURLException, IOException {
        // Close the reader (and the underlying URL stream) once the feed is built.
        try (XmlReader reader = new XmlReader(new URL(u))) {
            return new SyndFeedInput().build(reader);
        }
    }

    /**
     * Returns the live (not copied) article list of a category, creating and
     * registering an empty one on first access. Callers must synchronize on
     * the returned list before reading or modifying it.
     */
    private List<Article> getArticlesForUpdate(Category cat) {
        synchronized (articlesByCategory) {
            return articlesByCategory.computeIfAbsent(cat, k -> new ArrayList<>());
        }
    }

    /** Tells whether {@code articles} already contains an article with the given link. */
    private boolean exists(String articleLink, List<Article> articles) {
        synchronized (articles) {
            for (Article a : articles) {
                if (a.getLink().equals(articleLink)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the update date of the entry, else its publication date, else
     * the current instant when the entry carries neither.
     */
    private Instant getArticleInstant(SyndEntry entry) {
        Date date = entry.getUpdatedDate();
        if (date == null) {
            date = entry.getPublishedDate();
        }
        return date == null ? Instant.now() : date.toInstant();
    }

    /**
     * Builds an {@link Article} from a feed entry.
     *
     * <p>The thumbnail is the URL of the first enclosure whose type starts
     * with {@code image/}. The description is reduced to plain text with
     * Jsoup. Named entities are extracted from the title and description
     * only when {@code lang} is {@code "en"}.
     */
    private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) {
        String feedTitle = feed.getTitle();
        if (feedTitle != null) {
            feedTitle = feedTitle.trim();
        }

        // The original 'break' was outside the brace-less 'if', so the loop
        // always stopped after the first enclosure even if it was not an image.
        String thumbnail = null;
        for (SyndEnclosure enclosure : entry.getEnclosures()) {
            String type = enclosure.getType();
            if (type != null && type.startsWith("image/")) {
                thumbnail = enclosure.getUrl();
                break;
            }
        }

        String title = entry.getTitle().trim();

        String desc = null;
        if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
            desc = Jsoup.parse(entry.getDescription().getValue()).text();
        } else {
            LOG.severe("No description for " + feedTitle + " - " + title);
        }

        List<String> entities = new ArrayList<>();
        if ("en".equals(lang)) {
            try {
                NamedEntityRecognizer.classify(title, entities, config);
                if (desc != null) {
                    NamedEntityRecognizer.classify(desc, entities, config);
                }
            } catch (ClassCastException | ClassNotFoundException | IOException e1) {
                LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);
            }
        }

        return new Article(link, title, desc, thumbnail, instant, feedTitle,
                           entities.toArray(new String[0]));
    }

    /**
     * Adds to the category every entry of {@code feed} that is not already
     * present and that {@code config.isObsolete} does not reject, keeping the
     * category list sorted newest first after each insertion.
     */
    private void addArticles(Category cat, SyndFeed feed) {
        // Null-safe like toArticle(): a feed without title must not abort the refresh.
        String feedTitle = feed.getTitle();
        if (feedTitle != null) {
            feedTitle = feedTitle.trim();
        }

        LOG.info("addArticles " + cat.getLabel() + " " + feedTitle
                 + " number of articles: " + feed.getEntries().size());

        // The list registered for a category is never replaced, so one lookup is enough.
        final List<Article> articles = getArticlesForUpdate(cat);

        for (SyndEntry entry : feed.getEntries()) {
            final String rawLink = entry.getLink();
            if (rawLink == null) {
                // Skip this entry instead of failing the remaining ones of the feed.
                LOG.warning("addArticles entry without link in " + feedTitle);
                continue;
            }
            final String link = rawLink.trim();

            if (exists(link, articles)) {
                LOG.fine("addArticles " + link + " is already present");
                continue;
            }

            final Instant instant = getArticleInstant(entry);
            if (config.isObsolete(instant)) {
                continue;
            }

            // Built outside the list lock: toArticle() may run the entity classifier.
            Article a = ArticleStore.singleton.getArticle(
                    link, () -> toArticle(link, entry, feed, cat.getLanguage(), instant));

            synchronized (articles) {
                articles.add(a);
                Collections.sort(articles, NEWEST_FIRST);
            }
        }

        LOG.info("addArticles done " + cat.getLabel());
    }

    /**
     * Fetches every feed configured for the category and merges its entries.
     * A failure on one feed is logged and does not prevent the others from
     * being processed.
     */
    private void retrieveArticles(Category cat)
            throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
        List<Feed> feeds = config.getFeedsByCategory().get(cat);

        if (feeds == null) {
            LOG.severe("No feed for category " + cat);
            return;
        }

        for (Feed f : feeds) {
            try {
                addArticles(cat, getSyndFeed(f.getURL()));
            } catch (Throwable e) {
                // Deliberately broad, as in the original: one bad feed must not stop the loop.
                LOG.log(Level.SEVERE,
                        "retrieveArticles failure " + cat.getLabel() + " " + f.toString(),
                        e);
            }
        }
    }

    /**
     * Returns a copy of the articles of a category, newest first.
     *
     * @param cat the category
     * @param entity when non-null, only articles for which
     *        {@code hasEntity(entity)} is true are returned
     * @return a new list that the caller may freely modify
     */
    public List<Article> getArticles(Category cat, String entity)
            throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
        // getArticlesForUpdate() takes the map lock itself.
        List<Article> articles = getArticlesForUpdate(cat);

        synchronized (articles) {
            if (entity == null) {
                return new ArrayList<>(articles);
            }

            List<Article> result = new ArrayList<>(articles.size());
            for (Article a : articles) {
                if (a.hasEntity(entity)) {
                    result.add(a);
                }
            }
            return result;
        }
    }

    /**
     * Counts how often each entity appears in the articles of a category
     * published during the last {@value #ENTITY_STATS_WINDOW_DAYS} days.
     *
     * @param cat the category
     * @return the statistics, highest count first
     */
    public List<EntityStat> getEntityStats(Category cat)
            throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
        final String FUNCTION_NAME = "getEntityStats";

        LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);

        List<Article> articles = getArticles(cat, null);
        Instant minInstant = Instant.now().minus(ENTITY_STATS_WINDOW_DAYS, ChronoUnit.DAYS);

        Map<String, EntityStat> entities = new HashMap<>();
        for (Article a : articles) {
            Instant published = a.getPublicationDate();
            // The sort order tolerates undated articles, so this loop must too.
            if (published == null || !published.isAfter(minInstant) || a.getEntities() == null) {
                continue;
            }
            for (String e : a.getEntities()) {
                entities.computeIfAbsent(e, k -> new EntityStat(k)).increment();
            }
        }

        List<EntityStat> stats = new ArrayList<>(entities.values());
        stats.sort((o1, o2) -> Integer.compare(o2.getCount(), o1.getCount()));

        LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats);

        return stats;
    }

    /** Scheduled task that refreshes the articles of one category. */
    private class Refresher implements Runnable {
        private final Category category;

        public Refresher(Category category) {
            this.category = category;
        }

        @Override
        public void run() {
            LOG.info("refresher "+ category.getLabel());

            try {
                retrieveArticles(category);
            } catch (FeedException | IOException e) {
                LOG.log(Level.SEVERE, "refresher failure", e);
            } catch (RuntimeException e) {
                // An exception escaping run() would make scheduleAtFixedRate
                // suppress every later refresh of this category.
                LOG.log(Level.SEVERE, "refresher failure", e);
            }

            LOG.info("refresher "+ category.getLabel() + " done");
        }
    }
}