X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=3e90d11320ae4024a3d1124c340ce9a6df0dec81;hb=386f46525e32212ac5f3653135a6539c1b2639eb;hp=95e3951f4fd46e18330262cfdedb034a34a892e4;hpb=6225887c0f8a1c22754ac98941908f47281bb2d3;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 95e3951..3e90d11 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashMap; @@ -26,136 +27,160 @@ import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; +import pnews.Feed; public class ArticleProvider { - public final static ArticleProvider singleton = new ArticleProvider(); private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName()); private final Map> articlesByCategory = new HashMap<>(); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2); + private final Config config; - private ArticleProvider() { - for (Category cat:Category.values()) - scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 120, TimeUnit.SECONDS); + public ArticleProvider(Config config) { + this.config = config; + for (Category cat: config.getCategories()) + scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS); } private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException { - try (XmlReader reader = new XmlReader(new URL(u))) { - return new SyndFeedInput().build(reader); - } - } - - private static Map getFeeds() { - Map result; + XmlReader r; - result = new HashMap<>(); + r = new XmlReader(new URL(u)); - result.put(Category.TOP, - new String[] { - "http://www.francetvinfo.fr/titres.rss", - "http://www.france24.com/fr/actualites/rss", - //"https://www.franceinter.fr/rss/a-la-une.xml", - "http://www.rfi.fr/general/rss", - "http://www.cnews.fr/rss/une", - "http://www.bfmtv.com/rss/info/flux-rss/flux-toutes-les-actualites/" - }); + return new SyndFeedInput().build(r); + } + + private List
getArticlesForUpdate(Category cat) { + List
result; - result.put(Category.SPORT, - new String[] { "http://www.france24.com/fr/sports/rss" }); + synchronized (articlesByCategory) { + result = articlesByCategory.get(cat); + if (result == null) { + result = new ArrayList<>(); + articlesByCategory.put(cat, result); + } + return result; + } + } + + private boolean exists(String articleLink, List
articles) { + synchronized (articles) { + for (Article a: articles) + if (a.link.equals(articleLink)) + return true; + } + return false; + } + + private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + String desc, title, thumbnail, feedTitle, str; + Date date; - result.put(Category.FRANCE, - new String[] { "http://www.france24.com/fr/france/rss", - "http://www.rfi.fr/france/rss"}); + feedTitle = feed.getTitle(); + if (feedTitle != null) { + feedTitle = feedTitle.trim(); + } - result.put(Category.EUROPE, - new String[] { "http://www.france24.com/fr/europe/rss" }); + thumbnail = null; + for (SyndEnclosure e: entry.getEnclosures()) { + if (e.getType().startsWith("image/")) + thumbnail = e.getUrl(); + break; + } - result.put(Category.ECO, - new String[] { "http://www.france24.com/fr/economie/rss", - "http://www.rfi.fr/economie/rss" }); + if (thumbnail == null && feed.getImage() != null) + thumbnail = feed.getImage().getUrl(); + - result.put(Category.ESSONNE, - new String[] { /*"https://www.essonneinfo.fr/feed/"*/ }); + title = entry.getTitle().trim(); - result.put(Category.TECHNOLOGIE, - new String[] { "http://feeds.feedburner.com/lesnumeriques/news", - "http://www.zdnet.fr/feeds/rss/actualites/"}); + if (entry.getDescription() != null) { + str = entry.getDescription().getValue(); + desc = Jsoup.parse(str).text(); + } else { + desc = null; + LOG.severe("No description for " + feedTitle + " - " + title); + } - return result; + date = entry.getPublishedDate(); + if (date == null) + date = entry.getUpdatedDate(); + if (date == null) + LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); + + return new Article(link, title, desc, thumbnail, date, feedTitle); } private void addArticles(Category cat, SyndFeed feed) { - String thumbnail; - String desc; - Date date; + String feedTitle; List
articles; + Article a; - LOG.info("addArticles" + cat.getId()); + feedTitle = feed.getTitle().trim(); + + LOG.info("addArticles " + cat.getName() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); for (SyndEntry entry: feed.getEntries()) { - thumbnail = null; - for (SyndEnclosure e: entry.getEnclosures()) { - if (e.getType().startsWith("image/")) - thumbnail = e.getUrl(); - break; - } - - if (entry.getDescription() != null) { - desc = Jsoup.parse(entry.getDescription().getValue()).text(); - } else { - desc = null; - LOG.severe("No description for " + feed.getTitle() + " - " + entry.getTitle()); + String link = entry.getLink().trim(); + articles = getArticlesForUpdate(cat); + if (exists(link, articles)) { + LOG.fine("addArticles " + link + " is already present"); + continue ; } - date = entry.getPublishedDate(); - if (date == null) - date = entry.getUpdatedDate(); + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); - synchronized(articlesByCategory) { - articles = articlesByCategory.get(cat); - if (articles == null) { - articles = new ArrayList<>(); - articlesByCategory.put(cat, articles); - } else { - for (Article a: articles) - if (a.link.equals(entry.getLink())) - return ; - } - - articles.add(new Article(entry.getLink(), - cat, - entry.getTitle(), - desc, - thumbnail, - date, - feed.getTitle())); + synchronized (articles) { + articles.add(a); - articles.sort(new Comparator
() { + Collections.sort(articles, new Comparator
() { @Override public int compare(Article o1, Article o2) { + if (o1.publicationDate == o2.publicationDate) + return 0; + if (o1.publicationDate == null) + return 1; + if (o2.publicationDate == null) + return -1; return o2.publicationDate.compareTo(o1.publicationDate); } - }); } - } + } + + LOG.info("addArticles done " + cat.getName()); } - + private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - String[] feeds; + List feeds; - feeds = getFeeds().get(cat); + feeds = config.getFeedsByCategory().get(cat); if (feeds != null) - for (String str: feeds) - addArticles(cat, getSyndFeed(str)); + for (Feed f: feeds) + try { + addArticles(cat, getSyndFeed(f.getURL())); + } catch (Throwable e) { + LOG.log(Level.SEVERE, + "retrieveArticles failure " + cat.getName() + " " + f.toString(), + e); + } else LOG.severe("No feed for category " + cat); } + /** + * Returns a copy. + */ public List
getArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + synchronized (articlesByCategory) { - return articlesByCategory.get(cat); + articles = getArticlesForUpdate(cat); + } + + synchronized (articles) { + return new ArrayList<>(articles); } } @@ -167,27 +192,16 @@ public class ArticleProvider { } @Override - public void run() { - List
articles; - - LOG.info("refresher "+ category.getId()); + public void run() { + LOG.info("refresher "+ category.getName()); try { retrieveArticles(category); - - synchronized (articlesByCategory) { - articles = articlesByCategory.get(category); - if (articles != null && articles.size() > 100) { - articlesByCategory.put(category, - articles.subList(0, 100)); - - } - } } catch (IllegalArgumentException | FeedException | IOException e) { LOG.log(Level.SEVERE, "refresher failure", e); } - LOG.info("refresher "+ category.getId() + " done"); + LOG.info("refresher "+ category.getName() + " done"); } } }