X-Git-Url: http://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=ddec874a0863cb56646817dcd3e8edbc8166f7ce;hb=9c7a682f3e891b86fb1c14a881d360dcf65e7d47;hp=d09cb0c9d91d1c060b7115d3dcfc0eb00c02fbfe;hpb=25c166b94c52128d7a18372d19f1f0f704ad138a;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index d09cb0c..ddec874 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -1,6 +1,7 @@ package pnews.servlet; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -17,13 +18,13 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.jsoup.Jsoup; +import org.xml.sax.InputSource; import com.rometools.rome.feed.synd.SyndEnclosure; import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.FeedException; import com.rometools.rome.io.SyndFeedInput; -import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; @@ -36,13 +37,15 @@ public class ArticleProvider { private ArticleProvider() { for (Category cat:Category.values()) - scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 120, TimeUnit.SECONDS); + scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS); } private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException { - try (XmlReader reader = new XmlReader(new URL(u))) { - return new SyndFeedInput().build(reader); - } + InputStream is = new URL(u).openConnection().getInputStream(); + InputSource source = new InputSource(is); + + return new SyndFeedInput().build(source); + } private static Map getFeeds() { @@ -53,10 +56,12 @@ public class ArticleProvider { result.put(Category.TOP, new String[] { "http://www.francetvinfo.fr/titres.rss", - "http://www.france24.com/fr/actualites/rss", - //"https://www.franceinter.fr/rss/a-la-une.xml", + "http://www.france24.com/fr/actualites/rss", "http://www.rfi.fr/general/rss", "http://www.cnews.fr/rss/une", + "http://www.ladepeche.fr/rss/a-la-une.rss", + "https://www.franceinter.fr/rss/a-la-une.xml", + "https://www.francebleu.fr/rss/a-la-une.xml", "http://www.bfmtv.com/rss/info/flux-rss/flux-toutes-les-actualites/" }); @@ -75,7 +80,14 @@ public class ArticleProvider { "http://www.rfi.fr/economie/rss" }); result.put(Category.ESSONNE, - new String[] { /*"https://www.essonneinfo.fr/feed/"*/ }); + new String[] { "http://www.tourisme-essonne.com/rss/actus/", + "http://www.ville-palaiseau.fr/rss/actualites.htm" + /*"https://www.essonneinfo.fr/feed/"*/ }); + + result.put(Category.PEOPLE, + new String[] { "http://www.premiere.fr/rss/actu-live", + "http://www.purepeople.com/rss/news_t0.xml" + }); result.put(Category.TECHNOLOGIE, new String[] { "http://feeds.feedburner.com/lesnumeriques/news", @@ -86,11 +98,14 @@ public class ArticleProvider { private void addArticles(Category cat, SyndFeed feed) { String thumbnail; - String desc; + String desc, link, title, feedTitle; Date date; List
articles; + boolean exist; - LOG.info("addArticles" + cat.getId()); + feedTitle = feed.getTitle().trim(); + + LOG.info("addArticles " + cat.getId() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); for (SyndEntry entry: feed.getEntries()) { thumbnail = null; @@ -100,46 +115,59 @@ public class ArticleProvider { break; } - if (entry.getDescription() != null) { + title = entry.getTitle().trim(); + + if (entry.getDescription() != null) { desc = Jsoup.parse(entry.getDescription().getValue()).text(); } else { desc = null; - LOG.severe("No description for " + feed.getTitle() + " - " + entry.getTitle()); + LOG.severe("No description for " + feedTitle + " - " + title); } date = entry.getPublishedDate(); if (date == null) date = entry.getUpdatedDate(); + if (date == null) { + LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); + continue; + } synchronized(articlesByCategory) { + link = entry.getLink().trim(); + articles = articlesByCategory.get(cat); + exist = false; if (articles == null) { articles = new ArrayList<>(); articlesByCategory.put(cat, articles); } else { for (Article a: articles) - if (a.link.equals(entry.getLink())) - return ; + if (a.link.equals(link)) { + LOG.info("addArticles " + link + " already present"); + exist = true; + } } - articles.add(new Article(entry.getLink(), - cat, - entry.getTitle(), - desc, - thumbnail, - date, - feed.getTitle())); - + if (!exist) { + LOG.fine("add " + cat.getId() + " " + feedTitle + " " + title); - Collections.sort(articles, new Comparator
() { - @Override - public int compare(Article o1, Article o2) { - return o2.publicationDate.compareTo(o1.publicationDate); - } - - }); + articles.add(new Article(link, cat, title, desc, thumbnail, date, + feed.getTitle())); + + Collections.sort(articles, new Comparator
() { + @Override + public int compare(Article o1, Article o2) { + return o2.publicationDate.compareTo(o1.publicationDate); + } + + }); + } else { + LOG.finest("addArticles already exist: " + title); + } } - } + } + + LOG.info("addArticles done " + cat.getId()); } private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { @@ -149,7 +177,13 @@ public class ArticleProvider { if (feeds != null) for (String str: feeds) - addArticles(cat, getSyndFeed(str)); + try { + addArticles(cat, getSyndFeed(str)); + } catch (Throwable e) { + LOG.log(Level.SEVERE, + "retrieveArticles failure " + cat.getId() + " " + str, + e); + } else LOG.severe("No feed for category " + cat); } @@ -184,6 +218,7 @@ public class ArticleProvider { articles.subList(0, 100)); } + LOG.info("refresher " + category.getId() + " number of articles: " + articles.size()); } } catch (IllegalArgumentException | FeedException | IOException e) { LOG.log(Level.SEVERE, "refresher failure", e);