X-Git-Url: http://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=1dece09c19970607c6dd36571ad6858f2854bccf;hb=538d228606be7cd7e432a9086cb04552cf2b0958;hp=b28bbf5701aea8765e427b7c02d7a8cfed8dd6d1;hpb=14d5e6993de84967141710b3d3c44edf055a71ec;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index b28bbf5..1dece09 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -43,7 +43,6 @@ public class ArticleProvider { XmlReader r; r = new XmlReader(new URL(u)); - XmlReader.setDefaultEncoding("UTF-8"); return new SyndFeedInput().build(r); } @@ -55,6 +54,7 @@ public class ArticleProvider { result.put(Category.TOP, new String[] { + "http://www.europe1.fr/var/export/rss/europe1/actus.xml", "http://www.francetvinfo.fr/titres.rss", "http://www.rfi.fr/general/rss", "http://www.cnews.fr/rss/une", @@ -65,21 +65,26 @@ public class ArticleProvider { }); result.put(Category.SPORT, - new String[] { "http://www.france24.com/fr/sports/rss" }); + new String[] { "http://www.europe1.fr/var/export/rss/europe1/sport.xml", + "http://www.sportune.fr/feed", + "http://www.france24.com/fr/sports/rss" }); result.put(Category.FRANCE, new String[] { "http://www.france24.com/fr/france/rss", + "http://www.francetvinfo.fr/france.rss", "http://www.rfi.fr/france/rss"}); result.put(Category.EUROPE, new String[] { "http://www.france24.com/fr/europe/rss" }); result.put(Category.MONDE, - new String[] { "http://www.france24.com/fr/actualites/rss" }); + new String[] { "http://www.europe1.fr/var/export/rss/europe1/international.xml", + "http://www.france24.com/fr/actualites/rss" }); result.put(Category.ECO, new String[] { "http://www.france24.com/fr/economie/rss", + "http://www.europe1.fr/var/export/rss/europe1/economie.xml", "http://www.rfi.fr/economie/rss" }); result.put(Category.ESSONNE, @@ -92,8 +97,15 @@ public class ArticleProvider { }); result.put(Category.TECHNOLOGIE, - new String[] { "http://feeds.feedburner.com/lesnumeriques/news", - "http://www.zdnet.fr/feeds/rss/actualites/"}); + new String[] { "http://www.generation-nt.com/export/rss.xml", + "http://www.europe1.fr/var/export/rss/europe1/sciences.xml", + "http://feeds.feedburner.com/lesnumeriques/news", + "http://www.zdnet.fr/feeds/rss/actualites/", + "http://www.frandroid.com/feed", + "http://www.silicon.fr/feed", + "http://www.fredzone.org/feed", + "http://www.futura-sciences.com/rss/actualites.xml", + "https://www-03.ibm.com/press/fr/fr/rssfeed.wss?keyword=null&maxFeed=&feedType=RSS&topic=all"}); return result; } @@ -120,10 +132,15 @@ public class ArticleProvider { return false; } - private Article toArticle(String link, Category cat, SyndEntry entry, SyndFeed feed) { - String desc, title, thumbnail; + private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + String desc, title, thumbnail, feedTitle, str; Date date; - + + feedTitle = feed.getTitle(); + if (feedTitle != null) { + feedTitle = feedTitle.trim(); + } + thumbnail = null; for (SyndEnclosure e: entry.getEnclosures()) { if (e.getType().startsWith("image/")) @@ -133,52 +150,45 @@ public class ArticleProvider { if (thumbnail == null && feed.getImage() != null) thumbnail = feed.getImage().getUrl(); - + + title = entry.getTitle().trim(); - if (entry.getDescription() != null) { - desc = Jsoup.parse(entry.getDescription().getValue()).text(); + if (entry.getDescription() != null) { + str = entry.getDescription().getValue(); + desc = Jsoup.parse(str).text(); } else { desc = null; - LOG.severe("No description for " + feed.getTitle() + " - " + title); + LOG.severe("No description for " + feedTitle + " - " + title); } date = entry.getPublishedDate(); if (date == null) date = entry.getUpdatedDate(); if (date == null) - LOG.severe("The article " + feed.getTitle() + " - " + title + " does not have a date"); + LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - return new Article(link, cat, title, desc, thumbnail, date, title); + return new Article(link, title, desc, thumbnail, date, feedTitle); } - - private void addArticles(Category cat, SyndFeed feed) { - String thumbnail; - String desc, link, title, feedTitle, feedImage; - Date date; + String feedTitle; List
articles; Article a; feedTitle = feed.getTitle().trim(); - if (feed.getImage() != null) - feedImage = feed.getImage().getUrl(); - else - feedImage = null; - LOG.info("addArticles " + cat.getId() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); for (SyndEntry entry: feed.getEntries()) { - link = entry.getLink().trim(); + String link = entry.getLink().trim(); articles = getArticlesForUpdate(cat); if (exists(link, articles)) { LOG.fine("addArticles " + link + " is already present"); continue ; } - a = toArticle(link, cat, entry, feed); + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); synchronized (articles) { articles.add(a); @@ -194,14 +204,13 @@ public class ArticleProvider { return -1; return o2.publicationDate.compareTo(o1.publicationDate); } - }); } } LOG.info("addArticles done " + cat.getId()); } - + private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { String[] feeds; @@ -244,23 +253,11 @@ public class ArticleProvider { } @Override - public void run() { - List
articles; - + public void run() { LOG.info("refresher "+ category.getId()); try { retrieveArticles(category); - - synchronized (articlesByCategory) { - articles = articlesByCategory.get(category); - if (articles != null && articles.size() > 100) { - articlesByCategory.put(category, - articles.subList(0, 100)); - - } - LOG.info("refresher " + category.getId() + " number of articles: " + articles.size()); - } } catch (IllegalArgumentException | FeedException | IOException e) { LOG.log(Level.SEVERE, "refresher failure", e); }