X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2Fservlet%2FArticleProvider.java;h=d3e4f3de76dde49e3262a3e97ea94c9b86cd6938;hb=f4a4051dca40f99db1d84e6967dafbb6b1af647d;hp=90955260e9018c3136176dae66952dadc7712b27;hpb=f054556aba464d783802add05ffb374b7f9cc26b;p=pnews.git diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java index 9095526..d3e4f3d 100644 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ b/war/src/main/java/pnews/servlet/ArticleProvider.java @@ -1,7 +1,6 @@ package pnews.servlet; import java.io.IOException; -import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -18,13 +17,13 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.jsoup.Jsoup; -import org.xml.sax.InputSource; import com.rometools.rome.feed.synd.SyndEnclosure; import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.FeedException; import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; import pnews.Article; import pnews.Category; @@ -41,11 +40,12 @@ public class ArticleProvider { } private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException { - InputStream is = new URL(u).openConnection().getInputStream(); - InputSource source = new InputSource(is); - - return new SyndFeedInput().build(source); + XmlReader r; + + r = new XmlReader(new URL(u)); + XmlReader.setDefaultEncoding("UTF-8"); + return new SyndFeedInput().build(r); } private static Map getFeeds() { @@ -56,7 +56,6 @@ public class ArticleProvider { result.put(Category.TOP, new String[] { "http://www.francetvinfo.fr/titres.rss", - "http://www.france24.com/fr/actualites/rss", "http://www.rfi.fr/general/rss", "http://www.cnews.fr/rss/une", "http://www.ladepeche.fr/rss/a-la-une.rss", @@ -74,6 +73,10 @@ public class ArticleProvider { result.put(Category.EUROPE, new String[] { "http://www.france24.com/fr/europe/rss" }); + + result.put(Category.MONDE, + new String[] { "http://www.france24.com/fr/actualites/rss" }); + result.put(Category.ECO, new String[] { "http://www.france24.com/fr/economie/rss", @@ -81,8 +84,7 @@ public class ArticleProvider { result.put(Category.ESSONNE, new String[] { "http://www.tourisme-essonne.com/rss/actus/", - "http://www.ville-palaiseau.fr/rss/actualites.htm" - /*"https://www.essonneinfo.fr/feed/"*/ }); + "http://www.ville-palaiseau.fr/rss/actualites.htm" }); result.put(Category.PEOPLE, new String[] { "http://www.premiere.fr/rss/actu-live", @@ -96,87 +98,100 @@ public class ArticleProvider { return result; } - private void addArticles(Category cat, SyndFeed feed) { - String thumbnail; - String desc, link, title, feedTitle, feedImage; + private List
getArticlesForUpdate(Category cat) { + List
result; + + synchronized (articlesByCategory) { + result = articlesByCategory.get(cat); + if (result == null) { + result = new ArrayList<>(); + articlesByCategory.put(cat, result); + } + return result; + } + } + + private boolean exists(String articleLink, List
articles) { + synchronized (articles) { + for (Article a: articles) + if (a.link.equals(articleLink)) + return true; + } + return false; + } + + private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) { + String desc, title, thumbnail; Date date; + + thumbnail = null; + for (SyndEnclosure e: entry.getEnclosures()) { + if (e.getType().startsWith("image/")) + thumbnail = e.getUrl(); + break; + } + + if (thumbnail == null && feed.getImage() != null) + thumbnail = feed.getImage().getUrl(); + + title = entry.getTitle().trim(); + + if (entry.getDescription() != null) { + desc = Jsoup.parse(entry.getDescription().getValue()).text(); + } else { + desc = null; + LOG.severe("No description for " + feed.getTitle() + " - " + title); + } + + date = entry.getPublishedDate(); + if (date == null) + date = entry.getUpdatedDate(); + if (date == null) + LOG.severe("The article " + feed.getTitle() + " - " + title + " does not have a date"); + + return new Article(link, title, desc, thumbnail, date, title); + } + + private void addArticles(Category cat, SyndFeed feed) { + String feedTitle; List
articles; - boolean exist; + Article a; feedTitle = feed.getTitle().trim(); - if (feed.getImage() != null) - feedImage = feed.getImage().getUrl(); - else - feedImage = null; - LOG.info("addArticles " + cat.getId() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); for (SyndEntry entry: feed.getEntries()) { - thumbnail = null; - for (SyndEnclosure e: entry.getEnclosures()) { - if (e.getType().startsWith("image/")) - thumbnail = e.getUrl(); - break; - } - if (thumbnail == null) - thumbnail = feedImage; - - title = entry.getTitle().trim(); - - if (entry.getDescription() != null) { - desc = Jsoup.parse(entry.getDescription().getValue()).text(); - } else { - desc = null; - LOG.severe("No description for " + feedTitle + " - " + title); + String link = entry.getLink().trim(); + articles = getArticlesForUpdate(cat); + if (exists(link, articles)) { + LOG.fine("addArticles " + link + " is already present"); + continue ; } - date = entry.getPublishedDate(); - if (date == null) - date = entry.getUpdatedDate(); - if (date == null) { - LOG.severe("The article " + feedTitle + " - " + title + " does not have a date"); - continue; - } + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed)); - synchronized(articlesByCategory) { - link = entry.getLink().trim(); - - articles = articlesByCategory.get(cat); - exist = false; - if (articles == null) { - articles = new ArrayList<>(); - articlesByCategory.put(cat, articles); - } else { - for (Article a: articles) - if (a.link.equals(link)) { - LOG.fine("addArticles " + link + " is already present"); - exist = true; - } - } - - if (!exist) { - LOG.fine("add " + cat.getId() + " " + feedTitle + " " + title); - - articles.add(new Article(link, cat, title, desc, thumbnail, date, - feed.getTitle())); - - Collections.sort(articles, new Comparator
() { - @Override - public int compare(Article o1, Article o2) { - return o2.publicationDate.compareTo(o1.publicationDate); - } + synchronized (articles) { + articles.add(a); - }); - } else { - LOG.finest("addArticles already exist: " + title); - } + Collections.sort(articles, new Comparator
() { + @Override + public int compare(Article o1, Article o2) { + if (o1.publicationDate == o2.publicationDate) + return 0; + if (o1.publicationDate == null) + return 1; + if (o2.publicationDate == null) + return -1; + return o2.publicationDate.compareTo(o1.publicationDate); + } + }); } } LOG.info("addArticles done " + cat.getId()); } - + private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { String[] feeds; @@ -195,10 +210,19 @@ public class ArticleProvider { LOG.severe("No feed for category " + cat); } + /** + * Returns a copy. + */ public List
getArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + synchronized (articlesByCategory) { - return articlesByCategory.get(cat); + articles = getArticlesForUpdate(cat); + } + + synchronized (articles) { + return new ArrayList<>(articles); } }