Add RSS feed of IBM
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
index aae0c87..d65002d 100644 (file)
@@ -1,7 +1,6 @@
 package pnews.servlet;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
@@ -41,14 +40,11 @@ public class ArticleProvider {
         }
         
+        /** Downloads and parses the feed found at URL {@code u}; the reader is closed before returning. */
         private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException {
-                InputStream is;
-                XmlReader r;
-                
-                r = new XmlReader(new URL(u));
-                XmlReader.setDefaultEncoding("UTF-8");
-                
-                return new SyndFeedInput().build(r);
-                
+                // XmlReader(URL) opens the connection itself: try-with-resources releases it even if parsing fails
+                try (XmlReader r = new XmlReader(new URL(u))) {
+                        return new SyndFeedInput().build(r);
+                }
         }
         
         private static Map<Category, String[]> getFeeds() {
@@ -87,8 +83,7 @@ public class ArticleProvider {
                 
                 result.put(Category.ESSONNE,
                                 new String[] { "http://www.tourisme-essonne.com/rss/actus/",
-                                               "http://www.ville-palaiseau.fr/rss/actualites.htm"
-                                                /*"https://www.essonneinfo.fr/feed/"*/ });
+                                               "http://www.ville-palaiseau.fr/rss/actualites.htm" });
                 
                 result.put(Category.PEOPLE,
                                 new String[] { "http://www.premiere.fr/rss/actu-live",
@@ -97,92 +92,113 @@ public class ArticleProvider {
                 
                 result.put(Category.TECHNOLOGIE,
                                 new String[] { "http://feeds.feedburner.com/lesnumeriques/news",
-                                               "http://www.zdnet.fr/feeds/rss/actualites/"});
+                                               "http://www.zdnet.fr/feeds/rss/actualites/",
+                                               "https://www-03.ibm.com/press/fr/fr/rssfeed.wss?keyword=null&maxFeed=&feedType=RSS&topic=all"});
                 
                 return result;
         }
         
-        private void addArticles(Category cat, SyndFeed feed) {
-                String thumbnail;
-                String desc, link, title, feedTitle, feedImage;
+        /**
+         * Gives the list holding the articles of {@code cat}, registering a new
+         * empty list when the category has none yet.
+         *
+         * @return the shared list itself, not a copy; never null
+         */
+        private List<Article> getArticlesForUpdate(Category cat) {
+                // the lock on the map keeps the lookup-or-create step atomic
+                synchronized (articlesByCategory) {
+                        return articlesByCategory.computeIfAbsent(cat, missing -> new ArrayList<>());
+                }
+        }
+        
+        /** Tells whether {@code articles} already holds an article whose link equals {@code articleLink}. */
+        private boolean exists(String articleLink, List<Article> articles) {
+                // addArticles appends to and re-sorts this list under the same lock
+                synchronized (articles) {
+                        return articles.stream()
+                                       .anyMatch(known -> known.link.equals(articleLink));
+                }
+        }
+        
+        /**
+         * Converts one feed entry into an Article. The thumbnail is the first image
+         * enclosure of the entry, else the feed image; markup is stripped from the
+         * description. A missing description or date is logged and passed on as null.
+         */
+        private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) {
+                String desc, title, thumbnail, feedTitle, str;
                 Date date;
+                feedTitle = feed.getTitle();
+                if (feedTitle != null)
+                        feedTitle = feedTitle.trim();
+                thumbnail = null;
+                for (SyndEnclosure e: entry.getEnclosures()) {
+                        // keep scanning past untyped or non-image enclosures instead of stopping at the first one
+                        if (e.getType() != null && e.getType().startsWith("image/")) {
+                                thumbnail = e.getUrl();
+                                break;
+                        }
+                }
+                if (thumbnail == null && feed.getImage() != null)
+                        thumbnail = feed.getImage().getUrl();
+                
+                title = entry.getTitle().trim();
+                if (entry.getDescription() != null) {
+                        str = entry.getDescription().getValue();
+                        desc = Jsoup.parse(str).text();
+                } else {
+                        desc = null;
+                        LOG.severe("No description for " + feedTitle + " - " + title);
+                }
+                date = entry.getPublishedDate();
+                if (date == null)
+                        date = entry.getUpdatedDate();
+                if (date == null)
+                        LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
+                
+                return new Article(link, title, desc, thumbnail, date, feedTitle);
+        }
+        
+        /**
+         * Adds the entries of {@code feed} whose link is not yet listed under {@code cat}
+         * and keeps that list sorted newest first, articles without a date last.
+         */
+        private void addArticles(Category cat, SyndFeed feed) {
+                String feedTitle;
                 List<Article> articles;
-                boolean exist;
+                Article a;
                 
-                feedTitle = feed.getTitle().trim();
+                // a feed without title must not abort the refresh (toArticle accepts it as well)
+                feedTitle = feed.getTitle() == null ? null : feed.getTitle().trim();
                 
-                if (feed.getImage() != null)
-                        feedImage = feed.getImage().getUrl();
-                else
-                        feedImage = null;
-                
                 LOG.info("addArticles " + cat.getId() + " " + feedTitle + " number of articles: " + feed.getEntries().size());
                 
+                articles = getArticlesForUpdate(cat);
                 for (SyndEntry entry: feed.getEntries()) {
-                        thumbnail = null;
-                        for (SyndEnclosure e: entry.getEnclosures()) {
-                                if (e.getType().startsWith("image/"))
-                                        thumbnail = e.getUrl();    
-                                break;
+                        if (entry.getLink() == null) {
+                                LOG.warning("addArticles entry without link in " + feedTitle);
+                                continue;
+                        }
+                        String link = entry.getLink().trim();
+                        if (exists(link, articles)) {
+                                LOG.fine("addArticles " + link + " is already present");
+                                continue ;
                         }
-                        if (thumbnail == null)
-                                thumbnail = feedImage;
-                                                        
-                        title = entry.getTitle().trim();
                         
-                        if (entry.getDescription() != null) {                                      
-                                desc = Jsoup.parse(entry.getDescription().getValue()).text();
-                        } else {       
-                                desc = null;
-                                LOG.severe("No description for " + feedTitle + " - " + title);
-                        }
-                        
-                        date = entry.getPublishedDate();
-                        if (date == null)
-                                date = entry.getUpdatedDate();
-                        if (date == null) {
-                                LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
-                                continue;
-                        }                                
+                        a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed));
                         
-                        synchronized(articlesByCategory) {
-                                link = entry.getLink().trim();
-
-                                articles = articlesByCategory.get(cat);
-                                exist = false;
-                                if (articles == null) {
-                                        articles = new ArrayList<>();
-                                        articlesByCategory.put(cat, articles);
-                                } else {                                
-                                        for (Article a: articles)
-                                                if (a.link.equals(link)) {
-                                                        LOG.fine("addArticles " + link + " is already present");
-                                                        exist = true;
-                                                }
-                                }
-                                
-                                if (!exist) {
-                                        LOG.fine("add " + cat.getId() + " " + feedTitle + " " + title);
-                                
-                                        articles.add(new Article(link, cat, title, desc, thumbnail, date,
-                                                        feed.getTitle()));
-
-                                        Collections.sort(articles, new Comparator<Article>() {
-                                                @Override
-                                                public int compare(Article o1, Article o2) {
-                                                        return o2.publicationDate.compareTo(o1.publicationDate);
-                                                }
+                        synchronized (articles) {
+                                articles.add(a);
 
-                                        });
-                                } else {
-                                        LOG.finest("addArticles already exist: " + title);
-                                }
+                                // newest first; articles without a publication date sink to the end
+                                articles.sort(Comparator.comparing((Article x) -> x.publicationDate,
+                                                Comparator.nullsLast(Comparator.reverseOrder())));
                         }
                 }          
                 
                 LOG.info("addArticles done " + cat.getId());
         }
-        
+             
         private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
                 String[] feeds;
                 
@@ -201,10 +217,19 @@ public class ArticleProvider {
                         LOG.severe("No feed for category " + cat);
         }
         
+        /**
+         * Returns a copy of the articles currently stored for {@code cat}; the copy is
+         * empty, never null, when the category has no article yet. Adding to or
+         * removing from the returned list does not affect the provider.
+         */
         public List<Article> getArticles(Category cat)
                         throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
-                synchronized (articlesByCategory) {
-                        return articlesByCategory.get(cat);
+                List<Article> live;
+                
+                // getArticlesForUpdate takes the lock on the map by itself
+                live = getArticlesForUpdate(cat);
+                synchronized (live) {
+                        return new ArrayList<>(live);
                 }
         }
         
@@ -216,23 +241,11 @@ public class ArticleProvider {
                 }
                 
                 @Override
-                public void run() {
-                        List<Article> articles;
-                        
+                public void run() {                       
                         LOG.info("refresher "+ category.getId());
                         
                         try {
                                 retrieveArticles(category);
-                                
-                                synchronized (articlesByCategory) {
-                                        articles = articlesByCategory.get(category);
-                                        if (articles != null && articles.size() > 100) {
-                                                articlesByCategory.put(category,
-                                                                       articles.subList(0, 100));
-                                                                
-                                        }
-                                        LOG.info("refresher " + category.getId() + " number of articles: " + articles.size());
-                                }
                         } catch (IllegalArgumentException | FeedException | IOException e) {
                                 LOG.log(Level.SEVERE, "refresher failure", e);
                         }