read categories information from the configuration
[pnews.git] / war / src / main / java / pnews / servlet / ArticleProvider.java
1 package pnews.servlet;
2
3 import java.io.IOException;
4 import java.net.MalformedURLException;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.Comparator;
9 import java.util.Date;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.concurrent.Executors;
14 import java.util.concurrent.ScheduledExecutorService;
15 import java.util.concurrent.TimeUnit;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
18
19 import org.jsoup.Jsoup;
20
21 import com.rometools.rome.feed.synd.SyndEnclosure;
22 import com.rometools.rome.feed.synd.SyndEntry;
23 import com.rometools.rome.feed.synd.SyndFeed;
24 import com.rometools.rome.io.FeedException;
25 import com.rometools.rome.io.SyndFeedInput;
26 import com.rometools.rome.io.XmlReader;
27
28 import pnews.Article;
29 import pnews.Category;
30 import pnews.Feed;
31
32 public class ArticleProvider {
33         private static final Logger LOG = Logger.getLogger(ArticleProvider.class.getName());
34         private final Map<Category, List<Article>> articlesByCategory = new HashMap<>();
35         private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
36         private final Config config;
37         
38         public ArticleProvider(Config config) {
39                 this.config = config;
40                 for (Category cat: config.getCategories())
41                         scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS);
42         }
43         
44         private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException {
45                 XmlReader r;
46                 
47                 r = new XmlReader(new URL(u));
48                 
49                 return new SyndFeedInput().build(r);                
50         }
51         
52         private List<Article> getArticlesForUpdate(Category cat) {
53                 List<Article> result;
54                 
55                 synchronized (articlesByCategory) {
56                         result = articlesByCategory.get(cat);
57                         if (result == null) {
58                                 result = new ArrayList<>();
59                                 articlesByCategory.put(cat, result);
60                         }
61                         return result;
62                 }                
63         }
64         
65         private boolean exists(String articleLink, List<Article> articles) {
66                 synchronized (articles) {
67                         for (Article a: articles)
68                                 if (a.link.equals(articleLink))
69                                         return true;
70                 }
71                 return false;
72         }
73         
74         private static Article toArticle(String link, SyndEntry entry, SyndFeed feed) {
75                 String desc, title, thumbnail, feedTitle, str;
76                 Date date;
77                 
78                 feedTitle = feed.getTitle();
79                 if (feedTitle != null) {
80                         feedTitle = feedTitle.trim();
81                 }
82                 
83                 thumbnail = null;
84                 for (SyndEnclosure e: entry.getEnclosures()) {
85                         if (e.getType().startsWith("image/"))
86                                 thumbnail = e.getUrl();    
87                         break;
88                 }
89                 
90                 if (thumbnail == null && feed.getImage() != null)
91                         thumbnail = feed.getImage().getUrl();
92                              
93                 
94                 title = entry.getTitle().trim();
95                 
96                 if (entry.getDescription() != null) {
97                         str = entry.getDescription().getValue();
98                         desc = Jsoup.parse(str).text();
99                 } else {       
100                         desc = null;
101                         LOG.severe("No description for " + feedTitle + " - " + title);
102                 }
103                 
104                 date = entry.getPublishedDate();
105                 if (date == null)
106                         date = entry.getUpdatedDate();
107                 if (date == null)
108                         LOG.severe("The article " + feedTitle + " - " + title + " does not have a date");
109                                      
110                 return new Article(link, title, desc, thumbnail, date, feedTitle);
111         }
112         
113         private void addArticles(Category cat, SyndFeed feed) {
114                 String feedTitle;
115                 List<Article> articles;
116                 Article a;
117                 
118                 feedTitle = feed.getTitle().trim();
119                 
120                 LOG.info("addArticles " + cat.getLabel() + " " + feedTitle + " number of articles: " + feed.getEntries().size());
121                 
122                 for (SyndEntry entry: feed.getEntries()) {
123                         String link = entry.getLink().trim();
124                         articles = getArticlesForUpdate(cat);
125                         if (exists(link, articles)) {
126                                 LOG.fine("addArticles " + link + " is already present");
127                                 continue ;
128                         }
129                         
130                         a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed));
131                         
132                         synchronized (articles) {
133                                 articles.add(a);
134
135                                 Collections.sort(articles, new Comparator<Article>() {
136                                         @Override
137                                         public int compare(Article o1, Article o2) {
138                                                 if (o1.publicationDate == o2.publicationDate)
139                                                         return 0;
140                                                 if (o1.publicationDate == null)
141                                                         return 1;
142                                                 if (o2.publicationDate == null)
143                                                         return -1;
144                                                 return o2.publicationDate.compareTo(o1.publicationDate);
145                                         }
146                                 });
147                         }
148                 }          
149                 
150                 LOG.info("addArticles done " + cat.getLabel());
151         }
152              
153         private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
154                 List<Feed> feeds;
155                 
156                 feeds = config.getFeedsByCategory().get(cat);
157                 
158                 if (feeds != null)
159                         for (Feed f: feeds)
160                                 try {
161                                         addArticles(cat, getSyndFeed(f.getURL()));
162                                 } catch (Throwable e) {
163                                         LOG.log(Level.SEVERE,
164                                                 "retrieveArticles failure " + cat.getLabel() + " " + f.toString(),
165                                                 e);
166                                 }
167                 else
168                         LOG.severe("No feed for category " + cat);
169         }
170         
171         /**
172          * Returns a copy.
173          */
174         public List<Article> getArticles(Category cat)
175                         throws IllegalArgumentException, MalformedURLException, FeedException, IOException {
176                 List<Article> articles;
177                 
178                 synchronized (articlesByCategory) {
179                         articles = getArticlesForUpdate(cat);
180                 }
181                 
182                 synchronized (articles) {
183                         return new ArrayList<>(articles);
184                 }
185         }
186         
187         private class Refresher implements Runnable {
188                 private final Category category;
189                 
190                 public Refresher(Category category) {
191                         this.category = category;
192                 }
193                 
194                 @Override
195                 public void run() {                       
196                         LOG.info("refresher "+ category.getLabel());
197                         
198                         try {
199                                 retrieveArticles(category);
200                         } catch (IllegalArgumentException | FeedException | IOException e) {
201                                 LOG.log(Level.SEVERE, "refresher failure", e);
202                         }                        
203                         
204                         LOG.info("refresher "+ category.getLabel() + " done");
205                 }                
206         }
207 }