From a0c6addfd9ac6ac45f37b4202e787602c40e6bf7 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Orsini Date: Mon, 6 Nov 2017 12:46:26 +0100 Subject: [PATCH] cleanup, moved to net.wpitchoune package --- .../wpitchoune/pnews/servlet/ArticleProvider.java | 281 +++++++++++++++++++++ .../java/net/wpitchoune/pnews/servlet/HTML.java | 187 ++++++++++++++ .../java/net/wpitchoune/pnews/servlet/JSON.java | 60 +++++ .../java/net/wpitchoune/pnews/servlet/Pnews.java | 224 ++++++++++++++++ .../net/wpitchoune/pnews/servlet/RequesterLog.java | 39 +++ .../main/java/pnews/servlet/ArticleProvider.java | 281 --------------------- war/src/main/java/pnews/servlet/HTML.java | 187 -------------- war/src/main/java/pnews/servlet/JSON.java | 60 ----- war/src/main/java/pnews/servlet/Pnews.java | 224 ---------------- war/src/main/java/pnews/servlet/RequesterLog.java | 39 --- war/src/main/resources/feeds.json | 3 +- war/src/main/webapp/WEB-INF/web.xml | 4 +- 12 files changed, 795 insertions(+), 794 deletions(-) create mode 100644 war/src/main/java/net/wpitchoune/pnews/servlet/ArticleProvider.java create mode 100644 war/src/main/java/net/wpitchoune/pnews/servlet/HTML.java create mode 100644 war/src/main/java/net/wpitchoune/pnews/servlet/JSON.java create mode 100644 war/src/main/java/net/wpitchoune/pnews/servlet/Pnews.java create mode 100644 war/src/main/java/net/wpitchoune/pnews/servlet/RequesterLog.java delete mode 100644 war/src/main/java/pnews/servlet/ArticleProvider.java delete mode 100644 war/src/main/java/pnews/servlet/HTML.java delete mode 100644 war/src/main/java/pnews/servlet/JSON.java delete mode 100644 war/src/main/java/pnews/servlet/Pnews.java delete mode 100644 war/src/main/java/pnews/servlet/RequesterLog.java diff --git a/war/src/main/java/net/wpitchoune/pnews/servlet/ArticleProvider.java b/war/src/main/java/net/wpitchoune/pnews/servlet/ArticleProvider.java new file mode 100644 index 0000000..d52d780 --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/servlet/ArticleProvider.java @@ -0,0 +1,281 @@ +package net.wpitchoune.pnews.servlet; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.jsoup.Jsoup; + +import com.rometools.rome.feed.synd.SyndEnclosure; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; + +import net.wpitchoune.pnews.Article; +import net.wpitchoune.pnews.ArticleStore; +import net.wpitchoune.pnews.Category; +import net.wpitchoune.pnews.Config; +import net.wpitchoune.pnews.EntityStat; +import net.wpitchoune.pnews.Feed; +import net.wpitchoune.pnews.classifier.NamedEntityRecognizer; + +public class ArticleProvider { + private static final String CLASS_NAME = ArticleProvider.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private final Map> articlesByCategory = new HashMap<>(); + private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()); + private final Config config; + + public ArticleProvider(Config config) { + this.config = config; + for (Category cat: config.getCategories()) + scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS); + } + + private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException { + XmlReader r; + + r = new XmlReader(new URL(u)); + + return new SyndFeedInput().build(r); + } + + private List
getArticlesForUpdate(Category cat) { + List
result; + + synchronized (articlesByCategory) { + result = articlesByCategory.get(cat); + if (result == null) { + result = new ArrayList<>(); + articlesByCategory.put(cat, result); + } + return result; + } + } + + private boolean exists(String articleLink, List
articles) { + synchronized (articles) { + for (Article a: articles) + if (a.getLink().equals(articleLink)) + return true; + } + return false; + } + + private Instant getArticleInstant(SyndEntry entry) { + Date date; + + date = entry.getUpdatedDate(); + if (date == null) + date = entry.getPublishedDate(); + + if (date == null) + return Instant.now(); + + return date.toInstant(); + } + + private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) { + String desc, title, thumbnail, feedTitle, str; + List entities; + + feedTitle = feed.getTitle(); + if (feedTitle != null) { + feedTitle = feedTitle.trim(); + } + + thumbnail = null; + for (SyndEnclosure e: entry.getEnclosures()) { + if (e.getType().startsWith("image/")) + thumbnail = e.getUrl(); + break; + } + + title = entry.getTitle().trim(); + + if (entry.getDescription() != null) { + str = entry.getDescription().getValue(); + desc = Jsoup.parse(str).text(); + } else { + desc = null; + LOG.severe("No description for " + feedTitle + " - " + title); + } + + entities = new ArrayList<>(); + if (lang.equals("en")) + try { + NamedEntityRecognizer.classify(title, entities, config); + if (desc != null) + NamedEntityRecognizer.classify(desc, entities, config); + } catch (ClassCastException | ClassNotFoundException | IOException e1) { + LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); + } + + return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0])); + } + + private void addArticles(Category cat, SyndFeed feed) { + String feedTitle; + List
articles; + Article a; + + feedTitle = feed.getTitle().trim(); + + LOG.info("addArticles " + cat.getLabel() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); + + for (SyndEntry entry: feed.getEntries()) { + String link = entry.getLink().trim(); + articles = getArticlesForUpdate(cat); + if (exists(link, articles)) { + LOG.fine("addArticles " + link + " is already present"); + continue ; + } + + final Instant instant = getArticleInstant(entry); + + if (config.isObsolete(instant)) + continue ; + + a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant)); + + synchronized (articles) { + articles.add(a); + + Collections.sort(articles, new Comparator
() { + @Override + public int compare(Article o1, Article o2) { + if (o1.getPublicationDate() == o2.getPublicationDate()) + return 0; + if (o1.getPublicationDate() == null) + return 1; + if (o2.getPublicationDate() == null) + return -1; + return o2.getPublicationDate().compareTo(o1.getPublicationDate()); + } + }); + } + } + + LOG.info("addArticles done " + cat.getLabel()); + } + + private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List feeds; + + feeds = config.getFeedsByCategory().get(cat); + + if (feeds != null) + for (Feed f: feeds) + try { + addArticles(cat, getSyndFeed(f.getURL())); + } catch (Throwable e) { + LOG.log(Level.SEVERE, + "retrieveArticles failure " + cat.getLabel() + " " + f.toString(), + e); + } + else + LOG.severe("No feed for category " + cat); + } + + /** + * Returns a copy. + */ + public List
getArticles(Category cat, String entity) + throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles, result; + + synchronized (articlesByCategory) { + articles = getArticlesForUpdate(cat); + } + + synchronized (articles) { + if (entity == null) + return new ArrayList<>(articles); + + result = new ArrayList<>(articles.size()); + for (Article a: articles) + if (a.hasEntity(entity)) + result.add(a); + + return result; + } + } + + public List getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { + List
articles; + Map entities; + final String FUNCTION_NAME = "getEntities"; + EntityStat s; + List stats; + Instant minInstant; + + LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); + + articles = getArticles(cat, null); + + minInstant = Instant.now().minus(15, ChronoUnit.DAYS); + + entities = new HashMap<>(); + for (Article a: articles) + if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null) + for (String e: a.getEntities()) { + s = entities.get(e); + if (s == null) { + s = new EntityStat(e); + entities.put(e, s); + } + s.increment(); + } + + stats = new ArrayList<>(entities.values()); + stats.sort(new Comparator() { + + @Override + public int compare(EntityStat o1, EntityStat o2) { + return Integer.compare(o2.getCount(), o1.getCount()); + } + + }); + + LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats); + + return stats; + } + + private class Refresher implements Runnable { + private final Category category; + + public Refresher(Category category) { + this.category = category; + } + + @Override + public void run() { + LOG.info("refresher "+ category.getLabel()); + + try { + retrieveArticles(category); + } catch (IllegalArgumentException | FeedException | IOException e) { + LOG.log(Level.SEVERE, "refresher failure", e); + } + + LOG.info("refresher "+ category.getLabel() + " done"); + } + } +} diff --git a/war/src/main/java/net/wpitchoune/pnews/servlet/HTML.java b/war/src/main/java/net/wpitchoune/pnews/servlet/HTML.java new file mode 100644 index 0000000..28d6fac --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/servlet/HTML.java @@ -0,0 +1,187 @@ +package net.wpitchoune.pnews.servlet; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.rometools.rome.io.FeedException; + +import net.wpitchoune.pnews.Article; +import net.wpitchoune.pnews.Category; +import net.wpitchoune.pnews.Config; +import net.wpitchoune.pnews.EntityStat; +import net.wpitchoune.pnews.Language; + +public class HTML { + private static final String CLASS_NAME= HTML.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + + private static void appendA(StringBuffer buf, String child, String href, String cl) { + buf.append("'); + buf.append(child); + buf.append(""); + } + + private static void appendDiv(StringBuffer buf, String child) { + buf.append("
"); + buf.append(child); + buf.append("
\n"); + } + + private static void appendP(StringBuffer buf, String child) { + buf.append("

"); + buf.append(child); + buf.append("

\n"); + } + + private static void append(StringBuffer buf, Article a) throws UnsupportedEncodingException { + buf.append("
\n"); + + buf.append("
\n"); + if (a.getThumbnail() != null) { + buf.append("\n"); + } + buf.append("
\n"); + + buf.append("
\n"); + + buf.append("
\n"); + appendA(buf, a.getTitle(), "/redirect?url=" + URLEncoder.encode(a.getLink(), "UTF-8"), null); + buf.append("
\n"); + + buf.append(""); + + buf.append("
\n"); + if (a.getDescription() != null) { + buf.append("

"); + if (a.getDescription().length() < 512) { + buf.append(a.getDescription()); + } else { + buf.append(a.getDescription().substring(0, 512)); + buf.append("[..]"); + } + buf.append("

"); + } + buf.append("
\n"); + + buf.append("
\n"); + + buf.append("
\n"); + } + + private static void appendMenu(StringBuffer buf, Category catActive, Config cfg) { + String cl; + + buf.append("\n"); + } + + private static String toURL(Category catActive, String entity) { + try { + return catActive.getURL() + "?entity=" + URLEncoder.encode(entity, "UTF-8"); + } catch (UnsupportedEncodingException e) { + LOG.log(Level.SEVERE, "Failed to generate link to entity " + entity, e); + return catActive.getURL(); + } + } + + public static String toHTML(List
articles, Category catActive, String entityActive, Config cfg, ArticleProvider provider) { + StringBuffer buf; + int i; + List entities; + String cl; + + buf = new StringBuffer(); + buf.append("\n"); + buf.append("\n"); + buf.append("\n"); + buf.append("\n"); + buf.append("\n"); + buf.append(""); + buf.append(catActive.getTitle()); + buf.append(" - PNews\n"); + buf.append("\n"); + buf.append("\n"); + + appendMenu(buf, catActive, cfg); + + try { + entities = provider.getEntityStats(catActive); + + if (entities.size() > 0) { + buf.append("\n"); + } + } catch (IllegalArgumentException | FeedException | IOException e2) { + LOG.log(Level.SEVERE, "Failed to get entities", e2); + } + + i = 0; + for (Article e: articles) { + try { + append(buf, e); + } catch (UnsupportedEncodingException e1) { + LOG.log(Level.SEVERE, "Failed to convert article to HTML", e1); + } + if (i == 100) + break; + else + i++; + } + + buf.append("\n"); + buf.append("\n"); + + return buf.toString(); + } +} diff --git a/war/src/main/java/net/wpitchoune/pnews/servlet/JSON.java b/war/src/main/java/net/wpitchoune/pnews/servlet/JSON.java new file mode 100644 index 0000000..70b1c24 --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/servlet/JSON.java @@ -0,0 +1,60 @@ +package net.wpitchoune.pnews.servlet; + +import java.io.IOException; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import com.rometools.rome.io.FeedException; + +import net.wpitchoune.pnews.Article; +import net.wpitchoune.pnews.ArticleStore; +import net.wpitchoune.pnews.Category; +import net.wpitchoune.pnews.Config; + +public class JSON { + private static final Logger LOG = Logger.getLogger(JSON.class.getName()); + + public static String getStats(ArticleProvider provider, Config config) { + JsonObject jstats, jreadcounts, jcategories, jmemory; + Runtime runtime; + List
articles; + Article[] allArticles; + + jstats = new JsonObject(); + + jstats.addProperty("articles-count", ArticleStore.singleton.size()); + + jreadcounts = new JsonObject(); + jstats.add("read-counts", jreadcounts); + + allArticles = ArticleStore.singleton.getArticles(); + for (Article a: allArticles) + if (a.getReadCount() > 0) + jreadcounts.addProperty(a.getLink(), a.getReadCount()); + + jcategories = new JsonObject(); + jstats.add("categories", jcategories); + + for (Category cat: config.getCategories()) + try { + articles = provider.getArticles(cat, null); + jcategories.addProperty(cat.getLabel(), + articles.size()); + } catch (IllegalArgumentException | FeedException | IOException e) { + LOG.log(Level.SEVERE, "Fail to retrieve articles", e); + } + + jmemory = new JsonObject(); + jstats.add("memory", jmemory); + + runtime = Runtime.getRuntime(); + jmemory.addProperty("total", runtime.totalMemory()); + jmemory.addProperty("max", runtime.maxMemory()); + jmemory.addProperty("free", runtime.freeMemory()); + + return new Gson().toJson(jstats); + } +} diff --git a/war/src/main/java/net/wpitchoune/pnews/servlet/Pnews.java b/war/src/main/java/net/wpitchoune/pnews/servlet/Pnews.java new file mode 100644 index 0000000..6b5d917 --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/servlet/Pnews.java @@ -0,0 +1,224 @@ +package net.wpitchoune.pnews.servlet; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URLDecoder; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.rometools.rome.io.FeedException; + +import net.wpitchoune.pnews.Article; +import net.wpitchoune.pnews.ArticleStore; +import net.wpitchoune.pnews.Category; +import net.wpitchoune.pnews.Config; +import net.wpitchoune.pnews.Language; + +public class Pnews extends HttpServlet { + private static final String CLASS_NAME = Pnews.class.getName(); + private static final Logger LOG = Logger.getLogger(Pnews.class.getName()); + private static final long serialVersionUID = 1L; + private ArticleProvider provider; + private Config config; + + private static String getQueryParameter(HttpServletRequest rq, String key) + throws UnsupportedEncodingException { + final String METHOD_NAME="getQueryParameter"; + String[] params; + int idx; + String q; + + + LOG.entering(CLASS_NAME, METHOD_NAME, new Object[] { rq, key} ); + + q = rq.getQueryString(); + + if (q == null) + return null; + + params = q.split("&"); + + for (String p: params) { + idx = p.indexOf('='); + + if (idx > 1 && p.substring(0, idx).equals(key)) + return URLDecoder.decode(p.substring(idx + 1), "UTF-8"); + } + + return null; + } + + private static void redirect(HttpServletRequest rq, HttpServletResponse rp) { + String redirectURL; + Article a; + + LOG.entering(Pnews.class.getName(), "redirect", new Object[] { rq, rp }); + + try { + redirectURL = getQueryParameter(rq, "url"); + + LOG.info("Request redirection to " + redirectURL); + + if (redirectURL != null) { + a = ArticleStore.singleton.get(redirectURL); + if (a != null) + a.incrementReadCount(); + else + LOG.severe("Cannot find the article " + redirectURL); + + rp.setHeader("Location", redirectURL); + rp.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); + } else { + LOG.severe("No redirection URL"); + rp.setStatus(HttpServletResponse.SC_NOT_FOUND); + } + + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + LOG.log(Level.SEVERE, "redirect failure", e); + rp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + + LOG.exiting(Pnews.class.getName(), "redirect"); + } + + private static void doTemporaryRedirect(String newURL, HttpServletResponse rp) { + rp.setHeader("Location", newURL); + rp.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); + } + + private void writeStats(HttpServletResponse rp) throws IOException { + rp.setContentType("application/json;charset=utf-8"); + rp.setCharacterEncoding("utf-8"); + + rp.getWriter().write(JSON.getStats(provider, config)); + } + + + private void writeArticles(Category cat, String entity, HttpServletResponse rp) { + String html; + List
articles; + + try { + articles = provider.getArticles(cat, entity); + if (articles != null) { + html = HTML.toHTML(articles, cat, entity, config, provider); + rp.setContentType("text/html;charset=utf-8"); + rp.getWriter().write(html); + rp.setCharacterEncoding("utf-8"); + } else { + LOG.severe("writeArticles cannot retrieve any articles"); + html = HTML.toHTML(new ArrayList
(), cat, entity, config, provider); + rp.setContentType("text/html"); + rp.getWriter().write(html); + } + } catch (IOException | IllegalArgumentException | FeedException e) { + LOG.log(Level.SEVERE, "writeArticles failure", e); + rp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + } + + private void copy(InputStream in, Writer writer) throws IOException { + Reader r; + char[] buf; + int n; + + buf = new char[1024]; + r = new InputStreamReader(in); + while ( (n = r.read(buf, 0, buf.length)) != -1) + writer.write(buf, 0, n); + } + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) { + final String METHOD_NAME = "doGet"; + String path; + InputStream in; + + RequesterLog.singleton.writeRequest(req); + + LOG.info("doGet " + req.getRemoteAddr().toString() + " " + req.getRequestURI() + " " + req.getQueryString()); + + LOG.info(METHOD_NAME + " queryString=" + req.getQueryString()); + + path = req.getPathInfo(); + + if (path.equals("/redirect")) { + redirect(req, resp); + return ; + } + + if (path.equals("/style.css")) { + try { + in = HTML.class.getClassLoader().getResourceAsStream("style.css"); + copy(in, resp.getWriter()); + resp.setContentType("text/css"); + + return ; + } catch (IOException e) { + LOG.log(Level.SEVERE, "doGet failure", e); + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + + return ; + } + } + + if (path.equals("/")) { + doTemporaryRedirect(config.getDefaultLanguage().toURL(), resp); + return ; + } + + try { + if (path.equals("/stats")) { + writeStats(resp); + return ; + } + + for (Category cat: config.getCategories()) { + if (path.equals(cat.getURL())) { + writeArticles(cat, getQueryParameter(req, "entity"), resp); + return ; + } + } + + for (Language l: config.getLanguages()) { + if (path.equals(l.toURL())) { + doTemporaryRedirect(config.getDefaultCategory(l).getURL(), resp); + return ; + } + } + + resp.getWriter().write("Not found " + req.getPathInfo()); + resp.setStatus(HttpServletResponse.SC_NOT_FOUND); + } catch (IOException | RuntimeException e) { + LOG.log(Level.SEVERE, "doGet failure", e); + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + } + + @Override + public void init(ServletConfig cfg) throws ServletException { + LOG.info("Pnews servlet init " + cfg.getServletContext().getContextPath()); + + config = new Config(); + try { + config.loadConfig(); + } catch (UnsupportedEncodingException e) { + throw new ServletException(e); + } + + provider = new ArticleProvider(config); + } +} diff --git a/war/src/main/java/net/wpitchoune/pnews/servlet/RequesterLog.java b/war/src/main/java/net/wpitchoune/pnews/servlet/RequesterLog.java new file mode 100644 index 0000000..4dce083 --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/servlet/RequesterLog.java @@ -0,0 +1,39 @@ +package net.wpitchoune.pnews.servlet; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.logging.Level; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; + +public class RequesterLog { + public static final RequesterLog singleton = new RequesterLog(); + private Writer writer; + private Logger LOG = Logger.getLogger(RequesterLog.class.getName()); + private SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US); + + public void writeRequest(HttpServletRequest rq) { + try { + synchronized(this) { + if (writer == null) + writer = new BufferedWriter(new FileWriter("access.log", true), 1024); + } + + synchronized (writer) { + writer.write("[" + dateFormat.format(new Date()) + "] "); + writer.write(rq.getRemoteAddr() + " " + rq.getRequestURI() + " " + rq.getQueryString()); + writer.write(" " + rq.getHeader("User-Agent")); + writer.write("\n"); + writer.flush(); + } + } catch (IOException e) { + LOG.log(Level.SEVERE, "Fails to log requester information", e); + } + } +} diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java deleted file mode 100644 index 55898a4..0000000 --- a/war/src/main/java/pnews/servlet/ArticleProvider.java +++ /dev/null @@ -1,281 +0,0 @@ -package pnews.servlet; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.time.Instant; -import java.time.temporal.ChronoUnit; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.jsoup.Jsoup; - -import com.rometools.rome.feed.synd.SyndEnclosure; -import com.rometools.rome.feed.synd.SyndEntry; -import com.rometools.rome.feed.synd.SyndFeed; -import com.rometools.rome.io.FeedException; -import com.rometools.rome.io.SyndFeedInput; -import com.rometools.rome.io.XmlReader; - -import net.wpitchoune.pnews.Article; -import net.wpitchoune.pnews.ArticleStore; -import net.wpitchoune.pnews.Category; -import net.wpitchoune.pnews.Config; -import net.wpitchoune.pnews.EntityStat; -import net.wpitchoune.pnews.Feed; -import net.wpitchoune.pnews.classifier.NamedEntityRecognizer; - -public class ArticleProvider { - private static final String CLASS_NAME = ArticleProvider.class.getName(); - private static final Logger LOG = Logger.getLogger(CLASS_NAME); - private final Map> articlesByCategory = new HashMap<>(); - private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()); - private final Config config; - - public ArticleProvider(Config config) { - this.config = config; - for (Category cat: config.getCategories()) - scheduler.scheduleAtFixedRate(new Refresher(cat), 2, 600, TimeUnit.SECONDS); - } - - private static SyndFeed getSyndFeed(String u) throws IllegalArgumentException, FeedException, MalformedURLException, IOException { - XmlReader r; - - r = new XmlReader(new URL(u)); - - return new SyndFeedInput().build(r); - } - - private List
getArticlesForUpdate(Category cat) { - List
result; - - synchronized (articlesByCategory) { - result = articlesByCategory.get(cat); - if (result == null) { - result = new ArrayList<>(); - articlesByCategory.put(cat, result); - } - return result; - } - } - - private boolean exists(String articleLink, List
articles) { - synchronized (articles) { - for (Article a: articles) - if (a.getLink().equals(articleLink)) - return true; - } - return false; - } - - private Instant getArticleInstant(SyndEntry entry) { - Date date; - - date = entry.getUpdatedDate(); - if (date == null) - date = entry.getPublishedDate(); - - if (date == null) - return Instant.now(); - - return date.toInstant(); - } - - private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) { - String desc, title, thumbnail, feedTitle, str; - List entities; - - feedTitle = feed.getTitle(); - if (feedTitle != null) { - feedTitle = feedTitle.trim(); - } - - thumbnail = null; - for (SyndEnclosure e: entry.getEnclosures()) { - if (e.getType().startsWith("image/")) - thumbnail = e.getUrl(); - break; - } - - title = entry.getTitle().trim(); - - if (entry.getDescription() != null) { - str = entry.getDescription().getValue(); - desc = Jsoup.parse(str).text(); - } else { - desc = null; - LOG.severe("No description for " + feedTitle + " - " + title); - } - - entities = new ArrayList<>(); - if (lang.equals("en")) - try { - NamedEntityRecognizer.classify(title, entities, config); - if (desc != null) - NamedEntityRecognizer.classify(desc, entities, config); - } catch (ClassCastException | ClassNotFoundException | IOException e1) { - LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1); - } - - return new Article(link, title, desc, thumbnail, instant, feedTitle, entities.toArray(new String[0])); - } - - private void addArticles(Category cat, SyndFeed feed) { - String feedTitle; - List
articles; - Article a; - - feedTitle = feed.getTitle().trim(); - - LOG.info("addArticles " + cat.getLabel() + " " + feedTitle + " number of articles: " + feed.getEntries().size()); - - for (SyndEntry entry: feed.getEntries()) { - String link = entry.getLink().trim(); - articles = getArticlesForUpdate(cat); - if (exists(link, articles)) { - LOG.fine("addArticles " + link + " is already present"); - continue ; - } - - final Instant instant = getArticleInstant(entry); - - if (config.isObsolete(instant)) - continue ; - - a = ArticleStore.singleton.getArticle(link, ()->toArticle(link, entry, feed, cat.getLanguage(), instant)); - - synchronized (articles) { - articles.add(a); - - Collections.sort(articles, new Comparator
() { - @Override - public int compare(Article o1, Article o2) { - if (o1.getPublicationDate() == o2.getPublicationDate()) - return 0; - if (o1.getPublicationDate() == null) - return 1; - if (o2.getPublicationDate() == null) - return -1; - return o2.getPublicationDate().compareTo(o1.getPublicationDate()); - } - }); - } - } - - LOG.info("addArticles done " + cat.getLabel()); - } - - private void retrieveArticles(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - List feeds; - - feeds = config.getFeedsByCategory().get(cat); - - if (feeds != null) - for (Feed f: feeds) - try { - addArticles(cat, getSyndFeed(f.getURL())); - } catch (Throwable e) { - LOG.log(Level.SEVERE, - "retrieveArticles failure " + cat.getLabel() + " " + f.toString(), - e); - } - else - LOG.severe("No feed for category " + cat); - } - - /** - * Returns a copy. - */ - public List
getArticles(Category cat, String entity) - throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - List
articles, result; - - synchronized (articlesByCategory) { - articles = getArticlesForUpdate(cat); - } - - synchronized (articles) { - if (entity == null) - return new ArrayList<>(articles); - - result = new ArrayList<>(articles.size()); - for (Article a: articles) - if (a.hasEntity(entity)) - result.add(a); - - return result; - } - } - - public List getEntityStats(Category cat) throws IllegalArgumentException, MalformedURLException, FeedException, IOException { - List
articles; - Map entities; - final String FUNCTION_NAME = "getEntities"; - EntityStat s; - List stats; - Instant minInstant; - - LOG.entering(CLASS_NAME, FUNCTION_NAME, cat); - - articles = getArticles(cat, null); - - minInstant = Instant.now().minus(15, ChronoUnit.DAYS); - - entities = new HashMap<>(); - for (Article a: articles) - if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null) - for (String e: a.getEntities()) { - s = entities.get(e); - if (s == null) { - s = new EntityStat(e); - entities.put(e, s); - } - s.increment(); - } - - stats = new ArrayList<>(entities.values()); - stats.sort(new Comparator() { - - @Override - public int compare(EntityStat o1, EntityStat o2) { - return Integer.compare(o2.getCount(), o1.getCount()); - } - - }); - - LOG.exiting(CLASS_NAME, FUNCTION_NAME, stats); - - return stats; - } - - private class Refresher implements Runnable { - private final Category category; - - public Refresher(Category category) { - this.category = category; - } - - @Override - public void run() { - LOG.info("refresher "+ category.getLabel()); - - try { - retrieveArticles(category); - } catch (IllegalArgumentException | FeedException | IOException e) { - LOG.log(Level.SEVERE, "refresher failure", e); - } - - LOG.info("refresher "+ category.getLabel() + " done"); - } - } -} diff --git a/war/src/main/java/pnews/servlet/HTML.java b/war/src/main/java/pnews/servlet/HTML.java deleted file mode 100644 index ac7a749..0000000 --- a/war/src/main/java/pnews/servlet/HTML.java +++ /dev/null @@ -1,187 +0,0 @@ -package pnews.servlet; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -import com.rometools.rome.io.FeedException; - -import net.wpitchoune.pnews.Article; -import net.wpitchoune.pnews.Category; -import net.wpitchoune.pnews.Config; -import net.wpitchoune.pnews.EntityStat; -import net.wpitchoune.pnews.Language; - -public class HTML { - private static final String CLASS_NAME= HTML.class.getName(); - private static final Logger LOG = Logger.getLogger(CLASS_NAME); - - private static void appendA(StringBuffer buf, String child, String href, String cl) { - buf.append("'); - buf.append(child); - buf.append(""); - } - - private static void appendDiv(StringBuffer buf, String child) { - buf.append("
"); - buf.append(child); - buf.append("
\n"); - } - - private static void appendP(StringBuffer buf, String child) { - buf.append("

"); - buf.append(child); - buf.append("

\n"); - } - - private static void append(StringBuffer buf, Article a) throws UnsupportedEncodingException { - buf.append("
\n"); - - buf.append("
\n"); - if (a.getThumbnail() != null) { - buf.append("\n"); - } - buf.append("
\n"); - - buf.append("
\n"); - - buf.append("
\n"); - appendA(buf, a.getTitle(), "/redirect?url=" + URLEncoder.encode(a.getLink(), "UTF-8"), null); - buf.append("
\n"); - - buf.append(""); - - buf.append("
\n"); - if (a.getDescription() != null) { - buf.append("

"); - if (a.getDescription().length() < 512) { - buf.append(a.getDescription()); - } else { - buf.append(a.getDescription().substring(0, 512)); - buf.append("[..]"); - } - buf.append("

"); - } - buf.append("
\n"); - - buf.append("
\n"); - - buf.append("
\n"); - } - - private static void appendMenu(StringBuffer buf, Category catActive, Config cfg) { - String cl; - - buf.append("\n"); - } - - private static String toURL(Category catActive, String entity) { - try { - return catActive.getURL() + "?entity=" + URLEncoder.encode(entity, "UTF-8"); - } catch (UnsupportedEncodingException e) { - LOG.log(Level.SEVERE, "Failed to generate link to entity " + entity, e); - return catActive.getURL(); - } - } - - public static String toHTML(List
articles, Category catActive, String entityActive, Config cfg, ArticleProvider provider) { - StringBuffer buf; - int i; - List entities; - String cl; - - buf = new StringBuffer(); - buf.append("\n"); - buf.append("\n"); - buf.append("\n"); - buf.append("\n"); - buf.append("\n"); - buf.append(""); - buf.append(catActive.getTitle()); - buf.append(" - PNews\n"); - buf.append("\n"); - buf.append("\n"); - - appendMenu(buf, catActive, cfg); - - try { - entities = provider.getEntityStats(catActive); - - if (entities.size() > 0) { - buf.append("\n"); - } - } catch (IllegalArgumentException | FeedException | IOException e2) { - LOG.log(Level.SEVERE, "Failed to get entities", e2); - } - - i = 0; - for (Article e: articles) { - try { - append(buf, e); - } catch (UnsupportedEncodingException e1) { - LOG.log(Level.SEVERE, "Failed to convert article to HTML", e1); - } - if (i == 100) - break; - else - i++; - } - - buf.append("\n"); - buf.append("\n"); - - return buf.toString(); - } -} diff --git a/war/src/main/java/pnews/servlet/JSON.java b/war/src/main/java/pnews/servlet/JSON.java deleted file mode 100644 index 78379a4..0000000 --- a/war/src/main/java/pnews/servlet/JSON.java +++ /dev/null @@ -1,60 +0,0 @@ -package pnews.servlet; - -import java.io.IOException; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -import com.google.gson.Gson; -import com.google.gson.JsonObject; -import com.rometools.rome.io.FeedException; - -import net.wpitchoune.pnews.Article; -import net.wpitchoune.pnews.ArticleStore; -import net.wpitchoune.pnews.Category; -import net.wpitchoune.pnews.Config; - -public class JSON { - private static final Logger LOG = Logger.getLogger(JSON.class.getName()); - - public static String getStats(ArticleProvider provider, Config config) { - JsonObject jstats, jreadcounts, jcategories, jmemory; - Runtime runtime; - List
articles; - Article[] allArticles; - - jstats = new JsonObject(); - - jstats.addProperty("articles-count", ArticleStore.singleton.size()); - - jreadcounts = new JsonObject(); - jstats.add("read-counts", jreadcounts); - - allArticles = ArticleStore.singleton.getArticles(); - for (Article a: allArticles) - if (a.getReadCount() > 0) - jreadcounts.addProperty(a.getLink(), a.getReadCount()); - - jcategories = new JsonObject(); - jstats.add("categories", jcategories); - - for (Category cat: config.getCategories()) - try { - articles = provider.getArticles(cat, null); - jcategories.addProperty(cat.getLabel(), - articles.size()); - } catch (IllegalArgumentException | FeedException | IOException e) { - LOG.log(Level.SEVERE, "Fail to retrieve articles", e); - } - - jmemory = new JsonObject(); - jstats.add("memory", jmemory); - - runtime = Runtime.getRuntime(); - jmemory.addProperty("total", runtime.totalMemory()); - jmemory.addProperty("max", runtime.maxMemory()); - jmemory.addProperty("free", runtime.freeMemory()); - - return new Gson().toJson(jstats); - } -} diff --git a/war/src/main/java/pnews/servlet/Pnews.java b/war/src/main/java/pnews/servlet/Pnews.java deleted file mode 100644 index c51f946..0000000 --- a/war/src/main/java/pnews/servlet/Pnews.java +++ /dev/null @@ -1,224 +0,0 @@ -package pnews.servlet; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.UnsupportedEncodingException; -import java.io.Writer; -import java.net.URLDecoder; -import java.util.ArrayList; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -import javax.servlet.ServletConfig; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.rometools.rome.io.FeedException; - -import net.wpitchoune.pnews.Article; -import net.wpitchoune.pnews.ArticleStore; -import net.wpitchoune.pnews.Category; -import net.wpitchoune.pnews.Config; -import net.wpitchoune.pnews.Language; - -public class Pnews extends HttpServlet { - private static final String CLASS_NAME = Pnews.class.getName(); - private static final Logger LOG = Logger.getLogger(Pnews.class.getName()); - private static final long serialVersionUID = 1L; - private ArticleProvider provider; - private Config config; - - private static String getQueryParameter(HttpServletRequest rq, String key) - throws UnsupportedEncodingException { - final String METHOD_NAME="getQueryParameter"; - String[] params; - int idx; - String q; - - - LOG.entering(CLASS_NAME, METHOD_NAME, new Object[] { rq, key} ); - - q = rq.getQueryString(); - - if (q == null) - return null; - - params = q.split("&"); - - for (String p: params) { - idx = p.indexOf('='); - - if (idx > 1 && p.substring(0, idx).equals(key)) - return URLDecoder.decode(p.substring(idx + 1), "UTF-8"); - } - - return null; - } - - private static void redirect(HttpServletRequest rq, HttpServletResponse rp) { - String redirectURL; - Article a; - - LOG.entering(Pnews.class.getName(), "redirect", new Object[] { rq, rp }); - - try { - redirectURL = getQueryParameter(rq, "url"); - - LOG.info("Request redirection to " + redirectURL); - - if (redirectURL != null) { - a = ArticleStore.singleton.get(redirectURL); - if (a != null) - a.incrementReadCount(); - else - LOG.severe("Cannot find the article " + redirectURL); - - rp.setHeader("Location", redirectURL); - rp.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); - } else { - LOG.severe("No redirection URL"); - rp.setStatus(HttpServletResponse.SC_NOT_FOUND); - } - - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - LOG.log(Level.SEVERE, "redirect failure", e); - rp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - } - - LOG.exiting(Pnews.class.getName(), "redirect"); - } - - private static void doTemporaryRedirect(String newURL, HttpServletResponse rp) { - rp.setHeader("Location", newURL); - rp.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); - } - - private void writeStats(HttpServletResponse rp) throws IOException { - rp.setContentType("application/json;charset=utf-8"); - rp.setCharacterEncoding("utf-8"); - - rp.getWriter().write(JSON.getStats(provider, config)); - } - - - private void writeArticles(Category cat, String entity, HttpServletResponse rp) { - String html; - List
articles; - - try { - articles = provider.getArticles(cat, entity); - if (articles != null) { - html = HTML.toHTML(articles, cat, entity, config, provider); - rp.setContentType("text/html;charset=utf-8"); - rp.getWriter().write(html); - rp.setCharacterEncoding("utf-8"); - } else { - LOG.severe("writeArticles cannot retrieve any articles"); - html = HTML.toHTML(new ArrayList
(), cat, entity, config, provider); - rp.setContentType("text/html"); - rp.getWriter().write(html); - } - } catch (IOException | IllegalArgumentException | FeedException e) { - LOG.log(Level.SEVERE, "writeArticles failure", e); - rp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - } - } - - private void copy(InputStream in, Writer writer) throws IOException { - Reader r; - char[] buf; - int n; - - buf = new char[1024]; - r = new InputStreamReader(in); - while ( (n = r.read(buf, 0, buf.length)) != -1) - writer.write(buf, 0, n); - } - - @Override - protected void doGet(HttpServletRequest req, HttpServletResponse resp) { - final String METHOD_NAME = "doGet"; - String path; - InputStream in; - - RequesterLog.singleton.writeRequest(req); - - LOG.info("doGet " + req.getRemoteAddr().toString() + " " + req.getRequestURI() + " " + req.getQueryString()); - - LOG.info(METHOD_NAME + " queryString=" + req.getQueryString()); - - path = req.getPathInfo(); - - if (path.equals("/redirect")) { - redirect(req, resp); - return ; - } - - if (path.equals("/style.css")) { - try { - in = HTML.class.getClassLoader().getResourceAsStream("style.css"); - copy(in, resp.getWriter()); - resp.setContentType("text/css"); - - return ; - } catch (IOException e) { - LOG.log(Level.SEVERE, "doGet failure", e); - resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - - return ; - } - } - - if (path.equals("/")) { - doTemporaryRedirect(config.getDefaultLanguage().toURL(), resp); - return ; - } - - try { - if (path.equals("/stats")) { - writeStats(resp); - return ; - } - - for (Category cat: config.getCategories()) { - if (path.equals(cat.getURL())) { - writeArticles(cat, getQueryParameter(req, "entity"), resp); - return ; - } - } - - for (Language l: config.getLanguages()) { - if (path.equals(l.toURL())) { - doTemporaryRedirect(config.getDefaultCategory(l).getURL(), resp); - return ; - } - } - - resp.getWriter().write("Not found " + req.getPathInfo()); - resp.setStatus(HttpServletResponse.SC_NOT_FOUND); - } catch (IOException | RuntimeException e) { - LOG.log(Level.SEVERE, "doGet failure", e); - resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - } - } - - @Override - public void init(ServletConfig cfg) throws ServletException { - LOG.info("Pnews servlet init " + cfg.getServletContext().getContextPath()); - - config = new Config(); - try { - config.loadConfig(); - } catch (UnsupportedEncodingException e) { - throw new ServletException(e); - } - - provider = new ArticleProvider(config); - } -} diff --git a/war/src/main/java/pnews/servlet/RequesterLog.java b/war/src/main/java/pnews/servlet/RequesterLog.java deleted file mode 100644 index da1cdf9..0000000 --- a/war/src/main/java/pnews/servlet/RequesterLog.java +++ /dev/null @@ -1,39 +0,0 @@ -package pnews.servlet; - -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Locale; -import java.util.logging.Level; -import java.util.logging.Logger; - -import javax.servlet.http.HttpServletRequest; - -public class RequesterLog { - public static final RequesterLog singleton = new RequesterLog(); - private Writer writer; - private Logger LOG = Logger.getLogger(RequesterLog.class.getName()); - private SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US); - - public void writeRequest(HttpServletRequest rq) { - try { - synchronized(this) { - if (writer == null) - writer = new BufferedWriter(new FileWriter("access.log", true), 1024); - } - - synchronized (writer) { - writer.write("[" + dateFormat.format(new Date()) + "] "); - writer.write(rq.getRemoteAddr() + " " + rq.getRequestURI() + " " + rq.getQueryString()); - writer.write(" " + rq.getHeader("User-Agent")); - writer.write("\n"); - writer.flush(); - } - } catch (IOException e) { - LOG.log(Level.SEVERE, "Fails to log requester information", e); - } - } -} diff --git a/war/src/main/resources/feeds.json b/war/src/main/resources/feeds.json index b309cd7..71730d6 100644 --- a/war/src/main/resources/feeds.json +++ b/war/src/main/resources/feeds.json @@ -155,7 +155,8 @@ "CA", "Read", "5 :", - "InfoWorld" + "InfoWorld", + "Here" ], "aliases": { "U.S.": ["United States", "US"], diff --git a/war/src/main/webapp/WEB-INF/web.xml b/war/src/main/webapp/WEB-INF/web.xml index 3c47adf..36cf0d6 100644 --- a/war/src/main/webapp/WEB-INF/web.xml +++ b/war/src/main/webapp/WEB-INF/web.xml @@ -7,14 +7,14 @@ Pnews pnews - pnews.servlet.Pnews + net.wpitchoune.pnews.servlet.Pnews 1 css text/css - + pnews -- 2.7.4