--- /dev/null
+package net.wpitchoune.pnews;
+
+import java.time.Instant;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Description of a news article together with a thread-safe read counter.
+ *
+ * <p>All descriptive fields are assigned once in the constructor. The only
+ * state that changes afterwards is the read counter, which is backed by an
+ * {@link AtomicLong}.
+ */
+public class Article {
+    private final String title;
+    private final String description;
+    private final String thumbnail;
+    private final String link;
+    private final Instant publicationDate;
+    private final String website;
+    /** Private copy of the entity names; {@code null} when none were supplied. */
+    private final String[] entities;
+    private final AtomicLong readCount = new AtomicLong();
+
+    /**
+     * Creates an article.
+     *
+     * @param link            value returned by {@link #getLink()}
+     * @param title           value returned by {@link #getTitle()}
+     * @param description     value returned by {@link #getDescription()}, may be {@code null}
+     * @param thumbnail       value returned by {@link #getThumbnail()}, may be {@code null}
+     * @param publicationDate value returned by {@link #getPublicationDate()}, may be {@code null}
+     * @param website         value returned by {@link #getWebsite()}
+     * @param entities        entity names attached to the article, may be {@code null};
+     *                        the array is copied
+     */
+    public Article(String link, String title, String description, String thumbnail, Instant publicationDate, String website, String[] entities) {
+        this.link = link;
+        this.title = title;
+        this.description = description;
+        this.thumbnail = thumbnail;
+        this.publicationDate = publicationDate;
+        this.website = website;
+        // Copy so that later changes to the caller's array cannot alter this article.
+        this.entities = entities == null ? null : entities.clone();
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+
+    public String getLink() {
+        return link;
+    }
+
+    public String getThumbnail() {
+        return thumbnail;
+    }
+
+    public String getWebsite() {
+        return website;
+    }
+
+    /** @return the current value of the read counter */
+    public long getReadCount() {
+        return readCount.get();
+    }
+
+    /**
+     * @return a copy of the entity names, or {@code null} when the article
+     *         was created without an entity array
+     */
+    public String[] getEntities() {
+        // Hand out a copy: the internal array must not be writable from outside.
+        return entities == null ? null : entities.clone();
+    }
+
+    /**
+     * Tells whether the given entity name is attached to this article.
+     *
+     * @param entity the name to look for; {@code null} never matches
+     * @return {@code true} if one of the article entities equals {@code entity}
+     */
+    public boolean hasEntity(String entity) {
+        if (entities == null || entity == null) {
+            return false;
+        }
+
+        for (String e : entities) {
+            // Call equals() on the argument so a null array element cannot throw.
+            if (entity.equals(e)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    public Instant getPublicationDate() {
+        return publicationDate;
+    }
+
+    /** Atomically adds one to the read counter. */
+    public void incrementReadCount() {
+        readCount.incrementAndGet();
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+/**
+ * Supplier of {@link Article} instances.
+ *
+ * <p>{@code ArticleStore.getArticle(link, factory)} invokes {@link #create()}
+ * when it holds no live article for the requested link.
+ */
+@FunctionalInterface
+public interface ArticleFactory {
+ /** @return the article to use; takes no arguments. */
+ Article create();
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+import java.lang.ref.WeakReference;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.logging.Logger;
+
+/**
+ * Registry of {@link Article} instances keyed by link.
+ *
+ * <p>Articles are held through {@link WeakReference}s, so an entry may lose
+ * its article at any time; such entries are purged lazily by
+ * {@link #get(String)}. Every access to the underlying map is guarded by the
+ * map's own monitor.
+ */
+public class ArticleStore {
+    public final static ArticleStore singleton = new ArticleStore();
+    private final HashMap<String, WeakReference<Article>> articles = new HashMap<>();
+    private final static Logger LOG = Logger.getLogger(ArticleStore.class.getName());
+
+    /**
+     * Looks up the article registered for a link.
+     *
+     * @param link the key the article was registered under
+     * @return the article, or {@code null} if none is registered or if the
+     *         registered article has been released (the stale entry is then removed)
+     */
+    public Article get(String link) {
+        synchronized (articles) {
+            WeakReference<Article> ref = articles.get(link);
+            if (ref == null) {
+                return null;
+            }
+
+            Article result = ref.get();
+            if (result == null) {
+                LOG.info("Article has been released from memory: " + link);
+                articles.remove(link);
+            }
+
+            return result;
+        }
+    }
+
+    /**
+     * Returns the article registered for a link, creating and registering it
+     * through the factory when there is no live one.
+     *
+     * @param link    the key to look up and, if needed, register under
+     * @param factory invoked while the store lock is held when a new article is needed
+     * @return the existing or newly created article
+     */
+    public Article getArticle(String link, ArticleFactory factory) {
+        synchronized (articles) {
+            Article result = get(link);
+            if (result == null) {
+                result = factory.create();
+                articles.put(link, new WeakReference<>(result));
+                LOG.fine("Article has been added: " + result.getLink());
+            }
+
+            return result;
+        }
+    }
+
+    /**
+     * @return the number of registered entries, including entries whose
+     *         article has been released but not yet purged
+     */
+    public long size() {
+        // Must use the same monitor as every other method; locking on `this`
+        // left the map unprotected against concurrent put/remove.
+        synchronized (articles) {
+            return articles.size();
+        }
+    }
+
+    /** @return a snapshot array of all articles that are still reachable */
+    public Article[] getArticles() {
+        synchronized (articles) {
+            ArrayList<Article> result = new ArrayList<>(articles.size());
+
+            for (WeakReference<Article> r : articles.values()) {
+                Article a = r.get();
+                if (a != null) {
+                    result.add(a);
+                }
+            }
+
+            return result.toArray(new Article[0]);
+        }
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+/**
+ * A news category: an identifier, a label, a title and the language it belongs to.
+ */
+public class Category {
+    private final String id;
+    private final String label;
+    private final String title;
+    private final String language;
+
+    /**
+     * @param id       category identifier; its lower-cased form is used in {@link #getURL()}
+     * @param label    value returned by {@link #getLabel()} and {@link #toString()}
+     * @param title    value returned by {@link #getTitle()}
+     * @param language language identifier, used as the first path segment of {@link #getURL()}
+     */
+    public Category(String id, String label, String title, String language) {
+        this.id = id;
+        this.label = label;
+        this.title = title;
+        this.language = language;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    /**
+     * @return the path {@code /<language>/<id in lower case>}
+     */
+    public String getURL() {
+        // Locale.ROOT keeps the path stable whatever the JVM default locale is
+        // (with a Turkish default locale, "I" would otherwise become a dotless i).
+        return "/" + language + "/" + id.toLowerCase(java.util.Locale.ROOT);
+    }
+
+    public String getLabel() {
+        return label;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public String getLanguage() {
+        return language;
+    }
+
+    /** @return the label, as given by {@link #getLabel()} */
+    @Override
+    public String toString() {
+        return getLabel();
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonObject;
+import javax.json.JsonString;
+import javax.json.JsonValue;
+
+/**
+ * Application configuration read from the {@code feeds.json} classpath resource:
+ * languages, categories, feeds, blacklisted entities and entity aliases.
+ *
+ * <p>The getters for feeds, categories and languages return {@code null}
+ * until {@link #loadConfig()} has been called. The entity look-ups work on
+ * empty collections before that.
+ */
+public class Config {
+    private static final String CLASS_NAME = Config.class.getName();
+    private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+
+    /** Name of the classpath resource holding the configuration. */
+    private static final String CONFIG_RESOURCE = "feeds.json";
+
+    /** Age, in days, from which {@link #isObsolete(Instant)} reports an instant as obsolete. */
+    private static final long OBSOLETE_AFTER_DAYS = 60;
+
+    private Feed[] feeds;
+    private Category[] categories;
+    private Language[] languages;
+    private final Set<String> blacklistedEntities = new HashSet<>();
+    private final HashMap<String, String> entityAliases = new HashMap<>();
+
+    /**
+     * The key is the language, the value is the default category for this language.
+     */
+    private final Map<String, Category> defaultCategories = new HashMap<>();
+
+    /**
+     * Builds the category array from the JSON {@code categories} array. The
+     * first category met for a language becomes the default for that language.
+     */
+    private void loadCategories(JsonArray jcats) {
+        List<Category> cats = new ArrayList<>(jcats.size());
+
+        for (JsonValue v : jcats) {
+            JsonObject jcat = (JsonObject) v;
+            String language = jcat.getString("language");
+            Category cat = new Category(jcat.getString("id"),
+                                        jcat.getString("label"),
+                                        jcat.getString("title"),
+                                        language);
+            cats.add(cat);
+            defaultCategories.putIfAbsent(language, cat);
+        }
+
+        categories = cats.toArray(new Category[0]);
+    }
+
+    /** Builds the language array from the JSON {@code languages} array. */
+    private void loadLanguages(JsonArray jlangs) {
+        List<Language> langs = new ArrayList<>(jlangs.size());
+
+        for (JsonValue v : jlangs) {
+            langs.add(new Language(((JsonObject) v).getString("id")));
+        }
+
+        languages = langs.toArray(new Language[0]);
+    }
+
+    /** @return the loaded category with the given id, or {@code null} if there is none */
+    private Category getCategory(String id) {
+        for (Category c : categories) {
+            if (c.getId().equals(id)) {
+                return c;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Reads the {@code entities} object: its {@code blacklist} array of names
+     * and its {@code aliases} object, which maps an alias to the array of
+     * source names that must be replaced by it. Absent sections are treated
+     * as empty.
+     */
+    private void loadEntities(JsonObject jroot) {
+        final String METHOD_NAME = "loadEntities";
+
+        JsonObject jentities = jroot.getJsonObject("entities");
+
+        if (jentities != null) {
+            JsonArray jblacklist = jentities.getJsonArray("blacklist");
+            if (jblacklist != null) {
+                jblacklist.forEach(jv -> blacklistedEntities.add(((JsonString) jv).getString()));
+            }
+
+            JsonObject jaliases = jentities.getJsonObject("aliases");
+            if (jaliases != null) {
+                jaliases.forEach((alias, v) ->
+                    ((JsonArray) v).forEach(jsource ->
+                        entityAliases.put(((JsonString) jsource).getString(), alias)));
+            }
+        }
+
+        LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " blacklistedEntities=" + blacklistedEntities);
+        LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " entityAliases=" + entityAliases);
+    }
+
+    /**
+     * @param entity an entity name
+     * @return the alias configured for {@code entity}, or {@code entity}
+     *         itself when no alias is configured
+     */
+    public String getEntityAlias(String entity) {
+        return entityAliases.getOrDefault(entity, entity);
+    }
+
+    /**
+     * Parses the configuration resource into a JSON object and closes the
+     * underlying stream.
+     *
+     * @throws NullPointerException if the resource is not on the classpath
+     */
+    private JsonObject readRoot() throws UnsupportedEncodingException {
+        java.io.InputStream in = java.util.Objects.requireNonNull(
+            Config.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE),
+            CONFIG_RESOURCE + " not found on the classpath");
+        Reader r = new InputStreamReader(in, "UTF-8");
+
+        try {
+            return Json.createReader(r).readObject();
+        } finally {
+            try {
+                r.close();
+            } catch (IOException e) {
+                // The content has already been parsed (or parsing failed with
+                // its own exception); a close failure is only worth a trace.
+                LOG.log(Level.FINE, "Cannot close " + CONFIG_RESOURCE, e);
+            }
+        }
+    }
+
+    /**
+     * Loads languages, categories, feeds and entities from {@code feeds.json}.
+     *
+     * <p>A feed is bound to the first entry of its {@code categories} array.
+     * A feed whose category is missing or unknown is logged and skipped.
+     *
+     * @throws UnsupportedEncodingException if UTF-8 is not supported
+     * @throws NullPointerException         if {@code feeds.json} is not on the classpath
+     */
+    public void loadConfig() throws UnsupportedEncodingException {
+        JsonObject jroot = readRoot();
+
+        loadLanguages(jroot.getJsonArray("languages"));
+        loadCategories(jroot.getJsonArray("categories"));
+
+        JsonObject jfeeds = jroot.getJsonObject("feeds");
+        List<Feed> feedList = new ArrayList<>(jfeeds.size());
+
+        jfeeds.forEach((url, v) -> {
+            JsonArray jcategories = ((JsonObject) v).getJsonArray("categories");
+
+            if (jcategories == null || jcategories.isEmpty()) {
+                // One malformed feed must not abort the whole configuration.
+                LOG.severe("Missing category for feed: " + url);
+                return;
+            }
+
+            String categoryId = jcategories.getString(0);
+            Category cat = getCategory(categoryId);
+
+            if (cat != null) {
+                feedList.add(new Feed(url, cat));
+            } else {
+                LOG.severe("Missing category: " + categoryId);
+            }
+        });
+
+        feeds = feedList.toArray(new Feed[0]);
+
+        loadEntities(jroot);
+    }
+
+    /** @return {@code true} if {@code e} is in the configured blacklist */
+    public boolean isBlacklistedEntity(String e) {
+        final String METHOD_NAME = "isBlacklistedEntity";
+
+        LOG.entering(CLASS_NAME, METHOD_NAME, e);
+
+        boolean result = blacklistedEntities.contains(e);
+
+        LOG.exiting(CLASS_NAME, METHOD_NAME, result);
+
+        return result;
+    }
+
+    /**
+     * @param instant the instant to test, must not be {@code null}
+     * @return {@code true} unless {@code instant} is strictly after now minus 60 days
+     */
+    public boolean isObsolete(Instant instant) {
+        Instant limit = Instant.now().minus(OBSOLETE_AFTER_DAYS, ChronoUnit.DAYS);
+
+        return !instant.isAfter(limit);
+    }
+
+    public Feed[] getFeeds() {
+        return feeds;
+    }
+
+    /** @return a new map grouping the loaded feeds by their category */
+    public Map<Category, List<Feed>> getFeedsByCategory() {
+        Map<Category, List<Feed>> result = new HashMap<>();
+
+        for (Feed f : getFeeds()) {
+            result.computeIfAbsent(f.getCategory(), c -> new ArrayList<Feed>()).add(f);
+        }
+
+        return result;
+    }
+
+    public Category[] getCategories() {
+        return categories;
+    }
+
+    /** @return the first category loaded for the language, or {@code null} if there is none */
+    public Category getDefaultCategory(Language lang) {
+        return defaultCategories.get(lang.getId());
+    }
+
+    public Language[] getLanguages() {
+        return languages;
+    }
+
+    /**
+     * @return the first configured language
+     * @throws ArrayIndexOutOfBoundsException if the configuration declares no language
+     */
+    public Language getDefaultLanguage() {
+        return languages[0];
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+/**
+ * Counter attached to an entity name.
+ */
+public class EntityStat {
+    private final String entity;
+    private int occurrences;
+
+    /**
+     * @param entity the entity name this counter belongs to; the count starts at zero
+     */
+    public EntityStat(String entity) {
+        this.entity = entity;
+    }
+
+    public String getEntity() {
+        return entity;
+    }
+
+    public int getCount() {
+        return occurrences;
+    }
+
+    /** Adds one to the count. */
+    public void increment() {
+        occurrences += 1;
+    }
+
+    /** @return the entity name followed by the count in parentheses */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder();
+
+        text.append(entity).append("(").append(occurrences).append(")");
+
+        return text.toString();
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+/**
+ * A feed: its URL and the category it is bound to.
+ */
+public class Feed {
+    private final String url;
+    private final Category category;
+
+    /**
+     * @param URL      value returned by {@link #getURL()}
+     * @param category value returned by {@link #getCategory()}
+     */
+    public Feed(String URL, Category category) {
+        this.category = category;
+        this.url = URL;
+    }
+
+    public Category getCategory() {
+        return category;
+    }
+
+    public String getURL() {
+        return url;
+    }
+
+    /** @return the URL followed by the category in square brackets */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder();
+
+        text.append(getURL()).append("[").append(category).append("]");
+
+        return text.toString();
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews;
+
+/**
+ * A language, described only by its identifier.
+ */
+public class Language {
+    private final String id;
+
+    /** @param id the language identifier */
+    public Language(String id) {
+        this.id = id;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    /** @return the identifier, which doubles as the label */
+    public String getLabel() {
+        return id;
+    }
+
+    /** @return the identifier prefixed with a slash */
+    public String toURL() {
+        StringBuilder path = new StringBuilder("/");
+
+        return path.append(id).toString();
+    }
+}
--- /dev/null
+package net.wpitchoune.pnews.classifier;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import edu.stanford.nlp.ie.crf.CRFClassifier;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Triple;
+import net.wpitchoune.pnews.Config;
+
+/**
+ * Named-entity extraction combining {@link OpenNLP} and the Stanford CRF classifier.
+ *
+ * @see <a href="https://stanfordnlp.github.io/CoreNLP/api.html">https://stanfordnlp.github.io/CoreNLP/api.html</a>
+ */
+public class NamedEntityRecognizer {
+    private static final String CLASS_NAME = NamedEntityRecognizer.class.getName();
+    private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+    private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
+
+    /**
+     * Appends to {@code entities} the entities found in {@code str}.
+     *
+     * <p>{@link OpenNLP#classify(String, List, Config)} runs first, then the
+     * Stanford classifier. For the Stanford results, blacklisted words are
+     * dropped, the remaining ones are replaced by their configured alias, and
+     * an alias is only added if the list does not contain it yet.
+     *
+     * @param str      the text to analyse
+     * @param entities the list to append to; it is modified in place
+     * @param config   source of the blacklist and of the aliases
+     * @return {@code entities}
+     */
+    public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
+        final String FUNCTION_NAME = "classify";
+        List<Triple<String, Integer, Integer>> triples;
+
+        LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
+
+        OpenNLP.classify(str, entities, config);
+
+        // NOTE(review): presumably the shared classifier is not safe for
+        // concurrent use, hence the lock - confirm against the CoreNLP docs.
+        synchronized (classifier) {
+            triples = classifier.classifyToCharacterOffsets(str);
+        }
+
+        for (Triple<String, Integer, Integer> t : triples) {
+            String word = str.substring(t.second, t.third);
+
+            if (config.isBlacklistedEntity(word)) {
+                continue;
+            }
+
+            // Deduplicate on the alias, which is what gets stored: checking the
+            // raw word let two words sharing one alias add that alias twice.
+            String alias = config.getEntityAlias(word);
+            if (!entities.contains(alias)) {
+                entities.add(alias);
+            }
+        }
+
+        LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
+
+        return entities;
+    }
+
+    /** Prints the entities found in a sample sentence, using an empty configuration. */
+    public static void main(String[] args) throws Exception {
+        List<String> lst;
+
+        lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
+        for (String str : lst) {
+            System.out.println(str);
+        }
+    }
+}
\ No newline at end of file
--- /dev/null
+package net.wpitchoune.pnews.classifier;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.logging.Logger;
+
+import net.wpitchoune.pnews.Config;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Named-entity extraction based on the OpenNLP name finder, using the
+ * organization, person and location models found on the classpath.
+ *
+ * <p>The models are loaded on first use and then kept in static fields.
+ *
+ * @see <a href="http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example">http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example</a>
+ */
+public class OpenNLP {
+    private static final String CLASS_NAME = OpenNLP.class.getName();
+    private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+
+    /** Spans whose probability is below this value are ignored. */
+    private static final double MIN_PROBABILITY = 0.60;
+
+    private static TokenNameFinderModel organizationModel;
+    private static TokenNameFinderModel personModel;
+    private static TokenNameFinderModel locationModel;
+    private static TokenizerModel tokenModel;
+
+    /**
+     * Appends to {@code entities} the organizations, persons and locations
+     * found in {@code str}, in that order.
+     *
+     * @param str      the text to analyse
+     * @param entities the list to append to; it is modified in place
+     * @param config   source of the blacklist and of the aliases
+     * @return {@code entities}
+     * @throws IOException if a model resource is missing or cannot be read
+     */
+    public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
+        // Tokenize once: the three models all work on the same tokens.
+        String[] tokens = tokenize(str);
+
+        classify(tokens, getOrganizationModel(), entities, config);
+        classify(tokens, getPersonModel(), entities, config);
+        classify(tokens, getLocationModel(), entities, config);
+
+        return entities;
+    }
+
+    /** Opens a classpath resource, failing with an {@link IOException} when it is absent. */
+    private static InputStream openResource(String name) throws IOException {
+        InputStream in = OpenNLP.class.getResourceAsStream(name);
+
+        if (in == null) {
+            throw new IOException("Model resource not found: " + name);
+        }
+
+        return in;
+    }
+
+    /** Reads a name-finder model from the classpath and closes the stream. */
+    private static TokenNameFinderModel loadNameFinderModel(String name) throws IOException {
+        try (InputStream in = openResource(name)) {
+            return new TokenNameFinderModel(in);
+        }
+    }
+
+    private static TokenNameFinderModel getOrganizationModel() throws IOException {
+        synchronized (OpenNLP.class) {
+            if (organizationModel == null) {
+                organizationModel = loadNameFinderModel("/en-ner-organization.bin");
+            }
+
+            return organizationModel;
+        }
+    }
+
+    private static TokenNameFinderModel getPersonModel() throws IOException {
+        synchronized (OpenNLP.class) {
+            if (personModel == null) {
+                personModel = loadNameFinderModel("/en-ner-person.bin");
+            }
+
+            return personModel;
+        }
+    }
+
+    private static TokenNameFinderModel getLocationModel() throws IOException {
+        synchronized (OpenNLP.class) {
+            if (locationModel == null) {
+                locationModel = loadNameFinderModel("/en-ner-location.bin");
+            }
+
+            return locationModel;
+        }
+    }
+
+    /**
+     * Runs one model over the tokens and appends the entities it finds.
+     * Blacklisted entities are dropped, the others are replaced by their
+     * configured alias and added unless the list already contains that alias.
+     */
+    private static List<String> classify(String[] tokens, TokenNameFinderModel model, List<String> entities, Config config) {
+        NameFinderME nameFinder = new NameFinderME(model);
+        Span[] nameSpans = nameFinder.find(tokens);
+
+        for (Span s : nameSpans) {
+            if (s.getProb() < MIN_PROBABILITY || s.getStart() >= s.getEnd()) {
+                continue;
+            }
+
+            // Rebuild the entity text: the span's tokens joined by single spaces.
+            StringBuilder text = new StringBuilder(tokens[s.getStart()]);
+            for (int i = s.getStart() + 1; i < s.getEnd(); i++) {
+                text.append(" ").append(tokens[i]);
+            }
+            String entity = text.toString();
+
+            LOG.finest(entity + " " + s.getProb() + " " + s.toString());
+
+            if (config.isBlacklistedEntity(entity)) {
+                continue;
+            }
+
+            // Deduplicate on the alias, which is what gets stored: checking the
+            // raw entity let two names sharing one alias add that alias twice.
+            String alias = config.getEntityAlias(entity);
+            if (!entities.contains(alias)) {
+                entities.add(alias);
+            }
+        }
+
+        return entities;
+    }
+
+    /** Splits a sentence into tokens with the {@code /en-token.bin} model. */
+    private static String[] tokenize(String sentence) throws IOException {
+        TokenizerModel model;
+
+        synchronized (OpenNLP.class) {
+            if (tokenModel == null) {
+                try (InputStream in = openResource("/en-token.bin")) {
+                    tokenModel = new TokenizerModel(in);
+                }
+            }
+            model = tokenModel;
+        }
+
+        return new TokenizerME(model).tokenize(sentence);
+    }
+}
+++ /dev/null
-package pnews;
-
-import java.time.Instant;
-import java.util.concurrent.atomic.AtomicLong;
-
-public class Article {
- public final String title;
- public final String description;
- public final String thumbnail;
- public final String link;
- public final Instant publicationDate;
- public final String website;
- public final String[] entities;
- public final AtomicLong readCount = new AtomicLong();
-
- public Article(String link, String title, String description, String thumbnail, Instant publicationDate, String website, String[] entities) {
- this.link = link;
- this.title = title;
- this.description = description;
- this.thumbnail = thumbnail;
- this.publicationDate = publicationDate;
- this.website = website;
- this.entities = entities;
- }
-
- public String[] getEntities() {
- return entities;
- }
-
- public boolean hasEntity(String entity) {
- for (String e: entities)
- if (e.equals(entity))
- return true;
-
- return false;
- }
-
- public Instant getPublicationDate() {
- return publicationDate;
- }
-}
+++ /dev/null
-package pnews;
-
-public class Category {
- private final String id;
- private final String label;
- private final String title;
- private final String language;
-
- public Category(String id, String label, String title, String language) {
- this.id = id;
- this.label = label;
- this.title = title;
- this.language = language;
- }
-
- public String getTitle() {
- return title;
- }
-
- public String getURL() {
- return "/" + language + "/" + id.toLowerCase();
- }
-
- public String getLabel() {
- return label;
- }
-
- public String getId() {
- return id;
- }
-
- public String getLanguage() {
- return language;
- }
-
- @Override
- public String toString() {
- return getLabel();
- }
-}
+++ /dev/null
-package pnews;
-
-public class EntityStat {
- private final String entity;
- private int count;
-
- public EntityStat(String entity) {
- this.entity = entity;
- }
-
- public void increment() {
- count++;
- }
-
- public int getCount() {
- return count;
- }
-
- public String getEntity() {
- return entity;
- }
-
- @Override
- public String toString() {
- return entity + "(" + count + ")";
- }
-}
+++ /dev/null
-package pnews;
-
-public class Feed {
- private final String URL;
- private final Category category;
-
- public Feed(String URL, Category category) {
- this.URL = URL;
- this.category = category;
- }
-
- public String getURL() {
- return URL;
- }
-
- public Category getCategory() {
- return category;
- }
-
- @Override
- public String toString() {
- return getURL() + "[" + category + "]";
- }
-}
+++ /dev/null
-package pnews;
-
-public class Language {
- private final String id;
-
- public Language(String id) {
- this.id = id;
- }
-
- public String toURL() {
- return "/" + id;
- }
-
- public String getLabel() {
- return id;
- }
-
- public String getId() {
- return id;
- }
-}
+++ /dev/null
-package pnews;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.logging.Logger;
-
-import edu.stanford.nlp.ie.crf.CRFClassifier;
-import edu.stanford.nlp.util.CoreMap;
-import edu.stanford.nlp.util.Triple;
-import pnews.servlet.Config;
-
-/** https://stanfordnlp.github.io/CoreNLP/api.html */
-public class NER {
- private static final String CLASS_NAME = NER.class.getName();
- private static final Logger LOG = Logger.getLogger(CLASS_NAME);
- private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
-
- public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
-
- List<Triple<String, Integer, Integer>> triples;
- String w;
- final String FUNCTION_NAME = "classify";
-
- LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
-
- OpenNLP.classify(str, entities, config);
-
- synchronized (classifier) {
- triples = classifier.classifyToCharacterOffsets(str);
- }
-
- for (Triple<String, Integer, Integer> t: triples) {
- w = str.substring(t.second, t.third);
- if (!config.isBlacklistedEntity(w) && !entities.contains(w))
- entities.add(config.getEntityAlias(w));
- }
-
- LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
-
- return entities;
- }
-
- public static void main(String[] args) throws Exception {
- List<String> lst;
-
- lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
- for (String str: lst)
- System.out.println(str);
- }
-}
\ No newline at end of file
+++ /dev/null
-package pnews;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.logging.Logger;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.Span;
-import pnews.servlet.Config;
-
-/** http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example **/
-public class OpenNLP {
- private static final String CLASS_NAME = OpenNLP.class.getName();
- private static final Logger LOG = Logger.getLogger(CLASS_NAME);
- private static TokenNameFinderModel organizationModel;
- private static TokenNameFinderModel personModel;
- private static TokenNameFinderModel locationModel;
- private static TokenizerModel tokenModel;
-
- public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
- classify(str, getOrganizationModel(), entities, config);
- classify(str, getPersonModel(), entities, config);
- classify(str, getLocationModel(), entities, config);
-
- return entities;
- }
-
- private static TokenNameFinderModel getOrganizationModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (organizationModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin");
- organizationModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return organizationModel;
- }
-
- private static TokenNameFinderModel getPersonModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (personModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin");
- personModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return personModel;
- }
-
- private static TokenNameFinderModel getLocationModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (locationModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin");
- locationModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return locationModel;
- }
-
- private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
- String entity;
-
- NameFinderME nameFinder = new NameFinderME(model);
- String[] tokens = tokenize(str);
- Span nameSpans[] = nameFinder.find(tokens);
-
- for(Span s: nameSpans) {
- if (s.getProb() < 0.60)
- continue ;
-
- entity = null;
- for (int i = s.getStart(); i < s.getEnd(); i++)
- if (entity == null)
- entity = tokens[i];
- else
- entity += " " + tokens[i];
-
- LOG.finest(entity + " " + s.getProb() + " " + s.toString());
- if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
- entities.add(config.getEntityAlias(entity));
- }
-
- return entities;
- }
-
- public static String[] tokenize(String sentence) throws IOException {
- synchronized (OpenNLP.class) {
- if (tokenModel == null) {
- InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin");
- tokenModel = new TokenizerModel(inputStreamTokenizer);
- }
- }
- TokenizerME tokenizer = new TokenizerME(tokenModel);
- return tokenizer.tokenize(sentence);
- }
-}
+++ /dev/null
-package pnews.servlet;
-
-import pnews.Article;
-
-@FunctionalInterface
-public interface ArticleFactory {
- Article create();
-}
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
-import pnews.Article;
-import pnews.Category;
-import pnews.EntityStat;
-import pnews.Feed;
-import pnews.NER;
+import net.wpitchoune.pnews.Article;
+import net.wpitchoune.pnews.ArticleStore;
+import net.wpitchoune.pnews.Category;
+import net.wpitchoune.pnews.Config;
+import net.wpitchoune.pnews.EntityStat;
+import net.wpitchoune.pnews.Feed;
+import net.wpitchoune.pnews.classifier.NamedEntityRecognizer;
public class ArticleProvider {
private static final String CLASS_NAME = ArticleProvider.class.getName();
private boolean exists(String articleLink, List<Article> articles) {
synchronized (articles) {
for (Article a: articles)
- if (a.link.equals(articleLink))
+ if (a.getLink().equals(articleLink))
return true;
}
return false;
private Article toArticle(String link, SyndEntry entry, SyndFeed feed, String lang, Instant instant) {
String desc, title, thumbnail, feedTitle, str;
- Date date;
List<String> entities;
feedTitle = feed.getTitle();
entities = new ArrayList<>();
if (lang.equals("en"))
try {
- NER.classify(title, entities, config);
+ NamedEntityRecognizer.classify(title, entities, config);
if (desc != null)
- NER.classify(desc, entities, config);
+ NamedEntityRecognizer.classify(desc, entities, config);
} catch (ClassCastException | ClassNotFoundException | IOException e1) {
LOG.log(Level.SEVERE, "Cannot classify " + feedTitle, e1);
}
Collections.sort(articles, new Comparator<Article>() {
@Override
public int compare(Article o1, Article o2) {
- if (o1.publicationDate == o2.publicationDate)
+ if (o1.getPublicationDate() == o2.getPublicationDate())
return 0;
- if (o1.publicationDate == null)
+ if (o1.getPublicationDate() == null)
return 1;
- if (o2.publicationDate == null)
+ if (o2.getPublicationDate() == null)
return -1;
- return o2.publicationDate.compareTo(o1.publicationDate);
+ return o2.getPublicationDate().compareTo(o1.getPublicationDate());
}
});
}
+++ /dev/null
-package pnews.servlet;
-
-import java.lang.ref.WeakReference;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.logging.Logger;
-
-import pnews.Article;
-
-public class ArticleStore {
- public final static ArticleStore singleton = new ArticleStore();
- private final HashMap<String, WeakReference<Article>> articles = new HashMap<>();
- private final static Logger LOG = Logger.getLogger(ArticleStore.class.getName());
-
- public Article get(String link) {
- WeakReference<Article> ref;
- Article result;
-
- synchronized (articles) {
- ref = articles.get(link);
- if (ref == null) {
- return null;
- }
-
- result = ref.get();
- if (result == null) {
- LOG.info("Article has been released from memory: " + link);
- articles.remove(link);
- }
-
- return result;
- }
- }
-
- public Article getArticle(String link, ArticleFactory factory) {
- Article result;
-
- synchronized (articles) {
- result = get(link);
- if (result == null) {
- result = factory.create();
- articles.put(link, new WeakReference<>(result));
- LOG.fine("Article has been added: " + result.link);
- }
-
- return result;
- }
- }
-
- public long size() {
- synchronized (this) {
- return articles.size();
- }
- }
-
- public Article[] getArticles() {
- ArrayList<Article> result;
- Article a;
-
- synchronized (articles) {
- result = new ArrayList<>(articles.size());
-
- for (WeakReference<Article> r: articles.values()) {
- a = r.get();
- if (a != null)
- result.add(a);
- }
-
- return result.toArray(new Article[] {});
- }
- }
-}
+++ /dev/null
-package pnews.servlet;
-
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.time.temporal.TemporalUnit;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import javax.json.Json;
-import javax.json.JsonArray;
-import javax.json.JsonObject;
-import javax.json.JsonString;
-import javax.json.JsonValue;
-
-import pnews.Article;
-import pnews.Category;
-import pnews.Feed;
-import pnews.Language;
-
-public class Config {
- private Feed[] feeds;
- private Category[] categories;
- private Language[] languages;
- private final Set<String> blacklistedEntities = new HashSet<>();
- private final HashMap<String, String> entityAliases = new HashMap<>();
- private static final String CLASS_NAME = Config.class.getName();
-
- /**
- * The key is the language, the value is the default category for this language.
- */
- private Map<String, Category> defaultCategories = new HashMap<>();
- private static final Logger LOG = Logger.getLogger(CLASS_NAME);
-
- private void loadCategories(JsonArray jcats) {
- List<Category> cats;
- JsonObject jcat;
- Category cat;
- String id, label, title, language;
-
- cats = new ArrayList<>(jcats.size());
-
- for (JsonValue v: jcats) {
- jcat = (JsonObject)v;
- id = jcat.getString("id");
- label = jcat.getString("label");
- title = jcat.getString("title");
- language = jcat.getString("language");
- cat = new Category(id, label, title, language);
- cats.add(cat);
- if (defaultCategories.get(language) == null)
- defaultCategories.put(language, cat);
- }
-
- categories = cats.toArray(new Category[0]);
- }
-
- private void loadLanguages(JsonArray jlangs) {
- List<Language> langs;
- JsonObject jlang;
- String id;
-
- langs = new ArrayList<>(jlangs.size());
-
- for (JsonValue v: jlangs) {
- jlang = (JsonObject)v;
- id = jlang.getString("id");
- langs.add(new Language(id));
- }
-
- languages = langs.toArray(new Language[0]);
- }
-
- private Category getCategory(String id) {
- for (Category c: categories)
- if (c.getId().equals(id))
- return c;
- return null;
- }
-
- private void loadEntities(JsonObject jroot) {
- JsonObject jentities, jaliases;
- JsonArray jblacklist;
- final String METHOD_NAME = "loadEntities";
-
- jentities = jroot.getJsonObject("entities");
-
- jblacklist = jentities.getJsonArray("blacklist");
- jblacklist.forEach((jv)-> {
- JsonString js;
-
- js = (JsonString)jv;
- blacklistedEntities.add(js.getString());
- });
-
- jaliases = jentities.getJsonObject("aliases");
- jaliases.forEach((k, v)-> {
- JsonArray jsources = (JsonArray)v;
-
- jsources.forEach((jsource)-> {
- entityAliases.put(((JsonString)jsource).getString(), k);
- });
- });
-
- LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " blacklistedEntities=" + blacklistedEntities);
- LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " entityAliases=" + entityAliases);
- }
-
- public String getEntityAlias(String entity) {
- String result;
-
- result = entityAliases.get(entity);
-
- if (result == null)
- return entity;
- else
- return result;
- }
-
- public void loadConfig() throws UnsupportedEncodingException {
- Reader r;
- JsonObject jfeeds, jroot;
- List<Feed> feedList;
-
- r = null;
- try {
- r = new InputStreamReader(Config.class.getClassLoader().getResourceAsStream("feeds.json"),
- "UTF-8");
- jroot = Json.createReader(r).readObject();
- } finally {
- if (r != null)
- try { r.close(); } catch (IOException e) { };
- }
-
- loadLanguages(jroot.getJsonArray("languages"));
- loadCategories(jroot.getJsonArray("categories"));
-
- jfeeds = jroot.getJsonObject("feeds");
-
- feedList = new ArrayList<Feed>(jfeeds.size());
-
- jfeeds.forEach((k, v)-> {
- JsonObject jf;
- String str;
- Category cat;
- JsonArray jcategories;
-
- jf = (JsonObject)v;
- jcategories = jf.getJsonArray("categories");
- str = jcategories.getString(0);
-
- cat = getCategory(str);
-
- if (cat != null)
- feedList.add(new Feed(k, cat));
- else
- LOG.severe("Missing category: " + str);
- });
-
- feeds = feedList.toArray(new Feed[0]);
-
- loadEntities(jroot);
- }
-
- public boolean isBlacklistedEntity(String e) {
- final String METHOD_NAME = "isBlacklistedEntity";
- boolean result;
-
- LOG.entering(CLASS_NAME, METHOD_NAME, e);
-
- result = blacklistedEntities.contains(e);
-
- LOG.exiting(CLASS_NAME, METHOD_NAME, result);
-
- return result;
- }
-
- public boolean isObsolete(Instant instant) {
- Instant olderInstant;
-
- olderInstant = Instant.now().minus(60, ChronoUnit.DAYS);
-
- if (instant.isAfter(olderInstant))
- return false;
- else
- return true;
- }
-
- public Feed[] getFeeds() {
- return feeds;
- }
-
- public Map<Category, List<Feed>> getFeedsByCategory() {
- Map<Category, List<Feed>> result;
- Feed[] feeds;
- List<Feed> catFeeds;
- Category cat;
-
- result = new HashMap<>();
-
- feeds = getFeeds();
- for (Feed f: feeds) {
- cat = f.getCategory();
-
- catFeeds = result.get(cat);
- if (catFeeds == null) {
- catFeeds = new ArrayList<Feed>();
- result.put(cat, catFeeds);
- }
- catFeeds.add(f);
- }
-
- return result;
- }
-
- public Category[] getCategories() {
- return categories;
- }
-
- public Category getDefaultCategory(Language lang) {
- return defaultCategories.get(lang.getId());
- }
-
- public Language[] getLanguages() {
- return languages;
- }
-
- public Language getDefaultLanguage() {
- return languages[0];
- }
-
- public static void main(String[] args) throws UnsupportedEncodingException {
- Config cfg;
- Feed[] feeds;
- Category[] cats;
-
- cfg = new Config();
- cfg.loadConfig();
-
- cats = cfg.getCategories();
- for (Category cat: cats)
- System.out.println(cat);
-
- feeds = cfg.getFeeds();
-
- System.out.println("Number of feeds: " + feeds.length);
- for (Feed f: feeds)
- System.out.println(f);
- }
-}
import com.rometools.rome.io.FeedException;
-import pnews.Article;
-import pnews.Category;
-import pnews.EntityStat;
-import pnews.Language;
+import net.wpitchoune.pnews.Article;
+import net.wpitchoune.pnews.Category;
+import net.wpitchoune.pnews.Config;
+import net.wpitchoune.pnews.EntityStat;
+import net.wpitchoune.pnews.Language;
public class HTML {
private static final String CLASS_NAME= HTML.class.getName();
buf.append("<div class='article'>\n");
buf.append("<div class='article-image'>\n");
- if (a.thumbnail != null) {
+ if (a.getThumbnail() != null) {
buf.append("<img class='left' src='");
- buf.append(a.thumbnail);
+ buf.append(a.getThumbnail());
buf.append("'/>\n");
}
buf.append("</div>\n");
buf.append("<div class='article-content'>\n");
buf.append("<div class='article-title'>\n");
- appendA(buf, a.title, "/redirect?url=" + URLEncoder.encode(a.link, "UTF-8"), null);
+ appendA(buf, a.getTitle(), "/redirect?url=" + URLEncoder.encode(a.getLink(), "UTF-8"), null);
buf.append("</div>\n");
- buf.append("<div class='article-info'>" + a.website + " - " + a.publicationDate + "</div>");
+ buf.append("<div class='article-info'>" + a.getWebsite() + " - " + a.getPublicationDate() + "</div>");
buf.append("<div class='article-description'>\n");
- if (a.description != null) {
+ if (a.getDescription() != null) {
buf.append("<p>");
- if (a.description.length() < 512) {
- buf.append(a.description);
+ if (a.getDescription().length() < 512) {
+ buf.append(a.getDescription());
} else {
- buf.append(a.description.substring(0, 512));
+ buf.append(a.getDescription().substring(0, 512));
buf.append("[..]");
}
buf.append("</p>");
import com.google.gson.JsonObject;
import com.rometools.rome.io.FeedException;
-import pnews.Article;
-import pnews.Category;
+import net.wpitchoune.pnews.Article;
+import net.wpitchoune.pnews.ArticleStore;
+import net.wpitchoune.pnews.Category;
+import net.wpitchoune.pnews.Config;
public class JSON {
private static final Logger LOG = Logger.getLogger(JSON.class.getName());
allArticles = ArticleStore.singleton.getArticles();
for (Article a: allArticles)
- if (a.readCount.get() > 0)
- jreadcounts.addProperty(a.link, a.readCount);
+ if (a.getReadCount() > 0)
+ jreadcounts.addProperty(a.getLink(), a.getReadCount());
jcategories = new JsonObject();
jstats.add("categories", jcategories);
import com.rometools.rome.io.FeedException;
-import pnews.Article;
-import pnews.Category;
-import pnews.Language;
+import net.wpitchoune.pnews.Article;
+import net.wpitchoune.pnews.ArticleStore;
+import net.wpitchoune.pnews.Category;
+import net.wpitchoune.pnews.Config;
+import net.wpitchoune.pnews.Language;
public class Pnews extends HttpServlet {
private static final String CLASS_NAME = Pnews.class.getName();
if (redirectURL != null) {
a = ArticleStore.singleton.get(redirectURL);
if (a != null)
- a.readCount.incrementAndGet();
+ a.incrementReadCount();
else
LOG.severe("Cannot find the article " + redirectURL);
"https://opensource.com/feed": { "categories": ["en_linux"]},
"https://www.infoworld.com/category/linux/index.rss": { "categories": ["en_linux"]},
"https://www.infoworld.com/category/open-source-tools/index.rss": { "categories": ["en_linux"]},
+ "https://www.techrepublic.com/rssfeeds/topic/open-source/": { "categories": ["en_linux"]},
"https://www.space.com/home/feed/site.xml": { "categories": ["en_space"]},
"https://www.nasa.gov/rss/dyn/breaking_news.rss": { "categories": ["en_space"]},
"http://www.esa.int/rssfeed/Our_Activities/Space_News": { "categories": ["en_space"]},