Hot topics are now determined from articles published in the last 15 days
author Jean-Philippe Orsini <orsinije@fr.ibm.com>
Sun, 5 Nov 2017 21:44:14 +0000 (22:44 +0100)
committer Jean-Philippe Orsini <orsinije@fr.ibm.com>
Sun, 5 Nov 2017 21:44:14 +0000 (22:44 +0100)
war/src/main/java/pnews/OpenNLP.java
war/src/main/java/pnews/servlet/ArticleProvider.java
war/src/main/java/pnews/servlet/Config.java
war/src/main/resources/en-ner-date.bin [deleted file]
war/src/main/resources/en-ner-time.bin [deleted file]

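diff --git a/war/src/main/java/pnews/OpenNLP.java b/war/src/main/java/pnews/OpenNLP.java
index e158a00..99e344d 100644 (file)
--- a/war/src/main/java/pnews/OpenNLP.java
+++ b/war/src/main/java/pnews/OpenNLP.java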
@@ -19,17 +19,13 @@ public class OpenNLP {
         private static TokenNameFinderModel organizationModel;
         private static TokenNameFinderModel personModel;
         private static TokenNameFinderModel locationModel;
-        private static TokenNameFinderModel timeModel;
         private static TokenizerModel tokenModel;
 
         public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
                 classify(str, getOrganizationModel(), entities, config);
-                
                 classify(str, getPersonModel(), entities, config);
                 classify(str, getLocationModel(), entities, config);
                 
-                classify(str, getTimeModel(), entities, config);                
-                
                 return entities;
         }
         
@@ -65,17 +61,6 @@ public class OpenNLP {
                 
                 return locationModel;
         }
-
-        private static TokenNameFinderModel getTimeModel() throws IOException {
-                synchronized (OpenNLP.class) {
-                        if (timeModel == null) {
-                                InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-time.bin");
-                                timeModel = new TokenNameFinderModel(inputStream);
-                        }
-                }
-                
-                return timeModel;
-        }
         
         private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
                 String entity;
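diff --git a/war/src/main/java/pnews/servlet/ArticleProvider.java b/war/src/main/java/pnews/servlet/ArticleProvider.java
index 5d4959b..ba96fe3 100644 (file)
--- a/war/src/main/java/pnews/servlet/ArticleProvider.java
+++ b/war/src/main/java/pnews/servlet/ArticleProvider.java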
@@ -4,6 +4,7 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -220,14 +221,17 @@ public class ArticleProvider {
                 final String FUNCTION_NAME = "getEntities";
                 EntityStat s;
                 List<EntityStat> stats;
+                Instant minInstant;
                 
                 LOG.entering(CLASS_NAME, FUNCTION_NAME, cat);
                 
                 articles = getArticles(cat, null);
                 
+                minInstant = Instant.now().minus(15, ChronoUnit.DAYS);
+                
                 entities = new HashMap<>();
-                for (Article a: articles) 
-                        if (a.getEntities() != null) {
+                for (Article a: articles)
+                        if (a.getPublicationDate().isAfter(minInstant) && a.getEntities() != null)
                                 for (String e: a.getEntities()) {
                                         s = entities.get(e);
                                         if (s == null) {
@@ -236,8 +240,7 @@ public class ArticleProvider {
                                         }
                                         s.increment();
                                 }                
-                        }
-                
+               
                 stats = new ArrayList<>(entities.values());
                 stats.sort(new Comparator<EntityStat>() {
 
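diff --git a/war/src/main/java/pnews/servlet/Config.java b/war/src/main/java/pnews/servlet/Config.java
index 36bdc62..78db694 100644 (file)
--- a/war/src/main/java/pnews/servlet/Config.java
+++ b/war/src/main/java/pnews/servlet/Config.java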
@@ -194,7 +194,7 @@ public class Config {
                 else
                         return true;
         }
-        
+
         public Feed[] getFeeds() {
                 return feeds;
         }
diff --git a/war/src/main/resources/en-ner-date.bin b/war/src/main/resources/en-ner-date.bin
deleted file mode 100644 (file)
index a69923a..0000000
Binary files a/war/src/main/resources/en-ner-date.bin and /dev/null differ
diff --git a/war/src/main/resources/en-ner-time.bin b/war/src/main/resources/en-ner-time.bin
deleted file mode 100644 (file)
index a5d8aa1..0000000
Binary files a/war/src/main/resources/en-ner-time.bin and /dev/null differ