added todo
[pnews.git] / war / src / main / java / pnews / OpenNLP.java
index 07fbba5..99e344d 100644 (file)
@@ -10,6 +10,7 @@ import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.tokenize.TokenizerME;
 import opennlp.tools.tokenize.TokenizerModel;
 import opennlp.tools.util.Span;
+import pnews.servlet.Config;
 
 /** http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example **/
 public class OpenNLP {
@@ -18,16 +19,12 @@ public class OpenNLP {
         private static TokenNameFinderModel organizationModel;
         private static TokenNameFinderModel personModel;
         private static TokenNameFinderModel locationModel;
-        private static TokenNameFinderModel timeModel;
         private static TokenizerModel tokenModel;
 
-        public static List<String> classify(String str, List<String> entities) throws IOException {
-                classify(str, getOrganizationModel(), entities);
-                
-                classify(str, getPersonModel(), entities);
-                classify(str, getLocationModel(), entities);
-                
-                classify(str, getTimeModel(), entities);                
+        public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
+                classify(str, getOrganizationModel(), entities, config);
+                classify(str, getPersonModel(), entities, config);
+                classify(str, getLocationModel(), entities, config);
                 
                 return entities;
         }
@@ -64,19 +61,8 @@ public class OpenNLP {
                 
                 return locationModel;
         }
-
-        private static TokenNameFinderModel getTimeModel() throws IOException {
-                synchronized (OpenNLP.class) {
-                        if (timeModel == null) {
-                                InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-time.bin");
-                                timeModel = new TokenNameFinderModel(inputStream);
-                        }
-                }
-                
-                return timeModel;
-        }
         
-        private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities) throws IOException {
+        private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
                 String entity;
                 
                 NameFinderME nameFinder = new NameFinderME(model);
@@ -95,8 +81,8 @@ public class OpenNLP {
                                         entity += " " + tokens[i];
                                 
                         LOG.finest(entity + " " + s.getProb() + " " + s.toString());
-                        if (!entities.contains(entity))
-                                entities.add(entity);
+                        if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
+                                entities.add(config.getEntityAlias(entity));
                 }
                 
                 return entities;