Blacklisted entities are now in the configuration file
[pnews.git] / war / src / main / java / pnews / NER.java
index 2745868..2055cf1 100644 (file)
@@ -8,6 +8,7 @@ import java.util.logging.Logger;
 import edu.stanford.nlp.ie.crf.CRFClassifier;
 import edu.stanford.nlp.util.CoreMap;
 import edu.stanford.nlp.util.Triple;
+import pnews.servlet.Config;
 
 /** https://stanfordnlp.github.io/CoreNLP/api.html */
 public class NER {
@@ -15,7 +16,7 @@ public class NER {
         private static final Logger LOG = Logger.getLogger(CLASS_NAME);
         private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
         
-        public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
+        public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
                 
                 List<Triple<String, Integer, Integer>> triples;
                 String w;
@@ -23,7 +24,7 @@ public class NER {
                 
                 LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
 
-                OpenNLP.classify(str, entities);
+                OpenNLP.classify(str, entities, config);
                                 
                 synchronized (classifier) {
                         triples = classifier.classifyToCharacterOffsets(str);
@@ -31,16 +32,10 @@ public class NER {
                  
                 for (Triple<String, Integer, Integer> t: triples) {
                         w = str.substring(t.second, t.third);
-                        if (!entities.contains(w))
+                        if (!config.isBlacklistedEntity(w) && !entities.contains(w))
                                 entities.add(w);
                 }
                 
-                entities.remove("CNET");
-                entities.remove("Read More");
-                entities.remove("New");
-                entities.remove("App");
-                entities.remove("Digital Trends");
-                
                 LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
                 
                 return entities;
@@ -49,7 +44,7 @@ public class NER {
         public static void main(String[] args) throws Exception {
                 List<String> lst;
                 
-                lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
+                lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
                 for (String str: lst)
                         System.out.println(str);
         }