Limit memory usage by sharing a single classifier instance
[pnews.git] / war / src / main / java / pnews / NER.java
index 3a6fc82..2745868 100644 (file)
@@ -6,16 +6,17 @@ import java.util.List;
 import java.util.logging.Logger;
 
 import edu.stanford.nlp.ie.crf.CRFClassifier;
-import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.util.CoreMap;
 import edu.stanford.nlp.util.Triple;
 
 /** https://stanfordnlp.github.io/CoreNLP/api.html */
 public class NER {
         private static final String CLASS_NAME = NER.class.getName();
-        private static final Logger LOG = Logger.getLogger(CLASS_NAME); 
+        private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+        private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
         
         public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
-                final CRFClassifier<CoreLabel> classifier = CRFClassifier.getDefaultClassifier();
+                
                 List<Triple<String, Integer, Integer>> triples;
                 String w;
                 final String FUNCTION_NAME = "classify";                       
@@ -26,17 +27,19 @@ public class NER {
                                 
                 synchronized (classifier) {
                         triples = classifier.classifyToCharacterOffsets(str);
-                        for (Triple<String, Integer, Integer> t: triples) {
-                                w = str.substring(t.second, t.third);
-                                if (!entities.contains(w))
-                                        entities.add(w);
-                        }
+                }
+                 
+                for (Triple<String, Integer, Integer> t: triples) {
+                        w = str.substring(t.second, t.third);
+                        if (!entities.contains(w))
+                                entities.add(w);
                 }
                 
                 entities.remove("CNET");
                 entities.remove("Read More");
                 entities.remove("New");
                 entities.remove("App");
+                entities.remove("Digital Trends");
                 
                 LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);