do classification in parallel
[pnews.git] / war / src / main / java / pnews / NER.java
index 2868239..bcb8951 100644 (file)
@@ -1,31 +1,59 @@
 package pnews;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
+import java.util.logging.Logger;
 
 import edu.stanford.nlp.ie.crf.CRFClassifier;
-import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
 import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.util.Triple;
 
 /** https://stanfordnlp.github.io/CoreNLP/api.html */
 public class NER {
-        public static void classify(String str) throws ClassCastException, ClassNotFoundException, IOException {
-                CRFClassifier<CoreLabel> classifier;
-                List<List<CoreLabel>> out;
-                String cat, w;
+        private static final String CLASS_NAME = NER.class.getName();
+        private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+        private static final ThreadLocal<CRFClassifier<CoreLabel>> classifier = new ThreadLocal<CRFClassifier<CoreLabel>>() {
+                @Override 
+                protected CRFClassifier<CoreLabel> initialValue() {
+                        return CRFClassifier.getDefaultClassifier();
+                }
+        };
+        
+        public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
                 
-                classifier = CRFClassifier.getDefaultClassifier();
-                out = classifier.classify(str);
+                List<Triple<String, Integer, Integer>> triples;
+                String w;
+                final String FUNCTION_NAME = "classify";                       
                 
-                for (List<CoreLabel> labels: out)
-                        for (CoreLabel l: labels) {
-                                cat = l.getString(AnswerAnnotation.class);
-                                w = l.word();
-                                System.out.println(cat + " " + w);
+                LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
+
+                OpenNLP.classify(str, entities);
+                                
+                synchronized (classifier) {
+                        triples = classifier.get().classifyToCharacterOffsets(str);
+                        for (Triple<String, Integer, Integer> t: triples) {
+                                w = str.substring(t.second, t.third);
+                                if (!entities.contains(w))
+                                        entities.add(w);
                         }
+                }
+                
+                entities.remove("CNET");
+                entities.remove("Read More");
+                entities.remove("New");
+                entities.remove("App");
+                
+                LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
+                
+                return entities;
         }
         
         public static void main(String[] args) throws Exception {
-                classify("I live in Washington.");
+                List<String> lst;
+                
+                lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
+                for (String str: lst)
+                        System.out.println(str);
         }
 }
\ No newline at end of file