Fixed multi-term entity extraction with Stanford NER
[pnews.git] / war / src / main / java / pnews / NER.java
index ac34c08..3a6fc82 100644 (file)
@@ -6,8 +6,8 @@ import java.util.List;
 import java.util.logging.Logger;
 
 import edu.stanford.nlp.ie.crf.CRFClassifier;
-import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
 import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.util.Triple;
 
 /** https://stanfordnlp.github.io/CoreNLP/api.html */
 public class NER {
@@ -15,25 +15,23 @@ public class NER {
         private static final Logger LOG = Logger.getLogger(CLASS_NAME); 
         
         public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
-                CRFClassifier<CoreLabel> classifier;
-                List<List<CoreLabel>> out;
-                String cat, w;
-                final String FUNCTION_NAME = "classify";                
+                final CRFClassifier<CoreLabel> classifier = CRFClassifier.getDefaultClassifier();
+                List<Triple<String, Integer, Integer>> triples;
+                String w;
+                final String FUNCTION_NAME = "classify";                       
                 
                 LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
 
                 OpenNLP.classify(str, entities);
-                
-                classifier = CRFClassifier.getDefaultClassifier();
-                out = classifier.classify(str);
-                
-                for (List<CoreLabel> labels: out)
-                        for (CoreLabel l: labels) {
-                                cat = l.getString(AnswerAnnotation.class);
-                                w = l.word();
-                                if (!cat.equals("O") && !entities.contains(w))
+                                
+                synchronized (classifier) {
+                        triples = classifier.classifyToCharacterOffsets(str);
+                        for (Triple<String, Integer, Integer> t: triples) {
+                                w = str.substring(t.second, t.third);
+                                if (!entities.contains(w))
                                         entities.add(w);
                         }
+                }
                 
                 entities.remove("CNET");
                 entities.remove("Read More");
@@ -46,6 +44,10 @@ public class NER {
         }
         
         public static void main(String[] args) throws Exception {
-                classify("I live in Washington.", new ArrayList<>());
+                List<String> lst;
+                
+                lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
+                for (String str: lst)
+                        System.out.println(str);
         }
 }
\ No newline at end of file