X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;h=2055cf1c27c82918aa24300bd606d67118246fdd;hb=6d94fd5a39e6f78a68201230bd57b9ceb95e125d;hp=2868239d90d1ef5f330777b73dd550ed46e5b0db;hpb=9dd396afa60532248fc053ef6063f7b602d58f36;p=pnews.git diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java index 2868239..2055cf1 100644 --- a/war/src/main/java/pnews/NER.java +++ b/war/src/main/java/pnews/NER.java @@ -1,31 +1,51 @@ package pnews; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.logging.Logger; import edu.stanford.nlp.ie.crf.CRFClassifier; -import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation; -import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.Triple; +import pnews.servlet.Config; /** https://stanfordnlp.github.io/CoreNLP/api.html */ public class NER { - public static void classify(String str) throws ClassCastException, ClassNotFoundException, IOException { - CRFClassifier classifier; - List> out; - String cat, w; + private static final String CLASS_NAME = NER.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); + + public static List classify(String str, List entities, Config config) throws ClassCastException, ClassNotFoundException, IOException { + + List> triples; + String w; + final String FUNCTION_NAME = "classify"; - classifier = CRFClassifier.getDefaultClassifier(); - out = classifier.classify(str); + LOG.entering(CLASS_NAME, FUNCTION_NAME, str); + + OpenNLP.classify(str, entities, config); + + synchronized (classifier) { + triples = classifier.classifyToCharacterOffsets(str); + } + + for (Triple t: triples) { + w = str.substring(t.second, t.third); + if (!config.isBlacklistedEntity(w) && !entities.contains(w)) + entities.add(w); + } - for (List labels: out) - for (CoreLabel l: labels) { - cat = l.getString(AnswerAnnotation.class); - w = l.word(); - System.out.println(cat + " " + w); - } + LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities); + + return entities; } public static void main(String[] args) throws Exception { - classify("I live in Washington."); + List lst; + + lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config()); + for (String str: lst) + System.out.println(str); } } \ No newline at end of file