X-Git-Url: http://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;h=bcb8951fb6904a02db5b2c6d42ef608134cb8ced;hb=7e7f5169cbba419822c4fd7e05a85e81972a9fd6;hp=f8238c1d7ce51869727304c65b9336fb5b08ca13;hpb=56c07f5de3319eb61182b7100855801644538e6f;p=pnews.git diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java index f8238c1..bcb8951 100644 --- a/war/src/main/java/pnews/NER.java +++ b/war/src/main/java/pnews/NER.java @@ -6,41 +6,54 @@ import java.util.List; import java.util.logging.Logger; import edu.stanford.nlp.ie.crf.CRFClassifier; -import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation; import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.util.Triple; /** https://stanfordnlp.github.io/CoreNLP/api.html */ public class NER { private static final String CLASS_NAME = NER.class.getName(); - private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final ThreadLocal> classifier = new ThreadLocal>() { + @Override + protected CRFClassifier initialValue() { + return CRFClassifier.getDefaultClassifier(); + } + }; - public static String[] classify(String str) throws ClassCastException, ClassNotFoundException, IOException { - CRFClassifier classifier; - List> out; - String cat, w; - List entities; - final String FUNCTION_NAME = "classify"; + public static List classify(String str, List entities) throws ClassCastException, ClassNotFoundException, IOException { - LOG.entering(CLASS_NAME, FUNCTION_NAME, str); - - classifier = CRFClassifier.getDefaultClassifier(); - out = classifier.classify(str); + List> triples; + String w; + final String FUNCTION_NAME = "classify"; - entities = new ArrayList<>(); - for (List labels: out) - for (CoreLabel l: labels) { - cat = l.getString(AnswerAnnotation.class); - w = l.word(); - if (!cat.equals("O") && !entities.contains(w)) + LOG.entering(CLASS_NAME, FUNCTION_NAME, str); + + OpenNLP.classify(str, entities); + + synchronized (classifier) { + triples = classifier.get().classifyToCharacterOffsets(str); + for (Triple t: triples) { + w = str.substring(t.second, t.third); + if (!entities.contains(w)) entities.add(w); } + } + + entities.remove("CNET"); + entities.remove("Read More"); + entities.remove("New"); + entities.remove("App"); LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities); - return entities.toArray(new String[0]); + return entities; } public static void main(String[] args) throws Exception { - classify("I live in Washington."); + List lst; + + lst = classify("I live in Washington and New York in United States.", new ArrayList<>()); + for (String str: lst) + System.out.println(str); } } \ No newline at end of file