X-Git-Url: https://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fnet%2Fwpitchoune%2Fpnews%2Fclassifier%2FNamedEntityRecognizer.java;fp=war%2Fsrc%2Fmain%2Fjava%2Fnet%2Fwpitchoune%2Fpnews%2Fclassifier%2FNamedEntityRecognizer.java;h=0f9ee73ff06479bc0b78cde5ea5b8d77fa5cd227;hp=0000000000000000000000000000000000000000;hb=aff83c8798602b535d13edeaffdb8f4238e2bbf5;hpb=88a7ba9745b8318ca6c4f741906a40e3d6a8f07e diff --git a/war/src/main/java/net/wpitchoune/pnews/classifier/NamedEntityRecognizer.java b/war/src/main/java/net/wpitchoune/pnews/classifier/NamedEntityRecognizer.java new file mode 100644 index 0000000..0f9ee73 --- /dev/null +++ b/war/src/main/java/net/wpitchoune/pnews/classifier/NamedEntityRecognizer.java @@ -0,0 +1,51 @@ +package net.wpitchoune.pnews.classifier; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import edu.stanford.nlp.ie.crf.CRFClassifier; +import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.Triple; +import net.wpitchoune.pnews.Config; + +/** https://stanfordnlp.github.io/CoreNLP/api.html */ +public class NamedEntityRecognizer { + private static final String CLASS_NAME = NamedEntityRecognizer.class.getName(); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); + + public static List classify(String str, List entities, Config config) throws ClassCastException, ClassNotFoundException, IOException { + + List> triples; + String w; + final String FUNCTION_NAME = "classify"; + + LOG.entering(CLASS_NAME, FUNCTION_NAME, str); + + OpenNLP.classify(str, entities, config); + + synchronized (classifier) { + triples = classifier.classifyToCharacterOffsets(str); + } + + for (Triple t: triples) { + w = str.substring(t.second, t.third); + if (!config.isBlacklistedEntity(w) && !entities.contains(w)) + entities.add(config.getEntityAlias(w)); + } + + LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities); + + return entities; + } + + public static void main(String[] args) throws Exception { + List lst; + + lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config()); + for (String str: lst) + System.out.println(str); + } +} \ No newline at end of file