/*
 * Recovered from a gitweb deletion diff ("deleted file mode 100644",
 * index 5e7ce29..0000000) of war/src/main/java/pnews/NER.java.
 *
 * X-Git-Url: https://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;fp=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;h=0000000000000000000000000000000000000000;hp=5e7ce292a143a3865dd94ff7e143bb8028798740;hb=aff83c8798602b535d13edeaffdb8f4238e2bbf5;hpb=88a7ba9745b8318ca6c4f741906a40e3d6a8f07e
 *
 * NOTE(review): the generic type arguments were missing from the recovered
 * text and have been re-added here; confirm them against the repository
 * history.
 */
package pnews;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Triple;
import pnews.servlet.Config;

/**
 * Extracts entities from text with the Stanford CRF classifier.
 *
 * https://stanfordnlp.github.io/CoreNLP/api.html
 */
public class NER {
        private static final String CLASS_NAME = NER.class.getName();
        private static final Logger LOG = Logger.getLogger(CLASS_NAME);
        /** Single classifier instance shared by every call to {@link #classify}. */
        private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();

        /**
         * Appends the entities found in {@code str} to {@code entities}.
         *
         * <p>The call first delegates to {@code OpenNLP.classify} with the same
         * arguments, then runs the shared CRF classifier over {@code str}. Each
         * span reported by the classifier is cut out of {@code str}; spans that
         * {@code config} reports as blacklisted are dropped, and for the others
         * the value of {@code config.getEntityAlias} is appended unless the list
         * already holds either the raw span text or that alias.
         *
         * @param str the text to analyse
         * @param entities the list to append to; it is modified in place
         * @param config supplies the entity blacklist and the entity aliases
         * @return the same {@code entities} instance that was passed in
         * @throws ClassCastException declared by the original signature
         * @throws ClassNotFoundException declared by the original signature
         * @throws IOException declared by the original signature
         */
        public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
                final String FUNCTION_NAME = "classify";
                List<Triple<String, Integer, Integer>> triples;

                LOG.entering(CLASS_NAME, FUNCTION_NAME, str);

                OpenNLP.classify(str, entities, config);

                // Calls into the shared classifier are serialized on the instance;
                // presumably it is not safe for concurrent use - TODO confirm.
                synchronized (classifier) {
                        triples = classifier.classifyToCharacterOffsets(str);
                }

                for (Triple<String, Integer, Integer> t : triples) {
                        // second/third are the begin/end character offsets of the span.
                        String w = str.substring(t.second, t.third);

                        if (config.isBlacklistedEntity(w)) {
                                continue;
                        }

                        String alias = config.getEntityAlias(w);

                        // The alias is what gets stored, so it has to be part of the
                        // duplicate check: testing only the raw text lets the same
                        // alias be appended once per mention.
                        if (entities.contains(w) || entities.contains(alias)) {
                                continue;
                        }

                        entities.add(alias);
                }

                LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);

                return entities;
        }

        /**
         * Manual smoke test: classifies a fixed sentence and prints every
         * resulting entity on its own line.
         *
         * @param args unused
         * @throws Exception if classification fails
         */
        public static void main(String[] args) throws Exception {
                List<String> lst;

                lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
                for (String str : lst) {
                        System.out.println(str);
                }
        }
}