X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;h=5e7ce292a143a3865dd94ff7e143bb8028798740;hb=9ff314621235d6b748abb128edf0331480d0eaaf;hp=274586869bcf68b6034d17fcb5c38e51fb728343;hpb=63c2717409a3235573418e6bc0d9bd0fae8356e4;p=pnews.git diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java index 2745868..5e7ce29 100644 --- a/war/src/main/java/pnews/NER.java +++ b/war/src/main/java/pnews/NER.java @@ -8,6 +8,7 @@ import java.util.logging.Logger; import edu.stanford.nlp.ie.crf.CRFClassifier; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Triple; +import pnews.servlet.Config; /** https://stanfordnlp.github.io/CoreNLP/api.html */ public class NER { @@ -15,7 +16,7 @@ public class NER { private static final Logger LOG = Logger.getLogger(CLASS_NAME); private static final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); - public static List classify(String str, List entities) throws ClassCastException, ClassNotFoundException, IOException { + public static List classify(String str, List entities, Config config) throws ClassCastException, ClassNotFoundException, IOException { List> triples; String w; @@ -23,7 +24,7 @@ public class NER { LOG.entering(CLASS_NAME, FUNCTION_NAME, str); - OpenNLP.classify(str, entities); + OpenNLP.classify(str, entities, config); synchronized (classifier) { triples = classifier.classifyToCharacterOffsets(str); @@ -31,16 +32,10 @@ public class NER { for (Triple t: triples) { w = str.substring(t.second, t.third); - if (!entities.contains(w)) - entities.add(w); + if (!config.isBlacklistedEntity(w) && !entities.contains(w)) + entities.add(config.getEntityAlias(w)); } - entities.remove("CNET"); - entities.remove("Read More"); - entities.remove("New"); - entities.remove("App"); - entities.remove("Digital Trends"); - LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities); return entities; @@ -49,7 +44,7 @@ public class NER { public static void main(String[] args) throws Exception { List lst; - lst = classify("I live in Washington and New York in United States.", new ArrayList<>()); + lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config()); for (String str: lst) System.out.println(str); }