X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FNER.java;h=274586869bcf68b6034d17fcb5c38e51fb728343;hb=63c2717409a3235573418e6bc0d9bd0fae8356e4;hp=3a6fc8256c86371d2fa9c54dff88675d204f36a7;hpb=72409cb0ef5532e14fa3f7cb9907bcf2f41f805f;p=pnews.git diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java index 3a6fc82..2745868 100644 --- a/war/src/main/java/pnews/NER.java +++ b/war/src/main/java/pnews/NER.java @@ -6,16 +6,17 @@ import java.util.List; import java.util.logging.Logger; import edu.stanford.nlp.ie.crf.CRFClassifier; -import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Triple; /** https://stanfordnlp.github.io/CoreNLP/api.html */ public class NER { private static final String CLASS_NAME = NER.class.getName(); - private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final Logger LOG = Logger.getLogger(CLASS_NAME); + private static final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); public static List classify(String str, List entities) throws ClassCastException, ClassNotFoundException, IOException { - final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); + List> triples; String w; final String FUNCTION_NAME = "classify"; @@ -26,17 +27,19 @@ public class NER { synchronized (classifier) { triples = classifier.classifyToCharacterOffsets(str); - for (Triple t: triples) { - w = str.substring(t.second, t.third); - if (!entities.contains(w)) - entities.add(w); - } + } + + for (Triple t: triples) { + w = str.substring(t.second, t.third); + if (!entities.contains(w)) + entities.add(w); } entities.remove("CNET"); entities.remove("Read More"); entities.remove("New"); entities.remove("App"); + entities.remove("Digital Trends"); LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);