From 72409cb0ef5532e14fa3f7cb9907bcf2f41f805f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Orsini Date: Thu, 2 Nov 2017 17:25:14 +0100 Subject: [PATCH] fixed multi term of standford ner --- war/src/main/java/pnews/NER.java | 32 +++++++++++++++++--------------- war/src/main/resources/feeds.json | 3 ++- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java index ac34c08..3a6fc82 100644 --- a/war/src/main/java/pnews/NER.java +++ b/war/src/main/java/pnews/NER.java @@ -6,8 +6,8 @@ import java.util.List; import java.util.logging.Logger; import edu.stanford.nlp.ie.crf.CRFClassifier; -import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation; import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.util.Triple; /** https://stanfordnlp.github.io/CoreNLP/api.html */ public class NER { @@ -15,25 +15,23 @@ public class NER { private static final Logger LOG = Logger.getLogger(CLASS_NAME); public static List classify(String str, List entities) throws ClassCastException, ClassNotFoundException, IOException { - CRFClassifier classifier; - List> out; - String cat, w; - final String FUNCTION_NAME = "classify"; + final CRFClassifier classifier = CRFClassifier.getDefaultClassifier(); + List> triples; + String w; + final String FUNCTION_NAME = "classify"; LOG.entering(CLASS_NAME, FUNCTION_NAME, str); OpenNLP.classify(str, entities); - - classifier = CRFClassifier.getDefaultClassifier(); - out = classifier.classify(str); - - for (List labels: out) - for (CoreLabel l: labels) { - cat = l.getString(AnswerAnnotation.class); - w = l.word(); - if (!cat.equals("O") && !entities.contains(w)) + + synchronized (classifier) { + triples = classifier.classifyToCharacterOffsets(str); + for (Triple t: triples) { + w = str.substring(t.second, t.third); + if (!entities.contains(w)) entities.add(w); } + } entities.remove("CNET"); entities.remove("Read More"); @@ -46,6 +44,10 @@ public class NER { } public static void main(String[] args) throws Exception { - classify("I live in Washington.", new ArrayList<>()); + List lst; + + lst = classify("I live in Washington and New York in United States.", new ArrayList<>()); + for (String str: lst) + System.out.println(str); } } \ No newline at end of file diff --git a/war/src/main/resources/feeds.json b/war/src/main/resources/feeds.json index e4d9079..0118278 100644 --- a/war/src/main/resources/feeds.json +++ b/war/src/main/resources/feeds.json @@ -105,6 +105,7 @@ "http://www.zdnet.com/news/rss.xml": { "categories": ["en_technologie"]}, "https://www.androidheadlines.com/feed": { "categories": ["en_technologie"]}, "https://www.nasa.gov/rss/dyn/breaking_news.rss": { "categories": ["en_technologie"]}, - "http://www.computerweekly.com/rss/RSS-Feed.xml": { "categories": ["en_technologie"]} + "http://www.computerweekly.com/rss/RSS-Feed.xml": { "categories": ["en_technologie"]}, + "https://www.debian.org/News/news": { "categories": ["en_technologie"]} } } -- 2.7.4