import java.util.logging.Logger;
import edu.stanford.nlp.ie.crf.CRFClassifier;
-import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.util.Triple;
/**
 * Named-entity extraction helper that delegates to Stanford's CRFClassifier.
 * API reference: https://stanfordnlp.github.io/CoreNLP/api.html
 *
 * NOTE(review): this text is a diff of the class, not a plain source file.
 * Lines starting with '-' belong to the previous implementation and lines
 * starting with '+' replace them; unmarked lines are shared by both versions.
 *
 * Previous version: classify(String) obtained a default classifier on every
 * call, collected each token whose AnswerAnnotation was not "O" (skipping
 * duplicates) and returned the result as a String[].
 *
 * New version: classify(String, List) keeps the classifier in a ThreadLocal,
 * calls OpenNLP.classify first, appends the substrings reported by
 * classifyToCharacterOffsets to the caller's list, removes four hard-coded
 * strings and returns that same list.
 *
 * NOTE(review): List, ArrayList and IOException are used below but their
 * imports are not visible in this view; confirm they exist in the full file.
 */
public class NER {
private static final String CLASS_NAME = NER.class.getName();
- private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+ private static final Logger LOG = Logger.getLogger(CLASS_NAME);
// One CRFClassifier per thread, created on that thread's first get() via getDefaultClassifier().
+ private static final ThreadLocal<CRFClassifier<CoreLabel>> classifier = new ThreadLocal<CRFClassifier<CoreLabel>>() {
+ @Override
+ protected CRFClassifier<CoreLabel> initialValue() {
+ return CRFClassifier.getDefaultClassifier();
+ }
+ };
- public static String[] classify(String str) throws ClassCastException, ClassNotFoundException, IOException {
- CRFClassifier<CoreLabel> classifier;
- List<List<CoreLabel>> out;
- String cat, w;
- List<String> entities;
- final String FUNCTION_NAME = "classify";
/**
 * Adds the entities found in {@code str} to {@code entities} and returns that list.
 *
 * @param str      text to analyse; also the string the reported character offsets index into
 * @param entities caller-supplied list that is modified in place (entries appended, then
 *                 four fixed strings removed); must be non-null and mutable
 * @return the same list instance that was passed as {@code entities}
 * @throws ClassCastException     declared by the signature; the raising call is not visible here
 * @throws ClassNotFoundException declared by the signature; the raising call is not visible here
 * @throws IOException            declared by the signature; the raising call is not visible here
 */
+ public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
- LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
-
- classifier = CRFClassifier.getDefaultClassifier();
- out = classifier.classify(str);
+ List<Triple<String, Integer, Integer>> triples;
+ String w;
+ final String FUNCTION_NAME = "classify";
- entities = new ArrayList<>();
- for (List<CoreLabel> labels: out)
- for (CoreLabel l: labels) {
- cat = l.getString(AnswerAnnotation.class);
- w = l.word();
- if (!cat.equals("O") && !entities.contains(w))
+ LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
+
// OpenNLP is defined outside this view; presumably it appends its own findings to entities -- TODO confirm.
+ OpenNLP.classify(str, entities);
+
// NOTE(review): the monitor is the shared static ThreadLocal object, not the per-thread
// classifier, so this block is mutually exclusive across all threads -- confirm that is intended.
+ synchronized (classifier) {
+ triples = classifier.get().classifyToCharacterOffsets(str);
+ for (Triple<String, Integer, Integer> t: triples) {
// second/third are used as begin (inclusive) and end (exclusive) offsets into str;
// the first component is not read here (presumably the entity label -- verify).
+ w = str.substring(t.second, t.third);
// Linear contains() check keeps the list free of duplicates of w.
+ if (!entities.contains(w))
entities.add(w);
}
+ }
+
// Drop a fixed set of strings from the result (presumably known false positives -- confirm).
// List.remove(Object) removes only the first matching element of each.
+ entities.remove("CNET");
+ entities.remove("Read More");
+ entities.remove("New");
+ entities.remove("App");
LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
- return entities.toArray(new String[0]);
+ return entities;
}
// Demo entry point: classifies a sample sentence; the new version also prints each result on its own line.
public static void main(String[] args) throws Exception {
- classify("I live in Washington.");
+ List<String> lst;
+
+ lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
+ for (String str: lst)
+ System.out.println(str);
}
}
\ No newline at end of file