- OpenNLP.classify(str, entities);
-
- classifier = CRFClassifier.getDefaultClassifier();
- out = classifier.classify(str);
-
- for (List<CoreLabel> labels: out)
- for (CoreLabel l: labels) {
- cat = l.getString(AnswerAnnotation.class);
- w = l.word();
- if (!cat.equals("O") && !entities.contains(w))
- entities.add(w);
- }
-
- entities.remove("CNET");
- entities.remove("Read More");
- entities.remove("New");
- entities.remove("App");
+ OpenNLP.classify(str, entities, config); // first pass via OpenNLP; presumably adds its findings to entities -- TODO confirm
+
+ synchronized (classifier) { // hold the classifier's monitor for the call; presumably the instance is shared across threads -- TODO confirm
+ triples = classifier.classifyToCharacterOffsets(str); // each triple's second/third are used below as begin/end offsets into str
+ }
+
+ for (Triple<String, Integer, Integer> t: triples) { // walk every span the classifier returned
+ w = str.substring(t.second, t.third); // cut the span's text out of the original input string
+ if (!config.isBlacklistedEntity(w) && !entities.contains(w)) // skip entries the config blacklists and ones already collected
+ entities.add(w);
+ }