+package net.wpitchoune.pnews.classifier;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.logging.Logger;
+
+import net.wpitchoune.pnews.Config;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+
+/** http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example **/
+public class OpenNLP {
+ private static final String CLASS_NAME = OpenNLP.class.getName();
+ private static final Logger LOG = Logger.getLogger(CLASS_NAME);
+ private static TokenNameFinderModel organizationModel;
+ private static TokenNameFinderModel personModel;
+ private static TokenNameFinderModel locationModel;
+ private static TokenizerModel tokenModel;
+
+ public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
+ classify(str, getOrganizationModel(), entities, config);
+ classify(str, getPersonModel(), entities, config);
+ classify(str, getLocationModel(), entities, config);
+
+ return entities;
+ }
+
+ private static TokenNameFinderModel getOrganizationModel() throws IOException {
+ synchronized (OpenNLP.class) {
+ if (organizationModel == null) {
+ InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin");
+ organizationModel = new TokenNameFinderModel(inputStream);
+ }
+ }
+
+ return organizationModel;
+ }
+
+ private static TokenNameFinderModel getPersonModel() throws IOException {
+ synchronized (OpenNLP.class) {
+ if (personModel == null) {
+ InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin");
+ personModel = new TokenNameFinderModel(inputStream);
+ }
+ }
+
+ return personModel;
+ }
+
+ private static TokenNameFinderModel getLocationModel() throws IOException {
+ synchronized (OpenNLP.class) {
+ if (locationModel == null) {
+ InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin");
+ locationModel = new TokenNameFinderModel(inputStream);
+ }
+ }
+
+ return locationModel;
+ }
+
+ private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
+ String entity;
+
+ NameFinderME nameFinder = new NameFinderME(model);
+ String[] tokens = tokenize(str);
+ Span nameSpans[] = nameFinder.find(tokens);
+
+ for(Span s: nameSpans) {
+ if (s.getProb() < 0.60)
+ continue ;
+
+ entity = null;
+ for (int i = s.getStart(); i < s.getEnd(); i++)
+ if (entity == null)
+ entity = tokens[i];
+ else
+ entity += " " + tokens[i];
+
+ LOG.finest(entity + " " + s.getProb() + " " + s.toString());
+ if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
+ entities.add(config.getEntityAlias(entity));
+ }
+
+ return entities;
+ }
+
+ private static String[] tokenize(String sentence) throws IOException {
+ synchronized (OpenNLP.class) {
+ if (tokenModel == null) {
+ InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin");
+ tokenModel = new TokenizerModel(inputStreamTokenizer);
+ }
+ }
+ TokenizerME tokenizer = new TokenizerME(tokenModel);
+ return tokenizer.tokenize(sentence);
+ }
+}