1 package net.wpitchoune.pnews.classifier;
3 import java.io.IOException;
4 import java.io.InputStream;
6 import java.util.logging.Logger;
8 import net.wpitchoune.pnews.Config;
9 import opennlp.tools.namefind.NameFinderME;
10 import opennlp.tools.namefind.TokenNameFinderModel;
11 import opennlp.tools.tokenize.TokenizerME;
12 import opennlp.tools.tokenize.TokenizerModel;
13 import opennlp.tools.util.Span;
15 /**
 * Static helpers that run Apache OpenNLP name finders (organization, person
 * and location) over a piece of text and collect the entities they find.
 *
 * The models are kept in static fields and loaded on first use inside
 * {@code synchronized (OpenNLP.class)} blocks.
 *
 * Reference:
 * http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example
 **/
16 public class OpenNLP {
17 private static final String CLASS_NAME = OpenNLP.class.getName();
18 private static final Logger LOG = Logger.getLogger(CLASS_NAME);
// Name-finder models; each stays null until its getXxxModel() accessor loads it.
19 private static TokenNameFinderModel organizationModel;
20 private static TokenNameFinderModel personModel;
21 private static TokenNameFinderModel locationModel;
// Tokenizer model; stays null until the first tokenize() call loads it.
22 private static TokenizerModel tokenModel;
/**
 * Runs the organization, person and location name finders over {@code str},
 * in that order, handing the same {@code entities} list and {@code config}
 * to each pass.
 *
 * NOTE(review): this listing shows neither a return statement nor the closing
 * brace of this method, so lines are missing here. The returned value is
 * presumably {@code entities} -- confirm against the full file.
 *
 * @param str      text to analyse
 * @param entities list passed to every per-model pass; those passes append to it
 * @param config   passed through unchanged to every per-model pass
 * @throws IOException declared by the model accessors and the per-model pass
 */
24 public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
25 classify(str, getOrganizationModel(), entities, config);
26 classify(str, getPersonModel(), entities, config);
27 classify(str, getLocationModel(), entities, config);
32 private static TokenNameFinderModel getOrganizationModel() throws IOException {
33 synchronized (OpenNLP.class) {
34 if (organizationModel == null) {
35 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin");
36 organizationModel = new TokenNameFinderModel(inputStream);
40 return organizationModel;
43 private static TokenNameFinderModel getPersonModel() throws IOException {
44 synchronized (OpenNLP.class) {
45 if (personModel == null) {
46 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin");
47 personModel = new TokenNameFinderModel(inputStream);
54 private static TokenNameFinderModel getLocationModel() throws IOException {
55 synchronized (OpenNLP.class) {
56 if (locationModel == null) {
57 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin");
58 locationModel = new TokenNameFinderModel(inputStream);
/**
 * Tokenizes {@code str}, runs a {@link NameFinderME} built from {@code model}
 * over the tokens, and adds the alias of each accepted entity to
 * {@code entities}.
 *
 * NOTE(review): several lines of this method are missing from this listing
 * (the statement guarded by the probability check, the declaration of
 * {@code entity}, the return statement and the closing braces). The comments
 * below describe only what is visible.
 *
 * @param str      text to analyse
 * @param model    name-finder model used for this pass
 * @param entities list that receives the entity aliases
 * @param config   supplies the blacklist check and the alias lookup
 * @throws IOException declared; {@code tokenize} may throw it while loading its model
 */
65 private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
// A fresh finder is built on every call; only the model is shared.
68 NameFinderME nameFinder = new NameFinderME(model);
69 String[] tokens = tokenize(str);
70 Span nameSpans[] = nameFinder.find(tokens);
72 for(Span s: nameSpans) {
// Spans are compared against a 0.60 probability threshold.
// NOTE(review): the statement this `if` guards is not visible here
// (presumably a `continue` that skips low-probability spans) -- confirm.
73 if (s.getProb() < 0.60)
// Walks the tokens covered by the span, [getStart(), getEnd()).
77 for (int i = s.getStart(); i < s.getEnd(); i++)
// Appends the token to the entity text, separated by a single space.
// NOTE(review): how `entity` is declared and initialised is not visible here.
81 entity += " " + tokens[i];
83 LOG.finest(entity + " " + s.getProb() + " " + s.toString());
// NOTE(review): the duplicate check uses the raw entity text, but the value
// stored is config.getEntityAlias(entity). If the alias differs from the raw
// text, the same alias could be added more than once -- consider checking
// the alias instead.
84 if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
85 entities.add(config.getEntityAlias(entity));
91 private static String[] tokenize(String sentence) throws IOException {
92 synchronized (OpenNLP.class) {
93 if (tokenModel == null) {
94 InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin");
95 tokenModel = new TokenizerModel(inputStreamTokenizer);
98 TokenizerME tokenizer = new TokenizerME(tokenModel);
99 return tokenizer.tokenize(sentence);