3 import java.io.IOException;
4 import java.io.InputStream;
6 import java.util.logging.Logger;
8 import opennlp.tools.namefind.NameFinderME;
9 import opennlp.tools.namefind.TokenNameFinderModel;
10 import opennlp.tools.tokenize.TokenizerME;
11 import opennlp.tools.tokenize.TokenizerModel;
12 import opennlp.tools.util.Span;
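/**
 * Named-entity recognition helpers built on Apache OpenNLP name-finder and tokenizer models.
 *
 * @see <a href="http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example">OpenNLP named entity recognition example</a>
 */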
15 public class OpenNLP {
// Fully qualified name of this class; used below as the logger name.
16 private static final String CLASS_NAME = OpenNLP.class.getName();
// java.util.logging logger for this class.
17 private static final Logger LOG = Logger.getLogger(CLASS_NAME);
// Name-finder models, one per entity type. Each field starts out null and is assigned by its
// accessor in this file on first use, inside a block synchronized on OpenNLP.class.
18 private static TokenNameFinderModel organizationModel;
19 private static TokenNameFinderModel personModel;
20 private static TokenNameFinderModel locationModel;
21 private static TokenNameFinderModel timeModel;
// Tokenizer model; assigned the same way on first use by tokenize(String).
22 private static TokenizerModel tokenModel;
24 public static List<String> classify(String str, List<String> entities) throws IOException {
25 classify(str, getOrganizationModel(), entities);
27 classify(str, getPersonModel(), entities);
28 classify(str, getLocationModel(), entities);
30 classify(str, getTimeModel(), entities);
35 private static TokenNameFinderModel getOrganizationModel() throws IOException {
36 synchronized (OpenNLP.class) {
37 if (organizationModel == null) {
38 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin");
39 organizationModel = new TokenNameFinderModel(inputStream);
43 return organizationModel;
46 private static TokenNameFinderModel getPersonModel() throws IOException {
47 synchronized (OpenNLP.class) {
48 if (personModel == null) {
49 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin");
50 personModel = new TokenNameFinderModel(inputStream);
57 private static TokenNameFinderModel getLocationModel() throws IOException {
58 synchronized (OpenNLP.class) {
59 if (locationModel == null) {
60 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin");
61 locationModel = new TokenNameFinderModel(inputStream);
68 private static TokenNameFinderModel getTimeModel() throws IOException {
69 synchronized (OpenNLP.class) {
70 if (timeModel == null) {
71 InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-time.bin");
72 timeModel = new TokenNameFinderModel(inputStream);
/**
 * Runs a single name-finder model over {@code str} and examines each span it reports.
 *
 * <p>NOTE(review): several statements of this method are not visible in this view (the
 * statements guarded by the two {@code if} checks, the declaration of {@code entity}, and the
 * return). The comments below describe only what the visible lines do.
 *
 * @param str the text to analyse; it is tokenized with {@code tokenize(String)}
 * @param model the name-finder model to apply
 * @param entities list that is checked for each candidate entity string
 * @return not visible here -- presumably {@code entities}; confirm
 * @throws IOException propagated from {@code tokenize(String)}
 */
79 private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities) throws IOException {
// A new NameFinderME is created from the model on every call.
82 NameFinderME nameFinder = new NameFinderME(model);
// The spans returned by find() are expressed as indices into this token array.
83 String[] tokens = tokenize(str);
84 Span nameSpans[] = nameFinder.find(tokens);
86 for(Span s: nameSpans) {
// 0.60 is the probability threshold. The guarded statement is not visible here
// (presumably it skips low-probability spans -- confirm).
87 if (s.getProb() < 0.60)
// Visit every token covered by the span: getStart() inclusive up to getEnd() exclusive.
91 for (int i = s.getStart(); i < s.getEnd(); i++)
// Append the token to the entity text, preceded by a single space.
95 entity += " " + tokens[i];
// Trace the entity text, its probability and the span itself at FINEST level.
97 LOG.finest(entity + " " + s.getProb() + " " + s.toString());
// Duplicate check against the supplied list. The guarded statement is not visible here
// (presumably it adds the entity -- confirm).
98 if (!entities.contains(entity))
105 public static String[] tokenize(String sentence) throws IOException {
106 synchronized (OpenNLP.class) {
107 if (tokenModel == null) {
108 InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin");
109 tokenModel = new TokenizerModel(inputStreamTokenizer);
112 TokenizerME tokenizer = new TokenizerME(tokenModel);
113 return tokenizer.tokenize(sentence);