+++ /dev/null
-package pnews;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.logging.Logger;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.Span;
-import pnews.servlet.Config;
-
-/** http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example **/
-public class OpenNLP {
- private static final String CLASS_NAME = OpenNLP.class.getName();
- private static final Logger LOG = Logger.getLogger(CLASS_NAME);
- private static TokenNameFinderModel organizationModel;
- private static TokenNameFinderModel personModel;
- private static TokenNameFinderModel locationModel;
- private static TokenizerModel tokenModel;
-
- public static List<String> classify(String str, List<String> entities, Config config) throws IOException {
- classify(str, getOrganizationModel(), entities, config);
- classify(str, getPersonModel(), entities, config);
- classify(str, getLocationModel(), entities, config);
-
- return entities;
- }
-
- private static TokenNameFinderModel getOrganizationModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (organizationModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin");
- organizationModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return organizationModel;
- }
-
- private static TokenNameFinderModel getPersonModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (personModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin");
- personModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return personModel;
- }
-
- private static TokenNameFinderModel getLocationModel() throws IOException {
- synchronized (OpenNLP.class) {
- if (locationModel == null) {
- InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin");
- locationModel = new TokenNameFinderModel(inputStream);
- }
- }
-
- return locationModel;
- }
-
- private static List<String> classify(String str, TokenNameFinderModel model, List<String> entities, Config config) throws IOException {
- String entity;
-
- NameFinderME nameFinder = new NameFinderME(model);
- String[] tokens = tokenize(str);
- Span nameSpans[] = nameFinder.find(tokens);
-
- for(Span s: nameSpans) {
- if (s.getProb() < 0.60)
- continue ;
-
- entity = null;
- for (int i = s.getStart(); i < s.getEnd(); i++)
- if (entity == null)
- entity = tokens[i];
- else
- entity += " " + tokens[i];
-
- LOG.finest(entity + " " + s.getProb() + " " + s.toString());
- if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
- entities.add(config.getEntityAlias(entity));
- }
-
- return entities;
- }
-
- public static String[] tokenize(String sentence) throws IOException {
- synchronized (OpenNLP.class) {
- if (tokenModel == null) {
- InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin");
- tokenModel = new TokenizerModel(inputStreamTokenizer);
- }
- }
- TokenizerME tokenizer = new TokenizerME(tokenModel);
- return tokenizer.tokenize(sentence);
- }
-}