X-Git-Url: http://git.wpitchoune.net/gitweb/?p=pnews.git;a=blobdiff_plain;f=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FOpenNLP.java;fp=war%2Fsrc%2Fmain%2Fjava%2Fpnews%2FOpenNLP.java;h=0000000000000000000000000000000000000000;hp=99e344df25a6fadeb3604d0386776f78f11cae5b;hb=aff83c8798602b535d13edeaffdb8f4238e2bbf5;hpb=88a7ba9745b8318ca6c4f741906a40e3d6a8f07e diff --git a/war/src/main/java/pnews/OpenNLP.java b/war/src/main/java/pnews/OpenNLP.java deleted file mode 100644 index 99e344d..0000000 --- a/war/src/main/java/pnews/OpenNLP.java +++ /dev/null @@ -1,101 +0,0 @@ -package pnews; - -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -import opennlp.tools.namefind.NameFinderME; -import opennlp.tools.namefind.TokenNameFinderModel; -import opennlp.tools.tokenize.TokenizerME; -import opennlp.tools.tokenize.TokenizerModel; -import opennlp.tools.util.Span; -import pnews.servlet.Config; - -/** http://www.devglan.com/artificial-intelligence/opennlp-named-entity-recognition-example **/ -public class OpenNLP { - private static final String CLASS_NAME = OpenNLP.class.getName(); - private static final Logger LOG = Logger.getLogger(CLASS_NAME); - private static TokenNameFinderModel organizationModel; - private static TokenNameFinderModel personModel; - private static TokenNameFinderModel locationModel; - private static TokenizerModel tokenModel; - - public static List classify(String str, List entities, Config config) throws IOException { - classify(str, getOrganizationModel(), entities, config); - classify(str, getPersonModel(), entities, config); - classify(str, getLocationModel(), entities, config); - - return entities; - } - - private static TokenNameFinderModel getOrganizationModel() throws IOException { - synchronized (OpenNLP.class) { - if (organizationModel == null) { - InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-organization.bin"); - organizationModel = new TokenNameFinderModel(inputStream); - } - } - - return organizationModel; - } - - private static TokenNameFinderModel getPersonModel() throws IOException { - synchronized (OpenNLP.class) { - if (personModel == null) { - InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-person.bin"); - personModel = new TokenNameFinderModel(inputStream); - } - } - - return personModel; - } - - private static TokenNameFinderModel getLocationModel() throws IOException { - synchronized (OpenNLP.class) { - if (locationModel == null) { - InputStream inputStream = OpenNLP.class.getResourceAsStream("/en-ner-location.bin"); - locationModel = new TokenNameFinderModel(inputStream); - } - } - - return locationModel; - } - - private static List classify(String str, TokenNameFinderModel model, List entities, Config config) throws IOException { - String entity; - - NameFinderME nameFinder = new NameFinderME(model); - String[] tokens = tokenize(str); - Span nameSpans[] = nameFinder.find(tokens); - - for(Span s: nameSpans) { - if (s.getProb() < 0.60) - continue ; - - entity = null; - for (int i = s.getStart(); i < s.getEnd(); i++) - if (entity == null) - entity = tokens[i]; - else - entity += " " + tokens[i]; - - LOG.finest(entity + " " + s.getProb() + " " + s.toString()); - if (!config.isBlacklistedEntity(entity) && !entities.contains(entity)) - entities.add(config.getEntityAlias(entity)); - } - - return entities; - } - - public static String[] tokenize(String sentence) throws IOException { - synchronized (OpenNLP.class) { - if (tokenModel == null) { - InputStream inputStreamTokenizer = OpenNLP.class.getResourceAsStream("/en-token.bin"); - tokenModel = new TokenizerModel(inputStreamTokenizer); - } - } - TokenizerME tokenizer = new TokenizerME(tokenModel); - return tokenizer.tokenize(sentence); - } -}