blacklisted entities are now in the configuration file
[pnews.git] / war / src / main / java / pnews / NER.java
1 package pnews;
2
3 import java.io.IOException;
4 import java.util.ArrayList;
5 import java.util.List;
6 import java.util.logging.Logger;
7
8 import edu.stanford.nlp.ie.crf.CRFClassifier;
9 import edu.stanford.nlp.util.CoreMap;
10 import edu.stanford.nlp.util.Triple;
11 import pnews.servlet.Config;
12
13 /** https://stanfordnlp.github.io/CoreNLP/api.html */
14 public class NER {
15         private static final String CLASS_NAME = NER.class.getName();
16         private static final Logger LOG = Logger.getLogger(CLASS_NAME);
17         private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
18         
19         public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
20                 
21                 List<Triple<String, Integer, Integer>> triples;
22                 String w;
23                 final String FUNCTION_NAME = "classify";                       
24                 
25                 LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
26
27                 OpenNLP.classify(str, entities, config);
28                                 
29                 synchronized (classifier) {
30                         triples = classifier.classifyToCharacterOffsets(str);
31                 }
32                  
33                 for (Triple<String, Integer, Integer> t: triples) {
34                         w = str.substring(t.second, t.third);
35                         if (!config.isBlacklistedEntity(w) && !entities.contains(w))
36                                 entities.add(w);
37                 }
38                 
39                 LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
40                 
41                 return entities;
42         }
43         
44         public static void main(String[] args) throws Exception {
45                 List<String> lst;
46                 
47                 lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
48                 for (String str: lst)
49                         System.out.println(str);
50         }
51 }