do classification in //
[pnews.git] / war / src / main / java / pnews / NER.java
1 package pnews;
2
3 import java.io.IOException;
4 import java.util.ArrayList;
5 import java.util.List;
6 import java.util.logging.Logger;
7
8 import edu.stanford.nlp.ie.crf.CRFClassifier;
9 import edu.stanford.nlp.ling.CoreLabel;
10 import edu.stanford.nlp.util.Triple;
11
12 /** https://stanfordnlp.github.io/CoreNLP/api.html */
13 public class NER {
14         private static final String CLASS_NAME = NER.class.getName();
15         private static final Logger LOG = Logger.getLogger(CLASS_NAME);
16         private static final ThreadLocal<CRFClassifier<CoreLabel>> classifier = new ThreadLocal<CRFClassifier<CoreLabel>>() {
17                 @Override 
18                 protected CRFClassifier<CoreLabel> initialValue() {
19                         return CRFClassifier.getDefaultClassifier();
20                 }
21         };
22         
23         public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
24                 
25                 List<Triple<String, Integer, Integer>> triples;
26                 String w;
27                 final String FUNCTION_NAME = "classify";                       
28                 
29                 LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
30
31                 OpenNLP.classify(str, entities);
32                                 
33                 synchronized (classifier) {
34                         triples = classifier.get().classifyToCharacterOffsets(str);
35                         for (Triple<String, Integer, Integer> t: triples) {
36                                 w = str.substring(t.second, t.third);
37                                 if (!entities.contains(w))
38                                         entities.add(w);
39                         }
40                 }
41                 
42                 entities.remove("CNET");
43                 entities.remove("Read More");
44                 entities.remove("New");
45                 entities.remove("App");
46                 
47                 LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
48                 
49                 return entities;
50         }
51         
52         public static void main(String[] args) throws Exception {
53                 List<String> lst;
54                 
55                 lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
56                 for (String str: lst)
57                         System.out.println(str);
58         }
59 }