projects
/
pnews.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
blacklisted entities are now in the configuration file
[pnews.git]
/
war
/
src
/
main
/
java
/
pnews
/
NER.java
diff --git a/war/src/main/java/pnews/NER.java b/war/src/main/java/pnews/NER.java
index 2745868..2055cf1 100644 (file)
--- a/war/src/main/java/pnews/NER.java
+++ b/war/src/main/java/pnews/NER.java
@@ -8,6 +8,7 @@ import java.util.logging.Logger;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Triple;
+import pnews.servlet.Config;
/** https://stanfordnlp.github.io/CoreNLP/api.html */
public class NER {
/** https://stanfordnlp.github.io/CoreNLP/api.html */
public class NER {
@@ -15,7 +16,7 @@ public class NER {
private static final Logger LOG = Logger.getLogger(CLASS_NAME);
private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
private static final Logger LOG = Logger.getLogger(CLASS_NAME);
private static final CRFClassifier<CoreMap> classifier = CRFClassifier.getDefaultClassifier();
- public static List<String> classify(String str, List<String> entities) throws ClassCastException, ClassNotFoundException, IOException {
+ public static List<String> classify(String str, List<String> entities, Config config) throws ClassCastException, ClassNotFoundException, IOException {
List<Triple<String, Integer, Integer>> triples;
String w;
List<Triple<String, Integer, Integer>> triples;
String w;
@@ -23,7 +24,7 @@ public class NER {
LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
LOG.entering(CLASS_NAME, FUNCTION_NAME, str);
- OpenNLP.classify(str, entities);
+ OpenNLP.classify(str, entities, config);
synchronized (classifier) {
triples = classifier.classifyToCharacterOffsets(str);
synchronized (classifier) {
triples = classifier.classifyToCharacterOffsets(str);
@@ -31,16 +32,10 @@ public class NER {
for (Triple<String, Integer, Integer> t: triples) {
w = str.substring(t.second, t.third);
for (Triple<String, Integer, Integer> t: triples) {
w = str.substring(t.second, t.third);
- if (!entities.contains(w))
+ if (!config.isBlacklistedEntity(w) && !entities.contains(w))
entities.add(w);
}
entities.add(w);
}
- entities.remove("CNET");
- entities.remove("Read More");
- entities.remove("New");
- entities.remove("App");
- entities.remove("Digital Trends");
-
LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
return entities;
LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
return entities;
@@ -49,7 +44,7 @@ public class NER {
public static void main(String[] args) throws Exception {
List<String> lst;
public static void main(String[] args) throws Exception {
List<String> lst;
- lst = classify("I live in Washington and New York in United States.", new ArrayList<>());
+ lst = classify("I live in Washington and New York in United States.", new ArrayList<>(), new Config());
for (String str: lst)
System.out.println(str);
}
for (String str: lst)
System.out.println(str);
}