Added support for entity aliases
author Jean-Philippe Orsini <orsinije@fr.ibm.com>
Sat, 4 Nov 2017 23:13:41 +0000 (00:13 +0100)
committer Jean-Philippe Orsini <orsinije@fr.ibm.com>
Sat, 4 Nov 2017 23:13:41 +0000 (00:13 +0100)
war/src/main/java/pnews/NER.java
war/src/main/java/pnews/OpenNLP.java
war/src/main/java/pnews/servlet/Config.java
war/src/main/resources/feeds.json

index 2055cf1..5e7ce29 100644 (file)
@@ -33,7 +33,7 @@ public class NER {
                 for (Triple<String, Integer, Integer> t: triples) {
                         w = str.substring(t.second, t.third);
                         if (!config.isBlacklistedEntity(w) && !entities.contains(w))
-                                entities.add(w);
+                                entities.add(config.getEntityAlias(w));
                 }
                 
                 LOG.exiting(CLASS_NAME, FUNCTION_NAME, entities);
index c383cee..e158a00 100644 (file)
@@ -97,7 +97,7 @@ public class OpenNLP {
                                 
                         LOG.finest(entity + " " + s.getProb() + " " + s.toString());
                         if (!config.isBlacklistedEntity(entity) && !entities.contains(entity))
-                                entities.add(entity);
+                                entities.add(config.getEntityAlias(entity));
                 }
                 
                 return entities;
index fec3770..46e75f6 100644 (file)
@@ -28,6 +28,7 @@ public class Config {
         private Category[] categories;
         private Language[] languages;
         private final Set<String> blacklistedEntities = new HashSet<>();
+        private final HashMap<String, String> entityAliases = new HashMap<>();
         private static final String CLASS_NAME = Config.class.getName();
         
         /**
@@ -83,13 +84,13 @@ public class Config {
         }
         
         private void loadEntities(JsonObject jroot) {
-                JsonObject jentities;
+                JsonObject jentities, jaliases;
                 JsonArray jblacklist;
                 final String METHOD_NAME = "loadEntities";
                 
                 jentities = jroot.getJsonObject("entities");
-                jblacklist = jentities.getJsonArray("blacklist");
                 
+                jblacklist = jentities.getJsonArray("blacklist");                
                 jblacklist.forEach((jv)-> {
                         JsonString js;
                         
@@ -97,7 +98,28 @@ public class Config {
                         blacklistedEntities.add(js.getString());
                 });
                 
+                jaliases = jentities.getJsonObject("aliases");
+                jaliases.forEach((k, v)-> {
+                        JsonArray jsources = (JsonArray)v;
+                        
+                        jsources.forEach((jsource)-> {
+                                entityAliases.put(((JsonString)jsource).getString(), k);
+                        });
+                });
+                
                 LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " blacklistedEntities=" + blacklistedEntities);
+                LOG.logp(Level.FINEST, CLASS_NAME, METHOD_NAME, " entityAliases=" + entityAliases);
+        }
+        
+        /**
+         * Looks up the alias registered for an entity name.
+         *
+         * @param entity the entity name to resolve
+         * @return the value stored for {@code entity} in {@code entityAliases}, or {@code entity} itself when that value is {@code null}
+         */
+        public String getEntityAlias(String entity) {
+                final String alias = entityAliases.get(entity);
+                return (alias != null) ? alias : entity;
         }
         
         public void loadConfig() throws UnsupportedEncodingException {
index 7ec9e49..337e19a 100644 (file)
                         "Digital Trends",
                         "Joey Sneddon",
                         "CA"
-                ]
+                ],
+                "aliases": {
+                        "U.S.": ["United States", "US"]
+                }
         }
 }