Skip to content

Commit 96400f9

Browse files
author
Rimi Das
committed
NLP3 main
1 parent 7324493 commit 96400f9

33 files changed

+14865
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package edu.buffalo.ir.proj4;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
import org.apache.solr.client.solrj.SolrQuery;
7+
import org.apache.solr.client.solrj.impl.HttpSolrServer;
8+
import org.apache.solr.client.solrj.response.QueryResponse;
9+
import org.apache.solr.common.SolrDocument;
10+
import org.apache.solr.common.SolrDocumentList;
11+
12+
import edu.buffalo.ir.proj4.nlpImpl.Answer;
13+
import edu.buffalo.ir.proj4.nlpImpl.AnswerFormulator;
14+
import edu.buffalo.ir.proj4.nlpImpl.ParserOutput;
15+
import edu.buffalo.ir.proj4.nlpImpl.QuestionParser;
16+
import edu.buffalo.ir.proj4.nlpInterface.AnswerFormulatorInterface;
17+
import edu.buffalo.ir.proj4.nlpInterface.QuestionParserInterface;
18+
19+
public class WelcomeAction {
20+
21+
private String question;
22+
public List<String> tweetList;
23+
private String finalAnswer;
24+
private String confidence;
25+
26+
27+
public String execute() throws Exception {
28+
tweetList = new ArrayList<String>();
29+
Answer ansObj = new Answer();
30+
//assuming question gives the keyword to be searched for
31+
//HttpSolrServer solr = new HttpSolrServer("http://localhost:8983/solr/test_core_5");
32+
QuestionParserInterface qIntf = new QuestionParser();
33+
ParserOutput pOut = qIntf.getQuery(question);
34+
// SolrQuery query = new SolrQuery();
35+
AnswerFormulatorInterface ansFormulated = new AnswerFormulator();
36+
System.out.println("here" +pOut.getAnswerType());
37+
// String queryString = "text_en:"+pOut.getQueryText();
38+
ansObj = ansFormulated.formulateAnswer(pOut);
39+
tweetList = ansObj.getTweetList();
40+
finalAnswer = ansObj.getAnswerString();
41+
confidence = ansObj.getConfidence();
42+
43+
// query.setQuery(queryString);
44+
// //query.addFilterQuery("cat:electronics","store:amazon.com");
45+
// query.setFields("id","price","merchant","cat","store");
46+
//query.set("q", "tweet_hashtags:Syria");
47+
//query.setStart(0);
48+
//query.set("defType", "edismax");
49+
/*
50+
QueryResponse response = solr.query(query);
51+
SolrDocumentList results = response.getResults();
52+
for (int i = 0; i < results.size(); ++i) {
53+
SolrDocument dd = results.get(i);
54+
tweetList.add((dd.getFieldValue("text_en")).toString());
55+
}
56+
*/
57+
58+
59+
return "success";
60+
}
61+
62+
63+
public String getFinalAnswer() {
64+
return finalAnswer;
65+
}
66+
67+
68+
public void setFinalAnswer(String finalAnswer) {
69+
this.finalAnswer = finalAnswer;
70+
}
71+
72+
73+
public String getConfidence() {
74+
return confidence;
75+
}
76+
77+
78+
public void setConfidence(String confidence) {
79+
this.confidence = confidence;
80+
}
81+
82+
83+
public String getQuestion() {
84+
return question;
85+
}
86+
87+
public void setQuestion(String question) {
88+
this.question = question;
89+
}
90+
91+
public List<String> getTweetList() {
92+
return tweetList;
93+
}
94+
95+
public void setTweetList(List<String> tweetList) {
96+
this.tweetList = tweetList;
97+
}
98+
99+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package edu.buffalo.ir.proj4.nlpImpl;
2+
3+
import java.util.List;
4+
5+
public class Answer {
6+
7+
List<String> tweetList;
8+
String answerString;
9+
String confidence;
10+
11+
public String getConfidence() {
12+
return confidence;
13+
}
14+
public void setConfidence(String confidence) {
15+
this.confidence = confidence;
16+
}
17+
public List<String> getTweetList() {
18+
return tweetList;
19+
}
20+
public void setTweetList(List<String> tweetList) {
21+
this.tweetList = tweetList;
22+
}
23+
public String getAnswerString() {
24+
return answerString;
25+
}
26+
public void setAnswerString(String answerString) {
27+
this.answerString = answerString;
28+
}
29+
30+
31+
32+
}
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
package edu.buffalo.ir.proj4.nlpImpl;
2+
import java.io.File;
3+
import java.io.IOException;
4+
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.Comparator;
7+
import java.util.HashMap;
8+
import java.util.Iterator;
9+
import java.util.LinkedHashMap;
10+
import java.util.LinkedList;
11+
import java.util.List;
12+
import java.util.Map;
13+
import java.util.Map.Entry;
14+
import java.util.Set;
15+
import java.util.regex.Matcher;
16+
import java.util.regex.Pattern;
17+
18+
import org.json.JSONArray;
19+
import org.json.JSONException;
20+
import org.json.JSONObject;
21+
22+
import edu.buffalo.ir.proj4.nlpInterface.AnswerFormulatorInterface;
23+
import edu.buffalo.ir.proj4.nlpInterface.SolrQueryInterface;
24+
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
25+
import edu.stanford.nlp.ie.crf.CRFClassifier;
26+
import edu.stanford.nlp.ling.CoreLabel;
27+
public class AnswerFormulator implements AnswerFormulatorInterface{
28+
29+
String Label = "";
30+
SolrQueryInterface solrQueryInterface = new SolrQuery();
31+
AbstractSequenceClassifier<CoreLabel> classifier;
32+
static Matcher m ;
33+
/*public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
34+
AnswerFormulator af = new AnswerFormulator();
35+
ParserOutput pOutputObj = new ParserOutput();
36+
pOutputObj.setAnswerType("PERSON");
37+
pOutputObj.setQueryText("minister India");
38+
af.formulateAnswer(pOutputObj);
39+
}*/
40+
41+
public AnswerFormulator() throws ClassNotFoundException, ClassCastException, IOException
42+
{
43+
44+
ClassLoader classLoader = getClass().getClassLoader();
45+
File file = new File(classLoader.getResource(("edu/buffalo/ir/proj4/nlpImpl/english.all.3class.distsim.crf.ser.gz")).getFile());
46+
System.out.println(file.getPath());
47+
classifier = CRFClassifier.getClassifier(file.getPath());
48+
}
49+
50+
public Answer formulateAnswer(ParserOutput pOutputObj) throws IOException, ClassCastException, ClassNotFoundException
51+
{
52+
53+
54+
Label = pOutputObj.getAnswerType();
55+
System.out.println("label =========="+Label);
56+
HashMap<String, Integer> answerMap = new HashMap<String, Integer>();
57+
58+
59+
String final_answer = "No definite answer found";
60+
String jsonString = solrQueryInterface.callURL(pOutputObj.getQueryText());
61+
//System.out.println("\n\njsonString: " + jsonString);
62+
63+
// String serializedClassifier = "C:\\Users\\Raman\\Documents\\NetBeansProjects\\IRproject4\\src\\irproject4\\classifiers\\english.all.3class.distsim.crf.ser.gz";
64+
// AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(serializedClassifier);
65+
66+
ArrayList<String> tweet_text = new ArrayList<String>();
67+
ArrayList<String> final_tweet_text = new ArrayList<String>();
68+
// Replace this try catch block for all below subsequent examples
69+
try {
70+
//JSONObject opoutput=jsonString
71+
JSONArray jsonArray = new JSONArray("[" + jsonString + "]");
72+
//System.out.println("\n\njsonArrayelementOne: " + jsonArray.length());
73+
//System.out.println(jsonArray.getJSONObject(0));
74+
JSONObject first_elementObjects = jsonArray.getJSONObject(0);
75+
//System.out.println(first_elementObjects.getJSONObject("response"));
76+
JSONObject second_elementObjects = first_elementObjects.getJSONObject("response");
77+
//System.out.println(second_elementObjects.getJSONArray("docs"));
78+
JSONArray docs = second_elementObjects.getJSONArray("docs");
79+
80+
for (int i = 0; i < docs.length(); i++) {
81+
82+
// if both event_values and event_codes are of equal length
83+
JSONObject tweets = (JSONObject) docs.get(i);
84+
// System.out.println("tweets ==== "+ tweets);
85+
// System.out.println("text : "+tweets.getString("tweet_text"));
86+
//System.out.println(tweets.optJSONArray("tweet_text").getString(0));
87+
tweet_text.add(tweets.optJSONArray("tweet_text").getString(0).trim());
88+
//System.out.println(docs.get(i));
89+
//valueList.add(event_codes.getString(i));
90+
//displayList.add(event_values.getString(i));
91+
}
92+
} catch (JSONException e) {
93+
e.printStackTrace();
94+
}
95+
96+
if ("DEFINITION".equals(Label)) {
97+
String split_query[] = pOutputObj.getQueryText().split("\\s+");
98+
String entity = split_query[0].toLowerCase();
99+
// String entity = "demonetisation"; // make this dynamic
100+
int StrCounter = 0;
101+
for (String str_tweet : tweet_text) {
102+
//String regex = "^" + entity + "+[A-Za-z,;'\"\\s]+[.?!]$";
103+
//String regex = "(donald trump,)[A-Za-z,;'\"\\s]+[.?!]";
104+
String regex = "(" + entity + ",)[A-Za-z,;'\"\\s]+[.?!]";
105+
//System.out.println(regex);
106+
Pattern p = Pattern.compile(regex);
107+
Matcher m = p.matcher(str_tweet.toLowerCase());
108+
StrCounter++;
109+
if (m.find() && StrCounter==1) {
110+
System.out.println(m.group());
111+
final_answer = m.group();
112+
}
113+
114+
}
115+
} else {
116+
for (String str_tweet : tweet_text) {
117+
String categorized_text = classifier.classifyToString(str_tweet.trim(), "tabbedEntities", false);
118+
//System.out.println(categorized_text);
119+
String split_text[] = categorized_text.split("\\r\\n|\\n|\\r");
120+
121+
for (String line : split_text) {
122+
if (!line.isEmpty()) {
123+
String[] splited_WhiteSpace = line.split("\\t");
124+
String name = "";
125+
String label = "";
126+
name = splited_WhiteSpace[0];
127+
label = splited_WhiteSpace[1];
128+
if (!name.isEmpty()) {
129+
if (Label.equals(label)) {
130+
if (answerMap.containsKey(name.toLowerCase())) {
131+
int val = answerMap.get(name.toLowerCase());
132+
val++;
133+
answerMap.put(name.toLowerCase(), val);
134+
} else {
135+
answerMap.put(name.toLowerCase(), 1);
136+
}
137+
}
138+
}
139+
}
140+
}
141+
}
142+
//}
143+
144+
//}
145+
}
146+
Map<String, Integer> sorted_answerMap = sortByValues(answerMap);
147+
148+
Set<Entry<String, Integer>> answerSet = sorted_answerMap.entrySet();
149+
150+
String answer_conf = "";
151+
Iterator iAnswerset = answerSet.iterator();
152+
String answer ="";
153+
int topTweetCounter = 0;
154+
for (String str_tweet : tweet_text) {
155+
//System.out.println("Item is: " + str_tweet);
156+
//if(topTweetCounter<=5 && (!str_tweet.contains("AIB") && !str_tweet.contains("Aib")))
157+
if(topTweetCounter<=5)
158+
{
159+
final_tweet_text.add(str_tweet);
160+
topTweetCounter++;
161+
}
162+
else if(topTweetCounter>5)
163+
break;
164+
165+
}
166+
if (!answerSet.isEmpty() )
167+
{
168+
int mapCounter = 0;
169+
while (iAnswerset.hasNext() && (mapCounter < 1)) {
170+
Map.Entry<String, Integer> conceptMe = (Map.Entry<String, Integer>) iAnswerset.next();
171+
answer = conceptMe.getKey().toString();
172+
answer_conf = conceptMe.getValue().toString();
173+
final_answer = convert_each_letter_to_uppercase(answer);
174+
System.out.println("Answer : " + final_answer + " " + " ; Confidence : " + answer_conf);
175+
if (mapCounter <= 5) {
176+
// System.exit(-1);
177+
mapCounter++;
178+
179+
}
180+
181+
}
182+
}
183+
Answer ans = new Answer();
184+
ans.setTweetList(final_tweet_text);
185+
ans.setAnswerString(final_answer);
186+
int conf = 0;
187+
if(null!= answer_conf && ""!= answer_conf)
188+
conf = Integer.parseInt(answer_conf);
189+
if( conf > 100)
190+
answer_conf = "100";
191+
ans.setConfidence(answer_conf);
192+
return ans;
193+
194+
}
195+
196+
public static String convert_each_letter_to_uppercase(String str) {
197+
198+
StringBuffer stringbf = new StringBuffer();
199+
m = Pattern.compile("([a-z])([a-z]*)",
200+
Pattern.CASE_INSENSITIVE).matcher(str);
201+
while (m.find()) {
202+
m.appendReplacement(stringbf,
203+
m.group(1).toUpperCase() + m.group(2).toLowerCase());
204+
}
205+
return m.appendTail(stringbf).toString();
206+
}
207+
208+
private static HashMap<String, Integer> sortByValues(HashMap<String, Integer> map) {
209+
List list = new LinkedList(map.entrySet());
210+
// Defined Custom Comparator here
211+
Collections.sort(list, new Comparator() {
212+
public int compare(Object o1, Object o2) {
213+
return ((Comparable) ((Map.Entry) (o2)).getValue())
214+
.compareTo(((Map.Entry) (o1)).getValue());
215+
}
216+
});
217+
// Here I am copying the sorted list in HashMap
218+
// using LinkedHashMap to preserve the insertion order
219+
HashMap<String, Integer> sortedHashMap = new LinkedHashMap<String, Integer>();
220+
for (Iterator it = list.iterator(); it.hasNext();) {
221+
Map.Entry<String, Integer> entry = (Map.Entry) it.next();
222+
sortedHashMap.put(entry.getKey(), entry.getValue());
223+
}
224+
return sortedHashMap;
225+
}
226+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package edu.buffalo.ir.proj4.nlpImpl;
2+
3+
/**
 * Mutable value holder for the result of parsing a question: the expected
 * answer type and the query text to search with. Both properties are
 * {@code null} until set.
 */
public class ParserOutput {

    /** Query text derived from the question. */
    String queryText;

    /** Expected answer type for the question. */
    String answerType;

    /** @return the query text, or {@code null} if none was set */
    public String getQueryText() {
        return queryText;
    }

    /** @param queryText the query text to store */
    public void setQueryText(String queryText) {
        this.queryText = queryText;
    }

    /** @return the answer type, or {@code null} if none was set */
    public String getAnswerType() {
        return answerType;
    }

    /** @param answerType the answer type to store */
    public void setAnswerType(String answerType) {
        this.answerType = answerType;
    }

}

0 commit comments

Comments
 (0)