From 1dd8560d3b970d6f9eb18843a03bd51418b1844f Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Wed, 19 Oct 2016 18:04:33 -0700 Subject: [PATCH 01/16] changed version to v1 --- language/movie_nl/main.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index ba5c63b60b9..c85a22c192e 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -24,6 +24,9 @@ from oauth2client.client import GoogleCredentials import requests +# TODO REMOVE - when discovery is public +DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' + 'version=v1&labels=GOOGLE_INTERNAL') def analyze_document(service, document): """Analyze the document and get the distribution of sentiments and @@ -98,7 +101,7 @@ def extract_all_sentences(self, service): docs = service.documents() request_body = get_request_body( self.text, - syntax=True, + syntax=False, entities=True, sentiment=False) request = docs.annotateText(body=request_body) @@ -319,8 +322,9 @@ def get_service(): credentials = GoogleCredentials.get_application_default() - return discovery.build('language', 'v1beta1', - credentials=credentials) + return discovery.build('language', 'v1', + credentials=credentials, + discoveryServiceUrl=DISCOVERY_URL) def analyze(input_dir, sentiment_writer, entity_writer, sample, log_file): From 2753f2faabcaab438e77a9fd640e1ea33c1c6974 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 24 Oct 2016 13:37:41 -0700 Subject: [PATCH 02/16] made changes to main for api changes --- language/movie_nl/main.py | 46 +++++++-------------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index c85a22c192e..679da5cd305 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -33,10 +33,7 @@ def analyze_document(service, document): the movie name.""" logging.info('Analyzing {}'.format(document.doc_id)) - sentences, entities = document.extract_all_sentences(service) - - sentiments = [get_sentiment(service, sentence) for sentence in sentences] - + sentiments, entities = document.extract_sentiment_entities(service) return sentiments, entities @@ -59,29 +56,6 @@ def get_request_body(text, syntax=True, entities=True, sentiment=True): return body -def get_sentiment(service, sentence): - """Get the sentence-level sentiment.""" - body = get_request_body( - sentence, syntax=False, entities=True, sentiment=True) - - docs = service.documents() - request = docs.annotateText(body=body) - - response = request.execute(num_retries=3) - - sentiment = response.get('documentSentiment') - - if sentiment is None: - return (None, None) - else: - pol = sentiment.get('polarity') - mag = sentiment.get('magnitude') - - if pol is None and mag is not None: - pol = 0 - return (pol, mag) - - class Document(object): """Document class captures a single document of movie reviews.""" @@ -89,13 +63,13 @@ def __init__(self, text, doc_id, doc_path): self.text = text self.doc_id = doc_id self.doc_path = doc_path - self.sentence_entity_pair = None + self.sentiment_entity_pair = None self.label = None - def extract_all_sentences(self, service): + def extract_sentiment_entities(self, service): """Extract the sentences in a document.""" - if self.sentence_entity_pair is not None: + if self.sentiment_entity_pair is not None: return self.sentence_entity_pair docs = service.documents() @@ -103,18 +77,14 @@ def extract_all_sentences(self, service): self.text, syntax=False, entities=True, - sentiment=False) + sentiment=True) request = docs.annotateText(body=request_body) ent_list = [] response = request.execute() entities = response.get('entities', []) - sentences = response.get('sentences', []) - - sent_list = [ - sentence.get('text', {}).get('content') for sentence in sentences - ] + documentSentiment = response.get('documentSentiment', {}) for entity in entities: ent_type = entity.get('type') @@ -123,9 +93,9 @@ def extract_all_sentences(self, service): if ent_type == 'PERSON' and wiki_url is not None: ent_list.append(wiki_url) - self.sentence_entity_pair = (sent_list, ent_list) + self.sentiment_entity_pair = (documentSentiment, ent_list) - return self.sentence_entity_pair + return self.sentiment_entity_pair def to_sentiment_json(doc_id, sent, label): From 1e8f574f83703f8625483082c042f8781e53ecbb Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Wed, 26 Oct 2016 12:07:33 -0700 Subject: [PATCH 03/16] added score to get sentiment --- language/movie_nl/main.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index 679da5cd305..d73550f1e3f 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -21,6 +21,7 @@ from googleapiclient import discovery from googleapiclient.errors import HttpError +import httplib2 from oauth2client.client import GoogleCredentials import requests @@ -173,18 +174,9 @@ def get_sentiment_entities(service, document): """ sentiments, entities = analyze_document(service, document) + score = sentiments.get('score') - sentiments = [sent for sent in sentiments if sent[0] is not None] - negative_sentiments = [ - polarity for polarity, magnitude in sentiments if polarity < 0.0] - positive_sentiments = [ - polarity for polarity, magnitude in sentiments if polarity > 0.0] - - negative = sum(negative_sentiments) - positive = sum(positive_sentiments) - total = positive + negative - - return (total, entities) + return (score, entities) def get_sentiment_label(sentiment): @@ -288,12 +280,15 @@ def rank_entities(reader, sentiment=None, topn=None, reverse_bool=False): def get_service(): - """Build a client to the Google Cloud Natural Language API.""" + """"Build a client to the Google Cloud Natural Language API.""" credentials = GoogleCredentials.get_application_default() - + scoped_credentials = credentials.create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + scoped_credentials.authorize(http) return discovery.build('language', 'v1', - credentials=credentials, + http=http, discoveryServiceUrl=DISCOVERY_URL) From 1a61c1517cf62d3df1cacda70f09cf03a249f1c5 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Wed, 26 Oct 2016 12:10:06 -0700 Subject: [PATCH 04/16] made fixes to unit test --- language/movie_nl/main_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/language/movie_nl/main_test.py b/language/movie_nl/main_test.py index 8e22a1da34e..9a87e2f368e 100644 --- a/language/movie_nl/main_test.py +++ b/language/movie_nl/main_test.py @@ -69,10 +69,10 @@ def test_process_movie_reviews(): entities = [json.loads(entity) for entity in entities] # assert sentiments - assert sentiments[0].get('sentiment') == 1.0 + assert sentiments[0].get('sentiment') == 0.9 assert sentiments[0].get('label') == 1 - assert sentiments[1].get('sentiment') == 1.0 + assert sentiments[1].get('sentiment') == 0.8 assert sentiments[1].get('label') == 1 # assert entities @@ -80,7 +80,7 @@ def test_process_movie_reviews(): assert entities[0].get('name') == 'Tom Cruise' assert (entities[0].get('wiki_url') == 'http://en.wikipedia.org/wiki/Tom_Cruise') - assert entities[0].get('sentiment') == 2.0 + assert entities[0].get('sentiment') == 1.7 def test_rank_positive_entities(capsys): From f9aa66cad5b2724427a7c8cc8964de28c81e0a60 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 7 Nov 2016 14:14:53 -0800 Subject: [PATCH 05/16] made changes to ocr_nl sample to point to v1 --- language/ocr_nl/main.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/language/ocr_nl/main.py b/language/ocr_nl/main.py index 6e329f53386..a649b600863 100755 --- a/language/ocr_nl/main.py +++ b/language/ocr_nl/main.py @@ -113,12 +113,19 @@ class TextAnalyzer(object): """Construct and use the Google Natural Language API service.""" def __init__(self, db_filename=None): + # TODO REMOVE - when discovery is public + DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' + 'version=v1&labels=GOOGLE_INTERNAL') + + credentials = GoogleCredentials.get_application_default() scoped_credentials = credentials.create_scoped( - ['https://www.googleapis.com/auth/cloud-platform']) + ['https://www.googleapis.com/auth/cloud-platform']) http = httplib2.Http() scoped_credentials.authorize(http) - self.service = discovery.build('language', 'v1beta1', http=http) + self.service = discovery.build('language', 'v1', + http=http, + discoveryServiceUrl=DISCOVERY_URL) # This list will store the entity information gleaned from the # image files. From 3eb84979e70866ce45da7f53d9d074a24af9a34f Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Thu, 10 Nov 2016 11:15:55 -0800 Subject: [PATCH 06/16] added two lines at the start of function --- language/movie_nl/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index d73550f1e3f..b0c396f9156 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -29,6 +29,7 @@ DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' 'version=v1&labels=GOOGLE_INTERNAL') + def analyze_document(service, document): """Analyze the document and get the distribution of sentiments and the movie name.""" From 5287e472bcd3c078555fc804a3f626f4fd1dd05e Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Thu, 10 Nov 2016 13:33:38 -0800 Subject: [PATCH 07/16] removed extra double quote --- language/movie_nl/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index b0c396f9156..f3f29871cd5 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -281,7 +281,7 @@ def rank_entities(reader, sentiment=None, topn=None, reverse_bool=False): def get_service(): - """"Build a client to the Google Cloud Natural Language API.""" + """Build a client to the Google Cloud Natural Language API.""" credentials = GoogleCredentials.get_application_default() scoped_credentials = credentials.create_scoped( From c3f44ab884a9b6a64ac8250769e33490f7766eff Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Sat, 12 Nov 2016 10:16:41 -0800 Subject: [PATCH 08/16] fixed lint errors --- language/ocr_nl/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/language/ocr_nl/main.py b/language/ocr_nl/main.py index a649b600863..70dc95a20a0 100755 --- a/language/ocr_nl/main.py +++ b/language/ocr_nl/main.py @@ -117,15 +117,14 @@ def __init__(self, db_filename=None): DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' 'version=v1&labels=GOOGLE_INTERNAL') - credentials = GoogleCredentials.get_application_default() scoped_credentials = credentials.create_scoped( ['https://www.googleapis.com/auth/cloud-platform']) http = httplib2.Http() scoped_credentials.authorize(http) self.service = discovery.build('language', 'v1', - http=http, - discoveryServiceUrl=DISCOVERY_URL) + http=http, + discoveryServiceUrl=DISCOVERY_URL) # This list will store the entity information gleaned from the # image files. From 884f431b6b9d2e3bf5b852e76fd5171d18f07176 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 11:42:47 -0800 Subject: [PATCH 09/16] removed discovery url --- language/movie_nl/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/language/movie_nl/main.py b/language/movie_nl/main.py index f3f29871cd5..d6ef5d169a5 100644 --- a/language/movie_nl/main.py +++ b/language/movie_nl/main.py @@ -25,10 +25,6 @@ from oauth2client.client import GoogleCredentials import requests -# TODO REMOVE - when discovery is public -DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' - 'version=v1&labels=GOOGLE_INTERNAL') - def analyze_document(service, document): """Analyze the document and get the distribution of sentiments and @@ -290,7 +286,7 @@ def get_service(): scoped_credentials.authorize(http) return discovery.build('language', 'v1', http=http, - discoveryServiceUrl=DISCOVERY_URL) + credentials=credentials) def analyze(input_dir, sentiment_writer, entity_writer, sample, log_file): From 531b256462777b27422c66e9f6857d9b884f6cc3 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 11:43:47 -0800 Subject: [PATCH 10/16] removed discovery url --- language/ocr_nl/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/language/ocr_nl/main.py b/language/ocr_nl/main.py index 70dc95a20a0..03fbdf9d458 100755 --- a/language/ocr_nl/main.py +++ b/language/ocr_nl/main.py @@ -113,10 +113,6 @@ class TextAnalyzer(object): """Construct and use the Google Natural Language API service.""" def __init__(self, db_filename=None): - # TODO REMOVE - when discovery is public - DISCOVERY_URL = ('https://language.googleapis.com/$discovery/rest?' - 'version=v1&labels=GOOGLE_INTERNAL') - credentials = GoogleCredentials.get_application_default() scoped_credentials = credentials.create_scoped( ['https://www.googleapis.com/auth/cloud-platform']) @@ -124,7 +120,7 @@ def __init__(self, db_filename=None): scoped_credentials.authorize(http) self.service = discovery.build('language', 'v1', http=http, - discoveryServiceUrl=DISCOVERY_URL) + credentials=credentials) # This list will store the entity information gleaned from the # image files. From 0df605cd961d797a5a26da6834ef6063a56d8749 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 14:09:14 -0800 Subject: [PATCH 11/16] fixed sentiment tutorial --- language/sentiment/sentiment_analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py index 8e250881305..9d2e4e4600d 100644 --- a/language/sentiment/sentiment_analysis.py +++ b/language/sentiment/sentiment_analysis.py @@ -22,7 +22,7 @@ def main(movie_review_filename): '''Run a sentiment analysis request on text within a passed filename.''' credentials = GoogleCredentials.get_application_default() - service = discovery.build('language', 'v1beta1', credentials=credentials) + service = discovery.build('language', 'v1', credentials=credentials) with open(movie_review_filename, 'r') as review_file: service_request = service.documents().analyzeSentiment( @@ -35,11 +35,11 @@ def main(movie_review_filename): ) response = service_request.execute() - polarity = response['documentSentiment']['polarity'] + score = response['documentSentiment']['score'] magnitude = response['documentSentiment']['magnitude'] - print('Sentiment: polarity of {} with magnitude of {}'.format( - polarity, magnitude)) + print('Sentiment: score of {} with magnitude of {}'.format( + score, magnitude)) return 0 From 6d25d408c699e759867b2840e4e7444a81d88540 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 14:10:25 -0800 Subject: [PATCH 12/16] fixed tests --- language/sentiment/sentiment_analysis_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/language/sentiment/sentiment_analysis_test.py b/language/sentiment/sentiment_analysis_test.py index d6b6a7abfea..ff28211944e 100644 --- a/language/sentiment/sentiment_analysis_test.py +++ b/language/sentiment/sentiment_analysis_test.py @@ -18,25 +18,25 @@ def test_pos(resource, capsys): main(resource('pos.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) + score = float(re.search('score of (.+?) with', out).group(1)) magnitude = float(re.search('magnitude of (.+?)', out).group(1)) - assert polarity * magnitude > 0 + assert score * magnitude > 0 def test_neg(resource, capsys): main(resource('neg.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) + score = float(re.search('score of (.+?) with', out).group(1)) magnitude = float(re.search('magnitude of (.+?)', out).group(1)) - assert polarity * magnitude < 0 + assert score * magnitude < 0 def test_mixed(resource, capsys): main(resource('mixed.txt')) out, err = capsys.readouterr() - polarity = float(re.search('polarity of (.+?) with', out).group(1)) - assert polarity <= 0.3 - assert polarity >= -0.3 + score = float(re.search('score of (.+?) with', out).group(1)) + assert score <= 0.3 + assert score >= -0.3 def test_neutral(resource, capsys): From 9b5c64b7483e4cbf443d6a0b80252e6774354226 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 14:32:03 -0800 Subject: [PATCH 13/16] added sentence level sentiment --- language/sentiment/sentiment_analysis.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py index 9d2e4e4600d..01bffd76d27 100644 --- a/language/sentiment/sentiment_analysis.py +++ b/language/sentiment/sentiment_analysis.py @@ -38,6 +38,16 @@ def main(movie_review_filename): score = response['documentSentiment']['score'] magnitude = response['documentSentiment']['magnitude'] + for i, sentence in enumerate(response['sentences']): + sentence_sentiment = sentence['sentiment']['score'] + print('Sentence {} has a sentiment score of {}'.format(i, sentence_sentiment)) + + print('Overall Sentiment: score of {} with magnitude of {}'.format( + score, + magnitude) + ) + return 0 + print('Sentiment: score of {} with magnitude of {}'.format( score, magnitude)) return 0 From affaf19e29e5c6617457ec31680146ed6eded223 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 17:11:22 -0800 Subject: [PATCH 14/16] fixed lint error --- language/sentiment/sentiment_analysis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/language/sentiment/sentiment_analysis.py b/language/sentiment/sentiment_analysis.py index 01bffd76d27..31a8b88c0c0 100644 --- a/language/sentiment/sentiment_analysis.py +++ b/language/sentiment/sentiment_analysis.py @@ -40,7 +40,9 @@ def main(movie_review_filename): for i, sentence in enumerate(response['sentences']): sentence_sentiment = sentence['sentiment']['score'] - print('Sentence {} has a sentiment score of {}'.format(i, sentence_sentiment)) + print('Sentence {} has a sentiment score of {}'.format( + i, + sentence_sentiment)) print('Overall Sentiment: score of {} with magnitude of {}'.format( score, From 0c60cb595841ee63a7f7056ea5722511ff0135f5 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 17:22:26 -0800 Subject: [PATCH 15/16] added sentiment fix --- language/movie_nl/main_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/movie_nl/main_test.py b/language/movie_nl/main_test.py index 9a87e2f368e..354e074ffca 100644 --- a/language/movie_nl/main_test.py +++ b/language/movie_nl/main_test.py @@ -72,7 +72,7 @@ def test_process_movie_reviews(): assert sentiments[0].get('sentiment') == 0.9 assert sentiments[0].get('label') == 1 - assert sentiments[1].get('sentiment') == 0.8 + assert sentiments[1].get('sentiment') == 0.9 assert sentiments[1].get('label') == 1 # assert entities From a63ab55c94dbeb34412b44ba6cb004b45be947d6 Mon Sep 17 00:00:00 2001 From: Puneith Kaul Date: Mon, 14 Nov 2016 17:28:14 -0800 Subject: [PATCH 16/16] fixed tests --- language/movie_nl/main_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/movie_nl/main_test.py b/language/movie_nl/main_test.py index 354e074ffca..74c62eb382a 100644 --- a/language/movie_nl/main_test.py +++ b/language/movie_nl/main_test.py @@ -80,7 +80,7 @@ def test_process_movie_reviews(): assert entities[0].get('name') == 'Tom Cruise' assert (entities[0].get('wiki_url') == 'http://en.wikipedia.org/wiki/Tom_Cruise') - assert entities[0].get('sentiment') == 1.7 + assert entities[0].get('sentiment') == 1.8 def test_rank_positive_entities(capsys):