Add samples for natural language api. #425

Merged: 10 commits, Jul 20, 2016
Changes from all commits
1 change: 1 addition & 0 deletions .coveragerc
@@ -9,6 +9,7 @@ include =
dns/*
datastore/*
error_reporting/*
language/*
managed_vms/*
monitoring/*
speech/*
24 changes: 23 additions & 1 deletion conftest.py
@@ -15,9 +15,10 @@
import os

import pytest
import requests


-class Namespace:
+class Namespace(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)

@@ -48,3 +49,24 @@ def resource(request):
testing resource"""
local_path = os.path.dirname(request.module.__file__)
return lambda *args: get_resource_path(args, local_path)


def fetch_gcs_resource(resource, tmpdir, _chunk_size=1024):
resp = requests.get(resource, stream=True)
dest_file = str(tmpdir.join(os.path.basename(resource)))
with open(dest_file, 'wb') as f:
for chunk in resp.iter_content(_chunk_size):
f.write(chunk)

return dest_file


@pytest.fixture(scope='module')
def remote_resource(cloud_config):
[Review comment] @waprin (Contributor), Jul 20, 2016:
think the return type is sufficiently complicated to warrant a doc string

[Author reply]:
Oh - good point.

"""Provides a function that downloads the given resource from Cloud
Storage, returning the path to the downloaded resource."""
remote_uri = 'http://storage.googleapis.com/{}/'.format(
cloud_config.storage_bucket)

return lambda path, tmpdir: fetch_gcs_resource(
remote_uri + path.strip('/'), tmpdir)
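The `remote_resource` fixture above just glues the bucket base URI onto a cleaned resource path before handing it to `fetch_gcs_resource`. A minimal sketch of that URI construction, pulled out as a standalone helper (the bucket and object names here are hypothetical, not from this PR):

```python
def build_remote_uri(storage_bucket, path):
    """Mirror of the URI construction inside the remote_resource fixture:
    the bucket name forms the base URI, and the resource path is appended
    with any surrounding slashes stripped so double slashes cannot occur."""
    remote_uri = 'http://storage.googleapis.com/{}/'.format(storage_bucket)
    return remote_uri + path.strip('/')

# Hypothetical bucket and object path, for illustration only.
uri = build_remote_uri('my-test-bucket', '/language/ocr_nl/foo.jpg')
```

Because the path is normalized with `strip('/')`, callers can pass paths with or without a leading slash and get the same URI.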
14 changes: 14 additions & 0 deletions language/README.md
@@ -0,0 +1,14 @@
# Google Cloud Natural Language API examples

This directory contains Python examples that use the
[Google Cloud Natural Language API](https://cloud.google.com/natural-language/).

- [api](api) has a simple command line tool that shows off the API's features.

- [ocr_nl](ocr_nl) uses the [Cloud Vision API](https://cloud.google.com/vision/)
to extract text from images, then uses the NL API to extract entity information
from those texts, and stores the extracted information in a database in support
of further analysis and correlation.

- [syntax_triples](syntax_triples) uses syntax analysis to find
subject-verb-object triples in a given piece of text.
87 changes: 87 additions & 0 deletions language/api/README.md
@@ -0,0 +1,87 @@

# Google Cloud Natural Language API Sample

This Python sample demonstrates the use of the [Google Cloud Natural Language API][NL-Docs]
for sentiment, entity, and syntax analysis.

[NL-Docs]: https://cloud.google.com/natural-language/docs/

## Setup

Please follow the [Set Up Your Project](https://cloud.google.com/natural-language/docs/getting-started#set_up_your_project)
steps in the Quickstart doc to create a project and enable the
Cloud Natural Language API. Following those steps, make sure that you
[Set Up a Service Account](https://cloud.google.com/natural-language/docs/common/auth#set_up_a_service_account),
and export the following environment variable:

[Review comment] @waprin (Contributor):
does gcloud beta auth application-default login not work btw?

[Author reply]:
Not sure. See my comment here, though..

```
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-project-credentials.json
```

## Run the sample

Install [pip](https://pip.pypa.io/en/stable/installing) if not already installed.

To run the example, install the necessary libraries using pip:

```sh
$ pip install -r requirements.txt
```

Then, run the script:

```sh
$ python analyze.py <command> <text-string>
```

where `<command>` is one of: `entities`, `sentiment`, or `syntax`.

The script will write the JSON returned from the API for the requested feature to STDOUT.

For example, if you run:

```sh
$ python analyze.py entities "Tom Sawyer is a book written by a guy known as Mark Twain."
```

You will see something like the following returned:

```json
{
"entities": [
{
"salience": 0.49785897,
"mentions": [
{
"text": {
"content": "Tom Sawyer",
"beginOffset": 0
}
}
],
"type": "PERSON",
"name": "Tom Sawyer",
"metadata": {
"wikipedia_url": "http://en.wikipedia.org/wiki/The_Adventures_of_Tom_Sawyer"
}
},
{
"salience": 0.12209519,
"mentions": [
{
"text": {
"content": "Mark Twain",
"beginOffset": 47
}
}
],
"type": "PERSON",
"name": "Mark Twain",
"metadata": {
"wikipedia_url": "http://en.wikipedia.org/wiki/Mark_Twain"
}
}
],
"language": "en"
}
```
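A short sketch of how a caller might consume a response shaped like the JSON above: rank entities by `salience`, and map a mention's `beginOffset` back into the input string. For plain ASCII text like this sentence, the offset matches Python string indices directly (the entity fields below are abbreviated from the README example):

```python
# Trimmed version of the entity response shown above.
response = {
    'entities': [
        {'name': 'Tom Sawyer', 'salience': 0.49785897,
         'mentions': [{'text': {'content': 'Tom Sawyer', 'beginOffset': 0}}]},
        {'name': 'Mark Twain', 'salience': 0.12209519,
         'mentions': [{'text': {'content': 'Mark Twain', 'beginOffset': 47}}]},
    ],
    'language': 'en',
}

# Sort by salience, highest first.
ranked = sorted(response['entities'], key=lambda e: e['salience'], reverse=True)

# For ASCII text, beginOffset lines up with Python string indices.
text = 'Tom Sawyer is a book written by a guy known as Mark Twain.'
offset = response['entities'][1]['mentions'][0]['text']['beginOffset']
assert text[offset:offset + len('Mark Twain')] == 'Mark Twain'
```

For non-ASCII text the offsets only line up with Python indices when the request's `encodingType` matches the interpreter's native string encoding, which is what the `get_native_encoding_type()` helper in `analyze.py` is for.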
115 changes: 115 additions & 0 deletions language/api/analyze.py
@@ -0,0 +1,115 @@
#!/usr/bin/env python

# Copyright 2016 Google, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyzes text using the Google Cloud Natural Language API."""

import argparse
import json
import sys

from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials


def get_service():
credentials = GoogleCredentials.get_application_default()
scoped_credentials = credentials.create_scoped(
['https://www.googleapis.com/auth/cloud-platform'])
http = httplib2.Http()
scoped_credentials.authorize(http)
return discovery.build('language', 'v1beta1', http=http)


def get_native_encoding_type():
"""Returns the encoding type that matches Python's native strings."""
if sys.maxunicode == 65535:
return 'UTF16'
else:
return 'UTF32'


def analyze_entities(text, encoding='UTF32'):
body = {
'document': {
'type': 'PLAIN_TEXT',
'content': text,
},
'encodingType': encoding,
}

service = get_service()

request = service.documents().analyzeEntities(body=body)
response = request.execute()

return response


def analyze_sentiment(text):
body = {
'document': {
'type': 'PLAIN_TEXT',
'content': text,
}
}

service = get_service()

request = service.documents().analyzeSentiment(body=body)
response = request.execute()

return response


def analyze_syntax(text, encoding='UTF32'):
body = {
'document': {
'type': 'PLAIN_TEXT',
'content': text,
},
'features': {
'extract_syntax': True,
},
'encodingType': encoding,
}

service = get_service()

request = service.documents().annotateText(body=body)
response = request.execute()

return response


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('command', choices=[
'entities', 'sentiment', 'syntax'])
parser.add_argument('text')

args = parser.parse_args()

if args.command == 'entities':
result = analyze_entities(args.text, get_native_encoding_type())
elif args.command == 'sentiment':
result = analyze_sentiment(args.text)
elif args.command == 'syntax':
result = analyze_syntax(args.text, get_native_encoding_type())

print(json.dumps(result, indent=2))
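The reason `analyze.py` picks the encoding from `sys.maxunicode` rather than hardcoding one: on a narrow Python build (`sys.maxunicode == 65535`) string indexing counts UTF-16 code units, while on a wide build it counts code points, so the API's offsets only line up with local slicing when the request asks for the matching `encodingType`. A self-contained sketch of that helper, with a supplementary-plane character showing where the two encodings diverge:

```python
import sys


def get_native_encoding_type():
    # Mirrors the helper in analyze.py: narrow builds index strings in
    # UTF-16 code units, wide builds (and all of Python 3) in UTF-32
    # code points.
    if sys.maxunicode == 65535:
        return 'UTF16'
    return 'UTF32'


# A character outside the Basic Multilingual Plane is one code point
# (UTF32) but two UTF-16 code units, so its length differs per build.
emoji = u'\U0001F600'
assert len(emoji) == (2 if sys.maxunicode == 65535 else 1)
```

Requesting the encoding returned by this helper means any `beginOffset` in the response can be used to slice the original text directly.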