Skip to content

Commit 04a3bcb

Browse files
committed
Merge branch 'develop'
2 parents 75138e4 + b88b78d commit 04a3bcb

File tree

9 files changed

+114
-48
lines changed

9 files changed

+114
-48
lines changed

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# The short X.Y version.
5555
version = '0.8'
5656
# The full version, including alpha/beta/rc tags.
57-
release = '0.8.0'
57+
release = '0.8.2'
5858

5959
# The language for content autogenerated by Sphinx. Refer to documentation
6060
# for a list of supported languages.

docs/source/predictionio.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ The SDK comprises of two clients:
1010
and extract prediction results.
1111

1212
Please read `PredictionIO Quick Start
13-
<http://docs.prediction.io/0.8.0/tutorials/engines/quickstart.html>`_ for
13+
<http://docs.prediction.io/0.8.2/recommendation/quickstart.html>`_ for
1414
detailed explanation.
1515

1616
predictionio.EventClient Class
@@ -104,7 +104,7 @@ status at a later time to minimize run time.
104104
For example, to import 100000 user records::
105105

106106
>>> # generate 100000 asynchronous requests and store the AsyncRequest objects
107-
>>> event_client = EventClient(app_id=1)
107+
>>> event_client = EventClient(access_key=<YOUR_ACCESS_KEY>)
108108
>>> for i in range(100000):
109109
>>> event_client.aset_user(user_record[i].uid)
110110
>>>

examples/demo-movielens/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ Please execute all commands from repository root.
55

66
Step 1. Get sample data and unzip it.
77
```
8-
$ curl -o ml-100k.zip http://www.grouplens.org/system/files/ml-100k.zip
8+
$ curl -o ml-100k.zip http://files.grouplens.org/datasets/movielens/ml-100k.zip
99
$ unzip ml-100k.zip
1010
```
1111

1212
Step 2. Run this app:
1313
```
14-
$ python -m examples.demo-movielens.batch_import <app_id> <server_url>
14+
$ python -m examples.demo-movielens.batch_import <access_key> <server_url>
1515
```
1616

examples/demo-movielens/batch_import.py

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,27 @@
33
import predictionio
44
import sys
55
import pytz
6-
7-
def batch_import_task(app_id, app_data, client, all_info=False):
6+
import datetime
7+
8+
def batch_import_task(app_data, client, all_info=False):
9+
# event_time is an important property used by the PredictionIO platform. It
10+
# is particularly useful in generating training and testing set, which uses
11+
# event_time for splitting. Hence, when we import data, better to make the
12+
# event_time as close to the actual time as possible.
13+
#
14+
# However, in many cases, the data doesn't come with a time. Movie-lens' user
15+
# data, for example, only reveals the age, gender, occupation, and zip code of
16+
# a user. It doesn't report when the user is "created". Likewise, for items,
17+
# it only reports the release date.
18+
#
19+
# To remedy this problem, we have to make some assumptions about the data. In
20+
# this import script, the event_time for user is set to epoch=0, and the
21+
# event_time for item is set to the release_date + 00:00:00 UTC.
822

923
print "[Info] Importing users to PredictionIO..."
24+
user_create_time = datetime.datetime.fromtimestamp(0, tz=pytz.utc)
1025
count = 0
26+
set_user_request_list = []
1127
for k, v in app_data.get_users().iteritems():
1228
count += 1
1329
if all_info:
@@ -17,13 +33,20 @@ def batch_import_task(app_id, app_data, client, all_info=False):
1733
sys.stdout.write('\r[Info] %s' % count)
1834
sys.stdout.flush()
1935

20-
client.aset_user(uid=v.uid)
36+
set_user_request_list.append(
37+
client.aset_user(uid=v.uid, event_time=user_create_time))
2138

39+
[r.get_response() for r in set_user_request_list]
2240
sys.stdout.write('\r[Info] %s users were imported.\n' % count)
2341
sys.stdout.flush()
2442

2543
print "[Info] Importing items to PredictionIO..."
2644
count = 0
45+
set_item_request_list = []
46+
# event_time is a datetime, hence need to add a time component to the release
47+
# date.
48+
midnight_utc = datetime.time(0, 0, 0, tzinfo=pytz.utc)
49+
epoch = datetime.datetime.fromtimestamp(0, tz=pytz.utc)
2750
for k, v in app_data.get_items().iteritems():
2851
count += 1
2952
if all_info:
@@ -34,18 +57,34 @@ def batch_import_task(app_id, app_data, client, all_info=False):
3457
sys.stdout.flush()
3558

3659
itypes = ("movie",) + v.genres
37-
client.aset_item(iid=v.iid,
38-
properties={
39-
"pio_itypes" : list(itypes),
40-
"pio_starttime" : v.release_date.isoformat() + 'Z',
41-
"name" : v.name,
42-
"year" : v.year } )
4360

61+
release_datetime = datetime.datetime.combine(
62+
v.release_date,
63+
midnight_utc)
64+
65+
# event_time must be after epoch.
66+
event_time = release_datetime if release_datetime > epoch else epoch
67+
68+
utf8_name = v.name.decode('utf-8', 'ignore')
69+
70+
set_item_request = client.aset_item(
71+
iid=v.iid,
72+
event_time=event_time,
73+
properties={
74+
"pio_itypes": list(itypes),
75+
"pio_starttime": release_datetime.isoformat(),
76+
"name": utf8_name,
77+
"year": v.year } )
78+
79+
set_item_request_list.append(set_item_request)
80+
81+
[r.get_response() for r in set_item_request_list]
4482
sys.stdout.write('\r[Info] %s items were imported.\n' % count)
4583
sys.stdout.flush()
4684

4785
print "[Info] Importing rate actions to PredictionIO..."
4886
count = 0
87+
create_event_request_list = []
4988
for v in app_data.get_rate_actions():
5089
count += 1
5190
if all_info:
@@ -66,19 +105,22 @@ def batch_import_task(app_id, app_data, client, all_info=False):
66105
event_time=v.t.replace(tzinfo=pytz.utc),
67106
)
68107

108+
create_event_request_list.append(req)
109+
110+
[r.get_response() for r in create_event_request_list]
69111
sys.stdout.write('\r[Info] %s rate actions were imported.\n' % count)
70112
sys.stdout.flush()
71113

72114

73115
if __name__ == '__main__':
74116
if len(sys.argv) < 3:
75117
sys.exit("Usage: python -m examples.demo-movielens.batch_import "
76-
"<app_id> <url>")
118+
"<access_key> <url>")
77119

78-
app_id = int(sys.argv[1])
120+
access_key = sys.argv[1]
79121

80122
client = predictionio.EventClient(
81-
app_id=app_id,
123+
access_key=access_key,
82124
url=sys.argv[2],
83125
threads=5,
84126
qsize=500)
@@ -87,5 +129,5 @@ def batch_import_task(app_id, app_data, client, all_info=False):
87129
print "Status:", client.get_status()
88130

89131
app_data = AppData()
90-
batch_import_task(app_id, app_data, client)
132+
batch_import_task(app_data, client)
91133
client.close()

examples/event_sample.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
import pytz
55
import sys
66

7-
client = EventClient(app_id=4, url="http://localhost:7070")
7+
access_key = None
8+
assert access_key is not None, "Please create an access key with 'pio app new'"
9+
10+
client = EventClient(access_key=access_key, url="http://localhost:7070")
811

912
# Check status
1013
print("Check status")

examples/import_yahoo.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
Import historical stock data from yahoo finance.
33
"""
44

5-
import argparse
65
from datetime import datetime
6+
from pandas.io import data as pdata
7+
import argparse
8+
import numpy
79
import predictionio
810
import pytz
11+
import sys
912
import time
10-
from pandas.io import data as pdata
11-
import numpy
1213

1314
EPOCH = datetime(1970, 1, 1, tzinfo=pytz.utc)
1415

@@ -70,7 +71,7 @@ def since_epoch(dt):
7071
return (dt - EPOCH).total_seconds()
7172

7273

73-
def import_data(client, app_id, ticker, start_time, end_time, event_time):
74+
def import_data(client, access_key, ticker, start_time, end_time, event_time):
7475
print "Importing:", ticker, start_time, end_time
7576

7677
try:
@@ -114,7 +115,7 @@ def import_data(client, app_id, ticker, start_time, end_time, event_time):
114115
print(response)
115116

116117

117-
def import_all(app_id):
118+
def import_all(access_key):
118119
"""This method imports all SP500 stocks and some SPDR ETFs."""
119120
time_slices = [
120121
(datetime(1999, 1, 1), datetime(2004, 1, 1), datetime(2004, 1, 2)),
@@ -123,17 +124,17 @@ def import_all(app_id):
123124
]
124125

125126
url = 'http://localhost:7070'
126-
client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
127+
client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
127128

128129
tickers = SP500_LIST + ETF_LIST
129130

130131
for ticker in tickers:
131132
for time_slice in time_slices:
132-
import_data(client, app_id, ticker,
133+
import_data(client, access_key, ticker,
133134
time_slice[0], time_slice[1], time_slice[2])
134135

135136

136-
def import_data_with_gaps(app_id):
137+
def import_data_with_gaps(access_key):
137138
"""This method imports data with time gaps.
138139
139140
Data imported by this method is used by stock engine, it demonstrates how it
@@ -154,11 +155,11 @@ def import_data_with_gaps(app_id):
154155
tickers = ['SPY', 'AAPL', 'IBM', 'MSFT']
155156

156157
url = 'http://localhost:7070'
157-
client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
158+
client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
158159

159160
for ticker in tickers:
160161
for time_slice in time_slices:
161-
import_data(client, app_id, ticker,
162+
import_data(client, access_key, ticker,
162163
time_slice[0], time_slice[1], time_slice[2])
163164

164165
# below are data with holes
@@ -171,7 +172,7 @@ def import_data_with_gaps(app_id):
171172
tickers = ['AMZN']
172173
for ticker in tickers:
173174
for time_slice in time_slices:
174-
import_data(client, app_id, ticker,
175+
import_data(client, access_key, ticker,
175176
time_slice[0], time_slice[1], time_slice[2])
176177

177178
time_slices = [
@@ -181,11 +182,11 @@ def import_data_with_gaps(app_id):
181182
tickers = ['FB']
182183
for ticker in tickers:
183184
for time_slice in time_slices:
184-
import_data(client, app_id, ticker,
185+
import_data(client, access_key, ticker,
185186
time_slice[0], time_slice[1], time_slice[2])
186187

187188

188-
def import_one(app_id):
189+
def import_one(access_key):
189190
"""Import TSLA.
190191
191192
Import data from 2014-01-01 until 2014-03-01. event_time specifies when
@@ -197,12 +198,16 @@ def import_one(app_id):
197198
ticker = 'TSLA'
198199

199200
url = 'http://localhost:7070'
200-
client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
201+
client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
201202

202-
import_data(client, app_id, ticker, start_time, end_time, event_time)
203+
import_data(client, access_key, ticker, start_time, end_time, event_time)
203204

204205

205206
if __name__ == '__main__':
206-
#import_all(app_id=2)
207-
import_data_with_gaps(app_id=1)
208-
#import_one(app_id=1)
207+
if len(sys.argv) < 2:
208+
sys.exit("Usage: python -m examples.import_yahoo <access_key>")
209+
210+
access_key = sys.argv[1]
211+
import_all(access_key=access_key)
212+
#import_data_with_gaps(access_key=access_key)
213+
#import_one(access_key=access_key)

examples/itemrank_quick_start.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
import predictionio
66

77
import random
8+
import sys
89

9-
def import_itemrank(app_id):
10+
def import_itemrank(access_key):
1011

1112
random.seed()
1213

13-
client = predictionio.EventClient(app_id=app_id)
14+
client = predictionio.EventClient(access_key)
1415

1516
print client.get_status()
1617

@@ -39,4 +40,6 @@ def import_itemrank(app_id):
3940

4041

4142
if __name__ == '__main__':
42-
import_itemrank(7)
43+
if len(sys.argv) < 2:
44+
sys.exit("Usage: python -m examples.itemrank_quick_start <access_key>")
45+
import_itemrank(sys.argv[1])

predictionio/__init__.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66

77

8-
__version__ = "0.8.1"
8+
__version__ = "0.8.2"
99

1010
# import deprecated libraries.
1111
from predictionio.obsolete import Client
@@ -151,7 +151,10 @@ def _adelete_resp(self, response):
151151
class EventClient(BaseClient):
152152
"""Client for importing data into PredictionIO Event Server.
153153
154-
:param app_id: the id used to identify application data.
154+
Notice that app_id has been deprecated as of 0.8.2. Please use access_key
155+
instead.
156+
157+
:param access_key: the access key for your application.
155158
:param url: the url of PredictionIO Event Server.
156159
:param threads: number of threads to handle PredictionIO API requests.
157160
Must be >= 1.
@@ -162,13 +165,24 @@ class EventClient(BaseClient):
162165
:param timeout: timeout for HTTP connection attempts and requests in
163166
seconds (optional).
164167
Default value is 5.
165-
166168
"""
167169

168-
def __init__(self, app_id, url="http://localhost:7070",
170+
def __init__(self, access_key,
171+
url="http://localhost:7070",
169172
threads=1, qsize=0, timeout=5):
173+
assert type(access_key) is str, ("access_key must be string. "
174+
"Notice that app_id has been deprecated in Prediction.IO 0.8.2. "
175+
"Please use access_key instead.")
176+
170177
super(EventClient, self).__init__(url, threads, qsize, timeout)
171-
self.app_id = app_id
178+
179+
if len(access_key) <= 8:
180+
raise DeprecationWarning(
181+
"It seems like you are specifying an app_id. It is deprecated in "
182+
"Prediction.IO 0.8.2. Please use access_key instead. Or, "
183+
"you may use an earlier version of this sdk.")
184+
185+
self.access_key = access_key
172186

173187
def acreate_event(self, event, entity_type, entity_id,
174188
target_entity_type=None, target_entity_id=None, properties=None,
@@ -194,7 +208,6 @@ def acreate_event(self, event, entity_type, entity_id,
194208
object to get the final results or status of this asynchronous request.
195209
"""
196210
data = {
197-
"appId": self.app_id,
198211
"event": event,
199212
"entityType": entity_type,
200213
"entityId": entity_id,
@@ -215,7 +228,7 @@ def acreate_event(self, event, entity_type, entity_id,
215228
et_str = et.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + et.strftime("%z")
216229
data["eventTime"] = et_str
217230

218-
path = "/events.json"
231+
path = "/events.json?accessKey=" + self.access_key
219232
request = AsyncRequest("POST", path, **data)
220233
request.set_rfunc(self._acreate_resp)
221234
self._connection.make_request(request)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
setup(
1212
name='PredictionIO',
13-
version="0.8.1",
13+
version="0.8.2",
1414
author=__author__,
1515
author_email=__email__,
1616
packages=['predictionio'],

0 commit comments

Comments
 (0)