Skip to content

Commit ee01b3a

Browse files
committed
PIOTEAM-41 yahoo stock import example.
1 parent cd439ed commit ee01b3a

File tree

2 files changed

+211
-1
lines changed

2 files changed

+211
-1
lines changed

examples/import_yahoo.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
"""
2+
Import historical stock data from yahoo finance.
3+
"""
4+
5+
import argparse
6+
from datetime import datetime
7+
import predictionio
8+
import pytz
9+
import time
10+
from pandas.io import data as pdata
11+
import numpy
12+
13+
# Unix epoch as a timezone-aware UTC datetime; the reference point used by
# since_epoch() to convert datetimes to seconds-since-epoch.
EPOCH = datetime(1970, 1, 1, tzinfo=pytz.utc)

# Ticker symbols of S&P 500 constituents (snapshot of the index membership
# at the time this example was written, circa 2014 — some symbols no longer
# trade). Class-B shares use Yahoo's dash notation (e.g. "BF-B", "BRK-B").
SP500_LIST = [
    "A", "AA", "AAPL", "ABBV", "ABC", "ABT", "ACE", "ACN", "ACT", "ADBE", "ADI",
    "ADM", "ADP", "ADS", "ADSK", "ADT", "AEE", "AEP", "AES", "AET", "AFL",
    "AGN", "AIG", "AIV", "AIZ", "AKAM", "ALL", "ALLE", "ALTR", "ALXN", "AMAT",
    "AME", "AMGN", "AMP", "AMT", "AMZN", "AN", "AON", "APA", "APC", "APD",
    "APH", "ARG", "ATI", "AVB", "AVP", "AVY", "AXP", "AZO", "BA", "BAC", "BAX",
    "BBBY", "BBT", "BBY", "BCR", "BDX", "BEAM", "BEN", "BF-B", "BHI", "BIIB",
    "BK", "BLK", "BLL", "BMS", "BMY", "BRCM", "BRK-B", "BSX", "BTU", "BWA",
    "BXP", "C", "CA", "CAG", "CAH", "CAM", "CAT", "CB", "CBG", "CBS", "CCE",
    "CCI", "CCL", "CELG", "CERN", "CF", "CFN", "CHK", "CHRW", "CI", "CINF",
    "CL", "CLX", "CMA", "CMCSA", "CME", "CMG", "CMI", "CMS", "CNP", "CNX",
    "COF", "COG", "COH", "COL", "COP", "COST", "COV", "CPB", "CRM", "CSC",
    "CSCO", "CSX", "CTAS", "CTL", "CTSH", "CTXS", "CVC", "CVS", "CVX", "D",
    "DAL", "DD", "DE", "DFS", "DG", "DGX", "DHI", "DHR", "DIS", "DISCA", "DLPH",
    "DLTR", "DNB", "DNR", "DO", "DOV", "DOW", "DPS", "DRI", "DTE", "DTV", "DUK",
    "DVA", "DVN", "EA", "EBAY", "ECL", "ED", "EFX", "EIX", "EL", "EMC", "EMN",
    "EMR", "EOG", "EQR", "EQT", "ESRX", "ESS", "ESV", "ETFC", "ETN", "ETR",
    "EW", "EXC", "EXPD", "EXPE", "F", "FAST", "FB", "FCX", "FDO", "FDX", "FE",
    "FFIV", "FIS", "FISV", "FITB", "FLIR", "FLR", "FLS", "FMC", "FOSL", "FOXA",
    "FRX", "FSLR", "FTI", "FTR", "GAS", "GCI", "GD", "GE", "GGP", "GHC", "GILD",
    "GIS", "GLW", "GM", "GMCR", "GME", "GNW", "GOOG", "GOOGL", "GPC", "GPS",
    "GRMN", "GS", "GT", "GWW", "HAL", "HAR", "HAS", "HBAN", "HCBK", "HCN",
    "HCP", "HD", "HES", "HIG", "HOG", "HON", "HOT", "HP", "HPQ", "HRB", "HRL",
    "HRS", "HSP", "HST", "HSY", "HUM", "IBM", "ICE", "IFF", "IGT", "INTC",
    "INTU", "IP", "IPG", "IR", "IRM", "ISRG", "ITW", "IVZ", "JBL", "JCI", "JEC",
    "JNJ", "JNPR", "JOY", "JPM", "JWN", "K", "KEY", "KIM", "KLAC", "KMB", "KMI",
    "KMX", "KO", "KORS", "KR", "KRFT", "KSS", "KSU", "L", "LB", "LEG", "LEN",
    "LH", "LLL", "LLTC", "LLY", "LM", "LMT", "LNC", "LO", "LOW", "LRCX", "LSI",
    "LUK", "LUV", "LYB", "M", "MA", "MAC", "MAR", "MAS", "MAT", "MCD", "MCHP",
    "MCK", "MCO", "MDLZ", "MDT", "MET", "MHFI", "MHK", "MJN", "MKC", "MMC",
    "MMM", "MNST", "MO", "MON", "MOS", "MPC", "MRK", "MRO", "MS", "MSFT", "MSI",
    "MTB", "MU", "MUR", "MWV", "MYL", "NBL", "NBR", "NDAQ", "NE", "NEE", "NEM",
    "NFLX", "NFX", "NI", "NKE", "NLSN", "NOC", "NOV", "NRG", "NSC", "NTAP",
    "NTRS", "NU", "NUE", "NVDA", "NWL", "NWSA", "OI", "OKE", "OMC", "ORCL",
    "ORLY", "OXY", "PAYX", "PBCT", "PBI", "PCAR", "PCG", "PCL", "PCLN", "PCP",
    "PDCO", "PEG", "PEP", "PETM", "PFE", "PFG", "PG", "PGR", "PH", "PHM", "PKI",
    "PLD", "PLL", "PM", "PNC", "PNR", "PNW", "POM", "PPG", "PPL", "PRGO", "PRU",
    "PSA", "PSX", "PVH", "PWR", "PX", "PXD", "QCOM", "QEP", "R", "RAI", "RDC",
    "REGN", "RF", "RHI", "RHT", "RIG", "RL", "ROK", "ROP", "ROST", "RRC", "RSG",
    "RTN", "SBUX", "SCG", "SCHW", "SE", "SEE", "SHW", "SIAL", "SJM", "SLB",
    "SLM", "SNA", "SNDK", "SNI", "SO", "SPG", "SPLS", "SRCL", "SRE", "STI",
    "STJ", "STT", "STX", "STZ", "SWK", "SWN", "SWY", "SYK", "SYMC", "SYY", "T",
    "TAP", "TDC", "TE", "TEG", "TEL", "TGT", "THC", "TIF", "TJX", "TMK", "TMO",
    "TRIP", "TROW", "TRV", "TSCO", "TSN", "TSO", "TSS", "TWC", "TWX", "TXN",
    "TXT", "TYC", "UNH", "UNM", "UNP", "UPS", "URBN", "USB", "UTX", "V", "VAR",
    "VFC", "VIAB", "VLO", "VMC", "VNO", "VRSN", "VRTX", "VTR", "VZ", "WAG",
    "WAT", "WDC", "WEC", "WFC", "WFM", "WHR", "WIN", "WLP", "WM", "WMB", "WMT",
    "WU", "WY", "WYN", "WYNN", "X", "XEL", "XL", "XLNX", "XOM", "XRAY", "XRX",
    "XYL", "YHOO", "YUM", "ZION", "ZMH", "ZTS"]

# A few broad-market / SPDR sector ETFs imported alongside the single stocks.
ETF_LIST = ["QQQ", "SPY", "XLY", "XLP", "XLE", "XLF", "XLV",
            "XLI", "XLB", "XLK", "XLU"]
67+
68+
69+
def since_epoch(dt, epoch=None):
    """Return the number of seconds between *dt* and a reference epoch.

    Args:
        dt: timezone-aware datetime to convert.
        epoch: optional reference datetime; defaults to the module-level
            EPOCH constant (1970-01-01 UTC). Must have the same awareness
            as *dt*, otherwise the subtraction raises TypeError.

    Returns:
        Seconds elapsed since *epoch* as a float (negative if *dt*
        precedes *epoch*).
    """
    # None sentinel rather than a default-argument reference to EPOCH so
    # the module-level constant is looked up at call time.
    if epoch is None:
        epoch = EPOCH
    return (dt - epoch).total_seconds()
71+
72+
73+
def import_data(client, app_id, ticker, start_time, end_time, event_time):
    """Fetch daily bars for *ticker* from Yahoo Finance and store them in
    PredictionIO as one '$set' event on entity <ticker>.

    Args:
        client: predictionio.EventClient used to send the event.
        app_id: PredictionIO application id stamped on the event.
        ticker: Yahoo ticker symbol, e.g. 'AAPL'.
        start_time: datetime, start of the historical window requested.
        end_time: datetime, end of the historical window requested.
        event_time: datetime recorded as the event's eventTime (treated
            as UTC).

    Returns None. Returns early (without raising) when Yahoo has no data
    for the requested window.
    """
    print("Importing:", ticker, start_time, end_time)

    try:
        df = pdata.DataReader(ticker, 'yahoo', start_time, end_time)
        print("Extracted:", df.index[0], df.index[-1])
    except IOError as ex:
        # DataReader signals "no data for this ticker/range" with IOError;
        # expected for tickers that were not yet listed, so just skip.
        print(ex)
        print("Data not exist. Returning")
        return

    # assume we only extract US data
    eastern = pytz.timezone('US/Eastern')

    # (DataFrame column, event property name) pairs to export.
    columns = [
        ('Open', 'open'),
        ('High', 'high'),
        ('Low', 'low'),
        ('Close', 'close'),
        ('Volume', 'volume'),
        ('Adj Close', 'adjclose')]

    yahoo_data = dict()
    yahoo_data['ticker'] = ticker
    yahoo_data['t'] = [
        # hour=16 to indicate market close time
        since_epoch(eastern.localize(date_.to_pydatetime().replace(hour=16)))
        for date_ in df.index]

    for frame_col, prop_name in columns:
        # .item() converts each numpy scalar to a plain Python number so the
        # payload is JSON-serializable (numpy.asscalar was deprecated and
        # removed in NumPy 1.23). Materialize a real list — Python 3's map()
        # returns a lazy iterator, which would not serialize.
        yahoo_data[prop_name] = [v.item() for v in df[frame_col].values]

    properties = {'yahoo': yahoo_data}

    data = {
        'event': '$set',
        'entityType': 'yahoo',
        'entityId': ticker,
        'properties': properties,
        'appId': app_id,
        'eventTime': event_time.replace(tzinfo=pytz.utc).isoformat(),
    }

    response = client.create_event(data)
    print(response)
118+
119+
120+
def import_all(app_id):
    """Import every S&P 500 stock plus a handful of SPDR ETFs.

    Pulls three overlapping (startTime, endTime, eventTime) windows spanning
    1999-2014 for each ticker and sends them to the local event server.
    """
    # startTime, endTime, eventTime — consecutive windows overlap slightly.
    time_slices = [
        (datetime(1999, 1, 1), datetime(2004, 1, 1), datetime(2004, 1, 2)),
        (datetime(2003, 12, 1), datetime(2009, 1, 1), datetime(2009, 1, 2)),
        (datetime(2008, 12, 1), datetime(2014, 9, 1), datetime(2014, 9, 2)),
    ]

    client = predictionio.EventClient(
        app_id=app_id, threads=1, url='http://localhost:7070')

    for ticker in SP500_LIST + ETF_LIST:
        for start, end, event_time in time_slices:
            import_data(client, app_id, ticker, start, end, event_time)
137+
138+
139+
def import_data_with_gaps(app_id):
    """Import time-series data containing deliberate gaps.

    Data imported by this method is used by the stock engine; it demonstrates
    how the engine can handle time series with missing stretches.
    """
    client = predictionio.EventClient(
        app_id=app_id, threads=1, url='http://localhost:7070')

    # Each scenario pairs a ticker list with its discontinuous
    # (startTime, endTime, eventTime) windows. The windows are intentionally
    # non-contiguous so the imported series has holes.
    scenarios = [
        (['SPY', 'AAPL', 'IBM', 'MSFT'], [
            (datetime(2013, 12, 1), datetime(2014, 2, 1), datetime(2014, 2, 2)),
            (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)),
            (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)),
            (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)),
            (datetime(2014, 5, 1), datetime(2014, 6, 15), datetime(2014, 6, 20)),
            (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)),
        ]),
        # AMZN: fewer windows, so its series has larger holes.
        (['AMZN'], [
            (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)),
            (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)),
            (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)),
        ]),
        # FB: only two windows.
        (['FB'], [
            (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)),
            (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)),
        ]),
    ]

    for tickers, windows in scenarios:
        for ticker in tickers:
            for start, end, event_time in windows:
                import_data(client, app_id, ticker, start, end, event_time)
189+
190+
191+
def import_one(app_id):
    """Import TSLA.

    Imports data from 2014-01-01 until 2014-03-01; the event time records
    when this data was extracted.
    """
    client = predictionio.EventClient(
        app_id=app_id, threads=1, url='http://localhost:7070')

    import_data(
        client, app_id, 'TSLA',
        datetime(2014, 1, 1),   # start_time
        datetime(2014, 3, 1),   # end_time
        datetime(2014, 9, 1))   # event_time (extraction timestamp)
206+
207+
208+
if __name__ == '__main__':
    # Alternative entry points: uncomment one of the lines below to bulk
    # import the full S&P 500 universe or the gapped demo data instead of
    # the single-ticker example.
    #import_all(app_id=2)
    #import_data_with_gaps(app_id=1)
    import_one(app_id=1)

predictionio/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ def __init__(self, app_id, url="http://localhost:7070",
147147
self.app_id = app_id
148148

149149
def acreate_event(self, data):
150-
print data
151150
path = "/events.json"
152151
request = AsyncRequest("POST", path, **data)
153152
request.set_rfunc(self._acreate_resp)

0 commit comments

Comments
 (0)