|
70 | 70 | import numpy as np
|
71 | 71 | import matplotlib.pyplot as plt
|
72 | 72 | from matplotlib.collections import LineCollection
|
73 |
| -from six.moves.urllib.request import urlopen |
74 |
| -from six.moves.urllib.parse import urlencode |
75 |
| -from sklearn import cluster, covariance, manifold |
76 | 73 |
|
77 |
| -print(__doc__) |
| 74 | +import pandas as pd |
78 | 75 |
|
| 76 | +from sklearn import cluster, covariance, manifold |
79 | 77 |
|
80 |
| -def retry(f, n_attempts=3): |
81 |
| - "Wrapper function to retry function calls in case of exceptions" |
82 |
| - def wrapper(*args, **kwargs): |
83 |
| - for i in range(n_attempts): |
84 |
| - try: |
85 |
| - return f(*args, **kwargs) |
86 |
| - except Exception: |
87 |
| - if i == n_attempts - 1: |
88 |
| - raise |
89 |
| - return wrapper |
90 |
| - |
91 |
| - |
92 |
| -def quotes_historical_google(symbol, start_date, end_date): |
93 |
| - """Get the historical data from Google finance. |
94 |
| -
|
95 |
| - Parameters |
96 |
| - ---------- |
97 |
| - symbol : str |
98 |
| - Ticker symbol to query for, for example ``"DELL"``. |
99 |
| - start_date : datetime.datetime |
100 |
| - Start date. |
101 |
| - end_date : datetime.datetime |
102 |
| - End date. |
103 |
| -
|
104 |
| - Returns |
105 |
| - ------- |
106 |
| - X : array |
107 |
| - The columns are ``date`` -- date, ``open``, ``high``, |
108 |
| - ``low``, ``close`` and ``volume`` of type float. |
109 |
| - """ |
110 |
| - params = { |
111 |
| - 'q': symbol, |
112 |
| - 'startdate': start_date.strftime('%Y-%m-%d'), |
113 |
| - 'enddate': end_date.strftime('%Y-%m-%d'), |
114 |
| - 'output': 'csv', |
115 |
| - } |
116 |
| - url = 'https://finance.google.com/finance/historical?' + urlencode(params) |
117 |
| - response = urlopen(url) |
118 |
| - dtype = { |
119 |
| - 'names': ['date', 'open', 'high', 'low', 'close', 'volume'], |
120 |
| - 'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4'] |
121 |
| - } |
122 |
| - converters = { |
123 |
| - 0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y').date()} |
124 |
| - data = np.genfromtxt(response, delimiter=',', skip_header=1, |
125 |
| - dtype=dtype, converters=converters, |
126 |
| - missing_values='-', filling_values=-1) |
127 |
| - min_date = min(data['date']) if len(data) else datetime.min.date() |
128 |
| - max_date = max(data['date']) if len(data) else datetime.max.date() |
129 |
| - start_end_diff = (end_date - start_date).days |
130 |
| - min_max_diff = (max_date - min_date).days |
131 |
| - data_is_fine = ( |
132 |
| - start_date <= min_date <= end_date and |
133 |
| - start_date <= max_date <= end_date and |
134 |
| - start_end_diff - 7 <= min_max_diff <= start_end_diff) |
135 |
| - |
136 |
| - if not data_is_fine: |
137 |
| - message = ( |
138 |
| - 'Data looks wrong for symbol {}, url {}\n' |
139 |
| - ' - start_date: {}, end_date: {}\n' |
140 |
| - ' - min_date: {}, max_date: {}\n' |
141 |
| - ' - start_end_diff: {}, min_max_diff: {}'.format( |
142 |
| - symbol, url, |
143 |
| - start_date, end_date, |
144 |
| - min_date, max_date, |
145 |
| - start_end_diff, min_max_diff)) |
146 |
| - raise RuntimeError(message) |
147 |
| - return data |
| 78 | +print(__doc__) |
148 | 79 |
|
149 | 80 |
|
150 | 81 | # #############################################################################
|
151 | 82 | # Retrieve the data from the Internet
|
152 | 83 |
|
153 |
| -# Choose a time period reasonably calm (not too long ago so that we get |
154 |
| -# high-tech firms, and before the 2008 crash) |
| 84 | +# The data is from 2003 - 2008. This is a reasonably calm period (not too |
| 85 | +# long ago, so that we get high-tech firms, and before the 2008 crash). This |
| 86 | +# kind of historical data can be obtained from APIs like the quandl.com and |
| 87 | +# alphavantage.co ones. |
155 | 88 | start_date = datetime(2003, 1, 1).date()
|
156 | 89 | end_date = datetime(2008, 1, 1).date()
|
157 | 90 |
|
158 | 91 | symbol_dict = {
|
159 |
| - 'NYSE:TOT': 'Total', |
160 |
| - 'NYSE:XOM': 'Exxon', |
161 |
| - 'NYSE:CVX': 'Chevron', |
162 |
| - 'NYSE:COP': 'ConocoPhillips', |
163 |
| - 'NYSE:VLO': 'Valero Energy', |
164 |
| - 'NASDAQ:MSFT': 'Microsoft', |
165 |
| - 'NYSE:IBM': 'IBM', |
166 |
| - 'NYSE:TWX': 'Time Warner', |
167 |
| - 'NASDAQ:CMCSA': 'Comcast', |
168 |
| - 'NYSE:CVC': 'Cablevision', |
169 |
| - 'NASDAQ:YHOO': 'Yahoo', |
170 |
| - 'NASDAQ:DELL': 'Dell', |
171 |
| - 'NYSE:HPQ': 'HP', |
172 |
| - 'NASDAQ:AMZN': 'Amazon', |
173 |
| - 'NYSE:TM': 'Toyota', |
174 |
| - 'NYSE:CAJ': 'Canon', |
175 |
| - 'NYSE:SNE': 'Sony', |
176 |
| - 'NYSE:F': 'Ford', |
177 |
| - 'NYSE:HMC': 'Honda', |
178 |
| - 'NYSE:NAV': 'Navistar', |
179 |
| - 'NYSE:NOC': 'Northrop Grumman', |
180 |
| - 'NYSE:BA': 'Boeing', |
181 |
| - 'NYSE:KO': 'Coca Cola', |
182 |
| - 'NYSE:MMM': '3M', |
183 |
| - 'NYSE:MCD': 'McDonald\'s', |
184 |
| - 'NYSE:PEP': 'Pepsi', |
185 |
| - 'NYSE:K': 'Kellogg', |
186 |
| - 'NYSE:UN': 'Unilever', |
187 |
| - 'NASDAQ:MAR': 'Marriott', |
188 |
| - 'NYSE:PG': 'Procter Gamble', |
189 |
| - 'NYSE:CL': 'Colgate-Palmolive', |
190 |
| - 'NYSE:GE': 'General Electrics', |
191 |
| - 'NYSE:WFC': 'Wells Fargo', |
192 |
| - 'NYSE:JPM': 'JPMorgan Chase', |
193 |
| - 'NYSE:AIG': 'AIG', |
194 |
| - 'NYSE:AXP': 'American express', |
195 |
| - 'NYSE:BAC': 'Bank of America', |
196 |
| - 'NYSE:GS': 'Goldman Sachs', |
197 |
| - 'NASDAQ:AAPL': 'Apple', |
198 |
| - 'NYSE:SAP': 'SAP', |
199 |
| - 'NASDAQ:CSCO': 'Cisco', |
200 |
| - 'NASDAQ:TXN': 'Texas Instruments', |
201 |
| - 'NYSE:XRX': 'Xerox', |
202 |
| - 'NYSE:WMT': 'Wal-Mart', |
203 |
| - 'NYSE:HD': 'Home Depot', |
204 |
| - 'NYSE:GSK': 'GlaxoSmithKline', |
205 |
| - 'NYSE:PFE': 'Pfizer', |
206 |
| - 'NYSE:SNY': 'Sanofi-Aventis', |
207 |
| - 'NYSE:NVS': 'Novartis', |
208 |
| - 'NYSE:KMB': 'Kimberly-Clark', |
209 |
| - 'NYSE:R': 'Ryder', |
210 |
| - 'NYSE:GD': 'General Dynamics', |
211 |
| - 'NYSE:RTN': 'Raytheon', |
212 |
| - 'NYSE:CVS': 'CVS', |
213 |
| - 'NYSE:CAT': 'Caterpillar', |
214 |
| - 'NYSE:DD': 'DuPont de Nemours'} |
| 92 | + 'TOT': 'Total', |
| 93 | + 'XOM': 'Exxon', |
| 94 | + 'CVX': 'Chevron', |
| 95 | + 'COP': 'ConocoPhillips', |
| 96 | + 'VLO': 'Valero Energy', |
| 97 | + 'MSFT': 'Microsoft', |
| 98 | + 'IBM': 'IBM', |
| 99 | + 'TWX': 'Time Warner', |
| 100 | + 'CMCSA': 'Comcast', |
| 101 | + 'CVC': 'Cablevision', |
| 102 | + 'YHOO': 'Yahoo', |
| 103 | + 'DELL': 'Dell', |
| 104 | + 'HPQ': 'HP', |
| 105 | + 'AMZN': 'Amazon', |
| 106 | + 'TM': 'Toyota', |
| 107 | + 'CAJ': 'Canon', |
| 108 | + 'SNE': 'Sony', |
| 109 | + 'F': 'Ford', |
| 110 | + 'HMC': 'Honda', |
| 111 | + 'NAV': 'Navistar', |
| 112 | + 'NOC': 'Northrop Grumman', |
| 113 | + 'BA': 'Boeing', |
| 114 | + 'KO': 'Coca Cola', |
| 115 | + 'MMM': '3M', |
| 116 | + 'MCD': 'McDonald\'s', |
| 117 | + 'PEP': 'Pepsi', |
| 118 | + 'K': 'Kellogg', |
| 119 | + 'UN': 'Unilever', |
| 120 | + 'MAR': 'Marriott', |
| 121 | + 'PG': 'Procter Gamble', |
| 122 | + 'CL': 'Colgate-Palmolive', |
| 123 | + 'GE': 'General Electrics', |
| 124 | + 'WFC': 'Wells Fargo', |
| 125 | + 'JPM': 'JPMorgan Chase', |
| 126 | + 'AIG': 'AIG', |
| 127 | + 'AXP': 'American express', |
| 128 | + 'BAC': 'Bank of America', |
| 129 | + 'GS': 'Goldman Sachs', |
| 130 | + 'AAPL': 'Apple', |
| 131 | + 'SAP': 'SAP', |
| 132 | + 'CSCO': 'Cisco', |
| 133 | + 'TXN': 'Texas Instruments', |
| 134 | + 'XRX': 'Xerox', |
| 135 | + 'WMT': 'Wal-Mart', |
| 136 | + 'HD': 'Home Depot', |
| 137 | + 'GSK': 'GlaxoSmithKline', |
| 138 | + 'PFE': 'Pfizer', |
| 139 | + 'SNY': 'Sanofi-Aventis', |
| 140 | + 'NVS': 'Novartis', |
| 141 | + 'KMB': 'Kimberly-Clark', |
| 142 | + 'R': 'Ryder', |
| 143 | + 'GD': 'General Dynamics', |
| 144 | + 'RTN': 'Raytheon', |
| 145 | + 'CVS': 'CVS', |
| 146 | + 'CAT': 'Caterpillar', |
| 147 | + 'DD': 'DuPont de Nemours'} |
215 | 148 |
|
216 | 149 |
|
217 | 150 | symbols, names = np.array(sorted(symbol_dict.items())).T
|
218 | 151 |
|
219 |
| -# retry is used because quotes_historical_google can temporarily fail |
220 |
| -# for various reasons (e.g. empty result from Google API). |
221 | 152 | quotes = []
|
222 | 153 |
|
223 | 154 | for symbol in symbols:
|
224 | 155 | print('Fetching quote history for %r' % symbol, file=sys.stderr)
|
225 |
| - quotes.append(retry(quotes_historical_google)( |
226 |
| - symbol, start_date, end_date)) |
| 156 | + url = ('https://raw.githubusercontent.com/scikit-learn/examples-data/' |
| 157 | + 'master/financial-data/{}.csv') |
| 158 | + quotes.append(pd.read_csv(url.format(symbol))) |
227 | 159 |
|
228 | 160 | close_prices = np.vstack([q['close'] for q in quotes])
|
229 | 161 | open_prices = np.vstack([q['open'] for q in quotes])
|
|
0 commit comments