|
| 1 | +import requests as rq |
| 2 | +from bs4 import BeautifulSoup as Bs |
| 3 | +import matplotlib.pyplot as plt |
| 4 | +import pandas as pd |
| 5 | +import numpy as np |
| 6 | +import time |
| 7 | +import matplotlib as mpl |
| 8 | + |
# Use the KaiTi typeface for both font families so Chinese text in the
# plots is drawn with a font that contains the glyphs.
mpl.rcParams.update({
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
})
| 12 | + |
def fund(code):
    """Download a fund's net-value history and save it as ``fund_<code>.xlsx``.

    Pages are requested one after another (page index 0 up to 99) until
    ``dataFund`` reports a page without data rows, pausing one second
    between requests.

    Args:
        code: Fund code substituted into the quote URL and the file name.
    """
    url = 'http://quotes.money.163.com/fund/jzzs_%s_%d.html?start=2001-01-01&end=2020-12-31&sort=TDATE&order=asc'
    # Collect the per-page frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every page.
    pages = []
    for i in range(0, 100):
        page = dataFund(getHtml(url % (code, i)))
        if page is None:
            # No table / no data rows: we ran past the last page.
            break
        pages.append(page)
        print("page ", i)
        time.sleep(1)
    data = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()
    filename = 'fund_%s.xlsx' % code
    data.to_excel(filename, index=False)
    print("数据文件:", filename)
| 29 | + |
def stock(code):
    """Download a stock's quarterly trade history and save it as ``stock_<code>.xlsx``.

    Every quarter of every year from 2001 through 2020 is requested; the
    pages that contain data are combined, ordered by the '日期' (date)
    column and written to Excel.

    Args:
        code: Stock code substituted into the quote URL and the file name.
    """
    url = "http://quotes.money.163.com/trade/lsjysj_{code}.html?year={year}&season={season}"

    pages = []
    for year in range(2001, 2021):
        print('year ', year)
        # Seasons are 1..4; the previous range(1, 4) never fetched Q4.
        for season in range(1, 5):
            html = getHtml(url.format(code=code, year=year, season=season))
            page = dataStock(html)
            if page is not None:
                pages.append(page)

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    data = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()
    if '日期' in data.columns:
        # sort_values returns a new frame; the result must be kept.
        data = data.sort_values(by='日期')
    filename = 'stock_%s.xlsx' % code
    data.to_excel(filename, index=False)
    print("数据文件:", filename)
| 45 | + |
def getHtml(resLoc, timeout=10):
    """Fetch a URL and return its body decoded as UTF-8, retrying until it succeeds.

    The request is repeated every 5 seconds for as long as the response
    contains the site's error banner (which this script treats as
    "requesting too frequently") or the request times out.

    Args:
        resLoc: URL to request.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The text of the first response that does not contain the error banner.
    """
    while True:
        try:
            rp = rq.get(resLoc, timeout=timeout)
        except rq.exceptions.Timeout:
            # Treat a stalled request like throttling: wait and try again
            # rather than aborting a long scrape.
            print("请求超时,等待 5 秒再试")
            time.sleep(5)
            continue
        rp.encoding = 'utf-8'
        if "对不起!您所访问的页面出现错误" not in rp.text:
            return rp.text
        print("获取过于频繁,等待 5 秒再试")
        time.sleep(5)
| 57 | + |
def dataFund(html):
    """Parse the first ``<table>`` of a fund page into a DataFrame.

    Column names come from the ``<th>`` cells of the first row; each later
    row contributes its text split on newlines, with the first and last
    pieces dropped.

    Args:
        html: Page markup as a string.

    Returns:
        A DataFrame of the data rows, or ``None`` when the page has no
        table (the markup is printed in that case) or no data rows.
    """
    first_table = Bs(html, 'html.parser').table
    if first_table is None:
        print(html)
        return None

    trs = first_table.find_all('tr', recursive=True)
    header = [cell.text for cell in trs[0].find_all('th')]
    records = [tr.text.split('\n')[1:-1] for tr in trs[1:]]
    if len(records) == 0:
        return None
    return pd.DataFrame(np.array(records), columns=header)
| 73 | + |
def dataStock(html):
    """Parse the trade-history table of a stock page into a DataFrame.

    The table is located by its CSS classes. Column names come from the
    ``<th>`` cells of the first row, the values from the ``<td>`` cells of
    the remaining rows, ordered by their first cell.

    Args:
        html: Page markup as a string.

    Returns:
        A DataFrame of the data rows, or ``None`` when the table is missing
        (the markup is printed in that case) or has no data rows.
    """
    soup = Bs(html, 'html.parser')
    history = soup.find('table', class_='table_bg001 border_box limit_sale')
    if history is None:
        print(html)
        return None

    trs = history.find_all('tr', recursive=True)
    header = [cell.text for cell in trs[0].find_all('th')]
    records = [[cell.text for cell in tr.find_all('td')] for tr in trs[1:]]
    if len(records) == 0:
        return None

    # Order rows by their first cell (plain string comparison).
    ordered = sorted(records, key=lambda record: record[0])
    return pd.DataFrame(np.array(ordered), columns=header)
| 92 | + |
def dataFormat(code, type_='fund', cycleDays=5, begin='2001-01-01'):
    """Load a saved ``<type_>_<code>.xlsx`` file and pick the purchase rows.

    Args:
        code: Code used in the Excel file name.
        type_: ``'fund'`` selects the '公布日期' / '单位净值' columns; any
            other value selects '日期' / '收盘价'.
        cycleDays: Keep every ``cycleDays``-th row (row index divisible by it).
        begin: Lower bound compared against the date column.

    Returns:
        DataFrame with columns '日期' and '单价' limited to rows whose date
        is ``>= begin``.
    """
    source = pd.read_excel('%s_%s.xlsx' % (type_, code))

    # Date/price column names differ between fund and stock sheets.
    wanted = ['公布日期', '单位净值'] if type_ == 'fund' else ['日期', '收盘价']

    # Rows on the investment schedule: every cycleDays-th row of the sheet.
    on_schedule = source.index % cycleDays == 0
    picks = source[on_schedule][wanted]
    picks.columns = ["日期", "单价"]

    return picks[picks['日期'] >= begin]
| 105 | + |
def show(buydf, amount=1000):
    """Plot the price trend and the result of investing a fixed amount per row.

    Two line charts are shown one after another: the unit price over time,
    and the cumulative principal against the current value of everything
    bought so far.

    Args:
        buydf: DataFrame with a '日期' (date) column and a numeric '单价'
            (unit price) column, one row per purchase. It is not modified.
        amount: Money invested at each row.
    """
    # Work on a copy: inserting columns into the caller's frame made a
    # second call with the same frame fail on the duplicate column names.
    plan = buydf.copy()
    plan['定投金额'] = amount                           # invested at each purchase
    plan['数量'] = amount / plan['单价']                # units bought at each purchase
    plan['累计本金'] = plan['定投金额'].cumsum()        # total principal so far
    plan['累计数量'] = plan['数量'].cumsum()            # total units held so far
    plan['当前价值'] = plan['累计数量'] * plan['单价']  # holdings valued at that row's price

    dates = plan['日期'].to_list()

    # Principal vs. value comparison, indexed by date.
    data = pd.DataFrame(
        {
            '累计本金': plan['累计本金'].to_list(),
            '当前价值': plan['当前价值'].to_list(),
        },
        index=dates,
    )

    # Price trend, indexed by date.
    tend = pd.DataFrame({'单价': plan['单价'].to_list()}, index=dates)

    tend.plot.line(title="价格走势", linewidth=1, yticks=[])
    plt.show()
    data.plot.line(title="定投效果", linewidth=1, yticks=[])
    plt.show()
| 125 | + |
if __name__ == "__main__":
    fund("150124")  # download the data
    purchases = dataFormat('150124', begin='2015-05-26')
    show(purchases)  # display the result
0 commit comments