-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbillboard.py
253 lines (217 loc) · 10.1 KB
/
billboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# -*- coding:utf-8 -*-
import re
import time

import lxml.html
import pandas as pd
from lxml import etree

try:
    from pandas.compat import StringIO
except ImportError:
    # Recent pandas releases no longer export StringIO from pandas.compat.
    from io import StringIO

from gugu.base import Base, cf
from gugu.utility import Utility
class BillBoard(Base):
    """
    Scrapers for the daily top list (LHB, "dragon-tiger list") and the
    related per-stock, per-broker and institutional statistics.

    Every public method stores the scraped table in ``self._data`` and
    returns whatever ``self._result()`` (inherited from ``Base``) produces
    from it.
    """

    def topList(self, date=None, retry=3, pause=0.001):
        """
        Fetch the daily top list (LHB).

        Parameters
        --------
        date : string
            Date of the detail data, format YYYY-MM-DD. When omitted,
            ``Utility.lastTradeDate()`` is used before 18:00 and
            ``Utility.getToday()`` from 18:00 on.
        retry : int, default 3
            Number of attempts made when the request or the parsing fails.
        pause : float, default 0.001
            Seconds to sleep before each attempt, so that requests are not
            sent too close together.

        Return
        ------
        DataFrame or List: [{'code':, 'name':, ...}, ...]
            code: stock code
            name: stock name
            pchange: price change (percent)
            amount: LHB turnover (in units of 10,000)
            buy: buy amount (in units of 10,000)
            bratio: buy amount as a share of total turnover
            sell: sell amount (in units of 10,000)
            sratio: sell amount as a share of total turnover
            reason: reason for being listed
            unscramble: interpretation
            date: date
        None is returned when ``date`` is given and
        ``Utility.isTradeDay(date)`` is falsy.

        Raises
        ------
        IOError
            When no attempt succeeded (including ``retry <= 0``).
        """
        self._data = pd.DataFrame()

        if date is None:
            if Utility.getHour() < 18:
                date = Utility.lastTradeDate()
            else:
                date = Utility.getToday()
        elif not Utility.isTradeDay(date):
            return None

        lastError = None
        for _ in range(retry):
            time.sleep(pause)
            try:
                # http://data.eastmoney.com/DataCenter_V3/stock2016/TradeDetail/pagesize=200,page=1,sortRule=-1,sortType=,startDate=2019-01-10,endDate=2019-01-10,gpfw=0,js=vardata_tab_1.html
                request = self._session.get(cf.LHB_URL % (date, date), timeout=10)
                request.encoding = 'gbk'
                # The payload is a JS assignment ("..._1={...}"); keep the
                # right-hand side only.
                text = request.text.split('_1=')[1]
                dataDict = Utility.str2Dict(text)

                frame = pd.DataFrame(dataDict['data'], columns=cf.LHB_TMP_COLS)
                frame.columns = cf.LHB_COLS
                for col in ['buy', 'sell', 'amount', 'Turnover']:
                    frame[col] = frame[col].astype(float)

                # Ratios are computed from the raw amounts, before the
                # amounts are rescaled and formatted below.
                frame['bratio'] = (frame['buy'] / frame['Turnover']).map(cf.FORMAT)
                frame['sratio'] = (frame['sell'] / frame['Turnover']).map(cf.FORMAT)
                frame['date'] = date

                for col in ['amount', 'buy', 'sell']:
                    frame[col] = (frame[col] / 10000).map(cf.FORMAT)

                # 'Turnover' is only needed to derive the ratios.
                frame = frame.drop('Turnover', axis=1)
            except Exception as e:
                # Keep retrying, but remember why the attempt failed.
                # KeyboardInterrupt / SystemExit are deliberately not caught.
                lastError = e
            else:
                self._data = frame
                return self._result()

        error = IOError(cf.NETWORK_URL_ERROR_MSG)
        # Expose the last failure in the traceback. Set as an attribute
        # instead of using "raise ... from" to stay valid on Python 2.
        error.__cause__ = lastError
        raise error

    def countTops(self, days=5, retry=3, pause=0.001):
        """
        Fetch per-stock LHB appearance statistics.

        Parameters
        --------
        days : int
            Number of days to aggregate over; default 5, other values are
            10, 30 and 60 (validated by ``Utility.checkLhbInput``).
        retry : int, default 3
            Number of attempts made per page when a request fails.
        pause : float, default 0.001
            Seconds to sleep before each attempt.

        Return
        ------
        DataFrame or List: [{'code':, 'name':, ...}, ...]
            code: stock code
            name: stock name
            count: number of appearances
            bamount: cumulative buy amount (in units of 10,000)
            samount: cumulative sell amount (in units of 10,000)
            net: net amount (in units of 10,000)
            bcount: number of buying seats
            scount: number of selling seats
        Nothing (None) is returned when ``Utility.checkLhbInput(days)`` does
        not return True.
        """
        self._data = pd.DataFrame()

        if Utility.checkLhbInput(days) is True:
            self._writeHead()

            # http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml?last=5&p=1
            self._data = self.__parsePage(
                kind=cf.LHB_KINDS[0], last=days, column=cf.LHB_GGTJ_COLS,
                dataArr=pd.DataFrame(), pageNo=1, retry=retry, pause=pause)

            # __parsePage returns None when a page could not be fetched, so
            # the check has to come before any column access.
            if self._data is not None:
                # Codes are parsed as numbers; restore the leading zeros.
                self._data['code'] = self._data['code'].map(lambda x: str(x).zfill(6))
                self._data = self._data.drop_duplicates('code')

            return self._result()

    def brokerTops(self, days=5, retry=3, pause=0.001):
        """
        Fetch per-brokerage-branch LHB appearance statistics.

        Parameters
        --------
        days : int
            Number of days to aggregate over; default 5, other values are
            10, 30 and 60 (validated by ``Utility.checkLhbInput``).
        retry : int, default 3
            Number of attempts made per page when a request fails.
        pause : float, default 0.001
            Seconds to sleep before each attempt.

        Return
        ---------
        DataFrame or List: [{'broker':, 'count':, ...}, ...]
            broker: brokerage branch name
            count: number of appearances
            bamount: cumulative buy amount (in units of 10,000)
            bcount: number of buying seats
            samount: cumulative sell amount (in units of 10,000)
            scount: number of selling seats
            top3: top three stocks bought
        Nothing (None) is returned when ``Utility.checkLhbInput(days)`` does
        not return True.
        """
        self._data = pd.DataFrame()

        if Utility.checkLhbInput(days) is True:
            self._writeHead()

            # http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml?last=5&p=1
            self._data = self.__parsePage(
                kind=cf.LHB_KINDS[1], last=days, column=cf.LHB_YYTJ_COLS,
                dataArr=pd.DataFrame(), pageNo=1, retry=retry, pause=pause)

            return self._result()

    def instTops(self, days=5, retry=3, pause=0.001):
        """
        Fetch institutional-seat tracking statistics.

        Parameters
        --------
        days : int
            Number of days to aggregate over; default 5, other values are
            10, 30 and 60 (validated by ``Utility.checkLhbInput``).
        retry : int, default 3
            Number of attempts made per page when a request fails.
        pause : float, default 0.001
            Seconds to sleep before each attempt.

        Return
        --------
        DataFrame or List: [{'code':, 'name':, ...}, ...]
            code: stock code
            name: stock name
            bamount: cumulative buy amount (in units of 10,000)
            bcount: number of buys
            samount: cumulative sell amount (in units of 10,000)
            scount: number of sells
            net: net amount (in units of 10,000)
        Nothing (None) is returned when ``Utility.checkLhbInput(days)`` does
        not return True.
        """
        self._data = pd.DataFrame()

        if Utility.checkLhbInput(days) is True:
            self._writeHead()

            # http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml?last=5&p=1
            # Columns 2 and 3 of the scraped table are dropped before the
            # column names are applied.
            self._data = self.__parsePage(
                kind=cf.LHB_KINDS[2], last=days, column=cf.LHB_JGZZ_COLS,
                dataArr=pd.DataFrame(), pageNo=1, retry=retry, pause=pause,
                drop_column=[2, 3])

            # __parsePage returns None when a page could not be fetched.
            if self._data is not None:
                # Codes are parsed as numbers; restore the leading zeros.
                self._data['code'] = self._data['code'].map(lambda x: str(x).zfill(6))

            return self._result()

    def instDetail(self, retry=3, pause=0.001):
        """
        Fetch institutional-seat trade details for the most recent trading day.

        Parameters
        --------
        retry : int, default 3
            Number of attempts made per page when a request fails.
        pause : float, default 0.001
            Seconds to sleep before each attempt.

        Return
        ----------
        DataFrame or List: [{'code':, 'name':, ...}, ...]
            code: stock code
            name: stock name
            date: trade date
            bamount: institutional-seat buy amount (in units of 10,000)
            samount: institutional-seat sell amount (in units of 10,000)
            type: type
        """
        self._data = pd.DataFrame()

        self._writeHead()

        # http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml?last=&p=1
        self._data = self.__parsePage(
            kind=cf.LHB_KINDS[3], last='', column=cf.LHB_JGMX_COLS,
            dataArr=pd.DataFrame(), pageNo=1, retry=retry, pause=pause)

        # __parsePage returns None when a page could not be fetched.
        if self._data is not None and len(self._data) > 0:
            # Codes are parsed as numbers; restore the leading zeros.
            self._data['code'] = self._data['code'].map(lambda x: str(x).zfill(6))

        return self._result()

    def __parsePage(self, kind, last, column, dataArr, pageNo=1, retry=3, pause=0.001, drop_column=None):
        """
        Download a paginated Sina LHB table, following the pager from pageNo.

        Parameters
        --------
        kind : string
            Table kind inserted into ``cf.LHB_SINA_URL``.
        last : int or string
            Value of the ``last`` URL parameter (number of days, or '').
        column : list
            Column names assigned to every scraped page.
        dataArr : DataFrame
            Rows collected so far; the scraped pages are appended to it.
        pageNo : int or string, default 1
            First page to request.
        retry : int, default 3
            Number of attempts made for each page.
        pause : float, default 0.001
            Seconds to sleep before each attempt.
        drop_column : list, default None
            Column labels to drop from each page before renaming.

        Return
        ------
        DataFrame with ``dataArr`` followed by all scraped pages, or None
        when some page could not be fetched and parsed within ``retry``
        attempts (pages collected up to that point are discarded).
        """
        pages = [dataArr]
        # Guards against a pager whose last link points back to a page that
        # was already visited, which would otherwise never terminate.
        seenPages = {str(pageNo)}

        while True:
            self._writeConsole()

            nextPageNo = None
            for _ in range(retry):
                time.sleep(pause)
                try:
                    request = self._session.get(cf.LHB_SINA_URL % (kind, last, pageNo), timeout=10)
                    request.encoding = 'gbk'
                    html = lxml.html.parse(StringIO(request.text))

                    # Re-wrap only the data rows in a bare <table> so that
                    # read_html sees exactly one table.
                    rows = html.xpath("//table[@id=\"dataTable\"]/tr")
                    if self._PY3:
                        rowsHtml = [etree.tostring(node).decode('utf-8') for node in rows]
                    else:
                        rowsHtml = [etree.tostring(node) for node in rows]
                    tableHtml = '<table>%s</table>' % ''.join(rowsHtml)

                    # A file-like object makes read_html treat the text as
                    # the document itself, not as a path or URL.
                    df = pd.read_html(StringIO(tableHtml))[0]
                    if drop_column is not None:
                        df = df.drop(drop_column, axis=1)
                    df.columns = column

                    # The last pager link carries the next page number in
                    # its onclick handler; no handler means no further page.
                    nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
                    if len(nextPage) > 0:
                        nextPageNo = re.findall(r'\d+', nextPage[0])[0]
                except Exception as e:
                    print(e)
                else:
                    # Store the page only once the whole attempt succeeded,
                    # so a retry can never add the same rows twice.
                    pages.append(df)
                    break
            else:
                # Every attempt for this page failed (or retry <= 0).
                return None

            if nextPageNo is None or str(nextPageNo) in seenPages:
                return pd.concat(pages, ignore_index=True)

            seenPages.add(str(nextPageNo))
            pageNo = nextPageNo