# Section 1 banner: fetch real-time stock quotes from Sina Finance.
print('-------------------通过新浪财经获取股票实时行情--------------------')
import pandas as pd
import time
from urllib.request import urlopen # HTTP client shipped with the Python standard library
import urllib.request
# Keep wide DataFrames on one line when printed instead of wrapping the columns.
pd.set_option('expand_frame_repr',False)
# # =====直接通过网址获取数据
# import sys
# import requests
# url = 'https://27.push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.600000&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=0&end=20500101&lmt=120'
# r = requests.get(url).json()['data']['klines']
# l = [i.split(',') for i in r]
# df = pd.DataFrame(l)
# df = df[[0, 1, 2, 3, 4, 5, 6]]
# df.columns = ['交易日期', '开盘价', '收盘价', '最高价', '最低价', '成交量', '成交额']
# print(df)
# sys.exit()
# # 由于新浪网站接口一月份更新后无法直接从网页抓取,可以通过东财网址进行查看,具体可以看目录里的神奇的网址图文攻略
def requestForNew(url, max_try_num=10, sleep_time=5, timeout=15):
    """Fetch ``url`` with browser-like headers and return the body decoded as GBK.

    The request carries a Sina Finance ``Referer`` and a desktop browser
    ``User-Agent``. Each failed attempt (network/HTTP error, or a response
    whose status code is not 200) is reported with ``print`` and retried
    after ``sleep_time`` seconds, up to ``max_try_num`` attempts in total.

    :param url: address to request
    :param max_try_num: maximum number of attempts
    :param sleep_time: seconds to wait between two attempts
    :param timeout: per-attempt socket timeout in seconds
    :return: the response body decoded with ``gbk``; ``None`` when no attempt
        succeeded and none of them raised (e.g. only non-200 responses, or
        ``max_try_num`` is 0)
    :raises OSError: the last ``URLError`` / ``HTTPError`` / timeout seen when
        no attempt succeeded and at least one of them raised
    """
    headers = {
        'Referer': 'http://finance.sina.com.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.62'
    }
    request = urllib.request.Request(url, headers=headers)
    last_error = None
    for attempt in range(max_try_num):
        try:
            # The context manager closes the connection on every path.
            with urlopen(request, timeout=timeout) as response:
                if response.code == 200:
                    return response.read().decode('gbk')
                print("链接失败", response)
        except OSError as error:
            # URLError and HTTPError are OSError subclasses; urlopen raises
            # them instead of returning a non-200 response, so they must be
            # caught here for the retry loop to have any effect.
            print("链接失败", error)
            last_error = error
        # Do not wait after the final attempt: nothing is left to retry.
        if attempt < max_try_num - 1:
            time.sleep(sleep_time)
    if last_error is not None:
        raise last_error
    return None
# =============================以上代码不需要修改==============================================
# =====Build the request URL
# Example codes - normal: sh600000 sz000002; delisted: sh600002 sz000003;
# suspended: sz300124; ex-rights: sh600276; newly listed: sz002952
stock_code_list = ['sh601636', 'sz000001', 'sz002738','']  # append more codes before the trailing '' (ASCII punctuation only)
url = "https://hq.sinajs.cn/list=" + ",".join(stock_code_list)
print(url)
# Sample response, one line per stock (middle fields abridged):
# var hq_str_sh601636="旗滨集团,9.850,9.850,9.650,9.930,9.620,9.650,9.660,18030331,175827770.000,...,2022-09-30,15:00:03,00,";
# var hq_str_sz000001="平安银行,11.870,11.860,11.840,11.960,11.830,11.840,11.850,53723019,638906947.770,...,2022-09-30,15:00:00,00";
# Note that some lines end with '00,";' and others with '00";', so the
# number of comma-separated fields differs by one between stocks.

# =====Fetch the data (requires a network connection)
content = requestForNew(url)
if content is None:
    # requestForNew can return None when no attempt succeeded; fail here with
    # a clear message instead of an AttributeError on content.strip() below.
    raise RuntimeError('获取行情数据失败: ' + url)
print(content)
print('---')

# =====Convert the text into a DataFrame
# strip() removes surrounding whitespace/newlines; every remaining line is one stock.
data_line = content.strip().split('\n')
# Drop the 'var hq_str_' prefix and split each line into its comma-separated fields,
# e.g. ['sh600352="浙江龙盛', '9.010', '9.040', ..., '2022-09-30', '15:00:03', '00', '";']
data_line = [line.replace('var hq_str_', '').split(',') for line in data_line]
print(data_line)
print('---')
# Build the table from raw strings and convert only the numeric fields.
# Passing dtype='float' to the constructor relied on pandas silently ignoring
# the dtype for text columns; newer pandas versions raise an error instead.
df = pd.DataFrame(data_line)
# Columns 1-29 are prices and volumes; 0 is code/name, 30 the date,
# 31 the time and 32+ the status / line terminator, which stay as text.
numeric_columns = [col for col in df.columns if 1 <= col <= 29]
if numeric_columns:
    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce').astype('float')
#print(df)
# Resulting layout (abridged):
#                  0     1     2     3     4     5     6     7          8             9 ...          30        31   32    33
# 0  sh600352="浙江龙盛  9.01  9.04  9.01  9.10  9.00  9.01  9.02  6150808.0  5.566268e+07 ...  2022-09-30  15:00:03   00    ";
# 1  sz000001="平安银行 11.87 11.86 11.84 11.96 11.83 11.84 11.85 53723019.0  6.389069e+08 ...  2022-09-30  15:00:00 00";  None
# =====Tidy up the DataFrame
# Column 0 mixes the stock code and the stock name as 'code="name';
# split it once on '="' and take the two ends.
code_and_name = df[0].str.split('="')
df['stock_code'] = code_and_name.str[0].str.strip()
df['stock_name'] = code_and_name.str[-1].str.strip()

# A candle is conventionally labelled by its end time: combine the date
# column (30) and the time column (31) into one datetime column.
df['candle_end_time'] = pd.to_datetime(df[30] + ' ' + df[31])

# Give the positional columns readable names (compare with the raw data to
# see what each field is; buy1 / sell1 are the best bid / ask).
rename_dict = {
    1: 'open',
    2: 'pre_close',
    3: 'close',
    4: 'high',
    5: 'low',
    6: 'buy1',
    7: 'sell1',
    8: 'amount',
    9: 'volume',
    32: 'status',
}
df = df.rename(columns=rename_dict)
# The status field may still carry the closing '";' of the JavaScript line.
df['status'] = df['status'].str.strip('";')

# Keep only the tidy columns, in reporting order (amount counts shares, not lots).
output_columns = [
    'stock_code', 'stock_name', 'candle_end_time',
    'open', 'high', 'low', 'close', 'pre_close',
    'amount', 'volume', 'buy1', 'sell1', 'status',
]
df = df[output_columns]
print(df)

# =====Save the data
df.to_csv('股票实时行情.csv', index=False, encoding='gbk')
print(df)

# =====Delisted / suspended stocks
# Drop rows by their characteristics: keep only rows whose opening price is
# effectively positive. (Could 'amount' be used for this as well?)
df = df[df['open'] > 0.00001]
# How to tell delisted from suspended? Possibly via pre_close and status.

# =====Newly listed stocks
# Look at sz002952: for a newly listed stock, pre_close is the issue price.

# =====Ex-rights stocks
# Look at sh600276: on an ex-rights day, pre_close is not yesterday's actual
# close but the adjusted close computed and published by the exchange.
# That value is what makes the real price change computable.
# Section 2 banner: fetch recent daily K-line data from Tencent Finance.
print('-----------------通过腾讯财经获取近日股票日K线数据----------------')
from urllib.request import urlopen # HTTP client shipped with the Python standard library
import json # JSON module shipped with the Python standard library
from random import randint # random-integer helper shipped with the Python standard library
import pandas as pd
pd.set_option('expand_frame_repr', False) # do not wrap the output when there are many columns
pd.set_option('display.max_rows', 5000) # maximum number of rows to display
# The two options below affect how pandas measures character width when
# printing (relevant for the Chinese text in this script).
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
# =====Helper that produces a random number string
def _random(n=16):  # the original note says the Tencent Finance endpoint needs a 16-digit random number
    """
    Return a random integer with exactly ``n`` decimal digits, as a string.

    :param n: number of digits wanted (default 16)
    :return: ``str`` of an integer drawn from ``[10**(n-1), 10**n - 1]``
    """
    return str(randint(10 ** (n - 1), 10 ** n - 1))
# =====Fetch daily / weekly / monthly K-line data
# ===Endpoints
# K-line data: http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq&param=sz000001,day,,,50,qfq&r=0.5643184591626897
# Regular page: http://stockhtm.finance.qq.com/sstock/ggcx/000001.shtml

# ===Build the URL
# Parameters
stock_code = 'sh000001'  # normal stock sz000001, index sh000001, ETF sh510500
k_type = 'day'  # day, week, month -> daily, weekly, monthly candles
num = 2  # number of candles to fetch (original note: at most 640 for stocks, no limit for indexes / ETFs)
# The template has 5 %s placeholders: k_type, stock_code, k_type, num, random suffix.
# The query separator must be '&param='; the garbled form '¶m=' drops the
# parameter and puts a non-ASCII character into the request line.
url = 'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_%sqfq&param=%s,%s,,,%s,qfq&r=0.%s'
url = url % (k_type, stock_code, k_type, num, _random())
print(url)
# e.g. http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq&param=sh000001,day,,,2,qfq&r=0.8473481183931692
print('---')

# ===Fetch the data
# The context manager closes the connection; the timeout keeps the script
# from hanging forever on a dead connection.
with urlopen(url, timeout=15) as response:
    content = response.read().decode()

# ===Convert the payload into a dict
# The body looks like '<var name>=<json>': split on the first '=' only and keep the JSON part.
content = content.split('=', maxsplit=1)[-1]
print(type(content))  # <class 'str'>
content = json.loads(content)
print(type(content))  # <class 'dict'>
print(content)
# Sample structure (abridged):
# {'code': 0, 'msg': '',
#  'data': {'sh000001': {'day': [['2022-09-29', '3067.470', '3041.200', '3076.760', '3026.080', '230030416.000'],
#                                ['2022-09-30', '3042.170', '3024.390', '3054.610', '3021.930', '204115336.000']],
#                        'qt': {...}, 'mx_price': {...}, 'prec': '3045.070', 'version': '16'}}}
print('------')

# ===Convert the candles into a DataFrame
k_data = content['data'][stock_code]
# Stocks come back under the forward-adjusted key ('qfq' + k_type); indexes
# only have the plain k_type key. Pick whichever one is present.
if k_type in k_data:
    k_data = k_data[k_type]
elif 'qfq' + k_type in k_data:  # qfq = forward-adjusted prices
    k_data = k_data['qfq' + k_type]
else:
    raise ValueError('已知的key在dict中均不存在,请检查数据')
print(k_data)  # [['2022-09-29', '3067.470', '3041.200', '3076.760', '3026.080', '230030416.000'], ...]
print('---')
df = pd.DataFrame(k_data)
print(df)
#             0         1         2         3         4              5
# 0  2022-09-29  3067.470  3041.200  3076.760  3026.080  230030416.000
# 1  2022-09-30  3042.170  3024.390  3054.610  3021.930  204115336.000
print('---')

# ===Tidy up the data
# Column 6 ('info') only shows up when an ex-rights event happened; otherwise it is absent / None.
rename_dict = {0: 'candle_end_time', 1: 'open', 2: 'close', 3: 'high', 4: 'low', 5: 'amount', 6: 'info'}
# Original note: amount is in lots (hands), so the data is not fully precise.
df.rename(columns=rename_dict, inplace=True)
df['candle_end_time'] = pd.to_datetime(df['candle_end_time'])
if 'info' not in df:
    df['info'] = None
df = df[['candle_end_time', 'open', 'high', 'low', 'close', 'amount', 'info']]
print(df)
#   candle_end_time      open      high       low     close         amount  info
# 0      2022-09-29  3067.470  3076.760  3026.080  3041.200  230030416.000  None
# 1      2022-09-30  3042.170  3054.610  3021.930  3024.390  204115336.000  None

# ===Try other periods, indexes and ETFs
# ===Try special cases
# normal: sz000001 sz000002; delisted: sh600002 sz000003; suspended: sz300124;
# newly listed: sz002952; ex-rights: sh600276
# Section 3 banner: fetch recent minute-level K-line data from Tencent Finance.
print('----------------通过腾讯财经获取最近分钟级别K线数据---------------')
from urllib.request import urlopen # HTTP client shipped with the Python standard library
import json # JSON module shipped with the Python standard library
from random import randint # random-integer helper shipped with the Python standard library
import pandas as pd
pd.set_option('expand_frame_repr', False) # do not wrap the output when there are many columns
pd.set_option('display.max_rows', 5000) # maximum number of rows to display
# =====Helper that produces a random number string
def _random(n=16):
    """
    Build a random integer that has exactly ``n`` decimal digits.

    :param n: how many digits the number should have (default 16)
    :return: the number converted to ``str``
    """
    smallest = 10 ** (n - 1)
    largest = 10 ** n - 1
    digits = randint(smallest, largest)
    return str(digits)
# =====Fetch minute-level K-lines
# K-line data: http://ifzq.gtimg.cn/appstock/app/kline/mkline?param=sz000001,m5,,640&_var=m5_today&r=0.6508601564534552
# Regular page: http://stockhtm.finance.qq.com/sstock/ggcx/000001.shtml

# ===Request parameters
stock_code = 'sh601636'  # normal stock sz000001, index sh000001, ETF sh510500
k_type = 60  # candle width in minutes: 1, 5, 15, 30 or 60
num = 1000  # original note: "at most 320" - NOTE(review): the value exceeds that stated limit; confirm against the API

# ===Build the URL in one step
url = 'http://ifzq.gtimg.cn/appstock/app/kline/mkline?param=%s,m%s,,%s&_var=m%s_today&r=0.%s' % (
    stock_code, k_type, num, k_type, _random())

# ===Download and decode the payload
# The body looks like '<var name>=<json>': keep what follows the first '='.
content = json.loads(
    urlopen(url=url, timeout=15).read().decode().split('=', maxsplit=1)[-1]
)

# ===Turn the candles into a DataFrame
k_data = content['data'][stock_code][f'm{k_type}']
rename_dict = {0: 'candle_end_time', 1: 'open', 2: 'close', 3: 'high', 4: 'low', 5: 'amount'}
# Original note: amount is in lots (hands).
df = pd.DataFrame(k_data).rename(columns=rename_dict)


def _format_candle_time(raw):
    # Rebuild the leading 12 characters (digits of year, month, day, hour, minute) as 'YYYY-MM-DD HH:MM'.
    return f'{raw[0:4]}-{raw[4:6]}-{raw[6:8]} {raw[8:10]}:{raw[10:12]}'


df['candle_end_time'] = pd.to_datetime(df['candle_end_time'].map(_format_candle_time))
df = df[['candle_end_time', 'open', 'high', 'low', 'close', 'amount']]
print(df)
# Sample output:
#        candle_end_time   open   high    low  close     amount
# 0  2022-06-10 10:30:00  10.75  10.85  10.65  10.76  163157.61
# 1  2022-06-10 11:30:00  10.76  10.90  10.76  10.85   64518.90
# 2  2022-06-10 14:00:00  10.85  10.95  10.84  10.94   76461.54
# ...

# ===Try other periods, indexes and ETFs
# ===Try special cases
# normal: sz000001 sz000002; delisted: sh600002 sz000003; suspended: sz300124;
# newly listed: sz002952; ex-rights: sh600276