# 投资交易 解决方案
#
# 使用 Python 通过新浪财经与腾讯财经获取股票实时行情、日K线以及分钟K线数据

# Section banner: real-time stock quotes fetched from Sina Finance.
print('-------------------通过新浪财经获取股票实时行情--------------------')

import pandas as pd
import time
from urllib.request import urlopen  # HTTP client shipped with the Python standard library
import urllib.request


# Print wide DataFrames on one line instead of wrapping the columns.
pd.set_option('expand_frame_repr',False)


# # =====直接通过网址获取数据
# import sys
# import requests
# url = 'https://27.push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.600000&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=0&end=20500101&lmt=120'
# r = requests.get(url).json()['data']['klines']
# l = [i.split(',') for i in r]
# df = pd.DataFrame(l)
# df = df[[0, 1, 2, 3, 4, 5, 6]]
# df.columns = ['交易日期', '开盘价', '收盘价', '最高价', '最低价', '成交量', '成交额']
# print(df)
# sys.exit()
# # 由于新浪网站接口一月份更新后无法直接从网页抓取,可以通过东财网址进行查看,具体可以看目录里的神奇的网址图文攻略


def requestForNew(url, max_try_num=10, sleep_time=5, timeout=15):
    """
    Download ``url`` with browser-like headers and return the body decoded as GBK.

    ``urlopen`` raises (``HTTPError`` / ``URLError`` / socket errors) instead of
    returning a non-200 response, so failures are caught here and the request is
    retried rather than aborting on the first error.

    :param url: address to fetch
    :param max_try_num: maximum number of attempts
    :param sleep_time: seconds to wait between two attempts
    :param timeout: per-attempt socket timeout in seconds
    :return: the decoded response text; ``None`` if no attempt produced a 200
        response and no network error was recorded (e.g. ``max_try_num`` is 0)
    :raises OSError: the last network error, once every attempt has failed
    """
    headers = {
        'Referer': 'http://finance.sina.com.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.62'
    }
    request = urllib.request.Request(url, headers=headers)
    last_error = None
    for attempt in range(max_try_num):
        try:
            # The context manager closes the connection even when decoding fails.
            with urlopen(request, timeout=timeout) as response:
                if response.code == 200:
                    return response.read().decode('gbk')
                print("链接失败", response)
        except OSError as err:  # URLError, HTTPError and socket timeouts all derive from OSError
            last_error = err
            print("链接失败", err)
        # No point in waiting once the final attempt has been used up.
        if attempt < max_try_num - 1:
            time.sleep(sleep_time)
    if last_error is not None:
        raise last_error
    return None

# =============================以上代码不需要修改==============================================


# ===== Build the request URL
# Example codes from the original notes — normal: sh600000 sz000002; delisted: sh600002 sz000003;
# suspended: sz300124; ex-rights: sh600276; newly listed: sz002952.
# To query more stocks, append more codes to the list (use ASCII quotes and commas).
stock_code_list = ['sh601636', 'sz000001', 'sz002738','']
# Blank placeholder entries are skipped so the URL never carries an empty code or a dangling comma.
url = "https://hq.sinajs.cn/list=" + ",".join(code.strip() for code in stock_code_list if code.strip())
print(url)
'''
var hq_str_sh601636="旗滨集团,9.850,9.850,9.650,9.930,9.620,9.650,9.660,18030331,175827770.000,16173,9.650,37800,9.640,53400,9.630,112800,9.620,74600,9.610,80600,9.660,55000,9.670,52100,9.680,50500,9.690,50300,9.700,2022-09-30,15:00:03,00,";
var hq_str_sz000001="平安银行,11.870,11.860,11.840,11.960,11.830,11.840,11.850,53723019,638906947.770,316361,11.840,1225400,11.830,622300,11.820,541600,11.810,879900,11.800,221516,11.850,216800,11.860,202900,11.870,229100,11.880,166900,11.890,2022-09-30,15:00:00,00";
var hq_str_sz002738="中矿资源,94.550,96.110,92.000,96.180,91.910,92.000,92.030,13971273,1305195342.930,51431,92.000,33400,91.990,13600,91.980,2700,91.970,6800,91.960,100,92.030,6600,92.040,19700,92.050,10000,92.060,500,92.070,2022-09-30,15:00:00,00";
'''



# =============================以下代码不需要修改==============================================

# ===== Fetch the data (requires a network connection)
content = requestForNew(url)
print(content)

print('---')
# ===== Convert the response text into a DataFrame
# strip() removes surrounding whitespace/newlines; each remaining line describes one stock, e.g.
#   var hq_str_sz000001="平安银行,11.870,11.860,11.840,...,166900,11.890,2022-09-30,15:00:00,00";
data_line = content.strip().split('\n')

# Drop the 'var hq_str_' prefix and split every line into its comma-separated fields, e.g.
#   ['sz000001="平安银行', '11.870', '11.860', ..., '2022-09-30', '15:00:00', '00";']
# Lines that end with a trailing comma (see the sh... samples) yield one extra field: '";'.
data_line = [i.replace('var hq_str_', '').split(',') for i in data_line]
print(data_line)
print('---')

# Build the table from the raw strings first, then convert only the numeric columns that are
# used below. Passing dtype='float' to the constructor fails on pandas 2.x because the
# name/date/time columns cannot be cast, and on older releases it could silently turn the
# status column into floats, which breaks the .str call further down.
df = pd.DataFrame(data_line)
numeric_columns = [1, 2, 3, 4, 5, 6, 7, 8, 9]
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# ===== Tidy up the DataFrame
# Column 0 holds 'code="name'; split it on '="' (both parts stay in column 0 as a list).
df[0] = df[0].str.split('="')

df['stock_code'] = df[0].str[0].str.strip()  # first part: the stock code
df['stock_name'] = df[0].str[-1].str.strip()  # last part: the stock name

# A candle is conventionally labelled with its end time: join the date (30) and time (31)
# columns and parse the result once.
df['candle_end_time'] = pd.to_datetime(df[30] + ' ' + df[31])

# Give the positional columns readable names.
# NOTE(review): the field meanings come from the original author's comparison with live
# data — confirm against the current Sina response layout.
rename_dict = {1: 'open', 2: 'pre_close', 3: 'close', 4: 'high', 5: 'low', 6: 'buy1', 7: 'sell1',
               8: 'amount', 9: 'volume', 32: 'status'}
df.rename(columns=rename_dict, inplace=True)
# The status field may still carry the closing '";' of the JavaScript assignment.
df['status'] = df['status'].str.strip('";')
# Per the original note, amount is counted in shares rather than lots.
df = df[['stock_code', 'stock_name', 'candle_end_time', 'open', 'high', 'low', 'close', 'pre_close', 'amount', 'volume',
         'buy1', 'sell1', 'status']]
print(df)

# ===== Persist the quotes
# Written as a GBK-encoded CSV file without the index column.
df.to_csv('股票实时行情.csv', index=False, encoding='gbk')

print(df)
# ===== Delisted / suspended stocks
# Original idea: drop such rows based on their characteristics (maybe by looking at amount?).
# Here only rows whose opening price exceeds a small positive tolerance are kept.
has_open_price = (df['open'] - 0) > 0.00001
df = df[has_open_price]

# 如何区分退市和停牌?可能可以通过pre_close,还有status

# =====考察新上市的股票
# 考察sz002952
# 对于新上市的股票,pre_close指的是发行价

# =====考察除权股票
# 考察sh600276
# 对于今天除权的股票,pre_close不是昨天真正的收盘价,而是交易所计算出来并且公布的昨天的收盘价。
# 有了这个数据,才能算出这个股票真正的涨跌幅
# Section banner: recent daily K-line data fetched from Tencent Finance.
print('-----------------通过腾讯财经获取近日股票日K线数据----------------')

from urllib.request import urlopen  # HTTP client shipped with the Python standard library
import json  # JSON parser shipped with the Python standard library
from random import randint  # random-integer helper shipped with the Python standard library
import pandas as pd
pd.set_option('expand_frame_repr', False)  # do not wrap the output when there are many columns
pd.set_option('display.max_rows', 5000)  # maximum number of rows shown when printing

# Account for ambiguous-width and East Asian wide characters when aligning printed columns.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

# =====创建随机数的函数
def _random(n=16):  #生成16位随机整数(访问腾讯财经接口需要)
    """
    创建一个n位的随机整数
    :param n:
    :return:
    """
    start = 10**(n-1)
    end = (10**n)-1

    return str(randint(start, end))

# =====获取日、周、月的K线数据
# ===神奇的网址
# 获取K线数据:http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq&param=sz000001,day,,,50,qfq&r=0.5643184591626897
# 正常网址:http://stockhtm.finance.qq.com/sstock/ggcx/000001.shtml


# === Build the URL
# Parameters
stock_code = 'sh000001'  # examples: stock sz000001, index sh000001, ETF sh510500
k_type = 'day'  # 'day', 'week' or 'month' for daily, weekly or monthly candles
num = 2  # number of candles to request; per the original note stocks are capped at 640, indexes/ETFs are not

# The template has five %s placeholders: k_type, stock_code, k_type, num and a random suffix.
url_fields = (k_type, stock_code, k_type, num, _random())
url = 'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_%sqfq&param=%s,%s,,,%s,qfq&r=0.%s' % url_fields

print(url)
# e.g. http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq&param=sh000001,day,,,2,qfq&r=0.8473481183931692
print('---')

# === Fetch the data
# A timeout keeps a stalled connection from blocking forever (the minute-kline request in
# this file also uses 15 seconds), and the with-block closes the response once it is read.
with urlopen(url, timeout=15) as response:
    content = response.read().decode()

# === Convert the payload to a dict
# The body looks like '<name>=<json>': split on the first '=' only and keep the JSON part.
content = content.split('=', maxsplit=1)[-1]
print(type(content))        # <class 'str'>
content = json.loads(content)  # inspect the result to see everything the endpoint returns
print(type(content))    # <class 'dict'>
print(content)
'''
{'code': 0, 'msg': '', 'data': {'sh000001': {'day': [['2022-09-29', '3067.470', '3041.200', '3076.760', '3026.080', '230030416.000'], ['2022-09-30', '3042.170', '3024.390', '3054.610', '3021.930', '204115336.000']], 'qt': {'sh000001': ['1', '上证指数', '000001', '3024.39', '3041.20', '3042.17', '204115336', '102057668', '102057668', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '0.00', '0', '', '20220930155910', '-16.81', '-0.55', '3054.61', '3021.93', '3024.39/204115336/240262768409', '204115336', '24026277', '0.47', '11.85', '', '3054.61', '3021.93', '1.07', '377071.89', '480720.18', '0.00', '-1', '-1', '0.85', '0', '3036.39', '', '', '', '', '', '24026276.8409', '0.0000', '0', ' ', 'ZS', '-16.91', '-2.07', '', '', '', '3708.94', '2863.65', '-3.26', '-5.04', '-10.11', '4077465388940', '', '-6.09', '-15.88', '4077465388940', '', '', '-15.24', '-0.02', ''], 'market': ['2022-10-04 19:44:07|HK_close_重阳节休市|SH_close_国庆节休市|SZ_close_国庆节休市|US_close_未开盘|SQ_close_已休市|DS_close_已休市|ZS_close_已休市|NEWSH_close_国庆节休市|NEWSZ_close_国庆节休市|NEWHK_close_重阳节休市|NEWUS_close_未开盘|REPO_close_国庆节休市|UK_open_交易中|KCB_close_国庆节休市|IT_open_交易中|MY_close_已收盘|EU_open_交易中|AH_close_重阳节休市|DE_open_交易中|JW_close_国庆节休市|CYB_close_国庆节休市|USA_close_未开盘|USB_open_盘前交易|ZQ_close_国庆节休市'], 'zhishu': ['Rank_A_sh', 'Rank_A_sh', '578', '101', '1454', '2133', '146.735', '-0.892', '-0.604', '1000544233', '24004694', 'sh600322', 'sh688439']}, 'mx_price': {'mx': [], 'price': []}, 'prec': '3045.070', 'version': '16'}}}
'''
print('------')

# === Convert the data to a DataFrame
k_data = content['data'][stock_code]    # everything the response holds for this code

# Per the original note, stocks come back under 'qfq' + k_type (qfq = forward-adjusted)
# while indexes only have k_type. Try the plain key first, then the prefixed one.
for candidate_key in (k_type, 'qfq' + k_type):
    if candidate_key in k_data:
        k_data = k_data[candidate_key]
        break
else:
    # Neither key is present: the response does not have the expected layout.
    raise ValueError('已知的key在dict中均不存在,请检查数据')

print(k_data)   # e.g. [['2022-09-29', '3067.470', '3041.200', '3076.760', '3026.080', '230030416.000'], ...]

print('---')
df = pd.DataFrame(k_data)   # one row per candle, positional column labels
print(df)
'''
            0         1         2         3         4              5
0  2022-09-29  3067.470  3041.200  3076.760  3026.080  230030416.000
1  2022-09-30  3042.170  3024.390  3054.610  3021.930  204115336.000'''

print('---')

# === Tidy up: give the positional columns readable names
# Per the original notes: column 6 ('info') only appears when an ex-rights event happened that
# day, and amount is expressed in lots, so the data is not very precise.
rename_dict = {0: 'candle_end_time', 1: 'open', 2: 'close', 3: 'high', 4: 'low', 5: 'amount', 6: 'info'}
df = df.rename(columns=rename_dict)

df['candle_end_time'] = pd.to_datetime(df['candle_end_time'])
# Make sure the 'info' column exists even when the response had no seventh field.
if 'info' not in df.columns:
    df['info'] = None
ordered_columns = ['candle_end_time', 'open', 'high', 'low', 'close', 'amount', 'info']
df = df[ordered_columns]
print(df)
'''

  candle_end_time      open      high       low     close         amount  info
0      2022-09-29  3067.470  3076.760  3026.080  3041.200  230030416.000  None
1      2022-09-30  3042.170  3054.610  3021.930  3024.390  204115336.000  None'''


# ===考察其他周期、指数、ETF

# ===考察特殊情况
# 正常股票:sz000001 sz000002,退市股票:sh600002 sz000003、停牌股票:sz300124,上市新股:sz002952,除权股票:sh600276

# Section banner: recent minute-level K-line data fetched from Tencent Finance.
print('----------------通过腾讯财经获取最近分钟级别K线数据---------------')
from urllib.request import urlopen  # HTTP client shipped with the Python standard library
import json  # JSON parser shipped with the Python standard library
from random import randint  # random-integer helper shipped with the Python standard library
import pandas as pd
pd.set_option('expand_frame_repr', False)  # do not wrap the output when there are many columns
pd.set_option('display.max_rows', 5000)  # maximum number of rows shown when printing


# =====创建随机数的函数
def _random(n=16):
    """
    创建一个n位的随机整数
    :param n:
    :return:
    """
    start = 10**(n-1)
    end = (10**n)-1
    return str(randint(start, end))


# =====获取分钟级别的K线
# 获取K线数据:http://ifzq.gtimg.cn/appstock/app/kline/mkline?param=sz000001,m5,,640&_var=m5_today&r=0.6508601564534552
# 正常网址:http://stockhtm.finance.qq.com/sstock/ggcx/000001.shtml

# === Build the URL
# Parameters
stock_code = 'sh601636'  # examples: stock sz000001, index sh000001, ETF sh510500
k_type = 60  # candle length in minutes: 1, 5, 15, 30 or 60
# NOTE(review): the original comment says the count must not exceed 320, yet 1000 is requested — confirm the real limit.
num = 1000

# Placeholders in order: stock_code, k_type, num, k_type and a random suffix.
url_fields = (stock_code, k_type, num, k_type, _random())
url = 'http://ifzq.gtimg.cn/appstock/app/kline/mkline?param=%s,m%s,,%s&_var=m%s_today&r=0.%s' % url_fields

# === Fetch the data
# The with-block closes the HTTP response as soon as the body has been read.
with urlopen(url=url, timeout=15) as response:
    content = response.read().decode()

# === Convert the payload to a dict
# The body looks like '<name>=<json>': split on the first '=' only and keep the JSON part.
content = content.split('=', maxsplit=1)[-1]
content = json.loads(content)

# === Convert the data to a DataFrame
# The candles for this request are stored under the 'm<minutes>' key of the stock's entry.
k_data = content['data'][stock_code]['m'+str(k_type)]
df = pd.DataFrame(k_data)

# === Tidy up: name the positional columns (per the original note, amount is in lots)
rename_dict = {0: 'candle_end_time', 1: 'open', 2: 'close', 3: 'high', 4: 'low', 5: 'amount'}
df.rename(columns=rename_dict, inplace=True)

# Timestamps are sliced as YYYYMMDDHHMM and rewritten as 'YYYY-MM-DD HH:MM' before parsing.
formatted_stamps = df['candle_end_time'].apply(
    lambda x: '{}-{}-{} {}:{}'.format(x[0:4], x[4:6], x[6:8], x[8:10], x[10:12])
)
df['candle_end_time'] = pd.to_datetime(formatted_stamps)

ordered_columns = ['candle_end_time', 'open', 'high', 'low', 'close', 'amount']
df = df[ordered_columns]
print(df)
'''
        candle_end_time   open   high    low  close     amount
0   2022-06-10 10:30:00  10.75  10.85  10.65  10.76  163157.61
1   2022-06-10 11:30:00  10.76  10.90  10.76  10.85   64518.90
2   2022-06-10 14:00:00  10.85  10.95  10.84  10.94   76461.54
...'''
# ===考察其他周期、指数、ETF

# ===考察特殊情况
# 正常股票:sz000001 sz000002,退市股票:sh600002 sz000003、停牌股票:sz300124,上市新股:sz002952,除权股票:sh600276,

# 发表回复