请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  源码分享 帖子:3119096 新帖:113

【共享函数】外部数据获取及分享

外汇老老法师发表于:7 月 17 日 21:37回复(1)

行业类¶

1.申万二级及三级的行情及估值数据¶


# Fetch Shenwan (SW) industry index data from the official SW index website.
# Import libraries
import numpy as np
import pandas as pd
import requests
import json
from datetime import timedelta,date

# Every column the endpoint is asked for when no explicit field list is given.
_SW_COLUMNS = ['SwIndexCode', 'SwIndexName', 'BargainDate', 'OpenIndex', 'CloseIndex', 'MaxIndex',
               'MinIndex', 'BargainAmount', 'BargainSum', 'Markup', 'TurnoverRate', 'PE', 'PB',
               'MeanPrice', 'BargainSumRate', 'NegotiablesShareSum', 'NegotiablesShareSum2', 'DP']
# Columns that are always requested, ahead of any user-selected fields.
_SW_KEY_COLUMNS = ['SwIndexCode', 'SwIndexName', 'BargainDate']
# Accepted values for the ``frequency`` argument.
_SW_FREQUENCIES = ('day', 'week', 'month')
# Lower bound applied to both date arguments.
_SW_EARLIEST_DATE = '1999-12-30'
# Index code queried when the caller passes ``code=None``.
_SW_DEFAULT_CODE = '801010'


def _sw_where_clause(code, start_date, end_date, frequency):
    """Build the ``where`` query string sent to the SW endpoint."""
    if code is None:
        code = _SW_DEFAULT_CODE
    if isinstance(code, str):
        code_str = "'" + code + "'"
    elif isinstance(code, (list, tuple)):
        code_str = ", ".join("'{}'".format(item) for item in code)
    else:
        raise TypeError("code must be None, a str, or a list of str, got %r" % (code,))

    # Dates are compared as 'YYYY-MM-DD' strings, exactly like the bounds.
    today_str = date.today().strftime('%Y-%m-%d')
    if start_date is None or start_date < _SW_EARLIEST_DATE or start_date > today_str:
        start_date = _SW_EARLIEST_DATE
    if end_date is None or end_date > today_str or end_date < _SW_EARLIEST_DATE:
        end_date = today_str

    if frequency not in _SW_FREQUENCIES:
        frequency = 'day'

    return ("swindexcode in (" + code_str
            + ") and BargainDate>='" + start_date
            + "' and BargainDate<='" + end_date
            + "' and type='" + frequency + "'")


def _sw_columns(fields):
    """Return the ordered column list to request for the given ``fields``."""
    if fields is None or not set(fields).issubset(_SW_COLUMNS):
        # Unknown field names fall back to the full column set.
        return list(_SW_COLUMNS)
    # Key columns first, then the caller's fields without repeating a key.
    return _SW_KEY_COLUMNS + [name for name in fields if name not in _SW_KEY_COLUMNS]


def get_sw_data(code=None, start_date=None, end_date=None, frequency='day', fields=None):
    """Download SW industry index quotes/valuation rows from www.swsindex.com.

    Parameters
    ----------
    code : str or list of str, optional
        Industry index code(s); see
        https://www.joinquant.com/help/api/help?name=plateData (SW industries).
        ``None`` queries the default code '801010'.
    start_date : str, optional
        'YYYY-MM-DD'. ``None``, or a value outside ['1999-12-30', today],
        is replaced by '1999-12-30'.
    end_date : str, optional
        'YYYY-MM-DD'. ``None``, or a value outside ['1999-12-30', today],
        is replaced by today's date.
    frequency : str
        'day', 'week' or 'month'; anything else is treated as 'day'.
    fields : list of str, optional
        Subset of the available columns. ``None`` (or a list containing an
        unknown name) requests every column. 'SwIndexCode', 'SwIndexName'
        and 'BargainDate' are always included.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by 'BargainDate' (datetime). Empty, but with the
        requested columns, when the service returns no rows.

    Raises
    ------
    TypeError
        If ``code`` is not None, a str, or a list/tuple.

    Notes
    -----
    Paging stops at the first non-OK HTTP response, so a failure part-way
    through yields the rows collected so far rather than an error.
    """
    # The User-Agent value (including its run of spaces) is kept exactly as
    # the original snippet sent it.
    header = {
        'HOST': 'www.swsindex.com',
        'Referer': 'http://www.swsindex.com/idx0200.aspx?columnid=8838&type=Day',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      '    Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4482.400 QQBrowser/9.7.13001.400',
    }
    columns = _sw_columns(fields)
    param = {
        'tablename': 'V_Report',
        'key': 'id',
        # Page number (the original author notes 20 rows per page).
        'p': '1',
        # Query: codes, date range and data type.
        'where': _sw_where_clause(code, start_date, end_date, frequency),
        # Sort order; per the original author, "_1" means date descending
        # and "_2" date ascending.
        'orderby': 'swindexcode asc,BargainDate_2',
        # Columns to return.
        'fieldlist': ",".join(columns),
        'pagecount': '1',
        'timed': '1524497094532',
    }
    url = 'http://www.swsindex.com/handler.aspx'

    frames = []
    page = 1
    while True:
        param['p'] = str(page)
        ret = requests.get(url, data=param, headers=header)
        if not ret.ok:
            break
        # Normalise quotes and date formatting so the payload parses as JSON.
        text = ret.text.replace("'", '"').replace(' 0:00:00', '').replace('/', '-')
        rows = json.loads(text).get('root')
        if not rows:
            break
        frames.append(pd.DataFrame(rows, columns=columns))
        page += 1

    if not frames:
        return pd.DataFrame(columns=columns).set_index('BargainDate')
    df = pd.concat(frames)
    df['BargainDate'] = pd.to_datetime(df['BargainDate'], format='%Y-%m-%d')
    return df.set_index('BargainDate')


df=get_sw_data('850111',start_date='2019-02-23')
df.head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


SwIndexCodeSwIndexNameOpenIndexCloseIndexMaxIndexMinIndexBargainAmountBargainSumMarkupTurnoverRatePEPBMeanPriceBargainSumRateNegotiablesShareSumNegotiablesShareSum2DP
BargainDate
















2019-02-25850111种子生产2493.652603.852612.972469.67185441090294.573.604345.492.686.700.103496540.23437067.530.57
2019-02-26850111种子生产2601.412577.022643.982534.8320089115323-1.033.904745.022.656.650.113470405.45433800.680.58
2019-02-27850111种子生产2571.522547.742603.462530.191365178331-1.142.653344.512.626.580.093430769.77428846.220.59
2019-02-28850111种子生产2550.002559.182584.132523.738255503260.451.604444.712.646.620.083449990.37431248.800.58
2019-03-01850111种子生产2567.252570.262590.562519.639037532910.431.756444.912.656.640.083462555.34432819.420.58

股票类数据¶

1.获取新浪热门股票¶


import requests
import anyjson
import pandas as pd


def get_hot_stock_from_sina():
    """Fetch the hot-stock list from Sina and return it as a DataFrame.

    The endpoint answers with a JSONP-style wrapper (``var AHM=(...)``); the
    JSON between the parentheses is parsed and ``result.data`` becomes the
    rows. The ``SYMBOL`` column is passed through ``normalize_code``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the response contains no parenthesised payload.
    """
    url = ('https://ssl-data.sina.com.cn/api/openapi.php/'
           'WeiboReferService.getListSymbol?code=CNHOUR6&callback=var%20AHM=')
    html = requests.get(url).content.decode()
    # Slice from the first '(' to the LAST ')': a ')' inside any JSON value
    # would otherwise cut the payload short.
    payload = html[html.index('(') + 1:html.rindex(')')]
    parsed = anyjson.deserialize(payload)
    data = pd.DataFrame(parsed['result']['data'])
    # NOTE(review): normalize_code is not defined in this snippet; presumably
    # it is provided by the JoinQuant research environment — confirm.
    data['SYMBOL'] = data['SYMBOL'].apply(normalize_code)
    return data


get_hot_stock_from_sina().head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


NAMEREFSYMBOL
0中兴通讯1638400000063.XSHE
1西安旅游1025241000610.XSHE
2国际实业1021921000159.XSHE
3士兰微1014388600460.XSHG
4连云港656515601008.XSHG

2.获取选股宝涨停原因¶


import urllib.request
import json
import pandas as pd


def Xuangubao(pool_name='limit_up'):
    """Fetch a stock pool from xuangubao.cn with the reason for each move.

    Parameters
    ----------
    pool_name : str
        Value of the ``pool_name`` query parameter. 'limit_up' (default) is
        the limit-up pool; the original snippet also mentions
        'limit_up_broken' for stocks whose limit-up broke.
        NOTE(review): other pools are assumed to share the same schema.

    Returns
    -------
    pandas.DataFrame
        One row per stock, indexed by the normalized symbol. Adds the columns
        'stock_reason', 'plate_name' and 'plate_reason' extracted from
        'surge_reason' (which is dropped), and replaces 'limit_timeline' with
        the datetime of its first timeline item.
    """
    # Imported locally: the original referenced ``datetime.datetime`` without
    # ever importing the module.
    import datetime

    url = "https://flash-api.xuangubao.cn/api/pool/detail?pool_name=" + pool_name
    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}
    req = urllib.request.Request(url, headers=header_dict)
    with urllib.request.urlopen(req) as response:
        payload = json.loads(response.read())
    df = pd.DataFrame(payload['data'])

    def get_plate_reason(x):
        # Not every entry carries a plate reason; missing ones become None.
        try:
            return x['related_plates'][0][u'plate_reason']
        except (KeyError, IndexError, TypeError):
            return None

    df['stock_reason'] = df.surge_reason.apply(lambda x: x['stock_reason'])
    df['plate_name'] = df.surge_reason.apply(lambda x: x['related_plates'][0]['plate_name'])
    df['plate_reason'] = df.surge_reason.apply(get_plate_reason)
    # fromtimestamp() converts using the machine's local timezone.
    df['limit_timeline'] = df.limit_timeline.apply(
        lambda x: datetime.datetime.fromtimestamp(x['items'][0]['timestamp']))
    # NOTE(review): normalize_code is not defined in this snippet; presumably
    # it is provided by the JoinQuant research environment — confirm.
    df.index = df.surge_reason.apply(lambda x: normalize_code(x['symbol']))
    df.index.name = None
    return df.drop('surge_reason', axis=1)


Xuangubao().head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


break_limit_down_timesbreak_limit_up_timesbuy_lock_volume_ratiochange_percentfirst_break_limit_downfirst_break_limit_upfirst_limit_downfirst_limit_upis_new_stockissue_pricelast_break_limit_downlast_break_limit_uplast_limit_downlast_limit_uplimit_down_dayslimit_timelinelimit_up_dayslisted_datem_days_n_boards_boardsm_days_n_boards_daysmtmnearly_new_acc_*nearly_new_break_daysnew_stock_acc_*new_stock_break_limit_upnew_stock_limit_up_daysnew_stock_limit_up_price_before_brokennon_restricted_capitalpricesell_lock_volume_ratiostock_chi_namesymboltotal_capitalturnover_ratiovolume_bias_ratioyesterday_break_limit_up_timesyesterday_first_limit_upyesterday_last_limit_upyesterday_limit_down_daysyesterday_limit_up_daysstock_reasonplate_nameplate_reason
600860.XSHG0230.0038610.1003860155373850401553736701False5.30015537560430155375604402019-03-28 09:31:412768153600220.0000.6132080002.753100e+098.550京城股份600860.SS3.608100e+090.1156172.87511021553649900155365235601公司拥有亚洲地区最具规模的、技术水平最先进的铝内胆碳纤维全缠绕复合气瓶的设计测试中心及生产线...燃料电池新能源车补贴转向充电及加氢设施
000638.XSHE000.0185420.1006390001553737362False8.38000155373736202019-03-28 09:42:421848937600000.000-0.1778040002.131766e+096.890万方发展000638.SZ2.131766e+090.0368360.65452000000主营木材销售,转型互联网医疗大平台,参股辽宁华盛信托等公司其他None
300507.XSHE010.0252240.1000950155373766501553736303False24.92015537376650155373771002019-03-28 09:25:0311461859200000.000-0.0694220001.359967e+0923.190苏奥传感300507.SZ2.837111e+090.0652842.2690870000018年年报拟10转8高送转None
002274.XSHE040.0033350.1001150155373672001553736726False10.01015537551970155375522102019-03-28 09:32:0611222272000000.000-0.0449550005.978181e+099.560华昌化工002274.SZ6.069737e+090.0786871.41326661553650494155365051200间接参股虹软科技科创板概念股第二批科创板受理名单出炉
000590.XSHE000.0647590.0997890001553736303False6.00000155373630302019-03-28 09:25:0368219808007140.0001.6083330003.493647e+0915.650启迪古汉000590.SZ3.747725e+090.0041831.68313101553649903155364990305间接控股股东启迪控股签署《合作框架协议》,本次合作后雄安集团和/或雄安新区管委会控股的基金与...雄安新区雄安新区由规划阶段转入建设实施阶段

3.东财股吧热门帖¶

import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_post_data(html):
    """Parse one guba list page into a list of post rows.

    Every ``<div class="articleh">`` becomes one row built from its
    ``<span>`` children, in order:

    * span 2 contributes three values: the ``<em>`` text (or "" when there is
      no ``<em>``), the link text, and the absolute post URL;
    * span 3 contributes two values: the link text and the link href
      (author information);
    * every other span contributes its ``.string``.

    Parameters
    ----------
    html : str
        HTML source of a list page.

    Returns
    -------
    list of list
    """
    soup = BeautifulSoup(html)
    rows = []
    for post in soup.find_all("div", class_="articleh"):
        row = []
        for position, span in enumerate(post.find_all("span")):
            if position == 2:
                # Post title and address.
                row.append(span.em.string if span.em else "")
                row.append(span.a.string)
                row.append("http://guba.eastmoney.com" + span.a['href'])
            elif position == 3:
                # Author information.
                row.append(span.a.string)
                row.append(span.a['href'])
            else:
                # Remaining fields.
                row.append(span.string)
        rows.append(row)
    return rows


def get_html(urls):
    """Download every URL in ``urls`` and return the bodies decoded as UTF-8."""
    responses = [requests.get(url) for url in urls]
    return [response.content.decode("utf-8") for response in responses]


def control(page_count=20, step=50):
    """Scrape the first ``page_count`` list pages and return all posts.

    Parameters
    ----------
    page_count : int
        Number of list pages to fetch, starting from page 1.
    step : int
        Number of pages downloaded per batch.

    Returns
    -------
    pandas.DataFrame
        One row per post, with positional (unnamed) columns. Empty when
        ``page_count`` is 0.
    """
    # Build the list of page URLs.
    url_list = ["http://guba.eastmoney.com/list,cjpl,99_{}.html".format(idx)
                for idx in range(1, page_count + 1)]
    # Collected across ALL batches; resetting this per batch would discard
    # everything but the last batch whenever page_count > step.
    all_post_data = []
    for idx in range(0, page_count, step):
        # Fetch ``step`` pages per batch.
        print("start get html {} -> {}".format(idx, idx + step))
        all_html_data = get_html(url_list[idx:idx + step])
        print("start convert data")
        for page_html in all_html_data:
            all_post_data.extend(get_post_data(page_html))
    return pd.DataFrame(all_post_data)


control(page_count=3).head()
start get html 0 -> 50
start convert data

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


012345678
0631541186None易纲:继续放宽对外资金融机构股比限制http://guba.eastmoney.com/news,cjpl,783238468....财经评论http://iguba.eastmoney.com/931301369386491609-1603-26 16:02
1277405626None李大霄:七大特征佐证2647大底 A股“大”牛市将http://guba.eastmoney.com/news,cjpl,783131642....财经评论http://iguba.eastmoney.com/931301369386491609-1403-20 18:25
217800439None百亿女富豪也踩雷:这家公司业绩变脸市值缩水15http://guba.eastmoney.com/news,cjpl,780243957....财经评论http://iguba.eastmoney.com/931301369386491608-2702-15 16:14
3303617
国家网络安全宣传周开幕  360安全大脑现出“真http://guba.eastmoney.com/news,cjpl,783552067....财经评论http://iguba.eastmoney.com/931301369386491609-1802-14 07:31
4294823
关于处理商誉减值问题的思考http://guba.eastmoney.com/news,cjpl,803866689....股友jyo67whttp://iguba.eastmoney.com/795206529023246802-0702-12 22:42

4.获取最新公布的机构调研数据¶

def get_jgdy_all(page_count=1,page_size=100):
    """Fetch the most recently published institutional-research records.

    ``page_count`` is the number of pages to request and ``page_size`` the
    number of rows per page. Per the original author's note, ``page_size``
    must not exceed 5000, the defaults return 100 rows, and rows are ordered
    by publication time.

    Returns the pages concatenated into one DataFrame indexed by 'SCode'.
    """
    endpoint = 'http://data.eastmoney.com/DataCenter_V3/jgdy/xx.ashx?'

    def fetch_page(page_no):
        # One request per page; the 'data' entry of the JSON body holds the rows.
        query = {
            "pagesize": page_size,
            "page": page_no,
            'sortRule': -1,
            'sortType': 0,
            'rt': 51777724,
        }
        response = requests.get(endpoint, params=query)
        return pd.DataFrame(json.loads(response.text)['data'])

    pages = [fetch_page(page_no) for page_no in range(1, page_count + 1)]
    combined = pd.concat(pages, axis=0)
    return combined.set_index('SCode')


df = get_jgdy_all(1)
df.head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


ChangePercentCloseCompanyCodeCompanyNameDescriptionEndDateLicostaffMaincontentNoticeDateOrgCodeOrgNameOrgSumOrgtypeOrgtypeNamePersonnelPlaceSNameStartDate
SCode

















3005450.2729.1880319562
特定对象调研
董事会秘书 钟辉,证券事务代表 杨晓芬
2019-03-2880000073华泰证券
005001证券公司陈家辉深圳市联得自动化装备股份有限公司会议室联得装备2019-03-28
3005450.2729.1880319562
特定对象调研
董事会秘书 钟辉,证券事务代表 杨晓芬
2019-03-2810000082国金证券
005001证券公司韦俊龙深圳市联得自动化装备股份有限公司会议室联得装备2019-03-28
3005450.2729.1880319562
特定对象调研
董事会秘书 钟辉,证券事务代表 杨晓芬
2019-03-2810001005东北证券
005001证券公司王少男深圳市联得自动化装备股份有限公司会议室联得装备2019-03-28
000598-1.534.5210000935成都市兴蓉投资股份有限公司特定对象调研
赵璐,李峥,文雅
2019-03-2880560391中庚基金
004001基金管理公司胡坤公司会议室兴蓉环境2019-03-27
000598-1.534.5210000935成都市兴蓉投资股份有限公司特定对象调研
赵璐,李峥,文雅
2019-03-2810001081长江证券
005001证券公司徐科公司会议室兴蓉环境2019-03-27

5.获取个股机构调研数据¶

def get_jgdy_one(code):
    """Fetch institutional-research records for a single stock.

    ``code`` is sent as the ``code`` query parameter (e.g. '000568'). The
    first page of up to 100 rows is requested, and the 'data' entry of the
    JSON response is returned as a DataFrame.
    """
    endpoint = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?'
    query = {
        "pagesize": 100,
        "page": 1,
        'sortRule': -1,
        'sortType': 0,
        'code': code,
    }
    response = requests.get(endpoint, params=query)
    records = json.loads(response.text)['data']
    return pd.DataFrame(records)


get_jgdy_one('000568').head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


ChangePercentCloseCompanyCodeCompanyNameDescriptionEndDateLicostaffMaincontentNoticeDateOrgCodeOrgNameOrgSumOrgtypeOrgtypeNamePersonnelPlaceSCodeSNameStartDate
04.6364.3910000910泸州老窖股份有限公司特定对象调研
王川,赵亮
2019-03-22

6


公司七楼会议室000568泸州老窖2019-03-22
14.6364.3910000910泸州老窖股份有限公司特定对象调研
林锋,王川,赵亮
2019-03-18

7


公司八楼会议室000568泸州老窖2019-03-17
24.6364.3910000910泸州老窖股份有限公司特定对象调研
林锋,王洪波
2018-12-20

14


公司一楼会议室000568泸州老窖2018-12-19
34.6364.3910000910泸州老窖股份有限公司特定对象调研
王川,王钰
2018-11-29

12


公司二楼会议室000568泸州老窖2018-11-29
44.6364.3910000910泸州老窖股份有限公司特定对象调研
王川,赵亮
2018-11-07

12


公司三楼会议室000568泸州老窖2018-11-06

6.获取券商月报数据¶

#-*- coding:utf-8 -*-
import io
import urllib.request
import re
import pandas as pd


def get_yb(mon):
    """Fetch brokerage monthly-report data from eastmoney for one month.

    The report page for ``mon`` is downloaded first to extract the API
    ``token`` embedded in it; the token is then used to query the data
    endpoint for rows whose ``RQ`` equals the first day of that month.

    Parameters
    ----------
    mon : str
        Month as 'YYYYMM', e.g. '201901'.

    Returns
    -------
    pandas.DataFrame
        The JSON response parsed by ``pandas.read_json``.

    Raises
    ------
    ValueError
        If no token can be found in the report page.
    """
    page_url = 'http://data.eastmoney.com/other/qsjy/yb.%s.html' % mon
    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}

    request = urllib.request.Request(page_url, headers=header_dict)
    with urllib.request.urlopen(request) as response:
        page = response.read().decode('gbk')
    match = re.search("token=(.*?)&st", page)
    if match is None:
        raise ValueError("could not find an API token in %s" % page_url)
    token = match.group(1)

    day = mon[0:4] + "-" + mon[4:6]
    # NOTE(review): "&amp;js=(x)" is kept exactly as in the original snippet;
    # it may be an HTML-escaping artifact of "&js=(x)" — confirm against the
    # live endpoint before changing it.
    url = ("http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?type=QSYJBG_MReport&token="
           + token
           + "&st=RQ&sr=-1&p=1&ps=1000&amp;js=(x)&filter=(RQ='"
           + day
           + "-01T00:00:00')&rt=51785853")
    request = urllib.request.Request(url, headers=header_dict)
    with urllib.request.urlopen(request) as response:
        body = response.read()
    # Wrapped in a file-like object: newer pandas deprecates passing the raw
    # JSON payload itself to read_json.
    return pd.read_json(io.BytesIO(body))


get_yb('201901').head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


ENDATERQjlrjlrhbjlrtbjzcjzchbjzctbljjlrljjlrhbljjlrtbljyysrljyysrhbljyysrtbmgsdmmgsmcyysryysrhbyysrtb
02019-01-31T00:00:002019-01-01T00:00:005842.810000-0.36890212.18711.344589e+060.001659-0.005288015842.810000-0.7936912.187120355.500000-0.8605721.465342500山西证券20355.500000-0.0154741.46534
12019-01-31T00:00:002019-01-01T00:00:0015203.8700001.719180-0.2313932.738747e+060.0059720.0054284515203.870000-0.581059-0.23139356251.700000-0.851884-0.0641378783长江证券56251.7000003.269388-0.0641378
22019-01-31T00:00:002019-01-01T00:00:005222.8359451.788267-0.1985881.321474e+060.003800-0.01966755222.8359455.61216-0.19858816485.213626-0.869353-0.168739750国海证券16485.213626-0.222980-0.168739
32019-01-31T00:00:002019-01-01T00:00:0015907.5600003.1765574.965691.506301e+06-0.003985-0.04144815907.560000-0.02358194.9656935895.330000-0.8386241.70169686东北证券35895.330000-0.2828621.70169
42019-01-31T00:00:002019-01-01T00:00:0020874.600000-0.552881-4.635912e+060.006607-20874.600000--79549.510000--601066中信建投79549.510000-0.401797-

抓取港股新股数据统计打新收益¶


内容较多,请直接点击原文链接查看


期货类¶

1. 生意社大宗商品报价数据¶

import io
import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_sys(symbol='', page=1):
    """Query commodity price quotes from 100ppi.com (生意社).

    Parameters
    ----------
    symbol : str
        The site's id for a commodity, visible in the URL when that
        commodity's page is opened (e.g. '806'; the commodity name was
        masked in the original post).
    page : int
        Which result page to query.

    Returns
    -------
    pandas.DataFrame
        The quote table from the page, plus a numeric 'price' column holding
        the first number found in each '报价' cell (NaN when a cell contains
        no number).
    """
    url = 'http://www.100ppi.com/mprice/plist-' + str(symbol) + '-' + str(page) + '.html'
    txt = requests.get(url).text
    table = BeautifulSoup(txt).find_all("table", class_="lp-table mb15")
    # Wrapped in StringIO: newer pandas deprecates passing literal HTML.
    df = pd.read_html(io.StringIO(str(table)), header=0)[0]
    # The pattern allows decimals, so convert with to_numeric rather than
    # int(): whole-number quotes stay integers, decimal quotes become floats,
    # and cells without digits become NaN instead of raising.
    first_number = df['报价'].astype(str).str.extract(r"(\d+\.?\d*)", expand=False)
    df['price'] = pd.to_numeric(first_number)
    return df


get_sys('806').head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


产地与品牌规格报价报价提供方发布时间price
0兰花清洁二*的质量分数:≥99.0%出厂价  3150元/吨山西兰花清洁能源有限责任公司2019-03-273150
1盛德源二*的质量分数:≥99.0%出厂价  3450元/吨德州盛德源化工有限公司2019-03-273450
2玉皇金宇二*的质量分数:≥99.0%出厂价  3360元/吨山东玉皇化工(集团)有限公司2019-03-273360
3冀春化工二*的质量分数:≥99.0%出厂价  3450元/吨河北冀春化工有限公司2019-03-273450
4河南义马二*的质量分数:≥99.0%出厂价  3310元/吨河南义马新源化工能源有限责任公司2019-03-273310

其他¶

1.获取国债收益率数据¶


import requests
import json
import pandas as pd
import time
from sqlalchemy import create_engine


def get_bnd_yield(year=10):
    """Fetch the monthly China government bond yield series for one tenor.

    Parameters
    ----------
    year : int
        Bond tenor in years; must be 1, 5 or 10.

    Returns
    -------
    pandas.DataFrame
        Single column named 'y<year>' indexed by 'date' ('YYYY-MM-DD' strings).

    Raises
    ------
    KeyError
        If ``year`` is not one of the supported tenors.
    """
    # investing.com instrument id for each supported tenor.
    ids = {10: '29227', 5: '29234', 1: '29231'}
    pair_id = ids[year]
    url = ('https://cn.investing.com/common/modules/js_instrument_chart/api/data.php?'
           + 'pair_id={}&pair_id_for_news={}'.format(pair_id, pair_id)
           + '&chart_type=area&pair_interval=month&candle_count=120&events=yes&volume_series=yes&period=5-years')
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Host': 'cn.investing.com',
        'Referer': 'https://cn.investing.com/rates-bonds/china-{}-year-bond-yield'.format(year),
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
    }
    response = requests.get(url, headers=headers)
    # Single quotes are swapped for double quotes before JSON parsing.
    payload = json.loads(response.content.decode('utf-8').replace("'", "\""))

    # Keep the first two columns of each candle; copy so the column
    # assignments below act on an independent frame, not on a slice.
    data = pd.DataFrame(payload['candles']).iloc[:, :2].copy()
    data.columns = ['date', 'y' + str(year)]
    # The first 10 digits of the timestamp are read as epoch seconds
    # (presumably the source value is in milliseconds — TODO confirm).
    # localtime() makes the resulting date depend on the machine's timezone.
    data['date'] = data['date'].map(
        lambda x: time.strftime("%Y-%m-%d", time.localtime(int(str(x)[:10]))))
    data.set_index('date', inplace=True)
    return data


def get_bnd_yields(years=(1, 5, 10)):
    """Fetch several tenors and join them column-wise on the date index.

    Parameters
    ----------
    years : iterable of int
        Tenors to fetch; each must be accepted by ``get_bnd_yield``.

    Returns
    -------
    pandas.DataFrame
        One 'y<year>' column per tenor; an empty DataFrame when ``years`` is
        empty.
    """
    frames = [get_bnd_yield(year=yr) for yr in years]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=1)


get_bnd_yields().head()

.dataframe tbody tr th:only-of-type {        vertical-align: middle;    }    .dataframe tbody tr th {        vertical-align: top;    }    .dataframe thead th {        text-align: right;    }


y1y5y10
date


2014-04-013.6504.1604.330
2014-05-013.3604.0104.160
2014-06-013.3703.8604.060
2014-07-013.7634.0314.298
2014-08-013.7993.9984.248
 

全部回复

0/140

量化课程

    移动端课程