1、导入所需的包¶

#导入需要的函数
from jqfactor import get_factor_values
import matplotlib.pyplot as plt
import time
import jqdata as jq
import numpy as np
import pandas as pd
import math
import statsmodels.api as sm
from sklearn.model_selection import train_test_split #这里是引用了交叉验证
from sklearn.linear_model import LinearRegression  #线性回归
import matplotlib.pyplot as plt
import datetime
from jqfactor import *
import warnings  
import pickle
warnings.filterwarnings('ignore') 
plt.style.use('seaborn-bright')

2、确认基本参数¶

2.1 起止时间，因子极值处理时的分位数，以及分组回测交易情况下(剔除一些开盘涨跌幅超过ct_pct的个股)--不追高不抄底

# Global study parameters.
# Begin_date / End_date : first and last calendar date of the sample.
# quantile              : tail fraction used when clipping extreme factor values.
# ct_pct                : maximum allowed next-day opening move (as a fraction);
#                         stocks opening further away are not traded.
Begin_date='2018-01-01'
End_date='2019-07-11'
quantile=0.01
ct_pct=0.05
# Trading days between Begin_date and End_date; used as the time axis for the factor data.
date_list = jq.get_trade_days(start_date=Begin_date,end_date=End_date)# all trading days in the sample window

2.2 取成分股--本文以全部A股为例，做初步筛选

# Build the stock universe as of `date`, then drop stocks listed for less than
# three months, ST stocks and suspended stocks (see filter_stock).
date='2019-07-05'
stock_pool=get_all_securities(types=[], date=date)
stock_pool=filter_stock(stock_pool,date,skip_paused=1)

2.3 为这些成分股构建行业哑变量（提前构建，为之后分组回测做准备）

hi_here_time=time.time()
stock_industry=get_industry(security=list(stock_pool.index), date=None)
# Add the SW level-1 industry name as a new column. The lookup is done per
# security code so the result cannot get out of step with stock_pool.index;
# securities without an 'sw_l1' entry get NaN and are dropped just below.
stock_pool['industry']=[stock_industry.get(code,{}).get('sw_l1',{}).get('industry_name',np.nan)
                        for code in stock_pool.index]
stock_pool=stock_pool.dropna()
# Distinct industry names, in order of first appearance.
sw_lv1_28=stock_pool.drop_duplicates(subset=['industry'],keep='first')['industry'].values
# Industry dummy matrix: one row per stock, one column per industry.
# The width follows the number of industries actually present instead of a
# hard-coded 28, so the frame can always be built.
hy_dummy_matrix=pd.DataFrame(np.zeros([len(stock_pool),len(sw_lv1_28)]),index=stock_pool.index,columns=sw_lv1_28)
for i in sw_lv1_28:
    # single .loc assignment (no chained indexing) so the write always lands
    # in hy_dummy_matrix itself
    hy_dummy_matrix.loc[stock_pool['industry']==i,i]=1

# Combine name, listing date and the industry dummies into the working stock list.
stocklist=pd.concat([stock_pool[['display_name','start_date']],hy_dummy_matrix],axis=1)

hello_there_time=time.time()
time_last=hello_there_time-hi_here_time
print("持续时间:",time_last,"s")

持续时间: 0.8006150722503662 s

3、因子构建和描述¶

这里构建一个日涨幅偏离因子：

构建方法：$\text{Day\_bias}_t = \dfrac{\text{Open}_{t+1} / \text{Close}_{t-1}}{1 + r^{\text{index}}_t}$，即 (t+1日开盘价 / t-1日收盘价) / (1 + 相应指数t日涨跌幅)

其中：指数涨跌幅的选取规则为，代码以6开头的股票使用上证指数(000001.XSHG)，其余股票使用创业板指(399006.XSHE)。

因为A股是T+1的，因此用第二天的开盘价(而非当日收盘价）更能反映当日的涨跌幅，加入分母，用于表示一个相对指数的偏离，逻辑上来讲，相对跌的多了第二天应该涨，而相对涨的多了第二天应该跌。

3.1 提取所需数据

# Names of the three inputs the factor is built from.
dependencies = ['clp_pred','open_nextd','index_chg']

# Download all price data for the sample window.
security=list(stocklist.index)
data=get_price(security, Begin_date,End_date, fields=["close","open","pre_close","avg"])
temp_indexdata=get_price(['000001.XSHG','399006.XSHE'],Begin_date,End_date, fields=["pre_close","close"])
# Gross daily index return (close / previous close) for the two benchmark indices.
index_data=temp_indexdata["close"]/temp_indexdata["pre_close"]

3.2 构建因子矩阵：index为股票代码，columns为日期

# Factor value matrix: index = security code, columns = date.
# The last trading day is skipped because the factor needs the next day's open.
Factor_values_matrix=pd.DataFrame(index=stocklist.index,columns=date_list[:-1])

# Boolean masks: codes starting with "6" versus all other codes.
stock_600=pd.DataFrame([True if i[0]=="6" else False for i in security],index=stocklist.index)
stock_else=pd.DataFrame([False if i[0]=="6" else True for i in security],index=stocklist.index)
# Per-stock benchmark return for the current day (refilled on every iteration).
index_chg_pd=pd.DataFrame(index=stocklist.index,columns=[0])
for i in date_list[:-1]:
    date=i
    next_date=ShiftTradingDay(date,1,all_trade_day)

    # Cross-sections of the price data: pre_close on `date`, open on the next trading day.
    clp_pred=data.major_xs(date)["pre_close"]
    open_nextd=data.major_xs(next_date)["open"]
    #index_chg=pd.Series([index_data.loc[date][0] if i[0]=="6" else index_data.loc[date][1] for i in security],index=clp_pred.index)
    # Column 0 of index_data is '000001.XSHG', column 1 is '399006.XSHE'
    # (the order requested in get_price above).
    index_chg_pd[stock_600]=index_data.loc[date][0]
    index_chg_pd[stock_else]=index_data.loc[date][1]
    index_chg=index_chg_pd[0]
    # Compute the factor values for this day.
    factor=Day_bias(clp_pred,open_nextd,index_chg,date)
    Factor_values_matrix[i]=factor.calc()

把因子值保存下来

#import pickle
#output = open('17-19_Day_bias.pkl', 'wb')
#pickle.dump(Factor_values_matrix, output)
#output.close()

#pkl_file = open('17-19_Day_bias.pkl', 'rb')
#Factor_values_matrix = pickle.load(pkl_file)
#pkl_file.close()

3.3 描述特征

# Descriptive statistics: histogram of all Day_bias values over the whole sample.
stock_num=len(stocklist.index)
date_num=len(date_list)
# Flatten the stocks x dates matrix into one sorted float vector. np.sort is
# called explicitly (the bare name `sort` only exists under an implicit
# star/pylab import) and places NaN values at the end.
Allperiod_FV=np.sort(Factor_values_matrix.values.astype(float).ravel())

# Drop the 200 smallest / largest entries before plotting to tame the tails.
plt.hist(Allperiod_FV[200:-200], bins=100,range=(0.9,1.2),density=True,color='r',rwidth=0.5,label='prob')
plt.show()
calc_feature(Allperiod_FV)

4. 因子解释(linear regression)和分组回测¶

4.1 提取流通市值

# Download the circulating market cap of every stock for every trading day.
start=time.time()
Cir_mv_pd=pd.DataFrame(index=stocklist.index,columns=date_list)
for i in date_list:
    df = get_fundamentals(query(valuation.code, valuation.circulating_market_cap).filter(valuation.code.in_(stocklist.index)),i)
    # One .loc call (rows = returned codes, column = day) instead of chained
    # indexing, so the values are written into Cir_mv_pd itself and not into
    # a temporary copy of the column.
    Cir_mv_pd.loc[list(df.code),i]=list(df.circulating_market_cap)
# True where a market cap value is available.
Cir_mv_ifvalues=~Cir_mv_pd.isnull()
end=time.time()
print("持续时间:",end-start,"s")

持续时间: 113.22435355186462 s

4.2 回归部分正文

生成IC_IR表，每组每日选出的股票和权重

# Containers for the daily results: IC and rolling IR (raw and size-neutral),
# the regression t-values, and one stock-weight table per quantile group
# (index = stock, columns = date, values = portfolio weight).
IC_IR=pd.DataFrame(index=["IC","IR","IC_mvnetural","IR_mvnetural"],columns=date_list)
tvalue=[]
St_weight_gp1=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp2=St_weight_gp1.copy()
St_weight_gp3=St_weight_gp1.copy()
St_weight_gp4=St_weight_gp1.copy()
St_weight_gp5=St_weight_gp1.copy()
group_weight_frames=[St_weight_gp1,St_weight_gp2,St_weight_gp3,St_weight_gp4,St_weight_gp5]

totalgroupnum=5
ct_pct=0.05
# The tradability masks cover the whole sample and do not depend on the loop
# day, so they are downloaded once here instead of once per iteration.
stocklist_canbuy=Canbuy_stock(stocklist,ct_pct)
stocklist_ex_new_notrade=ex_new_notrade_stock(stocklist,date_list)
# Equal weight for every industry.
industry_count=len(sw_lv1_28)
hy_weight=pd.DataFrame(1/industry_count*np.ones([1,industry_count]),columns=sw_lv1_28)

# NOTE: the loop variable must stay named `i`; helper code in this notebook
# reads it as a module-level name.
for day_pos,i in enumerate(date_list[0:-1]):

    # Next-day return (the y variable).
    next_date=ShiftTradingDay(i,1,all_trade_day)
    day_ret_t1=data['close'].loc[next_date]/data['close'].loc[i]-1

    # Log circulating market cap.
    logmv=pd.DataFrame(np.log(Cir_mv_pd[i].astype(float).values),index=stocklist.index,columns=['mv'])

    # Factor for the day: clip the tails, then z-score.
    factor=normalize(winsor_quantile(pd.DataFrame(Factor_values_matrix[i]),quantile=0.01))
    factor_ifvalue=~factor.isnull()  # not used further in this cell

    # Regression data set: column 0 = next-day return, then all industry
    # dummies but the last one, the factor (column i) and log market cap ('mv').
    Set=pd.concat([day_ret_t1,stocklist[sw_lv1_28[:-1]],factor,logmv],axis=1).dropna()

    # Rank IC, and its 20-day mean/std ratio (IR) once 20 observations exist.
    IC_IR.loc["IC",i]=Set[0].corr(Set[i],method='spearman')
    if day_pos>19:
        ic_window=IC_IR.loc["IC",date_list[day_pos-19:day_pos+1]].astype(float)
        IC_IR.loc["IR",i]=ic_window.mean()/ic_window.std()

    # Size neutralisation: keep the residual of factor ~ log market cap.
    y=Set[i]
    mvi=Set["mv"]
    slr=LinearRegression()
    mv_netural=slr.fit(np.array(mvi).reshape(-1,1),np.array(y))
    factor_mv_netural=np.array(y)-mv_netural.predict(np.array(mvi).reshape(-1,1))

    Set[i]=factor_mv_netural

    factor_netural=pd.DataFrame(index=stocklist.index,columns=[i])
    factor_netural.loc[Set.index,i]=factor_mv_netural
    factor_netural_ifvalue=~factor_netural.isnull()  # not used further in this cell

    # Rank IC / IR of the size-neutral factor (20-day window).
    IC_IR.loc["IC_mvnetural",i]=Set[0].corr(Set[i],method='spearman')
    if day_pos>19:
        icn_window=IC_IR.loc["IC_mvnetural",date_list[day_pos-19:day_pos+1]].astype(float)
        IC_IR.loc["IR_mvnetural",i]=icn_window.mean()/icn_window.std()

    # Industry-neutral regression: both sides are multiplied by log market cap,
    # and the t-value of the last regressor (the factor) is recorded.
    logmv_exna=logmv.loc[y.index]
    y=Set.iloc[:,0]*logmv_exna["mv"]
    x_temp=sm.add_constant(Set.iloc[:,1:-1])
    x=pd.DataFrame(x_temp.values*logmv_exna.values,columns=x_temp.columns,index=x_temp.index)

    result = sm.OLS(y,x.astype(float)).fit()
    tvalue.append(result.tvalues.iloc[-1])
    print("T统计量：",round(tvalue[-1],3),"日期：",i)

    # Split into groups and store the stock weights chosen on this day.
    thatday=i
    for groupid,weight_frame in enumerate(group_weight_frames,start=1):
        weight_frame[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                                  hy_weight,factor_netural,logmv,groupid,totalgroupnum=totalgroupnum,st_weight_method=None,
                                  mv_netural="NO",hy_netural="YES")

T统计量： 4.395 日期： 2018-01-02
T统计量： 2.352 日期： 2018-01-03
T统计量： 0.85 日期： 2018-01-04
T统计量： 10.077 日期： 2018-01-05
T统计量： -0.272 日期： 2018-01-08
T统计量： 6.352 日期： 2018-01-09
T统计量： 1.486 日期： 2018-01-10
T统计量： 1.405 日期： 2018-01-11
T统计量： 14.432 日期： 2018-01-12
T统计量： 1.035 日期： 2018-01-15

# Persist the results of the regression/grouping loop so later cells can be
# rerun without repeating it.
save_st_weight={"gp1":St_weight_gp1,"gp2":St_weight_gp2,"gp3":St_weight_gp3,"gp4":St_weight_gp4,
                "gp5":St_weight_gp5,"fac_exmv":factor_netural,"Tvalue":tvalue,"ICIR":IC_IR}
# Context managers close the file even if pickling fails.
with open('save_st_weight.pkl', 'wb') as output:
    pickle.dump(save_st_weight, output)

# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open('save_st_weight.pkl', 'rb') as pkl_file:
    save_st_weight = pickle.load(pkl_file)

St_weight_gp1=save_st_weight["gp1"]
St_weight_gp2=save_st_weight["gp2"]
St_weight_gp3=save_st_weight["gp3"]
St_weight_gp4=save_st_weight["gp4"]
St_weight_gp5=save_st_weight["gp5"]
factor_netural=save_st_weight["fac_exmv"]

4.3 画图展示IC、IR情况

# Plot the daily rank IC (red) and its 20-day moving average (black).
# Only one figure is created; an extra plt.figure() before plt.subplots()
# would just leave an empty figure behind.
fig, ax= plt.subplots(figsize=(8,4))

# IC_IR holds Python objects; cast to float so rolling() can aggregate.
ic_df = IC_IR.loc["IC"].astype(float)
ax.plot(date_list, ic_df, 'r')
ax.plot(date_list, ic_df.rolling(20).mean(), 'black')
ax.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
ax.set_xlabel('日期')
ax.set_ylabel('IC')

# Sample average of each IC / IR series.
IC_IR.mean(axis=1)

IC              0.044466
IR              0.344876
IC_mvnetural    0.042246
IR_mvnetural    0.351147
dtype: float64

<Figure size 4000x2000 with 0 Axes>

4.4 各组净值

# Commission 0.015% and stamp tax 0.1%. The net values below are approximate
# (computed without fees, fee="NO"); group_netv reads these two names when
# fees are enabled.
margin=0.00015
tax=0.001
# Important: the weight tables hold the stocks selected on day T; they are
# bought and held on day T+1.

groupindex=["nv_gp"+str(i) for i in range(1,totalgroupnum+1,1)]
Netv=pd.DataFrame(index=groupindex,columns=date_list)

# One row of daily gross returns per group, then compounded into a net value curve.
for m,n in zip(groupindex,[St_weight_gp1,St_weight_gp2,St_weight_gp3,St_weight_gp4,St_weight_gp5]):
    Netv.loc[m]=group_netv(data,n,date_list,fee="NO",cal_method="fast").values
Netv=Netv.cumprod(axis=1)
    #color_list =pd.DataFrame([['#5698c6', '#ff9e4a', '#60b760', '#e05c5d', '#ae8ccd']],columns=groupindex)
# Benchmark index: 000001.XSHG
Base=get_price("000001.XSHG",date_list[0],date_list[-2],fields="close")
    
fig = plt.figure(figsize=(14,8)) 
ax= fig.add_subplot(1,1,1)
Netv.T.plot(ax = ax)
# Benchmark rebased to 1 at the first day.
ax.plot(Base/Base.iloc[0,0],'black',label='000001.SH')
ax.legend(loc=2) 
#x_label=[date_list[i] for i in np.arange(0,len(date_list)-1,8)]  
#plt.xticks(x_label, rotation='vertical')
plt.ylabel('净值',{"size":18})

Text(0, 0.5, '净值')

4.5 多空组合净值

# Long-short curve: difference between the net values of group 5 and group 1.
y=Netv.loc["nv_gp5"]-Netv.loc["nv_gp1"]
y.plot(figsize=(12,8))
plt.ylabel('多空组合',{'size': 15})

Text(0, 0.5, '多空组合')

5. 评价表现¶

计算传统表现

# Per-group total return, annualised return, maximum drawdown and Sharpe ratio.
risk_matrix=getrisk(groupindex,Netv,date_list)
print('===============各组收益如下================')
risk_matrix

===============各组收益如下================

计算换手率

# Daily turnover of the two extreme groups: half the sum of absolute weight
# changes between consecutive trading days. The first day has no predecessor
# and stays NaN.
turnover_groups={"gp1":St_weight_gp1,"gp5":St_weight_gp5}
factor_turnover_rate=pd.DataFrame(index=["gp1","gp5"],columns=date_list)
for group,weights in turnover_groups.items():
    for prev_day,day in zip(date_list[:-1],date_list[1:]):
        # single .loc call so the value is written into the frame itself
        factor_turnover_rate.loc[group,day]=(weights[day]-weights[prev_day]).abs().sum()/2

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_title("因子换手率")
x=date_list
ax.scatter(x, factor_turnover_rate.loc["gp1"], label='5分位换手率')
ax.scatter(x, factor_turnover_rate.loc["gp5"], label='1分位换手率')
# The series labels are only visible once a legend is drawn.
ax.legend()
plt.grid(axis='y')

工具函数¶

1.剔除股票

def filter_stock(stock_pool,date,N=30*3,skip_paused=1):
    """Drop recently listed, ST-flagged and (optionally) suspended stocks.

    Parameters
    ----------
    stock_pool : DataFrame indexed by security code; must contain the columns
        'start_date' (listing date) and 'display_name'.
    date : str, reference date formatted as 'YYYY-MM-DD'.
    N : int, minimum number of calendar days since listing (default 90).
    skip_paused : 1 to also drop stocks that are suspended on `date`
        (looked up with get_price), 0 to keep them. Any other value prints a
        hint and leaves the suspension filter off.

    Returns
    -------
    DataFrame with the surviving rows. It carries the extra boolean column
    'IPO more than N days'. The caller's frame is left untouched.
    """
    reference_day=datetime.date(*map(int, date.split('-')))

    # Work on a copy so the helper column is not written into the caller's frame.
    pool=stock_pool.copy()

    # Drop stocks listed for fewer than N days.
    pool['IPO more than N days']=pool['start_date']+datetime.timedelta(days=N)<reference_day
    pool=pool[pool['IPO more than N days']==True]

    # Drop ST stocks (including *ST): any display name starting with 'S'
    # (covers 'ST' and 'S*') or with '*S'.
    is_st=pool['display_name'].map(lambda name: name.startswith(('S','*S'))).astype(bool)
    pool=pool[~is_st]

    # Keep only stocks that trade on `date`.
    if skip_paused==1:
        trade_status=get_price(list(pool.index), end_date=date,frequency='daily', fields='paused', skip_paused=False,count=1)
        hi_trade=trade_status['paused'][trade_status['paused']==0]
        # axis must be passed by keyword: positional arguments to dropna are
        # rejected by pandas >= 2.0
        hi_trade=hi_trade.dropna(axis=1)
        pool=pool.loc[list(hi_trade.columns)]
    elif skip_paused!=0:
        print('skip_paused的值为0或1,请正确输入')

    return pool

def ex_new_notrade_stock(stocklist,date_list,N=30*3,skip_paused=1):
    """Build a stocks x dates boolean mask of stocks that may be used each day.

    A cell is True when the stock has been listed for more than `N` calendar
    days on that date and, if `skip_paused` is 1, is not suspended that day.

    Parameters
    ----------
    stocklist : DataFrame indexed by security code with a 'start_date' column.
    date_list : sequence of trading days (comparable with 'start_date').
    N : int, minimum number of calendar days since listing (default 90).
    skip_paused : 1 to query the suspension flag with get_price for the
        securities in `stocklist`, 0 to treat every stock as trading. Any
        other value prints a hint and leaves the trading mask empty (NaN).

    Returns
    -------
    DataFrame (index = security code, columns = dates) of booleans.
    """
    # Seasoning filter: listing date + N days must lie before the day.
    seasoned_from=stocklist['start_date']+datetime.timedelta(days=N)
    stocklist_ex_new=pd.DataFrame(index=stocklist.index,columns=date_list)
    for i in date_list:
        stocklist_ex_new[i]=seasoned_from<i

    # Trading filter.
    stocklist_trade=pd.DataFrame(index=stocklist.index,columns=date_list)

    if skip_paused==1:
        # Query the securities passed in by the caller rather than whatever
        # module-level stock_pool happens to exist.
        trade_status=get_price(list(stocklist.index),date_list[0],date_list[-1],frequency='daily', fields='paused')
        # A missing flag is treated as suspended.
        trade_status=trade_status["paused"].fillna(1).T
        stocklist_trade=trade_status==0
    elif skip_paused==0:
        stocklist_trade=pd.DataFrame(True,index=stocklist.index,columns=date_list)
    else:
        print('skip_paused的值为0或1,请正确输入')

    return stocklist_trade&stocklist_ex_new

2.日期偏移

# Index of the 000001.XSHG close-price series from 2000-01-01 to 2019-07-13;
# used as the trading calendar by ShiftTradingDay.
all_trade_day=get_price("000001.XSHG","2000-01-01","2019-07-13", fields="close").index
def ShiftTradingDay(date,shift,all_trade_day):
    """Return the trading day `shift` positions away from `date`.

    Parameters
    ----------
    date : datetime.date, datetime.datetime (time of day is ignored) or a
        'YYYY-MM-DD' string. Must itself be a trading day.
    shift : int, number of trading days to move (negative = backwards).
    all_trade_day : ordered iterable of trading days; each element needs
        .year, .month and .day attributes.

    Returns
    -------
    datetime.date of the shifted trading day. If `date` is not a trading day,
    cannot be parsed, or the shift leaves the calendar, an error message is
    printed and None is returned.
    """
    try:
        if isinstance(date,datetime.datetime):
            date=date.date()
        elif not isinstance(date,datetime.date):
            date=datetime.date(*map(int,date.split('-')))
        # Normalise the calendar to plain dates so the lookup is a simple
        # equality test.
        trade_dates=[datetime.date(day.year,day.month,day.day) for day in all_trade_day]
        shiftday_index=trade_dates.index(date)+shift
        # Reject out-of-range positions explicitly: a negative index would
        # otherwise wrap around to the end of the calendar.
        if not 0<=shiftday_index<len(trade_dates):
            raise IndexError(shiftday_index)
        return trade_dates[shiftday_index]
    except (AttributeError,TypeError,ValueError,IndexError):
        print('错误：请输入交易日，格式为2019-07-06')
        return None

3.因子构成

class Day_bias:
    """Daily deviation factor: (next open / previous close) / index gross return.

    The constructor stores the three inputs; calc() combines them. The inputs
    may be scalars or pandas objects that support element-wise division.
    """

    factor_name="Day_bias"
    # Names of the data this factor depends on.
    dependencies = ['pct','open_nextd','index_chg']
    def __init__(self,clp_pred,open_nextd,index_chg,date):
        """Store the previous close, the next-day open, the index gross return and the date."""
        self.clp_pred = clp_pred
        self.open_nextd = open_nextd
        self.index_pct = index_chg
        self.date = date
    def calc(self):
        """Return (open_nextd / clp_pred) / index_chg, computed from the stored inputs."""
        # Use the instance attributes, not same-named module globals, so the
        # result depends only on what was passed to the constructor.
        factor_value=(self.open_nextd/self.clp_pred)/(self.index_pct)
        return factor_value

4.计算特征

def feature(data):
    """Return [mean, standard deviation, E(X^3)] of the non-NaN values in `data`.

    Parameters
    ----------
    data : array-like of numbers; NaN entries are ignored.

    Returns
    -------
    list [mean rounded to 3 decimals, population standard deviation rounded
    to 3 decimals, raw third moment E(X^3) (not rounded)].

    Raises
    ------
    ValueError if `data` holds no non-NaN value.
    """
    values=np.asarray(data,dtype=float)
    values=values[~np.isnan(values)]
    if values.size==0:
        raise ValueError("feature() needs at least one non-NaN value")
    niu=values.mean()          # E(X)
    niu2=(values**2).mean()    # E(X^2)
    niu3=(values**3).mean()    # E(X^3)
    # E(X^2) - E(X)^2 can come out marginally negative through rounding when
    # all values are (almost) equal; clamp at 0 so sqrt never fails.
    sigma=math.sqrt(max(niu2-niu*niu,0.0))
    return [round(float(niu),3),round(sigma,3),float(niu3)]

def calc_feature(data):
    """Summarise a sample: mean, median, standard deviation, skewness, kurtosis.

    All moments are computed from the unrounded mean and variance; only the
    reported numbers are rounded to 3 decimals. The input does not need to be
    sorted.

    Parameters
    ----------
    data : array-like of numbers; NaN entries are ignored.

    Returns
    -------
    One-row DataFrame with the columns '均值', '中位数', '标准差', '偏度', '峰度'.
    Skewness is m3 / sigma^3 and kurtosis is m4 / sigma^4 (central moments,
    population standard deviation, no excess correction). Both are NaN when
    the standard deviation is zero.

    Raises
    ------
    ValueError if `data` holds no non-NaN value.
    """
    values=np.asarray(data,dtype=float)
    values=values[~np.isnan(values)]
    if values.size==0:
        raise ValueError("calc_feature() needs at least one non-NaN value")

    niu=values.mean()
    median=np.median(values)
    centered=values-niu
    variance=(centered**2).mean()
    sigma=math.sqrt(variance)

    if sigma>0:
        skew=round(float((centered**3).mean()/sigma**3),3)
        # the fourth central moment is scaled by sigma^4 (= variance^2) so the
        # statistic is dimensionless
        kurt=round(float((centered**4).mean()/variance**2),3)
    else:
        skew=kurt=float('nan')

    return pd.DataFrame([[round(float(niu),3),round(float(median),3),round(sigma,3),skew,kurt]],
                        columns=['均值','中位数','标准差','偏度','峰度'])

5.去极值和标准化

def winsor_quantile(factor,quantile=0.01):
    """Blank out factor values outside the [quantile, 1-quantile] range.

    The bounds are computed separately for every column of `factor`. Values
    outside the bounds become NaN (they are removed, not clipped); rows are
    not dropped.

    Parameters
    ----------
    factor : DataFrame of factor values.
    quantile : float, tail fraction cut on each side (default 0.01).

    Returns
    -------
    DataFrame of the same shape as `factor`.
    """
    # Per-column bounds taken from the frame itself, so the function does not
    # depend on any loop variable defined outside.
    lower=factor.apply(lambda column: column.quantile(quantile))
    upper=factor.apply(lambda column: column.quantile(1-quantile))
    inside=factor.le(upper,axis=1) & factor.ge(lower,axis=1)
    factor_winsored=factor[inside]
    return factor_winsored

# Standardisation (z-score): subtract the mean, divide by the standard deviation.
def normalize(factor):
    """Return `factor` centred on its mean and scaled by its standard deviation."""
    centred=factor-factor.mean()
    return centred/factor.std()

6.第二天开盘涨跌幅不超过ct_pct的个股

def Canbuy_stock(stocklist,ct_pct):
    """Flag, per stock and day, whether the open lies strictly inside the allowed band.

    The band is derived from the daily limit prices: the upper limit is
    divided by 1.1 and the lower limit by 0.9, then scaled by (1 + ct_pct)
    and (1 - ct_pct) respectively. Prices are fetched with get_price for the
    module-level Begin_date .. End_date window.

    Returns a boolean DataFrame (index = security code, columns = dates).
    """
    quotes=get_price(list(stocklist.index), Begin_date,End_date,fields=['open','high_limit','low_limit'])
    opens=quotes['open'].T
    upper_bound=quotes['high_limit'].T/1.1*(1+ct_pct)
    lower_bound=quotes['low_limit'].T/0.9*(1-ct_pct)
    above_floor=opens>lower_bound
    below_cap=opens<upper_bound
    return above_floor & below_cap

7.生成各组成分和权重

# Build the holdings of one quantile group for one day: the stocks selected on
# `thatday` (to be held the following trading day) and their weights.
def grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
              hy_weight,factor_netural,logmv,groupid,totalgroupnum=5,st_weight_method=None,
              mv_netural="NO",hy_netural="YES"):
    """Return the stock weights of one factor quantile group for `thatday`.

    Parameters
    ----------
    date_list : trading days of the sample (kept for interface compatibility).
    stocklist_canbuy : boolean DataFrame (stocks x dates), True where the
        opening move is within the allowed band.
    stocklist_ex_new_notrade : boolean DataFrame (stocks x dates), True where
        the stock is seasoned and trading.
    factor : DataFrame with a column `thatday` holding the processed factor.
    sw_lv1_28 : sequence of industry names (dummy columns of `stocklist`).
    thatday : selection day.
    hy_weight : one-row DataFrame with the portfolio weight of each industry.
    factor_netural : DataFrame with a column `thatday` holding the
        size-neutral factor.
    logmv : DataFrame of log market caps, used when st_weight_method == 'logmv'.
    groupid : 1..totalgroupnum (1 = highest factor values) or None for no split.
    totalgroupnum : number of quantile groups.
    st_weight_method : 'logmv' to weight stocks by log market cap, anything
        else for equal weights.
    mv_netural : "YES" ranks by `factor_netural`, otherwise by `factor`.
    hy_netural : "YES" selects within every industry separately.

    Returns
    -------
    Series named `thatday`, indexed like `stocklist_canbuy`, holding the
    weight of each selected stock and 0.0 elsewhere.

    Notes
    -----
    Reads the module-level names `stocklist`, `all_trade_day` and
    `ShiftTradingDay`.
    With mv_netural != "YES" the groups are ranked by the raw `factor`, as
    the flag promises; earlier code ranked by `factor_netural` in both cases.
    """
    # Only one day's weights are returned, so a Series is enough.
    Stockandweight=pd.Series(0.0,index=stocklist_canbuy.index,name=thatday)
    nd=ShiftTradingDay(thatday,1,all_trade_day)

    # Factor used for ranking: size-neutral or raw.
    ranking_factor=factor_netural if mv_netural=="YES" else factor

    # Keep stocks that can be bought at the next open, are seasoned and
    # trading today, and have a factor value.
    con=stocklist_canbuy[nd] &stocklist_ex_new_notrade[thatday] & (~ranking_factor.isnull()[thatday])
    st=stocklist[con]
    fac=ranking_factor[con]

    # Quantile bounds of the requested group.
    if groupid is not None:
        quant=np.linspace(0,1,totalgroupnum+1)
        upperlim=quant[-groupid]
        lowerlim=quant[-groupid-1]
    else:
        upperlim=1
        lowerlim=0

    def _stock_weights(selected,budget):
        # Split `budget` over the selected stocks: by log market cap or equally.
        if st_weight_method=='logmv':
            share=(logmv.loc[selected])/(logmv.loc[selected].sum())
            share.columns=["stweight"]
            return share["stweight"]*budget
        return pd.Series(budget/len(selected),index=selected)

    if hy_netural=="YES":
        # Factor value of each stock placed in its own industry column; all
        # other cells (and exact zeros) become NaN so they do not enter the
        # per-industry quantiles.
        fac_byhy=fac.values*st[sw_lv1_28]
        fac_byhy[fac_byhy==0]=np.nan

        for i in sw_lv1_28:
            industry_factor=fac_byhy[i]
            up=industry_factor.quantile(upperlim)
            low=industry_factor.quantile(lowerlim)
            hyi_st=industry_factor[(industry_factor<=up) & (industry_factor>low)].index
            if len(hyi_st)==0:
                continue
            Stockandweight.loc[hyi_st]=_stock_weights(hyi_st,hy_weight[i].iloc[0])
    else:
        day_factor=fac[thatday]
        up=day_factor.quantile(upperlim)
        low=day_factor.quantile(lowerlim)
        hyi_st=day_factor[(day_factor<=up)& (day_factor>low)].index
        if len(hyi_st)>0:
            Stockandweight.loc[hyi_st]=_stock_weights(hyi_st,1)

    return Stockandweight

8.分组净值

def group_netv(data,St_weight_gp5,date_list,fee="NO",cal_method="fast"):
    """Daily gross return (1 + r) of a group portfolio, one value per date.

    Weights chosen on day T are applied to the return realised from T+1:

    simple : T+1 previous close -> T+1 close (close to reality only for
             specific factors).
    avg    : T+1 average price -> T+2 average price.
    fast   : T+1 open -> T+2 open (default; closest to tradable prices, but
             with limited capacity).

    Parameters
    ----------
    data : mapping from field name ('pre_close', 'close', 'avg', 'open') to a
        dates x stocks price table.
    St_weight_gp5 : DataFrame of weights (index = stock, columns = date).
    date_list : ordered trading days.
    fee : "NO" for no costs; any other value charges tax + 2*margin, read
        from the module-level names `tax` and `margin`.
    cal_method : 'simple', 'avg' or 'fast'.

    Returns
    -------
    One-row DataFrame (index 'net_value', columns = date_list). Days without
    a computed return hold 1.

    Raises
    ------
    ValueError for an unknown `cal_method`.
    """
    net_v=pd.DataFrame(np.ones([1,len(date_list)]),index=["net_value"],columns=date_list)
    if fee=="NO":
        fee=0
    else:
        fee=tax+2*margin

    # Entry/exit price tables and the offset of the exit day from the selection day.
    if cal_method=="simple":
        oprice_anchor=data["pre_close"].T
        cprice_anchor=data["close"].T
        exit_offset=1
    elif cal_method=="avg":
        oprice_anchor=cprice_anchor=data["avg"].T
        exit_offset=2
    elif cal_method=="fast":
        oprice_anchor=cprice_anchor=data["open"].T
        exit_offset=2
    else:
        raise ValueError("cal_method must be 'simple', 'avg' or 'fast', got %r" % (cal_method,))

    days=list(date_list)
    # enumerate gives the position directly instead of searching the list
    # again on every iteration.
    for pos,i in enumerate(days[0:-3]):
        next1_i=days[pos+1]
        exit_day=days[pos+exit_offset]
        ret_array=(1-fee)*cprice_anchor[exit_day]/oprice_anchor[next1_i]-1
        net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()
    return net_v

9.计算回撤等指标

def getrisk(groupindex,Netv,date_list):
    """Summarise each group's net value curve.

    Parameters
    ----------
    groupindex : sequence of group labels (rows of `Netv`).
    Netv : DataFrame of cumulative net values (index = group, columns = date).
    date_list : ordered trading days; the curve is evaluated from the first
        day to the second-to-last day.

    Returns
    -------
    DataFrame indexed by `groupindex` with the columns 'total_return',
    'Yearly_ret' and 'MaxDraw' (percent strings) and 'SharpR' (float).
    """
    nav=Netv.astype(float)

    total_return=nav[date_list[-2]]/nav[date_list[0]]-1

    # Annualised with 250 trading days per year.
    Yearly_ret=(total_return+1)**(250/(len(date_list)-2))-1

    # Maximum drawdown: largest relative drop from the running peak.
    running_peak=nav.cummax(axis=1)
    MaxDraw=((running_peak-nav)/running_peak).max(axis=1)

    # NOTE(review): the denominator is the standard deviation of daily net
    # value changes (not of percentage returns) and no risk-free rate is
    # deducted; kept as in the existing methodology.
    SharpR=(Yearly_ret)/((nav.diff(axis=1).std(axis=1))*(250**0.5))

    risk_matrix=pd.DataFrame(columns=["total_return","Yearly_ret","MaxDraw","SharpR"],index=groupindex)
    # Look the groups up by label; integer positions on a label-indexed
    # Series are deprecated in pandas.
    risk_matrix["total_return"]=["{:.2%}".format(total_return.loc[g]) for g in groupindex]
    risk_matrix["Yearly_ret"]=["{:.2%}".format(Yearly_ret.loc[g]) for g in groupindex]
    risk_matrix["MaxDraw"]=["{:.2%}".format(MaxDraw.loc[g]) for g in groupindex]
    risk_matrix["SharpR"]=SharpR
    return risk_matrix

	total_return	Yearly_ret	MaxDraw	SharpR
nv_gp1	-57.33%	-43.93%	57.79%	-2.85612
nv_gp2	-18.86%	-13.24%	39.50%	-0.655763
nv_gp3	4.57%	3.09%	32.06%	0.133261
nv_gp4	26.08%	17.05%	26.96%	0.645323
nv_gp5	42.14%	26.99%	24.27%	0.91201

一个简单的单因子测试，附上一个修正的反转因子

1、导入所需的包¶

2、确认基本参数¶

3、因子构建和描述¶

4. 因子解释(linear regression)和分组回测¶

5. 评价表现¶

工具函数¶

审核消息

该文章已通过审核

全部回复

0/140

热门文章最新文章

热门标签

更多人气分析师

财经资讯

行情数据