繁簡切換您正在訪問的是FX168財經網,本網站所提供的內容及信息均遵守中華人民共和國香港特別行政區當地法律法規。

FX168财经网>人物频道>帖子

多因子回测框架(下)--检验因子

作者/adadsfd 2019-08-06 11:34 0 来源: FX168财经网人物频道

从IC,IC_IR,分组收益,超额收益几个角度统计因子有效性。

多因子回测框架(上)--生成因子

# 盘古开天地-load数据
import datetime
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st
import pickle
# Load the package produced by part one of the framework (factor generation).
# NOTE: pickle can execute arbitrary code while loading -- only open
# 'Z1Package.pkl' when it comes from a trusted source.
with open('Z1Package.pkl', 'rb') as pkl_file:
    # The context manager closes the file handle even if unpickling fails.
    load_Package = pickle.load(pkl_file)
univ_dict,return_df,all_return_df,all_factor_dict,MC_df,all_industry_df=load_Package
/opt/conda/envs/python3new/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
# 1.回测基础数据计算
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """Build per-row return series for factor-sorted groups held between rebalances.

    On every rebalance date except the last, the stocks that belong to that
    date's universe and have a non-NaN factor value are sorted by ascending
    factor value and cut into ``GroupNum`` consecutive slices.  Each slice is
    held equal-weighted (buy and hold) up to the next rebalance date.

    Parameters
    ----------
    factor : pandas.DataFrame
        Factor values; rows are looked up by rebalance date, columns are stocks.
    univ_dict : dict
        Maps rebalance date -> iterable of stock codes.  The key order is used
        as the rebalance schedule.
    all_return_df : pandas.DataFrame
        Returns per row (index) and stock (columns); its index defines the
        rows of the result.
    GroupNum : int, optional
        Number of groups (default 10).

    Returns
    -------
    pandas.DataFrame
        Rows from the first rebalance date onward, columns ``0..GroupNum-1``.
        Values are shifted down by one row and missing values are replaced by 0.
    """
    all_date_list=list(all_return_df.index)
    date_list=list(univ_dict.keys())
    # A float frame avoids object-dtype arithmetic and downcasting warnings later on.
    all_Group_Ret_df=pd.DataFrame(index=all_date_list,columns=list(range(GroupNum)),dtype=float)
    for start,end in zip(date_list[:-1],date_list[1:]):
        factor_row=factor.loc[start].dropna()
        # Boolean mask instead of a set indexer: pandas 2.x rejects sets in .loc,
        # and the mask keeps the column order, so the input order is deterministic.
        in_universe=factor_row[factor_row.index.isin(list(univ_dict[start]))]
        # A stable sort keeps tied factor values in column order on every run.
        factor_se_stock=list(in_universe.sort_values(kind='mergesort').index)
        N=len(factor_se_stock)
        for i in range(GroupNum):
            group_stock=factor_se_stock[int(N/GroupNum*i):int(N/GroupNum*(i+1))]
            # Equal-weighted buy-and-hold net value of the group, starting from 1.
            cumret=(all_return_df.loc[start:end,group_stock]+1).cumprod().mean(axis=1)
            # Turn the net value back into per-row returns: prepend the initial
            # value 1, take the relative change and realign.  The row at `end`
            # becomes NaN and is overwritten by the next holding period.
            all_Group_Ret_df.loc[start:end,i]=cumret.shift(1).fillna(1).pct_change().shift(-1)
    all_Group_Ret_df=all_Group_Ret_df.loc[date_list[0]:].shift(1).fillna(0)
    return all_Group_Ret_df

def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Compute the mean return of each factor-sorted group on every rebalance date.

    For each date in ``univ_dict`` the stocks that are in the universe and have
    both a factor value and a return on that date are sorted by ascending
    factor value and cut into ``GroupNum`` consecutive slices; the mean return
    of each slice is recorded.

    Returns a DataFrame indexed by rebalance date with one column per group,
    shifted down by one row and with missing values replaced by 0.
    """
    rebalance_dates=list(univ_dict.keys())
    bucket_labels=list(np.array(range(GroupNum)))
    bucket_returns=pd.DataFrame(index=rebalance_dates,columns=bucket_labels)
    for day in rebalance_dates:
        in_universe=set(univ_dict[day])
        has_factor=set(factor.loc[day].dropna().index)
        has_return=set(return_df.loc[day].dropna().index)
        tradable=list(in_universe&has_factor&has_return)
        # Stock codes ordered from the smallest to the largest factor value.
        ranked=list(factor.loc[day,tradable].sort_values().index)
        size=len(ranked)
        for bucket in range(GroupNum):
            lower=int(size/GroupNum*bucket)
            upper=int(size/GroupNum*(bucket+1))
            members=ranked[lower:upper]
            bucket_returns.loc[day,bucket]=return_df.loc[day,members].mean()
    lagged=bucket_returns.shift(1)
    return lagged.fillna(0)

def ic_calculator(factor,return_df,univ_dict):
    """Return the Spearman rank correlation between factor and return per date.

    For every date in ``univ_dict`` the universe is restricted to stocks that
    have both a factor value and a return on that date.  Dates with fewer than
    10 such stocks contribute nothing, so the result can be shorter than the
    number of dates.

    Returns a list of correlation coefficients in date order.
    """
    ic_values=[]
    for day,universe in univ_dict.items():
        usable=list(set(universe)&set(factor.loc[day].dropna().index)&set(return_df.loc[day].dropna().index))
        if len(usable)>=10:
            # Only the coefficient is kept; the p-value is discarded.
            rho,_=st.spearmanr(factor.loc[day,usable],return_df.loc[day,usable])
            ic_values.append(rho)
    return ic_values

def get_index_return(univ_dict,index,count=250):
    """Fetch closing prices of ``index`` and derive two return series.

    ``get_price`` is called for the ``count`` rows ending at the latest key of
    ``univ_dict``.  Two series are returned:

    1. the row-by-row percentage change from the first rebalance date onward;
    2. the percentage change between consecutive rebalance dates.

    The first value of each series, which has no predecessor, is set to 0.
    """
    rebalance_dates=list(univ_dict.keys())
    last_date=max(rebalance_dates)
    close=get_price(index,end_date=last_date,count=count,fields=['close'])['close']
    from_first_rebalance=close.loc[rebalance_dates[0]:]
    on_rebalance_dates=close.loc[rebalance_dates]
    return (from_first_rebalance.pct_change().fillna(0),
            on_rebalance_dates.pct_change().fillna(0))

def effect_test(univ_dict,key,group_return,group_excess_return,index_return=None):
    """Run three pass/fail checks on the group returns of one factor.

    Parameters
    ----------
    univ_dict : dict
        Only its first and last keys are used, to measure the length of the
        test period in days (the keys must support subtraction yielding an
        object with a ``.days`` attribute).
    key : object
        Factor name.  Not used by the checks; kept for interface compatibility.
    group_return : pandas.DataFrame
        Returns per period (rows) and group (columns, ordered from the lowest
        to the highest factor group).
    group_excess_return : pandas.DataFrame
        Same layout as ``group_return``, net of the benchmark.
    index_return : pandas.Series, optional
        Benchmark returns.  When omitted, the module-level variable
        ``index_return`` is used, as the original implementation did.

    Returns
    -------
    list
        ``[test_one_pass, test_two_win_pass, test_two_lose_pass,
        test_three_win_pass, test_three_lose_pass]``.
    """
    if index_return is None:
        # Backward compatibility: earlier callers relied on a global variable.
        try:
            index_return=globals()['index_return']
        except KeyError:
            raise NameError("name 'index_return' is not defined")

    trade_dates=list(univ_dict.keys())
    daylength=(trade_dates[-1]-trade_dates[0]).days
    # Annualised gross return (final net value scaled to 365 days).  Both
    # figures omit the "-1", which cancels out in the differences below.
    annual_return=((group_return+1).cumprod().iloc[-1,:]**(365/daylength)).astype(float)
    index_annual_return=(index_return+1).cumprod().iloc[-1]**(365/daylength)

    # Test one: correlation between group rank and group return must exceed 0.5
    # in absolute value.
    groups=list(group_return.columns)
    sequence=pd.Series(range(len(groups)),index=annual_return.index)
    test_one_corr=annual_return.corr(sequence)
    test_one_passgrade=0.5
    test_one_pass=abs(test_one_corr)>test_one_passgrade

    # A negative correlation means the lowest group is the winner.
    if test_one_corr<0:
        wingroup,losegroup=groups[0],groups[-1]
    else:
        wingroup,losegroup=groups[-1],groups[0]

    # Test two: the winner group beats the benchmark and the loser group trails
    # it, each by more than 5%.
    test_two_passgrade=0.05
    test_two_win_pass=annual_return[wingroup]-index_annual_return>test_two_passgrade
    test_two_lose_pass=index_annual_return-annual_return[losegroup]>test_two_passgrade

    # Test three: the winner group beats the benchmark in more than half of the
    # periods, and the loser group trails it in more than half of the periods.
    test_three_grade=0.5
    win_excess=group_excess_return[wingroup]
    lose_excess=group_excess_return[losegroup]
    test_three_win_pass=(win_excess>0).sum()/len(win_excess)>test_three_grade
    test_three_lose_pass=(lose_excess<0).sum()/len(lose_excess)>test_three_grade

    return [test_one_pass,test_two_win_pass,test_two_lose_pass,test_three_win_pass,test_three_lose_pass]

#2. 计算绩效
def plot_nav(all_return_df,index_return,key):
    """Plot the net-value curve of every group, raw and hedged against the index.

    The upper panel shows the cumulative product of ``1 + return`` for each
    column of ``all_return_df``; the lower panel shows the same after
    subtracting ``index_return``.  ``key`` is accepted but not used.
    """
    # Figure with two stacked panels.
    fig = plt.figure(figsize=(12,12))
    fig.set_facecolor('white')
    fig.set_tight_layout(True)
    ax_nav = fig.add_subplot(211)
    ax_hedged = fig.add_subplot(212)
    panels = ((ax_nav, u"净值", u"因子选股 - 净值走势"),
              (ax_hedged, u"对冲净值", u"因子选股 - 对冲指数后净值走势"))
    for axis, ylabel, title in panels:
        axis.grid()
        axis.set_ylabel(ylabel, fontsize=16)
        axis.set_title(title, fontsize=16)

    # One curve per group in each panel.
    dates = list(all_return_df.index)
    for group in all_return_df.columns:
        group_ret = all_return_df[group]
        ax_nav.plot(dates, (1+group_ret).cumprod(), label=str(group))
        ax_hedged.plot(dates, (1+group_ret-index_return).cumprod(), label=str(group))

    for axis, _, _ in panels:
        axis.legend(loc=0, fontsize=12)
    
def polish(x):
    """Format a fraction as a percentage string with two decimals (0.1234 -> '12.34%')."""
    return '%.2f%%' % (x*100)

def result_stats(key,all_return_df,index_return):
    """Summarise risk and performance of every group of one factor.

    Parameters
    ----------
    key : object
        Factor name; used as the name of the result index.
    all_return_df : pandas.DataFrame
        Returns per row (index must support subtraction yielding ``.days``)
        and group (columns).
    index_return : pandas.Series
        Benchmark returns aligned with ``all_return_df``.

    Returns
    -------
    pandas.DataFrame
        One row per group.  Columns are a two-level index covering risk
        indicators (Alpha, Beta, information ratio, Sharpe ratio) and, for both
        the long-only and the index-hedged series, annualised return, maximum
        drawdown and annualised volatility.  Beta, information ratio and Sharpe
        ratio are rounded to two decimals; the rest are percentage strings.
    """
    rf=0.04                 # risk-free rate used for Alpha and Sharpe
    annualize=252**0.5      # volatility is scaled by sqrt(252)

    # Preallocate the result DataFrame.
    sequences=all_return_df.columns
    cols = [(u'风险指标', u'Alpha'), (u'风险指标', u'Beta'), (u'风险指标', u'信息比率'), (u'风险指标', u'夏普比率'),
            (u'纯多头', u'年化收益'), (u'纯多头', u'最大回撤'), (u'纯多头', u'收益波动率'),
            (u'对冲后', u'年化收益'), (u'对冲后', u'最大回撤'), (u'对冲后', u'收益波动率')]
    columns = pd.MultiIndex.from_tuples(cols)
    result_df = pd.DataFrame(index = sequences,columns=columns)
    result_df.index.name = "%s" % (key)

    # Quantities that do not depend on the group are computed once.
    daylength=(all_return_df.index[-1]-all_return_df.index[0]).days
    exponent=1.0*365/daylength
    nav_index=(1+index_return).cumprod()
    yearly_index_return=nav_index.iloc[-1]**exponent-1
    index_std=index_return.std()

    for sq in sequences:

        # Net value, long-only and hedged against the index.
        return_data=all_return_df[sq].astype(float)
        return_data_excess=return_data-index_return
        nav=(1+return_data).cumprod()
        nav_excess=(1+return_data_excess).cumprod()

        # Beta = corr(group, index) * std(group) / std(index)
        beta=return_data.corr(index_return)*return_data.std()/index_std

        # Annualised return.
        yearly_return=nav.iloc[-1]**exponent-1
        yearly_return_excess=nav_excess.iloc[-1]**exponent-1

        # Maximum drawdown: largest relative fall below the running peak,
        # where the peak is never taken to be lower than the initial value 1.
        max_drawdown=(1-nav/nav.cummax().clip(lower=1)).max()
        max_drawdown_excess=(1-nav_excess/nav_excess.cummax().clip(lower=1)).max()

        # Annualised volatility.
        vol=return_data.std()*annualize
        vol_excess=return_data_excess.std()*annualize

        # Alpha (CAPM): Rp - (rf + beta * (Rm - rf)).  The market premium is
        # the benchmark return over the risk-free rate, not Rp - Rm.
        alpha=yearly_return-(rf+beta*(yearly_index_return-rf))

        # Information ratio: active return over tracking error.
        ir=(yearly_return-yearly_index_return)/vol_excess

        # Sharpe ratio.
        sharpe=(yearly_return-rf)/vol

        # Pretty-print: percentages as strings, ratios rounded to two decimals.
        alpha,yearly_return,max_drawdown,vol,yearly_return_excess,max_drawdown_excess,vol_excess=\
        map(polish,[alpha,yearly_return,max_drawdown,vol,yearly_return_excess,max_drawdown_excess,vol_excess])
        sharpe=round(sharpe,2)
        ir=round(ir,2)
        beta=round(beta,2)  # was round(ir,2), which reported IR in the Beta column

        result_df.loc[sq]=[alpha,beta,ir,sharpe,yearly_return,max_drawdown,vol,yearly_return_excess,max_drawdown_excess,vol_excess]
    return result_df

def draw_excess_return(excess_return):
    """Draw a bar chart of the mean excess return of every group.

    The first row of ``excess_return`` is skipped, the remaining rows are
    averaged per column, and the groups are shown numbered from 1.  Positive
    means are drawn in red, negative means in green.
    """
    excess_return_mean=excess_return[1:].mean()
    # Use a concrete list: a lazy map object is not a valid index in every
    # pandas version.
    excess_return_mean.index = [int(x)+1 for x in excess_return_mean.index]
    excess_plus=excess_return_mean[excess_return_mean>0]
    excess_minus=excess_return_mean[excess_return_mean<0]

    fig = plt.figure(figsize=(12, 6))
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(111)
    ax1.bar(excess_plus.index, excess_plus.values, align='center', color='r', width=0.35)
    ax1.bar(excess_minus.index, excess_minus.values, align='center', color='g', width=0.35)
    ax1.set_xlim(left=0.5, right=len(excess_return_mean)+0.5)
    ax1.set_ylabel(u'超额收益', fontsize=16)
    ax1.set_xlabel(u'十分位分组', fontsize=16)
    ax1.set_xticks(excess_return_mean.index)
    ax1.set_xticklabels([int(x) for x in ax1.get_xticks()], fontsize=14)
    # Format ticks as percentages with two decimals.  Appending '0%' to
    # str(x*100) gave labels such as '0.250%' or raw floating-point noise.
    ax1.set_yticklabels(['%.2f%%' % (x*100) for x in ax1.get_yticks()], fontsize=14)
    ax1.set_title(u"因子选股分组超额收益", fontsize=16)
    ax1.grid()
print('计算IC_IR......')
# One list of rank ICs per factor.
ic_list_dict={key:ic_calculator(factor,return_df,univ_dict)
              for key,factor in all_factor_dict.items()}
# Collect the results; the last rebalance date is left out of the index.
ic_df=pd.DataFrame(ic_list_dict,index=list(univ_dict.keys())[:-1])
ic_ir_se=ic_df.mean()/ic_df.std()

print('计算分组收益......')
GroupNum=10
# Group returns over the full return history.
all_Factor_Group_Return_dict={
    key:all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum)
    for key,factor in all_factor_dict.items()}
# Group returns on rebalance dates only.
Factor_Group_Return_dict={
    key:Group_Return_calculator(factor,univ_dict,return_df,GroupNum)
    for key,factor in all_factor_dict.items()}

print('计算指数收益......')
index='000300.XSHG'
index_return,index_return_by_tradeday=get_index_return(univ_dict,index)
# Group returns net of the index, per rebalance date.
Factor_Group_Excess_Return_dict={
    key:group_return.subtract(index_return_by_tradeday,axis=0)
    for key,group_return in Factor_Group_Return_dict.items()}

print('因子有效性测试......')
effect_test_dict={
    key:effect_test(univ_dict,key,group_return,Factor_Group_Excess_Return_dict[key])
    for key,group_return in Factor_Group_Return_dict.items()}

#---------- names of the factors that pass every test -----------
effect_factor_list=[key for key,effect in effect_test_dict.items() if all(effect)]
effect_factor_list
#------------ factor data of the passing factors -------------
effect_factor_dict={key:value for key,value in all_factor_dict.items() if key in effect_factor_list}

print('完成')
计算IC_IR......
计算分组收益......
计算指数收益......
因子有效性测试......
完成
# Summary table: IC_IR of every factor next to its five pass/fail flags.
icir_frame=ic_ir_se.to_frame('a')
flags_frame=pd.DataFrame(effect_test_dict).T
EffectTestresult=pd.concat([icir_frame,flags_frame],axis=1)
# Two header rows: test name on top, winner/loser group below.
columns=[
    ['ICIR','测试一', '测试二', '测试二', '测试三', '测试三', ],
    [' ', ' ', '胜者组','败者组','胜者组','败者组'],
]
EffectTestresult.columns=columns
EffectTestresult
ICIR 测试一 测试二 测试三
胜者组 败者组 胜者组 败者组
AQI -1.372226 True True True True True
DSRI -0.165384 False False True False True
GMI -0.089589 False False True True True
LVGI 0.358615 False True False False True
SGAI 0.425046 False False False False False
SGI 0.153619 True False True False True
TATA -0.213534 False False False False False
# Plot the group net-value curves (raw and index-hedged) for a single factor.
# To cover every factor that passed the tests, iterate over
# effect_factor_dict.items() instead of fixing the key.
key='AQI'
plot_nav(all_Factor_Group_Return_dict[key],index_return,key)  
/opt/conda/envs/python3new/lib/python3.6/site-packages/matplotlib/figure.py:1743: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
# Build the risk/performance table of every group for a single factor.
# To cover every factor that passed the tests, iterate over
# effect_factor_dict.items() instead of fixing the key.
key='AQI'
result_df=result_stats(key,all_Factor_Group_Return_dict[key],index_return)
# Bare expression: displays the table when run as a notebook cell.
result_df
风险指标 纯多头 对冲后
Alpha Beta 信息比率 夏普比率 年化收益 最大回撤 收益波动率 年化收益 最大回撤 收益波动率
AQI
0 -9.06% 1.46 1.46 0.36 10.88% 14.90% 19.08% 17.21% 11.70% 11.22%
1 -9.85% -0.43 -0.43 -0.8 -9.06% 24.32% 16.31% -4.07% 9.79% 8.45%
2 -9.29% 0.26 0.26 -0.45 -3.42% 20.17% 16.54% 1.96% 7.43% 7.95%
3 -9.84% -0.4 -0.4 -0.81 -8.39% 21.50% 15.39% -3.42% 11.02% 7.28%
4 -10.01% -0.64 -0.64 -0.9 -10.19% 19.36% 15.68% -5.28% 12.88% 7.39%
5 -10.46% -1.17 -1.17 -1.14 -13.24% 23.08% 15.18% -8.52% 12.20% 6.68%
6 -10.62% -0.86 -0.86 -1.07 -13.35% 25.46% 16.15% -8.68% 16.11% 9.19%
7 -11.25% -1.25 -1.25 -1.31 -17.53% 25.81% 16.39% -13.08% 20.16% 9.64%
8 -13.13% -2.56 -2.56 -2.05 -28.40% 35.27% 15.84% -24.55% 27.16% 8.98%
9 -9.63% -1.01 -1.01 -1.05 -15.32% 30.95% 18.34% -10.46% 16.39% 9.75%
# Draw the mean excess return per group for a single factor.
# To cover every factor that passed the tests, iterate over
# effect_factor_dict.items() instead of fixing the key.
key='AQI'
draw_excess_return(Factor_Group_Excess_Return_dict[key])
 
分享到:
举报财经168客户端下载

全部回复

0/140

投稿 您想发表你的观点和看法?

更多人气分析师

  • 张亦巧

    人气2144文章4145粉丝45

    暂无个人简介信息

  • 梁孟梵

    人气2152文章3177粉丝39

    qq:2294906466 了解群指导添加微信mfmacd

  • 指导老师

    人气1856文章4423粉丝52

    暂无个人简介信息

  • 李冉晴

    人气2296文章3821粉丝34

    李冉晴,专业现货实盘分析师。

  • 刘钥钥1

    人气2016文章3119粉丝34

    专业从事现货黄金、现货白银模拟实盘操作分析指导

  • 张迎妤

    人气1896文章3305粉丝34

    个人专注于行情技术分析,消息面解读剖析,给予您第一时间方向...

  • 金泰铬J

    人气2320文章3925粉丝51

    投资问答解咨询金泰铬V/信tgtg67即可获取每日的实时资讯、行情...

  • 金算盘

    人气2696文章7761粉丝125

    高级分析师,混过名校,厮杀于股市和期货、证券市场多年,专注...

  • 金帝财神

    人气4728文章8329粉丝118

    本文由资深分析师金帝财神微信:934295330,指导黄金,白银,...