看了一下文章多因子模型(三)-交易回测(策略收益90%,回撤12%)
就回测了一下,发现这里的收益更多来源于未来函数:选股时使用了由当期收益计算得到的当期IC。
改用上一期IC计算与使用当期IC计算,结果有着巨大的区别。
@颖da
因子读取¶
#第一步-因子生成
import time
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
from pandas import Panel,DataFrame,Series
import statsmodels.api as sm
import scipy.stats as st
from jqfactor import get_factor_values
from jqfactor import winsorize,winsorize_med,neutralize,standardlize
import pickle
import xlrd  # 156 factor names are loaded from an uploaded Excel sheet instead of being typed by hand
# Read the factor-name table: first sheet, one factor category per column.
ExcelFile=xlrd.open_workbook('FactorTable.xlsx')
name=ExcelFile.sheet_names()
sheet=ExcelFile.sheet_by_name(name[0])
# The slices trim trailing blank cells; lengths match the sheet layout.
factor_quality=list(sheet.col_values(1))
factor_fundamental=list(sheet.col_values(2))[:28]
factor_mood=list(sheet.col_values(3))[:35]
factor_growth=list(sheet.col_values(4))[:8]
factor_risk=list(sheet.col_values(5))[:12]
factor_stock=list(sheet.col_values(6))[:15]
starttime=time.clock()  # NOTE(review): time.clock() was removed in Python 3.8+; this runs on the platform's Python 2
# Notebook-wide configuration. `global` at module level is a no-op statement;
# these lines only document which names later cells treat as globals.
global g_index
global g_count
global g_factor_list
global g_univ_dict
global g_neu_factor
global g_factor_dict
g_index='000300.XSHG'  # benchmark index: CSI 300
g_count=500            # default look-back length in trading days
# Full factor universe, and the subset that gets industry/market-cap neutralization.
g_factor_list=factor_quality+factor_fundamental+factor_mood+factor_growth+factor_risk+factor_stock
g_neu_factor=factor_quality+factor_fundamental+factor_growth+factor_stock
g_factor_dict = {}
import cPickle as pickle
from six import StringIO
# Persist the factor list to research storage (write_file is a JoinQuant platform helper).
content = pickle.dumps(g_factor_list) # pickle.dumps returns a (byte) string
write_file('JQFactorAuto/g_factor_list.pkl', content, append=False)
import cPickle as pickle
from six import StringIO
# Persist the neutralization subset the same way.
content = pickle.dumps(g_neu_factor) # pickle.dumps returns a (byte) string
write_file('JQFactorAuto/g_neu_factor.pkl', content, append=False)
方法¶
获得因子数据方法¶
# Sample the trade-day calendar: every `interval`-th day, counted backwards from `end`.
def get_trade_dates(end,count=250,interval=20):
    """Return a chronologically ordered list of trade days, keeping every
    `interval`-th day counted backwards from `end` over the last `count` days
    (so `end`'s most recent trade day is always included)."""
    days = list(jqdata.get_trade_days(end_date=end, count=count))
    # Walk from the newest day backwards and keep positions 0, interval, 2*interval, ...
    sampled = [d for i, d in enumerate(reversed(days)) if i % interval == 0]
    sampled.reverse()
    return sampled
# Investable stock pool on a date: whole market or index members,
# excluding stocks listed for fewer than 60 trading days.
def get_stock_pool(date,index='all'):
    """Return the investable stock list on `date`; `index='all'` means the
    whole market, otherwise the intersection with the index constituents."""
    listed = get_all_securities(types=['stock'], date=date)
    cutoff = jqdata.get_trade_days(end_date=date, count=60)[0]
    # Drop recently listed stocks (IPO within the last 60 trading days).
    seasoned = listed[listed['start_date'] < cutoff]
    universe_pool = list(seasoned.index)
    if index == 'all':
        return universe_pool
    index_pool = get_index_stocks(index, date=date)
    return list(set(index_pool) & set(universe_pool))
def get_stock_universe(trade_date_list,index='all'):
    """Build the per-date stock pools.

    Returns (list of pools in input-date order, {date: pool} dict)."""
    pools = {}
    ordered = []
    for trade_date in trade_date_list:
        pool = get_stock_pool(trade_date, index)
        ordered.append(pool)
        pools[trade_date] = pool
    return ordered, pools
# Map each stock in `stock_list` to its Shenwan level-1 industry code on `date`.
def get_Industry_by_day(date,stock_list):
    """Return a Series (index = stocks, name = date) of SW-L1 industry codes;
    stocks not found in any of the listed industries stay NaN."""
    sw_l1_codes = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
                   '801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
                   '801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880', '801890']
    industry_se = Series(name=date, index=stock_list)
    for code in sw_l1_codes:
        members = set(get_industry_stocks(code, date=date)) & set(stock_list)
        industry_se.loc[list(members)] = code
    return industry_se
"""def get_Industry_by_day(date):
industry_set = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
'801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
'801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880','801890']
industry_df = pd.DataFrame(index=[date],columns=g_univ_dict[date])
for industry in industry_set:
industry_stocks = get_industry_stocks(industry,date = date)
industry_stocks = list(set(industry_stocks)&set(g_univ_dict[date]))
industry_df.loc[date,industry_stocks] = industry
return industry_df
# 得到对应日期的行业数据
def get_industry_df(trade_date_list):
all_industry_df = pd.DataFrame()
for date in trade_date_list:
data = get_Industry_by_day(date)
all_industry_df = pd.concat([all_industry_df, data])
return all_industry_df
all_industry_df = get_industry_df(trade_date_list)
# 行业日期类型
all_industry_df.index = list(map(lambda x:x.strftime("%Y-%m-%d"),all_industry_df.index))
"""
# Fetch all configured factor values for one day from the JoinQuant factor library.
def get_jq_factor_by_day(date,stock_list):
    """Return {factor_name: DataFrame} for every factor in g_factor_list on `date`."""
    values = get_factor_values(securities=stock_list, factors=g_factor_list,
                               start_date=date, end_date=date)
    return values
# Fill missing factor values with the median of the stock's industry.
def replace_nan_indu(factor_se,indu_se):
    """Fill NaN factor values with their industry median.

    Parameters
    ----------
    factor_se : Series of factor values per stock (may contain NaN).
    indu_se   : Series of industry codes per stock (NaN industries are dropped).

    Returns
    -------
    Series named 'values_x' (the name the original implementation returned),
    covering stocks present in both inputs; a NaN survives only when its
    whole industry is NaN.
    """
    factor_df = factor_se.to_frame('values')
    indu_df = indu_se.dropna().to_frame('industryName1')
    merged = factor_df.merge(indu_df, left_index=True, right_index=True, how='inner')
    # Per-industry median broadcast back onto every member stock.
    indu_median = merged.groupby('industryName1')['values'].transform('median')
    # fillna replaces the original chained-assignment write
    # (df['values_x'][mask] = ...), which silently writes to a copy under
    # modern pandas copy-on-write semantics.
    filled = merged['values'].fillna(indu_median)
    filled.name = 'values_x'
    return filled
# Build the fully processed factor DataFrame (stocks x factors) for one date.
def get_final_factors(date):
    """For `date`: fetch raw factor values for the configured index pool,
    fill NaN with industry medians, winsorize, optionally neutralize, and
    standardize. Returns a DataFrame indexed by stock, one column per factor."""
    # Investable pool for the configured index on this date
    stock_list = get_stock_pool(date,index=g_index)
    # Raw factor values for that pool
    factor_dict = get_jq_factor_by_day(date,stock_list)
    # SW-L1 industry membership, used for median filling (and implied by neutralization)
    indu_se = get_Industry_by_day(date,stock_list)
    #indu_se = all_industry_df.ix[date]
    factor_df = DataFrame()
    for fac in list(g_factor_list):
        # get_factor_values returns one row per day; take that single row as a Series
        factor_se = factor_dict[fac].iloc[0]
        # Replace missing values with the industry median
        factor_se = replace_nan_indu(factor_se,indu_se)
        # Winsorize at 3 median absolute deviations
        factor_se=winsorize_med(factor_se, scale=3, inclusive=True, inf2nan=True, axis=1)
        # Industry + market-cap neutralization, only for factors flagged in g_neu_factor
        if fac in g_neu_factor:
            factor_se=neutralize(factor_se, how=['jq_l1', 'market_cap'], date=date, axis=1)
        # Z-score standardization
        factor_se=standardlize(factor_se, inf2nan=True, axis=0)
        tmp_df = factor_se.to_frame(fac)
        factor_df = pd.concat([factor_df,tmp_df],axis=1)
    return factor_df
# Threaded variant of the per-date factor computation. (In the original file a
# caching re-definition with the same name follows and shadows this one.)
def get_all_final_factors(trade_date_list):
    """Compute the final factor DataFrame for every date in parallel.

    Returns the list of per-date DataFrames, in `trade_date_list` order.
    Fix: the original computed `pool.map(...)` but discarded the result and
    returned nothing, making the whole run a no-op for the caller.
    """
    pool = ThreadPool(processes=len(trade_date_list))
    frame_list = pool.map(get_final_factors, trade_date_list)
    pool.close()
    pool.join()
    return frame_list
def get_all_final_factors(trade_date_list):
    """Compute and cache the final factors date by date.

    Results are stored one file per year ('g_factor_dict_<year>.pkl', holding
    {date: DataFrame}); dates already in the cache are skipped, so an
    interrupted run is resumable.
    """
    for date in trade_date_list:
        year = date[:4]  # one cache file per calendar year
        # Load the existing cache for this year; start fresh if the file is
        # missing or unreadable. The narrowed except replaces a bare `except`
        # that would also have swallowed KeyboardInterrupt and real bugs.
        try:
            with open('g_factor_dict_%s.pkl'%year, 'rb') as pkl_file:
                g_factor_dict_tmp = pickle.load(pkl_file)
        except (IOError, OSError, EOFError, pickle.UnpicklingError):
            g_factor_dict_tmp = {}
        # Skip dates that were already computed.
        if date in g_factor_dict_tmp.keys():
            print("跳过:%s"%date)
            continue
        g_factor_dict_tmp[date] = get_final_factors(date)
        print("已完成:%s"%date)
        # `with` guarantees the handle is closed even if dump raises; the
        # original left both read and write handles open.
        with open('g_factor_dict_%s.pkl'%year, 'wb') as pkl_file:
            pickle.dump(g_factor_dict_tmp, pkl_file, 0)
当前日期¶
# Current run date
#today=datetime.date.today()
today = '2018-11-13'
today = '2015-07-31' # start date
# NOTE(review): only this last assignment takes effect; the two above are
# leftovers from earlier runs.
today = '2016-03-02'
g_count=500
# The trade day immediately before `today`
yesterday=jqdata.get_trade_days(end_date=today,count=2)[0]
print("today:%s"%today)
print("yesterday:%s"%yesterday)
第一步:获得因子数据¶
过去两年的交易日【每隔20交易日】¶
# Trade days over the past two years, sampled every 20 trading days
g_count=250*2
g_index='000300.XSHG'
trade_date_list=get_trade_dates(yesterday,g_count,20)
# Normalize to 'YYYY-MM-DD' strings: the cache keys and DataFrame indices use strings
trade_date_list = list(map(lambda x:x.strftime("%Y-%m-%d"),trade_date_list))
# Newest date first for the computation pass
trade_date_list.reverse()
trade_date_list
开始计算¶
# Run the (resumable) factor computation and time it.
starttime=time.clock()  # NOTE(review): time.clock() was removed in Python 3.8+
get_all_final_factors(trade_date_list)
endtime=time.clock()
runtime=endtime-starttime
print('因子生成运行完成,用时 %.2f 秒' % runtime)
第二步:因子检验¶
读取因子数据¶
# Anchor dates were set earlier in the notebook.
print("today:%s"%today)
print("yesterday:%s"%yesterday)
# Trade days over the past two years, sampled every 20 trading days
g_count=250*2
g_index='000300.XSHG'
trade_date_list=get_trade_dates(yesterday,g_count,20)
trade_date_list = list(map(lambda x:x.strftime("%Y-%m-%d"),trade_date_list))
# Years whose cache files must be loaded
years = list(set(map(lambda x:x[:4],trade_date_list)))
# Merge all yearly caches into one {date: DataFrame} dict
g_factor_dict = {}
for year in years:
    # `with` closes the handle; the original leaked one file object per year
    with open('g_factor_dict_%s.pkl'%year, 'rb') as pkl_file:
        g_factor_dict_tmp = pickle.load(pkl_file)
    g_factor_dict.update(g_factor_dict_tmp)
# Drop cached dates outside the requested window. Iterate over a snapshot of
# the keys: deleting while iterating the live view raises RuntimeError on
# Python 3 (Python 2's .keys() happened to return a list).
for k in list(g_factor_dict.keys()):
    if k not in trade_date_list:
        del g_factor_dict[k]
# Reshape {date: (stock x factor) DataFrame} into {factor: (stock x date) DataFrame}.
# NOTE(review): pd.Panel was removed in pandas 0.25; this requires the
# platform's older pandas.
p = Panel(g_factor_dict)
all_factor_dict = {}
for fac in p.minor_axis:
    all_factor_dict[fac] = p[:,:,fac].T
获取收益数据¶
# Forward returns between sampled trade days.
def get_return(trade_date_list,count=250):
    """Return (return_df, all_return_df).

    return_df:     forward return from each sampled trade day to the next
                   (rows = sampled dates).
    all_return_df: daily forward return for every trade day in the window.
    pct_change().shift(-1) makes row `t` hold the return earned AFTER `t`,
    so the last row is NaN.
    Known tail risk: a stock that was once an index member but has since
    delisted is missing from the price panel pulled on the final date.
    """
    date=max(trade_date_list)
    # Pre-adjusted close prices for the whole-market pool as of the last date
    universe=get_stock_pool(date,index='all')
    price=get_price(universe,end_date=date,count=count,fields=['close'],fq='pre')['close']
    price.index = list(map(lambda x:x.strftime("%Y-%m-%d"),price.index))
    return_df=price.loc[trade_date_list].pct_change().shift(-1)
    all_return_df=price.pct_change().shift(-1)
    return return_df,all_return_df
return_df,all_return_df=get_return(trade_date_list,count=g_count) # forward returns for all stocks
获取股票池¶
# NOTE(review): identical re-definition of get_stock_universe from the factor
# generation section, repeated so this section can run standalone.
def get_stock_universe(trade_date_list,index='all'):
    """Build the per-date stock pools; returns (list of pools, {date: pool})."""
    univ_list=[]
    univ_dict={}
    for date in trade_date_list:
        stock_pool=get_stock_pool(date,index)
        univ_list.append(stock_pool)
        univ_dict[date]=stock_pool
    return univ_list,univ_dict
print('获取股票池')
univ_list,univ_dict=get_stock_universe(trade_date_list,index=g_index) # build the pools
第二步:因子检验¶
读取数据¶
# Step II: helpers used for factor screening.
# Rank-IC series: Spearman correlation between factor values and forward returns.
def ic_calculator(factor,return_df,univ_dict):
    """Return the list of Spearman rank ICs, one per date in `univ_dict`,
    in chronological order.

    A stock enters a date's cross-section only if it has both a factor value
    and a forward return on that date. (The dead p-value accumulator the
    original built and never returned has been removed.)
    """
    ic_list=[]
    for date in sorted(list(univ_dict.keys())):
        univ=univ_dict[date]
        # Keep stocks with both a factor value and a forward return on this date
        univ=list(set(univ)&set(factor.loc[date].dropna().index)&set(return_df.loc[date].dropna().index))
        factor_se=factor.loc[date,univ]
        return_se=return_df.loc[date,univ]
        ic,p_value=st.spearmanr(factor_se,return_se)
        ic_list.append(ic)
    return ic_list
# 1. Daily cumulative group returns between rebalance dates (backtest NAV data).
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """For each rebalance interval: sort the pool by factor value (descending),
    split into GroupNum equal buckets, and convert each bucket's equal-weight
    cumulative performance back into daily returns. Returns a DataFrame
    indexed by every trade day with one column per group.
    NOTE(review): DataFrame.sort() was removed in pandas 0.20+ (sort_values is
    the replacement) and the bare `np` relies on the platform environment;
    the only call site of this function is currently commented out.
    """
    all_date_list=list(all_return_df.index)   # every trade day
    date_list=sorted(list(univ_dict.keys()))  # rebalance days
    all_Group_Ret_df=pd.DataFrame(index=all_date_list,columns=list(np.array(range(GroupNum))))
    for n in range(len(date_list)-1):
        start=date_list[n]
        end=date_list[n+1]
        univ=univ_dict[start]  # pool is fixed at the interval start
        univ=set(univ)&set(factor.loc[start].dropna().index)
        # Stocks ordered by factor value, high to low
        factor_se_stock=list(factor.loc[start,univ].dropna().to_frame('a').sort('a',ascending=False).index)
        N=len(factor_se_stock)
        for i in range(GroupNum):
            group_stock=factor_se_stock[int(N/GroupNum*i):int(N/GroupNum*(i+1))]
            # Equal-weight cumulative growth of the bucket over the interval...
            cumret=(all_return_df.loc[start:end,group_stock]+1).cumprod().mean(axis=1)
            # ...converted back into daily returns aligned to the interval
            all_Group_Ret_df.loc[start:end,i]=cumret.shift(1).fillna(1).pct_change().shift(-1)
    all_Group_Ret_df=all_Group_Ret_df[date_list[0]:].shift(1).fillna(0)
    return all_Group_Ret_df
# Per-rebalance-date group returns: sort by factor, bucket, average forward return.
def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Return a DataFrame (rebalance dates x GroupNum) of equal-weight group
    forward returns, shifted down one row so row `t` holds the return earned
    over the PREVIOUS interval (the first row becomes 0)."""
    date_list = sorted(list(univ_dict.keys()))
    GroupRet_df = pd.DataFrame(index=date_list, columns=list(np.array(range(GroupNum))))
    for date in date_list:
        univ = univ_dict[date]
        # Stocks must have both a factor value and a forward return on this date
        univ = list(set(univ) & set(factor.loc[date].dropna().index) & set(return_df.loc[date].dropna().index))
        # sort_values replaces DataFrame.sort, which was removed in pandas 0.20+
        factor_se_stock = list(factor.loc[date, univ].dropna().to_frame('a').sort_values('a', ascending=False).index)
        N = len(factor_se_stock)
        for i in range(GroupNum):
            # Float bucket edges split the ranking into GroupNum equal slices
            group_stock = factor_se_stock[int(N*1.0/GroupNum*i):int(N*1.0/GroupNum*(i+1))]
            GroupRet_df.loc[date, i] = return_df.loc[date, group_stock].mean()
    return GroupRet_df.shift(1).fillna(0)
# Benchmark index returns, both daily and at the rebalance frequency.
def get_index_return(univ_dict,index,count=250):
    """Return (daily index returns from the first rebalance date onward,
    index returns sampled at the rebalance dates). The first row of each is 0."""
    rebalance_days = sorted(list(univ_dict.keys()))
    last_day = max(rebalance_days)
    close = get_price(index, end_date=last_day, count=count, fields=['close'])['close']
    close.index = list(map(lambda x: x.strftime("%Y-%m-%d"), close.index))
    daily_ret = close.loc[rebalance_days[0]:].pct_change().fillna(0)
    sampled_ret = close.loc[rebalance_days].pct_change().fillna(0)
    return daily_ret, sampled_ret
# Factor effectiveness tests on the group-return matrix.
def effect_test(univ_dict,key,group_return,index_return,group_excess_return):
    """Run three screening tests for one factor.

    Returns (pass/fail list, score list) ordered:
    [monotonicity, winner-excess, loser-shortfall, winner-prob, loser-prob].
    NOTE(review): `cumprod` on the annual_return line is unqualified — it
    relies on the platform's implicit star import (pylab/numpy); confirm
    before running elsewhere.
    """
    start = sorted(list(univ_dict.keys()))[0]
    end = sorted(list(univ_dict.keys()))[-1]
    start = datetime.datetime.strptime(start, '%Y-%m-%d')
    end = datetime.datetime.strptime(end, '%Y-%m-%d')
    daylength=(end-start).days
    # Annualize via calendar-day exponent 365/daylength
    annual_return=np.power(cumprod(group_return+1).iloc[-1,:],365.0/daylength)
    index_annual_return=np.power((index_return+1).cumprod().iloc[-1],365.0/daylength)
    # Test 1: correlation between group number (0..9) and group annual return.
    # The bar actually used is 0.4, not the 0.5 the original comment claimed.
    sequence=pd.Series(np.array(range(10)))
    test_one_corr=annual_return.corr(sequence)
    test_one_passgrade=0.4
    test_one_pass=abs(test_one_corr)>test_one_passgrade
    # Winner/loser group index depends on the correlation's sign
    if test_one_corr<0:
        wingroup,losegroup=0,9
    else:
        wingroup,losegroup=9,0
    # Test 2: winner beats the index, loser trails it, each by more than 5%/yr
    test_two_passgrade=0.05
    test_two_win_excess=annual_return[wingroup]-index_annual_return
    test_two_win_pass=test_two_win_excess>test_two_passgrade
    test_two_lose_excess=index_annual_return-annual_return[losegroup]
    test_two_lose_pass=test_two_lose_excess>test_two_passgrade
    test_two_pass=test_two_win_pass&test_two_lose_pass
    # Test 3: frequency of winner beating / loser trailing the benchmark > 50%
    test_three_grade=0.5
    test_three_win_prob=(group_excess_return[wingroup]>0).sum()*1.0/len(group_excess_return[wingroup])
    test_three_win_pass=test_three_win_prob>0.5
    test_three_lose_prob=(group_excess_return[losegroup]<0).sum()*1.0/len(group_excess_return[losegroup])
    test_three_lose_pass=test_three_lose_prob>0.5
    test_three_pass=test_three_win_pass&test_three_lose_pass
    test_result=[test_one_pass,test_two_win_pass,test_two_lose_pass,test_three_win_pass,test_three_lose_pass]
    test_score=[test_one_corr,test_two_win_excess,test_two_lose_excess,test_three_win_prob,test_three_lose_prob]
    return test_result,test_score
计算每个因子的评分和筛选结果¶
# Compute the IC series for every factor.
starttime=time.clock()
print('\n计算IC_IR:')
count=1
ic_list_dict={}
for key,factor in all_factor_dict.items():
    ic_list=ic_calculator(factor,return_df,univ_dict)
    ic_list_dict[key]=ic_list
    print(count)  # progress indicator
    count=count+1
# One row per rebalance date, one column per factor
ic_df=pd.DataFrame(ic_list_dict,index=sorted(list(univ_dict.keys())))
# Drop the last row: it has no forward return (pct_change().shift(-1) leaves NaN)
ic_df = ic_df.iloc[:-1]
IC延续性¶
ic_df
# Share of factors whose IC shows a directional bias: a positive-IC ratio
# outside the [0.4, 0.6] band suggests the IC's sign persists over time.
count = 0
for col in ic_df.columns:
    tmpdf = ic_df[[col]]
    pct = 1.0*len(tmpdf[tmpdf[col]>0])/len(tmpdf)  # share of dates with IC > 0
    if pct < 0.4 or pct > 0.6:
        count += 1
# Fixes: under Python-2 integer division `count / len(...)` always printed 0,
# and the fraction was displayed next to a percent sign; scale by 100.
print("IC存在偏向和延续的概率:%.2f%%" % (100.0 * count / len(ic_df.columns)))
计算分组收益¶
# Group returns per factor at the rebalance frequency.
print('\n计算分组收益:')
count=1
GroupNum=10
# The daily NAV variant is only needed after screening, so it stays disabled here
all_Factor_Group_Return_dict={}
Factor_Group_Return_dict={}
for key,factor in all_factor_dict.items():
    #all_GroupRet_df=all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum)
    #all_Factor_Group_Return_dict[key]=all_GroupRet_df.sort_index()
    # Rebalance-period group returns
    GroupRet_df=Group_Return_calculator(factor,univ_dict,return_df,GroupNum)
    Factor_Group_Return_dict[key]=GroupRet_df.sort_index()
    print(count)  # progress indicator
    count=count+1
计算指数收益¶
# Benchmark returns and per-factor excess group returns.
print('\n计算指数收益:')
count=1
index='000300.XSHG'
index_return,index_return_by_tradeday=get_index_return(univ_dict,index)
Factor_Group_Excess_Return_dict={}
for key,group_return in Factor_Group_Return_dict.items():
    # Subtract the benchmark return row-wise from every group column
    Factor_Group_Excess_Return_dict[key]=group_return.subtract(index_return_by_tradeday,axis=0)
    print(count)  # progress indicator
    count=count+1
因子有效性测试¶
# Run the three effectiveness tests for every factor.
print('\n因子有效性测试:')
count=1
effect_test_result_dict={}
effect_test_score_dict={}
for key,group_return in Factor_Group_Return_dict.items():
    group_excess_return=Factor_Group_Excess_Return_dict[key]
    effect_test_result_dict[key],effect_test_score_dict[key]=effect_test(univ_dict,key,group_return,index_return,group_excess_return)
    print(count)  # progress indicator
    count=count+1
第三步:因子筛选¶
# Summary tables: |mean IC|, ICIR, and the five test outcomes per factor.
ic_ir_se=ic_df.mean()/ic_df.std()
# Absolute value of the mean IC (direction is handled later via ICIR's sign)
ic_avg_se=ic_df.mean().abs()
EffectTestresult=pd.concat([ic_avg_se.to_frame('a'),ic_ir_se.to_frame('b'),pd.DataFrame(effect_test_result_dict).T],axis=1)
columns=['IC','ICIR','测试一', '测试二-胜者组', '测试二-败者组', '测试三-胜者组', '测试三-败者组']
EffectTestresult.columns=columns
# Same layout, but raw scores instead of booleans
EffectTestresult2=pd.concat([ic_avg_se.to_frame('a'),ic_ir_se.to_frame('b'),pd.DataFrame(effect_test_score_dict).T],axis=1)
columns=['IC','ICIR','测试一', '测试二-胜者组', '测试二-败者组', '测试三-胜者组', '测试三-败者组']
EffectTestresult2.columns=columns
EffectTestresult
EffectTestresult2
因子IC和ICIR筛选 quantile¶
# Screening thresholds are taken from the cross-sectional distribution,
# not hard-coded.
EffectTestresult['IC'].hist()
IC_ratio = EffectTestresult['IC'].quantile(0.80)  # keep roughly the top 20% by IC
IC_ratio
EffectTestresult['ICIR'].abs().hist()
ICIR_ratio = EffectTestresult['ICIR'].abs().quantile(0.75)  # top 25% by |ICIR|
ICIR_ratio
# Filter: IC and |ICIR| above their quantile bars; tests 1, 2-winner and
# 3-winner must pass; and at least 3 of the 4 test-2/test-3 outcomes pass.
index_ic=EffectTestresult['IC']>IC_ratio
index_icir=EffectTestresult['ICIR'].abs()>ICIR_ratio
# NOTE(review): bare all()/sum() with an axis= keyword rely on the platform's
# implicit star import of numpy (pylab); the builtins would raise TypeError.
test_index=all(EffectTestresult.iloc[:,[2,3,5]],axis=1)
test2_index=sum(EffectTestresult.iloc[:,3:7],axis=1)>=3
filter_index=index_ic&index_icir&test_index&test2_index
EffectFactorresult=EffectTestresult.loc[filter_index,:]
# Data panels of the surviving factors only
EffectFactor=list(EffectFactorresult.index)
Effect_factor_dict={key:value for key,value in all_factor_dict.items() if key in EffectFactor}
EffectFactorresult
IC排序,相关性筛选¶
# Surviving factors ordered by IC, best first.
# sort_values replaces DataFrame.sort, which was removed in pandas 0.20+.
effect_fac_list = EffectFactorresult.sort_values('IC',ascending=False).index.tolist()
# Assign each stock a bucket score derived from its factor rank on each date.
def Group_Score_calculator(factor,univ_dict,signal,GroupNum=20):
    """Return a DataFrame (dates x stocks) of group scores.

    Stocks are ranked by factor value descending and split into GroupNum
    buckets. signal='ascending' scores buckets 0..GroupNum-1 (top bucket 0);
    anything else scores them GroupNum..1 (top bucket GroupNum).
    """
    Score_df=pd.DataFrame(index=list(factor.index),columns=list(factor.columns))
    for date in sorted(list(univ_dict.keys())):
        univ=univ_dict[date]
        univ=list(set(univ)&set(factor.loc[date].dropna().index))
        # sort_values replaces DataFrame.sort (removed in pandas 0.20+)
        factor_se_stock=list(factor.loc[date,univ].to_frame('a').sort_values('a',ascending=False).index)
        N=len(factor_se_stock)
        for i in range(GroupNum):
            # Float bucket edges: the original `N/GroupNum` truncated to 0
            # under Python-2 integer division whenever N < GroupNum, leaving
            # every bucket empty; use the N*1.0 idiom the sibling
            # Group_Return_calculator already uses.
            group_stock=factor_se_stock[int(N*1.0/GroupNum*i):int(N*1.0/GroupNum*(i+1))]
            if signal=='ascending':
                Score_df.loc[date,group_stock]=i
            else:
                Score_df.loc[date,group_stock]=GroupNum-i
    return Score_df
# Average cross-sectional correlation matrix of factor group scores.
def factor_corr_calculator(Group_Score_dict,univ_dict):
    """Build the per-date (stock x factor) score panel, correlate the factors
    on each date, and return the across-date mean correlation matrix rounded
    to 2 decimals. Stocks without a score are filled with 4.5.
    NOTE(review): 4.5 looks like the neutral midpoint for 10 groups, while
    the scores are built with GroupNum=20 — confirm the intended fill value.
    """
    Group_Score_Corr_dict_by_day={}
    date_list=sorted(list(univ_dict.keys()))
    for Date in date_list:
        univ=univ_dict[Date]
        # One row per factor, columns = that day's stock pool.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        rows=[Group_Score_dict[Factor].loc[Date,univ].to_frame(Factor).T
              for Factor in list(Group_Score_dict.keys())]
        day_scores=pd.concat(rows).T.fillna(4.5)
        Group_Score_Corr_dict_by_day[Date]=day_scores.corr()
    # Mean of the daily correlation matrices
    N=len(date_list)
    Group_Score_Corr=Group_Score_Corr_dict_by_day[date_list[0]]
    for Date in date_list[1:]:
        Group_Score_Corr=Group_Score_Corr+Group_Score_Corr_dict_by_day[Date]
    return np.round(Group_Score_Corr/N,2)
# Score every surviving factor; scoring direction follows the sign of its ICIR.
Group_Score_dict={}
for key,factor in Effect_factor_dict.items():
    signal='ascending' if ic_ir_se[key]>0 else 'descending'
    Group_Score_dict[key]=Group_Score_calculator(factor,univ_dict,signal,20)
# Average pairwise correlation of the factor score panels
factor_corrmatrix=factor_corr_calculator(Group_Score_dict,univ_dict)
factor_corrmatrix
fac_corr = factor_corrmatrix
# Greedy de-duplication, walking factors in descending IC order: drop any
# factor whose score correlation with an already-kept factor exceeds MinCorr.
# NOTE(review): the original comments said 0.95, but the threshold used is 0.9.
MinCorr = 0.9
result_fac_list = effect_fac_list[:1]  # always keep the best-IC factor
for fac in effect_fac_list:
    # Already kept — skip
    if fac in result_fac_list:
        continue
    fac_corr_se = fac_corr[fac]
    # Factors this one is highly correlated with
    fac_corr_list = fac_corr_se[fac_corr_se > MinCorr].index.tolist()
    # Keep the factor only if it is not highly correlated with anything kept so far
    if len(set(fac_corr_list) & set(result_fac_list)) <= 0:
        result_fac_list.append(fac)
result_fac_list = sorted(result_fac_list)
result_fac_list
# Mean-|IC| values of the final factor set (used later as combination weights)
result_fac_ic_se = EffectTestresult['IC'].loc[result_fac_list]
result_fac_ic_se
# Fetch raw values of the selected factors for the live trading date.
date = today
g_factor_list
stock_list = get_stock_pool(date,index=g_index)
len(stock_list)
factor_dict=get_factor_values(securities=stock_list, factors=result_fac_ic_se.index.tolist(), start_date=date, end_date=date)
需要对因子进行去极值、中性化、标准化处理¶
# Preprocess the live-date factors exactly like the backtest ones
# (industry-median fill, winsorize, neutralize, standardize).
indu_se = get_Industry_by_day(date,stock_list)
factor_df = DataFrame()
for fac in list(factor_dict.keys()):
    # Single-day query: take the one row as a Series
    factor_se = factor_dict[fac].iloc[0]
    # Fill missing values with the industry median
    factor_se = replace_nan_indu(factor_se,indu_se)
    # Winsorize at 3 median absolute deviations
    factor_se=winsorize_med(factor_se, scale=3, inclusive=True, inf2nan=True, axis=1)
    # Industry + market-cap neutralization where flagged
    if fac in g_neu_factor:
        factor_se=neutralize(factor_se, how=['jq_l1', 'market_cap'], date=date, axis=1)
    # Z-score standardization
    factor_se=standardlize(factor_se, inf2nan=True, axis=0)
    tmp_df = factor_se.to_frame(fac)
    factor_df = pd.concat([factor_df,tmp_df],axis=1)
fianl_factor_df = factor_df.T  # (sic) misspelled name kept for any unseen downstream cells
# IC weights from the LAST backtest row: using the current-period IC here
# would be the look-ahead bias discussed at the top of this notebook.
ic_se = ic_df[result_fac_list].iloc[-1]
stocks_to_buy = 30
# IC-weighted composite score, best first; sort_values replaces
# DataFrame.sort, which was removed in pandas 0.20+.
final_stock_list = fianl_factor_df.multiply(ic_se,axis=0).sum().to_frame('a').sort_values('a',ascending=False).index[0:stocks_to_buy]
final_stock_list