import datetime

import numpy as np
import pandas as pd
from pandas import DataFrame,Series

一、计算q因子函数¶

在回测中调用¶

def calculate_q_factor(stock,frequency,count,threshold):
    '''
    Compute the "smart money" Q factor of one stock (version for backtests).

    Each bar gets a score S = |close/open - 1| / sqrt(volume) (0 for bars with
    zero volume).  Bars are ranked by S, largest first, and the top-ranked bars
    whose cumulative volume stays within ``threshold`` of the total volume are
    treated as smart-money trades.  Q is the volume-weighted average price
    (VWAP) of those bars divided by the VWAP of all bars.

    params:
        stock: security code, passed to attribute_history
        frequency: bar frequency, passed to attribute_history
        count: number of bars to fetch
        threshold: fraction of total volume attributed to smart money
    return:
        q_factor: VWAPsmart / VWAPall, or NaN when the total volume is zero
    '''
    price = attribute_history(stock, count, frequency, ['open','close','volume','money'],df=True)

    # Score every bar; zero-volume bars are pinned to 0 instead of dividing by zero.
    move = ((price['close'] - price['open']) / price['open']).abs()
    price['index_s'] = (move / np.sqrt(price['volume'])).where(price['volume'] != 0, 0)
    price = price.sort_values(by='index_s', ascending=False)

    vol_sum = price['volume'].sum()
    if vol_sum == 0:
        # Nothing traded (or no bars at all): the factor is undefined.
        return np.nan
    # VWAPall is the volume-weighted average price of all bars.
    VWAPall = price['money'].sum() / vol_sum

    cum_volume = price['volume'].cumsum()
    is_smart = cum_volume <= threshold * vol_sum
    if price.loc[is_smart, 'volume'].sum() == 0:
        # The highest-ranked traded bar alone exceeds the volume budget.  Fall
        # back to that bar, taking BOTH its money and its volume (dividing zero
        # money by the bar's volume would always yield q == 0).
        first_traded = cum_volume[cum_volume > 0].iloc[0]
        is_smart = cum_volume <= first_traded
    smart = price[is_smart]
    # VWAPsmart is the volume-weighted average price of the smart-money bars.
    VWAPsmart = smart['money'].sum() / smart['volume'].sum()
    return VWAPsmart / VWAPall

在研究中调用¶

def calculate_q_factor(stock,frequency,count,threshold,end_date,num_try = 0):
    '''
    Compute the "smart money" Q factor of one stock (version for research notebooks).

    Each bar gets a score S = |close/open - 1| / sqrt(volume) (0 for bars with
    zero volume).  Bars are ranked by S, largest first, and the top-ranked bars
    whose cumulative volume stays within ``threshold`` of the total volume are
    treated as smart-money trades.  Q is the volume-weighted average price
    (VWAP) of those bars divided by the VWAP of all bars.

    Any exception raised while fetching or processing the data triggers a
    retry; the function gives up once ``num_try`` reaches 5.

    params:
        stock: security code, passed to get_price
        frequency: bar frequency, passed to get_price
        count: number of bars to fetch
        threshold: fraction of total volume attributed to smart money
        end_date: end of the data window, passed to get_price
        num_try: number of attempts that have already failed
    return:
        q_factor: VWAPsmart / VWAPall; NaN when the total volume is zero or
                  when every attempt failed
    '''
    while True:
        try:
            price = get_price(stock,fields = ['open','close','volume','money'],end_date=end_date,count = count, frequency=frequency)

            # Score every bar; zero-volume bars are pinned to 0 instead of dividing by zero.
            move = ((price['close'] - price['open']) / price['open']).abs()
            price['index_s'] = (move / np.sqrt(price['volume'])).where(price['volume'] != 0, 0)
            price = price.sort_values(by='index_s', ascending=False)

            vol_sum = price['volume'].sum()
            if vol_sum == 0:
                # Nothing traded (or no bars at all): the factor is undefined.
                return np.nan
            # VWAPall is the volume-weighted average price of all bars.
            VWAPall = price['money'].sum() / vol_sum

            cum_volume = price['volume'].cumsum()
            is_smart = cum_volume <= threshold * vol_sum
            if price.loc[is_smart, 'volume'].sum() == 0:
                # The highest-ranked traded bar alone exceeds the volume budget.
                # Fall back to that bar, taking BOTH its money and its volume
                # (dividing zero money by the bar's volume would always yield q == 0).
                first_traded = cum_volume[cum_volume > 0].iloc[0]
                is_smart = cum_volume <= first_traded
            smart = price[is_smart]
            # VWAPsmart is the volume-weighted average price of the smart-money bars.
            VWAPsmart = smart['money'].sum() / smart['volume'].sum()
            return VWAPsmart / VWAPall
        except Exception:
            # Deliberately broad best-effort retry, kept from the original code.
            num_try += 1
            if num_try >= 5:
                # Explicit NaN instead of an implicit None so callers always get a float.
                return np.nan

二、查看q值对参数的敏感性¶

测算不同时间频率、不同聪明钱比例的q值¶

# Maps a bar width in minutes (as a string: '1', '3', ..., '21') to 240 divided
# by that width, truncated to an integer.
# NOTE(review): 240 is presumably the number of trading minutes per day, which
# would make each value the number of bars per day - confirm.
timeDict = {str(minutes): int(240 / minutes) for minutes in range(1, 22, 2)}
# Multiplier applied to the timeDict values when choosing how many bars to request.
N = 20

import matplotlib.pyplot as plt
import pandas as pd

横轴是不同的时间频率（分钟），纵轴是q值，每条曲线对应一个聪明钱比例参数¶

# Evaluate q for '000001.XSHE' over a grid of bar frequencies and smart-money
# ratios, then plot one line per ratio against the bar frequency.
# The window end is fixed once so that every grid cell uses the same end date.
end_date = datetime.datetime.now().date()
dfs = {}
for step in range(2,10,1):
    threshold = step/20  # ratios 0.10, 0.15, ..., 0.45
    dfs[threshold] = {
        key: calculate_q_factor('000001.XSHE','{}m'.format(key),int(N*value),threshold,end_date)
        for key,value in timeDict.items()
    }


# Rows: bar width in minutes (sorted numerically); columns: smart-money ratio.
dfs = pd.DataFrame(dfs)
dfs.index = [int(x) for x in dfs.index.values]
dfs.sort_index(ascending= True,inplace = True)


# DataFrame.plot creates its own figure, so no separate plt.figure() is opened
# beforehand (doing so only leaves an empty figure behind).
dfs.plot(figsize=(20,10))

<matplotlib.axes._subplots.AxesSubplot at 0x7fa47882bc88>

<Figure size 576x8640 with 0 Axes>

从结果来看，不同时间频率下的q值波动较大，说明q因子的计算对所选的时间频率很敏感。另外，只有在1m频率下，无论聪明钱比例取多少，q值都小于1；其他时间频率没有这一特征。因此，这个因子在使用时可能存在参数拟合（过拟合）的问题。¶

三、插件¶

文件保存¶

def save_file(filename,data,mode = True):
    '''
    Pickle ``data`` to a file, or load a pickled object back from it.

    params:
        filename: path of the pickle file
        data: object to dump; ignored when loading
        mode: truthy -> write ``data`` to ``filename``;
              falsy  -> read ``filename``
    return:
        the unpickled object when loading, None when dumping
    '''
    import pickle
    if not mode:
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        with open(filename,'rb') as source:
            return pickle.load(source)
    with open(filename,'wb') as target:
        pickle.dump(data,target)

获取交易时间¶

# Find the first and the last trading day of every month in a period
def calculate_FL(time_list):
    '''
    Split a chronologically ordered sequence of dates into months.

    A new month starts whenever the (year, month) of a date differs from that
    of the preceding one, so months missing from the input do not hide a
    boundary.

    params:
        time_list: ordered iterable of objects exposing ``year``, ``month`` and
                   ``strftime`` (e.g. datetime.date)
    return:
        (trade_start, trade_end): two lists of 'YYYY-MM-DD' strings holding the
        first and the last date of each month present in ``time_list``; both
        are empty when ``time_list`` is empty
    '''
    trade_start = []
    trade_end = []
    previous = None
    for day in time_list:
        if previous is None:
            trade_start.append(day.strftime('%Y-%m-%d'))
        elif (day.year, day.month) != (previous.year, previous.month):
            # Month changed: the previous date closes its month, this one opens the next.
            trade_end.append(previous.strftime('%Y-%m-%d'))
            trade_start.append(day.strftime('%Y-%m-%d'))
        previous = day
    if previous is not None:
        # The final date always closes the last (possibly partial) month.
        trade_end.append(previous.strftime('%Y-%m-%d'))
    return trade_start,trade_end

剔除ST股与上市时间不足的新股，获取股票池¶

def delect_stop(stocks,beginDate,n=30*2):
    '''
    Keep only the stocks whose start date is more than ``n`` days before ``beginDate``.

    params:
        stocks: iterable of security codes
        beginDate: reference date as a 'YYYY-MM-DD' string
        n: minimum number of calendar days between the start date reported by
           get_security_info and ``beginDate`` (default 60)
    return:
        list of the security codes that pass the filter, in input order
    '''
    reference = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    cutoff = (reference - datetime.timedelta(days=n)).date()
    return [code for code in stocks if get_security_info(code).start_date < cutoff]
# Build the stock universe
def get_stock(stockPool,begin_date):
    '''
    Return the constituents of a named stock pool, without ST stocks and
    without stocks rejected by delect_stop.

    params:
        stockPool: one of 'HS300', 'ZZ500', 'ZZ800', 'CYBZ', 'ZXBZ', 'A'
        begin_date: date as a 'YYYY-MM-DD' string; used for the index
                    constituents, the ST flag and the delect_stop filter
    return:
        list of security codes
    raises:
        ValueError: if ``stockPool`` is not a known pool name
    '''
    # Index code(s) whose constituents make up each pool.
    index_codes = {
        'HS300': ['000300.XSHG'],
        'ZZ500': ['399905.XSHE'],
        'ZZ800': ['399906.XSHE'],
        'CYBZ': ['399006.XSHE'],
        'ZXBZ': ['399005.XSHE'],
        'A': ['000002.XSHG', '399107.XSHE'],
    }
    if stockPool not in index_codes:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError('unknown stockPool {!r}; expected one of {}'.format(stockPool, sorted(index_codes)))
    stockList = []
    for code in index_codes[stockPool]:
        stockList += get_index_stocks(code, begin_date)
    # Drop ST stocks: keep a stock only if its single fetched is_st flag is falsy.
    st_data=get_extras('is_st',stockList, count = 1,end_date=begin_date)
    # .iloc[0] reads the row by position explicitly rather than via the
    # deprecated integer-key fallback of Series[...].
    stockList = [stock for stock in stockList if not st_data[stock].iloc[0]]
    # Drop stocks that delect_stop rejects (start date too close to begin_date).
    stockList=delect_stop(stockList,begin_date)
    return stockList

四、计算每一期的q值¶

# Factor table for every period: key is the date, value is {stock: q} for all
# stocks of the pool on that date.
q_dicts = {}
frequency = '1m'
count = 2400
threshold = 0.1

# Get the last trading day of every month
from jqdata import *
year_list = ['2010','2011','2012','2013','2014','2015','2016','2017']
for i in year_list:
    year_start = str(int(i)-1)
    # The window starts on Dec 31 of the previous year, so consecutive years
    # can share that date.
    tradeTimeList = get_trade_days(start_date='{}-12-31'.format(year_start), end_date='{}-12-31'.format(i), count=None)
    FL = calculate_FL(tradeTimeList)
    monthFisrtDay = FL[0]
    monthLastDay = FL[1]
    print(monthLastDay)
    for tradeT in monthLastDay:
        if tradeT in q_dicts:
            # Already computed in the previous year's window; skip the
            # expensive recomputation of the whole universe.
            continue
        stockPool = get_stock('A',tradeT)  # filtered stock pool
        q_dicts[tradeT] = {
            stk: calculate_q_factor(stk,frequency,count,threshold,tradeT)
            for stk in stockPool
        }

    # Store the q values computed so far.  q_dicts is not reset between years,
    # so each yearly file also contains all earlier dates.
    df_qdict = pd.DataFrame(q_dicts)
    df_qdict.to_csv('all_q_dicts_{}.csv'.format(i))

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-a7a5387dee07> in <module>()
      5     year_start = str(int(i)-1)
      6     tradeTimeList = get_trade_days(start_date='{}-12-31'.format(year_start), end_date='{}-12-31'.format(i), count=None)
----> 7     FL = calculate_FL(tradeTimeList)
      8     monthFisrtDay = FL[0]
      9     monthLastDay = FL[1]

NameError: name 'calculate_FL' is not defined

# Extend the factor table with the dates from 2017-12-31 to 2018-11-30.
tradeTimeList = get_trade_days(start_date='{}-12-31'.format(2017), end_date='{}-11-30'.format(2018), count=None)
FL = calculate_FL(tradeTimeList)
monthFisrtDay, monthLastDay = FL[0], FL[1]
for tradeT in monthLastDay:
    stockPool = get_stock('A',tradeT)  # filtered stock pool
    q_dict = {
        stk: calculate_q_factor(stk,frequency,count,threshold,tradeT)
        for stk in stockPool
    }
    q_dicts[tradeT] = q_dict
# Store the computed q values (q_dicts also holds whatever was added to it earlier).
df_qdict = pd.DataFrame(q_dicts)
df_qdict.to_csv('all_q_dicts_{}.csv'.format(2018))

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-d9927fa5edaf> in <module>()
      1 tradeTimeList = get_trade_days(start_date='{}-12-31'.format(2017), end_date='{}-11-30'.format(2018), count=None)
----> 2 FL = calculate_FL(tradeTimeList)
      3 monthFisrtDay = FL[0]
      4 monthLastDay = FL[1]
      5 for j in range(len(monthLastDay)):

NameError: name 'calculate_FL' is not defined

五、分组回测¶

import pandas as pd
# Load the stored factor tables; the first CSV column becomes the index.
def _read_q_table(path):
    return pd.read_csv(path, index_col=0)

df_dict_2010 = _read_q_table('all_q_dicts_2010.csv')
df_dict_2011 = _read_q_table('all_q_dicts_2011.csv')
df_dict_2012 = _read_q_table('all_q_dicts_2012.csv')
df_dict_2013 = _read_q_table('all_q_dicts_2013.csv')
df_dict_2014 = _read_q_table('all_q_dicts_2014.csv')
df_dict_2015 = _read_q_table('all_q_dicts_2015.csv')
df_dict_2016 = _read_q_table('all_q_dicts_2016.csv')
df_dict_2017 = _read_q_table('all_q_dicts_2017.csv')
df_dict_2018 = _read_q_table('all_q_dicts_2018.csv')

df_dict_2018.head()

# Column labels of the 2018 table: the dates for which q values are stored.
dateList = df_dict_2018.columns.values

dateList

array(['2009-12-31', '2010-01-29', '2010-02-26', '2010-03-31',
       '2010-04-30', '2010-05-31', '2010-06-30', '2010-07-30',
       '2010-08-31', '2010-09-30', '2010-10-29', '2010-11-30',
       '2010-12-31', '2011-01-31', '2011-02-28', '2011-03-31',
       '2011-04-29', '2011-05-31', '2011-06-30', '2011-07-29',
       '2011-08-31', '2011-09-30', '2011-10-31', '2011-11-30',
       '2011-12-30', '2012-01-31', '2012-02-29', '2012-03-30',
       '2012-04-27', '2012-05-31', '2012-06-29', '2012-07-31',
       '2012-08-31', '2012-09-28', '2012-10-31', '2012-11-30',
       '2012-12-31', '2013-01-31', '2013-02-28', '2013-03-29',
       '2013-04-26', '2013-05-31', '2013-06-28', '2013-07-31',
       '2013-08-30', '2013-09-30', '2013-10-31', '2013-11-29',
       '2013-12-31', '2014-01-30', '2014-02-28', '2014-03-31',
       '2014-04-30', '2014-05-30', '2014-06-30', '2014-07-31',
       '2014-08-29', '2014-09-30', '2014-10-31', '2014-11-28',
       '2014-12-31', '2015-01-30', '2015-02-27', '2015-03-31',
       '2015-04-30', '2015-05-29', '2015-06-30', '2015-07-31',
       '2015-08-31', '2015-09-30', '2015-10-30', '2015-11-30',
       '2015-12-31', '2016-01-29', '2016-02-29', '2016-03-31',
       '2016-04-29', '2016-05-31', '2016-06-30', '2016-07-29',
       '2016-08-31', '2016-09-30', '2016-10-31', '2016-11-30',
       '2016-12-30', '2017-01-26', '2017-02-28', '2017-03-31',
       '2017-04-28', '2017-05-31', '2017-06-30', '2017-07-31',
       '2017-08-31', '2017-09-29', '2017-10-31', '2017-11-30',
       '2017-12-29', '2018-01-31', '2018-02-28', '2018-03-30',
       '2018-04-27', '2018-05-31', '2018-06-29', '2018-07-31',
       '2018-08-31', '2018-09-28', '2018-10-31', '2018-11-30'],
      dtype=object)

计算下个月的起止时间¶

import calendar
def calculate_next_month(day):
    '''
    Return the first and the last calendar day of the month following ``day``.

    params:
        day: object with ``year`` and ``month`` attributes (e.g. datetime.date)
    return:
        (next_month_start, next_month_end): two datetime.datetime values at
        midnight
    '''
    # divmod rolls December (12) over to month 1 of the following year.
    year_carry, month_index = divmod(day.month, 12)
    next_month_start = datetime.datetime(day.year + year_carry, month_index + 1, 1)
    # monthrange()[1] is the number of days in that month.
    month_length = calendar.monthrange(next_month_start.year, next_month_start.month)[1]
    next_month_end = next_month_start + datetime.timedelta(days=month_length - 1)
    return next_month_start,next_month_end

计算股票池平均收益¶

def calculate_class_rts(stocklist,start_date,end_date):
    '''
    Equal-weighted average return of a group of stocks over a period.

    The return of each stock is last close / first close - 1, computed on the
    daily bars that get_price returns for [start_date, end_date].

    params:
        stocklist: iterable of security codes
        start_date: start of the period, passed to get_price
        end_date: end of the period, passed to get_price
    return:
        mean of the per-stock returns, ignoring NaN returns; NaN when no stock
        has a usable return
    '''
    rts_list = []
    for stk in stocklist:
        price = get_price(stk, start_date=start_date, end_date=end_date, frequency='daily', fields=['close','open'])
        if len(price) == 0:
            # No bars in the period: skip the stock instead of failing on iloc.
            continue
        close = price['close']
        rts_list.append(close.iloc[-1]/close.iloc[0]-1)
    # mean() skips NaN and yields NaN (not a division error) for an empty group.
    return pd.Series(rts_list, dtype=float).mean()

# Next-month average return of each q quintile, keyed by formation date.
# A holds the lowest-q fifth of the stocks, E the highest.
classA = {}
classB = {}
classC = {}
classD = {}
classE = {}
quintiles = [classA, classB, classC, classD, classE]

# The last date is skipped: the loop stops one short of the end of dateList.
for date in dateList[:-1]:
    date_datetime = datetime.datetime.strptime(date,'%Y-%m-%d').date()
    # Start and end of the following month
    next_month_start,next_month_end = calculate_next_month(date_datetime)
    # Rank the stocks by factor value.  dropna() returns a new Series, so the
    # column stored in df_dict_2018 is never modified in place.
    q_factors = df_dict_2018[date].dropna().sort_values(ascending = True)
    stockListLast = list(q_factors.index.values)
    lens = len(stockListLast)
    # Quintile boundaries; the last group absorbs the rounding remainder.
    cuts = [0, int(0.2*lens), int(0.4*lens), int(0.6*lens), int(0.8*lens), lens]
    for bucket, lo, hi in zip(quintiles, cuts[:-1], cuts[1:]):
        bucket[date] = calculate_class_rts(stockListLast[lo:hi],next_month_start,next_month_end)

df = pd.DataFrame({'A':classA,
                   'B':classB,
                   'C':classC,
                   'D':classD,
                   'E':classE})

df.head()

# Cumulative net value of each quintile
(df+1).cumprod().plot(figsize = (20,8))

<matplotlib.axes._subplots.AxesSubplot at 0x7fce97108d30>

	2009-12-31	2010-01-29	2010-02-26	2010-03-31	2010-04-30	2010-05-31	2010-06-30	2010-07-30	2010-08-31	2010-09-30	...	2018-02-28	2018-03-30	2018-04-27	2018-05-31	2018-06-29	2018-07-31	2018-08-31	2018-09-28	2018-10-31	2018-11-30
000001.XSHE	1.006798	0.998273	0.998496	0.993877	0.993433	0.996267	1.001254	NaN	NaN	0.993616	...	1.003686	0.984279	0.993165	1.001389	0.993320	0.994469	0.988993	0.993574	0.996810	0.994382
000002.XSHE	0.997598	0.980102	0.997362	0.998059	0.989203	0.983346	0.996349	0.991116	0.994350	0.999582	...	0.997788	0.989308	1.004136	0.996622	0.987748	0.994332	0.987894	0.996752	1.010091	0.996233
000004.XSHE	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	0.999956	0.991227	0.993812	0.998745	1.003219	1.001096	1.005379	0.989531	0.968292	0.999837
000005.XSHE	0.989354	0.958115	0.992183	0.992988	0.994881	0.990632	0.988380	1.000019	0.997607	0.996333	...	0.996079	1.002573	1.000663	0.998847	1.007336	0.999687	0.994861	0.999278	0.980513	0.986307
000006.XSHE	0.995111	0.969805	0.999073	1.003019	0.989185	0.988331	0.995760	0.992118	0.993605	0.993541	...	NaN	0.974044	1.003488	0.988209	0.986300	0.993393	0.996315	0.989064	0.986726	0.998673

	A	B	C	D	E
2009-12-31	-0.026634	-0.027453	-0.032847	-0.038114	-0.052188
2010-01-29	0.085057	0.088479	0.082246	0.076639	0.064585
2010-02-26	0.031484	0.038035	0.029599	0.022562	0.011440
2010-03-31	-0.102429	-0.093342	-0.084084	-0.069611	-0.082053
2010-04-30	-0.086284	-0.082056	-0.084876	-0.076366	-0.055908

方正聪明钱Q因子再探探

来聊一聊Q因子

一、计算q因子函数¶

在回测中调用¶

在研究中调用¶

二、查看q值对参数的敏感性¶

测算不同时间频率、不同聪明钱比例的q值¶

横轴是不同的时间频率，纵轴是不同的聪明钱比例参数¶

结果上来看，不同频率的时间波动比较大，说明q因子计算对选定的时间很敏感，另外只有1m的时候，不论定义的聪明钱比例是多少，都是小于1的，其他时间频率是没有这个特征。所以这个因子用起来可能存在参数拟合的因素。¶

三、插件¶

文件保存¶

获取交易时间¶

剔除涨跌停股票¶

四、计算每一期的q值¶

五、分组回测¶

计算下个月的起止时间¶

计算股票池平均收益¶

分组收益的单调性并不明显，主要是q值最小的一组（A组）的表现比较意外。¶

审核消息

该文章已通过审核

全部回复

0/140

热门文章最新文章

热门标签

更多人气分析师

财经资讯

行情数据