from jqfactor import Factor, calc_factors
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
stock = get_index_stocks('000300.XSHG')

class Hs300Alpha(Factor):
    # 设置因子名称
    name = 'hs300_alpha'
    # 设置获取数据的时间窗口长度
    max_window = 10
    # 设置依赖的数据
    dependencies = ['close']

    # 计算因子的函数， 需要返回一个 pandas.Series, index 是股票代码，value 是因子值
    def calc(self, data):
        # 获取个股的收盘价数据
        close = data['close']
        # 计算个股近10日收益
        stock_return = close.iloc[-1,:]/close.iloc[0,:] -1
        # 获取指数（沪深300）的收盘价数据
        index_close = self._get_extra_data(securities=['000300.XSHG'], fields=['close'])['close']
        # 计算指数的近10日收益
        index_return = index_close.iat[-1,0]/index_close.iat[0,0] - 1
        # 计算 alpha
        alpha = stock_return - index_return
        return alpha
factors = calc_factors(stock, [Hs300Alpha()], start_date='2017-01-01', end_date='2017-12-31')

/opt/conda/envs/python3new/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

data=factors['hs300_alpha']

from jqdata import *
sw=get_industries(name='sw_l1').index
a=[0]*len(sw)
for i in range(len(sw)):
    a[i]=data[list(set(data.columns).intersection(set(get_industry_stocks(sw[i]))))].mean().mean()

from pyecharts import Bar
from pyecharts import online
online()
bar = Bar()
names=["农林牧渔","采掘","化工","钢铁",'有色金属','电子','家用电器','食品饮料','纺织服装','轻工制造',
'医药生物','公用事业','交通运输','房地产','商业贸易','休闲服务','综合','建筑材料','建筑装饰','电气设备',
'国防军工','计算机','传媒I','通信','银行','非银金融','汽车','机械设备']
bar.add("各行业因子值均值", names, a,
        is_more_utils=True)
bar

#获得行业哑变量矩阵
from jqdata import *
sw=get_industries(name='sw_l1').index
industry=pd.DataFrame(0,columns=data.columns,index=range(0,28))
for i in range(len(sw)):
    temp=list(set(data.columns).intersection(set(get_industry_stocks(sw[i]))))
    industry.loc[i,temp]=1

#去除市值、行业因素，得到新的因子值 
newx=pd.DataFrame()
for i in range(len(data.index)):
    m= get_fundamentals(query(valuation.circulating_cap,valuation.code).filter(valuation.code.in_(data.columns)), date=data.index[i])
    m.index=np.array(m['code'])
    m=m.iloc[:,0]
    m=(m-mean(m))/std(m)
    x=data.iloc[i,:]
    conc=pd.concat([x,m,industry.T],axis=1).fillna(mean(m))
    est=sm.OLS(conc.iloc[:,0],conc.iloc[:,1:]).fit()
    y_fitted = est.fittedvalues
    newx[i]=est.resid
newx=newx.T
newx.index=data.index
newx=newx.iloc[1:,:]

【笔记】单因子有效性分析（二）：行业市值中性化

1、为什么要做行业中性化

2、如何做行业市值中性化

审核消息

该文章已通过审核

全部回复

0/140

热门文章最新文章

热门标签

更多人气分析师

财经资讯

行情数据