量化交易吧 / 量化平台帖子：3370942 新帖：28

选股宝涨停原因数据清洗

专门套利发表于：5 月 10 日 02：09回复(1)

原文提供了从选股宝API获取数据的代码，本文在原代码基础上对数据进行处理，有需要的可在此基础上增加其他数据内容的处理。因为格式问题，引用的时候要稍微调整下。

collector.py代码：

import urllib.request
import http.cookiejar
import time
import datetime

class Collector(object):
def init(self):
self.cookies = http.cookiejar.CookieJar()
self.handler=urllib.request.HTTPCookieProcessor(self.cookies)
self.opener = urllib.request.build_opener(self.handler)

def requestURL(self,url):retryCount = 200while retryCount>0:
        retryCount = retryCount-1try:
            response = self.opener.open(url)return response.read()except Exception as e:
            print(url,e,datetime.datetime.now())
            time.sleep(0.7)continue

xuangubao.py代码：

-- coding: utf-8 --

import datetime
import json
import pandas as pd
from collector import Collector
from jqdatasdk import *

auth('*', '*')

class Xuangubao(Collector):
def init(self):
Collector.init(self)
pass

def get_limitup_info(self):# trade_date = self.get_recent_tradingday()#trade_date = '2019-03-1'trade_date = datetime.datetime.now()print(trade_date)#url = 'https://flash-api.xuangubao.cn/api/pool/detail?pool_name=limit_up'url = 'https://flash-api.xuangubao.cn/api/pool/detail?pool_name=limit_up'content = self.requestURL(url)
    content = json.loads(content)#print(content)if content["code"] != 20000:print(content)return None# content 就是数据了，是个listcontent = content["data"]# 把数据放到dataframe中df_result = pd.DataFrame(content)
    stock_code = []
    stock_name = []
    stock_reason = []
    plate_name = []
    plate_reason = []
    turnover_ratio = []
    volume_bias_ratio = []for i in range(0,len(list(df_result['stock_chi_name']))):

        stock_code.append(df_result.iloc[i]['surge_reason']['symbol'][:6])
        stock_name.append(df_result.iloc[i]['stock_chi_name'])
        stock_reason.append(df_result.iloc[i]['surge_reason']['stock_reason'])
        plate_name.append(df_result.iloc[i]['surge_reason']['related_plates'][0]['plate_name'])
        try:
            plate_reason.append(df_result.iloc[i]['surge_reason']['related_plates'][0]['plate_reason'])
        except:
            plate_reason.append('NA')
            pass
        turnover_ratio.append(df_result.iloc[i]['turnover_ratio'])
        volume_bias_ratio.append(df_result.iloc[i]['volume_bias_ratio'])print (stock_code)print(stock_name)print(stock_reason)
    df1 = pd.DataFrame(stock_code, columns=['股票代码'])

    df1['股票名称'] = pd.Series(stock_name, index=df1.index)
    df1['涨停原因'] = pd.Series(stock_reason, index=df1.index)
    df1['板块'] = pd.Series(plate_name, index=df1.index)
    df1['板块原因'] = pd.Series(plate_reason, index=df1.index)
    df1['换手率'] = pd.Series(turnover_ratio, index=df1.index)
    df1['volume_bias_ratio'] = pd.Series(volume_bias_ratio, index=df1.index)#df1=df1[df1['板块']!='ST股']#过滤STdf1 = df1[df1['板块'] != 'ST股']#过滤创业板，可以根据情况取消df2 = df1[(df1['股票代码']) < '300000']
    df3 = df1[(df1['股票代码']) > '400000']
    df1 = pd.concat([df2, df3])for i in range(0, len(list(df1['股票代码']))):#print("df1:")for j in range(0, len(list(df_result['stock_chi_name']))):if df1['股票名称'].iloc[i]==df_result['stock_chi_name'].iloc[j]:

                df1['股票代码'].iloc[i] = df_result['symbol'].iloc[j]# 注意文件中的时间类型都是时间戳，可以格式化为字符串df_result['first_break_limit_down'] = df_result['first_break_limit_down'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['first_break_limit_up'] = df_result['first_break_limit_up'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['first_limit_down'] = df_result['first_limit_down'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['first_limit_up'] = df_result['first_limit_up'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['last_break_limit_down'] = df_result['last_break_limit_down'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['last_break_limit_up'] = df_result['last_break_limit_up'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['last_limit_down'] = df_result['last_limit_down'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['last_limit_up'] = df_result['last_limit_up'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['listed_date'] = df_result['listed_date'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))
    df_result['new_stock_break_limit_up'] = df_result['new_stock_break_limit_up'].apply(
        lambda x: datetime.datetime.utcfromtimestamp(x).strftime("%Y-%m-%d %H:%M:%S"))# 保存文件或者存数据库，之后就可以在本地进行统计分析了df_result.to_csv("F:\limit_up.csv", encoding="gbk", index=False)
    df1.to_csv("F:\chulihou.csv", encoding="gbk", index=False)print("all done")

if name == "main":
cc = Xuangubao()
cc.get_limitup_info()