Python pandas：找出中间 50%

我使用python和pandas来处理股票交易量数据，我想将其压缩为当天的总交易量、高点、低点、平均值、25%的交易量和75%的交易量。我不确定如何找到25%和75%的水平。

#References
from time import *
import urllib.request as web
import pandas as pd
import os
# Trading day to request, written as YYYY-MM-DD. This is a hard-coded literal,
# not derived from the system clock.
dateToday = "2014-10-31"
def pullData(exchange,stock,date):
    """Download one day's trade dump for a stock and print its price range.

    Args:
        exchange: Exchange code; appended after the ticker (``stock.exchange``)
            in the ``paper`` query parameter.
        stock: Ticker symbol; also used to build the path
            ``netfonds/trades/<stock>.txt`` whose parent directory is created.
        date: Date string; any hyphens are removed before it goes in the URL.

    Returns:
        None. The minimum and maximum of the ``price`` column are printed.

    Raises:
        Whatever ``urlopen`` or ``pandas.read_csv`` raise on a failed download
        or an unparsable response; these are not caught here.
    """
    baseUrl='http://netfonds.no/quotes/tradedump.php?csv_format=csv'
    fullUrl=baseUrl+'&date='+date.replace("-","")+'&paper='+stock+'.'+exchange
    # NOTE: only the parent directory of this path is created below; nothing
    # is written to the file itself in this function.
    fileName=('netfonds/trades/'+stock+'.txt')
    try:
        # exist_ok avoids the check-then-create race of a separate isdir test.
        os.makedirs(os.path.dirname(fileName), exist_ok=True)
    except OSError:
        print("Directory Error")
    # Close the HTTP response as soon as the CSV has been parsed.
    with web.urlopen(fullUrl) as webBuffer:
        webData=pd.read_csv(webBuffer,usecols=['price','quantity'])
    low = webData['price'].min()
    high = webData['price'].max()
    print(low,high)

def getList(fileName):
    """Read ``<fileName>.txt`` and return its entries, each split on '.'.

    Args:
        fileName: Path of the list file without the ``.txt`` extension.

    Returns:
        A list with one item per usable line; each item is the list of
        '.'-separated fields of that line. Blank lines and lines containing
        '#' anywhere are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    stockList = []
    # The context manager closes the file even if reading fails.
    with open(fileName+'.txt', 'r') as handle:
        # splitlines() splits on real line breaks (the previous split('n')
        # split on the letter "n") and copes with a trailing newline.
        fileByLines = handle.read().splitlines()
    for eachLine in fileByLines:
        eachLine = eachLine.strip()
        # Skip blank lines so callers never receive a field-less [''] entry.
        if not eachLine or '#' in eachLine:
            continue
        stockList.append(eachLine.split('.'))
    return stockList
def fromList():
    """Pull data for every entry listed in ``stocks.txt``, reporting progress.

    Each entry returned by ``getList('stocks')`` is expected to have at least
    two fields: field 0 is passed to ``pullData`` as the exchange and field 1
    as the stock. Entries with fewer fields are reported and skipped. A
    failure while pulling one entry is reported and does not stop the loop.

    Returns:
        None. Progress and errors are printed.
    """
    print("Parsing stock tickers...")
    stockList = getList('stocks')
    print("Found "+str(len(stockList))+" stocks")
    for eachEntry in stockList:
        # Guard up front: indexing a short entry inside the except handler
        # would raise a second, uncaught IndexError and abort the whole run.
        if len(eachEntry) < 2:
            print("Skipping malformed entry: "+str(eachEntry))
            continue
        exchange, stock = eachEntry[0], eachEntry[1]
        start_time = time()
        try:
            print("Attempting to pull data for "+stock)
            pullData(exchange,stock,dateToday)
            print("Pulled succcessfully in "+str(round(time()-start_time))+" seconds")
        except Exception as err:
            # Best-effort batch: report the reason and move on to the next entry.
            print("Unable to pull data... "+stock+" ("+str(err)+")")
# Run the whole batch and report the wall-clock duration in whole minutes.
first_time = time()
fromList()
elapsed_minutes = round((time() - first_time) / 60)
print("Program Finished! Took " + str(elapsed_minutes) + ' minutes')

pandas 的 Series 和 DataFrame 都有一个 describe 方法，类似于 R 的 summary：

In [3]: import numpy as np
In [4]: import pandas as pd
In [5]: s = pd.Series(np.random.rand(100))
In [6]: s.describe()
Out[6]: 
count    100.000000
mean       0.540376
std        0.296250
min        0.002514
25%        0.268722
50%        0.593436
75%        0.831067
max        0.991971

我只需使用 numpy.repeat() 就得到了我需要的结果：

# Repeat each trade's price once per unit of its quantity, so that statistics
# taken over the expanded values (e.g. describe()) are weighted by volume.
# Assumes webData['quantity'] holds non-negative integers - TODO confirm.
inflated=pd.DataFrame(np.repeat(webData['price'].values,webData['quantity'].values))

相关内容

最新更新

热门标签：