R - 来自 http://chart.yahoo.com 的调整后收盘价数据的问题



我有一个R投资组合构建代码,它使用雅虎的每日调整收盘价数据。我在NA值方面遇到了一些问题,但代码已经运行了一段时间。直到本周末(例如,2015年2月28日)。

现在,当我使用tseries函数get.hist.quote()时,雅虎的数据源似乎完全被破坏了。所谓的破坏,我的意思是它不会正确返回VTV和许多其他ETF的数据。我不知道雅虎时间序列的来源是不是坏了。

有一个帖子(https://stackoverflow.com/a/3507948/2341077)建议将get.hist.quote()中的URL从chart.yahoo.com更改为ichart.yahoo.com来解决这个问题。但这对我没有任何效果。我还确认了我安装的是最新版本的tseries。

其他人在使用雅虎的收盘价时间序列时也遇到问题了吗?我一直在想我是否应该更改我的代码,改用quantmod的getSymbols函数,它显然可以使用谷歌金融作为数据源。

下面的代码用于读取数百个ETF符号,并返回包含ETF时间序列数据的矩阵。代码会尝试按日期对齐各时间序列的数据。

即使雅虎似乎在提供数据,仍然有缺失的值,这就是fillHoles()函数要解决的问题。

<pre>
#
# Fill "NA" holes in the time series.
#
# Args:
#   ts.zoo: a series (zoo object or plain numeric vector) that may contain NAs.
# Returns:
#   The series after two passes: na.approx() with maxgap = 4 (na.rm = FALSE
#   keeps the original length), then na.fill() with fill = "extend".
# NOTE(review): per the zoo documentation, fill = "extend" repeats the nearest
# value at the edges and also interpolates interior NAs, so gaps longer than
# maxgap may still end up filled -- confirm that this is intended.
fillHoles <- function(ts.zoo) {
  na.fill(
    na.approx(ts.zoo, maxgap = 4, na.rm = FALSE),
    fill = "extend"
  )
}
<i>
#
# The yahoo market data has problems (at least when it's fetched with get.hist.quote()) when the compression
# argument is used to fetch weekly adjusted close data.
#
# Two time series are shown below, for VXF and MINT. The weekly boundaries appear on different dates.
# 
#              VXF
# 2007-04-04 48.55
# 2007-04-09 48.98
# 2007-04-16 49.52 &lt;==
# 2007-04-23 49.70
# 2007-04-30 50.03
# 2007-05-07 50.04 &lt;==
# 
#            MINT
# 2007-04-04 8.03
# 2007-04-09 8.03
# 2007-04-17 7.88 &lt;==
# 2007-04-23 8.11
# 2007-04-30 8.92
# 2007-05-08 9.14 &lt;==
#   
# If the two time series are merged via cbind, NA values
# end up being inserted where the time series don't line up:
# 
#              VXF MINT
# 2007-04-04 48.55 8.03
# 2007-04-09 48.98 8.03
# 2007-04-16 49.52   NA
# 2007-04-23 49.70 8.11
# 2007-04-30 50.03 8.92
# 2007-05-07 50.04   NA
#
# To avoid this problem of data alignment, the function fetches daily adjusted close that can then be converted
# into weekly adjusted close.
#
# Given a vector of symbols, this function will fetch the daily adjusted close price data from 
# Yahoo. The data is aligned since not all time series will have exactly the same start and end
# dates (although with daily data, as noted above, this should be less of an issue)
#
</i>
# Fetch daily adjusted close prices with get.hist.quote() (provider "yahoo")
# and align them on a common date window.
#
# Args:
#   symbols:   character vector of ticker symbols.
#   startDate: first requested date (anything as.Date() accepts); it is passed
#              through findMarketDate() before being used.
#   endDate:   last requested date; handled like startDate.
#
# Returns:
#   A matrix with one row per date (rownames are the dates as character) and
#   one column per successfully fetched symbol, or NULL when no symbol could
#   be fetched. Symbols that cannot be fetched, or whose dates do not overlap
#   the data collected so far, are skipped with a warning.
getDailyCloseData <- function(symbols, startDate, endDate)
{
  closeData.z <- NULL
  closeData.mat <- NULL   # stays NULL when nothing is fetched
  fetchedSyms <- character(0)
  minDate <- NULL
  maxDate <- NULL
  startDate.ch <- as.character(findMarketDate(as.Date(startDate)))
  endDate.ch <- as.character(findMarketDate(as.Date(endDate)))

  # Iterating over the values (not 1:length()) makes empty input a no-op.
  for (sym in symbols) {
    print(sym)

    # Up to six attempts; after every attempt the end date is moved back to
    # the previous market date, so a retry asks for a slightly shorter range.
    symClose.z <- NULL
    tsEndDate.ch <- endDate.ch
    attempt <- 1
    while ((attempt < 7) && is.null(symClose.z)) {
      symClose.z <- tryCatch(
        get.hist.quote(instrument = sym, start = startDate.ch, end = tsEndDate.ch,
                       quote = "AdjClose", provider = "yahoo", compression = "d",
                       retclass = "zoo", quiet = TRUE),
        error = function(e) NULL)
      tsEndDate.ch <- as.character(findMarketDate(as.Date(tsEndDate.ch) - 1))
      attempt <- attempt + 1
    }
    if (is.null(symClose.z) || length(index(symClose.z)) == 0) {
      warning("No data could be fetched for symbol ", sym, call. = FALSE)
      next
    }

    dateIx <- index(symClose.z)
    if (is.null(closeData.z)) {
      # First successful fetch seeds the result and the date window.
      closeData.z <- symClose.z
      minDate <- min(dateIx)
      maxDate <- max(dateIx)
      fetchedSyms <- c(fetchedSyms, sym)
      next
    }

    # Narrow the window to the intersection of the date ranges and keep only
    # the already-collected dates inside it. This is the same selection the
    # day-by-day search produced, but it cannot loop forever when the ranges
    # do not overlap.
    newMin <- max(minDate, min(dateIx))
    newMax <- min(maxDate, max(dateIx))
    matIx <- index(closeData.z)
    matIxAdj <- matIx[matIx >= newMin & matIx <= newMax]
    if (length(matIxAdj) == 0) {
      warning("Symbol ", sym, " has no dates in common with the data fetched so far; skipped",
              call. = FALSE)
      next
    }
    minDate <- min(matIxAdj)
    maxDate <- max(matIxAdj)
    # Dates missing from the new series become NA in the merged object.
    closeData.z <- cbind(closeData.z[matIxAdj, ], symClose.z[matIxAdj])
    fetchedSyms <- c(fetchedSyms, sym)
  }

  if (length(closeData.z) > 0) {
    dateIx <- index(closeData.z)
    # fill any NA "holes" created by daily date alignment; matrix() keeps the
    # result two-dimensional even when there is only a single row
    closeData.mat <- matrix(apply(closeData.z, 2, FUN = fillHoles), nrow = length(dateIx))
    rownames(closeData.mat) <- as.character(dateIx)
    colnames(closeData.mat) <- fetchedSyms
  }
  closeData.mat
} # getDailyCloseData
</pre>

一些观察结果和问题。您正在使用get.hist.quote返回zoo时间序列。您是否尝试过使用zoo包中的merge.zoo来合并不同资产的历史时间序列?这样应该可以按日期正确对齐,不会有任何问题。其次,谷歌和雅虎以不同的方式修正历史价格,因此两者的价格不同。雅虎给出了开盘、最高、最低和收盘的历史价格,另外还给出调整后的价格,该价格根据拆分、股息和分配进行了调整。谷歌调整所有价格,但只针对拆分,忽略股息和分配。您可以在您给出的VXF的2007年数据中看到这种差异。

我通过quantmod的getSymbols访问雅虎数据没有问题,所以你可以使用它,而不必切换到谷歌。最后,根据太平洋投资管理公司(PIMCO)的说法,MINT的成立日期是2009年11月16日,所以我不明白你是如何获得2007年的数据的。

xts包是zoo的扩展,我发现它有一些有用的附加功能,比如to.weekly,它在下面使用。下面的代码是使用quantmod和xts包为您的ETF提供每日和每周价格的示例。请注意,MINT数据直到2009年11月17日才开始,与太平洋投资管理公司的成立日期一致。

library(quantmod)
library(xts)
 # Fetch daily adjusted close prices from Yahoo for each symbol with
 # quantmod::getSymbols() and merge them into one object aligned by date.
 #
 # Args:
 #   symbols:   non-empty character vector of ticker symbols.
 #   startDate: start of the requested range (passed to getSymbols as `from`).
 #   endDate:   end of the requested range (passed to getSymbols as `to`).
 #
 # Returns:
 #   The merged adjusted-close series, one column per symbol, with the columns
 #   named after `symbols`.
 getDailyCloseData <- function(symbols, startDate, endDate) {
   stopifnot(length(symbols) > 0)
   # Ad() picks the "Adjusted" column by name rather than relying on it
   # being the sixth column of whatever getSymbols() returns.
   adjusted <- lapply(symbols, function(sym) {
     Ad(getSymbols(sym, src = "yahoo", from = startDate, to = endDate,
                   auto.assign = FALSE))
   })
   # Pairwise merge, left to right, so dates are aligned across all symbols.
   close_daily <- Reduce(function(acc, x) merge(acc, x), adjusted)
   colnames(close_daily) <- symbols
   close_daily
 }
 # Example run: daily adjusted closes for two ETFs, then weekly closes.
 symbols <- c("VXF", "MINT")
 startDate <- "2007-03-15"
 endDate <- Sys.Date()
 close_daily <- getDailyCloseData(symbols, startDate, endDate)
 # Each column is converted to weekly data on its own (OHLC = FALSE keeps a
 # single value per period) and folded into the running merge, starting from
 # the first column.
 close_weekly <- Reduce(
   function(acc, sym) merge(acc, to.weekly(close_daily[, sym], OHLC = FALSE)),
   x = symbols[-1],
   to.weekly(close_daily[, 1], OHLC = FALSE)
 )

我已经改用quantmod的getSymbols()函数。雅虎数据的问题并不是每次都会出现,所以很难知道这是否是一个完整的解决方案。但是代码比我上面发布的要干净。

事实是,如果你投资的是真金白银,而不仅仅是做量化金融作业,你可能应该购买专业级别的数据。

#
# Find the nearest market date (moving backward in time)
#
# Args:
#   date: a Date (it is formatted with "%Y" and decremented by whole days).
# Returns:
#   `date` itself when isBizday() accepts it against the holidayNYSE() list for
#   its year; otherwise the closest earlier date that passes that check.
findMarketDate <- function( date )
{
  candidate <- date
  repeat {
    # The holiday list is rebuilt from the candidate's own year on every step,
    # so stepping back across New Year uses the previous year's holidays.
    yr <- as.numeric(format(candidate, "%Y"))
    if (isBizday(x = as.timeDate(candidate), holidays = holidayNYSE(yr))) {
      break
    }
    candidate <- candidate - 1
  }
  candidate
}
#
# Fill "NA" holes in the time series.
#
# Args:
#   ts.zoo: a series (zoo object or plain numeric vector) that may contain NAs.
# Returns:
#   The series after na.approx() (maxgap = 4, length preserved because
#   na.rm = FALSE) followed by na.fill() with fill = "extend".
# NOTE(review): according to the zoo documentation, fill = "extend" also
# interpolates interior NAs, so gaps longer than maxgap may still be filled
# by the second pass -- confirm that this is intended.
fillHoles <- function(ts.zoo) {
  interpolated <- na.approx(ts.zoo, maxgap = 4, na.rm = FALSE)
  na.fill(interpolated, fill = "extend")
}
#
# Get daily equity market prices (e.g., stocks, ETFs). This code is designed to work
# with both Yahoo and Google. Yahoo is preferred because they have adjusted prices. An adjusted
# price is adjusted for splits and dividends. As a result, an ETF that doesn't move that much in price
# may still move in dividend adjusted price. Using these prices avoids omitting high dividend assets.
#
# Args:
#   symbols:    character vector of ticker symbols.
#   startDate:  first date of the window (anything as.Date() accepts); passed
#               through findMarketDate() before use.
#   endDate:    last date of the window; handled like startDate.
#   dataSource: value for the `src` argument of getSymbols(). For "yahoo" the
#               "<symbol>.Adjusted" column is used; for any other source the
#               price is the midpoint of "<symbol>.High" and "<symbol>.Low".
#
# Returns:
#   A matrix with one row per date (rownames are the dates as character) and
#   one column per kept symbol, or NULL when no symbol was kept. A symbol is
#   kept only if its data contains both the start and the end market date;
#   symbols that fail to download or lack either date are skipped with a
#   warning.
getDailyPriceData <- function(symbols, startDate, endDate, dataSource = "yahoo" )
{
  closeData.z <- NULL
  fetchedSyms <- character(0)
  startDate.d <- findMarketDate(as.Date(startDate))
  endDate.d <- findMarketDate(as.Date(endDate))

  # Iterating over the values (not 1:length()) makes empty input a no-op.
  for (sym in symbols) {
    print(sym)

    # Up to six download attempts. auto.assign is forced to FALSE so that
    # getSymbols() always returns the data itself, whatever options are set.
    close.m <- NULL
    attempt <- 1
    while ((attempt < 7) && is.null(close.m)) {
      close.m <- tryCatch(
        getSymbols(Symbols = sym, src = dataSource, auto.assign = FALSE,
                   warnings = FALSE),
        error = function(e) NULL)
      attempt <- attempt + 1
    } # while
    if (is.null(close.m)) {
      warning("No data could be fetched for symbol ", sym, call. = FALSE)
      next
    }

    dateIx <- index(close.m)
    startIx <- which(dateIx == startDate.d)
    endIx <- which(dateIx == endDate.d)
    if (length(startIx) == 0 || length(endIx) == 0) {
      warning("Symbol ", sym, " has no quote on ", as.character(startDate.d),
              " and/or ", as.character(endDate.d), "; skipped", call. = FALSE)
      next
    }

    fetchedSyms <- c(fetchedSyms, sym)
    window.m <- close.m[startIx[1]:endIx[1], ]
    if (dataSource == "yahoo") {
      price.z <- window.m[, paste(sym, "Adjusted", sep = ".")]
    } else {
      # No adjusted price for this source: use the High/Low midpoint.
      highCol <- paste(sym, "High", sep = ".")
      lowCol <- paste(sym, "Low", sep = ".")
      price.z <- (window.m[, highCol] + window.m[, lowCol]) / 2
    }
    if (is.null(closeData.z)) {
      closeData.z <- price.z
    } else {
      closeData.z <- cbind(closeData.z, price.z)
    }
  } # for

  closeData.m <- NULL
  if (length(closeData.z) > 0) {
    dateIx <- index(closeData.z)
    closeData.m <- coredata(closeData.z)
    if (sum(is.na(closeData.m)) > 0) {
      # fill any NA "holes" created by daily date alignment; matrix() keeps the
      # result two-dimensional even when there is only a single row
      closeData.m <- matrix(apply(closeData.m, 2, FUN = fillHoles),
                            nrow = nrow(closeData.m))
    }
    rownames(closeData.m) <- as.character(dateIx)
    colnames(closeData.m) <- fetchedSyms
  }
  closeData.m
} # getDailyPriceData

最新更新