r语言 - 使用 "dimnames" =2 的列表格式化 zoo 对象



我正在使用 eventstudies 包,并用 phys2eventtime(..) 来准备我的数据,但是得到了以下错误:

Error in `colnames<-`(`*tmp*`, value = integer(0)) : 
attempt to set 'colnames' on an object with less than two dimensions

我的猜测是,我的数据格式错误。当我查看使用示例中函数的示例数据时:

> es <- phys2eventtime(z=StockPriceReturns, events=SplitDates, width=10)
> (str(StockPriceReturns))
'zoo' series from 2000-04-03 to 2013-03-28
Data: num [1:3246, 1:30] NA NA NA NA NA NA NA NA NA NA ...
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:30] "Bajaj.Auto" "BHEL" "Bharti.Airtel" "Cipla" ...
Index:  Date[1:3246], format: "2000-04-03" "2000-04-04" "2000-04-05" "2000-04-06" ...
NULL

与此相比,我的数据如下所示:

> (str(zoo_Data))
'zoo' series from 2002-01-01 to 2013-08-20
Data: num [1:3036] 183 183 186 191 191 ...
Index:  Date[1:3036], format: "2002-01-01" "2002-01-02" "2002-01-03" "2002-01-04" ...
NULL    

这是不同的:

  • 示例数据带有类似 - attr(*, "dimnames")=List of 2 的属性,而我的数据没有

在查看原始数据时也可以看到这一点:

示例数据:

> (head(StockPriceReturns))
Bajaj.Auto       BHEL Bharti.Airtel     Cipla Coal.India   Dr.Reddy
2000-04-03         NA  4.9171044            NA  6.810041         NA -3.2541653
2000-04-04         NA -8.3348496            NA -3.368606         NA -8.3353739
2000-04-05         NA  0.3305788            NA  0.836825         NA  0.2616345
2000-04-06         NA -2.7605266            NA -2.466056         NA -1.8941289
2000-04-07         NA  3.2543548            NA  7.690426         NA  7.6961041
2000-04-10         NA  3.3107586            NA  6.154276         NA  6.4769648

我的数据:

> (head(zoo_Data))
2002-01-01 2002-01-02 2002-01-03 2002-01-04 2002-01-07 2002-01-08 
182.83     182.83     186.40     190.57     191.17     193.25 

df data

我按如下方式从我的数据构建 zoo 对象:

> dfToZoo <- function(df) {
  # Convert a data frame into a univariate zoo series indexed by date.
  #
  # df: data frame whose first column holds dates formatted as "dd.mm.YYYY"
  #     and which has a column named TotalReturns.
  # Returns a zoo vector; it has no dimensions and therefore no dimnames.
  #
  # library() stops with an error when zoo is not installed, whereas
  # require() only returns FALSE and lets the failure surface later.
  library(zoo)
  # Fail early with a clear message if the value column is missing.
  stopifnot("TotalReturns" %in% names(df))
  dates <- as.Date(df[, 1], format = "%d.%m.%Y")
  # TODO have a look if the column are rightly named
  # Index the column explicitly: inside with(df, ...) a data-frame column
  # named `date` would have shadowed the parsed dates.
  zoo(df[["TotalReturns"]], order.by = dates)
}

# Read the raw CSV: header row, ";" as field separator, strings kept as character.
csv_data <- read.csv(..., header = TRUE, sep = ";",stringsAsFactors=FALSE)
# Replace every "," with "." before converting to double
# (presumably the file uses decimal commas -- confirm against the data).
totalReturns <- (as.double(gsub(",",".",csv_data$TotalReturn)))

# Two-column data frame: the date column from the CSV and the converted values.
df <- data.frame(csv_data$Date, totalReturns)
names(df) <- c("Date" ,"TotalReturns")

# Build the zoo series from the data frame via dfToZoo().
zoo_Data <- dfToZoo(df)

如何将attr(*, "dimnames")=List of 2添加到我的数据中?

感谢您的回复!

更新 1

这是库包中的example data code

library(eventstudies)
library(zoo)
###########################
# Load data
(data(SplitDates))
(data(StockPriceReturns))
data <- StockPriceReturns
(head(StockPriceReturns))

es <- phys2eventtime(z=StockPriceReturns, events=SplitDates, width=10)
es.w <- window(es$z.e, start=-10, end=10)
SplitDates[1:3,]

更新 2

这是我自己的数据的格式

> (str(s_dates))
'data.frame':   36799 obs. of  2 variables:
$ unit: chr  "ZAE000006284" "ZAE000006284" "ZAE000006284" "ZAE000006284" ...
$ when: Date, format: "2003-12-18" "2005-04-06" ...
NULL
> (str(zoo_Data))
'zoo' series from 2002-01-01 to 2013-08-20
Data: num [1:3036] 183 183 186 191 191 ...
Index:  Date[1:3036], format: "2002-01-01" "2002-01-02" "2002-01-03" "2002-01-04" ...
NULL

以下是 eventstudies 包中示例数据的格式:

> (str(SplitDates))
'data.frame':   22 obs. of  2 variables:
$ unit: chr  "BHEL" "Bharti.Airtel" "Cipla" "Coal.India" ...
$ when: Date, format: "2011-10-03" "2009-07-24" ...
NULL
> (str(StockPriceReturns))
'zoo' series from 2000-04-03 to 2013-03-28
Data: num [1:3246, 1:30] NA NA NA NA NA NA NA NA NA NA ...
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:30] "Bajaj.Auto" "BHEL" "Bharti.Airtel" "Cipla" ...
Index:  Date[1:3246], format: "2000-04-03" "2000-04-04" "2000-04-05" "2000-04-06" ...
NULL

更新 3

可重现的示例

我最初通过不同的csv文件读取数据。请参阅我的可重现示例:

可重现的示例

library(eventstudies)
library(zoo)
s_dates <- 
structure(list(unit = c("ZAE000006284", "ZAE000006284", "ZAE000006284", 
"ZAE000006284", "ZAE000006284", "XS0430907989", "XS0430907989"
), when = structure(c(12404, 12879, 12879, 12880, 12930, 14411, 
14411), class = "Date")), .Names = c("unit", "when"), row.names = c(NA, 
                      7L), class = "data.frame")
zoo_Data <- 
structure(c(182.83, 182.83, 186.4, 190.57, 191.17, 193.25, 190.57
), index = structure(c(11688, 11689, 11690, 11691, 11694, 11695, 
11696), class = "Date"), class = "zoo")
es <- phys2eventtime(z=zoo_Data, events=s_dates, width=10)
Error in `colnames<-`(`*tmp*`, value = integer(0)) : 
attempt to set 'colnames' on an object with less than two dimensions
> es.w <- window(es$z.e, start=-10, end=10)
Error in window(es$z.e, start = -10, end = 10) : object 'es' not found

更新 4

好的,当我尝试转换我的zoo_data

> library(eventstudies)
> library(zoo)
> 
> s_dates <- dput(head(s_dates,30))
structure(list(unit = c("ZAE000006284", "ZAE000006284", "ZAE000006284", 
"ZAE000006284", "ZAE000006284", "XS0430907989", "XS0430907989", 
"XS0302626899", "XS0302626899", "XS0302626899", "XS0302626899", 
"XS0302626899", "XS0302626899", "XS0266838746", "XS0187043079", 
"XS0187043079", "XS0187043079", "XF0000TZ7757", "XF0000AK5197", 
"XF0000AK5197", "XF0000AK5197", "XF0000AK5197", "XF0000AK5197", 
"USU02681027", "USU026281027", "USU026281027", "USU026281027", 
"USU026281027", "USU026281027", "USU026281027"), when = structure(c(12404, 
12879, 12879, 12880, 12930, 14411, 14411, 14599, 14600, 15134, 
15139, 15328, 15328, 13913, 14330, 14335, 14593, 13049, 12953, 
12954, 12954, 12954, 12955, 12934, 13537, 13537, 13537, 13648, 
13649, 13649), class = "Date")), .Names = c("unit", "when"), row.names = c(NA, 
30L), class = "data.frame")
> zoo_Data <- dput(head(zoo_Data,30))
structure(c(182.83, 182.83, 186.4, 190.57, 191.17, 193.25, 190.57, 
184.02, 181.34, 172.11, 169.73, 160.2, 175.09, 172.11, 170.92, 
176.58, 171.51, 170.92, 173.9, 168.54, 167.34, 166.75, 166.45, 
167.34, 164.37, 159.01, 158.11, 154.84, 156.63, 161.99), index = structure(c(11688, 
11689, 11690, 11691, 11694, 11695, 11696, 11697, 11698, 11701, 
11702, 11703, 11704, 11705, 11708, 11709, 11710, 11711, 11712, 
11715, 11716, 11717, 11718, 11719, 11722, 11723, 11724, 11725, 
11726, 11729), class = "Date"), class = "zoo")
> 
> n = 20                       ## number of observation
> cn <- unique(s_dates$unit)   ## get response variable 
> 
> as.xts(zoo_Data)
> 
> names(zoo_Data) <- cn
> 
> es <- phys2eventtime(z=zoo_Data, events=s_dates, width=10)
Error in `colnames<-`(`*tmp*`, value = integer(0)) : 
attempt to set 'colnames' on an object with less than two dimensions
> es.w <- window(es$z.e, start=-3, end=3)
Error in window(es$z.e, start = -3, end = 3) : object 'es' not found

phys2eventtime将实际日期转换为事件日期。

因此,给定一个 zoo 时间序列和一组事件日期,它会把物理日期转换为事件时间。也就是说,如果在 zoo 对象的日期范围内找到某个事件日期,该日期就变为零,其他所有日期也随之相应平移。

所以给这个事件日期对象:

unit       when
1 ZAE000006284 2003-12-18
2 ZAE000006284 2005-04-06
3 ZAE000006284 2005-04-06
4 ZAE000006284 2005-04-07
5 ZAE000006284 2005-05-27
6 XS0430907989 2009-06-16
7 XS0430907989 2009-06-16

转换 ts 的条件 -->事件日期

物理时间序列(zoo 对象)应满足 2 个条件:

  • 至少有一个列名(响应变量)出现在 unit 列的取值中 (COND1)
  • 该响应变量的日期范围应至少包含一个事件日期(when 列)(COND2)

创建可转换为事件日期的物理时间序列

现在我将创建一个满足这两个条件的时间序列:一个多变量时间序列(xts 对象),其索引落在 s_dates 的日期范围内,且列名取自 s_dates 的 unit 值。

## Build a synthetic multivariate series that satisfies COND1 and COND2.
n <- 20                      ## number of observations per series
cn <- unique(s_dates$unit)   ## response variables, one column per unit
rr <- range(s_dates$when)    ## first and last event date
## COND2: the index spans the whole range of event dates
index <- seq(rr[1], rr[2], length.out = n)
## Size the matrix from length(cn) so the column count always matches the
## names assigned below, however many distinct units s_dates contains.
mat <- matrix(round(runif(n * length(cn), min = 150, max = 200), 2),
              ncol = length(cn))
ev.dat <- xts(mat, index)
## COND1: the column names are the unit values
names(ev.dat) <- cn

检查我们的时间序列对象:

head(ev.dat)
ZAE000006284 XS0430907989
2003-12-18       183.27       152.89
2004-04-01       195.23       172.78
2004-07-16       190.18       164.92
2004-10-29       182.24       191.19
2005-02-12       151.78       195.81
2005-05-29       153.78       189.27

毫不奇怪,它工作正常:

(es <- phys2eventtime(z=ev.dat, events=s_dates, width=5))
(es.w <- window(es$z.e, start=-10, end=10))
head(es.w)
2      3      4      5
-4 197.28 197.28 197.28 197.28
-3 158.54 158.54 158.54 158.54
-2 180.56 180.56 180.56 180.56
-1 194.41 194.41 194.41 194.41
0  157.64 157.64 157.64 157.64
1  151.06 151.06 151.06 151.06

如何测试时间序列是否满足上述 2 个条件:

作为奖励,我为您创建了一个小函数,其中包含 2 个条件:

check.phys2ev <- function(ev.dat, s_dates) {
  # Check whether a series can be converted to event time.
  #
  # ev.dat:  time series with one named column per unit; its dates are
  #          obtained with index().
  # s_dates: data frame with a `unit` column and a `when` column.
  # Returns TRUE when at least one event date falls strictly inside the
  # span of the series index AND every column name of ev.dat occurs in
  # s_dates$unit; FALSE otherwise.
  series_names <- colnames(ev.dat)
  # A dimensionless (vector) series has no column names to match against
  # the units; nrow() would also be NULL for it, so answer FALSE up front.
  if (is.null(series_names)) {
    return(FALSE)
  }
  dates <- index(ev.dat)
  # findInterval() is vectorised, so all event dates are located at once.
  pos <- findInterval(s_dates$when, dates)
  # Position 1 or below, or the last position, means the event sits at or
  # beyond an edge of the series.
  inside_span <- pos > 1 & pos < length(dates)
  any(inside_span, na.rm = TRUE) && all(series_names %in% s_dates$unit)
}
check.phys2ev(ev.dat,s_dates)
[1] TRUE

@agstudy 已经给出了一个非常好的答案,并以多变量时间序列为例。因为您的数据似乎是单变量时间序列,所以我想补充说明在创建 zoo 对象时如何避免它被强制转换为"少于二维的对象"。一种可能的解决方案是:向时间序列添加一个虚拟列,并在选择目标列时使用 drop = FALSE。

从数据框中的单变量时间序列开始。由于zoo对象和events中的日期不匹配(如 @agstudy 所述),以及zoo对象的维度小于两个,第一次尝试显然不起作用。

# Univariate series as a data frame: the dates plus a single value column.
df <- data.frame(date = index(zoo_Data),
                 ZAE000006284 = coredata(zoo_Data))
df
# convert to zoo object
# df[ , -1] leaves one column and drops it to a plain vector, so z1 has
# no dimensions.
z1 <- zoo(x = df[ , -1], order.by = df$date)
z1
# Pass z1, the object built above; `z` was never defined.
phys2eventtime(z = z1, events = s_dates, width = 10)
# Error in `colnames<-`(`*tmp*`, value = integer(0)) : 
#   attempt to set 'colnames' on an object with less than two dimensions

添加一个与s_dates中最大日期一样大的日期,只是在这里用作实现"COND2"(sensu @agstudy)的快速而肮脏的方式。

# Append one extra observation, dated at the latest event and valued 200,
# so that the series index extends to the event dates.
df <- data.frame(
  date = c(index(zoo_Data), max(s_dates$when)),
  ZAE000006284 = c(coredata(zoo_Data), 200)
)
df
# Extracting the only value column gives a plain vector, so z1 is still
# a dimensionless zoo series.
z1 <- zoo(df[["ZAE000006284"]], order.by = df[["date"]])
z1
phys2eventtime(z = z1, events = s_dates, width = 10)
# Error in `colnames<-`(`*tmp*`, value = integer(0)) : 
#   attempt to set 'colnames' on an object with less than two dimensions

避免被强制转换为向量

向单变量时间序列添加一个虚拟列。选择目标列时,使用 drop = FALSE 以避免结果被强制转换为向量。然后将这个单变量(但二维)的时间序列转换为 zoo 对象:

# Same padded series as before, plus a dummy copy of the value column.
df <- data.frame(
  date = c(index(zoo_Data), max(s_dates$when)),
  ZAE000006284 = c(coredata(zoo_Data), 200),
  ZAE000006284_2 = c(coredata(zoo_Data), 200)
)
df
# drop = FALSE keeps the selection a one-column data frame, so the zoo
# object is two-dimensional and carries the column name.
z2 <- zoo(
  x = df[, "ZAE000006284", drop = FALSE],
  order.by = df[["date"]]
)
z2
str(z2)
....
attr(*, "dimnames")=List of 2

phys2eventtime(z = z2, events = s_dates, width = 10)
# seems to work

感谢大家指出这一点,也感谢这场精彩的讨论。作为该软件包的作者之一,我明白问题出在哪里了。如果您查看 phys2eventtime 函数,它需要一个至少有一列的 zoo 序列,如上面的答案所示。对 zoo/data.frame 取子集时,如果结果只剩一列,默认会丢弃维度并将其转换为向量,除非在下标操作中指定 drop = FALSE。

正如 @Henrik 和 @agstudy 所建议的那样,我们无法真正在phys2eventtime函数内部执行drop = FALSE,因为该函数不知道系列名称。它依赖于对象的列名作为系列名称。对于单变量序列,如果对象是向量,则无法知道序列名称并将其与事件对象匹配。

我现在看到的唯一方法是创建单列zoo对象。当然欢迎提出建议。

编辑:我正在提交一个小补丁,如果"z"对象是向量,则会抛出错误:

if (is.null(ncol(z))) { stop(paste(deparse("z"), "should be a zoo series with at least one column.")) }

代码可在此处获得:https://r-forge.r-project.org/scm/viewvc.php/?root=eventstudies

也请加入软件包讨论邮件列表:http://lists.r-forge.r-project.org/pipermail/eventstudies-discussion/

最新更新