我正在用JSON包读取数据。
基本上,数据具有以下格式:
{"a":1,"b":2,"c":3}
{"a": null,"b":2,"c":3}
我按如下方式将数据读入R中:
# Read the CSV into a data.table; each row's `results` field is passed to
# fromJSON() below, so it is presumably one JSON string per row.
DAT <- data.table(read.csv("D:/file.csv"))
# create unified variable names
# Parse every row once. Building OUT with lapply() means it always exists
# (the original assigned into OUT[[i]] and read vnames before either object
# had been created, which fails with "object not found").
OUT <- lapply(as.character(DAT$results), fromJSON)
# Concatenate the field names of every parsed record, in row order.
vnames <- unlist(lapply(OUT, names), use.names = FALSE)
# Build the matching values: walk the rows again and append every parsed
# JSON value, converted to character, to one flat vector.
content <- NULL
Applicant <- NULL
i <- 1
while (i <= nrow(DAT)) {
  temp <- fromJSON(as.character(DAT[i]$results))
  laenge <- length(temp)
  for (j in 1:laenge) {
    # as.character(NULL) is character(0), so a NULL entry appends nothing.
    content_new <- as.character(temp[[j]])
    content <- c(content, content_new)
  }
  i <- i + 1
}
然后我想通过以下方式加入列表(以便获得典型格式的数据):
# Pair each collected variable name with its value, one pair per row.
assets_mren <- data.frame(asset_class = vnames, value = content)
然而,我收到一条错误消息,说明vnames和content具有不同的行数。我认为问题出在要读入的数据中的"null"。你知道如何读入上述的"null",或者如何更好地读入数据吗?
是的,问题出在null上。它使得每一行解析后的结构不同。
# Two JSON records, one per line; the second has a null value for "a".
ll <- '{"a":1,"b":2,"c":3}
{"a": null,"b":2,"c":3}'
# Split the text into one element per record. Without this, `ll` is a single
# string and lapply() would call fromJSON() only once instead of once per line.
ll <- strsplit(ll, "\n", fixed = TRUE)[[1]]
# str() prints the structure of each parsed record (and itself returns NULL).
res <- lapply(ll, function(x) str(fromJSON(x)))
Named num [1:3] 1 2 3 ## named vector for the first line
- attr(*, "names")= chr [1:3] "a" "b" "c"
List of 3
$ a: NULL ## list for the second line
$ b: num 2
$ c: num 3
所以你必须使每一行的输出结构保持一致。这里有两个选项:
1-用伪值(0或-1)替换null,例如:
# Replace every "null" with the dummy value -1 and re-read the text line by line.
ll <- readLines(textConnection(gsub("null", -1, ll)))
# Parse each line and stack the parsed records row-wise.
do.call(rbind, lapply(ll, fromJSON))
a b c
[1,] 1 2 3
[2,] -1 2 3 ## res[res==-1] <- NA to replace dummy value
2-保留null,但应该使用rbind.fill来得到data.frame:
library(plyr)

# Two JSON records, one per line; the second has a null value for "a".
# The null is kept as-is here (no dummy-value substitution).
ll <- '{"a":1,"b":2,"c":3}
{"a": null,"b":2,"c":3}'
ll <- readLines(textConnection(ll))
# unlist() drops NULL entries, so each record becomes a one-row data frame
# containing only the fields that actually have a value.
res <- lapply(ll, function(x) {
  as.data.frame(t(as.matrix(unlist(fromJSON(x)))))
})
# rbind.fill() stacks the rows and fills columns missing from a row with NA.
rbind.fill(res)
a b c
1 1 2 3
2 NA 2 3