下面是嵌套列表,我想将其转换为data.table。
# Example data: two option quotes, each with a nested `greeks` sub-list and
# several NULL fields (open/high/low/close). Bound to `lst`, the name the
# later snippets (e.g. rrapply(lst, ...), tibblify(lst)) refer to.
lst <- list(list(symbol = "AAPL210212P00065000", description = "AAPL Feb 12 2021 $65.00 Put",
exch = "Z", type = "option", last = 0.01, change = 0, volume = 0L,
open = NULL, high = NULL, low = NULL, close = NULL, bid = 0,
ask = 0.01, underlying = "AAPL", strike = 65, greeks = list(
delta = 0, gamma = -1.09695e-14, theta = -0.00308333,
vega = 2e-05, rho = 0.00713933, phi = -0.0149899, bid_iv = 0,
mid_iv = 1.199176, ask_iv = 2.39835, smv_vol = 0.619,
updated_at = "2021-02-08 20:56:01"), change_percentage = 0,
average_volume = 0L, last_volume = 1L, trade_date = 1612544837252,
prevclose = 0.01, week_52_high = 0, week_52_low = 0, bidsize = 0L,
bidexch = "J", bid_date = 1612817742000, asksize = 0L, askexch = "H",
ask_date = 1612857600000, open_interest = 28L, contract_size = 100L,
expiration_date = "2021-02-12", expiration_type = "weeklys",
option_type = "put", root_symbol = "AAPL"), list(symbol = "AAPL210212C00065000",
description = "AAPL Feb 12 2021 $65.00 Call", exch = "Z",
type = "option", last = 70.53, change = 0, volume = 0L, open = NULL,
high = NULL, low = NULL, close = NULL, bid = 71, ask = 72.75,
underlying = "AAPL", strike = 65, greeks = list(delta = 1,
gamma = -1.09695e-14, theta = -0.00308333, vega = 2e-05,
rho = 0.00713933, phi = -0.0149899, bid_iv = 0, mid_iv = 1.658792,
ask_iv = 3.31758, smv_vol = 0.619, updated_at = "2021-02-08 20:56:01"),
change_percentage = 0, average_volume = 0L, last_volume = 5L,
trade_date = 1612795977049, prevclose = 70.53, week_52_high = 0,
week_52_low = 0, bidsize = 0L, bidexch = "Q", bid_date = 1612857600000,
asksize = 0L, askexch = "Q", ask_date = 1612857600000, open_interest = 7L,
contract_size = 100L, expiration_date = "2021-02-12", expiration_type = "weeklys",
option_type = "call", root_symbol = "AAPL"))
目前,我正在使用以下代码进行转换,但转换过程中丢失了各列的类型(class)信息,所有列都变成了字符型。
library(plyr)
library(data.table)
# Flatten each record with unlist() and stack the one-row data frames.
# NOTE: unlist() coerces every field of a record to character, which is why
# all resulting columns are character -- this is the problem being asked about.
# The result is passed to as.data.table() directly, because `%>%` is exported
# by neither plyr nor data.table.
as.data.table(
  plyr::rbind.fill(
    lapply(lst, function(y) {
      as.data.frame(t(unlist(y)), stringsAsFactors = FALSE)
    })
  )
)
是否有一种方法可以使这个列表扁平化,以便类信息也被保留?
我不想使用plyr::rbind.fill函数来扁平化列表,因为它很慢。我也尝试过data.table::rbindlist函数,但一些嵌套列表的信息丢失了。
library(data.table)
# Operate on `lst`, the nested list the other snippets use; `d` is not
# defined anywhere in the visible code.
rbindlist(lst, use.names = TRUE, idcol = "ID")
谢谢你们的解决方案!下面是两种方案的基准测试结果:
> microbenchmark::microbenchmark(tibblify =as.data.table(tibblify(lst)),
+ rrapply = as.data.table(rrapply(lst, f = function(x) ifelse(is.null(x), NA, x), how = "bind")), times = 100)
Unit: milliseconds
expr min lq mean median uq max neval cld
tibblify 23.018137 27.082619 32.579234 29.164100 31.895398 116.16178 100 b
rrapply 1.522039 1.901585 2.599128 2.224162 2.866303 21.86894 100 a
谢谢!
另一个选择是使用rrapply包中的rrapply(),默认情况下它也会展开更深的嵌套层(例如greeks):
library(rrapply)
# how = "bind" returns the nested list as one wide data.frame with one row per
# top-level element; nested fields show up as prefixed columns
# (e.g. greeks.delta), as the printed result below shows.
rrapply(lst, how = "bind")
#> symbol description exch type last change
#> 1 AAPL210212P00065000 AAPL Feb 12 2021 $65.00 Put Z option 0.01 0
#> 2 AAPL210212C00065000 AAPL Feb 12 2021 $65.00 Call Z option 70.53 0
#> volume open high low close bid ask underlying strike greeks.delta
#> 1 0 NULL NULL NULL NULL 0 0.01 AAPL 65 0
#> 2 0 NULL NULL NULL NULL 71 72.75 AAPL 65 1
#> greeks.gamma greeks.theta greeks.vega greeks.rho greeks.phi greeks.bid_iv
#> 1 -1.09695e-14 -0.00308333 2e-05 0.00713933 -0.0149899 0
#> 2 -1.09695e-14 -0.00308333 2e-05 0.00713933 -0.0149899 0
#> greeks.mid_iv greeks.ask_iv greeks.smv_vol greeks.updated_at
#> 1 1.199176 2.39835 0.619 2021-02-08 20:56:01
#> 2 1.658792 3.31758 0.619 2021-02-08 20:56:01
#> change_percentage average_volume last_volume trade_date prevclose
#> 1 0 0 1 1.612545e+12 0.01
#> 2 0 0 5 1.612796e+12 70.53
#> week_52_high week_52_low bidsize bidexch bid_date asksize askexch
#> 1 0 0 0 J 1.612818e+12 0 H
#> 2 0 0 0 Q 1.612858e+12 0 Q
#> ask_date open_interest contract_size expiration_date expiration_type
#> 1 1.612858e+12 28 100 2021-02-12 weeklys
#> 2 1.612858e+12 7 100 2021-02-12 weeklys
#> option_type root_symbol
#> 1 put AAPL
#> 2 call AAPL
注:与tibblify()相比,rrapply()往往更高效:
library(tibblify)
library(data.table)
## build a large nested list by repeating `lst` 1E5 times
lst_large <- rep(lst, times = 1E5)
## time the rrapply-based conversion
system.time(
  as.data.table(rrapply(lst_large, how = "bind"))
)
#> user system elapsed
#> 4.882 0.143 5.026
## time the tibblify-based conversion
system.time(
  as.data.table(tibblify(lst_large))
)
#> user system elapsed
#> 45.954 0.016 45.980
编辑:为了避免data.frame/data.table中因元素为NULL而出现列表列,一个办法是在展开嵌套列表之前先用NA替换NULL值:
# Replace NULL leaves with NA before binding, so the affected columns become
# ordinary atomic columns instead of list columns. A scalar if/else is used
# rather than ifelse(), which would drop attributes and keep only the first
# element of a leaf longer than one.
res <- rrapply(lst, f = function(x) if (is.null(x)) NA else x, how = "bind")
str(res)
#> 'data.frame': 2 obs. of 45 variables:
#> $ symbol : chr "AAPL210212P00065000" "AAPL210212C00065000"
#> $ description : chr "AAPL Feb 12 2021 $65.00 Put" "AAPL Feb 12 2021 $65.00 Call"
#> $ exch : chr "Z" "Z"
#> $ type : chr "option" "option"
#> $ last : num 0.01 70.53
#> $ change : num 0 0
#> $ volume : int 0 0
#> $ open : logi NA NA
#> $ high : logi NA NA
#> $ low : logi NA NA
#> $ close : logi NA NA
#> $ bid : num 0 71
#> $ ask : num 0.01 72.75
#> $ underlying : chr "AAPL" "AAPL"
#> $ strike : num 65 65
#> $ greeks.delta : num 0 1
#> $ greeks.gamma : num -1.1e-14 -1.1e-14
#> $ greeks.theta : num -0.00308 -0.00308
#> $ greeks.vega : num 2e-05 2e-05
#> $ greeks.rho : num 0.00714 0.00714
#> $ greeks.phi : num -0.015 -0.015
#> $ greeks.bid_iv : num 0 0
#> $ greeks.mid_iv : num 1.2 1.66
#> $ greeks.ask_iv : num 2.4 3.32
#> $ greeks.smv_vol : num 0.619 0.619
#> $ greeks.updated_at: chr "2021-02-08 20:56:01" "2021-02-08 20:56:01"
#> $ change_percentage: num 0 0
#> $ average_volume : int 0 0
#> $ last_volume : int 1 5
#> $ trade_date : num 1.61e+12 1.61e+12
#> $ prevclose : num 0.01 70.53
#> $ week_52_high : num 0 0
#> $ week_52_low : num 0 0
#> $ bidsize : int 0 0
#> $ bidexch : chr "J" "Q"
#> $ bid_date : num 1.61e+12 1.61e+12
#> $ asksize : int 0 0
#> $ askexch : chr "H" "Q"
#> $ ask_date : num 1.61e+12 1.61e+12
#> $ open_interest : int 28 7
#> $ contract_size : int 100 100
#> $ expiration_date : chr "2021-02-12" "2021-02-12"
#> $ expiration_type : chr "weeklys" "weeklys"
#> $ option_type : chr "put" "call"
#> $ root_symbol : chr "AAPL" "AAPL"
先用tibblify()处理它,然后转换为data.table:
library(data.table)
library(tibblify)
# tibblify() turns the nested list into a tibble, which as.data.table() then
# converts to a data.table.
as.data.table(tibblify(lst))