r语言 - 如何将嵌套列表转换为data.table?



下面是嵌套列表,我想将其转换为data.table。

# Sample input: a list of two option-quote records. Each record mixes
# character, double, integer and NULL fields and carries a nested `greeks`
# sub-list. It is bound to `lst`, the name the conversion snippets operate on.
lst <- list(list(symbol = "AAPL210212P00065000", description = "AAPL Feb 12 2021 $65.00 Put", 
exch = "Z", type = "option", last = 0.01, change = 0, volume = 0L, 
open = NULL, high = NULL, low = NULL, close = NULL, bid = 0, 
ask = 0.01, underlying = "AAPL", strike = 65, greeks = list(
delta = 0, gamma = -1.09695e-14, theta = -0.00308333, 
vega = 2e-05, rho = 0.00713933, phi = -0.0149899, bid_iv = 0, 
mid_iv = 1.199176, ask_iv = 2.39835, smv_vol = 0.619, 
updated_at = "2021-02-08 20:56:01"), change_percentage = 0, 
average_volume = 0L, last_volume = 1L, trade_date = 1612544837252, 
prevclose = 0.01, week_52_high = 0, week_52_low = 0, bidsize = 0L, 
bidexch = "J", bid_date = 1612817742000, asksize = 0L, askexch = "H", 
ask_date = 1612857600000, open_interest = 28L, contract_size = 100L, 
expiration_date = "2021-02-12", expiration_type = "weeklys", 
option_type = "put", root_symbol = "AAPL"), list(symbol = "AAPL210212C00065000", 
description = "AAPL Feb 12 2021 $65.00 Call", exch = "Z", 
type = "option", last = 70.53, change = 0, volume = 0L, open = NULL, 
high = NULL, low = NULL, close = NULL, bid = 71, ask = 72.75, 
underlying = "AAPL", strike = 65, greeks = list(delta = 1, 
gamma = -1.09695e-14, theta = -0.00308333, vega = 2e-05, 
rho = 0.00713933, phi = -0.0149899, bid_iv = 0, mid_iv = 1.658792, 
ask_iv = 3.31758, smv_vol = 0.619, updated_at = "2021-02-08 20:56:01"), 
change_percentage = 0, average_volume = 0L, last_volume = 5L, 
trade_date = 1612795977049, prevclose = 70.53, week_52_high = 0, 
week_52_low = 0, bidsize = 0L, bidexch = "Q", bid_date = 1612857600000, 
asksize = 0L, askexch = "Q", ask_date = 1612857600000, open_interest = 7L, 
contract_size = 100L, expiration_date = "2021-02-12", expiration_type = "weeklys", 
option_type = "call", root_symbol = "AAPL"))

目前,我使用下面的代码进行转换,但转换过程中丢失了各列的类型(class)信息,所有列都变成了字符型(character)。

library(plyr)
library(data.table)

# Flatten every record with unlist(), turn it into a one-row data.frame and
# stack the rows. unlist() coerces the mixed-type fields of a record to a
# single character vector, which is why all resulting columns are character.
flat_rows <- lapply(lst, function(y) {
  as.data.frame(t(unlist(y)), stringsAsFactors = FALSE)
})

# Wrap the call instead of piping: `%>%` is not exported by plyr, so the piped
# form only works when magrittr (or dplyr) happens to be attached.
as.data.table(plyr::rbind.fill(flat_rows))

是否有一种方法可以使这个列表扁平化,以便类信息也被保留?

我不想使用plyr::rbind.fill函数来扁平化列表,因为它很慢。我也尝试过data.table::rbindlist函数,但嵌套列表中的部分信息丢失了。

library(data.table)

# Row-bind the records of `lst` (the nested list shown in the question),
# matching columns by name; `idcol` adds an "ID" column identifying the list
# element each row came from.
rbindlist(lst, use.names = TRUE, idcol = "ID")

谢谢你们的解决方案!下面是这两种方案的基准测试(microbenchmark)结果:

>           microbenchmark::microbenchmark(tibblify =as.data.table(tibblify(lst)),
+                                                           rrapply = as.data.table(rrapply(lst, f = function(x) ifelse(is.null(x), NA, x), how = "bind")), times = 100)
Unit: milliseconds
expr       min        lq      mean    median        uq       max neval cld
tibblify 23.018137 27.082619 32.579234 29.164100 31.895398 116.16178   100   b
rrapply  1.522039  1.901585  2.599128  2.224162  2.866303  21.86894   100  a 

谢谢!

另一个选择是使用rrapply包中的rrapply()函数,默认情况下它也会展开更深层的嵌套(例如greeks):

library(rrapply)

# how = "bind" unnests each record, including the nested `greeks` sub-list,
# and binds the records into a single data.frame.
rrapply(object = lst, how = "bind")
#>                symbol                  description exch   type  last change
#> 1 AAPL210212P00065000  AAPL Feb 12 2021 $65.00 Put    Z option  0.01      0
#> 2 AAPL210212C00065000 AAPL Feb 12 2021 $65.00 Call    Z option 70.53      0
#>   volume open high  low close bid   ask underlying strike greeks.delta
#> 1      0 NULL NULL NULL  NULL   0  0.01       AAPL     65            0
#> 2      0 NULL NULL NULL  NULL  71 72.75       AAPL     65            1
#>   greeks.gamma greeks.theta greeks.vega greeks.rho greeks.phi greeks.bid_iv
#> 1 -1.09695e-14  -0.00308333       2e-05 0.00713933 -0.0149899             0
#> 2 -1.09695e-14  -0.00308333       2e-05 0.00713933 -0.0149899             0
#>   greeks.mid_iv greeks.ask_iv greeks.smv_vol   greeks.updated_at
#> 1      1.199176       2.39835          0.619 2021-02-08 20:56:01
#> 2      1.658792       3.31758          0.619 2021-02-08 20:56:01
#>   change_percentage average_volume last_volume   trade_date prevclose
#> 1                 0              0           1 1.612545e+12      0.01
#> 2                 0              0           5 1.612796e+12     70.53
#>   week_52_high week_52_low bidsize bidexch     bid_date asksize askexch
#> 1            0           0       0       J 1.612818e+12       0       H
#> 2            0           0       0       Q 1.612858e+12       0       Q
#>       ask_date open_interest contract_size expiration_date expiration_type
#> 1 1.612858e+12            28           100      2021-02-12         weeklys
#> 2 1.612858e+12             7           100      2021-02-12         weeklys
#>   option_type root_symbol
#> 1         put        AAPL
#> 2        call        AAPL

注:与tibblify()相比,rrapply()往往更有效率:

library(tibblify)
library(data.table)

## Build a large nested list by concatenating 1e5 copies of `lst`
lst_large <- do.call(c, replicate(1e5, lst, simplify = FALSE))

## Timing for rrapply() followed by conversion to data.table
system.time(
  as.data.table(rrapply(lst_large, how = "bind"))
)
#>    user  system elapsed 
#>   4.882   0.143   5.026

## Timing for tibblify() followed by conversion to data.table
system.time(
  as.data.table(tibblify(lst_large))
)
#>    user  system elapsed 
#>  45.954   0.016  45.980

编辑:为了避免data.frame/data.table中因元素为NULL而产生列表列(list-column),一种做法是在展开嵌套列表之前,先把NULL值替换为NA:

# Replace NULL leaves with NA before binding, so the affected columns come out
# as atomic vectors rather than list-columns. A plain if/else is used because
# the test is a single TRUE/FALSE per leaf; ifelse() returns a result shaped
# like its test, so it would cut any longer leaf down to its first element and
# drop the leaf's attributes.
res <- rrapply(lst, f = function(x) if (is.null(x)) NA else x, how = "bind")
str(res)
#> 'data.frame':    2 obs. of  45 variables:
#>  $ symbol           : chr  "AAPL210212P00065000" "AAPL210212C00065000"
#>  $ description      : chr  "AAPL Feb 12 2021 $65.00 Put" "AAPL Feb 12 2021 $65.00 Call"
#>  $ exch             : chr  "Z" "Z"
#>  $ type             : chr  "option" "option"
#>  $ last             : num  0.01 70.53
#>  $ change           : num  0 0
#>  $ volume           : int  0 0
#>  $ open             : logi  NA NA
#>  $ high             : logi  NA NA
#>  $ low              : logi  NA NA
#>  $ close            : logi  NA NA
#>  $ bid              : num  0 71
#>  $ ask              : num  0.01 72.75
#>  $ underlying       : chr  "AAPL" "AAPL"
#>  $ strike           : num  65 65
#>  $ greeks.delta     : num  0 1
#>  $ greeks.gamma     : num  -1.1e-14 -1.1e-14
#>  $ greeks.theta     : num  -0.00308 -0.00308
#>  $ greeks.vega      : num  2e-05 2e-05
#>  $ greeks.rho       : num  0.00714 0.00714
#>  $ greeks.phi       : num  -0.015 -0.015
#>  $ greeks.bid_iv    : num  0 0
#>  $ greeks.mid_iv    : num  1.2 1.66
#>  $ greeks.ask_iv    : num  2.4 3.32
#>  $ greeks.smv_vol   : num  0.619 0.619
#>  $ greeks.updated_at: chr  "2021-02-08 20:56:01" "2021-02-08 20:56:01"
#>  $ change_percentage: num  0 0
#>  $ average_volume   : int  0 0
#>  $ last_volume      : int  1 5
#>  $ trade_date       : num  1.61e+12 1.61e+12
#>  $ prevclose        : num  0.01 70.53
#>  $ week_52_high     : num  0 0
#>  $ week_52_low      : num  0 0
#>  $ bidsize          : int  0 0
#>  $ bidexch          : chr  "J" "Q"
#>  $ bid_date         : num  1.61e+12 1.61e+12
#>  $ asksize          : int  0 0
#>  $ askexch          : chr  "H" "Q"
#>  $ ask_date         : num  1.61e+12 1.61e+12
#>  $ open_interest    : int  28 7
#>  $ contract_size    : int  100 100
#>  $ expiration_date  : chr  "2021-02-12" "2021-02-12"
#>  $ expiration_type  : chr  "weeklys" "weeklys"
#>  $ option_type      : chr  "put" "call"
#>  $ root_symbol      : chr  "AAPL" "AAPL"

先用tibblify()把列表转换为tibble,然后再转换为data.table:

library(data.table)
library(tibblify)

# Two steps: nested list -> tibble, then tibble -> data.table.
lst_tbl <- tibblify(lst)
as.data.table(lst_tbl)

最新更新