我有一个不同项目随时间(年(的现金流表,想计算每个项目的内部收益率。我似乎无法为每个项目选择合适的列,这些列各不相同。表格结构如下:
structure(list(`Portfolio Company` = c("Ventures II", "Pal III",
"River Fund II", "Ventures III"),
minc = c(2007, 2008, 2008, 2012),
maxc = c(2021, 2021, 2021, 2020),
num_pers = c(14, 13, 13, 8),
`2007` = c(-660000, NA, NA, NA),
`2008` = c(-525000, -954219, -1427182.55, NA),
`2009` = c(-351991.03, -626798, -1694353.41, NA),
`2010` = c(-299717.06, -243248, -1193954, NA),
`2011` = c(-239257.08, 465738, -288309, NA),
`2012` = c(-9057.31000000001, -369011, 128509.63, -480000),
`2013` = c(-237233.9, -131111, 53718, -411734.58),
`2014` = c(-106181.76, -271181, 887640, -600000),
`2015` = c(-84760.51, 441808, 906289, -900000),
`2016` = c(2770719.21, -377799, 166110, -150000),
`2017` = c(157820.08, -12147, 1425198, -255000),
`2018` = c(204424.36,-1626110, 361270, -180000),
`2019` = c(563463.62, 119577, 531555, 3300402.62),
`2020` = c(96247.29, 7057926, 2247027, 36111.6),
`2021` = c(614848.68, 1277996, 258289, NA)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L),
groups = structure(list(`Portfolio Company` =c("Ventures II","Ventures III","Pal III", "River Fund II"),
.rows = structure(list(1L, 4L, 2L, 3L),
ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L), .drop = TRUE))
每个项目(Portfolio Company
(都有不同的开始和结束日期,由minc
和maxc
列捕获。我想使用minc和maxc中的文本为每个项目从minc:maxc中进行选择,以执行IRR计算。我收到了各种错误,包括:找不到对象maxc
,参数不正确。。。已经尝试了大约20种组合!!sym,作为.String(来自NLP包(。。。没有一个有效。
这是创建表的代码和有问题的选择代码:
sum_fund_CF <- funds %>% group_by(`TX_YR`, `Portfolio Company`) %>%
summarise(CF=sum(if_else(is.na(Proceeds),0,Proceeds)-if_else(is.na(Investment),0,Investment))) %>% ungroup() #organizes source data and calculates cash flows
sum_fund_CF <- sum_fund_CF %>%
group_by(`Portfolio Company`) %>% mutate(minc=min(`TX_YR`),maxc=max(`TX_YR`),num_pers=maxc-minc) %>%
pivot_wider(names_from = TX_YR, values_from = `CF`) #creates the table and finds first year and last year of cash flow, and num of periods between them
sum_fund_CF %>% group_by(`Portfolio Company`)%>% select(!!sym(as.String(maxc))):!!sym(as.String(max))) #want to select appropriate columns for each record to do the IRR analysis ... IRR() ... need a string of cash flows and no NA.
I'm sure it's something simple, but this has me perplexed. Thanks !
您可以相应地修改IRR的定义。我跟随这篇关于如何使用jrvFinance
软件包计算IRR的文章。
dplyr
包中的filter
函数在group_by
之后使用,用于选择minc和maxc列所表示的年份。
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
library(jrvFinance)
data <- structure(list(`Portfolio Company` = c("Ventures II", "Pal III",
"River Fund II", "Ventures III"),
minc = c(2007, 2008, 2008, 2012),
maxc = c(2021, 2021, 2021, 2020),
num_pers = c(14, 13, 13, 8),
`2007` = c(-660000, NA, NA, NA),
`2008` = c(-525000, -954219, -1427182.55, NA),
`2009` = c(-351991.03, -626798, -1694353.41, NA),
`2010` = c(-299717.06, -243248, -1193954, NA),
`2011` = c(-239257.08, 465738, -288309, NA),
`2012` = c(-9057.31000000001, -369011, 128509.63, -480000),
`2013` = c(-237233.9, -131111, 53718, -411734.58),
`2014` = c(-106181.76, -271181, 887640, -600000),
`2015` = c(-84760.51, 441808, 906289, -900000),
`2016` = c(2770719.21, -377799, 166110, -150000),
`2017` = c(157820.08, -12147, 1425198, -255000),
`2018` = c(204424.36,-1626110, 361270, -180000),
`2019` = c(563463.62, 119577, 531555, 3300402.62),
`2020` = c(96247.29, 7057926, 2247027, 36111.6),
`2021` = c(614848.68, 1277996, 258289, NA)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L),
groups = structure(list(`Portfolio Company` =c("Ventures II","Ventures III","Pal III", "River Fund II"),
.rows = structure(list(1L, 4L, 2L, 3L),
ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L), .drop = TRUE))
clean_data <- data %>%
clean_names() %>%
ungroup() %>%
pivot_longer(cols = -1:-4,
names_to = "year",
values_to = "cashflow") %>%
mutate(year = str_replace(year, "x", ""),
year = as.numeric(year))
clean_data %>%
print(n = 20)
#> # A tibble: 60 x 6
#> portfolio_company minc maxc num_pers year cashflow
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Ventures II 2007 2021 14 2007 -660000
#> 2 Ventures II 2007 2021 14 2008 -525000
#> 3 Ventures II 2007 2021 14 2009 -351991.
#> 4 Ventures II 2007 2021 14 2010 -299717.
#> 5 Ventures II 2007 2021 14 2011 -239257.
#> 6 Ventures II 2007 2021 14 2012 -9057.
#> 7 Ventures II 2007 2021 14 2013 -237234.
#> 8 Ventures II 2007 2021 14 2014 -106182.
#> 9 Ventures II 2007 2021 14 2015 -84761.
#> 10 Ventures II 2007 2021 14 2016 2770719.
#> 11 Ventures II 2007 2021 14 2017 157820.
#> 12 Ventures II 2007 2021 14 2018 204424.
#> 13 Ventures II 2007 2021 14 2019 563464.
#> 14 Ventures II 2007 2021 14 2020 96247.
#> 15 Ventures II 2007 2021 14 2021 614849.
#> 16 Pal III 2008 2021 13 2007 NA
#> 17 Pal III 2008 2021 13 2008 -954219
#> 18 Pal III 2008 2021 13 2009 -626798
#> 19 Pal III 2008 2021 13 2010 -243248
#> 20 Pal III 2008 2021 13 2011 465738
#> # ... with 40 more rows
clean_data %>%
group_by(portfolio_company) %>%
filter(between(year, min(minc), max(maxc))) %>%
summarise(irr = irr(cashflow,
cf.freq = 1))
#> # A tibble: 4 x 2
#> portfolio_company irr
#> <chr> <dbl>
#> 1 Pal III 0.111
#> 2 River Fund II 0.0510
#> 3 Ventures II 0.0729
#> 4 Ventures III 0.0251
创建于2022-01-04由reprex包(v2.0.1(
使用jvrFinance::irr()
的另一种方法。
library(jrvFinance)
library(tidyverse)
df %>%
rowwise() %>%
summarise(irr = irr(na.omit(c_across(matches('^\d')))), .groups = 'drop')
#> # A tibble: 4 × 2
#> `Portfolio Company` irr
#> <chr> <dbl>
#> 1 Ventures II 0.0729
#> 2 Pal III 0.111
#> 3 River Fund II 0.0510
#> 4 Ventures III 0.0251
创建于2022-01-04由reprex包(v2.0.1(