五分位数的回归和描述性统计



我想针对变量ESG的每个五分位数(即按20%百分位划分的组),计算变量ExReturn和MarketCap的平均值。此外,我还想计算以下回归的标准差、平方误差和β系数:

# Regress excess returns on the market factor.
# The column name `Mkt-RF` is non-syntactic, so it must be back-quoted;
# unquoted, the formula would be parsed as `Mkt` minus `rf`.
lm(ExReturn ~ `Mkt-RF`, data = df)

此外,上述所有统计量(均值、标准差、β值等)都需要给出原假设为mu=0的t检验统计量。数据中包含日期变量,因此五分位数的划分会随时间变化,很可能需要用到group_by函数,但我不知道该如何实现:

# Unfinished sketch: groups df by Date and pipes the result into summarize,
# but no summary expressions have been supplied yet.
df <- df %>%    
group_by(Date) %>%
summarize

数据如下:

structure(list(Date = structure(c(18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 18261, 
18261, 18261, 18261, 18292, 18292, 18292, 18292, 18292, 18292, 
18292, 18292, 18292, 18292, 18292), class = "Date"), MarketCap = c(2.3e+08, 
93248769, 685800000, 720729660, 69698568873.2, 2.898e+10, 8487267199.4, 
1172170050.24, 77345586400, 160624888800, 28340263200, 30678750000, 
2109861209.2, 579946500, 348386902, 720160370600, 29679065926.1, 
33577513.2, 2.79e+09, 3.0561e+10, 59669490000, 19055262850, 1556797580.8, 
8.125e+09, 4060622494.5, 4001662000, 51461219943, 778128000, 
24532252.8, 204203754, 8370780.288, 15246163144, 22833379, 3955024600.2, 
1446899447.6, 1.26e+09, 24298056320, 1.585e+10, 252750450, 720858177.6, 
23319193.05, 2708610652, 103989436.6, 189068183.1, 103366335.6, 
180865915.8, 222054772.5, 550254536, 18299048547.2, 60541231.5, 
2013978364.8, 23168778.6, 47733818.82, 408102796.63, 29321651893.56, 
1046400000, 22187160, 32477407.305, 3382475870.48, 5568744476, 
22247259149.2, 4570101995, 22535207.675, 136500448, 206250000, 
237110000, 87991801.2, 90518520, 142583011.2, 87252000, 89607542.1, 
15853616350), ExReturn = c(-0.0268237288135594, -0.0014, 0.0569333333333333, 
0.0720463276836159, 0.0265611650485436, 0.0601384615384615, 0.127075551294343, 
-0.00731715976331364, 0.00477283950617287, 0.0300528207688467, 
0.046462156987875, 0.0419884297520661, 0.0984003992015968, 0.0118450331125828, 
0.108769491525424, 0.0128969569779643, 0.025583617089624, 0.0352492146596857, 
0.1146, -0.00205530799475755, 0.0114205128205128, 0.134169531795947, 
-0.00362717149220497, 0.0349520408163265, 0.0103924528301887, 
0.0190081632653061, 0.00968433734939762, 0.00729565217391299, 
-0.134191327913279, -0.0566486187845304, -0.0516283105022832, 
0.0454431771894093, -0.139764779874214, 0.0370615384615385, 0.0388298850574714, 
0.0990366812227073, -0.0483283276450512, 0.105443575418994, 0.0503241379310345, 
0.05845401459854, 0.0332153846153845, 0.173357281553398, -0.04061568627451, 
2.04545454545856e-05, -0.0622695652173913, -0.0173999999999999, 
-0.109508108108108, 0.133971179039301, 0.0716638090599123, -0.0915639344262295, 
0.0190545454545455, -0.143772881355932, -0.0556635658914729, 
NA, 0.124221890547264, 0.00320829493087567, -0.0711674418604651, 
0.0225852398523985, 0.0411633668101386, 0.123977643504532, -0.0014, 
0.00240370370370376, 0.0915143712574852, 0.130407317073171, 0.0149601626016261, 
0.0155539325842696, -0.066626633165829, -0.00568596491228072, 
-0.0412999999999999, 0.0463190476190477, 0.119000751879699, 0.133620634920635
), ESG = c(2.53345959078982, 2.56657369964234, 2.56374908926396, 
2.78400588938968, 5.41203491734033, 2.98361526230021, 2.83862944152638, 
2.56271436335429, 6.09627197644367, 6.13367262905784, 5.90068439532432, 
2.73764085376302, 2.97653666066233, 2.56809572483847, 0, 2.88640554272283, 
6.28231235369481, 2.23637634112363, 2.85853100185986, 3.0251042465279, 
2.61094821837499, 3.00059560304552, 2.5785955609722, 2.81530685170549, 
2.83749518321015, 2.95037743733033, 6.26987193861428, 2.94874134091518, 
3.08852743072243, 2.49196332602072, 1.94170592484541, 2.60900875593629, 
2.22546168330046, 2.91260967823007, 2.82029536475896, 2.29931303661489, 
6.23096111132129, 2.8714061942084, 3.31214287506649, 2.57424687520427, 
0, 2.90337260621492, 0, 2.13898276179352, 0, 0, 0, 0, 2.25120014610637, 
0, 2.80336046350687, 2.88548114406871, 3.33333333333333, 0, 0, 
2.57372969572693, 3.67890089856877, 2.86091329774841, 6.09384042471603, 
2.98628736414934, 2.87370999322901, 2.65897111869754, 2.9448373574074, 
2.29855507699792, 2.50610862040669, 2.56555256853195, 2.14468054223976, 
2.74932949703411, 2.42753898147479, 2.00071863465065, 2.81584191847644, 
2.83974299107863), `Mkt-RF` = c(0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 0.0407, 
0.0407, -0.0255, -0.0255, -0.0255, -0.0255, -0.0255, -0.0255, 
-0.0255, -0.0255, -0.0255, -0.0255, -0.0255)), row.names = c(14508L, 
14450L, 14451L, 14452L, 14400L, 14509L, 14510L, 14511L, 14401L, 
14402L, 14376L, 14465L, 14512L, 14453L, 14424L, 14473L, 14388L, 
14513L, 14514L, 14384L, 14454L, 14403L, 14455L, 14515L, 14516L, 
14517L, 14393L, 14474L, 14389L, 14518L, 14418L, 14456L, 14425L, 
14477L, 14478L, 14519L, 14404L, 14520L, 14377L, 14457L, 14426L, 
14521L, 14427L, 14419L, 14428L, 14429L, 14430L, 14431L, 14522L, 
14432L, 14470L, 14523L, 14378L, 14433L, 14434L, 14458L, 14385L, 
14479L, 14390L, 14524L, 14480L, 14608L, 14617L, 14622L, 14623L, 
14581L, 14554L, 14603L, 14624L, 14555L, 14582L, 14583L), class = "data.frame")

为了回答问题的第一部分,我最终使用了以下函数:

# Assign every observation to an ESG bucket within its own Date, so the
# bucket edges are recomputed for each date.
# percent_rank() lies in [0, 1]; integer division by .201 (rather than .2)
# maps a rank of exactly 1 to bucket 5 (1 %/% .201 is 4), so the labels
# run from 1 to 5. The bucket edges therefore sit at multiples of 0.201.
CombData <- CombData %>%
  group_by(Date) %>%
  mutate(ESG.quint = percent_rank(ESG) %/% .201 + 1) %>%
  ungroup()  # drop the Date grouping so later steps start ungrouped

# Mean of ExReturn and MarketCap per ESG bucket, pooled over all dates;
# missing values are ignored.
desc <- CombData %>%
  group_by(ESG.quint) %>%
  summarise(
    across(c(ExReturn, MarketCap), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop"
  )

然而,我仍然需要一个允许条件t检验的代码

最新更新