复制/拖动R数据帧中所有单元格中的地址



我拥有多年的属性数据。然而,从2007年起,我只有该房产的地址。我想将该房产的地址(根据房产编号(复制/拖动到2007年之前的所有条目中。

样本数据低于

structure(list(year = c(1990L, 1992L, 1998L, 1999L, 2000L, 2001L, 
2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 
2011L, 2012L, 2013L, 2014L, 2015L, 1990L, 1992L, 1998L, 1999L, 
2000L, 2001L, 2002L, 2003L, 2004L, 2005L), property = c("1182 047", 
"1182 047", "1182 047", "1182 047", "1182 047", "1182 047", "1182 047", 
"1182 047", "1182 047", "1182 047", "1182 047", "1182 047", "1182 047", 
"1182 047", "1182 047", "1182 047", "1182 047", "1182 047", "1182 047", 
"1182 047", "5421 047", "5421 047", "5421 047", "5421 047", "5421 047", 
"5421 047", "5421 047", "5421 047", "5421 047", "5421 047"), 
street = c("", "", "", "", "", "", "", "", "", "", "", "", 
"400 MINER RD", "400 MINER RD", "400 MINER RD", "400 MINER RD", 
"400 MINER RD", "400 MINER RD", "400 MINER RD", "400 MINER RD", 
"", "", "", "", "", "", "", "", "", ""), city = c("", "", 
"", "", "", "", "", "", "", "", "", "", "ORINDA CA", "ORINDA CA", 
"ORINDA CA", "ORINDA CA", "ORINDA CA", "ORINDA CA", "ORINDA CA", 
"ORINDA CA", "", "", "", "", "", "", "", "", "", ""), city_overflow = c("", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", ""), 
zip = c("", "", "", "", "", "", "", "", "", "", "", "", "94563", 
"94563", "94563", "94563", "94563", "94563", "94563", "94563", 
"", "", "", "", "", "", "", "", "", ""), name = c("BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "BARSAMIAN BETSY A", "BARSAMIAN BETSY A", 
"BARSAMIAN BETSY A", "ZHANG ZHI HAO & JIANG JIE YING", "ZHANG ZHI HAO & JIANG JIE YING", 
"ZHANG ZHI HAO & JIANG JIE YING", "ZHANG ZHI HAO & JIANG JIE YING", 
"ZHANG ZHI HAO & JIANG JIE YING", "ZHANG ZHI HAO & JIANG JIE YING", 
"ZHANG ZHI HAO & JIANG JIE YING", "ZHANG ZHI HAO & JIANG JIE YING", 
"ZHANG ZHI HAO & JIANG JIE YING", "ZHANG ZHI HAO & JIANG JIE YING"
), period = c("0000", "0000", "0000", "0000", "0000", "0106", 
"0106", "0106", "0106", "0106", "0106", "0709", "0709", "0709", 
"1014", "1014", "1014", "1014", "1014", "1500", "0000", "0000", 
"0000", "0000", "0000", "0106", "0106", "0106", "0106", "0106"
), type = c("individual", "individual", "individual", "individual", 
"individual", "individual", "individual", "individual", "individual", 
"individual", "individual", "individual", "individual", "individual", 
"individual", "individual", "individual", "individual", "individual", 
"individual", "individual", "individual", "individual", "individual", 
"individual", "individual", "individual", "individual", "individual", 
"individual")), row.names = c(NA, -30L), class = c("data.table", 
"data.frame"))

我希望每一行都包含与其属性号相关的地址,该属性号可以在2007年之后的所有数据条目中找到。

使用tidyr::fill()这将是解决这类问题的一个很好的办法。我们首先需要确保,数据按propertyyeardplyr::arrange()排序为之工作。为了确保地址数据永远不会粘贴到另一个性质的细胞,我们使用dplyr::group_by()

library(dplyr, warn.conflicts = FALSE)
library(tidyr)
prop_df %>% 
arrange(property, -year) %>% 
group_by(property) %>% 
mutate_at(vars(street, city, zip), ~ifelse(. == "", NA, .)) %>% 
fill(street, city, zip) 
#> # A tibble: 30 x 9
#>     year property street    city    city_overflow zip   name      period type   
#>    <int> <chr>    <chr>     <chr>   <chr>         <chr> <chr>     <chr>  <chr>  
#>  1  2015 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1500   indivi…
#>  2  2014 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1014   indivi…
#>  3  2013 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1014   indivi…
#>  4  2012 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1014   indivi…
#>  5  2011 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1014   indivi…
#>  6  2010 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 1014   indivi…
#>  7  2009 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 0709   indivi…
#>  8  2008 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 0709   indivi…
#>  9  2007 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 0709   indivi…
#> 10  2006 1182 047 400 MINE… ORINDA… ""            94563 BARSAMIA… 0106   indivi…
#> # … with 20 more rows

另一种方法可能是首先为每个用dplyrselect()filter()distinct()和然后用CCD_ 10将其加入到原始数据集中。

library(dplyr, warn.conflicts = FALSE)
address_dict <- 
prop_df %>% 
select(property, street, city, zip) %>% 
filter_at(vars(street, city, zip), ~. != "") %>% 
distinct()
prop_df %>% 
select(-c(street, city, zip)) %>% 
full_join(address_dict)
#> Joining, by = "property"
#> # A tibble: 30 x 9
#>     year property city_overflow name       period type     street   city   zip  
#>    <int> <chr>    <chr>         <chr>      <chr>  <chr>    <chr>    <chr>  <chr>
#>  1  1990 1182 047 ""            BARSAMIAN… 0000   individ… 400 MIN… ORIND… 94563
#>  2  1992 1182 047 ""            BARSAMIAN… 0000   individ… 400 MIN… ORIND… 94563
#>  3  1998 1182 047 ""            BARSAMIAN… 0000   individ… 400 MIN… ORIND… 94563
#>  4  1999 1182 047 ""            BARSAMIAN… 0000   individ… 400 MIN… ORIND… 94563
#>  5  2000 1182 047 ""            BARSAMIAN… 0000   individ… 400 MIN… ORIND… 94563
#>  6  2001 1182 047 ""            BARSAMIAN… 0106   individ… 400 MIN… ORIND… 94563
#>  7  2002 1182 047 ""            BARSAMIAN… 0106   individ… 400 MIN… ORIND… 94563
#>  8  2003 1182 047 ""            BARSAMIAN… 0106   individ… 400 MIN… ORIND… 94563
#>  9  2004 1182 047 ""            BARSAMIAN… 0106   individ… 400 MIN… ORIND… 94563
#> 10  2005 1182 047 ""            BARSAMIAN… 0106   individ… 400 MIN… ORIND… 94563
#> # … with 20 more rows

使用data.tablezoo::na.locf

library(data.table)
#Columns to replace values
cols <- c('street', 'city', 'city_overflow', 'zip')
#Replace empty values with NA
df[, (cols) := replace(.SD, .SD == '', NA), .SDcols = cols]
#Replace NA's values with next values. 
df[,(cols) := lapply(.SD, zoo::na.locf, fromLast = TRUE),.SDcols = cols, property]

最新更新