



# A tibble: 6 x 9
chr   source         type      start       end strand gene_id         symbol        gene_biotype  
<chr> <chr>          <chr>     <int>     <int> <chr>  <chr>           <chr>         <chr>         
1 2     pseudogene     gene  143300987 143301544 +      ENSG00000228134 AC092578.1    pseudogene    
2 2     pseudogene     gene  143611664 143613567 +      ENSG00000229781 AC013444.1    pseudogene    
3 2     protein_coding gene  143635067 143799890 +      ENSG00000115919 KYNU          protein_coding
4 2     pseudogene     gene  143704869 143705655 -      ENSG00000270390 RP11-470B22.1 pseudogene    
5 2     miRNA          gene  143763269 143763360 -      ENSG00000221169 AC013444.2    miRNA         
6 2     protein_coding gene  143848931 144525921 +      ENSG00000075884 ARHGAP15      protein_coding


chr_a   point A
1     2 143301002 
2     2 143625061
3     2 143700941
4     2 143811317
5     2 144127323
6     2 144224689



# For each coordinate in `point A`, look up the symbol of the first gene whose
# closed interval [start, end] contains it; points inside no gene get NA.
# The previous form tested `start >= g & end <= g`, which holds only when
# start == end == g, and ifelse(..., genes$symbol, NA) paired symbols with
# points by row position instead of by the gene that actually matched.
# NOTE(review): chromosome is not compared here; this assumes all points and
# genes are on the same chromosome, as in the example data.
x$geneA <- vapply(
  x$`point A`,
  function(g) {
    hits <- which(genes$start <= g & g <= genes$end)
    if (length(hits) > 0) {
      as.character(genes$symbol[hits[1]])
    } else {
      NA_character_
    }
  },
  character(1)
)





# For every row of `x`, find the genes on the same chromosome whose closed
# interval [start, end] contains `point A` and keep the first symbol.
# `symbol[1]` on an empty result is NA, so unmatched points get NA.
# vapply() guarantees a character vector even when `x` has zero rows
# (sapply() would return an empty list there).
# Chromosomes are compared as character so an integer `chr_a` matches a
# character `chr`.
x$geneA <- vapply(
  seq_len(nrow(x)),
  function(i) {
    pos <- x$`point A`[i]
    chrom <- as.character(x$chr_a[i])
    hits <- filter(genes, as.character(chr) == chrom, start <= pos, pos <= end)
    as.character(hits$symbol[1])
  },
  character(1)
)


# A tibble: 6 x 3
chr_a `point A` geneA     
<int>     <int> <chr>     
1     2 143301002 AC092578.1
2     2 143625061 NA        
3     2 143700941 KYNU      
4     2 143811317 NA        
5     2 144127323 ARHGAP15  
6     2 144224689 ARHGAP15 


# Attach to every row of `x` the genes on the same chromosome, keep only those
# whose closed interval [start, end] contains the row's `point A`, then expand
# back to one row per (point, gene) pair. keep_empty = TRUE retains points that
# hit no gene, with the gene columns filled with NA.
# map2() pairs each nested gene table with its own row's coordinate, so
# duplicated `point A` values are handled correctly and the result is not left
# grouped (no group_by()/ungroup() needed).
# NOTE(review): the join needs `chr_a` and `chr` to have compatible types;
# confirm against the real data.
x %>%
  nest_join(genes, by = c("chr_a" = "chr")) %>%
  mutate(
    genes = map2(
      genes, `point A`,
      function(candidates, pos) filter(candidates, start <= pos, pos <= end)
    )
  ) %>%
  unnest(genes, keep_empty = TRUE)

用于获得合并表,其中未匹配的行为 `NA`。或者,不使用嵌套 tibble,只查找匹配的行:

# Pair every point with all genes on the same chromosome, then keep only the
# pairs where the point lies inside the gene's closed interval [start, end].
# Points that fall inside no gene do not appear in the result.
x %>%
  inner_join(genes, by = c("chr_a" = "chr")) %>%
  filter(start <= `point A`, `point A` <= end)


# Add a `symbol` column to a copy of df2: for each coordinate in column `A`,
# collect the symbols of all df1 genes whose closed interval [start, end]
# contains it. Several hits are joined with a space and no hit gives "", so
# the column is always a plain character vector (sapply() produced a list
# column holding character(0) for unmatched points).
df2out <- within(df2, symbol <- vapply(
  A,
  function(pos) {
    hits <- which(pos >= df1$start & pos <= df1$end)
    paste(df1$symbol[hits], collapse = " ")
  },
  character(1)
))


> df2out
chr_a point         A     symbol
1     1     2 143301002 AC092578.1
2     2     2 143625061           
3     3     2 143700941       KYNU
4     4     2 143811317           
5     5     2 144127323   ARHGAP15
6     6     2 144224689   ARHGAP15


# Interval lookup with GenomicRanges: the gene table becomes a set of ranges
# (extra columns such as `symbol` are kept as metadata) and each point becomes
# a width-1 range on its chromosome.
gr1 <- makeGRangesFromDataFrame(genes, keep.extra.columns = TRUE)
# Plain data.frame; check.names = FALSE keeps the "point A" column name as is.
x <- data.frame(x, check.names = FALSE)
gr2 <- GRanges(
  seqnames = x$chr_a,
  IRanges(start = x[, "point A"], width = 1)
)
# Query = points (gr2), subject = genes (gr1).
ovlp <- findOverlaps(gr2, gr1)
# Points without any overlap keep NA. If a point overlaps several genes, the
# last assignment for that row index wins.
x$gene <- NA
x$gene[queryHits(ovlp)] <- gr1$symbol[subjectHits(ovlp)]
chr_a   point A       gene
1     2 143301002 AC092578.1
2     2 143625061       <NA>
3     2 143700941       KYNU
4     2 143811317       <NA>
5     2 144127323   ARHGAP15
6     2 144224689   ARHGAP15


#Solution using a for loop
for(i in 1:nrow(x)){ #Iterate through every row of x
for(j in 1:nrow(genes)){ #Iterate through every row of genes
if(x$point_A[i] >= genes$start[j] & x$point_A[i] < genes$end[j]){ #If the ith point_A falls within the jth start & end
if(is.na(x$symbol[i])){ #If there is no symbol assigned to the ith row of x
x$symbol[i] <- genes$symbol[j] #Assign the symbol from the jth row
} else{ #If there is a symbol assigned to the ith row of x already, and it matches (now, another) jth row of genes
x$symbol[i] <- paste(x$symbol[i], genes$symbol[j]) #Concatenate the new symbol from the jth row of genes to the ith row of x
#   chr_1   point_A                         symbol
# 1     2 143300988 AC092578.1 Newadditionalsymbol
# 2     2 143611665                     AC013444.1
# 3     2 143635068                           KYNU
# 4     2 143704870             KYNU RP11-470B22.1
# 5     2 143763270                KYNU AC013444.2
# 6     2 143848932                       ARHGAP15
