Question

data prerprocessing coding

0

Entering edit mode

3.9 years ago

sskimvd • 0

I want to move the row that matches the first row of data1 to the last row of data1.

data1

id  s1   s2   s3   s4

a    1    0    2     1

b    2    0    1     2

b   0    0    1     1

c    0   1    2     2

data2

id    here

a      a1

b      a2 

c      a3

after

id  s1   s2   s3   s4   here

a    1    0    2     1    a1

b    2    0    1     2    a2

b   0    0    1     1     a2

c    0   1    2     2     a3

help me

thank you

R • 779 views

ADD COMMENT • link updated 3.9 years ago by cpad0112 21k • written 3.9 years ago by sskimvd • 0

0

Entering edit mode

with join function:

$ join -1 1 -2 1 data1.txt data2.txt -t $'\t' 

id  s1  s2  s3  s4  here
a   1   0   2   1   a1
b   2   0   1   2   a2  
b   0   0   1   1   a2  
c   0   1   2   2   a3

with tsv-utils join function:

input:

$ cat data1.txt 
id  s1  s2  s3  s4
a   1   0   2   1
b   2   0   1   2
b   0   0   1   1
c   0   1   2   2

$ cat data2.txt 
id  here
a   a1
b   a2  
c   a3

output:

$ tsv-join -H -f data2.txt -k 1  -a 2   data1.txt 

id  s1  s2  s3  s4  here
a   1   0   2   1   a1
b   2   0   1   2   a2
b   0   0   1   1   a2
c   0   1   2   2   a3

ADD REPLY • link 3.9 years ago by cpad0112 21k

0

Entering edit mode

Related post at StackOverflow:

How to join (merge) data frames (inner, outer, left, right)

ADD REPLY • link 3.9 years ago by zx8754 12k

score 2 · Answer 1 · 2021-01-11

Your data.

data_1 <- structure(list(id = c("a", "b", "b", "c"), s1 = c(1L, 2L, 0L, 
0L), s2 = c(0L, 0L, 0L, 1L), s3 = c(2L, 1L, 1L, 2L), s4 = c(1L, 
2L, 1L, 2L)), class = "data.frame", row.names = c(NA, -4L))

data_2 <- structure(list(id = c("a", "b", "c"), here = c("a1", "a2", "a3"
)), class = "data.frame", row.names = c(NA, -3L))

tidyverse solution

library("dplyr")

merged <- left_join(data_1, data_2, by="id")

> merged
  id s1 s2 s3 s4 here
1  a  1  0  2  1   a1
2  b  2  0  1  2   a2
3  b  0  0  1  1   a2
4  c  0  1  2  2   a3

data.table solution

library("data.table")

setDT(data_1, key="id")
setDT(data_2, key="id")

merged <- data_1[data_2]

> merged
   id s1 s2 s3 s4 here
1:  a  1  0  2  1   a1
2:  b  2  0  1  2   a2
3:  b  0  0  1  1   a2
4:  c  0  1  2  2   a3

score 0 · Answer 2 · 2021-01-12

library(sqldf)

input:

> data_1
  id s1 s2 s3 s4
1  a  1  0  2  1
2  b  2  0  1  2
3  b  0  0  1  1
4  c  0  1  2  2

> data_2
  id here
1  a   a1
2  b   a2
3  c   a3

output:

> sqldf('select d1.*, d2."here"  from data_1 as d1  join data_2 as d2 where d1.id=d2.id')

  id s1 s2 s3 s4 here
1  a  1  0  2  1   a1
2  b  2  0  1  2   a2
3  b  0  0  1  1   a2
4  c  0  1  2  2   a3