Here’s a sample column of my data frame, where RR is the header:
RR
Cvv
Cvv
Caa
What I need is to “invert” the data, so that the substrings vv and aa become the headers and RR moves into the data frame. The resulting matrix would be:
vv | aa
CRR |
CRR |
| CRR
So we get the same relationships in both matrices. In the first and second rows, vv is coupled with RR. In the third row, aa is coupled with RR.
Is this achievable with R? Any ideas?
Thanks for looking !
I oversimplified my data in the example above, so here’s a sample of my actual dataset:
> dput(head(A1F[4:15],n=20))
structure(list(RR = structure(c(15L, 15L, 15L, 27L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
" ", "Caa", "Caj", "Cbb", "Cbb ", "Cbv", "Cja", "Cjr", "Crj",
"Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
AA = structure(c(13L, 13L, 13L, 1L, 1L, 1L, 1L, 15L, 27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 1L), .Label = c("",
"Caa", "Caj", "Car", "Cbb", "Cbv", "Cja", "Cjr", "Cjr ",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb",
"Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"
), class = "factor"), BB = structure(c(9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
), .Label = c("", "?", "Caa", "Caj", "Cbv", "Cja", "Cjr",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
VV = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
" ", "Caa", "Caj", "Caj+", "Cbb", "Cbv", "Cja", "Cjr", "Crv",
"Cvb", "Cvr", "Cvv", "Gaa", "Gbb", "Gja", "Gjr", "Grv", "Gvb",
"Gvr"), class = "factor"), RJ = structure(c(8L, 3L, 3L, 1L,
1L, 12L, 12L, 12L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L), .Label = c("", "Caa", "Caj", "Cbv",
"Ccrj", "Cja", "Cjr", "Crj", "Crj ", "Crr", "Crv", "Cvr",
"Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv",
"Gvr", "Gvv"), class = "factor"), JR = structure(c(7L, 7L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L), .Label = c("", "Caa", "Caj",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv",
"Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Grv ",
"Gvb", "Gvb ", "Gvr", "Gvv"), class = "factor"), BV = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grv", "Gvb", "Gvr",
"Gvv", "R"), class = "factor"), VB = structure(c(1L, 1L,
7L, 7L, 18L, 18L, 1L, 1L, 10L, 10L, 21L, 21L, 21L, 1L, 21L,
21L, 21L, 21L, 21L, 1L), .Label = c("", "Caa", "Caj", "Cbb",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb",
"Gvr", "Gvv"), class = "factor"), AJ = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 10L,
1L, 10L, 10L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grj ", "Grr", "Grv",
"Gvb", "Gvr", "Gvv"), class = "factor"), JA = structure(c(10L,
10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 6L, 6L, 6L, 6L), .Label = c("", "Caa", "Caj", "Cbv",
"Cja", "Cjr", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbv", "Gja", "Gjr", "Grr", "Grv", "Gvb", "Gvv"), class = "factor"),
VR = structure(c(1L, 5L, 5L, 5L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
"Caa", "Caj", "Caj ", "Cbv", "Cja", "Cjr", "Crj", "Crr",
"Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr",
"Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
RV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 1L, 1L), .Label = c("",
"Caa", "Caj", "Cbb", "Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv",
"Cvr", "Cvv", "Cvv ", "Gaa", "Gaj", "Gbb", "Gbv", "Gja",
"Gjr", "Grj", "Grr", "Grv", "Gvr", "Gvv"), class = "factor")), .Names = c("RR",
"AA", "BB", "VV", "RJ", "JR", "BV", "VB", "AJ", "JA", "VR", "RV"
), row.names = c(NA, 20L), class = "data.frame")
The desired matrix would keep the relationships and row order, as stated above. GSee provided an answer that I could apply, but only to one column of my matrix, because [[ selects just one specific entry and selecting multiple entries with [ doesn’t work. I’m not sure if I’m heading in the right direction with this…
Here’s what the desired output (the first three rows) would look like, based on the actual dataset (as above):
structure(list(vv = structure(c(1L, 1L, 1L), .Label = "CRR", class = "factor"),
rv = c(NA, NA, NA), ja = structure(c(1L, 1L, 1L), .Label = "CVV", class = "factor"),
aa = structure(c(1L, 1L, 1L), .Label = "CAJ", class = "factor"),
bv = structure(c(1L, 2L, 2L), .Label = c("", "CVR"), class = "factor"),
aj = structure(c(1L, 2L, 2L), .Label = c("", "CRJ"), class = "factor"),
vb = structure(c(1L, 1L, 1L), .Label = "CAA", class = "factor"),
rj = structure(c(2L, 1L, 1L), .Label = c("", "CRJ"), class = "factor"),
rr = structure(c(1L, 1L, 1L), .Label = "CBB", class = "factor"),
vr = structure(c(1L, 1L, 1L), .Label = "CJA", class = "factor"),
bb = structure(c(1L, 1L, 1L), .Label = "CBV", class = "factor"),
jr = c(NA, NA, NA)), .Names = c("vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), class = "data.frame", row.names = c(NA,
-3L))
I hope this makes more sense.
Alright, I have the code for you.
Find the names of the output matrix, and make the matrix (filled with NA for now).
Loop through each row of each column.