Different behaviors between data frame and matrix in R
> # Generate an artificial matrix
> test.m <- matrix(1:6, nrow = 3)
> row.names(test.m) <- c('x1', 'x2', 'x3')
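> ## note: unlike row.names<-, base R has no col.names<- function, so the next call fails (colnames<- is the working equivalent) and the matrix keeps no column names
> col.names(test.m) <- c('a', 'b')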
Error in col.names(test.m) <- c("a", "b") :
could not find function "col.names<-"
>
> # Generate a data frame from the matrix
> test.df <- as.data.frame(test.m)
>
>
> # Selecting elements
> ## row names can be used to select elements from a data frame or a matrix
> test.idx <- c('x3', 'x1')
> test.df[test.idx, ]
V1 V2
x3 3 6
x1 1 4
> test.m[test.idx, ]
[,1] [,2]
x3 3 6
x1 1 4
>
> # Selecting elements with an index that contains a name not present in the data
> ## data frame returns NA rows
> ## matrix returns an error
> test.idx <- c('x4', 'x1')
> test.df[test.idx, ]
V1 V2
NA NA NA
x1 1 4
> test.m[test.idx, ]
Error: subscript out of bounds
>
>
> # Duplicate row names
> ## duplicate row names are not allowed in a data frame.
> ## duplicate row names are allowed in a matrix.
> test.row.names <- c('x1', 'x2', 'x1')
> row.names(test.df) <- test.row.names
Error in `row.names<-.data.frame`(`*tmp*`, value = c("x1", "x2", "x1")) :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘x1’
> rownames(test.m) <- test.row.names
>
> # names
> ## names() returns the column names of a data frame
> ## names() returns NULL for a matrix
> names(test.df)
[1] "V1" "V2"
> names(test.m)
NULL
Leave a Reply