Monday, September 25, 2017

cut() {base}


The cut() function divides the range of x into intervals and codes each value of x according to the interval it falls into. The leftmost interval corresponds to level one, the next leftmost to level two, and so on.

cut(x, breaks, labels = NULL, include.lowest = FALSE, right = TRUE, dig.lab = 3, ordered_result = FALSE, ...)

The parameters are:
 - x: a numeric vector to be converted into a factor
 - breaks: a numeric vector giving the cutting points, or a single number giving the number of intervals
 - labels: labels to be used for the levels created. If labels = FALSE, integer codes are returned instead of a factor
 - include.lowest: logical, indicates if a value equal to the lowest break (or the highest, when right = FALSE) should be included in the interval
 - right: logical, indicates if the intervals should be closed on the right (and open on the left) or closed on the left (and open on the right)
 - dig.lab: integer, number of digits used to format the break values when labels are not given
 - ordered_result: logical, indicates if the result should be returned as an ordered factor

head(women)
##   height weight
## 1     58    115
## 2     59    117
## 3     60    120
## 4     61    123
## 5     62    126
## 6     63    129
summary(women)
##      height         weight     
##  Min.   :58.0   Min.   :115.0  
##  1st Qu.:61.5   1st Qu.:124.5  
##  Median :65.0   Median :135.0  
##  Mean   :65.0   Mean   :136.7  
##  3rd Qu.:68.5   3rd Qu.:148.0  
##  Max.   :72.0   Max.   :164.0

breaks:
cut(women$height, breaks = c(50,60,70,80)) #numeric vector giving the cutting points
##  [1] (50,60] (50,60] (50,60] (60,70] (60,70] (60,70] (60,70] (60,70]
##  [9] (60,70] (60,70] (60,70] (60,70] (60,70] (70,80] (70,80]
## Levels: (50,60] (60,70] (70,80]
cut(women$height, breaks = 3) #number of intervals  
##  [1] (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: (58,62.7] (62.7,67.3] (67.3,72]

labels:
cut(women$height, c(50,60,70,80)) #no labels
##  [1] (50,60] (50,60] (50,60] (60,70] (60,70] (60,70] (60,70] (60,70]
##  [9] (60,70] (60,70] (60,70] (60,70] (60,70] (70,80] (70,80]
## Levels: (50,60] (60,70] (70,80]
cut(women$height, c(50,60,70,80), labels = c(1,2,3)) #with labels
##  [1] 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
## Levels: 1 2 3
cut(women$height, breaks = 3, labels = c("Short", "Medium", "Tall")) #with labels
##  [1] Short  Short  Short  Short  Short  Medium Medium Medium Medium Medium
## [11] Tall   Tall   Tall   Tall   Tall  
## Levels: Short Medium Tall
cut(women$height, c(50,60,70,80), labels = FALSE)  #labels = FALSE, integer code is returned
##  [1] 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3

include.lowest:
cut(women$height, breaks = 3, include.lowest = TRUE)
##  [1] [58,62.7]   [58,62.7]   [58,62.7]   [58,62.7]   [58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: [58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, include.lowest = FALSE)
##  [1] (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: (58,62.7] (62.7,67.3] (67.3,72]

right:
#indicates if intervals are closed on the right or left
cut(women$height, breaks = 3, right = TRUE) #closed on the right
##  [1] (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, right = FALSE) #closed on the left
##  [1] [58,62.7)   [58,62.7)   [58,62.7)   [58,62.7)   [58,62.7)  
##  [6] [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3)
## [11] [67.3,72)   [67.3,72)   [67.3,72)   [67.3,72)   [67.3,72)  
## Levels: [58,62.7) [62.7,67.3) [67.3,72)
cut(women$height, breaks = 3, right = FALSE, include.lowest = TRUE)
##  [1] [58,62.7)   [58,62.7)   [58,62.7)   [58,62.7)   [58,62.7)  
##  [6] [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3)
## [11] [67.3,72]   [67.3,72]   [67.3,72]   [67.3,72]   [67.3,72]  
## Levels: [58,62.7) [62.7,67.3) [67.3,72]

dig.lab:
#number of digits used when labels are not given
cut(women$height, breaks = 3, dig.lab = 1)
##  [1] (58,63] (58,63] (58,63] (58,63] (58,63] (63,67] (63,67] (63,67]
##  [9] (63,67] (63,67] (67,72] (67,72] (67,72] (67,72] (67,72]
## Levels: (58,63] (63,67] (67,72]
cut(women$height, breaks = 3, dig.lab = 5)
##  [1] (57.986,62.667] (57.986,62.667] (57.986,62.667] (57.986,62.667]
##  [5] (57.986,62.667] (62.667,67.333] (62.667,67.333] (62.667,67.333]
##  [9] (62.667,67.333] (62.667,67.333] (67.333,72.014] (67.333,72.014]
## [13] (67.333,72.014] (67.333,72.014] (67.333,72.014]
## Levels: (57.986,62.667] (62.667,67.333] (67.333,72.014]

ordered_result:
cut(women$height, breaks = 3, ordered_result = FALSE) #levels are not ordered
##  [1] (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, ordered_result = TRUE) #levels are ordered
##  [1] (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]   (58,62.7]  
##  [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]   (67.3,72]  
## Levels: (58,62.7] < (62.7,67.3] < (67.3,72]
women$factorheight = cut(women$height, breaks = 3, labels = c("Short", "Medium", "Tall"))
par(mfrow = c(1,2))
plot(women$height, women$weight, col = 'deeppink', type = "b", ylab = 'Weight', xlab = 'Height (C.Variable)' ) #height as continuous variable
plot(women$factorheight, women$weight, col = c('gold', 'darkslategray1', 'violet'), ylab = 'Weight', xlab = 'Height (Factor)') #height as factor

No comments:

Post a Comment

duplicated() {base}

The duplicated() function determines which elements are duplicated and returns a logical vector. The parameters of the function are:   ...