The cut() function divides the range of x into intervals and codes each value according to the interval into which it falls. The leftmost interval corresponds to level one, the next leftmost to level two, and so on.
cut(x, breaks, labels = NULL, include.lowest = FALSE, right = TRUE, dig.lab = 3, ordered_result = FALSE, ...)
The parameters are:
-
x
: a numeric vector to be converted into a factor
-
breaks
: a numeric vector giving the cutting points or a single number giving the number of intervals
-
labels
: labels to be used for the levels created. If labels = FALSE, integer codes are returned instead of a factor
-
include.lowest
: logical, indicates whether a value equal to the lowest break should be included in the first interval (or a value equal to the highest break in the last interval, when right = FALSE)
-
right
: logical, indicates if the intervals should be closed on the right (and open on the left) or closed on the left (and open on the right)
-
dig.lab
: integer, number of digits used when labels are not given
-
ordered_result
: logical, indicates if the result should be returned as an ordered factor
head(women)
## height weight
## 1 58 115
## 2 59 117
## 3 60 120
## 4 61 123
## 5 62 126
## 6 63 129
summary(women)
## height weight
## Min. :58.0 Min. :115.0
## 1st Qu.:61.5 1st Qu.:124.5
## Median :65.0 Median :135.0
## Mean :65.0 Mean :136.7
## 3rd Qu.:68.5 3rd Qu.:148.0
## Max. :72.0 Max. :164.0
breaks:
cut(women$height, breaks = c(50,60,70,80)) #numeric vector giving the cutting points
## [1] (50,60] (50,60] (50,60] (60,70] (60,70] (60,70] (60,70] (60,70]
## [9] (60,70] (60,70] (60,70] (60,70] (60,70] (70,80] (70,80]
## Levels: (50,60] (60,70] (70,80]
cut(women$height, breaks = 3) #number of intervals
## [1] (58,62.7] (58,62.7] (58,62.7] (58,62.7] (58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
labels:
cut(women$height, c(50,60,70,80)) #no labels
## [1] (50,60] (50,60] (50,60] (60,70] (60,70] (60,70] (60,70] (60,70]
## [9] (60,70] (60,70] (60,70] (60,70] (60,70] (70,80] (70,80]
## Levels: (50,60] (60,70] (70,80]
cut(women$height, c(50,60,70,80), labels = c(1,2,3)) #with labels
## [1] 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
## Levels: 1 2 3
cut(women$height, breaks = 3, labels = c("Short", "Medium", "Tall")) #with labels
## [1] Short Short Short Short Short Medium Medium Medium Medium Medium
## [11] Tall Tall Tall Tall Tall
## Levels: Short Medium Tall
cut(women$height, c(50,60,70,80), labels = FALSE) #labels = FALSE, integer code is returned
## [1] 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
include.lowest:
cut(women$height, breaks = 3, include.lowest = TRUE)
## [1] [58,62.7] [58,62.7] [58,62.7] [58,62.7] [58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: [58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, include.lowest = FALSE)
## [1] (58,62.7] (58,62.7] (58,62.7] (58,62.7] (58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
right:
#indicates if intervals are closed on the right or left
cut(women$height, breaks = 3, right = TRUE) #closed on the right
## [1] (58,62.7] (58,62.7] (58,62.7] (58,62.7] (58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, right = FALSE) #closed on the left
## [1] [58,62.7) [58,62.7) [58,62.7) [58,62.7) [58,62.7)
## [6] [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3)
## [11] [67.3,72) [67.3,72) [67.3,72) [67.3,72) [67.3,72)
## Levels: [58,62.7) [62.7,67.3) [67.3,72)
cut(women$height, breaks = 3, right = FALSE, include.lowest = TRUE)
## [1] [58,62.7) [58,62.7) [58,62.7) [58,62.7) [58,62.7)
## [6] [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3) [62.7,67.3)
## [11] [67.3,72] [67.3,72] [67.3,72] [67.3,72] [67.3,72]
## Levels: [58,62.7) [62.7,67.3) [67.3,72]
dig.lab:
#number of digits used when labels are not given
cut(women$height, breaks = 3, dig.lab = 1)
## [1] (58,63] (58,63] (58,63] (58,63] (58,63] (63,67] (63,67] (63,67]
## [9] (63,67] (63,67] (67,72] (67,72] (67,72] (67,72] (67,72]
## Levels: (58,63] (63,67] (67,72]
cut(women$height, breaks = 3, dig.lab = 5)
## [1] (57.986,62.667] (57.986,62.667] (57.986,62.667] (57.986,62.667]
## [5] (57.986,62.667] (62.667,67.333] (62.667,67.333] (62.667,67.333]
## [9] (62.667,67.333] (62.667,67.333] (67.333,72.014] (67.333,72.014]
## [13] (67.333,72.014] (67.333,72.014] (67.333,72.014]
## Levels: (57.986,62.667] (62.667,67.333] (67.333,72.014]
ordered_result:
cut(women$height, breaks = 3, ordered_result = FALSE) #levels are not ordered
## [1] (58,62.7] (58,62.7] (58,62.7] (58,62.7] (58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: (58,62.7] (62.7,67.3] (67.3,72]
cut(women$height, breaks = 3, ordered_result = TRUE) #levels are ordered
## [1] (58,62.7] (58,62.7] (58,62.7] (58,62.7] (58,62.7]
## [6] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3] (62.7,67.3]
## [11] (67.3,72] (67.3,72] (67.3,72] (67.3,72] (67.3,72]
## Levels: (58,62.7] < (62.7,67.3] < (67.3,72]
# Bin height into three equal-width classes, then compare how weight relates
# to height when height is continuous versus when it is a factor.
women$factorheight <- cut(
  women$height,
  breaks = 3,
  labels = c("Short", "Medium", "Tall")
)

# Draw both plots side by side. par() returns the previous settings, which are
# kept so the layout can be restored afterwards instead of leaking into any
# later plots.
old_par <- par(mfrow = c(1, 2))

# Height as a continuous variable: points joined by lines.
plot(
  women$height, women$weight,
  col = "deeppink", type = "b",
  ylab = "Weight", xlab = "Height (C.Variable)"
)

# Height as a factor: plot() with a factor on x draws one box per level.
plot(
  women$factorheight, women$weight,
  col = c("gold", "darkslategray1", "violet"),
  ylab = "Weight", xlab = "Height (Factor)"
)

par(old_par)
No comments:
Post a Comment