热线电话:13121318867

登录
2019-03-13 阅读量: 656
数据采样和离散化

数据采样

setwd("E:\\Rwork")
set.seed(1234)
index <- sample(1:nrow(iris),10, replace = T)
index
sample_set <- iris[index,]

index <- sample(nrow(iris),0.75*nrow(iris))
sample_set <- iris[index,]

数据离散化

data(iris)
buckets <- 10
maxseplen <- max(iris$Sepal.Length)
minseplen <- min(iris$Sepal.Length)
cutpoints <- seq(minseplen, maxseplen, by = (maxseplen - minseplen ) / buckets )

cutpoints

cutseplen <- cut(iris$Sepal.Length, breaks = cutpoints , include.lowest = TRUE)
newiris <- data.frame(contseplen = iris$Sepal.Length , discseplen = cutseplen)
newiris

0.9178
3
关注作者
收藏
评论(0)

发表评论

暂无数据