# forcats包 (the forcats package): factor-handling examples
library(tidyverse)
# as.factor() vs as_factor(): how the level order is chosen
x <- c("A", "z", "g", NA)
as.factor(x) # base R: levels are sorted alphabetically
as_factor(x) # forcats: levels follow the order of first appearance
# Baseline: counts per religion, most frequent first
gss_cat$relig %>%
  fct_count() %>%
  arrange(desc(n))
# fct_anon: replace factor levels with arbitrary numeric identifiers
gss_cat$relig %>%
  fct_anon() %>%
  fct_count() %>%
  arrange(desc(n))
# Same anonymisation, with "X" prepended to every generated label
gss_cat$relig %>%
  fct_anon(prefix = "X") %>%
  fct_count()
# fct_c: concatenate factors, combining their levels
fa <- factor("a")
fb <- factor("b")
fab <- factor(letters[1:2])
# Base c() on the same factors, shown for comparison
c(fa, fb, fab)
# fct_c() on the same inputs, passed as a spliced list
fct_c(!!!list(fa, fb, fab))
# fct_collapse: merge several existing levels into manually defined groups
gss_cat$partyid %>%
  fct_count() %>%
  arrange(n)
partyid2 <- gss_cat$partyid %>%
  fct_collapse(
    missing = c("No answer", "Don't know"),
    other = "Other party",
    rep = c("Strong republican", "Not str republican"),
    ind = c("Ind,near rep", "Independent", "Ind,near dem"),
    dem = c("Not str democrat", "Strong democrat")
  )
partyid2 %>% fct_count() # now down to five levels
# fct_count: count the entries in each factor level
f <- sample(letters)[rpois(1000, 10)] %>% factor()
# Sort by descending count, first with dplyr and then via sort = TRUE
arrange(fct_count(f), desc(n))
f %>% fct_count(sort = TRUE)
# fct_cross: build one factor from the combinations of several inputs
fruit <- factor(c("apple", "kiwi", rep("apple", 2)))
colour <- factor(c("red", rep("green", 3)))
eaten <- rep(c("yes", "no"), times = 2) # a plain character vector
fruit %>% fct_cross(colour)
fruit %>% fct_cross(colour, eaten)
# keep_empty = TRUE keeps combinations with no observations as levels
fruit %>% fct_cross(colour, keep_empty = TRUE)
# fct_drop: drop levels that do not occur in the data ("c" here)
f <- factor(letters[1:2], levels = letters[1:3])
f %>% fct_drop()
# fct_expand: add further levels to a factor
f <- sample(letters[1:3], 20, replace = TRUE) %>% factor()
f %>% fct_expand("d", "e", "f")
# Making missing values explicit: turn NA into a real, visible factor level
f1 <- factor(c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b"))
table(f1, useNA = "ifany")
# fct_explicit_na() is deprecated as of forcats 1.0.0; its replacement is
# fct_na_value_to_level(). Passing level = "(Missing)" keeps the label that
# fct_explicit_na() produced by default. Older forcats installs, which do not
# have the replacement, fall back to the original call.
f2 <- if (utils::packageVersion("forcats") >= "1.0.0") {
  fct_na_value_to_level(f1, level = "(Missing)")
} else {
  fct_explicit_na(f1)
}
table(f2)
# fct_inorder / fct_infreq / fct_inseq: reorder levels by first appearance,
# by frequency, or by numeric value
f <- factor(rep(c("b", "a", "c"), times = c(2, 1, 3)))
# Default levels: a b c
f %>% fct_inorder() # Levels: b a c (order of first appearance)
f %>% fct_infreq() # Levels: c b a (ordered by how often each level occurs)
f %>% fct_inorder(ordered = TRUE)
f <- sample(1:10) %>% factor()
f %>% fct_inseq()
# fct_lump_*: lump rarely occurring levels together into "Other".
# The generic fct_lump() picks a strategy from whichever arguments it gets;
# the explicit fct_lump_lowfreq() / fct_lump_n() / fct_lump_prop() variants
# are used here, as the forcats documentation recommends for new code.
x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, 1, 1, 1, 1, 1)))
x %>% table()
# Neither n nor prop: lump the least frequent levels, keeping "Other" as the
# smallest level
x %>% fct_lump_lowfreq() %>% table()
# A positive n collapses the rarest levels, keeping the n most frequent (4)
x %>% fct_lump_n(n = 4) %>% table()
# Keep only the levels whose share of the data is greater than 0.1
x %>% fct_lump_prop(prop = 0.1) %>% table()
# Five levels tie at the cut-off; ties.method decides whether they are kept
fct_lump_n(x, n = 6, ties.method = "max")
fct_lump_n(x, n = 6)
# fct_match: test which elements of a factor match the given levels
gss_cat$marital %>% table()
gss_cat$marital %>%
  fct_match(c("Married", "Divorced")) %>%
  table()
# fct_other: keep the levels of interest and turn the rest into "Other"
x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, rep(1, 5))))
x %>% fct_other(keep = c("A", "B")) # everything except A and B is lumped
x %>% fct_other(drop = c("A", "B")) # only A and B are lumped
# fct_recode: rename factor levels by hand, written as new_name = "old_name"
x <- factor(c("apple", "bear", "banana", "dear"))
x %>% fct_recode(fruit = "apple", fruit = "banana")
# Same call shape, with NULL used as the new name for "apple"
fct_recode(x, NULL = "apple", fruit = "banana")
# The same mapping stored in a named vector and spliced in with !!!
levels <- c(fruit = "apple", fruit = "banana")
x %>% fct_recode(!!!levels)
# fct_reorder: reorder factor levels by sorting along another variable
# Three boxplots: original Species order, reordered by Sepal.Width, and
# reordered with .desc = TRUE
boxplot(Sepal.Width ~ Species, data = iris)
boxplot(Sepal.Width ~ fct_reorder(Species, Sepal.Width), data = iris)
boxplot(Sepal.Width ~ fct_reorder(Species, Sepal.Width, .desc = TRUE), data = iris)
# Keep the chicks whose integer code is below 10, then shuffle the Chick levels
chks <- ChickWeight %>%
  subset(as.integer(Chick) < 10) %>%
  transform(Chick = fct_shuffle(Chick))
ggplot(data = chks, mapping = aes(x = Time, y = weight, colour = Chick)) +
  geom_point() +
  geom_line()
# Note that lines match order in legend
ggplot(
  data = chks,
  mapping = aes(x = Time, y = weight, colour = fct_reorder2(Chick, Time, weight))
) +
  geom_point() +
  geom_line() +
  labs(colour = "Chick")
# fct_rev: reverse the order of the factor levels
f <- factor(letters[1:3])
f %>% fct_rev()
# unique() vs fct_unique(): the order in which unique values are returned
f <- letters[rpois(100, 10)] %>% factor()
f %>% unique() # in order of appearance
f %>% fct_unique() # in order of levels
# NOTE: the original script repeated the entire walkthrough verbatim from this
# point on (from the as.factor()/as_factor() comparison through fct_unique()).
# The second copy re-ran every example and redrew every plot without showing
# anything new, so it has been removed; the first copy earlier in this file is
# the one to read and run.
# 评论
# 发表评论