# 时序数据异常检测 (Time-series anomaly detection)
library(tidyverse)
library(anomalize)
data("tidyverse_cran_downloads")

# The anomalize workflow used below has three steps:
#   time_decompose(): splits the time series into seasonal, trend and
#                     remainder components
#   anomalize():      applies an anomaly detection method to the remainder
#   time_recompose(): computes what separates "normal" data from anomalies

# Scatter plot of the raw counts over time, one facet per package.
tidyverse_cran_downloads %>%
  ggplot(aes(x = date, y = count)) +
  geom_point(color = "#2c3e50", alpha = 0.25) +
  # `scales` is spelled out in full: the former `scale =` only reached this
  # argument through partial argument matching.
  facet_wrap(~ package, scales = "free_y", ncol = 3) +
  theme_minimal() +
  # Tilt the x-axis labels by 30 degrees.
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(
    title = "Tidyverse Package Daily Download Counts",
    subtitle = "Data from CRAN by way of cranlogs package"
  )
# Anomaly detection on the full data set: STL decomposition of `count`,
# IQR method on the remainder, recomposition, then the anomaly plot.
tidyverse_cran_downloads %>%
  time_decompose(target = count, method = "stl") %>%
  anomalize(target = remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
  labs(
    title = "Tidyverse Anomalies",
    subtitle = "STL + IQR Methods"
  )
# Keep only the rows for the lubridate package and remove any grouping.
lubridate_dloads <- ungroup(
  filter(tidyverse_cran_downloads, package == "lubridate")
)
# Lubridate anomalies, variant 1: "twitter" decomposition with
# trend = "2 months", then GESD (Generalized Extreme Studentized Deviation)
# on the remainder, recomposition, and the anomaly plot.
lubridate_dloads %>%
  time_decompose(target = count, method = "twitter", trend = "2 months") %>%
  anomalize(target = remainder, method = "gesd") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  labs(
    title = "Lubridate Anomalies",
    subtitle = "Twitter + GESD Methods"
  )
# iqr() and gesd() only work on numeric data. Note that for time series
# data, trend and seasonality should already have been removed.

# Lubridate anomalies, variant 2: STL decomposition with trend = "2 months",
# then the IQR method on the remainder, recomposition, and the anomaly plot.
lubridate_dloads %>%
  time_decompose(target = count, method = "stl", trend = "2 months") %>%
  anomalize(target = remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  labs(
    title = "Lubridate Anomalies",
    subtitle = "STL + IQR Methods"
  )
# Time frequency of the lubridate series with period = "auto".
lubridate_dloads %>%
  time_frequency(period = "auto")

# Time trend of the lubridate series with period = "auto".
lubridate_dloads %>%
  time_trend(period = "auto")
# Decomposition plot for lubridate: filter and ungroup, decompose `count`
# and anomalize the remainder with each function's default method, then
# plot the result with plot_anomaly_decomposition().
tidyverse_cran_downloads %>%
  filter(package == "lubridate") %>%
  ungroup() %>%
  time_decompose(target = count) %>%
  anomalize(target = remainder) %>%
  plot_anomaly_decomposition() +
  labs(title = "Decomposition of Anomalized Lubridate Downloads")
library(anomalize)
# NOTE(review): from here to the end of the script the statements repeat the
# first half of the file verbatim; this looks like a copy/paste duplicate --
# confirm and consider removing it.
data("tidyverse_cran_downloads")

# The anomalize workflow used below has three steps:
#   time_decompose(): splits the time series into seasonal, trend and
#                     remainder components
#   anomalize():      applies an anomaly detection method to the remainder
#   time_recompose(): computes what separates "normal" data from anomalies

# Scatter plot of the raw counts over time, one facet per package.
tidyverse_cran_downloads %>%
  ggplot(aes(x = date, y = count)) +
  geom_point(color = "#2c3e50", alpha = 0.25) +
  # `scales` is spelled out in full: the former `scale =` only reached this
  # argument through partial argument matching.
  facet_wrap(~ package, scales = "free_y", ncol = 3) +
  theme_minimal() +
  # Tilt the x-axis labels by 30 degrees.
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(
    title = "Tidyverse Package Daily Download Counts",
    subtitle = "Data from CRAN by way of cranlogs package"
  )
# Anomaly detection on the full data set: STL decomposition of `count`,
# IQR method on the remainder, recomposition, then the anomaly plot.
tidyverse_cran_downloads %>%
  time_decompose(target = count, method = "stl") %>%
  anomalize(target = remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.25) +
  labs(
    title = "Tidyverse Anomalies",
    subtitle = "STL + IQR Methods"
  )
# Keep only the rows for the lubridate package and remove any grouping.
lubridate_dloads <- ungroup(
  filter(tidyverse_cran_downloads, package == "lubridate")
)
# Lubridate anomalies, variant 1: "twitter" decomposition with
# trend = "2 months", then GESD (Generalized Extreme Studentized Deviation)
# on the remainder, recomposition, and the anomaly plot.
lubridate_dloads %>%
  time_decompose(target = count, method = "twitter", trend = "2 months") %>%
  anomalize(target = remainder, method = "gesd") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  labs(
    title = "Lubridate Anomalies",
    subtitle = "Twitter + GESD Methods"
  )
# iqr() and gesd() only work on numeric data. Note that for time series
# data, trend and seasonality should already have been removed.

# Lubridate anomalies, variant 2: STL decomposition with trend = "2 months",
# then the IQR method on the remainder, recomposition, and the anomaly plot.
lubridate_dloads %>%
  time_decompose(target = count, method = "stl", trend = "2 months") %>%
  anomalize(target = remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE) +
  labs(
    title = "Lubridate Anomalies",
    subtitle = "STL + IQR Methods"
  )
# Time frequency of the lubridate series with period = "auto".
lubridate_dloads %>%
  time_frequency(period = "auto")

# Time trend of the lubridate series with period = "auto".
lubridate_dloads %>%
  time_trend(period = "auto")
# Decomposition plot for lubridate: filter and ungroup, decompose `count`
# and anomalize the remainder with each function's default method, then
# plot the result with plot_anomaly_decomposition().
tidyverse_cran_downloads %>%
  filter(package == "lubridate") %>%
  ungroup() %>%
  time_decompose(target = count) %>%
  anomalize(target = remainder) %>%
  plot_anomaly_decomposition() +
  labs(title = "Decomposition of Anomalized Lubridate Downloads")
# 评论
# 发表评论