我找到了DeepAnT(一种用于时间序列中无监督异常检测的深度学习方法(的例子,并想尝试一下。
所以我试着转换Python脚本(https://github.com/swlee23/deep-learning-time-series-anomaly-detection/blob/master/deep-ant-main.ipynb)由于我在拟合模型时出错,所以被卡住了。
代码:
library(ggplot2)
library(rlist)
library(keras)
library(tensorflow)
library(readr)
### Anomaly Detection ###
anomaly_detector <- function(prediction_seq, ground_truth_seq){
# calculate Euclidean between actual seq and predicted seq
dist <- norm(ground_truth_seq - prediction_seq, type = "F")
if (dist > anm_det_thr){
return (TRUE) # anomaly
}
else{
return (FALSE) # normal
}
}
### Hyperparameters ###
w = 2000 # History window (number of time stamps taken into account)
# i.e., filter(kernel) size
p_w = 300 # Prediction window (number of time stampes required to be
# predicted)
n_features = 1 # Univariate time series
kernel_size = 2 # Size of filter in conv layers
num_filt_1 = 32 # Number of filters in first conv layer
num_filt_2 = 32 # Number of filters in second conv layer
num_nrn_dl = 40 # Number of neurons in dense layer
num_nrn_ol = p_w # Number of neurons in output layer
conv_strides = 1
pool_size_1 = 2 # Length of window of pooling layer 1
pool_size_2 = 2 # Length of window of pooling layer 2
pool_strides_1 = 2 # Stride of window of pooling layer 1
pool_strides_2 = 2 # Stride of window of pooling layer 2
epochs = 30
dropout_rate = 0.5 # Dropout rate in the fully connected layer
learning_rate = 2e-5
anm_det_thr = 0.8 # Threshold for classifying anomaly (0.5~0.8)
# Loading data
df_sine <- read_csv('https://raw.githubusercontent.com/swlee23/Deep-Learning-Time-Series-Anomaly-Detection/master/data/sinewave.csv')
### Data preprocessing ###
# split a univariate sequence into samples
split_sequence <- function(sequence){
X <- list()
y <- list()
for(i in 1:length(sequence)){
# find the end of this pattern
end_ix <- i + w
out_end_ix <- end_ix + p_w
# check if we are beyond the sequence
if (out_end_ix > length(sequence)){
# print("if-break")
break
}
# gather input and output parts of the pattern
seq_x <- list(sequence[i:end_ix])
seq_y <- list(sequence[end_ix:out_end_ix])
X[length(X)+1] <- seq_x
y[length(y)+1] <- seq_y
}
result <- list("x" = X, "y" = y)
return(result)
}
# define input sequence
raw_seq = df_sine['sinewave'][[1]]
# split into samples
batch_ <- split_sequence(raw_seq)
batch_sample <- batch_$x
batch_label <-batch_$y
# tried to convert to matrix (did not change anything)
batch_sample_2 <- matrix(unlist(batch_sample), ncol = 2001, byrow = TRUE)
batch_label_2 <- matrix(unlist(batch_label), ncol = 301, byrow = TRUE)
### Generate model for predictor ###
model <- keras_model_sequential() %>%
layer_conv_1d(filters = num_filt_1,
kernel_size = kernel_size,
strides = conv_strides,
padding = 'valid',
activation = 'relu',
input_shape = c(w,n_features)
) %>% layer_max_pooling_1d(
pool_size = pool_size_1
)%>% layer_conv_1d(
filters = num_filt_2,
kernel_size = kernel_size,
strides = conv_strides,
padding = 'valid',
activation = 'relu'
) %>% layer_max_pooling_1d(
pool_size = pool_size_2
)
# Flatten tensor into a batch of vectors
# Input Tensor Shape: [batch_size, 0.25 * w, num_filt_1 * num_filt_2]
# Output Tensor Shape: [batch_size, 0.25 * w * num_filt_1 * num_filt_2]
model <- model %>% layer_flatten()
# Dense Layer (Output layer)
# Densely connected layer with 1024 neurons
# Input Tensor Shape: [batch_size, 0.25 * w * num_filt_1 * num_filt_2]
# Output Tensor Shape: [batch_size, 1024]
model <- model %>% layer_dense(units = num_nrn_dl, activation = 'relu')
# Dropout
# Prevents overfitting in deep neural networks
model <- model %>% layer_dropout(rate = dropout_rate)
# Output layer
# Input Tensor Shape: [batch_size, 1024]
# Output Tensor Shape: [batch_size, p_w]
model <- model %>% layer_dense(units = num_nrn_ol)
# Summarize model structure
summary(model)
### Configure model ###
model <- model %>% compile(optimizer = 'adam',
loss = 'mean_absolute_error')
### Training ###
model_fit <- model %>% fit(batch_sample_2,
batch_label_2,
epochs = epochs,
verbose = 1)
我得到的错误:
Error in py_call_impl(callable, dots$args, dots$keywords) :
ValueError: Error when checking input: expected conv1d_6_input to have 3 dimensions, but got array with shape (2701, 2001)
在Python中,batch_sample
如下所示:
[[0.8737364 ]
[0.90255357]
[0.92780878]
...
[0.7671179 ]
[0.80588467]
[0.84147098]]
batch_label
像这样:
[[0.84147098 0.8737364 0.90255357 ... 0.72532366 0.7671179 0.80588467]
[0.8737364 0.90255357 0.92780878 ... 0.7671179 0.80588467 0.84147098]
[0.90255357 0.92780878 0.94940235 ... 0.80588467 0.84147098 0.8737364 ]
...
[0.80588467 0.84147098 0.8737364 ... 0.68066691 0.72532366 0.7671179 ]
[0.84147098 0.8737364 0.90255357 ... 0.72532366 0.7671179 0.80588467]
[0.8737364 0.90255357 0.92780878 ... 0.7671179 0.80588467 0.84147098]]
因此,我需要为拟合过程重塑数据,但我所尝试的一切都不起作用。尝试了列表(就像Python中一样(、矩阵和向量的列表,但每次维度错误都会累积。我需要如何塑造数据,使其符合拟合函数的要求?
您的输入形状不正确。模型需要3个dims的形状输入:(n_samples,window_size,n_features(
在您的代码中,n_features等于1,因此您所需要做的就是将当前输入形状重塑为模型期望的形状(基于输入配置(,因此:(27012001(->(27012001,1(
为了做到这一点,您可以使用listarrays 中的expand_dims等函数
expand_dims(x, -1)