Algorithms for selecting informative features fall into three groups: Wrappers, Filters, and Embedded methods. (I leave these terms untranslated, since their Russian renderings sound vague to the Russian-speaking community; author's note.)
Wrapper algorithms search the space of possible input-variable subsets and evaluate each candidate subset by training the full model on the available data. Wrappers can be very expensive and risk overfitting the model if no validation sample is used (author's note).
Filter algorithms resemble wrappers in that they also search over subsets of inputs, but instead of running the full model, the importance of a subset for the output variable is estimated with a simpler (filtering) algorithm.
Embedded algorithms evaluate the importance of the input features using heuristics built into the training procedure itself.
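A minimal sketch of the contrast on toy data (all names here are illustrative, not from the article): a filter scores a feature with a cheap statistic, a wrapper scores a subset by actually fitting the model, and an embedded method reads importances off the trained model itself.

set.seed(42)
toy <- data.frame(a = rnorm(100), b = rnorm(100))
toy$out <- toy$a + rnorm(100, sd = 0.1)
abs(cor(toy$a, toy$out))                    # filter: a cheap relevance score for "a"
summary(lm(out ~ a, data = toy))$r.squared  # wrapper: fit the actual model on subset {a}
coef(lm(out ~ ., data = toy))               # embedded: importances (here, coefficients) as a by-product of training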
Approaches to search include:
- Brute force (exhaustive search)
- Best-first search
- Simulated annealing
- Genetic algorithm
- Greedy forward selection (inclusion)
- Greedy backward elimination (exclusion)
- Particle swarm optimization
- Targeted projection pursuit
- Scatter search
- Variable neighborhood search
Exhaustive search over feature subsets has complexity O(2^n).
output = input1 * 10 + input2 * 10 (a linear dependence)
output = input1 ^ 2 + input2 ^ 2 (a nonlinear dependence)
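A cheap filter such as Pearson correlation detects the first dependence but is nearly blind to the second. A quick illustrative check on synthetic data (not from the article's code):

set.seed(0)
input1 <- runif(10000, min = -1, max = 1)
input2 <- runif(10000, min = -1, max = 1)
cor(input1, input1 * 10 + input2 * 10)  # about 0.7: the linear link is visible
cor(input1, input1 ^ 2 + input2 ^ 2)    # about 0: the quadratic link is invisible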
m = 2^n,
where n is the total number of input features.
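Brute force therefore means scoring every one of the m subsets, which is feasible only for small n. A sketch enumerating the subsets of three hypothetical features:

features <- c("x", "y", "noise")
masks <- expand.grid(rep(list(c(FALSE, TRUE)), length(features)))
nrow(masks)  # 2^3 = 8 candidate subsets
apply(masks, 1, function(m) paste(features[as.logical(m)], collapse = " + "))  # "" is the empty subset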
library(onion)  # provides the Stanford bunny point cloud via data(bunny)
library(rgl)    # open3d() and points3d() come from rgl
data(bunny)

# XYZ point plot
open3d()
points3d(bunny, col = 8, size = 0.1)

bunny_dat <- as.data.frame(bunny)

# pool the x and y coordinates and resample them to make ten pure-noise inputs
inputs <- append(as.numeric(bunny_dat$x), as.numeric(bunny_dat$y))
for (i in 1:10){
  naming <- paste('input_noise_', i, sep = '')
  bunny_dat[, naming] <- inputs[sample(length(inputs), nrow(bunny_dat), replace = T)]
}
### greedy search with filter function
library(FSelector)

sampleA <- bunny_dat[sample(nrow(bunny_dat), nrow(bunny_dat) / 2, replace = F),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]
sampleB <- bunny_dat[!row.names(bunny_dat) %in% rownames(sampleA),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]

# fitness: out-of-sample R^2 of a linear model fitted on sampleA, validated on sampleB
linear_fit <- function(subset){
  dat <- sampleA[, c(subset, "z")]
  lm_m <- lm(formula = z ~ ., data = dat, model = T)
  lm_predict <- predict(lm_m, newdata = sampleB)
  r_sq_validate <- 1 - sum((sampleB$z - lm_predict) ^ 2) / sum((sampleB$z - mean(sampleB$z)) ^ 2)
  print(subset)
  print(r_sq_validate)
  return(r_sq_validate)
}

#subset <- backward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit)
subset <- forward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit)
> subset
[1] "x"             "y"             "input_noise_2" "input_noise_5" "input_noise_6" "input_noise_8" "input_noise_9"
> summary(lm_m)

Call:
lm(formula = z ~ ., data = dat, model = T)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.060613 -0.022650 -0.000173  0.024939  0.048544 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    0.0232453  0.0005581  41.651  < 2e-16 ***
x             -0.0257686  0.0052998  -4.862 1.17e-06 ***
y             -0.1572786  0.0052585 -29.910  < 2e-16 ***
input_noise_2 -0.0017249  0.0027680  -0.623    0.533    
input_noise_5 -0.0027391  0.0027848  -0.984    0.325    
input_noise_6  0.0032417  0.0027907   1.162    0.245    
input_noise_8  0.0044998  0.0027723   1.623    0.105    
input_noise_9  0.0006839  0.0027808   0.246    0.806    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02742 on 17965 degrees of freedom
Multiple R-squared:  0.04937,	Adjusted R-squared:  0.049 
F-statistic: 133.3 on 7 and 17965 DF,  p-value: < 2.2e-16
subset <- backward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit)
#subset <- forward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit)
> subset
[1] "x"             "y"             "input_noise_2" "input_noise_5" "input_noise_6" "input_noise_8" "input_noise_9"
> summary(lm_m)

Call:
lm(formula = z ~ ., data = dat, model = T)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.060613 -0.022650 -0.000173  0.024939  0.048544 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    0.0232453  0.0005581  41.651  < 2e-16 ***
x             -0.0257686  0.0052998  -4.862 1.17e-06 ***
y             -0.1572786  0.0052585 -29.910  < 2e-16 ***
input_noise_2 -0.0017249  0.0027680  -0.623    0.533    
input_noise_5 -0.0027391  0.0027848  -0.984    0.325    
input_noise_6  0.0032417  0.0027907   1.162    0.245    
input_noise_8  0.0044998  0.0027723   1.623    0.105    
input_noise_9  0.0006839  0.0027808   0.246    0.806    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02742 on 17965 degrees of freedom
Multiple R-squared:  0.04937,	Adjusted R-squared:  0.049 
F-statistic: 133.3 on 7 and 17965 DF,  p-value: < 2.2e-16
> print(subset)
[1] "x" "y"
> print(r_sq_validate)
[1] 0.05185492
> summary(lm_m)

Call:
lm(formula = z ~ ., data = dat, model = T)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.059884 -0.022653 -0.000209  0.024955  0.048238 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.0233808  0.0005129  45.590  < 2e-16 ***
x           -0.0257813  0.0052995  -4.865 1.15e-06 ***
y           -0.1573098  0.0052576 -29.920  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02742 on 17970 degrees of freedom
Multiple R-squared:  0.04908,	Adjusted R-squared:  0.04898 
F-statistic: 463.8 on 2 and 17970 DF,  p-value: < 2.2e-16
lm_m <- lm(formula = z ~ x * y, data = dat, model = T)
> summary(lm_m)

Call:
lm(formula = z ~ x * y, data = dat, model = T)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.057761 -0.023067 -0.000119  0.024762  0.049747 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.0196766  0.0006545  30.062   <2e-16 ***
x           -0.1513484  0.0148113 -10.218   <2e-16 ***
y           -0.1084295  0.0075183 -14.422   <2e-16 ***
x:y          1.3771299  0.1517363   9.076   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02736 on 17969 degrees of freedom
Multiple R-squared:  0.05342,	Adjusted R-squared:  0.05327 
F-statistic: 338.1 on 3 and 17969 DF,  p-value: < 2.2e-16
> lm_predict <- predict(lm_m,
+                       newdata = sampleB)
> 1 - sum((sampleB$z - lm_predict) ^ 2) / sum((sampleB$z - mean(sampleB$z)) ^ 2)
[1] 0.05464066
# fitness: in-sample F-statistic of the linear model (a filter-style criterion)
linear_fit_f <- function(subset){
  dat <- sampleA[, c(subset, "z")]
  lm_m <- lm(formula = z ~ ., data = dat, model = T)
  print(subset)
  print(summary(lm_m)$fstatistic[[1]])
  return(summary(lm_m)$fstatistic[[1]])
}

#subset <- backward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit_f)
subset <- forward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = linear_fit_f)
> r_sq_validate
[1] 0.05034534
# Pearson correlation of each input with the output (the simplest filter)
correlation_arr <- data.frame()
for (i in 1:12){
  correlation_arr[i, 1] <- colnames(sampleA)[i]
  correlation_arr[i, 2] <- cor(sampleA[, i], sampleA[, 'z'])
}
> correlation_arr
               V1            V2
1               x  0.0413782832
2               y -0.2187061876
3   input_noise_1 -0.0097719425
4   input_noise_2 -0.0019297383
5   input_noise_3  0.0002143946
6   input_noise_4 -0.0142325764
7   input_noise_5 -0.0048206943
8   input_noise_6  0.0090877674
9   input_noise_7 -0.0152897433
10  input_noise_8  0.0143477495
11  input_noise_9  0.0027560459
12 input_noise_10 -0.0079526578
> cor(sampleA$x * sampleA$y, sampleA$z)
[1] 0.1211382
library(gbm)

gbm_dat <- bunny_dat[, c("x", "y",
                         "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                         "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                         "input_noise_9", "input_noise_10",
                         "z")]

# embedded approach: the boosted-tree model reports feature importances as a by-product
gbm_fit <- gbm(formula = z ~ .,
               distribution = "gaussian",
               data = gbm_dat,
               n.trees = 500,
               interaction.depth = 12,
               n.minobsinnode = 100,
               shrinkage = 0.0001,
               bag.fraction = 0.9,
               train.fraction = 0.7,
               n.cores = 6)

gbm.perf(object = gbm_fit, plot.it = TRUE, oobag.curve = F, overlay = TRUE)

summary(gbm_fit)
> summary(gbm_fit)
                           var rel.inf
y                            y 69.7919
x                            x 30.2081
input_noise_1    input_noise_1  0.0000
input_noise_2    input_noise_2  0.0000
input_noise_3    input_noise_3  0.0000
input_noise_4    input_noise_4  0.0000
input_noise_5    input_noise_5  0.0000
input_noise_6    input_noise_6  0.0000
input_noise_7    input_noise_7  0.0000
input_noise_8    input_noise_8  0.0000
input_noise_9    input_noise_9  0.0000
input_noise_10  input_noise_10  0.0000
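Since the boosted model drives the noise importances to exactly zero, the embedded selection step can simply keep the features with positive relative influence. A small follow-up sketch (summary.gbm's plotit argument only suppresses the bar plot):

infl <- summary(gbm_fit, plotit = FALSE)
selected <- as.character(infl$var[infl$rel.inf > 0])
selected  # "y" "x"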
### simulated annealing search with linear model interactions stats
library(scales)
library(GenSA)

sampleA <- bunny_dat[sample(nrow(bunny_dat), nrow(bunny_dat) / 2, replace = F),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]
sampleB <- bunny_dat[!row.names(bunny_dat) %in% rownames(sampleA),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]

# calculate parameters
predictor_number <- dim(sampleA)[2] - 1
sample_size <- dim(sampleA)[1]
par_v <- runif(predictor_number, min = 0, max = 1)  # one real-valued gene per predictor
par_low <- rep(0, times = predictor_number)
par_upp <- rep(1, times = predictor_number)

############### the fitness function
sa_fit_f <- function(par){
  # genes at or above the threshold switch the corresponding predictor on
  indexes <- c(1:predictor_number)
  for (i in 1:predictor_number){
    if (par[i] >= threshold) {
      indexes[i] <- i
    } else {
      indexes[i] <- 0
    }
  }
  local_predictor_number <- 0
  for (i in 1:predictor_number){
    if (indexes[i] > 0) {
      local_predictor_number <- local_predictor_number + 1
    }
  }
  if (local_predictor_number > 0) {
    sampleAf <- as.data.frame(sampleA[, c(indexes[], dim(sampleA)[2])])  # zero indexes are silently dropped by R
    lm_m <- lm(formula = z ~ .^2, data = sampleAf, model = T)            # main effects plus all pairwise interactions
    lm_predict <- predict(lm_m, newdata = sampleB)
    r_sq_validate <- 1 - sum((sampleB$z - lm_predict) ^ 2) / sum((sampleB$z - mean(sampleB$z)) ^ 2)
  } else {
    r_sq_validate <- 0
  }
  return(-r_sq_validate)  # GenSA minimizes, so negate the validation R^2
}

# estimating threshold for variable inclusion
threshold <- 0.5 # do it simply

# run feature selection
start <- Sys.time()
sao <- GenSA(par = par_v, fn = sa_fit_f, lower = par_low, upper = par_upp,
             control = list(
               #maxit = 10
               max.time = 300,
               smooth = F,
               simple.function = F))
trace_ff <- data.frame(sao$trace)$function.value
plot(trace_ff, type = "l")
percent(- sao$value)
final_vector <- c((sao$par >= threshold), T)
names(sampleA)[final_vector]
final_sample <- as.data.frame(sampleA[, final_vector])
Sys.time() - start

# check model
lm_m <- lm(formula = z ~ .^2, data = sampleA[, final_vector], model = T)
summary(lm_m)
[1] "5.53%"
> final_vector <- c((sao$par >= threshold), T)
> names(sampleA)[final_vector]
[1] "x"             "y"             "input_noise_7" "input_noise_8" "input_noise_9" "z"
> summary(lm_m)

Call:
lm(formula = z ~ .^2, data = sampleA[, final_vector], model = T)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.058691 -0.023202 -0.000276  0.024953  0.050618 

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  0.0197777  0.0007776  25.434   <2e-16 ***
x                           -0.1547889  0.0154268 -10.034   <2e-16 ***
y                           -0.1148349  0.0085787 -13.386   <2e-16 ***
input_noise_7               -0.0102894  0.0071871  -1.432    0.152    
input_noise_8               -0.0013928  0.0071508  -0.195    0.846    
input_noise_9                0.0026736  0.0071910   0.372    0.710    
x:y                          1.3098676  0.1515268   8.644   <2e-16 ***
x:input_noise_7              0.0352997  0.0709842   0.497    0.619    
x:input_noise_8              0.0653103  0.0714883   0.914    0.361    
x:input_noise_9              0.0459939  0.0716704   0.642    0.521    
y:input_noise_7              0.0512392  0.0710949   0.721    0.471    
y:input_noise_8              0.0563148  0.0707809   0.796    0.426    
y:input_noise_9             -0.0085022  0.0710267  -0.120    0.905    
input_noise_7:input_noise_8  0.0129156  0.0374855   0.345    0.730    
input_noise_7:input_noise_9  0.0519535  0.0376869   1.379    0.168    
input_noise_8:input_noise_9  0.0128397  0.0379640   0.338    0.735    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0274 on 17957 degrees of freedom
Multiple R-squared:  0.05356,	Adjusted R-squared:  0.05277 
F-statistic: 67.75 on 15 and 17957 DF,  p-value: < 2.2e-16
# fitness based on p-values
sa_fit_f2 <- function(par){
  indexes <- c(1:predictor_number)
  for (i in 1:predictor_number){
    if (par[i] >= threshold) {
      indexes[i] <- i
    } else {
      indexes[i] <- 0
    }
  }
  local_predictor_number <- 0
  for (i in 1:predictor_number){
    if (indexes[i] > 0) {
      local_predictor_number <- local_predictor_number + 1
    }
  }
  if (local_predictor_number > 0) {
    sampleAf <- as.data.frame(sampleA[, c(indexes[], dim(sampleA)[2])])
    lm_m <- lm(formula = z ~ .^2, data = sampleAf, model = T)
    mean_val <- mean(summary(lm_m)$coefficients[, 4])  # mean p-value across all coefficients
  } else {
    mean_val <- 1
  }
  return(mean_val)
}

# estimating threshold for variable inclusion
threshold <- 0.5 # do it simply

# run feature selection
start <- Sys.time()
sao <- GenSA(par = par_v, fn = sa_fit_f2, lower = par_low, upper = par_upp,
             control = list(
               #maxit = 10
               max.time = 60,
               smooth = F,
               simple.function = F))
trace_ff <- data.frame(sao$trace)$function.value
plot(trace_ff, type = "l")
percent(- sao$value)
final_vector <- c((sao$par >= threshold), T)
names(sampleA)[final_vector]
final_sample <- as.data.frame(sampleA[, final_vector])
Sys.time() - start
> percent(- sao$value)
[1] "-4.7e-208%"
> final_vector <- c((sao$par >= threshold), T)
> names(sampleA)[final_vector]
[1] "y" "z"
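The collapse to a single predictor is expected with this fitness: the mean p-value of a model containing only the strongest input is essentially zero, and every added noise term contributes a roughly uniform p-value that drags the mean up. An illustrative check on the training half (not in the original code):

mean(summary(lm(z ~ y, data = sampleA))$coefficients[, 4])                  # essentially zero
mean(summary(lm(z ~ y + input_noise_1, data = sampleA))$coefficients[, 4])  # noticeably larger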
### greedy search with wrapper GBM validation fitness
library(FSelector)
library(gbm)

sampleA <- bunny_dat[sample(nrow(bunny_dat), nrow(bunny_dat) / 2, replace = F),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]
sampleB <- bunny_dat[!row.names(bunny_dat) %in% rownames(sampleA),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]

# wrapper fitness: out-of-sample R^2 of a small GBM trained on the candidate subset
gbm_fit <- function(subset){
  dat <- sampleA[, c(subset, "z")]
  gbm_fit <- gbm(formula = z ~ .,
                 distribution = "gaussian",
                 data = dat,
                 n.trees = 50,
                 interaction.depth = 10,
                 n.minobsinnode = 100,
                 shrinkage = 0.1,
                 bag.fraction = 0.9,
                 train.fraction = 1,
                 n.cores = 7)
  gbm_predict <- predict(gbm_fit, newdata = sampleB, n.trees = 50)
  r_sq_validate <- 1 - sum((sampleB$z - gbm_predict) ^ 2) / sum((sampleB$z - mean(sampleB$z)) ^ 2)
  print(subset)
  print(r_sq_validate)
  return(r_sq_validate)
}

subset <- backward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = gbm_fit)
subset <- forward.search(attributes = names(sampleA)[1:(ncol(sampleA) - 1)], eval.fun = gbm_fit)
> subset
[1] "x" "y"
> r_sq_validate
[1] 0.2363794
> subset
 [1] "x"              "y"              "input_noise_1"  "input_noise_2"  "input_noise_3"  "input_noise_4"  "input_noise_5"  "input_noise_6"  "input_noise_7" 
[10] "input_noise_9"  "input_noise_10"
> r_sq_validate
[1] 0.2266737
Two variables are statistically independent when their joint density factorizes:
p(x, y) = p(x) * p(y)
and dependent when it does not:
p(x, y) != p(x) * p(y)
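This is exactly what mutual information measures: it is zero when the joint density factorizes and positive otherwise. A quick empirical check with the infotheo package used later in this article (synthetic data, illustrative only):

library(infotheo)
set.seed(1)
x_d <- discretize(runif(10000), disc = "equalfreq", nbins = 5)[, 1]
y_ind <- discretize(runif(10000), disc = "equalfreq", nbins = 5)[, 1]
mutinformation(x_d, y_ind)  # close to 0: p(x, y) = p(x) * p(y)
mutinformation(x_d, x_d)    # equals entropy(x_d): fully dependent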
Granger, C., E. Maasoumi and J. Racine, "A Dependence Metric for Possibly Nonlinear Processes", Journal of Time Series Analysis 25, 2004, 649-669.
Watanabe, S. (1960). "Information theoretical analysis of multivariate correlation", IBM Journal of Research and Development 4, 66-82.
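Watanabe's multiinformation (total correlation) generalizes pairwise mutual information to several variables: it equals the sum of the marginal entropies minus the joint entropy, and is zero only when all variables are jointly independent. A sketch verifying the identity with infotheo on synthetic discrete data (variable names are illustrative):

library(infotheo)
set.seed(2)
d <- data.frame(a = sample(1:5, 1000, replace = TRUE))
d$b <- d$a                                 # a redundant copy inflates total correlation
d$c <- sample(1:5, 1000, replace = TRUE)   # an independent variable adds almost nothing
multiinformation(d)                        # total correlation of the vector (a, b, c)
sum(sapply(d, entropy)) - entropy(d)       # the same quantity via the identity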
optim_var_num <- log(x = sample_size / 100, base = round(mean_levels, 0))
threshold <- 1 - optim_var_num / predictor_number
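To get a feel for these formulas, plug in this article's numbers: 17,973 training rows in sampleA, 12 candidate predictors, and 5 discretization levels per variable. The base of the logarithm is apparently chosen so that roughly 100 observations remain per joint level of the selected inputs (a plausible reading of the formula):

sample_size <- 17973
predictor_number <- 12
mean_levels <- 5
optim_var_num <- log(x = sample_size / 100, base = round(mean_levels, 0))
optim_var_num                                      # about 3.2 inputs the data can support
threshold <- 1 - optim_var_num / predictor_number
threshold                                          # about 0.73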
############## simulated annealing search with mutual information fitness function
library(infotheo) # measured in nats, converted to bits
library(scales)
library(GenSA)

sampleA <- bunny_dat[sample(nrow(bunny_dat), nrow(bunny_dat) / 2, replace = F),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]
sampleB <- bunny_dat[!row.names(bunny_dat) %in% rownames(sampleA),
                     c("x", "y",
                       "input_noise_1", "input_noise_2", "input_noise_3", "input_noise_4",
                       "input_noise_5", "input_noise_6", "input_noise_7", "input_noise_8",
                       "input_noise_9", "input_noise_10",
                       "z")]

# discretize all variables
dat <- sampleA
disc_levels <- 5
for (i in 1:13){
  naming <- paste(names(dat[i]), 'discrete', sep = "_")
  dat[, naming] <- discretize(dat[, names(dat[i])], disc = "equalfreq", nbins = disc_levels)[, 1]
}
sampleA <- dat[, 14:26]  # keep only the discretized columns

# calculate parameters
predictor_number <- dim(sampleA)[2] - 1
sample_size <- dim(sampleA)[1]
par_v <- runif(predictor_number, min = 0, max = 1)
par_low <- rep(0, times = predictor_number)
par_upp <- rep(1, times = predictor_number)

# load functions to memory
# null distribution of predictor multiinformation: shuffle every input column independently
shuffle_f_inp <- function(x = data.frame(), iterations_inp, quantile_val_inp){
  mutins <- c(1:iterations_inp)
  for (count in 1:iterations_inp){
    xx <- data.frame(1:dim(x)[1])
    for (count1 in 1:(dim(x)[2] - 1)){
      y <- as.data.frame(x[, count1])
      y$count <- sample(1 : dim(x)[1], dim(x)[1], replace = F)
      y <- y[order(y$count), ]
      xx <- cbind(xx, y[, 1])
    }
    mutins[count] <- multiinformation(xx[, 2:dim(xx)[2]])
  }
  quantile(mutins, probs = quantile_val_inp)
}

# null distribution of the predictors-to-output information: shuffle the output column
shuffle_f <- function(x = data.frame(), iterations, quantile_val){
  height <- dim(x)[1]
  mutins <- c(1:iterations)
  for (count in 1:iterations){
    x$count <- sample(1 : height, height, replace = F)
    y <- as.data.frame(c(x[dim(x)[2] - 1], x[dim(x)[2]]))
    y <- y[order(y$count), ]
    x[dim(x)[2]] <- NULL
    x[dim(x)[2]] <- NULL
    x$dep <- y[, 1]
    rm(y)
    receiver_entropy <- entropy(x[, dim(x)[2]])
    received_inf <- mutinformation(x[, 1 : dim(x)[2] - 1], x[, dim(x)[2]])
    corr_ff <- received_inf / receiver_entropy
    mutins[count] <- corr_ff
  }
  quantile(mutins, probs = quantile_val)
}

############### the fitness function
fitness_f <- function(par){
  indexes <- c(1:predictor_number)
  for (i in 1:predictor_number){
    if (par[i] >= threshold) {
      indexes[i] <- i
    } else {
      indexes[i] <- 0
    }
  }
  local_predictor_number <- 0
  for (i in 1:predictor_number){
    if (indexes[i] > 0) {
      local_predictor_number <- local_predictor_number + 1
    }
  }
  if (local_predictor_number > 1) {
    sampleAf <- as.data.frame(sampleA[, c(indexes[], dim(sampleA)[2])])
    # redundancy of the predictor set: its multiinformation relative to the maximum possible
    pred_entrs <- c(1:local_predictor_number)
    for (count in 1:local_predictor_number){
      pred_entrs[count] <- entropy(sampleAf[count])
    }
    max_pred_ent <- sum(pred_entrs) - max(pred_entrs)
    pred_multiinf <- multiinformation(sampleAf[, 1:dim(sampleAf)[2] - 1])
    pred_multiinf <- pred_multiinf - shuffle_f_inp(sampleAf, iterations_inp, quantile_val_inp)
    if (pred_multiinf < 0){
      pred_multiinf <- 0
    }
    pred_mult_perc <- pred_multiinf / max_pred_ent
    # noise floor and the information actually delivered to the output
    inf_corr_val <- shuffle_f(sampleAf, iterations, quantile_val)
    receiver_entropy <- entropy(sampleAf[, dim(sampleAf)[2]])
    received_inf <- mutinformation(sampleAf[, 1:local_predictor_number], sampleAf[, dim(sampleAf)[2]])
    if (inf_corr_val - (received_inf / receiver_entropy) < 0){
      fact_ff <- (inf_corr_val - (received_inf / receiver_entropy)) * (1 - pred_mult_perc)
    } else {
      fact_ff <- inf_corr_val - (received_inf / receiver_entropy)
    }
  } else if (local_predictor_number == 1) {
    sampleAf <- as.data.frame(sampleA[, c(indexes[], dim(sampleA)[2])])
    inf_corr_val <- shuffle_f(sampleAf, iterations, quantile_val)
    receiver_entropy <- entropy(sampleAf[, dim(sampleAf)[2]])
    received_inf <- mutinformation(sampleAf[, 1:local_predictor_number], sampleAf[, dim(sampleAf)[2]])
    fact_ff <- inf_corr_val - (received_inf / receiver_entropy)
  } else {
    fact_ff <- 0
  }
  return(fact_ff)
}

########## estimating threshold for variable inclusion
iterations = 10
quantile_val = 0.9
iterations_inp = 1
quantile_val_inp = 1

levels_arr <- numeric()
for (i in 1:predictor_number){
  levels_arr[i] <- length(unique(sampleA[, i]))
}
mean_levels <- mean(levels_arr)
optim_var_num <- log(x = sample_size / 100, base = round(mean_levels, 0))
if (optim_var_num / predictor_number < 1){
  threshold <- 1 - optim_var_num / predictor_number
} else {
  threshold <- 0.5
}

# run feature selection
start <- Sys.time()
sao <- GenSA(par = par_v, fn = fitness_f, lower = par_low, upper = par_upp,
             control = list(
               #maxit = 10
               max.time = 300,
               smooth = F,
               simple.function = F))
trace_ff <- data.frame(sao$trace)$function.value
plot(trace_ff, type = "l")
percent(- sao$value)
final_vector <- c((sao$par >= threshold), T)
names(sampleA)[final_vector]
final_sample <- as.data.frame(sampleA[, final_vector])
Sys.time() - start
> percent(- sao$value)
[1] "18.1%"
> final_vector <- c((sao$par >= threshold), T)
> names(sampleA)[final_vector]
[1] "x_discrete"             "y_discrete"             "input_noise_2_discrete" "z_discrete"
> final_sample <- as.data.frame(sampleA[, final_vector])
> 
> Sys.time() - start
Time difference of 10.00453 mins
> percent(- sao$value)
[1] "18.2%"
> final_vector <- c((sao$par >= threshold), T)
> names(sampleA)[final_vector]
[1] "x_discrete"             "y_discrete"             "input_noise_1_discrete" "z_discrete"
> final_sample <- as.data.frame(sampleA[, final_vector])
> 
> Sys.time() - start
Time difference of 20.00585 mins
Source: https://habr.com/ru/post/303750/