What to plot (candidates) for one variable: 2-way sample plot

The following sample may be better. ########## Preamble

Implemented in #25

Implement plot() for numerical and ordered data about aglm HOT 3 CLOSED

kkondo1981 commented on September 26, 2024

Implement plot() for numerical and ordered data

from aglm.

Comments (3)

kkondo1981 commented on September 26, 2024

sample code

library(devtools)
devtools::install_github("kkondo1981/aglm", build_vignettes=TRUE)
library(aglm)

##########

library(MASS) # For Boston

# Read data

# Take the Boston data as the working data.frame and rename its last column
# (medv, the objective variable) to "y".
xy <- Boston
names(xy)[length(xy)] <- "y"
# Prepend log(crim) as an extra column and convert rad to an ordered factor.
xy <- cbind(logCRIM = log(xy[["crim"]]), xy)
xy[["rad"]] <- as.ordered(xy[["rad"]])

# Split data into train and test

# Hold out a quarter of the rows (random draw, fixed seed) as test data.
n <- nrow(xy) # Sample size.
set.seed(2018) # For reproducibility.
test.id <- sample(n, round(n / 4)) # Row numbers of the test data.
train <- xy[-test.id, ]
test <- xy[test.id, ]
# Features are every column except the one at position ncol(xy);
# the response is column y.
x <- train[, -ncol(xy), drop = FALSE]
newx <- test[, -ncol(xy), drop = FALSE]
y <- train[["y"]]
y_true <- test[["y"]]

# Model

# Choose lambda by cross-validation, then refit the model at that lambda.
set.seed(2018)
aglm_CV <- cv.aglm(x, y)
aglm_lambda <- aglm_CV$lambda.min
aglm_model <- aglm(x, y, lambda = aglm_lambda)

# Predict on the held-out rows and report the root mean squared error.
aglm_pred1 <- predict(aglm_model, newx = newx, type = "response")
rmse1 <- sqrt(mean((y_true - aglm_pred1)^2))
cat("RMSE: ", rmse1, "\n")
## RMSE: 3.082243

# Predicting directly from the CV object is kept commented out because it was
# recorded as failing ("Error" notes below). Both lines of the call carry the
# comment marker so the script still parses.
# aglm_pred2 <- predict(aglm_CV, s = aglm_lambda,
#                       newx = newx, type = "response")
## Error
# cat("RMSE: ", sqrt(mean((y_true - aglm_pred2)^2)), "\n")
## Error

# Plot

# Step plot of the cumulative sum of coefficients 3..102 against the breaks
# stored for the first variable; the x label is that variable's name.
local({
  info <- aglm_model@vars_info[[1]]
  plot(
    x = info$OD_info$breaks,
    y = cumsum(coef(aglm_model)[3:102]),
    type = "s",
    xlab = info$name,
    ylab = "Coefficients"
  )
})

# Bar plot of the cumulative sum of coefficients 652..660, one bar per break
# of the tenth variable; the x label is that variable's name.
# `type` is not a barplot() argument (it is only forwarded as a graphical
# parameter and triggers warnings), so it is not passed here.
barplot(cumsum(coef(aglm_model)[652:660]),
        names.arg = aglm_model@vars_info[[10]]$OD_info$breaks,
        xlab = aglm_model@vars_info[[10]]$name,
        ylab = "Coefficients")

from aglm.

iwahiropuzzles commented on September 26, 2024

The following sample may be better.
##########

# Preamble

library(aglm)
library(MASS) # For Boston

# Preprocessing

# Use the Boston data with its last column (medv, the objective variable)
# renamed to "y".
xy <- Boston
names(xy)[length(xy)] <- "y"

# Hold out a quarter of the rows (random draw, fixed seed) as test data.
n <- nrow(xy) # Sample size.
set.seed(2018) # For reproducibility.
test.id <- sample(n, round(n / 4)) # Row numbers of the test data.
train <- xy[-test.id, ]
test <- xy[test.id, ]

# Features are every column except the one at position ncol(xy);
# the response is column y.
x <- train[, -ncol(xy), drop = FALSE]
newx <- test[, -ncol(xy), drop = FALSE]
y <- train[["y"]]
y_true <- test[["y"]]

# chas becomes a factor and rad an ordered factor; the test columns reuse the
# levels found in the training columns.
x[["chas"]] <- as.factor(x[["chas"]])
x[["rad"]] <- as.ordered(x[["rad"]])
newx[["chas"]] <- factor(newx[["chas"]], levels = levels(x[["chas"]]))
newx[["rad"]] <- factor(newx[["rad"]], levels = levels(x[["rad"]]),
                        ordered = TRUE)

# Modeling

set.seed(2018)
# Cross-validate a LASSO (alpha = 1) aglm on log(y) over a fixed lambda grid,
# without interaction columns.
aglmCV <- cv.aglm(x, log(y), alpha = 1,
                  lambda = 0.1^seq(1, 3, length.out = 100),
                  add_interaction_columns = FALSE)
# NOTE(review): the scraped text read "[email protected]" here, which is e-mail
# obfuscation of an "<object>@<slot>" expression and not valid R. Restored as
# the lambda.min slot of aglmCV -- confirm against the aglm version in use.
aglmLambda <- aglmCV@lambda.min

# In-sample fit on the log scale and the variance of its residuals.
aglmFit <- as.vector(predict(aglmCV, newx = x,
                             s = aglmLambda, type = "response"))
aglmDispersion <- var(log(y) - aglmFit)

# Refit at the selected lambda and predict the test rows on the log scale.
aglm_model <- aglm(x, log(y), alpha = 1,
                   lambda = aglmLambda,
                   add_interaction_columns = FALSE)
aglmMu <- predict(aglm_model, s = aglmLambda, newx = newx, type = "response")

# Back-transform to the original scale, adding half the residual variance
# before exponentiating.
pred10 <- exp(aglmMu + aglmDispersion / 2)

# RMSE over the test rows whose y is not 50, then predicted vs. observed with
# the identity line for reference.
rmse10 <- sqrt(mean(((test$y - pred10)^2)[test$y != 50]))
print(rmse10) # 3.078149
plot(test$y, pred10)
curve(identity, add = TRUE)

# Plots

# For numeric features

# Variable 12: coefficient 666 is used as a slope and the following 77
# coefficients are accumulated over the variable's breaks.
variable <- aglm_model@vars_info[[12]]
xbreaks <- variable$OD_info$breaks
slope <- coef(aglm_model)[666]
# Scalar condition, so plain if/else rather than ifelse(): step plot when the
# slope is exactly zero, line plot otherwise.
type <- if (slope == 0) "s" else "l"
ybreaks <- cumsum(coef(aglm_model)[666 + 1:77]) + slope * xbreaks
plot(xbreaks, ybreaks, type = type,
     xlab = variable$name, ylab = "Coefficients")

# Variable 13: same construction with coefficient 744 as the slope and the
# following 100 coefficients accumulated.
variable <- aglm_model@vars_info[[13]]
xbreaks <- variable$OD_info$breaks
slope <- coef(aglm_model)[744]
type <- if (slope == 0) "s" else "l"
ybreaks <- cumsum(coef(aglm_model)[744 + 1:100]) + slope * xbreaks
plot(xbreaks, ybreaks, type = type,
     xlab = variable$name, ylab = "Coefficients")

# For ordered features

# Variable 9: bar heights are the cumulative sum of coefficients 550..558 plus
# coefficients 559..567, one bar per break of the variable.
variable <- aglm_model@vars_info[[9]]
xnames <- variable$OD_info$breaks
UDcoef <- coef(aglm_model)[559:567]
ybreaks <- cumsum(coef(aglm_model)[550:558]) + UDcoef
barplot(
  height = ybreaks,
  names.arg = xnames,
  xlab = variable$name,
  ylab = "Coefficients"
)

from aglm.

kkondo1981 commented on September 26, 2024

Implemented in #25

from aglm.

Implement plot() for numerical and ordered data about aglm HOT 3 CLOSED

Comments (3)

Read data

Split data into train and test

Model

newx = newx, type = "response")

Plot

Preamble

Preprocessing

Modeling

Plots

For numeric features

For ordered features

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent