Comments (3)
sample code
library(devtools)
devtools::install_github("kkondo1981/aglm", build_vignettes=TRUE)
library(aglm)
##########
library(MASS) # For Boston
# Read data ----
# Relies on `Boston` from MASS, which the script attaches beforehand.
xy <- Boston # xy is the data.frame to be processed.
colnames(xy)[ncol(xy)] <- "y" # Let medv (the last column) be the objective y.
xy <- cbind(logCRIM = log(xy$crim), xy) # Prepend log(crim) as a feature.
xy$rad <- as.ordered(xy$rad) # Treat rad as an ordered factor.

# Split data into train and test ----
n <- nrow(xy) # Sample size.
set.seed(2018) # For reproducibility.
test.id <- sample(n, round(n / 4)) # Row numbers for the test data.
test <- xy[test.id, ] # test is the data.frame for testing.
train <- xy[-test.id, ] # train is the data.frame for training.
# y is the last column of xy, so dropping column ncol(xy) keeps the features.
x <- train[-ncol(xy)]
y <- train$y
newx <- test[-ncol(xy)]
y_true <- test$y
# Model ----
# Choose lambda by cross-validation, refit at that lambda, then score the
# held-out data with RMSE.
set.seed(2018)
aglm_CV <- cv.aglm(x, y)
aglm_lambda <- aglm_CV$lambda.min
aglm_model <- aglm(x, y, lambda = aglm_lambda)
aglm_pred1 <- predict(aglm_model, newx = newx, type = "response")
rmse1 <- sqrt(mean((y_true - aglm_pred1)^2))
cat("RMSE: ", rmse1, "\n")
## RMSE: 3.082243

# Predicting directly from the cv.aglm result was reported to raise an error,
# so the whole attempt (including its continuation line) stays commented out.
# aglm_pred2 <- predict(aglm_CV, s = aglm_lambda,
#                       newx = newx, type = "response")
## Error
# cat("RMSE: ", sqrt(mean((y_true - aglm_pred2)^2)), "\n")
## Error
# Plot ----
# NOTE(review): the coefficient index ranges (3:102, 652:660) and the
# vars_info positions (1, 10) are hard-coded for this particular fit; verify
# them against the fitted model before reusing this code.

# Step plot of the cumulative coefficients of the first variable.
plot(aglm_model@vars_info[[1]]$OD_info$breaks,
     cumsum(coef(aglm_model)[3:102]),
     type = "s",
     xlab = aglm_model@vars_info[[1]]$name,
     ylab = "Coefficients")

# Bar plot of the cumulative coefficients of the tenth variable.
# `type` is not an argument of barplot() and only triggered warnings, so it
# is no longer passed.
barplot(cumsum(coef(aglm_model)[652:660]),
        names.arg = aglm_model@vars_info[[10]]$OD_info$breaks,
        xlab = aglm_model@vars_info[[10]]$name,
        ylab = "Coefficients")
from aglm.
The following sample may be better.
##########
Preamble
library(aglm)
library(MASS) # For Boston
# Preprocessing ----
# Relies on `Boston` from MASS, which the script attaches beforehand.
xy <- Boston # xy is a data.frame to be processed.
colnames(xy)[ncol(xy)] <- "y" # Let medv (the last column) be the objective y.
n <- nrow(xy) # Sample size.
set.seed(2018) # For reproducibility.
test.id <- sample(n, round(n / 4)) # Row numbers for the test data.
test <- xy[test.id, ] # test is the data.frame for testing.
train <- xy[-test.id, ] # train is the data.frame for training.
# y is the last column of xy, so dropping column ncol(xy) keeps the features.
x <- train[-ncol(xy)]
y <- train$y
newx <- test[-ncol(xy)]
y_true <- test$y

# Encode chas as a factor and rad as an ordered factor. The test data reuses
# the training levels so both sets share the same encoding.
x$chas <- as.factor(x$chas)
x$rad <- as.ordered(x$rad)
newx$chas <- factor(newx$chas, levels = levels(x$chas))
newx$rad <- ordered(newx$rad, levels = levels(x$rad))
# Modeling ----
# Fit log(y) with a LASSO aglm: pick lambda by cross-validation, refit at that
# lambda, then back-transform the test predictions to the original scale.
set.seed(2018)
aglmCV <- cv.aglm( # CV for LASSO aglm
  x, log(y),
  alpha = 1,
  lambda = 0.1^seq(1, 3, length.out = 100),
  add_interaction_columns = FALSE
)
# The slot access had been mangled into "[email protected]" by e-mail
# obfuscation; the intended expression is the lambda.min slot of aglmCV.
aglmLambda <- aglmCV@lambda.min
aglmFit <- as.vector(predict(aglmCV, newx = x,
                             s = aglmLambda, type = "response"))
# Variance of the training residuals on the log scale.
aglmDispersion <- var(log(y) - aglmFit)
aglm_model <- aglm(x, log(y), alpha = 1,
                   lambda = aglmLambda,
                   add_interaction_columns = FALSE)
aglmMu <- predict(aglm_model, s = aglmLambda, newx = newx, type = "response")
# Back-transform from the log scale, adding half the residual variance.
pred10 <- exp(aglmMu + aglmDispersion / 2)
# RMSE over test rows whose y is not 50.
# NOTE(review): presumably 50 is a censoring cap on medv -- confirm.
rmse10 <- sqrt(mean(((test$y - pred10)^2)[test$y != 50]))
print(rmse10) # 3.078149
plot(test$y, pred10)
curve(identity, add = TRUE) # Reference line y = x.
# Plots ----
# NOTE(review): the coefficient offsets (666, 744, 549, 558), the run lengths
# (77, 100, 9) and the vars_info positions (12, 13, 9) are hard-coded for this
# particular fit; verify them against the fitted model before reusing.

# For numeric features
# Each curve is the cumulative sum of a run of coefficients plus a linear
# term slope * x. A step plot is drawn when the slope is exactly zero,
# otherwise a line plot.
variable <- aglm_model@vars_info[[12]]
xbreaks <- variable$OD_info$breaks
slope <- coef(aglm_model)[666]
type <- if (slope == 0) "s" else "l" # slope is a scalar, so plain if/else.
ybreaks <- cumsum(coef(aglm_model)[666 + 1:77]) + slope * xbreaks
plot(xbreaks, ybreaks, type = type,
     xlab = variable$name, ylab = "Coefficients")

variable <- aglm_model@vars_info[[13]]
xbreaks <- variable$OD_info$breaks
slope <- coef(aglm_model)[744]
type <- if (slope == 0) "s" else "l"
ybreaks <- cumsum(coef(aglm_model)[744 + 1:100]) + slope * xbreaks
plot(xbreaks, ybreaks, type = type,
     xlab = variable$name, ylab = "Coefficients")

# For ordered features
# Bar heights are the cumulative sum of one run of nine coefficients plus,
# element-wise, a second run of nine coefficients (UDcoef).
variable <- aglm_model@vars_info[[9]]
xnames <- variable$OD_info$breaks
UDcoef <- coef(aglm_model)[558 + 1:9]
ybreaks <- cumsum(coef(aglm_model)[549 + 1:9]) + UDcoef
barplot(ybreaks, names.arg = xnames,
        xlab = variable$name, ylab = "Coefficients")
from aglm.
Implemented in #25
from aglm.
Related Issues (20)
- Small changes HOT 1
- predict.aglm should accept a cv.aglm object HOT 3
- Doesn't "add_linear_columns = FALSE" work? HOT 1
- newoffset for predict.glmnet HOT 1
- Change names? HOT 2
- predict.AccurateGLM with type = "coefficients" or "nonzero" HOT 1
- Line 97 in 2904927 HOT 1
- Keep fit.preval, etc. in cv.aglm() HOT 1
- Set license HOT 2
- logical features
- Formula input HOT 1
- Partial residuals HOT 3
- Error with predict function HOT 2
- Defaults of type.measure HOT 1
- An additional option of plot.AccurateGLM HOT 4
- Another additional option of plot.AccurateGLM HOT 1
- L dummy option HOT 2
- install_github in case of not installing glmnet HOT 2
- Heavy vignettes
- Tests for more datasets
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization; use data art.
-
Game
Something interesting about games; make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from aglm.