The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias in # Illumina Infinium 450k data.

ORIGINAL AUTHOR: Andrew Teschendorff

The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias in

Illumina Infinium 450k data.

Later functions and edits were provided by yours truly, Steve Horvath.

I changed the code so that one can calibrate methylation data to a gold standard.

Specifically, I took version v_1.2 by Teschendorff and fixed minor issues.

Also I made the code more robust e.g. by changing the optimization algorithm.

Toward this end, I used the method="Nelder-Mead" in optim()

Later functions and edits by Steve Horvath

# Steve Horvath took version v_1.2 by Teschendorff

and fixed minor errors. Also he made the code more robust.

Importantly, SH changed the optimization algorithm to make it #more robust.

SH used method="Nelder-Mead" in optim() since the other #optimization method sometimes gets stuck.

#Toward this end, the function blc was replaced by blc2.

require(RPMM);

betaEst2=function (y, w, weights)
{
yobs = !is.na(y)
if (sum(yobs) <= 1)
return(c(1, 1))
y = y[yobs]
w = w[yobs]
weights = weights[yobs]
N = sum(weights * w)
p = sum(weights * w * y)/N
v = sum(weights * w * y * y)/N - p * p
logab = log(c(p, 1 - p)) + log(pmax(1e-06, p * (1 - p)/v -
1))
if (sum(yobs) == 2)
return(exp(logab))
opt = try(optim(logab, betaObjf, ydata = y, wdata = w, weights = weights,
method = "Nelder-Mead",control=list(maxit=50) ), silent = TRUE)
if (inherits(opt, "try-error"))
return(c(1, 1))
exp(opt$par)
} # end of function betaEst

blc2=function (Y, w, maxiter = 25, tol = 1e-06, weights = NULL, verbose = TRUE)
{
Ymn = min(Y[Y > 0], na.rm = TRUE)
Ymx = max(Y[Y < 1], na.rm = TRUE)
Y = pmax(Y, Ymn/2)
Y = pmin(Y, 1 - (1 - Ymx)/2)
Yobs = !is.na(Y)
J = dim(Y)[2]
K = dim(w)[2]
n = dim(w)[1]
if (n != dim(Y)[1])
stop("Dimensions of w and Y do not agree")
if (is.null(weights))
weights = rep(1, n)
mu = a = b = matrix(Inf, K, J)
crit = Inf
for (i in 1:maxiter) {
warn0 = options()$warn
options(warn = -1)
eta = apply(weights * w, 2, sum)/sum(weights)
mu0 = mu
for (k in 1:K) {
for (j in 1:J) {
ab = betaEst2(Y[, j], w[, k], weights)
a[k, j] = ab[1]
b[k, j] = ab[2]
mu[k, j] = ab[1]/sum(ab)
}
}
ww = array(0, dim = c(n, J, K))
for (k in 1:K) {
for (j in 1:J) {
ww[Yobs[, j], j, k] = dbeta(Y[Yobs[, j], j],
a[k, j], b[k, j], log = TRUE)
}
}
options(warn = warn0)
w = apply(ww, c(1, 3), sum, na.rm = TRUE)
wmax = apply(w, 1, max)
for (k in 1:K) w[, k] = w[, k] - wmax
w = t(eta * t(exp(w)))
like = apply(w, 1, sum)
w = (1/like) * w
llike = weights * (log(like) + wmax)
crit = max(abs(mu - mu0))
if (verbose)
print(crit)
if (crit < tol)
break
}
return(list(a = a, b = b, eta = eta, mu = mu, w = w, llike = sum(llike)))
}

The function BMIQcalibration was created by Steve Horvath by heavily recycling code

from A. Teschendorff's BMIQ function.

BMIQ stands for beta mixture quantile normalization.

Explanation: datM is a data frame with Illumina beta values (rows are samples, colums are CpGs.

goldstandard is a numeric vector with beta values that is used as gold standard for calibrating the columns of datM.

The length of goldstandard has to equal the number of columns of datM.

Example code: First we impute missing values.

library(WGCNA); dimnames1=dimnames(datMeth)

datMeth= data.frame(t(impute.knn(as.matrix(t(datMeth)))$data))

dimnames(datMeth)=dimnames1

gold.mean=as.numeric(apply(datMeth,2,mean,na.rm=TRUE))

#datMethCalibrated=BMIQcalibration(datM=datMeth,goldstandard.beta=gold.mean)

BMIQcalibration=function(datM,goldstandard.beta,nL=3,doH=TRUE,nfit=20000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=FALSE,calibrateUnitInterval=TRUE){
if (length(goldstandard.beta) !=dim(datM)[[2]] ) {stop("Error in function arguments length(goldstandard.beta) !=dim(datM)[[2]]. Consider transposing datM.")}
if (plots ) {par(mfrow=c(2,2))}
beta1.v = goldstandard.beta

if (calibrateUnitInterval ) {datM=CalibrateUnitInterval(datM)}

estimate initial weight matrix from type1 distribution

w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
w0.m[which(beta1.v <= th1.v[1]),1] = 1;
w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
w0.m[which(beta1.v > th1.v[2]),3] = 1;

fit type1

print("Fitting EM beta mixture to goldstandard probes");
set.seed(1)
rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v)) ) ,replace=FALSE)
em1.o = blc(matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
subsetclass1.v = apply(em1.o$w,1,which.max);
subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]])));
class1.v = rep(2,length(beta1.v));
class1.v[which(beta1.v < subsetth1.v[1])] = 1;
class1.v[which(beta1.v > subsetth1.v[2])] = 3;
nth1.v = subsetth1.v;
print("Done");

generate plot from estimated mixture

if(plots){
print("Check");
tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
tmpB.v = vector();
for(l in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
}
plot(density(beta1.v),main= paste("Type1fit-", sep=""));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
}

Estimate Modes

if ( sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
if ( sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
if ( sum(class1.v==1) >1){
d1U.o = density(beta1.v[class1.v==1])
mod1U = d1U.o$x[which.max(d1U.o$y)]
}
if ( sum(class1.v==3)>1 ){
d1M.o = density(beta1.v[class1.v==3])
mod1M = d1M.o$x[which.max(d1M.o$y)]
}

BETA 2

for (ii in 1:dim(datM)[[1]] ){
printFlush(paste("ii=",ii))
sampleID=ii
beta2.v = as.numeric(datM[ii,])

d2U.o = density(beta2.v[which(beta2.v<0.4)]);
d2M.o = density(beta2.v[which(beta2.v>0.6)]);
mod2U = d2U.o$x[which.max(d2U.o$y)]
mod2M = d2M.o$x[which.max(d2M.o$y)]

now deal with type2 fit

th2.v = vector();
th2.v[1] = nth1.v[1] + (mod2U-mod1U);
th2.v[2] = nth1.v[2] + (mod2M-mod1M);

estimate initial weight matrix

w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
w0.m[which(beta2.v <= th2.v[1]),1] = 1;
w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
w0.m[which(beta2.v > th2.v[2]),3] = 1;

print("Fitting EM beta mixture to input probes");

I fixed an error in the following line (replaced beta1 by beta2)

set.seed(1)
rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE) ,replace=FALSE)
em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol,verbose=TRUE);
print("Done");

for type II probes assign to state (unmethylated, hemi or full methylation)

subsetclass2.v = apply(em2.o$w,1,which.max);

if (sum(subsetclass2.v==2)>0 ){
subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
}
if (sum(subsetclass2.v==2)==0 ){
subsetth2.v = c(1/2max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3mean(beta2.v[rand.idx[subsetclass2.v==3]]));
}

class2.v = rep(2,length(beta2.v));
class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
class2.v[which(beta2.v >= subsetth2.v[2])] = 3;

generate plot

if(plots){
tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
tmpB.v = vector();
for(lt in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
}
plot(density(beta2.v), main= paste("Type2fit-",sampleID,sep="") );
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
}

classAV1.v = vector();classAV2.v = vector();
for(l in 1:nL){
classAV1.v[l] = em1.o$mu[l,1];
classAV2.v[l] = em2.o$mu[l,1];
}

start normalising input probes

print("Start normalising input probes");
nbeta2.v = beta2.v;

select U probes

lt = 1;
selU.idx = which(class2.v==lt);
selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];

find prob according to typeII distribution

p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selUR.idx] = q.v;
p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
nbeta2.v[selUL.idx] = q.v;

select M probes

lt = 3;
selM.idx = which(class2.v==lt);
selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];

find prob according to typeII distribution

p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selMR.idx] = q.v;

if(doH){ ### if TRUE also correct type2 hemimethylated probes

select H probes and include ML probes (left ML tail is not well described by a beta-distribution).

lt = 2;
selH.idx = c(which(class2.v==lt),selML.idx);
minH = min(beta2.v[selH.idx],na.rm=TRUE)
maxH = max(beta2.v[selH.idx],na.rm=TRUE)
deltaH = maxH - minH;

need to do some patching

deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)

new maximum of H probes should be

nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;

new minimum of H probes should be

nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
ndeltaH = nmaxH - nminH;

perform conformal transformation (shift+dilation)

new_beta_H(i) = a + hf*(beta_H(i)-minH);

hf = ndeltaH/deltaH ;

fix lower point first

nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);

}

generate final plot to check normalisation

if(plots){
print("Generating final plot");
d1.o = density(beta1.v);
d2.o = density(beta2.v);
d2n.o = density(nbeta2.v);
ymax = max(d2.o$y,d1.o$y,d2n.o$y);
plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1), main=paste("CheckBMIQ-",sampleID,sep="") );
points(d1.o$x,d1.o$y,col="red",type="l");
points(d2n.o$x,d2n.o$y,col="blue",type="l");
legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
}

datM[ii,]= nbeta2.v ;
} # end of for (ii=1 loop
datM
} # end of function BMIQcalibration

BMIQ = function(beta.v,design.v,nL=3,doH=TRUE,nfit=50000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=TRUE,sampleID=1,calibrateUnitInterval=TRUE){

if (calibrateUnitInterval) {
rangeBySample=range(beta.v,na.rm=TRUE)
minBySample=rangeBySample[1]
maxBySample=rangeBySample[2]
if ( (minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample) ) {
y1=c(0.001,.999)
x1=c(minBySample,maxBySample)
lm1=lm( y1 ~ x1 )
intercept1=coef(lm1)[[1]]
slope1=coef(lm1)[[2]]
beta.v=intercept1+slope1*beta.v
} # end of if
} # end of if (calibrateUnitInterval

type1.idx = which(design.v==1);
type2.idx = which(design.v==2);

beta1.v = beta.v[type1.idx];
beta2.v = beta.v[type2.idx];

estimate initial weight matrix from type1 distribution

fit type1

print("Fitting EM beta mixture to goldstandard probes");
set.seed(1)
rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v)) ) ,replace=FALSE)
em1.o = blc2(Y=matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
subsetclass1.v = apply(em1.o$w,1,which.max);
subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]],na.rm=TRUE)));
class1.v = rep(2,length(beta1.v));
class1.v[which(beta1.v < subsetth1.v[1])] = 1;
class1.v[which(beta1.v > subsetth1.v[2])] = 3;
nth1.v = subsetth1.v;
print("Done");

generate plot from estimated mixture

if(plots){
print("Check");
tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
tmpB.v = vector();
for(l in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
}

pdf(paste("Type1fit-",sampleID,".pdf",sep=""),width=6,height=4);
plot(density(beta1.v));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
dev.off();
}

Estimate Modes

d2U.o = density(beta2.v[which(beta2.v<0.4)]);
d2M.o = density(beta2.v[which(beta2.v>0.6)]);
mod2U = d2U.o$x[which.max(d2U.o$y)]
mod2M = d2M.o$x[which.max(d2M.o$y)]

now deal with type2 fit

th2.v = vector();
th2.v[1] = nth1.v[1] + (mod2U-mod1U);
th2.v[2] = nth1.v[2] + (mod2M-mod1M);

estimate initial weight matrix

print("Fitting EM beta mixture to input probes");
set.seed(1)
rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE) ,replace=FALSE)
em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
print("Done");

for type II probes assign to state (unmethylated, hemi or full methylation)

subsetclass2.v = apply(em2.o$w,1,which.max);

class2.v = rep(2,length(beta2.v));
class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
class2.v[which(beta2.v >= subsetth2.v[2])] = 3;

generate plot

if(plots){
tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
tmpB.v = vector();
for(lt in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
}
pdf(paste("Type2fit-",sampleID,".pdf",sep=""),width=6,height=4);
plot(density(beta2.v));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
dev.off();
}

classAV1.v = vector();classAV2.v = vector();
for(l in 1:nL){
classAV1.v[l] = em1.o$mu[l,1];
classAV2.v[l] = em2.o$mu[l,1];
}

start normalising input probes

print("Start normalising input probes");
nbeta2.v = beta2.v;

select U probes

lt = 1;
selU.idx = which(class2.v==lt);
selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];

find prob according to typeII distribution

p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selUR.idx] = q.v;
p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
nbeta2.v[selUL.idx] = q.v;

select M probes

lt = 3;
selM.idx = which(class2.v==lt);
selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];

find prob according to typeII distribution

p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);

find corresponding quantile in type I distribution

q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selMR.idx] = q.v;

if(doH){ ### if TRUE also correct type2 hemimethylated probes

select H probes and include ML probes (left ML tail is not well described by a beta-distribution).

lt = 2;
selH.idx = c(which(class2.v==lt),selML.idx);
minH = min(beta2.v[selH.idx],na.rm=TRUE)
maxH = max(beta2.v[selH.idx],na.rm=TRUE)
deltaH = maxH - minH;

need to do some patching

deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)

new maximum of H probes should be

nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;

new minimum of H probes should be

nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
ndeltaH = nmaxH - nminH;

perform conformal transformation (shift+dilation)

new_beta_H(i) = a + hf*(beta_H(i)-minH);

hf = ndeltaH/deltaH ;

fix lower point first

nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);

}

pnbeta.v = beta.v;
pnbeta.v[type1.idx] = beta1.v;
pnbeta.v[type2.idx] = nbeta2.v;

generate final plot to check normalisation

if(plots){
print("Generating final plot");
d1.o = density(beta1.v);
d2.o = density(beta2.v);
d2n.o = density(nbeta2.v);
ymax = max(d2.o$y,d1.o$y,d2n.o$y);
pdf(paste("CheckBMIQ-",sampleID,".pdf",sep=""),width=6,height=4)
plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1));
points(d1.o$x,d1.o$y,col="red",type="l");
points(d2n.o$x,d2n.o$y,col="blue",type="l");
legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
dev.off();
}

print(paste("Finished for sample ",sampleID,sep=""));

return(list(nbeta=pnbeta.v,class1=class1.v,class2=class2.v,av1=classAV1.v,av2=classAV2.v,hf=hf,th1=nth1.v,th2=th2.v));

}

CheckBMIQ = function(beta.v,design.v,pnbeta.v){### pnbeta is BMIQ normalised profile

type1.idx = which(design.v==1);
type2.idx = which(design.v==2);

beta1.v = beta.v[type1.idx];
beta2.v = beta.v[type2.idx];
pnbeta2.v = pnbeta.v[type2.idx];

} # end of function CheckBMIQ

CalibrateUnitInterval=function(datM,onlyIfOutside=TRUE){

rangeBySample=data.frame(lapply(data.frame(t(datM)),range,na.rm=TRUE))
minBySample=as.numeric(rangeBySample[1,])
maxBySample=as.numeric(rangeBySample[2,])
if (onlyIfOutside) { indexSamples=which((minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample))
}
if (!onlyIfOutside) { indexSamples=1:length(minBySample)}
if ( length(indexSamples)>=1 ){
for ( i in indexSamples) {
y1=c(0.001,0.999)
x1=c(minBySample[i],maxBySample[i])
lm1=lm( y1 ~ x1 )
intercept1=coef(lm1)[[1]]
slope1=coef(lm1)[[2]]
datM[i,]=intercept1+slope1*datM[i,]
} # end of for loop
}
datM
} #end of function for calibrating to [0,1]

devotional-peacekeepers / dna Goto Github PK

dna's People

Contributors

dna's Issues

ORIGINAL AUTHOR: Andrew Teschendorff

The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias in

Illumina Infinium 450k data.

Later functions and edits were provided by yours truly, Steve Horvath.

I changed the code so that one can calibrate methylation data to a gold standard.

Specifically, I took version v_1.2 by Teschendorff and fixed minor issues.

Also I made the code more robust e.g. by changing the optimization algorithm.

Toward this end, I used the method="Nelder-Mead" in optim()

Later functions and edits by Steve Horvath

# Steve Horvath took version v_1.2 by Teschendorff

and fixed minor errors. Also he made the code more robust.

Importantly, SH changed the optimization algorithm to make it #more robust.

SH used method="Nelder-Mead" in optim() since the other #optimization method sometimes gets stuck.

The function BMIQcalibration was created by Steve Horvath by heavily recycling code

from A. Teschendorff's BMIQ function.

BMIQ stands for beta mixture quantile normalization.

Explanation: datM is a data frame with Illumina beta values (rows are samples, colums are CpGs.

goldstandard is a numeric vector with beta values that is used as gold standard for calibrating the columns of datM.

The length of goldstandard has to equal the number of columns of datM.

Example code: First we impute missing values.

library(WGCNA); dimnames1=dimnames(datMeth)

datMeth= data.frame(t(impute.knn(as.matrix(t(datMeth)))$data))

dimnames(datMeth)=dimnames1

gold.mean=as.numeric(apply(datMeth,2,mean,na.rm=TRUE))

estimate initial weight matrix from type1 distribution

fit type1

generate plot from estimated mixture

Estimate Modes

BETA 2

now deal with type2 fit

estimate initial weight matrix

I fixed an error in the following line (replaced beta1 by beta2)

for type II probes assign to state (unmethylated, hemi or full methylation)

generate plot

start normalising input probes

select U probes

find prob according to typeII distribution

find corresponding quantile in type I distribution

find corresponding quantile in type I distribution

select M probes

find prob according to typeII distribution

find corresponding quantile in type I distribution

select H probes and include ML probes (left ML tail is not well described by a beta-distribution).

need to do some patching

new maximum of H probes should be

new minimum of H probes should be

perform conformal transformation (shift+dilation)

new_beta_H(i) = a + hf*(beta_H(i)-minH);

fix lower point first

generate final plot to check normalisation

estimate initial weight matrix from type1 distribution

fit type1

generate plot from estimated mixture

Estimate Modes

now deal with type2 fit

estimate initial weight matrix

for type II probes assign to state (unmethylated, hemi or full methylation)

generate plot

start normalising input probes

select U probes

find prob according to typeII distribution

find corresponding quantile in type I distribution

find corresponding quantile in type I distribution

select M probes

find prob according to typeII distribution

find corresponding quantile in type I distribution

select H probes and include ML probes (left ML tail is not well described by a beta-distribution).

need to do some patching

new maximum of H probes should be

new minimum of H probes should be

perform conformal transformation (shift+dilation)

new_beta_H(i) = a + hf*(beta_H(i)-minH);

fix lower point first

generate final plot to check normalisation

Recommend Projects

Recommend Topics