X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=EBSeq%2FR%2FGeneMultiSimu.R;fp=EBSeq%2FR%2FGeneMultiSimu.R;h=e71babdb84ec717015019630a54af062f45264b2;hb=4496388fd52d4354c746f36b1998477f31c2b0dd;hp=0000000000000000000000000000000000000000;hpb=9210a5fece7ec2854eb834d5b2dcbe2d12fbebf1;p=rsem.git diff --git a/EBSeq/R/GeneMultiSimu.R b/EBSeq/R/GeneMultiSimu.R new file mode 100644 index 0000000..e71babd --- /dev/null +++ b/EBSeq/R/GeneMultiSimu.R @@ -0,0 +1,111 @@ +GeneMultiSimu<- +function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions,AllParti, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T) +{ +# 2012 feb 1 paired simulation +if(is.null(NormFactor)) NormFactor=rep(1,NumofSample) +data(GeneEBresultGouldBart2) +MeansC1=GeneEBresultGouldBart2$C1Mean[[1]] +MeansC2=GeneEBresultGouldBart2$C2Mean[[1]] + +MeanDVD=MeansC1/MeansC2 + +if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVDquantile(MeanDVD[MeanDVD!=Inf],DVDqt1)] +if(!is.null(DVDconstant))DVDLibrary=DVDconstant + +# If DVD constant, use constant when generate +# If not, use DVDLibrary + +MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]] + +if(length(NumofGene)!=0) +NumofGene.raw=NumofGene*2 + +if(length(NumofGene)==0) +NumofGene.raw=length(MeanInputraw) + + +PhiInput.raw=GeneEBresultGouldBart2$RList[[1]] +if (length(Phiconstant)==0){ + PhiLibrary=PhiInput.raw[(1/PhiInput.raw)quantile(1/PhiInput.raw,Phi.qt1)] + PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T) + PhiInput=PhiInput.raw[PhiInputNames] +} + +if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw)) +if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames] +if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]])) + +# length(DEGeneNumbers) should be num of patterns -1. the others EE +PatternGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2 +names(PatternGeneNumbers)=rownames(AllParti) +EEWhich=which(rowSums(AllParti)==ncol(AllParti)) +DEGeneNumbers=PatternGeneNumbers[-EEWhich] + + +OutGeneNumbers=round(NumofGene*DEGeneProp/2)*2 +names(OutGeneNumbers)=rownames(AllParti) +OutDEGeneNumbers=OutGeneNumbers[-EEWhich] +OutEEGeneNumbers=OutGeneNumbers[EEWhich] +OutGenePatterns=c(unlist(sapply(1:length(OutDEGeneNumbers), + function(i)rep(names(OutDEGeneNumbers)[i],OutDEGeneNumbers[i]),simplify=F)), + rep(names(OutEEGeneNumbers),OutEEGeneNumbers)) + +GeneNames=paste("G",c(1:NumofGene.raw),sep="_") +names(PhiInput)=GeneNames +names(MeanInput)=GeneNames +######### +# data +######### +EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j])))) + +generateDataraw=t(EEList) +DVDSample=sample(DVDLibrary,sum(DEGeneNumbers),replace=T) + +DErawNames=vector("list",length(DEGeneNumbers)) +st=1 +for(i in 1:length(DEGeneNumbers)){ + for(j in st:(st+DEGeneNumbers[i]-1)){ + NumGroup=max(AllParti[names(DEGeneNumbers)[i],]) + SampleGroup=sample(NumGroup,NumGroup) + DVDSampleEach=c(1,DVDSample[j]^c(1:(NumGroup-1))) + for(k in 1:NumGroup){ + CondWhich=which(AllParti[names(DEGeneNumbers)[i],]==SampleGroup[k]) + SampleChoose=which(Conditions%in%colnames(AllParti)[CondWhich]) + generateDataraw[j,SampleChoose]=sapply(1:length(SampleChoose), function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSampleEach[k]*MeanInput[j]*NormFactor[i])),simplify=T) + }} + DErawNames[[i]]=GeneNames[st:(st+DEGeneNumbers[i]-1)] + st=st+DEGeneNumbers[i] +} + +rownames(generateDataraw)=GeneNames +MeanVector=rowMeans(generateDataraw) +VarVector=apply(generateDataraw,1,var) +MOV.post=MeanVector/VarVector +EErawNames=GeneNames[!GeneNames%in%unlist(DErawNames)] + + +### Remove MOV=NA +generateData=generateDataraw +generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] +InName=rownames(generateData) +#print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000))) +## DE +################################## +FinalDEInName=sapply(1:length(DEGeneNumbers),function(i)InName[InName%in%DErawNames[[i]]][1:OutDEGeneNumbers[i]],simplify=F) +FinalEEInName=InName[InName%in%EErawNames][1:OutEEGeneNumbers] +FinalNames=c(unlist(FinalDEInName),FinalEEInName) + +generateData=generateData[FinalNames,] +######################################## + +UseName=rownames(generateData) +phiuse=PhiInput[rownames(generateData)] +meanuse=MeanInput[rownames(generateData)] + +OutName=paste("Gene",c(1:nrow(generateData)),sep="_") +names(OutName)=rownames(generateData) +OutData=generateData +rownames(OutData)=as.vector(OutName) +names(OutGenePatterns)=as.vector(OutName) +output=list(data=OutData, Patterns=OutGenePatterns) +}