]> git.donarmstrong.com Git - rsem.git/blob - EBSeq/R/GeneMultiSimu.R
Included EBSeq for downstream differential expression analysis
[rsem.git] / EBSeq / R / GeneMultiSimu.R
1 GeneMultiSimu<-
2 function(DVDconstant=NULL, DVDqt1=NULL, DVDqt2=NULL, Conditions,AllParti, NumofSample, NumofGene=NULL, DEGeneProp, Phiconstant=NULL, Phi.qt1=NULL, Phi.qt2=NULL, Meanconstant=NULL,NormFactor=NULL, OnlyData=T)
3 {
4 # 2012 feb 1 paired simulation
5 if(is.null(NormFactor)) NormFactor=rep(1,NumofSample)
6 data(GeneEBresultGouldBart2)
7 MeansC1=GeneEBresultGouldBart2$C1Mean[[1]]
8 MeansC2=GeneEBresultGouldBart2$C2Mean[[1]]
9
10 MeanDVD=MeansC1/MeansC2
11
12 if(is.null(DVDconstant))DVDLibrary=MeanDVD[MeanDVD<quantile(MeanDVD[MeanDVD!=Inf],DVDqt2) & MeanDVD>quantile(MeanDVD[MeanDVD!=Inf],DVDqt1)]
13 if(!is.null(DVDconstant))DVDLibrary=DVDconstant
14
15 # If DVD constant, use constant when generate
16 # If not, use DVDLibrary
17
18 MeanInputraw=GeneEBresultGouldBart2$MeanList[[1]]
19
20 if(length(NumofGene)!=0)
21 NumofGene.raw=NumofGene*2
22
23 if(length(NumofGene)==0)
24 NumofGene.raw=length(MeanInputraw)
25
26
27 PhiInput.raw=GeneEBresultGouldBart2$RList[[1]]
28 if (length(Phiconstant)==0){
29         PhiLibrary=PhiInput.raw[(1/PhiInput.raw)<quantile(1/PhiInput.raw,Phi.qt2) & 1/PhiInput.raw>quantile(1/PhiInput.raw,Phi.qt1)]
30         PhiInputNames=sample(names(PhiLibrary),NumofGene.raw,replace=T)
31         PhiInput=PhiInput.raw[PhiInputNames]
32 }
33
34 if (length(Phiconstant)!=0)PhiInput=rep(Phiconstant,length(MeanInputraw))
35 if(length(Meanconstant)==0)MeanInput=GeneEBresultGouldBart2$MeanList[[1]][PhiInputNames]
36 if(length(Meanconstant)!=0)MeanInput=rep(Meanconstant,length(GeneEBresultGouldBart2$MeanList[[1]]))
37
38 # length(DEGeneNumbers) should be num of patterns -1. the others EE
39 PatternGeneNumbers=round(NumofGene.raw*DEGeneProp/2)*2
40 names(PatternGeneNumbers)=rownames(AllParti)
41 EEWhich=which(rowSums(AllParti)==ncol(AllParti))
42 DEGeneNumbers=PatternGeneNumbers[-EEWhich]
43
44
45 OutGeneNumbers=round(NumofGene*DEGeneProp/2)*2
46 names(OutGeneNumbers)=rownames(AllParti)
47 OutDEGeneNumbers=OutGeneNumbers[-EEWhich]
48 OutEEGeneNumbers=OutGeneNumbers[EEWhich]
49 OutGenePatterns=c(unlist(sapply(1:length(OutDEGeneNumbers),
50                                                           function(i)rep(names(OutDEGeneNumbers)[i],OutDEGeneNumbers[i]),simplify=F)),
51                                   rep(names(OutEEGeneNumbers),OutEEGeneNumbers))
52
53 GeneNames=paste("G",c(1:NumofGene.raw),sep="_")
54 names(PhiInput)=GeneNames
55 names(MeanInput)=GeneNames
56 #########
57 # data
58 #########
59 EEList=sapply(1:NumofGene.raw, function(j) sapply(1:NumofSample, function(i)suppressWarnings(rnbinom(1,mu=NormFactor[i]*MeanInput[j], size=PhiInput[j]))))
60
61 generateDataraw=t(EEList)
62 DVDSample=sample(DVDLibrary,sum(DEGeneNumbers),replace=T)
63
64 DErawNames=vector("list",length(DEGeneNumbers))
65 st=1
66 for(i in 1:length(DEGeneNumbers)){
67         for(j in st:(st+DEGeneNumbers[i]-1)){
68                 NumGroup=max(AllParti[names(DEGeneNumbers)[i],])
69                 SampleGroup=sample(NumGroup,NumGroup)
70                 DVDSampleEach=c(1,DVDSample[j]^c(1:(NumGroup-1)))
71                 for(k in 1:NumGroup){
72                 CondWhich=which(AllParti[names(DEGeneNumbers)[i],]==SampleGroup[k])
73                 SampleChoose=which(Conditions%in%colnames(AllParti)[CondWhich])
74                 generateDataraw[j,SampleChoose]=sapply(1:length(SampleChoose), function(i)suppressWarnings(rnbinom(1, size=PhiInput[j], mu=DVDSampleEach[k]*MeanInput[j]*NormFactor[i])),simplify=T)
75                 }}
76                 DErawNames[[i]]=GeneNames[st:(st+DEGeneNumbers[i]-1)]
77                 st=st+DEGeneNumbers[i]
78 }
79
80 rownames(generateDataraw)=GeneNames
81 MeanVector=rowMeans(generateDataraw)
82 VarVector=apply(generateDataraw,1,var)
83 MOV.post=MeanVector/VarVector
84 EErawNames=GeneNames[!GeneNames%in%unlist(DErawNames)]
85
86
87 ### Remove MOV=NA
88 generateData=generateDataraw
89 generateData=generateData[!is.na(MOV.post)& MeanVector>2 & MeanVector<10000 ,] 
90 InName=rownames(generateData)
91 #print(paste("NA MOV's",sum(is.na(MOV.post)),sum( MeanVector<2), sum(MeanVector>10000)))
92 ## DE
93 ##################################
94 FinalDEInName=sapply(1:length(DEGeneNumbers),function(i)InName[InName%in%DErawNames[[i]]][1:OutDEGeneNumbers[i]],simplify=F)
95 FinalEEInName=InName[InName%in%EErawNames][1:OutEEGeneNumbers]
96 FinalNames=c(unlist(FinalDEInName),FinalEEInName)
97
98 generateData=generateData[FinalNames,]
99 ########################################
100
101 UseName=rownames(generateData)
102 phiuse=PhiInput[rownames(generateData)]
103 meanuse=MeanInput[rownames(generateData)]
104
105 OutName=paste("Gene",c(1:nrow(generateData)),sep="_")
106 names(OutName)=rownames(generateData)
107 OutData=generateData
108 rownames(OutData)=as.vector(OutName)
109 names(OutGenePatterns)=as.vector(OutName)
110 output=list(data=OutData, Patterns=OutGenePatterns)
111 }