#required library: APcluster algorithm library("apcluster") #A function to calculate p-value of fiSher combination; a - a vector of p-values fisher_p<-function(a) 1-pchisq(-2*sum(log(a)),df=2*length(a)) #Similarity function: v1, v2 - 2 numeric or character vectors of variant identifiers simf<-function(v1,v2){ nom<-length(intersect(v1,v2)) denom<-length(union(v1,v2)) nom/denom } # AP cluster function: # p - a vector of ethnic group p-values. # snp_id_list is a list of SNP identifiers (e.g. SNP ids, or any other values which uniquely define variants withit a region). # simf - similarity function which takes as an input two numeric or character vectors of variants identifiers and outputs a numeric similarity. APcluster_meta<-function(p,snp_id_list,simf){ #Check input if(!(is.numeric(p))) stop('p should be numeric vector') for(i in 1:length(snp_id_list)) if(!(is.numeric(snp_id_list[[i]]) || is.character(snp_id_list[[i]]))) stop('elements of snp_id_list should be either numeric or character vectors') if(length(p)!=length(snp_id_list)) stop('Length of p and snp_id_list should be the same') #Building similarity matrix s () npop<-length(snp_id_list) s<-matrix(0,npop,npop) for(i in 1:npop) for(j in 1:npop) s[i,j]<-simf(snp_id_list[[i]],snp_id_list[[j]]) #Clustering b<-apcluster(s) cl<-b@clusters #If algorithm has not converged, assume one cluster if(length(cl)==0) cl[[1]]<-1:length(pop) #P-value p1<-c() for(i in 1:length(cl)) p1<-c(p1,fisher_p(p[cl[[i]]])) fisher_p(p1) } ############################################################################################################################## #EXAMPLE DATA ANALYSIS: THE WORKING DIRECTORY IS IN THE EXAMPLE DATA FOLDER ############################################################################################################################## #required library for SKAT test library("SKAT") #A FUNCTION TO CALCULATE SKAT P-VALUE FOR A SINGLE ETHNIC GROUP, geno - matrix of genotype, obj - output of SKAT_Null_Model function skatf<-function(geno,obj) SKAT(geno,obj)$p.value #A MATRIX FOR MAF WITHIN EACH ETHNIC GROUP maf_f<-c() #pskat - VECTOR OF SKAT P-VALUES pskat<-c() #READING DATA, CALCULATING SKAT P-VALUES, AND CALCULATING MAF FOR EACH ETHNIC GROUP pop<-c("E1","E2","E3","E4","A1","A2","A3","A4","Af1","Af2","Af3","Af4") for(p in pop){ geno<-matrix(scan(paste(p,"_example.txt",sep=""),what=numeric()),byrow=TRUE,nrow=1000) #PHENOTYPE IS IN THE FIRST COLUMN phreal<-geno[,1] geno<-geno[,-1] #CLACULATE MAF FOR maf<-apply(geno,2,sum)/dim(geno)[1]/2 maf_f<-rbind(maf_f,maf) #DELETE UNOBSERVED VARIANTS FOR AN ETHNIC GROUP BEFORE SKAT ANALYSIS maf<-apply(geno,2,sum)/2/dim(geno)[1] out<-(1:length(maf))[maf>0.01 | maf==0] if(length(out)>0) geno<-geno[,-out] #SKAT P-VALUE obj<-SKAT_Null_Model(phreal~1,data=data.frame(phreal=phreal),out_type="C") pskat<-c(pskat,skatf(geno,obj)) } #CREATE snp_id_list FROM A MATRIX OF MAF #NOTE: IN THIS CASE VARIANT IDENTIFIER IS JUST A NUMBER OF A VARIANT, FIRST BEING THE FIRST VARIANT IN GENOTYPE MATRICES. #nsnps - NUMBER OF VARIANTS OBSERVED ACROSS ALL THE 12 ETHNIC GROUPS snp_id_list<-list() nsnps<-dim(maf_f)[2] for(i in 1:length(pop)) snp_id_list[[i]]<-(1:nsnps)[maf_f[i,]>0] #apcluster meta-analysis APcluster_meta(pskat,snp_id_list,simf) #OUTPUT: 0.2239268