# Turn the raw counts stored in PRAD_tp.rda into a variance-stabilised
# expression matrix and save it as an RDS file.
#
# Pipeline:
#   1. load the stored object and pull out the "unstranded" count assay;
#   2. keep only samples whose patient label occurs exactly once;
#   3. strip everything after the first "." of each gene_id and collapse rows
#      that end up sharing an ID, keeping the row with the largest IQR;
#   4. apply DESeq2's variance-stabilising transformation;
#   5. write the result to 'rininiang.rds'.

# library() (not require()) so a missing package stops the script immediately.
library(SummarizedExperiment)
library(stringr)
library(DESeq2)

# load() returns the names of the restored objects. Everything below reads an
# object called `data`; without this check a missing object would silently
# resolve to the utils::data() function and fail later with a confusing error.
tmp <- load('../../../DEG/TCGA/PRAD_tp.rda')
if (!"data" %in% tmp) {
  stop("'PRAD_tp.rda' does not contain an object named 'data'", call. = FALSE)
}

# NOTE(review): `data` is presumably a RangedSummarizedExperiment (the code
# reads its assays, colData and rowRanges) -- confirm against the .rda file.
# withDimnames = FALSE returns the assay exactly as stored.
counts <- assay(data, "unstranded", withDimnames = FALSE)
colnames(counts) <- colData(data)$patient

#' Positions of values that occur once (or more than once) in a vector.
#'
#' @param NameL Vector of labels. Values are compared by their character
#'   representation.
#' @param reverse If `FALSE` (default) return the positions of labels that
#'   occur exactly once; if `TRUE` return the positions of labels that occur
#'   more than once.
#' @return Integer vector of positions into `NameL`, in increasing order.
#'   Positions holding `NA` are never returned.
f_rm_duplicated <- function(NameL, reverse = FALSE) {
  labels <- as.character(NameL)
  # A label is repeated if it has an earlier or a later twin.
  repeated <- duplicated(labels) | duplicated(labels, fromLast = TRUE)
  wanted <- if (reverse) repeated else !repeated
  which(wanted & !is.na(labels))
}

# Every sample whose patient label is shared with another sample is dropped
# (none of the copies is kept). drop = FALSE keeps `counts` a matrix even if a
# single sample survives.
counts <- counts[, f_rm_duplicated(colnames(counts)), drop = FALSE]
geneInfo <- as.data.frame(rowRanges(data))[c('gene_id', 'gene_type', 'gene_name')]

#' Collapse rows that share an identifier, keeping the highest-scoring row.
#'
#' Rows whose identifier is unique are kept as they are. For every identifier
#' that occurs more than once, `select_func` is evaluated on each of its rows
#' and only the row with the largest value is kept (the first one on ties).
#'
#' @param df Data frame (or matrix) of values, one row per entry of `rowNn`.
#' @param rowNn Vector of row identifiers, same length as `nrow(df)`. Rows
#'   whose identifier is `NA` are dropped.
#' @param select_func Function, or name of a function, mapping one row (as a
#'   vector) to a single number. Defaults to `'IQR'`.
#' @return `df` restricted to one row per identifier, with the identifiers as
#'   row names: first the rows with unique identifiers in their original order,
#'   then one row per duplicated identifier, ordered by sorted identifier.
f_dedup_IQR <- function(df, rowNn, select_func = 'IQR') {
  # match.fun() accepts a function or its name and only ever returns functions.
  select_func <- match.fun(select_func)

  unique_idx <- f_rm_duplicated(rowNn)
  unique_rows <- df[unique_idx, , drop = FALSE]
  rownames(unique_rows) <- rowNn[unique_idx]

  dup_idx <- f_rm_duplicated(rowNn, TRUE)
  if (length(dup_idx) == 0L) {
    # Nothing to collapse.
    return(unique_rows)
  }
  dup_ids <- rowNn[dup_idx]
  dup_rows <- df[dup_idx, , drop = FALSE]

  # Score every duplicated row once, then pick the best row inside each group
  # of rows sharing an identifier. split() orders groups by sorted identifier.
  scores <- apply(as.matrix(dup_rows), 1, select_func)
  best <- vapply(
    split(seq_along(dup_ids), dup_ids),
    function(members) members[which.max(scores[members])],
    integer(1),
    USE.NAMES = FALSE
  )

  dup_rows <- dup_rows[best, , drop = FALSE]
  rownames(dup_rows) <- dup_ids[best]
  rbind(unique_rows, dup_rows)
}

# Keep only the part of each gene_id before its first "." . Splitting into at
# most two pieces works no matter how many dots an ID contains.
gene_ids <- str_split_fixed(geneInfo$gene_id, '\\.', n = 2)[, 1]
rininiang <- f_dedup_IQR(as.data.frame(counts), gene_ids)
rininiang

#' Variance-stabilising transformation of a count table.
#'
#' @param countsMatrix Count table with one row per gene and one uniquely named
#'   column per sample.
#' @return The assay of the object returned by `DESeq2::vst()`, with the same
#'   row and column layout as `countsMatrix`.
f_counts2VST <- function(countsMatrix) {
  # Every sample gets the same placeholder level; the design is ~ 1, so the
  # column only exists to give DESeq2 a colData table.
  conditions <- factor(rep("Control", ncol(countsMatrix)))
  colData_b <- data.frame(row.names = colnames(countsMatrix), conditions)
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = countsMatrix,
    colData = colData_b,
    design = ~ 1
  )
  vsd <- DESeq2::vst(object = dds, blind = TRUE)
  SummarizedExperiment::assay(vsd)
}

VST <- f_counts2VST(rininiang)
VST
saveRDS(VST, 'rininiang.rds')