# TCGAbiolinks: downloading CNV data (TCGA-PRAD) and feeding it to maftools
#
# Source : https://occdn.limour.top/2307.html
# Author : Limour -- posted on September 8, 2022 -- category: database
# Related posts: "TCGAbiolinks: downloading methylation data" (previous),
#                "TCGAbiolinks: downloading MAF data" (next).
#
# NOTE(review): this script relies on objects that are NOT defined in this
# file and must already exist in the session:
#   * f_dedup_IQR(), f_rm_duplicated()  -- helper functions
#   * prad_l, prad_h                    -- per-group mutation tables with a
#                                          `BarCode` column
#   * lvsh                              -- object with a `results` table that
#                                          has `pval` and `Hugo_Symbol` columns

# Download CNV data ----

## Gene-level copy number ----
library(TCGAbiolinks)

query <- GDCquery(
  project = "TCGA-PRAD",
  data.category = "Copy Number Variation",
  data.type = "Gene Level Copy Number",
  access = "open"
)
GDCdownload(query)
data <- GDCprepare(query)
saveRDS(data, "prad_cnv.rds")

## Masked copy number segments ----
query <- GDCquery(
  project = "TCGA-PRAD",
  data.category = "Copy Number Variation",
  data.type = "Masked Copy Number Segment",
  access = "open"
)
GDCdownload(query)
data <- GDCprepare(query)
saveRDS(data, "prad_cnv_masked.rds")

# Clean the gene-level data ----

## Preliminary cleaning ----
library(SummarizedExperiment)

data <- readRDS("prad_cnv.rds")

# Use the public accessors instead of reaching into S4 slots; assay() also
# attaches the experiment's row/column names to the returned matrix.
cnT <- assay(data, "copy_number")
cnTcol <- colnames(cnT)

# Characters 14-15 of the column name are parsed as a number and only columns
# with a value below 10 are kept.
# NOTE(review): presumably the TCGA sample-type code (< 10 = tumor) -- confirm.
type <- as.numeric(substr(cnTcol, 14, 15))
is_tumor <- type < 10

# Apply the SAME mask to the matrix and to the names: assigning the unfiltered
# names to the filtered matrix fails as soon as one column is dropped.
cnT <- cnT[, is_tumor, drop = FALSE]
colnames(cnT) <- substr(cnTcol[is_tumor], 1, 12)

# Rows are re-labelled with the gene_name annotation column, then every row
# containing an NA is dropped.
rownames(cnT) <- rowData(data)$gene_name
cnT <- na.omit(cnT)

## Fine cleaning ----
# NOTE(review): f_dedup_IQR() and f_rm_duplicated() are defined elsewhere.
# Presumably the first collapses duplicated gene symbols and the second
# returns a column selector that removes repeated sample names -- verify
# against their definitions.
cnT <- f_dedup_IQR(cnT, rownames(cnT))
cnT <- cnT[, f_rm_duplicated(colnames(cnT)), drop = FALSE]

# Keep only the samples that have a risk-group assignment (row names of
# `group` are matched against the 12-character column names).
group <- readRDS("../idea_2/fig3.2/fig5/tcga.predict.rds")
cnT <- cnT[, colnames(cnT) %in% rownames(group), drop = FALSE]

# Build the cnTable (long format: gene, sample, copy-number state) ----

## Slow variant ----
# One data frame per sample, bound once at the end (instead of growing `df`
# with rbind() on every iteration). The fast variant below overwrites `df`,
# so this section can be skipped on large matrices.
per_sample <- lapply(seq_len(ncol(cnT)), function(i) {
  tmp_df <- data.frame(
    Hugo_Symbol = rownames(cnT),
    Tumor_Sample_Barcode = colnames(cnT)[[i]],
    Variant_Classification = cnT[, i]
  )
  # A copy number of exactly 2 is not reported as an event.
  subset(tmp_df, Variant_Classification != 2)
})
df <- do.call(rbind, per_sample)

## Fast variant ----
library(reshape2)

df <- melt(cnT)
colnames(df) <- c("Hugo_Symbol", "Tumor_Sample_Barcode", "Variant_Classification")
df <- subset(df, Variant_Classification != 2)
head(df)

## Label the events ----
# Derive both labels from the numeric values in a single step. Assigning
# "Amp" first would coerce the column to character and make the following
# `< 2` test a locale-dependent string comparison. Rows equal to 2 were
# removed above, so every remaining value is either above or below 2.
cn_value <- df$Variant_Classification
df$Variant_Classification <- ifelse(cn_value > 2, "Amp", "Del")
table(df$Variant_Classification)

## Split by risk group ----
df_l <- subset(
  df,
  Tumor_Sample_Barcode %in% rownames(group)[group$group == "Low Risk"]
)
df_h <- subset(
  df,
  Tumor_Sample_Barcode %in% rownames(group)[group$group == "High Risk"]
)
saveRDS(df_l, "cnT.l.rds")
saveRDS(df_h, "cnT.h.rds")

# Import into maftools ----
# See the companion post "TCGAbiolinks: downloading MAF data" for how the
# mutation tables `prad_l` / `prad_h` are obtained; they are not created here.
library(maftools)

## Clean the data ----
cnv_l <- readRDS("cnT.l.rds")
cnv_h <- readRDS("cnT.h.rds")

# Restrict mutation and copy-number tables to the samples present in both,
# separately for each risk group.
prad_l$Tumor_Sample_Barcode <- prad_l$BarCode
prad_l <- subset(prad_l, Tumor_Sample_Barcode %in% cnv_l$Tumor_Sample_Barcode)
cnv_l <- subset(cnv_l, Tumor_Sample_Barcode %in% prad_l$Tumor_Sample_Barcode)

prad_h$Tumor_Sample_Barcode <- prad_h$BarCode
prad_h <- subset(prad_h, Tumor_Sample_Barcode %in% cnv_h$Tumor_Sample_Barcode)
cnv_h <- subset(cnv_h, Tumor_Sample_Barcode %in% prad_h$Tumor_Sample_Barcode)

## Read into maftools ----
maf_l <- read.maf(prad_l, cnTable = cnv_l)
maf_h <- read.maf(prad_h, cnTable = cnv_h)

## Draw the oncoplot (waterfall plot) ----
# repr.plot.* only affects the figure size in Jupyter/IRkernel front ends.
options(repr.plot.width = 12, repr.plot.height = 8)

# NOTE(review): `lvsh` is not defined in this file; presumably the result of a
# low-vs-high comparison such as maftools::mafCompare() -- confirm.
genes <- subset(lvsh$results, pval < 0.05)$Hugo_Symbol
coOncoplot(
  m1 = maf_l, m2 = maf_h,
  m1Name = "Low Risk", m2Name = "High Risk",
  genes = genes
)