加载包
- 更新包:
- BiocManager::install("BioinformaticsFMRP/TCGAbiolinksGUI.data")
- BiocManager::install("BioinformaticsFMRP/TCGAbiolinks")
- packageVersion("TCGAbiolinks") # 2.25.0
library(TCGAbiolinks)
library(plyr)
library(SummarizedExperiment)
查看信息
# List the cancer types available on GDC (reported as project IDs).
# getGDCprojects() and getProjectSummary() are exported by TCGAbiolinks, so
# the public `::` operator is used; `:::` is reserved for unexported internals.
TCGAbiolinks::getGDCprojects()$project_id
# Show which data types are available for a given cancer type
# (prostate cancer, TCGA-PRAD, is used as the example).
TCGAbiolinks::getProjectSummary("TCGA-PRAD")
筛选数据
# Prostate cancer data on the GDC Data Portal is generally aligned to hg38.
# First query every STAR count file of the project, without sample filtering.
PRAD <- GDCquery(
  project       = "TCGA-PRAD",
  data.category = "Transcriptome Profiling",
  data.type     = "Gene Expression Quantification",
  workflow.type = "STAR - Counts"
)

# Keep only the "cases" column of the query results; omitting `cols` would
# return every column of the full result table.
PRAD_cases <- getResults(PRAD, cols = "cases")

# Restrict the barcodes to tumour-tissue samples (sample type "TP").
PRAD_tp <- TCGAquery_SampleTypes(
  barcode    = PRAD_cases,
  typesample = "TP"
)

# Repeat the query, this time limited to the selected tumour barcodes.
PRAD_D <- GDCquery(
  project       = "TCGA-PRAD",
  data.category = "Transcriptome Profiling",
  data.type     = "Gene Expression Quantification",
  workflow.type = "STAR - Counts",
  barcode       = PRAD_tp
)
获取数据
# PRAD_D, the tumour-only query built in the filtering step, is reused here.
# Rebuilding it with identical arguments would only trigger a redundant
# round-trip to the GDC API.

# Download the files referenced by the query.
GDCdownload(query = PRAD_D)

# Assemble the downloaded files into a single object and also save it to
# "PRAD.rda" so the preparation step does not have to be repeated.
# Note: this overwrites the earlier `PRAD` query object.
PRAD <- GDCprepare(
  query         = PRAD_D,
  save          = TRUE,
  save.filename = "PRAD.rda"
)
获取矩阵
# Pull the "unstranded" count matrix out of the prepared object.
# The original right-hand sides were mangled into "[email protected]..." and were
# not valid R; they are restored here using the public SummarizedExperiment
# accessors rather than direct `@` slot access.
counts <- assay(PRAD, "unstranded")

# Label columns with the patient identifier and rows with the gene name.
# NOTE(review): patient IDs and gene names may not be unique -- confirm before
# relying on name-based indexing of `counts`.
colnames(counts) <- colData(PRAD)$patient
rownames(counts) <- rowData(PRAD)$gene_name
Comments NOTHING