cBioPortal的原始数据获取

来源

https://github.com/cBioPortal/datahub/tree/master/public

https://www.cbioportal.org/study/summary?id=prad_su2c_2019

第一步 获取数据

1
2
3
4
5
6
# Load the three cBioPortal study files from the current working directory.
#
# Expression table: tab-separated with a header row. quote = '' disables
# quote handling so quote characters inside fields are read literally, and
# allowEscapes = TRUE lets C-style escapes such as '\t' be interpreted.
# NOTE(review): read.table() defaults to check.names = TRUE, which rewrites
# column names that are not syntactically valid R names -- confirm the column
# names of `d` still match the SAMPLE_ID values in the clinical sample table.
d <- read.table('data_mrna_seq_fpkm_capture.txt', header = TRUE, sep = '\t',
                allowEscapes = TRUE, quote = '')
d
# Clinical tables: text following a '#' is treated as a comment and skipped.
meta <- read.table('data_clinical_sample.txt', header = TRUE, sep = '\t',
                   comment.char = '#')
meta
clinical <- read.table('data_clinical_patient.txt', header = TRUE, sep = '\t',
                       comment.char = '#')
clinical

第二步 获取生存分析的数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Return the positions in `NameL` of the values that occur exactly once
# (reverse = FALSE, the default) or more than once (reverse = TRUE).
#
# NameL:   vector of names/identifiers.
# reverse: FALSE selects the non-duplicated entries, TRUE the duplicated ones.
# Returns an integer vector of positions into `NameL` (integer(0) if none).
# Counting is done with table(), which leaves NA values out, so NA entries
# are never selected in either mode.
f_rm_duplicated <- function(NameL, reverse = FALSE){
  counts <- table(NameL)
  wanted <- if (reverse) counts > 1 else counts == 1
  # names(counts) holds the distinct values as character strings
  selected <- names(counts)[wanted]
  which(NameL %in% selected)
}
# Collapse rows that share the same name so every name appears once, and use
# the names as row names of the result.
#
# lc_exp: table of numeric values; when rowN is not 0 it is indexed as a
#         list/data frame (lc_exp[-rowN], lc_exp[[rowN]]).
# rowN:   index of the column holding the row names; 0 means the names are
#         taken from rownames(lc_exp) and no column is removed.
# Returns the rows whose name is unique, followed by one row per duplicated
# name: the row with the largest row mean within that name group.
f_name_dedup <- function(lc_exp, rowN = 1){
  if (rowN == 0){
    res <- lc_exp
    rowNn <- rownames(lc_exp)
  }else{
    res <- lc_exp[-rowN]
    rowNn <- lc_exp[[rowN]]
  }

  # Rows whose name occurs exactly once are kept unchanged.
  # drop = FALSE keeps a one-column table from collapsing into a vector,
  # which would make the rownames<- assignment fail.
  uniq_idx <- f_rm_duplicated(rowNn)
  noDup <- res[uniq_idx, , drop = FALSE]
  rownames(noDup) <- rowNn[uniq_idx]

  dup_idx <- f_rm_duplicated(rowNn, reverse = TRUE)
  if (length(dup_idx) == 0L){
    # Nothing to collapse.
    return(noDup)
  }

  # as.character() so a factor name column does not carry unused levels
  # into split(), which would create empty groups.
  dup_names <- as.character(rowNn[dup_idx])
  Dup <- res[dup_idx, , drop = FALSE]
  rownames(Dup) <- NULL

  # For every duplicated name pick the row with the highest mean value.
  row_means <- rowMeans(Dup)
  groups <- split(seq_along(dup_names), dup_names)
  best <- unname(vapply(groups, function(rows){
    top <- which.max(row_means[rows])
    # which.max() is empty when every mean in the group is NA;
    # fall back to the first row of the group in that case.
    if (length(top) == 0L) rows[1L] else rows[top]
  }, integer(1)))

  Dup <- Dup[best, , drop = FALSE]
  rownames(Dup) <- dup_names[best]
  rbind(noDup, Dup)
}
# Keep only the sample rows whose PATIENT_ID occurs exactly once, then index
# the sample table by patient.
meta <- meta[f_rm_duplicated(meta$PATIENT_ID), ]
row.names(meta) <- meta$PATIENT_ID
meta

# Index the patient table by patient as well.
row.names(clinical) <- clinical$PATIENT_ID
clinical

# Join patient-level and sample-level annotations for the patients present in
# both tables, and re-index the combined table by sample.
mergeID <- intersect(row.names(clinical), row.names(meta))
df <- cbind(clinical[mergeID, ], meta[mergeID, ])
row.names(df) <- df$SAMPLE_ID
df

# Persist the combined annotation table and the expression table.
saveRDS(df, file = 'meta.rds')
saveRDS(d, file = 'fpkm.rds')

cBioPortal的原始数据获取
https://occdn.limour.top/1956.html
Author
Limour
Posted on
July 7, 2022
Licensed under