# BiocManager::install("BioinformaticsFMRP/TCGAbiolinksGUI.data")
# BiocManager::install("BioinformaticsFMRP/TCGAbiolinks")
# Download one TCGA project's STAR-Counts gene expression data plus its
# clinical table from GDC and write them as tab-separated text files.
#
# Args:
#   i: 1-based position of the project inside the project list returned by
#      getGDCprojects() after keeping only ids that match "TCGA".
#
# Side effects:
#   - attaches TCGAbiolinks, dplyr and SummarizedExperiment;
#   - downloads the queried files through GDCdownload();
#   - creates "result/" when missing and writes
#     result/<project_id>-counts.txt, -fpkm.txt, -tpm.txt and -clinical.txt.
#
# Returns: NULL, invisibly.
gdcdata <- function(i) {
  library(TCGAbiolinks)
  library(dplyr)
  library(SummarizedExperiment)

  if (!is.numeric(i) || length(i) != 1L || is.na(i)) {
    stop("`i` must be a single, non-missing number.", call. = FALSE)
  }

  # dplyr verbs are namespace-qualified so that a later-attached package
  # exporting `select`/`filter` cannot hijack them.
  projects <- getGDCprojects() %>%
    as.data.frame() %>%
    dplyr::select(project_id, tumor) %>%
    dplyr::filter(grepl(pattern = "TCGA", project_id))

  # Fail early with a readable message instead of passing an NA project id
  # on to GDCquery().
  if (i < 1 || i > nrow(projects)) {
    stop(
      "`i` = ", i, " is out of range; ", nrow(projects),
      " TCGA project(s) are available.",
      call. = FALSE
    )
  }
  project_id <- projects$project_id[i]

  ## 0. Progress information
  print(paste0("Downloading number ", i, ",project name: ", project_id))

  ## 1. Build the query
  query.exp <- GDCquery(
    project = project_id,
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = "STAR - Counts"
  )

  ## 2. Download the files
  GDCdownload(query.exp)

  ## 3. Merge the per-sample files into one object
  pre.exp <- GDCprepare(query = query.exp)

  ## 4. Extract the expression matrices
  # Prefer the assay name; fall back to the position the original code used
  # when the expected name is not present in this object.
  # NOTE(review): the names below are the ones TCGAbiolinks is expected to
  # assign to STAR - Counts assays -- confirm against the installed version.
  pick_assay <- function(se, assay_name, fallback_index) {
    if (assay_name %in% SummarizedExperiment::assayNames(se)) {
      SummarizedExperiment::assay(se, assay_name)
    } else {
      SummarizedExperiment::assay(se, fallback_index)
    }
  }
  countsdata <- pick_assay(pre.exp, "unstranded", 1)
  tpmdata <- pick_assay(pre.exp, "tpm_unstrand", 4)
  fpkmdata <- pick_assay(pre.exp, "fpkm_unstrand", 5)

  # Gene annotation columns that are prepended to every expression table.
  gene_info <- SummarizedExperiment::rowData(pre.exp)
  gene_id <- data.frame(
    id = gene_info$gene_id,
    gene_name = gene_info$gene_name,
    gene_type = gene_info$gene_type
  )

  # Clinical information
  clinical <- GDCquery_clinic(project = project_id, type = "clinical")

  ## 5. Save the data
  tables <- list(
    counts = cbind(gene_id, countsdata),
    fpkm = cbind(gene_id, fpkmdata),
    tpm = cbind(gene_id, tpmdata),
    clinical = clinical
  )

  # Create the output directory here so the function also works when it is
  # called without the surrounding script having prepared it.
  dir.create("result", showWarnings = FALSE, recursive = TRUE)

  for (kind in names(tables)) {
    out_path <- file.path("result", paste0(project_id, "-", kind, ".txt"))
    write.table(
      tables[[kind]],
      out_path,
      sep = "\t",
      col.names = TRUE,
      row.names = FALSE,
      quote = FALSE
    )
  }

  invisible(NULL)
}
# Driver: download every project, one gdcdata() call per index.

# showWarnings = FALSE keeps reruns quiet when "result" already exists.
dir.create("result", showWarnings = FALSE)

# Number of project indices to process.
# NOTE(review): 33 is hard-coded; presumably the number of TCGA projects
# listed by GDC -- verify against the list gdcdata() builds.
n_projects <- 33L

# Attempt every project even if an earlier one fails, so a single broken
# download does not throw away the rest of the batch.
succeeded <- logical(n_projects)
for (i in seq_len(n_projects)) {
  succeeded[i] <- tryCatch(
    {
      gdcdata(i)
      TRUE
    },
    error = function(e) {
      message("Project number ", i, " failed: ", conditionMessage(e))
      FALSE
    }
  )
}

# Still end with an error when anything failed, so a non-interactive run
# reports failure just like the fail-fast version did.
if (!all(succeeded)) {
  stop(
    "gdcdata() failed for project number(s): ",
    paste(which(!succeeded), collapse = ", "),
    call. = FALSE
  )
}