计算fpkm
setwd("E:\\read_count")
library(tidyverse)
library(magrittr)
library(hablar)
library(DESeq2)
library(GenomicFeatures)
##计算fpkm
#根据参考基因组获得每条reads的exon
txdb <- makeTxDbFromGFF("E:\\read_count\\Saccharomyces_cerevisiae.R64-1-1.106.gtf",format="gtf")
exons_gene <- exonsBy(txdb, by = "gene")
exons_gene_lens <- lapply(exons_gene,function(x){sum(width(reduce(x)))})
n=t(as.data.frame(exons_gene_lens))
#传入Count矩阵和基因长度
FPKM <- function(data_count,gene_len) {
lens <- nrow(data_count)
colnames <- data_count %>% colnames()
for (i in 1:ncol(data_count)) {
mapped_reads <- sum(data_count[1:lens,i])#计算每个样本的mapped reads数,由于data_count文本最后几行是综述数据,需要舍去,因此选择58721行之前的基因数据。
FPKM <- data_count[1:lens,i]/(10^-9*mapped_reads*gene_len[1:lens,1])#计算FPKM值,10^-9是单位转换后的换算,不能放在分母外的原因是mapped_reads*n[1:58721,1]的乘积很大会报错,因此需要先缩小倍数再进行计算
data_count = data.frame(data_count[1:lens,],FPKM)#添加到data_count表格后面i
}
data_count %>%
dplyr::select(starts_with('FPKM')) -> data_count
colnames(data_count) <- colnames
return(data_count)
}
gene_fpkm <- FPKM(gene_expr,n)
##转TPM,更科学定量
max(gene_fpkm)
fpkmToTpm <- function(fpkm) {
exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
}
gene_tpm <- as.data.frame(apply(gene_fpkm,2,fpkmToTpm))
max(gene_tpm)
评论
发表评论