setwd("E:\\read_count") library(tidyverse) library(magrittr) library(hablar) library(DESeq2) library(GenomicFeatures) ##计算fpkm #根据参考基因组获得每条reads的exon txdb <- makeTxDbFromGFF("E:\\read_count\\Saccharomyces_cerevisiae.R64-1-1.106.gtf",format="gtf") exons_gene <- exonsBy(txdb, by = "gene") exons_gene_lens <- lapply(exons_gene,function(x){sum(width(reduce(x)))}) n=t(as.data.frame(exons_gene_lens)) #传入Count矩阵和基因长度 FPKM <- function(data_count,gene_len) { lens <- nrow(data_count) colnames <- data_count %>% colnames() for (i in 1:ncol(data_count)) { mapped_reads <- sum(data_count[1:lens,i])#计算每个样本的mapped reads数,由于data_count文本最后几行是综述数据,需要舍去,因此选择58721行之前的基因数据。 FPKM <- data_count[1:lens,i]/(10^-9*mapped_reads*gene_len[1:lens,1])#计算FPKM值,10^-9是单位转换后的换算,不能放在分母外的原因是mapped_reads*n[1:58721,1]的乘积很大会报错,因此需要先缩小倍数再进行计算 data_count = data.frame(data_count[1:lens,],FPKM)#添加到data_count表格后面i } data_count %>% dplyr