Find differentially expressed genes in PLancenta tissue with preeclampsia We will need the control samples for normal placenta tissue and samples of tissue with preeclampsia .
Set working directory:
setwd("C:/Users/manso/OneDrive - University of West London/MSc Bioinformatics - UWL/6.BGA - Bioinformatics and Genome Analysis/week 5 - Microarray analysis/practical")
Install necessary packages and load libraries:
countsN <- read.csv("ExpData.csv")
rownames(countsN) <- countsN[,1]
countsN <- countsN[,-1]
dim(countsN)
[1] 60715 16
In this dataset we have 16 samples ans 60715 gene ids.
#DEseqDataset
dataset <- DESeqDataSetFromMatrix(countData = countsN,
colData = colData,
design = ~condition)
dataset
class: DESeqDataSet
dim: 60715 16
metadata(1): version
assays(1): counts
rownames(60715): ENSG00000000003 ENSG00000000005 ...
ERCC-00170 ERCC-00171
rowData names(0):
colnames(16): NP513 NP518 ... PE616 PE621
colData names(1): condition
dds <- DESeq(dataset)
print(dds)
class: DESeqDataSet
dim: 60715 16
metadata(1): version
assays(6): counts mu ... replaceCounts replaceCooks
rownames(60715): ENSG00000000003 ENSG00000000005 ...
ERCC-00170 ERCC-00171
rowData names(23): baseMean baseVar ... maxCooks replace
colnames(16): NP513 NP518 ... PE616 PE621
colData names(3): condition sizeFactor replaceable
result <- results(dds, contrast=c('condition','NP','PE'), alpha = 0.01)
result <- results(dds, contrast=c('condition','NP','PE'), alpha = 0.01)
summary(result)
out of 37813 with nonzero total read count
adjusted p-value < 0.01
LFC > 0 (up) : 63, 0.17%
LFC < 0 (down) : 12, 0.032%
outliers [1] : 0, 0%
low counts [2] : 25952, 69%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
countsN <- read.csv("ExpData.csv", header = T)
rownames(countsN) <- countsN[,1]
countsN <- countsN[,-1]
countsN <- as.matrix(countsN)
dim(countsN)
[1] 60715 16
dge <- DGEList(counts=countsN)
dge <- calcNormFactors(dge, method = "TMM")
TPMnorm_NPET1 <- cpm(dge, log = TRUE, normalized.lib.sizes=TRUE)
dim(countsN)
[1] 60715 16
origin <- rep(1, 13) # If origin is single, origin information is not needed for RP analysis.
RP <- RP(countsN, cl, num.perm=100,
logged=TRUE, na.rm=FALSE,
gene.names=rownames (countsN),
plot=F, rand=123)
Rank Product analysis for unpaired case
done
TopGene <- topGene(RP, cutoff=0.001, method="pfp",
logged=TRUE, logbase=2, gene.names=rownames(countsN))
Table1: Genes called significant under class1 < class2
Table2: Genes called significant under class1 > class2
T1 <- TopGene[["Table1"]]
T2 <- TopGene[["Table2"]]
write.table(T1 , col.names=NA, row.names=T, file ="Week9_CGDEUpReg0.01.tsv", sep ="\t")
write.table(T2 , col.names=NA, row.names=T, file ="Week9_CGDEDnReg0.01.tsv", sep ="\t")
dim(T1)
[1] 58 5
dim(T2)
[1] 752 5