DNA sequences of short reads generated by high throughput sequencing are stored in the Sequence Read Archive (SRA), a bioinformatics database.
SRA is an inter-institutional collaboration among NCBI SRA, EBI ENA and DDBJ DRA.
Within SRA, the data are organised hierarchically and are classified into six object types: study, sample, experiment, run, submission and analysis.
# Work from the practical's folder.
# NOTE(review): this absolute path is specific to one machine; adjust it
# before running the script elsewhere.
setwd("C:/Users/manso/OneDrive - University of West London/MSc Bioinformatics - UWL/7.MSc Bioinformatics Project/W2 - Sample size and power analysis/practical")

# Install BiocManager from CRAN only when it is not already available.
# The namespace tested here must be spelled exactly like the package that is
# installed; a misspelt name makes the guard always TRUE and forces a
# reinstall on every run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager", repos = "http://cran.us.r-project.org")
}
package 'BiocManager' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\manso\AppData\Local\Temp\RtmpeQDety\downloaded_packages
# Install SRAdb from Bioconductor only when it is missing (mirrors the
# BiocManager guard), then attach it so that the unqualified SRAdb functions
# called later in this script (getSRAdbFile, listSRAfile, ...) can be found.
if (!requireNamespace("SRAdb", quietly = TRUE)) {
  BiocManager::install("SRAdb")
}
library(SRAdb)
This code chunk downloads the SRA metadata database only if the file does not already exist. The download can take a while (~2.4 GB gzip-compressed file).
# Fetch the full SRA metadata database only when no local copy is present.
# Per the SRAdb documentation, getSRAdbFile() downloads SRAmetadb.sqlite.gz,
# unzips it and returns the local file name, so both the existence check and
# the fallback must name the decompressed SQLite file rather than the .gz
# archive (a gzip archive cannot be opened as an SQLite database).
# NOTE(review): sra_dbname is reassigned to the packaged demo database right
# after this block, so the file obtained here is not the one queried below.
if (!file.exists("SRAmetadb.sqlite")) {
  sra_dbname <- getSRAdbFile()
} else {
  sra_dbname <- "SRAmetadb.sqlite"
}
# Point sra_dbname at the demo database file located under the "extdata"
# directory of the installed SRAdb package.
sra_dbname <- file.path(
  system.file("extdata", package = "SRAdb"),
  "SRAmetadb_demo.sqlite"
)

# Open an SQLite connection to that database file.
sra_con <- dbConnect(dbDriver("SQLite"), dbname = sra_dbname)
# List the "sra"-type file locations for two example runs, requesting
# fasp addresses as the source type.
listSRAfile(
  in_acc = c("SRR000648", "SRR000657"),
  sra_con = sra_con,
  fileType = "sra",
  srcType = "fasp"
)
run study sample experiment
1 SRR000648 SRP000098 SRS000290 SRX000122
2 SRR000657 SRP000098 SRS000290 SRX000122
fasp
1 anonftp@ftp-trace.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR000/SRR000648/SRR000648.sra
2 anonftp@ftp-trace.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR000/SRR000657/SRR000657.sra
# ascp command line (flags and private-key path) passed to getSRAfile().
# NOTE(review): the key path is machine-specific; confirm it exists locally.
ascpCMD <- "ascp -QT -l 300m -i /usr/local/aspera/connect/etc/asperaweb_id_dsa.putty"

# Fetch the "sra" files for the two example runs over fasp using that command.
SRAFiles <- getSRAfile(
  in_acc = c("SRR000648", "SRR000657"),
  sra_con = sra_con,
  fileType = "sra",
  srcType = "fasp",
  ascpCMD = ascpCMD
)
ascp options used above: -Q : use the fair (adaptive) transfer policy; -T : disable encryption for maximum throughput; -l : set the target transfer rate (here 300m, i.e. 300 Mbps); -i : specify the private key file.
# Look up FASTQ file information for the two example runs, asking for
# fasp addresses.
getFASTQinfo(
  in_acc = c("SRR000648", "SRR000657"),
  sra_con = sra_con,
  srcType = "fasp"
)
run submission study sample experiment
1 SRR000648 SRA000241 SRP000098 SRS000290 SRX000122
2 SRR000657 SRA000241 SRP000098 SRS000290 SRX000122
fasp
1 era-fasp@fasp.sra.ebi.ac.uk:vol1/fastq/SRR000/SRR000648/SRR000648.fastq.gz
2 era-fasp@fasp.sra.ebi.ac.uk:vol1/fastq/SRR000/SRR000657/SRR000657.fastq.gz
# Fetch the FASTQ files for the two example runs over ftp with curl,
# writing them straight into the current working directory (no
# sub-directory is created and no ascp command is supplied).
getFASTQfile(
  in_acc = c("SRR000648", "SRR000657"),
  sra_con = sra_con,
  destDir = getwd(),
  srcType = "ftp",
  makeDirectory = FALSE,
  method = "curl",
  ascpCMD = NULL
)
run submission study sample experiment
1 SRR000648 SRA000241 SRP000098 SRS000290 SRX000122
2 SRR000657 SRA000241 SRP000098 SRS000290 SRX000122
ftp
1 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR000/SRR000648/SRR000648.fastq.gz
2 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR000/SRR000657/SRR000657.fastq.gz
The sra or sra-lite data files are downloaded from NCBI SRA and the fastq files are downloaded from EBI ENA.
The files are downloaded into the working directory.
# Close the database connection now that the last query has run.
dbDisconnect(conn = sra_con)