Source code for pangea.contrib.covid19.bio
import subprocess as sp
from os import environ
from os.path import isdir, abspath
# Runtime configuration, read once from the environment at import time.

# Command used to invoke Kraken2; defaults to `kraken2` resolved via PATH.
KRAKEN2_EXC = environ.get('COVID19_KRAKEN2_EXC', 'kraken2')

# Path to an existing Kraken2 database; None when the variable is unset.
KRAKEN2_DB = environ.get('COVID19_KRAKEN2_DB')

# Value passed to Kraken2's `--threads` option; defaults to 1.
THREADS = int(environ.get('COVID19_THREADS', '1'))

# Location of the prebuilt Kraken2 database tarball.
KRAKEN2_DB_URL = (
    'https://s3.wasabisys.com/metasub/covid/kraken2_covid_2020_03_13.tar.gz'
)
def download_kraken2():
    """Download a custom Kraken2 database for detecting COVID.

    The tarball at ``KRAKEN2_DB_URL`` is fetched into ``covid19/dbs/``
    (relative to the current working directory) and unpacked there. If the
    expected database directory already exists, nothing is downloaded.

    Returns:
        The relative path ``covid19/dbs/<tarball name without .tar.gz>``.
        NOTE(review): this assumes the tarball unpacks into a directory of
        that name -- confirm against the published archive.

    Raises:
        subprocess.CalledProcessError: if the download or extraction fails.
    """
    # Local import: the module only imports `environ` from `os`.
    from os import makedirs

    dbs_dir = 'covid19/dbs'
    tarball_base = KRAKEN2_DB_URL.split("/")[-1]
    base = tarball_base.split('.tar.gz')[0]
    local_path = f'{dbs_dir}/{base}'
    if isdir(local_path):
        return local_path

    # The download directory may not exist on a fresh checkout; without this
    # the command below would fail before anything is fetched.
    makedirs(dbs_dir, exist_ok=True)
    cmd = (
        # -O overwrites a partial tarball left by an interrupted run; plain
        # wget would save the new file as `<name>.1` and tar would then
        # extract the stale one.
        f'wget -O {tarball_base} {KRAKEN2_DB_URL} && '
        f'tar -xzf {tarball_base}'
    )
    sp.check_call(cmd, shell=True, cwd=dbs_dir)
    return local_path
def kraken2_search_reads(reads, outprefix):
    """Use Kraken2 to make a fast pass report on reads.

    Args:
        reads: Path to the reads file. It is passed to Kraken2 with
            ``--gzip-compressed`` and resolved to an absolute path first.
        outprefix: Prefix of the report file; the report is written to
            ``<outprefix>.kraken2_report``.

    Returns:
        The path of the written report, ``<outprefix>.kraken2_report``.

    Raises:
        subprocess.CalledProcessError: if Kraken2 (or the database download)
            exits with a non-zero status.
    """
    # Local import: `shlex` is not imported at module level.
    from shlex import quote

    reads = abspath(reads)
    report_filepath = f'{outprefix}.kraken2_report'
    kraken2_db = KRAKEN2_DB
    if kraken2_db is None:
        # No database configured via the environment: fetch the default one.
        kraken2_db = download_kraken2()
    cmd = (
        # Configuration values (executable, database) are interpolated
        # verbatim so an executable setting that includes arguments keeps
        # working; caller-supplied paths are quoted so spaces or shell
        # metacharacters in them cannot break or alter the command.
        f'{KRAKEN2_EXC} '
        f'--db {kraken2_db} '
        f'--threads {THREADS} '
        f'--report {quote(report_filepath)} '
        f'--gzip-compressed '
        f'{quote(reads)} '
        # Per-read classifications on stdout are discarded; only the report
        # file is kept.
        '> /dev/null'
    )
    sp.run(cmd, check=True, shell=True)
    return report_filepath