Source code for pangea.contrib.treeoflife.taxa_tree
import pandas as pd
from os import environ
from os.path import join, dirname
import gzip
from .models import (
TaxonName,
TreeNode,
)
# Module-level configuration constants. None of these are referenced by the
# code visible in this section, so the usage notes below are assumptions.

# NOTE(review): presumably the field terminator used by NCBI taxonomy dump
# files (a tab followed by a pipe) -- confirm against the loader.
NCBI_DELIM = '\t|' # really...
# NOTE(review): these look like names of environment variables that can point
# at alternative names/nodes dump files -- confirm where they are read.
NAMES_ENV_VAR = 'CAPALYZER_NCBI_NAMES'
NODES_ENV_VAR = 'CAPALYZER_NCBI_NODES'
# Paths to the `.dmp.gz` files under `ncbi_tree/`, resolved relative to the
# directory containing this module.
NAMES_DEF = join(dirname(__file__), 'ncbi_tree/names.dmp.gz')
NODES_DEF = join(dirname(__file__), 'ncbi_tree/nodes.dmp.gz')
# Taxonomic rank labels, listed from the most specific (species) to the most
# general (superkingdom).
RANKS = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom']
class TaxaTree:
    """Static helpers for walking a taxon's lineage through ``TreeNode`` records.

    Every public method looks its starting node(s) up with
    ``TreeNode.byname`` and then follows ``parent`` links upward.
    """

    @staticmethod
    def _ancestor_nodes(node):
        """Yield ``(ancestor, name)`` pairs for ``node``, nearest ancestor first.

        ``name`` is the ancestor's ``canon_name.name``. The walk stops before
        the ancestor whose canonical name is ``'root'``; it also stops when a
        node has no parent, so starting at the top of the tree yields nothing
        instead of raising ``AttributeError``.
        """
        parent = node.parent
        while parent is not None:
            name = parent.canon_name.name
            if name == 'root':
                return
            yield parent, name
            parent = parent.parent

    @staticmethod
    def ancestors(taxon):
        """Return a list of all ancestors of the taxon starting with the taxon itself.

        The list holds canonical names ordered from ``taxon`` upward; the
        node named ``'root'`` is not included as an ancestor.
        """
        node = TreeNode.byname(taxon)
        lineage = [node.canon_name.name]
        lineage.extend(name for _, name in TaxaTree._ancestor_nodes(node))
        return lineage

    @staticmethod
    def ranked_ancestors(taxon):
        """Return a dict of all ancestors of the taxon starting with the taxon itself.

        Keys of the dict are taxon ranks and values are canonical names. When
        several nodes in the lineage report the same rank, the one visited
        last (i.e. furthest from ``taxon``) overwrites the earlier entries.
        """
        node = TreeNode.byname(taxon)
        by_rank = {node.rank: node.canon_name.name}
        for ancestor, name in TaxaTree._ancestor_nodes(node):
            by_rank[ancestor.rank] = name
        return by_rank

    @staticmethod
    def ancestor_rank(rank, taxon, default=None):
        """Return the ancestor of taxon at the given rank.

        Only strict ancestors are examined; the rank of ``taxon`` itself is
        not considered. The nearest matching ancestor wins.

        If no ancestor has the requested rank, ``default`` is returned when
        it is not ``None`` (falsy values such as ``''`` are honoured);
        otherwise ``KeyError`` is raised.
        """
        node = TreeNode.byname(taxon)
        for ancestor, name in TaxaTree._ancestor_nodes(node):
            if ancestor.rank == rank:
                return name
        # Compare against None explicitly so that a caller-supplied falsy
        # default (e.g. an empty string) is returned rather than rejected.
        if default is None:
            raise KeyError(f'{rank} for taxa {taxon} not found.')
        return default

    @staticmethod
    def get_taxon_parent_lists(taxa):
        """Return a pair of lists giving the name of each taxon and its parent.

        Give an empty string as the parent of the root.
        This function is used to prepare data for a Plotly sunburst plot.

        The two lists are index-aligned. Each requested taxon and every
        ancestor reached from it appears at most once, de-duplicated on both
        ``taxon_id`` and canonical name. Nodes are processed last-in,
        first-out, so the final entry of ``taxa`` is emitted first.
        """
        pending = [TreeNode.byname(taxon) for taxon in taxa]
        # Holds both taxon ids and canonical names that were already emitted.
        added = set()
        taxon_list, parent_list = [], []
        while pending:
            node = pending.pop()
            if node.taxon_id in added:
                continue
            node_name = node.canon_name.name
            if node_name in added:
                continue
            taxon_list.append(node_name)
            parent_name = ''
            if not node.is_root:
                # Climb past ancestors that share this node's name so that a
                # taxon is never reported as its own parent.
                parent_node = node.parent
                while parent_node:
                    candidate = parent_node.canon_name.name
                    if candidate != node_name:
                        parent_name = candidate
                        pending.append(parent_node)
                        break
                    parent_node = parent_node.parent
                # If the climb ran off the top of the tree nothing is queued
                # and the parent stays '', instead of pushing None onto the
                # work list and failing on the next pop.
            parent_list.append(parent_name)
            added.add(node.taxon_id)
            added.add(node_name)
        return taxon_list, parent_list