Source code for pydna.contig
# -*- coding: utf-8 -*-
import textwrap as _textwrap
import networkx as _nx
from pydna._pretty import pretty_str as _pretty_str
from pydna.dseqrecord import Dseqrecord as _Dseqrecord
from pydna.utils import rc as _rc
class Contig(_Dseqrecord):
    """Hold information about a DNA assembly.

    This class is instantiated by the :class:`Assembly` class and is not
    meant to be used directly.

    In addition to what the parent ``Dseqrecord`` stores, a contig carries:

    ``graph``
        The assembly graph. The methods below read it through
        ``graph.nodes(data=True)`` / ``graph.edges(data=True)`` and expect a
        ``"length"`` entry on every node and ``"name"``, ``"seq"``,
        ``"piece"`` (a ``slice``) and ``"features"`` entries on every edge.
    ``nodemap``
        A mapping from each node to its counterpart in the reverse
        complemented graph (used by :meth:`reverse_complement`).
    """

    def __init__(self, record, *args, graph=None, nodemap=None, **kwargs):
        """Initialise the parent record and attach ``graph`` and ``nodemap``."""
        super().__init__(record, *args, **kwargs)
        self.graph = graph
        self.nodemap = nodemap

    @classmethod
    def from_string(cls, record: str = "", *args, graph=None, nodemap=None, **kwargs):
        """Build an object with the parent's ``from_string`` and attach
        ``graph`` and ``nodemap`` to it.
        """
        obj = super().from_string(record, *args, **kwargs)
        obj.graph = graph
        obj.nodemap = nodemap
        return obj

    @classmethod
    def from_SeqRecord(cls, record, *args, graph=None, nodemap=None, **kwargs):
        """Build an object with the parent's ``from_SeqRecord`` and attach
        ``graph`` and ``nodemap`` to it.
        """
        obj = super().from_SeqRecord(record, *args, **kwargs)
        obj.graph = graph
        obj.nodemap = nodemap
        return obj

    def __repr__(self):
        """Return ``Contig(o<length>)`` if circular, else ``Contig(-<length>)``."""
        return "Contig({}{})".format("o" if self.circular else "-", len(self))

    def _repr_pretty_(self, p, cycle):
        """Write the same short text as ``__repr__`` to the printer ``p``."""
        p.text("Contig({}{})".format("o" if self.circular else "-", len(self)))

    def _repr_html_(self):
        """Return :meth:`figure` wrapped in an HTML ``<pre>`` element."""
        return "<pre>" + self.figure() + "</pre>"

    def reverse_complement(self):
        """Return the reverse complement of this contig as a new object.

        The sequence itself is reverse complemented by the parent class. The
        assembly graph is rebuilt so that it describes the new sequence:

        * every edge ``u -> v`` becomes ``nodemap[v] -> nodemap[u]`` and the
          edges and nodes are inserted in reversed order;
        * an edge name gains an ``_rc`` suffix, or loses it if it already had
          one; a newly suffixed name is cut to at most 13 characters;
        * ``seq`` is replaced by ``_rc(seq)``, ``piece`` is recomputed from
          the sequence length and the lengths of the two end nodes, and every
          feature is replaced by ``feature._flip(len(seq))``.

        The returned object's ``nodemap`` is the inverse of this one's.
        """
        answer = type(self)(super().reverse_complement())
        nm = self.nodemap
        g = _nx.DiGraph()
        # Edges first, then nodes: the edges create the (mapped) nodes in
        # their new order and add_nodes_from then attaches the node data.
        g.add_edges_from([(nm[v], nm[u], d) for u, v, d in list(self.graph.edges(data=True))[::-1]])
        g.add_nodes_from((nm[n], d) for n, d in list(self.graph.nodes(data=True))[::-1])
        for u, v, ed in g.edges(data=True):
            name = ed["name"]
            ed["name"] = name[:-3] if name.endswith("_rc") else "{}_rc".format(name)[:13]
            ed["seq"] = _rc(ed["seq"])
            ln = len(ed["seq"])
            start, stop = ed["piece"].start, ed["piece"].stop
            # Mirror the slice around the sequence and shift it by the length
            # of the node at the corresponding end of the reversed edge.
            ed["piece"] = slice(ln - stop - g.nodes[u]["length"], ln - start - g.nodes[v]["length"])
            ed["features"] = [f._flip(ln) for f in ed["features"]]
        answer.graph = g
        answer.nodemap = {v: k for k, v in self.nodemap.items()}
        return answer

    rc = reverse_complement

    def detailed_figure(self):
        """Return a text representation of the assembled fragments.

        Every edge contributes its full sequence followed by its target node
        in upper case, each placed at the column where it aligns with the
        rows above.

        Linear:

        ::

            acgatgctatactgCCCCCtgtgctgtgctcta
                               TGTGCTGTGCTCTA
                               tgtgctgtgctctaTTTTTtattctggctgtatc

        Circular:

        ::

            ||||||||||||||
            acgatgctatactgCCCCCtgtgctgtgctcta
                               TGTGCTGTGCTCTA
                               tgtgctgtgctctaTTTTTtattctggctgtatc
                                                  TATTCTGGCTGTATC
                                                  tattctggctgtatcGGGGGtacgatgctatactg
                                                                       ACGATGCTATACTG
        """
        edges = list(self.graph.edges(data=True))
        fragmentposition = 0
        nodeposition = 0
        rows = []  # [column, text] pairs, top to bottom
        for _, target, data in edges:
            piece = data["piece"]
            nodeposition += piece.stop - piece.start
            fragmentposition -= piece.start
            rows.append([fragmentposition, data["seq"]])
            rows.append([nodeposition, target.upper()])
            fragmentposition += piece.stop
        if self.circular:
            # Mark where the last edge's target node re-enters the first
            # fragment with a row of bars as long as that node.
            closing_node = edges[-1][1]
            rows.insert(0, [edges[0][2]["piece"].start, "|" * len(closing_node)])
        else:
            # A linear contig does not show the node after the last fragment.
            rows = rows[:-1]
        # Shift everything right so that the leftmost row starts at column 0.
        firstpos = -min(0, min(pos for pos, _ in rows))
        return _pretty_str("".join("{}{}\n".format(" " * (pos + firstpos), text) for pos, text in rows))

    def figure(self):
        r"""Return a compact ASCII representation of the assembled fragments.

        Each fragment is drawn as its name between the ``"length"`` values of
        the graph nodes before and after it (the sizes of the sequences it
        has in common with its neighbours):

        ::

            length of preceding node|fragment name|length of following node

        Linear:

        ::

            frag20| 6
                   \/
                   /\
                    6|frag23| 6
                             \/
                             /\
                              6|frag14

        Circular:

        ::

             -|2577|61
            |       \/
            |       /\
            |       61|5681|98
            |               \/
            |               /\
            |               98|2389|557
            |                       \/
            |                       /\
            |                       557-
            |                          |
             --------------------------
        """
        nodes = list(self.graph.nodes(data=True))
        edges = list(self.graph.edges(data=True))
        if self.circular:
            fig = self._figure_circular(nodes, edges)
        else:
            fig = self._figure_linear(nodes, edges)
        return _pretty_str(_textwrap.dedent(fig))

    @staticmethod
    def _figure_linear(nodes, edges):
        """Build the text of :meth:`figure` for a linear contig."""
        first_name = edges[0][2]["name"]
        indent = len(first_name)
        pad = " " * indent
        rows = [
            "{name}|{o2:>2}\n".format(name=first_name, o2=nodes[1][1]["length"]),
            "{pad} \\/\n".format(pad=pad),
            "{pad} /\\\n".format(pad=pad),
        ]
        # Fragments between the first and the last one: each row group is
        # shifted right by the width of the labels printed so far.
        for i, (_, _, data) in enumerate(edges[1:-1]):
            label = "{o1:>2}|{name}|".format(o1=nodes[i + 1][1]["length"], name=data["name"])
            pad = " " * indent
            gap = " " * len(label)
            rows.append("{pad} {label}{o2:>2}\n".format(pad=pad, label=label, o2=nodes[i + 2][1]["length"]))
            rows.append("{pad} {gap}\\/\n".format(pad=pad, gap=gap))
            rows.append("{pad} {gap}/\\\n".format(pad=pad, gap=gap))
            indent += len(label)
        rows.append(
            "{pad} {o1:>2}|{name}".format(pad=" " * indent, o1=nodes[-2][1]["length"], name=edges[-1][2]["name"])
        )
        return "".join(rows)

    @staticmethod
    def _figure_circular(nodes, edges):
        """Build the text of :meth:`figure` for a circular contig."""
        # The last edge leads back to the first node; repeat it at the end so
        # that nodes[i + 2] is valid for the last edge as well.
        nodes = nodes + nodes[:1]
        first_name = edges[0][2]["name"]
        indent = len(first_name) + 3
        pad = " " * indent
        rows = [
            " -|{name}|{o2:>2}\n".format(name=first_name, o2=nodes[1][1]["length"]),
            "|{pad}\\/\n".format(pad=pad),
            "|{pad}/\\\n".format(pad=pad),
        ]
        for i, (_, _, data) in enumerate(edges[1:]):
            label = "{o1:>2}|{name}|".format(o1=nodes[i + 1][1]["length"], name=data["name"])
            pad = " " * indent
            gap = " " * len(label)
            rows.append("|{pad}{label}{o2:>2}\n".format(pad=pad, label=label, o2=nodes[i + 2][1]["length"]))
            rows.append("|{pad}{gap}\\/\n".format(pad=pad, gap=gap))
            rows.append("|{pad}{gap}/\\\n".format(pad=pad, gap=gap))
            indent += len(label)
        pad = " " * indent
        rows.append("|{pad}{o1:>2}-\n".format(pad=pad, o1=nodes[0][1]["length"]))
        # Three columns of padding put the closing bar just past the right
        # end of the bottom rule (indent + 3 dashes), mirroring the left bar
        # in column 0, and directly under the "-" above when the final node
        # length has three digits.
        rows.append("|{pad}   |\n".format(pad=pad))
        rows.append(" {rule}".format(rule="-" * (indent + 3)))
        return "".join(rows)
if __name__ == "__main__":
    import os as _os

    # Run the module's doctests with the "pydna_cached_funcs" environment
    # variable set to the empty string (presumably this switches pydna's
    # function caching off -- confirm against the caching code), then put
    # the previous value back.
    cached = _os.getenv("pydna_cached_funcs", "")
    _os.environ["pydna_cached_funcs"] = ""
    import doctest

    try:
        doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
    finally:
        # Restore the variable even if the doctest run raises.
        _os.environ["pydna_cached_funcs"] = cached