Source code for hit_module
"""
:platform: Unix
:synopsis: Create the class HIT which represents a TFBS hit.
"""
class HIT:
    """
    Define the representation of a TFBS hit.

    Hits are compared through their score only. ``None`` is accepted as the
    right operand of every comparison and stands for "no hit": an actual hit
    always ranks strictly above it.

    """
    # TODO raise an error when the strand is wrong

    def __init__(self, seq_record, start, end, strand, score, tffm=None,
                 tffm_matched_state=-1):
        """
        Create an instance of the :class:`HIT`.

        :arg seq_record: Sequence containing the hit.
        :type seq_record: :class:`Bio.SeqRecord`
        :arg start: Start position of the hit.
        :type start: int
        :arg end: End position of the hit.
        :type end: int
        :arg strand: Strand of the hit on the sequence. It should be either
            '+' or '-'.
        :type strand: str
        :arg score: TFFM score of the hit.
        :type score: float
        :arg tffm: TFFM used to predict the hit (default: None).
        :type tffm: :class:`TFFM`
        :arg tffm_matched_state: Matching state in the TFFM to predict the hit
            (default: -1, i.e. no TFFM).
        :type tffm_matched_state: int

        :warning: start and end are 1-based.
        :warning: The seq_record attribute is not the actual sequence of the
            hit but the whole sequence containing the hit.
        :todo: Raise an error when the strand is wrong.

        """
        self.seq_record = seq_record
        self.start = start
        self.end = end
        self.strand = strand
        self.score = score
        self.tffm_matched_state = tffm_matched_state
        self.tffm = tffm

    def __len__(self):
        """
        Give the length of the TFBS hit.

        :returns: The length of the TFBS hit, i.e. ``end - start + 1`` since
            both positions are inclusive.
        :rtype: int

        """
        return self.end - self.start + 1

    def __str__(self):
        """
        Give the string representation of the TFBS hit.

        :returns: The string representing the hit as tab-separated fields, in
            the following order: sequence id, start, end, strand, sequence,
            tffm-name, tffm-state, score. The tffm-name is "NoName" when no
            TFFM is attached to the hit and the score is printed using
            :func:`repr`.
        :rtype: str

        """
        if self.tffm:
            name = self.tffm.name
        else:
            name = "NoName"
        string = "%s\t%d\t%d\t%s\t%s\t%s\t%d\t%s" % (self.seq_record.id,
                                                     self.start, self.end,
                                                     self.strand,
                                                     self.sequence(),
                                                     name,
                                                     self.tffm_matched_state,
                                                     repr(self.score))
        return string

    # NOTE: the comparisons below test ``other is None`` explicitly instead of
    # relying on the truthiness of *other*. Since the class defines __len__,
    # the truthiness of a HIT depends on its coordinates: a zero-length hit
    # would be mistaken for "no hit" and a negative length would raise.

    def __lt__(self, other):
        """
        Implement the **<** operator. The comparison looks at the score.

        :returns: False when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        """
        if other is None:
            return False
        return self.score < other.score

    def __le__(self, other):
        """
        Implement the **<=** operator. The comparison looks at the score.

        :returns: False when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        """
        if other is None:
            return False
        return self.score <= other.score

    def __eq__(self, other):
        """
        Implement the **==** operator. The comparison looks at the score.

        :returns: False when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        :warning: Two hits with the same score are equal whatever their
            positions, strands, or sequences.

        """
        if other is None:
            return False
        return self.score == other.score

    def __ne__(self, other):
        """
        Implement the **!=** operator. The comparison looks at the score.

        :returns: True when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        """
        if other is None:
            return True
        return self.score != other.score

    def __gt__(self, other):
        """
        Implement the **>** operator. The comparison looks at the score.

        :returns: True when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        """
        if other is None:
            return True
        return self.score > other.score

    def __ge__(self, other):
        """
        Implement the **>=** operator. The comparison looks at the score.

        :returns: True when *other* is None, the comparison of the scores
            otherwise.
        :rtype: bool

        """
        if other is None:
            return True
        return self.score >= other.score

    def sequence(self):
        """
        Give the sequence of the TFBS hit.

        :returns: The slice of ``seq_record.seq`` covered by the hit. It is
            returned as is when the strand is '+' or empty and reverse
            complemented for any other strand value.

        """
        # start and end are 1-based and inclusive
        seq = self.seq_record.seq[self.start - 1:self.end]
        if self.strand == "+" or not self.strand:
            return seq
        else:
            return seq.reverse_complement()
def get_start_end_strand(position, seq_record, tffm, negative):
    """
    Convert the 0-based end position of a TFBS hit into 1-based coordinates.

    :arg position: End position of the TFBS hit as extracted from the TFFM
        computations (0-based).
    :type position: int
    :arg seq_record: The actual sequence.
    :type seq_record: :class:`Bio.SeqRecord`
    :arg tffm: The TFFM used to predict the TFBS hit.
    :type tffm: :class:`TFFM`
    :arg negative: Boolean set to True if the TFBS hit is on the negative
        strand of the sequence and False otherwise.

    :returns: The start and end positions and the strand.
    :rtype: tuple(int, int, str)

    :note: The strand is '+' if the hit is on the positive strand and '-'
        otherwise.
    :warning: The input *position* is 0-based whereas the output start and
        end are 1-based, which is the more conventional way to print the
        TFBS hit positions.

    """
    if negative:
        # The position is mirrored with respect to the sequence length and
        # the hit then spans len(tffm) positions from there.
        first = len(seq_record) - position
        last = first + len(tffm) - 1
        return first, last, "-"
    # Shift the end to 1-based and walk back over the len(tffm) positions.
    last = position + 1
    first = last - len(tffm) + 1
    return first, last, "+"