line |
bran |
sub |
pod |
code |
1
|
|
|
|
package Sanger::CGP::TraFiC::Cluster; |
2
|
|
|
|
|
3
|
|
|
|
##########LICENCE########## |
4
|
|
|
|
# Copyright (c) 2015 Genome Research Ltd. |
5
|
|
|
|
# |
6
|
|
|
|
# Author: Cancer Genome Project cgpit@sanger.ac.uk |
7
|
|
|
|
# |
8
|
|
|
|
# This file is part of TraFiC. |
9
|
|
|
|
# |
10
|
|
|
|
# TraFiC is free software: you can redistribute it and/or modify it under |
11
|
|
|
|
# the terms of the GNU Affero General Public License as published by the Free |
12
|
|
|
|
# Software Foundation; either version 3 of the License, or (at your option) any |
13
|
|
|
|
# later version. |
14
|
|
|
|
# |
15
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
16
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
17
|
|
|
|
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
18
|
|
|
|
# details. |
19
|
|
|
|
# |
20
|
|
|
|
# You should have received a copy of the GNU Affero General Public License |
21
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
|
# |
23
|
|
|
|
# 1. The usage of a range of years within a copyright statement contained within |
24
|
|
|
|
# this distribution should be interpreted as being equivalent to a list of years |
25
|
|
|
|
# including the first and last year specified and all consecutive years between |
26
|
|
|
|
# them. For example, a copyright statement that reads 'Copyright (c) 2005, 2007- |
27
|
|
|
|
# 2009, 2011-2012' should be interpreted as being identical to a statement that |
28
|
|
|
|
# reads 'Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012' and a copyright |
29
|
|
|
|
# statement that reads "Copyright (c) 2005-2012' should be interpreted as being |
30
|
|
|
|
# identical to a statement that reads 'Copyright (c) 2005, 2006, 2007, 2008, |
31
|
|
|
|
# 2009, 2010, 2011, 2012'." |
32
|
|
|
|
##########LICENCE########## |
33
|
|
|
|
|
34
|
|
|
|
|
35
|
|
2
|
|
use strict; |
36
|
|
2
|
|
use autodie qw(:all); |
37
|
|
2
|
|
use English qw( -no_match_vars ); |
38
|
|
2
|
|
use warnings FATAL => 'all'; |
39
|
|
2
|
|
use Carp qw(croak); |
40
|
|
2
|
|
use Const::Fast qw(const); |
41
|
|
2
|
|
use File::Which qw(which); |
42
|
|
|
|
|
43
|
|
2
|
|
use Sanger::CGP::TraFiC; |
44
|
|
|
|
|
45
|
|
|
|
const my $TAB => "\t"; |
46
|
|
|
|
|
47
|
|
|
|
const my @CLASSES => qw(SINGLE_END INTER_CHROM ABERRANT); |
48
|
|
|
|
const my @HEADER_ROOT => qw(CHR FAMILY); |
49
|
|
|
|
const my @HEADER_MAIN => qw(L_POS R_POS TOTAL_READS SINGLE_END_COUNT INTER_CHROM_COUNT ABERRANT_COUNT SINGLE_END_READS INTER_CHROM_READS ABERRANT_READS); |
50
|
|
|
|
|
51
|
|
|
|
sub new { |
52
|
|
2
|
1
|
my ($class) = @_; |
53
|
50
|
|
|
croak "Unable to find standard unix 'sort' in path." unless(defined which('sort')); |
54
|
|
|
|
my $self = { 'min_reads' => 5, |
55
|
|
|
|
'paired_alu' => 0,}; |
56
|
|
|
|
bless $self, $class; |
57
|
|
|
|
return $self; |
58
|
|
|
|
} |
59
|
|
|
|
|
60
|
|
|
|
sub reciprocal_clusters { |
61
|
|
0
|
1
|
my $self = shift; |
62
|
|
|
|
# results are small so load into hash structures to simplify searching by family/chr |
63
|
|
|
|
my $pos = $self->clustered_struct('+'); |
64
|
|
|
|
my $neg = $self->clustered_struct('-'); |
65
|
|
|
|
$self->reciprocal_hits($pos, $neg); |
66
|
|
|
|
return 1; |
67
|
|
|
|
} |
68
|
|
|
|
|
69
|
|
|
|
sub reciprocal_hits { |
70
|
|
1
|
0
|
my ($self, $pos, $neg) = @_; |
71
|
|
|
|
# keyed as family->chr |
72
|
|
|
|
# if family in pos not found in neg then can skip the whole block |
73
|
|
|
|
# if no chr for a family then you can skip that too... |
74
|
|
|
|
my @reciprocal_clusters; |
75
|
|
|
|
my @families = sort keys %{$pos}; |
76
|
|
|
|
for my $family(@families) { |
77
|
50
|
|
|
unless(exists $neg->{$family}) { |
78
|
|
|
|
delete $pos->{$family}; |
79
|
|
|
|
next; |
80
|
|
|
|
} |
81
|
|
|
|
my @chrs = sort keys %{$pos->{$family}}; |
82
|
|
|
|
for my $chr(@chrs) { |
83
|
100
|
|
|
unless(exists $neg->{$family}->{$chr}) { |
84
|
|
|
|
delete $pos->{$family}->{$chr}; |
85
|
|
|
|
next; |
86
|
|
|
|
} |
87
|
|
|
|
push @reciprocal_clusters, @{$self->compare_hits($chr, $family, $pos->{$family}->{$chr}, $neg->{$family}->{$chr})}; |
88
|
|
|
|
} |
89
|
|
|
|
} |
90
|
|
|
|
$self->output_reciprocal(\@reciprocal_clusters); |
91
|
|
|
|
return 1; |
92
|
|
|
|
} |
93
|
|
|
|
|
94
|
|
|
|
sub output_reciprocal { |
95
|
|
1
|
0
|
my ($self, $clusters) = @_; |
96
|
|
|
|
|
97
|
|
|
|
# output clusters |
98
|
|
|
|
open my $RECIP, '>', "$self->{reciprocal_clusters}.raw"; |
99
|
|
|
|
for my $cluster(@{$clusters}) { |
100
|
|
|
|
my ($chr, $family, $pos, $neg) = @{$cluster}; |
101
|
|
|
|
print $RECIP (join $TAB, |
102
|
|
|
|
$chr, $family, # shared |
103
|
|
|
|
$pos->{'l_pos'}, $pos->{'r_pos'}, @{$pos->{'detail'}}, # pos cluster info |
104
|
|
|
|
$neg->{'l_pos'}, $neg->{'r_pos'}, @{$neg->{'detail'}}) # neg cluster info |
105
|
|
|
|
,$RS; |
106
|
|
|
|
} |
107
|
|
|
|
close $RECIP; |
108
|
|
|
|
|
109
|
|
|
|
# sort clusters |
110
|
|
|
|
# safest to use unix command as will be fast and lean |
111
|
|
|
|
my $sort_command = sprintf "sort -k 1,1 -k 3,4n -k 2,2 -o %s %s", |
112
|
|
|
|
"$self->{reciprocal_clusters}.sort", # output file |
113
|
|
|
|
"$self->{reciprocal_clusters}.raw"; # input file |
114
|
50
|
|
|
croak "Failed to sort file using command: $sort_command" unless(system($sort_command) == 0); |
115
|
|
|
|
|
116
|
|
|
|
# write header |
117
|
|
|
|
open $RECIP, '>', $self->{'reciprocal_clusters'}; |
118
|
|
|
|
print $RECIP '#',(join $TAB, @HEADER_ROOT); |
119
|
|
|
|
for(@HEADER_MAIN) { |
120
|
|
|
|
print $RECIP $TAB,'P_',$_; |
121
|
|
|
|
} |
122
|
|
|
|
for(@HEADER_MAIN) { |
123
|
|
|
|
print $RECIP $TAB,'N_',$_; |
124
|
|
|
|
} |
125
|
|
|
|
print $RECIP $RS; |
126
|
|
|
|
|
127
|
|
|
|
# append the sorted results |
128
|
|
|
|
open my $SORT, '<', "$self->{reciprocal_clusters}.sort"; |
129
|
|
|
|
while(<$SORT>) { print $RECIP $_; } |
130
|
|
|
|
close $SORT; |
131
|
|
|
|
close $RECIP; |
132
|
|
|
|
|
133
|
|
|
|
# cleanup intermediate files |
134
|
|
|
|
unlink "$self->{reciprocal_clusters}.raw"; |
135
|
|
|
|
unlink "$self->{reciprocal_clusters}.sort"; |
136
|
|
|
|
return 1; |
137
|
|
|
|
} |
138
|
|
|
|
|
139
|
|
|
|
sub compare_hits { |
140
|
|
2
|
0
|
my ($self, $chr, $family, $pos_hits, $neg_hits) = @_; |
141
|
|
|
|
my $neg_tot = scalar @{$neg_hits}; |
142
|
|
|
|
my $neg_i = 0; |
143
|
|
|
|
my @reciprocal; |
144
|
|
|
|
PRIMARY: for my $pos(@{$pos_hits}) { |
145
|
|
|
|
# if pos l_pos is > neg r_pos+200 |
146
|
|
|
|
while($neg_i < $neg_tot) { |
147
|
100
|
|
|
if($neg_hits->[$neg_i]->{'l_pos'} < $pos->{'r_pos'}) { |
148
|
|
|
|
$neg_i++; |
149
|
|
|
|
next; |
150
|
|
|
|
} |
151
|
50
|
|
|
if($neg_hits->[$neg_i]->{'r_pos'}+200 < $pos->{'l_pos'}) { |
152
|
|
|
|
# neg data is prior to pos data so catch up |
153
|
|
|
|
$neg_i++; |
154
|
|
|
|
next; |
155
|
|
|
|
} |
156
|
50
|
|
|
if($neg_hits->[$neg_i]->{'l_pos'}-200 > $pos->{'r_pos'}) { |
157
|
|
|
|
# neg is ahead of pos so catchup pos and re-check this entry |
158
|
|
|
|
next PRIMARY; |
159
|
|
|
|
} |
160
|
|
|
|
push @reciprocal, [$chr, $family, $pos, $neg_hits->[$neg_i]]; |
161
|
|
|
|
$neg_i++; |
162
|
|
|
|
} |
163
|
|
|
|
} |
164
|
|
|
|
return \@reciprocal; |
165
|
|
|
|
} |
166
|
|
|
|
|
167
|
|
|
|
sub clustered_struct { |
168
|
|
2
|
0
|
my ($self, $strand) = @_; |
169
|
|
|
|
my %clusters; |
170
|
|
|
|
open my $CLUSTERS, '<', $self->{$strand.'_clusters'}; |
171
|
|
|
|
while (my $cluster = <$CLUSTERS>) { |
172
|
100
|
|
|
next if(substr($cluster, 0,1) eq '#'); |
173
|
|
|
|
chomp $cluster; |
174
|
|
|
|
my ($chr, $family, $l_pos, $r_pos, @detail) = split /\t/, $cluster; |
175
|
|
|
|
push @{$clusters{$family}{$chr}}, { 'l_pos' => int $l_pos, |
176
|
|
|
|
'r_pos' => int $r_pos, |
177
|
|
|
|
'detail' => \@detail, |
178
|
|
|
|
}; |
179
|
|
|
|
} |
180
|
|
|
|
close $CLUSTERS; |
181
|
|
|
|
return \%clusters; |
182
|
|
|
|
} |
183
|
|
|
|
|
184
|
|
|
|
sub cluster_hits { |
185
|
|
2
|
1
|
my ($self) = @_; |
186
|
|
|
|
$self->cluster_file('+'); |
187
|
|
|
|
$self->cluster_file('-'); |
188
|
|
|
|
return 1; |
189
|
|
|
|
} |
190
|
|
|
|
|
191
|
|
|
|
sub cluster_file { |
192
|
|
8
|
0
|
my ($self, $strand) = @_; |
193
|
|
|
|
open my $HITS, '<', $self->{$strand.'_sorted_hits'}; |
194
|
|
|
|
my ($p_chr, $p_pos, $p_fam) = (q{}, 0, q{}); |
195
|
|
|
|
my $cluster = []; |
196
|
|
|
|
open my $CLUSTERED, '>', $self->{$strand.'_clusters'}; |
197
|
|
|
|
print $CLUSTERED '#',(join $TAB, @HEADER_ROOT, @HEADER_MAIN),$RS; |
198
|
|
|
|
while (my $line = <$HITS>) { |
199
|
|
|
|
chomp $line; |
200
|
|
|
|
my ($rn, $chr, $pos, $fam, $class) = split $TAB, $line; |
201
|
100
|
|
|
next if($fam eq 'Alu' && $class ne 'INTER_CHROM' && $self->{'paired_alu'} == 1); |
202
|
|
|
|
|
203
|
100
|
|
|
if($p_chr eq $chr && $p_fam eq $fam && ($pos - $p_pos) <= 200) { |
204
|
|
|
|
push @{$cluster}, [$rn, $chr, $pos, $class]; |
205
|
|
|
|
} |
206
|
|
|
|
else { |
207
|
|
|
|
$cluster = $self->finalise_cluster($cluster, $p_fam, $strand, $CLUSTERED); |
208
|
|
|
|
push @{$cluster}, [$rn, $chr, $pos, $class]; |
209
|
|
|
|
} |
210
|
|
|
|
($p_chr, $p_pos, $p_fam) = ($chr, $pos, $fam); |
211
|
|
|
|
} |
212
|
|
|
|
$self->finalise_cluster($cluster, $p_fam, $strand, $CLUSTERED); |
213
|
|
|
|
close $CLUSTERED; |
214
|
|
|
|
close $HITS; |
215
|
|
|
|
return 1; |
216
|
|
|
|
} |
217
|
|
|
|
|
218
|
|
|
|
sub finalise_cluster { |
219
|
|
102408
|
0
|
my ($self, $cluster, $fam, $strand, $CLUSTERED) = @_; |
220
|
100
|
|
|
if(scalar @{$cluster} >= $self->{'min_reads'}) { |
221
|
|
|
|
my %classes = map { $_ => [] } @CLASSES; |
222
|
|
|
|
for my $item(@{$cluster}) { |
223
|
|
|
|
push @{$classes{$item->[3]}}, $item->[0]; |
224
|
|
|
|
} |
225
|
|
|
|
# chr, family, l_pos, r_pos, read_count_*, read_names_* |
226
|
|
|
|
print $CLUSTERED (join $TAB, $cluster->[0]->[1], |
227
|
|
|
|
$fam, |
228
|
|
|
|
$cluster->[0]->[2], |
229
|
|
|
|
$cluster->[-1]->[2], |
230
|
|
|
|
scalar @{$classes{'SINGLE_END'}} + scalar @{$classes{'INTER_CHROM'}} + scalar @{$classes{'ABERRANT'}}, |
231
|
|
|
|
scalar @{$classes{'SINGLE_END'}}, |
232
|
|
|
|
scalar @{$classes{'INTER_CHROM'}}, |
233
|
|
|
|
scalar @{$classes{'ABERRANT'}}, |
234
|
|
|
|
${_format_readnames($classes{'SINGLE_END'})}, |
235
|
|
|
|
${_format_readnames($classes{'INTER_CHROM'})}, |
236
|
|
|
|
${_format_readnames($classes{'ABERRANT'})}, |
237
|
|
|
|
),$RS; |
238
|
|
|
|
} |
239
|
|
|
|
$cluster = []; |
240
|
|
|
|
return $cluster; |
241
|
|
|
|
} |
242
|
|
|
|
|
243
|
|
|
|
sub _format_readnames { |
244
|
|
170
|
|
my $reads = shift; |
245
|
|
|
|
my $ret_val = q{.}; |
246
|
100
|
|
|
if(defined $reads && @{$reads} > 0) { |
247
|
|
|
|
$ret_val = join ',', @{$reads}; |
248
|
|
|
|
} |
249
|
|
|
|
return \$ret_val |
250
|
|
|
|
} |
251
|
|
|
|
|
252
|
|
|
|
sub set_min_reads { |
253
|
|
6
|
1
|
my ($self, $min_reads) = @_; |
254
|
|
|
|
$self->{'min_reads'} = $min_reads || 5; |
255
|
|
|
|
return 1; |
256
|
|
|
|
} |
257
|
|
|
|
|
258
|
|
|
|
sub set_paired_alu { |
259
|
|
2
|
0
|
my ($self, $paired_alu) = @_; |
260
|
50
|
|
|
$self->{'paired_alu'} = 1 if(defined $paired_alu); |
261
|
|
|
|
return 1; |
262
|
|
|
|
} |
263
|
|
|
|
|
264
|
|
|
|
sub set_input_output { |
265
|
|
1
|
1
|
my ($self, $root_path) = @_; |
266
|
50
|
|
|
croak "$root_path does not exist." unless(-e $root_path); |
267
|
50
|
|
|
croak "$root_path is not a directory." unless(-d $root_path); |
268
|
50
|
|
|
croak "$root_path cannot be written to." unless(-w $root_path); |
269
|
|
|
|
|
270
|
|
|
|
$self->{'+_sorted_hits'} = "$root_path/pos_hits.txt"; |
271
|
|
|
|
$self->{'-_sorted_hits'} = "$root_path/neg_hits.txt"; |
272
|
|
|
|
|
273
|
|
|
|
$self->{'+_clusters'} = "$root_path/pos_clusters.txt"; |
274
|
|
|
|
$self->{'-_clusters'} = "$root_path/neg_clusters.txt"; |
275
|
|
|
|
$self->{'reciprocal_clusters'} = "$root_path/reciprocal_clusters.txt"; |
276
|
|
|
|
|
277
|
|
|
|
return 1; |
278
|
|
|
|
} |
279
|
|
|
|
|
280
|
|
|
|
|
281
|
|
|
|
1; |
282
|
|
|
|
|
283
|
|
|
|
__END__ |
284
|
|
|
|
|
285
|
|
|
|
=head1 NAME |
286
|
|
|
|
|
287
|
|
|
|
Sanger::CGP::TraFiC::Cluster |
288
|
|
|
|
|
289
|
|
|
|
=head1 SYNOPSIS |
290
|
|
|
|
|
291
|
|
|
|
my $cluster = Sanger::CGP::TraFiC::Cluster->new; |
292
|
|
|
|
|
293
|
|
|
|
$cluster->set_input_output($output_folder); |
294
|
|
|
|
# which should already contain data from Formatter.pm |
295
|
|
|
|
|
296
|
|
|
|
$cluster->set_min_reads($minimum_reads_required); |
297
|
|
|
|
$cluster->cluster_hits; |
298
|
|
|
|
$cluster->reciprocal_clusters; |
299
|
|
|
|
|
300
|
|
|
|
=head1 GENERAL |
301
|
|
|
|
|
302
|
|
|
|
Take the merged repeat-masker *.out and fasta data generated by Formatter.pm and generate clusters |
303
|
|
|
|
of nearby/overlapping events. |
304
|
|
|
|
|
305
|
|
|
|
Generates three files: |
306
|
|
|
|
|
307
|
|
|
|
=over |
308
|
|
|
|
|
309
|
|
|
|
=item pos_clusters.txt |
310
|
|
|
|
|
311
|
|
|
|
Data for reads anchored on positive strand that are able to be clustered with greater or equal to |
312
|
|
|
|
the value of I<min_reads>. |
313
|
|
|
|
|
314
|
|
|
|
=item neg_clusters.txt |
315
|
|
|
|
|
316
|
|
|
|
Data for reads anchored on negative strand that are able to be clustered with greater or equal to |
317
|
|
|
|
the value of I<min_reads>. |
318
|
|
|
|
|
319
|
|
|
|
=item reciprocal_clusters.txt |
320
|
|
|
|
|
321
|
|
|
|
Where entries in pos/neg_clusters.txt support each other they are additionally output here. The |
322
|
|
|
|
pos_cluster is always the primary entry. |
323
|
|
|
|
|
324
|
|
|
|
=back |
325
|
|
|
|
|
326
|
|
|
|
=head2 Format of output |
327
|
|
|
|
|
328
|
|
|
|
The output files all follow the same format |
329
|
|
|
|
|
330
|
|
|
|
=head3 Core fields |
331
|
|
|
|
|
332
|
|
|
|
These are present at the start of each line in every file. |
333
|
|
|
|
|
334
|
|
|
|
=over |
335
|
|
|
|
|
336
|
|
|
|
=item CHR |
337
|
|
|
|
|
338
|
|
|
|
Chromosome or sequence identifier. |
339
|
|
|
|
|
340
|
|
|
|
=item FAMILY |
341
|
|
|
|
|
342
|
|
|
|
Masking identified reads as hitting repeat of this type. |
343
|
|
|
|
|
344
|
|
|
|
=back |
345
|
|
|
|
|
346
|
|
|
|
=head3 Data fields |
347
|
|
|
|
|
348
|
|
|
|
These are present in the pos/neg_clusters.txt files as described here. |
349
|
|
|
|
|
350
|
|
|
|
In the reciprocal_clusters.txt file these are presented twice, the positive cluster first (prefixed B<P_>) |
351
|
|
|
|
and then the negative cluster data (prefixed B<N_>). |
352
|
|
|
|
|
353
|
|
|
|
=over |
354
|
|
|
|
|
355
|
|
|
|
=item L_POS |
356
|
|
|
|
|
357
|
|
|
|
Left most position of cluster. |
358
|
|
|
|
|
359
|
|
|
|
=item R_POS |
360
|
|
|
|
|
361
|
|
|
|
Left most position of cluster. |
362
|
|
|
|
|
363
|
|
|
|
=item TOTAL_READS |
364
|
|
|
|
|
365
|
|
|
|
Total number of reads that support this cluster |
366
|
|
|
|
|
367
|
|
|
|
=item SINGLE_END_COUNT |
368
|
|
|
|
|
369
|
|
|
|
Number of single end mapped reads that contribute to this cluster. |
370
|
|
|
|
|
371
|
|
|
|
=item INTER_CHROM_COUNT |
372
|
|
|
|
|
373
|
|
|
|
Number of inter-chromosomal mapped reads that contribute to this cluster. |
374
|
|
|
|
|
375
|
|
|
|
=item ABERRANT_COUNT |
376
|
|
|
|
|
377
|
|
|
|
Number of aberrantly paired reads that contribute to this cluster. |
378
|
|
|
|
|
379
|
|
|
|
=item SINGLE_END_READS |
380
|
|
|
|
|
381
|
|
|
|
Names of reads contributing to I<SINGLE_END_COUNT>. |
382
|
|
|
|
|
383
|
|
|
|
=item INTER_CHROM_READS |
384
|
|
|
|
|
385
|
|
|
|
Names of reads contributing to I<INTER_CHROM_COUNT>. |
386
|
|
|
|
|
387
|
|
|
|
=item ABERRANT_READS |
388
|
|
|
|
|
389
|
|
|
|
Names of reads contributing to I<ABERRANT_COUNT>. |
390
|
|
|
|
|
391
|
|
|
|
=back |
392
|
|
|
|
|
393
|
|
|
|
=head1 METHODS |
394
|
|
|
|
|
395
|
|
|
|
=head2 Constructor/configuration |
396
|
|
|
|
|
397
|
|
|
|
=head3 new |
398
|
|
|
|
|
399
|
|
|
|
No options, sets up object with default values. |
400
|
|
|
|
|
401
|
|
|
|
=head3 set_input_output |
402
|
|
|
|
|
403
|
|
|
|
=over |
404
|
|
|
|
|
405
|
|
|
|
=item Description |
406
|
|
|
|
|
407
|
|
|
|
Define the path to the input files (generated by L<Sanger::CGP::TraFiC::Formatter>). The output will |
408
|
|
|
|
be written to this area as it is expected for these file to be generated in a single step. |
409
|
|
|
|
|
410
|
|
|
|
=item Args |
411
|
|
|
|
|
412
|
|
|
|
Path to output of L<Sanger::CGP::TraFiC::Formatter> |
413
|
|
|
|
|
414
|
|
|
|
=back |
415
|
|
|
|
|
416
|
|
|
|
=head3 set_min_reads |
417
|
|
|
|
|
418
|
|
|
|
=over |
419
|
|
|
|
|
420
|
|
|
|
=item Description |
421
|
|
|
|
|
422
|
|
|
|
Define the minimum number of reads required to generate a cluster. Defaults to 5. |
423
|
|
|
|
|
424
|
|
|
|
=item Args |
425
|
|
|
|
|
426
|
|
|
|
Integer, number of reads that must exist within a cluster. |
427
|
|
|
|
|
428
|
|
|
|
=back |
429
|
|
|
|
|
430
|
|
|
|
=head2 Processing |
431
|
|
|
|
|
432
|
|
|
|
=head3 cluster_hits |
433
|
|
|
|
|
434
|
|
|
|
=over |
435
|
|
|
|
|
436
|
|
|
|
=item Description |
437
|
|
|
|
|
438
|
|
|
|
Load the hit files, cluster and output in L<cluster format|Format of output>. |
439
|
|
|
|
|
440
|
|
|
|
=item Note |
441
|
|
|
|
|
442
|
|
|
|
No arguments as paths are determined at higher level. |
443
|
|
|
|
|
444
|
|
|
|
=back |
445
|
|
|
|
|
446
|
|
|
|
=head3 reciprocal_clusters |
447
|
|
|
|
|
448
|
|
|
|
=over |
449
|
|
|
|
|
450
|
|
|
|
=item Description |
451
|
|
|
|
|
452
|
|
|
|
Compare the clustered data for pos and neg data identifying reciprocal entities. A reciprocal output |
453
|
|
|
|
file is generated. See L<formatting section|Format of output>. |
454
|
|
|
|
|
455
|
|
|
|
=item Note |
456
|
|
|
|
|
457
|
|
|
|
No arguments as paths are determined at higher level. |
458
|
|
|
|
|
459
|
|
|
|
=back |