File Coverage

lib/Sanger/CGP/TraFiC/Implement.pm
Criterion Covered Total %
branch 0 68 0.0
subroutine 13 24 54.1
pod 0 10 0.0
total 13 102 12.7


line bran sub pod code
1       package Sanger::CGP::TraFiC::Implement;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2015 Genome Research Ltd.
5       #
6       # Author: Cancer Genome Project cgpit@sanger.ac.uk
7       #
8       # This file is part of TraFiC.
9       #
10       # TraFiC is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see <http://www.gnu.org/licenses/>.
22       #
23       # 1. The usage of a range of years within a copyright statement contained within
24       # this distribution should be interpreted as being equivalent to a list of years
25       # including the first and last year specified and all consecutive years between
26       # them. For example, a copyright statement that reads 'Copyright (c) 2005, 2007-
27       # 2009, 2011-2012' should be interpreted as being identical to a statement that
28       # reads 'Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012' and a copyright
29       # statement that reads "Copyright (c) 2005-2012' should be interpreted as being
30       # identical to a statement that reads 'Copyright (c) 2005, 2006, 2007, 2008,
31       # 2009, 2010, 2011, 2012'."
32       ##########LICENCE##########
33        
34   1   use strict;
35   1   use warnings FATAL => 'all';
36   1   use English qw( -no_match_vars );
37   1   use autodie qw(:all);
38   1   use Const::Fast qw(const);
39   1   use File::Which qw(which);
40   1   use File::Path qw(make_path remove_tree);
41   1   use File::Temp qw(tempfile);
42   1   use FindBin qw($Bin);
43   1   use File::Basename qw(fileparse);
44        
45   1   use Sanger::CGP::TraFiC;
46        
47   1   use PCAP::Threaded;
48   1   use PCAP::Bam;
49        
50       const my $CAND_READS => q{ -c %s -o %s -i %s};
51       const my $CAND_ORPH => q{ -i %s};
52       const my $CAND_SPLIT => q{ -o %s -i %s};
53       const my $CLUSTER => q{ -r %s/rm/%%/%s.%%.out -f %s/split/%s.%% -m %s -o %s};
54        
55       ### the core work
56        
57       sub select_candidate_reads {
58   0 0 my ($index, $options) = @_;
59 0     return 1 if(exists $options->{'index'} && $index != $options->{'index'});
60        
61       my $tmp = $options->{'tmp'};
62 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);
63        
64       my @inputs = ($options->{'tumour'}, $options->{'normal'});
65       my $iter = 1;
66       for my $input(@inputs) {
67       for my $chr(@{$options->{'ordered_seqs'}}) {
68 0     next if($iter++ != $index); # skip to the relevant input in the list
69        
70       ## build command for this index
71       #
72        
73       my $sample = sanitised_sample_from_bam($input);
74       my $output = File::Spec->catdir($tmp, 'candidates', $sample);
75 0     make_path($output) unless(-e $output);
76        
77       my $command = "$^X ";
78       $command .= _which('TraFiC_candReads.pl');
79       $command .= sprintf $CAND_READS, $chr, $output, $input;
80 0     if(exists $options->{'badloci'}) {
81       $command .= ' -e '.$options->{'badloci'};
82       }
83        
84       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
85        
86       #
87       ## The rest is auto-magical
88       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
89       }
90       }
91       return 1;
92       }
93        
94       sub merge_select_orphans {
95   0 0 my ($index, $options) = @_;
96 0     return 1 if(exists $options->{'index'} && $index != $options->{'index'});
97        
98       my $tmp = $options->{'tmp'};
99 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);
100        
101       my @inputs = ($options->{'tumour'}, $options->{'normal'});
102       my $iter = 1;
103       for my $input(@inputs) {
104 0     next if($iter++ != $index); # skip to the relevant input in the list
105        
106       ## build command for this index
107       #
108        
109       my $sample = sanitised_sample_from_bam($input);
110       my $inputdir = File::Spec->catdir($tmp, 'candidates', $sample);
111        
112       my $command = "$^X ";
113       $command .= _which('TraFiC_candOrph.pl');
114       $command .= sprintf $CAND_ORPH, $inputdir;
115        
116       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
117        
118       #
119       ## The rest is auto-magical
120       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
121       }
122       return 1;
123       }
124        
125       sub split_candidate_reads {
126   0 0 my ($index, $options) = @_;
127 0     return 1 if(exists $options->{'index'} && $index != $options->{'index'});
128        
129       my $tmp = $options->{'tmp'};
130 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);
131        
132       my @inputs = ($options->{'tumour'}, $options->{'normal'});
133       my $iter = 1;
134       for my $input(@inputs) {
135 0     next if($iter++ != $index); # skip to the relevant input in the list
136        
137       ## build command for this index
138       #
139        
140       my $sample = sanitised_sample_from_bam($input);
141       my $inputdir = File::Spec->catdir($tmp, 'candidates', $sample);
142       my $outdir = File::Spec->catdir($tmp, 'split');
143 0     make_path($outdir) unless(-e $outdir);
144        
145       my $command = "$^X ";
146       $command .= _which('TraFiC_candSplit.pl');
147       $command .= sprintf $CAND_SPLIT, File::Spec->catfile($outdir, $sample.'.'), $inputdir;
148        
149       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
150        
151       #
152       ## The rest is auto-magical
153       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
154       }
155       return 1;
156       }
157        
158       sub repeat_mask_te {
159   0 0 my ($index, $options) = @_;
160 0     return 1 if(exists $options->{'index'} && $index != $options->{'index'});
161       my $tmp = $options->{'tmp'};
162 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);
163        
164       my $infile = $options->{'split_files'}->[$index-1];
165        
166       # build command
167        
168       my $rm_outdir = File::Spec->catdir($tmp, 'rm', $index);
169 0     if(-e $rm_outdir){
170       remove_tree($rm_outdir); # always cleanup any failed attempt first
171       }
172       make_path($rm_outdir);
173        
174       my $n_name = $options->{'normal_name'};
175       my $t_name = $options->{'tumour_name'};
176       my $file_sample = fileparse($infile);
177        
178       my $divergence;
179       # files are name sorted so index won't line up
180 0     if($file_sample =~ m/^$n_name[.][[:digit:]]+$/) {
  0      
181       $divergence = $options->{'ndiv'};
182       }
183       elsif($file_sample =~ m/^$t_name[.][[:digit:]]+$/) {
184       $divergence = $options->{'tdiv'};
185       }
186       else {
187       die "Unexpected file name: $file_sample\n";
188       }
189        
190       my $repeatmasker;
191 0     if(defined $options->{'rm'}) {
192       $repeatmasker = $options->{'rm'};
193       }
194       else {
195       $repeatmasker = _which('RepeatMasker');
196       }
197 0     die "Unable to find RepeatMasker binary, try setting '-rm'" unless(defined $repeatmasker);
198        
199       my $command = "cd $rm_outdir;"; # you can set -dir, but tmp files written to cwd
200       $command .= $repeatmasker;
201       $command .= " -no_is -nolow";
202       $command .= " -lib $options->{maskdb}";
203       $command .= " -e $options->{engine}";
204       $command .= " -norna $options->{accuracy}";
205       $command .= " -frag 2500000";
206       $command .= " -div $divergence"; # should this be configurable?
207       $command .= " -dir $rm_outdir";
208       $command .= " $infile";
209        
210       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
211       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
212        
213       my $basefile = "$rm_outdir/$file_sample.*";
214       system('rm', '-f', "$basefile.cat.gz", "$basefile.log" , "$basefile.masked", "$basefile.tbl");
215        
216       return 1;
217       }
218        
219       sub cluster_te {
220   0 0 my ($index, $options) = @_;
221 0     return 1 if(exists $options->{'index'} && $index != $options->{'index'});
222       my $tmp = $options->{'tmp'};
223 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);
224        
225       my $sample;
226 0     if($index == 1) {
  0      
227       $sample = $options->{'tumour_name'};
228       }
229       elsif($index == 2) {
230       $sample = $options->{'normal_name'};
231       }
232        
233       my $cluster_outdir = File::Spec->catdir($tmp, 'clusters', $sample);
234 0     if(-e $cluster_outdir){
235       remove_tree($cluster_outdir); # always cleanup any failed attempt first
236       }
237       make_path($cluster_outdir);
238        
239       my $command = "$^X ";
240       $command .= _which('TraFiC_cluster.pl');
241       $command .= sprintf $CLUSTER,
242       $tmp, $sample, # repeatmaster.out
243       $tmp, $sample, # repeatmasker in modified fasta
244       $options->{'support'},
245       $cluster_outdir;
246 0     $command .= ' -p' if($sample eq $options->{'tumour_name'});
247        
248        
249       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
250       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
251       return 1;
252       }
253        
254       sub filter_results {
255   0 0 my $options = shift;
256       my $tmp = $options->{'tmp'};
257 0     return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
258        
259       my $clusters_tumour = File::Spec->catdir($tmp, 'clusters', $options->{'tumour_name'});
260       my $clusters_normal = File::Spec->catdir($tmp, 'clusters', $options->{'normal_name'});
261        
262       my $np;
263 0     if(exists $options->{'germline'}) {
264       $np = $options->{'germline'}
265       }
266       else {
267       $np = File::Spec->catfile($tmp, 'faked_np.txt');
268       open my $NPFH, '>', $np;
269       print $NPFH "#\n";
270       close $NPFH;
271       }
272        
273       my $command = "$^X ";
274       $command .= _which('TraFiC_filter.pl');
275       $command .= " -o $options->{outdir}";
276       $command .= ' -i '.File::Spec->catfile($clusters_tumour, 'reciprocal_clusters.txt');
277       $command .= ' -f '.File::Spec->catfile($clusters_normal, 'pos_clusters.txt');
278       $command .= ' -f '.File::Spec->catfile($clusters_normal, 'neg_clusters.txt');
279       $command .= " -g $np";
280 0     if(exists $options->{'known'}) {
281       $command .= " -t $options->{known}";
282       $command .= " -d $options->{div}";
283       }
284       $command .= " -fm $options->{filtmin}";
285       $command .= " -gm $options->{germmin}";
286        
287       PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
288        
289 0     unless(exists $options->{'germline'}) {
290       unlink $np;
291       }
292        
293       PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
294       return 1;
295       }
296        
297       ### Utility methods
298        
299       sub prepare {
300   0 0 my $options = shift;
301       $options->{'tumour_name'} = (PCAP::Bam::sample_name($options->{'tumour'}))[0];
302       $options->{'normal_name'} = (PCAP::Bam::sample_name($options->{'normal'}))[0];
303       return 1;
304       }
305        
306       sub sanitised_sample_from_bam {
307   0 0 my $sample = (PCAP::Bam::sample_name(shift))[0];
308       $sample =~ s/[^a-z0-9_-]/_/ig; # sanitise sample name
309       return $sample;
310       }
311        
312       sub _which {
313   0   my $prog = shift;
314       my $l_bin = $Bin;
315       my $path = File::Spec->catfile($l_bin, $prog);
316 0     $path = which($prog) unless(-e $path);
317       return $path;
318       }
319        
320       sub get_sequences {
321   0 0 my $bam_file = shift;
322       my $bam = PCAP::Bam->new($bam_file);
323       my @ordered_sq = sort @{$bam->header_sq};
324       return \@ordered_sq;
325       }
326        
327       sub split_files {
328   0 0 my $tmp_folder = shift;
329       my @files;
330       my $indir = File::Spec->catdir($tmp_folder, 'split');
331 0     if(-e $indir) {
332 0     opendir(my $dh, $indir) || die "Unable to read from $indir: $OS_ERROR";
333       while(my $thing = readdir $dh) {
334 0     next if($thing =~ m/^[.]/);
335 0     push @files, "$indir/$thing" if($thing =~ m/.*[.][[:digit:]]+$/);
336       }
337       closedir($dh);
338       }
339       @files = sort @files; # make sure these are always in same order
340       return \@files;
341       }
342        
343       1;