#!/usr/bin/env perl use warnings; use strict; use Getopt::Long qw(:config auto_help pass_through); use POSIX qw/ceil/; my $covAdjust = 5; my $printedHeader = 0; # false my $accuracyDP = 0; my $DCOnly = 0; # false my $threshold = 0; # Log2 threshold to report DC (otherwise DC is set to 0) my $header = 1; # true my $includeSkip = 0; # false my $window = 1; # coverage window size (in bp) my $verbosity = 0; # verbosity level my $compare = ""; GetOptions("adjustment=i" => $covAdjust, "header!" => $header, "compare=s" => \$compare, "dpaccuracy=i" => \$accuracyDP, "threshold=f" => \$threshold, "onlydc!" => \$DCOnly, "includeSkip!" => \$includeSkip, "window=i" => \$window, "verbose!" =>\$verbosity) or die("Error in command line arguments"); my @comp1 = (); my @comp2 = (); my @comps = (); my @adjCovs = (); my @lastCovs = (); my @startPoss = (); my @endPoss = (); my @refNames = (); my @descLines = (); my $lastDescRef = ""; my $lastCovLine = ""; my $lastPrint = ""; my $line = ""; my $lastLine = 0; # false my $nextLine = ""; my $changed = 0; # false my $dbgFill = " "; while($line = <>){ if($lastLine){ if($verbosity > 0){ print("${dbgFill}Last line!\n"); } } chomp $line; if(!$line){ next; } if($verbosity > 0){ print("${dbgFill}Processing: '${line}'\n"); } ##print("[preprocessed] lastPos: '${lastPos}'; startPos: '${startPos}';". ## " adjCovs: '".join(",", @adjCovs)."'\n"); ## Parse the line my ($refName, $pos, $refAllele, $cov, $bases, $qual, $rest) = split(/\t/, $line, 7); my $skip = ($bases =~ tr/<>//); push(@startPoss, $pos); push(@endPoss, $pos); push(@refNames, $refName); # print("${dbgFill}startPoss[$#startPoss] => '${pos}'\n"); # print("${dbgFill}startPoss: ".join(";", @startPoss)."\n"); ## If ref has changed, or window exceeded, reset coverage array $changed = (($lastLine) || (scalar(@startPoss) > 1) && (($refNames[$#refNames] ne $refNames[$#refNames-1]) || ($startPoss[$#startPoss] - $startPoss[$#startPoss-1] >= $window))); if(!$changed && (scalar(@startPoss) > 1)){ if($verbosity > 0){ print("${dbgFill} Unchanged; startPoss[$#startPoss] => X\n"); } pop(@startPoss); pop(@endPoss); $endPoss[$#endPoss] = $pos; # still within window; update end location pop(@refNames); if($verbosity > 0){ print("${dbgFill} startPoss: ".join(";", @startPoss)."\n"); print("${dbgFill} endPoss: ".join(";", @endPoss)."\n"); print("${dbgFill} refNames: ".join(";", @refNames)."\n"); print("${dbgFill} adjCovs: ".join(";", @adjCovs)."\n"); } } ## Repopulate the coverage array; storing the previous result $lastCovs[0] = $adjCovs[0]; $adjCovs[0] = (($changed || (scalar(@adjCovs) == 0)) ? 0 : $adjCovs[0]) + ($cov - $skip); my $covPos = 1; while($rest){ ($cov, $bases, $qual, $rest) = split(/\t/, $rest, 4); $skip = ($bases =~ tr/<>//); $lastCovs[$covPos] = $adjCovs[$covPos]; $adjCovs[$covPos] = ($changed ? 0 : $adjCovs[$covPos]) + ($cov - $skip); $covPos++; } if($verbosity > 0){ print("${dbgFill} adjCovs: ".join(";", @adjCovs)."\n"); print("${dbgFill} lastCovs: ".join(";", @lastCovs)."\n"); } if($changed){ ## window has changed; see if anything is different if($verbosity > 0){ print("${dbgFill}Changed; seeing if printing is needed\n"); print("${dbgFill} lastCovs: ".join(";", @lastCovs)."\n"); } ## set up comparison array and header (if needed) if (!$printedHeader) { if (!$compare) { for (my $x = 0; $x < $#lastCovs; $x++) { for (my $y = $x+1; $y <= $#lastCovs; $y++) { push(@comp1, $x+1); push(@comp2, $y+1); push(@comps, sprintf("%d,%d", $x+1, $y+1)); } } } else { while ($compare =~ s/([0-9]+?)[-,]([0-9]+?)[;,\s]?//) { push(@comp1, $1); push(@comp2, $2); push(@comps, "${1},${2}"); } } if ($header) { if ($DCOnly) { print(join("\t", "##Ref", "Start", "End", @comps)."\n"); } else { print(join("\t", "##Ref", "Start", "End", (1..($#lastCovs+1)), @comps)."\n"); } } $printedHeader = 1; # true } ## Calculate differential coverage my @DCov = (); for(my $i = 0; $i <= $#comp1; $i++){ my $x = $comp1[$i]-1; my $y = $comp2[$i]-1; my $val = (log($lastCovs[$x]+$covAdjust) - log($lastCovs[$y]+$covAdjust)) / log(2); push(@DCov, sprintf("%0.${accuracyDP}f", abs($val) < $threshold ? 0 : $val)); } ## normalise coverage to account for different window sizes if($verbosity > 0){ print("${dbgFill} adjusting coverage for $startPoss[0]..$endPoss[0]\n"); } my $bases = $endPoss[$#endPoss-1] - $startPoss[$#startPoss-1] + 1; my @unmodCovs = @lastCovs; if($window > 1){ map {$_ = sprintf("%0.2f", $_ / (ceil($bases / $window) * $bases)); s/(\.([0-9]*?))0+$/$1/;s/\.$//} @lastCovs; } ## Create description line for coverage / differential coverage my $descLine = ($DCOnly) ? join("\t", @DCov) : join("\t", @lastCovs, @DCov); $descLine =~ s/-0/0/g; # print("${dbgFill}storing line [1]: '${descLine}'\n"); push(@descLines, $descLine); if($verbosity > 0){ print("${dbgFill} startPoss: ".join(";", @startPoss)."\n"); print("${dbgFill} endPoss: ".join(";", @endPoss)."\n"); print("${dbgFill} refNames: ".join(";", @refNames)."\n"); print("${dbgFill} descLines: ".join(";", @descLines)."\n"); } if($lastLine || ($refNames[0] ne $refNames[$#refNames]) || ($descLines[0] ne $descLines[$#descLines])){ # print("${dbgFill}[1] Printing...\n"); my $startPos = shift(@startPoss); my $endPos = shift(@endPoss); my $startRef = shift(@refNames); my $lastDescLine = shift(@descLines); # print sequence (if any) if(defined($lastDescLine)){ if($lastLine){ $endPos = shift(@endPoss); } print(join("\t", $startRef, $startPos-1, $endPos, $lastDescLine)."\n"); } } elsif(scalar(@descLines) > 1) { # print("${dbgFill}[3] Merging record\n"); # print("${dbgFill} descLines: ".join(";", @descLines)."\n"); # print("${dbgFill}startPos[0]: $startPoss[1] => $startPoss[0]\n"); my $startPos = shift(@startPoss); my $endPos = shift(@endPoss); my $startRef = shift(@refNames); my $lastDescLine = shift(@descLines); $startPoss[0] = $startPos; # print("${dbgFill} startPoss: ".join(";", @startPoss)."\n"); # print("${dbgFill} endPoss: ".join(";", @endPoss)."\n"); # print("${dbgFill} refNames: ".join(";", @refNames)."\n"); # print("${dbgFill} descLines: ".join(";", @descLines)."\n"); ## end pos needs to be updated } } if(!$lastLine){ ## hack to make sure the last line can be processed using the same code as above if(!($nextLine = <>)){ $lastLine = 1; } else { $line = $nextLine; } redo; } }