<?php

/**
 * Return the scratch directory used for intermediate files.
 *
 * @return string Directory path with a trailing slash.
 */
function get_tempdir() {
	$workdir = '/tripal-workdir';
	$subdir = "/tripal_db/tmp/";

	return $workdir . $subdir;
}



/**
 * Keep only the regions whose names start with a prefix accepted for $ref.
 *
 * @param string $regions Comma-separated region strings, or the literal 'list'.
 * @param string $ref     Reference name: 'cs10', 'pkv5' or 'fnv2'. Any other
 *                        value accepts no region at all.
 * @return string 'list' when $regions is 'list'; otherwise the accepted regions
 *                joined with commas in their original order (possibly '').
 */
function filter_regions_by_ref($regions, $ref) {

	// Sequence-name prefixes accepted for each supported reference.
	if ($ref == 'cs10') {
		$prefixes = array("NC_", "NW_");
	} elseif ($ref == 'pkv5') {
		$prefixes = array("CM01079", "AGQN0");
	} elseif ($ref == 'fnv2') {
		$prefixes = array("CM0116", "QKVJ020");
	} else {
		$prefixes = array();
	}

	$kept = array();
	if (count($prefixes) > 0) {
		foreach (explode(",", $regions) as $candidate) {
			foreach ($prefixes as $prefix) {
				if (strncmp($candidate, $prefix, strlen($prefix)) === 0) {
					$kept[] = $candidate;
					break; // one matching prefix is enough
				}
			}
		}
	}

	// The literal 'list' is handed back untouched, whatever the reference is.
	if ($regions == 'list') {
		return "list";
	}

	return implode(",", $kept);
}


/**
 * Extract variants from one or more VCF files and return them as a text table.
 *
 * For every input VCF, `bcftools view` writes a (region- and filter-restricted)
 * temporary .vcf.gz which GATK SortVcf then sorts and indexes. When more than
 * one VCF is given, the sorted files are combined with `bcftools merge` and the
 * result is sorted again. The final file is printed with GATK VariantsToTable
 * (fields CHROM, POS, REF, ALT and genotype field GT).
 *
 * Intermediate files are left in the temp directory (cleanup was already
 * disabled in the original code); a trace of the executed commands is written
 * there as well.
 *
 * @param array  $vcfs       Paths of the VCF files to query.
 * @param string $regions    Comma-separated regions, or the literal 'list' to
 *                           apply no region restriction.
 * @param string $ref        Reference name passed to filter_regions_by_ref().
 * @param array  $bcffilters One array of bcftools filter expressions per entry
 *                           of $vcfs, in the same order; the expressions of one
 *                           entry are joined with ' && '.
 * @return string Standard output of VariantsToTable.
 * @throws Exception When there is nothing to query, no region is valid for
 *                   $ref, a temp file cannot be created, or a command exits
 *                   with a non-zero status.
 */
function mergevcf2table($vcfs, $regions, $ref, $bcffilters) {

	$tripal_db_dir='/tripal-workdir';
	$TEMPDIR=$tripal_db_dir . "/tripal_db/tmp/";
	$GATKPATH=$tripal_db_dir ."/tripal_db/gatk-4.2.6.1/gatk";
	$BCFPATH=$tripal_db_dir ."/tripal_db/samtools-bcftools-htslib-1.0_x64-linux/bin/bcftools";
	$TABLEFIELDS=' -O /dev/stdout -F CHROM -F POS -F REF -F ALT -GF GT';

	$cmd="";
	$cmds=array();
	$execout=array();

	// Run one shell command, record it, and fail loudly on a non-zero exit
	// status (exec() itself never throws).
	$run=function ($command) use (&$cmds, &$execout) {
		$cmds[]=$command;
		$status=0;
		exec($command, $execout, $status);
		if ($status !== 0) {
			throw new Exception("command exited with status " . $status);
		}
	};

	// Reserve a unique base name; the real files get suffixes appended.
	$newtmpbase=function () use ($TEMPDIR) {
		$base=tempnam($TEMPDIR, 'mergevcf2table_');
		if ($base === false) {
			throw new Exception("could not create a temporary file in " . $TEMPDIR);
		}
		return $base;
	};

	try {
		if (empty($vcfs)) {
			throw new Exception("no VCF files to query");
		}

		// Debug trace of the most recent request.
		file_put_contents($TEMPDIR . 'mergevcf2table', "vcf:" . "\n" . implode("\n", $vcfs) . "\n\nregions=" . $regions . "\n\nref=" . $ref);

		// The region restriction is the same for every input file, so build the
		// bcftools (-r) and GATK (-L) arguments once. Both use the regions that
		// survived filter_regions_by_ref(), so the merged and single-file paths agree.
		$regionarg='';
		$intervalargs='';
		if ($regions != 'list') {
			$filtered=filter_regions_by_ref($regions, $ref);
			if ($filtered === '') {
				throw new Exception("no region in '" . $regions . "' is valid for reference '" . $ref . "'");
			}
			$regionarg=' -r ' . escapeshellarg($filtered);
			foreach (explode(',', $filtered) as $reg) {
				$intervalargs .= ' -L ' . escapeshellarg($reg);
			}
		}

		$tmpfiles=array();
		$tmpfilessorted=array();
		$ivcf=0;
		foreach ($vcfs as $vcf) {
			$base=$newtmpbase();
			$tmpfile=$base . ".vcf.gz";
			$tmpfilesorted=$base . ".sorted.vcf.gz";
			$tmpfiles[]=$tmpfile;

			// Subset the input by region and by the filters that belong to it.
			$cmd=$BCFPATH . ' view -Oz' . $regionarg;
			$bcffilter=isset($bcffilters[$ivcf]) ? $bcffilters[$ivcf] : array();
			if (count($bcffilter) > 0) {
				$cmd .= ' -i ' . escapeshellarg(implode(" && ", $bcffilter));
			}
			$cmd .= ' -o ' . escapeshellarg($tmpfile) . ' ' . escapeshellarg($vcf);
			$run($cmd);

			// Sort and index so the file can be merged and queried by interval.
			$cmd=$GATKPATH . ' SortVcf --CREATE_INDEX true -I ' . escapeshellarg($tmpfile) . ' -O ' . escapeshellarg($tmpfilesorted);
			$run($cmd);
			$tmpfilessorted[]=$tmpfilesorted;

			$ivcf=$ivcf+1;
		}

		// With a single input its sorted file is the final one; otherwise merge
		// all sorted inputs and sort the merged file.
		$finalvcf=$tmpfilessorted[0];
		if (count($tmpfilessorted) > 1) {
			$base=$newtmpbase();
			$merged=$base . ".vcf.gz";
			$mergedsorted=$base . ".sorted.vcf.gz";
			$tmpfiles[]=$merged;

			$cmd=$BCFPATH . ' merge -Oz -o ' . escapeshellarg($merged) . ' ' . implode(' ', array_map('escapeshellarg', $tmpfilessorted));
			$run($cmd);

			$cmd=$GATKPATH . ' SortVcf --CREATE_INDEX true -I ' . escapeshellarg($merged) . ' -O ' . escapeshellarg($mergedsorted);
			$run($cmd);
			$tmpfilessorted[]=$mergedsorted;

			$finalvcf=$mergedsorted;
		}

		$cmd=$GATKPATH . ' VariantsToTable  -V ' . escapeshellarg($finalvcf) . $intervalargs . $TABLEFIELDS;
		$cmds[]=$cmd;

		// passthru() writes to the output stream; buffer it to capture the table.
		$status=0;
		ob_start();
		passthru($cmd, $status);
		$table=ob_get_clean();

		// Debug traces: captured command output and the list of commands run.
		file_put_contents($TEMPDIR . 'execout', implode("\n", $execout));
		file_put_contents(tempnam($TEMPDIR, 'gatkcmdsfile_'), "cmds:" . "\n" . implode("\n", $cmds));

		if ($status !== 0) {
			throw new Exception("command exited with status " . $status);
		}

		return $table;

	} catch (Exception $e) {
		throw new Exception("mergevcf2table exception: " . $e->getMessage() . "\nlast cmd:" . $cmd, 0, $e);
	}

}


/**
 * List the reference / SNP-dataset pairs covered by the summary.
 *
 * @return array Rows of the form ['ref' => ..., 'snpds' => ...], grouped by
 *               reference in the order cs10, pkv5, fnv2.
 */
function get_snp_summary() {
	$datasetsbyref = [
		'cs10' => ['7ds', '21trichs'],
		'pkv5' => ['6ds', '21trichs'],
		'fnv2' => ['6ds', '21trichs'],
	];

	$summary = [];
	foreach ($datasetsbyref as $refname => $snpsets) {
		foreach ($snpsets as $snpset) {
			$summary[] = ['ref' => $refname, 'snpds' => $snpset];
		}
	}

	return $summary;
}

/*
	Main SNP query: returns a table of SNPs, samples and calls.
*/

/**
 * Resolve the requested SNP datasets to VCF files, build the per-dataset
 * bcftools filters and return the variants of the requested regions as a table.
 *
 * Query parameters read from $request:
 *   - snpds       comma-separated dataset names (default '7ds')
 *   - ref         reference name (default 'cs10')
 *   - fmissing_lt number used to derive a minimum AN[0], or 'none' (default)
 *   - maf_gt      minor-allele-frequency bound, or 'none' (default)
 *
 * Dataset/reference combinations without a known VCF are skipped together with
 * their filter, so the filter list always lines up with the VCF list.
 *
 * @param object $request Request object exposing getQueryParam($name, $default).
 * @param array  $args    'path_var' holds the regions handed to
 *                        mergevcf2table(); when the key 'as_string' exists the
 *                        table is returned instead of being sent as a download.
 * @return string|array The table when 'as_string' is set, or
 *                      array('ERROR' => message) on invalid input or failure.
 *                      Otherwise the function sends download headers and exits.
 */
function simple_slim_api_variant_list($request,$args) {

	$tripal_db_dir='/transfer';

	// VCF file (relative to $tripal_db_dir) for each dataset and reference.
	$vcffiles=array(
		'6ds' => array(
			'cs10' => "/cs10-wgs6ds-allsnps.vcf.gz",
			'pkv5' => "/pkv5-wgs6ds-allsnps.vcf.gz",
			'fnv2' => "/fnv2-wgs6ds-allsnps.vcf.gz",
		),
		'7ds' => array(
			'cs10' => "/cs10-wgs7ds-genomicsdb-gatk4170.allsnpsonly-bcftools.filltags.vcf.gz",
			'pkv5' => "/pkv5-wgs7ds-genomicsdb-allsnps.vcf.gz",
			'fnv2' => "/fnv2-wgs7ds-genomicsdb-allsnps.vcf.gz",
		),
		'5ds' => array(
			'cs10' => "/gather-select-cs10-pbgenotypegbcfall-renamed-allsnps.vcf.gz",
			'pkv5' => "/gather-select-pkv5-pbgenotypegbcfallchr-renamed-allsnps.vcf.gz",
		),
		'21trichs' => array(
			'cs10' => "/cs10-21trichomes-all-gatkgenotype-allsnps.snpeffann.vcf.gz",
			'pkv5' => "/pkv5-21trichomes-all-pbgenotypegvcf-allsnps.snpeffann.vcf.gz",
			'fnv2' => "/fnv2-21trichomes-all-pbgenotypegvcf-allsnps.snpeffann.vcf.gz",
		),
		'26trichs' => array(
			'cs10' => "/cs10-26trichomes-all-allsnps.vcf.gz",
			'pkv5' => "/pkv5-26trichomes-all-allsnps.vcf.gz",
			'fnv2' => "/fnv2-26trichomes-all-allsnps.vcf.gz",
		),
		'57karray' => array(
			'cs10' => "/cs10-57karray.vcf.gz",
		),
		'26trichsms' => array(
			'cs10' => "/cs10-26trichs-missence.vcf.gz",
		),
		'7dsms' => array(
			'cs10' => "/cs10-wgs7ds-missense.vcf.gz",
		),
	);

	// Allele count per dataset used to turn fmissing_lt into a minimum AN[0];
	// datasets not listed use $defaultnalleles.
	// NOTE(review): presumably 2 x the number of samples in each VCF - confirm.
	$nallelesbydataset=array(
		'6ds' => 500,
		'7ds' => 500,
		'7dsms' => 500,
		'26trichs' => 52,
		'26trichsms' => 52,
		'21trichs' => 42,
		'57karray' => 650,
	);
	$defaultnalleles=780;

	$snpds=$request->getQueryParam('snpds', '7ds');
	$ref=$request->getQueryParam('ref', 'cs10');
	$fmissing_lt=$request->getQueryParam('fmissing_lt', 'none');
	$maf_gt=$request->getQueryParam('maf_gt', 'none');
	$regions=$args['path_var'];

	if (!is_string($snpds) || !is_string($ref)) {
		return array("ERROR" => "snpds and ref must be plain strings");
	}

	// Both thresholds end up inside a bcftools expression that is run through
	// the shell, so accept nothing but numbers.
	if ($fmissing_lt !== 'none' && !is_numeric($fmissing_lt)) {
		return array("ERROR" => "fmissing_lt must be a number or 'none'");
	}
	if ($maf_gt !== 'none' && !is_numeric($maf_gt)) {
		return array("ERROR" => "maf_gt must be a number or 'none'");
	}

	$maffilter=null;
	if ($maf_gt !== 'none') {
		$maflow=trim($maf_gt);
		$maf=floatval($maflow);
		// Keep sites whose minor allele frequency is at least maf_gt, whichever
		// of the two alleles AF[0] happens to describe.
		$maffilter='((AF[0]<=0.5 && AF[0]>=' . $maflow . ') || (AF[0]>0.5 && AF[0]<=' . strval(1-$maf) . ')) ';
	}

	$vcfpaths=array();
	$bcffilters=array();

	foreach (explode(",", $snpds) as $dataset) {
		if (!isset($vcffiles[$dataset][$ref])) {
			// No VCF for this combination: skip it AND its filter so that
			// $bcffilters[i] keeps describing $vcfpaths[i].
			continue;
		}
		$vcfpaths[]=$tripal_db_dir . $vcffiles[$dataset][$ref];

		$bcffilter=array();
		if ($fmissing_lt !== 'none') {
			$nalleles=isset($nallelesbydataset[$dataset]) ? $nallelesbydataset[$dataset] : $defaultnalleles;
			$bcffilter[]='AN[0]>=' . strval(intval((1-floatval(trim($fmissing_lt)))*$nalleles));
		}
		if ($maffilter !== null) {
			$bcffilter[]=$maffilter;
		}
		$bcffilters[]=$bcffilter;
	}

	if (count($vcfpaths) == 0) {
		return array("ERROR" => "no VCF available for ref '" . $ref . "' and datasets '" . $snpds . "'");
	}

	$filename='result';

	try {

		$table=mergevcf2table($vcfpaths, $regions, $ref, $bcffilters);

		if (array_key_exists('as_string',$args)) {
			return $table;
		}

		// Send the table as a file download and stop the request here.
		header('Pragma: public');
		header('Expires: 0');
		header('Cache-Control: must-revalidate, post-check=0, pre-check=0');
		header('Cache-Control: private', false);
		header('Content-Type: application/octet-stream');
		header('Content-Disposition: attachment; filename="' . $filename . '.tsv";');
		header('Content-Transfer-Encoding: binary');

		exit($table);

	} catch (Exception $e) {
		// NOTE(review): the log type still says 'gene_list'; looks like a
		// copy/paste leftover - confirm before renaming, log filters may use it.
		watchdog_exception('simple_slim_api_gene_list', $e);
		return array("ERROR" => "Exception error: " . $e->getMessage());
	}

}
