#!	/usr/bin/perl	-w

=head1	header

	###########################################################
	#  
	#  This script splits reference haplotype data into multiple chunks (with overlapping regions)
	#  
	#  perl Chunkhaploflag.pl Chr22_Ref_Ex ChunkPosition.txt
	#  
	#  Input : Chr22_Ref_Ex.hap, Chr22_Ref_Ex.map, Chr22_Ref_Ex.snps, Chr22_Ref_Ex.annot.txt (available from make1000GHapAnnot.pl)
	#  Output : Chr22_Ref_Ex_XXX.hap, Chr22_Ref_Ex_XXX.map, Chr22_Ref_Ex_XXX.snps, Chr22_Ref_Ex_XXX.annot.txt, Chr22_Ref_Ex_XXX.flag.txt
	#  
	#  Format of ChunkPosition.txt
	#  Chr1_01	100000000001	100009296085	100000000001	100008996085
	#  Col 1 : Chunk names to be reflected in output file headers.
	#  Col 2 : Start positions of the chunks (with overlapping regions)
	#  Col 3 : End positions of the chunks (with overlapping regions)
	#  Col 4 : Start positions of the chunks (without overlapping regions)
	#  Col 5 : End positions of the chunks (without overlapping regions)
	#  
	#  Chunk files will be split based on "with overlapping" regions
	#  Output flag files tell the SNPs to be included/excluded when selecting "without overlapping" regions
	#  Position information is based on ChrNum*100000000000+BasePairPosition
	#  
	#  1KG haplotype data is available from http://www.sph.umich.edu/csg/abecasis/MACH/download/
	#  
	#  Any questions to Yukinori Okada (http://plaza.umin.ac.jp/~yokada/datasource/software.htm   yokada@broadinstitute.org)

=cut



# Script entry point: pass the command-line arguments (file prefix, chunk
# position file) to main() and terminate with a success status afterwards.
# The '&' call form is kept on purpose: main() is defined further down, and
# this form never triggers a "called too early to check prototype" warning.
&main(@ARGV);
exit(0);

=head1	main

=cut

# main($prefix, $chunk)
#
# Split the reference panel "$prefix.hap", "$prefix.map" and
# "$prefix.annot.txt" into one set of files per chunk listed in the
# tab-delimited chunk file $chunk (columns: name, start, end, true start,
# true end; positions encoded as ChrNum*100000000000 + base-pair position).
#
# Only chunk rows whose chromosome number equals the number that follows the
# first three characters of the prefix's file name ("Chr22_..." -> 22) are
# used.  For a chunk named "Chr22_XXX" the files "${prefix}_XXX.hap", ".map",
# ".snps", ".annot.txt" and ".flag.txt" are written:
#   * .map / .snps  : the map rows (and their column-2 SNP names) between the
#                     first and the last SNP whose position lies in
#                     [start, end];
#   * .flag.txt     : one line per such SNP, "1" if it also lies between the
#                     first and last SNP inside [true start, true end], else "0";
#   * .annot.txt    : the annotation header line plus the matching data rows;
#   * .hap          : the first two space-separated fields of every haplotype
#                     line followed by the matching slice of the third field.
#
# Dies when an argument is missing, an input file is not readable, or an
# output file cannot be created or closed.  Returns nothing.
sub main {
	use strict;
	use warnings;
	use File::Basename qw(basename);

	my ($prefix, $chunk) = @_;
	unless (defined $prefix && length $prefix && defined $chunk && length $chunk) {
		die "usage: perl Chunkhaploflag.pl Chr22_Ref_Ex ChunkPosition.txt\n";
	}

	my $hap = $prefix.".hap";
	my $map = $prefix.".map";
	my $ant = $prefix.".annot.txt";

	# Refuse to start (and to create any output) when an input is missing;
	# unchecked opens used to turn this into silently empty result files.
	for my $file ($chunk, $map, $ant, $hap) {
		die "can't read input file $file\n" unless -r $file;
	}

	# Positions are encoded as ChrNum * $len + base-pair position.
	my $len = 100000000000;

	# The chromosome number comes from the file-name part of the prefix only,
	# so that a prefix with a directory component ("data/Chr22_Ref_Ex") works.
	my @Chr = split(/_/, basename($prefix));
	my $chrNum = substr($Chr[0], 3);

	# ---- chunk definitions for this chromosome ---------------------------
	# st/ed and stTrue/edTrue hold 0-based map row indexes; they stay undef
	# until a SNP inside the corresponding position window has been seen.
	my @chunks;
	open(my $chunkFh, '<', $chunk) or die "can't open chunk file $chunk*$!\n";
	while (my $line = <$chunkFh>) {
		chomp $line;
		next if $line =~ /^\s*$/;	# blank lines carry no chunk

		my @inline = split(/\t/, $line);
		my $tmpChr = ($inline[1] - $inline[1] % $len) / $len;
		next unless $tmpChr == $chrNum;

		my @head = split(/_/, $inline[0]);
		push @chunks, {
			name       => $head[1],
			stPosi     => $inline[1] % $len,
			edPosi     => $inline[2] % $len,
			stPosiTrue => $inline[3] % $len,
			edPosiTrue => $inline[4] % $len,
			st         => undef,
			ed         => undef,
			stTrue     => undef,
			edTrue     => undef,
		};
	}
	close($chunkFh);

	# ---- output files, created in the order hap, map, snps, annot, flag ----
	my @outKinds = (
		['hapFh', '.hap',       'hap'],
		['mapFh', '.map',       'map'],
		['snpFh', '.snps',      'snps'],
		['antFh', '.annot.txt', 'annot'],
		['flgFh', '.flag.txt',  'flag'],
	);
	for my $c (@chunks) {
		for my $kind (@outKinds) {
			my ($key, $ext, $label) = @$kind;
			open(my $fh, '>', $prefix."_".$c->{name}.$ext)
				or die "can't make $label files*$!\n";
			$c->{$key} = $fh;
		}
	}

	# ---- pass 1 over the map: row-index range of every chunk -------------
	# Row indexes only grow, so the first hit is the minimum and the most
	# recent hit is the maximum.
	open(my $mapFh, '<', $map) or die "can't open map file $map*$!\n";
	my $idx = 0;
	while (my $line = <$mapFh>) {
		chomp $line;
		my @inline = split(/\t/, $line);
		my $posi = $inline[2];

		for my $c (@chunks) {
			next unless $posi >= $c->{stPosi} && $posi <= $c->{edPosi};
			$c->{st} = $idx unless defined $c->{st};
			$c->{ed} = $idx;

			# The "true" (non-overlapping) window is only evaluated for
			# SNPs that are inside the overlapping window.
			if ($posi >= $c->{stPosiTrue} && $posi <= $c->{edPosiTrue}) {
				$c->{stTrue} = $idx unless defined $c->{stTrue};
				$c->{edTrue} = $idx;
			}
		}
		$idx++;
	}
	close($mapFh);

	# Chunks that contain at least one SNP; the others only receive the
	# annotation header and haplotype lines without alleles.
	my @active = grep { defined $_->{st} } @chunks;

	# ---- pass 2 over the map: .map, .snps and .flag.txt ------------------
	open($mapFh, '<', $map) or die "can't open map file $map*$!\n";
	$idx = 0;
	while (my $line = <$mapFh>) {
		chomp $line;
		my @inline = split(/\t/, $line);

		for my $c (@active) {
			next if $idx < $c->{st} || $idx > $c->{ed};

			print {$c->{mapFh}} $inline[0]."\t".$inline[1]."\t".$inline[2]."\n";
			print {$c->{snpFh}} $inline[1]."\n";

			my $inTrue = defined $c->{stTrue}
				&& $idx >= $c->{stTrue} && $idx <= $c->{edTrue};
			print {$c->{flgFh}} ($inTrue ? "1\n" : "0\n");
		}
		$idx++;
	}
	close($mapFh);

	# ---- annotation: header goes to every chunk, data rows by index ------
	open(my $antFh, '<', $ant) or die "can't open annot file $ant*$!\n";
	my $row = -1;	# -1 is the header line, 0 the first SNP row
	while (my $line = <$antFh>) {
		chomp $line;
		for my $c (@chunks) {
			if ($row < 0
				|| (defined $c->{st} && $row >= $c->{st} && $row <= $c->{ed})) {
				print {$c->{antFh}} $line."\n";
			}
		}
		$row++;
	}
	close($antFh);

	# Buffered write errors (e.g. a full disk) only surface at close time.
	for my $c (@chunks) {
		for my $key (qw(mapFh snpFh antFh flgFh)) {
			close($c->{$key}) or die "can't close output files*$!\n";
		}
	}

	# ---- haplotypes: slice the allele string once per chunk --------------
	open(my $hapFh, '<', $hap) or die "can't open hap file $hap*$!\n";
	while (my $line = <$hapFh>) {
		chomp $line;
		my @inline = split(/ /, $line);
		my $haplo = defined $inline[2] ? $inline[2] : '';
		my $hapLen = length $haplo;

		for my $c (@chunks) {
			my $out = $inline[0]." ".$inline[1]." ";
			# substr() replaces the former per-character split/concatenate
			# loop; the guard keeps an out-of-range start an empty slice.
			if (defined $c->{st} && $c->{st} < $hapLen) {
				$out .= substr($haplo, $c->{st}, $c->{ed} - $c->{st} + 1);
			}
			print {$c->{hapFh}} $out."\n";
		}
	}
	close($hapFh);

	for my $c (@chunks) {
		close($c->{hapFh}) or die "can't close hap files*$!\n";
	}

	return;
}

