#!	/usr/bin/perl	-w

=head1	header

	###########################################################
	#  
	#  This script splits PLINK GWAS files into multiple chunks (with overlapping regions)
	#  
	#  perl ChunkGWASped.pl Chr22_GWAS ChunkPosition.txt
	#  
	#  Input : Chr22_GWAS.ped, Chr22_GWAS.info (a Chr22_GWAS.map file is not read by this script)
	#  Output : Chr22_GWAS_XXX.ped, Chr22_GWAS_XXX.map, Chr22_GWAS_XXX.info, Chr22_GWAS_XXX.snps, Chr22_GWAS_XXX.dat (XXX = chunk name)
	#  
	#  Format of ChunkPosition.txt
	#  Chr1_01	100000000001	100009296085	100000000001	100008996085
	#  Col 1 : Chunk names to be reflected in output file headers.
	#  Col 2 : Start positions of the chunks (with overlapping regions)
	#  Col 3 : End positions of the chunks (with overlapping regions)
	#  Col 4 : Start positions of the chunks (without overlapping regions) ... not used in this script
	#  Col 5 : End positions of the chunks (without overlapping regions) ... not used in this script
	#  
	#  Chunk files will be split based on "with overlapping" regions
	#  Position information is based on ChrNum*100000000000+BasePairPosition
	#  
	#  Any questions to Yukinori Okada (http://plaza.umin.ac.jp/~yokada/datasource/software.htm   yokada@broadinstitute.org)

=cut



# Run the splitter on the command-line arguments (<prefix> <chunk table>) and
# finish with a success status.  The &-form call ignores prototypes, so it can
# safely precede the definition of main further down in the file.
&main(@ARGV);
exit(0);

=head1	main

Split "<prefix>.info" and "<prefix>.ped" into one set of files per chunk.

  main($prefix, $chunk)

  $prefix  Common prefix of the input files; "<prefix>.info" and
           "<prefix>.ped" are read.
  $chunk   Path of the tab-separated chunk table.  Column 1 is the chunk
           name, columns 2 and 3 are the inclusive start and end positions
           of the chunk.  Further columns are ignored.

For every chunk NAME the files "<prefix>_NAME.info", ".map", ".dat", ".snps"
and ".ped" are written.  A marker (one line of the info file: name, tab,
position) belongs to a chunk when start <= position <= end, so a marker may
be written to several overlapping chunks.

The n-th marker of the info file (0-based) is taken from tab-separated field
n+6 (0-based) of every ped line.  Each output ped line holds input fields
0-3 and 5 followed by exactly the markers written to that chunk's map file,
in info-file order.  Input field 4 is not copied.

Blank lines in any of the three input files are skipped.  Dies when an
argument is missing, when a file cannot be opened or closed, or when a row
of the chunk table has fewer than three columns.  Returns nothing.

=cut

sub	main {
	use strict;

	my ($prefix, $chunk) = @_;
	die "usage: perl ChunkGWASped.pl <prefix> <ChunkPosition.txt>\n"
		unless defined $prefix && defined $chunk;

	my $ped  = $prefix.".ped";
	my $info = $prefix.".info";

	# Open every input first, so that a missing input is reported before any
	# output file has been created.
	open (my $chunkfh, '<', $chunk) or die "can't open chunk file $chunk*$!\n";
	open (my $infofh,  '<', $info)  or die "can't open info file $info*$!\n";
	open (my $pedfh,   '<', $ped)   or die "can't open ped file $ped*$!\n";

	# Read the chunk table: name, start position, end position.
	my (@chunkhead, @chunkStPosi, @chunkEdPosi);
	while (my $line = <$chunkfh>) {
		chomp $line;
		next unless $line =~ /\S/;	# a blank row is not a chunk
		my @inline = split(/\t/, $line);
		die "chunk file $chunk, line $.: expected at least 3 tab-separated columns\n"
			if @inline < 3;
		push @chunkhead,   $inline[0];
		push @chunkStPosi, $inline[1];
		push @chunkEdPosi, $inline[2];
	}
	close $chunkfh;

	# Create the five output files of every chunk.
	my (@OUTPED, @OUTINFO, @OUTMAP, @OUTDAT, @OUTSNP);
	for my $i (0 .. $#chunkhead) {
		my $outprefix = $prefix."_".$chunkhead[$i];
		open ($OUTPED[$i],  '>', $outprefix.".ped")  or die "can't make ped files*$!\n";
		open ($OUTINFO[$i], '>', $outprefix.".info") or die "can't make info files*$!\n";
		open ($OUTMAP[$i],  '>', $outprefix.".map")  or die "can't make map files*$!\n";
		open ($OUTDAT[$i],  '>', $outprefix.".dat")  or die "can't make dat files*$!\n";
		open ($OUTSNP[$i],  '>', $outprefix.".snps") or die "can't make snps files*$!\n";
	}

	# Single pass over the info file: write the per-marker files and remember,
	# for every chunk, which ped fields belong to it.  Recording the exact
	# fields (instead of a first..last index range) keeps the ped output in
	# step with the map output even when the info file is not sorted by
	# position, and needs no "larger than any index" start sentinel.
	my $len = 100000000000;		# position = ChrNum * $len + base-pair position
	my @pedcols = map { [0 .. 3, 5] } @chunkhead;	# leading ped fields kept in every chunk
	my $counter = 0;		# 0-based index of the current marker
	while (my $line = <$infofh>) {
		chomp $line;
		next unless $line =~ /\S/;	# a blank line is not a marker
		my ($snp, $posi) = split(/\t/, $line);
		my $chrposi = $posi % $len;
		my $chr = ($posi - $chrposi) / $len;

		for my $i (0 .. $#chunkhead) {
			next unless $posi >= $chunkStPosi[$i] && $posi <= $chunkEdPosi[$i];
			print { $OUTINFO[$i] } $snp."\t".$posi."\n";
			print { $OUTMAP[$i] }  $chr."\t".$snp."\t0\t".$chrposi."\n";
			print { $OUTDAT[$i] }  "M ".$snp."\n";
			print { $OUTSNP[$i] }  $snp."\n";
			push @{ $pedcols[$i] }, $counter + 6;	# genotypes start at ped field 6
		}
		$counter++;
	}
	close $infofh;

	# The per-marker files are complete; buffered write errors surface here.
	for my $i (0 .. $#chunkhead) {
		close $OUTINFO[$i] or die "can't finish info files*$!\n";
		close $OUTMAP[$i]  or die "can't finish map files*$!\n";
		close $OUTDAT[$i]  or die "can't finish dat files*$!\n";
		close $OUTSNP[$i]  or die "can't finish snps files*$!\n";
	}

	# Split every ped line into the per-chunk ped files.
	while (my $line = <$pedfh>) {
		chomp $line;
		next unless $line =~ /\S/;
		my @inline = split(/\t/, $line);
		for my $i (0 .. $#chunkhead) {
			print { $OUTPED[$i] } join("\t", @inline[ @{ $pedcols[$i] } ]), "\n";
		}
	}
	close $pedfh;

	for my $i (0 .. $#chunkhead) {
		close $OUTPED[$i] or die "can't finish ped files*$!\n";
	}

	return;
}

