#!/usr/bin/perl -w

# taking a .gbk file and converting it to a table of amino acids that 
# sequin will read. :-(

# Input: a .gbk file (first command-line argument). Output: a file named
# after the input with ".aa" appended (e.g. foo.gbk -> foo.gbk.aa).

use strict;
use warnings;

# Main script body.
#
# Reads the GenBank file named by the first command-line argument, finds
# every span that starts with "CDS  " and ends at the next "table=11", and
# writes one FASTA-style record per span to "<input>.aa":
#
#   ><basename> ORF_<n>[c] [gene=ORF_<n>[c]] [protein=<function>]
#   <translation with all whitespace removed>
#
# <n> is the 1-based running count of CDS spans; a trailing "c" is added
# when the span contains the word "complement".  <function> is taken from
# the /function="..." qualifier, or "hypothetical protein" when absent.
# Progress/debug information is echoed to STDOUT.

my $input_file = $ARGV[0];
die "Usage: $0 <input.gbk>\n"
    unless defined $input_file && length $input_file;
chomp $input_file;

my $output_file = join(".", $input_file, "aa");

# Three-arg open with lexical handles: the file name can never be
# misinterpreted as an open mode, and $! reports the real failure reason.
open(my $in_fh, '<', $input_file)
    or die "Could not find $input_file: $!\n\n";

# The record prefix is the input file name without its directory part.
my @input_parts = split("/", $input_file);
my $prefix      = pop @input_parts;
print "$prefix \n";

open(my $out_fh, '>', $output_file)
    or die "Could not open $output_file for writing: $!\n";

# Slurp the whole file so a CDS feature spanning many lines can be matched
# with a single regex.
my $genbank_string = do { local $/; <$in_fh> };
$genbank_string = '' unless defined $genbank_string;
close $in_fh;

my $counter = 0;

# Capture the whole matched span explicitly instead of relying on $&.
while ($genbank_string =~ /(CDS  .+?table=11)/gs) {
    my $orf = $1;

    print "Found a CDS\n";
    $counter++;
    print "$orf\n\n";

    # Features located on the complementary strand get a "c" suffix.
    my $number = ($orf =~ /complement/) ? join("", $counter, "c") : $counter;
    print "$number\n";

    # Reset per CDS so a feature without a /translation qualifier can never
    # inherit the previous ORF's sequence.
    my $translation = '';
    if ($orf =~ /translation="(.+?)"/ms) {
        $translation = $1;
        $translation =~ s/\s//g;
    }
    else {
        warn "No translation found for ORF_$number; writing empty sequence.\n";
    }

    # Use the /function qualifier when present, otherwise the default name.
    my $protein = "hypothetical protein";
    if ($orf =~ /function="(.+?)"/ms) {
        $protein = $1;
        $protein =~ s/\n//g;      # re-join values wrapped over several lines
        $protein =~ s/\s+/ /g;    # collapse the wrap indentation
    }

    print {$out_fh} ">$prefix ORF_$number ",
                    "[gene=ORF_$number] [protein=$protein]\n$translation\n";
}

# Buffered write errors only surface at close, so check it.
close $out_fh
    or die "Could not finish writing $output_file: $!\n";

exit;