#!/usr/bin/perl

use strict;
use File::Basename; #to use basename() to get the filename (without dir) and/or dirname()

# Program : run_expriments
# Purpose : To run SVM training and testing for a set of experiments
# Author  : Jumail Bin Taliba (jumail@utm.my)
# Date    : 03 Nov 2008

# Usage: 
#        run_expriments.pl   start_exp end_exp working_dir [svm_learn_params, e.g. -t 1 -d 4]
# e.g.: 
#	./run_expriments.pl  1 10  dataset -t 2

# Positional arguments: first and last experiment number, then the working
# directory that holds the train<N>.svm / test<N>.svm files.
my $exp_start = shift (@ARGV);
my $exp_end = shift (@ARGV);
my $input_dir= shift (@ARGV);

# Fail early with a usage message when the positional arguments are missing or
# the experiment numbers are not plain non-negative integers; otherwise the
# script would report a misleading "Working directory '' doesn't exist" error
# or loop over a meaningless experiment number.
my $usage = "Usage: $0 start_exp end_exp working_dir [svm_learn_params, e.g. -t 1 -d 4]\n";
foreach my $exp_number ($exp_start, $exp_end)
{
	die($usage) if (!defined($exp_number) || $exp_number !~ /^\d+$/);
}
die($usage) if (!defined($input_dir));

# Everything left on the command line is passed to svm_learn unchanged.
my $learn_params=join (' ',@ARGV);

# Postfix for the model and prediction file names, so that the same dataset can
# be used with different learning settings.  The character class removes
# dashes, commas, whitespace and literal '+' characters,
# e.g. "-t 1 -d 4" => "t1d4" (stored as "_t1d4").
my $filename_postfix =$learn_params;
$filename_postfix =~ s/[-,\s+]//g;
$filename_postfix = '_'.$filename_postfix if ($filename_postfix);

# Results are written next to the input files.
my $output_dir = $input_dir;

# Behaviour switches (all of them are recorded by SaveSettings):
my $create_log = 0;                        # redirect command output to temp files and merge it into the logs
my $delete_model_after_prediction = 1;     # remove the model file once svm_classify has run
my $delete_prediction_after_merging = 0;   # remove the per-experiment prediction file after it was read

my $use_existing_model = 0;                # when true, svm_learn is not executed

my $exec_svm_learn = 1;                    # enable the training step
my $exec_svm_classify = 1;                 # enable the testing step

die("Working directory '$input_dir' doesn't exist") if (!$input_dir || !-d $input_dir);

my $predictions_file= $output_dir.'/predictions'.$filename_postfix.'.txt';
my $learn_log = $output_dir.'/svm_learn.log';
my $classify_log = $output_dir.'/svm_classify.log';
my $learn_tmp = $learn_log.'.tmp';
my $classify_tmp = $classify_log.'.tmp';
my @prediction_list = ();   # one array reference (prediction matrix) per experiment

SaveSettings();

# Open the merged log files.  The handle names flog_learn / flog_classify are
# used by the rest of the script, so they are kept as they are.
if ($create_log)
{
	if ($exec_svm_learn)
	{
		open(flog_learn, '>', $learn_log) || die ("Unable to open svm_learn log file '$learn_log' for writing: $!");
	}
	if ($exec_svm_classify)
	{
		open(flog_classify, '>', $classify_log) || die ("Unable to open svm_classify log file '$classify_log' for writing: $!");
	}
}

# Main loop: for every experiment number N in [$exp_start, $exp_end], train a
# model on train<N>.svm, classify test<N>.svm with it and collect the resulting
# prediction matrix.  A missing file skips the rest of the current experiment
# only; the remaining experiments still run.
for (my $i=$exp_start; $i<=$exp_end; $i++ )
{
	Prompt ("Running Expriment #$i:\n\n");

	my $number = $i;
	my $testing_file  = $input_dir.'/test'.$number.'.svm';
	my $training_file = $input_dir.'/train'.$number.'.svm';
	my $model_file    = $output_dir.'/model'.$number.$filename_postfix.'.svm';
	my $prediction_file = $output_dir.'/prediction'.$number.$filename_postfix.'.svm';

	# Shell command lines.  With logging enabled, standard output goes to a
	# temporary file that is merged into the main log after the command ran.
	my $svm_learn ="svm_learn $learn_params $training_file $model_file";
	$svm_learn = "$svm_learn > $learn_tmp" if ($create_log);

	my $svm_classify ="svm_classify $testing_file $model_file $prediction_file";
	$svm_classify = "$svm_classify > $classify_tmp" if ($create_log);

	if ($exec_svm_learn)
	{
		if (!$training_file || !-e $training_file)
		{
			print("\n** Error: Training file '$training_file' doesn't exist\n\n");
			next;
		}

		print "Running training module: $svm_learn\n";
		if (! $use_existing_model)
		{
			# Report a failed command; the model-file check below still
			# decides whether the experiment can continue.
			my $status = system ($svm_learn);
			print("\n** Error: svm_learn failed (system() returned $status)\n") if ($status != 0);
		}
		print "\n";

		# Merge the training log file
		if ($create_log && -f $learn_tmp)
		{
			my $lines = '';
			if (open(my $tmp_fh, '<', $learn_tmp))
			{
				# Slurp the whole file; a plain scalar read would keep
				# only its first line.
				local $/;
				$lines = <$tmp_fh>;
				$lines = '' if (!defined($lines));
				close($tmp_fh);
			}
			else
			{
				print("\n** Error: Unable to read temporary log file '$learn_tmp': $!\n\n");
			}
			unlink ($learn_tmp);

			print flog_learn "[Training #$i]\n\n";
			print flog_learn "$svm_learn\n\n";
			print flog_learn "$lines\n\n";
		}
	}

	if (!$model_file || !-e $model_file)
	{
		print("\n** Error: Model file '$model_file' doesn't exist\n\n");
		next;
	}

	if ($exec_svm_classify)
	{
		if (!$testing_file || !-e $testing_file)
		{
			print("\n** Error: Testing file '$testing_file' doesn't exist\n\n");
			next;
		}

		print "Running testing module: $svm_classify\n";
		my $status = system ($svm_classify);
		print("\n** Error: svm_classify failed (system() returned $status)\n") if ($status != 0);
		print "\n";

		unlink ($model_file) if (($delete_model_after_prediction) && (-e $model_file));

		# Merge the testing log file
		if ($create_log && -f $classify_tmp)
		{
			my $lines = '';
			if (open(my $tmp_fh, '<', $classify_tmp))
			{
				# Slurp the whole file, not just its first line.
				local $/;
				$lines = <$tmp_fh>;
				$lines = '' if (!defined($lines));
				close($tmp_fh);
			}
			else
			{
				print("\n** Error: Unable to read temporary log file '$classify_tmp': $!\n\n");
			}
			unlink ($classify_tmp);

			print flog_classify "[Testing #$i]\n\n";
			print flog_classify "$svm_classify\n\n";
			print flog_classify "$lines\n\n";
		}

		if (!$prediction_file || !-f $prediction_file)
		{
			print("\n** Error: Prediction file '$prediction_file' doesn't exist\n\n");
			next;
		}

		# Create the prediction matrix so that it is usable for ROCR
		# (two columns: 1st prediction values, 2nd prediction classes).
		my @predictions = CreatePredictionMatrix($prediction_file,$testing_file);
		unlink ($prediction_file) if (($delete_prediction_after_merging) && (-e $prediction_file));
		push (@prediction_list, [@predictions]);

		print "\n\n";
	}
}

# Close the merged log files, using the same conditions under which they were
# opened.  Buffered write errors only surface at close time, so a failed close
# is reported instead of being ignored.
if ($create_log)
{
	if ($exec_svm_learn)
	{
		close(flog_learn) || print("** Error: Unable to close svm_learn log file '$learn_log': $!\n");
	}
	if ($exec_svm_classify)
	{
		close(flog_classify) || print("** Error: Unable to close svm_classify log file '$classify_log': $!\n");
	}
}

# Write the collected prediction matrices of all experiments to one file.
if ($exec_svm_classify)
{
	print "Saving predictions to file : $predictions_file\n";
	SavePredictionsToFile($predictions_file, @prediction_list);
}


# Build the two-column prediction matrix of one experiment.
#
# Parameters:
#   $prediction_file - file with one prediction value per line
#   $testing_file    - testing file; only the first whitespace-separated token
#                      of each line (the class) is used
#
# Returns a list of [value, class] array references, one per line.  An empty
# list is returned (after printing an error) when one of the files cannot be
# opened or when the two files do not have the same number of lines.
sub CreatePredictionMatrix
{
	my ($prediction_file, $testing_file) = @_;

	my @matrix = ();  #2D matrix. 1st. col: prediction values, 2nd col. prediction classes

	#Read prediction file (containing 1 column of prediction values)
	my $prediction_fh;
	if (!open($prediction_fh, '<', $prediction_file))
	{
		print("** Errror: Unable to open prediction file '$prediction_file' for reading: $!\n");
		return ();
	}

	while (my $line = <$prediction_fh>)
	{
		# Strip every CR and LF so that DOS and Unix line ends are handled alike
		$line =~ tr/\r\n//d;

		push (@matrix, [$line, 0]);
	}
	close($prediction_fh);

	my $predict_count = @matrix;

	#Read testing file (only for reading the first column, i.e. prediction classes)
	my $testing_fh;
	if (!open($testing_fh, '<', $testing_file))
	{
		print("** Errror: Unable to open testing file '$testing_file' for reading: $!\n");
		return ();
	}

	my $test_count = 0;
	while (my $line = <$testing_fh>)
	{
		$line =~ tr/\r\n//d;

		# split ' ' skips leading whitespace, so an indented line still yields
		# its class instead of an empty first field
		my ($class) = split (' ', $line);
		$matrix[$test_count][1] = $class;
		$test_count++;
	}
	close($testing_fh);

	# The sizes of the prediction values and prediction classes must be the same. If not, there is
	#  something wrong with the files

	if ($predict_count != $test_count)
	{
		print("** Errror: The size of the prediction values and the classess are not the same\n");
		return ();
	}

	return @matrix;
}


# Write the prediction matrices of all experiments to one tab-separated file.
#
# Parameters:
#   $output_file - path of the file to create (an existing file is overwritten)
#   @list        - array references, one per experiment, each holding
#                  [value, class] array references
#
# Every [value, class] pair becomes one "value<TAB>class" line, in the order
# given.  Returns 1 on success; prints an error and returns nothing when the
# file cannot be opened or fully written.
sub SavePredictionsToFile
{
	my ($output_file, @list) = @_;

	my $out_fh;
	if (!open($out_fh, '>', $output_file))
	{
		print ("** Error: Unable to open file '$output_file' for writing: $!\n");
		return;
	}

	foreach my $experiment (@list)
	{
		foreach my $row (@{$experiment})
		{
			my $value = $row->[0];
			my $class = $row->[1];

			print {$out_fh} "$value\t$class\n";
		}
	}

	# Buffered write errors are only reported by close
	if (!close($out_fh))
	{
		print ("** Error: Unable to finish writing file '$output_file': $!\n");
		return;
	}

	return 1;
}


# Save the setting parameters for future reference and tracing.
#
# Writes one "name:<TAB>value" line per setting to
# <output_dir>/<script name>.settings, using the file-level configuration
# variables.  Dies when the file cannot be opened or fully written.
sub SaveSettings
{
	my $setting_info_file  = $output_dir.'/'.basename($0).'.settings';

	# Three-argument open: the file name is taken literally instead of being
	# parsed together with the mode.
	open(my $settings_fh, '>', $setting_info_file) || die ("Unable to open file '$setting_info_file' for writing: $!");

	# Label/value pairs in the order in which they appear in the file
	my @settings = (
		['start',                           $exp_start],
		['end',                             $exp_end],
		['input_dir',                       $input_dir],
		['learning parameters',             $learn_params],
		['output_dir',                      $output_dir],
		['create_log',                      $create_log],
		['delete_model_after_prediction',   $delete_model_after_prediction],
		['delete_prediction_after_merging', $delete_prediction_after_merging],
		['use_existing_model',              $use_existing_model],
		['exec_svm_learn',                  $exec_svm_learn],
		['exec_svm_classify',               $exec_svm_classify],
		['predictions_file',                $predictions_file],
		['learn_log',                       $learn_log],
		['classify_log',                    $classify_log],
		['learn_tmp',                       $learn_tmp],
		['classify_tmp',                    $classify_tmp],
	);

	foreach my $setting (@settings)
	{
		my ($label, $value) = @{$setting};
		print {$settings_fh} "$label:\t$value\n";
	}

	# Buffered write errors are only reported by close
	close($settings_fh) || die ("Unable to finish writing file '$setting_info_file': $!");
}


# Format a point in time as "DD-MM-YYYY HH:MM:SS" in the local time zone.
#
# Parameters:
#   $epoch - optional; seconds since the epoch.  Defaults to the current time.
#
# Returns the formatted string with zero-padded fields.
sub Now
{
	my ($epoch) = @_;
	$epoch = time if (!defined($epoch));

	my ($sec,$min,$hour,$mday,$mon,$year) = localtime($epoch);

	# localtime returns the month as 0..11 and the year as an offset from 1900
	return sprintf("%02d-%02d-%04d %02d:%02d:%02d", $mday, $mon + 1, $year + 1900, $hour, $min, $sec);
}

# Print a message, followed by a newline, to STDERR.
#
# Parameters:
#   $msg        - text to print
#   $print_time - optional; when true the message is preceded by the
#                 timestamp returned by Now() and a colon
sub Prompt
{
	my ($msg, $print_time) = @_;

	# Now() is only called when the timestamp was requested
	my $prefix = $print_time ? Now().':' : '';

	print STDERR $prefix.$msg."\n";
}