#!/opt/bin/perl

# AELalight (last update Jul 27, 2001)

# Transforms text of the form
#
#	Zero or more lines, possibly containing the string #DATE#
#
# 	###
#
#	version = <some_version>
#
# 	some transliteration (zero or more lines)
# 	;
# 	some translation (zero or more lines)
#
# 	some transliteration2 (zero or more lines)
# 	;
# 	some translation2 (zero or more lines)
#
#	....
#
#       version = <some_other_version>
#
#       some transliteration3 (zero or more lines)
#       ;
#       some translation3 (zero or more lines)
#
#  	.....
#
# into XML format, in keeping with AELalign.
# The part before ### is kept unaltered, as the header of the XML file, but
# possible occurrences in the header of string #DATE# are replaced by the 
# current date.
# After a transliteration, the ";" and following translation may 
# be omitted, but this transliteration is then ignored.

# Input is standard input or the file indicated by a possible first argument, 
# output is standard output or the file indicated by a possible second argument.

# Optional command-line arguments: the input path, then the output path.
# When an argument is absent (or is empty or "0") the corresponding standard
# stream is used instead; next_line() and output_line() apply the same
# truth test to these two variables.
my $input_file = $ARGV[0];
my $output_file = $ARGV[1];

# The files are opened with three-argument open, so that characters in a
# file name (leading "<", ">", "|", surrounding blanks, ...) can never be
# taken as an open mode or as a command to run.  The one special name the
# former two-argument form was commonly used with, "-", keeps its meaning
# of "the standard stream".  The system error text ($!) is included in the
# message to say why the open failed.
if ($input_file) {
	if ($input_file eq '-') {
		open(INPUT, '<&', \*STDIN) || die "cannot open standard input: $!";
	} else {
		open(INPUT, '<', $input_file) || die "cannot open $input_file: $!";
	}
}

if ($output_file) {
	if ($output_file eq '-') {
		open(OUTPUT, '>&', \*STDOUT) || die "cannot open standard output: $!";
	} else {
		open(OUTPUT, '>', $output_file) || die "cannot open $output_file: $!";
	}
}

# Current date in the form "Jul 27, 2001"; it replaces #DATE# in the header.
# localtime is sampled only once, so that day, month and year always belong
# to the same instant even if the script happens to run across midnight.

my ($thisday, $month_index, $years_since_1900) = (localtime)[3, 4, 5];
my $thismonth = (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$month_index];
my $thisyear = $years_since_1900 + 1900;	# localtime counts years from 1900
my $thisdate = "$thismonth $thisday, $thisyear";

# State shared with the reading subroutines below.
my $not_eof;	# true as long as the end of the input has not been reached
my $line;	# the line currently under consideration ("" once input is exhausted)

initiate_reading();

# Header: copy every line up to (not including) the line consisting of ###,
# replacing each occurrence of #DATE# by the current date.
until (!$not_eof || $line =~ /^\s*###\s*$/) {
	$line =~ s/#DATE#/$thisdate/g;
	output_line($line);
	shift_line();
}

# The body element is opened whether or not a ### line was found.
output_line("<body>\n\n");

# Step past the ### line itself (does nothing if the input already ended).
shift_line();

# Main loop.  Each pass handles, in this order:
#   1. blank lines and any number of "version = <name>" lines, each of which
#      is written out as an anonymous <coord version=.../> element;
#   2. one transliteration: the lines up to a line holding only ";" or a
#      blank line;
#   3. if the transliteration ended in ";", one translation: the lines up to
#      the next blank line.
# A transliteration that is not followed by ";" is discarded.
do {

	read_white_space();

	while ($not_eof && $line =~ /^\s*version\s*=\s*([^\s]+)/) {
		my $version = $1;
		output_line("<coord version=\"$version\" pos=\"\@anon\"/>\n\n");
		shift_line();
		read_white_space();
	}

	my $al = "";	# transliteration lines, written inside <textal>
	my $tr = "";	# translation lines, written inside <texttr>

	# Collect the transliteration; stop at a ";" line or a blank line.
	while ($not_eof && $line !~ /^\s*(;|)\s*$/) {
		$al .= $line;
		shift_line();
	}

	if ($not_eof && $line =~ /^\s*;\s*$/) {
		# Skip the separator; the translation follows.
		shift_line();
	} else {
		# No ";" (blank line or end of input): ignore this transliteration.
		# $line is then blank or "", so the loop below collects nothing.
		$al = "";
	}

	# Collect the translation; stop at a blank line.
	while ($not_eof && $line !~ /^\s*$/) {
		$tr .= $line;
		shift_line();
	}

	# Compare against the empty string rather than testing truth: a text
	# consisting of just "0" (possible for a final line without newline) is
	# false in Perl and would otherwise be dropped silently.
	if ($al ne "" || $tr ne "") {
		# Markers of the form <N...> (starting with a digit) become
		# <coord pos="N..."/> elements in both texts.
		for my $text ($al, $tr) {
			$text =~ s/<([0-9][^>]*)>/<coord pos=\"$1\"\/>/g;
		}
		output_line("<textal>\n");
		output_line($al);
		output_line("</textal>\n<texttr>\n");
		output_line($tr);
		output_line("</texttr>\n\n");
	}

} while $not_eof;

#######################################################################
# Closing files:

# Close the two XML elements that are still open, then release the files.
output_line("</body>\n");
output_line("</resource>\n");

if ($input_file) {
	close(INPUT);
}

# Output is buffered, so a write error (for instance a full disk) may only
# become visible when the file is closed; report it instead of leaving a
# truncated file behind without notice.
if ($output_file) {
	close(OUTPUT) || die "cannot close $output_file: $!";
}

####################################
# library:

# Prepare for reading: mark the input as not yet exhausted and load the
# first line into $line.
sub initiate_reading {
	# The flag must be set before the call: shift_line() does nothing
	# when $not_eof is false.
	$not_eof = 1;
	shift_line();
}

# Advance to the next input line, storing it in $line.
# When there is no further line, $not_eof is cleared and $line becomes the
# empty string; any later call leaves both variables untouched.
sub shift_line {
	return unless $not_eof;

	my $next = next_line();
	if (defined $next) {
		$line = $next;
	} else {
		$not_eof = 0;
		$line = "";
	}
}

# Read from the input file if one was given on the command line, and from
# standard input otherwise.  shift_line() calls this in scalar context, so
# each call yields one line, or undef at the end of the input.
sub next_line {
	return $input_file ? <INPUT> : <STDIN>;
}

# Skip lines that are empty or hold only white space, leaving $line at the
# first line with other content (or at "" when the input runs out).

sub read_white_space {
	shift_line() while $not_eof && $line =~ /^\s*$/;
}

# Write the given text to the output file if one was given on the command
# line, and to standard output otherwise.  Nothing is appended to the text.
sub output_line {
	my ($text) = @_;
	my $handle = $output_file ? \*OUTPUT : \*STDOUT;
	print {$handle} $text;
}
