#!/opt/bin/perl

# AELalign (last update Jan 5, 2001)

# This program takes a number of files containing information
# on an Ancient Egyptian text in XML, aligns them, and puts them in 
# HTML files.

use CGI qw(:standard);

# Should be compliant to dtd version:

my $dtd_version = "0.1";

# Argument is file that contains file names of XML resources
# on a particular text:

my $name = $ARGV[0]; 	
unless($name) {
	error_die("AELalign requires argument\n") }

# Up to now, only HTML output is supported.

my $output_mode = "html";

# Code for shallow parsing of XML:
################################################################
# REX/Perl 1.0 
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser 
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron. 
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.

$TextSE = "[^<]+";
$UntilHyphen = "[^-]*-";
$Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
$CommentCE = "$Until2Hyphens>?";
$UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
$CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
$S = "[ \\n\\t\\r]+";
$NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
$NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
$Name = "(?:$NameStrt)(?:$NameChar)*";
$QuoteSE = "\"[^\"]*\"|'[^']*'";
$DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
$MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
$S1 = "[\\n\\r\\t ]";
$UntilQMs = "[^?]*\\?+";
$PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
$DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
$DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
$DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
$PI_CE = "$Name(?:$PI_Tail)?";
$EndTagCE = "$Name(?:$S)?>?";
$AttValSE = "\"[^<\"]*\"|'[^<']*'";
$ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
$MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
$XML_SPE = "$TextSE|$MarkupSPE";

sub ShallowParse {
  my($XML_document) = @_;
  return $XML_document =~ /$XML_SPE/g;
}

# end of REX/PERL 1.0 ###############################################

my $text_name = "unnamed";
my $out_dir = ".";
my $file_name = "unnamed";
my $index_header = "";

# Every file is a resource, numbered 0, 1, 2, ...,
# and has a name and a descr(iption), and information about
# creation, and possibly a URL.

my $resource_nr = -1;
my @resource_names = ();
my @resource_descrs = ();
my @resource_creates = ();
my @resource_urls = ();

# Variables holding the text of a single resource during parsing.

my $resource;
my $text;
my @parsed_text;
my $present_token;

# Each resource is divided into a number of streams. Different streams are 
# introduced for different versions of the same text, and
# different streams are introduced for different types of 
# text (e.g. hieroglyphs, transliteration, translation).
# Streams are assigned numbers, which are mapped to by hash %stream_nr.
# The numbers are mapped back to what the streams mean by %stream_name.
# During parsing, streams are written to a list of lists @stream.

my $max_stream_nr = -1;
my %stream_nr = ();
my %stream_name = ();
my @stream = ();
my $global_version;
my $global_pos;
my %global_type;
my $current_version;
my $current_type;

# There is a global counter for anonymous dummy positions. There is a
# fixed prefix for dummy positions.

my $dummy_count = 0;
my $dummy_prefix = "\@\@dummy_prefix";

# For alignment, coordinates are unified.
# A coordinate may point to another coordinates that it unifies with.
# For the representative of a class, the number of occurrences
# in the streams is counted.

my %coord_unify_to = ();
my %coord_ref_count = ();

# The system allows selecting only parts of the streams. The user writes
# commands to stop or start taking into account streams to or from
# certain coordinates.

my %stop_stream = ();
my %start_stream = ();

my %break_version = ();

# Data is unbreakable if it occurs in notes and comments.

my $unbreakable = 0;

# The keys currently allowed in <textlx>

my @lex_keys = ("texthi", "textal", "textform", "texttr", 
		"cite", "keyhi", "keyal", "keyform", "keytr",
		"dicthi", "dictal", "dictform", "dicttr");

# Obtain path of configuration file.

my $in_dir = $name;
$in_dir =~ s/[^\/]+$//;   

open(IN, "$name") || error_die("cannot open $name\n");

my $line;

while ($line = <IN>) {
        if ($line =~ /name\s*=\s*(.*[^\s])/i) {
		$text_name = $1;
        } elsif ($line =~ /directory\s*=\s*([^\s]+)/i) {
		$out_dir = $1;
        } elsif ($line =~ /file\s*=\s*([^\s]+)/i) {
		$file_name = $1;
        } elsif ($line =~ /header\s*=\s*([^\s]+)/i) {
		$index_header = $1;
        } elsif ($line =~ /resource\s*=\s*([^\s]+)/i) {
                $resource = $1;
                read_file();
        } elsif (resource_nr >= 0 &&
		   $line =~ /stop\s*=\s*"([^"]*)"\s+"([^"]*)"\s+([^\s]+|)\s*$/i) {
		my $stop_version = $1;
		my $stop_pos = $2;
		my $stop_type = $3;
                register_stop($stop_version, $stop_pos, $stop_type);
        } elsif (resource_nr >= 0 &&
		   $line =~ /start\s*=\s*"([^"]*)"\s+"([^"]*)"\s+([^\s]+|)\s*$/i) {
		my $start_version = $1;
		my $start_pos = $2;
		my $start_type = $3;
                register_start($start_version, $start_pos, $start_type);
	} elsif ($line =~ /break\s*=\s*"([^"]*)"\s*$/i) {
		my $version = $1;
		$break_version{$version} = 1;
	} elsif ($line =~ /^\s*$/) {  # ignore empty lines
        } else { print_error("Warning ($name): unknown:\n$line") }
}

close(IN);

# Where to stop taking input from a certain stream.

sub register_stop {
	my ($stop_version, $stop_pos, $stop_type) = @_;
	my @stop_types;
	if ($stop_type) { @stop_types = ($stop_type) }
	else { @stop_types = ("hi", "al", "tr", "lx"); }
	foreach $type (@stop_types) {
		$current_version = $stop_version;
		$current_type = $type;
		my $id = get_stream();
		my $stop_descr = "$id\n$stop_pos";
		$stop_stream{$stop_descr} = 1;
	}
}

# Where to start taking input from a certain stream.

sub register_start {
        my ($start_version, $start_pos, $start_type) = @_;
        my @start_types;
        if ($start_type) { @start_types = ($start_type) }
        else { @start_types = ("hi", "al", "tr", "lx"); }
        foreach $type (@start_types) {
                $current_version = $start_version;
                $current_type = $type;
                my $id = get_stream();
                my $start_descr = "$id\n$start_pos";
                $start_stream{$start_descr} = 1;
        }
}

sub read_file {
	open(RES, "$in_dir$resource") || error_die("cannot open $resource\n");
	read(RES, $text, 100000000); # high enough maximum file size
	@parsed_text = ShallowParse($text);
	close(RES);

	read_resource();
}

# The tokenized text is parsed according to the DTD by the
# following series of subroutines.

sub read_resource {
	$resource_nr++;
	read_top_material();
	read_token_space("<resource>");
	read_created();
	read_header();
	read_body();
	read_token("<\/resource>");
}

sub read_top_material {
	read_space();
	if ($present_token =~ /^<\?xml/i) { 
		shift_token_space(); 
	} else { print_error("Warning ($resource): first line erroneous, " .
				"am assuming document is XML file\n"); 
	}
	if ($present_token =~ /^<!DOCTYPE.+AELalign\.([0-9\.]+)\.dtd/i) {
		my $version = $1;
		unless ($version eq $dtd_version) { 
			print_error("Warning ($resource): version $version is " .
					"not $dtd_version\n");
		}
		shift_token_space();
	} else { print_error("Warning ($resource): DOCTYPE missing, " .
				"am assuming AELalign.$dtd_version\n")
	}
}

sub read_created { 
        if ($present_token =~ /^<created>/i) {
                shift_token_space();
                $resource_creates[$resource_nr] = read_content("created");
                read_token_space("<\/created>");
        } else { error_die("($resource) expected <created>\n" .
                        "not $present_token\n") }
}

sub read_header { 
	if ($present_token =~ 
	    /^<header\s+name\s*=\s*"([^"]*)"/i) {
        	$resource_names[$resource_nr] = $1;
		if ($present_token =~ /url\s*=\s*"([^"]*)"/i) {
			$resource_urls[$resource_nr] = $1;
		}
		shift_token_space();
		$resource_descrs[$resource_nr] = read_content("header");
		read_token_space("<\/header>");
	} else { error_die("($resource) expected <header name=\"...\">\n" . 
			"not $present_token\n") }
}

sub read_content {
	my ($closing) = @_;
	my $content = "";
	my @html_tokens = ();
	for (;;) {
		if ($present_token =~ /^<([a-zA-Z]*)>/) {
			my $token = $1;
			shift_token();
			unless ($token =~ /^(ul|li|p|i)$/i) {
				print_error("Unknown: $present_token\n")
			}
			push(@html_tokens, $token);
			$content .= "<$token>";
		} elsif (@html_tokens && $present_token =~ /^<\/([a-zA-Z]*)>/) {
			my $token = pop(@html_tokens);
			read_token("<\/$token>");
			$content .= "</$token>";
		} elsif ((not @html_tokens) && $present_token =~ /^<\/$closing>/i) {
			return $content;
		} elsif ($present_token =~ /^<\/([a-zA-Z])*>/) {
			error_die("Misplaced: $present_token\n");
		} else {
			$content .= $present_token;
			shift_token();
		}
	}
}

sub read_body {
	read_token_space("<body>");
	$global_version = "";
	$global_pos = "";
	%global_type = ();
	$current_type = "";
	while (	may_read_texthi() ||
		may_read_textal() ||
		may_read_textlx() ||
		may_read_texttr() ||
		may_read_coord_not_inline() ||
		may_read_align_not_inline()) {}
	read_token_space("<\/body>");
}

sub may_read_texthi {
	if (may_read_token("<texthi>")) {
		$current_type = "hi";
		read_implied_coord();
		while (	may_read_text() ||
                        may_read_no() ||
                        may_read_coord() ||
			may_read_align() ||
			may_read_anything()) {}
		read_token_space("<\/texthi>");
		$current_type = "";
		return 1;
	} else { return 0 }
}

sub may_read_textal {
	if (may_read_token("<textal>")) {
		$current_type = "al";
		read_implied_coord();
        	while (	may_read_text() ||
                        may_read_no() ||
			may_read_coord() ||
                        may_read_align() ||
			may_read_anything()) {}
		read_token_space("<\/textal>");
		$current_type = "";
        	return 1;
	} else { return 0; }
}

sub may_read_textlx {
        if (may_read_token("<textlx>")) {
                $current_type = "lx";
		read_implied_coord();
                while ( may_read_lx() ||
			may_read_text() ||
                        may_read_no() ||
                        may_read_coord() ||
                        may_read_align() ||
			may_read_space()) {}
                read_token_space("<\/textlx>");
                $current_type = "";
                return 1;
        } else { return 0; }
}

sub may_read_texttr {
	if (may_read_token("<texttr>")) {
		$current_type = "tr";
		read_implied_coord();
        	while ( may_read_al() ||
                        may_read_no() ||
                        may_read_coord() ||
                        may_read_align() ||
			may_read_anything()) {}
        	read_token_space("<\/texttr>");
		$current_type = "";
        	return 1; 
	} else { return 0; }
}

sub may_read_text {
        if (may_read_token("<text>")) {
		write_to_stream("<text>");
		$unbreakable = 1;
                while (	may_read_anything()) {}
                read_token("<\/text>");
		$unbreakable = 0;
                return 1;
        } else { return 0; }
}

sub may_read_no {
	if (may_read_token("<no>")) {
		write_to_stream("<no>");
		$unbreakable = 1;
        	while (	may_read_hi() ||
                       	may_read_al() ||
                       	may_read_tr() ||
			may_read_anything()) {}
        	read_token("<\/no>");
		$unbreakable = 0;
        	return 1; 
	} else { return 0; }
}

sub may_read_hi {
        if (may_read_token("<hi>")) {
		write_to_stream("<hi>");
                while (	may_read_anything()) {}
        	read_token("<\/hi>");
		write_to_stream("<\/hi>");
        	return 1;
	} else { return 0; }
}

sub may_read_al {
        if (may_read_token("<al>")) {
		write_to_stream("<al>");
                while (may_read_anything()) {}
        	read_token("<\/al>");
		write_to_stream("<\/al>");
        	return 1;
	} else { return 0; }
}

sub may_read_tr {
        if (may_read_token("<tr>")) {
		write_to_stream("<tr>");
                while (may_read_anything()) {}
        	read_token("<\/tr>");
		write_to_stream("<\/tr>");
        	return 1;
	} else { return 0; }
}

sub may_read_lx {
	if ($present_token =~ /^<lx/i) {
		write_to_stream("<lx>");
		$unbreakable = 1;
		$present_token =~ s/\t/ /g;
		foreach $key (@lex_keys) {
			if ($present_token =~ /$key\s*=\s*"([^"]*)"/i) {
				my $val = $1;
				write_to_stream("$key\t$val\n");
			}
		}
		shift_token_space();
		$unbreakable = 0;
                return 1;
        } else { return 0; }
}

sub may_read_coord_not_inline {
        if ($present_token =~ /^<coord/i) {
		$global_pos = "";
                if ($present_token =~ /version\s*=\s*"([^"]*)"/i) {
                        $global_version = $1;
                }
                if ($present_token =~ /pos\s*=\s*"([^"]*)"/i) {
                        $global_pos = $1;
			if ($global_pos eq "\@anon") {
				$dummy_count++;
				$global_pos = $dummy_prefix . $dummy_count;
			}
                }
                shift_token_space();
                return 1;
        } else { return 0; }
}

sub may_read_align_not_inline {
        if ($present_token =~ /^<align/i) {
        	my $al_version = "";
        	my $al_pos = "";
                if ($present_token =~ /version\s*=\s*"([^"]*)"/i) {
                        $al_version = $1;
                }
                if ($present_token =~ /pos\s*=\s*"([^"]*)"/i) {
                        $al_pos = $1;
                }
        	my $align = "$al_version\n$al_pos";
        	my $coord = "$global_version\n$global_pos";
        	unify_coord($align,$coord);
                shift_token_space();
                return 1;
        } else { return 0; }
}
		
sub may_read_coord {
        if ($present_token =~ /^<coord/i) {
		my $current_pos = "";
		if ($present_token =~ /version\s*=\s*"([^"]*)"/i) {
			$current_version = $1;
		}
		if ($present_token =~ /pos\s*=\s*"([^"]*)"/i) {
			$current_pos = $1;
		}
		write_coord($current_version, $current_pos);
                shift_token();
        	return 1;
	} else { return 0; }
}

sub may_read_align {
        if ($present_token =~ /^<align/i) {
		my $al_version = "";
        	my $al_pos = "";
                if ($present_token =~ /version\s*=\s*"([^"]*)"/i) {
                        $al_version = $1;
                }
                if ($present_token =~ /pos\s*=\s*"([^"]*)"/i) {
                        $al_pos = $1;
                } 
		write_align($al_version,$al_pos);
                shift_token();
                return 1;
        } else { return 0; }
}

# We make sure an implicit coord is not written twice for the
# same text type, or in the case of an implicit dummy position, a new dummy
# position is generated.

sub read_implied_coord {
	$current_version = $global_version;
	my $combination = "$current_version\n$global_pos\n$current_type";
	if (($current_version ne "" || $global_pos ne "") &&
			not($global_type{$combination})) {
		$global_type{$combination} = 1;
		write_coord($current_version,$global_pos);
	} elsif ($global_pos =~ /^$dummy_prefix/) {
		$dummy_count++;
		$global_pos = $dummy_prefix . $dummy_count;
		$combination = "$current_version\n$global_pos\n$current_type";
		$global_type{$combination} = 1;
		write_coord($current_version,$global_pos);
	}
}

sub may_read_anything {
        if (not ($present_token =~ /^</)) {
		$present_token =~ s/  +/ /gs;
		write_to_stream($present_token);
                shift_token();
                while (not ($present_token =~ /^</)) {
			$present_token =~ s/  +/ /gs;
			write_to_stream($present_token);
                        shift_token();
                };
                return 1;
        } else { return 0; }
}

# Read tag, and following space.

sub read_token_space {
	my ($wanted) = @_;
	read_token($wanted);
	read_space()
}

# Read tag, and give error if not present.

sub read_token {
        my ($wanted) = @_;
        if ($present_token =~ /^$wanted/is) {
                shift_token();
        } else { error_die("($resource) expected $wanted, not $present_token\n") }
}

# Read tag if present, and return boolean whether succeeded.

sub may_read_token {
        my ($wanted) = @_;
        if ($present_token =~ /^$wanted/is) {
                shift_token();
		return 1;
        } else { return 0; }
}

# Move on to next non-white-space token.

sub shift_token_space {
        shift_token();
        read_space();
}

# Move on to next token.

sub shift_token {
	if (@parsed_text) {
		$present_token = shift(@parsed_text);
	} else { error_die("($resource) premature end of file\n") }
}

# read away space.
        
sub read_space {
        while ($present_token =~/^\s*$/s) {
                shift_token();
        }
} 

sub may_read_space {
	if ($present_token =~ /^\s*$/s) {
		shift_token();
                return 1;
        } else { return 0; }
}

#############################################
# Administration of streams

# Yields nr of stream matching current environment.

sub get_stream {
	my $id = "$resource_nr\n$current_version\n$current_type";
	if (defined($stream_number{$id})) {
		return $stream_number{$id}; 
	} else { 
		my $new = ++$max_stream_nr;
		$stream_number{$id} = $new;
		$stream_name{$new} = $id;
		return $new;
	}
}

# Write to appropriate stream. In the case of unbreakable
# environments, append text behind previous line.
# Ignore the input if it consists of only space and no text has previously
# been written to the stream.

sub write_to_stream {
	my ($txt) = @_;
	my $stream_nr = get_stream();
	unless (@{ $stream[$stream_nr] } || $txt =~ /[^\s]/) {
		return;
	}
	if ($unbreakable) {
		@{ $stream[$stream_nr] }[$#{ $stream[$stream_nr] }] .= $txt;
	} else {
		push(@{ $stream[$stream_nr] }, $txt);
	}
}

sub write_coord {
	my ($version,$pos) = @_;
	my $stream_nr = get_stream();
	my $coord = "$version\n$pos";
	my $coord_line = "<coord>$coord";
	push(@{ $stream[$stream_nr] }, $coord_line);
	increase_coord_count($coord);
}

sub write_align {
        my ($version,$pos) = @_;
        my $stream_nr = get_stream();
        my $align = "$version\n$pos";
        my $align_line = "<align>$align";
        push(@{ $stream[$stream_nr] }, $align_line);
        increase_coord_count($align);
}

sub resource_nr_of {
	my ($stream_i) = @_;
	$stream_name{$stream_i} =~ /^(.*)\n/;
	return $1;
}

sub text_type_of {
	my ($stream_i) = @_;
	$stream_name{$stream_i} =~ /(..)$/;
	return $1;
}

#############################################
# Unification of coordinates.

sub unify_coord {
	my ($coord1,$coord2) = @_;
        $coord1 = coord_deref($coord1);
        $coord2 = coord_deref($coord2);
	unless ($coord1 eq $coord2) { 
		$coord_unify_to{$coord1} = $coord2;
		$coord_ref_count{$coord2} += $coord_ref_count{$coord1};
	}
}

sub increase_coord_count {
        my ($coord) = @_;
        $coord = coord_deref($coord);
	$coord_ref_count{$coord}++;
}

sub decrease_coord_count {
        my ($coord) = @_;
	$coord = coord_deref($coord);
        $coord_ref_count{$coord}--;
}

sub coord_deref {
	my ($coord) = @_;
        while (defined($coord_unify_to{$coord})) {
                $coord = $coord_unify_to{$coord}
        }
	return $coord;
}

#############################################
# Normalization of the streams. 
# 1) An align directly following a coord is merged with it.
#	I.e. the align is removed, and the coordinates are unified.
# 2) A word (sequence of alphanumerical symbols) broken up by a coord or
# align is merged (the suffix moved to after the prefix, 
# across coord or align).

for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
	my $last_coord = "";
	my @new_stream = ();
	my @stream_buffer = ();
	my $prefix_waiting = 0;

while (@{ $stream[$stream_i] }) {
	my $current_line = shift(@{ $stream[$stream_i] });
	if ($current_line =~ /^<coord>(.*\n.*)$/) {
		$last_coord = $1;
		push(@stream_buffer, $current_line);
	} elsif ($current_line =~ /^<align>(.*\n.*)$/) {
		my $this_align = $1;
		if ($last_coord) {
			decrease_coord_count($this_align);
			unify_coord($this_align, $last_coord);
		} else {
			push(@stream_buffer, $current_line);
		}
	} elsif ($prefix_waiting &&
			$current_line =~ /^<(text|no)>/) {
		push(@new_stream, $current_line);
		if (not ($current_line =~ /[a-zA-Z0-9]$/)) {
			$prefix_waiting = 0;
		}
	} elsif ($prefix_waiting && 
			$current_line =~ /^([a-zA-Z0-9]+)/) {
		$suffix = $1;
		push(@new_stream, $suffix);
		$current_line =~ s/^$suffix//;
		unless ($current_line =~ /^$/) {
			unshift(@{ $stream[$stream_i] }, $current_line);
		}
	} else {
		if ($current_line =~ /[^\s]/) {
			$last_coord = "";
		}
		push(@stream_buffer, $current_line);
		push(@new_stream, @stream_buffer);
		@stream_buffer = ();
		if ( (text_type_of($stream_i) eq "al" ||
			text_type_of($stream_i) eq "tr") &&
		      $current_line =~ /[a-zA-Z0-9]$/ &&
		      $output_mode eq "html") {
			$prefix_waiting = 1;
		} else {
			$prefix_waiting = 0;
		}
	} 
}
	push(@new_stream, @stream_buffer);
	@{ $stream[$stream_i] } = @new_stream;

}

#############################################
# Filtering of streams. 

# Pieces are cut out of streams according to the 
# commands in $name.
# In addition, we mark dereferenced positions if
# breaks may occur there.

%break_pos = (); # maps dereferenced positions to boolean 
		 # indicating whether break preferable.

for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
	my @new_stream = ();
	my $filter_on = "";

while (@{ $stream[$stream_i] }) {
	my $current_line = shift(@{ $stream[$stream_i] });
	if ($current_line =~ /^<coord>((.*)\n(.*))$/) {
		my $coord = $1;
		my $version = $2;
		my $pos = $3;
		my $descr = "$stream_i\n$pos";
		if ($stop_stream{$descr}) { $filter_on = "yes" }
		elsif ($start_stream{$descr}) { $filter_on = "" }
		$coord = coord_deref($coord);
		if ($break_version{$version}) {
			$break_pos{$coord} = 1
		} elsif ($pos =~ /^$dummy_prefix/ && $break_version{"\@anon"}) {
			$break_pos{$coord} = 1
		}
	}
	if ($filter_on) {
		if ($current_line =~ /^(?:<coord>|<align>)(.*\n.*)$/) {
			my $this_coord = $1;
			decrease_coord_count($this_coord);
		}
	} else {
		push(@new_stream, $current_line);
	}
}

@{ $stream[$stream_i] } = @new_stream;
}

#############################################

# For version and page, we maintain the first and
# last positions that have occurred of that version on that
# page. We use hash %version_set as a set of versions.

my %version_hash = ();
my %first_occ = ();
my %last_occ = ();

sub register_occ {
        my ($page, $version, $pos) = @_;
	$version_set{$version} = 1;
        unless (defined($first_occ{"$page\n$version"})) {
                $first_occ{"$page\n$version"} = $pos;
        }
        $last_occ{"$page\n$version"} = $pos;
}

#############################################
# Scheduling of aligned rows in tables.
# An estimate is made of the sizes of the texts in the
# different streams. Based on coord and align, the maximal
# positions are taken for each such entry in one of the
# streams, and columns in a table start simultaneously for
# such aligned positions. This procedure is repeated for
# several pages and several tables within a page.

# We fill the table breadth-first, in one leap until a minimum width
# is reached, then extending the width one position at a time, until
# the maximum width.
# After the minimum width was reached, if we find a coordinate in
# the first stream, then we discontinue to fill the present table.
# The objective is to limit the number of line breaks not 
# coinciding with coordinates.

# Constants:
# my $max_rows_on_page = 50;	# Sum of rows of tables (incl. footnotes)
# Temporarily while studying Peasant on the AEL List:
my $max_rows_on_page = 30;	# Sum of rows of tables (incl. footnotes)
my $max_row_size = 120; 	# Width of tables
my $min_row_size = 50; 		# If bigger, we may stop filling a table.

my $temp_row_size;		# Is given values between min and max value.

my $current_page = 1;
my $rows_on_page;
my $rows_in_table;

my $stream_reduced;	# boolean indicating no dead-lock

open_page();
$rows_on_page = 0;
for(;;) {
	$stream_reduced = 0;
	$rows_in_table = 0;
	fill_one_table();
	compute_rows_in_table();
	if ($stream_reduced == 0) {
		if (nonempty_streams()) {
			report_dead_lock();
		}
		last;
	} elsif ($rows_in_table == 0) {
		;
	} elsif ($rows_on_page + $rows_in_table <= $max_rows_on_page) {
		$rows_on_page += $rows_in_table;
		write_table();
	} else { 	
		close_page();
		$current_page++;	
		open_page();
		$rows_on_page = $rows_in_table;
		write_table();
	}
}
close_page();
correct_last_page();

sub open_page {
	my $prev = "";
	my $next;
	my $index = "<a href=\"$file_name"."0.html\">index</a>";
	if ($current_page > 1) {
		$prev = $current_page - 1;
		$prev = "<a href=\"$file_name$prev.html#end\">" .
			"previous page ($prev)</a>";
	}
	$next = $current_page + 1;
	$next = "<a href=\"$file_name$next.html\">" .
			"next page ($next)</a>";
        open(PAGE, ">$out_dir/$file_name$current_page.tmp") ||
		error_die("Could not open file, does directory $out_dir exist?\n");
	print PAGE start_html("$text_name ($current_page)"),
		"\n<center><h1>$text_name ($current_page)</h1></center>\n",
		"<div name=\"start\" align=\"center\">\n&nbsp;",
		"$prev&nbsp;&nbsp;&nbsp;$index&nbsp;&nbsp;&nbsp;",
		"$next&nbsp;</div>\n\n";
}

sub close_page {
	my $prev = "";
        my $next;
        my $index = "<a href=\"$file_name"."0.html\">index</a>";
        if ($current_page > 1) {
                $prev = $current_page - 1;
                $prev = "<a href=\"$file_name$prev.html\">" .
                        "previous page ($prev)</a>";
        }
        $next = $current_page + 1;
        $next = "<a href=\"$file_name$next.html#start\">" .
                        "next page ($next)</a>";
	print PAGE "\n<div name=\"end\" align=\"center\">\n&nbsp;",
                "$prev&nbsp;&nbsp;&nbsp;$index&nbsp;&nbsp;&nbsp;",
                "$next&nbsp;</div>\n",
		"<center><h1>$text_name ($current_page)</h1></center>\n",
		end_html, "\n";
	close(PAGE);
}

# On the last page there are references to the next, nonexistent page.
# These are removed:

sub correct_last_page {
	system("mv $out_dir/$file_name$current_page.tmp $out_dir/$file_name.tmp");
	open(OLD, "$out_dir/$file_name.tmp");
	open(PAGE, ">$out_dir/$file_name$current_page.tmp");
	$next = $current_page + 1;
	while (<OLD>) {
		s/<a href="$file_name$next\.html.*next page \($next\)<\/a>//;
		print PAGE $_;
	}
	close(PAGE);
	close(OLD);
	system("rm $out_dir/$file_name.tmp");
}

# The positions from coord or align entries are pending when not
# all references to them have been found in the streams so far.
# If all of them have been found, the positions are
# cleared, and the maximal column address is used for printing the
# following material in the respective columns.

        # positions that are cleared to be processed. Value nonzero if yes.
        my %pos_cleared;

        # positions pending. The value is the number of seen occurrences.
        my %pos_pending;

        # maximal column address of pending or cleared positions;
        my %pos_column;

# One table, with rows for different streams, is filled.
	
sub fill_one_table {
	%pos_cleared = ();
	%pos_column = ();
	make_table_empty();
	$temp_row_size = $min_row_size;
	while ($temp_row_size <= $max_row_size) {
	do { 
	     %pos_pending = ();
	     for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		read_row_till_blocked($stream_i);
	     }
	} until (no_clearable_pos());
	$temp_row_size++;
	}
}

# The variables for filling one table.

	# The parts of the streams for the current table:
	my @row_stream;

	# The length of the rows so far:
	my @row_size;

	# Which of the rows contain non-empty information:
	my @row_nonempty;

	# At which positions of the table does a column have to start:
	my @column_start;

sub make_table_empty {
	@row_stream = ();	
	@row_size = ();	
	@row_nonempty = ();	
	@column_start = ();	
}

# We clear a position if nr of seen occurrences so far equals
# the total nr of occurrences. If a cleared coordinate is seen that
# implies that further filling of the table should be discontinued,
# we set the $temp_row_size to a high enough value.

sub no_clearable_pos {
	my $first = get_first_coord();
	my $no_new_ones = 1;
	my $stop_table = 0;
	while (($pos,$nr_occs) = each %pos_pending) {
		if ($nr_occs == $coord_ref_count{$pos}) {
			$pos_cleared{$pos} = 1;
			$no_new_ones = 0;
			if ($min_row_size < $temp_row_size &&
					breakable_pos($pos, $first)) {
				$temp_row_size = $max_row_size;
				$stop_table = 1;
			}
		}
	}
	return ($no_new_ones || $stop_table);
}

# A break is preferable at position if:
# 1) no break lines occur at all in the configuration file and position occurs
# in first non-empty stream, or
# 2) the version occurs in the configuration file.

sub breakable_pos {
	my ($pos,$first) = @_;
	if (%break_version) {
		if ($break_pos{$pos}) { return 1 }
	} else {
		if ($pos eq $first) { return 1 }
	}
	return 0
}

sub nonempty_streams {
	for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		if (@{ $stream[$stream_i] }) {
			return 1;
		}
	}
	return 0
}

# Dead-lock may arise if there is a circular dependency between
# coords or aligns.

sub report_dead_lock {
	print_error("Dead-lock due to circular dependencies, " .
			"involving one or more from:\n");
	for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		if (@{ $stream[$stream_i] }) {
			my $line = shift(@{ $stream[$stream_i] });
			if ($line =~ /^(?:<coord>|<align>)(.*)\n(.*)$/) {
				my $this_version = $1;
				my $this_pos = $2;
				print_error("\t$this_version $this_pos\n");
			}
		}
	}
	exit(1);
}

# Count non-empty row in addition to footnotes counted before.

sub compute_rows_in_table {
	for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		if ($row_nonempty[$stream_i]) { 
			$rows_in_table++ 
		}
	}
}

# Get the coordinate in the first non-empty stream.

sub get_first_coord {
	for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		my $line = ${ $stream[$stream_i] }[0];
		if (not($row_nonempty[$stream_i]) &&
			@{ $stream[$stream_i] }) {
			;
		} elsif ($line =~ /^<coord>(.*\n.*)$/) {
			my $this_coord = $1;
        		$this_coord = coord_deref($this_coord);
			return $this_coord;
		} else { 		
			return "";
		}
	}
}

# A row is read from a stream, until:
# 1) A coordinate is encountered that is not cleared.
# 2) The stream is exhausted.
# 3) The row is full.
# For brackets formed by e.g. <al> and </al> we have to be careful
# when breaking up lines. These may need to be duplicated.

sub read_row_till_blocked {
	my ($stream_i) = @_;
	my @pending_brackets = ();
	while (@{ $stream[$stream_i] }) {
		my $line = shift(@{ $stream[$stream_i] });

#### BEGIN IF
if ($line =~ /^(<coord>|<align>)(.*\n.*)$/) {
	my $this_coord = $2;
	$this_coord = coord_deref($this_coord);
	if ($pos_cleared{$this_coord}) {
		$row_size[$stream_i] = $pos_column{$this_coord};
		# approximation of size should be constant to avoid deadlock:
		if ($row_size[$stream_i] + 5 <= $temp_row_size) {
			$column_start[$row_size[$stream_i]] = 1;
			$row_size[$stream_i] += size_of($line);
			push(@{ $row_stream[$stream_i] }, $line);
			$stream_reduced = 1;
		} else {
			unshift(@{ $stream[$stream_i] }, $line);
			return;
		}
	} else {
		$pos_column{$this_coord} = max( $pos_column{$this_coord},
						$row_size[$stream_i] );
		$pos_pending{$this_coord}++;
		unshift(@{ $stream[$stream_i] }, $line);
		return;
	}
} elsif ($line =~ /^<text>/) { 
	# text should preferably NOT be deferred to next line:
	if ($row_size[$stream_i] <= $temp_row_size) {
		$row_size[$stream_i] += size_of($line);
		push(@{ $row_stream[$stream_i] }, $line);
                $stream_reduced = 1;
		if ($line =~ /^<text>\s*[^\s]/) {
			$row_nonempty[$stream_i] = 1;
		}
	} else {
		unshift(@{ $stream[$stream_i] }, $line);
                return;
	}
} elsif ($line =~ /^<lx>/) {
        if ($row_size[$stream_i] + size_of($line) <= $temp_row_size) {
                $row_size[$stream_i] += size_of($line);
                push(@{ $row_stream[$stream_i] }, $line);
                $stream_reduced = 1;
		$row_nonempty[$stream_i] = 1;
        } else {
                unshift(@{ $stream[$stream_i] }, $line);
                return;
        }
} elsif ($line =~ /^<no>/) {
	$row_size[$stream_i] += size_of($line);
	push(@{ $row_stream[$stream_i] }, $line);
	$stream_reduced = 1;
	$row_nonempty[$stream_i] = 1;
	$rows_in_table++;
} elsif ($line =~ /^<al>/) {
	push(@{ $row_stream[$stream_i] }, $line);
	$stream_reduced = 1;
	push(@pending_brackets, "al");
} elsif ($line =~ /^<\/al>/) {
	push(@{ $row_stream[$stream_i] }, $line);
	$stream_reduced = 1;
	pop(@pending_brackets);
# At the moment unnecessary:
# } elsif ($line =~ /^<hi>/) {
#	push(@{ $row_stream[$stream_i] }, $line);
#	$stream_reduced = 1;
#	push(@pending_brackets, "hi");
# } elsif ($line =~ /^<\/hi>/) {
#	push(@{ $row_stream[$stream_i] }, $line);
#	$stream_reduced = 1;
#	pop(@pending_brackets);
} elsif (text_type_of($stream_i) eq "al" ||
		text_type_of($stream_i) eq "tr") {
        if ($row_size[$stream_i] + size_of($line) <= $temp_row_size) {
		$row_size[$stream_i] += size_of($line);
                push(@{ $row_stream[$stream_i] }, $line);
                $stream_reduced = 1;
                if ($line =~ /[^\s]/) {
                        $row_nonempty[$stream_i] = 1;
                }
        } else {
                my ($pref,$suf) =
                        break_up($line, $temp_row_size-$row_size[$stream_i]);
                if ($pref ne "") {
                        unshift(@{ $stream[$stream_i] }, $suf);
                        unshift(@{ $stream[$stream_i] }, $pref);
                } else {
                        unshift(@{ $stream[$stream_i] }, $line);
			my @pending_brackets2 = @pending_brackets;
			while (@pending_brackets) {
				my $bracket = pop(@pending_brackets);
				push(@{ $row_stream[$stream_i] }, "<\/$bracket>");
			}
			while (@pending_brackets2) {
                                my $bracket = shift(@pending_brackets2);
                                unshift(@{ $stream[$stream_i] }, "<$bracket>");
                        }
                        return;
                }
        }
} else { # text_type_of($stream_i) = "hi"
	if ($row_size[$stream_i] + size_of($line) <= $temp_row_size) {
		$row_size[$stream_i] += size_of($line);
		push(@{ $row_stream[$stream_i] }, $line);
                $stream_reduced = 1;
		if ($line =~ /[^\s]/) {
			$row_nonempty[$stream_i] = 1;
		}
	} else {
		my ($pref,$suf) = 
			break_up_hi($line, $temp_row_size-$row_size[$stream_i]);
		if ($pref ne "") {
			unshift(@{ $stream[$stream_i] }, $suf);
			unshift(@{ $stream[$stream_i] }, $pref);
		} else {
			unshift(@{ $stream[$stream_i] }, $line);
                        return;
		}
	}
}
#### END IF

	}
}

# Translations or transliterations can be split at any space symbol.
# If the line is ridiculously long, break up anywhere.

sub break_up {
        my ($line, $position) = @_;
        $position -= 1;
        if ($line =~ /^(.{0,$position}\s)(.*)$/s) {
                my $pref = $1;  
                my $suf = $2; 
                return ($pref, $suf);
	} elsif (length($line) > $max_row_size &&
			not substr($line, 0, $max_row_size) =~ /\s/) {
		return (substr($line, 0, $max_row_size),
				substr($line, $max_row_size));
        } else {
                return ("", "");
        }
}

# Hieroglyphic texts can be split at - symbol.

sub break_up_hi {
	my ($line, $position) = @_;
	$position -= 1;
	if ($line =~ /^(.{0,$position}-)(.*)$/s) {
		my $pref = $1;	
		my $suf = $2;	
		return ($pref, $suf);
        } elsif (length($line) > $max_row_size &&
                        not substr($line, 0, $max_row_size) =~ /-/) {
		print_error(
			"Dead-lock due to long unbreakable line:\n$line\n");
		exit(1);
	} else {
		return ("", "");
	}
}

# Computes the size of strings, expressed in number of characters,
# as they would be printed.
# A note "<no>..." does not take up space in the line itself
# (except a number), but is printed below the table.

sub size_of {
	my ($str) = @_;
	if ($str =~ /^<coord>.*\n(.*)$/) {
		my $str = $1;
                if ($str =~ /^@/) {
                        return 0;
                } else {
                        return length($str);
                }
	} elsif ($str =~ /^<align>/) {
		return 0;
	} elsif ($str =~ /^<no>/) { 
		return 1;
	} elsif ($str =~ /^<lx>/) {
		my $len = 0;
		foreach $key (@lex_keys) {
			if ($str =~ /$key\t(.*)\n/) {
				my $val = $1;
				$len = max($len, length($key));
				$len = max($len, length($val)+2);
			}
		}
		return $len;
	} else {
		$str =~ s/^<text>//;	
		$str =~ s/<\/?hi>//g;	
		$str =~ s/<\/?al>//g;	
		$str =~ s/<\/?tr>/"/g;	
		$str =~ s/\s+/ /g;	
		return length($str);
	}
}

# Max function 

sub max {
        my ($i1,$i2) = @_;
	if ($i1 < $i2) { $i2 } else { $i1 }
}

#####################3
# Writing a table consisting of rows.

# Footnotes:

@notes;
$note_nr;

sub write_table {
	@notes = ();
	$note_nr = 0;
	my $columns = nr_columns() + 1;
	print PAGE "<TABLE border=\"0\" cellpadding=\"0\">\n";
	for ($stream_i = 0; $stream_i <= $max_stream_nr; $stream_i++) {
		if ($row_nonempty[$stream_i]) {
			my $id = $stream_name{$stream_i};
			$id =~ /^(.*)\n(.*)\n..$/;
			my $resource_nr = $1;
			my $current_version = $2;
			my $resource_name = $resource_names[$resource_nr];
			if ($current_version) {
				$resource_name .= "($current_version)";
			} 
			print PAGE "<tr><td colspan=\"$columns\"><hr></td>\n",
					"<tr><th nowrap=\"yes\" ",
					"align=\"right\">$resource_name</th>\n";
			write_row($stream_i);
		}
	}
	print PAGE "<tr><td colspan=\"$columns\"><hr></td>\n</TABLE>\n";
	write_notes();
}

# Nr of locations where columns start (ignore location 0).

sub nr_columns {
	my $columns = 1;
	for ($i = 1; $i <= $#column_start; $i++) {
		if ($column_start[$i] == 1) {
			$columns++;
		}
	}
	return $columns;
}

sub write_row {
	my ($stream_i) = @_;
	my $type = text_type_of($stream_i);
	my $txt_start = text_start(text_type_of($stream_i));
	my $txt_end = text_end(text_type_of($stream_i));
	my $txt_interrupt_start = text_interrupt_start($type);
	my $txt_interrupt_end = text_interrupt_end($type);
	my $col_start = 0;
	my $col_text = "";
	while (@{ $row_stream[$stream_i] }) {
		my $line = shift(@{ $row_stream[$stream_i] });
		
#### BEGIN IF
if ($line =~ /^(?:<coord>|<align>)((.*)\n(.*))$/) {
        my $this_coord = $1;
        my $this_version = $2;
        my $this_pos = $3;
	$this_coord = coord_deref($this_coord);
	$new_col = $pos_column{$this_coord};
	if ($col_text ne "" || $new_col > $col_start) {
		my $columns = 1;
		for ($i = $col_start + 1; $i < $new_col; $i++) {
                	if ($column_start[$i] == 1) {
                        	$columns++;
			}
                }
		$col_text = fix_xml($col_text, $type);
		$col_text = 
			omit_empty_brackets("$txt_start$col_text$txt_end", $type);
		print PAGE 
			"<td colspan=\"$columns\" nowrap=\"yes\">",
			"$col_text</td>\n";
		$col_start = $new_col;
	}
	if ($line =~ /^<coord>/ && $this_pos !~ /^@/ && $this_pos ne "") {
		$col_text = "$txt_interrupt_start<sup><font color=\"blue\">" .
				"($this_pos)" .
				"</font></sup>$txt_interrupt_end";
		register_occ($current_page, $this_version, $this_pos);
	} else {
		$col_text = "";
	}
} elsif ($line =~ /^<text>(.*)/s) {
	my $in_line = $1;
	$col_text .= "$txt_interrupt_start$in_line$txt_interrupt_end";
} elsif ($line =~ /^<lx>(.*)/s) {
	my $in_line = $1;
        $col_text .= lex_data($in_line);
} elsif ($line =~ /^<no>(.*)/s) {
	my $note = $1;
	$note = no_to_html($note);
	$note_nr++;
	$note_mark = "<sup><font color=\"green\">$note_nr</font></sup>";
	$col_text .= "$txt_interrupt_start$note_mark$txt_interrupt_end";
	$note = "$note_mark $note";
	push(@notes, $note);
} elsif ($line =~ /^<al>/) {
	$col_text .= "<i>";
} elsif ($line =~ /^<\/al>/) {
	$col_text .= "</i>";
} else { 
	$col_text .= $line;
}
#### END IF
	
	}
	if ($col_text ne "") { 
		my $columns = 1;
		for ($i = $col_start + 1; $i <= $#column_start; $i++) {
        		if ($column_start[$i] == 1) {
				$columns++;
			}
		}
		$col_text = fix_xml($col_text, $type);
		$col_text = 
			omit_empty_brackets("$txt_start$col_text$txt_end", $type);
		print PAGE 
			"<td colspan=\"$columns\" nowrap=\"yes\">",
			"$col_text</td>\n";
	}
}

sub write_notes {
	foreach $note (@notes) {
		print PAGE "$note\n<br>";
	}
	print PAGE "<p>&nbsp;<p>\n\n";
}

sub no_to_html {
	my ($str) = @_;
	$str =~ s/<al>/<i>/g;
	$str =~ s/<\/al>/<\/i>/g;
	$str =~ s/<tr>/"/g;
	$str =~ s/<\/tr>/"/g;
	$str =~ s/<hi>/\n+H\n/g;
	$str =~ s/<\/hi>/\n-H\n/g;
	return $str;
}

sub text_start {
        my ($type) = @_;
        if ($type eq "al") {
                return "<i>"
        } elsif ($type eq "hi") {
                return "<TABLE border=\"0\" cellpadding=\"0\"><TR><TD>\n+H\n"
        } elsif ($type eq "lx") {
                return "<TABLE border=\"0\" cellpadding=\"0\"><TR>\n"
        } else {
                return ""
        }
}

sub text_end {
        my ($type) = @_;
        if ($type eq "al") {
                return "</i>"
        } elsif ($type eq "hi") {
                return "\n-H\n</TD></TR></TABLE>"
        } elsif ($type eq "lx") {
                return "</TABLE>"
        } else {
                return ""
        }
}

sub text_interrupt_end {
        my ($type) = @_;
        if ($type eq "al") {
                return "<i>"
        } elsif ($type eq "hi") {
                return "</TD><TD>\n+H\n"
        } elsif ($type eq "lx") {
		"</TD>\n"
        } else {
                return ""
        }
}

sub text_interrupt_start {
        my ($type) = @_;
        if ($type eq "al") {
                return "</i>"
        } elsif ($type eq "hi") {
                return "\n-H\n</TD><TD>"
	} elsif ($type eq "lx") {
                "<TD>"
        } else {
                return ""
        }
}

sub lex_data {
	my ($data) = @_;
	my $txt = "<TD><TABLE border=\"1\" cellpadding=\"0\">\n";
	my $prev_key = "";
	foreach $key (@lex_keys) {
		if ($data =~ /$key\t(.*)\n/) {
			my $val = $1;
			my $key_stripped = $key;
			$key_stripped =~ s/(al|hi|tr|form)$//;
			my $pre = "&nbsp;&nbsp;";
			my $post = "";
			if ($key =~ /hi$/) {
				$pre = "\n+H\n";
				$post = "\n-H\n";
			} elsif ($key =~ /al$/) {
				$pre .= "<i>";
                                $post = "</i>";
                        } elsif ($key =~ /form$/) {
                                $pre .= "<b>";
                                $post = "</b>";
			}
			my $entry_start = 
				"<TR><TD><font color=\"blue\">" .
                                "$key_stripped</font>";
			if ($key_stripped eq $prev_key) {
				$entry_start = ""
			}
			$txt .= "$entry_start<BR>$pre$val$post\n";
			$prev_key = $key_stripped;
		}
	}
	$txt .= "</TABLE></TD>\n";
	return $txt;
}

# Brackets without content may arise in the construction of a column. 
# These are removed.

sub omit_empty_brackets {
	my ($text, $type) = @_;
	if ($type eq "al") {
		$text =~ s/<i>\s*<\/i>//gs;
	} elsif ($type eq "hi") {
		$text =~ s/\+H\n\s*\n-H\n//gs;
		$text =~ s/<TD>\s*<\/TD>//gs;
	}
	$text =~ s/\n\s*\n/\n/gs;
	return $text;
}

# Remove features not yet implemented in the hieroglyphic and
# correct XML encodings.

sub fix_xml {
	my ($text, $type) = @_;
        if ($type eq "hi") {
		$text =~ s/#[be0-9]*//g;
		$text =~ s/-\s*\n([^-])/-\1/g;
		$text =~ s/\&amp;/\&/g;
	}
	return $text;
}
	
###############################
# Index page.

# A page is written that consists of file $index_header followed by an
# an index of the first and last positions for a version.

open_index_page();
make_index();
report_resources();
close_index_page();

sub open_index_page {
	open(PAGE, ">$out_dir/$file_name"."0.html") ||
		error_die("Could not open file, does directory $out_dir exist?\n");
	print PAGE start_html("$text_name (Index)"),
		"\n\n";
	close(PAGE);
	if ($index_header) {
		system("cat $in_dir$index_header >> $out_dir/$file_name"."0.html");
	}
}

sub close_index_page {
	print PAGE end_html;
	close(PAGE);
	system("chmod go+r $out_dir/$file_name"."0.html");
}

sub make_index {
	open(PAGE, ">>$out_dir/$file_name"."0.html") ||
                error_die("Could not open file, does directory $out_dir exist?\n");
	print PAGE "<p><HR>\n<center><h2>Index:</h2></center>\n",
		"<TABLE border align=\"center\">\n",
		"\n<TR><TH></TH>\n";
	foreach $version_name (keys %version_set) {
		print PAGE "<TH>$version_name</TH>\n";
	}
	for ($page_nr = 1; $page_nr <= $current_page; $page_nr++) {
		print PAGE "<TR><TH><a href =\"$file_name$page_nr.html\">",
			"page $page_nr</a></TH>";
		foreach $version_name (keys %version_set) {
			my $fst = $first_occ{"$page_nr\n$version_name"};
			my $lst = $last_occ{"$page_nr\n$version_name"};
			if ($fst eq $lst) {
			print PAGE "<TD align=\"cente\">$fst</TH>\n";
			} else {
			print PAGE "<TD align=\"cente\">$fst - $lst</TH>\n";
			}
		}
	}
	print PAGE "</TABLE>\n\n<p>";
}

sub report_resources {
	print PAGE "<h2>Incorporated resources:</h2>\n";
	for ($i = 0; $i <= $resource_nr; $i++) {
		my $name = $resource_names[$i];
		my $create = $resource_creates[$i];
		my $url = $resource_urls[$i];
		if ($url) {
			$url = "(<a type=\"text/plain\" href=\"$url\">$url</a>)";
		}
		$descr = $resource_descrs[$i];
		print PAGE "<HR><h3>$name $url</h3>",
			"<blockquote>$create</blockquote>$descr";
	};
	print PAGE "<HR>\n";
}

#########################################################
# Translation of the hieroglyphic code into pictures.

my $mdc2html_dir = "/home/cl-home/nederhof/public_html/AEL";
my $mdc2html = "$mdc2html_dir/mdc2html.pl CONF=$mdc2html_dir/mdc2html.conf " .
	"IMG_DIR=\"../hgifs/\" REMOTE=Y";

for ($page_nr = 1; $page_nr <= $current_page; $page_nr++) {
	system("$mdc2html $out_dir/$file_name$page_nr.tmp > $out_dir/$file_name$page_nr.html");
	system("rm $out_dir/$file_name$page_nr.tmp");
	system("chmod go+r $out_dir/$file_name$page_nr.html");
}

#############################################################
# Error handling.

sub print_error {
my($msg) = @_;
	print $msg;
}

sub error_die {
my($msg) = @_;
        print $msg;
	exit(1);
}