#!/usr/bin/perl
# Translates a file containing an annotated key to exercises into HTML files
# (and a LaTeX file).

# The file $name.ex contains lines starting with %ex and %nr that divide
# the file into sections and subdivide into subsections, resp. 
# One big file is made for all exercises, and several smaller files for
# individual "ex" sections. The tags %ex and %nr should be followed
# by the names of sections and subsections. A subsection can
# consist of several exercises.

# For each exercise, we can have lines starting with 
# %hi hieroglyphs
# %al (alphabetic) for transliteration
# %tr for translation
# %no for a note
# %qu for a question to the reader
# %cf for similar sentences in other publications
# %bi for bibliographic references concerning the full text
# For each exercise there should be at most one entry for 
# each one of the above.
# For multi-line entries for one of the above, the second and following
# lines should not be prefixed by a "%"-tag. The first empty line
# or line starting with % marks the end of the entry.
# Use <p> for dividing the entry into paragraphs.
# A line starting with two % is ignored; this can be used for comments.

# Certain codes in the entries are replaced by hyperlinks.
# For this, the existence of a file "labels" is assumed.

# A file $name.0 is the first half of a new file $name.html
# containing the root of the generated html files.

use CGI qw(:standard);

# Command-line arguments, in order:
#   $name    - identifies file names
#   $acronym - acronym of publication
#   $command - possible command for special treatment
($name, $acronym, $command) = @ARGV[0 .. 2];

# Variables for comparing transliterations:

# Pairs of symbols that count as interchangeable when two transliterations
# are compared (see near_strings).
%near = (
    "d"  => "D",
    "t"  => "T",
    "i"  => "j",
    "y"  => "j",
    "="  => ".",
    "-"  => " ",
    "s"  => "z",
    "mA" => "Am",
);

# Symbols whose deletion/insertion costs less than the default of 1
# (see delete_price).
%delete = (
    "." => 0,
    "^" => 0,
    "(" => 0,
    ")" => 0,
    "," => 0,
    "=" => 0.3,
    "w" => 0.3,
    "i" => 0.2,
    "j" => 0.2,
    "y" => 0.2,
);

# Table translating transliteration symbols (one or two characters; a
# leading "^" marks the capitalised variant) into LaTeX commands or letters.
%latex = (
    "A"  => "\\alp ",
    "^A" => "\\alp ",
    "^i" => "I",
    "^j" => "J",
    "^y" => "Y",
    "a"  => "\\ayin ",
    "^a" => "\\ayin ",
    "^w" => "W",
    "^b" => "B",
    "^p" => "P",
    "^f" => "F",
    "^m" => "M",
    "^n" => "N",
    "^r" => "R",
    "^h" => "H",
    "H"  => "\\hdot ",
    "^H" => "\\Hdot ",
    "x"  => "\\hbow ",
    "^x" => "\\Hbow ",
    "X"  => "\\hstroke ",
    "^X" => "\\Hstroke ",
    "^s" => "S",
    "S"  => "\\sh ",
    "^S" => "\\Sh ",
    "q"  => "\\kdot ",
    "^q" => "\\Kdot ",
    "^k" => "K",
    "^g" => "G",
    "^t" => "T",
    "T"  => "\\tstroke ",
    "^T" => "\\Tstroke ",
    "^d" => "D",
    "D"  => "\\dstroke ",
    "^D" => "\\Dstroke ",
    "-"  => "--",
);

# A "labels" file is read that maps labels to the output representation
# and a relevant http path. Also files with example sentences are read and
# indexed.

# File-scoped state shared by the subroutines below: current section ($ex)
# and subsection ($nr), the pending entries of the current item ($hi .. $bi),
# the tag of the entry being continued ($entry), and bookkeeping flags.
my($ex,$nr,$hi,$al,$tr,$no,$qu,$cf,$bi,$entry,$ex_open,$ex_filled,$nr_tag);

# Example sentences indexed by "label:...:" keys (filled by store_exer and
# store_example): hieroglyphs, transliteration, translation, and the HTML
# and LaTeX forms of the reference.
my %example_hi = ();
my %example_al = ();
my %example_tr = ();
my %example_out = ();
my %example_out_tex = ();

# For each label: its output representation and its http path.
my %label_out = ();
my %label_path = ();

# Each usable line of "labels" has the form
#   label  output  path  [sentence-file ...]
# Lines that do not match are skipped.  A lexical handle and a named line
# variable are used so that the nested file reading done by read_sentences
# cannot interfere with this loop.
open(my $lab_fh, '<', "labels") || die "cannot open labels: $!";
while (my $line = <$lab_fh>) {
    if ($line =~ /\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)(\s*| .*)$/) {
        my ($lab, $out, $path, $files) = ($1, $2, $3, $4);
        $label_out{$lab} = $out;
        $label_path{$lab} = $path;
        foreach my $file ($files =~ /[^\s]+/g) {
            read_sentences($file, $lab);
        }
    }
}
close($lab_fh);

# Hyperlink to publication.

my $book = cite_sub($acronym);

open(IN, "$name.ex") || die "cannot open $name.ex";

# In "search" mode every output is discarded; otherwise the all-in-one page
# and the LaTeX file are created afresh and the root page is appended to.
my $searching = ($command eq "search");
my $al_target   = $searching ? "/dev/null" : "${name}_all.html";
my $tex_target  = $searching ? "/dev/null" : "$name.tex";
my $root_target = $searching ? "/dev/null" : "$name.html";

open(AL, ">$al_target") || die "cannot open $al_target";
open(TEX, ">$tex_target") || die "cannot open $tex_target";
open(ROOT, ">>$root_target") || die "cannot open $root_target";

# Start the all-in-one HTML page: document head plus a top-level heading.
# The CGI header() call is commented out, so no HTTP header is printed.
print AL # header(),
	start_html("Key to $name"),
	h1("Key to the exercises from $book"), "\n\n";

# Write the LaTeX preamble: page layout and the \newcommand definitions for
# the commands that the %latex table maps transliteration symbols to,
# followed by \begin{document}.
print TEX <<ENDTEX;
\\documentstyle[11pt]{article}
\\setlength{\\textwidth}{16cm}
\\setlength{\\textheight}{24cm}
\\setlength{\\oddsidemargin}{0cm}
\\setlength{\\topmargin}{-1.3cm}
\\sloppy
\\frenchspacing
\\newfont{\\cyr}{wncyr10 scaled 800}
\\newcommand{\\alp}{%
\\hspace{0.15ex}{\\cyr Z}\\hspace{-0.15ex}}
\\newcommand{\\ayin}{%
{\\hspace{.18ex}\\raisebox{0.6ex}{\\scriptsize c}}\\hspace{-.15ex}}
\\newcommand{\\hdot}{%
\\d{h}}
\\newcommand{\\Hdot}{%
\\d{H}}
\\newcommand{\\hbow}{%
\\hspace{-0.35ex}\\parbox[b]{1.6ex}{\\raisebox{-2.9ex}[0pt][0pt]{%
\$\\stackrel{\\mbox{\\it h}}%
{\\mbox{\\raisebox{1ex}[2.5ex][0ex]{\\u{\\ }}}}\$}}}
\\newcommand{\\Hbow}{%
\\hspace{-0.3ex}\\parbox[b]{1.8ex}{\\raisebox{-2.9ex}[0pt][0pt]{%
\$\\stackrel{\\mbox{\\it H}}%
{\\mbox{\\raisebox{1ex}[2.5ex][0ex]{\\u{\\ }}}}\$}}}
\\newcommand{\\hstroke}{%
\\b{h}}
\\newcommand{\\Hstroke}{%
\\b{H}}
\\newcommand{\\sh}{%
\\u{s}}
\\newcommand{\\Sh}{%
\\u{S}}
\\newcommand{\\kdot}{%
\\d{k}}
\\newcommand{\\Kdot}{%
\\d{K}}
\\newcommand{\\tstroke}{%
\\b{t}}
\\newcommand{\\Tstroke}{%
\\b{T}}
\\newcommand{\\dstroke}{%
\\b{d}}
\\newcommand{\\Dstroke}{%
\\b{D}}
\\begin{document}
ENDTEX

# Outside "search" mode, splice in the hand-written front matter:
# $name.0 is copied over $name.html, and $name.tex0 is appended to the
# preamble just written.  TEX is closed first so that the shell's append
# lands after the preamble, then reopened in append mode for the rest of
# the run.  The exit status of the two shell commands is not checked.
unless ($command eq "search") {
	system("cp $name.0 $name.html");
	close(TEX);
	system("cat $name.tex0 >> $name.tex");
	open(TEX, ">>$name.tex") || die "cannot open $name.tex";
	print TEX "\\vspace{3ex}\n";
}

# No item is pending and no section is open at the start.
$hi = "";
$al = "";
$tr = "";
$no = "";
$qu = "";
$cf = "";
$bi = "";

$entry = "";
$ex_open = "";

# Maps each entry tag to the variable that accumulates its text.
my %entry_slot = (
    hi => \$hi,
    al => \$al,
    tr => \$tr,
    no => \$no,
    qu => \$qu,
    cf => \$cf,
    bi => \$bi,
);

# Main loop over the lines of $name.ex.
while (<IN>) {
    if (/^%%/) {
        # Comment line: ignored.
    }
    elsif (/^%ex(\s+.*)$/) {
        # New section.  The pending item is flushed BEFORE $ex is replaced,
        # so that it is still reported under the section it belongs to.
        my $new_ex = $1;
        finish_item();
        $ex = $new_ex;
        if ($ex_open) {
            close_ex();
        }
        else {
            $ex_open = "true";
        }
        start_ex();
    }
    elsif (/^%nr(\s+.*)$/) {
        # New subsection.  As above, the pending item is flushed before
        # $nr and $nr_tag change; otherwise an item that is not followed by
        # an empty line would be printed with the tag of the next subsection.
        my $new_nr = $1;
        unless ($ex_open) { die "%nr before any %ex entry:\n$_" }
        finish_item();
        $nr = $new_nr;
        $nr_tag = clear_spaces($nr);
        start_nr();
    }
    elsif (/^%(hi|al|tr|no|qu|cf|bi)(\s+.*)$/) {
        # First line of an entry; at most one entry of each kind per item.
        my ($tag, $text) = ($1, $2);
        my $slot = $entry_slot{$tag};
        if ($$slot) { die "second %$tag entry:\n$_" }
        $$slot = "$text\n";
        $entry = $tag;
    }
    elsif (/^%/) {
        die "unrecognized entry:\n$_";
    }
    elsif (/^\s*$/) {
        # An empty line ends the current item.
        finish_item();
    }
    elsif (my $slot = $entry_slot{$entry}) {
        # Continuation line of the entry that is currently open.
        $$slot .= $_;
    }
    else {
        die "text without %:\n$_";
    }
}

# Flush the last pending item and close the last section, if any.
finish_item();
if ($ex_open) {
    close_ex();
}

print AL end_html;
close(AL);

print ROOT li("<a href=\"$name\_all.html\">All of the above<\/a> on one page"),
    "\n";

# An optional file $name.err holds errata.  If it can be read, it is
# published as a separate HTML page, linked from the root page and appended
# to the LaTeX document.
if (open(my $err_in, '<', "$name.err")) {
    my $err_file = join("", <$err_in>);
    close($err_in);
    my $err_html = cite_sub($err_file);
    my $err_tex = no_to_latex($err_file);
    # Like every other output file, failure to create the page is fatal
    # rather than silently producing a dangling link.
    open(my $err_out, '>', "$name\_err.html")
        || die "cannot open $name\_err.html";
    print {$err_out} start_html("Errata in $name");
    print {$err_out} "\n<h1>Errata in $book</h1>\n";
    print {$err_out} "$err_html\n";
    print {$err_out} end_html;
    close($err_out);
    print ROOT
        li("A list of <a href=\"$name\_err.html\">errata</a>");
    print TEX "\n\\medskip\\noindent{\\Large Errata}\\medskip\n";
    print TEX "$err_tex\n";
}

print ROOT "</ul>\n\n",
    "This document is also available in a compact ",
    "<a href=\"$name.pdf\">PDF format<\/a>.\n";

print ROOT end_html;
close(ROOT);

# Bibliography and end of the LaTeX document.
print TEX <<ENDTEX;
\\nocite{$acronym}
\\bibliographystyle{alpha}
\\bibliography{eal}
\\end{document}
ENDTEX
close(TEX);
unless ($command eq "search") {
        # system("latex $name > /dev/null");
        # system("bibtex $name");
        # system("latex $name > /dev/null");
        # system("latex $name > /dev/null");
        # system("dvips $name.dvi -o $name.ps")
}

# Writes the given strings to both HTML outputs: the all-in-one page (AL)
# and the page of the current section (EX).
sub print_html {
    print AL @_;
    print EX @_;
}

# Ends the item that is currently being collected.
#
# If any of the entries %hi, %al, %tr, %no, %qu or %cf is pending, the item
# is written to the HTML outputs and to the LaTeX file, the transliteration
# is handed to find_matching, and the section is marked as non-empty.
# In all cases every pending entry, including %bi, is cleared and no entry
# is left open for continuation lines.
#
# $ex_filled is set only when something was actually written, so that
# close_ex can detect a section without items.  %bi is cleared even when it
# is the only entry; otherwise the next %bi would be rejected as a "second
# %bi entry".
sub finish_item {
    if ($hi || $al || $tr || $no || $qu || $cf) {
        # The subsection tag labels only the first item of a subsection.
        print TEX "\\item[$nr_tag]\n";
        $nr_tag = "";
        $hi = clear_spaces($hi);
        $al = clear_spaces($al);
        $tr = clear_spaces($tr);
        $cf = clear_spaces($cf);
        if ($hi) {
            print_html(b($hi), br(), "\n");
            print TEX "{\\bf $hi}\n\\hspace{1ex}\n";
        }
        if ($al) {
            my $al_tex = al_to_latex($al);
            # The whole transliteration is set in <i>...</i>, so a <no>
            # note is rendered by leaving and re-entering italics.
            $al =~ s/<no>/<\/i>/g;
            $al =~ s/<\/no>/<i>/g;
            print_html(i($al), br(), "\n");
            print TEX "$al_tex\n\\hspace{1ex}\n";
            find_matching($ex, $nr, $al, $cf);
        }
        if ($tr) {
            my $tr_tex = tr_to_latex($tr);
            $tr =~ s/<al>/<i>/g;
            $tr =~ s/<\/al>/<\/i>/g;
            print_html("$tr\n");
            print TEX "$tr_tex\n";
        }
        if ($no) {
            my $no_tex = no_to_latex($no);
            $no = cite_sub($no);
            print_html(blockquote("<b>Note:</b> $no"), "\n");
            print TEX "\\begin{quote}Note:\n$no_tex\n\\end{quote}\n";
        }
        if ($qu) {
            my $qu_tex = no_to_latex($qu);
            $qu = cite_sub($qu);
            print_html(blockquote("<b>QUESTION:</b> $qu"), "\n");
            print TEX "\\begin{quote}QUESTION:\n$qu_tex\n\\end{quote}\n";
        }
        if ($cf) {
            print_html("<BLOCKQUOTE><b>Cf:</b><ul>\n");
            print TEX "\\begin{description}\n";
            # %cf holds whitespace-separated keys into the example tables.
            foreach my $ind ($cf =~ /[^\s]+/g) {
                my $other_hi = $example_hi{$ind};
                $other_hi = clear_spaces($other_hi);
                my $other_al = $example_al{$ind};
                $other_al = clear_spaces($other_al);
                my $other_al_tex = al_to_latex($other_al);
                $other_al =~ s/<no>/<\/i>/g;
                $other_al =~ s/<\/no>/<i>/g;
                my $other_tr = $example_tr{$ind};
                my $other_tr_tex = tr_to_latex($other_tr);
                $other_tr =~ s/<al>/<i>/g;
                $other_tr =~ s/<\/al>/<\/i>/g;
                my $out = $example_out{$ind};
                my $out_tex = $example_out_tex{$ind};
                unless ($out) {
                    # Unknown key: emit a placeholder and report it.
                    $out = "...reference to be put here shortly...";
                    $out_tex = "...reference to be put here shortly...";
                    print "Undefined cf: $ind\n";
                }
                print_html(li("$out:\n"), br(), "\n");
                print TEX "\\item[$out_tex]\n";
                if ($other_hi) {
                    print_html(b($other_hi), br(), "\n");
                    print TEX "{\\bf $other_hi}\n\\hspace{1ex}\n";
                }
                if ($other_al) {
                    print_html(i($other_al), br(), "\n");
                    print TEX "$other_al_tex\n\\hspace{1ex}\n";
                }
                if ($other_tr) {
                    print_html("$other_tr\n");
                    print TEX "$other_tr_tex\n";
                }
            }
            print_html("</ul></BLOCKQUOTE>\n");
            print TEX "\\end{description}\n";
        }
        print_html("<p>\n\n");
        $ex_filled = "true";
    }

    $hi = "";
    $al = "";
    $tr = "";
    $no = "";
    $qu = "";
    $cf = "";
    $bi = "";
    $entry = "";
}

# Opens a new section named by $ex: creates the section's own HTML page
# (EX), writes the section heading to EX, AL and TEX, adds a link to the
# root page, and resets the subsection state.
sub start_ex {
    $ex = clear_spaces($ex);
    # The file name uses underscores in place of spaces.
    my $ex_name = $ex;
    $ex_name =~ s/ /_/g;
    my $ex_html = $ex;
    $ex_html =~ s/\&/\&amp;/g;
    $ex_html = cite_sub($ex_html);
    # $ex_html =~ s/^par /\&para; /g;
    my $ex_tex = html_to_latex($ex_html);

    if ($command eq "search") {
        open(EX, ">/dev/null") || die "cannot open /dev/null";
    } else {
        # The message names the file that is actually opened.
        open(EX, ">$name\_$ex_name.html")
            || die "cannot open $name\_$ex_name.html";
    }

    print EX start_html("Key to $ex from $name"),
        h1("Key to $ex_html from $book"), "\n\n";
    print AL hr(), h1("$ex_html"), "\n", "<ul>\n";
    print TEX "\n",
        "\\noindent\n{\\large $ex_tex}\\ \\ \\ ",
        "\\hrulefill\\nopagebreak\n",
        "\\begin{description}\n";
    print EX "<ul>\n";
    print ROOT li("<a href=\"$name\_$ex_name.html\">$ex_html<\/a>"), "\n";
    $ex_filled = "";
    $nr = "";
    $nr_tag = $nr;
}

# Closes the current section in all three outputs and finishes its HTML page.
sub close_ex {
    # When $ex_filled is unset a placeholder item is emitted first
    # (presumably so that the description environment is never empty).
    print TEX "\\item[\\ ]\n" unless $ex_filled;

    print AL "</ul>\n\n";
    print TEX "\\end{description}\n";
    print EX "</ul>\n\n", end_html();
    close(EX);
}

# Starts a new subsection: normalises $nr and writes it as a list item with
# a second-level heading to both HTML outputs.
sub start_nr {
    $nr = clear_spaces($nr);
    my @heading = ("<li>\n", h2("$nr"), "\n\n");
    print AL @heading;
    print EX @heading;
}

# Converts annotated text to HTML.
#
# Every label from the "labels" file is replaced by a hyperlink to its path,
# showing its output representation; the words PAR and SEC (not adjacent to
# letters) become the paragraph and section signs; and the markup <hi>,
# <al> and <tr> is mapped to bold, italics and double quotes respectively.
# Returns the converted text with surrounding white space removed.
sub cite_sub {
    my ($txt) = @_;
    $txt = clear_spaces($txt);
    foreach my $label (keys %label_out) {
        my $out = $label_out{$label};
        my $path = $label_path{$label};
        # Labels are literal strings, not patterns: quote them so that
        # characters such as "." or "+" in a label match only themselves.
        $txt =~ s/\Q$label\E/<a href="$path">$out<\/a>/g;
    }
    $txt =~ s/(^|[^a-zA-Z])PAR([^a-zA-Z])/$1&para;$2/g;
    $txt =~ s/(^|[^a-zA-Z])PAR$/$1&para;/;
    $txt =~ s/(^|[^a-zA-Z])SEC([^a-zA-Z])/$1&sect;$2/g;
    $txt =~ s/(^|[^a-zA-Z])SEC$/$1&sect;/;
    $txt =~ s/<hi>/<b>/g;
    $txt =~ s/<\/hi>/<\/b>/g;
    $txt =~ s/<al>/<i>/g;
    $txt =~ s/<\/al>/<\/i>/g;
    $txt =~ s/<tr>\s*/"/g;
    $txt =~ s/\s*<\/tr>/"/g;
    return $txt;
}

# Returns a copy of the argument in which every run of blanks is reduced to
# a single blank and leading and trailing white space is removed.
sub clear_spaces {
    my ($txt) = @_;
    for ($txt) {
        s/ +/ /g;
        s/^\s*//;
        s/\s*$//;
    }
    return $txt;
}

##########################################
# Files containing sentences are read and put in a hash.

# File being read and the label it belongs to; shared with the readers and
# the store_* subroutines.
my ($sen_file, $sen_lab);

# Reads the sentences of file $_[0] for label $_[1].  The file format is
# selected by the extension: ".ex" or ".txt"; other files are not read.
sub read_sentences {
    ($sen_file, $sen_lab) = @_;
    return read_exercises() if $sen_file =~ /\.ex$/;
    return read_txt() if $sen_file =~ /\.txt$/;
}

# A file with exercises has a similar format as before, except that
# only one entry per number is allowed.

# Reads the exercise file $sen_file and stores one example per %nr via
# store_exer.  Only %ex, %nr, %hi, %al and %tr are interpreted; any other
# %-line and any empty line just ends the entry being continued.
#
# The file is opened with a lexical handle and three-argument open, so the
# name (which comes from the "labels" file) is never interpreted as an open
# mode, and lines are read into a named variable so the caller's $_ is left
# alone.
sub read_exercises {
    open(my $sen_fh, '<', $sen_file) || die "cannot open $sen_file: $!";
    $ex = "";
    $nr = "";
    $hi = "";
    $al = "";
    $tr = "";
    $entry = "";

    # Maps each stored entry tag to the variable accumulating its text.
    my %slot = (hi => \$hi, al => \$al, tr => \$tr);

    while (my $line = <$sen_fh>) {
        if ($line =~ /^%ex(\s+.*)$/) {
            # Store the pending example under the old section first.
            my $new_ex = $1;
            store_exer();
            $ex = clear_spaces($new_ex);
            $nr = "";
        }
        elsif ($line =~ /^%nr(\s+.*)$/) {
            my $new_nr = $1;
            store_exer();
            $nr = clear_spaces($new_nr);
        }
        elsif ($line =~ /^%(hi|al|tr)(\s+.*)$/) {
            my ($tag, $text) = ($1, $2);
            if (${ $slot{$tag} }) { die "second %$tag entry:\n$line" }
            ${ $slot{$tag} } = "$text\n";
            $entry = $tag;
        }
        elsif ($line =~ /^%/ || $line =~ /^\s*$/) {
            $entry = "";
        }
        elsif (exists $slot{$entry}) {
            # Continuation line of %hi, %al or %tr; other text is dropped.
            ${ $slot{$entry} } .= $line;
        }
    }

    store_exer();
    close($sen_fh);
}

# As above, but here there are entries %pa for page, and %li for line.

# Current page and line of the text file being read.
my($pa,$li);

# Reads the text file $sen_file and stores one example per %li via
# store_example.  Only %pa, %li, %hi, %al and %tr are interpreted; any other
# %-line and any empty line just ends the entry being continued.
#
# The file is opened with a lexical handle and three-argument open, so the
# name (which comes from the "labels" file) is never interpreted as an open
# mode, and lines are read into a named variable so the caller's $_ is left
# alone.
sub read_txt {
    open(my $sen_fh, '<', $sen_file) || die "cannot open $sen_file: $!";
    $pa = "";
    $li = "";
    $hi = "";
    $al = "";
    $tr = "";
    $entry = "";

    # Maps each stored entry tag to the variable accumulating its text.
    my %slot = (hi => \$hi, al => \$al, tr => \$tr);

    while (my $line = <$sen_fh>) {
        if ($line =~ /^%pa(\s+.*)$/) {
            # Store the pending example under the old page first.
            my $new_pa = $1;
            store_example();
            $pa = clear_spaces($new_pa);
            $li = "";
        }
        elsif ($line =~ /^%li(\s+.*)$/) {
            my $new_li = $1;
            store_example();
            $li = clear_spaces($new_li);
        }
        elsif ($line =~ /^%(hi|al|tr)(\s+.*)$/) {
            my ($tag, $text) = ($1, $2);
            if (${ $slot{$tag} }) { die "second %$tag entry:\n$line" }
            ${ $slot{$tag} } = "$text\n";
            $entry = $tag;
        }
        elsif ($line =~ /^%/ || $line =~ /^\s*$/) {
            $entry = "";
        }
        elsif (exists $slot{$entry}) {
            # Continuation line of %hi, %al or %tr; other text is dropped.
            ${ $slot{$entry} } .= $line;
        }
    }

    store_example();
    close($sen_fh);
}

# Stores the pending example of an exercise file, if there is one, under the
# key "label:ex:nr:", where ex and nr are the first number found in $ex and
# $nr (or the full string when it contains no digits).  Dies when an example
# is pending without both a section and a subsection.  Afterwards the
# pending entries and $nr are cleared.
sub store_exer {
    # Both variables are declared lexically ("my $a, $b" declares only $a).
    my ($ex_nr, $nr_nr);
    if ($hi || $al || $tr) {
        unless ($ex && $nr) { die "no %ex or no %nr" }
        $ex_nr = ($ex =~ /([0-9]+)/) ? $1 : $ex;
        $nr_nr = ($nr =~ /([0-9]+)/) ? $1 : $nr;
        my $ind = "$sen_lab:$ex_nr:$nr_nr:";
        $example_hi{$ind} = $hi;
        $example_al{$ind} = $al;
        $example_tr{$ind} = $tr;
        $example_out{$ind} = cite_sub("$sen_lab, $ex, $nr");
        $example_out_tex{$ind} = "\\cite{$sen_lab}, $ex, $nr";

        $hi = "";
        $al = "";
        $tr = "";
        $nr = "";
    }
    $entry = "";
}

# Stores the pending example of a text file, if there is one, under the key
# "label:p<page>:<line>:".  The printed reference mentions the page only.
# Dies when an example is pending without both a page and a line.
sub store_example {
    if ($hi || $al || $tr) {
        die "no %pa or no %li" unless $pa && $li;

        my $key = "$sen_lab:p$pa:$li:";
        ($example_hi{$key}, $example_al{$key}, $example_tr{$key})
            = ($hi, $al, $tr);
        # Variants that also showed the line number:
        #   cite_sub("$sen_lab, p. $pa [$li]")
        #   "\\cite{$sen_lab}, p. $pa ($li)"
        $example_out{$key} = cite_sub("$sen_lab, p. $pa");
        $example_out_tex{$key} = "\\cite{$sen_lab}, p. $pa";

        ($hi, $al, $tr, $li) = ("") x 4;
    }
    $entry = "";
}

####################################################################
# We compute the string distance in a special way, that allows for
# overlapping. No costs are associated with deletion/inserting at
# the ends and beginnings of the two strings. However, we constrain
# this process in such a way that comparison of the two strings should
# cover a number of words that is 60 % of the number of words in the
# first string. We therefore initialize the lower boundary and the left
# boundary of a lower left submatrix of the cost matrix accordingly to 0. 
# The total costs are accumulated not only at the upper right corner, but 
# at any position in the upper boundary and the right boundary of the matrix.
# This is slightly asymmetric. The costs should be at most 0.13 times the
# number of characters in the shorter of the two strings.

# In "search" mode only: compares transliteration $al of item ($ex, $nr)
# with every stored example and reports on standard output
#   - examples that match but are not listed in $cf (lines "--> key"), and
#   - keys listed in $cf that did not match (line "??? ...").
# Transliterations of fewer than two words are not compared.
sub find_matching {
    return unless $command eq "search";
    my ($ex, $nr, $al, $cf) = @_;
    my $new_cf = "";
    my $out_report = "";

    # The separators are captured, so "a b" counts as three fields.
    my @fields = split(/( |-)/, $al);
    return if @fields < 2;

    while (my ($ind, $other_al) = each %example_al) {
        $other_al = clear_spaces($other_al);
        $other_al =~ s/<no>/<\/i>/g;
        $other_al =~ s/<\/no>/<i>/g;
        if (string_simm($al, $other_al)) {
            # Keys are literal strings: quote them so that characters
            # special in patterns cannot change what is matched.
            if ($cf =~ /\Q$ind\E/) {
                $cf =~ s/\Q$ind\E/ /;
            } else {
                $out_report .= "--> $ind\n   $other_al\n";
                $new_cf .= $ind;
            }
        }
    }
    unless ($cf =~ /^\s*$/ && $new_cf =~ /^\s*$/) {
        print "$ex $nr:\n   $al\n$out_report";
        unless ($cf =~ /^\s*$/) {
            print "??? $cf\n";
        }
        print "\n";
    }
}

# print string_simm("iw m(w)t m Hr=i mi Abb si mAA
# pr.w=sn </i>(sic!, read<i> =f</i>)<i> iri.n=f rnp.wt aSA.wt
# m nDr.t",
# "jw m(w).t m Hr=j mjn mj Abb z(.j) mAA
# pr.w=sn </i>(sic!, lies<i> =f</i>)<i> jr(j).n=f rnp.wt aSA.wt
# jT(j)(.w) m nDr.t");

# Returns true if the two strings are equal or are listed as interchangeable
# in %near, in either direction, and 0 otherwise.
#
# Both directions are always tried: a failed lookup of $str1 no longer
# prevents the lookup of $str2.
sub near_strings {
    my ($str1, $str2) = @_;
    return 1 if $str1 eq $str2;
    return 1 if defined $near{$str1} && $near{$str1} eq $str2;
    return 1 if defined $near{$str2} && $near{$str2} eq $str1;
    return 0;
}

# Returns the cost of deleting (or inserting) symbol $str1: the value listed
# in %delete, or 1 for a symbol that is not listed.
sub delete_price {
    my ($str1) = @_;
    # Direct hash lookup instead of scanning all keys.
    return exists $delete{$str1} ? $delete{$str1} : 1;
}

# cost matrix
my @cost;

# maximal allowed cost
my $max_cost;

# Tables that for i indicate the lowest and highest j with which the
# cost matrix is defined.  These are hashes: they are indexed as
# $i_min{$i} / $i_max{$i} here and in define_cost.
my (%i_min, %i_max);

# Decides whether two transliterations are similar.
#
# Text between </i> and <i> (notes) is removed and white space is
# normalised; then an edit distance is computed in which the comparison may
# start anywhere in the first 40 % of the words of $str1 (and a
# corresponding prefix of $str2) and may end at the end of either string.
# Returns the minimal cost plus 1 (a true value) if that cost does not
# exceed 0.13 times the length of the shorter string, and 0 otherwise.
sub string_simm {
    my ($str1, $str2) = @_;
    foreach my $str ($str1, $str2) {
        $str =~ s/<\/i>[^<]*<i>//g;
        $str =~ s/^\s*//;
        $str =~ s/\s*$//;
        # Every run of white space becomes one blank, so that line breaks
        # inside multi-line entries separate words like blanks do.
        $str =~ s/\s+/ /g;
    }
    my $length1 = length($str1);
    my $length2 = length($str2);
    $max_cost = 0.13 * min($length1, $length2);

    my @words1 = split(/[ -]/, $str1);
    my @words2 = split(/[ -]/, $str2);
    my $nr_words1 = @words1;
    my $nr_words2 = @words2;
    my $not_needed_words1 = int(0.40 * $nr_words1);
    my $needed = $nr_words1 - $not_needed_words1;
    my $not_needed_words2 = $nr_words2 - $needed;
    # The second string is too short to cover the required number of words.
    if ($not_needed_words2 < 0) { return 0 }

    my $start1 = index_spaces($str1, $not_needed_words1);
    my $start2 = index_spaces($str2, $not_needed_words2);

    # Reset the matrix and its bounds; cost 0 on the part of the lower and
    # left boundaries where the comparison is allowed to start.
    @cost = ();
    %i_min = ();
    %i_max = ();
    my ($i, $j);
    for ($i = 0; $i <= $start1; $i++) {
        $cost[$i][0] = 0;
        $i_min{$i} = 0;
        $i_max{$i} = 0;
    }
    for ($i = $start1 + 1; $i <= $length1; $i++) {
        $i_min{$i} = 32000;
        $i_max{$i} = -1;
    }
    for ($j = 0; $j <= $start2; $j++) {
        define_cost(0, $j, 0);
    }

    # Propagate costs from every defined cell (i,j) to its successors.
    for ($i = 0; $i <= $length1; $i++) {
        my $subs1 = substr($str1, $i, 1);
        my $l1 = length($subs1);
        my $subs1_2 = substr($str1, $i, 2);
        my $l1_2 = length($subs1_2);
        for ($j = $i_min{$i}; $j <= $i_max{$i}; $j++) {
            my $subs2 = substr($str2, $j, 1);
            my $l2 = length($subs2);
            my $subs2_2 = substr($str2, $j, 2);
            my $l2_2 = length($subs2_2);

            my $old_cost;
            if (defined ($old_cost = $cost[$i][$j])) {
                # Substitution of one symbol (free for near symbols).
                if ($l1 == 1 && $l2 == 1) {
                    if (near_strings($subs1, $subs2)) {
                        define_cost($i+$l1, $j+$l2, $old_cost);
                    } else {
                        define_cost($i+$l1, $j+$l2, $old_cost+1);
                    }
                }
                # Deletion from the first / the second string.
                if ($l1 > 0) {
                    define_cost($i+$l1, $j, $old_cost+delete_price($subs1));
                }
                if ($l2 > 0) {
                    define_cost($i, $j+$l2, $old_cost+delete_price($subs2));
                }
                # Free substitution of a near pair of two-symbol strings.
                if ($l1_2 == 2 && $l2_2 == 2
                        && near_strings($subs1_2, $subs2_2)) {
                    define_cost($i+$l1_2, $j+$l2_2, $old_cost);
                }
            }
        }
    }

    # The result is the minimum over the upper and right boundaries.
    my $min_cost = 32000;
    my $final_cost;
    for ($i = 0; $i <= $length1; $i++) {
        if (defined ($final_cost = $cost[$i][$length2])) {
            $min_cost = min($final_cost, $min_cost);
        }
    }
    for ($j = 0; $j <= $length2; $j++) {
        if (defined ($final_cost = $cost[$length1][$j])) {
            $min_cost = min($final_cost, $min_cost);
        }
    }

    # For testing;
    # for ($j = $length2; $j >= 0; $j--) {
    # for ($i = 0; $i <= $length1; $i++) {
    # my $this = $cost[$i][$j];
    # unless (defined $this) { $this = "-" }
    # print "$this ";
    # }
    # print "\n"};

    return $min_cost <= $max_cost ? $min_cost + 1 : 0;
}

# The cost is defined for an entry in the matrix.
# Only minimal costs lower than $max_cost are kept. For each column, 
# the minimal and maximal values are kept for which an entry is defined.

# Records cost $new_cost for cell ($i,$j) of the cost matrix.  Costs above
# $max_cost are discarded; an existing entry is kept if it is lower.  When a
# cell is defined for the first time, the bounds $i_min{$i} and $i_max{$i}
# are widened to include $j.
sub define_cost {
    my ($i, $j, $new_cost) = @_;
    return if $new_cost > $max_cost;

    my $known = $cost[$i][$j];
    if (defined $known) {
        $cost[$i][$j] = min($known, $new_cost);
        return;
    }
    $cost[$i][$j] = $new_cost;
    $i_min{$i} = min($i_min{$i}, $j);
    $i_max{$i} = max($i_max{$i}, $j);
}

# Returns the smaller of two numbers (the second one when they are equal).
sub min {
    my ($x, $y) = @_;
    return $x < $y ? $x : $y;
}

# Returns the larger of two numbers (the second one when they are equal).
sub max {
    my ($x, $y) = @_;
    return $x > $y ? $x : $y;
}

# Looks for the index of the n-th word, or string length if not there.

# Returns the index in $string at which the $n-th word starts, where words
# are separated by a blank or a hyphen; for $n <= 1 this is 0.  If the
# string has fewer than $n words, its length is returned.  $n is expected
# to be an integer.
#
# The word starts are collected in one pass; no global loop counter is used
# and the string is not modified.
sub index_spaces {
    my ($string, $n) = @_;
    # The first word starts at 0, every other word just after a separator.
    my @starts = (0);
    while ($string =~ /[ -]/g) {
        push @starts, pos($string);
    }
    my $skip = $n > 1 ? $n - 1 : 0;
    return $skip < @starts ? $starts[$skip] : length($string);
}

####################################################################
# Conversion to Latex.

# Symbols in transliteration are converted to Latex.

# %al entries converted to Latex
# Converts a %al entry to LaTeX.  Each word is set in an italic \mbox with
# its symbols translated through %latex; text between <no> and </no> is set
# in roman and copied unchanged.  Dies on nested, unopened or unclosed <no>.
sub al_to_latex {
    my ($string) = @_;
    $string =~ s/\s\s+/\n/g;

    my $out = "";
    my $mode = "none";    # "none", "al" (inside an \mbox) or "note"
    my $len = length($string);
    my $pos = 0;

    while ($pos < $len) {
        my $ch = substr($string, $pos, 1);
        my $tail = substr($string, $pos);
        my $step = 1;    # number of input characters consumed

        if ($tail =~ /^<no>/) {
            die "Nested <no> in $string" if $mode eq "note";
            $out .= "\\/}" if $mode eq "al";
            $out .= "{\\rm ";
            $mode = "note";
            $step = 4;
        }
        elsif ($tail =~ /^<\/no>/) {
            die "Invalid <\/no> in $string" unless $mode eq "note";
            $out .= "}";
            $mode = "none";
            $step = 5;
        }
        elsif ($ch =~ /\s/) {
            # White space ends the current word; elsewhere it is copied.
            if ($mode eq "al") {
                $out .= "\\/} %\n";
                $mode = "none";
            }
            else {
                $out .= $ch;
            }
        }
        elsif ($mode eq "note") {
            $out .= $ch;
        }
        else {
            if ($mode eq "none") {
                $out .= "\\mbox{\\it ";
                $mode = "al";
            }
            # A one-symbol mapping takes precedence over a two-symbol one.
            my $pair = substr($string, $pos, 2);
            if ($latex{$ch}) {
                $out .= $latex{$ch};
            }
            elsif ($latex{$pair}) {
                $out .= $latex{$pair};
                $step = 2;
            }
            else {
                $out .= $ch;
            }
        }
        $pos += $step;
    }

    die "Forgotten </no> in $string" if $mode eq "note";
    $out .= "\\/}" if $mode eq "al";
    return $out;
}
# old code:
# } elsif ($rest =~ /^<no>/) {
#	$string =~ s/<no>/----/;
#	my $end_no = index($string, "</no>");
#	$string =~ s/<\/no>/-----/;
#	if ($end_no == -1) {$end_no = length($string)}
#	my $note = html_to_latex(substr($string, $i+4, $end_no - $i - 4));
#	$out_string .= "\\/\\mbox{\\rm $note}";
#	$i = $end_no + 4

# <al> environment converted to Latex:
# Converts the contents of an <al> environment to LaTeX: each word is set in
# an italic \mbox with its symbols translated through %latex.
sub al_to_tex {
    my ($string) = @_;
    $string =~ s/\s\s+/\n/g;

    my $out = "";
    my $in_word = 0;    # true while an \mbox is open
    my $len = length($string);
    my $pos = 0;

    while ($pos < $len) {
        my $ch = substr($string, $pos, 1);
        my $step = 1;    # number of input characters consumed

        if ($ch =~ /\s/) {
            # White space ends the current word; elsewhere it is copied.
            if ($in_word) {
                $out .= "\\/} %\n";
                $in_word = 0;
            }
            else {
                $out .= $ch;
            }
        }
        else {
            unless ($in_word) {
                $out .= "\\mbox{\\it ";
                $in_word = 1;
            }
            # A one-symbol mapping takes precedence over a two-symbol one.
            my $pair = substr($string, $pos, 2);
            if ($latex{$ch}) {
                $out .= $latex{$ch};
            }
            elsif ($latex{$pair}) {
                $out .= $latex{$pair};
                $step = 2;
            }
            else {
                $out .= $ch;
            }
        }
        $pos += $step;
    }

    $out .= "\\/}" if $in_word;
    return $out;
}

# %tr entries converted to Latex
# Converts a %tr entry to LaTeX: every <al>...</al> part is converted by
# al_to_tex, the remainder by html_to_latex, and the result is set in roman.
sub tr_to_latex {
    my ($string) = @_;
    # One <al> part per iteration, until none is left.
    1 while $string =~ s/<al>([^<]*)<\/al>/al_to_tex($1)/e;
    return "{\\rm " . html_to_latex($string) . "}";
}

# %no entries converted to Latex
# Converts a %no (or %qu, or errata) text to LaTeX: labels become \cite
# commands, PAR and SEC become the paragraph and section signs, <hi> parts
# are set in bold, <al> parts are converted by al_to_tex, <tr> parts are
# quoted, and the remainder is converted by html_to_latex.  The result is
# set in roman.
sub no_to_latex {
    my ($string) = @_;
    $string = clear_spaces($string);
    foreach my $label (keys %label_out) {
        # Labels are literal strings, not patterns.
        $string =~ s/\Q$label\E/\\cite{$label}/g;
    }
    # PAR and SEC are first turned into HTML entities, which html_to_latex
    # translates below.
    $string =~ s/(^|[^a-zA-Z])PAR([^a-zA-Z])/$1&para;$2/g;
    $string =~ s/(^|[^a-zA-Z])PAR$/$1&para;/;
    $string =~ s/(^|[^a-zA-Z])SEC([^a-zA-Z])/$1&sect;$2/g;
    $string =~ s/(^|[^a-zA-Z])SEC$/$1&sect;/;
    $string =~ s/<hi>([^<]*)<\/hi>/{\\bf{}$1}/g;
    while ($string =~ /<al>([^<]*)<\/al>/) {
        my $new_al = al_to_tex($1);
        $string =~ s/<al>[^<]*<\/al>/$new_al/;
    }
    $string =~ s/<tr>([^<]*)<\/tr>/``$1''/g;
    $string = html_to_latex($string);
    return "{\\rm $string}";
}

# print trans_to_latex(html_to_latex("abcd<no>H&auml;TdS&szlig;Akxy^T^X</no>a^Sz"));

# Converts the HTML entities and tags used in the input files to LaTeX and
# returns the converted string.
#
# <em> and <i> spans are matched non-greedily, so that several spans on one
# line are converted one by one instead of being merged into a single span.
# Captured text is referred to as $1/$2 in the replacements.
sub html_to_latex {
    my ($string) = @_;
    $string =~ s/&(.)uml;?/\\"$1/g;
    $string =~ s/&szlig;?/\\ss{}/g;
    $string =~ s/&laquo;?/\$\\langle\\langle\$/g;
    $string =~ s/&raquo;?/\$\\rangle\\rangle\$/g;
    $string =~ s/&lowast;?/\$\\ast\$/g;
    $string =~ s/&times;?/\$\\times\$/g;
    $string =~ s/&rarr;?/\$\\rightarrow\$/g;
    # A sign followed by white space is tied to what follows.
    $string =~ s/&para;?\s+/\\P~/g;
    $string =~ s/&para;?/\\P{}/g;
    $string =~ s/&sect;?\s+/\\S~/g;
    $string =~ s/&sect;?/\\S{}/g;
    # Tie "p." to the page number after it.
    $string =~ s/([^a-zA-Z])p\.\s+([0-9])/${1}p.~$2/g;
    $string =~ s/&amp;?/\\&/g;
    $string =~ s/<em>(.*?)<\/em>/{\\em{}$1\\\/}/gi;
    $string =~ s/<i>(.*?)<\/i>/{\\it{}$1\\\/}/gi;
    $string =~ s/<ul>/\\begin{itemize}/gi;
    $string =~ s/<\/ul>/\\end{itemize}/gi;
    $string =~ s/<li>/\\item /gi;
    $string =~ s/<p>/\n/gi;
    $string =~ s/&lt;?/\$<\$/g;
    $string =~ s/&gt;?/\$>\$/g;
    return $string;
}
