#!/usr/bin/perl

# A file containing an annotated key to exercises is translated to html files.
# The file $name.ex contains lines starting with %ex and %nr that divide
# the file into sections and subdivide into subsections, resp.
# One big file is made for all exercises, and several smaller files for
# individual "ex" sections. The tags %ex and %nr should be followed
# by the names of sections and subsections. A subsection can
# consist of several exercises.
# For each exercise, we can have lines starting with
#   %hi for hieroglyphs
#   %al (alphabetic) for transliteration
#   %tr for translation
#   %no for a note
#   %qu for a question to the reader
#   %cf for similar sentences in other publications
#   %bi for bibliographic references concerning the full text
# For each exercise there should be at most one entry for
# each one of the above.
# For multi-line entries for one of the above, the second and following
# lines should not be prefixed by a "%"-tag. The first empty line
# or line starting with % marks the end of the entry.
# Use <p> for dividing the entry into paragraphs.
# A line starting with two % is ignored; this can be used for comments.
# Certain codes in the entries are replaced by hyperlinks.
# For this, the existence of a file "labels" is assumed.
# A file $name.0 is the first half of a new file $name.html
# containing the root of the generated html files.

use CGI qw(:standard);

# Command-line arguments.
$name    = $ARGV[0];    # identifies file names.
$acronym = $ARGV[1];    # acronym of publication.
$command = $ARGV[2];    # possible command for special treatment.

# Variables for comparing transliterations:

# Symbols that are interchangeable. Each key may stand in for its value
# (and vice versa) at no cost when two transliterations are compared.
%near = (
    "d"  => "D",
    "t"  => "T",
    "i"  => "j",
    "y"  => "j",
    "="  => ".",
    "-"  => " ",
    "s"  => "z",
    "mA" => "Am",
);

# Symbols with low delete value; any symbol not listed here costs 1.
%delete = (
    "." => 0,
    "^" => 0,
    "(" => 0,
    ")" => 0,
    "," => 0,
    "=" => 0.3,
    "w" => 0.3,
    "i" => 0.2,
    "j" => 0.2,
    "y" => 0.2,
);

# Mapping from symbols in transliteration to Latex commands.
# A leading "^" marks the capitalised variant of the following symbol.
%latex = (
    "A"  => "\\alp ",
    "^A" => "\\alp ",
    "^i" => "I",
    "^j" => "J",
    "^y" => "Y",
    "a"  => "\\ayin ",
    "^a" => "\\ayin ",
    "^w" => "W",
    "^b" => "B",
    "^p" => "P",
    "^f" => "F",
    "^m" => "M",
    "^n" => "N",
    "^r" => "R",
    "^h" => "H",
    "H"  => "\\hdot ",
    "^H" => "\\Hdot ",
    "x"  => "\\hbow ",
    "^x" => "\\Hbow ",
    "X"  => "\\hstroke ",
    "^X" => "\\Hstroke ",
    "^s" => "S",
    "S"  => "\\sh ",
    "^S" => "\\Sh ",
    "q"  => "\\kdot ",
    "^q" => "\\Kdot ",
    "^k" => "K",
    "^g" => "G",
    "^t" => "T",
    "T"  => "\\tstroke ",
    "^T" => "\\Tstroke ",
    "^d" => "D",
    "D"  => "\\dstroke ",
    "^D" => "\\Dstroke ",
    "-"  => "--",
);

# A "labels" file is read that maps labels to the output representation
# and a relevant http path. Also files with example sentences are read and
# indexed.
my($ex,$nr,$hi,$al,$tr,$no,$qu,$cf,$bi,$entry,$ex_open,$ex_filled,$nr_tag); my %example_hi = (); my %example_al = (); my %example_tr = (); my %example_out = (); my %example_out_tex = (); my %label_out = (); my %label_path = (); open(LAB, "labels") || die "cannot open labels"; while () { if (/\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)(\s*| .*)$/) { my($lab,$out,$path,$files) = ($1,$2,$3,$4); $label_out{$lab} = $out; $label_path{$lab} = $path; foreach $file ($files =~ /[^\s]+/g) { read_sentences($file,$lab); } } }; close(LAB); # Hyperlink to publication. my $book = cite_sub($acronym); open(IN, "$name.ex") || die "cannot open $name.ex"; if ($command eq "search") { open(AL, ">/dev/null") || die "cannot open /dev/null"; open(TEX, ">/dev/null") || die "cannot open /dev/null"; open(ROOT, ">>/dev/null") || die "cannot open /dev/null" } else { open(AL, ">$name\_all.html") || die "cannot open $name\_all.html"; open(TEX, ">$name.tex") || die "cannot open $name.tex"; open(ROOT, ">>$name.html") || die "cannot open $name.html"; } print AL # header(), start_html("Key to $name"), h1("Key to the exercises from $book"), "\n\n"; print TEX <> $name.tex"); open(TEX, ">>$name.tex") || die "cannot open $name.tex"; print TEX "\\vspace{3ex}\n"; } $hi = ""; $al = ""; $tr = ""; $no = ""; $qu = ""; $cf = ""; $bi = ""; $entry = ""; $ex_open = ""; while () { if (/^%%/) { } elsif (/^%ex(\s+.*)$/) { $ex = $1; finish_item(); if ($ex_open) {close_ex()} else {$ex_open = "true"} start_ex(); } elsif (/^%nr(\s+.*)$/) { $nr = $1; $nr_tag = clear_spaces($nr); unless ($ex_open) {die "%nr before any %ex entry:\n$_"}; finish_item(); start_nr(); } elsif (/^%hi(\s+.*)$/) { if ($hi) {die "second %hi entry:\n$_"}; $hi = "$1\n"; $entry = "hi"; } elsif (/^%al(\s+.*)$/) { if ($al) {die "second %al entry:\n$_"}; $al = "$1\n"; $entry = "al"; } elsif (/^%tr(\s+.*)$/) { if ($tr) {die "second %tr entry:\n$_"}; $tr = "$1\n"; $entry = "tr"; } elsif (/^%no(\s+.*)$/) { if ($no) {die "second %no entry:\n$_"}; $no = "$1\n"; $entry = 
"no"; } elsif (/^%qu(\s+.*)$/) { if ($qu) {die "second %qu entry:\n$_"}; $qu = "$1\n"; $entry = "qu"; } elsif (/^%cf(\s+.*)$/) { if ($cf) {die "second %cf entry:\n$_"}; $cf = "$1\n"; $entry = "cf"; } elsif (/^%bi(\s+.*)$/) { if ($bi) {die "second %bi entry:\n$_"}; $bi = "$1\n"; $entry = "bi"; } elsif (/^%/) { die "unrecognized entry:\n$_" } elsif (/^\s*$/) { finish_item(); } else { if ($entry eq "hi") {$hi .= $_} elsif ($entry eq "al") {$al .= $_} elsif ($entry eq "tr") {$tr .= $_} elsif ($entry eq "no") {$no .= $_} elsif ($entry eq "qu") {$qu .= $_} elsif ($entry eq "cf") {$cf .= $_} elsif ($entry eq "bi") {$bi .= $_} else {die "text without %:\n$_"} } }; finish_item(); if ($ex_open) { finish_item(); close_ex()}; print AL end_html; close(AL); print ROOT li("All of the above<\/a> on one page"), "\n"; if (open(ERR, "$name.err")) { my $err_file = join("",); close(ERR); my $err_html = cite_sub($err_file); my $err_tex = no_to_latex($err_file); open(ERRH, ">$name\_err.html"); print ERRH start_html("Errata in $name"); print ERRH "\n

Errata in $book

\n"; print ERRH "$err_html\n"; print ERRH end_html; close(ERRH); print ROOT li("A list of errata"); print TEX "\n\\medskip\\noindent{\\Large Errata}\\medskip\n"; print TEX "$err_tex\n" } print ROOT "\n\n", "This document is also available in a compact ", "PDF format<\/a>.\n"; print ROOT end_html; close(ROOT); print TEX < /dev/null"); # system("bibtex $name"); # system("latex $name > /dev/null"); # system("latex $name > /dev/null"); # system("dvips $name.dvi -o $name.ps") } sub finish_item { if ($hi || $al || $tr || $no || $qu || $cf) { print TEX "\\item[$nr_tag]\n"; $nr_tag = ""; $hi = clear_spaces($hi); $al = clear_spaces($al); $tr = clear_spaces($tr); $cf = clear_spaces($cf); if ($hi) { print AL b($hi),br(),"\n"; print EX b($hi),br(),"\n"; print TEX "{\\bf $hi}\n\\hspace{1ex}\n" }; if ($al) { my $al_tex = al_to_latex($al); $al =~ s//<\/i>/g; $al =~ s/<\/no>//g; print AL i($al),br(),"\n"; print EX i($al),br(),"\n"; print TEX "$al_tex\n\\hspace{1ex}\n"; find_matching($ex,$nr,$al,$cf); }; if ($tr) { my $tr_tex = tr_to_latex($tr); $tr =~ s///g; $tr =~ s/<\/al>/<\/i>/g; print AL "$tr\n"; print EX "$tr\n"; print TEX "$tr_tex\n" }; if ($no) { my $no_tex = no_to_latex($no); $no = cite_sub($no); print AL blockquote("Note: $no"),"\n"; print EX blockquote("Note: $no"),"\n"; print TEX "\\begin{quote}Note:\n$no_tex\n\\end{quote}\n" }; if ($qu) { my $qu_tex = no_to_latex($qu); $qu = cite_sub($qu); print AL blockquote("QUESTION: $qu"),"\n"; print EX blockquote("QUESTION: $qu"),"\n"; print TEX "\\begin{quote}QUESTION:\n$qu_tex\n\\end{quote}\n" }; if ($cf) { print AL "
Cf:
\n"; print EX "
Cf:
\n"; print TEX "\\begin{description}\n"; foreach $ind ($cf =~ /[^\s]+/g) { my $other_hi = $example_hi{$ind}; $other_hi = clear_spaces($other_hi); my $other_al = $example_al{$ind}; $other_al = clear_spaces($other_al); my $other_al_tex = al_to_latex($other_al); $other_al =~ s//<\/i>/g; $other_al =~ s/<\/no>//g; my $other_tr = $example_tr{$ind}; my $other_tr_tex = tr_to_latex($other_tr); $other_tr =~ s///g; $other_tr =~ s/<\/al>/<\/i>/g; my $out = $example_out{$ind}; my $out_tex = $example_out_tex{$ind}; unless ($out) { $out = "...reference to be put here shortly..."; $out_tex = "...reference to be put here shortly..."; print "Undefined cf: $ind\n" }; print AL li("$out:\n"),br(),"\n"; print EX li("$out:\n"),br(),"\n"; print TEX "\\item[$out_tex]\n"; if ($other_hi) { print AL b($other_hi),br(),"\n"; print EX b($other_hi),br(),"\n"; print TEX "{\\bf $other_hi}\n\\hspace{1ex}\n" }; if ($other_al) { print AL i($other_al),br(),"\n"; print EX i($other_al),br(),"\n"; print TEX "$other_al_tex\n\\hspace{1ex}\n"; }; if ($other_tr) { print AL "$other_tr\n"; print EX "$other_tr\n"; print TEX "$other_tr_tex\n" }; }; print AL "
\n"; print EX "
\n"; print TEX "\\end{description}\n" }; print AL "
\n\n"; print EX "
\n\n"; $hi = ""; $al = ""; $tr = ""; $no = ""; $qu = ""; $cf = ""; $bi = ""; }; $entry = ""; $ex_filled = "true" } sub start_ex { $ex = clear_spaces($ex); my $ex_name = $ex; $ex_name =~ s/ /_/g; my $ex_html = $ex; $ex_html =~ s/\&/\&/g; $ex_html = cite_sub($ex_html); # $ex_html =~ s/^par /\¶ /g; my $ex_tex = html_to_latex($ex_html); if ($command eq "search") { open(EX, ">/dev/null") || die "cannot open /dev/null"; } else { open(EX, ">$name\_$ex_name.html") || die "cannot open $name\_$ex.html"; } print EX start_html("Key to $ex from $name"), h1("Key to $ex_html from $book"), "\n\n"; print AL hr(),h1("$ex_html"), "\n", "
\n"; print TEX "\n", "\\noindent\n{\\large $ex_tex}\\ \\ \\ ", "\\hrulefill\\nopagebreak\n", "\\begin{description}\n"; print EX "
\n"; print ROOT li("$ex_html<\/a>"),"\n"; $ex_filled = ""; $nr = ""; $nr_tag = $nr; } sub close_ex { unless ($ex_filled) { print TEX "\\item[\\ ]\n" }; print AL "
\n\n"; print TEX "\\end{description}\n"; print EX "
\n\n"; print EX end_html; close(EX) } sub start_nr { $nr = clear_spaces($nr); print AL "
\n", h2("$nr"), "\n\n"; print EX "
\n", h2("$nr"), "\n\n"; } sub cite_sub { my($txt) = @_; $txt = clear_spaces($txt); foreach $label (keys %label_out) { my $out = $label_out{$label}; my $path = $label_path{$label}; $txt =~ s/$label/$out<\/a>/g }; $txt =~ s/(^|[^a-zA-Z])PAR([^a-zA-Z])/$1¶$2/g; $txt =~ s/(^|[^a-zA-Z])PAR$/$1¶/; $txt =~ s/(^|[^a-zA-Z])SEC([^a-zA-Z])/$1§$2/g; $txt =~ s/(^|[^a-zA-Z])SEC$/$1§/; $txt =~ s///g; $txt =~ s/<\/hi>/<\/b>/g; $txt =~ s///g; $txt =~ s/<\/al>/<\/i>/g; $txt =~ s/\s*/"/g; $txt =~ s/\s*<\/tr>/"/g; return $txt } sub clear_spaces { my($txt) = @_; $txt =~ s/ +/ /g; $txt =~ s/^\s*//; $txt =~ s/\s*$//; return $txt } ########################################## # Files containing sentences are read and put in a hash. my $sen_file; my $sen_lab; sub read_sentences { ($sen_file,$sen_lab) = @_; if ($sen_file =~ /\.ex$/) { read_exercises() } elsif ($sen_file =~ /\.txt$/) { read_txt() } } # A file with exercises has a similar format as before, except that # only one entry per number is allowed. sub read_exercises { open(SEN, "$sen_file") || die "cannot open $sen_file"; $ex = ""; $nr = ""; $hi = ""; $al = ""; $tr = ""; $entry = ""; while () { if (/^%ex(\s+.*)$/) { my $new_ex = $1; store_exer(); $ex = clear_spaces($new_ex); $nr = ""; } elsif (/^%nr(\s+.*)$/) { my $new_nr = $1; store_exer(); $nr = clear_spaces($new_nr) } elsif (/^%hi(\s+.*)$/) { if ($hi) {die "second %hi entry:\n$_"}; $hi = "$1\n"; $entry = "hi"; } elsif (/^%al(\s+.*)$/) { if ($al) {die "second %al entry:\n$_"}; $al = "$1\n"; $entry = "al"; } elsif (/^%tr(\s+.*)$/) { if ($tr) {die "second %tr entry:\n$_"}; $tr = "$1\n"; $entry = "tr"; } elsif (/^%/) { $entry = "" } elsif (/^\s*$/) { $entry = "" } else { if ($entry eq "hi") {$hi .= $_} elsif ($entry eq "al") {$al .= $_} elsif ($entry eq "tr") {$tr .= $_} } }; store_exer(); close(SEN) } # As above, but here there are entries %pa for page, and %li for line. 
# Page (%pa) and line (%li) of the example currently being read by read_txt().
my ($pa, $li);

# Reads the sentence file named by $sen_file. The format is that of an
# exercise file, except that examples are located by %pa (page) and
# %li (line) tags. Every completed example is handed to store_example().
# Dies if the file cannot be opened or if an example has a second %hi,
# %al or %tr entry.
sub read_txt {
    open(my $sen_fh, '<', $sen_file) || die "cannot open $sen_file";
    $pa = "";
    $li = "";
    $hi = "";
    $al = "";
    $tr = "";
    $entry = "";
    # The three text fields are handled uniformly through references.
    my %field = (hi => \$hi, al => \$al, tr => \$tr);
    while (my $line = <$sen_fh>) {
        if ($line =~ /^%pa(\s+.*)$/) {
            my $new_pa = $1;
            store_example();
            $pa = clear_spaces($new_pa);
            $li = "";
        }
        elsif ($line =~ /^%li(\s+.*)$/) {
            my $new_li = $1;
            store_example();
            $li = clear_spaces($new_li);
        }
        elsif ($line =~ /^%(hi|al|tr)(\s+.*)$/) {
            my ($tag, $text) = ($1, $2);
            if (${ $field{$tag} }) { die "second %$tag entry:\n$line" }
            ${ $field{$tag} } = "$text\n";
            $entry = $tag;
        }
        elsif ($line =~ /^%/ || $line =~ /^\s*$/) {
            # Any other tag, or an empty line, ends the running entry.
            $entry = "";
        }
        elsif (exists $field{$entry}) {
            # Continuation line of a multi-line entry.
            ${ $field{$entry} } .= $line;
        }
    }
    store_example();
    close($sen_fh);
}

# Stores the exercise collected in $hi/$al/$tr (if any) in the example
# hashes under the index "<label>:<exercise>:<number>:", then clears the
# fields. Where $ex or $nr contains digits, only the first run of digits is
# used in the index. Dies if text is present without both %ex and %nr.
sub store_exer {
    if ($hi || $al || $tr) {
        unless ($ex && $nr) { die "no %ex or no %nr" }
        # Both variables are declared here; "my $ex_nr, $nr_nr;" would
        # declare only the first.
        my $ex_nr = ($ex =~ /([0-9]+)/) ? $1 : $ex;
        my $nr_nr = ($nr =~ /([0-9]+)/) ? $1 : $nr;
        my $ind = join(":", $sen_lab, $ex_nr, $nr_nr) . ":";
        $example_hi{$ind} = $hi;
        $example_al{$ind} = $al;
        $example_tr{$ind} = $tr;
        $example_out{$ind} = cite_sub("$sen_lab, $ex, $nr");
        $example_out_tex{$ind} = "\\cite{$sen_lab}, $ex, $nr";
        $hi = "";
        $al = "";
        $tr = "";
        $nr = "";
    }
    $entry = "";
}

# Stores the example collected in $hi/$al/$tr (if any) in the example
# hashes under the index "<label>:p<page>:<line>:", then clears the fields.
# Dies if text is present without both %pa and %li.
sub store_example {
    if ($hi || $al || $tr) {
        unless ($pa && $li) { die "no %pa or no %li" }
        my $ind = "${sen_lab}:p${pa}:${li}:";
        $example_hi{$ind} = $hi;
        $example_al{$ind} = $al;
        $example_tr{$ind} = $tr;
        # $example_out{$ind} = cite_sub("$sen_lab, p. $pa [$li]");
        $example_out{$ind} = cite_sub("$sen_lab, p. $pa");
        # $example_out_tex{$ind} = "\\cite{$sen_lab}, p. $pa ($li)";
        $example_out_tex{$ind} = "\\cite{$sen_lab}, p. $pa";
        $hi = "";
        $al = "";
        $tr = "";
        $li = "";
    }
    $entry = "";
}

####################################################################
# We compute the string distance in a special way, that allows for
# overlapping. No costs are associated with deletion/inserting at
# the ends and beginnings of the two strings. However, we constrain
# this process in such a way that comparison of the two strings should
# cover a number of words that is 60 % of the number of words in the
# first string. We therefore initialize the lower boundary and the left
# boundary of a lower left submatrix of the cost matrix accordingly to 0.
# The total costs are accumulated not only at the upper right corner, but
# at any position in the upper boundary and the right boundary of the matrix.
# This is slightly asymmetric. The accepted cost is at most 0.13 times the
# number of characters in the shorter of the two strings (see string_simm).

# In "search" mode only: compares transliteration $al of exercise $ex/$nr
# with every stored example and prints a report to STDOUT listing matching
# examples that are not yet mentioned in $cf ("-->") and entries of $cf for
# which no match was found ("???"). Does nothing for transliterations that
# split into fewer than two pieces.
sub find_matching {
    return unless $command eq "search";
    my ($ex, $nr, $al, $cf) = @_;
    my $new_cf = "";
    my $out_report = "";
    # The capturing group makes split return the separators as well.
    my @pieces = split(/( |-)/, $al);
    return if @pieces < 2;
    for my $ind (keys %example_al) {
        my $other_al = clear_spaces($example_al{$ind});
        # Notes inside a transliteration interrupt the italics.
        $other_al =~ s/<no>/<\/i>/g;
        $other_al =~ s/<\/no>/<i>/g;
        next unless string_simm($al, $other_al);
        # The index is literal text (it contains ":" and a label), so it is
        # quoted before being used as a pattern.
        unless ($cf =~ s/\Q$ind\E/ /) {
            $out_report .= "--> $ind\n $other_al\n";
            $new_cf .= $ind;
        }
    }
    unless ($cf =~ /^\s*$/ && $new_cf =~ /^\s*$/) {
        print "$ex $nr:\n $al\n$out_report";
        unless ($cf =~ /^\s*$/) { print "??? $cf\n" }
        print "\n";
    }
}

# print string_simm("iw m(w)t m Hr=i mi Abb si mAA
# pr.w=sn (sic!, read =f) iri.n=f rnp.wt aSA.wt
# m nDr.t",
# "jw m(w).t m Hr=j mjn mj Abb z(.j) mAA
# pr.w=sn (sic!, lies =f) jr(j).n=f rnp.wt aSA.wt
# jT(j)(.w) m nDr.t");

# Returns 1 if the two symbols are equal or are listed as interchangeable
# in %near (in either direction), and 0 otherwise.
sub near_strings {
    my ($str1, $str2) = @_;
    return 1 if $str1 eq $str2;
    return 1 if defined $near{$str1} && $near{$str1} eq $str2;
    return 1 if defined $near{$str2} && $near{$str2} eq $str1;
    return 0;
}

# Returns the cost of deleting/inserting symbol $str1: its entry in
# %delete, or 1 if it has none.
sub delete_price {
    my ($str1) = @_;
    return exists $delete{$str1} ? $delete{$str1} : 1;
}

# cost matrix
my @cost;
# maximal allowed cost
my $max_cost;
# Hashes that for i indicate the lowest and highest j with which the
# cost matrix is defined.
my (%i_min, %i_max);

# Compares two transliterations with the overlapping edit distance described
# above. Returns the minimal cost plus 1 (hence a true value) if that cost
# does not exceed 0.13 times the length of the shorter string, and 0
# otherwise, or if the second string has too few words.
sub string_simm {
    my ($str1, $str2) = @_;
    for my $str ($str1, $str2) {
        # Text outside the italics (i.e. embedded notes) is not compared.
        $str =~ s/<\/i>[^<]*<i>//g;
        $str =~ s/^\s*//;
        $str =~ s/\s*$//;
        $str =~ s/\s+/ /g;
    }
    my $length1 = length($str1);
    my $length2 = length($str2);
    $max_cost = 0.13 * min($length1, $length2);
    my @words1 = split(/[ -]/, $str1);
    my @words2 = split(/[ -]/, $str2);
    my $nr_words1 = @words1;
    my $nr_words2 = @words2;
    # At most 40 % of the words of the first string may be left uncovered.
    my $not_needed_words1 = int(0.40 * $nr_words1);
    my $needed = $nr_words1 - $not_needed_words1;
    my $not_needed_words2 = $nr_words2 - $needed;
    return 0 if $not_needed_words2 < 0;
    my $start1 = index_spaces($str1, $not_needed_words1);
    my $start2 = index_spaces($str2, $not_needed_words2);

    @cost = ();
    %i_min = ();
    %i_max = ();
    for my $i (0 .. $start1) {
        $cost[$i][0] = 0;
        $i_min{$i} = 0;
        $i_max{$i} = 0;
    }
    for my $i ($start1 + 1 .. $length1) {
        $i_min{$i} = 32000;
        $i_max{$i} = -1;
    }
    for my $j (0 .. $start2) {
        define_cost(0, $j, 0);
    }

    for my $i (0 .. $length1) {
        my $subs1 = substr($str1, $i, 1);
        my $l1 = length($subs1);
        my $subs1_2 = substr($str1, $i, 2);
        my $l1_2 = length($subs1_2);
        # C-style loop on purpose: define_cost() may raise $i_max{$i}
        # while this column is being processed.
        for (my $j = $i_min{$i}; $j <= $i_max{$i}; $j++) {
            my $old_cost = $cost[$i][$j];
            next unless defined $old_cost;
            my $subs2 = substr($str2, $j, 1);
            my $l2 = length($subs2);
            my $subs2_2 = substr($str2, $j, 2);
            my $l2_2 = length($subs2_2);
            if ($l1 == 1 && $l2 == 1) {
                # Substitution; free for equal or interchangeable symbols.
                my $step = near_strings($subs1, $subs2) ? 0 : 1;
                define_cost($i + $l1, $j + $l2, $old_cost + $step);
            }
            if ($l1 > 0) {
                define_cost($i + $l1, $j, $old_cost + delete_price($subs1));
            }
            if ($l2 > 0) {
                define_cost($i, $j + $l2, $old_cost + delete_price($subs2));
            }
            if ($l1_2 == 2 && $l2_2 == 2 && near_strings($subs1_2, $subs2_2)) {
                # Interchangeable two-symbol sequences such as "mA"/"Am".
                define_cost($i + $l1_2, $j + $l2_2, $old_cost);
            }
        }
    }

    # The result may be read off anywhere on the upper and right boundary.
    my $min_cost = 32000;
    for my $i (0 .. $length1) {
        my $final_cost = $cost[$i][$length2];
        $min_cost = min($final_cost, $min_cost) if defined $final_cost;
    }
    for my $j (0 .. $length2) {
        my $final_cost = $cost[$length1][$j];
        $min_cost = min($final_cost, $min_cost) if defined $final_cost;
    }
    # For testing;
    # for ($j = $length2; $j >= 0; $j--) {
    #   for ($i = 0; $i <= $length1; $i++) {
    #     my $this = $cost[$i][$j];
    #     unless (defined $this) { $this = "-" }
    #     print "$this ";
    #   }
    #   print "\n"};
    return ($min_cost <= $max_cost) ? $min_cost + 1 : 0;
}

# The cost is defined for an entry in the matrix.
# Only minimal costs not exceeding $max_cost are kept. For each column,
# the minimal and maximal values are kept for which an entry is defined.
sub define_cost {
    my ($i, $j, $new_cost) = @_;
    return if $new_cost > $max_cost;
    my $old_cost = $cost[$i][$j];
    if (defined $old_cost) {
        $cost[$i][$j] = min($old_cost, $new_cost);
    }
    else {
        $cost[$i][$j] = $new_cost;
        $i_min{$i} = min($i_min{$i}, $j);
        $i_max{$i} = max($i_max{$i}, $j);
    }
}

# Returns the smaller of two numbers (the second one if they are equal).
sub min {
    my ($x, $y) = @_;
    return ($x < $y) ? $x : $y;
}

# Returns the larger of two numbers (the second one if they are equal).
sub max {
    my ($x, $y) = @_;
    return ($x > $y) ? $x : $y;
}

# Looks for the index of the n-th word, or string length if not there.
# Words are separated by a space or a hyphen; for n <= 1 the result is 0.
sub index_spaces {
    my ($string, $n) = @_;
    # A leading separator makes the first word look like all the others.
    $string = " " . $string;
    for (my $skip = $n; $skip > 1; $skip--) {
        # Neutralise one separator per word to be skipped.
        return length($string) - 1 unless $string =~ s/[ -]/+/;
    }
    my $space_at = index($string, " ");
    my $dash_at = index($string, "-");
    if ($space_at >= 0 && $dash_at >= 0) {
        return min($space_at, $dash_at);
    }
    if ($space_at >= 0 || $dash_at >= 0) {
        # Exactly one of the two is -1; max() picks the one that was found.
        return max($space_at, $dash_at);
    }
    return length($string) - 1;
}

####################################################################
# Conversion to Latex.

# Symbols in transliteration are converted to Latex.
# Every run of non-blank symbols becomes one "\mbox{\it ...\/}"; symbols are
# mapped through %latex (one-symbol keys are tried before two-symbol keys).
# If $with_notes is true, "<no>...</no>" spans are emitted in roman type as
# "{\rm ...}" and unbalanced note tags are fatal.
sub _al_symbols_to_latex {
    my ($string, $with_notes) = @_;
    $string =~ s/\s\s+/\n/g;
    my $out_string = "";
    my $mode = "none";    # one of "none", "al", "note"
    for (my $i = 0; $i < length($string); $i++) {
        my $sym = substr($string, $i, 1);
        if ($with_notes && substr($string, $i, 4) eq "<no>") {
            if ($mode eq "note") { die "Nested <no> in $string" }
            if ($mode eq "al") { $out_string .= "\\/}" }
            $out_string .= "{\\rm ";
            $mode = "note";
            $i += 3;    # skip the rest of the tag
        }
        elsif ($with_notes && substr($string, $i, 5) eq "</no>") {
            unless ($mode eq "note") { die "Invalid </no> in $string" }
            $out_string .= "}";
            $mode = "none";
            $i += 4;    # skip the rest of the tag
        }
        elsif ($sym =~ /\s/) {
            if ($mode eq "al") {
                $out_string .= "\\/} %\n";
                $mode = "none";
            }
            else {
                $out_string .= $sym;
            }
        }
        elsif ($mode eq "note") {
            $out_string .= $sym;
        }
        else {
            if ($mode eq "none") {
                $out_string .= "\\mbox{\\it ";
                $mode = "al";
            }
            my $syms = substr($string, $i, 2);
            my $mapped;
            if ($mapped = $latex{$sym}) {
                $out_string .= $mapped;
            }
            elsif ($mapped = $latex{$syms}) {
                $out_string .= $mapped;
                $i++;    # the two-symbol key consumed one more symbol
            }
            else {
                $out_string .= $sym;
            }
        }
    }
    if ($mode eq "note") { die "Forgotten </no> in $string" }
    if ($mode eq "al") { $out_string .= "\\/}" }
    return $out_string;
}

# %al entries converted to Latex; "<no>...</no>" notes are honoured.
sub al_to_latex {
    my ($string) = @_;
    return _al_symbols_to_latex($string, 1);
}

# old code:
# } elsif ($rest =~ /^<no>/) {
# $string =~ s/<no>/----/;
# my $end_no = index($string, "</no>");
# $string =~ s/<\/no>/-----/;
# if ($end_no == -1) {$end_no = length($string)}
# my $note = html_to_latex(substr($string, $i+4, $end_no - $i - 4));
# $out_string .= "\\/\\mbox{\\rm $note}";
# $i = $end_no + 4

# <al> environment converted to Latex: as al_to_latex, but without any
# special treatment of notes.
sub al_to_tex {
    my ($string) = @_;
    return _al_symbols_to_latex($string, 0);
}

# %tr entries converted to Latex. Embedded "<al>...</al>" spans are
# transliterated first, the remaining HTML is converted afterwards.
sub tr_to_latex {
    my ($string) = @_;
    $string =~ s/<al>([^<]*)<\/al>/al_to_tex($1)/ge;
    $string = html_to_latex($string);
    return "{\\rm $string}";
}

# %no entries converted to Latex. Labels become \cite{...}, PAR/SEC become
# paragraph/section signs, and <hi>, <al> and <tr> spans are converted
# before the remaining HTML.
sub no_to_latex {
    my ($string) = @_;
    $string = clear_spaces($string);
    foreach my $label (keys %label_out) {
        # Labels are literal text, not patterns.
        $string =~ s/\Q$label\E/\\cite{$label}/g;
    }
    $string =~ s/(^|[^a-zA-Z])PAR([^a-zA-Z])/$1&para;$2/g;
    $string =~ s/(^|[^a-zA-Z])PAR$/$1&para;/;
    $string =~ s/(^|[^a-zA-Z])SEC([^a-zA-Z])/$1&sect;$2/g;
    $string =~ s/(^|[^a-zA-Z])SEC$/$1&sect;/;
    $string =~ s/<hi>([^<]*)<\/hi>/{\\bf{}$1}/g;
    $string =~ s/<al>([^<]*)<\/al>/al_to_tex($1)/ge;
    $string =~ s/<tr>([^<]*)<\/tr>/``$1''/g;
    $string = html_to_latex($string);
    return "{\\rm $string}";
}

# print trans_to_latex(html_to_latex("abcdH&auml;TdS&szlig;Akxy^T^Xa^Sz"));

# Converts the HTML entities and the few HTML tags used in the entries to
# Latex. The trailing semicolon of an entity is optional.
sub html_to_latex {
    my ($string) = @_;
    $string =~ s/&(.)uml;?/\\"$1/g;
    $string =~ s/&szlig;?/\\ss{}/g;
    $string =~ s/&laquo;?/\$\\langle\\langle\$/g;
    $string =~ s/&raquo;?/\$\\rangle\\rangle\$/g;
    $string =~ s/&lowast;?/\$\\ast\$/g;
    $string =~ s/&times;?/\$\\times\$/g;
    $string =~ s/&rarr;?/\$\\rightarrow\$/g;
    $string =~ s/&para;?\s+/\\P~/g;
    $string =~ s/&para;?/\\P{}/g;
    $string =~ s/&sect;?\s+/\\S~/g;
    $string =~ s/&sect;?/\\S{}/g;
    $string =~ s/([^a-zA-Z])p\.\s+([0-9])/${1}p.~$2/g;
    $string =~ s/&amp;?/\\&/g;
    # Non-greedy, so that two separate spans are not merged into one.
    $string =~ s/<em>(.*?)<\/em>/{\\em{}$1\\\/}/gis;
    $string =~ s/<i>(.*?)<\/i>/{\\it{}$1\\\/}/gis;
    $string =~ s/<ul>/\\begin{itemize}/gi;
    $string =~ s/<\/ul>/\\end{itemize}/gi;
    $string =~ s/<li>/\\item /gi;
    # NOTE(review): the tag in the next pattern was reconstructed as <br>;
    # confirm against the project's original source.
    $string =~ s/<br>/\n/gi;
    $string =~ s/&lt;?/\$<\$/g;
    $string =~ s/&gt;?/\$>\$/g;
    return $string;
}