#!/opt/bin/perl # From a file containing examples from a grammar, the # examples from a certain text are selected, and these # are translated to XML. # An input file .ex contains lines starting with %ex and %nr that divide # the file into sections and subdivide into subsections, resp. # An input file .txt contains lines starting with %pa and %li that divide # the file into pages and lines, resp. # For each example, we can have lines starting with # %hi hieroglyphs # %al (alphabetic) for transliteration # %tr for translation # %no for a note # %qu for a question to the reader # %cf for similar sentences in other publications # %bi for bibliographic references concerning the full text # For each exercise there should be at most one entry for # each one of the above. # For multi-line entries for one of the above, the second and following # lines should not be prefixed by a "%"-tag. The first empty line # or line starting with % marks the end of the entry. # Use

for dividing the entry into paragraphs. # A line starting with two % is ignored; this can be used for comments. ########################################################################### # Begin changeables. # Value is 0 if all available information is to be copied to XML file, and # is 1 is only the page number is to be copied. my $ref_only = 1; my $name = # identifies the name of the grammar. # "Graefe"; "Gardiner"; # "Allen"; # "Loprieno"; $descr_graefe = <

E. Graefe. Mittelägyptische Grammatik für Anfänger. Harrassowitz Verlag, Wiesbaden, 1994.

DESCR $descr_gardiner = <

A. Gardiner. Egyptian Grammar. Griffith Institute, Ashmolean Museum, Oxford, 1957.

Only direct quotes were included, and examples consisting of single words were mostly excluded. DESCR $descr_allen = <

J.P. Allen. Middle Egyptian: An Introduction to the Language and Culture of Hieroglyphs. Cambridge University Press, 2000.

Examples consisting of single words were excluded. DESCR $descr_loprieno = <

A. Loprieno. Ancient Egyptian: a linguistic introduction. Cambridge University Press, 1995.

Unnumbered examples in running text were excluded. DESCR my $descr = # gives description of the source ($name eq "Graefe") ? $descr_graefe : ($name eq "Gardiner") ? $descr_gardiner : ($name eq "Allen") ? $descr_allen : ($name eq "Loprieno") ? $descr_loprieno : ""; my @input_names = # identifies input files. ($name eq "Graefe") ? ("Graefe.txt") : ($name eq "Gardiner") ? ("Gardiner.txt") : ($name eq "Allen") ? ("Allen.txt", "Allen-cite.ex") : ($name eq "Loprieno") ? ("Loprieno.txt") : (); my $output_name = # identifies output file. ($name eq "Graefe") ? "../AEL/Bauer-Graefe.xml" : ($name eq "Gardiner") ? "../AEL/Peas-Gardiner.xml" : ($name eq "Allen") ? "../AEL/Peas-Allen.xml" : ($name eq "Loprieno") ? "../AEL/Peas-Loprieno.xml" : ""; my $url = # identifies file as URL. ($name eq "Graefe") ? "http://www.dfki.uni-sb.de/~nederhof/AEL/peasant/Bauer-Graefe.xml" : ($name eq "Gardiner") ? "http://www.dfki.uni-sb.de/~nederhof/AEL/peasant/Peas-Gardiner.xml" : ($name eq "Allen") ? "http://www.dfki.uni-sb.de/~nederhof/AEL/peasant/Peas-Allen.xml" : ($name eq "Loprieno") ? "http://www.dfki.uni-sb.de/~nederhof/AEL/peasant/Peas-Loprieno.xml" : ""; # Adapt this function to select on %bi entries of interest: sub to_be_selected { my ($str) = @_; if ($name eq "Graefe") { return ($str =~ /Bauer/); } else { return ($str =~ /Peas/); } } # Adapt this function to extract the version from the %bi entry: sub version { my ($str) = @_; if ($name eq "Graefe") { if ($str =~ /Bauer (B1|R)/) {return $1} } elsif ($name eq "Gardiner") { if ($str =~ /B 1/) {return "B1 Old"} elsif ($str =~ /B 2/) {return "B2"} elsif ($str =~ /R/) {return "R Old"} elsif ($str =~ /Bt/) {return "Bt"} } elsif ($name eq "Allen") { if ($str =~ /Peas\. ([BRt12]+)/) {return $1} } elsif ($name eq "Loprieno") { if ($str =~ /Peas\. (R|B[t12])/) {return $1} } } # Adapt this function to extract the position from the %bi entry: sub position { my ($str) = @_; if ($name eq "Graefe") { if ($str =~ /B1,([0-9]+)/) {return $1} elsif ($str =~ /R([0-9]+),([0-9]+)/) {return "$1.$2"} else {return ""} } elsif ($name eq "Gardiner") { if ($str =~ /B [12], ([0-9]+)/) {return $1} elsif ($str =~ /(?:R|Bt\.) ([0-9]+)/) {return $1} else {return ""} } elsif ($name eq "Allen") { if ($str =~ /B[12], ([0-9]+)/) {return $1} elsif ($str =~ /Bt ([0-9]+)/) {return $1} elsif ($str =~ /R ([0-9]+), ([0-9]+)/) {return "$1.$2"} } elsif ($name eq "Loprieno") { if ($str =~ /B[12],([0-9]+)/) {return $1} elsif ($str =~ /Bt ([0-9]+)/) {return $1} elsif ($str =~ /R([0-9]+)\.([0-9]+)/) {return "$1.$2"} } } # Adapt this function to extract the keys on which examples may be sorted: sub sorting_key { my ($str) = @_; my $key = position($str); if ($key =~ /^([0-9]+)[^0-9]([0-9]+)/) { $key = 1000*$1 + $2; } return $key } # End changeables. ########################################################################### open(OUT, ">$output_name") || die "cannot open $output_name\n"; my $thisday = (localtime)[3]; my $thismonth = (Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec)[(localtime)[4]]; my $thisyear = (localtime)[5] + 1900; print OUT < Automatically generated by ex2xml, $thismonth $thisday, $thisyear.

$descr

XMLSTART ########################################## # Files containing sentences are read. Those # whose %bi entry satisfies the pattern are stored. my($ex,$nr,$pa,$li,$hi,$al,$tr,$bi,$entry); # To contain references to all relevant examples: my @ids = (); # To map ids of examples to the examples themselves: my %example_hi = (); my %example_al = (); my %example_tr = (); foreach $input_name (@input_names) { if ($input_name =~ /\.ex$/) { read_exercises() } elsif ($input_name =~ /\.txt$/) { read_txt() } } sub read_exercises { open(IN, "$input_name") || die "cannot open $input_name"; $ex = ""; $nr = ""; $hi = ""; $al = ""; $tr = ""; $bi = ""; $entry = ""; while () { if (/^%ex(\s+.*)$/) { my $new_ex = $1; store_exer(); $ex = clear_spaces($new_ex); $ex =~ s/Exercise /Ex./; $nr = ""; } elsif (/^%nr(\s+.*)$/) { my $new_nr = $1; store_exer(); $nr = clear_spaces($new_nr) } elsif (/^%hi(\s+.*)$/) { if ($hi) {die "second %hi entry:\n$_"}; $hi = "$1\n"; $entry = "hi"; } elsif (/^%al(\s+.*)$/) { if ($al) {die "second %al entry:\n$_"}; $al = "$1\n"; $entry = "al"; } elsif (/^%tr(\s+.*)$/) { if ($tr) {die "second %tr entry:\n$_"}; $tr = "$1\n"; $entry = "tr"; } elsif (/^%bi(\s+.*)$/) { if ($bi) {die "second %bi entry:\n$_"}; $bi = "$1\n"; $entry = "bi"; } elsif (/^%/) { $entry = "" } elsif (/^\s*$/) { $entry = "" } else { if ($entry eq "hi") {$hi .= $_} elsif ($entry eq "al") {$al .= $_} elsif ($entry eq "tr") {$tr .= $_} elsif ($entry eq "bi") {$bi .= $_} } }; store_exer(); close(IN) } sub read_txt { open(IN, "$input_name") || die "cannot open $input_name"; $pa = ""; $li = ""; $hi = ""; $al = ""; $tr = ""; $bi = ""; $entry = ""; while () { if (/^%pa(\s+.*)$/) { my $new_pa = $1; store_example(); $pa = clear_spaces($new_pa); $li = ""; } elsif (/^%li(\s+.*)$/) { my $new_li = $1; store_example(); $li = clear_spaces($new_li) } elsif (/^%hi(\s+.*)$/) { if ($hi) {die "second %hi entry:\n$_"}; $hi = "$1\n"; $entry = "hi"; } elsif (/^%al(\s+.*)$/) { if ($al) {die "second %al entry:\n$_"}; $al = "$1\n"; $entry = "al"; } elsif (/^%tr(\s+.*)$/) { if ($tr) {die "second %tr entry:\n$_"}; $tr = "$1\n"; $entry = "tr"; } elsif (/^%bi(\s+.*)$/) { if ($bi) {die "second %bi entry:\n$_"}; $bi = "$1\n"; $entry = "bi"; } elsif (/^%/) { $entry = "" } elsif (/^\s*$/) { $entry = "" } else { if ($entry eq "hi") {$hi .= $_} elsif ($entry eq "al") {$al .= $_} elsif ($entry eq "tr") {$tr .= $_} elsif ($entry eq "bi") {$bi .= $_} } }; store_example(); close(IN) } sub store_exer { if ($hi || $al || $tr || $bi) { unless ($ex && $nr) {die "no %ex or no %nr"} $hi = clear_spaces($hi); $al = clear_spaces($al); $tr = clear_spaces($tr); $bi = clear_spaces($bi); if (to_be_selected($bi)) { my $id = sorting_key($bi) . "\n" . version($bi) . "\n" . position($bi) . "\n" . # "$name, $ex, $nr"; "$ex,$nr"; $example_hi{$id} = $hi; $example_al{$id} = $al; $example_tr{$id} = $tr; push(@ids, $id); } $hi = ""; $al = ""; $tr = ""; $bi = ""; $nr = ""; }; $entry = "" }; sub store_example { if ($hi || $al || $tr || $bi) { unless ($pa && $li) {die "no %pa or no %li"} $hi = clear_spaces($hi); $al = clear_spaces($al); $tr = clear_spaces($tr); $bi = clear_spaces($bi); if (to_be_selected($bi)) { my $id = sorting_key($bi) . "\n" . version($bi) . "\n" . position($bi) . "\n" . # "$name, p. $pa"; "p.$pa"; $example_hi{$id} = $hi; $example_al{$id} = $al; $example_tr{$id} = $tr; push(@ids, $id); } $hi = ""; $al = ""; $tr = ""; $bi = ""; $li = ""; }; $entry = "" }; ########################################## # Auxiliary routines. sub clear_spaces { my($txt) = @_; $txt =~ s/ +/ /g; $txt =~ s/^\s*//; $txt =~ s/\s*$//; return $txt } ######################################## # sorting: sub cmp_by_pos { $a =~ /^(.*)\n/; my $p1 = $1; $b =~ /^(.*)\n/; my $p2 = $1; if ($p1 =~ /^[0-9]+$/ && $p2 =~ /^[0-9]+$/) { return ($p1 <=> $p2); } else { return ($p1 cmp $p2); } } my @sorted_ids = sort cmp_by_pos @ids; ######################################## # Merging entries. # Coordinates should not be written twice. # Therefore, if two such entries should occur, # the entries are merged. Each coordinate has # at most one representative id. %coord_label = (); @ids = (); foreach $id (@sorted_ids) { $id =~ /^.*\n(.*)\n(.*)\n(.*)$/; my $version = $1; my $pos = $2; my $label = $3; my $hi = $ref_only ? "" : $example_hi{$id}; my $al = $ref_only ? "" : $example_al{$id}; my $tr = $ref_only ? "" : $example_tr{$id}; my $hi_out = ""; my $al_out = ""; my $tr_out = ""; unless ($hi eq "") { $hi_out = "($label)\n$hi\n"; } unless ($al eq "") { $al_out = "($label)\n$al\n"; } unless ($tr eq "") { $tr_out = "($label)\n$tr\n"; } if ($al eq "" && $hi eq "" && $tr eq "") { $al_out = "($label)\n"; } if (defined ($coord_label{"$version\n$pos"})) { my $id2 = $coord_label{"$version\n$pos"}; $example_hi{$id2} .= $hi_out; $example_al{$id2} .= $al_out; $example_tr{$id2} .= $tr_out; } else { $coord_label{"$version\n$pos"} = $id; $example_hi{$id} = $hi_out; $example_al{$id} = $al_out; $example_tr{$id} = $tr_out; push(@ids, $id); } } ######################################## # Writing the entries. foreach $id (@ids) { $id =~ /^.*\n(.*)\n(.*)\n(.*)$/; my $version = $1; my $pos = $2; my $label = $3; print OUT "\n"; my $hi = $example_hi{$id}; my $al = $example_al{$id}; my $tr = $example_tr{$id}; unless ($hi eq "") { print OUT "\n$hi\n"; } unless ($al eq "") { print OUT "\n$al\n"; } unless ($tr eq "") { print OUT "\n$tr\n"; } print OUT "\n"; } ########################################## print OUT < XMLEND close(OUT);