#!/usr/bin/perl -w # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- # # The Intltool Message Merger # # Copyright (C) 2000, 2003 Free Software Foundation. # Copyright (C) 2000, 2001 Eazel, Inc # # Intltool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # version 2 published by the Free Software Foundation. # # Intltool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # # Authors: Maciej Stachowiak # Kenneth Christiansen # Darin Adler # # Proper XML UTF-8'ification written by Cyrille Chepelov # ## Release information my $PROGRAM = "intltool-merge"; my $PACKAGE = "intltool"; my $VERSION = "0.50.2"; ## Loaded modules use strict; use Getopt::Long; use Text::Wrap; use File::Basename; use Encode; my $must_end_tag = -1; my $last_depth = -1; my $translation_depth = -1; my @tag_stack = (); my @entered_tag = (); my @translation_strings = (); my $leading_space = ""; ## Scalars used by the option stuff my $HELP_ARG = 0; my $VERSION_ARG = 0; my $BA_STYLE_ARG = 0; my $XML_STYLE_ARG = 0; my $KEYS_STYLE_ARG = 0; my $DESKTOP_STYLE_ARG = 0; my $SCHEMAS_STYLE_ARG = 0; my $RFC822DEB_STYLE_ARG = 0; my $QUOTED_STYLE_ARG = 0; my $QUOTEDXML_STYLE_ARG = 0; my $QUIET_ARG = 0; my $PASS_THROUGH_ARG = 0; my $UTF8_ARG = 0; my $MULTIPLE_OUTPUT = 0; my $NO_TRANSLATIONS_ARG = 0; my $cache_file; ## Handle options GetOptions ( "help" => \$HELP_ARG, "version" => \$VERSION_ARG, "quiet|q" => \$QUIET_ARG, "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility "ba-style|b" => \$BA_STYLE_ARG, "xml-style|x" => \$XML_STYLE_ARG, "keys-style|k" => \$KEYS_STYLE_ARG, "desktop-style|d" => \$DESKTOP_STYLE_ARG, "schemas-style|s" => \$SCHEMAS_STYLE_ARG, "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, "quoted-style" => \$QUOTED_STYLE_ARG, "quotedxml-style" => \$QUOTEDXML_STYLE_ARG, "pass-through|p" => \$PASS_THROUGH_ARG, "utf8|u" => \$UTF8_ARG, "multiple-output|m" => \$MULTIPLE_OUTPUT, "no-translations" => \$NO_TRANSLATIONS_ARG, "cache|c=s" => \$cache_file ) or &error; my $PO_DIR; my $FILE; my $OUTFILE; my %po_files_by_lang = (); my %translations = (); # Use this instead of \w for XML files to handle more possible characters. my $w = "[-A-Za-z0-9._:]"; # XML quoted string contents my $q = "[^\\\"]*"; ## Check for options. if ($VERSION_ARG) { &print_version; } elsif ($HELP_ARG) { &print_help; } elsif ($BA_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &ba_merge_translations; &finalize; } elsif ($XML_STYLE_ARG && (@ARGV > 2 || ($NO_TRANSLATIONS_ARG && @ARGV > 1))) { &utf8_sanity_check; &preparation; &print_message; &xml_merge_output; &finalize; } elsif ($KEYS_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &keys_merge_translations; &finalize; } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &desktop_merge_translations; &finalize; } elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &schemas_merge_translations; &finalize; } elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) { &preparation; &print_message; &rfc822deb_merge_translations; &finalize; } elsif (($QUOTED_STYLE_ARG || $QUOTEDXML_STYLE_ARG) && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; "ed_merge_translations($QUOTEDXML_STYLE_ARG); &finalize; } else { &print_help; } exit; ## Sub for printing release information sub print_version { print <<_EOF_; ${PROGRAM} (${PACKAGE}) ${VERSION} Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. Copyright (C) 2000-2003 Free Software Foundation, Inc. Copyright (C) 2000-2001 Eazel, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. _EOF_ exit; } ## Sub for printing usage information sub print_help { print <<_EOF_; Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE Generates an output file that includes some localized attributes from an untranslated source file. Mandatory options: (exactly one must be specified) -b, --ba-style includes translations in the bonobo-activation style -d, --desktop-style includes translations in the desktop style -k, --keys-style includes translations in the keys style -s, --schemas-style includes translations in the schemas style -r, --rfc822deb-style includes translations in the RFC822 style --quoted-style includes translations in the quoted string style --quotedxml-style includes translations in the quoted xml string style -x, --xml-style includes translations in the standard xml style Other options: -u, --utf8 convert all strings to UTF-8 before merging (default for everything except RFC822 style) -p, --pass-through deprecated, does nothing and issues a warning -m, --multiple-output output one localized file per locale, instead of a single file containing all localized elements --no-translations do not merge any translations: only generates the unlocalised (English) version -- applies only to XML merging -c, --cache=FILE specify cache file name (usually \$top_builddir/po/.intltool-merge-cache) -q, --quiet suppress most messages --help display this help and exit --version output version information and exit Report bugs to http://bugs.launchpad.net/intltool _EOF_ exit; } ## Sub for printing error messages sub print_error { print STDERR "Try `${PROGRAM} --help' for more information.\n"; exit; } sub print_message { print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; } sub preparation { if (!$XML_STYLE_ARG || !$NO_TRANSLATIONS_ARG) { $PO_DIR = $ARGV[0]; $FILE = $ARGV[1]; $OUTFILE = $ARGV[2]; &gather_po_files; &get_translation_database; } else { $FILE = $ARGV[0]; $OUTFILE = $ARGV[1]; } } # General-purpose code for looking up translations in .po files sub po_file2lang { my ($tmp) = @_; $tmp =~ s/^.*\/(.*)\.po$/$1/; return $tmp; } sub gather_po_files { if (my $linguas = $ENV{"LINGUAS"}) { for my $lang (split / /, $linguas) { my $po_file = $PO_DIR . "/" . $lang . ".po"; if (-e $po_file) { $po_files_by_lang{$lang} = $po_file; } } } else { if (open LINGUAS_FILE, "$PO_DIR/LINGUAS") { while () { next if /^#/; for my $lang (split) { chomp ($lang); my $po_file = $PO_DIR . "/" . $lang . ".po"; if (-e $po_file) { $po_files_by_lang{$lang} = $po_file; } } } close LINGUAS_FILE; } else { for my $po_file (glob "$PO_DIR/*.po") { $po_files_by_lang{po_file2lang($po_file)} = $po_file; } } } } sub get_po_encoding { my ($in_po_file) = @_; my $encoding = ""; open IN_PO_FILE, $in_po_file or die; while () { ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) { $encoding = $1; last; } } close IN_PO_FILE; if (!$encoding) { print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; $encoding = "ISO-8859-1"; } return $encoding } sub utf8_sanity_check { print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG; $UTF8_ARG = 1; } sub get_translation_database { if ($cache_file) { &get_cached_translation_database; } else { &create_translation_database; } } sub get_newest_po_age { my $newest_age; foreach my $file (values %po_files_by_lang) { my $file_age = -M $file; $newest_age = $file_age if !$newest_age || $file_age < $newest_age; } $newest_age = 0 if !$newest_age; return $newest_age; } sub create_cache { print "Generating and caching the translation database\n" unless $QUIET_ARG; &create_translation_database; open CACHE, ">$cache_file" || die; print CACHE join "\x01", %translations; close CACHE; } sub load_cache { print "Found cached translation database\n" unless $QUIET_ARG; my $contents; open CACHE, "<$cache_file" || die; { local $/; $contents = ; } close CACHE; %translations = split "\x01", $contents; } sub get_cached_translation_database { my $cache_file_age = -M $cache_file; if (defined $cache_file_age) { if ($cache_file_age <= &get_newest_po_age) { &load_cache; return; } print "Found too-old cached translation database\n" unless $QUIET_ARG; } &create_cache; } sub add_translation { my ($lang, $encoding, $msgctxt, $msgid, $msgstr) = @_; return if !($msgid && $msgstr); if ($msgctxt) { $msgid = "$msgctxt\004$msgid"; } if (uc $encoding ne "UTF-8") { Encode::from_to ($msgid, $encoding, "UTF-8"); Encode::from_to ($msgstr, $encoding, "UTF-8"); } $translations{$lang, $msgid} = $msgstr; } sub create_translation_database { for my $lang (keys %po_files_by_lang) { my $po_file = $po_files_by_lang{$lang}; my $encoding = "UTF-8"; if ($UTF8_ARG) { $encoding = get_po_encoding ($po_file); if (uc $encoding ne "UTF-8") { print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; } } open PO_FILE, "<$po_file"; my $nextfuzzy = 0; my $inmsgctxt = 0; my $inmsgid = 0; my $inmsgstr = 0; my $msgctxt = ""; my $msgid = ""; my $msgstr = ""; while () { $nextfuzzy = 1 if /^#, fuzzy/; if (/^msgctxt "((\\.|[^\\]+)*)"/ ) { if ($inmsgstr) { add_translation ($lang, $encoding, $msgctxt, $msgid, $msgstr); $msgctxt = ""; $msgid = ""; $msgstr = ""; } $msgctxt = unescape_po_string($1); $inmsgctxt = 1; $inmsgid = 0; $inmsgstr = 0; } if (/^msgid "((\\.|[^\\]+)*)"/ ) { if ($inmsgstr) { add_translation ($lang, $encoding, $msgctxt, $msgid, $msgstr); $msgctxt = ""; $msgid = ""; $msgstr = ""; } if ($nextfuzzy) { $inmsgid = 0; $nextfuzzy = 0; } else { $msgid = unescape_po_string($1); $inmsgid = 1; } $inmsgctxt = 0; $inmsgstr = 0; } if (/^msgstr "((\\.|[^\\]+)*)"/) { $msgstr = unescape_po_string($1); $inmsgstr = 1; $inmsgctxt = 0; $inmsgid = 0; } if (/^"((\\.|[^\\]+)*)"/) { $msgctxt .= unescape_po_string($1) if $inmsgctxt; $msgid .= unescape_po_string($1) if $inmsgid; $msgstr .= unescape_po_string($1) if $inmsgstr; } } add_translation ($lang, $encoding, $msgctxt, $msgid, $msgstr) if ($inmsgstr); } } sub finalize { } sub unescape_one_sequence { my ($sequence) = @_; return "\\" if $sequence eq "\\\\"; return "\"" if $sequence eq "\\\""; return "\n" if $sequence eq "\\n"; return "\r" if $sequence eq "\\r"; return "\t" if $sequence eq "\\t"; return "\b" if $sequence eq "\\b"; return "\f" if $sequence eq "\\f"; return "\a" if $sequence eq "\\a"; return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7) return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/); return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/); # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489 return $sequence; } sub unescape_po_string { my ($string) = @_; $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg; return $string; } sub entity_decode { local ($_) = @_; s/'/'/g; # ' s/"/"/g; # " s/<//g; s/&/&/g; return $_; } # entity_encode: (string) # # Encode the given string to XML format (encode '<' etc). sub entity_encode { my ($pre_encoded) = @_; my @list_of_chars = unpack ('C*', $pre_encoded); # with UTF-8 we only encode minimalistic return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); } sub entity_encode_int_minimalist { return """ if $_ == 34; return "&" if $_ == 38; return "'" if $_ == 39; return "<" if $_ == 60; return ">" if $_ == 62; return chr $_; } sub entity_encoded_translation { my ($lang, $string) = @_; my $translation = $translations{$lang, $string}; return $string if !$translation; return entity_encode ($translation); } ## XML (bonobo-activation specific) merge code sub ba_merge_translations { my $source; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; # Binmode so that selftest works ok if using a native Win32 Perl... binmode (OUTPUT) if $^O eq 'MSWin32'; while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) { print OUTPUT $1; my $node = $2 . "\n"; my @strings = (); $_ = $node; while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { push @strings, entity_decode($3); } print OUTPUT; my %langs; for my $string (@strings) { for my $lang (keys %po_files_by_lang) { $langs{$lang} = 1 if $translations{$lang, $string}; } } for my $lang (sort keys %langs) { $_ = $node; s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; print OUTPUT; } } print OUTPUT $source; close OUTPUT; } ## XML (non-bonobo-activation) merge code # Process tag attributes # Only parameter is a HASH containing attributes -> values mapping sub getAttributeString { my $sub = shift; my $do_translate = shift || 0; my $language = shift || ""; my $result = ""; my $translate = shift; foreach my $e (reverse(sort(keys %{ $sub }))) { my $key = $e; my $string = $sub->{$e}; my $quote = '"'; $string =~ s/^[\s]+//; $string =~ s/[\s]+$//; if ($string =~ /^'.*'$/) { $quote = "'"; } $string =~ s/^['"]//g; $string =~ s/['"]$//g; if ($do_translate && $key =~ /^_/) { $key =~ s|^_||g; if ($language) { # Handle translation my $decode_string = entity_decode($string); my $translation = $translations{$language, $decode_string}; if ($translation) { $translation = entity_encode($translation); $string = $translation; } $$translate = 2; } else { $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate } } $result .= " $key=$quote$string$quote"; } return $result; } # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree sub getXMLstring { my $ref = shift; my $spacepreserve = shift || 0; my @list = @{ $ref }; my $result = ""; my $count = scalar(@list); my $attrs = $list[0]; my $index = 1; $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); while ($index < $count) { my $type = $list[$index]; my $content = $list[$index+1]; if (! $type ) { # We've got CDATA if ($content) { # lets strip the whitespace here, and *ONLY* here $content =~ s/\s+/ /gs if (!$spacepreserve); $result .= $content; } } elsif ( "$type" ne "1" ) { # We've got another element $result .= "<$type"; $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements if ($content) { my $subresult = getXMLstring($content, $spacepreserve); if ($subresult) { $result .= ">".$subresult . ""; } else { $result .= "/>"; } } else { $result .= "/>"; } } $index += 2; } return $result; } # Translate list of nodes if necessary sub translate_subnodes { my $fh = shift; my $content = shift; my $language = shift || ""; my $singlelang = shift || 0; my $spacepreserve = shift || 0; my @nodes = @{ $content }; my $count = scalar(@nodes); my $index = 0; while ($index < $count) { my $type = $nodes[$index]; my $rest = $nodes[$index+1]; if ($singlelang) { my $oldMO = $MULTIPLE_OUTPUT; $MULTIPLE_OUTPUT = 1; traverse($fh, $type, $rest, $language, $spacepreserve); $MULTIPLE_OUTPUT = $oldMO; } else { traverse($fh, $type, $rest, $language, $spacepreserve); } $index += 2; } } sub isWellFormedXmlFragment { my $ret = eval 'require XML::Parser'; if(!$ret) { die "You must have XML::Parser installed to run $0\n\n"; } my $fragment = shift; return 0 if (!$fragment); $fragment = "$fragment"; my $xp = new XML::Parser(Style => 'Tree'); my $tree = 0; eval { $tree = $xp->parse($fragment); }; return $tree; } sub traverse { my $fh = shift; my $nodename = shift; my $content = shift; my $language = shift || ""; my $spacepreserve = shift || 0; if (!$nodename) { if ($content =~ /^[\s]*$/) { $leading_space .= $content; } print $fh $content; } else { # element my @all = @{ $content }; my $attrs = shift @all; my $translate = 0; my $outattr = getAttributeString($attrs, 1, $language, \$translate); if ($nodename =~ /^_/) { $translate = 1; $nodename =~ s/^_//; } my $lookup = ''; $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); print $fh "<$nodename", $outattr; if ($translate) { $content = getXMLstring($content, $spacepreserve); if (!$spacepreserve) { $content =~ s/^\s+//s; $content =~ s/\s+$//s; } if (exists $attrs->{"msgctxt"}) { my $context = entity_decode ($attrs->{"msgctxt"}); $context =~ s/^["'](.*)["']/$1/; $lookup = "$context\004$content"; } else { $lookup = $content; } if ($lookup || $translate == 2) { my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup}); if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) { $translation = $content if (!$translation); print $fh " xml:lang=\"", $language, "\"" if $language; print $fh ">"; if ($translate == 2) { translate_subnodes($fh, \@all, $language, 1, $spacepreserve); } else { print $fh $translation; } print $fh ""; return; # this means there will be no same translation with xml:lang="$language"... # if we want them both, just remove this "return" } else { print $fh ">"; if ($translate == 2) { translate_subnodes($fh, \@all, $language, 1, $spacepreserve); } else { print $fh $content; } print $fh ""; } } else { print $fh "/>"; } for my $lang (sort keys %po_files_by_lang) { if ($MULTIPLE_OUTPUT && $lang ne "$language") { next; } if ($lang) { # Handle translation # my $translate = 0; my $localattrs = getAttributeString($attrs, 1, $lang, \$translate); my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup}); if ($translate && !$translation) { $translation = $content; } if ($translation || $translate) { print $fh "\n"; $leading_space =~ s/.*\n//g; print $fh $leading_space; print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">"; if ($translate == 2) { translate_subnodes($fh, \@all, $lang, 1, $spacepreserve); } else { print $fh $translation; } print $fh ""; } } } } else { my $count = scalar(@all); if ($count > 0) { print $fh ">"; my $index = 0; while ($index < $count) { my $type = $all[$index]; my $rest = $all[$index+1]; traverse($fh, $type, $rest, $language, $spacepreserve); $index += 2; } print $fh ""; } else { print $fh "/>"; } } } } sub intltool_tree_comment { my $expat = shift; my $data = shift; my $clist = $expat->{Curlist}; my $pos = $#$clist; push @$clist, 1 => $data; } sub intltool_tree_cdatastart { my $expat = shift; my $clist = $expat->{Curlist}; my $pos = $#$clist; push @$clist, 0 => $expat->original_string(); } sub intltool_tree_cdataend { my $expat = shift; my $clist = $expat->{Curlist}; my $pos = $#$clist; $clist->[$pos] .= $expat->original_string(); } sub intltool_tree_char { my $expat = shift; my $text = shift; my $clist = $expat->{Curlist}; my $pos = $#$clist; # Use original_string so that we retain escaped entities # in CDATA sections. # if ($pos > 0 and $clist->[$pos - 1] eq '0') { $clist->[$pos] .= $expat->original_string(); } else { push @$clist, 0 => $expat->original_string(); } } sub intltool_tree_start { my $expat = shift; my $tag = shift; my @origlist = (); # Use original_string so that we retain escaped entities # in attribute values. We must convert the string to an # @origlist array to conform to the structure of the Tree # Style. # my @original_array = split /\x/, $expat->original_string(); my $source = $expat->original_string(); # Remove leading tag. # $source =~ s|^\s*<\s*(\S+)||s; # Grab attribute key/value pairs and push onto @origlist array. # while ($source) { if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) { $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; push @origlist, $1; push @origlist, '"' . $2 . '"'; } elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) { $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; push @origlist, $1; push @origlist, "'" . $2 . "'"; } else { last; } } my $ol = [ { @origlist } ]; push @{ $expat->{Lists} }, $expat->{Curlist}; push @{ $expat->{Curlist} }, $tag => $ol; $expat->{Curlist} = $ol; } sub readXml { my $filename = shift || return; if(!-f $filename) { die "ERROR Cannot find filename: $filename\n"; } my $ret = eval 'require XML::Parser'; if(!$ret) { die "You must have XML::Parser installed to run $0\n\n"; } my $xp = new XML::Parser(Style => 'Tree'); $xp->setHandlers(Char => \&intltool_tree_char); $xp->setHandlers(Start => \&intltool_tree_start); $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart); $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend); my $tree = $xp->parsefile($filename); # Hello thereHowdydo # would be: # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{}, # 0, "Howdy", ref, [{}]], 0, "do" ] ] return $tree; } sub print_header { my $infile = shift; my $fh = shift; my $source; if(!-f $infile) { die "ERROR Cannot find filename: $infile\n"; } print $fh qq{\n}; { local $/; open DOCINPUT, "<${FILE}" or die; $source = ; close DOCINPUT; } if ($source =~ /()/s) { print $fh "$1\n"; } elsif ($source =~ /(]*>)/s) { print $fh "$1\n"; } } sub parseTree { my $fh = shift; my $ref = shift; my $language = shift || ""; my $name = shift @{ $ref }; my $cont = shift @{ $ref }; while (!$name || "$name" eq "1") { $name = shift @{ $ref }; $cont = shift @{ $ref }; } my $spacepreserve = 0; my $attrs = @{$cont}[0]; $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); traverse($fh, $name, $cont, $language, $spacepreserve); } sub xml_merge_output { my $source; if ($MULTIPLE_OUTPUT) { for my $lang (sort keys %po_files_by_lang) { if ( ! -d $lang ) { mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; } open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; binmode (OUTPUT) if $^O eq 'MSWin32'; my $tree = readXml($FILE); print_header($FILE, \*OUTPUT); parseTree(\*OUTPUT, $tree, $lang); close OUTPUT; print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; } if ( ! -d "C" ) { mkdir "C" or -d "C" or die "Cannot create subdirectory C: $!\n"; } open OUTPUT, ">C/$OUTFILE" or die "Cannot open C/$OUTFILE: $!\n"; binmode (OUTPUT) if $^O eq 'MSWin32'; my $tree = readXml($FILE); print_header($FILE, \*OUTPUT); parseTree(\*OUTPUT, $tree); close OUTPUT; print "CREATED C/$OUTFILE\n" unless $QUIET_ARG; } else { open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n"; binmode (OUTPUT) if $^O eq 'MSWin32'; my $tree = readXml($FILE); print_header($FILE, \*OUTPUT); parseTree(\*OUTPUT, $tree); close OUTPUT; print "CREATED $OUTFILE\n" unless $QUIET_ARG; } } sub keys_merge_translation { my ($lang) = @_; if ( ! -d $lang && $MULTIPLE_OUTPUT) { mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; } open INPUT, "<${FILE}" or die "Cannot open ${FILE}: $!\n"; open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; binmode (OUTPUT) if $^O eq 'MSWin32'; while () { if (s/^(\s*)_(\w+=(.*))/$1$2/) { my $string = $3; if (!$MULTIPLE_OUTPUT) { print OUTPUT; my $non_translated_line = $_; for my $lang (sort keys %po_files_by_lang) { my $translation = $translations{$lang, $string}; next if !$translation; $_ = $non_translated_line; s/(\w+)=.*/[$lang]$1=$translation/; print OUTPUT; } } else { my $non_translated_line = $_; my $translation = $translations{$lang, $string}; $translation = $string if !$translation; $_ = $non_translated_line; s/(\w+)=.*/$1=$translation/; print OUTPUT; } } else { print OUTPUT; } } close OUTPUT; close INPUT; print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; } sub keys_merge_translations { if ($MULTIPLE_OUTPUT) { for my $lang (sort keys %po_files_by_lang) { keys_merge_translation ($lang); } keys_merge_translation ("C"); } else { keys_merge_translation ("."); } } sub desktop_merge_translations { open INPUT, "<${FILE}" or die; open OUTPUT, ">${OUTFILE}" or die; binmode (OUTPUT) if $^O eq 'MSWin32'; while () { if (s/^(\s*)_([A-Za-z0-9\-]+=(.*))/$1$2/) { my $string = $3; print OUTPUT; my $non_translated_line = $_; for my $lang (sort keys %po_files_by_lang) { my $translation = $translations{$lang, $string}; next if !$translation; $_ = $non_translated_line; s/(\w+)=.*/${1}[$lang]=$translation/; print OUTPUT; } } else { print OUTPUT; } } close OUTPUT; close INPUT; } sub schemas_merge_translations { my $source; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">$OUTFILE" or die; binmode (OUTPUT) if $^O eq 'MSWin32'; # FIXME: support attribute translations # Empty nodes never need translation, so unmark all of them. # For example, <_foo/> is just replaced by . $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; while ($source =~ s/ (.*?) (\s+)((\s*) (\s*(?:\s*)?(.*?)\s*<\/default>)?(\s*) (\s*(?:\s*)?(.*?)\s*<\/short>)?(\s*) (\s*(?:\s*)?(.*?)\s*<\/long>)?(\s*) <\/locale>) //sx) { print OUTPUT $1; my $locale_start_spaces = $2 ? $2 : ''; my $default_spaces = $4 ? $4 : ''; my $short_spaces = $7 ? $7 : ''; my $long_spaces = $10 ? $10 : ''; my $locale_end_spaces = $13 ? $13 : ''; my $c_default_block = $3 ? $3 : ''; my $default_string = $6 ? $6 : ''; my $short_string = $9 ? $9 : ''; my $long_string = $12 ? $12 : ''; print OUTPUT "$locale_start_spaces$c_default_block"; $default_string =~ s/\s+/ /g; $default_string = entity_decode($default_string); $short_string =~ s/\s+/ /g; $short_string = entity_decode($short_string); $long_string =~ s/\s+/ /g; $long_string = entity_decode($long_string); for my $lang (sort keys %po_files_by_lang) { my $default_translation = $translations{$lang, $default_string}; my $short_translation = $translations{$lang, $short_string}; my $long_translation = $translations{$lang, $long_string}; next if (!$default_translation && !$short_translation && !$long_translation); print OUTPUT "\n$locale_start_spaces"; print OUTPUT "$default_spaces"; if ($default_translation) { $default_translation = entity_encode($default_translation); print OUTPUT "$default_translation"; } print OUTPUT "$short_spaces"; if ($short_translation) { $short_translation = entity_encode($short_translation); print OUTPUT "$short_translation"; } print OUTPUT "$long_spaces"; if ($long_translation) { $long_translation = entity_encode($long_translation); print OUTPUT "$long_translation"; } print OUTPUT "$locale_end_spaces"; } } print OUTPUT $source; close OUTPUT; } sub rfc822deb_merge_translations { my %encodings = (); for my $lang (keys %po_files_by_lang) { $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang})); } my $source; $Text::Wrap::huge = 'overflow'; $Text::Wrap::break = qr/\n|\s(?=\S)/; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">${OUTFILE}" or die; binmode (OUTPUT) if $^O eq 'MSWin32'; while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg) { my $sep = $1; my $non_translated_line = $3.$4; my $string = $5; my $underscore = length($2); next if $underscore eq 0 && $non_translated_line =~ /^#/; # Remove [] dummy strings my $stripped = $string; $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2; $stripped =~ s/\[\s[^\[\]]*\]$//; $non_translated_line .= $stripped; print OUTPUT $sep.$non_translated_line; if ($underscore) { my @str_list = rfc822deb_split($underscore, $string); for my $lang (sort keys %po_files_by_lang) { my $is_translated = 1; my $str_translated = ''; my $first = 1; for my $str (@str_list) { my $translation = $translations{$lang, $str}; if (!$translation) { $is_translated = 0; last; } # $translation may also contain [] dummy # strings, mostly to indicate an empty string $translation =~ s/\[\s[^\[\]]*\]$//; if ($first) { if ($underscore eq 2) { $str_translated .= $translation; } else { $str_translated .= Text::Tabs::expand($translation) . "\n"; } } else { if ($underscore eq 2) { $str_translated .= ', ' . $translation; } else { $str_translated .= Text::Tabs::expand( Text::Wrap::wrap(' ', ' ', $translation)) . "\n .\n"; } } $first = 0; # To fix some problems with Text::Wrap::wrap $str_translated =~ s/(\n )+\n/\n .\n/g; } next unless $is_translated; $str_translated =~ s/\n \.\n$//; $str_translated =~ s/\s+$//; $_ = $non_translated_line; s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s; print OUTPUT; } } } print OUTPUT "\n"; close OUTPUT; close INPUT; } sub rfc822deb_split { # Debian defines a special way to deal with rfc822-style files: # when a value contain newlines, it consists of # 1. a short form (first line) # 2. a long description, all lines begin with a space, # and paragraphs are separated by a single dot on a line # This routine returns an array of all paragraphs, and reformat # them. # When first argument is 2, the string is a comma separated list of # values. my $type = shift; my $text = shift; $text =~ s/^[ \t]//mg; return (split(/, */, $text, 0)) if $type ne 1; return ($text) if $text !~ /\n/; $text =~ s/([^\n]*)\n//; my @list = ($1); my $str = ''; for my $line (split (/\n/, $text)) { chomp $line; if ($line =~ /^\.\s*$/) { # New paragraph $str =~ s/\s*$//; push(@list, $str); $str = ''; } elsif ($line =~ /^\s/) { # Line which must not be reformatted $str .= "\n" if length ($str) && $str !~ /\n$/; $line =~ s/\s+$//; $str .= $line."\n"; } else { # Continuation line, remove newline $str .= " " if length ($str) && $str !~ /\n$/; $str .= $line; } } $str =~ s/\s*$//; push(@list, $str) if length ($str); return @list; } sub quoted_translation { my ($xml_mode, $lang, $string) = @_; $string = entity_decode($string) if $xml_mode; $string =~ s/\\\"/\"/g; my $translation = $translations{$lang, $string}; $translation = $string if !$translation; $translation = entity_encode($translation) if $xml_mode; $translation =~ s/\"/\\\"/g; return $translation } sub quoted_merge_translations { my ($xml_mode) = @_; if (!$MULTIPLE_OUTPUT) { print "Quoted only supports Multiple Output.\n"; exit(1); } for my $lang (sort keys %po_files_by_lang) { if ( ! -d $lang ) { mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; } open INPUT, "<${FILE}" or die; open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; binmode (OUTPUT) if $^O eq 'MSWin32'; while () { s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . "ed_translation($xml_mode, $lang, $1) . "\""/ge; print OUTPUT; } close OUTPUT; close INPUT; } }