# latex2html.pm: interface to LaTeX2HTML
# Copyright (C) 1999, 2000, 2003, 2005, 2006, 2009, 2011, 2013
# Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# This code was taken from the main texi2html file in 2006.
# Certainly originally written by Olaf Bachmann.
# Adapted from texi2html T2h_l2h.pm in 2011.
require 5.0;
use strict;
use Cwd;
use File::Copy;
use File::Spec;
# Add @math and @tex to the GLOBAL_COMMANDS customization list, creating the
# list first when it is not set yet.
my $global_cmds = get_conf('GLOBAL_COMMANDS');
if (!defined($global_cmds)) {
  set_from_init_file('GLOBAL_COMMANDS', []);
  $global_cmds = get_conf('GLOBAL_COMMANDS');
}
push @$global_cmds, ('math', 'tex');

# Hook the latex2html stages into the converter: l2h_process runs at the
# 'structure' stage, l2h_finish at the 'finish' stage, and l2h_do_tex formats
# every @math and @tex command.
texinfo_register_handler('structure', \&l2h_process);
texinfo_register_handler('finish', \&l2h_finish);
texinfo_register_command_formatting('math', \&l2h_do_tex);
texinfo_register_command_formatting('tex', \&l2h_do_tex);
# Default values of the customization variables used by this file.
# Each entry is [ variable name, default value ]; they are registered in the
# order listed.
foreach my $l2h_default (
  # name/location of the latex2html program
  ['L2H_L2H', 'latex2html'],
  # If set to a true value, latex2html is not called at all and previously
  # generated content is reused instead.
  # If set to 0, the cache is not used.
  # If undefined, the cache is used for as many TeX fragments as possible
  # and latex2html is run for the remaining ones.
  ['L2H_SKIP', undef],
  # If set, l2h uses this directory for temporary files.  The path leading
  # to this directory may not contain a dot (i.e., a "."); otherwise, l2h
  # will fail.
  ['L2H_TMP', ''],
  # If set, l2h uses this file as latex2html init file
  ['L2H_FILE', undef],
  # If set, the intermediate files generated by texi2html in relation with
  # latex2html are removed (they all have the prefix _l2h_).
  ['L2H_CLEAN', 1],
) {
  set_from_init_file($l2h_default->[0], $l2h_default->[1]);
}
# latex2html conversions consist of 2 stages:
# 1) l2h_process
# to latex: Put "latex" code into a latex file
# (l2h_to_latex, l2h_finish_to_latex)
# to html: Use latex2html to generate corresponding html code and images
# (l2h_to_html)
# from html: Extract generated code and images from latex2html run
# (l2h_init_from_html)
# 2) l2h_do_tex called each time a @tex or @math command is encountered
# in the output tree.
# File-level state shared by all the l2h_* functions below.
# Base name, LaTeX input file, cache file, HTML result file and file name
# prefix used for the latex2html run.
my ($l2h_name, $l2h_latex_file, $l2h_cache_file, $l2h_html_file, $l2h_prefix);
# Status of the latex2html operations.  0 means that there was an error (or
# that nothing was processed yet); l2h_do_tex and l2h_finish do nothing
# useful in that case.
my $status = 0;
# Values of the DEBUG and VERBOSE customization variables.
my $debug;
my $verbose;
# Destination directory of the document, and its volume and directories
# parts as split by File::Spec.
my $docu_rdir;
my $docu_volume;
my $docu_directories;
# Document name, used to build the l2h file names.
my $docu_name;
# Associates a @tex or @math tree element with the counter it was
# registered with.
my %commands_counters;
# Error counters reported by l2h_finish.
my $extract_error_count;
my $invalid_counter_count;
# State of l2h_change_image_file_names.
# Associates an image source file with its destination file, such that
# files are not copied twice.
my %l2h_img;
# Number used to build the name of the next destination image file.
my $image_count;
# State of l2h_do_tex.
# Number of HTML fragments output in the HTML result file.
my $html_output_count = 0;
# First stage: Generation of Latex file
# Initialize with: init
# Add content with: l2h_to_latex ($text) --> HTML placeholder comment
# Finish with: finish_to_latex
# NOTE(review): the next statement is garbled as it stands.  The assignment to
# $l2h_latex_preamble runs straight into a condition on the OUTFILE
# customization variable, and no "sub" line for l2h_process (the handler
# registered for the 'structure' stage) is visible.  The here-document text
# and the start of the function appear to be missing -- restore them from the
# upstream file before touching this code.
my $l2h_latex_preamble = <get_conf('OUTFILE'))
and $Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')});
# Remember the document name and the destination directory of the converter.
$docu_name = $self->{'document_name'};
$docu_rdir = $self->{'destination_directory'};
$docu_rdir = '' if (!defined($docu_rdir));
my $no_file;
# The true second argument tells splitpath that the path holds no file name.
($docu_volume, $docu_directories, $no_file)
= File::Spec->splitpath($docu_rdir, 1);
$l2h_name = "${docu_name}_l2h";
# NOTE(review): the three catpath calls below are truncated -- the file name
# argument and the closing parenthesis of each call are missing.
$l2h_latex_file = File::Spec->catpath($docu_volume, $docu_directories,
$l2h_cache_file = File::Spec->catpath($docu_volume, $docu_directories,
# destination dir -- generated images are put there, should be the same
# as dir of enclosing html document --
$l2h_html_file = File::Spec->catpath($docu_volume, $docu_directories,
$l2h_prefix = "${l2h_name}_";
$debug = $self->get_conf('DEBUG');
$verbose = $self->get_conf('VERBOSE');
# Unless L2H_SKIP is set, open the LaTeX file and write the preamble to it.
unless ($self->get_conf('L2H_SKIP')) {
unless (open(L2H_LATEX, ">$l2h_latex_file")) {
# NOTE(review): the call reporting the open failure is truncated; only its
# message and arguments are left.
"l2h: could not open latex file %s for writing: %s"),
$l2h_latex_file, $!));
$status = 0;
warn "# l2h: use ${l2h_latex_file} as latex file\n" if ($verbose);
print L2H_LATEX $l2h_latex_preamble;
# open the database that holds cached text; this happens when L2H_SKIP is
# undefined or set to a true value, not when it is set to 0.
l2h_init_cache($self) if (!defined($self->get_conf('L2H_SKIP'))
or $self->get_conf('L2H_SKIP'));
# Register every @tex and @math element found in the 'extra' information.
foreach my $command ('tex', 'math') {
if ($self->{'extra'}->{$command}) {
# NOTE(review): $counter is never modified in the lines visible here, so
# every element of a command would be registered with counter 0 -- confirm
# whether an increment is missing.
my $counter = 0;
foreach my $root (@{$self->{'extra'}->{$command}}) {
my $tree;
if ($command eq 'math') {
# @math: the TeX code is the first argument of the command.
$tree = $root->{'args'}->[0];
} else {
# @tex: work on a copy of the contents, without the leading
# 'empty_line_after_command' element and without the trailing @end.
$tree = {'contents' => [@{$root->{'contents'}}]};
if ($tree->{'contents'}->[0]
and $tree->{'contents'}->[0]->{'type'}
and $tree->{'contents'}->[0]->{'type'} eq 'empty_line_after_command') {
shift @{$tree->{'contents'}};
if ($tree->{'contents'}->[-1]->{'cmdname'}
and $tree->{'contents'}->[-1]->{'cmdname'} eq 'end') {
pop @{$tree->{'contents'}};
# Convert the tree back to Texinfo text and register that text.
my $text = Texinfo::Convert::Texinfo::convert($tree);
#$text .= "\n" if ($command eq 'tex');
l2h_to_latex($self, $command, $text, $counter);
# The hash key is the stringified reference of the tree element.
$commands_counters{$root} = $counter;
# Run the remaining stages; each one only runs when the previous one
# returned a true status.
$status = l2h_finish_to_latex($self);
if ($status) {
$status = l2h_to_html($self);
if ($status) {
$status = l2h_init_from_html($self);
# FIXME use $status? That is abort when something goes wrong on the
# latex2html front?
return 1;
# Register a TeX fragment for conversion.
# Arguments: the converter, the command name ('tex' or 'math'), the fragment
# text, and the counter of the fragment for that command.
# The text is printed into the latex file (if it is not already there nor in
# the cache), enclosed in l2h_begin/l2h_end HTML comments such that the text
# latex2html generates for it can be found again by l2h_init_from_html.
# Always returns 1.
sub l2h_to_latex($$$$)
{
  my $self = shift;
  my $command = shift;
  my $text = shift;
  my $counter = shift;

  if ($command eq 'tex') {
    $text .= ' ';
  } elsif ($command eq 'math') {
    $text = "\$".$text."\$";
  }
  # every call counts, whether the text ends up reused, cached or converted
  $to_latex_count++;
  $text =~ s/(\s*)$//;

  # try whether we have text already on things to do
  my $count = $l2h_to_latex{$text};
  unless ($count) {
    # a new text: give it the next index.  Indices start at 1, 0 is never a
    # valid index (see the range check in l2h_do_tex).
    $latex_count++;
    $count = $latex_count;
    # try whether we can get it from cache
    my $cached_text = l2h_from_cache($text);
    if (defined($cached_text)) {
      $cached_count++;
      # put the cached result in the html result array
      $l2h_from_html[$count] = $cached_text;
    } else {
      $latex_converted_count++;
      unless ($self->get_conf('L2H_SKIP')) {
        # the markers written here are those l2h_init_from_html looks for
        print L2H_LATEX "\\begin{rawhtml}\n\n";
        print L2H_LATEX "<!-- l2h_begin $l2h_name $count -->\n";
        print L2H_LATEX "\\end{rawhtml}\n";
        print L2H_LATEX "$text\n";
        print L2H_LATEX "\\begin{rawhtml}\n";
        print L2H_LATEX "<!-- l2h_end $l2h_name $count -->\n\n";
        print L2H_LATEX "\\end{rawhtml}\n";
      }
    }
    $l2h_to_latex[$count] = $text;
    $l2h_to_latex{$text} = $count;
  }
  # remember which index holds the result for this command occurrence
  $global_count{"${command}_$counter"} = $count;
  return 1;
}
# Print the closing into the latex file and close it (unless L2H_SKIP is
# set, in which case the file was not opened).
# Returns 0 if there was no @tex nor @math text at all, 1 otherwise.
sub l2h_finish_to_latex($)
{
  my $self = shift;
  # texts that were neither converted nor found in the cache were already
  # registered by an earlier identical text
  my $reused = $to_latex_count - $latex_converted_count - $cached_count;
  unless ($self->get_conf('L2H_SKIP')) {
    print L2H_LATEX $l2h_latex_closing;
    # buffered write errors only show up when the file is closed
    unless (close(L2H_LATEX)) {
      $self->document_warn(sprintf($self->__("l2h: error on closing %s: %s"),
                                   $l2h_latex_file, $!));
    }
  }
  warn "# l2h: finished to latex ($cached_count cached, $reused reused, $latex_converted_count to process)\n" if ($verbose);
  unless ($latex_count) {
    # no @tex nor @math
    return 0;
  }
  return 1;
}
# Use latex2html to generate the html code and images corresponding to the
# latex file:
#    Call latex2html on $l2h_latex_file
#    Put images (prefixed with $l2h_name."_") and html file(s) in the
#    destination directory
# Argument: the converter.
# Returns 1 on success or when there is nothing to run,
#         0 otherwise
sub l2h_to_html($)
{
  my $self = shift;

  # when there are no tex constructs to convert (happens in case everything
  # comes from the cache), there is no latex2html run
  if ($self->get_conf('L2H_SKIP') or ($latex_converted_count == 0)) {
    warn "# l2h: skipping latex2html run\n" if ($verbose);
    return 1;
  }

  my $tmp_dir = $self->get_conf('L2H_TMP');

  # Check for dot in directory where dvips will work
  my $dotbug;
  if ($tmp_dir) {
    if ($tmp_dir =~ /\./) {
      $self->document_warn($self->__("l2h: L2H_TMP directory contains a dot"));
      $dotbug = 1;
    }
  } else {
    if (cwd() =~ /\./) {
      $self->document_warn($self->__("l2h: current directory contains a dot"));
      $dotbug = 1;
    }
  }
  return 0 if ($dotbug);

  # NOTE(review): the command is a single string run through the shell and
  # the file and directory names are interpolated unquoted, so names
  # containing spaces or shell metacharacters will not work.
  my $call = $self->get_conf('L2H_L2H');
  # use init file, if specified
  my $init_file = $self->get_conf('L2H_FILE');
  $call = $call . " -init_file " . $init_file
    if (defined($init_file) and $init_file ne ''
        and -f $init_file and -r $init_file);
  # set output dir
  $call .= (($docu_rdir ne '') ? " -dir $docu_rdir" : " -no_subdir");
  # use l2h_tmp, if specified
  $call .= " -tmp ".$tmp_dir
    if (defined($tmp_dir) and $tmp_dir ne '');
  # use a given html version if specified
  my $html_version = $self->get_conf('L2H_HTML_VERSION');
  $call .= " -html_version ".$html_version
    if (defined($html_version) and $html_version ne '');
  # options we want to be sure of
  $call .= " -address 0 -info 0 -split 0 -no_navigation -no_auto_link";
  $call .= " -prefix $l2h_prefix $l2h_latex_file";

  warn "# l2h: executing '$call'\n" if ($verbose);
  if (system($call)) {
    $self->document_error(sprintf($self->__("l2h: command did not succeed: %s"),
                                  $call));
    return 0;
  } else {
    warn "# l2h: latex2html finished successfully\n" if ($verbose);
    return 1;
  }
}
# Third stage: Extract generated contents from latex2html run
# Initialize with: init_from_html
# open $l2h_html_file for reading
# reads in contents into array indexed by numbers
# return 1, on success -- 0, otherwise
# Finish with: finish
# closes $l2h_html_dir/$l2h_name.".$docu_ext"
# the images generated by latex2html have names like ${docu_name}_l2h_img?.png
# they are copied to ${docu_name}_?.png, and html is changed accordingly.
# FIXME is it really necessary to bother doing that? Looks like an unneeded
# complication to me (pertusus, 2009), and it could go bad if there is some
# SRC="(.*?)" in the text (though the regexp could be made more specific).
# %l2h_img; # associate src file to destination file
# such that files are not copied twice
# Rename (or copy, in debug mode) the image files referenced by SRC="..." in
# an html fragment to ${docu_name}_<number><extension>, and change the
# references in the fragment accordingly.
# Arguments: the converter and the html fragment.
# Returns the fragment with the changed references.
sub l2h_change_image_file_names($$)
{
  my $self = shift;
  my $content = shift;
  my @images = ($content =~ /SRC="(.*?)"/g);

  # the numbering of destination files starts at 1 when nothing set it up
  $image_count = 1 unless (defined($image_count));

  foreach my $src (@images) {
    my $dest = $l2h_img{$src};
    unless ($dest) {
      my $ext = '';
      if ($src =~ /.*\.(.*)$/ and (!defined($self->get_conf('EXTENSION'))
                                   or $1 ne $self->get_conf('EXTENSION'))) {
        $ext = ".$1";
      } else {
        # A warning when the image extension is the same than the
        # document extension. copying the file could result in
        # overwriting an output file (almost surely if the default
        # texi2html file names are used).
        $self->document_warn(sprintf($self->__(
                             "l2h: image has invalid extension: %s"), $src));
        # leave that image and the references to it untouched
        next;
      }
      # find a destination file name that is not in use yet
      while (-e File::Spec->catpath($docu_volume, $docu_directories,
                                    "${docu_name}_${image_count}$ext")) {
        $image_count++;
      }
      my $file_src
        = File::Spec->catpath($docu_volume, $docu_directories, $src);
      $dest = "${docu_name}_${image_count}$ext";
      my $file_dest
        = File::Spec->catpath($docu_volume, $docu_directories, $dest);
      if ($debug) {
        # in debug mode the file generated by latex2html is kept
        if (!copy($file_src, $file_dest)) {
          $self->document_warn(sprintf($self->__("l2h: copy %s to %s failed: %s"),
                                       $file_src, $file_dest, $!));
        }
      } else {
        if (!rename($file_src, $file_dest)) {
          $self->document_warn(sprintf($self->__("l2h: rename %s as %s failed: %s"),
                                       $file_src, $file_dest, $!));
        }
      }
      $l2h_img{$src} = $dest;
    }
    # the file name is literal text, not a pattern
    $content =~ s/SRC="\Q$src\E"/SRC="$dest"/g;
  }
  return $content;
}
# Read the html file generated by latex2html and store the text found
# between the l2h_begin and l2h_end comments of every fragment, both in the
# html result array and in the cache hash.
# Argument: the converter.
# Returns 1 on success (or when there was nothing to convert), 0 if the html
# file cannot be opened or if the end of a fragment is not found.
sub l2h_init_from_html($)
{
  my $self = shift;
  # when there are no tex constructs to convert (happens in case everything
  # comes from the cache), the html file that was generated by previous
  # latex2html runs isn't reused.
  if ($latex_converted_count == 0) {
    return 1;
  }

  my $html_fh;
  if (! open($html_fh, '<', $l2h_html_file)) {
    $self->document_warn(sprintf($self->__("l2h: could not open %s: %s"),
                                 $l2h_html_file, $!));
    return 0;
  }
  warn "# l2h: use $l2h_html_file as html file\n" if ($verbose);

  my $html_converted_count = 0;   # number of html resulting texts
                                  # retrieved in the file
  my ($count, $h_line);
  while ($h_line = <$html_fh>) {
    # the name is literal text in the comment, not a pattern
    if ($h_line =~ /!-- l2h_begin \Q$l2h_name\E ([0-9]+) --/) {
      $count = $1;
      my $h_content = '';
      my $h_end_found = 0;
      while ($h_line = <$html_fh>) {
        if ($h_line =~ /!-- l2h_end \Q$l2h_name\E $count --/) {
          $h_end_found = 1;
          chomp $h_content;
          chomp $h_content;
          $html_converted_count++;
          # transform image file names and copy image files
          $h_content = l2h_change_image_file_names($self, $h_content);
          # store result in the html result array
          $l2h_from_html[$count] = $h_content;
          # also add the result in cache hash
          $l2h_cache{$l2h_to_latex[$count]} = $h_content;
          # this fragment is complete, look for the next begin comment
          last;
        }
        $h_content = $h_content.$h_line;
      }
      unless ($h_end_found) {
        # couldn't found the closing comment. Should be a bug.
        $self->document_warn(sprintf($self->__("latex2html.pm: end of \@%s item %d not found"),
                                     $l2h_name, $count));
        close($html_fh);
        return 0;
      }
    }
  }
  close($html_fh);

  # Not the same number of converted elements and retrieved elements
  if ($latex_converted_count != $html_converted_count) {
    $self->document_warn(sprintf($self->__(
      "latex2html.pm: processing produced %d items in HTML; expected %d, the number of items found in the document"),
                                 $html_converted_count, $latex_converted_count));
  }

  warn "# l2h: Got $html_converted_count of $latex_count html contents\n"
    if ($verbose);
  return 1;
}
# $html_output_count = 0; # html text outputed in html result file
# Called each time a construct handled by latex2html is encountered, should
# output the corresponding html.
# Arguments: the converter, the command name, the command tree element and
# the already converted content of the command.
# Returns the html retrieved from latex2html for the command.  If that html
# is missing the default conversion of the command is used instead.  An
# empty string is returned when the latex2html processing failed, or when
# the fragment cannot be identified (an html comment in debug mode).
sub l2h_do_tex($$)
{
  my $self = shift;
  my $cmdname = shift;
  my $command = shift;
  my $content = shift;

  my $counter = $commands_counters{$command};
  return '' unless ($status);

  my $count = $global_count{"${cmdname}_$counter"};
  ################################## begin debug section (incorrect counts)
  if (!defined($count)) {
    # counter is undefined
    $invalid_counter_count++;
    $self->_bug_message(
      sprintf($self->__("l2h: could not determine the fragment %d for \@%s"),
              $counter, $cmdname));
    return ("<!-- l2h: undef count for ${cmdname}_$counter -->")
      if ($debug);
    return '';
  } elsif (($count <= 0) or ($count > $latex_count)) {
    # counter out of range
    $invalid_counter_count++;
    $self->_bug_message("l2h: request of $count out of range [0,$latex_count]");
    return ("<!-- l2h: out of range count $count -->")
      if ($debug);
    return '';
  }
  ################################## end debug section (incorrect counts)

  # this seems to be a valid counter
  my $result = '';
  $result = "<!-- l2h_begin $l2h_name $count -->" if ($debug);
  if (defined($l2h_from_html[$count])) {
    $html_output_count++;
    $result .= $l2h_from_html[$count];
    $result .= "\n" if ($cmdname eq 'tex');
  } else {
    # if the result is not in @l2h_from_html, there is an error somewhere.
    $extract_error_count++;
    $self->document_warn(sprintf($self->__(
      "l2h: could not extract the fragment %d for \@%s with output counter %d from HTML"),
                                 $counter, $cmdname, $count));
    # try simple (ordinary) substitution (without l2h)
    $result .= "<!-- l2h: use default conversion -->" if ($debug);
    $result .= $self->default_commands_conversion($cmdname)->($self,
                                         $cmdname, $command, $content);
  }
  $result .= "<!-- l2h_end $l2h_name $count -->" if ($debug);
  return $result;
}
# Store results in the cache and remove temporary files.
# Argument: the converter.
# Does nothing if the latex2html processing did not succeed.
# Always returns 1.
sub l2h_finish($)
{
  my $self = shift;
  return 1 unless ($status);

  if ($verbose) {
    # the error counters are undefined as long as no error was counted
    my $extract_errors = $extract_error_count || 0;
    my $counter_errors = $invalid_counter_count || 0;
    if ($extract_errors + $counter_errors) {
      warn "# l2h: finished from html ($extract_errors extract and $counter_errors invalid counter errors)\n";
    } else {
      warn "# l2h: finished from html (no error)\n";
    }
    if ($html_output_count != $latex_converted_count) {
      # this may happen if @-commands are collected at some places
      # but @-command at those places are not expanded later. For
      # example @math on @multitable lines.
      warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n";
    }
  }

  # keep what latex2html generated for the next runs
  l2h_store_cache($self);

  if ($self->get_conf('L2H_CLEAN')) {
    warn "# l2h: removing temporary files generated by l2h extension\n"
      if ($verbose);
    my $quoted_l2h_name = quotemeta($l2h_name);
    my $dir = $docu_rdir;
    $dir = File::Spec->curdir() if ($dir eq '');
    if (opendir(my $dir_handle, $dir)) {
      my @l2h_files = grep { /^$quoted_l2h_name/ } readdir($dir_handle);
      closedir($dir_handle);
      foreach my $file (@l2h_files) {
        my $path
          = File::Spec->catpath($docu_volume, $docu_directories, $file);
        unless (unlink($path)) {
          $self->document_warn(sprintf($self->__("l2h: could not remove %s: %s"),
                                       $path, $!));
        }
      }
    } else {
      $self->document_warn(sprintf($self->__("l2h: could not open directory %s: %s"),
                                   $dir, $!));
    }
  }
  warn "# l2h: Finished\n" if $verbose;
  return 1;
}
# stuff for l2h caching
# FIXME it is clear that l2h stuff takes very long compared with texi2any
# which is already quite long. However this also adds some complexity
# I tried doing this with a dbm data base, but it did not store all
# keys/values. Hence, I did as latex2html does it
# Load the cache file, if there is a readable one, by running it as perl
# code.  Argument: the converter.
# An error is reported when the file does not return a true value.
sub l2h_init_cache($)
{
  my $self = shift;
  return unless (-r $l2h_cache_file);

  # "do FILE" looks a relative file name up through @INC, which does not
  # contain the current directory anymore on recent perls; an absolute file
  # name is used as is.
  my $cache_path = File::Spec->rel2abs($l2h_cache_file);
  my $loaded = do $cache_path;
  unless ($loaded) {
    # $@ is set when the file could not be compiled, $! when it could not
    # be read
    my $reason = $@ || $!;
    $self->document_error(sprintf($self->__("l2h: could not load %s: %s"),
                                  $l2h_cache_file, $reason));
  }
  return;
}
# Store all the text obtained through latex2html in the cache file, as perl
# code setting one %l2h_cache entry per text.  Argument: the converter.
# Nothing is written when there was no text to convert, nor when the cache
# file cannot be opened (an error is reported in that case).
sub l2h_store_cache($)
{
  my $self = shift;
  return unless $latex_count;

  my $cache_fh;
  unless (open($cache_fh, '>', $l2h_cache_file)) {
    $self->document_error(sprintf($self->__("l2h: could not open %s for writing: %s"),
                                  $l2h_cache_file, $!));
    # nothing can be written without the file
    return;
  }
  foreach my $cached_text (sort(keys(%l2h_cache))) {
    my $key = $cached_text;
    my $value = $l2h_cache{$cached_text};
    # escape stuff: the key is written within q/.../, the value within
    # q|...|
    $key =~ s|/|\\/|g;
    $key =~ s|\\\\/|\\/|g;
    # weird, a \ at the end of the key results in an error
    # maybe this also broke the dbm database stuff
    $key =~ s|\\$|\\\\|;
    $value =~ s/\|/\\\|/go;
    $value =~ s/\\\\\|/\\\|/go;
    $value =~ s|\\\\|\\\\\\\\|g;
    print $cache_fh "\n\$l2h_cache_key = q/$key/;\n";
    print $cache_fh "\$l2h_cache{\$l2h_cache_key} = q|$value|;\n";
  }
  # the file has to return a true value for l2h_init_cache
  print $cache_fh "1;";
  # buffered write errors only show up when the file is closed
  unless (close($cache_fh)) {
    $self->document_error(sprintf($self->__("l2h: error on closing %s: %s"),
                                  $l2h_cache_file, $!));
  }
  return;
}
# Look a text up in the cache.
# Returns the cached html for the text if there is one and if every picture
# it references with SRC="..." exists in the destination directory; returns
# undef otherwise.
sub l2h_from_cache($)
{
  my $text = shift;
  my $cached = $l2h_cache{$text};
  # not in the cache at all
  return undef unless (defined($cached));

  while ($cached =~ m/SRC="(.*?)"/g) {
    my $picture
      = File::Spec->catpath($docu_volume, $docu_directories, $1);
    # a missing picture makes the cached html unusable
    return undef unless (-e $picture);
  }
  return $cached;
}