#! /usr/bin/perl -w # Script to turn PCRE man pages into HTML # Subroutine to handle font changes and other escapes sub do_line { my($s) = $_[0]; $s =~ s/</g; # Deal with < and > $s =~ s/>/>/g; $s =~ s"\\fI(.*?)\\f[RP]"$1"g; $s =~ s"\\fB(.*?)\\f[RP]"$1"g; $s =~ s"\\e"\\"g; $s =~ s/(?<=Copyright )\(c\)/©/g; $s; } # Subroutine to ensure not in a paragraph sub end_para { if ($inpara) { print TEMP "\n" if ($inpre); print TEMP "
\n"; } $inpara = $inpre = 0; $wrotetext = 0; } # Subroutine to start a new paragraph sub new_para { &end_para(); print TEMP "\n";
$inpara = 1;
}
# Main program
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
{
$toc = 1 if $ARGV[0] eq "-toc";
shift;
}
# Initial output to STDOUT
print <
Return to the PCRE index page.
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
$ARGV[0] man page
End
print "\n" if ($toc);
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
while (
\n" if ($toc);
# Copy the remainder to the standard output
close(TEMP);
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
print while (
\n" if $wrotetext;
}
# Handling .sp is subtle. If it is inside a literal section, do nothing if
# the next line is a non literal text line; similarly, if not inside a
# literal section, do nothing if a literal follows. The point being that
# the and
that delimit literal sections will do the spacing.
# Always skip if no previous output.
elsif (/^\.sp/)
{
if ($wrotetext)
{
$_ =
\n
\n" if (!/^[\s.]/);
}
redo; # Now process the lookahead line we just read
}
}
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
{
&new_para();
}
elsif (/^\.SH\s*("?)(.*)\1/)
{
# Ignore the NAME section
if ($2 =~ /^NAME\b/)
{
$title
\n",
$ref, $ref);
$ref++;
}
else
{
print TEMP "
\n$title\n
\n";
}
}
elsif (/^\.SS\s*("?)(.*)\1/)
{
&end_para();
my($title) = &do_line($2);
print TEMP "
\n$title\n
\n";
}
elsif (/^\.B\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "$_\n";
$wrotetext = 1;
}
elsif (/^\.I\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "$_\n";
$wrotetext = 1;
}
# A comment that starts "HREF" takes the next line as a name that
# is turned into a hyperlink, using the text given, which might be
# in a special font. If it ends in () or (digits) or punctuation, they
# aren't part of the link.
elsif (/^\.\\"\s*HREF/)
{
$_=\n";
$inpre = 1;
}
}
elsif ($inpre)
{
print TEMP "\n";
$inpre = 0;
}
print TEMP;
$wrotetext = 1;
}
# The TOC, if present, will have been written - terminate it
print "