/[pcre]/code/trunk/132html
ViewVC logotype

Contents of /code/trunk/132html

Parent Directory | Revision Log


Revision 99 - (hide annotations) (download)
Tue Mar 6 12:27:42 2007 UTC (7 years, 7 months ago) by ph10
Original Path: code/trunk/maintain/132html
File size: 6796 byte(s)
1. Move the comment about version numbers from pcre.h.in to configure.ac 
because that's where they are now set.
2. Update all the man pages to remove the use of .br and .in because this
causes trouble for some HTML converters. Also standardised the final sections 
giving author information and revision date.
3. Update the maintain/132html man page converter to handle .nf/.fi and to barf 
at .br/.in.

1 ph10 97 #! /usr/bin/perl -w
2    
3     # Script to turn PCRE man pages into HTML
4    
5    
6     # Subroutine to handle font changes and other escapes
7    
sub do_line {
    # Convert one line of man page text to HTML text.
    #
    # Argument: the raw text (an undefined value is treated as empty).
    # Returns:  the text with HTML-special characters escaped, troff font
    #           changes turned into <i>/<b> elements, the troff escape \e
    #           turned into a literal backslash, and "Copyright (c)"
    #           rendered with the copyright entity.
    my ($s) = @_;
    $s = "" unless defined $s;

    # Escape '&' before anything else: the substitutions below generate
    # entities of their own, which must not be escaped a second time.
    $s =~ s/&/&amp;/g;
    $s =~ s/</&#60;/g;                          # Deal with < and >
    $s =~ s/>/&#62;/g;

    # \fI...\fR (or \fP) is italic, \fB...\fR (or \fP) is bold. Only font
    # changes that are closed on the same line are converted.
    $s =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
    $s =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;

    $s =~ s{\\e}{\\}g;                          # troff \e is a backslash
    $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
    return $s;
}
19    
20     # Subroutine to ensure not in a paragraph
21    
sub end_para {
    # Close whatever is currently open in TEMP. Nothing is written unless
    # a paragraph is open; a literal section inside it is closed first.
    if ($inpara) {
        my $closing = "</P>\n";
        $closing = "</PRE>\n" . $closing if ($inpre);
        print TEMP $closing;
    }

    # Whether or not anything was open, the state is reset.
    $inpre = 0;
    $inpara = 0;
    $wrotetext = 0;
}
31    
32     # Subroutine to start a new paragraph
33    
sub new_para {
    # Finish anything that is still open, then emit the opening tag and
    # record that a paragraph is now in progress.
    end_para();
    print TEMP "<P>\n";
    $inpara = 1;
}
39    
40    
41     # Main program
42    
# Conversion state. These are package variables rather than lexicals
# because the subroutines above read and update them as well.

$innf = 0;          # set between .nf and .fi
$inpara = 0;        # a <P> is currently open in TEMP
$inpre = 0;         # a <pre> is currently open in TEMP
$wrotetext = 0;     # text has been output since the last end_para()
$toc = 0;           # -toc given: build a table of contents
$ref = 1;           # number of the next TOC entry

# Leading arguments that start with "-" are options. Only -toc is
# recognized; any other option is silently discarded.

while (@ARGV && $ARGV[0] =~ /^-/) {
    $toc = 1 if $ARGV[0] eq "-toc";
    shift @ARGV;
}

# The first remaining argument is the page name used in the title. Fall
# back to an empty name so that a missing argument does not interpolate an
# undefined value.

my $page = defined $ARGV[0] ? $ARGV[0] : "";

# Initial output to STDOUT

print <<End ;
<html>
<head>
<title>$page specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$page man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# The body of the page is collected in a temporary file so that the table
# of contents can be written to STDOUT first. The file name is predictable,
# so create it exclusively: O_EXCL refuses to follow a symbolic link or to
# reuse a file that somebody else has planted. A stale file left over from
# an earlier run of our own is removed first.

use Fcntl qw(O_WRONLY O_CREAT O_EXCL);

unlink("/tmp/$$");
sysopen(TEMP, "/tmp/$$", O_WRONLY | O_CREAT | O_EXCL, 0600)
    || die "Can't open /tmp/$$ for output: $!\n";
78    
# Read the man page source from STDIN one line at a time. The converted
# body goes to TEMP; table-of-contents entries, when enabled, go straight
# to STDOUT. Several branches read ahead and then "redo" so that the line
# they fetched is processed as if the loop had read it.

LINE:
while (<STDIN>) {

    # Handle lines beginning with a dot

    if (/^\./) {

        # Some of the PCRE man pages used to contain instances of .br.
        # However, they should have all been removed because they cause
        # trouble in some (other) automated systems that translate man pages
        # to HTML. Complain if we find .br or .in (another macro that is
        # deprecated). The temporary file is removed before giving up so
        # that it is not left behind.

        if (/^\.br/ || /^\.in/) {
            print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
            print STDERR "*** $_\n";
            close(TEMP);
            unlink("/tmp/$$");
            die "*** Processing abandoned\n";
        }

        # Instead of .br, relevant "literal" sections are enclosed in
        # .nf/.fi.

        elsif (/^\.nf/) {
            $innf = 1;
        }

        elsif (/^\.fi/) {
            $innf = 0;
        }

        # Handling .sp is subtle. If it is inside a literal section, do
        # nothing if the next line is a non literal text line; similarly, if
        # not inside a literal section, do nothing if a literal follows. The
        # point being that the <pre> and </pre> that delimit literal
        # sections will do the spacing. Always skip if no previous output.

        elsif (/^\.sp/) {
            if ($wrotetext) {
                $_ = <STDIN>;
                last LINE unless defined $_;    # .sp was the final line
                if ($inpre) {
                    print TEMP "\n" if (/^[\s.]/);
                }
                else {
                    print TEMP "<br>\n<br>\n" unless (/^[\s.]/);
                }
                redo LINE;      # Now process the lookahead line we just read
            }
        }

        # Anything starting .TP or .P (which includes .PP) begins a new
        # paragraph.

        elsif (/^\.(?:TP|P)/) {
            new_para();
        }

        # Section heading, optionally enclosed in double quotes

        elsif (/^\.SH\s*("?)(.*)\1/) {
            my $heading = $2;

            # Ignore the NAME section, together with the line after it
            if ($heading =~ /^NAME\b/) {
                <STDIN>;
                next LINE;
            }

            end_para();
            my $title = do_line($heading);
            if ($toc) {
                # The title is passed as data, never as part of the format,
                # so that a "%" in a heading cannot be taken as a conversion.
                printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                    $ref, $ref, $title);
                printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                    $ref, $title);
                $ref++;
            }
            else {
                print TEMP "<br><b>\n$title\n</b><br>\n";
            }
        }

        # Subsection heading, optionally enclosed in double quotes

        elsif (/^\.SS\s*("?)(.*)\1/) {
            my $heading = $2;
            end_para();
            my $title = do_line($heading);
            print TEMP "<br><b>\n$title\n</b><br>\n";
        }

        # .B and .I set the rest of the line in bold or italic. Double
        # quotes that enclose text are dropped.

        elsif (/^\.([BI])\s*(.*)/) {
            my $tag  = lc($1);
            my $text = $2;
            new_para() if (!$inpara);
            $text = do_line($text);
            $text =~ s/"(.*?)"/$1/g;
            print TEMP "<$tag>$text</$tag>\n";
            $wrotetext = 1;
        }

        # A comment that starts "HREF" takes the next line as a name that
        # is turned into a hyperlink, using the text given, which might be
        # in a special font. If it ends in () or (digits) or punctuation,
        # they aren't part of the link.

        elsif (/^\.\\"\s*HREF/) {
            my $text = <STDIN>;
            last LINE unless defined $text;     # nothing follows the HREF
            chomp $text;
            $text = do_line($text);
            $text =~ s/\s+$//;
            if ($text =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/) {
                print TEMP "<a href=\"$1.html\">$text</a>\n";
            }
            else {
                # No link target can be extracted: keep the text, but do
                # not emit a link that points nowhere.
                print STDERR "*** Cannot derive HREF target from: $text\n";
                print TEMP "$text\n";
            }
        }

        # A comment that starts "HTML" inserts literal HTML

        elsif (/^\.\\"\s*HTML\s*(.*)/) {
            print TEMP $1;
        }

        # A comment that starts < inserts that HTML at the end of the
        # *next* input line - so as not to get a newline between them.

        elsif (/^\.\\"\s*(<.*>)/) {
            my $markup = $1;
            my $text = <STDIN>;
            $text = "" unless defined $text;    # comment was the final line
            chomp $text;
            $text = do_line($text);
            $text =~ s/\s+$//;
            print TEMP "$text$markup\n";
        }

        # A comment that starts JOIN joins the next two lines together, with
        # one space between them. Then that line is processed. This is used
        # in some displays where two lines are needed for the "man" version.
        # JOINSH works the same, except that it assumes this is a shell
        # command, so removes continuation backslashes.

        elsif (/^\.\\"\s*JOIN(SH)?/) {
            my $is_shell = defined($1);
            my $one = <STDIN>;
            my $two = <STDIN>;
            last LINE unless defined $one;      # nothing left to join
            $two = "" unless defined $two;
            $one =~ s/\s*\\e\s*$// if ($is_shell);
            chomp($one);
            $two =~ s/^\s+//;
            $_ = "$one $two";
            redo LINE;          # Process the joined lines
        }

        # Ignore anything not recognized

        next LINE;
    }

    # Line does not begin with a dot. Replace blank lines with new
    # paragraphs

    if (/^\s*$/) {
        end_para() if ($wrotetext);
        next LINE;
    }

    # Convert fonts changes and output an ordinary line. Ensure that
    # indented lines are marked as literal.

    $_ = do_line($_);
    new_para() if (!$inpara);

    if (/^\s/) {
        if (!$inpre) {
            print TEMP "<pre>\n";
            $inpre = 1;
        }
    }
    elsif ($inpre) {
        print TEMP "</pre>\n";
        $inpre = 0;
    }

    # Add <br> to the end of a non-literal line if we are within .nf/.fi

    $_ .= "<br>\n" if (!$inpre && $innf);

    print TEMP;
    $wrotetext = 1;
}
275    
# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# If the input ended inside a paragraph or a literal section, close it now
# so that the footer below does not end up inside a <pre> element.

end_para();

# Copy the remainder to the standard output. Errors from buffered writes
# only show up when the file is closed, so check the close: otherwise a
# truncated page would be produced without any complaint.

close(TEMP) || die "Can't finish writing /tmp/$$: $!\n";
open(TEMP, "<", "/tmp/$$") || die "Can't open /tmp/$$ for input: $!\n";

print while (<TEMP>);

print <<End ;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End

close(TEMP);
unlink("/tmp/$$");
295    
296     # End

Properties

Name Value
svn:executable *

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12