/[pcre]/code/branches/pcre16/132html
ViewVC logotype

Contents of /code/branches/pcre16/132html

Parent Directory | Revision Log


Revision 99 - (show annotations) (download)
Tue Mar 6 12:27:42 2007 UTC (7 years, 1 month ago) by ph10
Original Path: code/trunk/maintain/132html
File size: 6796 byte(s)
1. Move the comment about version numbers from pcre.h.in to configure.ac 
because that's where they are now set.
2. Update all the man pages to remove the use of .br and .in because this
causes trouble for some HTML converters. Also standardised the final sections 
giving author information and revision date.
3. Update the maintain/132html man page converter to handle .nf/.fi and to barf 
at .br/.in.

1 #! /usr/bin/perl -w
2
3 # Script to turn PCRE man pages into HTML
4
5
# Subroutine to handle font changes and other escapes.
#
# Takes one string (a line of man page text) and returns a converted copy:
#   - "<" and ">" become the numeric entities &#60; and &#62;
#   - \fI...\fR or \fI...\fP becomes <i>...</i>
#   - \fB...\fR or \fB...\fP becomes <b>...</b>
#   - \e becomes a single backslash
#   - "(c)" immediately after "Copyright " becomes &copy;
# The argument itself is not modified.

sub do_line {
    my ($text) = @_;

    # Alias $_ to the private copy so each substitution can be written
    # without repeating the variable name; foreach restores the caller's $_.
    for ($text) {
        s{<}{&#60;}g;                        # Angle brackets first, so the
        s{>}{&#62;}g;                        # tags added below stay intact
        s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;     # Italic runs
        s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;     # Bold runs
        s{\\e}{\\}g;                         # Escaped backslash
        s{(?<=Copyright )\(c\)}{&copy;}g;    # Copyright sign
    }

    return $text;
}
19
# Subroutine to ensure not in a paragraph.
#
# If a paragraph is open ($inpara), writes its closing markup to TEMP:
# "</PRE>" first when a literal section is also open ($inpre), then "</P>".
# In every case the $inpara, $inpre and $wrotetext flags are cleared.

sub end_para {
    if ($inpara) {
        my $closing = $inpre ? "</PRE>\n</P>\n" : "</P>\n";
        print TEMP $closing;
    }

    $inpara    = 0;
    $inpre     = 0;
    $wrotetext = 0;
}
31
# Subroutine to start a new paragraph.
#
# Closes whatever paragraph is currently open (via end_para), writes the
# opening "<P>" tag to TEMP, and records that a paragraph is now open.

sub new_para {
    end_para();

    my $opening = "<P>\n";
    print TEMP $opening;

    $inpara = 1;
}
39
40
# Main program
#
# Reads a man page from STDIN and writes its HTML rendering to STDOUT.
#
# Arguments: any leading arguments that start with "-" are consumed; only
# "-toc" has an effect (it requests a table of contents). The first remaining
# argument is used as the page name in the <title> and <h1> of the output.
#
# The HTML header (and the table of contents, if requested) is written to
# STDOUT as the input is read, while the page body is collected through the
# TEMP filehandle and copied to STDOUT at the end. TEMP writes to an
# in-memory buffer rather than a file in /tmp, so there is no predictable
# temporary file name and nothing is left behind if processing is abandoned.

$innf = 0;          # Inside a .nf/.fi section
$inpara = 0;        # A <P> is open
$inpre = 0;         # A <pre> is open
$wrotetext = 0;     # Text has been written since the last paragraph end
$toc = 0;           # Table of contents requested
$ref = 1;           # Number of the next TOC/SEC anchor

while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
{
  $toc = 1 if $ARGV[0] eq "-toc";
  shift;
}

# Initial output to STDOUT

print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# Collect the page body in memory. The handle keeps the name TEMP because the
# paragraph subroutines print to it.

my $body = '';
open(TEMP, '>', \$body) || die "Can't open in-memory buffer for output\n";

LINE: while (<STDIN>)
{
  # Handle lines beginning with a dot

  if (/^\./)
  {
    # Some of the PCRE man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
    {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
    }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
    {
      $innf = 1;
    }

    elsif (/^\.fi/)
    {
      $innf = 0;
    }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows. The point being that
    # the <pre> and </pre> that delimit literal sections will do the spacing.
    # Always skip if no previous output.

    elsif (/^\.sp/)
    {
      if ($wrotetext)
      {
        $_ = <STDIN>;
        if ($inpre)
        {
          print TEMP "\n" if (/^[\s.]/);
        }
        else
        {
          print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
        }
        redo LINE;    # Now process the lookahead line we just read
      }
    }
    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
    {
      new_para();
    }
    elsif (/^\.SH\s*("?)(.*)\1/)
    {
      my $heading = $2;    # Save before any further matching

      # Ignore the NAME section: discard the line that follows it too
      if ($heading =~ /^NAME\b/)
      {
        scalar(<STDIN>);
        next LINE;
      }

      end_para();
      my($title) = do_line($heading);
      if ($toc)
      {
        # The title is passed as a %s argument, not interpolated into the
        # format, so a "%" in a heading cannot be taken as a conversion.
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
      }
      else
      {
        print TEMP "<br><b>\n$title\n</b><br>\n";
      }
    }
    elsif (/^\.SS\s*("?)(.*)\1/)
    {
      end_para();
      my($title) = do_line($2);
      print TEMP "<br><b>\n$title\n</b><br>\n";
    }
    elsif (/^\.B\s*(.*)/)
    {
      new_para() if (!$inpara);
      $_ = do_line($1);
      s/"(.*?)"/$1/g;       # Drop the quotes around quoted arguments
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
    }
    elsif (/^\.I\s*(.*)/)
    {
      new_para() if (!$inpara);
      $_ = do_line($1);
      s/"(.*?)"/$1/g;       # Drop the quotes around quoted arguments
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
    }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link.

    elsif (/^\.\\"\s*HREF/)
    {
      $_ = <STDIN>;
      chomp;
      $_ = do_line($_);
      $_ =~ s/\s+$//;
      if (/^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/)
      {
        print TEMP "<a href=\"$1.html\">$_</a>\n";
      }
      else
      {
        # No link target can be extracted: report it and keep the text,
        # rather than emitting a link to ".html".
        print STDERR "*** Cannot derive a link target from HREF text: $_\n";
        print TEMP "$_\n";
      }
    }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
    {
      print TEMP $1;
    }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
    {
      my($markup) = $1;
      $_ = <STDIN>;
      chomp;
      $_ = do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
    }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
    {
      my $is_shell = defined($1);    # True for JOINSH
      my($one,$two);
      $one = <STDIN>;
      $two = <STDIN>;
      $one =~ s/\s*\\e\s*$// if ($is_shell);
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo LINE;    # Process the joined lines
    }

    # Ignore anything not recognized

    next LINE;
  }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
  {
    end_para() if ($wrotetext);
    next LINE;
  }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = do_line($_);
  new_para() if (!$inpara);

  if (/^\s/)
  {
    if (!$inpre)
    {
      print TEMP "<pre>\n";
      $inpre = 1;
    }
  }
  elsif ($inpre)
  {
    print TEMP "</pre>\n";
    $inpre = 0;
  }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
}

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output. The buffer is read back through
# a filehandle so that it passes through the same I/O layers as it did when
# it was written.

close(TEMP);
open(TEMP, '<', \$body) || die "Can't open in-memory buffer for input\n";

print while (<TEMP>);

print <<End ;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End

close(TEMP);

# End

Properties

Name Value
svn:executable *

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12