/[pcre]/code/trunk/132html
ViewVC logotype

Contents of /code/trunk/132html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (show annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 8 months ago) by ph10
Original Path: code/trunk/maintain/132html
File size: 6045 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 #! /usr/bin/perl -w
2
3 # Script to turn PCRE man pages into HTML
4
5
# Subroutine to handle font changes and other escapes.
#
# Takes one string of man page text and returns a copy converted for HTML
# output: angle brackets become numeric entities, \fI...\fR (or \fP) and
# \fB...\fR (or \fP) become <i>...</i> and <b>...</b>, \e becomes a single
# backslash, and "(c)" directly after "Copyright " becomes &copy;.

sub do_line {
    my $text = shift;

    # Deal with < and > first, so that the tags inserted below are untouched.
    $text =~ s{<}{&#60;}g;
    $text =~ s{>}{&#62;}g;

    # Italic is handled completely before bold. The two passes are kept
    # separate on purpose: merging them changes the result for nested fonts.
    for my $font (qw(I B)) {
        my $tag = lc $font;
        $text =~ s{\\f${font}(.*?)\\f[RP]}{<$tag>$1</$tag>}g;
    }

    $text =~ s{\\e}{\\}g;
    $text =~ s{(?<=Copyright )\(c\)}{&copy;}g;

    return $text;
}
19
# Subroutine to ensure not in a paragraph.
#
# If a paragraph is open, writes its closing markup to TEMP (closing an open
# literal section first). The paragraph, literal and text-written flags are
# always cleared, whether or not anything was open.

sub end_para {
    if ($inpara) {
        my $closing = "</P>\n";
        $closing = "</PRE>\n" . $closing if $inpre;
        print TEMP $closing;
    }

    $inpre = 0;
    $inpara = 0;
    $wrotetext = 0;
}
31
# Subroutine to start a new paragraph.
#
# Closes whatever paragraph is currently open via end_para(), writes the
# opening <P> to TEMP, and records that a paragraph is now open.

sub new_para {
    end_para();

    print TEMP "<P>\n";

    $inpara = 1;
}
39
40
# Main program
#
# Converts the man page read from STDIN into HTML on STDOUT. The only
# recognized option is -toc, which adds a table of contents; any other
# argument starting with "-" is skipped. The first remaining argument is
# used as the page name in the title and heading.
#
# The page body is written through the TEMP handle (also used by end_para
# and new_para) while table-of-contents entries go straight to STDOUT, so
# that the complete contents list precedes the body in the final output.
# TEMP is an in-memory buffer rather than a predictably named file in /tmp,
# so nothing can be clobbered or left behind on disk.

$inpara = 0;        # Set while a <P> is open
$inpre = 0;         # Set while a <pre> literal section is open
$wrotetext = 0;     # Set once text has been written; cleared by end_para
$toc = 0;           # Set by the -toc option
$ref = 1;           # Number used for the next TOC/SEC anchor pair

while (@ARGV && $ARGV[0] =~ /^-/) {
    $toc = 1 if $ARGV[0] eq "-toc";
    shift @ARGV;
}

# Initial output to STDOUT

print <<End;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

my $body = '';
open(TEMP, '>', \$body) || die "Can't open in-memory buffer for output: $!\n";

LINE:
while (<STDIN>) {
    # Handle lines beginning with a dot

    if (/^\./) {
        if (/^\.br/) {
            print TEMP "<br>\n" if $wrotetext;
        }

        # Handling .sp is subtle. If it is inside a literal section, do
        # nothing if the next line is a non literal text line; similarly, if
        # not inside a literal section, do nothing if a literal follows. The
        # point being that the <pre> and </pre> that delimit literal sections
        # will do the spacing. Always skip if no previous output.

        elsif (/^\.sp/) {
            if ($wrotetext) {
                $_ = <STDIN>;
                $_ = '' if !defined($_);    # At EOF, act as for a blank line
                if ($inpre) {
                    print TEMP "\n" if (/^[\s.]/);
                }
                else {
                    print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
                }
                redo LINE;    # Now process the lookahead line we just read
            }
        }

        # Note that the last alternative matches any request whose name
        # starts with "P", not just .P itself.

        elsif (/^\.(?:TP|PP|P)/) {
            &new_para();
        }
        elsif (/^\.SH\s*("?)(.*)\1/) {
            my $heading = $2;

            # Ignore the NAME section, together with the line that follows it
            if ($heading =~ /^NAME\b/) {
                <STDIN>;
                next LINE;
            }

            &end_para();
            my($title) = &do_line($heading);
            if ($toc) {
                # The title is passed as an argument, never as part of the
                # format, so that a "%" in a heading is output literally.
                printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                    $ref, $ref, $title);
                printf TEMP "<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                    $ref, $title;
                $ref++;
            }
            else {
                print TEMP "<br><b>\n$title\n</b><br>\n";
            }
        }
        elsif (/^\.SS\s*("?)(.*)\1/) {
            my $heading = $2;
            &end_para();
            my($title) = &do_line($heading);
            print TEMP "<br><b>\n$title\n</b><br>\n";
        }
        elsif (/^\.B\s*(.*)/) {
            my $text = $1;
            &new_para() if (!$inpara);
            $_ = &do_line($text);
            s/"(.*?)"/$1/g;    # Drop the quotes around quoted arguments
            print TEMP "<b>$_</b>\n";
            $wrotetext = 1;
        }
        elsif (/^\.I\s*(.*)/) {
            my $text = $1;
            &new_para() if (!$inpara);
            $_ = &do_line($text);
            s/"(.*?)"/$1/g;    # Drop the quotes around quoted arguments
            print TEMP "<i>$_</i>\n";
            $wrotetext = 1;
        }

        # A comment that starts "HREF" takes the next line as a name that
        # is turned into a hyperlink, using the text given, which might be
        # in a special font. If it ends in () or (digits) or punctuation, they
        # aren't part of the link. If the line does not have that shape, the
        # text is output without a link rather than with an empty target.

        elsif (/^\.\\"\s*HREF/) {
            $_ = <STDIN>;
            last LINE if !defined($_);    # HREF was the last line of input
            chomp;
            $_ = &do_line($_);
            $_ =~ s/\s+$//;
            if (/^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/) {
                print TEMP "<a href=\"$1.html\">$_</a>\n";
            }
            else {
                print TEMP "$_\n";
            }
        }

        # A comment that starts "HTML" inserts literal HTML (no newline is
        # added after it)

        elsif (/^\.\\"\s*HTML\s*(.*)/) {
            print TEMP $1;
        }

        # A comment that starts < inserts that HTML at the end of the
        # *next* input line - so as not to get a newline between them.

        elsif (/^\.\\"\s*(<.*>)/) {
            my($markup) = $1;
            $_ = <STDIN>;
            $_ = '' if !defined($_);    # At EOF, still emit the markup
            chomp;
            $_ = &do_line($_);
            $_ =~ s/\s+$//;
            print TEMP "$_$markup\n";
        }

        # A comment that starts JOIN joins the next two lines together, with
        # one space between them. Then that line is processed. This is used in
        # some displays where two lines are needed for the "man" version.
        # JOINSH works the same, except that it assumes this is a shell
        # command, so removes continuation backslashes.

        elsif (/^\.\\"\s*JOIN(SH)?/) {
            my $is_shell = defined($1);
            my $one = <STDIN>;
            my $two = <STDIN>;
            last LINE if !defined($one);    # Nothing left to join
            $two = '' if !defined($two);
            $one =~ s/\s*\\e\s*$// if $is_shell;
            chomp($one);
            $two =~ s/^\s+//;
            $_ = "$one $two";
            redo LINE;    # Process the joined lines
        }

        # Ignore anything not recognized

        next LINE;
    }

    # Line does not begin with a dot. Replace blank lines with new paragraphs

    if (/^\s*$/) {
        &end_para() if ($wrotetext);
        next LINE;
    }

    # Convert fonts changes and output an ordinary line. Ensure that indented
    # lines are marked as literal.

    $_ = &do_line($_);
    &new_para() if (!$inpara);

    if (/^\s/) {
        if (!$inpre) {
            print TEMP "<pre>\n";
            $inpre = 1;
        }
    }
    elsif ($inpre) {
        print TEMP "</pre>\n";
        $inpre = 0;
    }

    print TEMP $_;
    $wrotetext = 1;
}

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output

close(TEMP);
print $body;

print <<End;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End
271
272 # End

Properties

Name Value
svn:executable *

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12