/[pcre]/code/trunk/132html
ViewVC logotype

Contents of /code/trunk/132html

Parent Directory | Revision Log


Revision 97 - (hide annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 6 months ago) by ph10
Original Path: code/trunk/maintain/132html
File size: 6045 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 ph10 97 #! /usr/bin/perl -w
2    
3     # Script to turn PCRE man pages into HTML
4    
5    
# Subroutine to handle font changes and other escapes
#
# Takes one string of man page text and returns a converted copy; the
# argument itself is not modified. The conversions, in order, are:
#   - every "<" and ">" becomes a numeric character reference;
#   - \fI...\fR or \fI...\fP becomes <i>...</i>;
#   - \fB...\fR or \fB...\fP becomes <b>...</b>;
#   - the \e escape becomes a single backslash;
#   - "(c)" immediately after "Copyright " becomes &copy;.
# The angle brackets are converted first so that the tags inserted by the
# later steps are left alone.

sub do_line {
    my ($text) = @_;

    # Topicalise the copy so that each substitution applies to it directly.
    for ($text) {
        s/</&#60;/g;
        s/>/&#62;/g;
        s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
        s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
        s"\\e"\\"g;
        s/(?<=Copyright )\(c\)/&copy;/g;
    }

    return $text;
}
19    
# Subroutine to ensure not in a paragraph
#
# If a paragraph is open ($inpara), writes its closing markup to TEMP: a
# closing </PRE> first when a literal section is also open ($inpre), then
# the closing </P>. In every case the $inpara, $inpre and $wrotetext flags
# are reset to zero afterwards.

sub end_para {
    if ($inpara) {
        my $closing = "</P>\n";
        $closing = "</PRE>\n" . $closing if ($inpre);
        print TEMP $closing;
    }

    $inpre = 0;
    $inpara = 0;
    $wrotetext = 0;
}
31    
# Subroutine to start a new paragraph
#
# Closes whatever paragraph is currently open (via end_para), writes the
# opening <P> to TEMP, and records in $inpara that a paragraph is now open.

sub new_para {
    end_para();
    print TEMP ("<P>\n");
    $inpara = 1;
}
39    
40    
# Main program
#
# Reads man page source from STDIN and writes the HTML version to STDOUT.
# Any leading arguments that start with "-" are options; the only one that
# has an effect is -toc. The first remaining argument is used as the page
# name in the HTML title and heading.
#
# With -toc, an entry for every .SH heading is written straight to STDOUT
# as the heading is met, forming a table of contents. The page body is
# written to the TEMP handle instead and is copied to STDOUT only once all
# the input has been read, so that the whole table of contents comes first.

$inpara = 0;       # Non-zero while a <P> is open in the body
$inpre = 0;        # Non-zero while a <pre> literal section is open
$wrotetext = 0;    # Non-zero once text has been written since the last reset
$toc = 0;          # Non-zero when -toc was given
$ref = 1;          # Number used for the next TOC/SEC anchor pair

while ($#ARGV >= 0 && $ARGV[0] =~ /^-/) {
    $toc = 1 if $ARGV[0] eq "-toc";
    shift;
}

# Initial output to STDOUT

print <<"End";
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# The body is collected in memory. TEMP used to be a file called /tmp/<pid>,
# but that name is predictable, so another user could plant a file or a
# symbolic link there beforehand, and the file was left behind if the script
# died part way through. TEMP is kept as the handle name because end_para and
# new_para write to it.

my $body = '';
open(TEMP, '>', \$body) or die "Can't open in-memory buffer for output: $!\n";

LINE: while (<STDIN>) {
    # Handle lines beginning with a dot

    if (/^\./) {
        if (/^\.br/) {
            print TEMP "<br>\n" if $wrotetext;
        }

        # Handling .sp is subtle. If it is inside a literal section, do
        # nothing if the next line is a non literal text line; similarly, if
        # not inside a literal section, do nothing if a literal follows. The
        # point being that the <pre> and </pre> that delimit literal sections
        # will do the spacing. Always skip if no previous output.

        elsif (/^\.sp/) {
            if ($wrotetext) {
                $_ = <STDIN>;
                $_ = "" unless defined($_);    # .sp was the last input line
                if ($inpre) {
                    print TEMP "\n" if (/^[\s.]/);
                }
                else {
                    print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
                }
                redo LINE;    # Now process the lookahead line we just read
            }
        }
        elsif (/^\.TP/ || /^\.PP/ || /^\.P/) {
            new_para();
        }
        elsif (/^\.SH\s*("?)(.*)\1/) {
            # Ignore the NAME section, along with the line that follows the
            # heading. A failed match here leaves $2 from the match above
            # intact for use below.
            if ($2 =~ /^NAME\b/) {
                <STDIN>;
                next LINE;
            }

            end_para();
            my($title) = do_line($2);
            if ($toc) {
                # The title is passed as a %s argument and not interpolated
                # into the format, so that a "%" in a heading is output
                # literally instead of being taken as a conversion.
                printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                    $ref, $ref, $title);
                printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                    $ref, $title);
                $ref++;
            }
            else {
                print TEMP "<br><b>\n$title\n</b><br>\n";
            }
        }
        elsif (/^\.SS\s*("?)(.*)\1/) {
            end_para();
            my($title) = do_line($2);
            print TEMP "<br><b>\n$title\n</b><br>\n";
        }
        elsif (/^\.B\s*(.*)/) {
            new_para() if (!$inpara);
            $_ = do_line($1);
            s/"(.*?)"/$1/g;    # Drop the quotes round quoted arguments
            print TEMP "<b>$_</b>\n";
            $wrotetext = 1;
        }
        elsif (/^\.I\s*(.*)/) {
            new_para() if (!$inpara);
            $_ = do_line($1);
            s/"(.*?)"/$1/g;    # Drop the quotes round quoted arguments
            print TEMP "<i>$_</i>\n";
            $wrotetext = 1;
        }

        # A comment that starts "HREF" takes the next line as a name that
        # is turned into a hyperlink, using the text given, which might be
        # in a special font. If it ends in () or (digits) or punctuation, they
        # aren't part of the link.

        elsif (/^\.\\"\s*HREF/) {
            $_ = <STDIN>;
            $_ = "" unless defined($_);    # HREF was the last input line
            chomp;
            $_ = do_line($_);
            $_ =~ s/\s+$//;
            if (/^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/) {
                print TEMP "<a href=\"$1.html\">$_</a>\n";
            }
            else {
                # No name could be extracted, so there is nothing to link
                # to. Output the text as it stands instead of building an
                # href from an unset $1.
                print TEMP "$_\n";
            }
        }

        # A comment that starts "HTML" inserts literal HTML

        elsif (/^\.\\"\s*HTML\s*(.*)/) {
            print TEMP $1;
        }

        # A comment that starts < inserts that HTML at the end of the
        # *next* input line - so as not to get a newline between them.

        elsif (/^\.\\"\s*(<.*>)/) {
            my($markup) = $1;
            $_ = <STDIN>;
            $_ = "" unless defined($_);    # The comment was the last input line
            chomp;
            $_ = do_line($_);
            $_ =~ s/\s+$//;
            print TEMP "$_$markup\n";
        }

        # A comment that starts JOIN joins the next two lines together, with
        # one space between them. Then that line is processed. This is used in
        # some displays where two lines are needed for the "man" version.
        # JOINSH works the same, except that it assumes this is a shell
        # command, so removes continuation backslashes.

        elsif (/^\.\\"\s*JOIN(SH)?/) {
            my $joinsh = defined($1);
            my($one,$two);
            $one = <STDIN>;
            $two = <STDIN>;
            $one = "" unless defined($one);    # Input ended before both lines
            $two = "" unless defined($two);
            $one =~ s/\s*\\e\s*$// if ($joinsh);
            chomp($one);
            $two =~ s/^\s+//;
            $_ = "$one $two";
            redo LINE;    # Process the joined lines
        }

        # Ignore anything not recognized

        next LINE;
    }

    # Line does not begin with a dot. Replace blank lines with new paragraphs

    if (/^\s*$/) {
        end_para() if ($wrotetext);
        next LINE;
    }

    # Convert fonts changes and output an ordinary line. Ensure that indented
    # lines are marked as literal.

    $_ = do_line($_);
    new_para() if (!$inpara);

    if (/^\s/) {
        if (!$inpre) {
            print TEMP "<pre>\n";
            $inpre = 1;
        }
    }
    elsif ($inpre) {
        print TEMP "</pre>\n";
        $inpre = 0;
    }

    print TEMP;
    $wrotetext = 1;
}

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output

close(TEMP);
print $body;

print <<"End";
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End
271    
272     # End

Properties

Name Value
svn:executable *

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12