/[pcre]/code/branches/pcre16/CleanTxt
ViewVC logotype

Contents of /code/branches/pcre16/CleanTxt

Parent Directory | Revision Log


Revision 755 - (show annotations) (download)
Mon Nov 21 10:41:54 2011 UTC (2 years, 4 months ago) by ph10
File size: 2941 byte(s)
Created a new branch for the development of 16-bit support.

#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of one input line with terminal markup removed: screen
# controls of the form "ESC [ number m" and overstrike pairs "char,
# backspace". The overstrike removal is repeated until nothing more matches,
# so every line is cleaned the same way no matter where it was read.

sub strip_markup {
    my ($line) = @_;
    $line =~ s/\x1b\[\d+m//g;           # Remove screen controls "ESC [ number m"
    1 while $line =~ s/.\x8//g;         # Remove "char, backspace"
    return $line;
}

# Filter a whole document.
#
#   $read_line - code ref returning the next raw input line, or undef at
#                end of input
#   $emit      - code ref called with each piece of output text
#
# The filter keeps only the first "PCREname(n) ... PCREname(n)" header line,
# cuts out page footers (a non-blank line with three or more blank lines
# before it and three blank lines after it), and forces two blank lines
# before headings (lines that start in column one).

sub clean_text {
    my ($read_line, $emit) = @_;

    my $blankcount  = 0;    # Blank lines seen but not yet output
    my $lastwascut  = 0;    # True just after a page footer has been removed
    my $firstheader = 1;    # True until the first header line has been output
    my $lastprinted;        # Most recent non-blank line that was output
    my @pushed_back;        # Cleaned lines that were read ahead but not used

    # Fetch the next line with markup already stripped; undef at end of input.
    my $next_line = sub {
        return shift @pushed_back if @pushed_back;
        my $raw = $read_line->();
        return defined($raw) ? strip_markup($raw) : undef;
    };

    LINE:
    while (defined(my $line = $next_line->())) {

        # Handle header lines. Retain only the first one we encounter, but
        # remove the blank line that follows. Any others (e.g. at end of
        # document) and the following blank line are dropped.

        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                $emit->($line);
                $lastprinted = $line;
                $lastwascut  = 0;
            }

            # Only a blank follower is discarded. If real text follows the
            # header directly, hand it back so that it is processed normally
            # instead of being lost.
            my $follower = $next_line->();
            push @pushed_back, $follower
                if defined($follower) && $follower =~ /\S/;
            next LINE;
        }

        # Count runs of empty lines

        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next LINE;
        }

        # If a chunk of lines has been cut out (page footer) and the next line
        # has a different indentation, put back one blank line.

        if ($lastwascut && $blankcount < 1 && defined($lastprinted)) {
            my ($previous_indent) = $lastprinted =~ /^(\s*)/;
            my ($current_indent)  = $line        =~ /^(\s*)/;
            $blankcount++ if $previous_indent ne $current_indent;
        }

        # We get here only when we have a non-blank line in hand. If it was
        # preceded by 3 or more blank lines, read the next 3 lines and see if
        # they are blank. If so, remove all 7 lines, and remember that we have
        # just done a cut.

        if ($blankcount >= 3) {
            my @lookahead;
            for (1 .. 3) {
                my $ahead = $next_line->();
                push @lookahead, defined($ahead) ? $ahead : "";
            }

            my $nonblank_ahead = grep { $_ !~ /^\s*$/ } @lookahead;

            # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

            if ($nonblank_ahead == 0) {
                $blankcount -= 3;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current, and the next
            # three lines. Remember the last non-blank line that was output,
            # so that a later indentation check compares against the text
            # that really precedes a cut.

            else {
                $emit->("\n") for 1 .. $blankcount;
                $emit->($line);
                $lastprinted = $line;
                for my $ahead (@lookahead) {
                    $emit->($ahead);
                    $lastprinted = $ahead if $ahead =~ /\S/;
                }
                $lastwascut = 0;
                $blankcount = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines. Output
        # any blanks there are, and the line. Remember it. Force two blank
        # lines before headings.

        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined($lastprinted);
            $emit->("\n") for 1 .. $blankcount;
            $emit->($line);
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Run as a filter when executed as a script. When the file is loaded from
# other code (for example a test), only the subroutines are defined.

clean_text(sub { return scalar <STDIN> }, sub { print @_ }) unless caller;

# Return true so the file can also be loaded with require.

1;

# End

Properties

Name Value
svn:executable *

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12