#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

my $blankcount  = 0;    # Blank lines seen but not yet written out
my $lastwascut  = 0;    # True immediately after a footer chunk was removed
my $firstheader = 1;    # True until the first header line has been printed
my $lastprinted;        # Most recent line remembered as printed (undef at start)

# Return a copy of the given line with screen controls and overstrikes
# removed. This is a single pass: each "char, backspace" pair found in one
# left-to-right scan is deleted.

sub strip_controls {
    my ($text) = @_;
    $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $text =~ s/.\x8//g;          # Remove "char, backspace"
    return $text;
}

LINE:
while (defined(my $line = <STDIN>)) {
    $line = strip_controls($line);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the line that follows it. Any others (e.g. at end of document) and the
    # line that follows them are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
        if ($firstheader) {
            $firstheader = 0;
            print $line;
            $lastprinted = $line;
            $lastwascut  = 0;
        }

        # The line after a header is expected to be blank; it is read and
        # discarded without being inspected.
        my $following = <STDIN>;
        next LINE;
    }

    # Count runs of empty lines; they are written out (or not) once the next
    # non-blank line shows what they belong to.

    if ($line =~ /^\s*$/) {
        $blankcount++;
        $lastwascut = 0;
        next LINE;
    }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
        my ($previous_indent) = $lastprinted =~ /^(\s*)/;
        my ($current_indent)  = $line =~ /^(\s*)/;
        $blankcount++ if $previous_indent ne $current_indent;
    }

    # We get here only when we have a non-blank line in hand. If it was
    # preceded by 3 or more blank lines, read the next 3 lines and see if they
    # are blank. If so, remove all 7 lines, and remember that we have just
    # done a cut.

    if ($blankcount >= 3) {
        my @next;
        for my $i (0 .. 2) {
            my $ahead = <STDIN>;
            $ahead = "" if !defined $ahead;    # Treat end of input as blank
            $next[$i] = strip_controls($ahead);
        }

        # Cut out chunks of the form <3 blanks><non-blank><3 blanks>. Only
        # three of the pending blanks are consumed; any extra ones stay
        # counted for the next non-blank line.

        if (   $next[0] =~ /^\s*$/
            && $next[1] =~ /^\s*$/
            && $next[2] =~ /^\s*$/)
        {
            $blankcount -= 3;
            $lastwascut = 1;
        }

        # Otherwise output the saved blanks, the current line, and the three
        # look-ahead lines.

        else {
            print "\n" x $blankcount;
            print $line;
            for my $ahead (@next) {
                # Look-ahead lines get a second backspace pass here, which
                # only has an effect when backspaces were stacked (for
                # example "ab\b\b", where one pass leaves "a\b").
                $ahead =~ s/.\x8//g;
                print $ahead;
            }

            # The remembered line is the current one, not the look-ahead
            # lines that were written after it.
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings (lines starting in column one), except for the
    # "Last updated" and "Copyright" lines and for the very first output.

    else {
        $blankcount = 2
            if $line =~ /^\S/
            && $line !~ /^Last updated/
            && $line !~ /^Copyright/
            && defined $lastprinted;

        print "\n" x $blankcount;
        print $line;
        $lastprinted = $line;
        $lastwascut  = 0;
        $blankcount  = 0;
    }
}

# End