mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 04:53:01 +01:00
40b23ba214
Docs/Support/docbook-fixup.pl: DocBook script fixup. Docs/manual.texi: Remove double space (texi2db does not like it).
200 lines
5.7 KiB
Perl
Executable file
200 lines
5.7 KiB
Perl
Executable file
#!/usr/bin/perl -w
|
|
|
|
# Fix the output of `makeinfo --docbook` version 4.0c
|
|
# Convert the broken docbook output to well-formed XML that conforms to the O'Reilly idiom
|
|
# See code for detailed comments
|
|
# Authors: Arjen Lentz and Zak Greant (original code by Jeremy Cole)
|
|
|
|
use strict;
|
|
|
|
my $data = '';
|
|
my @apx = ();
|
|
my $apx = '';
|
|
my @nodes = ();
|
|
my $nodes = '';
|
|
|
|
msg ("-- Post-processing `makeinfo --docbook` output --");
|
|
msg ("** Written to work with makeinfo version 4.0c **\n");
|
|
|
|
msg ("Discarding DTD - not required by subsequent scripts");
|
|
# <> is a magic filehandle - either reading lines from stdin or from file(s) specified on the command line
|
|
<>;
|
|
|
|
msg ("Create an XML PI with ISO-8859-1 character encoding");
|
|
$data = "<?xml version='1.0' encoding='ISO-8859-1'?>";
|
|
|
|
msg ("Get the rest of the data");
|
|
$data = $data . join "", <>;
|
|
|
|
msg ("Add missing <bookinfo> and <abstract> opening tags");
|
|
# Note the absence of the g (global) pattern modified. This situation can only happen once.
|
|
# ...as soon as we find the first instance, we can stop looking.
|
|
$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/;
|
|
|
|
|
|
# arjen 2002-05-01
|
|
msg ("Processing docbook-prefix special strings");
|
|
$data =~ s/FIXUPmdashFIXUP/\&mdash\;/g;
|
|
|
|
$data =~ s/FIXUPdoubledashFIXUP/--/g;
|
|
|
|
$data =~ s/FIXUPstrongFIXUP/<emphasis\ role\=\"bold\">/g;
|
|
$data =~ s/FIXUPendstrongFIXUP/<\/emphasis>/g;
|
|
|
|
$data =~ s/FIXUPemphFIXUP/<emphasis>/g;
|
|
$data =~ s/FIXUPendemphFIXUP/<\/emphasis>/g;
|
|
|
|
$data =~ s/FIXUPfileFIXUP/<filename>/g;
|
|
$data =~ s/FIXUPendfileFIXUP/<\/filename>/g;
|
|
|
|
$data =~ s/FIXUPsampFIXUP/<literal>/g;
|
|
$data =~ s/FIXUPendsampFIXUP/<\/literal>/g;
|
|
|
|
|
|
msg ("Removing mailto: from email addresses...");
|
|
$data =~ s/mailto://g;
|
|
|
|
msg ("Removing INFORMALFIGURE...");
|
|
$data =~ s{<informalfigure>.+?</informalfigure>}
|
|
{}gs;
|
|
|
|
msg ("Convert ampersand to XML escape sequence...");
|
|
$data =~ s/&(?!\w+;)/&/g;
|
|
|
|
# arjen 2002-05-01
|
|
msg ("Changing (TM) to XML escape sequence...");
|
|
$data =~ s/MySQL \(TM\)/MySQL™/g;
|
|
$data =~ s{<command>TM</command>}
|
|
{™}g;
|
|
|
|
# arjen 2002-05-01
|
|
msg ("Changing ' -- ' to XML escape sequence...");
|
|
$data =~ s/ -- /—/g;
|
|
|
|
msg ("Changing @@ to @...");
|
|
$data =~ s/@@/@/g;
|
|
|
|
msg ("Rework references of the notation '<n>'");
|
|
# Need to talk to Arjen about what the <n> bits are for
|
|
$data =~ s/<(\d)>/[$1]/g;
|
|
|
|
msg ("Changing '_' to '-' in references...");
|
|
$data =~ s{((?:id|linkend)=\".+?\")}
|
|
{&underscore2hyphen($1)}gex;
|
|
|
|
msg ("Changing ULINK to SYSTEMITEM...");
|
|
$data =~ s{<ulink url=\"(.+?)\">\s*</ulink>}
|
|
{<systemitem role=\"url\">$1</systemitem>}gs;
|
|
|
|
msg ("Adding PARA inside ENTRY...");
|
|
$data =~ s{<entry>(.*?)</entry>}
|
|
{<entry><para>$1</para></entry>}gs;
|
|
|
|
msg ("Fixing spacing problem with titles...");
|
|
$data =~ s{(</\w+>)(\w{2,})}
|
|
{$1 $2}gs;
|
|
|
|
msg ("Adding closing / to XREF and COLSPEC tags...");
|
|
$data =~ s{<(xref|colspec) (.+?)>}
|
|
{<$1 $2 />}gs;
|
|
|
|
# arjen 2002-04-26
|
|
msg ("Removing separate target titles from LINKs and make them XREFs...");
|
|
$data =~ s{<link (linkend=.+?)>.+?</link>}
|
|
{<xref $1 />}gs;
|
|
|
|
# Probably need to strip these
|
|
msg ('Adding "See " to XREFs that used to be @xref...');
|
|
$data =~ s{([.'!)])\s*<xref }
|
|
{$1 See <xref }gs;
|
|
|
|
msg ('Adding "see " to (XREFs) that used to be (@pxref)...');
|
|
$data =~ s{([([,;])(\s*)<xref }
|
|
{$1$2see <xref }gs;
|
|
|
|
msg ("Making first row in table THEAD...");
|
|
$data =~ s{( *)<tbody>(\s*<row>.+?</row>)}
|
|
{$1<thead>$2\n$1</thead>\n$1<tbody>}gs;
|
|
|
|
msg ("Removing EMPHASIS inside THEAD...");
|
|
$data =~ s{<thead>(.+?)</thead>}
|
|
{"<thead>".&strip_tag($1, 'emphasis')."</thead>"}gsex;
|
|
|
|
msg ("Removing empty PARA...");
|
|
$data =~ s{<para>\s*</para>}
|
|
{}gs;
|
|
|
|
msg ("Removing lf before /PARA in ENTRY...");
|
|
$data =~ s{\n(</para></entry>)}
|
|
{$1}gs;
|
|
|
|
msg ("Removing whitespace before /PARA if not on separate line...");
|
|
$data =~ s{(\S+)[\t ]+</para>}
|
|
{$1</para>}g;
|
|
|
|
msg ("Removing PARA around INDEXTERM if no text in PARA...");
|
|
$data =~ s{<para>((?:<indexterm role=\"[^"]+\">(?:<(primary|secondary)>[^>]+</\2>)+?</indexterm>)+?)\s*</para>}
|
|
{$1}gs;
|
|
|
|
@apx = ("Users", "MySQL Testimonials", "News", "GPL-license", "LGPL-license");
|
|
|
|
foreach $apx (@apx) {
|
|
msg ("Removing appendix $apx...");
|
|
$data =~ s{<appendix id=\"$apx\">(.+?)</appendix>}
|
|
{}gs;
|
|
|
|
# Skip to next appendix regex if the regex did not match anything
|
|
next unless (defined $&);
|
|
|
|
msg ("...Building list of removed nodes...");
|
|
|
|
# Split the last bracketed regex match into an array
|
|
# Extract the node names from the tags and push them into an array
|
|
foreach (split "\n", $&) {
|
|
push @nodes, $1 if /<\w+ id=\"(.+?)\">/
|
|
}
|
|
}
|
|
|
|
# 2002-02-22 arjen@mysql.com (added fix " /" to end of regex, to make it match)
|
|
msg ("Fixing references to removed nodes...");
|
|
# Merge the list of node names into a set of regex alternations
|
|
$nodes = join "|", @nodes;
|
|
|
|
# Find all references to removed nodes and convert them to absolute URLs
|
|
$data =~ s{<\w+ linkend="($nodes)" />}
|
|
{&xref2link($1)}ges;
|
|
|
|
print STDOUT $data;
|
|
exit;
|
|
|
|
#
|
|
# Definitions for helper sub-routines
|
|
#
|
|
|
|
sub msg {
|
|
print STDERR "docbook-fixup:", shift, "\n";
|
|
}
|
|
|
|
sub strip_tag($$) {
|
|
(my $str, my $tag) = @_;
|
|
$str =~ s{<$tag>(.+?)</$tag>}{$1}gs;
|
|
return $str;
|
|
}
|
|
|
|
sub underscore2hyphen($) {
|
|
my $str = shift;
|
|
$str =~ tr/_/-/;
|
|
return $str;
|
|
}
|
|
|
|
sub xref2link {
|
|
my $ref = shift;
|
|
$ref =~ tr/ /_/;
|
|
$ref =~ s{^((.)(.).+)$}{$2/$3/$1.html};
|
|
return "http://www.mysql.com/doc/" . $ref;
|
|
}
|
|
|
|
# We might need to encode the high-bit characters to ensure proper representation
|
|
# msg ("Converting high-bit characters to entities");
|
|
# $data =~ s/([\200-\400])/&get_entity($1)>/gs;
|
|
# There is no get_entity function yet - no point writing it til we need it :)
|