D.15. toc2toc

#!/usr/bin/perl -w
# This script generates a QNX Helpviewer Table of Contents.
#
# Input is read from STDIN; one "level|title|href" line is written to
# STDOUT for every table-of-contents entry that is recognized.
#
# Options:
#   -d        print "-->new level" / "-->finish level" trace lines
#   -l LEVEL  level number assigned to the outermost list of entries

# Getopt::Std replaces the perl4-era "getopts.pl", which is no longer part
# of the Perl core (removed in 5.16).  Without "use strict", getopts()
# stores its results in the same $opt_d / $opt_l package variables.
use Getopt::Std;

# Defaults; getopts() only overwrites the options actually supplied.
$opt_d	= 0;
$opt_l	= "";

getopts('dl:');

# Every print ends with a newline, and list items are separated by a space.
$\	= "\n";
$,	= " ";

# force flush of output
select(STDERR); $| = 1;
select(STDOUT); $| = 1;

# GetLevel(LEVEL)
#
# Reads lines from STDIN until the "</dl>" that closes the current list (or
# end of input).  Each "<dt>" line containing an "<a href=...>" anchor is
# printed to STDOUT as "LEVEL|title|href"; a "<dl>" line recurses with
# LEVEL + 1.  Lines that match none of the known forms are reported on
# STDERR.  When $opt_d is true, trace lines are printed on entry and after
# each nested level finishes.
sub GetLevel
{
	my ($level) = @_;

	print "-->new level: ", $level if $opt_d;

	while (<STDIN>)
	{
		# Nothing to do for blank lines or for "<dd>" / "</dd>" lines
		# (the pattern allows any single character before "dd").
		next if m%^ *$%;
		next if m%^<.?dd>$%;

		# "</dl>" closes the list this call is responsible for.
		return if m%^</dl>$%;

		# "<dl>" opens a nested list: hand it to a recursive call.
		if (m%^<dl>$%) {
			GetLevel($level + 1);
			print "-->finish level: ", $level if $opt_d;
			next;
		}

		# A table-of-contents entry.
		# Historical note from the original author: this pattern used to be
		#	m%^<dt>.*<a href='([^"]*)'>(.*?)</a>%
		# and was changed because later versions of perl reportedly could
		# not handle the single quotes in the matching pattern.
		if (m%^<dt>.*<a href=([^<*>]*)>(.*?)</a>%) {
			my ($href, $title) = ($1, $2);

			# The title may carry inline markup; keep only its text.
			$title =~ s%<[^>]*>%%g;

			# The href is captured with any surrounding double quotes.
			$href =~ s%[\"]%%g;

			# Open question: are hrefs to anchors understood by helpviewer?

			# Level-3 entries get their href prefixed with "../".
			my $target = ($level == 3) ? "../$href" : $href;
			print "$level|$title|$target";

			next;
		}

		print STDERR "-->unrecognized input[$.]:", $_;
	}
}

# Discard input up to the line containing "Table of Contents", then convert
# everything after it starting at level $opt_l and stop.  If no such line
# is found, nothing is converted.
# (An earlier marker test, kept for reference:  next if !m#^><DL#; )
while (defined($_ = <STDIN>))
{
	if (m#Table of Contents#) {
		GetLevel($opt_l);
		exit 0;
	}
}