#!/usr/bin/perl

=pod

=head1 NAME

pfm2kpx - extract correct kerning information from F<pfm> 
(Printer Font Metrics) files

=head1 SYNOPSIS

pfm2kpx [options] I<pfm>

=head1 DESCRIPTION

Many PostScript Type1 fonts are distributed with only
F<pfb> and F<pfm> files, but no F<afm> (Adobe Font Metrics) files. These can be
generated with I<pf2afm> (which comes with I<GhostScript>), but
this might not yield all kerning pairs from the F<pfm> file. The reason
for this is that I<pf2afm> interprets C<dfCharSet = 0x00> to mean that
the Adobe StandardEncoding has been used, whereas many fonts (or font
creation tools) instead use the Windows ANSI (aka codepage 1252) encoding.
(You know you've hit this problem when I<pf2afm> complains "C<.notdef 
character ocurred among kern pairs -- you'd better check the resulting 
AFM file>".) In this case, the resulting F<afm> is incomplete and contains
wrong kerning pairs.

B<pfm2kpx> extracts the correct kerning pairs from such F<pfm> files and
repairs the F<afm> (if necessary creating it first by running I<pf2afm>).
If for some reason it can't update the F<afm>, it prints the kerning pairs to
C<stdout> instead.

When the F<afm> file had to be created and I<pf2afm> didn't complain about
the kerning pairs, B<pfm2kpx> assumes the resulting F<afm> file is fine and
quits; this can be changed by specifying the B<-f> option (see below).
An F<afm> file that already exists is always updated.

=head1 OPTIONS

=over 4

=item B<-f>

Force updating of the F<afm> file, even if I<pf2afm> doesn't complain.

=back

=head1 SEE ALSO

F<pf2afm> (part of GhostScript), F<afm2afm>, F<autoinst>, F<cmap2enc>, 
F<font2afm>, F<ot2kpx>.

=head1 AUTHOR

Marc Penninga <marc@penninga.info>

=head1 HISTORY

=over 12

=item I<2005-02-17>

First version

=item I<2005-02-18>

Added C<binmode PFM> to make B<pfm2kpx> work on Windows platforms

=item I<2005-03-08>

Input files searched via B<kpsewhich> (where available)

=item I<2005-03-14>

Rewrote some of the code to make it more robust, added the B<-f> option

=item I<2005-03-15>

Input files searched using B<kpsewhich> or B<findtexmf>

=item I<2005-04-29>

Improved the documentation

=item I<2005-05-24>

Bugfix.

=item I<2005-07-29>

Some updates to the documentation.

=back

=cut

##############################################################################

# Glyph names for the Windows ANSI (codepage 1252) encoding, indexed by
# character code: $winansi[$code] is the PostScript glyph name of that code,
# or `.notdef' where no glyph name is assigned.  The table has 256 entries,
# laid out as four names per line and sixteen names per paragraph, so every
# paragraph covers one row 0x00-0x0F, 0x10-0x1F, ... 0xF0-0xFF.
#
# (No comments inside the list itself: anything within qw() is data.)
@winansi = qw(

    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef

    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef
    .notdef .notdef .notdef .notdef

    space exclam quotedbl numbersign
    dollar percent ampersand quotesingle
    parenleft parenright asterisk plus
    comma hyphen period slash

    zero one two three
    four five six seven
    eight nine colon semicolon
    less equal greater question

    at A B C
    D E F G
    H I J K
    L M N O

    P Q R S
    T U V W
    X Y Z bracketleft
    backslash bracketright asciicircum underscore

    grave a b c
    d e f g
    h i j k
    l m n o

    p q r s
    t u v w
    x y z braceleft
    bar braceright asciitilde .notdef

    Euro .notdef quotesinglbase florin
    quotedblbase ellipsis dagger daggerdbl
    circumflex perthousand Scaron guilsinglleft
    OE .notdef Zcaron .notdef

    .notdef quoteleft quoteright quotedblleft
    quotedblright bullet endash emdash
    tilde trademark scaron guilsinglright
    oe .notdef zcaron Ydieresis

    .notdef exclamdown cent sterling
    currency yen brokenbar section
    dieresis copyright ordfeminine guillemotleft
    logicalnot .notdef registered macron

    degree plusminus twosuperior threesuperior
    acute mu paragraph periodcentered
    cedilla onesuperior ordmasculine guillemotright
    onequarter onehalf threequarters questiondown

    Agrave Aacute Acircumflex Atilde
    Adieresis Aring AE Ccedilla
    Egrave Eacute Ecircumflex Edieresis
    Igrave Iacute Icircumflex Idieresis

    Eth Ntilde Ograve Oacute
    Ocircumflex Otilde Odieresis multiply
    Oslash Ugrave Uacute Ucircumflex
    Udieresis Yacute Thorn germandbls

    agrave aacute acircumflex atilde
    adieresis aring ae ccedilla
    egrave eacute ecircumflex edieresis
    igrave iacute icircumflex idieresis

    eth ntilde ograve oacute
    ocircumflex otilde odieresis divide
    oslash ugrave uacute ucircumflex
    udieresis yacute thorn ydieresis
);

# Decode a signed 16-bit little-endian integer.
#
# Only the first two bytes of the argument are looked at (least significant
# byte first); anything after them is ignored.  Values of 32768 and above
# are mapped to the corresponding negative number, so the result lies in
# -32768 .. 32767.
sub getword($) {
    my ($data) = @_;

    # Zero-pad so that a string shorter than two bytes decodes as if the
    # missing high-order bytes were 0.
    my $unsigned = unpack 'v', substr($data, 0, 2) . "\0\0";

    return $unsigned - 65536 if $unsigned >= 32768;
    return $unsigned;
}

# Decode an unsigned 32-bit little-endian integer.
#
# Only the first four bytes of the argument are looked at (least significant
# byte first); anything after them is ignored.
sub getdword($) {
    my ($data) = @_;

    # Zero-pad so that a string shorter than four bytes decodes as if the
    # missing high-order bytes were 0.
    my $value = unpack 'V', substr($data, 0, 4) . "\0\0\0\0";

    return $value;
}

##############################################################################

use Getopt::Std;
use integer;
use warnings; no warnings qw(uninitialized);

use strict;

##############################################################################
# Main program.
#
#   1. Parse the command line (`-f' plus exactly one PFM file name).
#   2. If `<base>.afm' does not exist yet, run pf2afm on `<base>.pfb'; unless
#      pf2afm complained about `.notdef' kern pairs or `-f' was given, the
#      result is considered fine and the program stops.
#   3. Locate the PFM file (kpsewhich, then findtexmf, then the name as given)
#      and read its kerning pair table.
#   4. Rewrite `<base>.afm' with the kerning pairs from the PFM file, or print
#      the kerning pairs to STDOUT if there is no AFM file to update.
##############################################################################

# The code -> glyph name table is a package global assigned earlier in this
# file; declare it here so that it can be used under `use strict'.
our @winansi;

my %options;

$0 =~ s!.*/!!;
my $usage = "Usage: $0 [-f] pfmfile\n";
getopts("f", \%options) or die $usage;
die $usage if @ARGV != 1;

# Quote a word for a POSIX shell: wrap it in single quotes and replace every
# embedded single quote by '\'' so the word can never end the quoting early.
my $shell_quote = sub {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
};

# Base name: the PFM name without directory part and without its extension
# (only a trailing `.pfm' is removed, not one in the middle of the name).
(my $base = $ARGV[0]) =~ s!.*/!!;
$base =~ s!\.pfm\z!!i;

unless (-e "${base}.afm") {
    my $pf2afm = 'pf2afm ' . $shell_quote->("${base}.pfb");
    my $errmsg = `$pf2afm`;

    # The pattern (including the spelling `ocurred') has to match the
    # message printed by pf2afm, so it must not be "corrected".
    if ($errmsg !~ /\.notdef character ocurred among kern pairs/ and
        not exists $options{f})
    {
        warn "Warning: output from <pf2afm> seems OK; skipping `$ARGV[0]'.\n",
             "\t       If you disagree, rerun $0 with the `-f' option\n";
        exit 0;
    }
}

# Look for the PFM file with kpsewhich or findtexmf; stderr is captured as
# well, so an error message from a missing tool simply fails the -e test.
# If neither tool yields an existing file, use the name as given.
my $pfm_path   = $ARGV[0];
my $quoted_arg = $shell_quote->($ARGV[0]);
for my $finder ('kpsewhich', 'findtexmf') {
    my $found = `$finder $quoted_arg 2>&1`;
    next unless defined $found;
    $found =~ s/\r?\n\z//;
    if (length($found) and -e $found) {
        $pfm_path = $found;
        last;
    }
}

open my $pfm_fh, '<', $pfm_path
    or die "Error: can't open `${pfm_path}' - $!\n";
binmode $pfm_fh;
my $pfm = do { local $/; <$pfm_fh> };
close $pfm_fh;
$pfm = '' unless defined $pfm;

# The fixed offsets used below (85, 131, 195) require at least 197 bytes.
if (length($pfm) < 197) {
    die "Error: `$ARGV[0]' is too short to be a PFM file\n";
}

my $dfCharSet = unpack "C", substr($pfm, 85, 1);
if ($dfCharSet != 0) {
    die "Error: `dfCharSet' is $dfCharSet, not 0\n";
}

# dfPairKernTable is the file offset of the kerning table, etmKernPairs the
# number of kerning pairs according to the extended text metrics.
my $dfPairKernTable = getdword(substr $pfm, 131, 4);
my $etmKernPairs    = getword(substr $pfm, 195, 2);
if ($dfPairKernTable == 0 or $etmKernPairs == 0) {
    warn "Warning: no kerning pairs found in `$ARGV[0]'\n";
    exit 0;
}

# Refuse to go on with a table offset that points outside the file; doing so
# would replace the kerning data in the AFM file by an empty table.
if ($dfPairKernTable < 0 or $dfPairKernTable + 2 > length($pfm)) {
    die "Error: kerning table of `$ARGV[0]' lies outside the file\n";
}

# The table starts with a 16-bit pair count, followed by one 4-byte record
# per pair: left character code, right character code, 16-bit kern amount.
my $pairkern = substr $pfm, $dfPairKernTable;
my $n = getword(substr $pairkern, 0, 2);
if ($n != $etmKernPairs) {
    warn "Warning: incorrect number of kerning pairs in `$ARGV[0]';\n",
         "  \t   please check the resulting AFM file!\n";
}

# Never read records beyond the end of the file.
my $available = int((length($pairkern) - 2) / 4);
if ($n > $available) {
    warn "Warning: kerning table in `$ARGV[0]' is truncated;\n",
         "  \t   only $available of $n kerning pairs could be read\n";
    $n = $available;
}

my @KPX;
for my $i (0 .. $n - 1) {
    my $offset = 2 + 4 * $i;
    my ($left, $right) = unpack "CC", substr($pairkern, $offset, 2);
    my $kern = getword(substr($pairkern, $offset + 2, 2));

    push @KPX, "KPX $winansi[$left] $winansi[$right] $kern\n";
}

if (open my $afm_in, '<', "${base}.afm") {
    my $afm = do { local $/; <$afm_in> };
    close $afm_in;
    $afm = '' unless defined $afm;

    $afm =~ s/\r\n*/\n/gs;

    # Drop the old kerning data and everything after it.  An AFM file
    # without kerning data still ends in `EndFontMetrics'; that line has to
    # go as well, because a fresh one is written after the new kerning data.
    $afm =~ s/StartKernData.*//s;
    $afm =~ s/^[ \t]*EndFontMetrics\b.*//ms;

    # Record the conversion in the first `Comment' line.
    my $time = localtime;
    $afm =~ s/(?<=Comment ).*?$/Converted at $time by $0 from $ARGV[0]/m;

    # Write to the AFM file if possible, to STDOUT otherwise.
    my $out = \*STDOUT;
    my $afm_out;
    if (open $afm_out, '>', "${base}.afm") {
        $out = $afm_out;
    }
    else {
        undef $afm_out;
        warn "Warning: can't create `${base}.afm' - $!\n" .
             "\t       printing to <STDOUT> instead\n";
    }

    print {$out}
        "${afm}\n",
        "StartKernData\n",
        "StartKernPairs $n\n",
        @KPX,
        "EndKernPairs\n",
        "EndKernData\n",
        "\n",
        "EndFontMetrics\n";

    if (defined $afm_out) {
        # Buffered write errors only show up when the file is closed.
        close $afm_out
            or die "Error: can't write `${base}.afm' - $!\n";
    }
}
else {
    warn "Warning: file `${base}.afm' not found, " .
         "printing only kerning pairs\n";
    print "StartKernData\nStartKernPairs $n\n",
          @KPX,
          "EndKernPairs\nEndKernData\n";
}

__END__
