#! /usr/bin/perl
# This script's documentation stands below.  Scroll down.
use warnings;
use strict;
use integer;
use bytes;
use FindBin;
use lib $FindBin::RealBin;
use Def;
use Bnm;
use Alpha;

our $file_maint    = $Def::maint_txt;
our $file_debram   = $Def::debram_txt;

our $usage = <<END;
usage: $0 {-t debram.txt} {-m maint.txt} [-TPwWkch] \\
[Packages file]... { - [Packages file]... }
    -t debram.txt
    -m maint.txt
           use alternate data files (these options must appear first)
    -T output a complete debram.txt
    -P instead of a new debram.txt body, output a modified Packages file
    -w within each ram in the output, separate the new pkgs from the old
    -W output only the new packages
    -k keep packages already ramified in any Packages file (even after
       the optional dash `-'; see below)
    -c output contrib packages, too
    -h print this usage message
Packages listed only in files after the optional dash `-' are excluded
from the output (but see -k above, which partially overrides).
END

our $rule        = '-' x $Def::width . "\n";
our $skip_to_deb = $Def::w_maint + $Def::w_pri;

# You do not need this optional helper script to install, use, modify,
# develop, build, package or distribute `debram'.  If you are developing
# the debram, however, you may find the script convenient.
#
# SYNOPSIS
#
# new-debram-body {-t debram.txt} {-m maint.txt} [-TPwWkch]
#     [Packages file]... { - [Packages file]... }
#
# DESCRIPTION
#
# This script builds a new debram.txt body from one or more new Packages
# files and from the old debram.txt.  It prints the new debram.txt body
# to stdout.
#
# OPTIONS
#
# -t debram.txt
# -m maint.txt
#        use alternate data files (these options must appear first)
#
# -T output a complete debram.txt; not just a body
#
# -P instead of a new debram.txt body, output a sorted, modified
#    Packages file
#
# -w within each ram in the output, separate the new packages from the
#    old (a new package is defined to be one which appears only in a
#    Packages file, not in the old debram.txt)
#
# -W output only the new packages
#
# -k keep packages already ramified in any Packages file (even after
#    the optional dash `-'; see below)
#
# -c output contrib packages, too (contribs are filtered out by default)
#
# -h (or -?) print a usage message
#
# INPUT PRECEDENCE
#
# Packages files named leftmost on the command line take precedence.
# However, if the left file lacks a ramno for a certain package but a
# file further right has one, the right ramno is read and integrated
# with the left package info.  Files to the right of a dash `-' are
# special: they never add packages to the output (except if the -k
# option is given), but they do contribute ramnos for unramified
# packages already added (this is useful when we have an old Packages
# file with some obsolete packages; we want to lose the obsoletes while
# otherwise saving the old ramnos).
#
# PACKAGES FILES
#
# A Packages file consists of a list of binary control paragraphs, each
# of which resembles the following example.
#
#   Package: apt-zip
#   Priority: extra
#   Section: admin
#   Installed-Size: 104
#   Maintainer: Giacomo Catenazzi <cate@debian.org>
#   Architecture: all
#   Version: 0.13.2
#   Depends: apt (>= 0.3.10)
#   Filename: pool/main/a/apt-zip/apt-zip_0.13.2_all.deb
#   Size: 14752
#   MD5sum: 78b979ac3ea325e1a2d4b154f14f5eb4
#   Description: Update a non-networked computer using apt and removable media
#    These scripts simplify the process of using dselect and apt on a
#    non-networked Debian box, using removable media like ZIP floppies.
#    One generates a `fetch' script (supporting backends such as wget and
#    lftp, in a modular, extensible way) to be run on a host with better
#    connectivity, check space constraints of your removable media, and
#    then install the package on your Debian box.
#    .
#    Note on current version: space-checking is not done and spanning
#    multiple disks is not yet supported.
#
# You probably have a Packages file on your system at
#
#   /var/lib/dpkg/available
#
# At the time of this writing (Jan. 2004, revised July 2006), the
# various current Packages files are found in Debian's on-line archive
# in places like
#
#   dists/testing/main/binary-i386/Packages.gz
#
# When multiple Packages files are given, the leftmost takes precedence.
#
# THE RAMIFICATION SPECIAL CONTROL FIELD
#
# You may find it useful to edit a copy of the Packages file to include
# the special control field `Ramification:' in some or all of the binary
# control paragraphs.  This is no standard control field, of course, but
# the script understands it to identify a package's ramification.  For
# example,
#
#   Ramification: 8212
#   Package: apt-zip
#   Priority: extra
#   Section: admin
#   Installed-Size: 104
#   Maintainer: Giacomo Catenazzi <cate@debian.org>
#   Architecture: all
#   Version: 0.13.2
#   Depends: apt (>= 0.3.10)
#   Filename: pool/main/a/apt-zip/apt-zip_0.13.2_all.deb
#   Size: 14752
#   MD5sum: 78b979ac3ea325e1a2d4b154f14f5eb4
#   Description: Update a non-networked computer using apt and removable media
#    These scripts simplify the process of using dselect and apt on a
#    non-networked Debian box, using removable media like ZIP floppies.
#    One generates a `fetch' script (supporting backends such as wget and
#    lftp, in a modular, extensible way) to be run on a host with better
#    connectivity, check space constraints of your removable media, and
#    then install the package on your Debian box.
#    .
#    Note on current version: space-checking is not done and spanning
#    multiple disks is not yet supported.
#
# Extra text may follow the ram number.  If so, the script ignores the
# extra text.  For example,
#
#   Ramification: 8212 APT
#
# has the same effect as does
#
#   Ramification: 8212
#
# When no Ramification control field is given, the script uses the
# ramification number from the old debram.txt.
#
# The script's -P option can help you to generate an initial
# Ramification-identifying copy of the Packages file.
#
# PACKAGE SORTING ORDER
#
# To group related packages together, this script sorts packages in a
# peculiar order:
#
#   1. by maintainer's name; then
#   2. by source package name; then
#   3. by binary package name with the source's flagship binary first.
#
# Maintainers are sorted last name first.  Refer to `Alpha.pm' for the
# precise name-sorting algorithm.
#
# Package names are sorted first without a leading "lib".  This means
# that "libncurses5" sorts near "ncurses-bin".  (It also means that
# "libtool" sorts near "tool", which is wrong.  In almost every other
# case, though, it is right to sort without the leading "lib"; so this
# is how the sort is done.)
#
# Experience shows such a sort to be surprisingly useful.
#
# PACKAGES NOT OUTPUT
#
# Packages not named in the Packages files are not output, even when
# they appear in the old debram.txt.  Packages whose ramifications
# cannot be determined are output only when the -P option has been
# given.  Contrib packages are output only when the -c option has been
# given.  (As to non-free packages, their control paragraphs do not
# occur in the standard Packages files.  If non-free are fed to the
# script nevertheless, the script's output is undefined.)
#
# THE UNKNOWN-MAINTAINER ERROR
#
# This script often exits with an "unknown maintainer" error.  Usually
# this error is no sign of trouble; it usually merely means that the
# script has seen a new Debian maintainer mentioned somewhere in a
# Packages file, a maintainer not yet in maint.txt.  Sometimes it means
# that the script has found a new spelling of an existing maintainer's
# name.  In either case, the error is usually simple to fix.
#
# If you are developing the debram, when you see an "unknown maintainer"
# error,
#
#   1. examine the Packages files to decide whether the unknown
#      maintainer is really a new maintainer or is an existing
#      maintainer with an alternate spelling.
#
#   2. update `maint.txt' accordingly, and
#
#   3. run this script again.
#
# Although it is not necessary that `maint.txt' always be kept in
# perfect alphabetical order, after making several updates to it you
# probably want to `helper/sort-maint' it for neatness' and correctness'
# sake.
#
# FILES
#
# (See the global parameters at this script's head.  The files include
# debram.txt and maint.txt.)
#
# BUGS
#
# This script uses a lot of memory at runtime: it stores a data
# structure duplicating piecewise almost the Packages files' entire
# combined text.  This is convenient for the script writer but is
# technically unnecessary.  With additional scripting effort, the memory
# usage could be dramatically reduced.
#
# The -t and -m options should be respected even when they do not appear
# first on the command line.
#
# On the other hand, this is just a helper script.  To make such
# improvements therefore has low priority.
#
#

# The general iconv(3) function does more than is wanted here, so a
# limited local iconv function is provided.  It converts utf-8
# characters directly to Latin-1, except that utf-8 characters not
# present in Latin-1 are converted to $utf8_unk (probably '?').
sub iconv ($) {
  local $_ = shift;
  my $out = '';
  my( $c1, $c2, $c3, $c4 );
  while (
    ( $c1, $c2, $c3, $c4 ) =
    /^([\000-\177]*)([^\000-\177])([\200-\277]*)(.*)/s
  ) {
    if (
      length($c3) == 1
      && ( $c2 eq "\302" || $c2 eq "\303" )
      && ( $c3 ge "\200" && $c3 le "\277" )
    ) {
      $c2 = chr( 0100*(ord($c2)-0302) + ord($c3) );
    }
    else { $c2 = $Def::utf8_unk }
    $c3   = '';
    $out .= "$c1$c2$c3";
    $_    = "$c4";
  }
  $_ = $out . $_;
  return $_;
}

# Read command-line arguments and options.
while ( @ARGV >= 2 && ( $ARGV[0] eq '-t' || $ARGV[0] eq '-m' ) ) {
  my( $opt, $file ) = splice @ARGV, 0, 2;
  if    ( $opt eq '-t' ) { $file_debram = $file }
  elsif ( $opt eq '-m' ) { $file_maint  = $file }
}
my @opt;
my @arg;
push @{ /^-\S/ ? \@opt : \@arg }, $_ for @ARGV;
my %opt = map {
  my $o = $_;
  map { substr( $o, $_, 1 ) => 1 } 1 .. length($o)-1
} @opt;
if ( @arg < 1 || $opt{'?'} || $opt{h} ) {
  print $usage;
  exit 0;
}

# Read the maintainer names.
my %mnstd; # debram-standard maintainer names
my %maint; # full maintainer names
{
  open F, '<', $file_maint;
  local $/ = '';
  while ( <F> ) {
    my @m  = grep { /\S/ } split "\n";
    my $ms = shift @m;
    $mnstd{$ms} and die "$0: std name $ms appears twice\n";
    $mnstd{$ms} = 1;
    for ( @m ) {
      defined $maint{$_}
        and die "$0: full maint name $_ appears twice\n";
      $maint{$_} = $ms;
    };
  }
  close F;
}

# Read the available packages from the Packages files.
my %deb; # Debian packages
my $after_dash = 0;
my %maint_unk;
for my $file_avail ( @arg ) {
  $file_avail eq '-' and $after_dash = 1, next;
  open F, '<', $file_avail;
  local $/ = '';
  FILE_LOOP: while ( <F> ) {
    {
      my $psect = qr/(?:${Def::contrib}\/|.*\/${Def::contrib}$)/om;
      next FILE_LOOP if !$opt{c} && /^${Def::Section}:[ \t]+$psect/om;
    }
    my( $ram   ) = /^${Def::Ramification}:[ \t]+(\d{4})/om;
    my $mnt0;
    ( $mnt0 ) = /^${Def::Maintainer}:[ \t]+([^<>]+?)[ \t]+</om or
    ( $mnt0 ) = /^${Def::Maintainer}:[ \t]+(\S+@\S+)/om
      or die
      "$0: control paragraph lacks a proper " .
      "${Def::Maintainer} field\n$_\n";
    defined $maint{$mnt0} or $maint_unk{$mnt0} = 1;
    my  $maint   = $maint   {$mnt0};
    my( $pri0  ) = /^${Def::Priority}:[ \t]+(\S+)/om
      or die
      "$0: control paragraph lacks a proper " .
      "${Def::Priority} field\n$_\n";
    my  $pri     = $Def::pri{$pri0}
      or die "$0: unknown priority $pri0\n";
    my( $deb   ) = /^${Def::Package}:[ \t]+(\S+)/om
      or die
      "$0: control paragraph lacks a proper " .
      "${Def::Package} field\n$_\n";
    if ( $deb{$deb} ) {
      $deb{$deb}{ram} = $ram unless defined $deb{$deb}{ram};
      next;
    }
    next if $after_dash && !( $opt{k} && defined($ram) );
    my( $src   ) = /^${Def::Source}:[ \t]+(\S+)/om;
    my $lead;
    if ( defined( $src ) && $deb ne $src ) {
      $lead = 0;
    }
    else {
      $lead = 1;
      $src = $deb;
    }
    my( $desc  ) = /^${Def::Description}:[ \t]+(.+)/om
      or die
      "$0: control paragraph lacks a proper " .
      "${Def::Description} field\n$_\n";
    $desc = iconv $desc if $desc =~ /[^\000-\177]/;
    $deb{$deb} = {
      maint   => $maint,
      pri     => $pri  ,
      src     => $src  ,
      lead    => $lead , # is this a flagship package?
      desc    => $desc ,
      ram     => $ram  ,
      control => $_    ,
      old     => 0     ,
        # had this package been ramified
        # in the old debram.txt?
      reramified
              => 0     ,
        # had this package been ramified under a different ramno
        # in the old debram.txt?
    };
  }
  close F;
}
if ( keys %maint_unk ) {
  warn "$0: unknown maintainer(s):\n\n";
  warn "$_\n" . iconv($_) . "\n\n" for keys %maint_unk;
  exit 1;
}

# Read the section titles.  Record package ramifications not already
# known.
my %title;     # ram titles
my @head = (); # debram.txt head
my @tail = (); # debram.txt tail
{
  open F, '<', $file_debram;
  while ( <F> ) {
    push @head, $_;
    last if $_ eq $Def::mark_main_body;
  }
  push @head, scalar <F>;
  local $/ = '';
  while ( <F> ) {
    s/\n?\z/\n/s;
    my @c0 = /.*\n/g;
    my @c  = grep { /\S/ } @c0;
    chomp @c;
    shift @c;
    my( $ram, $title ) =
      shift( @c ) =~ /^(\d{${Def::ndig}}) (.+?) \(\d+\)$/o
      or do {
        push @tail, @c0;
        last;
      };
    defined $title{$ram} and die "$0: ram $ram is titled twice\n";
    $title{$ram} = $title;
    shift @c;
    for ( @c ) {
      my( $deb ) = /^.{$skip_to_deb}(\S+)/o
        or die "$0: badly formed old debram line\n$_\n";
      $deb{$deb} or next;
      if ( defined $deb{$deb}{ram} ) {
        $deb{$deb}{reramified} = 1
          if $ram ne $deb{$deb}{ram};
      }
      else {
        $deb{$deb}{ram} = $ram;
      }
      $deb{$deb}{old} = 1;
    }
  }
  push @tail, <F>;
  close F;
}

# Sort the packages in two ways: by ramification then
# maintainer (%debram); by maintainer (@pkg).
my @pkg = sort {
  Alpha::cmp_std_mn
    ( $deb{$a}{maint},   $deb{$b}{maint} )
  || Bnm::cmp_prefixless
    ( $deb{$a}{src  },   $deb{$b}{src  } )
  ||    $deb{$b}{lead} <=> $deb{$a}{lead}
  || Bnm::cmp_prefixless
    (      $a        ,        $b         )
} keys %deb;
my %debram = map { $_ => [] } keys %title;
for my $deb ( @pkg ) {
  my $ram = $deb{$deb}{ram};
  defined( $ram ) or next;
  $debram{$ram} or die "$0: unknown ram $ram\n";
  push @{ $debram{$ram} }, $deb;
}

# Subroutine: fill a field with dots.
sub dot ($$) {
  my( $c, $w ) = @_;
  $c .= ' ';
  if ( length( $c ) < $w - 1 )
    { $c .= "\267" x ( $w - length( $c ) - 1 ) }
  if ( length( $c ) < $w     ) { $c .= ' ' }
  return $c;
}

# Subroutine: output one line describing a single deb.
sub debout (;$) {
  local $_ = @_ ? $_[0] : $_;
  my $out;
  $out .= dot  $deb{$_}{maint}, $Def::w_maint;
  $out .= dot  $deb{$_}{pri  }, $Def::w_pri  ;
  $out .= dot       $_        , $Def::w_deb  ;
  $out .=      $deb{$_}{desc }          ;
  $out =~ s/^(.{0,${Def::width}}).*$/$1/o;
  print "$out\n";
}

# Output the reformed ramification.
unless ( $opt{P} ) {
  print @head if $opt{T};
  for my $ram ( sort keys %debram ) {
    print $rule;
    print "$ram $title{$ram} ("
      . scalar( @{ $debram{$ram} } ) . ")\n";
    print $rule;
    if ( $opt{w} || $opt{W} ) {
      my @old;
      my @new;
      push @{
        $deb{$_}{old} && !$deb{$_}{reramified}
        ? \@old : \@new
      }, $_ for @{ $debram{$ram} };
      if ( !$opt{W} ) {
        debout for @old;
        print $rule;
      }
      debout for @new;
    }
    else { debout for @{ $debram{$ram} } }
    print "\n";
  }
  print @tail if $opt{T};
}

# Output a working Packages file.
else {
  for ( @pkg ) {
    $opt{W} && $deb{$_}{old} && !$deb{$_}{reramified} and next;
    my $control = $deb{$_}{control};
    $control =~ s/^${Def::Ramification}:[ \t]+.*\n?//omg;
    print "${Def::Ramification}: "
      . "$deb{$_}{ram} $title{ $deb{$_}{ram} }\n"
      if defined $deb{$_}{ram};
    print $control;
  }
}

