#!/usr/bin/perl
# Copyright 2004 Vlado Keselj www.cs.dal.ca/~vlado

sub help { print <<"#EOT" }
# Extract Folder MsgId pairs from a list of directories or folders, version $VERSION
#
# Each pair is extracted to a line of text of the form
# "folder<TAB>msgid", where <TAB> is the tab character.
# For messages that do not have a Message-Id header the output line
# looks like "folder<TAB>$NoId".  The first message in a folder that looks like
# "folder internal data" (e.g., created by pine) is ignored.
#
# Uses: formail Unix utility
#
# Usage: extract-folder-msgid [switches] [directories]
#  -h  Print help and exit.
#  -v  Print version of the program and exit.
#EOT

use POSIX qw(tmpnam);

use strict;
use vars qw( $VERSION $Folder $NoId);
#$VERSION = sprintf "%d.%d", q$Revision: 1.2 $ =~ /(\d+)/g;
$VERSION = '1.2';
$NoId = ' --NoId--';

use Getopt::Std;
use vars qw($opt_v $opt_h );
getopts("vh");

if ($opt_v) { print "$VERSION\n"; exit; }
elsif ($opt_h || !@ARGV) { &help(); exit; }

$| = 1;
&go_recursive(@ARGV);

sub go_recursive {
    while ($#_ > -1) {
	my $dir = shift;

	next if -l $dir || !-e $dir; # symbolic link or does not exist: ignore it

	if (not -d $dir) {	                  # a file
	    &process_folder($dir);
	    next;
	}
	
	local ($_, *DIR); 	                  # recursively enter directory
	opendir(DIR, $dir) || die "can't opendir $dir: $!";
	map { /^\.\.?$/ ? '' : (&go_recursive("$dir/$_")) } readdir(DIR);
	closedir(DIR);
    }
}

sub process_folder {
    my $folder = $Folder = shift;
    return if -z $folder;	# ignore 0-size files

    my $tmpdir = tmpnam();
    mkdir $tmpdir, 0700 or die "can't mkdir $tmpdir: $!";

    $ENV{'FILENO'} = '0';
    `formail -s sh -c 'cat -> $tmpdir/\$FILENO' < $folder`;

    my $msg = getfile("$tmpdir/0");                 # get the first message
    if ($msg !~ /^From\ MAILER-DAEMON\ (.|\n)*      # Let's guess if this
                \nSubject:.*FOLDER\ INTERNAL\ DATA  #  should be ignored
         (.|\n)*\n\n(?-x:This text is part of the internal format )
	(?-x:of your mail folder, and is not\s+a real message\.)/x)
    { &process_msg($msg) }
    my @rmfiles = ("$tmpdir/0");

    for (my $counter = 1; -e "$tmpdir/$counter"; ++$counter) {
	$msg = getfile("$tmpdir/$counter");
	push @rmfiles, "$tmpdir/$counter";
	&process_msg($msg);
    }
    unlink(@rmfiles); rmdir $tmpdir or die "rmdir $tmpdir: $!";
}

sub process_msg {
    my $msg = shift;
    my $hdrs = $msg;
    $hdrs = $` if $hdrs =~ /\n\n/;
    if ($hdrs =~ /^Message-Id:(.*)/mi) {
	my $msgid = $1;
	my $tmp = $`.$';
	die if $tmp =~ /^Message-Id:/i;
	$msgid =~ s/\s+//g;
	print "$Folder\t$msgid\n";
    }
    else { print "$Folder\t$NoId\n" }
}

sub getfile($) {
    my $f = shift;
    local *F;
    open(F, "<$f") or die "getfile:cannot open $f:$!";
    my @r = <F>;
    close(F);
    return wantarray ? @r : join ('', @r);
}