#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

# $Id$

use strict;
require 5.005;
use Getopt::Std;
use File::Basename qw(basename dirname);
use vars qw($VERSION);

$VERSION = '1.26';
$|++;

##############################################################################
# Set up catdir, catfile, splitdir and tmpdir. Hopefully we can just use
# File::Spec::Functions and not worry about it.
##############################################################################

BEGIN {
    # This block runs at compile time, so the path helpers and the $opt_*
    # variables are in place before the rest of the file executes.

    # Use File::Spec if we can. Otherwise assume simple Unix semantics.
    eval "require File::Spec::Functions";
    if ($@) {
        # Use simple Unix semantics: install minimal stand-ins under the same
        # names that File::Spec::Functions would have exported.
        *catdir   = sub { join '/', @_ };
        *catfile  = sub { join '/', @_ };
        *tmpdir   = sub { '/tmp' };
        *splitdir = sub { split m|/|, $_[0] };
    } else {
        # Import the functions we'll need.
        File::Spec::Functions->import(qw(catdir catfile tmpdir splitdir));
    }

    # Get the command-line arguments. Each switch is stored in the package
    # variable $opt_<letter>; in the getopts() specification, a letter
    # followed by a colon takes a value and any other letter is a flag.
    use vars qw($opt_f $opt_r $opt_l $opt_m $opt_t $opt_n $opt_i $opt_p $opt_c
                $opt_s $opt_h $opt_d $opt_a $opt_D $opt_o $opt_e $opt_u $opt_g
                $opt_w $opt_H $opt_B $opt_j $opt_M $opt_S $opt_v $opt_V $opt_N
                $opt_I $opt_E $opt_q $opt_Q $opt_P $opt_U $opt_C
               );
    getopts('f:lr:m:t:nipcs:dhaDo:e:u:gw:HB:j:M:SvVN:I:E:qQP:UC:');
}

##############################################################################
# Set up the variables and constants we'll need later.
##############################################################################
# uri_escape() is called when building links for -w. Load the real one only
# when -w was given; otherwise define a pass-through so the name still exists.
if ($opt_w) {
    require URI::Escape;
    URI::Escape->import('uri_escape');
} else {
    eval 'sub uri_escape { @_ }';
}

# Set constants. $opt_D is already populated here because getopts() ran in
# the BEGIN block above. SMLOCS is the list of places find_sendmail() looks.
use constant DEBUG => $opt_D ? 1 : 0;
use constant SMLOCS => qw(
    /usr/lib/sendmail
    /usr/sbin/sendmail
    /usr/ucblib/sendmail
);

# When we do forking to get diff files, we want to ignore the children. The
# handler reaps one child and then re-installs itself.
sub REAPER { wait; $SIG{CHLD} = \&REAPER }
$SIG{CHLD} = \&REAPER;

# These will be used by tmp_file_name(). With -g the user name, rather than
# the process group ID, is the token that makes temp file names unique.
my $tmpdir = tmpdir();
my $pgrp = $opt_g ? $ENV{USER} : getpgrp;

# Determine location of CVS and diff binaries and of the null file.
# -Q also turns on -q.
$opt_e ||= 'cvs';
$opt_j ||= 'diff';
$opt_N ||= '/dev/null';
$opt_q ||= $opt_Q;

# Make sure that $opt_I and $opt_E are not both specified.
mydie("Cannot specify both -I and -E.\n") if $opt_I && $opt_E;

# This hash will be used in several of the functions below. It maps the
# short category keys to the headings shown in messages and debug output.
my %map = (
    mod => 'Modified Files',
    add => 'Added Files',
    rem => 'Removed Files',
    rev => 'Revision Data',
);

##############################################################################
# The main part of the program.
##############################################################################

# Check for required command-line arguments.
# Check for required command-line arguments: unless -l or -v was given, both
# -f and -t are needed.
version() if $opt_v;
usage() if $opt_h || (!$opt_l && !$opt_v && !($opt_f && $opt_t));

# Run the directory logging mode, if specified. log_dir() exits when done.
log_dir($ARGV[0]) if $opt_l;

# Set the diffs and charset options to their defaults.
$opt_o ||= '-u';
$opt_C ||= 'utf-8';

# Set the content type. HTML::Entities is loaded only for HTML mail (-H).
my $ctype = 'text/plain';
if ($opt_H) {
    require HTML::Entities;
    HTML::Entities->import('encode_entities');
    $ctype = 'text/html';
}

# Get the current working directory, the name of the repository module, and a
# list of the files being revised in the current directory. $mod and $revs
# are left undefined when no file survived the -I/-E filters.
my ($dir, $mod, $revs) = parse_file_list($opt_f, $opt_P);

# Unless we're in commit mode, just send the current message. syncmail()
# does not return: it exits itself or via mail().
# NOTE(review): when -c is absent and $mod is empty, execution falls through
# to is_last_dir() below, which needs the file written by -l -- confirm that
# this is intended.
syncmail($mod, $revs) if $mod && not $opt_c;

# So if we got here, parse the contents of STDIN. This gives us the message
# and tags for only the last directory to be committed, but that should be
# fine.
my ($msg, $files, $tags) = $mod ? parse_stdin($dir, $revs) : ();

# If it's the last directory, send the message. Otherwise, save the data to
# disk for later in the commit.
if (is_last_dir($dir)) {
    notify($mod, $msg, $revs, $files, $tags);
} else {
    save_data($revs, $files) if $mod;
}

# That's it!

##############################################################################
# FUNCTIONS                                                                  #
##############################################################################

##############################################################################
# This function logs the currently-processed directory. Used when -l is passed
# -- that is, in the commitinfo file.
##############################################################################

sub log_dir {
    # Records the directory passed as the first argument, relative to
    # $ENV{CVSROOT}, in the 'lastdir' temp file and then exits. is_last_dir()
    # reads the file back later. Does not return.

    # Grab the directory name. CVSROOT is quoted with \Q...\E so that regex
    # metacharacters in the repository path (e.g. "+" or ".") are literal.
    my $root = defined $ENV{CVSROOT} ? $ENV{CVSROOT} : '';
    my $dir  = defined $_[0] ? $_[0] : '';
    $dir =~ s|^\Q$root\E/?||;
    DEBUG && dbpnt("Directory: '$dir'\n");

    my $file = tmp_file_name('lastdir');
    DEBUG && dbpnt("Writing to '$file'\n");
    local *FILE;
    open FILE, ">$file" or mydie("Cannot open '$file' for writing: $!\n");
    print FILE $dir;
    # Buffered write errors only surface at close, so check it.
    close FILE or mydie("Cannot write to '$file': $!\n");
    exit;
}

##############################################################################
# These functions simply build a temporary filename. Pass in a string to make
# the filename unique. tmp_file_name() includes the full path to the temp
# file, while tmp_base_name returns just the basename of the file.
##############################################################################

# Full path of the temp file for the given tag, inside $tmpdir.
sub tmp_file_name {
    my $base = tmp_base_name($_[0]);
    return catdir($tmpdir, $base);
}

# Base name only: "#cvs.<tag>.<process group or user>".
sub tmp_base_name {
    return "#cvs.$_[0].$pgrp";
}

##############################################################################
# This function takes $opt_f as an argument, and returns the directory on
# which the current action is being processed, and an anonymous array of the
# file specs in that directory that are affected by this action.
##############################################################################

sub parse_file_list {
    # Arguments: the -f file list and, last, the -P directory (which may be
    # empty).
    #
    # Returns ($dir) alone when no file survives the -I/-E filters, otherwise
    # ($dir, $module, \%revmap). %revmap maps each file name to
    # [$old_rev, $new_rev, $path_within_module], or to undef for a file that
    # was filtered out.
    #
    # "New directory" and "Imported sources" notices are handled here
    # directly: they either exit (-n / -i) or are mailed, and mail() exits.
    my $dir = pop;
    my @revs;
    if ($dir) {
        # CVS 1.12 command-line format.
        @revs = shift =~ /(\S+\s(?:[\d\.]+|NONE)\s(?:[\d\.]+|NONE))\s?/g;
    } else {
        # Deprecated command-line format.
        DEBUG && dbpnt("File List: $_[0]\n");
        # Get the directory, list of file specs, and the module name.
        ($dir, @revs) = split ' ', shift;

        # Make sure we get the real full directory name -- that is, try to
        # compensate for directory names with commas and/or spaces.
        until (-d catdir($ENV{CVSROOT}, $dir) or not @revs) {
            $dir .= " " . shift @revs;
        }
        # Change @revs to the new command line format
        s/,/ /g for @revs;
    }

    my ($mod, @subdirs) = splitdir $dir;
    # A leading separator yields an empty first element; skip over it.
    $mod = shift @subdirs unless defined $mod && $mod ne '';
    DEBUG && dbpnt("Directory: '$dir'; Subdir: '" . catdir(@subdirs) .
                   "'; Module: $mod\n");

    # Guard against an empty list so the comparison cannot warn under -w.
    if (@revs && $revs[0] eq '-') {
        # This is a special status message, not really a version update or
        # anything. Just add more items here if you want to simply send the
        # notification and exit.
        my @chk = ([$opt_n, 'New', 'directory', 'New Directory'],
                   [$opt_i, 'Imported', 'sources', 'Imported Sources']);
        my ($word1, $word2) = map { defined $_ ? $_ : '' } @revs[1, 2];

        # So test for the simple checks.
        foreach my $c (@chk) {
            if ($word1 eq $c->[1] && $word2 eq $c->[2]) {
                # If we have a match, either exit or send the notice.
                DEBUG && dbpnt("Action: $c->[3]");
                exit if $c->[0];
                my $sub = mk_subject($c->[3], $mod);
                # Set up the multipart/mixed boundary for HTML email.
                boundary($sub) if $opt_a;
                mail($sub, $opt_H
                     ? \"${\xhtml_header()}<pre>${\slurp_file()}</pre>${\xhtml_footer()}"
                     : \slurp_file() );
            }
        }
    }

    # If we get here, it's not a special status message, but a list of files
    # and revisions. So split them up into a hash.
    my (%revmap, $got_files);
    # NOTE: the unescaped "." before the final \d is what lets multi-digit
    # revision components such as 1.12 match; do not "fix" it to "\.".
    my $regex = qr/^(.*?)(?:\s([\d.]+.\d|NONE))?(?:\s([\d.]+.\d|NONE))?$/;

    # Put together the regular expressions for including or excluding files.
    $opt_I = [map { qr/$_/ } split ' ', $opt_I] if $opt_I;
    $opt_E = [map { qr/$_/ } split ' ', $opt_E] if $opt_E;

  REV: while (@revs) {
        my $spec = shift @revs;
        my ($file, $old, $new) = $spec =~ $regex;
        # Handle file names with spaces: keep appending pieces until one of
        # them carries revision numbers.
        until ($old or $new or not @revs) {
            $spec = shift @revs;
            (my ($f), $old, $new) = $spec =~ $regex;
            $file .= " $f";
        }

        # Remember the name even if the file is filtered out below, so that
        # parse_stdin() can still recognize it.
        $revmap{$file} = undef;
        my $path = catfile(@subdirs, $file);
        DEBUG && dbpnt("$file => $old => $new ($path)\n");

        # Make sure the file is meant to be included.
        # NOTE(review): as written, a file must match every -I pattern, not
        # just one of them -- confirm that this is intended.
        if ($opt_I) {
            for my $rx (@$opt_I) {
                next REV unless $path =~ $rx;
                DEBUG && dbpnt("Including $path");
            }
        }

        # Skip the file if it's meant to be excluded. The debug message is
        # printed only when a pattern actually matches.
        if ($opt_E) {
            for my $rx (@$opt_E) {
                next unless $path =~ $rx;
                DEBUG && dbpnt("Excluding $path");
                next REV;
            }
        }

        # If we get here, keep it!
        $got_files = 1;
        $revmap{$file} = [$old, $new, $path];
    }

    # If there are no files, simply return the directory name.
    return ($dir) unless $got_files;
    return ($dir, $mod, \%revmap);
}

##############################################################################
# This function sends notices in roughly the same style as syncmail. It's used
# when -c is not specified.
##############################################################################

sub syncmail {
    # Sends a single notice built from STDIN, optionally followed by diffs
    # for the files in $revs. Never returns: every path ends in exit, either
    # here or inside mail().
    my ($mod, $revs) = @_;

    # Get the subject and the message body.
    my $subject = mk_subject($opt_f, $mod);
    my $body = slurp_file();
    $body =~ s/\s+/\n/s;

    if ($opt_H) {
        # Set up the multipart/mixed boundary and turn the body of the message
        # into HTML.
        boundary($subject) if $opt_a;
        $body = xhtml_header() . "<pre>" . encode_entities($body) . "</pre>";
    }

    if (not $opt_d) {
        # No diffs. Just send the mail.
        $body .= xhtml_footer() . "\n" if $opt_H;
        mail($subject, \$body);
    } else {
        # We want diffs, so the work happens in a forked child while the
        # parent exits right away.
        unless (fork) {
            # In the child. Wait for CVS to let go!
            sleep 2;
            # Now get the diffs and send the message.
            $body .= attach($subject) if $opt_a;
            my $diffs = get_diffs($revs);
            if ($opt_H) {
                $body .= "\n<pre>" . encode_entities($diffs)
                  . '</pre>' . xhtml_footer() . "\n";
            } else {
                $body .= "\n" . $diffs;
            }
            mail($subject, \$body);
        }
        # Exit the parent process.
        exit;
    }
}

##############################################################################
# This function builds an email subject. Pass in the message being logged, and
# it'll return a subject with the message truncated to the first words that
# will fit in 72 characters or less. This length includes the optional message
# that can be passed in via the -m argument as well as the module name if the
# -p argument has been specified.
##############################################################################

sub mk_subject {
    # Builds the subject line from a log message, the module name and,
    # optionally, the files hash used for the -S area prefix.
    my ($msg, $mod, $files) = @_;

    # Put the whole message on one line.
    $msg =~ tr/\n/ /;

    # Keep only the first sentence, i.e. everything up to and including the
    # first ". " that is not at the very start.
    my $stop = index($msg, '. ');
    if ($stop > 0) {
        $msg = substr($msg, 0, $stop + 1);
    }

    # Prepend the filename or top directory.
    if ($opt_S and $opt_c and $files) {
        $msg = modified_area($files) . $msg;
    }

    # Prepend the module name.
    if ($opt_p) {
        $msg = "$mod: $msg";
    }

    # Prepend the optional message.
    if ($opt_m) {
        $msg = "$opt_m $msg";
    }

    # Cut back to the last whitespace within the first 72 characters.
    if (length($msg) > 72) {
        $msg =~ s/^(.{0,72}\s+).*$/$1/m;
    }

    return $msg;
}

##############################################################################
# This function decides how to summarize the list of modified files to give a
# hint in the subject line of what has been changed
##############################################################################
sub modified_area {
    # Takes the files hash (keys mod/add/rem, each a list of
    # "path,oldrev,newrev" strings) and returns a short "<area>: " prefix for
    # the subject: the file name when exactly one file changed, otherwise the
    # last two components of the shortest directory. Returns '' when no files
    # are listed.
    my $files = shift;
    my @paths = map { @{ $files->{$_} || [] } } qw(mod add rem)
      or return '';

    # Drop the two trailing revision fields. They must not be handed to
    # basename(), which would treat them as suffix patterns and chop them off
    # file names such as "release-1.2". Removing only the last two fields
    # also keeps commas that belong to the file name.
    s/,[^,]*,[^,]*$// for @paths;

    return basename($paths[0]) . ': ' if @paths == 1;

    # Pick the directory with the shortest name.
    my $ret = dirname(shift @paths);
    for my $path (@paths) {
        my $dir = dirname($path);
        $ret = $dir if length($dir) < length($ret);
    }

    # Strip out unnecessaries. CVSROOT is quoted so that it matches literally.
    my $root = defined $ENV{CVSROOT} ? $ENV{CVSROOT} : '';
    $ret =~ s/^\Q$root\E//;

    # A path with a single component yields undef for index -2; skip it.
    return catdir(grep { defined } (splitdir $ret)[-2,-1]) . ': ';
}

##############################################################################
# This function simply returns the entire contents of a file passed in as the
# first argument. If there is no first argument, it'll read in STDIN.
##############################################################################

sub slurp_file {
    # Reads everything at once: $/ is undefined for the duration of the call.
    local $/;

    # Without a (true) file name, hand back the contents of STDIN.
    return <STDIN> unless $_[0];

    # Otherwise get the contents of the named file.
    local *FILE;
    open FILE, "<$_[0]"
      or mydie("Cannot open file $_[0] for reading: $!\n");
    my $contents = <FILE>;
    close FILE;
    return $contents;
}

##############################################################################
# This function actually sends the notification message. Pass in the message
# subject and body.
##############################################################################

sub mail {
    # Pipes the message to sendmail and exits. Arguments: the subject and a
    # reference to the body text (a plain string is accepted too). Nothing is
    # sent when -M is set and the body is larger than that many kilobytes.
    # Never returns.
    my ($subject, $body) = @_;

    # Work on the text itself. Measuring the reference would give the length
    # of its "SCALAR(0x...)" string, not of the message.
    my $text = ref $body ? $$body : $body;
    $text = '' unless defined $text;

    # Over max size?
    my $kbytes = length($text) / 1024;
    if ($opt_M and $kbytes > $opt_M) {
        exit if $opt_Q;
        print "*** Not sending mail to $opt_t!\n";
        print "*** The message is ", int($kbytes),
          "k long and maximum message size is ${opt_M}k.\n";
        exit;
    }

    print "Sending mail to $opt_t..." unless $opt_q;

    $opt_s ||= find_sendmail() or mydie("Cannot find sendmail. Use -s.\n");
    open(SENDMAIL, "|$opt_s -oi -t")
      or mydie("Cannot fork for sendmail: $!\n");
    print SENDMAIL "MIME-Version: 1.0\n";
    print SENDMAIL "From: $opt_u\n" if $opt_u;
    print SENDMAIL "To: $opt_t\nSubject: $subject\n";
    print SENDMAIL "Reply-To: $opt_r\n" if $opt_r;
    print SENDMAIL "X-Mailer: activitymail $VERSION, " .
                   "http://search.cpan.org/dist/activitymail/\n";
    if ($opt_a) {
        # $opt_a holds the multipart boundary string at this point.
        print SENDMAIL qq{Content-Type: multipart/mixed; boundary="$opt_a"\n\n}
          . "--$opt_a\nContent-Type: $ctype; charset=$opt_C\n";
    } else {
        print SENDMAIL "Content-Type: $ctype; charset=$opt_C\n";
    }
    print SENDMAIL "\n$text";
    print SENDMAIL "--$opt_a--\n" if $opt_a;
    # The result of close is deliberately not checked: the SIGCHLD handler
    # may already have reaped sendmail, which would make close report a
    # failure.
    close SENDMAIL;
    print "Done\n" unless $opt_q;
    exit;
}

##############################################################################
# This function finds the sendmail executable and returns it.
##############################################################################

sub find_sendmail {
    # Returns the first executable among -s (if given) and the SMLOCS paths,
    # or nothing when none is executable.
    for my $candidate (($opt_s ? $opt_s : ()), SMLOCS) {
        return $candidate if -x $candidate;
    }
    # Return explicitly: the value of a sub that falls off the end of a loop
    # is unspecified.
    return;
}

##############################################################################
# This function gets the diffs for all the files passed to it via an array ref
# of the file spec and returns them.
##############################################################################

sub get_diffs {
    # Takes a hashref of file name => [$old_rev, $new_rev, $label] and
    # returns the concatenated diff output for all of them. Binary files,
    # files whose extension is listed in -B, and entries without revision
    # data are skipped.
    my $revs = shift;
    # Initialize the diff string.
    my $diffs = '';
    # Initialize hash of binary file name extensions.
    my %ignore = $opt_B ? (map { (lc $_, 1) } split /\s+/, $opt_B) : ();
    DEBUG && dbpnt("Will ignore", join (', ', keys %ignore), "\n");

    while (my ($file, $vers) = each %$revs) {
        # Files filtered out by -I/-E are stored with an undef value.
        next unless $vers;
        # Skip binary files.
        next if -B $file;
        # Skip any from the list passed by the user.
        if ($opt_B && $file =~ /\.([^.]+)$/) {
            my $ext = lc $1;
            DEBUG && dbpnt("Ignore $ext? ", $ignore{$ext} ? "Yes\n" : "No\n");
            next if $ignore{$ext};
        }
        # Okay, now process this sucker.
        my ($r1, $r2, $fn) = @$vers;
        # Older versions of CVS are too dumb to know that spaces in arguments
        # to be passed to diff are okay when the whole argument is in
        # quotation marks.
        $fn =~ s/\s+/_/g if $opt_U;

        # Quote everything that comes from the commit so that a single quote
        # in a file name cannot end the argument early. $opt_e, $opt_j and
        # $opt_o are left as they are: they may hold several words.
        my ($qfile, $qfn, $qnull, $qr1, $qr2) =
          map { _shell_quote($_) } $file, $fn, $opt_N, $r1, $r2;

        if ($r1 eq 'NONE') {
            # It's a new file.
            if (-e $file) {
                # Compare to /dev/null.
                $diffs .= `$opt_j $opt_o -L $qnull $qnull -L $qfn $qfile`;
            } else {
                # Otherwise, read the file from a non-changing update and pipe
                # that to diff.
                $diffs .= `$opt_e -fn update -r $qr2 -p $qfile | $opt_j $opt_o -L $qnull $qnull -L $qfn -`;
            }
        } elsif ($r2 eq 'NONE') {
            # The file has been deleted. Read it from a non-changing update
            # and pipe it to diff.
            $diffs .= `$opt_e -fn update -r $qr1 -p $qfile | $opt_j $opt_o -L $qfn - -L $qnull $qnull`;
        } else {
            # We actually have CVS diff the two versions.
            $diffs .= `$opt_e -f diff -kk -L $qfn -L $qfn $opt_o -r $qr1 -r $qr2 $qfile`;
        }
    }
    return $diffs;
}

# Wraps a string in single quotes for the shell, escaping embedded ones.
sub _shell_quote {
    my $arg = shift;
    $arg = '' unless defined $arg;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
}

##############################################################################
# This function parses the contents of STDIN. It grabs the message and
# assembles lists of the files that were affected.
##############################################################################

sub parse_stdin {
    # Reads the CVS log information from STDIN. Arguments: the directory
    # being processed and the hashref produced by parse_file_list().
    #
    # Returns ($msg, $files, $tags): the log message with trailing whitespace
    # removed, a hashref with the keys mod/add/rem (each a list of
    # "fullpath,oldrev,newrev" strings), and an arrayref of tag names, or
    # undef when there were none. Entries in $revs that carry no revision
    # data are deleted from it along the way.
    my ($dir, $revs) = @_;
    my ($msg, $flag, $files) = ('', '', { mod => [], add => [], rem => [] } );
    my $tags;
    while (<STDIN>) {
        # Section headers switch what the following lines mean.
        if (/^Modified Files/) {
            $flag = 'mod';
            DEBUG && dbpnt("Grabbing $map{$flag}\n");
            next;
        } elsif (/^Added Files/) {
            $flag = 'add';
            DEBUG && dbpnt("Grabbing $map{$flag}\n");
            next;
        } elsif (/^Removed Files/) {
            $flag = 'rem';
            DEBUG && dbpnt("Grabbing $map{$flag}\n");
            next;
        } elsif (/^Log Message/) {
            $flag = 'log';
            DEBUG && dbpnt("Grabbing log message\n");
            next;
        }
        # Ignore everything before the first section header.
        next unless $flag;

        if ($flag eq 'log') {
            # Grab it for the log.
            $msg .= $_;
        } elsif (/^\s+Tag:\s+(.*)$/) {
            # It's a branch tag. Using a hash keeps each tag only once.
            $tags->{$1} = 1;
        } else {
            chomp;
            # Test the length, not the truth, so a line of "0" is not lost.
            next unless length $_;
            my @files = split;
            # Loop on the list itself so a file named "0" does not end it.
            while (@files) {
                my $f = shift @files;
                # File names with spaces were split apart; glue the pieces
                # back together until the name is one we know.
                $f .= " " . shift @files
                  until exists $revs->{$f} or not @files;
                # Skip the file if there's no revision information. This
                # is generally because -I or -E excluded it.
                unless ($revs->{$f}) {
                    delete $revs->{$f};
                    next;
                }
                push @{ $files->{$flag} }, catdir($ENV{CVSROOT}, $dir, $f) .
                  ",$revs->{$f}[0],$revs->{$f}[1]";
                DEBUG && dbpnt("$f => $files->{$flag}[-1]\n");
            }
        }
    }

    # Make sure there are no newlines or spaces at the end of $msg.
    $msg =~ s/\s+$//s;
    # Turn the tags into an array.
    $tags = [ keys %$tags ] if $tags;
    # Return!
    return ($msg, $files, $tags);
}

##############################################################################
# This function compares the currently-processed directory to the directory
# logged during the commitinfo phase (by the -l option) and returns true if
# they're the same.
##############################################################################

sub is_last_dir {
    # Returns 1 when the given directory equals the one stored in the
    # 'lastdir' temp file, removing that file; otherwise returns nothing.
    my $dir = shift;
    my $file = tmp_file_name('lastdir');

    # Read the logged directory name back.
    local *FILE;
    open FILE, "<$file" or mydie("Cannot open '$file' for reading: $!\n");
    my $logged = <FILE>;
    close FILE;

    my $is_last = $dir eq $logged;
    if (DEBUG) {
        dbpnt($is_last ? "'$dir' is the last directory\n"
                       : "'$dir' is NOT the last directory\n");
    }
    return unless $is_last;

    # If we got here, there's a match. Delete the file.
    unlink($file);
    return 1;
}

##############################################################################
# This function grabs all the data from the log files and actually sends the
# accumulated notification message.
##############################################################################

sub notify {
    # Gathers the accumulated file lists (and diffs when -d is set), builds
    # the message and mails it. Never returns.
    my ($mod, $msg, $revs, $files, $tags) = @_;
    print "Collecting file lists..." unless $opt_q;

    if (not $opt_d) {
        # No need for diffs. Just send it as is.
        $files = get_files($files) or exit;
        my $subject = mk_subject($msg, $mod, $files);
        mail($subject, build_msg($msg, $files, $revs, $subject, $tags));
        exit;
    }

    # We want diffs. That means we have to fork so that CVS will let go of
    # the files; the parent process exits right here.
    exit if fork;

    # It's the child process. Grab the files.
    $files = get_files($files) or exit;
    # Grab the diffs for the latest files.
    $files->{rev} .= get_diffs($revs);
    my $subject = mk_subject($msg, $mod, $files);
    mail($subject, build_msg($msg, $files, $revs, $subject, $tags));
    exit;
}

##############################################################################
# This function retrieves the lists of all the files affected by this action
# and saves them into a hashref, which it returns. Pass in a hashref with the
# current list of files to have the lists from the log files prepended to that
# list. It also will fetch the diffs stored by previous instances of
# activitymail so that they can all be concatenated together for the entire
# commit.
##############################################################################

sub get_files {
    # Merges the file lists saved by earlier runs (see save_data()) with the
    # lists passed in, deleting the temp files as it goes. With -d it also
    # waits for this commit's lock files to disappear and collects the saved
    # diffs under the 'rev' key.
    #
    # Argument: hashref with the keys mod/add/rem (may be undef).
    # Returns the merged hashref, or nothing when there is no data at all.
    my $files = shift;
    my $fetched = {};
    foreach my $flag (qw(mod add rem)) {
        my $file = tmp_file_name($flag);
        if (-T $file) {
            # The file exists, so grab its contents.
            DEBUG && dbpnt("Grabbing $map{$flag} from '$file'\n");
            local *FILE;
            open FILE, "<$file"
              or mydie("Cannot open '$file' for reading: $!\n");
            while (defined(my $line = <FILE>)) {
                chomp $line;
                push @{ $fetched->{$flag} }, $line;
            }
            close FILE;
        }
        # Delete the file.
        unlink($file);
        # Add the files that we had already.
        push @{ $fetched->{$flag} }, @{ $files->{$flag} }
          if $files->{$flag};
    }

    # Just return if there are no files listed (because they were excluded
    # by -I or -E).
    return unless %$fetched;

    # Return the files unless we need diffs.
    unless ($opt_d) {
        print "Done.\n" unless $opt_q;
        return $fetched;
    }

    # Okay, now get the diffs. Get the name of the lock files and wait until
    # they're all gone.
    print "Collecting diffs..." unless $opt_q;
    my $locks = tmp_base_name('lock');

    DEBUG && dbpnt("Waiting for all '$locks' files...\n");

    # We avoid glob() here because some systems may not have it! That is, with
    # older versions of Perl and where csh isn't installed. Such is the case
    # on SourceForge, for example.
    #
    # The patterns are quoted and anchored so that only this commit's files
    # ("<base>.<pid>") match; otherwise process group 123 would also pick up
    # the files of process group 1234.
    opendir(TMP, $tmpdir) or mydie("Cannot open '$tmpdir': $!\n");
    foreach my $name (grep { /^\Q$locks\E\.\d+$/ } readdir(TMP)) {
        my $lock = catdir($tmpdir, $name);
        sleep 1 while -e $lock;
    }

    # Okay, the locks are gone. Go ahead and get the diffs.
    rewinddir(TMP);
    my $rev_base = tmp_base_name('rev');
    foreach my $name (grep { /^\Q$rev_base\E\.\d+$/ } readdir(TMP)) {
        my $file = catdir($tmpdir, $name);
        DEBUG && dbpnt("Grabbing $map{rev} from '$file'\n");
        local *FILE;
        open FILE, "<$file" or mydie("Cannot open '$file' for reading: $!\n");
        local $/;
        my $chunk = <FILE>;
        close FILE;
        # An empty file reads as undef; there is nothing to append then.
        $fetched->{rev} .= $chunk if defined $chunk;
        unlink($file);
    }
    closedir(TMP);
    print "Done\n" unless $opt_q;
    return $fetched;
}

##############################################################################
# This function takes an existing message, a files href, and the message
# subject as arguments, and returns a fully formatted message that includes the
# list of files. The subject is used for attachments (see attach() below).
##############################################################################

sub build_msg {
    # Arguments: ($msg, $files, $revs, $subject, $tags), passed straight on
    # to build_html_msg() or build_text_msg().
    # Returns a reference to the complete message body, with the diffs from
    # $files->{rev} added inline or as an attachment when -d is set.
    DEBUG && dbpnt("Building message body.\n");

    # Keep the result in a lexical of our own instead of overwriting the
    # file-scoped $msg.
    my $msg = $opt_H ? build_html_msg(@_) : build_text_msg(@_);
    my ($files, $subject) = @_[1, 3];
    my $diffs = ($files && defined $files->{rev}) ? $files->{rev} : '';

    if ($opt_H) {
        # Inline diffs go inside the HTML document.
        if ($opt_d && !$opt_a) {
            $$msg .= "\n$map{rev}\n" . '-' x length($map{rev}) . "\n"
              . '<pre>' . encode_entities($diffs) . '</pre>';
        }
        # Always close the HTML document, and do it before any attachment so
        # that the footer does not end up inside the attached diff.
        $$msg .= xhtml_footer();
        $$msg .= attach($subject) . $diffs if $opt_d && $opt_a;
    } elsif ($opt_d) {
        # Make it either an attachment or inline, depending on -a.
        $$msg .= $opt_a ? attach($subject) :
          "\n$map{rev}\n" . '-' x length($map{rev}) . "\n";
        # Attach those diffs!
        $$msg .= $diffs;
    }
    return $msg;
}

sub build_text_msg {
    # Builds the plain-text message: the log message, any tags, and the
    # modified/added/removed files grouped by directory.
    # Arguments: ($msg, $files, $revs, $subject, $tags); $revs and $subject
    # are accepted but not used here.
    # Returns a reference to the message text.
    my ($msg, $files, $revs, $subject, $tags) = @_;
    $msg = "Log Message:\n-----------\n$msg\n";

    # Create the lines that will go underneath the above in the message.
    my %dash = ( map { $_ => '-' x length($map{$_}) } keys %map );

    # Pattern that strips the repository root; CVSROOT is quoted so that it
    # matches literally.
    my $root = defined $ENV{CVSROOT} ? $ENV{CVSROOT} : '';
    my $strip_root = qr{^\Q$root\E/?};

    # Add any tags, every one of them indented alike.
    $msg .= "\nTags:\n----\n    " . join("\n    ", @$tags) . "\n" if $tags;

    foreach my $type (qw(mod add rem)) {
        # Skip it if there's nothing to report.
        next unless @{ $files->{$type} };

        # Identify the action.
        $msg .= "\n$map{$type}:\n$dash{$type}\n";

        # Grab the first directory name.
        my $lastdir = dirname($files->{$type}[0]);
        $lastdir =~ s/$strip_root//;
        $msg .= "    $lastdir:\n";

        for my $spec (@{ $files->{$type} }) {
            # Each entry is "path,oldrev,newrev"; read it from the end.
            my ($new, $old, $f, @extras) = reverse split /,/, $spec;
            # Handle file names with commas.
            $f = join ',', reverse(@extras), $f if @extras;
            my $curdir = dirname($f);
            $curdir =~ s/$strip_root//;
            if ($curdir ne $lastdir) {
                # Different directory. Record that.
                $lastdir = $curdir;
                $msg .= "    $lastdir:\n";
            }
            # Record the name of the file altered.
            $msg .= "        ";
            my $nm = basename($f);
            my $url = $opt_w ? "$opt_w$curdir/$nm" : '';
            if ($type eq 'mod') {
                if ($opt_V) {
                    $url .= '?r1=' . uri_escape($old) . '&r2='
                          . uri_escape($new) if $opt_w;
                    $nm .= " (r$old -> r$new)";
                }
            } elsif ($type eq 'add') {
                $url .= '?rev=' . uri_escape($new)
                      . '&content-type=text/x-cvsweb-markup' if $opt_w;
                $nm .= " (r$new)" if $opt_V;
            }
            $msg .= "$nm\n";
            $msg .= "        ($url)\n" if $opt_w;
        }
    }
    return \$msg;
}

sub build_html_msg {
    # Builds the HTML message: header, log message, any tags, and the
    # modified/added/removed files grouped by directory. The closing footer
    # is not added here.
    # Arguments: ($msg, $files, $revs, $subject, $tags); $revs and $subject
    # are accepted but not used here.
    # Returns a reference to the message text.
    #
    # $tags is now taken from the arguments instead of silently relying on
    # the file-scoped variable of the same name.
    my ($msg, $files, $revs, $subject, $tags) = @_;

    # Pattern that strips the repository root; CVSROOT is quoted so that it
    # matches literally.
    my $root = defined $ENV{CVSROOT} ? $ENV{CVSROOT} : '';
    my $strip_root = qr{^\Q$root\E/?};

    $msg = xhtml_header() . "<h3>Log Message</h3>\n\n<pre>"
      . encode_entities($msg) . "</pre>\n\n";

    # Add any tags, escaped like the rest of the text.
    if ($tags) {
        my @items = map { my $tag = $_; encode_entities($tag) } @$tags;
        $msg .= "<h3>Tags:</h3>\n<ul>\n  <li>"
          . join("</li>\n  <li>", @items) . "</li>\n</ul>\n";
    }

    foreach my $type (qw(mod add rem)) {
        # Skip it if there's nothing to report.
        next unless @{ $files->{$type} };

        # Identify the action.
        $msg .= "<h3>$map{$type}</h3>\n";

        # Grab the first directory name.
        my $lastdir = dirname($files->{$type}[0]);
        $lastdir =~ s/$strip_root//;
        $msg .= "<h4>" . encode_entities($lastdir) . "</h4>\n<ul>\n";

        for my $spec (@{ $files->{$type} }) {
            # Each entry is "path,oldrev,newrev"; read it from the end.
            my ($new, $old, $f, @extras) = reverse split /,/, $spec;
            # Handle file names with commas.
            $f = join ',', reverse(@extras), $f if @extras;
            DEBUG && dbpnt("processing file $f\n");
            my $curdir = dirname($f);
            $curdir =~ s/$strip_root//;
            if ($curdir ne $lastdir) {
                # Different directory. Record that.
                $lastdir = $curdir;
                $msg .= "</ul>\n<h4>" . encode_entities($lastdir)
                  . "</h4>\n<ul>\n";
            }
            # Record the name of the file altered.
            my $nm = basename($f);
            $msg .= "  <li>";
            my $url = $opt_w ? "$opt_w$curdir/$nm" : '';
            if ($type eq 'mod') {
                $url .= '?r1=' . uri_escape($old) . '&r2='
                      . uri_escape($new) if $opt_w;
                $nm .= " (r$old -> r$new)" if $opt_V;
            } elsif ($type eq 'add') {
                $url .= '?rev=' . uri_escape($new)
                      . '&content-type=text/x-cvsweb-markup' if $opt_w;
                $nm .= " (r$new)" if $opt_V;
            }
            # The label can contain "<", ">" or "&" (the revision arrow
            # always does), so escape it as well.
            my $label = encode_entities($nm);
            $msg .= (
                $opt_w
                    ? '<a href="' . encode_entities($url) . qq{">$label</a>}
                    : $label
            ) . "</li>\n";
        }
        $msg .= "</ul>\n";
    }
    return \$msg;
}

##############################################################################
# These functions create the appropriate XHTML header and footer.
##############################################################################
# Opening part of the XHTML document: doctype, empty head, start of body.
sub xhtml_header {
    return join "\n",
        '<!DOCTYPE html',
        '    PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
        '<html>',
        '<head>',
        '</head>',
        '<body>',
        '';
}

# Closing part of the XHTML document.
sub xhtml_footer {
    return "</body>\n" . "</html>\n";
}

##############################################################################
# This function saves the file and diff data to log files so that the data can
# be fetched later. Pass in the files revised in this directory and a list of
# all the files.
##############################################################################

sub save_data {
    # Appends this directory's file lists to the per-commit temp files and,
    # when -d is set and there are revisions, forks a child that writes the
    # diffs to its own temp file. Never returns.
    my ($revs, $files) = @_;

    foreach my $type (qw(mod add rem)) {
        my $list = $files->{$type};
        # Just skip it if there are no files to record.
        next unless @$list;
        my $file = tmp_file_name($type);
        DEBUG && dbpnt("Saving $map{$type} to '$file'\n");
        local *FILE;
        open FILE, ">>$file"
          or mydie("Cannot open '$file' for writing: $!\n");
        # One entry per line.
        print FILE join("\n", @$list) . "\n";
        close FILE;
    }

    # Exit unless we have diffs to save.
    exit unless $opt_d && $revs;

    # Okay, now get the diffs and save them.
    my $pid = fork;
    if ($pid) {
        # In the parent. Write a lock file named after the child.
        my $lock = tmp_file_name('lock') . "\.$pid";
        open LOCK, ">$lock" or mydie("Cannot open '$lock' for writing: $!\n");
        close LOCK;
        exit;
    }

    # In the child. Wait for CVS to let go!
    sleep 2;
    # Now save the diffs to disk. Each process gets its own file.
    my $file = tmp_file_name('rev') . "\.$$";
    DEBUG && dbpnt("Saving $map{rev} to '$file'\n");
    local *FILE;
    open FILE, ">$file" or mydie("Cannot open '$file' for writing: $!\n");
    print FILE get_diffs($revs);
    close FILE;

    # Okay, now delete the lock file created by the parent and exit.
    my $own_lock = tmp_file_name('lock') . "\.$$";
    DEBUG && dbpnt("Deleting lock file" . $own_lock . "\n");
    unlink($own_lock);
    exit;
}

##############################################################################
# This function generates the text necessary to attach the diffs file. It
# takes the subject as the first argument and uses it to create a hash that
# uniquely identifies the attachment.
##############################################################################

sub attach {
    # Builds the MIME part headers that introduce the diff attachment.
    # The arguments (the subject comes first) are handed to boundary(), which
    # sets $opt_a to the boundary string used below. Returns the boundary
    # line followed by the Content-* headers and a blank line.
    boundary(@_);

    # Get the date (UTC) as a fixed-width YYYYMMDDhhmmss stamp. Zero padding
    # keeps the file name unambiguous: without it 1:23 and 12:03 would both
    # contribute "123".
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime;
    my $stamp = sprintf '%04d%02d%02d%02d%02d%02d',
      $year + 1900, $mon + 1, $mday, $hour, $min, $sec;

    # Get the username.
    my $user = getlogin || getpwuid($<) || "unknown";

    # Return the attachment headers.
    return qq{

--$opt_a
Content-Disposition: attachment; filename=$user-$stamp-diff.txt
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=$opt_C

};
}

##############################################################################
# This function creates the attachment boundary string. MD5 would be better,
# but we can't count on its presence, and it's not really crucial, anyway.
##############################################################################
sub boundary {
    # Runs the first argument through crypt() with a random two-character
    # salt, stores the result in $opt_a and returns it.
    my @salt_chars = ('.', '/', 0..9, 'A'..'Z', 'a'..'z');
    my $salt = $salt_chars[rand 64] . $salt_chars[rand 64];
    return $opt_a = crypt($_[0], $salt);
}

##############################################################################
# This function takes an error message for its argument, prints it, and exits.
# The reason it's a separate function is that it prepends a string to the
# error message so that it'll stand out during commits, and so that the
# program won't actually die.
##############################################################################

sub mydie {
    # Prints the message behind an eye-catching prefix and ends the program
    # with a plain exit rather than die.
    my ($message) = @_;
    print '######## activitymail error: ' . $message;
    exit;
}

##############################################################################
# This function prints debug messages. The reason it's a separate function is
# that it prepends a string to the message so that it'll stand out during
# commits.
##############################################################################

sub dbpnt {
    # Prints a debug message behind an eye-catching prefix. A true second
    # argument asks for a blank line before the message.
    my ($message, $leading_newline) = @_;
    print "\n" if $leading_newline;
    print '@@@@@@@@ activitymail debug: ' . $message;
}

##############################################################################
# This function prints a version message.
##############################################################################
sub version {
    # Prints "<program name> v<version>" on a line of its own, then exits.
    printf "%s v%s\n", basename($0), $VERSION;
    exit;
}

##############################################################################
# This function prints a usage message.
##############################################################################

sub usage {
    # Prints the program name, its version and a one-line summary of every
    # command-line option, then exits. Options that take a value show a
    # placeholder for it in angle brackets.
    my $exe = basename($0);
    print <<"END_USAGE";
$exe v$VERSION

Usage: $exe -l

       $exe -P %p -f '%{sVv}' -t admins\@example.com [options]

Supported options:

  -l             Directory logging mode. Required unless -c.
  -c             Commit mode. Required unless -l.
  -f '%{sVv}'    File spec argument from CVS. Required unless -l.
  -P %p          Commit directory. Needed if -f and with CVS 1.12 or later.
  -t <email>     Notification destination email address. Required unless -l.
  -e <cvs>       Location of CVS. Defaults to "cvs" (i.e., in path).
  -d             Include diffs for all the files processed.
  -j <diff>      Location of diff. Defaults to "diff" (i.e., in path).
  -N <file>      Location of null file. Defaults to "/dev/null".
  -o <opts>      Options to pass to diff command.
  -a             Attach diffs to the message.
  -r <email>     Reply-to address.
  -n             Ignore New directory commits.
  -i             Ignore imports.
  -m <message>   Optional message at beginning of the email subject.
  -p             Add name of the CVS module to the message subject.
  -S             Add lowest common directory name to the subject.
  -s <sendmail>  Location of sendmail.
  -u <email>     User email address from which email should be sent.
  -g             Use \$USER environment variable to group commits.
  -M <size>      Maximum size of emailed messages in kilobytes.
  -V             Include revision numbers in the email message.
  -H             Use HTML for generated emails.
  -C <charset>   Charset to use for Content-Type headers.
  -w <url>       Include links to specified ViewVC/CVSWeb url in email.
  -B <list>      Space-delimited list of file name suffixes not to diff.
  -I <regexen>   Space-delimited list of regexen to include files.
  -E <regexen>   Space-delimited list of regexen to exclude files.
  -U             Remove spaces in diff file name headers.
  -q             Quiet mode.
  -Q             Very quiet mode.
  -v             Print the version number and exit.
  -h             Print this usage statement and exit.
  -D             Debug mode.

END_USAGE
    exit;
}

__END__


=head1 NAME

activitymail - CVS activity notification

=head1 SYNOPSIS

  # In commitinfo:
  DEFAULT $CVSROOT/CVSROOT/activitymail -l

  # In loginfo:
  DEFAULT $CVSROOT/CVSROOT/activitymail -dacf '%{sVv}' -t admins@example.com

=head1 DESCRIPTION

This program may be used for sending email messages for CVS repository
activity. There are a number of different modes supported. It can send
messages for every change to the repository (like C<syncmail>), or it can be
used to send a single message for each commit. In the latter case, a list of
all the files affected by the commit will be assembled and listed in the
single message. This is similar to how C<commit_prep> and C<log_accum> work,
but is more efficient.

An additional option allows for diffs to be calculated for the recent changes,
and either appended to the message (like C<syncmail> does) or added as an
attachment (neater). See the C<-d> and C<-a> options below.

=head1 PREREQUISITES

This script requires L<Getopt::Std|Getopt::Std> and
L<File::Basename|File::Basename>. It also requires a CVS server and the
F<diff> package. See L<"Known Issues"> for more information.

=head1 COREQUISITES

This script works best with L<File::Spec|File::Spec> installed. See L<"Known
Issues"> for more information.

=head1 USAGE

To use this program, you need to check out your F<CVSROOT> repository from CVS
and edit some files. Here are the steps to follow:

=over 4

=item 1.

Copy F<activitymail> into your F<CVSROOT> checkout.

=item 2.

Add F<activitymail> to the F<CVSROOT> repository.

=item 3.

Add F<activitymail> to the F<checkoutlist> file.

=item 4.

If you plan to use commit mode (see L<"Commit Mode"> below), add a call (or
calls) to C<activitymail> with the C<-l> option to the F<commitinfo> file.
Read the CVS docs to determine the format for this file, and to decide which
repositories you want it run for. Here's an example:

  DEFAULT $CVSROOT/CVSROOT/activitymail -l

=item 5.

Add a call (or calls) to activitymail to the F<loginfo> file. Note that the
C<-f> and C<-t> options are required here, and the C<-P> option is a good idea
for CVS 1.12 and later. Use the C<-c> option if you're running commit mode
(i.e., you've added a call with C<-l> to the F<commitinfo> file -- see
L<"Commit Mode"> below). Here's an example:

  DEFAULT $CVSROOT/CVSROOT/activitymail -cP %p -f '%{sVv}' -t admins@example.com

=item 6.

Commit your changes to CVSROOT.

=back

=head1 OPTIONS

=over 4

=item -l

Directory logging mode. Use this mode in the F<commitinfo> file to log the
current directory. Best if used with C<-c> in the F<loginfo> file -- otherwise
it's just a waste.

=item -c

Commit mode. This mode will aggregate all the actions on a single commit and
send a single message. Must have specified C<-l> in the F<commitinfo> file.

=item -f '%{sVv}'

The file spec argument from CVS. It must be called as C<-f '%{sVv}'>. Single
quotes are recommended, as some shells otherwise seem to have a hard time.
Required unless C<-l>. CVS 1.12 and later users should also use C<-P %p>, as
C<-f %1{SVv}> will generate deprecation warnings.

=item -P

The directory argument from CVS. This option is only useful for CVS 1.12 or
later, where the command line format should be C<-P %p -f '%{sVv}'>.

=item -t <email>

The email address or addresses to send notifications to. Required unless
C<-l>.

=item -e <cvs>

Location of the CVS executable, e.g., F</usr/bin/cvs>. Defaults to F<cvs>, thus
assuming that the cvs executable is in the path.

=item -d

Include the diffs for all the files processed. These will be appended to the
notification message unless the -a option has been specified.

=item -j <diff>

Location of the diff executable, e.g., F</usr/bin/diff>. Defaults to F<diff>,
thus assuming that the diff executable is in the path. Used only for comparing
added and deleted files to F</dev/null>.

=item -N <file>

Location of the null file. Defaults to F</dev/null>. This file will be used to
diff against deleted and added files. In most cases this option can be left to
the default, but Win32 systems, for example, will need to change it to F<NUL>
or some such.

=item -o <diff options>

Options to pass to the C<cvs diff> command. Useful for changing the behavior
of the diff command. Be sure to include these options inside quotation marks
so that they will be distinguished from the options parsed by activitymail
itself. See L<diff> for a list of available options. Defaults to
C<-u --minimal> if not specified.

=item -a

Attach diffs to the notification message. The diffs for all the files
processed will be calculated, and then they will be added to the message as an
attachment.

=item -r <email>

An optional reply-to address. This address will be added to a Reply-To header
in the notification email.

=item -n

Ignore "New directory" commits. By default, activitymail sends mail when a
directory has been added. Use this option to ignore those actions.

=item -i

Ignore imports. By default, activitymail sends mail when files have been
imported. Use this option to ignore those actions.

=item -m <message>

An optional message to put at the beginning of the email subject.

=item -p

Option to add the name of the CVS module to the message subject.

=item -S

Attempt to add the file name or lowest common directory name to the message
subject. Used only in commit mode.

=item -s <sendmail>

Location of sendmail. If not specified, activitymail will search for sendmail
in the following locations: C</usr/lib/sendmail>, C</usr/sbin/sendmail>,
C</usr/ucblib/sendmail>. If activitymail cannot find sendmail, it'll throw an
exception.

=item -u <user email>

Email address to use in the From header of the commit email message. Typical
usage is to use the CVS C<$USER> variable to specify an address, e.g., C<-u
${USER}@example.com>. The default behavior is to provide no From header and to
let Sendmail do it.

=item -g

Groups the collection of CVS transactions in a single commit by the C<$USER>
environment variable. This is most useful when connecting to CVS via
C<:pserver:>, since the usual method of collecting transactions -- by relying
on the value returned by C<pgrp> -- won't work. Use in both the F<commitinfo>
and F<loginfo> files, or else it won't work at all!

=item -M <size>

Max length for email messages, in kilobytes. If an email greater than this
size would be sent then an error message is printed to the user's terminal,
instead. This option is useful if your repository contains large binary files
not prevented from being diffed by C<-B>, or when adding many files at once.
In those cases, failing to use this option may result in broken mail clients.

=item -V

Include the old and new revision numbers after each file listed in the email.

=item -H

Generate HTML emails. The Content-Type header will be set to "text/html" and
some basic HTML formatting tags used for the display of the commit message.

=item -C <charset>

Character set to be used in the Content-Type header. Defaults to "UTF-8". If
much of the content in your repository is encoded in a character set
incompatible with UTF-8, then set this option for a more appropriate character
set.

=item -w <url>

Include links to specified ViewVC or CVSWeb URL for the diffs for each file.
Most useful with the C<-H> option.

=item -B <list>

Binary file extension list. C<activitymail> does its best to prevent binary
files from being diffed by using the Perl C<-B> operator to check for binary
files. However, this approach doesn't catch all binary files. If you find that
C<activitymail> is diffing binary files, use this option to specify a quoted,
space-delimited list of file name extensions on the binary files that you want
C<activitymail> to skip.

=item -I <regexen>

A quoted, space-delimited list of regular expressions identifying the files to
include in the processing of the commit message. Use C<\s> in place of literal
spaces. The file name checked by the regular expression will be relative to
the CVS module root. Cannot be used in combination with C<-E>.

=item -E <regexen>

A quoted, space-delimited list of regular expressions identifying the files to
exclude from the processing of the commit message. Use C<\s> in place of
literal spaces. The file name checked by the regular expression will be
relative to the CVS module root. Cannot be used in combination with C<-I>.

=item -U

Older versions of CVS had a bug that prevented them from properly F<diff>ing
when they were passed arguments to be passed to C<diff> with spaces in them.
Since we use the C<-L> option to F<diff> to pass in the complete file name to
be put into the diff headers, this can lead to problems. So if you're using an
older version of CVS that exhibits this problem (prior to 1.12, if I recall
correctly), use this option to replace any spaces in file names with
underscores before passing them off to C<diff>.

=item -q

Quiet mode. Status messages will be suppressed. Debug messages will still be
output if C<-D> is enabled.

=item -Q

Very quiet mode. In addition to the status messages suppressed by C<-q>, C<-Q>
will also suppress the message output when an email won't be sent because of a
size limitation set by C<-M>. Debug messages will still be output if C<-D> is
enabled.

=item -h

Print usage statement. It's a simplified version of this section of the docs,
intended to remind the user of all the options. Be sure to read the detailed
descriptions here, first.

=item -D

Enables debug mode. This will trigger lots of output. All activitymail debug
messages will start with the string "@@@@@@@@ activitymail debug:" so that
they can be spotted easily.

=back

=head1 MODES

=head2 Standard Mode

In this mode, a notification message will be sent for every directory affected
by a single commit to the repository. This could be a lot of messages if
you've made a lot of changes, and is thus highly redundant.

To use it, all you need to do is add a call to C<activitymail> to your
F<loginfo> file with (at minimum) the C<-f>, C<-P> (for CVS 1.12 and later),
and C<-t> options:

  DEFAULT $CVSROOT/CVSROOT/activitymail -P %p -f '%{sVv}' -t admins@example.com

To mimic the behavior of syncmail, add the -d option to append diffs:

  DEFAULT $CVSROOT/CVSROOT/activitymail -dP %p -f '%{sVv}' -t admins@example.com

=head2 Commit Mode

This mode takes just a little more work to put in place, but manages your
email resources much more efficiently. In this mode, C<activitymail> tracks
all the files changed throughout a single commit and sends a single email when
all the changes have been made. This is especially useful in circumstances
where many files have been changed at once. In standard mode, many messages
will be sent, but in commit mode, only one will be sent.

An additional advantage of commit mode is that C<activitymail> will construct a
custom subject for the notification messages. In standard mode, the subject is
simply the contents of the C<-P> and C<-f> options. In commit mode, however,
C<activitymail> will use either the first sentence of the log message, or the
maximum number of words that take up less than 72 characters (including the
C<-m> and/or C<-p> options, if specified). This offers an easy way to see what
was done during the commit based on the context of the beginning of the actual
log message. CVS users thus might want to consider making the first sentence
of their messages (up to the first period) be a brief summary, and the rest of
the message can be a more detailed description of the changes.

To use commit mode, you B<must> place a call to C<activitymail> with the C<-l>
option in your F<commitinfo> file for every repository package you want to
manage in commit mode. Usually, that's everything, so you can just use the
line (as long as you have no other lines -- see L<cvs> for more information):

  DEFAULT $CVSROOT/CVSROOT/activitymail -l

Then, you'll need to add a second call to C<activitymail> to your F<loginfo> file
for the same repository packages as in the F<commitinfo> file's call to
C<activitymail>. A convenient line for this purpose (even if you have other log
filters in place) is the ALL line:

  ALL $CVSROOT/CVSROOT/activitymail -cf '%{sVv}' -t admins@example.com

The DEFAULT line will work equally well. Perhaps you want to have mail sent to
different addresses for different repository packages. See L<cvs> for more
information on the F<loginfo> file syntax.

If you'd like to see diffs for the changes for any particular commit, add the
C<-d> option. All of the changes to the repository will be recorded in diff
format and appended to the end of the message:

  ALL $CVSROOT/CVSROOT/activitymail -cdf '%{sVv}' -t admins@example.com

Better still, have the diffs added to the message as attachments by adding the
-a option.

  ALL $CVSROOT/CVSROOT/activitymail -cdaf '%{sVv}' -t admins@example.com

Finally, if you commit to CVS via C<:pserver:>, you should use the -g option
to get C<activitymail> to properly group all of the CVS activity for a commit. By
default, C<activitymail> uses the value returned by C<pgrp> to determine what's
part of a single commit action and what's another action. However, in
C<:pserver:> mode, C<pgrp> always returns the same value. The solution is to
use the C<$USER> environment variable to group the CVS activity. The
assumption is that a single user will not be doing two separate commits at the
same time, so this should work fine. Note that if you use the -g option, you
B<must> use it in both the F<commitinfo> file:

  DEFAULT $CVSROOT/CVSROOT/activitymail -lg

And the F<loginfo> file:

  ALL $CVSROOT/CVSROOT/activitymail -cdagf '%{sVv}' -t admins@example.com

=head1 KNOWN ISSUES

=over

=item *

This program depends on the presence of several modules that are distributed
standard with Perl. They are L<Getopt::Std|Getopt::Std>,
L<File::Basename|File::Basename>, and L<File::Spec|File::Spec>. If either
Getopt::Std or File::Basename isn't present, C<activitymail> won't run. If
File::Spec isn't installed (not uncommon, since it's a fairly recent addition
to Perl -- SourceForge, for example, doesn't have it as of this writing),
C<activitymail> will assume very simple Unix semantics for creating file names,
and will assume that C</tmp> is the temp directory.

=item *

The default C<diff> on SunOS 5.9 does not like the C<-L> option that
C<activitymail> uses. The workaround is to install GNU C<diff>.

=back

=head1 TO DO

=over 4

=item *

Change the way diffs are aggregated for messages so that we're not loading
them all up into memory, but passing them directly to sendmail instead. This
should dramatically lower the amount of memory C<activitymail> takes up during a
large commit.

=back

=head1 BUGS

Please send bug reports to <bug-activitymail@rt.cpan.org> or report them via
the CPAN Request Tracker at
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=activitymail>.

=head1 AUTHOR

David Wheeler <david@justatheory.com>

=head1 SEE ALSO

=over 4

=item L<SVN::Notify|SVN::Notify>

This is a port of C<activitymail> to subversion. Only it's a B<lot> better.
Check it out!

=item CVSspam

Ruby-powered CVS notification. Includes colored HTML representations of diffs
right in the email. L<http://www.badgers-in-foil.co.uk/projects/cvsspam/>.

=item syncmail

Python-powered CVS notification. Sends emails with diffs for every directory
in a single commit. Popular on
SourceForge. L<http://sourceforge.net/projects/cvs-syncmail>.

=item commit_prep & log_accum

The original Perl 4-powered CVS notification applications.
L<http://ccvs.cvshome.org/source/browse/ccvs/contrib/log_accum.in>,
L<http://ccvs.cvshome.org/source/browse/ccvs/contrib/commit_prep.in>.

=back

=head1 SUPPORT

This module is stored in an open repository at the following address:

L<http://github.com/theory/activitymail/tree/>

Patches against activitymail are welcome. Please send bug reports to
<bug-activitymail@rt.cpan.org>.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2002-2009, David Wheeler. Some Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the same terms as Perl.

=begin comment

=pod SCRIPT CATEGORIES

VersionControl/CVS

UNIX/System_administration

=pod OSNAMES

any

=pod README

This program may be used for sending email messages for CVS repository
activity. Messages can be sent for every repository change, or for every
commit. In the latter case, a list of all the files affected may be assembled
and listed or attached to the single message.

=end comment

=cut