Sophie

Sophie

distrib > Mageia > 1 > i586 > by-pkgid > 40c756a8011f8b53c82f133567c4dbff > files > 501

mhonarc-2.6.16-6.mga1.noarch.rpm

#!/usr/bin/perl
##---------------------------------------------------------------------------##
##  File:
##	$Id: mha-preview,v 1.4 2005/07/05 02:06:21 ehood Exp $
##  Author:
##      Earl Hood       earl@earlhood.com
##  Description:
##      Custom MHonArc-based program that supports $X-MSG-PREVIEW$
##	resource variable using the callback API.
##
##	Invoke program with -man option to see manpage.
##---------------------------------------------------------------------------##
##    Copyright (C) 2002,2005	Earl Hood, earl@earlhood.com
##    This program is free software; you can redistribute it and/or modify
##    it under the same terms as MHonArc itself.
##---------------------------------------------------------------------------##

# Uncomment and modify the following if MHonArc libraries were not
# installed in a perl's site directory or in perl's normal search path.
#use lib qw(/path/to/mhonarc/libraries);

package MHAPreview;

use Getopt::Long;

# Max size of preview text: This is the maximum amount that will be
# saved for each message.  The resource variable length modifier can
# be used to always display less than max, but it is best to avoid
# doing that since it is a slow operation.  We have a custom command-line
# option to set the max size if code change is not desired.
my $PreviewLen = 256;

##-----------------------------------------------------------------------##
##  Main Block
##-----------------------------------------------------------------------##

MAIN: {
    unshift(@INC, 'lib');	# Should I leave this line in?

    ## Grab options from @ARGV unique to this program
    my %opts = ( );
    Getopt::Long::Configure('pass_through');
    GetOptions(\%opts,
      'prv-maxlen=i',
      'help',
      'man'
    );
    usage(1) if $opts{'help'};
    usage(2) if $opts{'man'};

    if ($opts{'prv-maxlen'} && ($opts{'prv-maxlen'} > 0)) {
      $PreviewLen = $opts{'prv-maxlen'};
    }

    ## Reset pass-through of options
    Getopt::Long::Configure('no_pass_through');

    ## Initialize MHonArc
    require 'mhamain.pl' || die qq/ERROR: Unable to require "mhamain.pl"\n/;
    mhonarc::initialize();

    ## Register callbacks for handling preview text
    register_callbacks();

    ## Process input.
    mhonarc::process_input() ? exit(0) : exit($mhonarc::CODE);
}

##-----------------------------------------------------------------------##
##  Callback Functions
##-----------------------------------------------------------------------##

sub register_callbacks {
  $mhonarc::CBMessageBodyRead = \&msg_body_read;
  $mhonarc::CBRcVarExpand     = \&rc_var_expand;
  $mhonarc::CBDbSave	      = \&db_save;
}

sub msg_body_read {
  my($fields, $html, $files) = @_;
  my $mha_index = $fields->{'x-mha-index'};
  my $preview = extract_preview($html, $PreviewLen);
  $X_MessagePreview{$mha_index} = $preview;
  1;
}


sub rc_var_expand {
  my($mha_index, $var_name, $arg_str) = @_;

  # $X-MSG-PREVIEW(mesg_spec)$
  if ($var_name eq 'X-MSG-PREVIEW') {
    # Use MHonArc function to support a mesg_spec argument
    my ($lref, $key, $pos, $opt) =
	mhonarc::compute_msg_pos($mha_index, $var_name, $arg_str);
    return ($X_MessagePreview{$key}||"", 0, 1);
  }

  # If we do not recognize $var_name, make sure to tell
  # MHonArc we do not so it will try.
  (undef, 0, 0);
}


sub db_save {
  my($db_fh) = @_;
  # Make sure variable is package qualified!
  mhonarc::print_var($db_fh, 'MHAPreview::X_MessagePreview',
		     \%X_MessagePreview);
}

##-----------------------------------------------------------------------##
##  Support Functions
##-----------------------------------------------------------------------##

sub extract_preview {
  # Extracting the preview text of the message body is not as
  # trivial as you may expect.  We have to deal with HTML tags
  # and entity references, but want to avoid the overhead of
  # using a full-blown HTML parser.  We also want to skip any
  # quoted text, otherwise preview text of replies would mainly
  # contain quoted text, making preview less useful.

  my $html_ref = shift;	# reference to HTML message body
  my $prev_len = shift;	# length of preview to extract

  # Make copy since we will be pre-process data to make extraction easier
  my $html = $$html_ref;

  # Normalize EOLs to make other patterns simplier
  $html =~ s/\r\n/\n/g;
  # Strip out quoting using <blockquote> (for flowed and/or fancy-quoting)
  $html =~ s/<blockquote[^>]*>.*?<\/blockquote\s*>//gis;
  # Strip tags
  $html =~ s/<[^>]*>//g;
  # Quoting using > and other common styles
  $html =~ s/^(?:&gt;|[\|:\+]).*$//gm;
  # Outhouse method of quoting
  $html =~ s/^-----Original Message-----.*\Z//;
  # Remove signatures
  $html =~ s/\n-- \n.*\z//s;
  # Preamble side comments
  $html =~ s/\A(?:\s*\[[^\]]*\])+//;
  # Common quote preambles
  $html =~ s/\A\s*In\s+article.*?(?:wrote|writes|said|says):[^\S\n]*\n//si;
  $html =~ s/\A.*(?:wrote|writes|said|says):[^\S\n]*\n//si;
  # Minimize whitespace
  $html =~ s/\s+/ /g;

  my $text = "";
  my $html_len = length($html);
  my($pos, $sublen, $erlen, $real_len);
  
  for ( $pos=0, $sublen=$prev_len; $pos < $html_len; ) {
    $text .= substr($html, $pos, $sublen);
    $pos += $sublen;

    # check for clipped entity reference
    while (($pos < $html_len) && ($text =~ s/\&[^;]*\Z//)) {
      $text .= substr($html, $pos, 1);
      ++$pos;
    }

    # compute entity reference lengths to determine "real" character
    # count and not raw character count.
    $er_len = 0;
    while ($text =~ /(\&[^;]+);/g) {
      $er_len += length($1);
    }

    # done if we have enough
    $real_len = length($text)-$er_len;
    if ($real_len >= $prev_len) {
      if ($real_len < $html_len) {
	$text .= '...';
      }
      last;
    }

    $sublen = $prev_len - (length($text)-$er_len);
  }

  $text;
}

sub usage {
  require Pod::Usage;
  my $verbose = shift;
  if ($verbose == 0) {
    Pod::Usage::pod2usage(-verbose => $verbose);
  } else {
    my $pager = $ENV{'PAGER'} || 'more';
    local(*PAGER);
    my $fh = (-t STDOUT && open(PAGER, "|$pager")) ? \*PAGER : \*STDOUT;
    Pod::Usage::pod2usage(-verbose => $verbose,
                          -output  => $fh);
  }
  exit 0;
}

##-----------------------------------------------------------------------##
__END__

=head1 NAME

mha-preview - MHonArc front-end to support message preview variable

=head1 SYNOPSIS

S<B<mha-preview> [I<options>] [I<arguments>]>

=head1 DESCRIPTION

B<mha-preview> is an example program the utilizes MHonArc's callback
API to support the special resource variable C<$X-MSG-PREVIEW$>.
The C<$X-MSG-PREVIEW$> represents the initial text of a message body.
With this variable, index pages can contain be customized to give
a listing like some MUAs that provide a glimpse of the message body
in the mail listing of a mail folder.

When extracting the preview text of the message body, all HTML tags
are removed and whitespace is compressed.

B<CAUTION>: If B<mha-preview> is used for an archive, it should
always be used to process the archive.  Otherwise, the message preview
data will be lost.

=head1 OPTIONS

B<mha-preview> takes the same options available to B<mhonarc> along
with the following additional options:

=over

=item C<-help>

Print a usage summary of this program (this option overrides
B<mhonarc>'s C<-help> option).

=item C<-man>

Print the manpage for this program.

=item C<-prv-maxlen>

Maximum amount of characters of the message body to store for each
message.  The default value is 256.

=back

=head1 NOTES

=over

=item *

The functionality of this program could be placed into the
C<mhasiteinit.pl> library to avoid the need for this program and
to make it part of the locally installed B<mhonarc>.  This would
avoid the problem noted in the CAUTION mentioned
in the L<DESCRIPTION section|"DESCRIPTION">.

=item *

The body preview resource variable may be worth putting into the
MHonArc code base directly.

=back

=head1 SEE ALSO

mhonarc(1)

=head1 LICENSE

B<mha-preview> comes with ABSOLUTELY NO WARRANTY and can be distributed
under the same terms as MHonArc itself.

=head1 AUTHOR

Earl Hood, earl@earlhood.com

=cut