Sophie

Sophie

distrib > Mandriva > 2010.0 > i586 > media > contrib-release > by-pkgid > 43bb1dd9140ac2a3090c0de3bad20e81 > files > 10

spamprobe-1.4d-7mdv2010.0.i586.rpm

#!/usr/bin/ruby
#
# A script to read an mbox of spams and perform a test on each to
# see how well the filter can score spams as they arrive.  After
# each message is scored it is classified as spam and the next
# is scored.  This is intended to simulate normal use of the
# filter with incoming email.
#

# Exactly three arguments are required: the mbox file name, the mail type,
# and the spamprobe command; otherwise print usage and exit with status 1.
unless ARGV.size == 3
  STDERR.print("usage: incrtest filename mail_type spamprobe\n")
  exit(1)
end

# Keeps running totals of how many messages the filter scored as good or
# spam, and how many had to be reclassified because the score disagreed
# with the mail type the caller says the messages really are.
class ScoreKeeper
  # Scores at or above this value are treated as spam.
  SPAM_THRESHOLD = 0.9

  def initialize
    # Kept as floats so that percentage is computed with float division.
    @goods = 0.0
    @spams = 0.0
    # Number of messages whose score disagreed with the expected mail type.
    @classifies = 0
  end

  # Returns true when +score+ puts the message on the opposite side of the
  # spam threshold from +mailtype+ ("good" or "spam"), false otherwise.
  def isWrongType(score, mailtype)
    mailtype == (score >= SPAM_THRESHOLD ? "good" : "spam")
  end

  # Records one message under spam or good according to its +score+.
  def addScore(score)
    if score >= SPAM_THRESHOLD
      addSpam
    else
      addGood
    end
  end

  # Counts one message as good.
  def addGood
    @goods += 1
  end

  # Counts one message as spam.
  def addSpam
    @spams += 1
  end

  # Number of messages recorded so far.
  def total
    @goods + @spams
  end

  # Percentage of recorded messages that scored as spam (NaN when no
  # messages have been recorded, since the counters are floats).
  def percentage
    100.0 * @spams / total
  end

  # Prints a one-line summary of the counters to standard output.
  def printSummary
    printf("GOOD %10d   SPAM %10d  TOTAL  %10d  PCT %10.2f (%d errors)\n", @goods, @spams, total, percentage, @classifies)
  end

  # Scores the message stored in the file +tmpname+ by running
  # "<baseargs> score <tmpname>", records the result, and, when the score
  # disagrees with +mailtype+, counts an error and runs
  # "<baseargs> <mailtype> <tmpname>" so the filter learns the message.
  #
  # +baseargs+ is passed to the shell as-is, so it may contain options.
  def processMail(baseargs, mailtype, tmpname)
    score = 0.0
    IO.popen("#{baseargs} score #{tmpname}", "r") do |f|
      # Read all output so the child is never blocked on a full pipe; only
      # the first line is parsed.  Missing output parses as 0.0.
      # NOTE(review): assumes the first output line starts with the numeric
      # score -- confirm against the spamprobe version in use.
      score = f.readlines.first.to_f
    end
    addScore(score)

    # The parsed score (not a constant) must drive the error check;
    # otherwise every spam would be counted as an error and no good
    # message ever would.
    return unless isWrongType(score, mailtype)

    @classifies += 1
    system("#{baseargs} #{mailtype} #{tmpname}")
  end
end


# Driver: split the mbox into individual messages.  Each message is copied
# to a scratch file and handed to the ScoreKeeper as soon as the start of
# the next message (or the end of the mbox) is reached.
tmpname = '_temp'
filename = ARGV.shift
mailtype = ARGV.shift
baseargs = ARGV.shift

score_keeper = ScoreKeeper.new
on_blank = true   # a "From " separator only counts at the start or after a blank line
first = true      # nothing has been collected yet when the first separator is seen
# Binary mode on both sides: mail is frequently not valid UTF-8, and
# matching a regexp against a string with invalid bytes raises ArgumentError.
tmp = File.open(tmpname, "wb")
begin
  # Block form closes the mbox handle even if processing fails.
  File.open(filename, "rb") do |mbox|
    mbox.each_line do |line|
      if on_blank && line =~ /^From\s+\S+\s+\w\w\w\s+\w\w\w\s+\d\d?\s+\d\d\s*:\s*\d\d\s*:\s*\d\d\s+\d\d\d\d/
        on_blank = false
        if first
          first = false
        else
          # The scratch file now holds one complete message: score it, then
          # start a fresh scratch file for the message that begins here.
          tmp.close
          score_keeper.processMail(baseargs, mailtype, tmpname)
          tmp = File.open(tmpname, "wb")
        end
      elsif line.chomp.empty?
        # Line consists solely of its terminator (LF or CRLF).
        on_blank = true
      end
      tmp.print(line)
    end
  end
ensure
  tmp.close unless tmp.closed?
end
# Score whatever was collected after the last separator.
score_keeper.processMail(baseargs, mailtype, tmpname)
score_keeper.printSummary