Home > Articles > Web Services > XML

  • Print
  • + Share This
This chapter is from the book

This chapter is from the book

14.9 | Source Code for the xMail Application

CD-ROM reference=14018.txt

"""
xMail
Convert mailboxes to a simple XML form for
e-mail messages.
XML Processing with Python
Sean McGrath
The Eudora e-mail client stores e-mail messages in mailboxes.
The file format is plain text. Individual messages are separated by the string "From ???@???".
This program processes a mailbox creating an XML file
that conforms to the xmail DTD.
"""
# Import some standard modules
# rfc822 is the module for e-mail header parsing
import string,rfc822,StringIO
# Mailbox format selectors: the command-line driver sets its format
# to one of these values to choose which converter function to run.
LINUX  = 0
EUDORA = 1
def XMLEscape(s):
      """
      Escape XML's two special characters which may
      occur within an e-mail message.

      "&" becomes "&amp;" and "<" becomes "&lt;".

      s -- the text to escape.
      Returns a copy of s with both characters replaced.
      """
      # The ampersand must be handled first; otherwise the "&" that
      # "&lt;" introduces would itself be escaped a second time.
      s = s.replace("&", "&amp;")
      s = s.replace("<", "&lt;")
      return s
def ProcessMessage(lines,out):
      """
      Write one XML message element for a single e-mail.

      lines -- list of the text lines that make up the message.
               They are joined with no separator, so each line is
               expected to carry its own line terminator.
      out   -- object whose write() method receives the XML text.

      The headers are parsed with the rfc822 module and written as
      <field> elements inside <headers>; whatever is left unread in
      the message text after header parsing is written as <body>.
      """
      emit = out.write
      emit("<message>\n")
      # rfc822 reads from a file object, so wrap the joined text in
      # an in-memory one.
      source = StringIO.StringIO("".join(lines))
      parsed = rfc822.Message(source)
      emit("<headers>\n")
      # The parsed message hands back its headers as (name, value)
      # pairs through the dictionary-style items() call.
      for pair in parsed.items():
            name, value = pair
            emit("<field>\n")
            emit("<name>%s</name>\n" % XMLEscape(name))
            emit("<value>%s</value>\n" % XMLEscape(value))
            emit("</field>\n")
      emit("</headers>\n")
      # Header parsing has consumed the top of the file object; the
      # remainder is the message body.
      remainder = source.read()
      emit("<body>\n")
      emit(XMLEscape(remainder))
      emit("</body>\n")
      emit("</message>\n")

def DoEudoraMailBox(MailBox):
      """
      Given a Eudora mailbox, convert its contents to XML conforming
      to the xmail DTD.

      MailBox -- path of the mailbox file.  Its first line must
                 contain the separator "From ???@???" and the path
                 must end in ".mbx".

      The XML is written to a file with the same base name but an
      ".xml" extension.  If either check fails, a diagnostic is
      printed and no output file is created.  Returns None.
      """
      # Sentinel that separates e-mail messages in the Eudora mbx
      # notation.
      separator = "From ???@???"
      f = open(MailBox,"r")
      # try/finally so the mailbox is closed on the early returns too.
      try:
            # Strip only the line terminator, so a one-line file with
            # no trailing newline does not lose its last character.
            l = f.readline().rstrip("\r\n")
            if l.find(separator) == -1:
                  print('Expected mailbox "%s" to start with "%s"' % (MailBox, separator))
                  return
            if MailBox[-4:] != ".mbx":
                  print("Expected mailbox to have .mbx file extension %s" % MailBox)
                  return
            # Output file has same base name but .xml extension.
            out = open(MailBox[:-3]+"xml","w")
            try:
                  out.write('<?xml version="1.0"?>\n')
                  out.write('<!DOCTYPE xmail SYSTEM "xmail.dtd">\n')
                  out.write('<xmail>\n')
                  Message = []
                  l = f.readline()
                  while l:
                        if l.find(separator) != -1:
                              # Full message accumulated in the Message
                              # list, so process it to XML.
                              ProcessMessage(Message,out)
                              Message = []
                        else:
                              # Accumulate e-mail contents line by line
                              # in Message list.
                              Message.append(l)
                        l = f.readline()
                  if Message:
                        # Last message in the mailbox
                        ProcessMessage(Message,out)
                  out.write('</xmail>\n')
            finally:
                  out.close()
      finally:
            f.close()

def DoLinuxMailBox(MailBox):
      """
      Given a Unix mbox style mailbox, convert its contents to XML
      conforming to the xmail DTD.

      MailBox -- path of the mailbox file.  Its first line must begin
                 with "From ", which is also the marker that starts
                 every subsequent message.

      The XML is written to a file named after the mailbox with
      ".xml" appended.  If the first line does not begin with
      "From ", a diagnostic is printed and no output file is created.
      Returns None.
      """
      # A line beginning with this text starts a new message.
      separator = "From "
      f = open(MailBox,"r")
      # try/finally so the mailbox is closed on the early return too.
      try:
            l = f.readline()
            if not l.startswith(separator):
                  print('Expected mailbox "%s" to start with "From "' % MailBox)
                  return
            # Output file has same name as mailbox but with ".xml" added.
            out = open(MailBox+".xml","w")
            try:
                  out.write('<?xml version="1.0"?>\n')
                  out.write('<!DOCTYPE xmail SYSTEM "xmail.dtd">\n')
                  out.write('<xmail>\n')
                  Message = []
                  l = f.readline()
                  while l:
                        if l.startswith(separator):
                              # Full message accumulated in the Message
                              # list, so process it to XML.
                              ProcessMessage(Message,out)
                              Message = []
                        else:
                              # Accumulate e-mail contents line by line
                              # in Message list.
                              Message.append(l)
                        l = f.readline()
                  if Message:
                        # Last message in the mailbox
                        ProcessMessage(Message,out)
                  out.write('</xmail>\n')
            finally:
                  out.close()
      finally:
            f.close()

if __name__=="__main__":
      # Command-line driver: takes -l (Unix mbox) or -e (Eudora) and
      # exactly one mailbox path, then runs the matching converter.
      import sys,getopt
      usage = "Usage: %s -l|-e mailbox" % sys.argv[0]
      # Unix mbox is assumed when neither option is given.
      mailbox_format = LINUX
      try:
            (options,remainder) = getopt.getopt (sys.argv[1:],"le")
      except getopt.GetoptError:
            # Unrecognised option: show the usage line rather than a
            # traceback.
            print(usage)
            sys.exit(2)
      for (option,value) in options:
            if option == "-l":
                  mailbox_format = LINUX
            elif option == "-e":
                  mailbox_format = EUDORA
      if len(remainder)!=1:
            print(usage)
            # Non-zero status so a calling script can detect the
            # failed invocation.
            sys.exit(2)
      if mailbox_format==EUDORA:
            DoEudoraMailBox(remainder[0])
      elif mailbox_format==LINUX:
            DoLinuxMailBox(remainder[0])
  • + Share This
  • 🔖 Save To Your Account