You are here

Converting MBOX to ZIP

Task: Convert an MBOX-format export of a mailbox into a ZIP file that contains each message as a separate file, named after the message's position in the mailbox and its Message-ID. Every e-mail client worth its salt can export individual messages, or a whole mailbox, to an MBOX file.

import io
import mailbox
import zipfile
from email.generator import Generator
from email.parser import Parser
from tempfile import NamedTemporaryFile

def get_scratch_file():
    """Return a new named temporary file opened for binary read/write.

    The file name ends in ``.data``; every other setting is left at the
    ``NamedTemporaryFile`` default. The caller is responsible for closing
    the returned file object.
    """
    return NamedTemporaryFile(suffix='.data', mode='w+b')

if __name__ == '__main__':
    # Export every message of ``mailbox.mbox`` into ``mailbox.zip``; each
    # message becomes one archive entry named ``<counter>-<message-id>.mbox``.

    try:
        # Python 3: Generator emits text, but archive entries are raw bytes,
        # so the message has to be flattened with BytesGenerator.
        from email.generator import BytesGenerator as MessageWriter
    except ImportError:
        # Python 2 has no BytesGenerator; its Generator already writes bytes.
        MessageWriter = Generator

    mbox = mailbox.mbox('mailbox.mbox')  # The MBox file to read
    try:
        # Mode 'w' (re)creates the archive. The context manager makes sure the
        # ZIP central directory is written and the file is closed even when a
        # message fails to convert.
        with zipfile.ZipFile('mailbox.zip', 'w',
                             compression=zipfile.ZIP_DEFLATED) as zfile:
            for counter, message in enumerate(mbox, 1):
                # The header may be missing altogether.
                raw_id = message['Message-ID']
                message_id = '' if raw_id is None else str(raw_id).strip()

                # Drop the enclosing "<" and ">" only when both are present,
                # so an unbracketed id does not lose real characters.
                if message_id.startswith('<') and message_id.endswith('>'):
                    message_id = message_id[1:-1]
                if not message_id:
                    message_id = 'no-message-id'

                # Remove path separators (both flavours) so an id can never
                # introduce directories inside the archive.
                filename = '{0}-{1}.mbox'.format(counter, message_id)
                filename = filename.replace('/', '').replace('\\', '')
                print(filename)

                # Flatten in memory and store the bytes directly; this avoids
                # reopening a still-open temporary file by name, which is not
                # possible on every platform.
                buffer = io.BytesIO()
                writer = MessageWriter(buffer, mangle_from_=False,
                                       maxheaderlen=255)
                writer.flatten(message)
                zfile.writestr(filename, buffer.getvalue())
    finally:
        mbox.close()

Theme by Danetsoft and Danang Probo Sayekti inspired by Maksimer