使用 Python 提取 eml 格式邮件的收件人、发件人、发件时间等信息,并提取保存邮件附件。
eml 文件是 Outlook 等邮件客户端使用的邮件文件格式。我们可以在 Foxmail 邮件客户端或网易邮箱大师等邮件客户端中,右键指定的邮件,选择“导出邮件”,即可将 web 邮件导出为本地 eml 格式的邮件文件。
执行如下python脚本即可批量提取eml邮件中收件人、发件人、附件等信息。
import email
import os
import re
import sys
import logging
from logging import handlers
from email.utils import parseaddr
class Logger(object):
    """Named logger that writes to the console and to a time-rotated file.

    Args:
        filename: Path of the log file; also used as the logger's name.
        level: One of the keys of ``level_relations``. An unknown name yields
            ``None``, which ``logging`` rejects when the level is set.
        when: Rotation interval unit passed to ``TimedRotatingFileHandler``.
        backCount: Number of rotated files to keep (``backupCount``).
        fmt: Optional ``logging`` format string applied to both handlers.
            An empty string keeps the default format (message only).
    """

    # Short level names accepted by __init__ mapped to logging constants.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL,
    }

    def __init__(self, filename, level='info', when='D', backCount=3, fmt=''):
        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations.get(level))
        # logging.getLogger returns the same object for the same name, so
        # attaching handlers again would duplicate every record.
        if self.logger.handlers:
            return
        formatter = logging.Formatter(fmt) if fmt else None
        sh = logging.StreamHandler()
        th = handlers.TimedRotatingFileHandler(
            filename=filename,
            when=when,
            backupCount=backCount,
            encoding='utf-8',
        )
        for handler in (sh, th):
            if formatter is not None:
                handler.setFormatter(formatter)
            self.logger.addHandler(handler)
# Extract the attachments of one eml file.
# Characters stripped from attachment names before they become file names.
# The backslash is included so a crafted name cannot escape the output
# directory on Windows, where it is a path separator.
_ILLEGAL_NAME_CHARS = re.compile(r"""[\s+!/_,$%^*("'?=\\]+""")


def Get_Annex_Message(FilePath, Annex_Path, filename):
    """Log the key headers of an eml file and save its attachments.

    Every saved attachment is named ``<counter>.<sanitised name>``, where the
    counter is the module-level ``sum`` value; it is incremented after each
    successful write so names never collide.

    Args:
        FilePath: Path of the eml file to parse.
        Annex_Path: Directory the attachments are written to.
        filename: Unused; kept so existing callers keep working.

    Errors are logged through the module-level ``error`` logger and end the
    processing of this file; nothing is raised to the caller.
    """
    global sum  # running counter that keeps attachment names unique
    try:
        with open(FilePath, 'rb') as eml_file:
            msg = email.message_from_binary_file(eml_file)
        print_info(msg)
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            Annex_name = part.get_filename()
            if not Annex_name:
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                # Container parts (e.g. an attached message) have no bytes of
                # their own; skip them instead of leaving an empty file.
                error.logger.error("No decodable payload for attachment %s" % Annex_name)
                continue
            content_type = part.get_content_type()
            Annex_name = _ILLEGAL_NAME_CHARS.sub("", Annex_name)  # drop illegal characters
            if content_type == 'application/x-zip-compressed':
                Annex_name = str(sum) + '.' + Annex_name + '.zip'
            else:
                Annex_name = str(sum) + '.' + Annex_name
            log.logger.info("File\t: %s" % (Annex_name))
            try:
                with open(os.path.join(Annex_Path, Annex_name), 'wb') as out_file:
                    out_file.write(payload)
            except OSError as ee:
                error.logger.error(ee)
                return
            sum += 1
    except Exception as e:
        # Best-effort boundary: one unreadable file must not stop the batch.
        error.logger.error(e)
        return
# Process every file under a directory tree.
def List_Filepath(Eml_Path, Annex_Path):
    """Walk ``Eml_Path`` and extract attachments from every file found.

    Args:
        Eml_Path: Root directory that is searched for mail files.
        Annex_Path: Directory the attachments are written to.
    """
    # os.walk already descends into every sub-directory, so no explicit
    # recursion is needed (a bare directory name would resolve against the
    # current working directory rather than its parent).
    for parent, _dirnames, filenames in os.walk(Eml_Path):
        for filename in filenames:
            log.logger.info("-----------------------------")
            log.logger.info("Name\t: %s" % (filename))
            FilePath = os.path.join(parent, filename)
            Get_Annex_Message(FilePath, Annex_Path, filename)
# Create the destination directory.
def Create_Dir(Annex_Path):
    """Ensure the attachment output directory exists.

    If the path already exists a notice is written to the ``error`` logger and
    the existing location is reused; otherwise the directory is created along
    with any missing parent directories.

    Args:
        Annex_Path: Directory that will receive the extracted attachments.
    """
    if os.path.exists(Annex_Path):
        error.logger.error("Dir exists, Annex file will create in %s" % Annex_Path)
    else:
        # makedirs (not mkdir) so a nested output path works; exist_ok covers
        # the directory appearing between the check and the creation.
        os.makedirs(Annex_Path, exist_ok=True)
# Main routine.
def main():
    """Read the two command-line arguments and run the extraction.

    ``sys.argv[1]`` is the directory containing the eml files and
    ``sys.argv[2]`` is the directory the attachments are written to. The
    process exits with a usage message when either argument is missing.
    """
    global sum
    if len(sys.argv) < 3:
        sys.exit("Usage: python %s <eml_dir> <attachment_output_dir>" % sys.argv[0])
    sum = 1  # attachment counter shared with Get_Annex_Message
    Eml_Path = sys.argv[1]  # first argument: directory holding the eml files
    Annex_Path = sys.argv[2]  # second argument: attachment output directory
    Create_Dir(Annex_Path)  # make sure the output directory exists
    List_Filepath(Eml_Path, Annex_Path)
# Parse and log the key header fields of a message.
def print_info(msg, indent=0):
    """Log the Date, From, To and Cc headers of ``msg``.

    Args:
        msg: Parsed message object providing ``get`` and ``get_all``.
        indent: Nesting depth; headers are only logged at depth 0.
    """
    # Local import: the file's header only imports parseaddr from email.utils.
    from email.utils import getaddresses

    if indent != 0:
        return

    # str() because a header holding raw 8-bit data may come back as a Header
    # object rather than a plain string.
    send_email_date = str(msg.get('Date', ''))

    addresses = {}
    for header in ('From', 'To', 'Cc'):
        values = [str(value) for value in msg.get_all(header, [])]
        # getaddresses understands quoted display names that contain commas
        # and lists without a space after the comma, which a plain
        # split(', ') does not. Entries with no address are dropped.
        addresses[header] = [addr for _name, addr in getaddresses(values) if addr]

    send_email_from = addresses['From']
    send_email_to = addresses['To']
    send_email_cc = addresses['Cc']

    # The "Data" label is left unchanged so existing log output stays stable.
    log.logger.info("Data\t: %s" % (send_email_date))
    log.logger.info("From\t: %s" % (send_email_from))
    log.logger.info("Recv\t: %s" % (send_email_to))
    log.logger.info("Copy\t: %s" % (send_email_cc))
# Script entry point.
if __name__ == "__main__":
    # Module-level loggers used by the functions in this file:
    # `log` records progress in eml.log, `error` records failures in error.log.
    log = Logger(filename='eml.log', level='debug')
    error = Logger(filename='error.log', level='error')
    main()
使用说明:
- Python 版本:3.8
- 运行格式 >python eml.py [邮件所在目录] [要存放附件的目录]
- 生成的日志文件 eml.log 和 error.log 会存放在命令行当前路径(即当前工作目录)