CREATE TABLE `posts` ( `post_id` int(11) NOT NULL, `user_id` int(11) DEFAULT NULL, `post_time` int(11) DEFAULT NULL, `topic_id` int(11) DEFAULT NULL, `post_text` text, `rtf_file` varchar(45) DEFAULT NULL, PRIMARY KEY (`post_id`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE `users` ( `id` int(11) NOT NULL, -- id `mail` varchar(45) DEFAULT NULL, --ExchangeUserAddressEntry `exmail` varchar(100) DEFAULT NULL, --exchange , `exist` tinyint(1) DEFAULT NULL, -- , . . . `name` varchar(100) DEFAULT NULL, -- `inab` tinyint(1) DEFAULT NULL, -- PRIMARY KEY (`id`), UNIQUE KEY `mail_UNIQUE` (`mail`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE `topics` ( `id` int(11) NOT NULL, `title` varchar(255) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `title_UNIQUE` (`title`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE `attachments` ( `filename` varchar(45) NOT NULL, -- `name` varchar(255) DEFAULT NULL, -- `post_id` int(11) DEFAULT NULL, --id `att_id` int(11) DEFAULT NULL, --id PRIMARY KEY (`filename`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE `garbage` ( `id` int(11) NOT NULL, `rtf_file` varchar(45) DEFAULT NULL, -- post.rtf `class` int(11) DEFAULT NULL, -- Outlook Item class, PRIMARY KEY (`id`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
import win32com.client # Outlook.Application import pymysql.cursors # mysql # COM AddressEntries object = win32com.client.Dispatch("Outlook.Application") ns = object.GetNamespace("MAPI") als = ns.AddressLists gal = als.Item("Global Address List") ent = gal.AddressEntries # mysql cnx = pymysql.connect(use_unicode=True, charset='utf8',user='outlook', password='password', host='server',database='outlook') cursor = cnx.cursor() id = 0 for rec in ent: id += 1 exmail = rec.Address # ExchangeUserAddressEntry name = rec.Name # mail = rec.PropertyAccessor.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x39FE001E") # «account@domain.com» cursor.execute("INSERT INTO users (id, mail, exmail, name, inab) VALUES (%s, %s, %s, %s, 1);", (id, mail,exmail,name)) # inab =1, . . cursor.close() cnx.close()
import win32com.client import pymysql.cursors object = win32com.client.Dispatch("Outlook.Application") ns = object.GetNamespace("MAPI") tf = ns.GetFolderFromID('<EntryID>') # EntryID i = 0 tmp = tf.items # / tmp.sort('[ReceivedTime]',False) # cnx = pymysql.connect(use_unicode=True, charset='utf8',user='outlook', password='password', host='server',database='outlook') cursor = cnx.cursor() for aaa in tmp: i +=1 rtf_file = "post_%d.rtf" %i # rtf if (aaa.Class == 45) or (aaa.Class == 43): # postitem mailitem aaa.SaveAs('c:\\temp\\low\\store\\%s' %rtf_file ,1) #Save as rtf # , "c:\temp\low\store\" for ac in range(1,aaa.Attachments.Count,1): if aaa.Attachments.Item(ac).Type <> 6: # , OLE document, unrtf name = aaa.Attachments.Item(ac).FileName ext = name.split('.')[-1] filename = 'att_%d_%d.%s' %(i,ac,ext) aaa.Attachments.Item(ac).SaveAsFile('c:\\temp\\low\\store\\'+filename) cursor.execute("INSERT IGNORE INTO attachments (filename, name, post_id, att_id) VALUES (%s, %s, %s, %s);" ,(filename, name,i,ac)) # exmail = aaa.SenderEmailAddress name = aaa.SenderName #, users, , user_id cursor.execute("SELECT id FROM users WHERE exmail = '%s' UNION SELECT max(id)+1 FROM users;" %(exmail)) res = cursor.fetchall() if len(res) == 2: user_id = res[0][0] cursor.execute("UPDATE users SET exist=1 WHERE id=%s;",user_id) #, elif len(res) == 1: # , inab=0, . . user_id = res[0][0] mail = exmail.split('=')[-1] if '@' not in mail: mail = mail+'@not.exist' # - AD cursor.execute("INSERT INTO users (id,exist, exmail,mail, name, inab) VALUES (%s,1, %s, %s, %s, 0);" ,(user_id, exmail,mail,name)) # topic = aaa.ConversationTopic # outlook ConversationTopic tq = """set @mmm = (SELECT IFNULL(max(id), 0)+1 FROM topics); INSERT IGNORE INTO topics(id,title) values (@mmm,%s);""" cursor.execute(tq,topic) # # posts cursor.execute("SELECT id FROM topics WHERE title = %s;",topic) topic_id =cursor.fetchall()[0][0] post_time = int(aaa.ReceivedTime) cursor.execute("INSERT IGNORE INTO posts (post_id, user_id, post_time, topic_id, rtf_file) VALUES (%s, %s, %s, %s, %s);",(i,user_id,post_time,topic_id,rtf_file)) else: #Garbage cursor.execute("INSERT IGNORE INTO garbage (id, rtf_file,class) VALUES (%s, %s,%s);",(i,rtf_file,aaa.Class)) cursor.close() cnx.close()
#!/usr/bin/python import lxml.etree, pymysql, subprocess, os #lxml # , cpost - post_id. /tmp/2del - , unrtf def parsertf(cpost): p = subprocess.Popen('unrtf /opt/unrtf/store/post_%d.rtf'%cpost, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, shell=True, cwd = '/tmp/2del/') f = p.stdout.read() root = lxml.etree.HTML(f)[1] img_id = 0 for img in root.xpath('//img'): # <img>, img_id += 1 if img.attrib['src'][-4:] == '.wmf': # img src = *.wmf, - . #, outlook cursor.execute('SELECT filename,name FROM attachments WHERE post_id=%s AND att_id=%s;'%(cpost,img_id)) try: res = cursor.fetchall()[0] img.addnext(lxml.etree.fromstring('<a href="/path/%s">%s</a>'%(res[0],res[1]))) # , , except: # , wmf. attname = 'att_%d_%d'%(cpost,img_id)+img.attrib['src'][-4:] subprocess.Popen('mv -f /tmp/2del/%s /opt/unrtf/store/%s'%(img.attrib['src'], attname), shell=True) img.addnext(lxml.etree.fromstring('<a href="/path/%s">%s</a>'%(attname,attname))) img.getparent().remove(img) else: # imgname = 'img_%d_%d'%(cpost,img_id)+img.attrib['src'][-4:] subprocess.Popen('mv -f /tmp/2del/%s /opt/unrtf/store/%s'%(img.attrib['src'], imgname), shell=True) img.attrib['src'] = imgname root.remove(root[0]) # from/to, .. htmltext = lxml.etree.tostring(root) cursor.execute('UPDATE posts SET post_text=%s WHERE post_id=%s;',(htmltext,cpost)) subprocess.Popen('rm -rf /tmp/2del/*', shell=True) return htmltext cnx = pymysql.connect(use_unicode=True, charset='utf8',user='outlook', password='password', host='server',database='outlook') cursor = cnx.cursor() # cursor.execute('SELECT post_id FROM posts;') posts = cursor.fetchall() for cpost in posts: parsertf(cpost[0]) cursor.close() cnx.close()
Source: https://habr.com/ru/post/309712/
All Articles