"""Mail body parser driven by ``MAILGW_*`` settings, plus a sample message.

Run as a script to print the sample message and what ``pc`` keeps of it.
"""
import re

# Default settings used by ``pc`` when the caller passes no ``config``.
# The ``*_RE`` entries are compiled patterns; ``pc`` calls ``.split`` and
# ``.match`` on them directly.
aconfig = {
    "MAILGW_LEAVE_BODY_UNCHANGED": False,
    "MAILGW_KEEP_QUOTED_TEXT": True,
    "MAILGW_EOL_RE": re.compile(r"[\r\n]+"),
    "MAILGW_BLANKLINE_RE": re.compile(r"[\r\n]+\s*[\r\n]+"),
    "MAILGW_SIGN_RE": re.compile(r"^[>|\s]*-- ?$"),
    "MAILGW_ORIGMSG_RE": re.compile(
        r"^[>|\s]*-----\s?Original Message\s?-----$"),
}


def _is_quoted(line):
    """Return True when *line* starts with a quote marker (``>`` or ``|``)."""
    return line.startswith(('>', '|'))


def pc(content, keep_citations=None, keep_body=None, config=None):
    """Split a mail body into sections and return ``(summary, content)``.

    The body is cut into sections with ``MAILGW_BLANKLINE_RE`` after leading
    CR/LF characters are dropped.  A section is treated as quoted when its
    first or second line starts with ``>`` or ``|``:

    * if no line from the second one onwards is unquoted, the section is
      kept only when ``keep_citations`` is true;
    * otherwise the section is cut down to start at the first such unquoted
      line, and that remainder becomes the summary candidate.

    Once a summary candidate exists, processing stops at the first section
    whose first line matches ``MAILGW_SIGN_RE`` (and which has 2 to 10
    lines) or ``MAILGW_ORIGMSG_RE``.

    Known limitation: a console transcript whose lines start with ``>>>``
    looks like quoted text, so only its trailing unquoted lines survive.

    Args:
        content: the message body as a string.
        keep_citations: keep wholly quoted sections; defaults to
            ``config["MAILGW_KEEP_QUOTED_TEXT"]``.
        keep_body: return ``content`` untouched instead of the filtered
            text; defaults to ``config["MAILGW_LEAVE_BODY_UNCHANGED"]``.
        config: mapping holding the ``MAILGW_*`` settings; defaults to the
            module-level ``aconfig``.

    Returns:
        A ``(summary, content)`` tuple.  ``summary`` is the longer of the
        first sentence (text up to the first ``!``, ``?`` or ``.``) and the
        first line of the summary candidate.  ``content`` is the kept
        sections joined by ``'\\n\\n'``, or the original ``content`` when
        ``keep_body`` is true.

    Side effects:
        Prints the raw sections to stdout as a debugging trace.
    """
    if config is None:
        config = aconfig
    if keep_citations is None:
        keep_citations = config["MAILGW_KEEP_QUOTED_TEXT"]
    if keep_body is None:
        keep_body = config["MAILGW_LEAVE_BODY_UNCHANGED"]
    eol = config["MAILGW_EOL_RE"]
    signature = config["MAILGW_SIGN_RE"]
    original_msg = config["MAILGW_ORIGMSG_RE"]

    # Drop every leading CR/LF so a body made only of newlines produces no
    # section at all.  ``content`` itself is left alone for ``keep_body``.
    sections = config["MAILGW_BLANKLINE_RE"].split(content.lstrip('\r\n'))

    # Debugging trace of the raw sections before any filtering.  A single
    # formatted argument keeps this valid on both Python 2 and Python 3.
    print('Sections:\n%s\n\n' % '\n----------\n'.join(sections))

    summary = ''
    kept = []
    for section in sections:
        if not section:
            continue
        lines = eol.split(section)
        if _is_quoted(lines[0]) or (len(lines) > 1 and _is_quoted(lines[1])):
            # Look for a reply glued onto the quote (no blank line between
            # them).  Track the position rather than searching by value, so
            # an unquoted lead-in line equal to the reply cannot be mistaken
            # for it.
            # NOTE: this also trims console snippets, where every line but
            # the output starts with '>>>'.
            reply_at = next(
                (pos for pos, line in enumerate(lines[1:], 1)
                 if line and not _is_quoted(line)),
                None)
            if reply_at is None:
                # Nothing but quoted text: keep it only if configured to.
                if keep_citations:
                    kept.append(section)
                continue
            # Keep the reply part and use it as the summary candidate.
            lines = lines[reply_at:]
            section = '\n'.join(lines)
            summary = section

        if not summary:
            # No candidate yet: this section becomes the summary source.
            summary = section
        elif signature.match(lines[0]) and 2 <= len(lines) <= 10:
            # Signature block: drop it and everything after it.
            break
        elif original_msg.match(lines[0]):
            # "Original Message" marker: the rest is the quoted original.
            break

        kept.append(section)

    # Summary is the first sentence or the first whole line, whichever is
    # longer.  Both are prefixes of the candidate, so comparing by length is
    # well defined.
    found = re.search(r'^([^!?\.]+[!?\.])', summary)
    sentence = found.group(1) if found else ''
    first = eol.split(summary)[0]
    summary = max(sentence, first, key=len)

    # Rebuild the body from the kept sections unless told to leave it alone.
    if not keep_body:
        content = '\n\n'.join(kept)

    return summary, content


# Sample message: a quote, a reply, and a console snippet whose '>>>' lines
# are taken for quoted text.
bug = ("> Quote\n\nUnquote\n\ncode snippet:\n\n"
       ">>> d = {}\n>>> d[1] = 'a'\n>>> d[1.0] = 'b'\n>>> d[1]\n'b'")

if __name__ == "__main__":
    print("Original:\n%s\n\n" % bug)
    print("MailGW version:\n%s" % pc(bug, config=aconfig)[1])