blob: 31f07b45a6b984f0afe98a099162136997721fee [file] [log] [blame]
#!/usr/bin/python
# Script for watching twistd.log and looking for exceptions, and mailing them
# to an email address
"""%prog [options] dir [dir...]
Scans directories for exceptions in twistd.log files"""
import os
import re
import time
from smtplib import SMTP
from email.mime.text import MIMEText
import email.utils
def find_files(dirs, lasttime):
"""Return a list of twisted log files in directories `dirs` that have been
modified since `lasttime`"""
retval = []
for d in dirs:
for f in os.listdir(d):
if not f.startswith("twistd.log"):
continue
f = os.path.join(d, f)
mtime = os.path.getmtime(f)
if mtime > lasttime:
retval.append((mtime, f))
retval.sort()
return [r[1] for r in retval]
def unauthorized_logins(hostname, excs, name):
import socket
pattern = re.compile("(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d-\d\d\d\d) \[Broker,\d+,(\d+.\d+.\d+.\d+)\]")
unauthorized_hosts = {}
remaining_excs = []
example = ""
for e in excs:
m = pattern.search(e)
if e.find("Failure: twisted.cred.error.UnauthorizedLogin:") >= 0 and m and m.groups:
if unauthorized_hosts.has_key(m.group(2)):
unauthorized_hosts[m.group(2)]['count'] += 1;
if str(m.group(1)) < unauthorized_hosts[m.group(2)]['first']:
unauthorized_hosts[m.group(2)]['first'] = str(m.group(1))
if str(m.group(1)) > unauthorized_hosts[m.group(2)]['most_recent']:
unauthorized_hosts[m.group(2)]['most_recent'] = str(m.group(1))
else:
unauthorized_hosts[m.group(2)] = {}
unauthorized_hosts[m.group(2)]['count'] = 1;
unauthorized_hosts[m.group(2)]['first'] = str(m.group(1))
unauthorized_hosts[m.group(2)]['most_recent'] = str(m.group(1))
try:
unauthorized_hosts[m.group(2)]['hostname'] = socket.gethostbyaddr(m.group(2))[0]
except socket.herror:
unauthorized_hosts[m.group(2)]['hostname'] = "unknown"
if example == "":
example = e
else:
remaining_excs.append(e)
msg = ""
for ip in unauthorized_hosts:
msg += "Count: %d | First instance: %s | Most recent instance: %s | Twistd exception: twisted.cred.error.UnauthorizedLogin - %s %s\n" % (unauthorized_hosts[ip]['count'],
unauthorized_hosts[ip]['first'],
unauthorized_hosts[ip]['most_recent'],
unauthorized_hosts[ip]['hostname'],
ip)
if msg != "":
msg += "Example:\n"
msg += example
return msg, remaining_excs
def format_exceptions(hostname, excs, name):
repeat_pattern_funcs = [
unauthorized_logins,
]
msg = ""
for f in repeat_pattern_funcs:
if excs:
repeat_msg, excs = f(hostname, excs, name)
if repeat_msg != "":
msg += repeat_msg
if excs:
matched_excs = {}
for exc in excs:
lines = exc.split("\n")
key = "\n".join(lines[2:])
parsed_ts = ' '.join(lines[1].split(' ')[:2])
if matched_excs.has_key(key):
matched_excs[key]['count'] += 1
if parsed_ts > matched_excs[key]['most_recent']:
matched_excs[key]['most_recent'] = parsed_ts
continue
if parsed_ts < matched_excs[key]['first']:
matched_excs[key]['first'] = parsed_ts
continue
else:
matched_excs[key] = {}
matched_excs[key]['count'] = 1
matched_excs[key]['example'] = exc
matched_excs[key]['exception'] = lines[-2].strip()
matched_excs[key]['first'] = parsed_ts
matched_excs[key]['most_recent'] = parsed_ts
for exc_key in sorted(matched_excs, key=lambda x: (matched_excs[x]['count'], x), reverse=True):
msg += "\n" + "-" * 80 + "\n"
msg += "Count: %d | First instance: %s | Most recent instance: %s | Twistd exception: %s\n" % (matched_excs[exc_key]['count'],
matched_excs[exc_key]['first'],
matched_excs[exc_key]['most_recent'],
matched_excs[exc_key]['exception'])
msg += "Example:\n"
msg += matched_excs[exc_key]['example']
return msg
def send_msg(fromaddr, emails, hostname, name, msg):
"""Send an email to each address in `emails`, from `fromaddr`
The message will contain the hostname and list of exceptions `excs`"""
s = SMTP()
s.connect()
for addr in emails:
m = MIMEText(msg)
m['date'] = email.utils.formatdate()
m['to'] = addr
m['subject'] = "twistd.log exceptions on %s %s" % (hostname, name)
s.sendmail(fromaddr, [addr], m.as_string())
s.quit()
def parse_time(line):
"""Returns a timestamp from a datestring in the given line"""
m = re.search("(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line)
if m:
return time.mktime(time.strptime(m.group(1), "%Y-%m-%d %H:%M:%S"))
return None
class Scanner:
ignore_patterns = [
re.compile(re.escape("Failure: twisted.spread.pb.PBConnectionLost: [Failure instance: Traceback (failure with no frames): <class 'twisted.internet.error.ConnectionLost'>: Connection to the other side was lost in a non-clean fashion.")),
re.compile("schedulers/triggerable.py\", line \d+, in run.*d = self.parent.db.runInteraction\(self._run\).*exceptions.AttributeError: 'NoneType' object has no attribute 'db'", re.M + re.S),
# Ignore errors caused by older buildbot versions on the masters.
re.compile(re.escape("exceptions.AttributeError: BuildSlave instance has no attribute 'perspective_shutdown'")),
# Ignore users cancelling try runs
re.compile(re.escape("Failure: exceptions.RuntimeError")),
# Ignore clean-close "errors" from tegras
re.compile(re.escape("Failure: twisted.spread.pb.PBConnectionLost: [Failure instance: Traceback (failure with no frames): <class 'twisted.internet.error.ConnectionDone'>: Connection was closed cleanly")),
# Ignore stale broker refs we can't do anything about.
re.compile("twisted.spread.pb.DeadReferenceError: Calling Stale Broker"),
# Ignore exceptions triggered by NoneType objects.
re.compile(re.escape("exceptions.AttributeError: 'NoneType' object has no attribute")),
# Ignore PB connect errors.
re.compile(re.escape("Failure: twisted.spread.pb.PBConnectionLost: [Failure instance: Traceback (failure with no frames): <class 'socket.error'>: [Errno 9] Bad file descriptor")),
# Ignore AbandonChain errors
re.compile(re.escape("Failure: buildslave.exceptions.AbandonChain")),
]
def __init__(self, lasttime=0):
# Buffer of unhandled lines
self.lasttime = lasttime
def scan_file(self, f):
"""Scans file `f` for exceptions.
Returns a list of exception logs"""
current_exc = None
retval = []
for line in open(f):
# If we're processing an exception log, append this line to the
# current exception
if current_exc is not None:
# Blank lines mean the end of the exception
if line.strip() == "":
current_exc = "".join(current_exc)
for p in self.ignore_patterns:
if p.search(current_exc):
break
else:
retval.append(current_exc)
current_exc = None
else:
current_exc.append(line)
elif line.strip().endswith("Unhandled Error"):
# Ignore exceptions in this file that are older than
# lasttime
t = parse_time(line)
if not t:
print "Couldn't parse time in", line
elif t > self.lasttime:
current_exc = [line]
# Handle exceptions printed out at the end of the file
if current_exc:
current_exc = "".join(current_exc)
for p in self.ignore_patterns:
if p.search(current_exc):
break
else:
retval.append(current_exc)
return retval
def scan_dirs(self, dirs):
retval = []
new_lasttime = self.lasttime
# Find which files are newer than lasttime
# This won't handle the case where the logs are rotated while this
# script is running. FIXME?
files = find_files(dirs, self.lasttime)
for f in files:
for e in self.scan_file(f):
retval.append("Exception in %s:\n%s" % (f, e))
new_lasttime = max(new_lasttime, os.path.getmtime(f))
self.lasttime = new_lasttime
return retval
if __name__ == '__main__':
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-t", "--timefile", dest="timefile",
help="where to save time of last handled file")
parser.add_option("-e", "--email", dest="emails", action="append",
help="email address to send exceptions to")
parser.add_option("-f", "--fromaddr", dest="fromaddr",
help="From address for email notifications")
parser.add_option("-n", "--name", dest="name", help="Short description")
parser.set_defaults(
emails=[],
fromaddr="reply@not.possible",
)
options, args = parser.parse_args()
if len(args) < 1:
parser.error("Must specify at least one directory to scan")
# Try and get the time we last ran from the timefile
lasttime = 0
if options.timefile:
try:
lasttime = float(open(options.timefile).read())
except:
pass
exceptions = []
s = Scanner(lasttime)
exceptions.extend(s.scan_dirs(args))
if exceptions:
hostname = os.uname()[1]
msg = format_exceptions(hostname, exceptions, options.name)
if options.emails:
send_msg(options.fromaddr, options.emails,
hostname, options.name, msg)
else:
print msg
if options.timefile:
open(options.timefile, "w").write(str(s.lasttime))