#!/usr/bin/env python
##
## Name: washcookies.py
## Purpose: Clean up Safari web cookies.
## Author: M. J. Fromberger
## Info: $Id: washcookies.py 654 2008-08-18 04:41:35Z sting $
##
## This program edits the stored web cookies for the user who runs it,
## discarding any cookies that are not deemed acceptable by the user.
## Acceptability is determined by a list of rules. Each rule will
## Accept a cookie, Deny it, or Keep it. A cookie is discarded when
## no Keep rule matches it and, in addition, either:
##
## 1. Some Deny rule matches the cookie, or
## 2. No Accept rule matches the cookie.
##
## Rules are stored in the file "~/.cookierc". Each line that is not
## blank specifies one rule.
##
## Rules have the following format:
##
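## f sep key op arg {sep key op arg}*
## (shown with spaces for readability; a rule contains none, except
## where the separator itself is a space)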
##
## f -- "+" for Accept, "-" for Deny, "!" for Keep.
## sep -- the separator character for criteria.
## key -- the name of a cookie field.
## op -- a comparison operator.
## arg -- an argument for comparison (possibly empty).
##
## Operators:
## = case-insensitive string equality.
## ? test for key existence in the cookie.
## ~ regular expression search (Python regular expressions).
## @ domain-name string matching.
##
## An operator may be prefixed with '!' to negate the sense of the
## comparison. If the key and operator are omitted, "domain" and "@"
## are assumed. The "@" operator does case-insensitive string
## comparison, but if the argument starts with a period "." then
## it matches if the argument is a suffix of the value.
##
## Cookies have the following fields:
## domain -- the host or domain for which the cookie is delivered.
## path -- the path for which the cookie is delivered.
## name -- the name of the cookie.
## value -- the content of the cookie.
## httponly -- the HTTPOnly setting for this cookie.
##
## Examples:
## 1. Accept all cookies from banksite.com
## + .banksite.com
##
## 2. Reject all Google Analytics cookies
## - name~^__utm[abvz]$
##
## 3. Accept cookies from somehost.com, but not foo.somehost.com
## + .somehost.com domain!=foo.somehost.com
##
## 4. Reject cookies without an HttpOnly setting
## - httponly!?
##
from __future__ import with_statement
import os, plistlib, pwd, re, sys, tempfile
# Regular expression matching one criterion of a rule in ~/.cookierc:
# a field name, an optionally negated operator, and the argument.
rule_re = re.compile(r'(\w+)(!?[=~@?])(.*)$')
# {{ parse_rule(s)
def parse_rule(s):
    """Parse a cookie rule, returning a tuple (f, rs) where
    f is the rule type (+ = accept, - = reject, ! = keep)
    rs is a list of criteria.

    The first character of s is the rule type and the second is the
    separator; the rest of s is split on the separator to obtain the
    criteria.  Each criterion is a tuple (op, key, arg) of strings,
    with the key folded to lower case.  A criterion that does not have
    the form key-op-arg is taken to be a domain match and is returned
    as ('@', 'domain', text).

    Raises ValueError if s is too short to hold both a rule type and
    a separator.
    """
    if len(s) < 2:
        # Unpacking s[:2] would also raise ValueError here, but with a
        # message that says nothing about the offending rule.
        raise ValueError('malformed cookie rule: %r' % (s,))
    f, sep = s[0], s[1]
    rs = []
    for r in s[2:].split(sep):
        m = rule_re.match(r)
        if m:
            key, op, arg = m.groups()
            rs.append((op, key.lower(), arg))
        else:
            # No key/operator given: "domain" and "@" are assumed.
            rs.append(('@', 'domain', r))
    return f, rs
# }}
# {{ unparse_rule(r, flag, sep)
def unparse_rule(r, flag='-', sep=' '):
    """Render a list of criteria back into rule text.

    r is a list of (op, key, val) criteria, flag is the text placed in
    front of them (normally the rule type), and sep is the string used
    to join the flag and the criteria together.
    """
    def render(op, key, val):
        # A plain domain match is written in its short form, without
        # the key and operator.
        if (op, key) == ('@', 'domain'):
            return val
        return key + op + val

    pieces = [render(op, key, val) for op, key, val in r]
    return sep.join([flag] + pieces)
# }}
# {{ match_rule(cookie, rule)
def match_rule(cookie, rule):
    """Returns True if the specified cookie (a dict) matches the given
    list of rule criteria; otherwise False.

    Every criterion (op, key, arg) in the rule must match.  An empty
    rule therefore matches any cookie.  Cookie field names are compared
    to the key case-insensitively; a missing field is treated as the
    empty string.  Field values that are not strings are converted
    with str() before comparison.

    Operators (a leading '!' inverts the result):
      ~  regular expression search of the value for arg.
      ?  true if the field is present in the cookie.
      @  if arg starts with '.', true when the value equals the rest
         of arg or ends with '.' plus the rest of arg, ignoring case;
         an arg of just '.' matches every value.  Otherwise the same
         as '='.
      =  case-insensitive string equality (also used for any other
         operator).

    Raises re.error if a '~' criterion has an invalid pattern.
    """
    # str and unicode under Python 2; just str under Python 3.
    text_types = (str, type(u''))

    def match_one(op, key, arg):
        neg = op.startswith('!')
        if neg:
            op = op[1:]
        exists = True
        for k, v in cookie.items():
            if key == k.lower():
                val = v
                break
        else:
            val = ''
            exists = False
        if not isinstance(val, text_types):
            # Plist values may be booleans, numbers or dates, none of
            # which support .lower() or regular expression search.
            val = str(val)
        if op == '~':
            res = bool(re.search(arg, val))
        elif op == '@' and arg.startswith('.'):
            suffix = arg[1:].lower()
            dom = val.lower()
            # Match only on a label boundary, so that ".bank.com" does
            # not accept "evilbank.com".  An empty suffix (arg ".")
            # keeps its meaning of "match everything".
            res = (not suffix) or dom == suffix or dom.endswith('.' + suffix)
        elif op == '?':
            res = exists
        else:
            res = (arg.lower() == val.lower())
        if neg:
            return not res
        return res

    return all(match_one(op, key, arg) for op, key, arg in rule)
# }}
# {{ cookie_path(user)
def cookie_path(user=None):
    """Compute the pathname of the cookies file for a login name.

    When user is None the account is looked up from the effective user
    ID of the current process; otherwise it is looked up by name.  The
    result is Library/Cookies/Cookies.plist beneath that account's
    home directory.

    Note: Only the pathname is computed; the file is not checked for
    existence.
    """
    if user is None:
        account = pwd.getpwuid(os.geteuid())
    else:
        account = pwd.getpwnam(user)
    parts = ('Library', 'Cookies', 'Cookies.plist')
    return os.path.join(account.pw_dir, *parts)
# }}
# {{ read_cookies(path)
def read_cookies(path):
    """Read the property list file at path and return its contents as
    produced by plistlib.readPlist.  main() treats the result as a
    list of cookie dictionaries.
    """
    return plistlib.readPlist(path)
# }}
# {{ write_cookies(cookies, path)
def write_cookies(cookies, path):
    """Write cookies to path as a property list.

    The data is first written to a temporary file created in the same
    directory as path, which is then renamed over path, so a failure
    part-way through never leaves a truncated cookies file behind.

    If writing or renaming fails, the temporary file is removed and
    the original exception propagates to the caller.
    """
    d = os.path.split(path)[0]
    fd, name = tempfile.mkstemp(dir=d, text=True)
    renamed = False
    try:
        with os.fdopen(fd, 'wt') as ofp:
            plistlib.writePlist(cookies, ofp)
        os.rename(name, path)
        renamed = True
    finally:
        # A flag with try/finally is used rather than except/raise so
        # that a failure to unlink cannot replace the original error.
        if not renamed:
            try:
                os.unlink(name)
            except OSError:
                pass
# }}
# {{ load_rules(user)
def load_rules(user = None):
    """Load the list of cookie rules from ".cookierc" in the user's
    home directory.  Returns a tuple of (a, r, k), where a is a list
    of accept rules, r is a list of reject rules, and k is a list of
    keep rules.  Each rule is a list of criteria as returned by
    parse_rule.

    Blank lines and lines whose first non-blank character is '#' are
    skipped, as are rules whose type is not one of '+', '-' or '!'.

    If the rules file cannot be opened or read, the default is to
    accept all cookies: a single accept rule matching every domain,
    and no reject or keep rules.

    A ValueError raised by parse_rule for a malformed line is not
    caught here.
    """
    cpath = os.path.expanduser('~%s/.cookierc' % (user or ''))
    a = [] ; r = [] ; k = []
    by_flag = {'+': a, '-': r, '!': k}
    try:
        with open(cpath, 'rt') as fp:
            for line in fp:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                f, rs = parse_rule(line)
                if f in by_flag:
                    by_flag[f].append(rs)
    except (OSError, IOError):
        # The criteria of the rule "+ ."; a domain argument of "."
        # matches every cookie.  Three lists are always returned so
        # callers can unpack the result unconditionally.
        return [[('@', 'domain', '.')]], [], []
    return a, r, k
# }}
# {{ find_bad_cookies(cookies, allow, deny, keep)
def find_bad_cookies(cookies, allow, deny, keep):
    """Select the cookies that should be discarded.

    Returns a dictionary (the kill set) mapping the position of each
    unwanted cookie in the list to the reason it was selected.  A
    cookie is unwanted when no keep rule matches it and either some
    deny rule matches it or no allow rule matches it.

    The reason is the first deny rule that matched the cookie, or None
    when the cookie was selected only because no allow rule matched.
    """
    def first_match(cookie, rules):
        # Report (True, rule) for the first rule matching the cookie,
        # or (False, None) when none of them do.
        for rule in rules:
            if match_rule(cookie, rule):
                return True, rule
        return False, None

    kill = {}
    for pos, cookie in enumerate(cookies):
        protected, _ = first_match(cookie, keep)
        if protected:
            continue
        rejected, culprit = first_match(cookie, deny)
        if rejected:
            kill[pos] = culprit
            continue
        accepted, _ = first_match(cookie, allow)
        if not accepted:
            kill[pos] = None
    return kill
# }}
# {{ main(argv)
def main(argv):
    """Command-line entry point.

    Reads the current user's cookies file, removes every cookie that
    the rules from load_rules() select for removal, and writes the
    file back if anything was removed.  Progress is reported on
    standard error; if the WC_EXPLAIN environment variable is set to
    a non-empty value, the reason for each removal is reported too.
    The argv parameter is not used.  Always returns 0.
    """
    def plural(n):
        return "s" if n != 1 else ""

    explain = os.getenv('WC_EXPLAIN', False)
    cfpath = cookie_path()
    cookies = read_cookies(cfpath)
    allowed, denied, kept = load_rules()
    doomed = find_bad_cookies(cookies, allowed, denied, kept)

    if not doomed:
        print >> sys.stderr, "No unwanted cookies found."
        return 0

    print >> sys.stderr, "Removing %d unwanted cookie%s:" % (
        len(doomed), plural(len(doomed)))
    for pos in sorted(doomed, key = lambda p: cookies[p]['Domain']):
        victim = cookies[pos]
        why = doomed[pos]
        print >> sys.stderr, ' - %-30.30s %s=%-20.20s' % (
            victim['Domain'], victim['Name'], victim['Value'])
        if not explain:
            continue
        if why:
            print >> sys.stderr, ' %s' % \
                  unparse_rule(why, flag = 'rejected by')
        else:
            print >> sys.stderr, ' no matching rule'

    # Delete from the highest position downwards so that the remaining
    # positions stay valid while the list shrinks.
    for pos in sorted(doomed, reverse = True):
        del cookies[pos]
    write_cookies(cookies, cfpath)
    print >> sys.stderr, "Kept %d cookie%s in %s" % (
        len(cookies), plural(len(cookies)), cfpath)
    return 0
# }}
# Names exported by "from washcookies import *".
__all__ = (
    "parse_rule", "match_rule", "load_rules",
    "cookie_path", "read_cookies", "write_cookies",
    "find_bad_cookies",
)

if __name__ == "__main__":
    # Run as a script: main()'s return value becomes the exit status.
    sys.exit(main(sys.argv[1:]))
# Here there be dragons