#!/usr/bin/env python
##
## Name:     washcookies.py
## Purpose:  Clean up Safari web cookies.
## Author:   M. J. Fromberger
## Info:     $Id: washcookies.py 654 2008-08-18 04:41:35Z sting $
##
## This program edits the stored web cookies for the user who runs it,
## discarding any cookies that are not deemed acceptable by the user.
## Acceptability is determined by a list of rules.  A rule will either
## Accept a cookie, Deny it, or Keep it.  A cookie is discarded if:
##
## 1. No Keep rule matches the cookie, and either
## 2. Any Deny rule matches the cookie, or
## 3. No Accept rule matches the cookie.
##
## Rules are stored in the file "~/.cookierc".  Each line that is not
## blank and does not start with "#" specifies one rule.  If the file
## is missing or contains no rules, every cookie is accepted.
##
## Rules have the following format:
##
##   <f>{<sep><key><op><arg>}+
##
## f    -- "+" for Allow, "-" for Deny, "!" for Keep.
## sep  -- the separator character for criteria.
## key  -- the name of a cookie field.
## op   -- a comparison operator.
## arg  -- an argument for comparison (possibly empty).
##
## Operators:
##  =  case-insensitive string equality.
##  ?  test for key existence in the cookie.
##  ~  regular expression search (Python regular expressions).
##  @  domain-name string matching.
##
## An operator may be prefixed with '!' to negate the sense of the
## comparison.  If the key and operator are omitted, "domain" and "@"
## are assumed.  The "@" operator does case-insensitive string
## comparison, but if the argument starts with a period "." then it
## matches the named domain itself and any subdomain of it (the match
## respects label boundaries, so ".a.com" does not match "xa.com").
## The argument "." alone matches every value.
##
## Cookies have the following fields:
##  domain   -- the host or domain for which the cookie is delivered.
##  path     -- the path for which the cookie is delivered.
##  name     -- the name of the cookie.
##  value    -- the content of the cookie.
##  httponly -- the HTTPOnly setting for this cookie.
##
## Examples:
## 1. Accept all cookies from banksite.com
##    + .banksite.com
##
## 2. Reject all Google Analytics cookies
##    - name~^__utm[abvz]$
##
## 3. Accept cookies from somehost.com, but not foo.somehost.com
##    + .somehost.com domain!=foo.somehost.com
##
## 4. Reject cookies without an HttpOnly setting
##    - httponly?
##
## Runs on Python 2.6+ and Python 3.
##
from __future__ import print_function, with_statement

import os
import plistlib
import pwd
import re
import sys
import tempfile

__all__ = (
    "parse_rule",
    "unparse_rule",
    "match_rule",
    "load_rules",
    "cookie_path",
    "read_cookies",
    "write_cookies",
    "find_bad_cookies",
)

# Regular expression matching a rule in ~/.cookierc
rule_re = re.compile(r'(\w+)(!?[=~@?])(.*)$')


# {{ parse_rule(s)

def parse_rule(s):
    """Parse a cookie rule, returning a tuple (f, rs) where

    f   is the rule type (+ = accept, - = reject, ! = keep)
    rs  is a list of criteria.

    Each criterion is a tuple (op, key, arg) of strings.  A criterion
    that does not have the form <key><op><arg> is treated as a domain
    match, i.e. ('@', 'domain', <text>).

    Raises ValueError if s is too short to hold a flag and a separator.
    """
    if len(s) < 2:
        raise ValueError('malformed cookie rule: %r' % (s,))

    f, sep = s[0], s[1]
    rs = []
    for r in s[2:].split(sep):
        m = rule_re.match(r)
        if m:
            rs.append((m.group(2), m.group(1).lower(), m.group(3)))
        else:
            rs.append(('@', 'domain', r))

    return f, rs

# }}


# {{ unparse_rule(r, flag, sep)

def unparse_rule(r, flag='-', sep=' '):
    """Unparse a cookie rule, returning a string.  The flag is the rule
    type, and sep is the desired criterion separator.

    Domain-match criteria are written in their short form (the bare
    argument); all others are written as <key><op><arg>.
    """
    out = [flag]
    for op, key, val in r:
        if op == '@' and key == 'domain':
            out.append(val)
        else:
            out.append(key + op + val)

    return sep.join(out)

# }}


# {{ match_rule(cookie, rule)

def match_rule(cookie, rule):
    """Returns True if the specified cookie (a dict) matches the given
    list of rule criteria; otherwise False.

    Every criterion must match; an empty list of criteria therefore
    matches any cookie.  Cookie keys are compared case-insensitively.
    """
    def match_one(op, key, arg):
        neg = op.startswith('!')
        if neg:
            op = op[1:]

        val, exists = '', False
        for k, v in cookie.items():
            if key == k.lower():
                val, exists = v, True
                break

        # Property lists may hold booleans, numbers or dates; compare
        # those by their string form instead of failing on .lower().
        if not hasattr(val, 'lower'):
            val = str(val)

        if op == '~':
            res = bool(re.search(arg, val))
        elif op == '@' and arg.startswith('.'):
            dom = arg[1:].lower()
            low = val.lower()
            # Match the domain itself or a subdomain, but only on a
            # label boundary.  A bare "." (empty dom) matches anything.
            res = (not dom) or low == dom or low.endswith('.' + dom)
        elif op == '?':
            res = exists
        else:
            res = (arg.lower() == val.lower())

        return not res if neg else res

    return all(match_one(op, key, arg) for op, key, arg in rule)

# }}


# {{ cookie_path(user)

def cookie_path(user=None):
    """Find the path of the cookies file for the specified login name.
    Uses the owner of the current process if not specified.

    Note:  This function does not verify the existence of the file, it
    only computes the pathname.  A KeyError from the pwd module
    propagates if the user is unknown.
    """
    if user is None:
        home = pwd.getpwuid(os.geteuid()).pw_dir
    else:
        home = pwd.getpwnam(user).pw_dir

    return os.path.join(home, 'Library', 'Cookies', 'Cookies.plist')

# }}


# {{ read_cookies(path)

def read_cookies(path):
    """Read the property list stored at path and return its contents."""
    # plistlib.readPlist was removed in Python 3.9; prefer load().
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as fp:
            return plistlib.load(fp)

    return plistlib.readPlist(path)

# }}


# {{ write_cookies(cookies, path)

def write_cookies(cookies, path):
    """Write cookies as a property list to path.

    The data are written to a temporary file in the same directory,
    which is then renamed over path.  If writing or renaming fails the
    temporary file is removed and the error is re-raised.
    """
    d = os.path.split(path)[0]
    fd, name = tempfile.mkstemp(dir=d)
    try:
        with os.fdopen(fd, 'wb') as ofp:
            # plistlib.writePlist was removed in Python 3.9.
            if hasattr(plistlib, 'dump'):
                plistlib.dump(cookies, ofp)
            else:
                plistlib.writePlist(cookies, ofp)
        os.rename(name, path)
    except Exception:
        try:
            os.unlink(name)
        except OSError:
            pass  # do not mask the original failure
        raise

# }}


# {{ load_rules(user)

def load_rules(user=None):
    """Load the list of cookie rules from ".cookierc" in the user's
    home directory.  Returns a tuple of (a, r, k), where a is a list of
    accept rules, r is a list of reject rules, and k is a list of keep
    rules.  Each rule is a list of criteria as returned by parse_rule.

    If no rules are found (the file cannot be read, or it contains no
    rules), the default is to accept all cookies.

    Raises ValueError if a line cannot be parsed as a rule.
    """
    def accept_all():
        # Only the criteria list is a rule; drop the flag.
        return [parse_rule('+ .')[1]], [], []

    cpath = os.path.expanduser('~%s/.cookierc' % (user or ''))
    a = []
    r = []
    k = []
    try:
        with open(cpath, 'rt') as fp:
            for line in fp:
                text = line.strip()
                if not text or text.startswith('#'):
                    continue

                f, rs = parse_rule(text)
                if f == '+':
                    a.append(rs)
                elif f == '-':
                    r.append(rs)
                elif f == '!':
                    k.append(rs)
    except (OSError, IOError):
        return accept_all()

    if not (a or r or k):
        return accept_all()

    return a, r, k

# }}


# {{ find_bad_cookies(cookies, allow, deny, keep)

def _first_match(cookie, rules):
    """Return the first rule in rules that matches cookie, or None."""
    for rule in rules:
        if match_rule(cookie, rule):
            return rule

    return None


def find_bad_cookies(cookies, allow, deny, keep):
    """Return the positions of all the cookies in the list that are not
    matched by any keep rule, and either ARE matched by a deny rule, or
    NOT matched by any allow rule.

    The kill set is a dictionary mapping cookie positions to reasons.
    A reason is either None, meaning no rule selected this cookie for
    preservation, or a rule, meaning the cookie was rejected by the
    application of that rule.
    """
    kill = {}
    for pos, cookie in enumerate(cookies):
        if _first_match(cookie, keep) is not None:
            continue

        rejected_by = _first_match(cookie, deny)
        if rejected_by is not None:
            kill[pos] = rejected_by
        elif _first_match(cookie, allow) is None:
            kill[pos] = None

    return kill

# }}


# {{ main(argv)

def main(argv):
    """Command-line entry point.

    Removes unwanted cookies from the current user's cookie file and
    reports what was done on standard error.  If the environment
    variable WC_EXPLAIN is set to a non-empty value, the reason for
    each removal is printed as well.  argv is currently unused.

    Returns 0.
    """
    explain = os.getenv('WC_EXPLAIN', False)
    cfpath = cookie_path()
    cookies = read_cookies(cfpath)
    allowed, denied, kept = load_rules()

    icky = find_bad_cookies(cookies, allowed, denied, kept)
    if not icky:
        print("No unwanted cookies found.", file=sys.stderr)
        return 0

    print("Removing %d unwanted cookie%s:" % (
        len(icky), "s" if len(icky) != 1 else ""), file=sys.stderr)

    for pos in sorted(icky, key=lambda p: cookies[p].get('Domain', '')):
        reason = icky[pos]
        doomed = cookies[pos]
        print(' - %-30.30s %s=%-20.20s' % (
            doomed.get('Domain', ''), doomed.get('Name', ''),
            doomed.get('Value', '')), file=sys.stderr)
        if explain and reason:
            print(' %s' % unparse_rule(reason, flag='rejected by'),
                  file=sys.stderr)
        elif explain:
            print(' no matching rule', file=sys.stderr)

    # Delete from the highest position down so that the remaining
    # positions stay valid.
    for pos in sorted(icky, reverse=True):
        cookies.pop(pos)

    write_cookies(cookies, cfpath)
    print("Kept %d cookie%s in %s" % (
        len(cookies), "s" if len(cookies) != 1 else "", cfpath),
        file=sys.stderr)
    return 0

# }}


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

# Here there be dragons