# -*- coding: utf-8 -*-
"""
blacklist - maintain a blacklist of Internet addresses

@copyright: Copyright © 2004 by Nir Soffer <nirs@freeshell.org>
@license: GNU GPL, see COPYING for details
"""

import struct
import re
import sets
import socket

import ipv4


class BlackList:
    r""" Maintain Internet addresses black list

    Addresses are read from standard text file, containing one address per
    line with optional wiki list markup. All other text is ignored. The
    blacklist accepts both single addresses in decimal notation and
    netblocks in either CIDR notation or old style 'address mask'.

    Example blacklist file:

         * 127.0.0.1                # Single address
         * 127.0.0.0/24             # CIDR format
         * 127.0.0.0 255.255.255.0  # old style
         # 80.80.80.80 Commented address
         This line is ignored and the next line also...

    Badly formatted addresses are ignored quietly, illegal addresses will
    raise a ValueError.

    Example usage:
        >>> bl = blacklist.BlackList()
        >>> bl.updateFromText('127.0.0.1\n127.0.0.32')
        >>> '127.0.0.1' in bl
        True
        >>> bl.updateFromText('10.0.0.0/8')
        >>> '10.200.0.1' in bl
        True
        >>> '192.114.134.51' in bl
        False

    """
    # Line parser. The netblock alternative is tried first so that an
    # address followed by '/bits' or ' mask' is not cut short and taken
    # as a single address.
    line_re = re.compile(r"""
    ^(?:\s+[-*]\s+)? # optional wiki list
    (?: # netblock or address
      (?P<netblock>\d+\.\d+\.\d+\.\d+(?:/\d+|[ ]+\d+\.\d+\.\d+\.\d+)) |
      (?P<address>\d+\.\d+\.\d+\.\d+)
    )
    .*$ # rest of line
    """, re.MULTILINE | re.VERBOSE)

    def __init__(self, text=''):
        """ Create new blacklist

        @keyword text: text containing addresses to block, see class docstring
        """
        # Single addresses, stored as keys in the packed 4-byte form
        # returned by socket.inet_aton(); the values are unused (None).
        self.ips = {}
        # Sorted list of (first, last) address pairs, one per netblock.
        self.netblocks = []
        if text:
            self.updateFromText(text)

    def updateFromText(self, text):
        """ Update blacklist from text

        Update the black list with new addresses and netblocks in
        text. Single addresses are always unique because they are saved
        in a dictionary. Netblocks are also unique, but they can overlap.

        If an invalid address is found, the single addresses parsed before
        it stay in the blacklist, but no netblock from this text is added,
        because the netblock list is replaced only after the whole text
        has been parsed.

        To do: reduce overlapping netblocks?

        @param text: text containing addresses to add to the blacklist
        @raise ValueError: if a matched single address is rejected by
            socket.inet_aton()
        """
        # Work on a {first: last} mapping, so netblocks sharing the same
        # first address collapse into one entry.
        netblocks = dict(self.netblocks)
        for match in self.line_re.finditer(text):
            address = match.group('address')
            if address:
                # Add single address, as four character string. Only the
                # conversion can fail, so only it is guarded.
                try:
                    packed = socket.inet_aton(address)
                except socket.error:
                    raise ValueError('%s: invalid address' % address)
                self.ips[packed] = None
                continue

            block = match.group('netblock')
            if block:
                # Add netblock, removing overlapping netblocks with same
                # address. The biggest ranges remain, not the newer.
                first, last = ipv4.netblock(block)
                if first not in netblocks or netblocks[first] < last:
                    netblocks[first] = last

        # items() is a list on Python 2 but a view without sort() on
        # Python 3, so build a real list before sorting in place.
        self.netblocks = list(netblocks.items())
        self.netblocks.sort()

    def __contains__(self, ip):
        """ Called when using 'in' keyword

        @param ip: Internet address - number or string in decimal notation
            (converted with ipv4.address)
        @return: True if the address or its netblock is in the blacklist
        @rtype: bool (0 or 1 for backward compatibility)
        """
        address = ipv4.address(ip)
        # First fast dictionary lookup for single address.
        # Pack using big endian byte order and standard size and alignment,
        # the same four byte form used for the keys of self.ips.
        key = struct.pack('!L', address)
        if key in self.ips:
            return 1
        # Slower searching in a sorted list of ranges
        for first, last in self.netblocks:
            if address < first:
                # The list is sorted by first address, so no later
                # netblock can start at or before this address.
                break
            if address <= last:
                return 1
        return 0
