#! /usr/bin/python

import sys
import os.path
import re

# The most recently read line of the input file.  get_line() stores each new
# line here and extract_ofp_fields() reads and rewrites it while parsing.
line = ""

# OpenFlow version string (as written by humans) -> the number that encodes
# that version in the protocol.
VERSION = {
    "1.0": 0x01,
    "1.1": 0x02,
    "1.2": 0x03,
    "1.3": 0x04,
    "1.4": 0x05,
    "1.5": 0x06,
}

# Field type name -> (n_bytes, variable).  parse_field() copies the first
# element into a field's 'n_bytes' and the second into its 'variable' flag.
TYPES = {
    "u8": (1, False),
    "be16": (2, False),
    "be32": (4, False),
    "MAC": (6, False),
    "be64": (8, False),
    "be128": (16, False),
    "tunnelMD": (124, True),
}

# Formatting name -> (MFS_* constant, minimum n_bytes, maximum n_bytes).
# parse_field() rejects a field whose size falls outside the given bounds.
FORMATTING = {
    "decimal": ("MFS_DECIMAL", 1, 8),
    "hexadecimal": ("MFS_HEXADECIMAL", 1, 127),
    "ct state": ("MFS_CT_STATE", 4, 4),
    "Ethernet": ("MFS_ETHERNET", 6, 6),
    "IPv4": ("MFS_IPV4", 4, 4),
    "IPv6": ("MFS_IPV6", 16, 16),
    "OpenFlow 1.0 port": ("MFS_OFP_PORT", 2, 2),
    "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4, 4),
    "frag": ("MFS_FRAG", 1, 1),
    "tunnel flags": ("MFS_TNL_FLAGS", 2, 2),
    "TCP flags": ("MFS_TCP_FLAGS", 2, 2),
}

# Prerequisite name, as written in a field comment -> MFP_* constant name
# emitted into the generated output.
PREREQS = {
    "none": "MFP_NONE",
    "ARP": "MFP_ARP",
    "VLAN VID": "MFP_VLAN_VID",
    "IPv4": "MFP_IPV4",
    "IPv6": "MFP_IPV6",
    "IPv4/IPv6": "MFP_IP_ANY",
    "MPLS": "MFP_MPLS",
    "TCP": "MFP_TCP",
    "UDP": "MFP_UDP",
    "SCTP": "MFP_SCTP",
    "ICMPv4": "MFP_ICMPV4",
    "ICMPv6": "MFP_ICMPV6",
    "ND": "MFP_ND",
    "ND solicit": "MFP_ND_SOLICIT",
    "ND advert": "MFP_ND_ADVERT",
}

# Name prefix -> (experimenter ID, class) pair:
#
#      - A standard OXM class is written as (0, <oxm_class>).
#
#      - An experimenter OXM class is written as (<oxm_vendor>, 0xffff).
#
# When a name matches more than one prefix, the longest prefix wins.
OXM_CLASSES = {
    "NXM_OF_": (0, 0x0000),
    "NXM_NX_": (0, 0x0001),
    "OXM_OF_": (0, 0x8000),
    "OXM_OF_PKT_REG": (0, 0x8001),
    "ONFOXM_ET_": (0x4f4e4600, 0xffff),

    # Nicira's experimenter OXM class.  OVS would use it in place of
    # NXM_OF_ and NXM_NX_ had those not been grandfathered in.  For now it
    # only serves to test experimenter OXM support, since real uses of
    # experimenter OXM in the wild are rare.
    "NXOXM_ET_": (0x00002320, 0xffff),
}


def oxm_name_to_class(name):
    """Return the OXM_CLASSES value for the longest prefix matching 'name'.

    Returns None when no prefix in OXM_CLASSES matches.
    """
    best_len = 0
    found = None
    for candidate, pair in OXM_CLASSES.items():
        # Only a strictly longer prefix may replace the current best.
        if len(candidate) > best_len and name.startswith(candidate):
            best_len = len(candidate)
            found = pair
    return found


def decode_version_range(range):
    """Convert a version range string into a (first, last) pair of codes.

    Three spellings are understood, each using keys of VERSION:

      - "X"    a single version, giving (X, X);
      - "X+"   X and everything after it, up to the largest code in VERSION;
      - "X-Y"  the inclusive span from X to Y.

    An unknown version raises KeyError.  A string in none of the three
    forms raises AttributeError, because the regular expression fails to
    match.
    """
    if range in VERSION:
        exact = VERSION[range]
        return (exact, exact)

    if range.endswith('+'):
        lowest = VERSION[range[:-1]]
        return (lowest, max(VERSION.values()))

    first, last = re.match(r'^([^-]+)-([^-]+)$', range).groups()
    return (VERSION[first], VERSION[last])


def get_line():
    """Read the next input line into the global 'line'.

    Also advances the global 'line_number'.  Reaching end of file is a
    fatal error.
    """
    global line, line_number
    line = input_file.readline()
    line_number += 1
    if not line:
        # readline() yields an empty string only at end of file.
        fatal("unexpected end of input")


# Number of errors reported so far through error().  extract_ofp_fields()
# exits with status 1 instead of producing output if this is nonzero.
n_errors = 0


def error(msg):
    """Write 'msg' to stderr, prefixed with the current file name and line
    number, and count it in the global 'n_errors'."""
    global n_errors
    report = "%s:%d: %s\n" % (file_name, line_number, msg)
    sys.stderr.write(report)
    n_errors += 1


def fatal(msg):
    """Report 'msg' through error(), then terminate with exit status 1."""
    error(msg)
    raise SystemExit(1)


def usage():
    """Print the help text to stdout and exit with status 0."""
    help_text = '''\
%(argv0)s, for extracting OpenFlow field properties from meta-flow.h
usage: %(argv0)s INPUT [--meta-flow | --nx-match]
  where INPUT points to lib/meta-flow.h in the source directory.
Depending on the option given, the output written to stdout is intended to be
saved either as lib/meta-flow.inc or lib/nx-match.inc for the respective C
file to #include.\
'''
    # Show only the script's base name, not the path it was invoked with.
    program = os.path.basename(sys.argv[0])
    print(help_text % {"argv0": program})
    sys.exit(0)


def make_sizeof(s):
    """Return a C expression string for the size described by 's'.

    "STRUCT up to MEMBER" becomes "offsetof(STRUCT, MEMBER)"; any other
    string S becomes "sizeof(S)".
    """
    found = re.match(r'(.*) up to (.*)', s)
    if found is None:
        return "sizeof(%s)" % s
    # groups() is the (struct, member) pair, in the order the format needs.
    return "offsetof(%s, %s)" % found.groups()


def parse_oxms(s, prefix, n_bytes):
    """Parse a comma-separated list of OXM assignments.

    Returns an empty tuple when 's' is exactly 'none'.  Otherwise returns a
    tuple with one parse_oxm() result per comma-separated item, each item
    stripped of surrounding whitespace first.
    """
    if s == 'none':
        return ()

    parsed = []
    for item in s.split(','):
        parsed.append(parse_oxm(item.strip(), prefix, n_bytes))
    return tuple(parsed)


# (experimenter ID, class) pair -> {OXM type number (as a string) -> name}.
# parse_oxm() fills this in to detect two names claiming the same type.
match_types = {}


def parse_oxm(s, prefix, n_bytes):
    """Parse one OXM/NXM assignment from a field comment.

    's' must have the form

        NAME(TYPE) since [OF1.x and ]vX.Y

    where NAME is the OXM name, TYPE its decimal type number, the optional
    "OF1.x" the OpenFlow version that standardized it, and "vX.Y" an OVS
    version.  'prefix' is only used in the syntax-error message.  'n_bytes'
    is the size of the field in bytes.

    Returns a tuple (header, name, of_version_nr, ovs_version), where
    'header' is an "NXM_HEADER(...)" expression string and 'of_version_nr'
    is the VERSION code of the OpenFlow version, or 0 if none was given.

    As a side effect, records NAME in the global 'match_types'.  Any syntax
    error, unknown class, duplicate type or bad version is reported through
    fatal(), which exits.
    """
    # A raw string keeps \( \) and \. as regex escapes without relying on
    # Python passing unknown string escapes through unchanged.
    m = re.match(r'([A-Z0-9_]+)\(([0-9]+)\) since'
                 r'(?: OF(1\.[0-9]+) and)? v([12]\.[0-9]+)$', s)
    if not m:
        fatal("%s: syntax error parsing %s" % (s, prefix))

    name, oxm_type, of_version, ovs_version = m.groups()

    class_ = oxm_name_to_class(name)
    if class_ is None:
        fatal("unknown OXM class for %s" % name)
    oxm_vendor, oxm_class = class_

    # Within one class, each type number may be claimed by one name only.
    types_in_class = match_types.setdefault(class_, {})
    if oxm_type in types_in_class:
        fatal("duplicate match type for %s (conflicts with %s)" %
              (name, types_in_class[oxm_type]))
    types_in_class[oxm_type] = name

    # Normally the oxm_length is the size of the field, but for experimenter
    # OXMs oxm_length also includes the 4-byte experimenter ID.
    oxm_length = n_bytes
    if oxm_class == 0xffff:
        oxm_length += 4

    header = ("NXM_HEADER(0x%x,0x%x,%s,0,%d)"
              % (oxm_vendor, oxm_class, oxm_type, oxm_length))

    if of_version:
        if of_version not in VERSION:
            fatal("%s: unknown OpenFlow version %s" % (name, of_version))
        of_version_nr = VERSION[of_version]
        if of_version_nr < VERSION['1.2']:
            fatal("%s: claimed version %s predates OXM" % (name, of_version))
    else:
        of_version_nr = 0

    return (header, name, of_version_nr, ovs_version)


def parse_field(mff, comment):
    """Parse the documentation comment that describes one MFF_* constant.

    'mff' is the name of the constant.  'comment' is a list of strings, one
    per comment line.  Its first line must start with the quoted field
    name, optionally followed by (aka "OTHER NAME").  The lines after the
    last blank line must each have the form "Key: value.".

    Returns a dict with the keys 'mff', 'name', 'extra_name', 'n_bytes',
    'n_bits', 'variable', 'mask', 'string', 'prereqs', 'writable', 'OF1.0',
    'OF1.1', 'OXM' and 'prefix'.

    Any syntax or consistency problem is reported through fatal(), which
    exits.
    """
    f = {'mff': mff}

    # First line of comment is the field name.
    m = re.match(r'"([^"]+)"(?:\s+\(aka "([^"]+)"\))?(?:\s+\(.*\))?\.',
                 comment[0])
    if not m:
        fatal("%s lacks field name" % mff)
    f['name'], f['extra_name'] = m.groups()

    # Find the last blank line in the comment.  The field definitions
    # start after that.  Index 0 cannot be blank here because comment[0]
    # just matched the field-name pattern, so a falsy 'blank' means that no
    # blank line was found.
    blank = None
    for i, text in enumerate(comment):
        if not text:
            blank = i
    if not blank:
        fatal("%s: missing blank line in comment" % mff)

    # Every recognized key starts out as None, meaning "not seen yet".
    d = dict.fromkeys(("Type", "Maskable", "Formatting", "Prerequisites",
                       "Access", "Prefix lookup member",
                       "OXM", "NXM", "OF1.0", "OF1.1"))
    for fline in comment[blank + 1:]:
        m = re.match(r'([^:]+):\s+(.*)\.$', fline)
        if not m:
            fatal("%s: syntax error parsing key-value pair as part of %s"
                  % (fline, mff))
        key, value = m.groups()
        if key not in d:
            fatal("%s: unknown key" % key)
        elif d[key] is not None:
            fatal("%s: duplicate key" % key)
        d[key] = value

    # Only these three keys may be omitted.
    for key, value in d.items():
        if not value and key not in ("OF1.0", "OF1.1",
                                     "Prefix lookup member"):
            fatal("%s: missing %s" % (mff, key))

    m = re.match(r'([a-zA-Z0-9]+)(?: \(low ([0-9]+) bits\))?$', d['Type'])
    if not m:
        fatal("%s: syntax error in type" % mff)
    type_ = m.group(1)
    if type_ not in TYPES:
        fatal("%s: unknown type %s" % (mff, d['Type']))

    f['n_bytes'] = TYPES[type_][0]
    if m.group(2):
        # "(low N bits)" restricts the field to fewer bits than its type.
        f['n_bits'] = int(m.group(2))
        if f['n_bits'] > f['n_bytes'] * 8:
            fatal("%s: more bits (%d) than field size (%d)"
                  % (mff, f['n_bits'], 8 * f['n_bytes']))
    else:
        f['n_bits'] = 8 * f['n_bytes']
    f['variable'] = TYPES[type_][1]

    if d['Maskable'] == 'no':
        f['mask'] = 'MFM_NONE'
    elif d['Maskable'] == 'bitwise':
        f['mask'] = 'MFM_FULLY'
    else:
        fatal("%s: unknown maskable %s" % (mff, d['Maskable']))

    fmt = FORMATTING.get(d['Formatting'])
    if not fmt:
        fatal("%s: unknown format %s" % (mff, d['Formatting']))
    if f['n_bytes'] < fmt[1] or f['n_bytes'] > fmt[2]:
        fatal("%s: %d-byte field can't be formatted as %s"
              % (mff, f['n_bytes'], d['Formatting']))
    f['string'] = fmt[0]

    f['prereqs'] = PREREQS.get(d['Prerequisites'])
    if not f['prereqs']:
        fatal("%s: unknown prerequisites %s" % (mff, d['Prerequisites']))

    if d['Access'] == 'read-only':
        f['writable'] = False
    elif d['Access'] == 'read/write':
        f['writable'] = True
    else:
        fatal("%s: unknown access %s" % (mff, d['Access']))

    f['OF1.0'] = d['OF1.0']
    if d['OF1.0'] not in (None, 'exact match', 'CIDR mask'):
        fatal("%s: unknown OF1.0 match type %s" % (mff, d['OF1.0']))

    f['OF1.1'] = d['OF1.1']
    if d['OF1.1'] not in (None, 'exact match', 'bitwise mask'):
        fatal("%s: unknown OF1.1 match type %s" % (mff, d['OF1.1']))

    # OXM assignments come first, followed by NXM ones, in a single tuple.
    f['OXM'] = (parse_oxms(d['OXM'], 'OXM', f['n_bytes']) +
                parse_oxms(d['NXM'], 'NXM', f['n_bytes']))

    f['prefix'] = d["Prefix lookup member"]

    return f


def protocols_to_c(protocols):
    """Return the OFPUTIL_P_* constant name for a set of protocol tags.

    'protocols' is a set drawn from 'of10', 'of11' and 'oxm'.  Only the
    four combinations listed below are supported; anything else trips an
    assertion.
    """
    known = (
        ({'of10', 'of11', 'oxm'}, 'OFPUTIL_P_ANY'),
        ({'of11', 'oxm'}, 'OFPUTIL_P_NXM_OF11_UP'),
        ({'oxm'}, 'OFPUTIL_P_NXM_OXM_ANY'),
        (set(), 'OFPUTIL_P_NONE'),
    )
    for members, c_name in known:
        if protocols == members:
            return c_name
    assert False


def make_meta_flow(fields):
    """Return the lines of a C initializer list describing 'fields'.

    'fields' is a list of dicts as produced by parse_field().  Each field
    becomes one brace-enclosed initializer, returned as a list of strings
    with one output line per element.
    """
    output = []
    for f in fields:
        output.append("{")
        output.append("    %s," % f['mff'])

        if f['extra_name']:
            names = "    \"%s\", \"%s\"," % (f['name'], f['extra_name'])
        else:
            names = "    \"%s\", NULL," % f['name']
        output.append(names)

        variable = 'true' if f['variable'] else 'false'
        output.append("    %d, %d, %s,"
                      % (f['n_bytes'], f['n_bits'], variable))

        rw = 'true' if f['writable'] else 'false'
        output.append("    %s, %s, %s, %s,"
                      % (f['mask'], f['string'], f['prereqs'], rw))

        oxm = f['OXM']
        of10 = f['OF1.0']
        of11 = f['OF1.1']
        if f['mff'] in ('MFF_DL_VLAN', 'MFF_DL_VLAN_PCP'):
            # MFF_DL_VLAN and MFF_DL_VLAN_PCP don't exactly correspond to
            # OF1.1, nor do they have NXM or OXM assignments, but their
            # meanings can be expressed in every protocol, which is the goal
            # of this member.
            protocols = {"of10", "of11", "oxm"}
        else:
            protocols = set()
            for tag, present in (("of10", of10),
                                 ("of11", of11),
                                 ("oxm", oxm)):
                if present:
                    protocols.add(tag)

        if f['mask'] == 'MFM_FULLY':
            # Start from every usable protocol, then remove those whose
            # match type cannot express the kind of mask in question.
            cidr_protocols = set(protocols)
            bitwise_protocols = set(protocols)

            if of10 == 'exact match':
                bitwise_protocols.discard('of10')
                cidr_protocols.discard('of10')
            elif of10 == 'CIDR mask':
                bitwise_protocols.discard('of10')
            else:
                assert of10 is None

            if of11 == 'exact match':
                bitwise_protocols.discard('of11')
                cidr_protocols.discard('of11')
            else:
                assert of11 in (None, 'bitwise mask')
        else:
            assert f['mask'] == 'MFM_NONE'
            cidr_protocols = set()
            bitwise_protocols = set()

        for proto_set in (protocols, cidr_protocols, bitwise_protocols):
            output.append("    %s," % protocols_to_c(proto_set))

        if f['prefix']:
            output.append("    FLOW_U32OFS(%s)," % f['prefix'])
        else:
            output.append("    -1, /* not usable for prefix lookup */")

        output.append("},")
    return output


def make_nx_match(fields):
    """Print the all_nxm_fields[] C array for 'fields' to stdout.

    'fields' is a list of dicts as produced by parse_field().  Unlike
    make_meta_flow(), this prints its result directly and returns an empty
    list.
    """
    print("static struct nxm_field_index all_nxm_fields[] = {")
    for f in fields:
        # Sort by OpenFlow version number (nx-match.c depends on this).
        by_version = sorted(f['OXM'], key=lambda entry: entry[2])
        for oxm in by_version:
            header, name, of_version_nr = oxm[0], oxm[1], oxm[2]
            entry_text = """{ .nf = { %s, %d, "%s", %s } },""" % (
                header, of_version_nr, name, f['mff'])
            print(entry_text)
    print("};")
    return []


def extract_ofp_fields(mode):
    """Parse the field definitions from the input file and build the output.

    Reads from the global 'input_file' through get_line(), starting at the
    "enum ... mf_field_id" line and stopping at the closing brace or at
    MFF_N_IDS.  Each MFF_* constant, or group of constants, must be preceded
    by a comment, which is handed to parse_field().

    'mode' is '--meta-flow' or '--nx-match' and selects the generator.
    Returns the list of output lines that the generator returns.  A
    "generated automatically" banner is printed to stdout first.

    Closes 'input_file' when parsing is done, and exits with status 1 if
    any error was reported.
    """
    global line

    fields = []

    # Skip everything before the enumeration.
    while True:
        get_line()
        if re.match('enum.*mf_field_id', line):
            break

    while True:
        get_line()
        # Skip unindented comment lines, preprocessor lines and blank lines.
        if (line.startswith(('/*', ' *', '#'))
                or not line
                or line.isspace()):
            continue
        elif re.match('}', line) or re.match(r'\s+MFF_N_IDS', line):
            break

        # Parse the comment preceding an MFF_ constant into 'comment',
        # one line to an array element.
        line = line.strip()
        if not line.startswith('/*'):
            fatal("unexpected syntax between fields")
        # Drop the '/' so the opening line starts with '*' like the rest.
        line = line[1:]
        comment = []
        end = False
        while not end:
            line = line.strip()
            if line.startswith('*/'):
                get_line()
                break
            if not line.startswith('*'):
                fatal("unexpected syntax within field")

            line = line[1:]
            if line.startswith(' '):
                line = line[1:]
            # Indentation beyond one space continues the previous line.
            if line.startswith(' ') and comment:
                continuation = True
                line = line.lstrip()
            else:
                continuation = False

            if line.endswith('*/'):
                line = line[:-2].rstrip()
                end = True
            else:
                end = False

            if continuation:
                comment[-1] += " " + line
            else:
                comment += [line]
            get_line()

        # Drop blank lines at each end of comment.
        while comment and not comment[0]:
            comment = comment[1:]
        while comment and not comment[-1]:
            comment = comment[:-1]

        # Parse the MFF_ constant(s).
        mffs = []
        while True:
            m = re.match(r'\s+(MFF_[A-Z0-9_]+),?\s?$', line)
            if not m:
                break
            mffs += [m.group(1)]
            get_line()
        if not mffs:
            fatal("unexpected syntax looking for MFF_ constants")

        if len(mffs) > 1 or '<N>' in comment[0]:
            for mff in mffs:
                # Extract trailing integer.
                m = re.match('.*[^0-9]([0-9]+)$', mff)
                if not m:
                    fatal("%s lacks numeric suffix in register group" % mff)
                n = m.group(1)

                # Search-and-replace <N> within the comment,
                # and drop lines that have <x> for x != n.
                instance = []
                for x in comment:
                    y = x.replace('<N>', n)
                    if re.search('<[0-9]+>', y):
                        if ('<%s>' % n) not in y:
                            continue
                        y = re.sub('<[0-9]+>', '', y)
                    instance += [y.strip()]
                fields += [parse_field(mff, instance)]
        else:
            fields += [parse_field(mffs[0], comment)]

    input_file.close()

    if n_errors:
        sys.exit(1)

    print("""\
/* Generated automatically; do not modify!    "-*- buffer-read-only: t -*- */
""")

    if mode == '--meta-flow':
        output = make_meta_flow(fields)
    elif mode == '--nx-match':
        output = make_nx_match(fields)
    else:
        # Raised explicitly so the check still runs under "python -O".
        raise AssertionError("unknown mode %r" % (mode,))

    return output


if __name__ == '__main__':
    if '--help' in sys.argv:
        usage()
    elif len(sys.argv) != 3:
        sys.stderr.write("exactly two arguments required; "
                         "use --help for help\n")
        sys.exit(1)
    elif sys.argv[2] not in ('--meta-flow', '--nx-match'):
        sys.stderr.write("invalid arguments; use --help for help\n")
        sys.exit(1)
    else:
        # Module-level state read by get_line() and error().
        file_name = sys.argv[1]
        input_file = open(file_name)
        line_number = 0

        for oline in extract_ofp_fields(sys.argv[2]):
            print(oline)
