#             Perforce Defect Tracking Integration Project
#              <http://www.ravenbrook.com/project/p4dti/>
#
#  MAKE_SCHEMA_DOC.PY -- GENERATE BUGZILLA SCHEMA DOCUMENTATION
#
#             Nick Barnes, Ravenbrook Limited, 2003-07-07
#
#
# 1. INTRODUCTION
#

# This module generates Bugzilla schema documentation by accessing
# Bugzilla databases through a MySQL interface.
#
# The intended readership is project developers.
#
# This document is not confidential.

import string
import MySQLdb
import copy

import schema_remarks

class error(Exception):
    """Schema processing error.

    Raised by this module when a select cannot be fetched, when
    schema_remarks lacks a remark that the documentation needs, or when
    a Bugzilla version is unknown or out of order.

    This is a class rather than a string because string exceptions are
    rejected by Python 2.6 and later.  The existing
    'raise error, "message"' statements work unchanged with a class.
    """

# 2. Interacting with MySQL.

def fetchall(cursor):
    """Return all the rows that 'cursor' has left to fetch.

    An empty result is always returned as a fresh empty list, because
    the cursor sometimes hands back an empty tuple instead.
    """
    fetched = cursor.fetchall()
    if len(fetched) > 0:
        return fetched
    return []

def select_rows(cursor, select):
    """Execute the statement 'select' on 'cursor' and return its rows.

    Raises 'error' if the cursor has no description after executing the
    statement (so it was not a select), or if fetching the rows yields
    None.
    """
    # The value returned by execute() is not used; only its effect on
    # the cursor matters here.
    cursor.execute(select)
    if cursor.description is None:
        raise error, ("Trying to fetch rows from non-select '%s'"
                      % select)
    values = fetchall(cursor)
    if values is None:
        raise error, ("Select '%s' returned unfetchable rows."
                      % select)
    return values

def column_names(cursor):
    """Return the column names of the cursor's current result as a list.

    The name is the first item of each entry in cursor.description.
    """
    return [column[0] for column in cursor.description]

def fetch_rows_as_list_of_dictionaries(cursor, select):
    """Run 'select' and return its rows as a list of dictionaries.

    Each dictionary maps column name to the value in that row.  Raises
    'error' if a row does not have exactly one value per column name.
    """
    rows = select_rows(cursor, select)
    names = column_names(cursor)
    width = len(names)
    dictionaries = []
    for row in rows:
        if width != len(row):
            raise error, ("Select '%s' returns %d keys but %d columns."
                          % (select, width, len(row)))
        record = {}
        for (name, value) in zip(names, row):
            record[name] = value
        dictionaries.append(record)
    return dictionaries

# 3. Obtaining a schema, and reducing it to a normal form.

# This is a map from type names (as returned by a 'describe'
# operation) to synonymous type names.

# Integer types are reported by 'describe' with a width in parentheses;
# each is mapped to the bare type name.
type_map = {
    'tinyint(4)': 'tinyint',
    'smallint(6)': 'smallint',
    'mediumint(9)': 'mediumint',
    'int(11)': 'int',
    'bigint(20)': 'bigint',
}

# Given output from a 'describe table' operation, return a map from
# column name to a map with the following entries:
# 
# 'Name':       column name,
# 'Default':    default value (or "None"),
# 'Type':       type name,
# 'Properties': properties (e.g. auto_increment).
# 'Remarks'   : list of HTML remarks
#
# Because almost all columns are "NOT NULL", that is the default, and
# other columns are marked 'null' under 'Properties'.

def reduce_columns(table, description):
    """Reduce the rows of a 'describe' of 'table' to a map of columns.

    'description' is a list of dictionaries with the keys 'Field',
    'Type', 'Null', 'Extra' and 'Default'.  The result maps each column
    name to a dictionary with the entries 'Name', 'Default', 'Type',
    'Properties' and 'Remarks'.

    Raises 'error' if schema_remarks.column_remarks has no entry for the
    table or for one of its columns.
    """
    columns = {}
    for row in description:
        name = row['Field']

        # Normalise the type name and space out enum alternatives.
        raw_type = row['Type']
        column_type = type_map.get(raw_type, raw_type)
        if column_type[:4] == 'enum':
            column_type = column_type.replace("','", "', '")

        # "NOT NULL" is the norm, so only nullable columns are marked.
        nullable = (row['Null'] == 'YES')
        properties = row['Extra']
        if nullable:
            if properties:
                properties = properties + ', null'
            else:
                properties = 'null'
        elif properties == '':
            properties = '-'

        default = row['Default']
        if default is None:
            default = 'None'
        elif default == '':
            default = "''"

        if not schema_remarks.column_remarks.has_key(table):
            raise error, "No column remarks for table '%s'." % table
        table_remarks = schema_remarks.column_remarks[table]
        if not table_remarks.has_key(name):
            raise error, "Table '%s' has no remark for column '%s'." % (table, name)
        remark = table_remarks[name]
        remarks = []
        if remark is not None:
            remarks.append(remark)

        columns[name] = {
            'Name': name,
            'Default': default,
            'Type': column_type,
            'Properties': properties,
            'Remarks': remarks,
            }
    return columns

# Given output from "show index", return a map from index name to a
# map with the following entries:
#
# 'Name':    Index name, 'PRIMARY' for a primary index;
# 'Fields':  An ordered list of column names;
# 'Unique?':  'yes' or 'no';
# 'Remarks': A list of remarks.

def reduce_indexes(table, index_list):
    """Reduce the rows of a 'show index' on 'table' to a map of indexes.

    'index_list' is a list of dictionaries with the keys 'Key_name',
    'Seq_in_index', 'Column_name' and 'Non_unique'; an index has one
    row per column.  The result maps each index name to a dictionary
    with the entries 'Name', 'Fields' (the column names in sequence
    order, joined with ', '), 'Unique?' ('yes' or 'no') and 'Remarks'.

    Raises 'error' if schema_remarks.index_remarks has no entry for the
    table or for one of its indexes.
    """
    indexes = {}
    for row in index_list:
        key_name = row['Key_name']
        if not indexes.has_key(key_name):
            # First row seen for this index: create its entry.
            unique = 'yes'
            if row['Non_unique'] == 1:
                unique = 'no'
            if not schema_remarks.index_remarks.has_key(table):
                raise error, "No index remarks for table '%s'." % table
            known = schema_remarks.index_remarks[table]
            if not known.has_key(key_name):
                raise error, "Table '%s' has no remark for index '%s'." % (table, key_name)
            remark = known[key_name]
            remarks = []
            if remark:
                remarks.append(remark)
            indexes[key_name] = {'Name': key_name,
                                 'Fields': {},
                                 'Unique?': unique,
                                 'Remarks': remarks,
                                 }
        # Record the column against its position within the index.
        indexes[key_name]['Fields'][row['Seq_in_index']] = row['Column_name']
    # Turn each position -> column map into an ordered, comma-separated
    # string of column names.
    for entry in indexes.values():
        by_position = entry['Fields']
        positions = by_position.keys()
        positions.sort()
        names = []
        for position in positions:
            names.append(by_position[position])
        entry['Fields'] = ', '.join(names)
    return indexes

# Given a database name, get the schema for that database as a map
# from table name to (columns, indexes), where columns is a map
# produced by reduce_columns and indexes is a map produced by
# reduce_indexes.

def get_schema(db_name, user='bugs'):
    """Return the schema of the MySQL database 'db_name'.

    Connects as 'user' (by default 'bugs'), describes every table that
    'show tables' lists, and returns a map from table name to
    (columns, indexes), where columns comes from reduce_columns and
    indexes from reduce_indexes.

    The connection is closed even if a query or a remark check raises.
    """
    db = MySQLdb.connect(db=db_name, user=user)
    try:
        cursor = db.cursor()
        tables = [row[0] for row in select_rows(cursor, 'show tables')]
        schema = {}
        for table in tables:
            columns = fetch_rows_as_list_of_dictionaries(
                cursor, 'describe %s' % table)
            indexes = fetch_rows_as_list_of_dictionaries(
                cursor, 'show index from %s' % table)
            schema[table] = (reduce_columns(table, columns),
                             reduce_indexes(table, indexes))
    finally:
        db.close()
    return schema

# 4. Generating HTML

# The HTML generated so far, as a list of string fragments.
body = []

def add(s):
    """Append the HTML fragment 's' to the module-level 'body' list."""
    body.append(s)

# output a coloured anchored table row, with a <th> in the first
# column.

def output_row(anchor, name, dict, keys, colour, colours):
    """Output one table row whose first cell is an anchored heading.

    'anchor' is used as both the id and the name of the anchor around
    'name'.  One <td> cell follows for each key in 'keys', holding
    dict[key].  'colour' is an attribute string for the row, and
    'colours' maps keys to attribute strings for individual cells.
    """
    add('  <tr%s valign="top" align="left">\n\n' % colour)
    heading = '    <th><a id="%s" name="%s">%s</a></th>\n\n'
    add(heading % (anchor, anchor, name))
    for key in keys:
        cell_colour = colours.get(key, '')
        add('    <td%s>%s</td>\n\n' % (cell_colour, dict[key]))
    add('  </tr>\n\n')

# output the main schema table for a table.    

def output_description(table, colour, remark, columns, colours, subcolours):
    """Output the main schema table for the database table 'table'.

    'remark', if not empty, is output as a paragraph first.  'columns'
    is a map from column name to a column dictionary as made by
    reduce_columns; the columns are output in name order.  'colour' is
    an attribute string for the whole HTML table, 'colours' maps column
    name to an attribute string for its row, and 'subcolours' maps
    column name to a map from field name to an attribute string for
    that cell.

    The dictionaries in 'columns' are not modified.
    """
    if remark:
        add('<p>%s</p>\n\n' % remark)
    add('<table%s border="1" cellspacing="0" cellpadding="5">\n\n' % colour)

    add('  <tr valign="top" align="left">\n\n')
    add('    <th>Field</th>\n\n')
    add('    <th>Type</th>\n\n')
    add('    <th>Default</th>\n\n')
    add('    <th>Properties</th>\n\n')
    add('    <th>Remarks</th>\n\n')
    add('  </tr>\n\n')
    names = list(columns.keys())
    names.sort()
    for name in names:
        # Format a shallow copy, so that the caller's list of remarks
        # survives and the schema can be output more than once.
        row = columns[name].copy()
        if row['Remarks']:
            row['Remarks'] = ' '.join(row['Remarks'])
        else:
            row['Remarks'] = '-'
        output_row('column-%s-%s' % (table, name), name, row,
                   ['Type', 'Default', 'Properties', 'Remarks'],
                   colours.get(name, ''),
                   subcolours.get(name, {}))
    add('</table>\n\n')

# output the indexes table for a table    

def output_indexes(table, colour, indexes, colours, subcolours):
    """Output the indexes table for the database table 'table'.

    'indexes' is a map from index name to an index dictionary as made
    by reduce_indexes.  The 'PRIMARY' index, if any, is output first
    and the others follow in name order.  'colour' is an attribute
    string for the whole HTML table, 'colours' maps index name to an
    attribute string for its row, and 'subcolours' maps index name to a
    map from field name to an attribute string for that cell.

    Neither 'indexes' nor the dictionaries in it are modified.
    """
    add('<table%s border="1" cellspacing="0" cellpadding="5">\n\n' % colour)

    # order the indexes: PRIMARY first, then alphabetical.
    all_names = list(indexes.keys())
    names = [name for name in all_names if name != 'PRIMARY']
    names.sort()
    if 'PRIMARY' in all_names:
        names.insert(0, 'PRIMARY')

    add('  <tr valign="top" align="left">\n\n')
    add('    <th>Name</th>\n\n')
    add('    <th>Fields</th>\n\n')
    add('    <th>Unique?</th>\n\n')
    add('    <th>Remarks</th>\n\n')
    add('  </tr>\n\n')
    for name in names:
        # Format a shallow copy, so that the caller's list of remarks
        # survives and the schema can be output more than once.
        row = indexes[name].copy()
        n = row['Name']
        if row['Remarks']:
            row['Remarks'] = ' '.join(row['Remarks'])
        else:
            row['Remarks'] = '-'
        output_row("index-%s-%s" % (table, n), n, row,
                   ['Fields', 'Unique?', 'Remarks'],
                   colours.get(n, ''),
                   subcolours.get(n, {}))
    add('</table>\n\n')

def output_schema(schema, remarks,
                  table_colours,
                  column_colours,
                  column_subcolours,
                  index_colours,
                  index_subcolours):
    """Generate the HTML for a whole schema and return it.

    'schema' maps table name to (columns, indexes) and 'remarks' maps
    table name to a remark or None.  The colour arguments hold the
    attribute strings for tables, column rows, column cells, index rows
    and index cells; a missing entry means no colouring.

    The module-level 'body' list is replaced with a new list, filled in
    table-name order, and returned.  Raises 'error' if 'remarks' has no
    entry for a table.
    """
    global body
    body = []
    names = schema.keys()
    names.sort()
    heading = '<h3><a id="table-%s" name="table-%s">The "%s" table</a></h3>\n\n\n'
    for table in names:
        columns, indexes = schema[table]
        colour = table_colours.get(table, '')
        if not remarks.has_key(table):
            raise error, "No table remark for table '%s'." % table
        remark = remarks[table]
        if remark is None:
            remark = ''
        add(heading % (table, table, table))
        output_description(table, colour, remark, columns,
                           column_colours.get(table, {}),
                           column_subcolours.get(table, {}))
        if not indexes:
            add('<p>The "%s" table has no indexes.</p>' % table)
            continue
        add('<p>Indexes:</p>\n\n')
        output_indexes(table, colour, indexes,
                       index_colours.get(table, {}),
                       index_subcolours.get(table, {}))
    return body

# 5. Code to read all the database schemas and figure out the history
# from that.

# Bugzilla versions which we know about, in order.  This is every
# version since 2.10 which changed the schema.

# Oldest version first; get_versioned_tables walks this list in order.
version_order = [
    '2.10',
    '2.12',
    '2.14',
    '2.14.2',
    '2.16',
    '2.17.1',
    '2.17.3',
    '2.17.4',
    '2.17.5',
]

# Names of MySQL tables for each version.  We have dump files locally
# in this directory; maybe the code should use mysqladmin to construct
# the MySQL tables from the dump files on the fly.

# Bugzilla version -> name of the MySQL database holding its schema.
# NOTE(review): the 2.16 entry names 'bugs2163', presumably a 2.16.3
# database -- confirm.
versions = {
    '2.10': 'bugs210',
    '2.12': 'bugs212',
    '2.14': 'bugs214',
    '2.14.2': 'bugs2142',
    '2.16': 'bugs2163',
    '2.17.1': 'bugs2171',
    '2.17.3': 'bugs2173',
    '2.17.4': 'bugs2174',
    '2.17.5': 'bugs2175',
}

# We have a lot of maps of maps.  This function makes sure that a map
# has some entries, by adding an empty map as the entry if necessary.

def require_map_entries(dicts, keys):
    """Make sure every map in 'dicts' has an entry for every key in 'keys'.

    A missing entry is created as a new empty map; an existing entry is
    left alone, whatever its value.  The maps are modified in place.
    """
    for mapping in dicts:
        for key in keys:
            # setdefault only stores the new map if the key is absent.
            mapping.setdefault(key, {})

# colours of tables, and rows, and entries                

# Each colour is an HTML attribute string, with its leading space, ready
# to be placed inside a tag.
red = ' bgcolor="#ffcccc"'    # item has been removed
green = ' bgcolor="#ccffcc"'  # item has been added
blue = ' bgcolor="#ccccff"'   # field has changed

# table_colours: table -> colour
# column_colours: table -> column -> colour
# index_colours: table -> index -> colour
# column_subcolours: table -> column -> field -> colour
# index_subcolours: table -> index -> field ->colour

# any table can omit an entry at any level, meaning 'the same as other
# entries at this level'.

# For a field which can change (e.g. the Type of a column), we store
# it during processing as a list of pairs:
# 
#    [(first bugzilla version, value), ...]
# 
# So list[-1][1] is the current value.  When we're done figuring out
# the schema history, we replace this list with a single value.

# Make the initial pair lists for a column.

def pair_up_column_entries(bz, column):
    """Start the history of a column's changeable fields.

    The 'Type', 'Default' and 'Properties' entries of 'column' are each
    replaced, in place, by a one-item list [(bz, value)], where 'bz' is
    the Bugzilla version in which the value was first seen.
    """
    for field in ('Type', 'Default', 'Properties'):
        current = column[field]
        column[field] = [(bz, current)]

# Make the initial pair lists for an index.

def pair_up_index_entries(bz, index):
    """Start the history of an index's changeable fields.

    The 'Fields' and 'Unique?' entries of 'index' are each replaced, in
    place, by a one-item list [(bz, value)], where 'bz' is the Bugzilla
    version in which the value was first seen.
    """
    for field in ('Fields', 'Unique?'):
        current = index[field]
        index[field] = [(bz, current)]

# Make all the initial pair lists for a table.
    
def pair_up_table_entries(bz, schema, table):
    """Start the field histories of every column and index of 'table'.

    'schema' maps table name to (columns, indexes).  Each column and
    each index of schema[table] is paired up in place with the Bugzilla
    version 'bz'.
    """
    columns, indexes = schema[table]
    for name in columns.keys():
        pair_up_column_entries(bz, columns[name])
    for name in indexes.keys():
        pair_up_index_entries(bz, indexes[name])

# Given a pair list, make a single value which explains the history.
# I've tried various ways of showing this; this is the best I've come
# up with.
    
def stringify_pairs(pl):
    """Turn the pair list 'pl' into a single value explaining its history.

    'pl' is a list of (Bugzilla version, value) pairs.  A list with one
    pair yields that pair's value unchanged.  Otherwise the result is an
    HTML string with one '<b>version: </b>value' item per pair, the
    items separated by '; <br />' and a newline.
    """
    if len(pl) == 1:
        return pl[0][1]
    history = ['<b>%s: </b>%s' % (p[0], p[1]) for p in pl]
    return '; <br />\n'.join(history)

# Given a schema, fix up all the pair lists.    

def stringify_schema(schema):
    """Replace every pair list in 'schema' with its stringified history.

    'schema' maps table name to (columns, indexes).  The 'Type',
    'Default' and 'Properties' entries of each column, and the 'Fields'
    and 'Unique?' entries of each index, are replaced in place by the
    result of stringify_pairs.
    """
    column_fields = ['Type', 'Default', 'Properties']
    index_fields = ['Fields', 'Unique?']
    for (columns, indexes) in schema.values():
        for (entries, fields) in [(columns, column_fields),
                                  (indexes, index_fields)]:
            for entry in entries.values():
                for field in fields:
                    entry[field] = stringify_pairs(entry[field])

# Monster function to get the versioned schema and all the colour tables.
# Should be broken up into smaller functions.

# Note that this takes the first and last Bugzilla versions of the range
# whose schema history is to be worked out.

def get_versioned_tables(first, last):
    if not first in version_order:
        raise error, "I don't know about version '%s'." % last
    if not last in version_order:
        raise error, "I don't know about version '%s'." % last
    if not (version_order.index(last) >= version_order.index(first)):
        raise error, "Version '%s' comes before version '%s'." % (last, first)
    table_colours = {}
    column_colours = {}
    column_subcolours = {}
    index_colours = {}
    index_subcolours = {}
    table_remarks = copy.deepcopy(schema_remarks.table_remarks)
    column_remarks = copy.deepcopy(schema_remarks.column_remarks)
    index_remarks = copy.deepcopy(schema_remarks.index_remarks)
    first_db = versions[first]
    print "Getting schema for %s." % first
    schema = get_schema(first_db)
    require_map_entries([column_colours,
                         column_subcolours,
                         index_colours,
                         index_subcolours], schema.keys())
    # turn fields into lists connecting Bugzilla version to value
    for table in schema.keys():
        pair_up_table_entries(first, schema, table)
    for bz_name in version_order[(version_order.index(first)+1) : (version_order.index(last)+1)]:
        db_name = versions[bz_name]
        print "Getting schema for %s." % bz_name
        new_schema = get_schema(db_name)
        require_map_entries([column_colours,
                             column_subcolours,
                             index_colours,
                             index_subcolours], new_schema.keys())
        # Find schema items which are not in the new_schema, and mark them
        # as deleted.
        added_remark = 'Added in %s' % bz_name
        for table in schema.keys():
            if new_schema.has_key(table):
                (oldc, oldi) = schema[table]
                (newc, newi) = new_schema[table]
                # Find columns which have changed or been been deleted
                for c in oldc.keys():
                    if newc.has_key(c):
                        for k in ['Type', 'Default', 'Properties']:
                            oldvalue = oldc[c][k][-1][1]
                            newvalue = newc[c][k]
                            if oldvalue != newvalue:
                                # column changed
                                if not column_subcolours[table].has_key(c):
                                    column_subcolours[table][c] = {}
                                column_subcolours[table][c][k] = blue
                                oldc[c][k].append((bz_name, newvalue))
                    else:
                        # column dropped
                        if not column_colours[table].get(c,'') == red:
                            # column newly dropped
                            if (not schema_remarks.column_removed_remarks.has_key(table) or
                                not schema_remarks.column_removed_remarks[table].has_key(c)):
                                raise error, "No remark for removing %s from %s." % (c, table)
                            note = schema_remarks.column_removed_remarks[table][c]
                            if note:
                                note = (' <b>Removed in %s (%s).</b>\n'
                                        % (bz_name, note))
                            else:
                                note = (' <b>Removed in %s.</b>\n'
                                        % bz_name)
                            oldc[c]['Remarks'].append(note)
                            column_colours[table][c] = red
                for i in oldi.keys():
                    if newi.has_key(i):
                        for k in ['Fields', 'Unique?']:
                            oldvalue = oldi[i][k][-1][1]
                            newvalue = newi[i][k]
                            if oldvalue != newvalue:
                                # index changed
                                if not index_subcolours[table].has_key(i):
                                    index_subcolours[table][i] = {}
                                index_subcolours[table][i][k] = blue
                                oldi[i][k].append((bz_name, newvalue))
                    else:
                        # index dropped
                        if not index_colours[table].get(i,'') == red:
                            # index newly dropped
                            if (not schema_remarks.index_removed_remarks.has_key(table) or
                                not schema_remarks.index_removed_remarks[table].has_key(i)):
                                raise error, "No remark for removing index %s from %s." % (i, table)
                            note = schema_remarks.index_removed_remarks[table][i]
                            if note:
                                note = (' <b>Removed in %s (%s).</b>\n'
                                        % (bz_name, note))
                            else:
                                note = (' <b>Removed in %s.</b>\n'
                                        % bz_name)
                            oldi[i]['Remarks'].append(note)
                            index_colours[table][i] = red
            elif table_colours.get(table,'') != red:
                # Find tables which have been deleted.
                table_colours[table] = red
                note = schema_remarks.table_removed_remarks[table]
                if note:
                    note = ('<b>Removed in %s (%s).</b>\n'
                            % (bz_name, note))
                else:
                    note = ('<b>Removed in %s.</b>\n'
                            % bz_name)
                remark = table_remarks[table]
                if remark is None:
                    remark = ''
                table_remarks[table] = remark + note
        # Find new_schema items which are not in the schema, add them,
        # and mark them as new.
        for table in new_schema.keys():
            if schema.has_key(table):
                (oldc, oldi) = schema[table]
                (newc, newi) = new_schema[table]
                for c in newc.keys():
                    if not oldc.has_key(c):
                        # column added
                        if (not schema_remarks.column_added_remarks.has_key(table) or
                            not schema_remarks.column_added_remarks[table].has_key(c)):
                            raise error, "No remark for adding %s to %s." % (c, table)
                        note = schema_remarks.column_added_remarks[table][c]
                        if note:
                            note = (' <b>Added in %s (%s).</b>\n'
                                    % (bz_name, note))
                        else:
                            note = (' <b>Added in %s.</b>\n'
                                    % bz_name)
                        oldc[c] = newc[c]
                        oldc[c]['Remarks'].append(note)
                        pair_up_column_entries(bz_name, oldc[c])
                        column_colours[table][c] = green
                for i in newi.keys():
                    if not oldi.has_key(i):
                        # index added
                        if (not schema_remarks.index_added_remarks.has_key(table) or
                            not schema_remarks.index_added_remarks[table].has_key(i)):
                            raise error, "No remark for adding index %s to %s." % (i, table)
                        note = schema_remarks.index_added_remarks[table][i]
                        if note:
                            note = (' <b>Added in %s (%s).</b>\n'
                                    % (bz_name, note))
                        else:
                            note = (' <b>Added in %s.</b>\n'
                                    % bz_name)
                        oldi[i] = newi[i]
                        oldi[i]['Remarks'].append(note)
                        pair_up_index_entries(bz_name, oldi[i])
                        index_colours[table][i] = green
            else:
                # New table
                schema[table] = new_schema[table]
                pair_up_table_entries(bz_name, schema, table)
                table_colours[table] = green
                note = schema_remarks.table_added_remarks[table]
                if note:
                    note = ('<b>Added in %s (%s).</b>\n'
                            % (bz_name, note))
                else:
                    note = ('<b>Added in %s.</b>\n'
                            % bz_name)
                remark = table_remarks[table]
                if remark is None:
                    remark = ''
                table_remarks[table] = remark + note
    stringify_schema(schema)
    return (schema, table_remarks, table_colours,
            column_colours, column_subcolours,
            index_colours, index_subcolours)

def strip_p4_id(id):
    """Return 'id' without the keyword wrapping '$Id: ' and ' $'.

    The prefix and the suffix are each removed only if present.
    """
    prefix = '$Id: '
    suffix = ' $'
    if id.startswith(prefix):
        id = id[len(prefix):]
    if id.endswith(suffix):
        id = id[:-len(suffix)]
    return id

# Write the versioned schema document, including prelude and
# afterword, to a named file.  This is the function we call to
# generate our Bugzilla schema doc.  Note that although it will
# generate schema diffs for various version ranges, the prelude and
# afterword it adds are specific to certain Bugzilla versions.

def write_versioned_tables(first, last, filename):
    """Write the schema document for versions 'first' to 'last'.

    The document written to the file 'filename' consists of
    schema_remarks.prelude, the HTML from output_schema, and the
    afterword from schema_remarks.afterword, which names the revisions
    of this file and of the remarks.

    All the text is prepared before the file is opened, and the file is
    closed even if a write fails.
    """
    (schema, tr, tc, cc, csc, ic, isc) = get_versioned_tables(first, last)
    html = output_schema(schema, tr, tc, cc, csc, ic, isc)
    generator_id = strip_p4_id('$Id: //info.ravenbrook.com/project/p4dti/version/2.1/design/bugzilla-schema/make_schema_doc.py#1 $')
    remarks_id = strip_p4_id(schema_remarks.remarks_id)
    afterword = schema_remarks.afterword(('Generated by %s<br/>\n'
                                          'Using remarks from %s')
                                         % (generator_id, remarks_id))
    f = open(filename, 'w')
    try:
        f.write(schema_remarks.prelude)
        f.write(''.join(html))
        f.write(afterword)
    finally:
        f.close()

# A. REFERENCES
#
#
# B. DOCUMENT HISTORY
#
# 2001-03-08 NB Created.
#
#
# C. COPYRIGHT AND LICENSE
#
# This file is copyright (c) 2001 Perforce Software, Inc.  All rights
# reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
#
# $Id: //info.ravenbrook.com/project/p4dti/version/2.1/design/bugzilla-schema/make_schema_doc.py#1 $
