# Copyright (C) 2013-2015, 2018 The Meme Factory, Inc.  http://www.meme.com/
#
#    This file is part of Gombe-MI.
#
#    Gombe-MI is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Gombe-MI.  If not, see <http://www.gnu.org/licenses/>.
#
# Karl O. Pinc <kop@meme.com>
#

# There are main objects, and their subclasses, here:
#   LoadedForm
#   DBHandler  (generally referred to a an "upload handler", at present)
#   DBConnector (UploadEngine)
#
# See their documentation below.

# Write python 3 compatible code.
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division

from csv import reader as csv_reader
from cgi import escape as cgi_escape
import collections
import ast
import markupsafe
import hashlib
import io

# We are not really using wtforms.  We use it to (barely)
# interact with the html and post request but really
# we define our own classes to handle working memory
# and interacting with the session.
from wtforms import (
    Form,
    BooleanField,
    StringField,
    RadioField,
    PasswordField,
    FileField)

import psycopg2

from gmi_pyramid.lib.form_constants import *


# Setup default values for forms.

class UserInitialPost():
    db          = LIVE_DB
    user        = ''
    password    = ''

class AuthInitialPost():
    db          = '' 
    user        = ''
    password    = ''

class UploadFileInitialPost(AuthInitialPost):
    upload_fmt  = CSV
    datafile    = ''
    trim_upload = True

class UploadNullFileInitialPost(UploadFileInitialPost):
    upload_null = True
    null_rep    = ''

class UploadTableInitialPost(UploadNullFileInitialPost):
    table       = ''

# The wtforms that suck data out of the html.

class UserWTForm(Form):
    '''The wtform used to connect to the "gombemi" db to authenticate .'''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    user         = StringField('User:')
    password     = PasswordField('Password:')

    ivals        = UserInitialPost
        

class AuthWTForm(UserWTForm):
    '''The wtform used to connect to any db and authenticate.'''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    db           = StringField('Database:')

    ivals        = AuthInitialPost
        

class UploadFileWTForm(AuthWTForm):
    '''The wtform used for uploading files.'''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    upload_fmt   = RadioField('Upload Format:',
                              choices=[('Upload CSV Data:', CSV),
                                       ('Upload tab delimited Data:', TAB)])
    datafile     = FileField('File with CSV or Tab delimited Data:')
    trim_upload  = BooleanField('Trim Leading/Trailing Spaces:')

    ivals        = UploadFileInitialPost


class UploadNullFileWTForm(UploadFileWTForm):
    '''The wtform used for uploading files that may contain NULL.'''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    upload_null  = BooleanField('Upload NULL Values:')
    null_rep     = StringField('NULL Representation:')

    ivals        = UploadNullFileInitialPost


class UploadTableWTForm(UploadNullFileWTForm):
    '''The wtform used for uploading arbitrary data into tables.'''
    table        = StringField('Table or View:')

    ivals        = UploadTableInitialPost


class LoadedForm(collections.MutableMapping):
    '''
    Abstract class representing an upload form.

    Responsible for getting information into and out of
    html forms.

    The user API is that it acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

      _store  Where the real dict is kept
      _form   Instantaiated html form object (WTForms)
      _fc     Class handling html form
    '''
    def __init__(self, uh, fc=None, data={}, **kwargs):
        super(LoadedForm, self).__init__()
        self.uh = uh
        if data == {}:
            store = dict(kwargs)
        else:
            store = dict(data)
            store.update(kwargs)
        self._store = store
        self._fc = fc

    def __iter__(self):                return next(self._store)
    def __len__(self):                 return len(self._store)
    def __getitem__(self, key):        return self._store[key]
    def __setitem__(self, key, value): self._store[key] = value
    def __delitem__(self, key):        del self._store[key]

    def read(self):
        '''
        In the children this loads form from pyramid self.uh.request
        object and self._form and the session.

        In this case we instantiate _form and give it some defaults
        '''
        self._form = self._fc(self.uh.request.POST, self._fc.ivals)

    def write(self, response, errors):
        '''
        Produces the dict pyramid will use to render the form.

        Input:
          response    Dict of results from connection execution
          errors      List of errors from connection execution
        '''
        response['errors'] = errors
        return response


class CredsLoadedForm(LoadedForm):
    '''
    Acts like a dict, but with extra methods.
    Manages credentials (but not db) needed to authenticate.

    Attributes:
      uh      The UploadHandler instance using the form
      user    The username used to login
      _form   Instantaiated html form object (WXForms)

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UserWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(CredsLoadedForm, self).__init__(uh, fc, data)

    def session_put(self, key, value):
        '''
        Put data into the session.

        Input:
          key    The key
          value  The value
          
        Returns:

        Side effects:
          Modifies session

        May be overridden by a subclass to keep data out
        of the session.
        '''
        self.uh.session[key] = value
        

    def read(self):
        '''
        Read form data from the client
        '''

        # Read parent's data
        super(CredsLoadedForm, self).read()

        # Read our form data

        # Keep password and user in the session.  All the other
        # form varaibles must be re-posted.
        post = self.uh.request.POST
        session = self.uh.request.session

        # Defaults are now in place in self._form for password
        # and user.  Ignore these since we want to know whether
        # to go to the session for data values.
        if 'password' in post:
            self['password'] = post['password']
            self.session_put('password', self['password'])
        elif 'password' in session:
            self['password'] = session['password']

        if 'user' in post:
            self['user'] = post['user']
            self.session_put('user', self['user'])
        elif 'user' in session:
            self['user'] = session['user']
        else:
            self['user'] = ''

        # Other, hidden, POST variables
        if 'action' in post:
            self['action'] = post['action']
        else:
            self['action'] = ''


    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        response = super(CredsLoadedForm, self).write(result, errors)
        if ('havecreds' not in response
            or ('havecreds' in response and not response['havecreds'])):
            # We don't know if the credentials are good or
            # we know they are bad.  Keep them out of the session.
            response['user'] = ''
            response['password'] = ''
            self.session_put('user', '')
            self.session_put('password', '')
        else:
            response['user'] = self['user']
        return response


class AuthLoadedForm(CredsLoadedForm):
    '''
    Acts like a dict, but with extra methods.
    Manages form data needed to authenticate, including db to authenticate
    in.

    Attributes:
      uh      The UploadHandler instance using the form
      user    The Usernamed used to login
      db      The db to login to
      _form   Instantaiated html form object (WXForms)

    '''
    def __init__(self, uh, fc=AuthWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(AuthLoadedForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''

        # Read parent's data
        super(AuthLoadedForm, self).read()

        # Keep form variables handy
        self['db']               = self._form.db.data


    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        response = super(AuthLoadedForm, self).write(result, errors)
        response['db']   = self['db']
        return response


class UploadFileForm(AuthLoadedForm):
    '''
    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(UploadFileForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''

        # Read parent's data
        super(UploadFileForm, self).read()

        # Read our own data
        self['upload_fmt']       = self._form.upload_fmt.data
        self['trim_upload']      = self._form.trim_upload.data

        # Other POST variables involving a file
        self['filename'] = ''
        self['localfh'] = ''
        if 'action' in self:
            if self._form.datafile.data != '':
                post = self.uh.request.POST
                if hasattr(post['datafile'], 'filename'):
                    self['filename']  = post['datafile'].filename
                if hasattr(post['datafile'], 'file'):
                    self['localfh']   = post['datafile'].file


    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        if self['upload_fmt'] == CSV:
            csv_checked = CHECKED
            tab_checked = UNCHECKED
        else:
            tab_checked = CHECKED
            csv_checked = UNCHECKED

        if self['trim_upload']:
            trim_upload_checked = CHECKED
        else:
            trim_upload_checked = UNCHECKED

        response = super(UploadFileForm, self).write(result, errors)
        response['filename']    = self['filename']
        response['trim_upload'] = trim_upload_checked
        response['csv_value']   = CSV_VALUE
        response['tab_value']   = TAB_VALUE
        response['csv_checked'] = csv_checked
        response['tab_checked'] = tab_checked
        return response


class UploadDoubleFileForm(UploadFileForm):
    '''
    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UploadFileWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(UploadDoubleFileForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''
        # Read parent's data
        super(UploadDoubleFileForm, self).read()

        # Read our own data
        post = self.uh.request.POST
        if 'last_key' in post:
            self['last_key']      = post['last_key']
        else:
            self['last_key']      = ''

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        response = super(UploadDoubleFileForm, self).write(result, errors)
        response['last_key']   = self['last_key']
        return response


class UploadNullFileForm(UploadFileForm):
    '''
    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UploadNullFileWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(UploadNullFileForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''

        # Read parent's data
        super(UploadNullFileForm, self).read()

        # Read our own data
        self['upload_null']      = self._form.upload_null.data
        self['null_rep']         = self._form.null_rep.data


    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        if self['upload_null']:
            upload_null_checked = CHECKED
        else:
            upload_null_checked = UNCHECKED

        response = super(UploadNullFileForm, self).write(result, errors)
        response['upload_null']   = upload_null_checked
        response['null_rep']      = self['null_rep']
        return response


class UploadTableForm(UploadNullFileForm):
    '''
    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(UploadTableForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''

        # Read parent's data
        super(UploadTableForm, self).read()

        # Read our own data
        self['table']      = self._form.table.data


    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        response = super(UploadTableForm, self).write(result, errors)
        response['table']   = self['table']
        return response


class UploadDoubleTableForm(UploadTableForm):
    '''
    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def __init__(self, uh, fc=UploadTableWTForm, data={}, **kwargs):
        data.update(kwargs)
        super(UploadDoubleTableForm, self).__init__(uh, fc, data)

    def read(self):
        '''
        Read form data from the client
        '''
        # Read parent's data
        super(UploadDoubleTableForm, self).read()

        # Read our own data
        post = self.uh.request.POST
        if 'last_key' in post:
            self['last_key']      = post['last_key']
        else:
            self['last_key']      = ''

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.
        '''
        response = super(UploadDoubleTableForm, self).write(result, errors)
        response['last_key']   = self['last_key']
        return response


# Utility functions

def textualize(st):
    '''
    Return pg representation of NULL for None when string st is None.
    '''
    return 'NULL' if st == None else st

def is_checked(val):
    '''Is the value something a html input entity recognizes as checked?'''
    return val == CHECKED

# Some functions for logging

def escape_eol(string):
    '''Change all the newlines to \n.'''
    return string.replace('\n', r'\n')

def format_exception(ex):
    '''Return an exception formatted as suffix text for a log message.'''
    if isinstance(ex, psycopg2.DatabaseError):
        diag = ex.diag
        msg = diag.message_primary
        if hasattr(diag, 'message_detail'):
            msg += ', detail={0}'.format(escape_eol(diag.message_detail))
        if hasattr(diag, 'message_hint'):
            msg += ', hint={0}'.format(escape_eol(diag.message_hint))
    elif isinstance(ex, UploadError):
        msg = ex.e
        if ex.descr != '':
            msg += ' {0}'.format(escape_eol(ex.descr))
        if ex.detail != '':
            msg += ' {0}'.format(escape_eol(ex.detail))
    else:
        msg = ''
    if msg != '':
        msg = ': Error is ({0})'.format(msg)
    return msg

# Error handling

class UploadError(Exception):
    '''
    Module exceptions are derived from this class.

    lineno Line number to which error pertains, if any
    e      The error message
    descr  More description of the error
    detail Extra HTML describing the error
    data   Line of data causing problem, if any

    UploadError
      * Error
        *  NoHeadersError
        *  NoDataError
        *  DBError
          * DBCommitError
          * DBDataLineError
      * DataLineError
        *  TooManyColsError
    '''
    def __init__(self, e, lineno = '', descr='', detail='', data = ''):
        super(UploadError, self).__init__()
        self.lineno = lineno
        self.e = e
        self.descr = descr
        self.detail = detail
        self.data = data

    def __str__(self):
        out = 'error ({0})'.format(self.e)
        if self.lineno != '':
            out = '{0}: lineno ({1})'.format(out, self.lineno)
        if self.descr != '':
            out = '{0}: descr ({1})'.format(out, self.descr)
        if self.detail != '':
            out = '{0}: detail ({1})'.format(out, self.detail)
        if self.data != '':
            out = '{0}: data ({1})'.format(out, self.data)
        return out
    
class Error(UploadError):
    '''
    Module exceptions rasied while setting up to read data lines
    are derived from this class.

    e      The error message
    descr  More description of the error
    detail Extra HTML describing the error
    '''
    def __init__(self, e, descr='', detail=''):
        super(Error, self).__init__(e=e, descr=descr, detail=detail)

class NoFileError(Error):
    '''No file uploaded'''
    def __init__(self, e, descr='', detail=''):
        super(NoFileError, self).__init__(e, descr, detail)

class NoDBError(Error):
    '''No database name given'''
    def __init__(self, e, descr='', detail=''):
        super(NoDBError, self).__init__(e, descr, detail)

class NoUserError(Error):
    '''No user name supplied'''
    def __init__(self, e, descr='', detail=''):
        super(NoUserError, self).__init__(e, descr, detail)

class AuthFailError(Error):
    '''Unable to connect to the db'''
    def __init__(self, e, descr='', detail=''):
        super(AuthFailError, self).__init__(e, descr, detail)

class DryRunError(Error):
    '''Rollback due to dry_run config option'''
    def __init__(self, e, descr='', detail=''):
        super(DryRunError, self).__init__(e, descr, detail)

class CSRFError(Error):
    '''Invalid CSRF token'''
    def __init__(self, e, descr='', detail=''):
        super(CSRFError, self).__init__(e, descr, detail)

class NoHeadersError(Error):
    '''No column headings found'''
    def __init__(self, e, descr='', detail=''):
        super(NoHeadersError, self).__init__(e, descr, detail)

class NoDataError(Error):
    '''No data uploaded'''
    def __init__(self, e, descr='', detail=''):
        super(NoDataError, self).__init__(e, descr, detail)

class DuplicateUploadError(Error):
    '''The same filename updated twice into the same db'''
    def __init__(self, e, descr='', detail=''):
        super(DuplicateUploadError, self).__init__(e, descr, detail)

class DataInconsistencyError(Error):
    def __init__(self, e, descr='', detail=''):
        super(DataInconsistencyError, self).__init__(e, descr, detail)

class DBError(Error):
    '''psycopg2 raised an error'''
    def __init__(self, pgexc, e='process your request'):
        '''
        pgexc  The psycopg2 exception object
        e      Description of what PG was doing
        '''
        super(DBError, self).__init__(
            'PostgreSQL is unable to ' + e + ':',
            'It reports:',
            self.html_blockquote(pgexc))

    def html_blockquote(self, ex):
        '''
        Produce an html formatted message from a psycopg2 DatabaseError
        exception.
        '''
        primary = cgi_escape(ex.diag.message_primary)

        if ex.diag.message_detail == None:
            detail = ''
        else:
            detail = '<br />DETAIL: ' + cgi_escape(ex.diag.message_detail)

        if ex.diag.message_hint == None:
            hint = ''
        else:
            hint = '<br />HINT: ' + cgi_escape(ex.diag.message_hint)

        return '<blockquote><p>{0}: {1}{2}{3}</p></blockquote>'.format(
            ex.diag.severity,
            primary,
            detail,
            hint)

class DBCommitError(DBError):
    def __init__(self, pgexc):
        super(DBCommitError, self).__init__(pgexc)

class DBDataLineError(DBError):
    '''Database generated an error while the processor was running.'''
        
    def __init__(self, udl, pgexc):
        '''
        udl    An UploadDataLine instance
        pgexc  The psycopg2 exception object
        '''
        super(DBDataLineError, self).__init__(pgexc)
        self.lineno = udl.lineno
        self.data = udl.raw


class DataLineError(UploadError):
    '''
    Module exceptions rasied while line-by-line processing the uploaded
    data are derived from this class.

    lineno The line number
    e      The error message
    descr  More description of the error
    detail Extra HTML describing the error
    data   The uploaded data
    '''
    def __init__(self, lineno, e, descr='', detail='', data=''):
        super(DataLineError, self).__init__(e, lineno, descr, detail, data)

class TooManyColsError(DataLineError):
    def __init__(self, lineno, e, descr='', detail='', data=''):
        super(TooManyColsError, self).__init__(lineno, e, descr, detail, data)


# Upload processing

class SQLCommand(object):
    '''
    An SQL command that returns nothing

    Attributes:
      stmt  The statement, formatted for psycopg2 substitution
      args  Tuple of arguments used to substitute when executed.
    '''
    def __init__(self, stmt, args, ec=None):
        '''
        stmt   The statement, formatted for psycopg2 substitution
        args   Tuple of arguments used to substitute when executed.
        ec(ex) Produces the exception to raise an instance of on failure
                Input:
                  ex  The exception raised by psycopg2
        '''
        super(SQLCommand, self).__init__()
        self.stmt = stmt
        self.args = args
        self.ec = ec


    def execute(self, cur):
        '''
        Execute the sql statement.
        
        Input:
          cur  A psycopg2 cursor

        Side effects:
          Does something in the db.
          Can raise a psycopg2 error
        '''
        try:
            cur.execute(self.stmt, self.args)
        except psycopg2.DatabaseError as ex:
            if self.ec == None:
                raise ex
            else:
                raise self.ec(ex)


class LogSQLCommand(SQLCommand):
    '''An SQL command that logs success or failure.'''
    def __init__(self, stmt, args, ec=None,
                 log_success=None, log_failure=None):
        '''
        stmt  The statement, formatted for psycopg2 substitution
        args  Tuple of arguments used to substitute when executed.
        ec(ex) Produces the exception to raise an instance of on failure
                Input:
                  ex  The exception raised by psycopg2
        '''
        super(LogSQLCommand, self).__init__(stmt, args, ec)
        self.log_success = log_success
        self.log_failure = log_failure

    def execute(self, cur):
        '''
        Execute the sql statement.
        
        Input:
          cur  A psycopg2 cursor

        Side effects:
          Does something in the db.
          Can raise a psycopg2 error
        '''
        try:
            super(LogSQLCommand, self).execute(cur)
        except (UploadError, psycopg2.DatabaseError) as ex:
            if self.log_failure:
                self.log_failure(ex)
            raise
        else:
            if self.log_success:
                self.log_success()


class UploadLine(object):
    '''
    Representation of a generic uploaded line

    Responsbile for getting data out of uploaded lines.

    Once out, the following attributes provide the user API:
      raw    The "raw" line as a string
      tuples A sequence, the line split into columns
    '''
    def __init__(self, line, stol, mapper):
        '''
        line        Uploaded line
        lineno      Line number
        stol(st)    Parse string st and convert into list
        mapper(st)  Maps st onto desired python representation
                    (Used for trimming whitespace)
        '''
        super(UploadLine, self).__init__()
        self.raw = line
        self.tuples = [mapper(st) for st in stol(line)]

def doublequote(st):
    '''
    Put string in double quotes escaping according to sql's
    requirements for an identifier.
    
    Note that this does not work for unicode identifiers.
    '''
    out = '"'
    for ch in st:
        if ch == '"':
            out += '""'
        else:
            out += ch
    return out + '"'

class UploadHeaders(UploadLine):
    '''Uploaded heading line

    Derived from UploadLine

    raw    The "raw" line as a string
    tuples As a sequence
    sql    As a sql command fragment string
    '''

    def __init__(self, line, stol, mapper):

        if mapper(line) == '':
            raise NoHeadersError('No column headings found on first line',
                                 'The first line is ({0})'.format(line))

        super(UploadHeaders, self).__init__(line, stol, mapper)
        self.sql = ', '.join(['"' + doublequote(st) + '"'
                              for st in self.tuples])


class UploadDataLine(UploadLine):
    '''Uploaded data line

    Derived from UploadLine

    raw    The "raw" line as a string
    tuples As a sequence
    lineno The line number
    '''
    def __init__(self, line, lineno, stol, mapper):
        super(UploadDataLine, self).__init__(line, stol, mapper)
        self.lineno = lineno


class DBData(object):
    '''Data that is to modify the db.
  
    It is iterable, consisting of thunks which make an UploadedLine instance.

    lineno     Number of lines processed (including header line)

    '''
    def __init__(self):
        self.lineno = 0

    def __iter__(self):
        return self

    def __next__(self):
        '''
        Iterator to return a thunk which, when called, delivers the
        next object to be loaded into the db.'''
        self.lineno += 1
        return self._thunk()

    def _thunk():
        '''
        Return a thunk which, when called, delivers the next
        object to go into the db.
        '''
        raise NotImpementedError()


class SQLData(DBData):
    '''
    SQL statements returning no data that execute in the db.

    Attributes:
      stmts  List of SQLCommand instances
    '''
    def __init__(self, stmts):
        '''
        stmts  List of SQLCommand instances
        '''
        super(SQLData,self).__init__()
        self.stmts = stmts

        def gen(stmts):
            for stmt in stmts:
                yield stmt

        self.stmt_gen = gen(stmts)

    def _thunk(self):
        stmt = next(self.stmt_gen)
        return lambda: stmt


class UploadData(DBData):
    '''Uploaded data file
  
    It is iterable, consisting of thunks which make an UploadedLine instance.

    headers    UploadedHeaders instance
    cols       Number of columns
    lineno     Number of lines processed (including header line)

    '''
    def __init__(self, fileo, file_fmt, null_data, null_rep, trim=True):
        '''
        fileo       Uploaded file object
        file_fmt    File format: CSV or TAB
        null_data   (boolean) Uploaded data contains nulls
        null_rep    Uploaded string representation of null
        trim        (boolean) Trim leading and trailing whitespace?
        '''

        def get_headers():
            try:
                line = next(self._fileo)
            except StopIteration:
                raise NoDataError('Uploaded file contains no data')
            else:
                self.lineno += 1
                # Intuit the eol sequence
                if line[-2:] == '\r\n':
                    eol = '\r\n';
                    line = line[:-2]
                elif line[-1:] == '\n':
                    eol = '\n'
                    line = line[:-1]
                elif line[-1:] == '\r':
                    eol = '\r'
                    line = line[:-1]
                else:
                    # Give up
                    eol = ''

                if file_fmt == CSV:
                    func = lambda st: next(csv_reader((st,)))
                else:
                    func = lambda st: st.split('\t')
                return (eol, UploadHeaders(line, func, do_trim))

        super(UploadData, self).__init__()

        if trim:
            do_trim = lambda st: st.strip()
        else:
            do_trim = lambda st: st

        # Read lines as text with line endings intact.
        # newline='' required by csv.reader()
        self._fileo = io.TextIOWrapper(fileo, newline='')

        # Get column headings
        eol, self.headers = get_headers()
        neg_eollen = -len(eol)
        self.cols = len(self.headers.tuples)

        # Create parser to read raw lines into a list
        if file_fmt == CSV:
            self._parser = lambda st: self._extend(st,
                                                   next(csv_reader((st,))))
        else:    # Tab delimited format
            def tab_parser(line):
                if neg_eollen and line[neg_eollen:] == eol:
                    line = line[:neg_eollen]
                return self._extend(line, line.split('\t'))
            self._parser = tab_parser

        # Create mapper to frob data elements
        if null_data:
            null_rep = do_trim(null_rep)
            # psycopg2 maps None to NULL
            def mapper(st):
                st = do_trim(st)
                return None if st == null_rep else st
            self._mapper = mapper
        else:
            self._mapper = do_trim

    def _thunk(self):
        '''
        Return a thunk which, when called, delivers the
        UploadDataLine of the next line of the uploaded file..
        '''
        line = next(self._fileo)
        return lambda: UploadDataLine(line,
                                      self.lineno,
                                      self._parser,
                                      self._mapper)

    def _extend(self, line, seq):
        '''Give the list as many elements as there are in the header.
        
        line  line as string
        seq   line as sequence
        
        If there's too many elements, raise an error.
        '''
        if len(seq) > self.cols:
            raise TooManyColsError(self.lineno,
                                   'Line has too many columns',
                                   'More columns than column headings',
                                   data=line)
        return seq + [ '' for i in range(len(seq)+1, self.cols)]



class DataLineProcessor(object):
    '''
    A processor supplied uploaded lines (UploadDataLine instances)
    by an UploadEngine.  The lines are expected to be put into a pg db.

    Abstract class not expected to be instantiated.

    Attributes:
      ue     UploadEngine instance
      uh     UploadHandler instance
      cur    psycopg2 cursor

    Methods:
      eat(udl)  Given an UploadDataLine instance put the line in the db.
    '''
    def __init__(self, ue, uh):
        '''
        ue        UploadEngine instance
        uh        UploadHandler instance
        '''
        super(DataLineProcessor, self).__init__()
        self.ue = ue
        self.uh = uh
        self.cur = ue.cur

    def eat(udl):
        '''
        Update the db with the content of an uploaded line

        udl  An UploadDataLine instance
        '''
        raise NotImplementedError


class NoOpProcessor(DataLineProcessor):
    '''A processor that discards it's input.'''
    def __init__(self, ue, uh):
        '''
        ue             UploadEngine instance
        uh             UploadHandler instance
        cur            psycopg2 cursor
        '''
        super(NoOpProcessor, self).__init__(ue, uh)


    def eat(self, udl):
        '''
        Executes an sql command in the db.

        udl  A DataLine instance.
        '''
        pass


class ExecuteSQL(DataLineProcessor):
    def __init__(self, ue, uh):
        '''
        ue             UploadEngine instance
        uh             UploadHandler instance
        cur            psycopg2 cursor
        '''
        super(ExecuteSQL, self).__init__(ue, uh)


    def eat(self, sqlc):
        '''
        Executes an sql command in the db.

        sqlc  An SQLCommand instance (a command and it's args)
        '''
        sqlc.execute(self.cur)


class DBHandler(object):
    '''
    Abstract class of a handler that modifies the db.

    The main logic goes here, including getting the data
    into and out of the db.

    Responsible for:
      Telling the form (LoadedForm) to get user data
      Retrieval of data file or generation of other data to go in the db
        (Generally involving an UploadData instance, and hence an UploadLine
        instance.)
      Validation of user supplied data; error raising:
        The html form data
        Whatever other data is uploaded or otherwise passed in
      Pre-processing to prepare for getting data into the db
        (Pre-processing is referred to as "setup" presently.)
      Getting the individual bits of data into and out of the db
        (by way of having a factory that produces  a DataLineProcessor)
      Post-processing to wrap up after the data is in the db
        (Post-processing is referred to as "cleanup" presently.)
      Rendering the html output

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance
      data          Something (beyond what's on the form) that goes into db
    '''
    def __init__(self, request):
        '''
        request A pyramid request instance
        '''
        super(DBHandler, self).__init__()
        self.request      = request
        self.uf  = self.make_form()

    def make_form(self):
        '''
        Return an instantiation of the upload form needed
        by the upload handler.
        '''
        raise NotImpliementedError

    def get_data(self):
        '''
        Put something that will go into the db into the 'data' attribute.
        '''
        raise NotImplimentedError

    def val_input(self):
        '''
        Validate input needed beyond that required to connect to the db.

        Note that this occurs after read() is called.

        This is expected to be replaced by it's subclass.
        '''
        return []

    def read(self):
        '''Read the html form into the upload form instance'''
        self.uf.read()

    def write(self, result, errors):
        '''
        Return dict needed to render an html form

        Input:
          errors  List of Error instances

        Returns:
        Dict pyramid will use to render the resulting form
        Reserved keys:
          errors   A list of UploadError exceptions.
        '''
        return self.uf.write(result, errors)

    def cleanup(self):
        '''
        Called after all lines are processed to do any final
        updates to the db.
        '''
        pass

    def render(self, errors, response):
        '''
        Return to pyramid what's needed to render the appropriate html
        page.

        Input:
          errors      List of Error instances
          response    Db connection result dict

        Returns:
          Dict pyramid will use to render the resulting form
        '''
        response = self.write(response, errors)
        return response


class SessionDBHandler(DBHandler):
    '''
    A DBHandler that supports sessions.

    Attributes:
      uf            An UploadForm instance
      session       A pyramid session instance
    '''
    def __init__(self, request):
        '''
        request A pyramid request instance
        '''
        super(SessionDBHandler, self).__init__(request)
        self.session = self.request.session

    def write(self, result, errors):
        '''
        Add csrf token into html form to protect sessions.

        Input:
          errors  List of Error instances

        Returns:
          Dict pyramid will use to render the resulting form
          Reserved keys:
            errors      A list of UploadError exceptions.
            csrf_token  Token for detecting CSRF.
        '''
        response = super(SessionDBHandler, self).write(result, errors)
        response['csrf_token'] = self.session.new_csrf_token()
        return response


class UploadHandler(SessionDBHandler):
    '''
    Handler for uploading a file.

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance
      data          (optional) A DBData instance
    '''
    def __init__(self, request):
        '''
        request A pyramid request instance
        '''
        super(UploadHandler, self).__init__(request)

    def factory(self, ue):
        '''
        Takes an UploadEngine instance
        Returns a DataLineProcessor instance
        '''
        raise NotImplementedError

    def val_input(self):
        '''
        Validate input needed beyond that required to connect to the db.

        Returns:
          A list of Error instances
        '''
        uf = self.uf
        errors = super(UploadHandler, self).val_input()

        if uf['filename'] == '':
            errors.append(NoFileError('No file supplied'))
        
        return errors

    def double_validator(self, errors):
        '''Utility function that can optionally be called by
        a val_input() function.  It checks that the same file
        has not been uploaded twice in succession to the same
        db.

        `errors`
        List of errors.  Appended to.
        '''
        uf = self.uf
        if self.make_double_key() == uf['last_key']:
            errors.append(DuplicateUploadError(
                    'File just uploaded to this db',
                    ('File named ({0}) just uploaded'
                     .format(markupsafe.escape(uf['filename']))),
                    '<p>Try again to force upload.</p>'))

    def hash_sequence(self, seq):
        '''Produce a hash of the elements of a sequence.'''
        hash = hashlib.md5()
        for item in seq:
            hash.update((hash.hexdigest() + item).encode('utf-8'))
        return hash.hexdigest()

    def make_double_key(self):
        '''Generate unique value which identifies a double upload.'''
        uf = self.uf
        return self.hash_sequence((uf['db'], uf['filename']))

    def write_double_key(self, response):
        '''Utility function.  Optionally called from within write()
        to save a key which is later tested for to determine if
        the same file is being uploaded twice in a row.

        This function keys by filename and db.

        The key is stored in the form and submitted in POST.
        This way if the session times out due to inactivity
        the double upload is still detected.

        Since the data is delivered to the client and the
        client will likely see it anyway we use a hash not
        for security but just to provide a convenient single
        value that can be tested.

        Input:
          response   Dict used by pyramid to render html form.

        Side effects:
          Modifies response.  Adds 'last_key' entry used by form to store key.
        '''
        response['last_key'] = self.make_double_key()

    def write(self, result, errors):
        '''
        Add lines processed and number of errors into html form
        shown the user.

        Input:
          errors  List of Error instances

        Returns:
          Dict pyramid will use to render the resulting form
          Reserved keys:
            errors      A list of UploadError exceptions.
            csrf_token  Token for detecting CSRF.
            e_cnt      Number of errors.
            db_changed  Boolean. Whether the db was changed.
        '''
        response = super(UploadHandler, self).write(result, errors)
        if hasattr(self, 'data'):
            response['lines'] = self.data.lineno - 1
        response['e_cnt'] = len(errors)
        response['db_changed'] = (not response['errors']
                                  and self.uf['action'] != '')
        return response


class TabularFileUploadHandler(UploadHandler):
    '''
    Handler for uploading a file with rows and columns and column headings.

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance
      data          An UploadData instance
    '''
    def __init__(self, request):
        '''
        request A pyramid request instance
        '''
        super(TabularFileUploadHandler, self).__init__(request)

    def cleanup(self):
        '''Finish after processing all lines.'''
        lines = self.ue.data.lineno
        if lines == 1:
            raise DataLineError(
                1,
                'File contains no data',
                ('No lines found after '
                 'initial line of column headings'))


class DBConnector(object):
    '''Abstract class supporting db connection and execution of a
    processor function that alters the db content.
    
    Responsible for connecting to the db, transaction management,
    and running the DataLineProcessor instances produced by the
    DBConnector's (the UploadHandler's) factory.
    Collects all errors raised during the processing.
    Telling the uploadhandler to cleanup.  This is done after
      all upload processing and before commit or rollback
      (and before the connection is closed).

    Attributes:
        uh        An UploadHandler instance.
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''

    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(DBConnector, self).__init__()

        # Configuration and response management.
        self.uh = uh

    def alter_db(self, conn):
        '''
        Alter the contents of the db.

        Input:
          conn    A database connection

        Returns:
          A list of Error instances

        Side Effects:
          Alter's db
        '''

        raise NotImplementedError


    def no_connection_response(self):
        '''
        The result dict to use when call_with_connection is not called.
        Matches with the result dict of call_with_connection

        Returns:
          Dict pyramid will use to render the resulting form.
          Reserved keys:
            havecreds   Boolean. Supplied credentials can connect to db.
        '''
        return {'havecreds' : False}

    def nodberror_factory(self):
        return NoDBError('No database name supplied')

    def nousererror_factory(self):
        return NoUserError('No user name supplied as login credentials')

    def authfailerror_factory(self):
        return AuthFailError('Unable to login',
                             'Is the database, user, and password correct?')

    def dryrunerror_factory(self):
        return DryRunError('Configured for "dry_run":'
                           ' Transaction deliberately rolled back')

    def upload_data(self, data, errors):
        '''Put a DBData object into the db.

        Input:
           data    A DBData object.
           errors  A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        raise NotImplementedError

    def alter_db(self, conn):
        '''
        Alter the contents of the db.

        Input:
          conn    A database connection

        Returns:
          A list of Error instances

        Side Effects:
          Alter's db
        '''

        errors = []
        try:
            self.uh.get_data()
            self.data = self.uh.data
            self.cur = conn.cursor()
            # (Cannot call uh until after self is fully
            # initalized, including self.cur.)
            processor = self.uh.factory(self)
        except Error as ex:
            errors.append(ex)
        else:
            try:
                self.upload_data(processor, self.data, errors)
                # Let upload handler finish
                try:
                    self.uh.cleanup()
                except UploadError as ex:
                    errors.append(ex)
            finally:
                self.cur.close()
        return errors

    def call_with_connection(self, func):
        '''
        Validate input, connect to the db, and do something with
        the connection.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of Error instances

        Returns:
          (errors, response)
          errors       List of Error instances
          response     Dict pyramid will use to render the resulting form.
                       The dict returned by func(conn) plus reserved keys.
                       Reserved keys:
                         havecreds   Boolean. Supplied credentials can
                                     connect to db.
        Side effects:
          Raises errors, calls func(conn)
        '''
        errors = []
        havecreds = False
        response = {}

        uf = self.uh.uf

        if self.db == '':
            errors.append(self.nodberror_factory())

        if self.user == '':
            errors.append(self.nousererror_factory())

        newes = self.uh.val_input()
        if newes:
            if not errors:
                havecreds = True
            errors.extend(newes)

        if not errors:
            registry = self.uh.request.registry
            try:
                conn = psycopg2.connect(
                    database = self.db,
                    user     = self.user,
                    password = self.password,
                    host     = registry.settings['pg_host'],
                    port     = registry.settings['pg_port'])
            except psycopg2.OperationalError:
                errors = [self.authfailerror_factory()]
                havecreds = False
            else:
                havecreds = True
                errors = self.call_alter_db(func, conn)

        response.update({'havecreds' : havecreds
                 })
        return (errors, response)

    def read_uh(self):
        '''Read data into the upload handler.'''
        self.uh.read()

    def read_db(self):
        '''Read the db from the upload handler.'''
        self.db = self.uh.uf['db']

    def read_user(self):
        '''Read the username from the upload handler.'''
        self.user = self.uh.uf['user']

    def read_password(self):
        '''Read the password from the upload handler.'''
        if 'password' in self.uh.uf:
            self.password = self.uh.uf['password']
        else:
            self.password = ''

    def read_action(self):
        '''Read the action from the upload handler.'''
        self.action = self.uh.uf['action']

    def read(self):
        '''Allows for custom engines to map what the handler
        has to what they need.
        '''
        self.read_uh()
        self.read_db()
        self.read_user()
        self.read_password()
        self.read_action()

    def run(self):
        '''Call the alter_db function with the connection to alter the db,
        render form and return Response

        Input:

        Returns:
          (errors, response)
            errors   List of Error instantiations
            response   Dict containing connection result info

        Side effects:
          Maintains session
          (Whatever the processor() call does)
          Outputs html
        '''
        self.read()

        if self.action == 'u':
            errors, response = self.call_with_connection(self.alter_db)
        else:
            errors = []
            response = self.no_connection_response()

        return self.uh.render(errors, response)


class NoTransactionEngine(DBConnector):
    '''An engine that does not run things in a transaction.
    
    It also does not do CSRF checking.
    
    Attributes:
        uh        An UploadHandler instance.
        data      An UploadData instance of the uploaded data
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''
    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(NoTransactionEngine, self).__init__(uh)

    def call_alter_db(self, func, conn):
        '''
        Call a database modification function with a db connection,
        turning off the automatic wrapping of the activity in
        a transaction.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of Error instances

        Returns:
          errors       List of Error instances
        Side effects:
          Calls func(conn)
        '''
        conn.set_session(autocommit=True)
        errors = func(conn)
        conn.close()
        return errors

    def upload_data(self, processor, data, errors):
        '''Put a DBData object into the db, without a wrapping transaction.

        Input:
           processor A DataLineProcessor object.
           data      A DBData object.
           errors    A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        for thunk in data:
            try:
                udl = thunk()
            except DataLineError as ex:
                errors.append(ex)
            else:
                try:
                    processor.eat(udl)
                except psycopg2.DatabaseError as ex:
                    errors.append(DBDataLineError(udl, ex))
                except DataLineError as ex:
                    errors.append(ex)
                except DBError as ex:
                    errors.append(ex)


class UnsafeUploadEngine(DBConnector):
    '''Abstract class supporting repeatedly running a processer to put
       uploaded data into db.
    
    Attributes:
        uh        An UploadHandler instance.
        data      An UploadData instance of the uploaded data
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
        eat_old_line(udl, thunk)
                  Trap errors raised by the db while running thunk.
                  Report any errors as due to the udl UploadDataLine
                  instance.
    '''
    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(UnsafeUploadEngine, self).__init__(uh)

    def call_alter_db(self, func, conn):
        '''
        Call a database modification function with a connection.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of Error instances

        Returns:
          errors       List of Error instances
        Side effects:
          Calls func(conn)
        '''
        errors = func(conn)

        if errors:
            conn.rollback()
        else:
            if ast.literal_eval(self.uh.request.registry.settings['dry_run']):
                conn.rollback()
                errors.append(self.dryrunerror_factory())
            else:
                try:
                    conn.commit()
                except psycopg2.DatabaseError as ex:
                    errors.append(DBCommitError(ex))
        conn.close()
        return errors

    def eat_old_line(self, udl, thunk):
        '''
        Run thunk and report any errors raised by the db as due to
        the UploadDataLine instance udl.

        Returns:
          The result of running thunk.
        '''
        try:
            result = thunk()
        except psycopg2.DatabaseError as ex:
            raise DBDataLineError(udl, ex)
        else:
            return result

    def upload_data(self, processor, data, errors):
        '''Put a DBData object into the db.

        Input:
           processor A DataLineProcessor object.
           data      A DBData object.
           errors    A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        for thunk in data:
            try:
                udl = thunk()
            except DataLineError as ex:
                errors.append(ex)
            else:
                self.cur.execute(
                    'SAVEPOINT line_savepoint;')
                try:
                    processor.eat(udl)
                except psycopg2.DatabaseError as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.append(DBDataLineError(udl, ex))
                except DataLineError as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.append(ex)
                except DBError as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.append(ex)
                else:
                    self.cur.execute(
                        'RELEASE line_savepoint;')


class UploadEngine(UnsafeUploadEngine):
    '''Abstract class supporting repeatedly running a processor
    to alter db content.  Adds cross-site request forgery checking
    to the UnsafeUploadEngine class.
    
    Attributes:
        uh        An UploadHandler instance.
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action
        csrf_token The client's csrf_token

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''

    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(UploadEngine, self).__init__(uh)

    def csrferror_factory(self):
        return CSRFError(
            'Your request failed and you are now logged out',
            ('This is a security measure. '
             'Some possible causes are:'),
            ('<ul>' 
             '<li>Cookies are off in the browser</li> '
             '<li>Reload of an expired page</li> '
             '  <ul>'
             '  <li>Use of the Back button to (re)submit'
             '      an old page</li>'
             '  <li>An attacker has crafted a page which resubmits'
             '      one of your old page submissions</li>'
             '  </ul> '
             '<li>Server restart</li> '
             '<li>Login session expired due to inactivity</li>'
             '</ul>'))

    def read_csrf_token(self):
        '''Save the csrf token.'''
        post = self.uh.request.POST
        if 'csrf_token' in post:
            self.csrf_token = post['csrf_token']
        else:
            self.csrf_token = None

    def read(self):
        '''Add a csrf_token.'''
        super(UploadEngine, self).read()
        self.read_csrf_token()

    def call_with_connection(self, func):
        '''
        Connect to the db.

        func(conn)  Call this function with the connection.
                    f(conn) must return a (errors, dict) tuple result,
                        errors   list of Error instances
                        dict     other results

        Returns:
          (errors, response)
          errors     List of Error instances
          response     Dict pyramid will use to render the resulting form.
                       The dict returned by func(conn) plus reserved keys.
                       Reserved keys:
                         havecreds   Boolean. Supplied credentials can
                                     connect to db.
                         session_expired  Boolean.  Session expired.
        Side effects:
          Raises errors, calls func(conn)
        '''
        session = self.uh.session

        token = session.get_csrf_token()
        if token != self.csrf_token:
            session.invalidate()
            response = self.no_connection_response()
            response['session_expired'] = True
            return([self.csrferror_factory()]
                   , response)
        else:
            (errors, response) = (super(UploadEngine, self)
                                  .call_with_connection(func))
            response['session_expired'] = False
            return (errors, response)
