# Copyright (C) 2017 The Meme Factory, Inc.  http://www.meme.com/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Karl O. Pinc <kop@meme.com>

'''
Classes representing how filesystem trees should match convention.
'''

import re

from . import exceptions as ex
from . import scan_item


# Constants
SUFFIX_RE_STR = ''.join([
    r'(?:',
    scan_item.VERSION_RE_STR,
    r')?',                        # 0 or 1 version strings
    r'(?:',
    scan_item.PAREN_RE_STR,       # followed by repeated parens
    r')*',                        # (or not, because there's 0 or more)
    r'$'])                        # at the end of the string
SUFFIX_RE = re.compile(SUFFIX_RE_STR)


#
# Functions
#

#
# Strings to be matched must have "stop points" forcably inserted.
# These "stop points" cause matching of the current pattern component
# to cease.  The unmatched remainder is then matched by the rest of the
# pattern.
#
# The following functions insert (and undo the insertion of) "stop points".
def breakon_version_parens(data):
    '''
    Version numbers followed by parens end file name conventions.
    A "stop point" must come before the version number lest a preceeding
    <something> sort of pattern component match the version number.
    '''
    break_pos = SUFFIX_RE.search(data).start()
    if break_pos == len(data):
        return [data]
    else:
        return [data[0:break_pos], data[break_pos:]]


def reassemble(data, more):
    return data + ''.join(more)


#
# Other helper functions
#

def _extend_patterns(patterns, more_patterns):
    for pat in more_patterns:
        patterns.add(pat)


def _rows_of_patterns(patterns):
    return [pattern.rownum for pattern in patterns]


#
# Classes
#
class PatternLoc(object):
    '''Track where patterns match'''
    def __init__(self, rownum, pattern, pos):
        super(PatternLoc, self).__init__()
        self.rownum = rownum
        self.pattern = pattern

        # Don't let the position go past the end of the pattern.
        # This causes mis-matches in implicitly added pattern components
        # to be reported as occurring at the spot following the end of the
        # pattern.  This makes more sense to the user and filters out
        # multiple run-out-of-pattern mis-matches within the same
        # pattern.
        if pos is not None and len(pattern) < pos:
            self.pos = len(pattern)
        else:
            self.pos = pos

    def __eq__(self, other):
        return self.rownum == other.rownum and self.pos == other.pos

    def __hash__(self):
        return hash((self.rownum, self.pos))


class DecisionNode(object):
    def __init__(self, scan_item=None):
        super(DecisionNode, self).__init__()
        self.branches = {}
        self.patterns = set()
        self.scan_item = scan_item

    # Some internal functions to abstract how patterns are stored
    def attach_pattern(self, rownum, pattern, pos):
        self.patterns.add(PatternLoc(rownum, pattern, pos))

    # Pattern extension functions
    def _leaf_child(self):
        if None in self.branches:
            return self.branches[None]
        else:
            return None

    def _existing_pattern_check(self, rownum):
        leaf = self._leaf_child()
        if leaf:
            ex.express(ex.ExtraRuleWarning(
                'The rows are {0}'.format(
                    _rows_of_patterns(leaf.patterns) + [rownum])))

    def _extend_w_optional_item(self, next_item, rownum, pattern, scan_items):
        next_items = scan_items[1:]
        opt_branch = next_item.value[:]
        opt_branch.extend(next_items)
        self.extend_w_scan_items(rownum, pattern, opt_branch)
        self.extend_w_scan_items(rownum, pattern, next_items)

    def _extend_w_regular_item(self, next_item, rownum, pattern, scan_items):
        if next_item in self.branches:
            next_node = self.branches[next_item]
        else:
            self._existing_pattern_check(rownum)
            next_node = DecisionNode(next_item)
            self.branches[next_item] = next_node
        next_node.attach_pattern(rownum, pattern, next_item.pos)
        next_node.extend_w_scan_items(rownum, pattern, scan_items[1:])

    def _extend_w_next_item(self, rownum, pattern, scan_items):
        '''Extend decision tree with the next scan item'''
        next_item = scan_items[0]
        if next_item.type == 'optional':
            self._extend_w_optional_item(
                next_item, rownum, pattern, scan_items)
        else:
            self._extend_w_regular_item(
                next_item, rownum, pattern, scan_items)

    def _reuse_terminal(self, rownum, pattern):
        next_node = self.branches[None]
        next_node.attach_pattern(rownum, pattern, len(pattern))
        ex.express(ex.DuplicateRuleWarning(
            'The duplicate rows are {0}'
            .format(_rows_of_patterns(next_node.patterns))))

    def _make_new_terminal(self, rownum, pattern):
        next_node = DecisionNode(None)
        self.branches[None] = next_node
        next_node.attach_pattern(rownum, pattern, len(pattern))
        if len(self.patterns) > 1:
            ex.express(ex.ExtraRuleWarning(
                'The rows are {0}'
                .format(_rows_of_patterns(next_node.patterns))))

    def _terminate_scan(self, rownum, pattern):
        '''Terminate decision tree'''
        if None in self.branches:
            self._reuse_terminal(rownum, pattern)
        else:
            self._make_new_terminal(rownum, pattern)

    def extend_w_scan_items(self, rownum, pattern, scan_items):
        '''Extend the decision tree using scan_items'''
        if scan_items:
            self._extend_w_next_item(rownum, pattern, scan_items)
        else:
            self._terminate_scan(rownum, pattern)

    def match_branches(self, remainder, report_items, more):
        '''Merge results of calling invalid() on branch nodes'''
        ret_len = len(remainder) + sum(len(data) for data in more)
        ret_pats = None
        for branch in self.branches.values():
            (unmatched, unpats, child_report_items) = branch.invalid(
                remainder, more)
            if unmatched == '':
                # Don't bother to keep looking once there's a match
                child_report_items.extend(report_items)  # (in reverse order)
                return (unmatched, unpats, child_report_items)
            elif unmatched is True:
                ret_data = unmatched
                if ret_len == 0 and ret_pats is not None:
                    # Duplicate "end of pattern" mismatch
                    _extend_patterns(ret_pats, unpats)
                else:
                    ret_len = 0
                    ret_pats = unpats
            else:
                br_len = len(unmatched)
                if br_len < ret_len:
                    ret_data = unmatched
                    ret_len = br_len
                    ret_pats = unpats
                elif br_len == ret_len:
                    # Duplicate match.  Probably means duplicate rule...
                    if ret_pats is None:
                        ret_data = unmatched
                        ret_pats = unpats
                    else:
                        _extend_patterns(ret_pats, unpats)
        return (ret_data, ret_pats, [])

    def invalid(self, data, more):
        '''Return pattern(s) that "fail the least".

        This means the pattern that matches the most characters
        in the input.  All characters in valid input are matched
        by some pattern.

        The original file name input is broken into a sequence of strings.
        Matching of an individual decision tree node ends with the end
        of an individual string.  The remaining strings are then
        matched against the remaining pattern components.

        Input:
        data  File name portion to match against this decision tree branch.
        more  Sequence of additional pieces of the file name.

        Returns:  (unmatched, patlocs, report_items)
           unmatched  That portion of the data which did not match,
                      or True when all the data matched but no
                      convention was exhausted.  The empty string ('')
                      when the data matches one or more patterns.
           patlocs    Iterable (really, a set) of the PatternLoc
                      instances which match "the most".
           report_items List of "things" (at the moment unusual
                        parentheticals) to be reported when
                        the data matches, or the empty list when
                        the data does not exhaustively match any pattern.
        '''
        if self.scan_item is None:
            # End of decision tree.
            return (reassemble(data, more), self.patterns, [])
        elif data == '':
            if len(more) == 0:
                return (True, self.patterns, [])
            else:
                return self.invalid(more[0], more[1:])
        else:
            (match_len, report_items) = self.scan_item.matches(data)
            if match_len:
                report_items.reverse()    # Keep in reported order
                return self.match_branches(
                    data[match_len:], report_items, more)
            else:
                return (reassemble(data, more), self.patterns, [])


class DecisionTree(DecisionNode):
    '''File system object names test against a decision tree.'''

    def __init__(self, fsdir):
        super(DecisionTree, self).__init__()
        self.fsdir = fsdir

    def extend(self, rownum, pattern, scan):
        '''Extend the decision tree with with the scanned pattern'''
        self.attach_pattern(rownum, pattern, None)
        super(DecisionTree, self).extend_w_scan_items(
            rownum, pattern,
            scan_item.to_scan_items(scan))

    def invalid(self, data):
        data_parts = breakon_version_parens(data)
        (unmatched, patlocs, report_items) = self.match_branches(
            data_parts[0], [], data_parts[1:])
        report_items.reverse()   # Restore reported order
        return (unmatched, patlocs, report_items)


class FSDir(object):
    '''The properties of a directory as specified by the rule file.'''

    def __init__(self, tree_class, name_pattern, parent=None, rownum=None):
        self.name_pattern = name_pattern
        self.parent = parent
        self.rownum = rownum
        self.children = {}
        self.decision_tree = tree_class(self)

    def set_root(self):
        self.parent = self

    def is_root(self):
        return self.parent == self

    def add_child(self, rownum, child):
        self.children[rownum] = child

    def add_content(self, tree, parser, rownum, pattern):
        try:
            scan = parser.parse(pattern)
        except ex.LexError as err:
            raise ex.ScanError(rownum, err)
        except ex.YaccError as err:
            raise ex.ParseError(rownum, err)
        except ex.YaccEOFError as err:
            raise ex.ParseEOFError(rownum, pattern)

        tree.extend(rownum, pattern, scan)

    def invalid(self, data):
        return self.decision_tree.invalid(data)
