#!/usr/bin/python3
# Karl O. Pinc <kop@karlpinc.com>

'''Write a configuration value from a YAML or JSON file to standard output

Syntax: getcfg [--config path] [--section sectionname]
               [--default value] [identifier]

For more, and the latest docs, see the output of "getcfg -h".
See parse_args() for argument descriptions, and EPILOGUE, below.

There is also the similar project shyaml, but it is not packaged for RH or
EPEL and we are not using virtualenvs.  Also, it is a more general YAML
reader and so not necessarily the clearest thing to use.
'''

import argparse
import base64
import datetime
import logging
import math
import os
import sys

import psutil

from psoft2ldap2 import conflib


# Absolute path of the system configuration directory ("etc" directly
# under the filesystem root)
ETC = os.sep + 'etc'

# File name extension carried by configuration files
CONF_SUFFIX = '.conf'

# Character separating the individual keys inside an identifier
IDENT_DELIM = ':'


# Helper functions used when building f-strings

def build_default_confpath(parent_command):
    '''Return the default configuration file path for a command name.

    The result is the command name placed inside the ETC directory
    with CONF_SUFFIX appended.
    '''
    path_without_suffix = os.path.join(ETC, parent_command)
    return path_without_suffix + CONF_SUFFIX


def identifier_of(args):
    '''Return the leading "identifier (...) of " fragment of a message.

    args is the dict of parsed command line arguments.  When no
    identifier was given on the command line the empty string is
    returned, so callers can always splice the result in front of the
    word "section".
    '''
    identifier = args['identifier']
    if identifier is None:
        return ''
    return f'identifier ({identifier}) of '


# Leading text of the --help output; passed to argparse as the
# parser's description in parse_args().
DESCRIPTION = (
    '''Write to standard output a configuration value read from a YAML or
JSON file.

A configuration may be as simple as a list of key-value pairs.

YAML and JSON support hierarchical tree-like structures.  So, although
it need not be used this way, getcfg supports configuring multiple
programs from a single configuration file.  Each program may have a
dedicated "section" of its own, a section being a top-level node in
the configuration file.  Some configuration parameters may be placed
in sections each read by only one program.  Other configuration
parameters may be placed in sections read by multiple programs.  Thus
a related suite of programs may be configured from a single file.''')

# Trailing text of the --help output; passed to argparse as the
# parser's epilog in parse_args().  This is the user manual.
#
# The value is assembled from adjacent string literals.  Only the
# f-string pieces interpolate (ETC, CONF_SUFFIX and
# build_default_confpath()); the plain triple-quoted pieces are taken
# literally, so the braces in the bash examples need no escaping.
EPILOGUE = ('''When no identifier is given, the specified configuration section
identifies the value written to standard output.

By default, a program using getcfg reads its configuration from a file
'''
            f'in {ETC}, a "{CONF_SUFFIX}" '
            '''file named after the program itself. When not
specified, the filesystem path of the configuration file is based on
the name of the process executing getcfg, with leading directory
components removed from the process name.  E.g. when run from program
"foo", the default configuration file is: '''
            f'{build_default_confpath("foo")}'
            '''

By default, a program using getcfg retrieves values from its own
section of the configuration file.  This default section has the name
of the process executing getcfg, with leading directory components
removed.

The following example illustrates these features:

SIMPLE EXAMPLE

    $ # A contrived YAML configuration file:
    $ cat /etc/myprog.sh.conf
    eeny:       catch a
    meeny:      tiger
    myprog.sh:
      miny:     by the
      moe:      toe

    $ # A sample program:
    $ cat myprog.sh
    #!/bin/bash
    getcfg moe
    getcfg miny
    getcfg -s meeny
    getcfg -s eeny

    $ # Sample result:
    $ ./myprog.sh
    toe
    by the
    tiger
    catch a

Only simplistic configuration values are supported; informally, just
single values and lists of single values are allowed.  The identified
configuration value must be a string, number, or other YAML/JSON
scalar value, or a sequence (a JSON array, a list), or set, of scalar
values.  Identifying a YAML mapping (a JSON object) is an error.
Identifying a sequence (a JSON array), or a set, which itself contains
a YAML collection (a JSON object or array) is an error.  Sequences
(arrays) and sets are written to standard output one entry per line.

As written to standard output, sequences (arrays) and sets cannot be
distinguished from multi-line string values.  Other YAML/JSON values
having differing data types may likewise appear identical when
written.  It is not possible to determine the YAML/JSON data type of
the value written.

The configuration file must contain a single YAML document.  JSON
configuration files are single, valid, YAML documents; JSON is a
subset of YAML.

OUTPUT FORMATTING

When configuration values are written to standard output, a uniform
formatting is applied.  Each value's data type determines its
formatting.

A YAML or JSON file contains written representations of typed data,
strings, numbers, and so forth.  There are many ways to write a given
data value.  The floating point number "1.0" may also be written
"1.00", although when considered as a string these are two different
data values.  Regardless of how the floating point number 1.00 appears
in a configuration file, getcfg writes 1.0 to standard output.

As you can see, the value written to standard output may not have the
form the value has in the configuration file.  If the particular
written representations of your data values are important then they
must be written as strings.  Write "1.00" (with the quotes, or in some
other syntax for strings) in the configuration file instead of writing
1.00 (without quotes).

The following table describes how each data type is formatted.

Output of YAML types:

  !!binary    Base64 encoded, without line breaks, as a UTF-8 string
  !!bool      "true" or "false"
  !!float     Formatting specifics depend on both the value being
              represented and the underlying hardware and operating
              system.  Outputs are:
                ["-"] digits "." digits,
                or "NAN", or "INF", or "-INF",
                or a scientific notation representation of the form
                ["-"] digit "." digits "e" sign digits
              Trailing zeros are suppressed, except for a single 0
              following the decimal point.  Leading zeros are
              suppressed, except for a single 0 preceding the decimal
              point.  Output is in scientific notation if the
              non-scientific representation of the value is either:
              close enough to zero that there are more than 3 zeros
              immediately to the right of the decimal point, or, far
              enough away from zero that there is a number, having the
              same number of decimal digits as the value, which loses
              precision when stored in the underlying system's float
              data type.
  !!int       ["-"] digits
  !!null      "null"
  !!str       Unicode string
  !!timestamp ISO 8601 Complete date plus hours, minutes, seconds, and
              a 6-digit decimal fraction of a second, with a plus or
              minus time zone offset, in 32 characters
              (e.g. 1997-07-16T19:20:30.450000+01:00)
  !!map       no output, error
  !!omap      no output, error
  !!pairs     no output, error
  !!set       One value per line
  !!seq       One value per line

It is an error to identify a configuration value of a type not in the
table above.

All numbers are written to standard output in base 10.

PATHS AND KEYS

A path "identifies" a particular configuration parameter.  It does
this by being composed of the sequence of keys (attributes in JSON
parlance), a sequence that ends with the key of the requested
configuration parameter.  The syntax of an identifier is a series of
keys separated by colons (:).  The search for the leftmost key starts
with the children of the node identified by the --section command line
option.

This syntax necessarily limits the values available for keys.  Keys
must be strings, and the only strings allowed are those which can be
written in the YAML "plain style".  The keys in identifiers are not
expected to be a YAML presentation and are not parsed as such, they
are strings of characters.  In particular, keys cannot contain the
colon (:) character.

EXAMPLE

    A YAML configuration file:
      # This file is /etc/foo.conf
      foo:
        email:
        - George Complex <george@example.com>
        - '"Georgina Q. Complex" <georgina@example.com>'
      mailcontent:
        subject: Important message
        body: |
          Help!
          I need somebody.
        footers:
          disclaimer: Not to be taken seriously
          informal: Sent with love and kindness

    A program using getcfg:
      #!/bin/bash
      # This file is /usr/local/bin/foo
      mapfile -t addresses < <(getcfg email)  ; # lines into array var
      body="$(                 getcfg --section mailcontent body)"
      subject="$(              getcfg --section mailcontent subject)"
      footer="$(               getcfg --config /etc/foo.conf \\
                                      --section mailcontent \\
                                      footers:informal)"
      { echo "$body"
        echo '----'
        echo "$footer"
      } | mail -s "$subject" "${addresses[@]}"

RELATED PROGRAMS

jq: A filter/transform, like sed, for JSON data.
    https://stedolan.github.io/jq/
shyaml: Read access to YAML from the command line.
    https://github.com/0k/shyaml
yamllint: A linter (syntax checker) and style checker for YAML files.
    https://github.com/adrienverge/yamllint

REFERENCES

YAML Ain’t Markup Language (YAML™) Version 1.2
  https://yaml.org/spec/1.2/spec.html
Language-Independent Types for YAML™ Version 1.1
  https://yaml.org/type/
Date and Time Formats
  https://www.w3.org/TR/NOTE-datetime
Bash FAQ
  https://mywiki.wooledge.org/BashFAQ
''')


# Exceptions

class GetcfgException(Exception):
    '''Root of the errors getcfg raises itself.

    deliver_result() catches this class, logs the message and exits
    with status 1.
    '''


class MissingNode(GetcfgException):
    '''A requested node is absent from the configuration.

    get_value() catches this class to decide whether the --default
    value applies.
    '''


class MissingSection(MissingNode):
    '''The named section is not in the configuration file.

    Raised by get_section().
    '''


class MissingIdentifier(MissingNode):
    '''The identifier does not lead to a node within the section.

    Raised by value_from_section().
    '''


class BadConfValue(GetcfgException):
    '''The identified configuration value cannot be written.'''


class UnknownYAMLData(BadConfValue):
    '''The value has a type getcfg does not know how to format.

    Raised by format_scalar().
    '''


class MappingReturned(BadConfValue):
    '''The identified value is itself a YAML mapping.

    Raised by output_conf_value().
    '''


class BadSequenceValue(BadConfValue):
    '''An entry of the identified sequence or set cannot be written.'''


class MappingReturnedInSequence(BadSequenceValue):
    '''An entry of the identified sequence or set is a YAML mapping.

    Raised by output_scalar().
    '''


class SequenceReturnedInSequence(BadSequenceValue):
    '''An entry of the identified sequence or set is itself list-like.

    Raised by output_scalar().
    '''


# Functions

def parse_args(args):
    '''Parse the command line and return the result as a dict.

    args is the list of command line words, without the program name.
    The returned dict has the keys 'identifier', 'config', 'section'
    and 'default'; an argument not given on the command line has the
    value None.
    '''
    cli = argparse.ArgumentParser(
        description=DESCRIPTION,
        epilog=EPILOGUE,
        # Keep the hand-formatted layout of DESCRIPTION and EPILOGUE
        formatter_class=argparse.RawDescriptionHelpFormatter)

    identifier_help = '''
      Identifies the configuration value written to standard output.  A
      sequence of keys separated by the colon (:) character.  This
      "path" is rooted at the indicated section.  See PATHS AND KEYS
      below.'''
    cli.add_argument('identifier', nargs='?', help=identifier_help)

    config_help = (
        '''The filesystem path of the configuration file to read.
            The default is based on the name, with leading directory components
            removed, of the parent process's command, like so: '''
        f'{build_default_confpath("PARENTCOMMAND")}')
    cli.add_argument(
        '--config', '-c', required=False, metavar='path', help=config_help)

    section_help = '''
      Identifies the top-level node rooting the tree from which
      the configuration value is extracted.  If omitted defaults to
      the name, with leading directory components removed, of the
      parent process's command.'''
    cli.add_argument(
        '--section', '-s', required=False, metavar='sectionname',
        help=section_help)

    default_help = '''
      The value to write to stdout when the identified configuration
      value does not exist.  Unless this option is used getcfg exits
      with a non-zero status code and a message on standard error
      when the identified key is missing.'''
    cli.add_argument(
        '--default', '-d', metavar='value', required=False,
        help=default_help)

    namespace = cli.parse_args(args)
    return vars(namespace)


def get_parent_command():
    '''Return the name of the parent process, without directories.

    The name is obtained from psutil for the process id reported by
    os.getppid(); any leading directory components are removed.
    '''
    parent_pid = os.getppid()
    process_name = psutil.Process(parent_pid).name()
    return os.path.basename(process_name)


def find_conf_file(args, parent_command):
    '''Return the path of the configuration file to read.

    The --config command line value wins when one was given;
    otherwise the path is derived from the parent command's name.
    '''
    explicit_path = args['config']
    if explicit_path is not None:
        return explicit_path
    return build_default_confpath(parent_command)


def read_conf_file(logger, args, parent_command):
    '''Locate and load the configuration file.

    Returns a (path, content) tuple.  The content is whatever
    conflib.dict_from_path() returns for the path; that function is
    defined outside this file (presumably the parsed document as a
    dict -- verify against conflib).
    '''
    path = find_conf_file(args, parent_command)
    content = conflib.dict_from_path(logger, path, expected=True)
    return (path, content)


def find_section_name(args, parent_command):
    '''Return the name of the section to search.

    This is the --section command line value, or the parent command's
    name when the option was not given.
    '''
    requested = args['section']
    return parent_command if requested is None else requested


def get_section(confpath, conf, sname):
    '''Return the top-level node of conf named sname.

    Raises MissingSection when conf has no such key; confpath is used
    only in the error message.
    '''
    if sname not in conf:
        raise MissingSection(
            f'No such section ({sname}) in configuration file ({confpath})')
    return conf[sname]


def parse_identifier(identifier):
    '''Split an identifier into its list of keys, at each IDENT_DELIM.'''
    keys = identifier.split(IDENT_DELIM)
    return keys


def value_from_section(args, confpath, sname, section):
    '''Return the node of section selected by the identifier argument.

    args is the dict of parsed command line arguments.  When no
    identifier was given the section itself is returned.  Otherwise
    the identifier's keys are followed, one mapping level per key.

    Raises MissingIdentifier when a key is absent, or when a key
    would have to be looked up in something that is not a mapping (a
    scalar, a sequence, or null).  confpath and sname are used only
    in the error message.
    '''
    identifier = args['identifier']
    if identifier is None:
        return section

    node = section
    for key in parse_identifier(identifier):
        # Only a mapping can be descended into.  Without the type
        # check "key in node" is a substring test on a string and a
        # membership test on a list, and the node[key] which follows
        # a match raises TypeError; on an int or None the "in" test
        # itself raises TypeError.
        if not (isinstance(node, dict) and key in node):
            raise MissingIdentifier(
                f'Identifier ({identifier}) not found in section '
                f'({sname}) of configuration file ({confpath})')
        node = node[key]

    return node


def get_value(logger, args):
    '''Read the configuration and return the requested value.

    Returns a (confpath, sname, value) tuple: the path of the
    configuration file read, the name of the section searched, and
    the value found.

    When the section or the identifier is missing and --default was
    given, the default is returned as the value.  The exception to
    this is a missing section combined with an identifier: that
    always raises.  Without --default the MissingNode error
    propagates.
    '''
    parent_command = get_parent_command()
    (confpath, conf) = read_conf_file(logger, args, parent_command)
    sname = find_section_name(args, parent_command)
    try:
        value = value_from_section(
            args, confpath, sname, get_section(confpath, conf, sname))
    except MissingNode as ex:
        fallback = args['default']
        section_absent = isinstance(ex, MissingSection)
        if fallback is None or (
                section_absent and args['identifier'] is not None):
            raise
        value = fallback

    return (confpath, sname, value)


def format_float(value):
    '''Return the text written to standard output for a float.

    Not-a-number is written as NAN and the infinities as INF and
    -INF.  Finite values are written with at most sys.float_info.dig
    significant digits, either as
        ["-"] digits "." digits
    or, in scientific notation, as
        ["-"] digit "." digits "e" sign digits
    In both forms there is always at least one digit after the
    decimal point.
    '''
    if math.isnan(value):
        return 'NAN'
    if math.isinf(value):
        return '-INF' if value < 0 else 'INF'

    text = f'{value:.{sys.float_info.dig}g}'
    # The "g" format strips a trailing ".0", from the mantissa of
    # scientific notation (1e-05) as well as from plain numbers (1).
    # Put it back in either case.
    mantissa, marker, exponent = text.partition('e')
    if '.' not in mantissa:
        mantissa = f'{mantissa}.0'
    return f'{mantissa}{marker}{exponent}'


def format_int(value):
    '''Return the base 10 text written to standard output for an int.'''
    return f'{value!s}'


def format_bool(value):
    '''Return 'true' for a true value and 'false' for a false one.'''
    return 'true' if value else 'false'


def format_datetime(value):
    '''Return the ISO 8601 text written to standard output for a datetime.

    The result always has microseconds and a time zone offset.
    '''
    aware = value
    if aware.tzinfo is None:
        # YAML spec says an omitted time zone means UTC
        aware = aware.replace(tzinfo=datetime.timezone.utc)
    return aware.isoformat(timespec='microseconds')


def format_none():
    '''Return the text written to standard output for a null value.'''
    return 'null'


def format_scalar(args, sname, confpath, value):
    '''Return the text written to standard output for a scalar value.

    The formatting depends on the Python type of value:
      str                returned unchanged
      bytes              Base64 encoded, without line breaks
      float              see format_float()
      bool               'true' or 'false'
      int                base 10 digits
      datetime.datetime  see format_datetime()
      datetime.date      formatted as the start of that day, UTC
      None               'null'

    Raises UnknownYAMLData for any other type.  args, sname and
    confpath are used only in the error message.
    '''
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        # b64encode() never inserts line breaks, and its output is
        # plain ASCII.
        return base64.b64encode(value).decode('ascii')
    if isinstance(value, float):
        return format_float(value)
    # bool is a subclass of int, so it must be tested for first
    if isinstance(value, bool):
        return format_bool(value)
    if isinstance(value, int):
        return format_int(value)
    # datetime is a subclass of date, so it must be tested for first
    if isinstance(value, datetime.datetime):
        return format_datetime(value)
    if isinstance(value, datetime.date):
        # A timestamp written without a time part.  The YAML
        # timestamp type says the time is then 00:00:00 UTC.
        midnight = datetime.datetime.combine(value, datetime.time())
        return format_datetime(midnight)
    if value is None:
        return format_none()

    raise UnknownYAMLData(
        'The requested '
        f'{identifier_of(args)}'
        f'section ({sname}) of '
        f'configuration file ({confpath}) is of an unknown type '
        f'and cannot be written: {value}')


def is_yaml_mapping(value):
    '''Return True when value is a dict, i.e. a YAML !!map.
    '''
    is_mapping = isinstance(value, dict)
    return is_mapping


def is_yaml_listlike(value):
    '''Return True when value is a list or a set.

    These are the types written one entry per line:
    !!seq, !!omap, !!pairs, !!set
    '''
    return isinstance(value, (list, set))


def output_scalar(args, sname, confpath, value):
    '''Print one scalar value, on a line of its own.

    value is either the identified configuration value or one entry
    of the identified sequence or set.

    Raises MappingReturnedInSequence when value is a mapping and
    SequenceReturnedInSequence when it is list-like.  args, sname and
    confpath are used only in error messages.
    '''
    if not (is_yaml_mapping(value) or is_yaml_listlike(value)):
        print(format_scalar(args, sname, confpath, value))
        return

    # Both errors share the start of their message
    where = (
        'The requested '
        f'{identifier_of(args)}'
        f'section ({sname}) of '
        f'configuration file ({confpath}) is a sequence containing ')
    if is_yaml_mapping(value):
        raise MappingReturnedInSequence(
            f'{where}the YAML mapping: {value}')
    raise SequenceReturnedInSequence(
        f'{where}the YAML sequence: {value}')


def output_conf_value(logger, args):
    '''Write the requested configuration value to standard output.

    A list or set is written one entry per line; anything else is
    written as a single value.

    Raises MappingReturned when the requested value is a mapping.
    The errors of get_value() and output_scalar() propagate.
    '''
    (confpath, sname, value) = get_value(logger, args)
    if is_yaml_mapping(value):
        # identifier_of() already ends with "of ", or is empty, so
        # the word "section" must follow it directly.
        raise MappingReturned(
            'The requested '
            f'{identifier_of(args)}'
            f'section ({sname}) of '
            f'configuration file ({confpath}) is a YAML mapping: '
            f'{value}')

    if is_yaml_listlike(value):
        for item in value:
            output_scalar(args, sname, confpath, item)
    else:
        output_scalar(args, sname, confpath, value)


def deliver_result(logger, args):
    '''Write the requested value, turning getcfg errors into an exit.

    On a GetcfgException the message, prefixed with the program name,
    is logged at the critical level and the process exits with
    status 1.
    '''
    try:
        output_conf_value(logger, args)
    except GetcfgException as err:
        message = f'{sys.argv[0]}: {err}'
        logger.critical(message)
        sys.exit(1)


def main():
    '''Run getcfg: parse the command line, write the value, exit 0.'''
    cli_options = parse_args(sys.argv[1:])
    deliver_result(logging.getLogger(), cli_options)
    sys.exit(0)


# Script entry point: main() runs only when this file is executed
# directly, not when it is imported.  This allows the program to be
# run as a command without its distribution (the Python name for a
# package) being installed.
if __name__ == '__main__':
    main()
