#!/usr/bin/env python

"""General:
=======

Path Manager helps manage your path.  It allows you to add or remove
elements from a PATH-like variable including the bulk addition or
removal of path elements.

There are two main advantages of this script over sourcing a shell
script.  First, pathmgr lets you remove elements from the path.  This
is something that requires some work in most shells.  Specifically, if
you are using an XML configuration file, pathmgr lets you easily undo
bulk addition of path elements.  Second, pathmgr is portable.  This
lets you specify the path you want using a single XML configuration,
generate the new path using pathmgr, and pick up the changes using any
shell, even CMD.EXE.



Usage:
=====

    python pathmgr [<options>] <file> [<file> ...]



Options:
=======

    -a=<element>
    --after=<element>
    --append=<element>

        Add the path element to the path after all other path
        elements.  This option can be specified more than once.

    --anchor=<directory>

        The anchor is the directory to which relative paths are
        anchored.  It is not necessary to use this option, but it can
        be convenient.  The reason pathmgr needs to know exactly where
        relative paths are anchored is so it can add (or remove)
        normalized path elements if --normalize=on is set, and the
        only way it can normalize path elements is if it knows exactly
        where they reside.

        Anchors specified using a relative path are themselves
        relative to the "anchor base."  For command-line options, the
        anchor base is the current working directory.  For "sourced"
        XML configuration files, the anchor base is inherited from the
        caller.  For XML configuration files specified with --include,
        the anchor base is the directory containing the XML
        configuration file (c.f. --source).

        Roughly speaking, the path elements to be added or removed are
        formed as follows from a given relative path:

            (--normalize=off) => anchor + relative_path
            (--normalize=on)  => anchor_base + anchor + relative_path

        Roughly speaking, XML configuration files specified with
        --include=<relative_path> are relative only to the "anchor
        base" and ignore an explicitly set "anchor":

            xml_config_file = anchor_base + relative_path

        Thus for XML configuration files specified with --include, the
        "anchor base" is the directory where the file resides
        irrespective of whether the user has set an explicit anchor.
        Thus, explicitly set anchors only affect path elements that
        are being added or removed from the path.  They do not affect
        loading of XML configuration files.

    -b=<element>
    --before=<element>
    --prepend=<element>

        Add the path element to the path before all other path
        elements.  This option can be specified more than once.

    -c
    --clean
    --clean-only

        Clean the path by removing path elements instead of adding
        them.  To restore your path to the value it had before the
        previous run of this script, run pathmgr with the exact same
        command as before except add the --clean option to the front
        of the command.  This will cause --append and --prepend to
        remove path elements instead of adding them.  This option can
        be specified more than once allowing path elements to be
        processed with different values for this option.  Use -c (or
        -c=on) and -c=off to explicitly enable or disable this option.

    --default=<variable>=<value>

        Define the default value of a variable.  This is mostly used
        in XML configuration files in order to provide a default if
        the user does not explicitly override the value on the command
        line with the --define option.  To use a variable, just add
        ${variable} to any path element and it will be replaced with
        the value of the variable.  This option can be specified more
        than once allowing path elements to be processed with
        different values for this option.

    -D=<variable>=<value>
    --def=<variable>=<value>
    --define=<variable>=<value>

        Define the value of a variable overriding any default value
        set up by the --default option.  This is mostly used on the
        command line to override default values set up in XML
        configuration files.  To use a variable, just add ${variable}
        to any path element and it will be replaced with the value of
        the variable.  This option can be specified more than once
        allowing path elements to be processed with different values
        for this option.  Also see the --undefine option.

    -d=<element>
    --delete=<element>

        Delete the path element from the path.  If you need to remove
        a specific path, it is usually a good idea to set
        --normalize=off before using this option so that the exact
        path specified is the one pathmgr tries to remove.  This
        option can be specified more than once.  This is a synonym for
        --remove.

    --dups
    --duplicates
    --allow-duplicates

        By default, new path elements that duplicate existing path
        elements are not added to the path.  By setting --dups=on, new
        path elements will always be added even if they duplicate an
        existing path element.  This option can be specified more than
        once allowing path elements to be processed with different
        values for this option.  Use --dups=on and --dups=off to
        explicitly enable or disable this option.

    -f=off
    --force=off

        By default, path elements are forced onto the path even if the
        underlying directory is missing from the file system.  By
        setting --force=off, path elements will be added to the path
        only if they actually exist.  This option can be specified
        more than once allowing path elements to be processed with
        different values for this option.  Use -f (or -f=on) and
        -f=off to explicitly enable or disable this option.

    -h
    --help

        Print this help message and exit.

    <file>
    --include=<file>

        Process the XML configuration file in local scope which means
        any changes to the context made by the XML file are local to
        that file and are not seen by the caller.  If you want an XML
        configuration file to change the caller's context, use the
        --source=<file> option instead.

        This is the default way XML configuration files are processed.
        This option can be specified more than once.  Note that this
        option is also available in XML configuration files allowing
        you to include other XML files.  Options specified in the XML
        configuration file are not effective until the file is
        processed in sequence with other processing.

        If an "anchor base" is already set, it is automatically
        overridden during processing of <file> with the parent
        directory of <file> serving as the new anchor base.  The
        original anchor base is restored when processing of <file>
        completes.  Thus, by default, all relative paths specified in
        an included XML configuration file are relative to the
        directory where the file resides.

        [Also see --source=<file>.]

    --license

        Print the license and exit.

    -l
    -l=<separator>
    --list
    --list=<separator>

        List the existing path with one directory per line instead of
        printing the path with its embedded delimiters.  This option
        lists the path as it exists at that point in the command
        pipeline.  To see the final result, the --list option must be
        specified as pathmgr's last command-line option.  This option
        can be specified more than once.  This is useful for
        debugging.  If a non-empty separator is specified, it will be
        printed after the listing to provide a visual separator
        between listings.  This option implies --quiet=on.  Add a
        trailing --quiet=off to override.

    --log-to-stderr

        By default, the help message, the version message, and log
        messages all go to standard output.  If this option is set,
        they will go to standard error instead.  This is mostly used
        when a shell sets the PATH directly from the output of the
        pathmgr.py Python script.  Without this option set in these
        circumstances, help output would not be seen by the user and
        would instead be used to overwrite the PATH variable, neither
        of which is good.  This option can only be specified
        once and only on the command line (because it needs to be used
        very early).  If it is specified more than once, the last
        value specified is used.  Use --log-to-stderr (or
        --log-to-stderr=on) and --log-to-stderr=off to explicitly
        enable or disable this option.

    -n=off
    --normalize=off

        By default, path elements are normalized before they are added
        to the path.  By disabling normalization, the path elements
        will be added to the path literally.  This option can be
        specified more than once allowing path elements to be
        processed with different values for this option.  Use -n (or
        -n=on) and -n=off to explicitly enable or disable this option.

        Note that the shell used by MinGW normalizes both command-line
        parameters and environment variables (like PATH) before
        passing them to native applications like the Python
        interpreter that runs the pathmgr script!  This can make it
        *appear* as though pathmgr is not honoring --normalize=off.

        The solution to the MinGW problem is two fold.  First, use an
        XML configuration file to pass paths to pathmgr to avoid
        MinGW's normalization of command-line parameters.  For
        example:

            <pathmgr>
              <normalize>off</normalize>
              <remove>/c/foo/bar/baz</remove>
            </pathmgr>

        Second, pipe the PATH into pathmgr instead of passing it
        through the environment to avoid MinGW's normalization of
        environment variables.  For example:

            printf "$PATH" | pathmgr --origin=- --sep=: --uds=off foo.xml

        To make this more convenient, the "pathmgr-mingw" script is
        provided which can be executed as follows:

            pathmgr-mingw foo.xml

        To learn more about MinGW's normalization of command-line
        parameters and environment variables, visit the following URL:

            http://www.mingw.org/wiki/Posix_path_conversion

    --origin=<variable>

        The origin of the path being modified.  This can be either the
        name of an environment variable or the special string "-"
        which means the origin is taken from standard input.  By
        default, the origin is the PATH environment variable.  This
        option can be specified only on the command line (because it needs
        to be used very early).  If it is specified more than once,
        the last value is used.

    --print-state

        Print the current state.  This option can be specified more
        than once.

    -q
    --quiet

        Do not print the final result.

    -r=<element>
    --remove=<element>

        Remove the path element from the path.  If you need to remove
        a specific path, it is usually a good idea to set
        --normalize=off before using this option so that the exact
        path specified is the one pathmgr tries to remove.  This
        option can be specified more than once.  This is a synonym for
        --delete.

    --rmdups
    --remove-duplicates

        Remove all duplicates from the path.  To specify this option
        in an XML configuration file use one of the following empty
        XML elements: <rmdups/> or <remove-duplicates/>.

        If normalization is on, path elements are considered to be
        duplicates if their normalized values are the same.  If
        normalization is off, path elements are considered to be
        duplicates if their nominal values are the same.  (See
        --normalize.)

    --sep=<separator>
    --pathsep=<separator>
    --separator=<separator>

        Path separator.  Defaults to ':' on Unix and ';' on Windows.
        This option can only be specified once and only on the command
        line (because it needs to be used very early).  If it is
        specified more than once, the last value specified is used.

    --source=<file>

        Process the XML configuration file in the same context as the
        caller.  This allows <file> to change the caller's context.
        When --source is used from the command line, any change made
        by <file> affects the global context (which is the primary
        reason one would use --source instead of --include).  Note
        that this option is also available in XML configuration files
        allowing you to "source" other XML files.

        Note that because the file is being processed in the caller's
        context it shares the same "anchor base" as the caller.  This
        means that XML configuration files included by <file> that are
        specified by relative path are not necessarily relative to the
        directory where <file> resides (c.f., --include).

        This option can be specified more than once.  Options
        specified in the XML configuration file are not effective
        until the file is processed in sequence with other processing.

        [Also see --include=<file>.]

    --tilde=off
    --tilde-expansion=off

        By default, leading "~" or "~user" characters will be expanded
        to the equivalent of the user's HOME directory.  This option
        can be specified more than once allowing path elements to be
        processed with different values for this option.  Use --tilde
        (or --tilde=on) and --tilde=off to explicitly enable or
        disable this option.

        If you enable the --univ-dir-seps=on option (the default),
        tilde expansion will be consistent across platforms.  However,
        with --univ-dir-seps=off, the situation is a little
        complicated.  On Linux "~\\foo" will not expand because
        backslash is not a directory separator on Linux meaning
        "~\\foo" is interpreted as "~user" where user="\\foo".
        Because there is no "\\foo" user, there is no tilde expansion.
        On Windows, both "~\\foo" and "~/foo" will expand because
        Windows treats both the backslash and the slash as directory
        separators.

    -U=<variable>
    --undef=<variable>
    --undefine=<variable>

        Undefine the "--defined" value of a variable restoring any
        "--default" value.  This option can be specified more than
        once allowing path elements to be processed with different
        values for this option.  Also see the --define option.

    --uds=off
    --univ-dir-seps=off
    --universal-directory-separators=off

        By default, universal directory separators are enabled.  This
        means slashes or backslashes that appear in path elements get
        converted to native directory separators.  This option can be
        specified more than once allowing path elements to be
        processed with different values for this option.  Use
        --univ-dir-seps (or --univ-dir-seps=on) and
        --univ-dir-seps=off to explicitly enable or disable this
        option.

    -v
    --verbose

        Enable verbose output.  This option can be specified more than
        once allowing path elements to be processed with different
        values for this option.  Use -v (or -v=on) and -v=off to
        explicitly enable or disable this option.

    -V
    --version

        Print the version and exit.

    -w=<regexp>
    --which=<regexp>

        List each executable in your PATH (or delimited list specified
        by the --origin option) that matches the regular expression.
        This option implies --quiet=on.  To override, add a trailing
        --quiet=off.  This option is only valid from the command line.



XML Configuration Files:
=======================

Instead of listing the path elements on the command line, they can be
specified in the XML configuration files.  The basic format for each
file is just XML with <before> and <after> tags that control whether
the directory is prepended or appended respectively:

    <pathmgr>
       <remove>/home/spam/eggs</remove>
        <after>/home/foo/bar</after>
       <before>/home/foo/baz</before>
    </pathmgr>

Note that by default any option set in an XML configuration file is
only within scope while that XML file is being processed.  For more,
see the --include and --source options above.

Almost all of the options above that start with "--" are valid XML
tags.  They can be mixed with <after>, <before>, <remove>, etc., or
they can be placed in their own XML file to serve as basic
configuration:

    <pathmgr>
        <allow-duplicates/>
        <force>off</force>
        <list/>
    </pathmgr>



Examples:
========

    Unix Bourne Shell:
    -----------------

        PATH=`python pathmgr.py ...`
        export PATH


    Windows CMD.EXE:
    ---------------

        REM Use the provided pathmgr.bat wrapper.
        pathmgr.bat ...

        REM Alternatively, use "clip" then paste the result as follows:
        python pathmgr.py ... | clip
        set path=<paste_result_here>

"""

import copy
import os
import os.path
import re
import sys
from xml.dom.minidom import parse as parse_xml

########################################################################

# Names exported by a star-import of this module.  Only the names
# listed here are exported that way; every other module-level name
# (including the SymbolTableError exception) must be imported
# explicitly.
__all__ = [
    'main',
    'CommandLineError',
    'PathMgrXmlError',
]

# The version number as a tuple of its numeric components, and the
# same version rendered as a dot-separated string.
version_info = (2, 0, 1)
version = '.'.join(str(part) for part in version_info)

# The license under which this script is distributed, kept as a
# single string.  NOTE(review): presumably this is what gets printed
# for the --license option; the printing code is outside this section
# of the file -- confirm.
license_text = """
This software is provided 'as-is', without any express or implied
warranty.  In no event will the author or authors be held liable for
any damages arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

    1. The origin of this software must not be misrepresented; you
       must not claim that you wrote the original software. If you use
       this software in a product, an acknowledgment in the product
       documentation would be appreciated but is not required.

    2. Altered source versions must be plainly marked as such and
       must not be misrepresented as being the original software.

    3. This notice may not be removed or altered from any source
       distribution.
"""

########################################################################

class CommandLineError(Exception):
    """Signals a problem found while parsing the command-line arguments,
    such as an unknown option, an option that is missing its "=<value>"
    part, or a named file that does not exist."""

class PathMgrXmlError(Exception):
    """Exception for errors related to XML configuration files.

    NOTE(review): presumably raised when an XML configuration file
    cannot be processed.  The raise sites are outside this section of
    the file -- confirm the exact conditions against them.
    """
    pass

class SymbolTableError(Exception):
    """Signals a problem while processing symbols, for example a
    "${name}" reference to a symbol that has no value in either symbol
    table."""

########################################################################

class Options:
    """Parser for command-line arguments.

    Options that have to take effect very early (--help, --license,
    --log-to-stderr, --origin, --sep and --version) are stored directly
    in member variables.  Every other option, and every file named on
    the command line, is queued on self.work as a (function, parameter)
    pair in command-line order so that the main script can execute the
    work later, in sequence.
    """

    def __init__(self, args):

        """
        Parse the list of command line arguments args.  The results of
        the parse are stored in member variables.  The name of the
        program should not be included in args so instances of this
        class will typically be created as follows:

            Options(sys.argv[1:])

        Raises CommandLineError if an argument starts with '-' but is
        not a known option, if an option that needs "=<value>" is given
        without it, or if an argument that should name a file does not
        exist.  Raises RuntimeError (from _text_to_bool) if an on/off
        option is given text that is not a boolean.
        """

        #
        # Member variables.
        #

        # The anchor for relative paths.  If self.anchor is itself
        # relative, it is anchored by self.anchor_base.
        self.anchor = ''

        # Anchors specified using a relative path are themselves
        # relative to the anchor base.  For command-line options, the
        # anchor base is the current working directory (or just '').
        # For "sourced" XML configuration files, the anchor base is
        # inherited from the caller.  For "included" XML configuration
        # files, the anchor base is the directory containing the XML
        # configuration file.
        self.anchor_base = ''

        # Whether to only clean the path (instead of cleaning the path
        # and then augmenting it).
        self.clean_only = False

        # Whether to allow duplicate path elements.
        self.duplicates = False

        # Whether to add directories to the path even if the directory
        # doesn't exist on the file system.
        self.force = True

        # Whether the user wants help.
        self.help = False

        # Whether the user wants to see the license.
        self.license = False

        # The log file.
        self.log_file = sys.stdout

        # Whether to normalize the path elements before adding them to
        # the path.
        self.normalize = True

        # The origin for the path being modified.  This can be either
        # the name of an environment variable or the special string
        # "-" which means the origin is taken from standard input.
        self.origin = 'PATH'

        # Whether to not print the final result.
        self.quiet = False

        # Path separator.
        self.separator = os.path.pathsep

        # Symbol table that holds default values.  These values are
        # defined with the --default option.
        self.symbols_default = {}

        # Symbol table that holds override values that override the defaults.
        # These values are defined with the --define option.
        self.symbols_override = {}

        # Whether to perform tilde expansion.
        self.tilde_expansion = True

        # This is the list of *directory* separators that get
        # converted to the native format.  This feature can be enabled
        # or disabled with the --univ-dir-seps flag.
        self.univ_dir_seps = ['/', '\\']

        # Save a copy of the list of directory separators in case we
        # have to restore it.  This must be a separate list object:
        # if it merely aliased self.univ_dir_seps, any in-place change
        # to that list would silently change the saved value as well.
        self.univ_dir_seps_saved = list(self.univ_dir_seps)

        # Enable verbose output.
        self.verbose = False

        # Whether the user wants the version.
        self.version = False

        # List of work to do.  This is a list of pairs where the first
        # element in the pair is the function that does the work and
        # the second element in the pair is the parameters to the
        # function.
        self.work = []

        #
        # Iterate over all the command-line options.  Mostly, we are
        # just building up a work list based on the options and files
        # specified by the user.  The work list will be executed later
        # by the main script.
        #

        for arg in args:

            #
            # NOTE: If you add a new command-line option below, you almost
            # certainly want to make a similar addition to the
            # _process_config_file() function so that the same option will
            # be available in the XML configuration files.
            #
            # NOTE: _check_pair() and _check_bool() return a non-empty
            # tuple on a match (which is always true, even when the
            # parsed boolean value is False) and None otherwise.
            #

            # --after
            hit = _check_pair(['-a', '--after', '--append'], arg)
            if hit:
                self.work.append((_do_after, hit[1]))
                continue

            # --anchor
            hit = _check_pair(['--anchor'], arg)
            if hit:
                self.work.append((_do_anchor, hit[1]))
                continue

            # --before
            hit = _check_pair(['-b', '--before', '--prepend'], arg)
            if hit:
                self.work.append((_do_before, hit[1]))
                continue

            # --clean-only
            hit = _check_bool(['-c', '--clean', '--clean-only'], arg)
            if hit:
                self.work.append((_do_clean_only, hit[1]))
                continue

            # --default
            hit = _check_pair(['--default'], arg)
            if hit:
                self.work.append((_do_default, hit[1]))
                continue

            # --define
            hit = _check_pair(['-D', '--def', '--define'], arg)
            if hit:
                self.work.append((_do_define, hit[1]))
                continue

            # --delete (and its synonym --remove)
            hit = _check_pair(['-d', '--delete', '-r', '--remove'], arg)
            if hit:
                self.work.append((_do_delete, hit[1]))
                continue

            # --duplicates
            hit = _check_bool(['--dups',
                               '--duplicates',
                               '--allow-duplicates'],
                              arg)
            if hit:
                self.work.append((_do_duplicates, hit[1]))
                continue

            # --force
            hit = _check_bool(['-f', '--force'], arg)
            if hit:
                self.work.append((_do_force, hit[1]))
                continue

            # --help
            hit = _check_bool(['-h', '--help'], arg)
            if hit:
                self.help = hit[1]
                continue

            # --include
            hit = _check_pair(['--include'], arg)
            if hit:
                self.work.append((_do_include, hit[1]))
                continue

            # --license
            hit = _check_bool(['--license'], arg)
            if hit:
                self.license = hit[1]
                continue

            # --list
            # The bare form must be tested first because _check_pair()
            # treats a bare flag as an error.  Listing implies
            # --quiet=on.
            if arg in ('-l', '--list'):
                # No list separator should be printed.
                self.work.append((_do_list, None))
                self.work.append((_do_quiet, True))
                continue
            hit = _check_pair(['-l', '--list'], arg)
            if hit:
                # The hit[1] list separator should be printed.
                self.work.append((_do_list, hit[1]))
                self.work.append((_do_quiet, True))
                continue

            # --log-to-stderr
            hit = _check_bool(['--log-to-stderr'], arg)
            if hit:
                if hit[1]:
                    self.log_file = sys.stderr
                else:
                    self.log_file = sys.stdout
                continue

            # --normalize
            hit = _check_bool(['-n', '--normalize'], arg)
            if hit:
                self.work.append((_do_normalize, hit[1]))
                continue

            # --origin
            hit = _check_pair(['--origin'], arg)
            if hit:
                self.origin = hit[1]
                continue

            # --print-state
            hit = _check_bool(['--print-state'], arg)
            if hit:
                self.work.append((_do_print_state, hit[1]))
                continue

            # --quiet
            hit = _check_bool(['-q', '--quiet'], arg)
            if hit:
                self.work.append((_do_quiet, hit[1]))
                continue

            # --remove
            # Note: --remove is a synonym for --delete.  See --delete above.

            # --remove-duplicates
            if arg in ('--rmdups', '--remove-duplicates'):
                self.work.append((_do_remove_duplicates, None))
                continue

            # --sep (--pathsep is accepted as an additional synonym)
            hit = _check_pair(['--sep', '--pathsep', '--separator'], arg)
            if hit:
                self.separator = hit[1]
                continue

            # --source
            hit = _check_pair(['--source'], arg)
            if hit:
                self.work.append((_do_source, hit[1]))
                continue

            # --tilde-expansion
            hit = _check_bool(['--tilde', '--tilde-expansion'], arg)
            if hit:
                self.work.append((_do_tilde_expansion, hit[1]))
                continue

            # --undefine
            hit = _check_pair(['-U', '--undef', '--undefine'], arg)
            if hit:
                self.work.append((_do_undefine, hit[1]))
                continue

            # --univ-dir-seps
            hit = _check_bool(['--uds',
                               '--univ-dir-seps',
                               '--universal-directory-separators'],
                              arg)
            if hit:
                self.work.append((_do_univ_dir_seps, hit[1]))
                continue

            # --verbose
            hit = _check_bool(['-v', '--verbose'], arg)
            if hit:
                self.work.append((_do_verbose, hit[1]))
                continue

            # --version
            if arg in ('-V', '--version'):
                self.version = True
                continue

            # --which (implies --quiet=on)
            hit = _check_pair(['-w', '--which'], arg)
            if hit:
                self.work.append((_do_which, hit[1]))
                self.work.append((_do_quiet, True))
                continue

            # Check for invalid command-line options.
            if arg.startswith('-'):
                raise CommandLineError('Invalid argument: %s' % (arg,))

            # So arg must be the name of a file.  Before adding it to
            # the work list, make sure it exists.
            if not os.path.exists(arg):
                raise CommandLineError('File does not exist: %s' % (arg,))
            self.work.append((_do_include, arg))

    def make_copy(self):
        """Make an independent copy of self with an empty work list.

        The symbol tables and the directory-separator lists are copied
        so that in-place changes made through the copy do not affect
        self.  All remaining member variables are copied by reference
        (for example, the log file object is shared).
        """
        rv = copy.copy(self)
        rv.symbols_default = copy.copy(self.symbols_default)
        rv.symbols_override = copy.copy(self.symbols_override)
        rv.univ_dir_seps = copy.copy(self.univ_dir_seps)
        rv.univ_dir_seps_saved = copy.copy(self.univ_dir_seps_saved)
        rv.work = []
        return rv

########################################################################

def _text_to_bool(text):
    s = text.lower()
    if (s == 'off') or (s == 'false'):
        return False
    elif (s == 'on') or (s == 'true'):
        return True
    else:
        raise RuntimeError('Unable to convert text to boolean: "%s"' % (text,))

def _check_pair(flags, arg):
    """
    Check if any of the command-line options in flags are
    satisfied as a pair by the command-line argument arg.  On
    success, the pair (flag, value) is returned.  For example,

        _check_pair(['foo', 'bar'], '--foo=baz')  --> ('--foo', 'baz')
        _check_pair(['foo', 'bar'], '--bar=baz')  --> ('--bar', 'baz')
        _check_pair(['foo', 'bar'], '--egg=baz')  --> None
    """

    # Check for matches.
    for flag in flags:
        m = re.match('^(%s)=(.*)$' % (flag,), arg)
        if m:
            return m.groups()

    # Check for accidental use of "--foo bar" (instead of "--foo=bar").
    for flag in flags:
        if ('%s' % (flag,)) == arg:
            msg = 'Invalid use of %s.  Should be %s=<var> instead.' \
                  % (arg, arg)
            if os.name == 'nt':
                msg = '%s\n           %s' \
                      % (msg, 'Under Windows, "=" must be quoted.')
            raise CommandLineError(msg)

def _check_bool(flags, arg):

    """
    Check if any of the command-line options in flags are satisfied
    as a boolean by the command-line argument arg.  On success, the
    pair (flag, value) is returned where "value" is a boolean.
    For example,

        _check_bool(['foo', 'bar'], '--foo=off')  --> ('--foo', False)
        _check_bool(['foo', 'bar'], '--bar=on' )  --> ('--bar', True )
        _check_bool(['foo', 'bar'], '--bar=on' )  --> ('--bar', True )
        _check_bool(['foo', 'bar'], '--bar'    )  --> ('--bar', True )
        _check_bool(['foo', 'bar'], '--egg=baz')  --> None
    """

    # Check for exact matches.  This is equivalent to "on" or True.
    for flag in flags:
        if flag == arg:
            return (flag, True)

    # Check for matches of the form --foo=off.
    x = _check_pair(flags, arg)
    if x:
        return (x[0], _text_to_bool(x[1]))

def _substitute_variables(symbols_default, symbols_override, text):
    """Perform variable substitution on the text using the default and
    override symbol tables.

    Every occurrence of "${foo}" in the text is replaced by the value
    of the symbol "foo".  The value from the symbols_override symbol
    table is preferred.  Only if this value does not exist will the
    value from symbols_default be used.  If the symbol is not defined
    in either table, a SymbolTableError is raised.

    The text is scanned exactly once, from left to right, and the
    replacement values are inserted verbatim: a value that itself
    contains "${...}" is not expanded again and cannot be mistaken
    for a reference that appears later in the text.
    """
    def lookup(match):
        # Return the replacement string for one "${foo}" match.
        symbol = match.group(1)
        if symbol in symbols_override:
            return symbols_override[symbol]
        if symbol in symbols_default:
            return symbols_default[symbol]
        raise SymbolTableError(
            'Symbol "%s" not defined.  It is needed in text: %s'
            % (symbol, text))

    # We use the "*?" non-greedy Kleene star so each match stops at
    # the first closing brace.  A callable replacement is used so the
    # values are taken literally (backslashes in Windows paths are
    # not treated as escapes).
    return re.sub('\\$\\{(.*?)\\}', lookup, text)

def _normalize_path(path):
    """Return the normalized form of "path".

    The work is delegated entirely to os.path.abspath()."""
    normalized = os.path.abspath(path)
    return normalized

def _anchor_include_file(opts, fname):
    """Return fname anchored to opts.anchor_base.

    Absolute file names are returned untouched.  Relative file names
    are joined onto opts.anchor_base, which is the current working
    directory for files named on the command line or the directory
    containing the parent XML file for files included from XML.  The
    value of opts.anchor plays no part here; it only applies to the
    elements being added to or removed from the path."""
    if os.path.isabs(fname):
        return fname
    return os.path.join(opts.anchor_base, fname)

def _transform_include_file(opts, fname):
    """Return the include file name after all transformations.

    The steps are applied in this order: variable substitution using
    the symbol tables in opts, tilde expansion, anchoring with
    _anchor_include_file() and, finally, normalization with
    _normalize_path()."""

    # Variable substitution comes first so that tilde expansion sees
    # the substituted text.
    expanded = os.path.expanduser(
        _substitute_variables(opts.symbols_default,
                              opts.symbols_override,
                              fname))

    # Anchor the file name and then normalize the anchored result.
    return _normalize_path(_anchor_include_file(opts, expanded))

def _anchor_path_element(opts, element):
    """Return the path element anchored to opts.anchor.

    Absolute elements are returned untouched.  Relative elements are
    joined onto opts.anchor.  If opts.normalize is set and
    opts.anchor is itself a relative path, the result is further
    joined onto opts.anchor_base."""
    # Absolute elements need no anchoring at all.
    if os.path.isabs(element):
        return element

    anchored = os.path.join(opts.anchor, element)

    # A relative anchor leaves the element relative, so fall back on
    # the anchor base when the user wants the paths normalized.
    if opts.normalize and not os.path.isabs(opts.anchor):
        anchored = os.path.join(opts.anchor_base, anchored)
    return anchored

def _transform_path_element(opts, element):
    """Return the path element after all transformations.

    The steps are applied in this order: variable substitution,
    conversion of the directory separators listed in
    opts.univ_dir_seps to the native separator, tilde expansion (if
    opts.tilde_expansion is set), anchoring with
    _anchor_path_element() and normalization with _normalize_path()
    (if opts.normalize is set)."""

    # Variable substitution (if any).
    path = _substitute_variables(opts.symbols_default,
                                 opts.symbols_override,
                                 element)

    # opts.univ_dir_seps lists the separators to be converted to the
    # native directory separator; the list is empty when universal
    # directory separators are disabled.  The conversion has to
    # happen before the call to os.path.expanduser() so Python can
    # see the tilde; otherwise, Python sees something like
    # "~\\foo\\bar" as "~user" where user="\\foo\\bar", which is not
    # a valid user, and the tilde is left unexpanded.
    for separator in opts.univ_dir_seps:
        pieces = path.split(separator)
        path = os.sep.join(pieces)

    # Tilde expansion (if enabled).
    if opts.tilde_expansion:
        path = os.path.expanduser(path)

    # Anchoring always happens; normalization only on request.
    path = _anchor_path_element(opts, path)
    if not opts.normalize:
        return path
    return _normalize_path(path)

########################################################################

def _do_after(opts, parts, element):
    """Append the path element to the end of parts and return parts.

    If opts.clean_only is set, the request is handed to _do_delete()
    instead so that what this command did the last time the script
    was called is undone.  Otherwise, the element is only considered
    if opts.force is set or os.path.exists() reports that the element
    exists.  It is then transformed with _transform_path_element(),
    logged if opts.verbose is set, and appended unless it is already
    in parts and opts.duplicates is not set.

    Modifying this function often requires similar modifications to
    be made to _do_before() and _do_delete()."""

    # Undo mode: remove instead of add.
    if opts.clean_only:
        return _do_delete(opts, parts, element)

    # NOTE(review): the existence test sees the element exactly as
    # given, i.e. before variable substitution, tilde expansion and
    # anchoring.  Confirm that this is intended.
    if not opts.force and not os.path.exists(element):
        return parts

    transformed = _transform_path_element(opts, element)
    if opts.verbose:
        opts.log_file.write('_do_after: %s\n' % (transformed,))
    if opts.duplicates or transformed not in parts:
        parts.append(transformed)
    return parts

def _do_anchor(opts, parts, anchor):
    """Store anchor in opts.anchor and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_anchor: %s\n' % (anchor,)
        opts.log_file.write(message)
    opts.anchor = anchor
    return parts

def _do_before(opts, parts, element):
    """Insert the path element at the front of parts and return parts.

    If opts.clean_only is set, the request is handed to _do_delete()
    instead so that what this command did the last time the script
    was called is undone.  Otherwise, the element is only considered
    if opts.force is set or os.path.exists() reports that the element
    exists.  It is then transformed with _transform_path_element(),
    logged if opts.verbose is set, and inserted unless it is already
    in parts and opts.duplicates is not set.

    Modifying this function often requires similar modifications to
    be made to _do_after() and _do_delete()."""

    # Undo mode: remove instead of add.
    if opts.clean_only:
        return _do_delete(opts, parts, element)

    # NOTE(review): the existence test sees the element exactly as
    # given, i.e. before variable substitution, tilde expansion and
    # anchoring.  Confirm that this is intended.
    if not opts.force and not os.path.exists(element):
        return parts

    transformed = _transform_path_element(opts, element)
    if opts.verbose:
        opts.log_file.write('_do_before: %s\n' % (transformed,))
    if opts.duplicates or transformed not in parts:
        parts.insert(0, transformed)
    return parts

def _do_clean_only(opts, parts, clean_only):
    """Store clean_only in opts.clean_only and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_clean: %s\n' % (clean_only,)
        opts.log_file.write(message)
    opts.clean_only = clean_only
    return parts

def _do_default(opts, parts, symbol_value_pair):
    """Record a "symbol=value" pair in opts.symbols_default and return
    parts unchanged.

    The pair is split at the first "=" so the value may itself
    contain "=".  A SymbolTableError is raised if the pair contains
    no "=" at all.  The request is logged first if opts.verbose is
    set."""
    if opts.verbose:
        opts.log_file.write('_do_default: %s\n' % (symbol_value_pair,))

    pieces = symbol_value_pair.split('=', 1)
    if len(pieces) == 2:
        opts.symbols_default[pieces[0]] = pieces[1]
        return parts

    raise SymbolTableError(
        'Invalid form for symbol/value pair: %s' % (symbol_value_pair,))

def _do_define(opts, parts, symbol_value_pair):
    """Record a "symbol=value" pair in opts.symbols_override and return
    parts unchanged.

    The pair is split at the first "=" so the value may itself
    contain "=".  A SymbolTableError is raised if the pair contains
    no "=" at all.  The request is logged first if opts.verbose is
    set."""
    if opts.verbose:
        opts.log_file.write('_do_define: %s\n' % (symbol_value_pair,))

    pieces = symbol_value_pair.split('=', 1)
    if len(pieces) == 2:
        opts.symbols_override[pieces[0]] = pieces[1]
        return parts

    raise SymbolTableError(
        'Invalid form for symbol/value pair: %s' % (symbol_value_pair,))

def _do_delete(opts, parts, element):
    """Return a new list holding every entry of parts that differs from
    the path element.

    The element is first transformed with _transform_path_element()
    and the transformed value is logged if opts.verbose is set.  The
    list passed in as parts is not modified."""
    target = _transform_path_element(opts, element)
    if opts.verbose:
        opts.log_file.write('_do_delete: %s\n' % (target,))

    survivors = []
    for part in parts:
        if part != target:
            survivors.append(part)
    return survivors

def _do_duplicates(opts, parts, duplicates):
    """Store duplicates in opts.duplicates and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_duplicates: %s\n' % (duplicates,)
        opts.log_file.write(message)
    opts.duplicates = duplicates
    return parts

def _do_force(opts, parts, force):
    """Store force in opts.force and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_force: %s\n' % (force,)
        opts.log_file.write(message)
    opts.force = force
    return parts

def _do_list(opts, parts, list_separator):
    """Write each entry of parts to opts.log_file, one per line, and
    return parts unchanged.

    If list_separator is not None, it is written on a line of its own
    after the entries.  A "_do_list:" header line is written first if
    opts.verbose is set."""
    emit = opts.log_file.write
    if opts.verbose:
        emit('_do_list:\n')
    for part in parts:
        emit('%s\n' % (part,))
    if list_separator is not None:
        emit('%s\n' % (list_separator,))
    return parts

def _do_normalize(opts, parts, normalize):
    """Store normalize in opts.normalize and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_normalize: %s\n' % (normalize,)
        opts.log_file.write(message)
    opts.normalize = normalize
    return parts

def _do_print_state(opts, parts, print_state):
    """Write the current option values to opts.log_file and return
    parts unchanged.

    Nothing is written unless print_state is true.  One line is
    written per option, followed by one "work" line for each entry
    of opts.work."""
    if not print_state:
        return parts

    def emit(template, value):
        # Format one value into its line and write the line.
        opts.log_file.write(template % (value,))

    opts.log_file.write('state:\n')
    emit('    clean         : %s\n', opts.clean_only)
    emit('    force         : %s\n', opts.force)
    emit('    help          : %s\n', opts.help)
    emit('    license       : %s\n', opts.license)
    emit('    log_to_stderr : %s\n', opts.log_file == sys.stderr)
    emit('    normalize     : %s\n', opts.normalize)
    emit('    origin        : %s\n', opts.origin)
    emit('    quiet         : %s\n', opts.quiet)
    emit('    separator     : %s\n', opts.separator)
    emit('    verbose       : %s\n', opts.verbose)
    emit('    version       : %s\n', opts.version)
    for item in opts.work:
        emit('    work          : %s\n', item)
    return parts

def _do_quiet(opts, parts, quiet):
    """Store quiet in opts.quiet and return parts unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_quiet: %s\n' % (quiet,)
        opts.log_file.write(message)
    opts.quiet = quiet
    return parts

def _do_remove_duplicates(opts, parts, junk):
    """Return a new list holding the entries of parts with the
    duplicates removed.

    The first occurrence of each entry is kept and the original
    order is preserved.  If opts.normalize is set, two entries are
    duplicates when _normalize_path() gives the same result for
    both; otherwise, they must be equal as written.  The entries are
    returned as written either way.  The "junk" argument is
    ignored."""
    # Verbose log.
    if opts.verbose:
        opts.log_file.write('_remove_duplicates:\n')

    unique = []
    seen = set()
    for part in parts:
        # The key decides whether "part" has been seen before.
        key = _normalize_path(part) if opts.normalize else part
        if key in seen:
            continue
        # Track the keys but keep the parts.
        seen.add(key)
        unique.append(part)
    return unique

def _do_tilde_expansion(opts, parts, tilde_expansion):
    """Store tilde_expansion in opts.tilde_expansion and return parts
    unchanged.

    The request is logged first if opts.verbose is set."""
    if opts.verbose:
        message = '_do_tilde_expansion: %s\n' % (tilde_expansion,)
        opts.log_file.write(message)
    opts.tilde_expansion = tilde_expansion
    return parts

def _do_undefine(opts, parts, symbol):
    """Remove symbol from opts.symbols_override and return parts
    unchanged.

    A SymbolTableError is raised if the symbol is not present in
    opts.symbols_override.  The opts.symbols_default table is not
    touched.  The request is logged first if opts.verbose is set."""
    if opts.verbose:
        # Log the symbol being removed.  There is no symbol/value
        # pair here, only the name of the symbol.
        opts.log_file.write('_do_undefine: %s\n' % (symbol,))
    if symbol not in opts.symbols_override:
        raise SymbolTableError(
            'Cannot undefine missing symbol: %s' % (symbol,))
    del opts.symbols_override[symbol]
    return parts

def _do_univ_dir_seps(opts, parts, is_univ_dir_seps_enabled):
    """Enable or disable universal directory separators and return
    parts unchanged.

    Enabling sets opts.univ_dir_seps to opts.univ_dir_seps_saved;
    disabling sets it to an empty list.  The request is logged first
    if opts.verbose is set."""
    if opts.verbose:
        message = '_do_univ_dir_seps: %s\n' % (is_univ_dir_seps_enabled,)
        opts.log_file.write(message)

    if not is_univ_dir_seps_enabled:
        opts.univ_dir_seps = []
        return parts

    opts.univ_dir_seps = opts.univ_dir_seps_saved
    return parts

def _do_verbose(opts, parts, verbose):
    """Store verbose in opts.verbose and return parts unchanged.

    The request is logged only if opts.verbose was already set
    before the call."""
    if opts.verbose:
        message = '_do_verbose: %s\n' % (verbose,)
        opts.log_file.write(message)
    opts.verbose = verbose
    return parts

def _do_which(opts, parts, pattern):
    """Write to opts.log_file the full name of every file found in the
    directories listed in parts whose name matches pattern, and
    return parts unchanged.

    The pattern is a regular expression that is searched for in each
    file name, ignoring case.  One line is written per match.
    Entries of parts that cannot be listed (missing directories,
    entries that are not directories, etc.) are skipped."""
    for part in parts:
        try:
            fnames = os.listdir(part)
        except (OSError, ValueError):
            # Don't worry about missing directories etc.  Move on to
            # the next entry so that no file names left over from an
            # earlier directory are reported for this one.
            continue
        for fname in fnames:
            if re.search(pattern, fname, re.IGNORECASE):
                opts.log_file.write(os.path.join(part, fname) + '\n')
    return parts

def _do_include(opts, parts, fname):
    """Process the XML configuration file fname in a local scope and
    return the resulting parts.

    The file is processed with a copy of opts obtained from
    opts.make_copy(), so, unlike "--source", the file is not meant to
    alter the caller's context.  In the copy, the anchor is reset to
    the empty string and the anchor base is set to the directory
    containing fname.  The request is logged first if opts.verbose
    is set."""

    # Verbose output.
    if opts.verbose:
        message = '_do_include: %s\n' % (fname,)
        opts.log_file.write(message)

    # Give the state variables local scope while the XML file is
    # being processed.
    scope = opts.make_copy()
    scope.anchor = ''
    scope.anchor_base = os.path.dirname(fname)

    return _process_config_file(scope, parts, fname)

def _do_source(opts, parts, fname):
    """Process the XML configuration file fname in the current context
    and return the resulting parts.

    Unlike "--include", "opts" itself (not a copy) is handed to
    _process_config_file(), so every change the file makes to the
    options remains visible to the caller.  The request is logged
    first if opts.verbose is set."""

    # Verbose output.
    if opts.verbose:
        message = '_do_source: %s\n' % (fname,)
        opts.log_file.write(message)

    # Passing "opts" directly is what makes the changes permanent.
    return _process_config_file(opts, parts, fname)

########################################################################

def _get_node_text(node):
    """Return the text associated with the XML node or None if no text is
    associated.

    The text is the concatenation, in document order, of the values
    of all the text and CDATA children of the node.  Other kinds of
    children are skipped, so "<after><!-- note -->/usr/bin</after>"
    yields "/usr/bin" rather than the text of the comment.  None is
    returned if node is None or has no text or CDATA children."""
    if not node or not node.childNodes:
        return None

    # Only these kinds of children carry the text of the element.
    text_types = (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    chunks = [child.nodeValue for child in node.childNodes
              if child.nodeType in text_types]
    if not chunks:
        return None
    return ''.join(chunks)

def _node_to_bool(node):
    """Convert the node to a boolean value.

    True is returned for nodes without text (like <verbose/>), which
    therefore mean "enable".  Otherwise, the text is stripped of
    surrounding whitespace and converted by _text_to_bool()."""
    text = _get_node_text(node)
    if text is None:
        # Empty tags (like <verbose/>) should mean to enable.
        return True
    # The meaning of other tags depends on their text.
    return _text_to_bool(text.strip())

def _process_config_file(opts, parts, fname):
    """Apply the commands in the XML configuration file fname to parts
    and return the resulting parts.

    The file must have a "pathmgr" root; each child of the root is
    dispatched to the matching _do_*() worker.  A PathMgrXmlError is
    raised if the root is not found, if an unknown tag is met or if
    non-whitespace text is found directly under the root."""

    #
    # NOTE: Typically (i.e., for --include) the "opts" that gets
    # passed in is a copy of the global "Options" instance allowing
    # all changes to the options to only have local scope.
    #
    # However, the user is allowed to pass in file names using the
    # "--source=<fname>" option, and these files will be parsed in the
    # caller's context meaning changes made by "sourced" files will be
    # seen by the caller.
    #

    #
    # NOTE: If you add a new XML option below, you almost certainly
    # want to make a similar addition to the Options.__init__()
    # function so that the same option will be available on the
    # command line.
    #

    # Open the XML file and locate its root.
    # NOTE(review): the root is taken to be the first child of the
    # document, so anything (a comment, say) placed ahead of
    # <pathmgr> looks like it would be rejected -- confirm against
    # parse_xml().
    dom = parse_xml(fname)
    root = None
    if dom and dom.childNodes and dom.childNodes[0]:
        root = dom.childNodes[0]
    if root is None or root.nodeName != 'pathmgr':
        raise PathMgrXmlError(
            'Unable to find "pathmgr" root in file: %s' % (fname,))

    # Tags whose text is handed to the worker as is.
    text_workers = {
        'after': _do_after,
        'append': _do_after,
        'anchor': _do_anchor,
        'before': _do_before,
        'prepend': _do_before,
        'default': _do_default,
        'def': _do_define,
        'define': _do_define,
        'delete': _do_delete,
        'remove': _do_delete,
        'undef': _do_undefine,
        'undefine': _do_undefine,
    }

    # Tags whose text is first converted to a boolean.
    bool_workers = {
        'clean': _do_clean_only,
        'clean-only': _do_clean_only,
        'dups': _do_duplicates,
        'duplicates': _do_duplicates,
        'allow-duplicates': _do_duplicates,
        'force': _do_force,
        'normalize': _do_normalize,
        'print-state': _do_print_state,
        'quiet': _do_quiet,
        'tilde': _do_tilde_expansion,
        'tilde-expansion': _do_tilde_expansion,
        'univ-dir-seps': _do_univ_dir_seps,
        'verbose': _do_verbose,
    }

    # Tags whose text names another XML configuration file.
    file_workers = {
        'include': _do_include,
        'source': _do_source,
    }

    # Iterate over the child elements.
    for child in root.childNodes:
        tag = child.nodeName
        if tag in text_workers:
            parts = text_workers[tag](opts, parts, _get_node_text(child))
        elif tag in bool_workers:
            flag = _node_to_bool(child)
            parts = bool_workers[tag](opts, parts, flag)
        elif tag in file_workers:
            include_file = _transform_include_file(opts,
                                                   _get_node_text(child))
            parts = file_workers[tag](opts, parts, include_file)
        elif tag == 'list':
            # An empty separator means "no separator".
            separator = _get_node_text(child) or None
            parts = _do_list(opts, parts, separator)
        elif tag in ('rmdups', 'remove-duplicates'):
            parts = _do_remove_duplicates(opts, parts, None)
        elif tag == '#comment':
            continue
        elif tag in ('#cdata-section', '#text'):
            # Only whitespace is tolerated between the tags.
            if child.nodeValue.split():
                raise PathMgrXmlError('Unexpected text "%s" in file: %s'
                                      % (child.nodeValue.strip(), fname))
        else:
            raise PathMgrXmlError(
                'Unexpected tag "%s" in file: %s' % (tag, fname))

    return parts

########################################################################

def _work_on_parts(work_list, parts, opts):
    """Pipe the path element parts through the work list and return the
    result.

    Each entry of work_list is a (function, argument) pair and each
    function is called as function(opts, parts, argument).  It is
    critical that all work functions return a "parts" list even if
    they have not modified it, because the returned value is the
    accumulator that is handed to the next function."""
    result = parts
    for step in work_list:
        worker, argument = step
        result = worker(opts, result, argument)
    return result

def main():
    """Main entry point.

    Parse the command-line options, handle the informational options
    (help, license and version, each of which exits with status 0),
    read the path named by opts.origin, run it through the work list
    and, unless opts.quiet is set, write the resulting path to
    standard output."""

    # Parse the command-line options.
    opts = Options(sys.argv[1:])

    # Help
    if opts.help:
        opts.log_file.write('%s\n' % (sys.modules['__main__'].__doc__,))
        sys.exit(0)

    # License
    if opts.license:
        opts.log_file.write('%s\n' % (license_text,))
        sys.exit(0)

    # Version
    if opts.version:
        opts.log_file.write('pathmgr %s\n' % (version,))
        sys.exit(0)

    # Get the path.  If the user specified "-" as the path variable,
    # get the path from standard input.
    if opts.origin == '-':
        # Drop the line terminator that usually ends piped input
        # (e.g., from "echo"); otherwise, it would become part of the
        # last path element.
        path = sys.stdin.read().rstrip('\r\n')
    else:
        path = os.getenv(opts.origin)

    # Split the path into its component parts.  If the path isn't
    # set or is an empty string, start with an empty list.
    if not path:
        parts = []
    else:
        parts = path.split(opts.separator)

    # Process the path through the filter specified by the user.
    parts = _work_on_parts(opts.work, parts, opts)

    if not opts.quiet:
        # Print the resulting path.  If we are printing to a TTY add a
        # trailing EOL because the string is not being directly piped
        # to a shell for command substitution.
        sys.stdout.write(opts.separator.join(parts))
        if sys.stdout.isatty():
            sys.stdout.write('\n')
        sys.stdout.flush()

########################################################################

if __name__ == '__main__':
    # Run the program.  Any error is reported on standard error as a
    # single line and turned into an exit status of 1.
    try:
        main()
    except Exception:
        caught = sys.exc_info()
        # A SystemExit is a request to exit, not an error to report.
        if caught[0] != SystemExit:
            sys.stderr.write('*** Error: %s\n' % (caught[1],))
            sys.exit(1)
