#! /usr/bin/env python

"""
Try to calculate and succinctly present the differences between two large
directory trees.
"""

from standard_script_setup import *
from CIME.utils import run_cmd, run_cmd_no_fail

import argparse, sys, os

###############################################################################
def parse_command_line(args, description):
###############################################################################
    """
    Build the argument parser for this tool and parse the given argument list.

    args        - full argument vector; args[0] is used (basename only) as the
                  program name shown in the usage text
    description - text shown as the parser's description in --help output

    Returns the tuple (case1, case2, show_binary, skip_list).
    """
    program_name = os.path.basename(args[0])
    usage_text = """\n{0} case1 case2 [skip-files]
OR
{0} --help

\033[1mEXAMPLES:\033[0m
    > {0} case1 case2
""".format(program_name)

    parser = argparse.ArgumentParser(
        usage=usage_text,
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Project helper; presumably registers the common logging flags - see CIME.utils
    CIME.utils.setup_standard_logging_options(parser)

    # Positionals are registered in the order they must appear on the command line
    parser.add_argument("case1", help="First case.")
    parser.add_argument("case2", help="Second case.")
    parser.add_argument(
        "skip_list",
        nargs="*",
        help="skip these files. You'll probably want to skip the bld directory if it's inside the case",
    )

    parser.add_argument(
        "-b",
        "--show-binary",
        action="store_true",
        help="Show binary diffs",
    )

    parsed = CIME.utils.parse_args_and_handle_standard_logging_options(args, parser)

    return parsed.case1, parsed.case2, parsed.show_binary, parsed.skip_list

###############################################################################
def recursive_diff(dir1, dir2, repls, show_binary=False, skip_list=()):
###############################################################################
    """
    Recursively compare the contents of dir1 and dir2, printing every
    difference found to stdout.

    Entries that exist in only one directory are reported as unique. For
    entries present in both:
      * if one is a directory and the other is not, that is reported;
      * if both are directories, they are compared recursively;
      * otherwise the file type is probed with `file`, and the two files are
        compared with `diff -w` after the substitutions in repls have been
        applied to the text of dir2's copy.

    dir1, dir2  - paths of the two directories to compare
    repls       - dict mapping a substring found in dir2's files to the text
                  that should replace it before diffing
    show_binary - if True, also diff files that `file` does not describe as
                  text; by default such files are not compared at all
    skip_list   - entry names (not paths) to ignore at every directory level

    Returns the number of differing entries found.
    """
    # The assertions below hurt performance
    #assert os.path.isdir(dir1), dir1 + " not a directory"
    #assert os.path.isdir(dir2), dir2 + " not a directory"

    separator = "==============================================================================="

    # Get contents of both directories
    dir1_contents = set(os.listdir(dir1))
    dir2_contents = set(os.listdir(dir2))

    # Use set operations to figure out what they have in common
    dir1_only = dir1_contents - dir2_contents
    dir2_only = dir2_contents - dir1_contents
    both      = dir1_contents & dir2_contents

    num_differing_files = 0

    # Print the unique items
    for dirname, set_obj in [(dir1, dir1_only), (dir2, dir2_only)]:
        for item in sorted(set_obj):
            if item not in skip_list:
                print(separator)
                print(os.path.join(dirname, item), "is unique")
                num_differing_files += 1

    # Handling of the common items is trickier
    for item in sorted(both):
        if item in skip_list:
            continue
        path1 = os.path.join(dir1, item)
        path2 = os.path.join(dir2, item)
        path1isdir = os.path.isdir(path1)

        # If the directory status of the files differs, report diff
        if path1isdir != os.path.isdir(path2):
            print(separator)
            print(path1 + " DIFFERS (directory status)")
            num_differing_files += 1
            continue

        # If we've made it this far, the files' status is the same. If the
        # files are directories, recursively check them, otherwise check
        # that the file contents match
        if path1isdir:
            num_differing_files += recursive_diff(path1, path2, repls, show_binary, skip_list)
            continue

        # NOTE: comparing file sizes first would be much faster, but equal
        # size does not imply equal contents, so every file pair is diffed.
        stat, out, err = run_cmd("file {}".format(path1))
        if stat != 0:
            logging.warning("Failed to probe file '{}', out: '{}', err: '{}'".format(path1, out, err))
            continue

        # `file` normally echoes the path before its description; drop that
        # prefix so a path that merely contains "text" is not mistaken for a
        # text file.
        description = out[len(path1):] if out.startswith(path1) else out
        is_text_file = "text" in description
        if not (show_binary or is_text_file):
            continue

        # Close the handle deterministically; a deep tree would otherwise
        # leave many files open until garbage collection.
        with open(path2, "r") as fd:
            the_text = fd.read()
        for replace_item, replace_with in repls.items():
            the_text = the_text.replace(replace_item, replace_with)

        # path1 is diffed against the normalized text of path2 fed on stdin
        stat, out, _ = run_cmd("diff -w {} -".format(path1), input_str=the_text)
        if stat != 0:
            print(separator)
            print(path1 + " DIFFERS (contents)")
            num_differing_files += 1
            print("  "+ out)

    return num_differing_files

###############################################################################
def _main_func(description):
###############################################################################
    """
    Script entry point: compare two case directories and exit with a status
    reflecting the result.

    Parses sys.argv, then asks each case's ./xmlquery for the fields that are
    expected to differ between cases (TEST_TESTID, SRCROOT) so that case2's
    values can be rewritten to case1's before files are diffed. Finally runs
    recursive_diff, logs the number of differences, and exits with status 0
    when there are none and 1 otherwise.

    description - text passed through to the argument parser for --help
    """
    case1, case2, show_binary, skip_list = parse_command_line(sys.argv, description)

    xml_normalize_fields = ["TEST_TESTID", "SRCROOT"]
    repls = {}
    for xml_normalize_field in xml_normalize_fields:
        try:
            query = "./xmlquery --value {}".format(xml_normalize_field)
            val1 = run_cmd_no_fail(query, from_dir=case1)
            val2 = run_cmd_no_fail(query, from_dir=case2)
            if not val2:
                # An empty search string must never become a replacement key:
                # str.replace("", x) inserts x between every character, and
                # normpath("") is ".", either of which would corrupt every diff.
                continue
            if os.sep in val1:
                # Path-like value: normalize both sides so equivalent
                # spellings of the same path still match
                repls[os.path.normpath(val2)] = os.path.normpath(val1)
            else:
                repls[val2] = val1
        except Exception:
            logging.warning("Warning, failed to normalize on " + xml_normalize_field)
            # NOTE(review): this also discards replacements gathered for
            # earlier fields - confirm that is intended
            repls = {}

    num_differing_files = recursive_diff(case1, case2, repls, show_binary, skip_list)
    # The first argument to logging.info is the format string; the count is
    # supplied as its argument
    logging.info("%s files are different", num_differing_files)
    sys.exit(0 if num_differing_files == 0 else 1)

###############################################################################

# Run only when executed as a script; the module docstring is passed along to
# serve as the argument parser's description.
if __name__ == "__main__":
    _main_func(__doc__)
