From d0091285c8f58df99b70ead209df4cedd5ede004 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 31 Jul 2024 13:42:02 -0700
Subject: [PATCH 01/24] Added --rename-headers and --dont-number-unique-tags options

---
 pds4indextools/pds4_create_xml_index.py | 48 ++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index c34edd1..260632e 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -123,6 +123,12 @@ def correct_duplicates(label_results):
         if number.isdigit():
             cropped = tag.replace('_'+number, '')
             if cropped in element_names:
+                if str(cropped+'_'+number+'<1>') in key:
+                    key_new = key.replace((cropped+'_'+str((int(number)+1))+'<1>'),
+                                          cropped+'<1>')
+                else:
+                    key_new = key.replace((cropped+'_'+str((int(number)+1))),
+                                          cropped+'<1>')
                 key_new = key.replace(('_' + number + '<1>'), '<1>')
                 parent = key_new.split('/')[-2].split('<')[0]
                 key_new = key_new.replace(parent+'<1>', parent+'<'+str(int(number)+1)+'>')
@@ -406,7 +412,7 @@ def process_headers(label_results, key, root, namespaces, prefixes):
     label_results[key_new] = label_results.pop(key)
 
 
-def renumber_xpaths(xpaths):
+def renumber_xpaths(xpaths, args):
     """
     Renumber a list of XPaths to be sequential at each level.
 
@@ -458,6 +464,8 @@ def renumber_xpaths(xpaths):
 
     Parameters:
         xpaths (list): The list of XPaths or XPath fragments.
+        args (argparse.Namespace): Arguments parsed from command line using argparse.
+
 
     Returns:
         dict: A dictionary containing a mapping from the original XPaths to the
@@ -511,7 +519,10 @@ def split_xpath_prefix_and_num(s):
         # increasing starting at 1. We also add a special entry for the empty
         # suffix when there is no number.
         unique_nums = sorted({x.num for x in prefix_group_list if x.num is not None})
-        renumber_map = {x: f'<{i+1}>' for i, x in enumerate(unique_nums)}
+        if args.dont_number_unique_tags and len(unique_nums) == 1:
+            renumber_map = {x: '' for x in unique_nums}
+        else:
+            renumber_map = {x: f'<{i+1}>' for i, x in enumerate(unique_nums)}
         renumber_map[None] = ''
 
         # We further group these by unique parent (including the number)
@@ -527,7 +538,7 @@ def split_xpath_prefix_and_num(s):
             # down.
             children = [x for x in parent_group_list if x.child is not None]
             if children:
-                child_map = renumber_xpaths([x.child for x in children])
+                child_map = renumber_xpaths([x.child for x in children], args)
                 xpath_map.update(
                     {
                         f'{x.parent}/{x.child}': (
@@ -569,6 +580,20 @@ def split_into_elements(xpath):
     return elements
 
 
+def replace_columns(filepath, df):
+    # Create an empty dictionary to store column mappings
+    column_mappings = {}
+
+    # Read the file and populate the dictionary
+    with open(filepath, 'r') as file:
+        for line in file:
+            old_name, new_name = line.strip().split(', ')
+            column_mappings[old_name] = new_name
+
+    # Step 2: Rename the columns using the mappings
+    df.rename(columns=column_mappings, inplace=True)
+
+
 def store_element_text(element, tree, results_dict, xsd_files, nillable_elements_info,
                        config, label_filename):
     """
@@ -762,6 +787,9 @@ def pad_column_values_and_headers(df):
 
     df = pd.DataFrame(rows)
 
+    if args.rename_headers:
+        replace_columns(args.rename_headers, df)
+
     if args.sort_by:
         sort_values = str(args.sort_by).split(',')
         try:
@@ -1176,6 +1204,15 @@ def main(cmd_line=None):
                                             'contain characters permissible in variable '
                                             'names.')
 
+    index_file_generation.add_argument('--rename-headers', type=str,
+                                       metavar='NEW_HEADERS_FILEPATH',
+                                       help='Rename headers in the generated index file'
+                                            'according to a given mapping file.')
+
+    index_file_generation.add_argument('--dont-number-unique-tags', action='store_true',
+                                       help='Removes the predicates of unique XPath '
+                                            'headers.')
+
     index_file_generation.add_argument(
         '--simplify-xpaths',
         action='store_true',
@@ -1347,11 +1384,12 @@ def main(cmd_line=None):
         # the column refers to. At this stage, duplicate XPaths may exist again due to
         # the reformatting. These duplicates are corrected to preserve the contents of
         # each element's value.
-        xpath_map = renumber_xpaths(label_results)
+        correct_duplicates(label_results)
+        xpath_map = renumber_xpaths(label_results, args)
         for old_xpath, new_xpath in xpath_map.items():
             label_results[new_xpath] = label_results.pop(old_xpath)
 
-        correct_duplicates(label_results)
+        # correct_duplicates(label_results)
 
         # Collect metadata about the label file. The label file's lid is scraped and
         # broken into multiple parts. This metadata can then be requested as additional

From e114d3e48d26227df93c07eb7b61a7764f476e9a Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 19 Aug 2024 13:55:50 -0700
Subject: [PATCH 02/24] Updated index generation code and reached 100% unit test coverage

---
 pds4indextools/index_label_template_pds.xml   |   4 +
 pds4indextools/pds4_create_xml_index.py       | 356 +++++++++--------
 .../clean_header_field_names_success_2.csv    |   2 +
 test_files/expected/index_file_success.csv    |   2 +
 test_files/expected/label_success_1.csv       |   2 +
 test_files/expected/label_success_1.xml       | 116 ++++++
 test_files/expected/label_success_2.csv       |   2 +
 test_files/expected/label_success_2.xml       | 115 ++++++
 test_files/expected/label_success_3.csv       |   4 +
 test_files/expected/label_success_3.xml       | 102 +++++
 .../expected/limit_xpaths_file_success_1.csv  |   2 +
 .../expected/simplify_xpaths_success_1.txt    |  14 +-
 .../expected/simplify_xpaths_success_3.txt    |  62 +--
 .../expected/simplify_xpaths_success_4.txt    | 128 +++----
 test_files/expected/tester_config.yaml        |   4 +-
 test_files/expected/tester_config_label.yaml  |  13 +
 .../expected/tester_config_nillable.yaml      |  18 +
 test_files/labels/bad_lid_label.xml           |  25 ++
 .../samples/element_extra_file_info.txt       |   4 +
 tests/test_pds4_create_xml_index_blackbox.py  | 359 ++++++++++++++----
 tests/test_pds4_create_xml_index_whitebox.py  | 309 ++++++++++++++-
 21 files changed, 1281 insertions(+), 362 deletions(-)
 create mode 100644 test_files/expected/clean_header_field_names_success_2.csv
 create mode 100644 test_files/expected/index_file_success.csv
 create mode 100644 test_files/expected/label_success_1.csv
 create mode 100644 test_files/expected/label_success_1.xml
 create mode 100644 test_files/expected/label_success_2.csv
 create mode 100644 test_files/expected/label_success_2.xml
 create mode 100644 test_files/expected/label_success_3.csv
 create mode 100644 test_files/expected/label_success_3.xml
 create mode 100644 test_files/expected/limit_xpaths_file_success_1.csv
 create mode 100644 test_files/expected/tester_config_label.yaml
 create mode 100644 test_files/expected/tester_config_nillable.yaml
 create mode 100644 test_files/labels/bad_lid_label.xml
 create mode 100644 test_files/samples/element_extra_file_info.txt

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 8699cdc..89b6bb6 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -107,7 +107,11 @@ $END_IF
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
+            $IF(File)
+            <creation_date_time>$File['creation_date_time']$</creation_date_time>
+            $ELSE
             <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            $END_IF
             <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 260632e..96d4d12 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -124,12 +124,9 @@ def correct_duplicates(label_results):
             cropped = tag.replace('_'+number, '')
             if cropped in element_names:
                 if str(cropped+'_'+number+'<1>') in key:
-                    key_new = key.replace((cropped+'_'+str((int(number)+1))+'<1>'),
-                                          cropped+'<1>')
+                    key_new = key.replace((cropped+'_'+number+'<1>'), cropped+'<1>')
                 else:
-                    key_new = key.replace((cropped+'_'+str((int(number)+1))),
-                                          cropped+'<1>')
-                key_new = key.replace(('_' + number + '<1>'), '<1>')
+                    key_new = key.replace(cropped+'_'+number, cropped+'<1>')
                 parent = key_new.split('/')[-2].split('<')[0]
                 key_new = key_new.replace(parent+'<1>', parent+'<'+str(int(number)+1)+'>')
                 label_results[key_new] = label_results.pop(key)
@@ -302,6 +299,33 @@ def filter_dict_by_glob_patterns(input_dict, glob_patterns, valid_add_extra_file
     return filtered_dict
 
 
+def get_true_type(xsd_files, tag, namespaces):
+    def search_type(xsd_file, tag, namespaces):
+        print(f"Processing file: {xsd_file}")
+        xsd_tree = download_xsd_file(xsd_file)
+        namespaces = scrape_namespaces(xsd_tree)
+        true_type = find_base_attribute(xsd_tree, tag, namespaces)
+        if true_type:
+            print(f"Found true_type for tag '{tag}' in file: {xsd_file}")
+            return true_type
+
+        # Check for modified tag if the first search does not find a match
+        modified_tag = tag + "_WO_Units"
+        true_type = find_base_attribute(xsd_tree, modified_tag, namespaces)
+        if true_type:
+            print(f"Found true_type for modified tag '{modified_tag}' in file: {xsd_file}")
+        return true_type  # This will return either the found type or None
+
+    for xsd_file in xsd_files:
+        true_type = search_type(xsd_file, tag, namespaces)
+        if true_type:  # Only return if true_type is not None
+            print(f"Returning true_type found in file: {xsd_file}")
+            return true_type
+
+    print("No true_type found in any file.")
+    return None  # Return None if no match is found in any file
+
+
 def load_config_file(
         default_config_file=Path(__file__).resolve().parent/'default_config.yaml',
         specified_config_files=None):
@@ -519,10 +543,7 @@ def split_xpath_prefix_and_num(s):
         # increasing starting at 1. We also add a special entry for the empty
         # suffix when there is no number.
         unique_nums = sorted({x.num for x in prefix_group_list if x.num is not None})
-        if args.dont_number_unique_tags and len(unique_nums) == 1:
-            renumber_map = {x: '' for x in unique_nums}
-        else:
-            renumber_map = {x: f'<{i+1}>' for i, x in enumerate(unique_nums)}
+        renumber_map = {x: f'<{i+1}>' for i, x in enumerate(unique_nums)}
         renumber_map[None] = ''
 
         # We further group these by unique parent (including the number)
@@ -580,20 +601,6 @@ def split_into_elements(xpath):
     return elements
 
 
-def replace_columns(filepath, df):
-    # Create an empty dictionary to store column mappings
-    column_mappings = {}
-
-    # Read the file and populate the dictionary
-    with open(filepath, 'r') as file:
-        for line in file:
-            old_name, new_name = line.strip().split(', ')
-            column_mappings[old_name] = new_name
-
-    # Step 2: Rename the columns using the mappings
-    df.rename(columns=column_mappings, inplace=True)
-
-
 def store_element_text(element, tree, results_dict, xsd_files, nillable_elements_info,
                        config, label_filename):
     """
@@ -630,17 +637,12 @@ def store_element_text(element, tree, results_dict, xsd_files, nillable_elements
                     xsd_tree = download_xsd_file(xsd_file)
                     namespaces = scrape_namespaces(xsd_tree)
                     true_type = find_base_attribute(xsd_tree, tag, namespaces)
-                    if true_type:
-                        break  # Exit the loop once true_type is found
-
-                if not true_type:
-                    modified_tag = tag + "_WO_Units"
-                    for xsd_file in xsd_files:
-                        namespaces = scrape_namespaces(xsd_tree)
+                    if not true_type:
+                        modified_tag = tag + "_WO_Units"
                         true_type = find_base_attribute(xsd_tree, modified_tag,
                                                         namespaces)
-                        if true_type:
-                            break
+                        # if true_type:
+                        #     break
 
                 default = default_value_for_nil(config, true_type, nil_value)
                 results_dict[xpath] = default
@@ -704,40 +706,39 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
     for element in elements_with_nillable:
         name = element.get('name')
         type_attribute = element.get('type')
-        if type_attribute not in nillable_elements_info:
-            if type_attribute:
-                # Split the type attribute to handle namespace:typename format
-                type_parts = type_attribute.split(':')
-                # Take the last part as the type name
-                type_name = type_parts[-1]
-
-                # Attempt to find the type definition in the document
-                type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
-                                         f'//xs:complexType[@name="{type_name}"]')
-                type_definition = tree.xpath(
-                    type_definition_xpath, namespaces=namespace)
-
-                if type_definition:
-                    # Take the first match
-                    type_definition = type_definition[0]
-                    base_type = None
-                    # For complexType with simpleContent or simpleType, find base attr
-                    if type_definition.tag.endswith('simpleType'):
-                        restriction = type_definition.find('.//xs:restriction',
-                                                           namespaces=namespace)
-                        if restriction is not None:
-                            base_type = restriction.get('base')
-                    elif type_definition.tag.endswith('complexType'):
-                        extension = type_definition.find('.//xs:extension',
-                                                         namespaces=namespace)
-                        if extension is not None:
-                            base_type = extension.get('base')
-
-                    nillable_elements_info[name] = (
-                        base_type or 'External or built-in type')
-                else:
-                    # Type definition not found, might be external or built-in type
-                    nillable_elements_info[name] = 'External or built-in type'
+        if type_attribute:
+            # Split the type attribute to handle namespace:typename format
+            type_parts = type_attribute.split(':')
+            # Take the last part as the type name
+            type_name = type_parts[-1]
+
+            # Attempt to find the type definition in the document
+            type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
+                                        f'//xs:complexType[@name="{type_name}"]')
+            type_definition = tree.xpath(
+                type_definition_xpath, namespaces=namespace)
+
+            if type_definition:
+                # Take the first match
+                type_definition = type_definition[0]
+                base_type = None
+                # For complexType with simpleContent or simpleType, find base attr
+
+                try:
+                    restriction = type_definition.find('.//xs:restriction',
+                                        namespaces=namespace)
+                    base_type = restriction.get('base')
+
+                except AttributeError:
+                    extension = type_definition.find('.//xs:extension',
+                                    namespaces=namespace)
+                    base_type = extension.get('base')
+
+                nillable_elements_info[name] = (
+                    base_type or 'External or built-in type')
+            else:
+                # Type definition not found, might be external or built-in type
+                nillable_elements_info[name] = 'External or built-in type'
 
 
 def write_results_to_csv(results_list, args, output_csv_path):
@@ -781,24 +782,22 @@ def pad_column_values_and_headers(df):
 
         return padded_df
 
+
     rows = []
     for result_dict in results_list:
-        rows.append(result_dict['Results'])
+        rows.append(result_dict)
 
     df = pd.DataFrame(rows)
 
-    if args.rename_headers:
-        replace_columns(args.rename_headers, df)
-
     if args.sort_by:
         sort_values = str(args.sort_by).split(',')
         try:
-            df.sort_values(by=sort_values, inplace=True)
-        except KeyError as bad_sort:
-            print(f'Unknown sort key {bad_sort}. For a list of available sort keys, use '
-                  f'the --output-headers-file option.')
+            sort_dataframe(df, sort_values)
+        except ValueError as bad_sort:
+            print(bad_sort)
             sys.exit(1)
 
+
     if args.clean_header_field_names:
         clean_headers(df)
 
@@ -838,32 +837,6 @@ def find_base_attribute(xsd_tree, target_name, new_namespaces):
     }
     namespaces.update(new_namespaces)
 
-    def follow_base_type(base_type):
-        """
-        Recursively follows the base type definitions to find the final base type.
-
-        Parameters:
-            base_type (str): The initial base type to follow.
-
-        Returns:
-            str: The final base type.
-        """
-        while True:
-            if 'ASCII' in base_type or 'UTF8' in base_type:
-                return base_type
-
-            next_query = (
-                f".//xs:simpleType[@name='{base_type.split(':')[-1]}']"
-                f"//xs:restriction/@base"
-            )
-            try:
-                next_result = xsd_tree.xpath(next_query, namespaces=namespaces)
-            except etree.XPathEvalError:
-                break
-            if not next_result:
-                break
-            base_type = next_result[0]
-        return base_type
 
     def get_base_type(query):
         """
@@ -875,11 +848,8 @@ def get_base_type(query):
         Returns:
             list: The result of the XPath query.
         """
-        try:
-            result = xsd_tree.xpath(query, namespaces=namespaces)
-            return result
-        except etree.XPathEvalError:
-            return None
+        result = xsd_tree.xpath(query, namespaces=namespaces)
+        return result
 
     queries = [
         f".//xs:complexType[@name='{target_name}']//xs:extension/@base",
@@ -933,7 +903,7 @@ def get_base_type(query):
         result = get_base_type(query)
         if result:
             base_type = result[0]
-            return follow_base_type(base_type)
+            return base_type
 
     return None
 
@@ -955,7 +925,15 @@ def scrape_namespaces(tree):
     return namespaces
 
 
-def get_creation_date(file_path):
+def sort_dataframe(df, sort_keys):
+        try:
+            df.sort_values(by=sort_keys, inplace=True)
+        except KeyError as bad_sort:
+            raise ValueError(f'Unknown sort key {bad_sort}. For a list of available sort '
+                             f'keys, use the --output-headers-file option.')
+
+
+def get_creation_date(file_path): 
     """
     Returns the creation date of a file in ISO 8601 format.
 
@@ -973,7 +951,7 @@ def get_creation_date(file_path):
         stat = os.stat(file_path)
         try:
             creation_time = stat.st_birthtime
-        except AttributeError:
+        except AttributeError: # pragma: no coverage
             # Fallback to the last modification time if birth time is not available
             creation_time = stat.st_mtime
 
@@ -1146,7 +1124,7 @@ def _fill_text(self, text, width, indent):
 
 def main(cmd_line=None):
     epilog_sfx = ''
-    if __version__ != 'Version unspecified':
+    if __version__ != 'Version unspecified': # pragma: no coverage
         epilog_sfx = f'|nVersion: {__version__}'
     parser = argparse.ArgumentParser(
         formatter_class=MultilineFormatter,
@@ -1290,10 +1268,19 @@ def main(cmd_line=None):
 
     for pattern in patterns:
         files = directory_path.glob(pattern)
-        if not files:
-            verboseprint(f'No files matching {pattern} found in '
-                         f'directory: {directory_path}')
-        label_files.extend(files)
+    
+        # Create an iterator from the generator
+        files_iter = iter(files)
+        
+        # Use a sentinel object to check if there's any item
+        sentinel = object()
+        first_file = next(files_iter, sentinel)
+        
+        if first_file is sentinel:
+            print(f"No files found for pattern: {pattern}")
+        else:
+            # If not empty, continue processing and include the first file
+            label_files.extend(itertools.chain([first_file], files_iter))
 
     verboseprint(f'{len(label_files)} matching file(s) found')
 
@@ -1337,7 +1324,7 @@ def main(cmd_line=None):
 
         filepath = str(label_file.relative_to(args.directorypath)).replace('\\', '/')
         # PDS4 compliant filepaths must be less than 255 characters.
-        if len(filepath) > 255:
+        if len(filepath) > 255: # pragma: no coverage
             print(f'Filepath {filepath} exceeds 255 character limit.')
             sys.exit(1)
 
@@ -1351,11 +1338,11 @@ def main(cmd_line=None):
         label_results = {}
         traverse_and_store(root, tree, label_results, xsd_files,
                            nillable_elements_info, config, label_file)
-
-        # The XPath headers in the label_results dictionary are reformatted to
-        # improve readability. Each XPath's namespace is replaced with its prefix for
-        # faster reference. Duplicate XPaths are made unique to ensure all results are
-        # present in the final product.
+                           
+        # # The XPath headers in the label_results dictionary are reformatted to
+        # # improve readability. Each XPath's namespace is replaced with its prefix for
+        # # faster reference. Duplicate XPaths are made unique to ensure all results are
+        # # present in the final product.
         for key in list(label_results):
             process_headers(label_results, key, root, namespaces, prefixes)
 
@@ -1384,19 +1371,21 @@ def main(cmd_line=None):
         # the column refers to. At this stage, duplicate XPaths may exist again due to
         # the reformatting. These duplicates are corrected to preserve the contents of
         # each element's value.
-        correct_duplicates(label_results)
         xpath_map = renumber_xpaths(label_results, args)
         for old_xpath, new_xpath in xpath_map.items():
             label_results[new_xpath] = label_results.pop(old_xpath)
 
-        # correct_duplicates(label_results)
+        correct_duplicates(label_results)
 
         # Collect metadata about the label file. The label file's lid is scraped and
         # broken into multiple parts. This metadata can then be requested as additional
         # columns within the index file.
-        lid = extract_logical_identifier(tree)
-        if lid is None:
-            lid = label_results.get('pds:logical_identifier', 'Missing_LID')
+        try:
+            lid = extract_logical_identifier(tree)
+        except AttributeError:
+            print(f"Label file {label_file} does not have a "
+                  f"logical_identifier attribute.")
+            sys.exit(1)
 
         # Attach extra columns if asked for.
         bundle_lid = ':'.join(lid.split(':')[:4])
@@ -1409,8 +1398,8 @@ def main(cmd_line=None):
             label_results = {**{ele: extras[ele] for ele in
                                 args.add_extra_file_info}, **label_results}
 
-        result_dict = {'Results': label_results}
-        all_results.append(result_dict)
+        all_results.append(label_results)
+
 
     if args.add_extra_file_info and elements_to_scrape is not None:
         elements_to_scrape = args.add_extra_file_info + elements_to_scrape
@@ -1419,57 +1408,69 @@ def main(cmd_line=None):
     # of the --limit-xpaths-file input file. If this command is not used, the original
     # dictionary will be returned. Glob patterns are processed sequentially, with the
     # first pattern having the highest priority.
-    for i in range(len(all_results)):
-        label_results = all_results[i]['Results']
-        label_results = filter_dict_by_glob_patterns(
+    
+    for label_results in all_results:
+        ind = all_results.index(label_results)
+        label_results_new = filter_dict_by_glob_patterns(
             label_results, elements_to_scrape, valid_add_extra_file_info, verboseprint)
-        all_results[i]['Results'] = label_results
+        all_results[ind] = label_results_new
 
-    if all(len(set(r['Results'])) == 0 for r in all_results):
+    if all(len(set(r)) == 0 for r in all_results):
         print('No results found: glob pattern(s) excluded all matches.')
         sys.exit(1)
 
-    # If --simplify-xpaths is used, the XPath headers will be shortened to the
-    # element's tag and namespace prefix. This is contingent on the uniqueness of
-    # the XPath header; if more than one XPath header shares a tag, a namespace and a
-    # predicate value, the XPath header will remain whole.
+    # # If --simplify-xpaths is used, the XPath headers will be shortened to the
+    # # element's tag and namespace prefix. This is contingent on the uniqueness of
+    # # the XPath header; if more than one XPath header shares a tag, a namespace and a
+    # # predicate value, the XPath header will remain whole.
     if args.simplify_xpaths:
-        for i in range(len(all_results)):
-            label_results = all_results[i]['Results']
+        headers = {}
+        unique_tags_master = []
+
+         # Step 1: Gather all possible tags from labels
+        for label_results in all_results:
+            keys = label_results.keys()
+            for key in keys:
+                tag = key.split('/')[-1]
+                tags.append(tag)
+                if key not in headers:
+                    headers[key] = tag
+
+        # For each label, collect all tags that only occur once. If a unique tag occurs
+        # multiple times within a label, that tag will be removed from the collective
+        # list of unique tags.
+        for label_results in all_results:
             tags = []
+            unique_tags = []
             names = []
-
-            # Step 1: Gather all tags from keys
-            for key in label_results:
-                elements = key.split('/')
-                tag = elements[-1]
-                name = tag.split('<')[0]
+            for key in keys:
+                tag = key.split('/')[-1]
                 tags.append(tag)
+                name = tag.split('<')[0]
                 names.append(name)
-
-            # Step 2: Find unique tags
-            unique_tags = []
             for tag in tags:
                 name = tag.split('<')[0]
-                if tags.count(tag) == 1 and names.count(name) == 1:
+                if (tags.count(tag) == 1 and names.count(name) == 1
+                    and tag not in unique_tags):
                     unique_tags.append(tag)
-
-            # Step 3: Create a new dictionary to hold modified results
+                # if tags.count(tag) > 1 and tag in unique_tags_master:
+                #     unique_tags_master.remove(tag)
+                
+            for tag in unique_tags:
+                unique_tags_master.append(tag)
+
+        for label_results in all_results:
+            ind = all_results.index(label_results)
             new_label_results = {}
-
-            # Step 4: Iterate over original dictionary to modify and copy to new
-            # dictionary
             for key, value in list(label_results.items()):
-                elements = key.split('/')
-                tag = elements[-1]
-                if tag in unique_tags:
-                    new_tag = tag.split('<')[0]
-                    verboseprint(f'XPath header {key} changed to {new_tag}')
-                    new_label_results[new_tag] = value
+                new_key = headers[key]
+                if key.split('/')[-1] in unique_tags_master:
+                    new_label_results[new_key] = value
                 else:
                     new_label_results[key] = value
 
-            all_results[i]['Results'] = new_label_results
+            all_results[ind] = new_label_results
+
 
     if output_csv_path:
         write_results_to_csv(all_results, args, output_csv_path)
@@ -1479,12 +1480,14 @@ def main(cmd_line=None):
     # the label_results dictionary and place them in the output file, instead of the
     # index file.
     if output_txt_path:
+        if not args.output_index_file:
+            print('No index file generated because --output-headers-file was '
+                  'provided without --output-index-file.')
         xpaths = []
         for label in all_results:
-            for values in label.values():
-                for xpath in values:
-                    if xpath not in xpaths:
-                        xpaths.append(xpath)
+            for xpath in label:
+                if xpath not in xpaths:
+                    xpaths.append(xpath)
 
         # The file is now written and placed in a given location. If cleaned header
         # field names are requested, they are processed here before being written in.
@@ -1497,14 +1500,12 @@ def main(cmd_line=None):
                         ':', '_').replace('/', '__').replace('<', '_').replace('>', '')
                 output_fp.write("%s\n" % item)
         print(f'XPath headers file generated at {output_txt_path}.')
-        if not args.output_index_file:
-            print('No index file generated because --output-headers-file was '
-                  'provided without --output-index-file.')
 
     # Generates the label for this index file, if --generate-label is used.
 
     if args.generate_label:
         index_file = output_csv_path
+        print(index_file)
 
         # The template label file is initialized.
         module_dir = Path(__file__).resolve().parent
@@ -1550,7 +1551,7 @@ def main(cmd_line=None):
                     true_type = 'pds:ASCII_LID'
                 elif header == 'filename':
                     true_type = 'pds:ASCII_File_Name'
-                elif header == filepath:
+                elif header == 'filepath':
                     true_type = 'pds:ASCII_File_Specification_Name'
                 elif header == 'bundle':
                     true_type = 'pds:ASCII_Text_Preserved'
@@ -1558,25 +1559,8 @@ def main(cmd_line=None):
                     parts = header.split('/')
                     name = parts[-1].split('<')[0].split(':')[-1]
 
-                    true_type = None
+                    true_type = get_true_type(xsd_files, name, namespaces)
 
-                    for xsd_file in xsd_files:
-                        xsd_tree = download_xsd_file(xsd_file)
-                        true_type = find_base_attribute(xsd_tree, name, namespaces)
-                        if true_type:
-                            break
-
-                    if not true_type:
-                        modified_name = name + "_WO_Units"
-                        for xsd_file in xsd_files:
-                            xsd_tree = download_xsd_file(xsd_file)
-                            true_type = find_base_attribute(xsd_tree, modified_name,
-                                                            namespaces)
-                            if true_type:
-                                break
-
-                if true_type is None:
-                    true_type = ':inapplicable'
                 true_type = true_type.split(':')[-1]
                 field_number += 1
                 header_length = len(header.encode('utf-8'))
@@ -1635,5 +1619,5 @@ def main(cmd_line=None):
         template.write(label_content, str(output_subdir / filename) + '.xml')
 
 
-if __name__ == '__main__':
+if __name__ == '__main__': # pragma: no coverage
     main()
diff --git a/test_files/expected/clean_header_field_names_success_2.csv b/test_files/expected/clean_header_field_names_success_2.csv
new file mode 100644
index 0000000..6304953
--- /dev/null
+++ b/test_files/expected/clean_header_field_names_success_2.csv
@@ -0,0 +1,2 @@
+pds_Product_Observational__pds_Identification_Area_1__pds_logical_identifier_1,pds_Product_Observational__pds_Identification_Area_1__pds_version_id_1,pds_Product_Observational__pds_Identification_Area_1__pds_title_1,pds_Product_Observational__pds_Identification_Area_1__pds_information_model_version_1,pds_Product_Observational__pds_Observing_System_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/index_file_success.csv b/test_files/expected/index_file_success.csv
new file mode 100644
index 0000000..90ebe6e
--- /dev/null
+++ b/test_files/expected/index_file_success.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_success_1.csv b/test_files/expected/label_success_1.csv
new file mode 100644
index 0000000..90ebe6e
--- /dev/null
+++ b/test_files/expected/label_success_1.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_success_1.xml b/test_files/expected/label_success_1.xml
new file mode 100644
index 0000000..6d4945f
--- /dev/null
+++ b/test_files/expected/label_success_1.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+
+<Product_Ancillary xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:rms_metadata:document_opus:generated_label_1</logical_identifier>
+        <version_id>1.1</version_id>
+        <title>Index File</title>
+        <information_model_version>1.21.0.0</information_model_version>
+        <product_class>Product_Ancillary</product_class>
+        <License_Information>
+            <name>Creative Common Public License CC0 1.0 (2024)</name>
+            <description>Creative Commons Zero (CC0) license information.</description>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0</lid_reference>
+                <reference_type>product_to_license</reference_type>
+            </Internal_Reference>
+        </License_Information>
+    </Identification_Area>
+    <Reference_List>
+    </Reference_List>
+    <File_Area_Ancillary>
+        <File>
+            <file_name>generated_label_1.csv</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>00:00:00</creation_date_time>
+            <md5_checksum>a177a1160bf3780c01e3bd9e02be89f4</md5_checksum>
+            <comment></comment>
+        </File>
+        <Header>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">819</object_length>
+            <parsing_standard_id>UTF-8 Text</parsing_standard_id>
+            <description>Provides the column headers, separated by commas, for the data table.</description>
+        </Header>
+        <Table_Delimited>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">1058</object_length>
+            <parsing_standard_id>PDS DSV 1</parsing_standard_id>
+            <records>2</records>
+            <record_delimiter>Line-Feed</record_delimiter>
+            <field_delimiter>Comma</field_delimiter>
+            <Record_Delimited>
+                <fields>9</fields>
+                <groups>0</groups>
+                <maximum_record_length unit="byte">818</maximum_record_length>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:logical_identifier&lt;1&gt;</name>
+                        <field_number>1</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">52</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:version_id&lt;1&gt;</name>
+                        <field_number>2</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:title&lt;1&gt;</name>
+                        <field_number>3</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">33</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:information_model_version&lt;1&gt;</name>
+                        <field_number>4</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">8</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>5</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">41</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>6</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">15</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:type&lt;1&gt;</name>
+                        <field_number>7</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:lid_reference&lt;1&gt;</name>
+                        <field_number>8</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">50</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:reference_type&lt;1&gt;</name>
+                        <field_number>9</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+            </Record_Delimited>
+        </Table_Delimited>
+    </File_Area_Ancillary>
+</Product_Ancillary>
diff --git a/test_files/expected/label_success_2.csv b/test_files/expected/label_success_2.csv
new file mode 100644
index 0000000..d9f2dc4
--- /dev/null
+++ b/test_files/expected/label_success_2.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n                          ,1.0                                                                   ,Cassini ISS Image 1455200455n.img                                ,1.11.0.0                                                                             ,Cassini Orbiter Imaging Science Subsystem                    ,Cassini Orbiter                                                                                ,Spacecraft                                                                                     ,urn:nasa:pds:context:instrument_host:spacecraft.co                                                                                ,is_instrument_host                                                                                                                 
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
new file mode 100644
index 0000000..b5bed6e
--- /dev/null
+++ b/test_files/expected/label_success_2.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+
+<Product_Metadata_Supplemental xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:rms_metadata:document_opus:generated_label_2</logical_identifier>
+        <version_id>1.1</version_id>
+        <title>Index File</title>
+        <information_model_version>1.21.0.0</information_model_version>
+        <product_class>Product_Ancillary</product_class>
+        <License_Information>
+            <name>Creative Common Public License CC0 1.0 (2024)</name>
+            <description>Creative Commons Zero (CC0) license information.</description>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0</lid_reference>
+                <reference_type>product_to_license</reference_type>
+            </Internal_Reference>
+        </License_Information>
+    </Identification_Area>
+    <Reference_List>
+    </Reference_List>
+    <File_Area_Metadata>
+        <File>
+            <file_name>generated_label_2.csv</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>00:00:00</creation_date_time>
+            <md5_checksum>53d47b320936ac3fbba0852696065418</md5_checksum>
+            <comment></comment>
+        </File>
+        <Header>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">819</object_length>
+            <parsing_standard_id>UTF-8 Text</parsing_standard_id>
+            <description>Provides the column headers, separated by commas, for the data table.</description>
+        </Header>
+        <Table_Character>
+            <offset unit="byte"></offset>
+            <object_length unit="byte">1638</object_length>
+            <records>2</records>
+            <record_delimiter>Line-Feed</record_delimiter>
+            <description></description>
+            <Record_Character>
+                <fields>9</fields>
+                <groups>0</groups>
+                <record_length unit="byte"></record_length>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:logical_identifier&lt;1&gt;</name>
+                        <field_number>1</field_number>
+                        <field_location unit="byte">1</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">78</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:version_id&lt;1&gt;</name>
+                        <field_number>2</field_number>
+                        <field_location unit="byte">79</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">70</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:title&lt;1&gt;</name>
+                        <field_number>3</field_number>
+                        <field_location unit="byte">150</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">65</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:information_model_version&lt;1&gt;</name>
+                        <field_number>4</field_number>
+                        <field_location unit="byte">216</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">85</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>5</field_number>
+                        <field_location unit="byte">302</field_location>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">61</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>6</field_number>
+                        <field_location unit="byte">364</field_location>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">95</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:type&lt;1&gt;</name>
+                        <field_number>7</field_number>
+                        <field_location unit="byte">460</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">95</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:lid_reference&lt;1&gt;</name>
+                        <field_number>8</field_number>
+                        <field_location unit="byte">556</field_location>
+                        <data_type>ASCII_LID</data_type>
+                        <field_length unit="byte">130</field_length>
+                    </Field_Character>
+                    <Field_Character>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:reference_type&lt;1&gt;</name>
+                        <field_number>9</field_number>
+                        <field_location unit="byte">687</field_location>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <field_length unit="byte">131</field_length>
+                    </Field_Character>
+            </Record_Character>
+        </Table_Character>
+    </File_Area_Metadata>
+</Product_Metadata_Supplemental>
diff --git a/test_files/expected/label_success_3.csv b/test_files/expected/label_success_3.csv
new file mode 100644
index 0000000..188177b
--- /dev/null
+++ b/test_files/expected/label_success_3.csv
@@ -0,0 +1,4 @@
+filename,filepath,lid,bundle,bundle_lid,pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+tester_label_1.xml,labels/tester_label_1.xml,urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,cassini_iss_saturn,urn:nasa:pds:cassini_iss_saturn,urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0
+tester_label_2.xml,labels/tester_label_2.xml,urn:nasa:pds:uranus_occ_u149_irtf_320cm:data:2200nm_counts-v-time_occult,uranus_occ_u149_irtf_320cm,urn:nasa:pds:uranus_occ_u149_irtf_320cm,urn:nasa:pds:uranus_occ_u149_irtf_320cm:data:2200nm_counts-v-time_occult,1.0
+tester_label_3.xml,labels/tester_label_3.xml,urn:nasa:pds:cassini_iss_cruise:data_raw:1357539630n,cassini_iss_cruise,urn:nasa:pds:cassini_iss_cruise,urn:nasa:pds:cassini_iss_cruise:data_raw:1357539630n,1.0
diff --git a/test_files/expected/label_success_3.xml b/test_files/expected/label_success_3.xml
new file mode 100644
index 0000000..96cc903
--- /dev/null
+++ b/test_files/expected/label_success_3.xml
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+
+<Product_Ancillary xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:rms_metadata:document_opus:generated_label_3</logical_identifier>
+        <version_id>1.1</version_id>
+        <title>Index File</title>
+        <information_model_version>1.21.0.0</information_model_version>
+        <product_class>Product_Ancillary</product_class>
+        <License_Information>
+            <name>Creative Common Public License CC0 1.0 (2024)</name>
+            <description>Creative Commons Zero (CC0) license information.</description>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0</lid_reference>
+                <reference_type>product_to_license</reference_type>
+            </Internal_Reference>
+        </License_Information>
+    </Identification_Area>
+    <Reference_List>
+    </Reference_List>
+    <File_Area_Ancillary>
+        <File>
+            <file_name>generated_label_3.csv</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>00:00:00</creation_date_time>
+            <md5_checksum>8b2eb69a284938d23748de7f53d2e45b</md5_checksum>
+            <comment></comment>
+        </File>
+        <Header>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">190</object_length>
+            <parsing_standard_id>UTF-8 Text</parsing_standard_id>
+            <description>Provides the column headers, separated by commas, for the data table.</description>
+        </Header>
+        <Table_Delimited>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">864</object_length>
+            <parsing_standard_id>PDS DSV 1</parsing_standard_id>
+            <records>4</records>
+            <record_delimiter>Line-Feed</record_delimiter>
+            <field_delimiter>Comma</field_delimiter>
+            <Record_Delimited>
+                <fields>7</fields>
+                <groups>0</groups>
+                <maximum_record_length unit="byte">261</maximum_record_length>
+                    <Field_Delimited>
+                        <name>filename</name>
+                        <field_number>1</field_number>
+                        <data_type>ASCII_File_Name</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>filepath</name>
+                        <field_number>2</field_number>
+                        <data_type>ASCII_File_Specification_Name</data_type>
+                        <maximum_field_length unit="byte">25</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>lid</name>
+                        <field_number>3</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">72</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>bundle</name>
+                        <field_number>4</field_number>
+                        <data_type>ASCII_Text_Preserved</data_type>
+                        <maximum_field_length unit="byte">26</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>bundle_lid</name>
+                        <field_number>5</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">39</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:logical_identifier&lt;1&gt;</name>
+                        <field_number>6</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">72</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:version_id&lt;1&gt;</name>
+                        <field_number>7</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+            </Record_Delimited>
+        </Table_Delimited>
+    </File_Area_Ancillary>
+</Product_Ancillary>
diff --git a/test_files/expected/limit_xpaths_file_success_1.csv b/test_files/expected/limit_xpaths_file_success_1.csv
new file mode 100644
index 0000000..08b5633
--- /dev/null
+++ b/test_files/expected/limit_xpaths_file_success_1.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img
diff --git a/test_files/expected/simplify_xpaths_success_1.txt b/test_files/expected/simplify_xpaths_success_1.txt
index e179908..d39804c 100644
--- a/test_files/expected/simplify_xpaths_success_1.txt
+++ b/test_files/expected/simplify_xpaths_success_1.txt
@@ -1,9 +1,9 @@
-pds:logical_identifier
-pds:version_id
-pds:title
-pds:information_model_version
+pds:logical_identifier<1>
+pds:version_id<1>
+pds:title<1>
+pds:information_model_version<1>
 pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
 pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
-pds:type
-pds:lid_reference
-pds:reference_type
+pds:type<1>
+pds:lid_reference<1>
+pds:reference_type<1>
diff --git a/test_files/expected/simplify_xpaths_success_3.txt b/test_files/expected/simplify_xpaths_success_3.txt
index 08fc585..0ac5ded 100644
--- a/test_files/expected/simplify_xpaths_success_3.txt
+++ b/test_files/expected/simplify_xpaths_success_3.txt
@@ -1,31 +1,31 @@
-rings:occultation_type
-rings:occultation_direction
-rings:time_series_direction
-rings:planetary_occultation_flag
-rings:data_quality_score
-rings:ring_plane
-rings:star_name
-rings:fresnel_scale
-rings:projected_star_diameter
-rings:sigma_projected_star_diameter
-rings:fractional_error_star_counts
-rings:time_constant_type
-rings:time_constant
-rings:sigma_time_constant
-rings:minimum_wavelength
-rings:maximum_wavelength
-rings:reference_time_utc
-rings:minimum_observed_event_time
-rings:maximum_observed_event_time
-rings:observed_event_start_tdb
-rings:observed_event_stop_tdb
-rings:earth_received_start_time_utc
-rings:earth_received_stop_time_utc
-rings:minimum_ring_radius
-rings:maximum_ring_radius
-rings:minimum_ring_longitude
-rings:maximum_ring_longitude
-rings:minimum_observed_ring_azimuth
-rings:maximum_observed_ring_azimuth
-rings:observed_ring_elevation
-rings:light_source_incidence_angle
+rings:occultation_type<1>
+rings:occultation_direction<1>
+rings:time_series_direction<1>
+rings:planetary_occultation_flag<1>
+rings:data_quality_score<1>
+rings:ring_plane<1>
+rings:star_name<1>
+rings:fresnel_scale<1>
+rings:projected_star_diameter<1>
+rings:sigma_projected_star_diameter<1>
+rings:fractional_error_star_counts<1>
+rings:time_constant_type<1>
+rings:time_constant<1>
+rings:sigma_time_constant<1>
+rings:minimum_wavelength<1>
+rings:maximum_wavelength<1>
+rings:reference_time_utc<1>
+rings:minimum_observed_event_time<1>
+rings:maximum_observed_event_time<1>
+rings:observed_event_start_tdb<1>
+rings:observed_event_stop_tdb<1>
+rings:earth_received_start_time_utc<1>
+rings:earth_received_stop_time_utc<1>
+rings:minimum_ring_radius<1>
+rings:maximum_ring_radius<1>
+rings:minimum_ring_longitude<1>
+rings:maximum_ring_longitude<1>
+rings:minimum_observed_ring_azimuth<1>
+rings:maximum_observed_ring_azimuth<1>
+rings:observed_ring_elevation<1>
+rings:light_source_incidence_angle<1>
diff --git a/test_files/expected/simplify_xpaths_success_4.txt b/test_files/expected/simplify_xpaths_success_4.txt
index a1d7d69..9422e08 100644
--- a/test_files/expected/simplify_xpaths_success_4.txt
+++ b/test_files/expected/simplify_xpaths_success_4.txt
@@ -1,64 +1,64 @@
-cassini:mission_phase_name
-cassini:spacecraft_clock_count_partition
-cassini:spacecraft_clock_start_count
-cassini:spacecraft_clock_stop_count
-cassini:limitations
-cassini:antiblooming_state_flag
-cassini:command_file_name
-cassini:command_sequence_number
-cassini:dark_strip_mean
-cassini:data_conversion_type
-cassini:delayed_readout_flag
-cassini:detector_temperature
-cassini:electronics_bias
-cassini:expected_maximum_full_well
-cassini:expected_maximum_DN_sat
-cassini:expected_packets
-cassini:exposure_duration
-cassini:filter_name_1
-cassini:filter_name_2
-cassini:filter_temperature
-cassini:flight_software_version_id
-cassini:gain_mode_id
-cassini:ground_software_version_id
-cassini:image_mid_time
-cassini:image_number
-cassini:image_time
-cassini:image_observation_type
-cassini:instrument_data_rate
-cassini:inst_cmprs_type
-cassini:inst_cmprs_param_malgo
-cassini:inst_cmprs_param_tb
-cassini:inst_cmprs_param_blocks
-cassini:inst_cmprs_param_quant
-cassini:inst_cmprs_rate_expected_bits
-cassini:inst_cmprs_rate_actual_bits
-cassini:inst_cmprs_ratio
-cassini:light_flood_state_flag
-cassini:method_description
-cassini:missing_lines
-cassini:missing_packet_flag
-cassini:optics_temperature_front
-cassini:optics_temperature_back
-cassini:order_number
-cassini:parallel_clock_voltage_index
-cassini:pds3_product_creation_time
-cassini:pds3_product_version_type
-cassini:pds3_target_desc
-cassini:pds3_target_list
-cassini:pds3_target_name
-cassini:pre-pds_version_number
-cassini:prepare_cycle_index
-cassini:readout_cycle_index
-cassini:received_packets
-cassini:sensor_head_electronics_temperature
-cassini:sequence_id
-cassini:sequence_number
-cassini:sequence_title
-cassini:shutter_mode_id
-cassini:shutter_state_id
-cassini:start_time_doy
-cassini:stop_time_doy
-cassini:telemetry_format_id
-cassini:valid_maximum_full_well
-cassini:valid_maximum_DN_sat
+cassini:mission_phase_name<1>
+cassini:spacecraft_clock_count_partition<1>
+cassini:spacecraft_clock_start_count<1>
+cassini:spacecraft_clock_stop_count<1>
+cassini:limitations<1>
+cassini:antiblooming_state_flag<1>
+cassini:command_file_name<1>
+cassini:command_sequence_number<1>
+cassini:dark_strip_mean<1>
+cassini:data_conversion_type<1>
+cassini:delayed_readout_flag<1>
+cassini:detector_temperature<1>
+cassini:electronics_bias<1>
+cassini:expected_maximum_full_well<1>
+cassini:expected_maximum_DN_sat<1>
+cassini:expected_packets<1>
+cassini:exposure_duration<1>
+cassini:filter_name_1<1>
+cassini:filter_name_2<1>
+cassini:filter_temperature<1>
+cassini:flight_software_version_id<1>
+cassini:gain_mode_id<1>
+cassini:ground_software_version_id<1>
+cassini:image_mid_time<1>
+cassini:image_number<1>
+cassini:image_time<1>
+cassini:image_observation_type<1>
+cassini:instrument_data_rate<1>
+cassini:inst_cmprs_type<1>
+cassini:inst_cmprs_param_malgo<1>
+cassini:inst_cmprs_param_tb<1>
+cassini:inst_cmprs_param_blocks<1>
+cassini:inst_cmprs_param_quant<1>
+cassini:inst_cmprs_rate_expected_bits<1>
+cassini:inst_cmprs_rate_actual_bits<1>
+cassini:inst_cmprs_ratio<1>
+cassini:light_flood_state_flag<1>
+cassini:method_description<1>
+cassini:missing_lines<1>
+cassini:missing_packet_flag<1>
+cassini:optics_temperature_front<1>
+cassini:optics_temperature_back<1>
+cassini:order_number<1>
+cassini:parallel_clock_voltage_index<1>
+cassini:pds3_product_creation_time<1>
+cassini:pds3_product_version_type<1>
+cassini:pds3_target_desc<1>
+cassini:pds3_target_list<1>
+cassini:pds3_target_name<1>
+cassini:pre-pds_version_number<1>
+cassini:prepare_cycle_index<1>
+cassini:readout_cycle_index<1>
+cassini:received_packets<1>
+cassini:sensor_head_electronics_temperature<1>
+cassini:sequence_id<1>
+cassini:sequence_number<1>
+cassini:sequence_title<1>
+cassini:shutter_mode_id<1>
+cassini:shutter_state_id<1>
+cassini:start_time_doy<1>
+cassini:stop_time_doy<1>
+cassini:telemetry_format_id<1>
+cassini:valid_maximum_full_well<1>
+cassini:valid_maximum_DN_sat<1>
diff --git a/test_files/expected/tester_config.yaml b/test_files/expected/tester_config.yaml
index 3ff9bc6..35d3d8c 100644
--- a/test_files/expected/tester_config.yaml
+++ b/test_files/expected/tester_config.yaml
@@ -18,4 +18,6 @@ nillable:
     anticipated: anticipated_alt
 
 label-contents:
-  version_id: 1.1
\ No newline at end of file
+  version_id: 1.1
+  File:
+    creation_date_time: '00:00:00'
diff --git a/test_files/expected/tester_config_label.yaml b/test_files/expected/tester_config_label.yaml
new file mode 100644
index 0000000..ada75dc
--- /dev/null
+++ b/test_files/expected/tester_config_label.yaml
@@ -0,0 +1,13 @@
+
+label-contents:
+  title: Index file for my occultation bundle
+  Modification_Detail:
+    - modification_date: '2024-01-01'
+      version_id: 1.1
+      description: |
+        This is a lengthy description of what this modification
+        changed in the bundle.
+        There were lots of changes.
+    - modification_date: '2023-01-01'
+      version_id: 1.0
+      description: Initial release.
diff --git a/test_files/expected/tester_config_nillable.yaml b/test_files/expected/tester_config_nillable.yaml
new file mode 100644
index 0000000..4be242b
--- /dev/null
+++ b/test_files/expected/tester_config_nillable.yaml
@@ -0,0 +1,18 @@
+nillable:
+  pds:ASCII_Integer:
+    inapplicable: -9999
+    missing: -9988
+    unknown: -9977
+    anticipated: -9966
+
+  pds:ASCII_Real:
+    inapplicable: -9999.0
+    missing: -9988.0
+    unknown: -9977.0
+    anticipated: -9966.0
+
+  pds:ASCII_Short_String_Collapsed:
+    inapplicable: inapplicable_alt
+    missing: missing_alt
+    unknown: unknown_alt
+    anticipated: anticipated_alt
\ No newline at end of file
diff --git a/test_files/labels/bad_lid_label.xml b/test_files/labels/bad_lid_label.xml
new file mode 100644
index 0000000..b6847a7
--- /dev/null
+++ b/test_files/labels/bad_lid_label.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Product_Observational
+  xmlns="http://pds.nasa.gov/pds4/pds/v1"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="
+  http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.xsd
+  http://pds.nasa.gov/pds4/disp/v1 https://pds.nasa.gov/pds4/disp/v1/PDS4_DISP_1B00.xsd
+  http://pds.nasa.gov/pds4/mission/cassini/v1 https://pds.nasa.gov/pds4/mission/cassini/v1/PDS4_CASSINI_1B00_1300.xsd">
+    <Identification_Area>
+        <version_id>1.0</version_id>
+        <title>Cassini ISS Image 1455200455n.img</title>
+        <information_model_version>1.11.0.0</information_model_version>
+    </Identification_Area>
+    <Observing_System>
+        <name>Cassini Orbiter Imaging Science Subsystem</name>
+        <Observing_System_Component>
+            <name>Cassini Orbiter</name>
+            <type>Spacecraft</type>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:context:instrument_host:spacecraft.co</lid_reference>
+                <reference_type>is_instrument_host</reference_type>
+            </Internal_Reference>
+        </Observing_System_Component>
+    </Observing_System>
+</Product_Observational>
diff --git a/test_files/samples/element_extra_file_info.txt b/test_files/samples/element_extra_file_info.txt
new file mode 100644
index 0000000..731d690
--- /dev/null
+++ b/test_files/samples/element_extra_file_info.txt
@@ -0,0 +1,4 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>
+pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+!filename
\ No newline at end of file
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 26e772f..f71e7ed 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -2,6 +2,7 @@
 import pytest
 import os
 import tempfile
+import shutil
 import pds4indextools.pds4_create_xml_index as tools
 
 
@@ -15,21 +16,67 @@
 
 
 @pytest.mark.parametrize(
-        'golden_file,new_file,cmd_line',
+        'golden_file,new_file_index,new_file_headers,cmd_line',
         [
+            #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
+            (
+                str(expected_dir / 'index_file_success.csv'),
+                None, None,
+                []
+            ),
+
+            #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
+            (
+                str(expected_dir / 'index_file_success.csv'),
+                None, None,
+                [
+                    '--generate-label',
+                    'ancillary'
+                 ]
+            ),
+
+            # Testing --limit-xpaths-file with two outputs
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt --output-index-file limit_xpaths_file.csv
+            # Compare result to golden copy:
+            # test_files/expected/limit_xpaths_file_success_1.csv (index) and limit_xpaths_file_success_1.txt (headers)
+            (
+                str(expected_dir / 'limit_xpaths_file_success_1.csv'),
+                'limit_xpaths_file.csv', 'limit_xpaths_file.txt',
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    '--limit-xpaths-file',
+                    str(samples_dir / 'element_1.txt')
+                ]
+            ),
+
             # Testing --limit-xpaths-file
             # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
             # Compare result to golden copy:
             # test_files/expected/limit_xpaths_file_success_1.txt
             (
                 str(expected_dir / 'limit_xpaths_file_success_1.txt'),
-                'limit_xpaths_file.txt',
+                None, 'limit_xpaths_file.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_1.txt'),
-                    '--output-headers-file'
+                    str(samples_dir / 'element_1.txt')
+                ]
+            ),
+
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "nonexistent.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file_wack.txt
+            # Compare result to golden copy:
+            # test_files/expected/limit_xpaths_file_success_1.txt
+            (
+                str(expected_dir / 'limit_xpaths_file_success_1.txt'),
+                None, 'limit_xpaths_file_wack.txt',
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    str(labels_dir.name / Path('nonexistent.xml')),
+                    '--limit-xpaths-file',
+                    str(samples_dir / 'element_1.txt')
                 ]
             ),
 
@@ -38,13 +85,12 @@
             # test_files/expected/limit_xpaths_file_success_2.txt
             (
                 str(expected_dir / 'limit_xpaths_file_success_2.txt'),
-                'limit_xpaths_file_2.txt',
+                None, 'limit_xpaths_file_2.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_2.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'element_2.txt')
                 ]
             ),
 
@@ -53,13 +99,12 @@
             # test_files/expected/limit_xpaths_file_success_2.txt
             (
                 str(expected_dir / 'limit_xpaths_file_success_2.txt'),
-                'elements_dupe_file_2.txt',
+                None, 'elements_dupe_file_2.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_duplicates.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'element_duplicates.txt')
                 ]
             ),
 
@@ -68,14 +113,13 @@
             # test_files/expected/limit_xpaths_file_success_3.txt
             (
                 str(expected_dir / 'limit_xpaths_file_success_3.txt'),
-                'limit_xpaths_file_3.txt',
+                None, 'limit_xpaths_file_3.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     str(labels_dir.name / Path('tester_label_3.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_3.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'element_3.txt')
                 ]
             ),
 
@@ -84,15 +128,14 @@
             # test_files/expected/limit_xpaths_file_success_4.txt
             (
                 str(expected_dir / 'limit_xpaths_file_success_4.txt'),
-                'limit_xpaths_file_4.txt',
+                None, 'limit_xpaths_file_4.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     str(labels_dir.name / Path('tester_label_3.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_4.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'element_4.txt')
                 ]
             ),
 
@@ -102,12 +145,11 @@
             # test_files/expected/simplify_xpaths_success_1.txt
             (
                 str(expected_dir / 'simplify_xpaths_success_1.txt'),
-                'simplify_xpaths_1.txt',
+                None, 'simplify_xpaths_1.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--simplify-xpaths',
-                    '--output-headers-file',
+                    '--simplify-xpaths'
                 ]
             ),
 
@@ -117,7 +159,7 @@
             # test_files/expected/simplify_xpaths_success_2.txt
             (
                 str(expected_dir / 'simplify_xpaths_success_2.txt'),
-                'simplify_xpaths_2.txt',
+                None, 'simplify_xpaths_2.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
@@ -125,8 +167,7 @@
                     str(labels_dir.name / Path('tester_label_3.xml')),
                     '--simplify-xpaths',
                     '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_2.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'elements_xpath_simplify_2.txt')
                 ]
             ),
 
@@ -136,14 +177,13 @@
             # test_files/expected/simplify_xpaths_success_3.txt
             (
                 str(expected_dir / 'simplify_xpaths_success_3.txt'),
-                'simplify_xpaths_3.txt',
+                None, 'simplify_xpaths_3.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     '--simplify-xpaths',
                     '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_3.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'elements_xpath_simplify_3.txt')
                 ]
             ),
 
@@ -153,14 +193,13 @@
             # test_files/expected/simplify_xpaths_success_4.txt
             (
                 str(expected_dir / 'simplify_xpaths_success_4.txt'),
-                'simplify_xpaths_4.txt',
+                None, 'simplify_xpaths_4.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_3.xml')),
                     '--simplify-xpaths',
                     '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_4.txt'),
-                    '--output-headers-file',
+                    str(samples_dir / 'elements_xpath_simplify_4.txt')
                 ]
             ),
 
@@ -170,15 +209,14 @@
             # test_files/expected/extra_file_info_success_1.csv
             (
                 str(expected_dir / 'extra_file_info_success_1.csv'),
-                'extra_file_info_1.csv',
+                'extra_file_info_1.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'element_1.txt'),
+                    str(samples_dir / 'element_extra_file_info.txt'),
                     '--add-extra-file-info',
                     'filename,filepath',
-                    '--output-index-file',
                 ]
             ),
 
@@ -188,7 +226,7 @@
             # test_files/expected/extra_file_info_success_2.csv
             (
                 str(expected_dir / 'extra_file_info_success_2.csv'),
-                'extra_file_info_2.csv',
+                'extra_file_info_2.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
@@ -199,8 +237,7 @@
                     '--add-extra-file-info',
                     'filename',
                     '--sort-by',
-                    'filename',
-                    '--output-index-file',
+                    'filename'
                 ]
             ),
 
@@ -209,7 +246,7 @@
             # test_files/expected/extra_file_info_success_3.csv
             (
                 str(expected_dir / 'extra_file_info_success_3.csv'),
-                'extra_file_info_3.csv',
+                'extra_file_info_3.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
@@ -220,8 +257,7 @@
                     '--add-extra-file-info',
                     'filename,filepath,lid,bundle,bundle_lid',
                     '--sort-by',
-                    'filename',
-                    '--output-index-file',
+                    'filename'
                 ]
             ),
 
@@ -231,12 +267,24 @@
             # test_files/expected/clean_header_field_names_success_1.txt
             (
                 str(expected_dir / 'clean_header_field_names_success_1.txt'),
-                'clean_header_field_names_1.txt',
+                None, 'clean_header_field_names_1.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--clean-header-field-names',
-                    '--output-headers-file',
+                    '--clean-header-field-names'
+                ]
+            ),
+
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-index-file clean_header_field_names_2.csv
+            # Compare result to golden copy:
+            # test_files/expected/clean_header_field_names_success_2.csv
+            (
+                str(expected_dir / 'clean_header_field_names_success_2.csv'),
+                'clean_header_field_names_2.csv', None,
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    '--clean-header-field-names'
                 ]
             ),
 
@@ -245,15 +293,14 @@
             # test_files/expected/clean_header_field_names_success_2.txt
             (
                 str(expected_dir / 'clean_header_field_names_success_2.txt'),
-                'clean_header_field_names_2.txt',
+                None, 'clean_header_field_names_2.txt',
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
                     str(labels_dir.name / Path('tester_label_2.xml')),
                     '--limit-xpaths-file',
                     str(samples_dir / 'elements_clean_header_field_names.txt'),
-                    '--clean-header-field-names',
-                    '--output-headers-file',
+                    '--clean-header-field-names'
                 ]
             ),
 
@@ -263,7 +310,7 @@
             # test_files/expected/sort_by_success_1.csv
             (
                 str(expected_dir / 'sort_by_success_1.csv'),
-                'sort_by_1.csv',
+                'sort_by_1.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
@@ -273,8 +320,7 @@
                     str(samples_dir / 'elements_clean_header_field_names.txt'),
                     '--sort-by',
                     'pds:Product_Observational/pds:Identification_Area<1>/'
-                    'pds:logical_identifier<1>',
-                    '--output-index-file',
+                    'pds:logical_identifier<1>'
                 ]
             ),
 
@@ -283,7 +329,7 @@
             # test_files/expected/sort_by_success_2.csv
             (
                 str(expected_dir / 'sort_by_success_2.csv'),
-                'sort_by_2.csv',
+                'sort_by_2.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
@@ -294,8 +340,7 @@
                     '--add-extra-file-info',
                     'bundle_lid,filepath',
                     '--sort-by',
-                    'bundle_lid',
-                    '--output-index-file',
+                    'bundle_lid'
                 ]
             ),
 
@@ -304,7 +349,7 @@
             # test_files/expected/identical_labels_success.csv
             (
                 str(expected_dir / 'identical_labels_success.csv'),
-                'identical_labels.csv',
+                'identical_labels.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('identical_label_*.xml')),
@@ -313,8 +358,7 @@
                     '--add-extra-file-info',
                     'filename',
                     '--sort-by',
-                    'filename',
-                    '--output-index-file'
+                    'filename'
                 ]
             ),
 
@@ -323,13 +367,12 @@
             # test_files/expected/nilled_element_success.csv
             (
                 str(expected_dir / 'nilled_element_success.csv'),
-                'nilled_element.csv',
+                'nilled_element.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('nilled_label.xml')),
                     '--limit-xpaths-file',
-                    str(samples_dir / 'elements_nilled.txt'),
-                    '--output-index-file'
+                    str(samples_dir / 'elements_nilled.txt')
                 ]
             ),
 
@@ -338,38 +381,154 @@
             # test_files/expected/fixed_width_success.csv
             (
                 str(expected_dir / 'fixed_width_success.csv'),
-                'fixed_width.csv',
+                'fixed_width.csv', None,
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    '--fixed-width'
+                ]
+            ),
+
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_1.csv
+            # Compare result to golden copy:
+            # test_files/expected/label_success_1.csv
+            # test_files/expected/label_success_1.xml
+            (
+                str(expected_dir / 'label_success_1.csv'),
+                'generated_label_1.csv', None,
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    '--generate-label',
+                    'ancillary',
+                    '--config',
+                    str(expected_dir / 'tester_config.yaml')
+                ]
+            ),
+
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_2.csv
+            # Compare result to golden copy:
+            # test_files/expected/label_success_2.csv
+            # test_files/expected/label_success_2.xml
+            (
+                str(expected_dir / 'label_success_2.csv'),
+                'generated_label_2.csv', None,
                 [
                     str(test_files_dir),
                     str(labels_dir.name / Path('tester_label_1.xml')),
+                    '--generate-label',
+                    'metadata',
                     '--fixed-width',
-                    '--output-index-file'
+                    '--config',
+                    str(expected_dir / 'tester_config.yaml')
+                ]
+            ),
+
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_3.csv
+            # Compare result to golden copy:
+            # test_files/expected/label_success_3.csv
+            # test_files/expected/label_success_3.xml
+            (
+                str(expected_dir / 'label_success_3.csv'),
+                'generated_label_3.csv', None,
+                [
+                    str(test_files_dir),
+                    str(labels_dir.name / Path('tester_label_1.xml')),
+                    str(labels_dir.name / Path('tester_label_2.xml')),
+                    str(labels_dir.name / Path('tester_label_3.xml')),
+                    '--limit-xpaths-file',
+                    str(samples_dir / 'element_5.txt'),
+                    '--add-extra-file-info',
+                    'filename,filepath,lid,bundle,bundle_lid',
+                    '--sort-by',
+                    'filename',
+                    '--generate-label',
+                    'ancillary',
+                    '--config',
+                    str(expected_dir / 'tester_config.yaml')
                 ]
             )
         ]
     )
-def test_success(golden_file, new_file, cmd_line):
+def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
     # Create a temporary directory
     with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
-        # THE PATH TO THE NEW FILE
-        path_to_file = temp_dir_path / new_file
-        # Call main() function with the simulated command line arguments
-        cmd_line.append(str(path_to_file))
-        tools.main(cmd_line)
+        if new_file_index == None and new_file_headers == None:
+            os.chdir(temp_dir_path)
+            cmd_line.append(str(test_files_dir))
+            cmd_line.append(str(labels_dir.name / Path('tester_label_1.xml')))
+            # Call main() function with the simulated command line arguments
+            tools.main(cmd_line)
+
+            path_to_file = temp_dir_path / 'index.csv'
+            # Assert that the file now exists
+            assert os.path.isfile(path_to_file)
+
+            # Open and compare the two files
+            with open(path_to_file, 'rb') as created:
+                formed = created.read()
+
+            with open(golden_file, 'rb') as new:
+                expected = new.read()
+
+            assert formed == expected
+            os.remove(path_to_file)
+            os.chdir(ROOT_DIR)
+
+        else:
+            # THE PATH TO THE NEW FILE
+            if new_file_index:
+                path_to_file = temp_dir_path / new_file_index
+                cmd_line.append('--output-index-file')
+                cmd_line.append(str(path_to_file))
+                # Call main() function with the simulated command line arguments
+                tools.main(cmd_line)
+                # Assert that the file now exists
+                assert os.path.isfile(path_to_file)
+
+                # Open and compare the two files
+                with open(path_to_file, 'rb') as created:
+                    formed = created.read()
+
+                with open(golden_file, 'rb') as new:
+                    expected = new.read()
+
+                assert formed == expected
 
-        # Assert that the file now exists
-        assert os.path.isfile(path_to_file)
+                if '--generate-label' in cmd_line:
+                    label_path = str(path_to_file).replace('.csv', '.xml')
+                    golden_label = str(golden_file).replace('.csv', '.xml')
+                    assert os.path.isfile(label_path)
 
-        # Open and compare the two files
-        with open(path_to_file, 'rb') as created:
-            formed = created.read()
+                    # Open and compare the two files
+                    with open(label_path, 'rb') as created:
+                        formed = created.read()
 
-        with open(golden_file, 'rb') as new:
-            expected = new.read()
+                    with open(golden_label, 'rb') as new:
+                        expected = new.read()
 
-        assert formed == expected
+                    assert formed == expected
+
+            if new_file_headers:
+                path_to_file = temp_dir_path / new_file_headers
+                golden_file = str(golden_file).replace('.csv', '.txt')
+                cmd_line.append('--output-headers-file')
+                cmd_line.append(str(path_to_file))
+                # Call main() function with the simulated command line arguments
+                tools.main(cmd_line)
+                # Assert that the file now exists
+                assert os.path.isfile(path_to_file)
+
+                # Open and compare the two files
+                with open(path_to_file, 'rb') as created:
+                    formed = created.read()
+
+                with open(golden_file, 'rb') as new:
+                    expected = new.read()
+
+                assert formed == expected
 
 
 @pytest.mark.parametrize(
@@ -386,6 +545,7 @@ def test_success(golden_file, new_file, cmd_line):
             '--add-extra-file-info',
             'bad_element',
             '--output-headers-file',
+            'hdout.txt'
         ),
 
         # Executable command: pds4_create_xml_index ../test_files/labels "bad_directory/labels/tester_label_*.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename --output-headers-file hdout.txt
@@ -397,6 +557,7 @@ def test_success(golden_file, new_file, cmd_line):
             '--add-extra-file-info',  # extra file info
             'filename',
             '--output-headers-file',
+            'hdout.txt'
         ),
 
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_empty.txt --output-headers-file hdout.txt
@@ -408,7 +569,56 @@ def test_success(golden_file, new_file, cmd_line):
             '--limit-xpaths-file',
             str(samples_dir / 'element_empty.txt'),  # empty elements file
             '--output-headers-file',
+            'hdout.txt'
+        ),
+
+        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --sort-by bad_sort --output-headers-file hdout.csv
+        (
+            str(test_files_dir), 
+            str(labels_dir.name / Path('tester_label_1.xml')),
+            '--simplify-xpaths',
+            '--sort-by',
+            'bad_sort',
+            '--output-index-file',
+            'hdout.csv'
+        ),
+
+        #Executable command: pds4_create_xml_index ../test_files/labels "nonexistent.xml" --output-headers-file hdout.txt
+        (
+            str(test_files_dir), 
+            str(labels_dir.name / Path('nonexistent.xml')),
+            '--output-headers-file',
+            'hdout.txt',
+        ),
+
+        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file hdout.txt
+        (
+            str(test_files_dir), 
+            str(labels_dir.name / Path('tester_label_1.xml')),
+            '--limit-xpaths-file',
+            str(samples_dir / 'elements_xpath_simplify_3.txt'),
+            '--output-headers-file',
+            'hdout.txt',
+        ),
+
+        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_*.xml" --generate-label ancillary --output-headers-file hdout.txt
+        (
+            str(test_files_dir), 
+            str(labels_dir.name / Path('tester_label_*.xml')),
+            '--generate-label',
+            'ancillary',
+            '--output-headers-file',
+            'hdout.txt',
+        ),
+
+        #Executable command: pds4_create_xml_index ../test_files/labels "bad_lid_label.xml" --output-headers-file hdout.txt
+        (
+            str(test_files_dir), 
+            str(labels_dir.name / Path('bad_lid_label.xml')),
+            '--output-headers-file',
+            'hdout.txt',
         )
+
     ]
 )
 def test_failures(cmd_line):
@@ -417,7 +627,8 @@ def test_failures(cmd_line):
         tools.main(cmd_line)
     assert e.type == SystemExit
     assert e.value.code != 0  # Check that the exit code indicates failure
-
+    if os.path.isfile('hdout.txt'):
+        os.remove('hdout.txt')
 
 @pytest.mark.parametrize(
     'new_file,cmd_line',
@@ -454,3 +665,7 @@ def test_failure_message(capfd, new_file, cmd_line):
 
         expected_message = ("Non-nillable element in")
         assert expected_message in captured.out or expected_message in captured.err
+
+def test_invalid_arguments():
+    with pytest.raises(SystemExit):  # Assuming argparse will call sys.exit on failure
+        tools.main(["--invalid-option"])
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index c746b73..cf9f9d2 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -1,3 +1,4 @@
+import argparse
 from datetime import datetime
 from lxml import etree
 import os
@@ -64,7 +65,7 @@ def test_load_config_object():
 
     # Tests that the config_object is loaded over.
     config_object = tools.load_config_file(
-        specified_config_files=[str(expected_dir/'tester_config.yaml'),])
+        specified_config_files=[str(expected_dir/'tester_config_nillable.yaml'),])
 
     assert config_object['nillable']['pds:ASCII_Date_YMD']['inapplicable'] == '0001-01-01'
     assert config_object['nillable']['pds:ASCII_Date_YMD']['missing'] == '0002-01-01'
@@ -90,6 +91,13 @@ def test_load_config_object():
     assert (config_object['nillable']['pds:ASCII_Short_String_Collapsed']
             ['anticipated'] == 'anticipated_alt')
 
+    # Tests specified configuration files with one or the other
+    config_object = tools.load_config_file(
+        specified_config_files=[str(expected_dir/'tester_config_label.yaml'),])
+
+    assert config_object['label-contents']['version_id'] == '1.0'
+    assert config_object['label-contents']['title'] == 'Index file for my occultation bundle'
+
     # A bad default config file
     with pytest.raises(SystemExit):
         tools.load_config_file(default_config_file=expected_dir/'non_existent_file.ini')
@@ -145,6 +153,9 @@ def test_default_value_for_nil():
             '0004-01-01T12:00Z')
     assert tools.default_value_for_nil(config_object, datetime_ymd_utc,
                                        'anticipated') == '0004-01-01T12:00Z'
+    
+    # Testing None
+    assert tools.default_value_for_nil(config_object, None, 'anticipated') == None
 
 
 def test_default_value_for_nil_ascii_date_time_ymd_utc():
@@ -241,6 +252,16 @@ def test_get_longest_row_length():
     result = tools.get_longest_row_length(filename)
     assert result == 254
 
+    # Failure
+    with pytest.raises(OSError):
+        filename = (
+            '0eD8s3JGt9RmE5YnVpLZxkf2A1gNbWqQ7TXHlchyojFzPBrMOIKvaSuUwd4pC6JrXjmtbZVnLQW9'
+            'gDKfpq7cHWnPoyT5sBM3YXIzlq06F4GDvw1MRaOJpEZU9kBX2AysnVrH6TQeY3G8oKPw5xfmLzN2'
+            'hF7sJ9Qc8LbH4ErWaMKtVUXoPIjzpRy1D0qW4s3N7Km8HGaLFCvxl6eyP7UZjWopX4rBdQ2VME3G'
+            '9XtF8h2TsjvQnKwDYLb50O8xFI6gUJwpQmA7nrZ4EYkTXoR9CpMN8QG6fKjW5uVDl3oJ1wzBsPpT'
+            '2cFmLRe7Hg1SYkN8qQv9RcHjA0F3I4mU')
+        result = tools.get_longest_row_length(filename)
+
 
 @pytest.fixture
 def create_temp_file():
@@ -260,3 +281,289 @@ def test_get_creation_date(create_temp_file, platform_name):
         assert isinstance(creation_date, str)
         # Assert that the returned date is in ISO 8601 format
         assert datetime.fromisoformat(creation_date)
+
+
+def test_correct_duplicates():
+    label_results = {
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name<1>': 1,
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_1<1>': 2,
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_2<1>': 3,
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_3<1>': 4,
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_4<1>': 5,
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_5': 6
+        }
+    
+    tools.correct_duplicates(label_results)
+
+    assert label_results == {
+        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name<1>': 1,
+        '../geom:SPICE_Kernel_Identification<2>/geom:spice_kernel_file_name<1>': 2,
+        '../geom:SPICE_Kernel_Identification<3>/geom:spice_kernel_file_name<1>': 3,
+        '../geom:SPICE_Kernel_Identification<4>/geom:spice_kernel_file_name<1>': 4,
+        '../geom:SPICE_Kernel_Identification<5>/geom:spice_kernel_file_name<1>': 5,
+        '../geom:SPICE_Kernel_Identification<6>/geom:spice_kernel_file_name<1>': 6
+        }
+
+def test_update_nillable_elements_from_xsd_file():
+    xsd_files = []
+    nillable_elements_info = {}
+    label_files = ['test_files/labels/tester_label_1.xml',
+                   'test_files/labels/tester_label_2.xml']
+    
+    for label_file in label_files:
+        xml_urls = tools.process_schema_location(label_file)
+        for url in xml_urls:
+            if url not in xsd_files:
+                xsd_files.append(url)
+                tools.update_nillable_elements_from_xsd_file(url, nillable_elements_info)
+
+    assert nillable_elements_info == {
+        'start_time': 'pds:ASCII_Date_Time',
+        'start_date_time': 'pds:ASCII_Date_Time_YMD_UTC',
+        'stop_time': 'pds:ASCII_Date_Time',
+        'stop_date_time': 'pds:ASCII_Date_Time_YMD_UTC',
+        'publication_date': 'pds:ASCII_Date_YMD',
+        'stop_date': 'pds:ASCII_Date_YMD',
+        'reference_frame_id': 'pds:ASCII_Short_String_Collapsed',
+        'gain_mode_id': 'cassini:gain_mode_id_WO_Units',
+        'gain_mode_id_ir': 'pds:ASCII_Short_String_Collapsed',
+        'gain_mode_id_vis': 'pds:ASCII_Short_String_Collapsed',
+        'wavelength_range': 'pds:ASCII_Short_String_Collapsed',
+        'dsn_station_number': 'pds:ASCII_Integer'}
+
+
+def test_update_nillable_elements_from_xsd_file_with_edge_cases():
+    # Scenario 1: Testing with a type attribute that is None or already in
+    # nillable_elements_info
+    
+    # Mock XSD content with an element that doesn't have a 'type' attribute
+    xsd_content_missing_type = """
+    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+        <xs:element name="element_without_type" nillable="true"/>
+        <xs:element name="start_time" type="pds:ASCII_Date_Time" nillable="true"/>
+    </xs:schema>
+    """
+    # Mock XSD content where type_attribute is already in nillable_elements_info
+    xsd_content_duplicate_type = """
+    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+        <xs:element name="start_time" type="pds:ASCII_Date_Time" nillable="true"/>
+        <xs:element name="duplicate_element" type="pds:ASCII_Date_Time" nillable="true"/>
+    </xs:schema>
+    """
+
+    # Parse the mock XSD contents into XML trees
+    tree_missing_type = etree.fromstring(xsd_content_missing_type)
+    tree_duplicate_type = etree.fromstring(xsd_content_duplicate_type)
+
+    # Mock the download_xsd_file function to return these trees based on input
+    with mock.patch('pds4indextools.pds4_create_xml_index.download_xsd_file') as mock_download:
+        # Define the behavior of the mock for each file
+        mock_download.side_effect = (
+            lambda url: tree_missing_type if 'missing_type' in url
+            else tree_duplicate_type
+            )
+
+        # Initialize the dictionary that will hold the nillable elements information
+        nillable_elements_info = {
+            'start_time': 'pds:ASCII_Date_Time'  # Simulate an existing entry
+        }
+
+        # Call the function with the first scenario (missing type)
+        tools.update_nillable_elements_from_xsd_file(
+            'test_files/labels/missing_type.xsd', nillable_elements_info)
+        assert 'element_without_type' not in nillable_elements_info
+
+
+def test_clean_header_field_names():
+    data = {
+        'column:1': [1, 2, 3],
+        'column/2': [4, 5, 6],
+        '<column>3': [7, 8, 9],
+        'normal_column': [10, 11, 12]
+        }
+    df = pd.DataFrame(data)
+    
+    tools.clean_headers(df)
+    new = df.to_dict()
+
+    assert new == {
+        'column_1': {0: 1, 1: 2, 2: 3},
+        'column__2': {0: 4, 1: 5, 2: 6},
+        '_column3': {0: 7, 1: 8, 2: 9},
+        'normal_column': {0: 10, 1: 11, 2: 12}
+        }
+    
+def test_compute_max_field_lengths():
+
+    lengths = tools.compute_max_field_lengths(
+        str(expected_dir / 'extra_file_info_success_1.csv'))
+
+    assert lengths == {
+        'filename': 18,
+        'filepath': 25,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>':
+        72,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>': 3,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>': 132
+        }
+    
+    # failure
+    with pytest.raises(SystemExit):
+        lengths = tools.compute_max_field_lengths(
+        str(expected_dir / 'fake_file.csv'))
+    
+
+def test_sort_dataframe_key_error():
+    df = pd.DataFrame({
+        'name': ['Alice', 'Bob', 'Charlie'],
+        'age': [30, 25, 35]
+    })
+    sort_keys = ['height']  # Non-existent column
+
+    with pytest.raises(ValueError, match=f"Unknown sort key '{sort_keys[0]}'. For a list of available sort "
+                                         f"keys, use the --output-headers-file option."):
+        tools.sort_dataframe(df, sort_keys)
+
+def test_validate_label_type():
+    arg = 'ancillary'
+    valid_choices = {'ancillary': 'Product_Ancillary',
+                     'metadata': 'Product_Metadata_Supplemental'}
+    assert tools.validate_label_type(arg, valid_choices) == 'Product_Ancillary'
+
+    # failure
+    with pytest.raises(argparse.ArgumentTypeError):
+        arg = 'bad_label_type'
+        assert tools.validate_label_type(arg, valid_choices) == 'Product_Ancillary'
+
+
+@mock.patch('os.path.exists')
+def test_generate_unique_filename(mock_exists):
+    # Setup the mock to return True for the first two checks and False thereafter
+    mock_exists.side_effect = [True, True, False]
+
+    # Run the function with a base filename
+    base_name = "file.txt"
+    result = tools.generate_unique_filename(base_name)
+
+    # Assert that the result is what we expect given the mocked behavior
+    assert result == "file2.txt"  # Since the first two checks return True, the counter reaches 2
+
+    # Ensure os.path.exists was called the expected number of times
+    assert mock_exists.call_count == 3
+
+
+import textwrap as _textwrap
+
+def test_fill_text():
+    # Create an instance of MultilineFormatter
+    formatter = tools.MultilineFormatter(prog="test_prog")
+
+    # Example input text with multiline separator
+    input_text = "This is a long text that should be wrapped.|nThis is a new paragraph."
+
+    # Expected formatted output (with appropriate indentation and line wrapping)
+    width = 40
+    indent = "    "  # 4 spaces
+
+    expected_output = (
+        _textwrap.fill("This is a long text that should be wrapped.", width, initial_indent=indent, subsequent_indent=indent) + '\n' +
+        _textwrap.fill("This is a new paragraph.", width, initial_indent=indent, subsequent_indent=indent) + '\n'
+    )
+
+    # Run the _fill_text method
+    result = formatter._fill_text(input_text, width, indent)
+
+    # Assert the result matches the expected output
+    assert result == expected_output
+
+from unittest.mock import patch
+
+# Assume the get_true_type function is imported from the relevant module.
+# from pds4indextools.pds4_create_xml_index import get_true_type
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+    # Setup mocks
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+    mock_find_base_attribute.side_effect = ["mock_true_type", None]  # Found in the first file
+
+    xsd_files = ["file1.xsd", "file2.xsd"]
+    tag = "mock_tag"
+    namespaces = {"existing_namespace": "value"}
+    
+    result = tools.get_true_type(xsd_files, tag, namespaces)
+    
+    assert result == "mock_true_type"
+    mock_download_xsd_file.assert_called_once_with("file1.xsd")
+    mock_find_base_attribute.assert_called_once_with("mock_xsd_tree", tag, {"mock_namespace": "mock_value"})
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+    # Setup mocks
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+    
+    # First file returns None for both original and modified tags
+    # Second file returns the true_type for the original tag
+    mock_find_base_attribute.side_effect = [None, None, "mock_true_type"]
+
+    xsd_files = ["file1.xsd", "file2.xsd"]
+    tag = "mock_tag"
+    namespaces = {"existing_namespace": "value"}
+    
+    result = tools.get_true_type(xsd_files, tag, namespaces)
+
+    print(f"Download called: {mock_download_xsd_file.call_count} times")
+    print(f"Find base attribute called: {mock_find_base_attribute.call_count} times")
+
+    # Check if the loop iterates over both files and correctly identifies the type in the second file
+    assert result == "mock_true_type"
+    assert mock_download_xsd_file.call_count == 2  # Should be called for both files
+    assert mock_find_base_attribute.call_count == 3  # Should be called twice for file1 (original + modified) and once for file2
+
+
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_with_modified_tag(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+    # Setup mocks
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+    mock_find_base_attribute.side_effect = [None, "mock_true_type"]  # Found after modifying the tag
+
+    xsd_files = ["file1.xsd"]
+    tag = "mock_tag"
+    namespaces = {"existing_namespace": "value"}
+    
+    result = tools.get_true_type(xsd_files, tag, namespaces)
+    
+    assert result == "mock_true_type"
+    mock_find_base_attribute.assert_any_call("mock_xsd_tree", "mock_tag_WO_Units", {"mock_namespace": "mock_value"})
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+    # Setup mocks
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+    mock_find_base_attribute.return_value = None  # Never found
+
+    xsd_files = ["file1.xsd", "file2.xsd"]
+    tag = "mock_tag"
+    namespaces = {"existing_namespace": "value"}
+    
+    result = tools.get_true_type(xsd_files, tag, namespaces)
+    
+    assert result == None
+    assert mock_download_xsd_file.call_count == 2
+    assert mock_find_base_attribute.call_count == 4  # Both original and modified tags are checked for both files

From e1cf1f9c01d9e901a3b38f0be1982c091c07a4fb Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 19 Aug 2024 14:17:01 -0700
Subject: [PATCH 03/24] Adding missing docstrings

---
 pds4indextools/pds4_create_xml_index.py | 68 ++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 96d4d12..4db41fd 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -54,7 +54,7 @@ def convert_header_to_xpath(root, xml_header_path, namespaces):
     Parameters:
         root (Element): The root element of the XML document.
         xml_header_path (str): Original XML header path.
-        namespaces (dict): Dictionary of XML namespace mappings.
+        namespaces (dict): A dictionary containing XML namespace mappings.
 
     Returns:
         str: Converted XPath expression.
@@ -300,20 +300,32 @@ def filter_dict_by_glob_patterns(input_dict, glob_patterns, valid_add_extra_file
 
 
 def get_true_type(xsd_files, tag, namespaces):
+    """
+    Determines the true type of a specified tag by searching through a list of XSD files.
+
+    This function iterates through the provided list of XSD files and attempts to find the 
+    "true type" of the given XML tag by examining its attributes and base types. If the
+    type is not found with the original tag, a modified version of the tag is also
+    checked.
+
+    Parameters:
+        xsd_files (list): A list of file paths or URLs to the XSD files.
+        tag (str): The XML tag to search for within the XSD files.
+        namespaces (dict): A dictionary containing XML namespace mappings.
+
+    Returns:
+        str or None: The "true type" of the tag if found, otherwise `None`.
+    """
     def search_type(xsd_file, tag, namespaces):
-        print(f"Processing file: {xsd_file}")
         xsd_tree = download_xsd_file(xsd_file)
         namespaces = scrape_namespaces(xsd_tree)
         true_type = find_base_attribute(xsd_tree, tag, namespaces)
         if true_type:
-            print(f"Found true_type for tag '{tag}' in file: {xsd_file}")
             return true_type
 
         # Check for modified tag if the first search does not find a match
         modified_tag = tag + "_WO_Units"
         true_type = find_base_attribute(xsd_tree, modified_tag, namespaces)
-        if true_type:
-            print(f"Found true_type for modified tag '{modified_tag}' in file: {xsd_file}")
         return true_type  # This will return either the found type or None
 
     for xsd_file in xsd_files:
@@ -322,8 +334,7 @@ def search_type(xsd_file, tag, namespaces):
             print(f"Returning true_type found in file: {xsd_file}")
             return true_type
 
-    print("No true_type found in any file.")
-    return None  # Return None if no match is found in any file
+    return None
 
 
 def load_config_file(
@@ -926,11 +937,44 @@ def scrape_namespaces(tree):
 
 
 def sort_dataframe(df, sort_keys):
-        try:
-            df.sort_values(by=sort_keys, inplace=True)
-        except KeyError as bad_sort:
-            raise ValueError(f'Unknown sort key {bad_sort}. For a list of available sort '
-                             f'keys, use the --output-headers-file option.')
+    """
+    Sorts a DataFrame based on specified keys.
+
+    This function sorts the input DataFrame in place using the provided sort keys. 
+    If an invalid key is provided, a `ValueError` is raised with a message indicating 
+    the unknown key and suggesting how to obtain a list of valid keys.
+
+    Parameters:
+        df (pandas.DataFrame): The DataFrame to be sorted.
+        sort_keys (str or list of str): The column name(s) to sort the DataFrame by. 
+                                        Can be a single string or a list of strings.
+
+    Raises:
+        ValueError: If any of the provided sort keys are not found in the DataFrame, 
+                    a `ValueError` is raised with a descriptive error message.
+
+    Example:
+        >>> df = pd.DataFrame({
+        ...     'name': ['Alice', 'Bob', 'Charlie'],
+        ...     'age': [25, 30, 22]
+        ... })
+        >>> sort_keys = ['age']
+        >>> sort_dataframe(df, sort_keys)
+        >>> print(df)
+             name  age
+        2  Charlie   22
+        0    Alice   25
+        1      Bob   30
+
+    Notes:
+        - The sorting is done in place, so the original DataFrame is modified.
+        - The function will raise an error if any of the specified sort keys are invalid.
+    """
+    try:
+        df.sort_values(by=sort_keys, inplace=True)
+    except KeyError as bad_sort:
+        raise ValueError(f'Unknown sort key {bad_sort}. For a list of available sort '
+                            f'keys, use the --output-headers-file option.')
 
 
 def get_creation_date(file_path): 

From 6edcafcb333fec6c6f14e7216430d85ed1daaba5 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 19 Aug 2024 14:38:51 -0700
Subject: [PATCH 04/24] Making everything flake8 compliant

---
 pds4indextools/pds4_create_xml_index.py      |  51 ++++-----
 tests/test_pds4_create_xml_index_blackbox.py |  29 +++---
 tests/test_pds4_create_xml_index_whitebox.py | 104 +++++++++++--------
 3 files changed, 96 insertions(+), 88 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 4db41fd..956eed4 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -303,7 +303,7 @@ def get_true_type(xsd_files, tag, namespaces):
     """
     Determines the true type of a specified tag by searching through a list of XSD files.
 
-    This function iterates through the provided list of XSD files and attempts to find the 
+    This function iterates through the provided list of XSD files and attempts to find the
     "true type" of the given XML tag by examining its attributes and base types. If the
     type is not found with the original tag, a modified version of the tag is also
     checked.
@@ -725,7 +725,7 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
 
             # Attempt to find the type definition in the document
             type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
-                                        f'//xs:complexType[@name="{type_name}"]')
+                                     f'//xs:complexType[@name="{type_name}"]')
             type_definition = tree.xpath(
                 type_definition_xpath, namespaces=namespace)
 
@@ -737,12 +737,12 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
 
                 try:
                     restriction = type_definition.find('.//xs:restriction',
-                                        namespaces=namespace)
+                                                       namespaces=namespace)
                     base_type = restriction.get('base')
 
                 except AttributeError:
                     extension = type_definition.find('.//xs:extension',
-                                    namespaces=namespace)
+                                                     namespaces=namespace)
                     base_type = extension.get('base')
 
                 nillable_elements_info[name] = (
@@ -793,7 +793,6 @@ def pad_column_values_and_headers(df):
 
         return padded_df
 
-
     rows = []
     for result_dict in results_list:
         rows.append(result_dict)
@@ -808,7 +807,6 @@ def pad_column_values_and_headers(df):
             print(bad_sort)
             sys.exit(1)
 
-
     if args.clean_header_field_names:
         clean_headers(df)
 
@@ -848,7 +846,6 @@ def find_base_attribute(xsd_tree, target_name, new_namespaces):
     }
     namespaces.update(new_namespaces)
 
-
     def get_base_type(query):
         """
         Executes an XPath query to find the base type.
@@ -940,17 +937,17 @@ def sort_dataframe(df, sort_keys):
     """
     Sorts a DataFrame based on specified keys.
 
-    This function sorts the input DataFrame in place using the provided sort keys. 
-    If an invalid key is provided, a `ValueError` is raised with a message indicating 
+    This function sorts the input DataFrame in place using the provided sort keys.
+    If an invalid key is provided, a `ValueError` is raised with a message indicating
     the unknown key and suggesting how to obtain a list of valid keys.
 
     Parameters:
         df (pandas.DataFrame): The DataFrame to be sorted.
-        sort_keys (str or list of str): The column name(s) to sort the DataFrame by. 
+        sort_keys (str or list of str): The column name(s) to sort the DataFrame by.
                                         Can be a single string or a list of strings.
 
     Raises:
-        ValueError: If any of the provided sort keys are not found in the DataFrame, 
+        ValueError: If any of the provided sort keys are not found in the DataFrame,
                     a `ValueError` is raised with a descriptive error message.
 
     Example:
@@ -974,10 +971,10 @@ def sort_dataframe(df, sort_keys):
         df.sort_values(by=sort_keys, inplace=True)
     except KeyError as bad_sort:
         raise ValueError(f'Unknown sort key {bad_sort}. For a list of available sort '
-                            f'keys, use the --output-headers-file option.')
+                         f'keys, use the --output-headers-file option.')
 
 
-def get_creation_date(file_path): 
+def get_creation_date(file_path):
     """
     Returns the creation date of a file in ISO 8601 format.
 
@@ -995,7 +992,7 @@ def get_creation_date(file_path):
         stat = os.stat(file_path)
         try:
             creation_time = stat.st_birthtime
-        except AttributeError: # pragma: no coverage
+        except AttributeError:  # pragma: no coverage
             # Fallback to the last modification time if birth time is not available
             creation_time = stat.st_mtime
 
@@ -1168,7 +1165,7 @@ def _fill_text(self, text, width, indent):
 
 def main(cmd_line=None):
     epilog_sfx = ''
-    if __version__ != 'Version unspecified': # pragma: no coverage
+    if __version__ != 'Version unspecified':  # pragma: no coverage
         epilog_sfx = f'|nVersion: {__version__}'
     parser = argparse.ArgumentParser(
         formatter_class=MultilineFormatter,
@@ -1312,14 +1309,14 @@ def main(cmd_line=None):
 
     for pattern in patterns:
         files = directory_path.glob(pattern)
-    
+
         # Create an iterator from the generator
         files_iter = iter(files)
-        
+
         # Use a sentinel object to check if there's any item
         sentinel = object()
         first_file = next(files_iter, sentinel)
-        
+
         if first_file is sentinel:
             print(f"No files found for pattern: {pattern}")
         else:
@@ -1368,7 +1365,7 @@ def main(cmd_line=None):
 
         filepath = str(label_file.relative_to(args.directorypath)).replace('\\', '/')
         # PDS4 compliant filepaths must be less than 255 characters.
-        if len(filepath) > 255: # pragma: no coverage
+        if len(filepath) > 255:  # pragma: no coverage
             print(f'Filepath {filepath} exceeds 255 character limit.')
             sys.exit(1)
 
@@ -1382,7 +1379,7 @@ def main(cmd_line=None):
         label_results = {}
         traverse_and_store(root, tree, label_results, xsd_files,
                            nillable_elements_info, config, label_file)
-                           
+
         # # The XPath headers in the label_results dictionary are reformatted to
         # # improve readability. Each XPath's namespace is replaced with its prefix for
         # # faster reference. Duplicate XPaths are made unique to ensure all results are
@@ -1444,7 +1441,6 @@ def main(cmd_line=None):
 
         all_results.append(label_results)
 
-
     if args.add_extra_file_info and elements_to_scrape is not None:
         elements_to_scrape = args.add_extra_file_info + elements_to_scrape
 
@@ -1452,7 +1448,7 @@ def main(cmd_line=None):
     # of the --limit-xpaths-file input file. If this command is not used, the original
     # dictionary will be returned. Glob patterns are processed sequentially, with the
     # first pattern having the highest priority.
-    
+
     for label_results in all_results:
         ind = all_results.index(label_results)
         label_results_new = filter_dict_by_glob_patterns(
@@ -1471,7 +1467,7 @@ def main(cmd_line=None):
         headers = {}
         unique_tags_master = []
 
-         # Step 1: Gather all possible tags from labels
+        # Step 1: Gather all possible tags from labels
         for label_results in all_results:
             keys = label_results.keys()
             for key in keys:
@@ -1495,11 +1491,9 @@ def main(cmd_line=None):
             for tag in tags:
                 name = tag.split('<')[0]
                 if (tags.count(tag) == 1 and names.count(name) == 1
-                    and tag not in unique_tags):
+                        and tag not in unique_tags):
                     unique_tags.append(tag)
-                # if tags.count(tag) > 1 and tag in unique_tags_master:
-                #     unique_tags_master.remove(tag)
-                
+
             for tag in unique_tags:
                 unique_tags_master.append(tag)
 
@@ -1515,7 +1509,6 @@ def main(cmd_line=None):
 
             all_results[ind] = new_label_results
 
-
     if output_csv_path:
         write_results_to_csv(all_results, args, output_csv_path)
 
@@ -1663,5 +1656,5 @@ def main(cmd_line=None):
         template.write(label_content, str(output_subdir / filename) + '.xml')
 
 
-if __name__ == '__main__': # pragma: no coverage
+if __name__ == '__main__':  # pragma: no coverage
     main()
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index f71e7ed..a672e9b 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -2,7 +2,6 @@
 import pytest
 import os
 import tempfile
-import shutil
 import pds4indextools.pds4_create_xml_index as tools
 
 
@@ -18,14 +17,14 @@
 @pytest.mark.parametrize(
         'golden_file,new_file_index,new_file_headers,cmd_line',
         [
-            #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
             (
                 str(expected_dir / 'index_file_success.csv'),
                 None, None,
                 []
             ),
 
-            #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
+            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
             (
                 str(expected_dir / 'index_file_success.csv'),
                 None, None,
@@ -455,7 +454,7 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
     with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
-        if new_file_index == None and new_file_headers == None:
+        if new_file_index is None and new_file_headers is None:
             os.chdir(temp_dir_path)
             cmd_line.append(str(test_files_dir))
             cmd_line.append(str(labels_dir.name / Path('tester_label_1.xml')))
@@ -572,9 +571,9 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             'hdout.txt'
         ),
 
-        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --sort-by bad_sort --output-headers-file hdout.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --sort-by bad_sort --output-headers-file hdout.csv
         (
-            str(test_files_dir), 
+            str(test_files_dir),
             str(labels_dir.name / Path('tester_label_1.xml')),
             '--simplify-xpaths',
             '--sort-by',
@@ -583,17 +582,17 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             'hdout.csv'
         ),
 
-        #Executable command: pds4_create_xml_index ../test_files/labels "nonexistent.xml" --output-headers-file hdout.txt
+        # Executable command: pds4_create_xml_index ../test_files/labels "nonexistent.xml" --output-headers-file hdout.txt
         (
-            str(test_files_dir), 
+            str(test_files_dir),
             str(labels_dir.name / Path('nonexistent.xml')),
             '--output-headers-file',
             'hdout.txt',
         ),
 
-        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file hdout.txt
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file hdout.txt
         (
-            str(test_files_dir), 
+            str(test_files_dir),
             str(labels_dir.name / Path('tester_label_1.xml')),
             '--limit-xpaths-file',
             str(samples_dir / 'elements_xpath_simplify_3.txt'),
@@ -601,9 +600,9 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             'hdout.txt',
         ),
 
-        #Executable command: pds4_create_xml_index ../test_files/labels "tester_label_*.xml" --generate-label ancillary --output-headers-file hdout.txt
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_*.xml" --generate-label ancillary --output-headers-file hdout.txt
         (
-            str(test_files_dir), 
+            str(test_files_dir),
             str(labels_dir.name / Path('tester_label_*.xml')),
             '--generate-label',
             'ancillary',
@@ -611,9 +610,9 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             'hdout.txt',
         ),
 
-        #Executable command: pds4_create_xml_index ../test_files/labels "bad_lid_label.xml" --output-headers-file hdout.txt
+        # Executable command: pds4_create_xml_index ../test_files/labels "bad_lid_label.xml" --output-headers-file hdout.txt
         (
-            str(test_files_dir), 
+            str(test_files_dir),
             str(labels_dir.name / Path('bad_lid_label.xml')),
             '--output-headers-file',
             'hdout.txt',
@@ -630,6 +629,7 @@ def test_failures(cmd_line):
     if os.path.isfile('hdout.txt'):
         os.remove('hdout.txt')
 
+
 @pytest.mark.parametrize(
     'new_file,cmd_line',
     [
@@ -666,6 +666,7 @@ def test_failure_message(capfd, new_file, cmd_line):
         expected_message = ("Non-nillable element in")
         assert expected_message in captured.out or expected_message in captured.err
 
+
 def test_invalid_arguments():
     with pytest.raises(SystemExit):  # Assuming argparse will call sys.exit on failure
         tools.main(["--invalid-option"])
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index cf9f9d2..ada7110 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -6,7 +6,9 @@
 from pathlib import Path
 import pytest
 import pds4indextools.pds4_create_xml_index as tools
+import textwrap as _textwrap
 from unittest import mock
+from unittest.mock import patch
 
 
 # These two variables are the same for all tests, so we can either declare them as
@@ -96,7 +98,8 @@ def test_load_config_object():
         specified_config_files=[str(expected_dir/'tester_config_label.yaml'),])
 
     assert config_object['label-contents']['version_id'] == '1.0'
-    assert config_object['label-contents']['title'] == 'Index file for my occultation bundle'
+    assert (config_object['label-contents']['title'] ==
+            'Index file for my occultation bundle')
 
     # A bad default config file
     with pytest.raises(SystemExit):
@@ -153,9 +156,9 @@ def test_default_value_for_nil():
             '0004-01-01T12:00Z')
     assert tools.default_value_for_nil(config_object, datetime_ymd_utc,
                                        'anticipated') == '0004-01-01T12:00Z'
-    
+
     # Testing None
-    assert tools.default_value_for_nil(config_object, None, 'anticipated') == None
+    assert tools.default_value_for_nil(config_object, None, 'anticipated') is None
 
 
 def test_default_value_for_nil_ascii_date_time_ymd_utc():
@@ -292,7 +295,7 @@ def test_correct_duplicates():
         '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_4<1>': 5,
         '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_5': 6
         }
-    
+
     tools.correct_duplicates(label_results)
 
     assert label_results == {
@@ -304,12 +307,13 @@ def test_correct_duplicates():
         '../geom:SPICE_Kernel_Identification<6>/geom:spice_kernel_file_name<1>': 6
         }
 
+
 def test_update_nillable_elements_from_xsd_file():
     xsd_files = []
     nillable_elements_info = {}
     label_files = ['test_files/labels/tester_label_1.xml',
                    'test_files/labels/tester_label_2.xml']
-    
+
     for label_file in label_files:
         xml_urls = tools.process_schema_location(label_file)
         for url in xml_urls:
@@ -335,7 +339,7 @@ def test_update_nillable_elements_from_xsd_file():
 def test_update_nillable_elements_from_xsd_file_with_edge_cases():
     # Scenario 1: Testing with a type attribute that is None or already in
     # nillable_elements_info
-    
+
     # Mock XSD content with an element that doesn't have a 'type' attribute
     xsd_content_missing_type = """
     <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
@@ -356,7 +360,9 @@ def test_update_nillable_elements_from_xsd_file_with_edge_cases():
     tree_duplicate_type = etree.fromstring(xsd_content_duplicate_type)
 
     # Mock the download_xsd_file function to return these trees based on input
-    with mock.patch('pds4indextools.pds4_create_xml_index.download_xsd_file') as mock_download:
+    with mock.patch(
+        'pds4indextools.pds4_create_xml_index.download_xsd_file'
+                   ) as mock_download:
         # Define the behavior of the mock for each file
         mock_download.side_effect = (
             lambda url: tree_missing_type if 'missing_type' in url
@@ -382,7 +388,7 @@ def test_clean_header_field_names():
         'normal_column': [10, 11, 12]
         }
     df = pd.DataFrame(data)
-    
+
     tools.clean_headers(df)
     new = df.to_dict()
 
@@ -392,7 +398,8 @@ def test_clean_header_field_names():
         '_column3': {0: 7, 1: 8, 2: 9},
         'normal_column': {0: 10, 1: 11, 2: 12}
         }
-    
+
+
 def test_compute_max_field_lengths():
 
     lengths = tools.compute_max_field_lengths(
@@ -406,12 +413,11 @@ def test_compute_max_field_lengths():
         'pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>': 3,
         'pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>': 132
         }
-    
+
     # failure
     with pytest.raises(SystemExit):
-        lengths = tools.compute_max_field_lengths(
-        str(expected_dir / 'fake_file.csv'))
-    
+        lengths = tools.compute_max_field_lengths(str(expected_dir / 'fake_file.csv'))
+
 
 def test_sort_dataframe_key_error():
     df = pd.DataFrame({
@@ -420,10 +426,12 @@ def test_sort_dataframe_key_error():
     })
     sort_keys = ['height']  # Non-existent column
 
-    with pytest.raises(ValueError, match=f"Unknown sort key '{sort_keys[0]}'. For a list of available sort "
-                                         f"keys, use the --output-headers-file option."):
+    with pytest.raises(ValueError, match=f"Unknown sort key '{sort_keys[0]}'. For a list "
+                                         f"of available sort keys, use the "
+                                         f"--output-headers-file option."):
         tools.sort_dataframe(df, sort_keys)
 
+
 def test_validate_label_type():
     arg = 'ancillary'
     valid_choices = {'ancillary': 'Product_Ancillary',
@@ -446,14 +454,13 @@ def test_generate_unique_filename(mock_exists):
     result = tools.generate_unique_filename(base_name)
 
     # Assert that the result is what we expect given the mocked behavior
-    assert result == "file2.txt"  # Since the first two checks return True, the counter reaches 2
+    # Since the first two checks return True, the counter reaches 2
+    assert result == "file2.txt"
 
     # Ensure os.path.exists was called the expected number of times
     assert mock_exists.call_count == 3
 
 
-import textwrap as _textwrap
-
 def test_fill_text():
     # Create an instance of MultilineFormatter
     formatter = tools.MultilineFormatter(prog="test_prog")
@@ -466,8 +473,10 @@ def test_fill_text():
     indent = "    "  # 4 spaces
 
     expected_output = (
-        _textwrap.fill("This is a long text that should be wrapped.", width, initial_indent=indent, subsequent_indent=indent) + '\n' +
-        _textwrap.fill("This is a new paragraph.", width, initial_indent=indent, subsequent_indent=indent) + '\n'
+        _textwrap.fill("This is a long text that should be wrapped.",
+                       width, initial_indent=indent, subsequent_indent=indent) + '\n' +
+        _textwrap.fill("This is a new paragraph.", width, initial_indent=indent,
+                       subsequent_indent=indent) + '\n'
     )
 
     # Run the _fill_text method
@@ -476,39 +485,40 @@ def test_fill_text():
     # Assert the result matches the expected output
     assert result == expected_output
 
-from unittest.mock import patch
 
 # Assume the get_true_type function is imported from the relevant module.
 # from pds4indextools.pds4_create_xml_index import get_true_type
-
 @patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
 @patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
 @patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
-def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces,
+                                       mock_download_xsd_file):
     # Setup mocks
     mock_download_xsd_file.return_value = "mock_xsd_tree"
     mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
-    mock_find_base_attribute.side_effect = ["mock_true_type", None]  # Found in the first file
+    mock_find_base_attribute.side_effect = ["mock_true_type", None]
 
     xsd_files = ["file1.xsd", "file2.xsd"]
     tag = "mock_tag"
     namespaces = {"existing_namespace": "value"}
-    
+
     result = tools.get_true_type(xsd_files, tag, namespaces)
-    
+
     assert result == "mock_true_type"
     mock_download_xsd_file.assert_called_once_with("file1.xsd")
-    mock_find_base_attribute.assert_called_once_with("mock_xsd_tree", tag, {"mock_namespace": "mock_value"})
+    mock_find_base_attribute.assert_called_once_with("mock_xsd_tree", tag,
+                                                     {"mock_namespace": "mock_value"})
 
 
 @patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
 @patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
 @patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
-def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces,
+                                        mock_download_xsd_file):
     # Setup mocks
     mock_download_xsd_file.return_value = "mock_xsd_tree"
     mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
-    
+
     # First file returns None for both original and modified tags
     # Second file returns the true_type for the original tag
     mock_find_base_attribute.side_effect = [None, None, "mock_true_type"]
@@ -516,43 +526,47 @@ def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_na
     xsd_files = ["file1.xsd", "file2.xsd"]
     tag = "mock_tag"
     namespaces = {"existing_namespace": "value"}
-    
+
     result = tools.get_true_type(xsd_files, tag, namespaces)
 
     print(f"Download called: {mock_download_xsd_file.call_count} times")
     print(f"Find base attribute called: {mock_find_base_attribute.call_count} times")
 
-    # Check if the loop iterates over both files and correctly identifies the type in the second file
+    # Check if the loop iterates over both files and correctly identifies the type in
+    # the second file
     assert result == "mock_true_type"
-    assert mock_download_xsd_file.call_count == 2  # Should be called for both files
-    assert mock_find_base_attribute.call_count == 3  # Should be called twice for file1 (original + modified) and once for file2
-
-
+    assert mock_download_xsd_file.call_count == 2
+    assert mock_find_base_attribute.call_count == 3
 
 
 @patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
 @patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
 @patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
-def test_true_type_found_with_modified_tag(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+def test_true_type_found_with_modified_tag(mock_find_base_attribute,
+                                           mock_scrape_namespaces,
+                                           mock_download_xsd_file):
     # Setup mocks
     mock_download_xsd_file.return_value = "mock_xsd_tree"
     mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
-    mock_find_base_attribute.side_effect = [None, "mock_true_type"]  # Found after modifying the tag
+    # Found after modifying the tag
+    mock_find_base_attribute.side_effect = [None, "mock_true_type"]
 
     xsd_files = ["file1.xsd"]
     tag = "mock_tag"
     namespaces = {"existing_namespace": "value"}
-    
+
     result = tools.get_true_type(xsd_files, tag, namespaces)
-    
+
     assert result == "mock_true_type"
-    mock_find_base_attribute.assert_any_call("mock_xsd_tree", "mock_tag_WO_Units", {"mock_namespace": "mock_value"})
+    mock_find_base_attribute.assert_any_call("mock_xsd_tree", "mock_tag_WO_Units",
+                                             {"mock_namespace": "mock_value"})
 
 
 @patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
 @patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
 @patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
-def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces, mock_download_xsd_file):
+def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces,
+                             mock_download_xsd_file):
     # Setup mocks
     mock_download_xsd_file.return_value = "mock_xsd_tree"
     mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
@@ -561,9 +575,9 @@ def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces, m
     xsd_files = ["file1.xsd", "file2.xsd"]
     tag = "mock_tag"
     namespaces = {"existing_namespace": "value"}
-    
+
     result = tools.get_true_type(xsd_files, tag, namespaces)
-    
-    assert result == None
+
+    assert result is None
     assert mock_download_xsd_file.call_count == 2
-    assert mock_find_base_attribute.call_count == 4  # Both original and modified tags are checked for both files
+    assert mock_find_base_attribute.call_count == 4

From fe85192639e31394437810887bc67b98200372dc Mon Sep 17 00:00:00 2001
From: Robert French <rfrench@seti.org>
Date: Mon, 19 Aug 2024 16:52:23 -0700
Subject: [PATCH 05/24] Remove terminator dependence from tests

---
 tests/test_pds4_create_xml_index_blackbox.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index a672e9b..ff944eb 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -466,10 +466,10 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             assert os.path.isfile(path_to_file)
 
             # Open and compare the two files
-            with open(path_to_file, 'rb') as created:
+            with open(path_to_file, 'r') as created:
                 formed = created.read()
 
-            with open(golden_file, 'rb') as new:
+            with open(golden_file, 'r') as new:
                 expected = new.read()
 
             assert formed == expected
@@ -488,10 +488,10 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
                 assert os.path.isfile(path_to_file)
 
                 # Open and compare the two files
-                with open(path_to_file, 'rb') as created:
+                with open(path_to_file, 'r') as created:
                     formed = created.read()
 
-                with open(golden_file, 'rb') as new:
+                with open(golden_file, 'r') as new:
                     expected = new.read()
 
                 assert formed == expected
@@ -502,10 +502,10 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
                     assert os.path.isfile(label_path)
 
                     # Open and compare the two files
-                    with open(label_path, 'rb') as created:
+                    with open(label_path, 'r') as created:
                         formed = created.read()
 
-                    with open(golden_label, 'rb') as new:
+                    with open(golden_label, 'r') as new:
                         expected = new.read()
 
                     assert formed == expected
@@ -521,10 +521,10 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
                 assert os.path.isfile(path_to_file)
 
                 # Open and compare the two files
-                with open(path_to_file, 'rb') as created:
+                with open(path_to_file, 'r') as created:
                     formed = created.read()
 
-                with open(golden_file, 'rb') as new:
+                with open(golden_file, 'r') as new:
                     expected = new.read()
 
                 assert formed == expected

From 641c87c2c8c92cf775b86def66b9487f8e63606a Mon Sep 17 00:00:00 2001
From: Robert French <rfrench@seti.org>
Date: Mon, 19 Aug 2024 17:08:40 -0700
Subject: [PATCH 06/24] Force \n line terminator on writing CSV

---
 pds4indextools/pds4_create_xml_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 956eed4..e2002f6 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -817,7 +817,7 @@ def pad_column_values_and_headers(df):
 
     else:
         print(f'Index file generated at {output_csv_path}')
-        df.to_csv(output_csv_path, index=False, na_rep='')
+        df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n')
 
 
 def find_base_attribute(xsd_tree, target_name, new_namespaces):

From 4fa4dc6282aa50fd5d91409db087ba5818af07d9 Mon Sep 17 00:00:00 2001
From: Robert French <rfrench@seti.org>
Date: Mon, 19 Aug 2024 17:11:10 -0700
Subject: [PATCH 07/24] Missed a to_csv

---
 pds4indextools/pds4_create_xml_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index e2002f6..3c25688 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -813,7 +813,7 @@ def pad_column_values_and_headers(df):
     if args.fixed_width:
         padded_df = pad_column_values_and_headers(df)
         print(f'Fixed-width index file generated at {output_csv_path}')
-        padded_df.to_csv(output_csv_path, index=False, na_rep='')
+        padded_df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n')
 
     else:
         print(f'Index file generated at {output_csv_path}')

From 328b3395a7fb6e0ab9097700273abad05126291d Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Tue, 20 Aug 2024 09:22:07 -0700
Subject: [PATCH 08/24] Removing --rename-headers and --dont-number-unique-tags

---
 pds4indextools/pds4_create_xml_index.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 956eed4..d28d904 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -331,7 +331,6 @@ def search_type(xsd_file, tag, namespaces):
     for xsd_file in xsd_files:
         true_type = search_type(xsd_file, tag, namespaces)
         if true_type:  # Only return if true_type is not None
-            print(f"Returning true_type found in file: {xsd_file}")
             return true_type
 
     return None
@@ -1223,15 +1222,6 @@ def main(cmd_line=None):
                                             'contain characters permissible in variable '
                                             'names.')
 
-    index_file_generation.add_argument('--rename-headers', type=str,
-                                       metavar='NEW_HEADERS_FILEPATH',
-                                       help='Rename headers in the generated index file'
-                                            'according to a given mapping file.')
-
-    index_file_generation.add_argument('--dont-number-unique-tags', action='store_true',
-                                       help='Removes the predicates of unique XPath '
-                                            'headers.')
-
     index_file_generation.add_argument(
         '--simplify-xpaths',
         action='store_true',

From 82294e80031bdf8c9f2b627f515fbfa862b722a3 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 26 Aug 2024 13:21:37 -0700
Subject: [PATCH 09/24] Minor syntax changes, fixed issue with label generation

---
 pds4indextools/index_label_template_pds.xml  |   22 +
 pds4indextools/pds4_create_xml_index.py      |    1 -
 test_files/expected/label_success_1.xml      |    2 +-
 test_files/expected/label_success_2.xml      |    2 +-
 test_files/expected/label_success_3.xml      |    2 +-
 test_files/expected/tester_config.yaml       |    2 +-
 tests/test_pds4_create_xml_index_blackbox.py | 1048 +++++++++---------
 tests/test_pds4_create_xml_index_whitebox.py |   57 +-
 8 files changed, 569 insertions(+), 567 deletions(-)

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 89b6bb6..982db44 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -103,6 +103,28 @@ $END_IF
     $END_IF
     $IF(Product_Metadata_Supplemental)
     <File_Area_Metadata>
+    $END_IF
+    $IF(Product_Ancillary and isinstance(Product_Ancillary, dict) and 'File_Area_Ancillary' in Product_Ancillary)
+        $IF(EXISTS(File))
+        <File>
+            <file_name>$BASENAME(TEMPFILE)$</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>$File['creation_date_time']$</creation_date_time>
+            <md5_checksum>$File['md5_checksum']$</md5_checksum>
+            <comment></comment>
+        </File>
+        $END_IF
+    $END_IF
+    $IF(Product_Metadata_Supplemental and isinstance(Product_Metadata_Supplemental, dict) and 'File_Area_Ancillary' in Product_Metadata_Supplemental)
+        $IF(File)
+        <File>
+            <file_name>$BASENAME(TEMPFILE)$</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>$File['creation_date_time']$</creation_date_time>
+            <md5_checksum>$File['md5_checksum']$</md5_checksum>
+            <comment></comment>
+        </File>
+        $END_IF
     $END_IF
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 3dfabf5..34ca2ff 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -1532,7 +1532,6 @@ def main(cmd_line=None):
 
     if args.generate_label:
         index_file = output_csv_path
-        print(index_file)
 
         # The template label file is initialized.
         module_dir = Path(__file__).resolve().parent
diff --git a/test_files/expected/label_success_1.xml b/test_files/expected/label_success_1.xml
index 6d4945f..aade34b 100644
--- a/test_files/expected/label_success_1.xml
+++ b/test_files/expected/label_success_1.xml
@@ -26,7 +26,7 @@
         <File>
             <file_name>generated_label_1.csv</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>00:00:00</creation_date_time>
+            <creation_date_time>0001-01-01T00:00:00.00Z</creation_date_time>
             <md5_checksum>a177a1160bf3780c01e3bd9e02be89f4</md5_checksum>
             <comment></comment>
         </File>
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
index b5bed6e..628ae41 100644
--- a/test_files/expected/label_success_2.xml
+++ b/test_files/expected/label_success_2.xml
@@ -26,7 +26,7 @@
         <File>
             <file_name>generated_label_2.csv</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>00:00:00</creation_date_time>
+            <creation_date_time>0001-01-01T00:00:00.00Z</creation_date_time>
             <md5_checksum>53d47b320936ac3fbba0852696065418</md5_checksum>
             <comment></comment>
         </File>
diff --git a/test_files/expected/label_success_3.xml b/test_files/expected/label_success_3.xml
index 96cc903..2e6127e 100644
--- a/test_files/expected/label_success_3.xml
+++ b/test_files/expected/label_success_3.xml
@@ -26,7 +26,7 @@
         <File>
             <file_name>generated_label_3.csv</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>00:00:00</creation_date_time>
+            <creation_date_time>0001-01-01T00:00:00.00Z</creation_date_time>
             <md5_checksum>8b2eb69a284938d23748de7f53d2e45b</md5_checksum>
             <comment></comment>
         </File>
diff --git a/test_files/expected/tester_config.yaml b/test_files/expected/tester_config.yaml
index 35d3d8c..7b29fd8 100644
--- a/test_files/expected/tester_config.yaml
+++ b/test_files/expected/tester_config.yaml
@@ -20,4 +20,4 @@ nillable:
 label-contents:
   version_id: 1.1
   File:
-    creation_date_time: '00:00:00'
+    creation_date_time: '0001-01-01T00:00:00.00Z'
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index ff944eb..949f9eb 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -2,545 +2,527 @@
 import pytest
 import os
 import tempfile
+import shutil
 import pds4indextools.pds4_create_xml_index as tools
 
 
 # These two variables are the same for all tests, so we can either declare them as
 # global variables, or get the ROOT_DIR at the setup stage before running each test
 ROOT_DIR = Path(__file__).resolve().parent.parent
-test_files_dir = ROOT_DIR / 'test_files'
-samples_dir = test_files_dir / 'samples'
-expected_dir = test_files_dir / 'expected'
-labels_dir = test_files_dir / 'labels'
+TEST_FILES_DIR = ROOT_DIR / 'test_files'
+SAMPLES_DIR = TEST_FILES_DIR / 'samples'
+EXPECTED_DIR = TEST_FILES_DIR / 'expected'
+LABELS_DIR = TEST_FILES_DIR / 'labels'
+LABEL_NAME = LABELS_DIR.name
+
+
+def compare_files(path_to_file, golden_file):
+    """Assert path_to_file exists and its text is identical to golden_file."""
+    assert os.path.isfile(path_to_file)
+
+    # Read both files in full (text mode) and compare the contents exactly
+    with open(path_to_file, 'r') as created:
+        formed = created.read()
+
+    with open(golden_file, 'r') as new:
+        expected = new.read()
+
+    assert formed == expected
 
 
 @pytest.mark.parametrize(
-        'golden_file,new_file_index,new_file_headers,cmd_line',
-        [
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
-            (
-                str(expected_dir / 'index_file_success.csv'),
-                None, None,
-                []
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
-            (
-                str(expected_dir / 'index_file_success.csv'),
-                None, None,
-                [
-                    '--generate-label',
-                    'ancillary'
-                 ]
-            ),
-
-            # Testing --limit-xpaths-file with two outputs
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt --output-index-file limit_xpaths_file.csv
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_1.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_1.csv'),
-                'limit_xpaths_file.csv', 'limit_xpaths_file.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_1.txt')
-                ]
-            ),
-
-            # Testing --limit-xpaths-file
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_1.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_1.txt'),
-                None, 'limit_xpaths_file.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_1.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_1.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_1.txt'),
-                None, 'limit_xpaths_file_wack.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('nonexistent.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_1.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_2.txt --output-headers-file limit_xpaths_file_2.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_2.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_2.txt'),
-                None, 'limit_xpaths_file_2.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_2.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_duplicates.txt --output-headers-file elements_dupe_file_2.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_2.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_2.txt'),
-                None, 'elements_dupe_file_2.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_duplicates.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_3.txt --output-headers-file limit_xpaths_file_3.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_3.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_3.txt'),
-                None, 'limit_xpaths_file_3.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_3.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_4.txt --output-headers-file limit_xpaths_file_4.txt
-            # Compare result to golden copy:
-            # test_files/expected/limit_xpaths_file_success_4.txt
-            (
-                str(expected_dir / 'limit_xpaths_file_success_4.txt'),
-                None, 'limit_xpaths_file_4.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_4.txt')
-                ]
-            ),
-
-            # Testing --simplify-xpaths
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --output-headers-file simplify_xpaths_1.txt
-            # Compare result to golden copy:
-            # test_files/expected/simplify_xpaths_success_1.txt
-            (
-                str(expected_dir / 'simplify_xpaths_success_1.txt'),
-                None, 'simplify_xpaths_1.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--simplify-xpaths'
-                ]
-            ),
-
-            # Testing --simplify-xpaths
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_2.txt --output-headers-file simplify_xpaths_2.txt
-            # Compare result to golden copy:
-            # test_files/expected/simplify_xpaths_success_2.txt
-            (
-                str(expected_dir / 'simplify_xpaths_success_2.txt'),
-                None, 'simplify_xpaths_2.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--simplify-xpaths',
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_2.txt')
-                ]
-            ),
-
-            # Testing --simplify-xpaths
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file simplify_xpaths_3.txt
-            # Compare result to golden copy:
-            # test_files/expected/simplify_xpaths_success_3.txt
-            (
-                str(expected_dir / 'simplify_xpaths_success_3.txt'),
-                None, 'simplify_xpaths_3.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    '--simplify-xpaths',
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_3.txt')
-                ]
-            ),
-
-            # Testing --simplify-xpaths
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_4.txt --output-headers-file simplify_xpaths_4.txt
-            # Compare result to golden copy:
-            # test_files/expected/simplify_xpaths_success_4.txt
-            (
-                str(expected_dir / 'simplify_xpaths_success_4.txt'),
-                None, 'simplify_xpaths_4.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--simplify-xpaths',
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_xpath_simplify_4.txt')
-                ]
-            ),
-
-            # Testing --add-extra-file-info
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
-            # Compare result to golden copy:
-            # test_files/expected/extra_file_info_success_1.csv
-            (
-                str(expected_dir / 'extra_file_info_success_1.csv'),
-                'extra_file_info_1.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_extra_file_info.txt'),
-                    '--add-extra-file-info',
-                    'filename,filepath',
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename --sort-by filename
-            # --output-index-file extra_file_info_2.csv
-            # Compare result to golden copy:
-            # test_files/expected/extra_file_info_success_2.csv
-            (
-                str(expected_dir / 'extra_file_info_success_2.csv'),
-                'extra_file_info_2.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_5.txt'),
-                    '--add-extra-file-info',
-                    'filename',
-                    '--sort-by',
-                    'filename'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --output-index-file extra_file_info_3.csv
-            # Compare result to golden copy:
-            # test_files/expected/extra_file_info_success_3.csv
-            (
-                str(expected_dir / 'extra_file_info_success_3.csv'),
-                'extra_file_info_3.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_5.txt'),
-                    '--add-extra-file-info',
-                    'filename,filepath,lid,bundle,bundle_lid',
-                    '--sort-by',
-                    'filename'
-                ]
-            ),
-
-            # Testing --clean-header-field-names
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-headers-file clean_header_field_names_1.txt
-            # Compare result to golden copy:
-            # test_files/expected/clean_header_field_names_success_1.txt
-            (
-                str(expected_dir / 'clean_header_field_names_success_1.txt'),
-                None, 'clean_header_field_names_1.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--clean-header-field-names'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --clean-header-field-names --output-headers-file clean_header_field_names_2.txt
-            # Compare result to golden copy:
-            # test_files/expected/clean_header_field_names_success_2.txt
-            (
-                str(expected_dir / 'clean_header_field_names_success_2.csv'),
-                'clean_header_field_names_2.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--clean-header-field-names'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --clean-header-field-names --output-headers-file clean_header_field_names_2.txt
-            # Compare result to golden copy:
-            # test_files/expected/clean_header_field_names_success_2.txt
-            (
-                str(expected_dir / 'clean_header_field_names_success_2.txt'),
-                None, 'clean_header_field_names_2.txt',
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_clean_header_field_names.txt'),
-                    '--clean-header-field-names'
-                ]
-            ),
-
-            # Testing --sort by
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --sort-by 'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>' --output-index-file sort_by_1.csv
-            # Compare result to golden copy:
-            # test_files/expected/sort_by_success_1.csv
-            (
-                str(expected_dir / 'sort_by_success_1.csv'),
-                'sort_by_1.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_clean_header_field_names.txt'),
-                    '--sort-by',
-                    'pds:Product_Observational/pds:Identification_Area<1>/'
-                    'pds:logical_identifier<1>'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --add-extra-file-info bundle_lid,filepath --sort-by bundle_lid --output-index-file sort_by_2.csv
-            # Compare result to golden copy:
-            # test_files/expected/sort_by_success_2.csv
-            (
-                str(expected_dir / 'sort_by_success_2.csv'),
-                'sort_by_2.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_clean_header_field_names.txt'),
-                    '--add-extra-file-info',
-                    'bundle_lid,filepath',
-                    '--sort-by',
-                    'bundle_lid'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "identical_label_*.xml" --limit-xpaths-file ../test_files/samples/identical_elements.txt --add-extra-file-info filename --sort-by filename --output-index-file identical_labels.csv
-            # Compare result to golden copy:
-            # test_files/expected/identical_labels_success.csv
-            (
-                str(expected_dir / 'identical_labels_success.csv'),
-                'identical_labels.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('identical_label_*.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'identical_elements.txt'),
-                    '--add-extra-file-info',
-                    'filename',
-                    '--sort-by',
-                    'filename'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label.xml" --limit-xpaths-file ../test_files/samples/elements_nilled.txt --output-index-file nilled_elements.csv
-            # Compare result to golden copy:
-            # test_files/expected/nilled_element_success.csv
-            (
-                str(expected_dir / 'nilled_element_success.csv'),
-                'nilled_element.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('nilled_label.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'elements_nilled.txt')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --fixed-width --output-index-file fixed_width.csv
-            # Compare result to golden copy:
-            # test_files/expected/fixed_width_success.csv
-            (
-                str(expected_dir / 'fixed_width_success.csv'),
-                'fixed_width.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--fixed-width'
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_1.csv
-            # Compare result to golden copy:
-            # test_files/expected/label_success_1.csv
-            # test_files/expected/label_success_1.xml
-            (
-                str(expected_dir / 'label_success_1.csv'),
-                'generated_label_1.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--generate-label',
-                    'ancillary',
-                    '--config',
-                    str(expected_dir / 'tester_config.yaml')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --output-index-file generated_label_2.csv --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_2.csv
-            # Compare result to golden copy:
-            # test_files/expected/label_success_2.csv
-            # test_files/expected/label_success_2.xml
-            (
-                str(expected_dir / 'label_success_2.csv'),
-                'generated_label_2.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    '--generate-label',
-                    'metadata',
-                    '--fixed-width',
-                    '--config',
-                    str(expected_dir / 'tester_config.yaml')
-                ]
-            ),
-
-            # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_3.csv
-            # Compare result to golden copy:
-            # test_files/expected/label_success_3.csv
-            # test_files/expected/label_success_3.xml
-            (
-                str(expected_dir / 'label_success_3.csv'),
-                'generated_label_3.csv', None,
-                [
-                    str(test_files_dir),
-                    str(labels_dir.name / Path('tester_label_1.xml')),
-                    str(labels_dir.name / Path('tester_label_2.xml')),
-                    str(labels_dir.name / Path('tester_label_3.xml')),
-                    '--limit-xpaths-file',
-                    str(samples_dir / 'element_5.txt'),
-                    '--add-extra-file-info',
-                    'filename,filepath,lid,bundle,bundle_lid',
-                    '--sort-by',
-                    'filename',
-                    '--generate-label',
-                    'ancillary',
-                    '--config',
-                    str(expected_dir / 'tester_config.yaml')
-                ]
-            )
-        ]
-    )
-def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
+    'GOLDEN_FILE,NEW_FILE_INDEX,NEW_FILE_HEADERS,CMD_LINE',
+    [
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
+        (
+            str(EXPECTED_DIR / 'index_file_success.csv'),
+            None, None,
+            []
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
+        (
+            str(EXPECTED_DIR / 'index_file_success.csv'),
+            None, None,
+            [
+                '--generate-label',
+                'ancillary'
+            ]
+        ),
+
+        # Testing --limit-xpaths-file with two outputs
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt --output-index-file limit_xpaths_file.csv
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_1.csv
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_1.csv'),
+            'limit_xpaths_file.csv', 'limit_xpaths_file.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_1.txt')
+            ]
+        ),
+
+        # Testing --limit-xpaths-file
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_1.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_1.txt'),
+            None, 'limit_xpaths_file.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_1.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "nonexistent.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file_wack.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_1.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_1.txt'),
+            None, 'limit_xpaths_file_wack.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/nonexistent.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_1.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_2.txt --output-headers-file limit_xpaths_file_2.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_2.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_2.txt'),
+            None, 'limit_xpaths_file_2.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_2.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_2.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_duplicates.txt --output-headers-file elements_dupe_file_2.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_2.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_2.txt'),
+            None, 'elements_dupe_file_2.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_2.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_duplicates.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_3.txt --output-headers-file limit_xpaths_file_3.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_3.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_3.txt'),
+            None, 'limit_xpaths_file_3.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_3.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_4.txt --output-headers-file limit_xpaths_file_4.txt
+        # Compare result to golden copy:
+        # test_files/expected/limit_xpaths_file_success_4.txt
+        (
+            str(EXPECTED_DIR / 'limit_xpaths_file_success_4.txt'),
+            None, 'limit_xpaths_file_4.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_4.txt')
+            ]
+        ),
+
+        # Testing --simplify-xpaths
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --output-headers-file simplify_xpaths_1.txt
+        # Compare result to golden copy:
+        # test_files/expected/simplify_xpaths_success_1.txt
+        (
+            str(EXPECTED_DIR / 'simplify_xpaths_success_1.txt'),
+            None, 'simplify_xpaths_1.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--simplify-xpaths'
+            ]
+        ),
+
+        # Testing --simplify-xpaths
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_2.txt --output-headers-file simplify_xpaths_2.txt
+        # Compare result to golden copy:
+        # test_files/expected/simplify_xpaths_success_2.txt
+        (
+            str(EXPECTED_DIR / 'simplify_xpaths_success_2.txt'),
+            None, 'simplify_xpaths_2.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--simplify-xpaths',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_xpath_simplify_2.txt')
+            ]
+        ),
+
+        # Testing --simplify-xpaths
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file simplify_xpaths_3.txt
+        # Compare result to golden copy:
+        # test_files/expected/simplify_xpaths_success_3.txt
+        (
+            str(EXPECTED_DIR / 'simplify_xpaths_success_3.txt'),
+            None, 'simplify_xpaths_3.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_2.xml',
+                '--simplify-xpaths',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_xpath_simplify_3.txt')
+            ]
+        ),
+
+        # Testing --simplify-xpaths
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_4.txt --output-headers-file simplify_xpaths_4.txt
+        # Compare result to golden copy:
+        # test_files/expected/simplify_xpaths_success_4.txt
+        (
+            str(EXPECTED_DIR / 'simplify_xpaths_success_4.txt'),
+            None, 'simplify_xpaths_4.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_3.xml',
+                '--simplify-xpaths',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_xpath_simplify_4.txt')
+            ]
+        ),
+
+        # Testing --add-extra-file-info
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_extra_file_info.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
+        # Compare result to golden copy:
+        # test_files/expected/extra_file_info_success_1.csv
+        (
+            str(EXPECTED_DIR / 'extra_file_info_success_1.csv'),
+            'extra_file_info_1.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_2.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_extra_file_info.txt'),
+                '--add-extra-file-info',
+                'filename,filepath',
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename --sort-by filename
+        # --output-index-file extra_file_info_2.csv
+        # Compare result to golden copy:
+        # test_files/expected/extra_file_info_success_2.csv
+        (
+            str(EXPECTED_DIR / 'extra_file_info_success_2.csv'),
+            'extra_file_info_2.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_5.txt'),
+                '--add-extra-file-info',
+                'filename',
+                '--sort-by',
+                'filename'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --output-index-file extra_file_info_3.csv
+        # Compare result to golden copy:
+        # test_files/expected/extra_file_info_success_3.csv
+        (
+            str(EXPECTED_DIR / 'extra_file_info_success_3.csv'),
+            'extra_file_info_3.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_5.txt'),
+                '--add-extra-file-info',
+                'filename,filepath,lid,bundle,bundle_lid',
+                '--sort-by',
+                'filename'
+            ]
+        ),
+
+        # Testing --clean-header-field-names
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-headers-file clean_header_field_names_1.txt
+        # Compare result to golden copy:
+        # test_files/expected/clean_header_field_names_success_1.txt
+        (
+            str(EXPECTED_DIR / 'clean_header_field_names_success_1.txt'),
+            None, 'clean_header_field_names_1.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--clean-header-field-names'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-index-file clean_header_field_names_2.csv
+        # Compare result to golden copy:
+        # test_files/expected/clean_header_field_names_success_2.csv
+        (
+            str(EXPECTED_DIR / 'clean_header_field_names_success_2.csv'),
+            'clean_header_field_names_2.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--clean-header-field-names'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --clean-header-field-names --output-headers-file clean_header_field_names_2.txt
+        # Compare result to golden copy:
+        # test_files/expected/clean_header_field_names_success_2.txt
+        (
+            str(EXPECTED_DIR / 'clean_header_field_names_success_2.txt'),
+            None, 'clean_header_field_names_2.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+                '--clean-header-field-names'
+            ]
+        ),
+
+        # Testing --sort-by
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --sort-by 'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>' --output-index-file sort_by_1.csv
+        # Compare result to golden copy:
+        # test_files/expected/sort_by_success_1.csv
+        (
+            str(EXPECTED_DIR / 'sort_by_success_1.csv'),
+            'sort_by_1.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+                '--sort-by',
+                'pds:Product_Observational/pds:Identification_Area<1>/'
+                'pds:logical_identifier<1>'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --add-extra-file-info bundle_lid,filepath --sort-by bundle_lid --output-index-file sort_by_2.csv
+        # Compare result to golden copy:
+        # test_files/expected/sort_by_success_2.csv
+        (
+            str(EXPECTED_DIR / 'sort_by_success_2.csv'),
+            'sort_by_2.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+                '--add-extra-file-info',
+                'bundle_lid,filepath',
+                '--sort-by',
+                'bundle_lid'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "identical_label_*.xml" --limit-xpaths-file ../test_files/samples/identical_elements.txt --add-extra-file-info filename --sort-by filename --output-index-file identical_labels.csv
+        # Compare result to golden copy:
+        # test_files/expected/identical_labels_success.csv
+        (
+            str(EXPECTED_DIR / 'identical_labels_success.csv'),
+            'identical_labels.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/identical_label_*.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'identical_elements.txt'),
+                '--add-extra-file-info',
+                'filename',
+                '--sort-by',
+                'filename'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label.xml" --limit-xpaths-file ../test_files/samples/elements_nilled.txt --output-index-file nilled_element.csv
+        # Compare result to golden copy:
+        # test_files/expected/nilled_element_success.csv
+        (
+            str(EXPECTED_DIR / 'nilled_element_success.csv'),
+            'nilled_element.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/nilled_label.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'elements_nilled.txt')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --fixed-width --output-index-file fixed_width.csv
+        # Compare result to golden copy:
+        # test_files/expected/fixed_width_success.csv
+        (
+            str(EXPECTED_DIR / 'fixed_width_success.csv'),
+            'fixed_width.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--fixed-width'
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_1.csv
+        # Compare result to golden copy:
+        # test_files/expected/label_success_1.csv
+        # test_files/expected/label_success_1.xml
+        (
+            str(EXPECTED_DIR / 'label_success_1.csv'),
+            'generated_label_1.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--generate-label',
+                'ancillary',
+                '--config',
+                str(EXPECTED_DIR / 'tester_config.yaml')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_2.csv
+        # Compare result to golden copy:
+        # test_files/expected/label_success_2.csv
+        # test_files/expected/label_success_2.xml
+        (
+            str(EXPECTED_DIR / 'label_success_2.csv'),
+            'generated_label_2.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--generate-label',
+                'metadata',
+                '--fixed-width',
+                '--config',
+                str(EXPECTED_DIR / 'tester_config.yaml')
+            ]
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_3.csv
+        # Compare result to golden copy:
+        # test_files/expected/label_success_3.csv
+        # test_files/expected/label_success_3.xml
+        (
+            str(EXPECTED_DIR / 'label_success_3.csv'),
+            'generated_label_3.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                LABEL_NAME + '/tester_label_2.xml',
+                LABEL_NAME + '/tester_label_3.xml',
+                '--limit-xpaths-file',
+                str(SAMPLES_DIR / 'element_5.txt'),
+                '--add-extra-file-info',
+                'filename,filepath,lid,bundle,bundle_lid',
+                '--sort-by',
+                'filename',
+                '--generate-label',
+                'ancillary',
+                '--config',
+                str(EXPECTED_DIR / 'tester_config.yaml')
+            ]
+        )
+    ]
+)
+def test_success(GOLDEN_FILE, NEW_FILE_INDEX, NEW_FILE_HEADERS, CMD_LINE):
     # Create a temporary directory
-    with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
+    with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
-        if new_file_index is None and new_file_headers is None:
-            os.chdir(temp_dir_path)
-            cmd_line.append(str(test_files_dir))
-            cmd_line.append(str(labels_dir.name / Path('tester_label_1.xml')))
+        if NEW_FILE_INDEX is None and NEW_FILE_HEADERS is None:
+            shutil.copy(LABELS_DIR / 'tester_label_1.xml', temp_dir_path)
+            CMD_LINE.append(str(temp_dir_path))
+            CMD_LINE.append('tester_label_1.xml')
+            CMD_LINE.append('--output-index-file')
+            CMD_LINE.append(str(temp_dir_path / 'index.csv'))
             # Call main() function with the simulated command line arguments
-            tools.main(cmd_line)
+            tools.main(CMD_LINE)
 
             path_to_file = temp_dir_path / 'index.csv'
-            # Assert that the file now exists
-            assert os.path.isfile(path_to_file)
-
-            # Open and compare the two files
-            with open(path_to_file, 'r') as created:
-                formed = created.read()
 
-            with open(golden_file, 'r') as new:
-                expected = new.read()
-
-            assert formed == expected
-            os.remove(path_to_file)
-            os.chdir(ROOT_DIR)
+            compare_files(path_to_file, GOLDEN_FILE)
 
         else:
             # THE PATH TO THE NEW FILE
-            if new_file_index:
-                path_to_file = temp_dir_path / new_file_index
-                cmd_line.append('--output-index-file')
-                cmd_line.append(str(path_to_file))
+            if NEW_FILE_INDEX:
+                path_to_file = temp_dir_path / NEW_FILE_INDEX
+                CMD_LINE.append('--output-index-file')
+                CMD_LINE.append(str(path_to_file))
                 # Call main() function with the simulated command line arguments
-                tools.main(cmd_line)
-                # Assert that the file now exists
-                assert os.path.isfile(path_to_file)
-
-                # Open and compare the two files
-                with open(path_to_file, 'r') as created:
-                    formed = created.read()
+                tools.main(CMD_LINE)
 
-                with open(golden_file, 'r') as new:
-                    expected = new.read()
+                compare_files(path_to_file, GOLDEN_FILE)
 
-                assert formed == expected
-
-                if '--generate-label' in cmd_line:
+                if '--generate-label' in CMD_LINE:
                     label_path = str(path_to_file).replace('.csv', '.xml')
-                    golden_label = str(golden_file).replace('.csv', '.xml')
+                    golden_label = str(GOLDEN_FILE).replace('.csv', '.xml')
                     assert os.path.isfile(label_path)
 
-                    # Open and compare the two files
-                    with open(label_path, 'r') as created:
-                        formed = created.read()
-
-                    with open(golden_label, 'r') as new:
-                        expected = new.read()
-
-                    assert formed == expected
+                    compare_files(label_path, golden_label)
 
-            if new_file_headers:
-                path_to_file = temp_dir_path / new_file_headers
-                golden_file = str(golden_file).replace('.csv', '.txt')
-                cmd_line.append('--output-headers-file')
-                cmd_line.append(str(path_to_file))
+            if NEW_FILE_HEADERS:
+                path_to_file = temp_dir_path / NEW_FILE_HEADERS
+                GOLDEN_FILE = str(GOLDEN_FILE).replace('.csv', '.txt')
+                CMD_LINE.append('--output-headers-file')
+                CMD_LINE.append(str(path_to_file))
                 # Call main() function with the simulated command line arguments
-                tools.main(cmd_line)
-                # Assert that the file now exists
-                assert os.path.isfile(path_to_file)
-
-                # Open and compare the two files
-                with open(path_to_file, 'r') as created:
-                    formed = created.read()
-
-                with open(golden_file, 'r') as new:
-                    expected = new.read()
+                tools.main(CMD_LINE)
 
-                assert formed == expected
+                compare_files(path_to_file, GOLDEN_FILE)
 
 
 @pytest.mark.parametrize(
-    'cmd_line',
+    'CMD_LINE',
     [
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info bad_element --output-headers-file hdout.txt
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('tester_label_1.xml')),
-            str(labels_dir.name / Path('tester_label_2.xml')),
-            str(labels_dir.name / Path('tester_label_3.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/tester_label_1.xml',
+            LABEL_NAME + '/tester_label_2.xml',
+            LABEL_NAME + '/tester_label_3.xml',
             '--limit-xpaths-file',
-            str(samples_dir / 'element_1.txt'),
+            str(SAMPLES_DIR / 'element_1.txt'),
             '--add-extra-file-info',
             'bad_element',
             '--output-headers-file',
@@ -549,10 +531,10 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
 
         # Executable command: pds4_create_xml_index ../test_files/labels "bad_directory/labels/tester_label_*.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename --output-headers-file hdout.txt
         (
-            str(test_files_dir),  # directory path
+            str(TEST_FILES_DIR),  # directory path
             'bad_directory/labels/tester_label_*.xml',  # non-existent directory
             '--limit-xpaths-file',
-            str(samples_dir / 'element_1.txt'),  # elements file
+            str(SAMPLES_DIR / 'element_1.txt'),  # elements file
             '--add-extra-file-info',  # extra file info
             'filename',
             '--output-headers-file',
@@ -561,20 +543,20 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
 
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_empty.txt --output-headers-file hdout.txt
         (
-            str(test_files_dir),  # directory path
-            str(labels_dir.name / Path('tester_label_1.xml')),
-            str(labels_dir.name / Path('tester_label_2.xml')),
-            str(labels_dir.name / Path('tester_label_3.xml')),
+            str(TEST_FILES_DIR),  # directory path
+            LABEL_NAME + '/tester_label_1.xml',
+            LABEL_NAME + '/tester_label_2.xml',
+            LABEL_NAME + '/tester_label_3.xml',
             '--limit-xpaths-file',
-            str(samples_dir / 'element_empty.txt'),  # empty elements file
+            str(SAMPLES_DIR / 'element_empty.txt'),  # empty elements file
             '--output-headers-file',
             'hdout.txt'
         ),
 
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --sort-by bad_sort --output-headers-file hdout.csv
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('tester_label_1.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/tester_label_1.xml',
             '--simplify-xpaths',
             '--sort-by',
             'bad_sort',
@@ -584,26 +566,26 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
 
         # Executable command: pds4_create_xml_index ../test_files/labels "nonexistent.xml" --output-headers-file hdout.txt
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('nonexistent.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/nonexistent.xml',
             '--output-headers-file',
             'hdout.txt',
         ),
 
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file hdout.txt
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('tester_label_1.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/tester_label_1.xml',
             '--limit-xpaths-file',
-            str(samples_dir / 'elements_xpath_simplify_3.txt'),
+            str(SAMPLES_DIR / 'elements_xpath_simplify_3.txt'),
             '--output-headers-file',
             'hdout.txt',
         ),
 
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_*.xml" --generate-label ancillary --output-headers-file hdout.txt
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('tester_label_*.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/tester_label_*.xml',
             '--generate-label',
             'ancillary',
             '--output-headers-file',
@@ -612,18 +594,18 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
 
         # Executable command: pds4_create_xml_index ../test_files/labels "bad_lid_label.xml" --output-headers-file hdout.txt
         (
-            str(test_files_dir),
-            str(labels_dir.name / Path('bad_lid_label.xml')),
+            str(TEST_FILES_DIR),
+            LABEL_NAME + '/bad_lid_label.xml',
             '--output-headers-file',
             'hdout.txt',
         )
 
     ]
 )
-def test_failures(cmd_line):
+def test_failures(CMD_LINE):
     # Call main() function with the simulated command line arguments
     with pytest.raises(SystemExit) as e:
-        tools.main(cmd_line)
+        tools.main(CMD_LINE)
     assert e.type == SystemExit
     assert e.value.code != 0  # Check that the exit code indicates failure
     if os.path.isfile('hdout.txt'):
@@ -631,32 +613,32 @@ def test_failures(cmd_line):
 
 
 @pytest.mark.parametrize(
-    'new_file,cmd_line',
+    'NEW_FILE,CMD_LINE',
     [
         # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label_bad.xml" --limit-xpaths-file ../test_files/samples/elements_nilled_bad.txt --output-index-file indexout.csv
         (
             'nillable.csv',
             [
-                str(test_files_dir),  # directory path
-                str(labels_dir.name / Path('nilled_label_bad.xml')),
+                str(TEST_FILES_DIR),  # directory path
+                LABEL_NAME + '/nilled_label_bad.xml',
                 '--limit-xpaths-file',
-                str(samples_dir / 'elements_nilled_bad.txt'),
+                str(SAMPLES_DIR / 'elements_nilled_bad.txt'),
                 '--output-index-file'
             ]
         )
     ]
 )
-def test_failure_message(capfd, new_file, cmd_line):
-    with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
+def test_failure_message(capfd, NEW_FILE, CMD_LINE):
+    with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
         # THE PATH TO THE NEW FILE
-        path_to_file = temp_dir_path / new_file
+        path_to_file = temp_dir_path / NEW_FILE
         # Call main() function with the simulated command line arguments
-        cmd_line.append(str(path_to_file))
+        CMD_LINE.append(str(path_to_file))
 
         # Capture the output
-        tools.main(cmd_line)
+        tools.main(CMD_LINE)
         captured = capfd.readouterr()
 
         # Check if the expected statement is printed in stdout or stderr
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index ada7110..1fa6027 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -7,16 +7,15 @@
 import pytest
 import pds4indextools.pds4_create_xml_index as tools
 import textwrap as _textwrap
-from unittest import mock
-from unittest.mock import patch
+from unittest.mock import patch as PATCH
 
 
 # These two variables are the same for all tests, so we can either declare them as
 # global variables, or get the ROOT_DIR at the setup stage before running each test
 ROOT_DIR = Path(__file__).resolve().parent.parent
-test_files_dir = ROOT_DIR / 'test_files'
-expected_dir = test_files_dir / 'expected'
-labels_dir = test_files_dir / 'labels'
+TEST_FILES_DIR = ROOT_DIR / 'test_files'
+EXPECTED_DIR = TEST_FILES_DIR / 'expected'
+LABELS_DIR = TEST_FILES_DIR / 'labels'
 
 
 # Testing load_config_file()
@@ -67,7 +66,7 @@ def test_load_config_object():
 
     # Tests that the config_object is loaded over.
     config_object = tools.load_config_file(
-        specified_config_files=[str(expected_dir/'tester_config_nillable.yaml'),])
+        specified_config_files=[str(EXPECTED_DIR / 'tester_config_nillable.yaml'),])
 
     assert config_object['nillable']['pds:ASCII_Date_YMD']['inapplicable'] == '0001-01-01'
     assert config_object['nillable']['pds:ASCII_Date_YMD']['missing'] == '0002-01-01'
@@ -95,7 +94,7 @@ def test_load_config_object():
 
     # Tests specified configuration files wiht one or the other
     config_object = tools.load_config_file(
-        specified_config_files=[str(expected_dir/'tester_config_label.yaml'),])
+        specified_config_files=[str(EXPECTED_DIR / 'tester_config_label.yaml'),])
 
     assert config_object['label-contents']['version_id'] == '1.0'
     assert (config_object['label-contents']['title'] ==
@@ -103,12 +102,12 @@ def test_load_config_object():
 
     # A bad default config file
     with pytest.raises(SystemExit):
-        tools.load_config_file(default_config_file=expected_dir/'non_existent_file.ini')
+        tools.load_config_file(default_config_file=EXPECTED_DIR / 'non_existent_file.ini')
 
     # A bad specified config file
     with pytest.raises(SystemExit):
         tools.load_config_file(specified_config_files=list(
-            str(expected_dir/'non_existent_file.ini')))
+            str(EXPECTED_DIR / 'non_existent_file.ini')))
 
 
 # Testing default_value_for_nil()
@@ -201,7 +200,7 @@ def test_split_into_elements():
 # Testing process_schema_location()
 def test_process_schema_location():
     label_file = 'tester_label_1.xml'
-    schema_files = tools.process_schema_location(labels_dir / label_file)
+    schema_files = tools.process_schema_location(LABELS_DIR / label_file)
     assert (schema_files[0] ==
             'https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.xsd')
     assert (schema_files[1] ==
@@ -221,7 +220,7 @@ def test_parse_label_file_exception_handling(capsys):
 
 def test_extract_logical_identifier():
     label_file = 'tester_label_1.xml'
-    tree = etree.parse(str(labels_dir / label_file))
+    tree = etree.parse(str(LABELS_DIR / label_file))
     assert (tools.extract_logical_identifier(tree) ==
             'urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n')
 
@@ -251,7 +250,7 @@ def test_scrape_namespaces():
 
 
 def test_get_longest_row_length():
-    filename = expected_dir / 'extra_file_info_success_1.csv'
+    filename = EXPECTED_DIR / 'extra_file_info_success_1.csv'
     result = tools.get_longest_row_length(filename)
     assert result == 254
 
@@ -279,7 +278,7 @@ def create_temp_file():
 @pytest.mark.parametrize('platform_name', ['Windows', 'Linux', 'Darwin'])
 def test_get_creation_date(create_temp_file, platform_name):
     # Mock platform.system() to simulate different platforms
-    with mock.patch('platform.system', return_value=platform_name):
+    with PATCH('platform.system', return_value=platform_name):
         creation_date = tools.get_creation_date(create_temp_file)
         assert isinstance(creation_date, str)
         # Assert that the returned date is in ISO 8601 format
@@ -360,7 +359,7 @@ def test_update_nillable_elements_from_xsd_file_with_edge_cases():
     tree_duplicate_type = etree.fromstring(xsd_content_duplicate_type)
 
     # Mock the download_xsd_file function to return these trees based on input
-    with mock.patch(
+    with PATCH(
         'pds4indextools.pds4_create_xml_index.download_xsd_file'
                    ) as mock_download:
         # Define the behavior of the mock for each file
@@ -403,7 +402,7 @@ def test_clean_header_field_names():
 def test_compute_max_field_lengths():
 
     lengths = tools.compute_max_field_lengths(
-        str(expected_dir / 'extra_file_info_success_1.csv'))
+        str(EXPECTED_DIR / 'extra_file_info_success_1.csv'))
 
     assert lengths == {
         'filename': 18,
@@ -416,7 +415,7 @@ def test_compute_max_field_lengths():
 
     # failure
     with pytest.raises(SystemExit):
-        lengths = tools.compute_max_field_lengths(str(expected_dir / 'fake_file.csv'))
+        lengths = tools.compute_max_field_lengths(str(EXPECTED_DIR / 'fake_file.csv'))
 
 
 def test_sort_dataframe_key_error():
@@ -444,7 +443,7 @@ def test_validate_label_type():
         assert tools.validate_label_type(arg, valid_choices) == 'Product_Ancillary'
 
 
-@mock.patch('os.path.exists')
+@PATCH('os.path.exists')
 def test_generate_unique_filename(mock_exists):
     # Setup the mock to return True for the first two checks and False thereafter
     mock_exists.side_effect = [True, True, False]
@@ -488,9 +487,9 @@ def test_fill_text():
 
 # Assume the get_true_type function is imported from the relevant module.
 # from pds4indextools.pds4_create_xml_index import get_true_type
-@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces,
                                        mock_download_xsd_file):
     # Setup mocks
@@ -510,9 +509,9 @@ def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_nam
                                                      {"mock_namespace": "mock_value"})
 
 
-@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces,
                                         mock_download_xsd_file):
     # Setup mocks
@@ -539,9 +538,9 @@ def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_na
     assert mock_find_base_attribute.call_count == 3
 
 
-@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_with_modified_tag(mock_find_base_attribute,
                                            mock_scrape_namespaces,
                                            mock_download_xsd_file):
@@ -562,9 +561,9 @@ def test_true_type_found_with_modified_tag(mock_find_base_attribute,
                                              {"mock_namespace": "mock_value"})
 
 
-@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces,
                              mock_download_xsd_file):
     # Setup mocks

From d85ce1c3beb4c669899dee003f95da569ab4cf85 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 26 Aug 2024 13:44:36 -0700
Subject: [PATCH 10/24] Fixing incorrect capitalization

---
 tests/test_pds4_create_xml_index_blackbox.py | 62 ++++++++++----------
 tests/test_pds4_create_xml_index_whitebox.py | 32 +++++-----
 2 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 949f9eb..75f09e7 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -31,7 +31,7 @@ def compare_files(path_to_file, golden_file):
 
 
 @pytest.mark.parametrize(
-    'GOLDEN_FILE,NEW_FILE_INDEX,NEW_FILE_HEADERS,CMD_LINE',
+    'golden_file,new_file_index,new_file_headers,cmd_line',
     [
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
         (
@@ -465,55 +465,55 @@ def compare_files(path_to_file, golden_file):
         )
     ]
 )
-def test_success(GOLDEN_FILE, NEW_FILE_INDEX, NEW_FILE_HEADERS, CMD_LINE):
+def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
     # Create a temporary directory
     with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
-        if NEW_FILE_INDEX is None and NEW_FILE_HEADERS is None:
+        if new_file_index is None and new_file_headers is None:
             shutil.copy(LABELS_DIR / 'tester_label_1.xml', temp_dir_path)
-            CMD_LINE.append(str(temp_dir_path))
-            CMD_LINE.append('tester_label_1.xml')
-            CMD_LINE.append('--output-index-file')
-            CMD_LINE.append(str(temp_dir_path / 'index.csv'))
+            cmd_line.append(str(temp_dir_path))
+            cmd_line.append('tester_label_1.xml')
+            cmd_line.append('--output-index-file')
+            cmd_line.append(str(temp_dir_path / 'index.csv'))
             # Call main() function with the simulated command line arguments
-            tools.main(CMD_LINE)
+            tools.main(cmd_line)
 
             path_to_file = temp_dir_path / 'index.csv'
 
-            compare_files(path_to_file, GOLDEN_FILE)
+            compare_files(path_to_file, golden_file)
 
         else:
             # THE PATH TO THE NEW FILE
-            if NEW_FILE_INDEX:
-                path_to_file = temp_dir_path / NEW_FILE_INDEX
-                CMD_LINE.append('--output-index-file')
-                CMD_LINE.append(str(path_to_file))
+            if new_file_index:
+                path_to_file = temp_dir_path / new_file_index
+                cmd_line.append('--output-index-file')
+                cmd_line.append(str(path_to_file))
                 # Call main() function with the simulated command line arguments
-                tools.main(CMD_LINE)
+                tools.main(cmd_line)
 
-                compare_files(path_to_file, GOLDEN_FILE)
+                compare_files(path_to_file, golden_file)
 
-                if '--generate-label' in CMD_LINE:
+                if '--generate-label' in cmd_line:
                     label_path = str(path_to_file).replace('.csv', '.xml')
-                    golden_label = str(GOLDEN_FILE).replace('.csv', '.xml')
+                    golden_label = str(golden_file).replace('.csv', '.xml')
                     assert os.path.isfile(label_path)
 
                     compare_files(label_path, golden_label)
 
-            if NEW_FILE_HEADERS:
-                path_to_file = temp_dir_path / NEW_FILE_HEADERS
-                GOLDEN_FILE = str(GOLDEN_FILE).replace('.csv', '.txt')
-                CMD_LINE.append('--output-headers-file')
-                CMD_LINE.append(str(path_to_file))
+            if new_file_headers:
+                path_to_file = temp_dir_path / new_file_headers
+                golden_file = str(golden_file).replace('.csv', '.txt')
+                cmd_line.append('--output-headers-file')
+                cmd_line.append(str(path_to_file))
                 # Call main() function with the simulated command line arguments
-                tools.main(CMD_LINE)
+                tools.main(cmd_line)
 
-                compare_files(path_to_file, GOLDEN_FILE)
+                compare_files(path_to_file, golden_file)
 
 
 @pytest.mark.parametrize(
-    'CMD_LINE',
+    'cmd_line',
     [
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info bad_element --output-headers-file hdout.txt
         (
@@ -602,10 +602,10 @@ def test_success(GOLDEN_FILE, NEW_FILE_INDEX, NEW_FILE_HEADERS, CMD_LINE):
 
     ]
 )
-def test_failures(CMD_LINE):
+def test_failures(cmd_line):
     # Call main() function with the simulated command line arguments
     with pytest.raises(SystemExit) as e:
-        tools.main(CMD_LINE)
+        tools.main(cmd_line)
     assert e.type == SystemExit
     assert e.value.code != 0  # Check that the exit code indicates failure
     if os.path.isfile('hdout.txt'):
@@ -613,7 +613,7 @@ def test_failures(CMD_LINE):
 
 
 @pytest.mark.parametrize(
-    'NEW_FILE,CMD_LINE',
+    'NEW_FILE,cmd_line',
     [
         # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label_bad.xml" --limit-xpaths-file ../test_files/samples/elements_nilled_bad.txt --output-index-file indexout.csv
         (
@@ -628,17 +628,17 @@ def test_failures(CMD_LINE):
         )
     ]
 )
-def test_failure_message(capfd, NEW_FILE, CMD_LINE):
+def test_failure_message(capfd, NEW_FILE, cmd_line):
     with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
         temp_dir_path = Path(temp_dir)
 
         # THE PATH TO THE NEW FILE
         path_to_file = temp_dir_path / NEW_FILE
         # Call main() function with the simulated command line arguments
-        CMD_LINE.append(str(path_to_file))
+        cmd_line.append(str(path_to_file))
 
         # Capture the output
-        tools.main(CMD_LINE)
+        tools.main(cmd_line)
         captured = capfd.readouterr()
 
         # Check if the expected statement is printed in stdout or stderr
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index 1fa6027..28221fd 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -7,7 +7,7 @@
 import pytest
 import pds4indextools.pds4_create_xml_index as tools
 import textwrap as _textwrap
-from unittest.mock import patch as PATCH
+from unittest.mock import patch
 
 
 # These two variables are the same for all tests, so we can either declare them as
@@ -278,7 +278,7 @@ def create_temp_file():
 @pytest.mark.parametrize('platform_name', ['Windows', 'Linux', 'Darwin'])
 def test_get_creation_date(create_temp_file, platform_name):
     # Mock platform.system() to simulate different platforms
-    with PATCH('platform.system', return_value=platform_name):
+    with patch('platform.system', return_value=platform_name):
         creation_date = tools.get_creation_date(create_temp_file)
         assert isinstance(creation_date, str)
         # Assert that the returned date is in ISO 8601 format
@@ -359,7 +359,7 @@ def test_update_nillable_elements_from_xsd_file_with_edge_cases():
     tree_duplicate_type = etree.fromstring(xsd_content_duplicate_type)
 
     # Mock the download_xsd_file function to return these trees based on input
-    with PATCH(
+    with patch(
         'pds4indextools.pds4_create_xml_index.download_xsd_file'
                    ) as mock_download:
         # Define the behavior of the mock for each file
@@ -443,7 +443,7 @@ def test_validate_label_type():
         assert tools.validate_label_type(arg, valid_choices) == 'Product_Ancillary'
 
 
-@PATCH('os.path.exists')
+@patch('os.path.exists')
 def test_generate_unique_filename(mock_exists):
     # Setup the mock to return True for the first two checks and False thereafter
     mock_exists.side_effect = [True, True, False]
@@ -487,9 +487,9 @@ def test_fill_text():
 
 # Assume the get_true_type function is imported from the relevant module.
 # from pds4indextools.pds4_create_xml_index import get_true_type
-@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces,
                                        mock_download_xsd_file):
     # Setup mocks
@@ -509,9 +509,9 @@ def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_nam
                                                      {"mock_namespace": "mock_value"})
 
 
-@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces,
                                         mock_download_xsd_file):
     # Setup mocks
@@ -538,9 +538,9 @@ def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_na
     assert mock_find_base_attribute.call_count == 3
 
 
-@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_found_with_modified_tag(mock_find_base_attribute,
                                            mock_scrape_namespaces,
                                            mock_download_xsd_file):
@@ -561,9 +561,9 @@ def test_true_type_found_with_modified_tag(mock_find_base_attribute,
                                              {"mock_namespace": "mock_value"})
 
 
-@PATCH('pds4indextools.pds4_create_xml_index.download_xsd_file')
-@PATCH('pds4indextools.pds4_create_xml_index.scrape_namespaces')
-@PATCH('pds4indextools.pds4_create_xml_index.find_base_attribute')
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
 def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces,
                              mock_download_xsd_file):
     # Setup mocks

From a5a3fb5e3ad87fa3381c61e0041ab3c28dfa17df Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 28 Aug 2024 13:59:02 -0700
Subject: [PATCH 11/24] Updated config file, cleaned up debugging code

---
 docs/pds4_create_xml_index.rst              |  2 +
 pds4indextools/index_label_template_pds.xml | 35 ++++----
 pds4indextools/pds4_create_xml_index.py     | 90 +++++++--------------
 test_files/expected/tester_config.yaml      |  4 +-
 4 files changed, 53 insertions(+), 78 deletions(-)

diff --git a/docs/pds4_create_xml_index.rst b/docs/pds4_create_xml_index.rst
index d083287..3de7fc6 100644
--- a/docs/pds4_create_xml_index.rst
+++ b/docs/pds4_create_xml_index.rst
@@ -344,6 +344,8 @@ For reference, provided below are the full contents of the optional label classe
     doi
     curating_facility
     description
+  File_Area_Ancillary / File_Area_Metadata:
+    creation_date_time
 
 
 If no new contents are specified for label generation, the label will contain the
diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 982db44..de9e94c 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -100,43 +100,42 @@ $END_IF
     </Reference_List>
     $IF(Product_Ancillary)
     <File_Area_Ancillary>
-    $END_IF
-    $IF(Product_Metadata_Supplemental)
+    $ELSE
     <File_Area_Metadata>
     $END_IF
-    $IF(Product_Ancillary and isinstance(Product_Ancillary, dict) and 'File_Area_Ancillary' in Product_Ancillary)
-        $IF(EXISTS(File))
+    $IF(File_Area_Ancillary)
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>$File['creation_date_time']$</creation_date_time>
-            <md5_checksum>$File['md5_checksum']$</md5_checksum>
+            $IF(creation_date_time)
+            <creation_date_time>$File_Area_Ancillary['creation_date_time']$</creation_date_time>
+            $ELSE
+            <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            $END_IF
+            <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
-        $END_IF
-    $END_IF
-    $IF(Product_Metadata_Supplemental and isinstance(Product_Metadata_Supplemental, dict) and 'File_Area_Ancillary' in Product_Metadata_Supplemental)
-        $IF(File)
+    $ELSE_IF(File_Area_Metadata)
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>$File['creation_date_time']$</creation_date_time>
-            <md5_checksum>$File['md5_checksum']$</md5_checksum>
+            $IF(creation_date_time)
+            <creation_date_time>$File_Area_Metadata['creation_date_time']$</creation_date_time>
+            $ELSE
+            <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            $END_IF
+            <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
-        $END_IF
-    $END_IF
+    $ELSE
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            $IF(File)
-            <creation_date_time>$File['creation_date_time']$</creation_date_time>
-            $ELSE
             <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
-            $END_IF
             <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
+    $END_IF
         <Header>
             <offset unit="byte">0</offset>
             <object_length unit="byte">$object_length_h$</object_length>
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 34ca2ff..cba197c 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -121,14 +121,15 @@ def correct_duplicates(label_results):
         tag = key.split('/')[-1].split('<')[0]
         number = tag.split('_')[-1]
         if number.isdigit():
-            cropped = tag.replace('_'+number, '')
+            cropped = tag.replace(f'_{number}', '')
             if cropped in element_names:
-                if str(cropped+'_'+number+'<1>') in key:
-                    key_new = key.replace((cropped+'_'+number+'<1>'), cropped+'<1>')
+                if str(f'{cropped}_{number}<1>') in key:
+                    key_new = key.replace((f'{cropped}_{number}<1>'), f'{cropped}<1>')
                 else:
-                    key_new = key.replace(cropped+'_'+number, cropped+'<1>')
+                    key_new = key.replace(f'{cropped}_{number}', f'{cropped}<1>')
                 parent = key_new.split('/')[-2].split('<')[0]
-                key_new = key_new.replace(parent+'<1>', parent+'<'+str(int(number)+1)+'>')
+                key_new = key_new.replace(f'{parent}<1>',
+                                          f'{parent}<{str(int(number)+1)}>')
                 label_results[key_new] = label_results.pop(key)
         element_names.add(tag)
 
@@ -301,7 +302,7 @@ def filter_dict_by_glob_patterns(input_dict, glob_patterns, valid_add_extra_file
 
 def get_true_type(xsd_files, tag, namespaces):
     """
-    Determines the true type of a specified tag by searching through a list of XSD files.
+    Returns the true type of a specified tag by searching through a list of XSD files.
 
     This function iterates through the provided list of XSD files and attempts to find the
     "true type" of the given XML tag by examining its attributes and base types. If the
@@ -446,7 +447,7 @@ def process_headers(label_results, key, root, namespaces, prefixes):
     label_results[key_new] = label_results.pop(key)
 
 
-def renumber_xpaths(xpaths, args):
+def renumber_xpaths(xpaths):
     """
     Renumber a list of XPaths to be sequential at each level.
 
@@ -498,7 +499,6 @@ def renumber_xpaths(xpaths, args):
 
     Parameters:
         xpaths (list): The list of XPaths or XPath fragments.
-        args (argparse.Namespace): Arguments parsed from command line using argparse.
 
 
     Returns:
@@ -569,7 +569,7 @@ def split_xpath_prefix_and_num(s):
             # down.
             children = [x for x in parent_group_list if x.child is not None]
             if children:
-                child_map = renumber_xpaths([x.child for x in children], args)
+                child_map = renumber_xpaths([x.child for x in children])
                 xpath_map.update(
                     {
                         f'{x.parent}/{x.child}': (
@@ -642,18 +642,7 @@ def store_element_text(element, tree, results_dict, xsd_files, nillable_elements
             if not parent_check:
                 print(f'Non-nillable element in {label_filename} '
                       f'has no associated text: {tag}')
-                true_type = None
-                for xsd_file in xsd_files:
-                    xsd_tree = download_xsd_file(xsd_file)
-                    namespaces = scrape_namespaces(xsd_tree)
-                    true_type = find_base_attribute(xsd_tree, tag, namespaces)
-                    if not true_type:
-                        modified_tag = tag + "_WO_Units"
-                        true_type = find_base_attribute(xsd_tree, modified_tag,
-                                                        namespaces)
-                        # if true_type:
-                        #     break
-
+                true_type = get_true_type(xsd_files, tag, tree.getroot().nsmap)
                 default = default_value_for_nil(config, true_type, nil_value)
                 results_dict[xpath] = default
 
@@ -725,8 +714,7 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
             # Attempt to find the type definition in the document
             type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
                                      f'//xs:complexType[@name="{type_name}"]')
-            type_definition = tree.xpath(
-                type_definition_xpath, namespaces=namespace)
+            type_definition = tree.xpath(type_definition_xpath, namespaces=namespace)
 
             if type_definition:
                 # Take the first match
@@ -744,8 +732,7 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
                                                      namespaces=namespace)
                     base_type = extension.get('base')
 
-                nillable_elements_info[name] = (
-                    base_type or 'External or built-in type')
+                nillable_elements_info[name] = base_type or 'External or built-in type'
             else:
                 # Type definition not found, might be external or built-in type
                 nillable_elements_info[name] = 'External or built-in type'
@@ -947,7 +934,7 @@ def sort_dataframe(df, sort_keys):
 
     Raises:
         ValueError: If any of the provided sort keys are not found in the DataFrame,
-                    a `ValueError` is raised with a descriptive error message.
+        a `ValueError` is raised with a descriptive error message.
 
     Example:
         >>> df = pd.DataFrame({
@@ -961,10 +948,6 @@ def sort_dataframe(df, sort_keys):
         2  Charlie   22
         0    Alice   25
         1      Bob   30
-
-    Notes:
-        - The sorting is done in place, so the original DataFrame is modified.
-        - The function will raise an error if any of the specified sort keys are invalid.
     """
     try:
         df.sort_values(by=sort_keys, inplace=True)
@@ -1299,19 +1282,10 @@ def main(cmd_line=None):
 
     for pattern in patterns:
         files = directory_path.glob(pattern)
-
-        # Create an iterator from the generator
-        files_iter = iter(files)
-
-        # Use a sentinel object to check if there's any item
-        sentinel = object()
-        first_file = next(files_iter, sentinel)
-
-        if first_file is sentinel:
+        prev_len = len(label_files)
+        label_files.extend(files)
+        if len(label_files) == prev_len:
             print(f"No files found for pattern: {pattern}")
-        else:
-            # If not empty, continue processing and include the first file
-            label_files.extend(itertools.chain([first_file], files_iter))
 
     verboseprint(f'{len(label_files)} matching file(s) found')
 
@@ -1370,10 +1344,10 @@ def main(cmd_line=None):
         traverse_and_store(root, tree, label_results, xsd_files,
                            nillable_elements_info, config, label_file)
 
-        # # The XPath headers in the label_results dictionary are reformatted to
-        # # improve readability. Each XPath's namespace is replaced with its prefix for
-        # # faster reference. Duplicate XPaths are made unique to ensure all results are
-        # # present in the final product.
+        # The XPath headers in the label_results dictionary are reformatted to
+        # improve readability. Each XPath's namespace is replaced with its prefix for
+        # faster reference. Duplicate XPaths are made unique to ensure all results are
+        # present in the final product.
         for key in list(label_results):
             process_headers(label_results, key, root, namespaces, prefixes)
 
@@ -1387,7 +1361,7 @@ def main(cmd_line=None):
             new_parts = []
             for part in parts:
                 if not part.endswith('>') and parts.index(part) != 1:
-                    part = part+'<1>'
+                    part = f'{part}<1>'
                     new_parts.append(part)
                 else:
                     new_parts.append(part)
@@ -1402,7 +1376,7 @@ def main(cmd_line=None):
         # the column refers to. At this stage, duplicate XPaths may exist again due to
         # the reformatting. These duplicates are corrected to preserve the contents of
         # each element's value.
-        xpath_map = renumber_xpaths(label_results, args)
+        xpath_map = renumber_xpaths(label_results)
         for old_xpath, new_xpath in xpath_map.items():
             label_results[new_xpath] = label_results.pop(old_xpath)
 
@@ -1414,8 +1388,8 @@ def main(cmd_line=None):
         try:
             lid = extract_logical_identifier(tree)
         except AttributeError:
-            print(f"Label file {label_file} does not have a "
-                  f"logical_identifier attribute.")
+            print(f'Label file {label_file} does not have a '
+                  f'logical_identifier attribute.')
             sys.exit(1)
 
         # Attach extra columns if asked for.
@@ -1439,20 +1413,19 @@ def main(cmd_line=None):
     # dictionary will be returned. Glob patterns are processed sequentially, with the
     # first pattern having the highest priority.
 
-    for label_results in all_results:
-        ind = all_results.index(label_results)
+    for ind, label_results in enumerate(all_results):
         label_results_new = filter_dict_by_glob_patterns(
             label_results, elements_to_scrape, valid_add_extra_file_info, verboseprint)
         all_results[ind] = label_results_new
 
-    if all(len(set(r)) == 0 for r in all_results):
+    if all(len(r) == 0 for r in all_results):
         print('No results found: glob pattern(s) excluded all matches.')
         sys.exit(1)
 
-    # # If --simplify-xpaths is used, the XPath headers will be shortened to the
-    # # element's tag and namespace prefix. This is contingent on the uniqueness of
-    # # the XPath header; if more than one XPath header shares a tag, a namespace and a
-    # # predicate value, the XPath header will remain whole.
+    # If --simplify-xpaths is used, the XPath headers will be shortened to the
+    # element's tag and namespace prefix. This is contingent on the uniqueness of
+    # the XPath header; if more than one XPath header shares a tag, a namespace and a
+    # predicate value, the XPath header will remain whole.
     if args.simplify_xpaths:
         headers = {}
         unique_tags_master = []
@@ -1487,8 +1460,7 @@ def main(cmd_line=None):
             for tag in unique_tags:
                 unique_tags_master.append(tag)
 
-        for label_results in all_results:
-            ind = all_results.index(label_results)
+        for ind, label_results in enumerate(all_results):
             new_label_results = {}
             for key, value in list(label_results.items()):
                 new_key = headers[key]
diff --git a/test_files/expected/tester_config.yaml b/test_files/expected/tester_config.yaml
index 7b29fd8..0cd13cc 100644
--- a/test_files/expected/tester_config.yaml
+++ b/test_files/expected/tester_config.yaml
@@ -19,5 +19,7 @@ nillable:
 
 label-contents:
   version_id: 1.1
-  File:
+  File_Area_Metadata:
+    creation_date_time: '0001-01-01T00:00:00.00Z'
+  File_Area_Ancillary:
     creation_date_time: '0001-01-01T00:00:00.00Z'

From f4d745b8b9539eb26fa67e0133f4fbf5ec4e9cdc Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 28 Aug 2024 14:16:15 -0700
Subject: [PATCH 12/24] Updated label template with $ELSE statements

---
 pds4indextools/index_label_template_pds.xml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index de9e94c..cad2abe 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -191,13 +191,11 @@ $END_IF
         $END_IF
     $IF(Product_Ancillary)
     </File_Area_Ancillary>
-    $END_IF
-    $IF(Product_Metadata_Supplemental)
+    $ELSE
     </File_Area_Metadata>
     $END_IF
 $IF(Product_Ancillary)
 </Product_Ancillary>
-$END_IF
-$IF(Product_Metadata_Supplemental)
+$ELSE
 </Product_Metadata_Supplemental>
 $END_IF

From 11430816761956b5d6c084c4b8b73eb0f3e4d39e Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 29 Aug 2024 10:35:07 -0700
Subject: [PATCH 13/24] Fixed duplicate scraped label issue caused by
 generalized glob patterns

---
 pds4indextools/pds4_create_xml_index.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index cba197c..a26c4fb 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -1263,7 +1263,7 @@ def main(cmd_line=None):
     # will determine which files will be scraped for.
 
     nillable_elements_info = {}
-    label_files = []
+    collected_files = set()
     all_results = []
     tags = []
     xsd_files = []
@@ -1282,13 +1282,15 @@ def main(cmd_line=None):
 
     for pattern in patterns:
         files = directory_path.glob(pattern)
-        prev_len = len(label_files)
-        label_files.extend(files)
-        if len(label_files) == prev_len:
+        prev_len = len(collected_files)
+        collected_files.update(files)
+        if len(collected_files) == prev_len:
             print(f"No files found for pattern: {pattern}")
 
-    verboseprint(f'{len(label_files)} matching file(s) found')
+    verboseprint(f'{len(collected_files)} matching file(s) found')
 
+    label_files = list(collected_files)
+    label_files.sort()
     if label_files == []:
         print(f'No files matching any patterns found in directory: {directory_path}')
         sys.exit(1)

From 2fb7bab16d277a3690c0cca58b9bbd608ed4470d Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 29 Aug 2024 11:00:06 -0700
Subject: [PATCH 14/24] Got unit test coverage back up to 100%

---
 pds4indextools/pds4_create_xml_index.py      | 6 ++++--
 tests/test_pds4_create_xml_index_blackbox.py | 8 +++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index a26c4fb..5b4c26b 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -893,13 +893,15 @@ def get_base_type(query):
         f"/*[local-name()='extension']/*/*/*/@base"
     ]
 
+    base_type = None
     for query in queries:
         result = get_base_type(query)
         if result:
             base_type = result[0]
-            return base_type
+        else:
+            continue
 
-    return None
+    return base_type
 
 
 def scrape_namespaces(tree):
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 75f09e7..14afe97 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -471,17 +471,15 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
         temp_dir_path = Path(temp_dir)
 
         if new_file_index is None and new_file_headers is None:
-            shutil.copy(LABELS_DIR / 'tester_label_1.xml', temp_dir_path)
-            cmd_line.append(str(temp_dir_path))
+            cmd_line.append(str(LABELS_DIR))
             cmd_line.append('tester_label_1.xml')
-            cmd_line.append('--output-index-file')
-            cmd_line.append(str(temp_dir_path / 'index.csv'))
             # Call main() function with the simulated command line arguments
             tools.main(cmd_line)
 
-            path_to_file = temp_dir_path / 'index.csv'
+            path_to_file = ROOT_DIR / 'index.csv'
 
             compare_files(path_to_file, golden_file)
+            os.remove(path_to_file)
 
         else:
             # THE PATH TO THE NEW FILE

From 9f6bfc5087eaa9f79a28117165c508d935de0e0f Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 29 Aug 2024 14:44:11 -0700
Subject: [PATCH 15/24] Making flake8 compliant

---
 tests/test_pds4_create_xml_index_blackbox.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 14afe97..ad5fabc 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -2,7 +2,6 @@
 import pytest
 import os
 import tempfile
-import shutil
 import pds4indextools.pds4_create_xml_index as tools
 
 

From a138f7cf35849e083e7769ffe63c7c3990ffb4fa Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 4 Sep 2024 16:05:56 -0700
Subject: [PATCH 16/24] Making changes according to pull request

---
 pds4indextools/default_config.yaml                 |  2 ++
 pds4indextools/index_label_template_pds.xml        | 14 +++++++-------
 pds4indextools/pds4_create_xml_index.py            |  2 +-
 test_files/expected/label_success_2.xml            |  2 +-
 .../{expected => samples}/tester_config.yaml       |  2 +-
 .../{expected => samples}/tester_config_label.yaml |  0
 .../tester_config_nillable.yaml                    |  0
 tests/test_pds4_create_xml_index_blackbox.py       | 12 ++++++------
 tests/test_pds4_create_xml_index_whitebox.py       |  5 +++--
 9 files changed, 21 insertions(+), 18 deletions(-)
 rename test_files/{expected => samples}/tester_config.yaml (91%)
 rename test_files/{expected => samples}/tester_config_label.yaml (100%)
 rename test_files/{expected => samples}/tester_config_nillable.yaml (100%)

diff --git a/pds4indextools/default_config.yaml b/pds4indextools/default_config.yaml
index a2a5a48..b23fc05 100644
--- a/pds4indextools/default_config.yaml
+++ b/pds4indextools/default_config.yaml
@@ -44,3 +44,5 @@ label-contents:
   External_Reference:
   Source_Product_Internal:
   Source_Product_External:
+  File_Area_Ancillary:
+  File_Area_Metadata:
diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index cad2abe..fdc59aa 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -103,26 +103,26 @@ $END_IF
     $ELSE
     <File_Area_Metadata>
     $END_IF
-    $IF(File_Area_Ancillary)
+        $IF(Product_Ancillary and File_Area_Ancillary)
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            $IF(creation_date_time)
+            $IF(File_Area_Ancillary['creation_date_time'])
             <creation_date_time>$File_Area_Ancillary['creation_date_time']$</creation_date_time>
             $ELSE
-            <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
             $END_IF
             <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
-    $ELSE_IF(File_Area_Metadata)
+        $ELSE_IF(Product_Metadata_Supplemental and File_Area_Metadata)
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            $IF(creation_date_time)
+            $IF(File_Area_Metadata['creation_date_time'])
             <creation_date_time>$File_Area_Metadata['creation_date_time']$</creation_date_time>
             $ELSE
-            <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
             $END_IF
             <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
@@ -131,7 +131,7 @@ $END_IF
         <File>
             <file_name>$BASENAME(TEMPFILE)$</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>$DATETIME(creation_date_time)$</creation_date_time>
+            <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
             <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
             <comment></comment>
         </File>
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 5b4c26b..1be5600 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -1587,7 +1587,7 @@ def main(cmd_line=None):
         # .yaml file from --config-file
         label_content = {
             'logical_identifier': 'urn:nasa:pds:rms_metadata:document_opus:' + filename,
-            'creation_date_time': str(creation_date),
+            'calculated_creation_date_time': str(creation_date),
             'TEMPFILE': index_file,
             'Field_Content': header_info,
             'fields': len(header_info),
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
index 628ae41..23a5758 100644
--- a/test_files/expected/label_success_2.xml
+++ b/test_files/expected/label_success_2.xml
@@ -26,7 +26,7 @@
         <File>
             <file_name>generated_label_2.csv</file_name>
             <local_identifier>index-table</local_identifier>
-            <creation_date_time>0001-01-01T00:00:00.00Z</creation_date_time>
+            <creation_date_time>0002-02-02T00:00:00.00Z</creation_date_time>
             <md5_checksum>53d47b320936ac3fbba0852696065418</md5_checksum>
             <comment></comment>
         </File>
diff --git a/test_files/expected/tester_config.yaml b/test_files/samples/tester_config.yaml
similarity index 91%
rename from test_files/expected/tester_config.yaml
rename to test_files/samples/tester_config.yaml
index 0cd13cc..9097074 100644
--- a/test_files/expected/tester_config.yaml
+++ b/test_files/samples/tester_config.yaml
@@ -20,6 +20,6 @@ nillable:
 label-contents:
   version_id: 1.1
   File_Area_Metadata:
-    creation_date_time: '0001-01-01T00:00:00.00Z'
+    creation_date_time: '0002-02-02T00:00:00.00Z'
   File_Area_Ancillary:
     creation_date_time: '0001-01-01T00:00:00.00Z'
diff --git a/test_files/expected/tester_config_label.yaml b/test_files/samples/tester_config_label.yaml
similarity index 100%
rename from test_files/expected/tester_config_label.yaml
rename to test_files/samples/tester_config_label.yaml
diff --git a/test_files/expected/tester_config_nillable.yaml b/test_files/samples/tester_config_nillable.yaml
similarity index 100%
rename from test_files/expected/tester_config_nillable.yaml
rename to test_files/samples/tester_config_nillable.yaml
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index ad5fabc..aa8dff5 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -403,7 +403,7 @@ def compare_files(path_to_file, golden_file):
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_1.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_1.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_1.csv
         # test_files/expected/label_success_1.xml
@@ -416,11 +416,11 @@ def compare_files(path_to_file, golden_file):
                 '--generate-label',
                 'ancillary',
                 '--config',
-                str(EXPECTED_DIR / 'tester_config.yaml')
+                str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --output-index-file generated_label_2.csv --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_2.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --output-index-file generated_label_2.csv --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_2.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_2.csv
         # test_files/expected/label_success_2.xml
@@ -434,11 +434,11 @@ def compare_files(path_to_file, golden_file):
                 'metadata',
                 '--fixed-width',
                 '--config',
-                str(EXPECTED_DIR / 'tester_config.yaml')
+                str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --generate-label ancillary --config ../test_files/expected/tester_config.yaml --output-index-file generated_label_3.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --generate-label ancillary --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_3.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_3.csv
         # test_files/expected/label_success_3.xml
@@ -459,7 +459,7 @@ def compare_files(path_to_file, golden_file):
                 '--generate-label',
                 'ancillary',
                 '--config',
-                str(EXPECTED_DIR / 'tester_config.yaml')
+                str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         )
     ]
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index 28221fd..62a8c83 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -14,6 +14,7 @@
 # global variables, or get the ROOT_DIR at the setup stage before running each test
 ROOT_DIR = Path(__file__).resolve().parent.parent
 TEST_FILES_DIR = ROOT_DIR / 'test_files'
+SAMPLES_DIR = TEST_FILES_DIR / 'samples'
 EXPECTED_DIR = TEST_FILES_DIR / 'expected'
 LABELS_DIR = TEST_FILES_DIR / 'labels'
 
@@ -66,7 +67,7 @@ def test_load_config_object():
 
     # Tests that the config_object is loaded over.
     config_object = tools.load_config_file(
-        specified_config_files=[str(EXPECTED_DIR / 'tester_config_nillable.yaml'),])
+        specified_config_files=[str(SAMPLES_DIR / 'tester_config_nillable.yaml'),])
 
     assert config_object['nillable']['pds:ASCII_Date_YMD']['inapplicable'] == '0001-01-01'
     assert config_object['nillable']['pds:ASCII_Date_YMD']['missing'] == '0002-01-01'
@@ -94,7 +95,7 @@ def test_load_config_object():
 
     # Tests specified configuration files wiht one or the other
     config_object = tools.load_config_file(
-        specified_config_files=[str(EXPECTED_DIR / 'tester_config_label.yaml'),])
+        specified_config_files=[str(SAMPLES_DIR / 'tester_config_label.yaml'),])
 
     assert config_object['label-contents']['version_id'] == '1.0'
     assert (config_object['label-contents']['title'] ==

From 59be059ec5d585719a394db2b720f0f4b7562264 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 4 Sep 2024 16:49:32 -0700
Subject: [PATCH 17/24] Adding further implementation in label template

---
 pds4indextools/index_label_template_pds.xml | 42 ++++++++++-----------
 pds4indextools/pds4_create_xml_index.py     |  2 +-
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index fdc59aa..2f8dea3 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -62,37 +62,37 @@ $END_IF
         $IF(Internal_Reference)
         $FOR(field, k=Internal_Reference)
         <Internal_Reference>
-            <lid_reference></lid_reference>
-            <reference_type></reference_type>
-            <comment></comment>
+            <lid_reference>$field['lid_reference']$</lid_reference>
+            <reference_type>$field['reference_type']$</reference_type>
+            <comment>$field['comment']$</comment>
         </Internal_Reference>
         $END_FOR
         $END_IF
         $IF(External_Reference)
         $FOR(field, k=External_Reference)
         <External_Reference>
-            <doi></doi>
-            <reference_text></reference_text>
-            <description></description>
+            <doi>$field['doi']$</doi>
+            <reference_text>$field['reference_text']$</reference_text>
+            <description>$field['description']$</description>
         </External_Reference>
         $END_FOR
         $END_IF
         $IF(Source_Product_Internal)
         $FOR(field, k=Source_Product_Internal)
         <Source_Product_Internal>
-            <lidvid_reference></lidvid_reference>
-            <reference_type></reference_type>
-            <comment></comment>
+            <lidvid_reference>$field['lidvid_reference']$</lidvid_reference>
+            <reference_type>$field['reference_type']$</reference_type>
+            <comment>$field['comment']$</comment>
         </Source_Product_Internal>
         $END_FOR
         $END_IF
         $IF(Source_Product_External)
         $FOR(field, k=Source_Product_External)
         <Source_Product_External>
-            <external_source_product_identifier></external_source_product_identifier>
-            <reference_type></reference_type>
-            <doi></doi>
-            <curating_facility></curating_facility>
+            <external_source_product_identifier>$external_source_product_identifier']$</external_source_product_identifier>
+            <reference_type>$field['reference_type']$</reference_type>
+            <doi>$field['doi']$</doi>
+            <curating_facility>$field['curating_facility']$</curating_facility>
             <description></description>
         </Source_Product_External>
         $END_FOR
@@ -105,34 +105,34 @@ $END_IF
     $END_IF
         $IF(Product_Ancillary and File_Area_Ancillary)
         <File>
-            <file_name>$BASENAME(TEMPFILE)$</file_name>
+            <file_name>$BASENAME(index_file_name)$</file_name>
             <local_identifier>index-table</local_identifier>
             $IF(File_Area_Ancillary['creation_date_time'])
             <creation_date_time>$File_Area_Ancillary['creation_date_time']$</creation_date_time>
             $ELSE
             <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
             $END_IF
-            <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
+            <md5_checksum>$FILE_MD5(index_file_name)$</md5_checksum>
             <comment></comment>
         </File>
         $ELSE_IF(Product_Metadata_Supplemental and File_Area_Metadata)
         <File>
-            <file_name>$BASENAME(TEMPFILE)$</file_name>
+            <file_name>$BASENAME(index_file_name)$</file_name>
             <local_identifier>index-table</local_identifier>
             $IF(File_Area_Metadata['creation_date_time'])
             <creation_date_time>$File_Area_Metadata['creation_date_time']$</creation_date_time>
             $ELSE
             <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
             $END_IF
-            <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
+            <md5_checksum>$FILE_MD5(index_file_name)$</md5_checksum>
             <comment></comment>
         </File>
     $ELSE
         <File>
-            <file_name>$BASENAME(TEMPFILE)$</file_name>
+            <file_name>$BASENAME(index_file_name)$</file_name>
             <local_identifier>index-table</local_identifier>
             <creation_date_time>$DATETIME(calculated_creation_date_time)$</creation_date_time>
-            <md5_checksum>$FILE_MD5(TEMPFILE)$</md5_checksum>
+            <md5_checksum>$FILE_MD5(index_file_name)$</md5_checksum>
             <comment></comment>
         </File>
     $END_IF
@@ -146,7 +146,7 @@ $END_IF
         <Table_Character>
             <offset unit="byte"></offset>
             <object_length unit="byte">$object_length_t$</object_length>
-            <records>$FILE_RECORDS(TEMPFILE)$</records>
+            <records>$FILE_RECORDS(index_file_name)$</records>
             <record_delimiter>Line-Feed</record_delimiter>
             <description></description>
             <Record_Character>
@@ -170,7 +170,7 @@ $END_IF
             <offset unit="byte">0</offset>
             <object_length unit="byte">$object_length_t$</object_length>
             <parsing_standard_id>PDS DSV 1</parsing_standard_id>
-            <records>$FILE_RECORDS(TEMPFILE)$</records>
+            <records>$FILE_RECORDS(index_file_name)$</records>
             <record_delimiter>Line-Feed</record_delimiter>
             <field_delimiter>Comma</field_delimiter>
             <Record_Delimited>
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 1be5600..9bf6439 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -1588,7 +1588,7 @@ def main(cmd_line=None):
         label_content = {
             'logical_identifier': 'urn:nasa:pds:rms_metadata:document_opus:' + filename,
             'calculated_creation_date_time': str(creation_date),
-            'TEMPFILE': index_file,
+            'index_file_name': index_file,
             'Field_Content': header_info,
             'fields': len(header_info),
             'maximum_record_length': get_longest_row_length(index_file),

From 94b43c1c26f0b0a7f2f10c631d6fe0955233d16d Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 5 Sep 2024 12:05:01 -0700
Subject: [PATCH 18/24] Added unit tests for references in label generation

---
 pds4indextools/index_label_template_pds.xml   |   4 +-
 .../expected/label_references_success.csv     |   2 +
 .../expected/label_references_success.xml     | 158 ++++++++++++++++++
 .../samples/tester_config_reference.yaml      |  36 ++++
 tests/test_pds4_create_xml_index_blackbox.py  |  27 ++-
 5 files changed, 219 insertions(+), 8 deletions(-)
 create mode 100644 test_files/expected/label_references_success.csv
 create mode 100644 test_files/expected/label_references_success.xml
 create mode 100644 test_files/samples/tester_config_reference.yaml

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 2f8dea3..5f7aca3 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -89,11 +89,11 @@ $END_IF
         $IF(Source_Product_External)
         $FOR(field, k=Source_Product_External)
         <Source_Product_External>
-            <external_source_product_identifier>$external_source_product_identifier']$</external_source_product_identifier>
+            <external_source_product_identifier>$field['external_source_product_identifier']$</external_source_product_identifier>
             <reference_type>$field['reference_type']$</reference_type>
             <doi>$field['doi']$</doi>
             <curating_facility>$field['curating_facility']$</curating_facility>
-            <description></description>
+            <description>$field['description']$</description>
         </Source_Product_External>
         $END_FOR
         $END_IF
diff --git a/test_files/expected/label_references_success.csv b/test_files/expected/label_references_success.csv
new file mode 100644
index 0000000..0eb78a1
--- /dev/null
+++ b/test_files/expected/label_references_success.csv
@@ -0,0 +1,2 @@
+pds:logical_identifier<1>,pds:version_id<1>,pds:title<1>,pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:type<1>,pds:lid_reference<1>,pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_references_success.xml b/test_files/expected/label_references_success.xml
new file mode 100644
index 0000000..51e978f
--- /dev/null
+++ b/test_files/expected/label_references_success.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+
+<Product_Ancillary xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:rms_metadata:document_opus:label_references</logical_identifier>
+        <version_id>1.0</version_id>
+        <title>Index file for my occultation bundle</title>
+        <information_model_version>1.21.0.0</information_model_version>
+        <product_class>Product_Ancillary</product_class>
+        <Modification_History>
+            <Modification_Detail>
+                <modification_date>2024-01-01</modification_date>
+                <version_id>1.1</version_id>
+                <description>This is a lengthy description of what this modification
+changed in the bundle.
+There were lots of changes.
+</description>
+            </Modification_Detail>
+            <Modification_Detail>
+                <modification_date>2023-01-01</modification_date>
+                <version_id>1.</version_id>
+                <description>Initial release.</description>
+            </Modification_Detail>
+        </Modification_History>
+        <License_Information>
+            <name>Creative Common Public License CC0 1.0 (2024)</name>
+            <description>Creative Commons Zero (CC0) license information.</description>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0</lid_reference>
+                <reference_type>product_to_license</reference_type>
+            </Internal_Reference>
+        </License_Information>
+    </Identification_Area>
+    <Reference_List>
+        <Internal_Reference>
+            <lid_reference>urn:nasa:pds:cassini_iss_cruise:data_raw:body-geometry</lid_reference>
+            <reference_type>data_to_resource</reference_type>
+            <comment>The index table of body surface geometry information associated with each observation.</comment>
+        </Internal_Reference>
+        <Internal_Reference>
+            <lid_reference>urn:nasa:pds:cassini_iss_cruise:body-inventory</lid_reference>
+            <reference_type>data_to_resource</reference_type>
+            <comment>An index listing every Saturn system body expected to fall within each field of view.</comment>
+        </Internal_Reference>
+        <External_Reference>
+            <doi>10.1086/113662</doi>
+            <reference_text>Elliot et al. (1984). "Structure of the Uranian rings. I. Square-well model and particle-size constraints" Astron J. 89, 1587-1603.</reference_text>
+            <description>reference material</description>
+        </External_Reference>
+        <Source_Product_Internal>
+            <lidvid_reference>urn:nasa:pds:insight-ifg-mars:data-ifg-raw:ifg-raw-sol0014-20181211t021721-20181211t150435-pt2hz::5.0</lidvid_reference>
+            <reference_type>data_to_raw_source_product</reference_type>
+            <comment>Raw data used in processing</comment>
+        </Source_Product_Internal>
+        <Source_Product_External>
+            <external_source_product_identifier>CO-S-UVIS-2-CUBE-V1.4:COUVIS_0056/DATA/D2016_245/EUV2016_245_17_49</external_source_product_identifier>
+            <reference_type>data_to_raw_source_product</reference_type>
+            <doi>None</doi>
+            <curating_facility>PDS RMS Node</curating_facility>
+            <description>The original PDS3 version of this product. The form of the reference is dataset_id:volume_id:directory_path:file_name.</description>
+        </Source_Product_External>
+    </Reference_List>
+    <File_Area_Ancillary>
+        <File>
+            <file_name>label_references.csv</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>0001-01-01T00:00:00.00Z</creation_date_time>
+            <md5_checksum>85e4697006ea9a54e7eafa8cf4b9bb40</md5_checksum>
+            <comment></comment>
+        </File>
+        <Header>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">303</object_length>
+            <parsing_standard_id>UTF-8 Text</parsing_standard_id>
+            <description>Provides the column headers, separated by commas, for the data table.</description>
+        </Header>
+        <Table_Delimited>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">542</object_length>
+            <parsing_standard_id>PDS DSV 1</parsing_standard_id>
+            <records>2</records>
+            <record_delimiter>Line-Feed</record_delimiter>
+            <field_delimiter>Comma</field_delimiter>
+            <Record_Delimited>
+                <fields>9</fields>
+                <groups>0</groups>
+                <maximum_record_length unit="byte">302</maximum_record_length>
+                    <Field_Delimited>
+                        <name>pds:logical_identifier&lt;1&gt;</name>
+                        <field_number>1</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">52</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:version_id&lt;1&gt;</name>
+                        <field_number>2</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:title&lt;1&gt;</name>
+                        <field_number>3</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">33</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:information_model_version&lt;1&gt;</name>
+                        <field_number>4</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">8</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>5</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">41</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:name&lt;1&gt;</name>
+                        <field_number>6</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">15</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:type&lt;1&gt;</name>
+                        <field_number>7</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:lid_reference&lt;1&gt;</name>
+                        <field_number>8</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">50</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds:reference_type&lt;1&gt;</name>
+                        <field_number>9</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+            </Record_Delimited>
+        </Table_Delimited>
+    </File_Area_Ancillary>
+</Product_Ancillary>
diff --git a/test_files/samples/tester_config_reference.yaml b/test_files/samples/tester_config_reference.yaml
new file mode 100644
index 0000000..266e540
--- /dev/null
+++ b/test_files/samples/tester_config_reference.yaml
@@ -0,0 +1,36 @@
+
+label-contents:
+  title: Index file for my occultation bundle
+  Modification_Detail:
+    - modification_date: '2024-01-01'
+      version_id: 1.1
+      description: |
+        This is a lengthy description of what this modification
+        changed in the bundle.
+        There were lots of changes.
+    - modification_date: '2023-01-01'
+      version_id: 1.0
+      description: Initial release.
+  Internal_Reference:
+    - lid_reference: urn:nasa:pds:cassini_iss_cruise:data_raw:body-geometry
+      reference_type: data_to_resource
+      comment: The index table of body surface geometry information associated with each observation.
+    - lid_reference: urn:nasa:pds:cassini_iss_cruise:body-inventory
+      reference_type: data_to_resource
+      comment: An index listing every Saturn system body expected to fall within each field of view.
+  External_Reference:
+    - doi: 10.1086/113662
+      reference_text: Elliot et al. (1984). "Structure of the Uranian rings. I. Square-well model and particle-size constraints" Astron J. 89, 1587-1603.
+      description: reference material
+  Source_Product_Internal:
+    - lidvid_reference: urn:nasa:pds:insight-ifg-mars:data-ifg-raw:ifg-raw-sol0014-20181211t021721-20181211t150435-pt2hz::5.0
+      reference_type: data_to_raw_source_product
+      comment: Raw data used in processing
+  Source_Product_External:
+    - external_source_product_identifier: CO-S-UVIS-2-CUBE-V1.4:COUVIS_0056/DATA/D2016_245/EUV2016_245_17_49
+      reference_type: data_to_raw_source_product
+      doi:
+      curating_facility: PDS RMS Node
+      description: The original PDS3 version of this product. The form of the reference is dataset_id:volume_id:directory_path:file_name.
+  File_Area_Ancillary:
+    creation_date_time: '0001-01-01T00:00:00.00Z'
\ No newline at end of file
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index aa8dff5..0c0b395 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -49,6 +49,21 @@ def compare_files(path_to_file, golden_file):
             ]
         ),
 
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config-file ../test_files/samples/tester_config_reference.yaml --output-index-file label_references_success.csv --simplify-xpaths
+        (
+            str(EXPECTED_DIR / 'label_references_success.csv'),
+            'label_references.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/tester_label_1.xml',
+                '--generate-label',
+                'ancillary',
+                '--config-file',
+                str(SAMPLES_DIR / 'tester_config_reference.yaml'),
+                '--simplify-xpaths'
+            ]
+        ),
+
         # Testing --limit-xpaths-file with two outputs
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt --output-index-file limit_xpaths_file.csv
         # Compare result to golden copy:
@@ -403,7 +418,7 @@ def compare_files(path_to_file, golden_file):
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_1.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_1.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_1.csv
         # test_files/expected/label_success_1.xml
@@ -415,12 +430,12 @@ def compare_files(path_to_file, golden_file):
                 LABEL_NAME + '/tester_label_1.xml',
                 '--generate-label',
                 'ancillary',
-                '--config',
+                '--config-file',
                 str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --output-index-file generated_label_2.csv --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_2.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_2.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_2.csv
         # test_files/expected/label_success_2.xml
@@ -433,12 +448,12 @@ def compare_files(path_to_file, golden_file):
                 '--generate-label',
                 'metadata',
                 '--fixed-width',
-                '--config',
+                '--config-file',
                 str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         ),
 
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --generate-label ancillary --config ../test_files/samples/tester_config.yaml --output-index-file generated_label_3.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --generate-label ancillary --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_3.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_3.csv
         # test_files/expected/label_success_3.xml
@@ -458,7 +473,7 @@ def compare_files(path_to_file, golden_file):
                 'filename',
                 '--generate-label',
                 'ancillary',
-                '--config',
+                '--config-file',
                 str(SAMPLES_DIR / 'tester_config.yaml')
             ]
         )

From 6639527d8daa8f87d2a14a7cedec36f841907f76 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 17 Oct 2024 15:04:32 -0700
Subject: [PATCH 19/24] Making changes according to pull request

---
 docs/pds4_create_xml_index.rst                |  65 ++---
 pds4indextools/index_label_template_pds.xml   |  32 ++-
 pds4indextools/pds4_create_xml_index.py       | 232 ++++++++----------
 .../cleaned_headers_label_success.csv         |   3 +
 .../cleaned_headers_label_success.xml         | 226 +++++++++++++++++
 test_files/expected/label_success_1.xml       |   2 -
 test_files/expected/label_success_2.xml       |   2 -
 test_files/expected/label_success_3.xml       |   2 -
 test_files/expected/nested_label_success.txt  |  25 ++
 test_files/labels/nested_label.xml            |  74 ++++++
 test_files/labels/rf-tester-label_1.xml       |  66 +++++
 test_files/labels/rf-tester-label_2.xml       |  50 ++++
 test_files/samples/tester_config_label.yaml   |  28 ++-
 tests/test_pds4_create_xml_index_blackbox.py  |  34 ++-
 tests/test_pds4_create_xml_index_whitebox.py  |  22 --
 15 files changed, 663 insertions(+), 200 deletions(-)
 create mode 100644 test_files/expected/cleaned_headers_label_success.csv
 create mode 100644 test_files/expected/cleaned_headers_label_success.xml
 create mode 100644 test_files/expected/nested_label_success.txt
 create mode 100644 test_files/labels/nested_label.xml
 create mode 100644 test_files/labels/rf-tester-label_1.xml
 create mode 100644 test_files/labels/rf-tester-label_2.xml

diff --git a/docs/pds4_create_xml_index.rst b/docs/pds4_create_xml_index.rst
index 3de7fc6..2474ffd 100644
--- a/docs/pds4_create_xml_index.rst
+++ b/docs/pds4_create_xml_index.rst
@@ -145,9 +145,10 @@ Limiting results
 
 - ``--limit-xpaths-file XPATHS_FILEPATH``: Specify a text file containing a list of
   specific XPaths to extract from the label files. If this argument is not specified, all
-  elements found in the label files will be included. The given text file can specify
-  XPaths using ``glob``-style syntax, where each XPath level is treated as if it were a
-  directory in a filesystem. Available wildcards are:
+  elements found in the label files will be included. This option accepts only full
+  XPaths; simplified XPaths are not allowed. The given text file can specify XPaths
+  using ``glob``-style syntax, where each XPath level is treated as if it were a
+  directory in a filesystem. Available wildcards are:
 
   - ``?`` matches any single character within an XPath level
   - ``*`` matches any series of characters within an XPath level
@@ -302,6 +303,8 @@ Below is the ``label-contents`` section of the default configuration file::
     External_Reference:
     Source_Product_Internal:
     Source_Product_External:
+    File_Area_Ancillary:
+    File_Area_Metadata:
 
 Each listed value with an empty dictionary is an optional field the user can include in
 their generated label. If the user does decide to include one of these fields, **they must
@@ -311,41 +314,41 @@ element will remain empty**.
 For reference, provided below are the full contents of the optional label classes::
 
   Citation_Information:
-    author_list
-    editor_list
-    publication_year
-    doi
-    keyword
-    description
+    author_list:
+    editor_list:
+    publication_year:
+    doi:
+    keyword:
+    description:
     Funding_Acknowledgement:
-      funding_source
-      funding_year
-      funding_award
-      funding_acknowledgement_text
+      funding_source:
+      funding_year:
+      funding_award:
+      funding_acknowledgement_text:
   Modification_Detail:
-    modification_date
-    version_id
-    description
+    modification_date:
+    version_id:
+    description:
   Internal_Reference:
-    lid_reference
-    reference_type
-    comment
+    lid_reference:
+    reference_type:
+    comment:
   External_Reference:
-    doi
-    reference_text
-    description
+    doi:
+    reference_text:
+    description:
   Source_Product_Internal:
-    lidvid_reference
-    reference_type
-    comment
+    lidvid_reference:
+    reference_type:
+    comment:
   Source_Product_External:
-    external_source_product_identifier
-    reference_type
-    doi
-    curating_facility
-    description
+    external_source_product_identifier:
+    reference_type:
+    doi:
+    curating_facility:
+    description:
   File_Area_Ancillary / File_Area_Metadata:
-    creation_date_time
+    creation_date_time:
 
 
 If no new contents are specified for label generation, the label will contain the
diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 5f7aca3..8b40748 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -20,25 +20,35 @@ $END_IF
         <product_class>Product_Ancillary</product_class>
     $IF(Citation_Information)
         <Citation_Information>
+        $IF(Citation_Information['author_list'] and isinstance(Citation_Information['author_list'], list))
+        $FOR(Citation_Information['author_list'])
+            <author_list>$VALUE$</author_list>
+        $END_FOR
+        $ELSE_IF(Citation_Information['author_list'] and not isinstance(Citation_Information['author_list'], list))
             <author_list>$Citation_Information['author_list']$</author_list>
+        $END_IF
             <editor_list>$Citation_Information['editor_list']$</editor_list>
             <publication_year>$Citation_Information['publication_year']$</publication_year>
             <doi>$Citation_Information['doi']$</doi>
+        $IF(Citation_Information['keyword'] and isinstance(Citation_Information['keyword'], list))
         $FOR(Citation_Information['keyword'])
             <keyword>$VALUE$</keyword>
         $END_FOR
+        $ELSE_IF(Citation_Information['keyword'] and not isinstance(Citation_Information['keyword'], list))
+            <keyword>$Citation_Information['keyword']$</keyword>
+        $END_IF
             <description>$Citation_Information['description']$</description>
-        $IF(Citation_Information.get('Funding_Acknowledgement'))
+        $IF(Citation_Information['Funding_Acknowledgement'])
             <Funding_Acknowledgement>
-                <funding_source>$Funding_Acknowledgement['funding_source']$</funding_source>
-                <funding_year>$Funding_Acknowledgement['funding_year']$</funding_year>
-                <funding_award>$Funding_Acknowledgement['funding_award']$</funding_award>
-                <funding_acknowledgement_text>$Funding_Acknowledgement['funding_acknowledgement_text']$</funding_acknowledgement_text>
+                <funding_source>$Citation_Information['Funding_Acknowledgement']['funding_source']$</funding_source>
+                <funding_year>$Citation_Information['Funding_Acknowledgement']['funding_year']$</funding_year>
+                <funding_award>$Citation_Information['Funding_Acknowledgement']['funding_award']$</funding_award>
+                <funding_acknowledgement_text>$Citation_Information['Funding_Acknowledgement']['funding_acknowledgement_text']$</funding_acknowledgement_text>
             </Funding_Acknowledgement>
         $END_IF
         </Citation_Information>
     $END_IF
-    $IF(Modification_Detail)
+    $IF(Modification_Detail and isinstance(Modification_Detail, list))
         <Modification_History>
             $FOR(field, k=Modification_Detail)
             <Modification_Detail>
@@ -48,6 +58,14 @@ $END_IF
             </Modification_Detail>
             $END_FOR
         </Modification_History>
+    $ELSE_IF(Modification_Detail)
+        <Modification_History>
+            <Modification_Detail>
+                <modification_date>$Modification_Detail['modification_date']$</modification_date>
+                <version_id>$Modification_Detail['version_id']$</version_id>
+                <description>$Modification_Detail['description']$</description>
+            </Modification_Detail>
+        </Modification_History>
     $END_IF
         <License_Information>
             <name>Creative Common Public License CC0 1.0 (2024)</name>
@@ -58,6 +76,7 @@ $END_IF
             </Internal_Reference>
         </License_Information>
     </Identification_Area>
+    $IF(Internal_Reference or External_Reference or Source_Product_Internal or Source_Product_External)
     <Reference_List>
         $IF(Internal_Reference)
         $FOR(field, k=Internal_Reference)
@@ -98,6 +117,7 @@ $END_IF
         $END_FOR
         $END_IF
     </Reference_List>
+    $END_IF
     $IF(Product_Ancillary)
     <File_Area_Ancillary>
     $ELSE
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 9bf6439..c7d72da 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -11,11 +11,11 @@
 
 import argparse
 from collections import namedtuple
+from collections import Counter
 import csv
 from datetime import datetime
 import fnmatch
 import functools
-import itertools
 from itertools import groupby
 from lxml import etree
 import os
@@ -68,82 +68,53 @@ def convert_header_to_xpath(root, xml_header_path, namespaces):
         'pds:Product_Observational/pds:Identification_Area[1]/pds:version_id[2]'
     """
     sections = xml_header_path.split('/')
+    prefixes = namespaces.keys()
     xpath_final = ''
     portion = ''
     for sec in sections[1:]:
+        # Extend the path walked so far by this section.
         portion = f'{portion}/{sec}'
+        # Look up the tag of the first element matching that path.
         tag = str(root.xpath(portion, namespaces=namespaces)[0].tag)
+        # A leading '*' is dropped; the rest of the section is kept as-is.
         if sec.startswith('*'):
             sec = sec[1:]
-        if ':' in sec:
-            sec = ''
+        # A prefixed section keeps only a numeric predicate, e.g. '[2]'.
+        elif any(f'{prefix}:' in sec for prefix in prefixes):
+            predicate = sec.split('[')[-1]
+            if predicate[0].isdigit():
+                sec = f'[{predicate}'
+            else:
+                sec = ''
+        # Append the resolved tag plus whatever remains of the section.
         xpath_final = f'{xpath_final}/{tag}{sec}'
 
     return xpath_final
 
 
-def correct_duplicates(label_results):
+def clean_headers(df):
     """
-    Correct numbering of XPaths to have correct predicates.
-
-    Some namespaces do not contain predicates, and as a result, must be made artificially
-    unique via injected substrings. This function aids in the reformatting of these
-    strings so they match the syntax of the renumbering function. Note that this function
-    does not affect elements or attributes that natively contain the '_num' substring
-    (e.g., cassini:filter_name_1 and cassini:filter_name_2).
+    Clean the headers of a DataFrame by replacing certain characters with safer
+    alternatives and return a mapping of new to old headers.
 
     Parameters:
-        label_results (dict): The dictionary of XML results. This argument will be
-            mutated by the function.
+        df (pandas.DataFrame): The DataFrame whose headers need to be cleaned.
 
-    Example:
-            # XPaths in label_results shortened for readability
-        >>> keys = list(label_result)
-        >>> keys = [
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type<1>,
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_1<1>,
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_2<1>,
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_3<1>,
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_4<1>
-                ]
-        >>> correct_duplicate(label_results)
-        >>> keys = list(label_result)
-        >>> keys = [
-                ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type<1>,
-                ../geom:SPICE_Kernel_Identification<2>/geom:kernel_type<1>,
-                ../geom:SPICE_Kernel_Identification<3>/geom:kernel_type<1>,
-                ../geom:SPICE_Kernel_Identification<4>/geom:kernel_type<1>,
-                ../geom:SPICE_Kernel_Identification<5>/geom:kernel_type<1>
-                ]
+    Returns:
+        dict: A dictionary mapping new headers to old headers.
     """
-    element_names = set()
-    for key in list(label_results):
-        tag = key.split('/')[-1].split('<')[0]
-        number = tag.split('_')[-1]
-        if number.isdigit():
-            cropped = tag.replace(f'_{number}', '')
-            if cropped in element_names:
-                if str(f'{cropped}_{number}<1>') in key:
-                    key_new = key.replace((f'{cropped}_{number}<1>'), f'{cropped}<1>')
-                else:
-                    key_new = key.replace(f'{cropped}_{number}', f'{cropped}<1>')
-                parent = key_new.split('/')[-2].split('<')[0]
-                key_new = key_new.replace(f'{parent}<1>',
-                                          f'{parent}<{str(int(number)+1)}>')
-                label_results[key_new] = label_results.pop(key)
-        element_names.add(tag)
+    # Create a mapping of old to new headers
+    header_map = {col: col.replace(':', '_')
+                          .replace('/', '__')
+                          .replace('<', '_')
+                          .replace('>', '') for col in df.columns}
 
+    # Update the DataFrame's headers
+    df.rename(columns=header_map, inplace=True)
 
-def clean_headers(df):
-    """
-    Clean the headers of a DataFrame by replacing certain characters with safer
-    alternatives.
+    header_map = {v: k for k, v in list(header_map.items())}
 
-    Parameters:
-        df (pandas.DataFrame): The DataFrame whose headers need to be cleaned.
-    """
-    return df.rename(columns=lambda x: x.replace(
-            ':', '_').replace('/', '__').replace('<', '_').replace('>', ''), inplace=True)
+    return header_map
 
 
 def default_value_for_nil(config, data_type, nil_value):
@@ -429,21 +400,11 @@ def process_headers(label_results, key, root, namespaces, prefixes):
         prefixes (dict): A dictionary containing XML namespace prefixes.
     """
     key_new = convert_header_to_xpath(root, key, namespaces)
-
     # Replace namespaces with prefixes
     for namespace in prefixes:
         if namespace in key_new:
             key_new = key_new.replace('{' + namespace + '}', prefixes[namespace] + ':')
 
-    # Check if key_new already exists in label_results, append suffix if necessary
-    if key_new in label_results:
-        suffix_gen = itertools.count(start=1, step=1)
-        while True:
-            trial_key = f"{key_new}_{next(suffix_gen)}"
-            if trial_key not in label_results:
-                key_new = trial_key
-                break
-
     label_results[key_new] = label_results.pop(key)
 
 
@@ -785,6 +746,14 @@ def pad_column_values_and_headers(df):
 
     df = pd.DataFrame(rows)
 
+    if args.simplify_xpaths:
+        original_headers = df.columns.tolist()
+        simplified_headers = simplify_xpaths(original_headers)
+        df.columns = simplified_headers
+
+    if args.clean_header_field_names:
+        clean_header_mapping = clean_headers(df)
+
     if args.sort_by:
         sort_values = str(args.sort_by).split(',')
         try:
@@ -793,9 +762,6 @@ def pad_column_values_and_headers(df):
             print(bad_sort)
             sys.exit(1)
 
-    if args.clean_header_field_names:
-        clean_headers(df)
-
     if args.fixed_width:
         padded_df = pad_column_values_and_headers(df)
         print(f'Fixed-width index file generated at {output_csv_path}')
@@ -805,6 +771,11 @@ def pad_column_values_and_headers(df):
         print(f'Index file generated at {output_csv_path}')
         df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n')
 
+    if args.clean_header_field_names:
+        return clean_header_mapping
+    else:
+        return None
+
 
 def find_base_attribute(xsd_tree, target_name, new_namespaces):
     """
@@ -1130,6 +1101,48 @@ def generate_unique_filename(base_name):
     return new_filename
 
 
+def simplify_xpaths(headers):
+    """
+    Shorten XPath headers to their final component wherever that is unambiguous.
+
+    Each header is split on '/' and its last component (namespace prefix, tag,
+    and predicate, e.g. 'pds:title<1>') is the candidate short name. A header is
+    replaced by its short name only if no other header in the list ends in the
+    same component; otherwise the full XPath is kept so headers stay distinct.
+    Components are compared as whole strings, so the predicate is significant.
+
+    Parameters:
+        headers (list of str): The XPath headers to simplify. This list is
+            modified in place.
+
+    Returns:
+        list of str: The same list object as `headers`, in which every header
+        whose final component is unique has been replaced by that component.
+
+    Example:
+        >>> simplify_xpaths(['pds:P/pds:Area<1>/pds:title<1>',
+        ...                  'pds:P/pds:System<1>/pds:name<1>',
+        ...                  'pds:P/pds:System<2>/pds:name<1>'])
+        ['pds:title<1>',
+         'pds:P/pds:System<1>/pds:name<1>',
+         'pds:P/pds:System<2>/pds:name<1>']
+    """
+    # The final component of each header, in the same order as `headers`.
+    tags = [header.split('/')[-1] for header in headers]
+
+    # Count how many headers end in each component; a count above one means the
+    # short name would be ambiguous.
+    term_counts = Counter(tags)
+
+    for ind, tag in enumerate(tags):
+        # Overwrite in place: callers get back the very list they passed in.
+        # Headers whose final component is shared keep their full XPath.
+        if term_counts[tag] == 1:
+            headers[ind] = tag
+
+    return headers
+
+
 class MultilineFormatter(argparse.HelpFormatter):
     """Class to allow multi-line help messages with argparse.
 
@@ -1218,7 +1231,8 @@ def main(cmd_line=None):
                                   metavar='XPATHS_FILEPATH',
                                   help='Optional text file specifying which XPaths to '
                                        'scrape. If not specified, all XPaths found in '
-                                       'the label files are included.')
+                                       'the label files are included. Only whole XPaths '
+                                       'can be specified.')
 
     limiting_results.add_argument('--output-headers-file', type=str,
                                   metavar='HEADERS_FILEPATH',
@@ -1267,7 +1281,6 @@ def main(cmd_line=None):
     nillable_elements_info = {}
     collected_files = set()
     all_results = []
-    tags = []
     xsd_files = []
 
     output_csv_path = None
@@ -1352,6 +1365,7 @@ def main(cmd_line=None):
         # improve readability. Each XPath's namespace is replaced with its prefix for
         # faster reference. Duplicate XPaths are made unique to ensure all results are
         # present in the final product.
+
         for key in list(label_results):
             process_headers(label_results, key, root, namespaces, prefixes)
 
@@ -1375,7 +1389,6 @@ def main(cmd_line=None):
         for key in list(label_results):
             if 'cyfunction' in key:
                 del label_results[key]
-
         # The XPath headers must be renumbered to reflect which instance of the element
         # the column refers to. At this stage, duplicate XPaths may exist again due to
         # the reformatting. These duplicates are corrected to preserve the contents of
@@ -1384,8 +1397,6 @@ def main(cmd_line=None):
         for old_xpath, new_xpath in xpath_map.items():
             label_results[new_xpath] = label_results.pop(old_xpath)
 
-        correct_duplicates(label_results)
-
         # Collect metadata about the label file. The label file's lid is scraped and
         # broken into multiple parts. This metadata can then be requested as additional
         # columns within the index file.
@@ -1426,57 +1437,14 @@ def main(cmd_line=None):
         print('No results found: glob pattern(s) excluded all matches.')
         sys.exit(1)
 
-    # If --simplify-xpaths is used, the XPath headers will be shortened to the
-    # element's tag and namespace prefix. This is contingent on the uniqueness of
-    # the XPath header; if more than one XPath header shares a tag, a namespace and a
-    # predicate value, the XPath header will remain whole.
     if args.simplify_xpaths:
-        headers = {}
-        unique_tags_master = []
-
-        # Step 1: Gather all possible tags from labels
+        original_headers = {}
         for label_results in all_results:
-            keys = label_results.keys()
-            for key in keys:
-                tag = key.split('/')[-1]
-                tags.append(tag)
-                if key not in headers:
-                    headers[key] = tag
-
-        # For each label, collect all tags that only occur once. If a unique tag occurs
-        # multiple times within a label, that tag will be removed from the collective
-        # list of unique tags.
-        for label_results in all_results:
-            tags = []
-            unique_tags = []
-            names = []
-            for key in keys:
-                tag = key.split('/')[-1]
-                tags.append(tag)
-                name = tag.split('<')[0]
-                names.append(name)
-            for tag in tags:
-                name = tag.split('<')[0]
-                if (tags.count(tag) == 1 and names.count(name) == 1
-                        and tag not in unique_tags):
-                    unique_tags.append(tag)
-
-            for tag in unique_tags:
-                unique_tags_master.append(tag)
-
-        for ind, label_results in enumerate(all_results):
-            new_label_results = {}
-            for key, value in list(label_results.items()):
-                new_key = headers[key]
-                if key.split('/')[-1] in unique_tags_master:
-                    new_label_results[new_key] = value
-                else:
-                    new_label_results[key] = value
-
-            all_results[ind] = new_label_results
+            for key in label_results.keys():
+                original_headers[key] = key.split('/')[-1]
 
     if output_csv_path:
-        write_results_to_csv(all_results, args, output_csv_path)
+        clean_header_mapping = write_results_to_csv(all_results, args, output_csv_path)
 
     # To instead receive a list of available information available within a label or set
     # of labels, you may use --output-headers-file. This will take all of the keys of
@@ -1495,6 +1463,8 @@ def main(cmd_line=None):
         # The file is now written and placed in a given location. If cleaned header
         # field names are requested, they are processed here before being written in.
         with open(output_txt_path, 'w') as output_fp:
+            if args.simplify_xpaths:
+                xpaths = simplify_xpaths(xpaths)
             for item in xpaths:
                 if args.clean_header_field_names:
                     verboseprint(
@@ -1547,8 +1517,12 @@ def main(cmd_line=None):
             # file is fixed-width or delimited.
             for header in headers:
                 whole_header = header
+                whole_header_length = len(whole_header)
                 if args.fixed_width:
                     header = header.strip()
+                if args.clean_header_field_names:
+                    full_header = header
+                    header = clean_header_mapping[header]
                 if (header in valid_add_extra_file_info and 'lid' in header):
                     true_type = 'pds:ASCII_LID'
                 elif header == 'filename':
@@ -1565,8 +1539,13 @@ def main(cmd_line=None):
 
                 true_type = true_type.split(':')[-1]
                 field_number += 1
-                header_length = len(header.encode('utf-8'))
-                header_name = header
+
+                if args.clean_header_field_names:
+                    header_length = len(full_header.encode('utf-8'))
+                    header_name = full_header
+                else:
+                    header_length = len(header.encode('utf-8'))
+                    header_name = header
 
                 maximum_field_length = maximum_field_lengths[whole_header]
                 header_info.append({'name': header_name,
@@ -1576,7 +1555,10 @@ def main(cmd_line=None):
                                     'field_length': maximum_field_length,
                                     'maximum_field_length': maximum_field_length,
                                     'offset': offset})
-                offset += header_length + jump
+                if args.fixed_width:
+                    offset += whole_header_length + jump
+                else:
+                    offset += header_length + jump
                 field_location = offset
 
         # The creation date of the index file is stored for later reference.
diff --git a/test_files/expected/cleaned_headers_label_success.csv b/test_files/expected/cleaned_headers_label_success.csv
new file mode 100644
index 0000000..efb79b3
--- /dev/null
+++ b/test_files/expected/cleaned_headers_label_success.csv
@@ -0,0 +1,3 @@
+pds_logical_identifier_1,pds_version_id_1,pds_title_1,pds_information_model_version_1,pds_author_list_1,pds_publication_year_1,pds_keyword_1,pds_keyword_2,pds_keyword_3,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_comment_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_comment_1,pds_Product_Observational__pds_Observing_System_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_2__pds_name_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,"French, Richard G.",0003-01-01,kw1,kw2,kw3,SPK,ura111.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,SPK,earthstns_itrf93_040916.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,Another thing,Another thing,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host,,
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,"French, Richard G.",0003-01-01,kw1,,,SPK,ura111.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,,,,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,,,,,,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/cleaned_headers_label_success.xml b/test_files/expected/cleaned_headers_label_success.xml
new file mode 100644
index 0000000..621f7a2
--- /dev/null
+++ b/test_files/expected/cleaned_headers_label_success.xml
@@ -0,0 +1,226 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+
+<Product_Metadata_Supplemental xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1L00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:rms_metadata:document_opus:cleaned_headers_label</logical_identifier>
+        <version_id>1.1</version_id>
+        <title>Index File</title>
+        <information_model_version>1.21.0.0</information_model_version>
+        <product_class>Product_Ancillary</product_class>
+        <License_Information>
+            <name>Creative Common Public License CC0 1.0 (2024)</name>
+            <description>Creative Commons Zero (CC0) license information.</description>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0</lid_reference>
+                <reference_type>product_to_license</reference_type>
+            </Internal_Reference>
+        </License_Information>
+    </Identification_Area>
+    <File_Area_Metadata>
+        <File>
+            <file_name>cleaned_headers_label.csv</file_name>
+            <local_identifier>index-table</local_identifier>
+            <creation_date_time>0002-02-02T00:00:00.00Z</creation_date_time>
+            <md5_checksum>24837ed11b0e8ceb94102e1f22d95b31</md5_checksum>
+            <comment></comment>
+        </File>
+        <Header>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">2183</object_length>
+            <parsing_standard_id>UTF-8 Text</parsing_standard_id>
+            <description>Provides the column headers, separated by commas, for the data table.</description>
+        </Header>
+        <Table_Delimited>
+            <offset unit="byte">0</offset>
+            <object_length unit="byte">3370</object_length>
+            <parsing_standard_id>PDS DSV 1</parsing_standard_id>
+            <records>3</records>
+            <record_delimiter>Line-Feed</record_delimiter>
+            <field_delimiter>Comma</field_delimiter>
+            <Record_Delimited>
+                <fields>25</fields>
+                <groups>0</groups>
+                <maximum_record_length unit="byte">2182</maximum_record_length>
+                    <Field_Delimited>
+                        <name>pds_logical_identifier_1</name>
+                        <field_number>1</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">52</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_version_id_1</name>
+                        <field_number>2</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_title_1</name>
+                        <field_number>3</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">33</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_information_model_version_1</name>
+                        <field_number>4</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">8</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_author_list_1</name>
+                        <field_number>5</field_number>
+                        <data_type>UTF8_Text_Preserved</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_publication_year_1</name>
+                        <field_number>6</field_number>
+                        <data_type>ASCII_Date_YMD</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_keyword_1</name>
+                        <field_number>7</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_keyword_2</name>
+                        <field_number>8</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_keyword_3</name>
+                        <field_number>9</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1</name>
+                        <field_number>10</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1</name>
+                        <field_number>11</field_number>
+                        <data_type>ASCII_File_Name</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_comment_1</name>
+                        <field_number>12</field_number>
+                        <data_type>ASCII_Text_Preserved</data_type>
+                        <maximum_field_length unit="byte">171</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1</name>
+                        <field_number>13</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">3</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1</name>
+                        <field_number>14</field_number>
+                        <data_type>ASCII_File_Name</data_type>
+                        <maximum_field_length unit="byte">27</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_comment_1</name>
+                        <field_number>15</field_number>
+                        <data_type>ASCII_Text_Preserved</data_type>
+                        <maximum_field_length unit="byte">171</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_1__pds_name_1</name>
+                        <field_number>16</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">41</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1</name>
+                        <field_number>17</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">15</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1</name>
+                        <field_number>18</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_2__pds_name_1</name>
+                        <field_number>19</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">13</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_name_1</name>
+                        <field_number>20</field_number>
+                        <data_type>UTF8_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">13</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_type_1</name>
+                        <field_number>21</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">10</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1</name>
+                        <field_number>22</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">50</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1</name>
+                        <field_number>23</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1</name>
+                        <field_number>24</field_number>
+                        <data_type>ASCII_LID</data_type>
+                        <maximum_field_length unit="byte">50</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+                    <Field_Delimited>
+                        <name>pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1</name>
+                        <field_number>25</field_number>
+                        <data_type>ASCII_Short_String_Collapsed</data_type>
+                        <maximum_field_length unit="byte">18</maximum_field_length>
+                        <Special_Constants></Special_Constants>
+                    </Field_Delimited>
+            </Record_Delimited>
+        </Table_Delimited>
+    </File_Area_Metadata>
+</Product_Metadata_Supplemental>
diff --git a/test_files/expected/label_success_1.xml b/test_files/expected/label_success_1.xml
index aade34b..8d53f30 100644
--- a/test_files/expected/label_success_1.xml
+++ b/test_files/expected/label_success_1.xml
@@ -20,8 +20,6 @@
             </Internal_Reference>
         </License_Information>
     </Identification_Area>
-    <Reference_List>
-    </Reference_List>
     <File_Area_Ancillary>
         <File>
             <file_name>generated_label_1.csv</file_name>
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
index 23a5758..ea0070f 100644
--- a/test_files/expected/label_success_2.xml
+++ b/test_files/expected/label_success_2.xml
@@ -20,8 +20,6 @@
             </Internal_Reference>
         </License_Information>
     </Identification_Area>
-    <Reference_List>
-    </Reference_List>
     <File_Area_Metadata>
         <File>
             <file_name>generated_label_2.csv</file_name>
diff --git a/test_files/expected/label_success_3.xml b/test_files/expected/label_success_3.xml
index 2e6127e..6bb4f39 100644
--- a/test_files/expected/label_success_3.xml
+++ b/test_files/expected/label_success_3.xml
@@ -20,8 +20,6 @@
             </Internal_Reference>
         </License_Information>
     </Identification_Area>
-    <Reference_List>
-    </Reference_List>
     <File_Area_Ancillary>
         <File>
             <file_name>generated_label_3.csv</file_name>
diff --git a/test_files/expected/nested_label_success.txt b/test_files/expected/nested_label_success.txt
new file mode 100644
index 0000000..b0f1733
--- /dev/null
+++ b/test_files/expected/nested_label_success.txt
@@ -0,0 +1,25 @@
+pds:logical_identifier<1>
+pds:version_id<1>
+pds:title<1>
+pds:information_model_version<1>
+pds:author_list<1>
+pds:publication_year<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<1>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<1>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<2>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<2>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<3>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<3>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<4>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<4>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<2>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<2>/geom:spice_kernel_file_name<1>
+geom:comment<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:Observing_System_Component<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:Observing_System_Component<1>/pds:type<1>
+pds:lid_reference<1>
+pds:reference_type<1>
diff --git a/test_files/labels/nested_label.xml b/test_files/labels/nested_label.xml
new file mode 100644
index 0000000..d6127be
--- /dev/null
+++ b/test_files/labels/nested_label.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/disp/v1/PDS4_DISP_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/mission/cassini/v1/PDS4_CASSINI_1B00_1300.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<Product_Observational xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:geom="http://pds.nasa.gov/pds4/geom/v1"
+ xmlns:rings="http://pds.nasa.gov/pds4/rings/v1"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1E00.xsd
+                     http://pds.nasa.gov/pds4/geom/v1 https://pds.nasa.gov/pds4/geom/v1/PDS4_GEOM_1B10_1700.xsd
+                     http://pds.nasa.gov/pds4/rings/v1 https://pds.nasa.gov/pds4/rings/v1/PDS4_RINGS_1E00_1A00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n</logical_identifier>
+        <version_id>1.0</version_id>
+        <title>Cassini ISS Image 1455200455n.img</title>
+        <information_model_version>1.11.0.0</information_model_version>
+        <Citation_Information>
+            <author_list>French, Richard G.</author_list>
+            <publication_year nilReason="unknown" xsi:nil="true"/>
+        </Citation_Information>
+    </Identification_Area>
+    <Observation_Area>
+        <Discipline_Area>
+            <geom:Geometry>
+                <geom:SPICE_Kernel_Files>
+                    <geom:SPICE_Kernel_Identification>
+                        <geom:SPICE_Kernel_Identification_Extra>
+                            <geom:kernel_type>SPK</geom:kernel_type>
+                            <geom:spice_kernel_file_name>ura111.bsp</geom:spice_kernel_file_name>
+                        </geom:SPICE_Kernel_Identification_Extra>
+                        <geom:SPICE_Kernel_Identification_Extra>
+                            <geom:kernel_type>SPK</geom:kernel_type>
+                            <geom:spice_kernel_file_name>vgr2.ura111.bsp</geom:spice_kernel_file_name>
+                        </geom:SPICE_Kernel_Identification_Extra>
+                        <geom:SPICE_Kernel_Identification_Extra>
+                            <geom:kernel_type>BPC</geom:kernel_type>
+                            <geom:spice_kernel_file_name>earth_720101_031229.bpc</geom:spice_kernel_file_name>
+                        </geom:SPICE_Kernel_Identification_Extra>
+                        <geom:SPICE_Kernel_Identification_Extra>
+                            <geom:kernel_type>LSK</geom:kernel_type>
+                            <geom:spice_kernel_file_name>naif0012.tls</geom:spice_kernel_file_name>
+                        </geom:SPICE_Kernel_Identification_Extra>
+                    </geom:SPICE_Kernel_Identification>
+                    <geom:SPICE_Kernel_Identification>
+                            <geom:kernel_type>SPK</geom:kernel_type>
+                            <geom:spice_kernel_file_name>earthstns_itrf93_040916.bsp</geom:spice_kernel_file_name>
+                    </geom:SPICE_Kernel_Identification>
+                    <geom:comment>These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.</geom:comment>
+                </geom:SPICE_Kernel_Files>
+            </geom:Geometry>
+        </Discipline_Area>
+    </Observation_Area>
+    <Observing_System>
+        <name>Cassini Orbiter Imaging Science Subsystem</name>
+        <Observing_System_Component>
+            <name>Cassini Orbiter</name>
+            <type>Spacecraft</type>
+        </Observing_System_Component>
+    </Observing_System>
+    <Observing_System>
+        <name>Another thing</name>
+        <Observing_System_Component>
+            <name>Another thing</name>
+            <type>Spacecraft</type>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:context:instrument_host:spacecraft.co</lid_reference>
+                <reference_type>is_instrument_host</reference_type>
+            </Internal_Reference>
+        </Observing_System_Component>
+    </Observing_System>
+</Product_Observational>
\ No newline at end of file
diff --git a/test_files/labels/rf-tester-label_1.xml b/test_files/labels/rf-tester-label_1.xml
new file mode 100644
index 0000000..3f127bc
--- /dev/null
+++ b/test_files/labels/rf-tester-label_1.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/disp/v1/PDS4_DISP_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/mission/cassini/v1/PDS4_CASSINI_1B00_1300.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<Product_Observational xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:geom="http://pds.nasa.gov/pds4/geom/v1"
+ xmlns:rings="http://pds.nasa.gov/pds4/rings/v1"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1E00.xsd
+                     http://pds.nasa.gov/pds4/geom/v1 https://pds.nasa.gov/pds4/geom/v1/PDS4_GEOM_1B10_1700.xsd
+                     http://pds.nasa.gov/pds4/rings/v1 https://pds.nasa.gov/pds4/rings/v1/PDS4_RINGS_1E00_1A00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n</logical_identifier>
+        <version_id>1.0</version_id>
+        <title>Cassini ISS Image 1455200455n.img</title>
+        <information_model_version>1.11.0.0</information_model_version>
+        <Citation_Information>
+            <author_list>French, Richard G.</author_list>
+            <publication_year nilReason="unknown" xsi:nil="true"/>
+            <keyword>kw1</keyword>
+            <keyword>kw2</keyword>
+            <keyword>kw3</keyword>
+        </Citation_Information>
+    </Identification_Area>
+    <Observation_Area>
+        <Discipline_Area>
+            <geom:Geometry>
+                <geom:SPICE_Kernel_Files>
+                    <geom:SPICE_Kernel_Identification>
+                        <geom:kernel_type>SPK</geom:kernel_type>
+                        <geom:spice_kernel_file_name>ura111.bsp</geom:spice_kernel_file_name>
+                    </geom:SPICE_Kernel_Identification>
+                    <geom:comment>These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.</geom:comment>
+                </geom:SPICE_Kernel_Files>
+                <geom:SPICE_Kernel_Files>
+                    <geom:SPICE_Kernel_Identification>
+                        <geom:kernel_type>SPK</geom:kernel_type>
+                        <geom:spice_kernel_file_name>earthstns_itrf93_040916.bsp</geom:spice_kernel_file_name>
+                    </geom:SPICE_Kernel_Identification>
+                    <geom:comment>These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.</geom:comment>
+                </geom:SPICE_Kernel_Files>
+            </geom:Geometry>
+        </Discipline_Area>
+    </Observation_Area>
+    <Observing_System>
+        <name>Cassini Orbiter Imaging Science Subsystem</name>
+        <Observing_System_Component>
+            <name>Cassini Orbiter</name>
+            <type>Spacecraft</type>
+        </Observing_System_Component>
+    </Observing_System>
+    <Observing_System>
+        <name>Another thing</name>
+        <Observing_System_Component>
+            <name>Another thing</name>
+            <type>Spacecraft</type>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:context:instrument_host:spacecraft.co</lid_reference>
+                <reference_type>is_instrument_host</reference_type>
+            </Internal_Reference>
+        </Observing_System_Component>
+    </Observing_System>
+</Product_Observational>
\ No newline at end of file
diff --git a/test_files/labels/rf-tester-label_2.xml b/test_files/labels/rf-tester-label_2.xml
new file mode 100644
index 0000000..8d62bbc
--- /dev/null
+++ b/test_files/labels/rf-tester-label_2.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/disp/v1/PDS4_DISP_1B00.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<?xml-model href="https://pds.nasa.gov/pds4/mission/cassini/v1/PDS4_CASSINI_1B00_1300.sch"
+    schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<Product_Observational xmlns="http://pds.nasa.gov/pds4/pds/v1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:geom="http://pds.nasa.gov/pds4/geom/v1"
+ xmlns:rings="http://pds.nasa.gov/pds4/rings/v1"
+ xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1E00.xsd
+                     http://pds.nasa.gov/pds4/geom/v1 https://pds.nasa.gov/pds4/geom/v1/PDS4_GEOM_1B10_1700.xsd
+                     http://pds.nasa.gov/pds4/rings/v1 https://pds.nasa.gov/pds4/rings/v1/PDS4_RINGS_1E00_1A00.xsd">
+    <Identification_Area>
+        <logical_identifier>urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n</logical_identifier>
+        <version_id>1.0</version_id>
+        <title>Cassini ISS Image 1455200455n.img</title>
+        <information_model_version>1.11.0.0</information_model_version>
+        <Citation_Information>
+            <author_list>French, Richard G.</author_list>
+            <publication_year nilReason="unknown" xsi:nil="true"/>
+            <keyword>kw1</keyword>
+        </Citation_Information>
+    </Identification_Area>
+    <Observation_Area>
+        <Discipline_Area>
+            <geom:Geometry>
+                <geom:SPICE_Kernel_Files>
+                    <geom:SPICE_Kernel_Identification>
+                        <geom:kernel_type>SPK</geom:kernel_type>
+                        <geom:spice_kernel_file_name>ura111.bsp</geom:spice_kernel_file_name>
+                    </geom:SPICE_Kernel_Identification>
+                    <geom:comment>These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.</geom:comment>
+                </geom:SPICE_Kernel_Files>
+            </geom:Geometry>
+        </Discipline_Area>
+    </Observation_Area>
+    <Observing_System>
+        <name>Cassini Orbiter Imaging Science Subsystem</name>
+        <Observing_System_Component>
+            <name>Cassini Orbiter</name>
+            <type>Spacecraft</type>
+            <Internal_Reference>
+                <lid_reference>urn:nasa:pds:context:instrument_host:spacecraft.co</lid_reference>
+                <reference_type>is_instrument_host</reference_type>
+            </Internal_Reference>
+        </Observing_System_Component>
+    </Observing_System>
+</Product_Observational>
\ No newline at end of file
diff --git a/test_files/samples/tester_config_label.yaml b/test_files/samples/tester_config_label.yaml
index ada75dc..64b0d33 100644
--- a/test_files/samples/tester_config_label.yaml
+++ b/test_files/samples/tester_config_label.yaml
@@ -1,13 +1,23 @@
 
 label-contents:
   title: Index file for my occultation bundle
+  Citation_Information:
+    author_list:
+      Emilie Simpson,
+      Robert French,
+      Mia Mace
+    editor_list:
+    publication_year: 2024
+    doi:
+    keyword: [stellar, uranus, rings]
+    description:
+    Funding_Acknowledgement:
   Modification_Detail:
-    - modification_date: '2024-01-01'
-      version_id: 1.1
-      description: |
-        This is a lengthy description of what this modification
-        changed in the bundle.
-        There were lots of changes.
-    - modification_date: '2023-01-01'
-      version_id: 1.0
-      description: Initial release.
+  - modification_date: '2024-01-01'
+    version_id: 1.1
+    description: This is a lengthy description of what this modification
+                 changed in the bundle.
+                 There were lots of changes.
+  - modification_date: '2023-01-01'
+    version_id: '1.0'
+    description: Initial release.
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 0c0b395..84ab987 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -418,6 +418,19 @@ def compare_files(path_to_file, golden_file):
             ]
         ),
 
+        # Executable command: python pds4indextools/pds4_create_xml_index.py ../test_files/labels "nested_label.xml" --output-headers-file nested_label.txt --simplify-xpaths
+        # Compare result to golden copy:
+        # test_files/expected/nested_label_success.txt
+        (
+            str(EXPECTED_DIR / 'nested_label_success.txt'),
+            None, 'nested_label.txt',
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/nested_label.xml',
+                '--simplify-xpaths',
+            ]
+        ),
+
         # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_1.csv
         # Compare result to golden copy:
         # test_files/expected/label_success_1.csv
@@ -476,7 +489,26 @@ def compare_files(path_to_file, golden_file):
                 '--config-file',
                 str(SAMPLES_DIR / 'tester_config.yaml')
             ]
-        )
+        ),
+
+        # Executable command: pds4_create_xml_index ../test_files/labels "rf-tester-label_*.xml" --generate-label metadata --config-file ../test_files/samples/tester_config.yaml --output-index-file cleaned_headers_label.csv --clean-header-field-names --simplify-xpaths
+        # Compare result to golden copy:
+        # test_files/expected/cleaned_headers_label_success.csv
+        # test_files/expected/cleaned_headers_label_success.xml
+        (
+            str(EXPECTED_DIR / 'cleaned_headers_label_success.csv'),
+            'cleaned_headers_label.csv', None,
+            [
+                str(TEST_FILES_DIR),
+                LABEL_NAME + '/rf-tester-label_*.xml',
+                '--generate-label',
+                'metadata',
+                '--config-file',
+                str(SAMPLES_DIR / 'tester_config.yaml'),
+                '--clean-header-field-names',
+                '--simplify-xpaths'
+            ]
+        ),
     ]
 )
 def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index 62a8c83..814f120 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -286,28 +286,6 @@ def test_get_creation_date(create_temp_file, platform_name):
         assert datetime.fromisoformat(creation_date)
 
 
-def test_correct_duplicates():
-    label_results = {
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name<1>': 1,
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_1<1>': 2,
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_2<1>': 3,
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_3<1>': 4,
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_4<1>': 5,
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name_5': 6
-        }
-
-    tools.correct_duplicates(label_results)
-
-    assert label_results == {
-        '../geom:SPICE_Kernel_Identification<1>/geom:spice_kernel_file_name<1>': 1,
-        '../geom:SPICE_Kernel_Identification<2>/geom:spice_kernel_file_name<1>': 2,
-        '../geom:SPICE_Kernel_Identification<3>/geom:spice_kernel_file_name<1>': 3,
-        '../geom:SPICE_Kernel_Identification<4>/geom:spice_kernel_file_name<1>': 4,
-        '../geom:SPICE_Kernel_Identification<5>/geom:spice_kernel_file_name<1>': 5,
-        '../geom:SPICE_Kernel_Identification<6>/geom:spice_kernel_file_name<1>': 6
-        }
-
-
 def test_update_nillable_elements_from_xsd_file():
     xsd_files = []
     nillable_elements_info = {}

From d4411fd95180c5e5c68c5a83c1087cb95eb9e1ca Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Thu, 17 Oct 2024 15:24:16 -0700
Subject: [PATCH 20/24] Fixing f-string format

---
 pds4indextools/pds4_create_xml_index.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index c7d72da..f9078ac 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -83,10 +83,10 @@ def convert_header_to_xpath(root, xml_header_path, namespaces):
         elif any(f'{prefix}:' in sec for prefix in prefixes):
             predicate = sec.split('[')[-1]
             if predicate[0].isdigit():
-                sec = f'[{sec.split('[')[-1]}'
+                sec = f"[{sec.split('[')[-1]}"
             else:
                 sec = ''
-        # xpath_final is the current path, then the tag, then section/
+        # xpath_final is the current path, then the tag, then section
         xpath_final = f'{xpath_final}/{tag}{sec}'
 
     return xpath_final

From 7ca45a66f432f7854f2778d9dcd1fa1f2bfd20aa Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 13 Nov 2024 15:32:36 -0800
Subject: [PATCH 21/24] Making changes according to pull request

---
 pds4indextools/index_label_template_pds.xml   |  4 +-
 pds4indextools/pds4_create_xml_index.py       | 68 +++++++++++++------
 test_files/expected/label_success_2.xml       | 16 ++---
 ...ster-label_1.xml => rf_tester_label_1.xml} |  0
 ...ster-label_2.xml => rf_tester_label_2.xml} |  0
 tests/test_pds4_create_xml_index_blackbox.py  | 28 +++++---
 6 files changed, 75 insertions(+), 41 deletions(-)
 rename test_files/labels/{rf-tester-label_1.xml => rf_tester_label_1.xml} (100%)
 rename test_files/labels/{rf-tester-label_2.xml => rf_tester_label_2.xml} (100%)

diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 8b40748..bc9142a 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -38,13 +38,15 @@ $END_IF
             <keyword>$Citation_Information['keyword']$</keyword>
         $END_IF
             <description>$Citation_Information['description']$</description>
-        $IF(Citation_Information['Funding_Acknowledgement'])
+        $IF('Funding_Acknowledgement' in Citation_Information)
+            $IF(Citation_Information['Funding_Acknowledgement'])
             <Funding_Acknowledgement>
                 <funding_source>$Citation_Information['Funding_Acknowledgement']['funding_source']$</funding_source>
                 <funding_year>$Citation_Information['Funding_Acknowledgement']['funding_year']$</funding_year>
                 <funding_award>$Citation_Information['Funding_Acknowledgement']['funding_award']$</funding_award>
                 <funding_acknowledgement_text>$Citation_Information['Funding_Acknowledgement']['funding_acknowledgement_text']$</funding_acknowledgement_text>
             </Funding_Acknowledgement>
+            $END_IF
         $END_IF
         </Citation_Information>
     $END_IF
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index f9078ac..bd8fa00 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -104,10 +104,7 @@ def clean_headers(df):
         dict: A dictionary mapping new headers to old headers.
     """
     # Create a mapping of old to new headers
-    header_map = {col: col.replace(':', '_')
-                          .replace('/', '__')
-                          .replace('<', '_')
-                          .replace('>', '') for col in df.columns}
+    header_map = {col: header_cleaner(col) for col in df.columns}
 
     # Update the DataFrame's headers
     df.rename(columns=header_map, inplace=True)
@@ -308,6 +305,24 @@ def search_type(xsd_file, tag, namespaces):
     return None
 
 
+def header_cleaner(header):
+    """
+    Clean a header string: ':' and '<' become '_', '/' becomes '__', '>' is removed.
+
+    Parameters:
+        header (str): The header string to be cleaned.
+
+    Returns:
+        str: The cleaned header string.
+    """
+    return (
+        header.replace(':', '_')
+              .replace('/', '__')
+              .replace('<', '_')
+              .replace('>', '')
+    )
+
+
 def load_config_file(
         default_config_file=Path(__file__).resolve().parent/'default_config.yaml',
         specified_config_files=None):
@@ -746,6 +761,17 @@ def pad_column_values_and_headers(df):
 
     df = pd.DataFrame(rows)
 
+    if (
+        df.map(lambda x: isinstance(x, str) and ('"' in x or "'" in x))
+        .any()
+        .any()
+        and not args.fixed_width
+    ):
+        print("Warning: scraped contents of labels contains quotes. This is "
+              "against PDS4 data standards. Index file and subsequent label file will "
+              "not be generated.")
+        sys.exit(1)
+
     if args.simplify_xpaths:
         original_headers = df.columns.tolist()
         simplified_headers = simplify_xpaths(original_headers)
@@ -764,12 +790,15 @@ def pad_column_values_and_headers(df):
 
     if args.fixed_width:
         padded_df = pad_column_values_and_headers(df)
+
         print(f'Fixed-width index file generated at {output_csv_path}')
-        padded_df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n')
+        padded_df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n',
+                         quoting=csv.QUOTE_MINIMAL)
 
     else:
         print(f'Index file generated at {output_csv_path}')
-        df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n')
+        df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n',
+                  quoting=csv.QUOTE_MINIMAL)
 
     if args.clean_header_field_names:
         return clean_header_mapping
@@ -1107,21 +1136,19 @@ def simplify_xpaths(headers):
     namespace prefix, provided the tag is unique.
 
     This function processes a list of XPath-like strings (headers) and attempts to
-    simplify them to their last tag component. If a tag is unique within the list,
-    it replaces the full XPath header with the tag. If the tag is not unique
-    (i.e., multiple headers share the same tag), the full XPath header is retained.
+    simplify them to their last tag component. If --simplify-xpaths is used, the XPath
+    headers will be shortened to the element's tag and namespace prefix. This is
+    contingent on the uniqueness of the XPath header; if more than one XPath header
+    shares a tag, a namespace and a predicate value, the XPath header will remain whole.
 
-    Args:
+    Parameters:
         headers (list of str): A list of strings representing XPath headers.
 
     Returns:
         list of str: A list of strings where unique tags have replaced their
         corresponding full XPath headers, and non-unique tags remain unchanged.
     """
-    # If --simplify-xpaths is used, the XPath headers will be shortened to the
-    # element's tag and namespace prefix. This is contingent on the uniqueness of
-    # the XPath header; if more than one XPath header shares a tag, a namespace and a
-    # predicate value, the XPath header will remain whole.
+    # Step 1: Collect the final tag component of each header
     tags = []
     matches = {}
 
@@ -1131,14 +1158,14 @@ def simplify_xpaths(headers):
         tags.append(tag)
         matches[header] = tag
 
+    # Step 2: Count the number of instances of each tag
     term_counts = Counter(tags)
 
+    # Step 3: If a tag occurs only once, shorten it.
     for ind, header in enumerate(headers):
         tag = header.split('/')[-1]
         if term_counts[tag] == 1:
             headers[ind] = tag
-        else:
-            continue
 
     return headers
 
@@ -1300,13 +1327,13 @@ def main(cmd_line=None):
         prev_len = len(collected_files)
         collected_files.update(files)
         if len(collected_files) == prev_len:
-            print(f"No files found for pattern: {pattern}")
+            print(f'No new files found for pattern: {pattern}')
 
     verboseprint(f'{len(collected_files)} matching file(s) found')
 
     label_files = list(collected_files)
     label_files.sort()
-    if label_files == []:
+    if len(label_files) == 0:
         print(f'No files matching any patterns found in directory: {directory_path}')
         sys.exit(1)
 
@@ -1469,8 +1496,7 @@ def main(cmd_line=None):
                 if args.clean_header_field_names:
                     verboseprint(
                         '--clean-header-field-names active. Headers reformatted.')
-                    item = item.replace(
-                        ':', '_').replace('/', '__').replace('<', '_').replace('>', '')
+                    item = header_cleaner(item)
                 output_fp.write("%s\n" % item)
         print(f'XPath headers file generated at {output_txt_path}.')
 
@@ -1559,7 +1585,7 @@ def main(cmd_line=None):
                     offset += whole_header_length + jump
                 else:
                     offset += header_length + jump
-                field_location = offset
+                field_location = offset + 1
 
         # The creation date of the index file is stored for later reference.
         creation_date = get_creation_date(index_file)
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
index ea0070f..553803e 100644
--- a/test_files/expected/label_success_2.xml
+++ b/test_files/expected/label_success_2.xml
@@ -54,56 +54,56 @@
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:version_id&lt;1&gt;</name>
                         <field_number>2</field_number>
-                        <field_location unit="byte">79</field_location>
+                        <field_location unit="byte">80</field_location>
                         <data_type>ASCII_Short_String_Collapsed</data_type>
                         <field_length unit="byte">70</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:title&lt;1&gt;</name>
                         <field_number>3</field_number>
-                        <field_location unit="byte">150</field_location>
+                        <field_location unit="byte">151</field_location>
                         <data_type>ASCII_Short_String_Collapsed</data_type>
                         <field_length unit="byte">65</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Identification_Area&lt;1&gt;/pds:information_model_version&lt;1&gt;</name>
                         <field_number>4</field_number>
-                        <field_location unit="byte">216</field_location>
+                        <field_location unit="byte">217</field_location>
                         <data_type>ASCII_Short_String_Collapsed</data_type>
                         <field_length unit="byte">85</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:name&lt;1&gt;</name>
                         <field_number>5</field_number>
-                        <field_location unit="byte">302</field_location>
+                        <field_location unit="byte">303</field_location>
                         <data_type>UTF8_Short_String_Collapsed</data_type>
                         <field_length unit="byte">61</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:name&lt;1&gt;</name>
                         <field_number>6</field_number>
-                        <field_location unit="byte">364</field_location>
+                        <field_location unit="byte">365</field_location>
                         <data_type>UTF8_Short_String_Collapsed</data_type>
                         <field_length unit="byte">95</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:type&lt;1&gt;</name>
                         <field_number>7</field_number>
-                        <field_location unit="byte">460</field_location>
+                        <field_location unit="byte">461</field_location>
                         <data_type>ASCII_Short_String_Collapsed</data_type>
                         <field_length unit="byte">95</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:lid_reference&lt;1&gt;</name>
                         <field_number>8</field_number>
-                        <field_location unit="byte">556</field_location>
+                        <field_location unit="byte">557</field_location>
                         <data_type>ASCII_LID</data_type>
                         <field_length unit="byte">130</field_length>
                     </Field_Character>
                     <Field_Character>
                         <name>pds:Product_Observational/pds:Observing_System&lt;1&gt;/pds:Observing_System_Component&lt;1&gt;/pds:Internal_Reference&lt;1&gt;/pds:reference_type&lt;1&gt;</name>
                         <field_number>9</field_number>
-                        <field_location unit="byte">687</field_location>
+                        <field_location unit="byte">688</field_location>
                         <data_type>ASCII_Short_String_Collapsed</data_type>
                         <field_length unit="byte">131</field_length>
                     </Field_Character>
diff --git a/test_files/labels/rf-tester-label_1.xml b/test_files/labels/rf_tester_label_1.xml
similarity index 100%
rename from test_files/labels/rf-tester-label_1.xml
rename to test_files/labels/rf_tester_label_1.xml
diff --git a/test_files/labels/rf-tester-label_2.xml b/test_files/labels/rf_tester_label_2.xml
similarity index 100%
rename from test_files/labels/rf-tester-label_2.xml
rename to test_files/labels/rf_tester_label_2.xml
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 84ab987..e51371c 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -500,7 +500,7 @@ def compare_files(path_to_file, golden_file):
             'cleaned_headers_label.csv', None,
             [
                 str(TEST_FILES_DIR),
-                LABEL_NAME + '/rf-tester-label_*.xml',
+                LABEL_NAME + '/rf_tester_label_*.xml',
                 '--generate-label',
                 'metadata',
                 '--config-file',
@@ -522,15 +522,16 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
             # Call main() function with the simulated command line arguments
             tools.main(cmd_line)
 
-            path_to_file = ROOT_DIR / 'index.csv'
+            path_to_index_file = ROOT_DIR / 'index.csv'
 
-            compare_files(path_to_file, golden_file)
-            os.remove(path_to_file)
+            compare_files(path_to_index_file, golden_file)
+            os.remove(path_to_index_file)
 
         else:
             # THE PATH TO THE NEW FILE
             if new_file_index:
                 path_to_file = temp_dir_path / new_file_index
+                path_to_label_file = ROOT_DIR / 'index.xml'
                 cmd_line.append('--output-index-file')
                 cmd_line.append(str(path_to_file))
                 # Call main() function with the simulated command line arguments
@@ -544,6 +545,8 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
                     assert os.path.isfile(label_path)
 
                     compare_files(label_path, golden_label)
+                    if os.path.isfile(path_to_label_file):
+                        os.remove(path_to_label_file)
 
             if new_file_headers:
                 path_to_file = temp_dir_path / new_file_headers
@@ -647,13 +650,16 @@ def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
     ]
 )
 def test_failures(cmd_line):
-    # Call main() function with the simulated command line arguments
-    with pytest.raises(SystemExit) as e:
-        tools.main(cmd_line)
-    assert e.type == SystemExit
-    assert e.value.code != 0  # Check that the exit code indicates failure
-    if os.path.isfile('hdout.txt'):
-        os.remove('hdout.txt')
+    try:
+        # Call main() function with the simulated command line arguments
+        with pytest.raises(SystemExit) as e:
+            tools.main(cmd_line)
+        assert e.type == SystemExit
+        assert e.value.code != 0  # Check that the exit code indicates failure
+    finally:
+        # Ensure hdout.txt is deleted regardless of test outcome
+        if os.path.isfile('hdout.txt'):
+            os.remove('hdout.txt')
 
 
 @pytest.mark.parametrize(

From 864ee8989e954c3b85f2b54fcfaa1ded904a5b2e Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Wed, 13 Nov 2024 15:37:48 -0800
Subject: [PATCH 22/24] Using DataFrame.applymap(), not DataFrame.map()

---
 pds4indextools/pds4_create_xml_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index bd8fa00..a608a35 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -762,7 +762,7 @@ def pad_column_values_and_headers(df):
     df = pd.DataFrame(rows)
 
     if (
-        df.map(lambda x: isinstance(x, str) and ('"' in x or "'" in x))
+        df.applymap(lambda x: isinstance(x, str) and ('"' in x or "'" in x))
         .any()
         .any()
         and not args.fixed_width

From 5548c8928b6a97ebc1b2bd1000ec80810bcb7ffe Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Tue, 19 Nov 2024 13:41:12 -0800
Subject: [PATCH 23/24] unified column order for extra file info, removed
 Python 3.8 requirement

---
 .github/workflows/run-tests.yml              |  2 +-
 pds4indextools/pds4_create_xml_index.py      | 13 ++++++++++++-
 pyproject.toml                               |  3 +--
 tests/test_pds4_create_xml_index_blackbox.py |  2 +-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index b6810de..7eaf9a8 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -31,7 +31,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
       fail-fast: false
     steps:
       - name: Checkout
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index a608a35..4307496 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -762,7 +762,7 @@ def pad_column_values_and_headers(df):
     df = pd.DataFrame(rows)
 
     if (
-        df.applymap(lambda x: isinstance(x, str) and ('"' in x or "'" in x))
+        df.map(lambda x: isinstance(x, str) and ('"' in x))
         .any()
         .any()
         and not args.fixed_width
@@ -1354,6 +1354,17 @@ def main(cmd_line=None):
     else:
         elements_to_scrape = None
 
+    if args.add_extra_file_info:
+        if elements_to_scrape is None:
+            elements_to_scrape = args.add_extra_file_info
+        else:
+            # Ensure add-extra-file-info fields appear first, respecting their order
+            # in the command line
+            elements_to_scrape = args.add_extra_file_info + [
+                xpath for xpath in elements_to_scrape
+                if xpath not in args.add_extra_file_info
+            ]
+
     # For each file in label_files, load in schema files and namespaces for reference.
     # Traverse the label file and scrape the desired contents. Place these contents
     # into a dictionary to later parse into a csv file.
diff --git a/pyproject.toml b/pyproject.toml
index 2ac6305..f34e16a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "rms-pds4indextools"
 dynamic = ["version"]
 description = "pds4indextools"
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "lxml",
     "pandas",
@@ -28,7 +28,6 @@ classifiers = [
   "Topic :: Software Development :: Libraries :: Python Modules",
   "Topic :: Utilities",
   "License :: OSI Approved :: Apache Software License",
-  "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index e51371c..c3add2d 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -233,7 +233,7 @@ def compare_files(path_to_file, golden_file):
         ),
 
         # Testing --add-extra-file-info
-        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
+        # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_extra_file_info.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
         # Compare result to golden copy:
         # test_files/expected/extra_file_info_success_1.csv
         (

From a299746420a4fc9611f33e810f2a63c7f4681fb6 Mon Sep 17 00:00:00 2001
From: Emilie Simpson <esimpson@seti.org>
Date: Mon, 25 Nov 2024 16:20:41 -0800
Subject: [PATCH 24/24] limit-xpaths-file takes priority over
 add-extra-file-info term order

---
 pds4indextools/pds4_create_xml_index.py | 58 +++++++++++++++++++------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index 4307496..d12bf50 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -714,7 +714,7 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
                 nillable_elements_info[name] = 'External or built-in type'
 
 
-def write_results_to_csv(results_list, args, output_csv_path):
+def write_results_to_csv(results_list, new_columns, args, output_csv_path):
     """
     Write results from a list of dictionaries to a CSV file.
 
@@ -761,6 +761,17 @@ def pad_column_values_and_headers(df):
 
     df = pd.DataFrame(rows)
 
+    if new_columns is not None:
+        new_columns_sorted = sorted(new_columns.items(), key=lambda x: x[1][0])
+
+        for col_name, (index, col_values) in new_columns_sorted:
+            # If the column already exists, remove it temporarily
+            if col_name in df.columns:
+                df = df.drop(columns=[col_name])
+
+            # Insert the column at the desired index
+            df.insert(index, col_name, col_values)
+
     if (
         df.map(lambda x: isinstance(x, str) and ('"' in x))
         .any()
@@ -1309,6 +1320,7 @@ def main(cmd_line=None):
     collected_files = set()
     all_results = []
     xsd_files = []
+    extra_file_info_ind = {}
 
     output_csv_path = None
     output_txt_path = None
@@ -1339,8 +1351,6 @@ def main(cmd_line=None):
 
     # Loading in additional patterns from --limit-xpaths-file, if applicable,
     if args.limit_xpaths_file:
-        verboseprint(
-            f'Element file {args.limit_xpaths_file} used for additional patterns.')
         with open(args.limit_xpaths_file, 'r') as limit_xpaths_file:
             elements_to_scrape = [line.strip() for line in limit_xpaths_file]
             verboseprint('Elements to scrape:')
@@ -1354,16 +1364,14 @@ def main(cmd_line=None):
     else:
         elements_to_scrape = None
 
-    if args.add_extra_file_info:
-        if elements_to_scrape is None:
-            elements_to_scrape = args.add_extra_file_info
-        else:
-            # Ensure add-extra-file-info fields appear first, respecting their order
-            # in the command line
-            elements_to_scrape = args.add_extra_file_info + [
-                xpath for xpath in elements_to_scrape
-                if xpath not in args.add_extra_file_info
-            ]
+    if (
+        args.add_extra_file_info
+        and args.limit_xpaths_file
+        and elements_to_scrape is not None
+    ):
+        for x in elements_to_scrape:
+            if x in valid_add_extra_file_info:
+                extra_file_info_ind[x] = elements_to_scrape.index(x)
 
     # For each file in label_files, load in schema files and namespaces for reference.
     # Traverse the label file and scrape the desired contents. Place these contents
@@ -1458,6 +1466,15 @@ def main(cmd_line=None):
 
         all_results.append(label_results)
 
+    for label_results in all_results:
+        if extra_file_info_ind != {}:
+            new_columns = {}
+            for key in extra_file_info_ind.keys():
+                values = [d[key] for d in all_results]
+                new_columns[key] = (extra_file_info_ind[key], values)
+        else:
+            new_columns = None
+
     if args.add_extra_file_info and elements_to_scrape is not None:
         elements_to_scrape = args.add_extra_file_info + elements_to_scrape
 
@@ -1482,7 +1499,8 @@ def main(cmd_line=None):
                 original_headers[key] = key.split('/')[-1]
 
     if output_csv_path:
-        clean_header_mapping = write_results_to_csv(all_results, args, output_csv_path)
+        clean_header_mapping = write_results_to_csv(all_results, new_columns, args,
+                                                    output_csv_path)
 
     # To instead receive a list of available information available within a label or set
     # of labels, you may use --output-headers-file. This will take all of the keys of
@@ -1498,6 +1516,18 @@ def main(cmd_line=None):
                 if xpath not in xpaths:
                     xpaths.append(xpath)
 
+        if new_columns is not None:
+            # Sort new elements by index
+            new_elements_sorted = sorted(new_columns.items(), key=lambda x: x[1][0])
+
+            # Insert new elements into xpaths
+            for name, (index, value) in new_elements_sorted:
+                # Remove the name if already present so it is not duplicated
+                if name in xpaths:
+                    xpaths.remove(name)
+                # Insert at the desired index
+                xpaths.insert(index, name)
+
         # The file is now written and placed in a given location. If cleaned header
         # field names are requested, they are processed here before being written in.
         with open(output_txt_path, 'w') as output_fp: