diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index b6810de..7eaf9a8 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -31,7 +31,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
- python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+ python-version: ['3.9', '3.10', '3.11', '3.12']
fail-fast: false
steps:
- name: Checkout
diff --git a/docs/pds4_create_xml_index.rst b/docs/pds4_create_xml_index.rst
index d083287..2474ffd 100644
--- a/docs/pds4_create_xml_index.rst
+++ b/docs/pds4_create_xml_index.rst
@@ -145,9 +145,10 @@ Limiting results
- ``--limit-xpaths-file XPATHS_FILEPATH``: Specify a text file containing a list of
specific XPaths to extract from the label files. If this argument is not specified, all
- elements found in the label files will be included. The given text file can specify
- XPaths using ``glob``-style syntax, where each XPath level is treated as if it were a
- directory in a filesystem. Available wildcards are:
+ elements found in the label files will be included. XPaths in this file must be
+ given in full -- simplified XPaths are not accepted. The given text file
+ can specify XPaths using ``glob``-style syntax, where each XPath level is treated as if
+ it were a directory in a filesystem. Available wildcards are:
- ``?`` matches any single character within an XPath level
- ``*`` matches any series of characters within an XPath level
@@ -302,6 +303,8 @@ Below is the ``label-contents`` section of the default configuration file::
External_Reference:
Source_Product_Internal:
Source_Product_External:
+ File_Area_Ancillary:
+ File_Area_Metadata:
Each listed value with an empty dictionary is an optional field the user can include in
their generated label. If the user does decide to include one of these fields, **they must
@@ -311,39 +314,41 @@ element will remain empty**.
For reference, provided below are the full contents of the optional label classes::
Citation_Information:
- author_list
- editor_list
- publication_year
- doi
- keyword
- description
+ author_list:
+ editor_list:
+ publication_year:
+ doi:
+ keyword:
+ description:
Funding_Acknowledgement:
- funding_source
- funding_year
- funding_award
- funding_acknowledgement_text
+ funding_source:
+ funding_year:
+ funding_award:
+ funding_acknowledgement_text:
Modification_Detail:
- modification_date
- version_id
- description
+ modification_date:
+ version_id:
+ description:
Internal_Reference:
- lid_reference
- reference_type
- comment
+ lid_reference:
+ reference_type:
+ comment:
External_Reference:
- doi
- reference_text
- description
+ doi:
+ reference_text:
+ description:
Source_Product_Internal:
- lidvid_reference
- reference_type
- comment
+ lidvid_reference:
+ reference_type:
+ comment:
Source_Product_External:
- external_source_product_identifier
- reference_type
- doi
- curating_facility
- description
+ external_source_product_identifier:
+ reference_type:
+ doi:
+ curating_facility:
+ description:
+ File_Area_Ancillary / File_Area_Metadata:
+ creation_date_time:
If no new contents are specified for label generation, the label will contain the
diff --git a/pds4indextools/default_config.yaml b/pds4indextools/default_config.yaml
index a2a5a48..b23fc05 100644
--- a/pds4indextools/default_config.yaml
+++ b/pds4indextools/default_config.yaml
@@ -44,3 +44,5 @@ label-contents:
External_Reference:
Source_Product_Internal:
Source_Product_External:
+ File_Area_Ancillary:
+ File_Area_Metadata:
diff --git a/pds4indextools/index_label_template_pds.xml b/pds4indextools/index_label_template_pds.xml
index 8699cdc..bc9142a 100644
--- a/pds4indextools/index_label_template_pds.xml
+++ b/pds4indextools/index_label_template_pds.xml
@@ -20,25 +20,37 @@ $END_IF
Product_Ancillary
$IF(Citation_Information)
+ $IF(Citation_Information['author_list'] and isinstance(Citation_Information['author_list'], list))
+ $FOR(Citation_Information['author_list'])
+ $VALUE$
+ $END_FOR
+ $ELSE_IF(Citation_Information['author_list'] and not isinstance(Citation_Information['author_list'], list))
$Citation_Information['author_list']$
+ $END_IF
$Citation_Information['editor_list']$
$Citation_Information['publication_year']$
$Citation_Information['doi']$
+ $IF(Citation_Information['keyword'] and isinstance(Citation_Information['keyword'], list))
$FOR(Citation_Information['keyword'])
$VALUE$
$END_FOR
+ $ELSE_IF(Citation_Information['keyword'] and not isinstance(Citation_Information['keyword'], list))
+ $Citation_Information['keyword']$
+ $END_IF
$Citation_Information['description']$
- $IF(Citation_Information.get('Funding_Acknowledgement'))
+ $IF('Funding_Acknowledgement' in Citation_Information)
+ $IF(Citation_Information['Funding_Acknowledgement'])
- $Funding_Acknowledgement['funding_source']$
- $Funding_Acknowledgement['funding_year']$
- $Funding_Acknowledgement['funding_award']$
- $Funding_Acknowledgement['funding_acknowledgement_text']$
+ $Citation_Information['Funding_Acknowledgement']['funding_source']$
+ $Citation_Information['Funding_Acknowledgement']['funding_year']$
+ $Citation_Information['Funding_Acknowledgement']['funding_award']$
+ $Citation_Information['Funding_Acknowledgement']['funding_acknowledgement_text']$
+ $END_IF
$END_IF
$END_IF
- $IF(Modification_Detail)
+ $IF(Modification_Detail and isinstance(Modification_Detail, list))
$FOR(field, k=Modification_Detail)
@@ -48,6 +60,14 @@ $END_IF
$END_FOR
+ $ELSE_IF(Modification_Detail)
+
+
+ $Modification_Detail['modification_date']$
+ $Modification_Detail['version_id']$
+ $Modification_Detail['description']$
+
+
$END_IF
Creative Common Public License CC0 1.0 (2024)
@@ -58,59 +78,86 @@ $END_IF
+ $IF(Internal_Reference or External_Reference or Source_Product_Internal or Source_Product_External)
$IF(Internal_Reference)
$FOR(field, k=Internal_Reference)
-
-
-
+ $field['lid_reference']$
+ $field['reference_type']$
+ $field['comment']$
$END_FOR
$END_IF
$IF(External_Reference)
$FOR(field, k=External_Reference)
-
-
-
+ $field['doi']$
+ $field['reference_text']$
+ $field['description']$
$END_FOR
$END_IF
$IF(Source_Product_Internal)
$FOR(field, k=Source_Product_Internal)
-
-
-
+ $field['lidvid_reference']$
+ $field['reference_type']$
+ $field['comment']$
$END_FOR
$END_IF
$IF(Source_Product_External)
$FOR(field, k=Source_Product_External)
-
-
-
-
-
+ $field['external_source_product_identifier']$
+ $field['reference_type']$
+ $field['doi']$
+ $field['curating_facility']$
+ $field['description']$
$END_FOR
$END_IF
+ $END_IF
$IF(Product_Ancillary)
- $END_IF
- $IF(Product_Metadata_Supplemental)
+ $ELSE
$END_IF
+ $IF(Product_Ancillary and File_Area_Ancillary)
- $BASENAME(TEMPFILE)$
+ $BASENAME(index_file_name)$
index-table
- $DATETIME(creation_date_time)$
- $FILE_MD5(TEMPFILE)$
+ $IF(File_Area_Ancillary['creation_date_time'])
+ $File_Area_Ancillary['creation_date_time']$
+ $ELSE
+ $DATETIME(calculated_creation_date_time)$
+ $END_IF
+ $FILE_MD5(index_file_name)$
+ $ELSE_IF(Product_Metadata_Supplemental and File_Area_Metadata)
+
+ $BASENAME(index_file_name)$
+ index-table
+ $IF(File_Area_Metadata['creation_date_time'])
+ $File_Area_Metadata['creation_date_time']$
+ $ELSE
+ $DATETIME(calculated_creation_date_time)$
+ $END_IF
+ $FILE_MD5(index_file_name)$
+
+
+ $ELSE
+
+ $BASENAME(index_file_name)$
+ index-table
+ $DATETIME(calculated_creation_date_time)$
+ $FILE_MD5(index_file_name)$
+
+
+ $END_IF
0
$object_length_h$
@@ -121,7 +168,7 @@ $END_IF
$object_length_t$
- $FILE_RECORDS(TEMPFILE)$
+ $FILE_RECORDS(index_file_name)$
Line-Feed
@@ -145,7 +192,7 @@ $END_IF
0
$object_length_t$
PDS DSV 1
- $FILE_RECORDS(TEMPFILE)$
+ $FILE_RECORDS(index_file_name)$
Line-Feed
Comma
@@ -166,13 +213,11 @@ $END_IF
$END_IF
$IF(Product_Ancillary)
- $END_IF
- $IF(Product_Metadata_Supplemental)
+ $ELSE
$END_IF
$IF(Product_Ancillary)
-$END_IF
-$IF(Product_Metadata_Supplemental)
+$ELSE
$END_IF
diff --git a/pds4indextools/pds4_create_xml_index.py b/pds4indextools/pds4_create_xml_index.py
index c34edd1..d12bf50 100644
--- a/pds4indextools/pds4_create_xml_index.py
+++ b/pds4indextools/pds4_create_xml_index.py
@@ -11,11 +11,11 @@
import argparse
from collections import namedtuple
+from collections import Counter
import csv
from datetime import datetime
import fnmatch
import functools
-import itertools
from itertools import groupby
from lxml import etree
import os
@@ -54,7 +54,7 @@ def convert_header_to_xpath(root, xml_header_path, namespaces):
Parameters:
root (Element): The root element of the XML document.
xml_header_path (str): Original XML header path.
- namespaces (dict): Dictionary of XML namespace mappings.
+ namespaces (dict): A dictionary containing XML namespace mappings.
Returns:
str: Converted XPath expression.
@@ -68,78 +68,50 @@ def convert_header_to_xpath(root, xml_header_path, namespaces):
'pds:Product_Observational/pds:Identification_Area[1]/pds:version_id[2]'
"""
sections = xml_header_path.split('/')
+ prefixes = namespaces.keys()
xpath_final = ''
portion = ''
for sec in sections[1:]:
+ # portion = portion + section
portion = f'{portion}/{sec}'
+ # grab the tag of that portion.
tag = str(root.xpath(portion, namespaces=namespaces)[0].tag)
+ # if the section starts with '*', it's everything after the '*'
if sec.startswith('*'):
sec = sec[1:]
- if ':' in sec:
- sec = ''
+ # if sec has a namespace prefix, keep only a numeric predicate; else blank it
+ elif any(f'{prefix}:' in sec for prefix in prefixes):
+ predicate = sec.split('[')[-1]
+ if predicate[0].isdigit():
+ sec = f"[{sec.split('[')[-1]}"
+ else:
+ sec = ''
+ # xpath_final is the current path, then the tag, then section
xpath_final = f'{xpath_final}/{tag}{sec}'
return xpath_final
-def correct_duplicates(label_results):
+def clean_headers(df):
"""
- Correct numbering of XPaths to have correct predicates.
-
- Some namespaces do not contain predicates, and as a result, must be made artificially
- unique via injected substrings. This function aids in the reformatting of these
- strings so they match the syntax of the renumbering function. Note that this function
- does not affect elements or attributes that natively contain the '_num' substring
- (e.g., cassini:filter_name_1 and cassini:filter_name_2).
+ Clean the headers of a DataFrame by replacing certain characters with safer
+ alternatives and return a mapping of new to old headers.
Parameters:
- label_results (dict): The dictionary of XML results. This argument will be
- mutated by the function.
+ df (pandas.DataFrame): The DataFrame whose headers need to be cleaned.
- Example:
- # XPaths in label_results shortened for readability
- >>> keys = list(label_result)
- >>> keys = [
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type<1>,
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_1<1>,
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_2<1>,
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_3<1>,
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type_4<1>
- ]
- >>> correct_duplicate(label_results)
- >>> keys = list(label_result)
- >>> keys = [
- ../geom:SPICE_Kernel_Identification<1>/geom:kernel_type<1>,
- ../geom:SPICE_Kernel_Identification<2>/geom:kernel_type<1>,
- ../geom:SPICE_Kernel_Identification<3>/geom:kernel_type<1>,
- ../geom:SPICE_Kernel_Identification<4>/geom:kernel_type<1>,
- ../geom:SPICE_Kernel_Identification<5>/geom:kernel_type<1>
- ]
+ Returns:
+ dict: A dictionary mapping new headers to old headers.
"""
- element_names = set()
- for key in list(label_results):
- tag = key.split('/')[-1].split('<')[0]
- number = tag.split('_')[-1]
- if number.isdigit():
- cropped = tag.replace('_'+number, '')
- if cropped in element_names:
- key_new = key.replace(('_' + number + '<1>'), '<1>')
- parent = key_new.split('/')[-2].split('<')[0]
- key_new = key_new.replace(parent+'<1>', parent+'<'+str(int(number)+1)+'>')
- label_results[key_new] = label_results.pop(key)
- element_names.add(tag)
+ # Create a mapping of old to new headers
+ header_map = {col: header_cleaner(col) for col in df.columns}
+ # Update the DataFrame's headers
+ df.rename(columns=header_map, inplace=True)
-def clean_headers(df):
- """
- Clean the headers of a DataFrame by replacing certain characters with safer
- alternatives.
+ header_map = {v: k for k, v in list(header_map.items())}
- Parameters:
- df (pandas.DataFrame): The DataFrame whose headers need to be cleaned.
- """
- return df.rename(columns=lambda x: x.replace(
- ':', '_').replace('/', '__').replace('<', '_').replace('>', ''), inplace=True)
+ return header_map
def default_value_for_nil(config, data_type, nil_value):
@@ -296,6 +268,61 @@ def filter_dict_by_glob_patterns(input_dict, glob_patterns, valid_add_extra_file
return filtered_dict
+def get_true_type(xsd_files, tag, namespaces):
+ """
+ Returns the true type of a specified tag by searching through a list of XSD files.
+
+ This function iterates through the provided list of XSD files and attempts to find the
+ "true type" of the given XML tag by examining its attributes and base types. If the
+ type is not found with the original tag, a modified version of the tag is also
+ checked.
+
+ Parameters:
+ xsd_files (list): A list of file paths or URLs to the XSD files.
+ tag (str): The XML tag to search for within the XSD files.
+ namespaces (dict): A dictionary containing XML namespace mappings.
+
+ Returns:
+ str or None: The "true type" of the tag if found, otherwise `None`.
+ """
+ def search_type(xsd_file, tag, namespaces):
+ xsd_tree = download_xsd_file(xsd_file)
+ namespaces = scrape_namespaces(xsd_tree)
+ true_type = find_base_attribute(xsd_tree, tag, namespaces)
+ if true_type:
+ return true_type
+
+ # Check for modified tag if the first search does not find a match
+ modified_tag = tag + "_WO_Units"
+ true_type = find_base_attribute(xsd_tree, modified_tag, namespaces)
+ return true_type # This will return either the found type or None
+
+ for xsd_file in xsd_files:
+ true_type = search_type(xsd_file, tag, namespaces)
+ if true_type: # Only return if true_type is not None
+ return true_type
+
+ return None
+
+
+def header_cleaner(header):
+ """
+ Clean a header string.
+
+ Parameters:
+ header (str): The header string to be cleaned.
+
+ Returns:
+ str: The cleaned header string.
+ """
+ return (
+ header.replace(':', '_')
+ .replace('/', '__')
+ .replace('<', '_')
+ .replace('>', '')
+ )
+
+
def load_config_file(
default_config_file=Path(__file__).resolve().parent/'default_config.yaml',
specified_config_files=None):
@@ -388,21 +415,11 @@ def process_headers(label_results, key, root, namespaces, prefixes):
prefixes (dict): A dictionary containing XML namespace prefixes.
"""
key_new = convert_header_to_xpath(root, key, namespaces)
-
# Replace namespaces with prefixes
for namespace in prefixes:
if namespace in key_new:
key_new = key_new.replace('{' + namespace + '}', prefixes[namespace] + ':')
- # Check if key_new already exists in label_results, append suffix if necessary
- if key_new in label_results:
- suffix_gen = itertools.count(start=1, step=1)
- while True:
- trial_key = f"{key_new}_{next(suffix_gen)}"
- if trial_key not in label_results:
- key_new = trial_key
- break
-
label_results[key_new] = label_results.pop(key)
@@ -459,6 +476,7 @@ def renumber_xpaths(xpaths):
Parameters:
xpaths (list): The list of XPaths or XPath fragments.
+
Returns:
dict: A dictionary containing a mapping from the original XPaths to the
renumbered XPaths.
@@ -600,23 +618,7 @@ def store_element_text(element, tree, results_dict, xsd_files, nillable_elements
if not parent_check:
print(f'Non-nillable element in {label_filename} '
f'has no associated text: {tag}')
- true_type = None
- for xsd_file in xsd_files:
- xsd_tree = download_xsd_file(xsd_file)
- namespaces = scrape_namespaces(xsd_tree)
- true_type = find_base_attribute(xsd_tree, tag, namespaces)
- if true_type:
- break # Exit the loop once true_type is found
-
- if not true_type:
- modified_tag = tag + "_WO_Units"
- for xsd_file in xsd_files:
- namespaces = scrape_namespaces(xsd_tree)
- true_type = find_base_attribute(xsd_tree, modified_tag,
- namespaces)
- if true_type:
- break
-
+ true_type = get_true_type(xsd_files, tag, tree.getroot().nsmap)
default = default_value_for_nil(config, true_type, nil_value)
results_dict[xpath] = default
@@ -679,43 +681,40 @@ def update_nillable_elements_from_xsd_file(xsd_file, nillable_elements_info):
for element in elements_with_nillable:
name = element.get('name')
type_attribute = element.get('type')
- if type_attribute not in nillable_elements_info:
- if type_attribute:
- # Split the type attribute to handle namespace:typename format
- type_parts = type_attribute.split(':')
- # Take the last part as the type name
- type_name = type_parts[-1]
-
- # Attempt to find the type definition in the document
- type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
- f'//xs:complexType[@name="{type_name}"]')
- type_definition = tree.xpath(
- type_definition_xpath, namespaces=namespace)
-
- if type_definition:
- # Take the first match
- type_definition = type_definition[0]
- base_type = None
- # For complexType with simpleContent or simpleType, find base attr
- if type_definition.tag.endswith('simpleType'):
- restriction = type_definition.find('.//xs:restriction',
- namespaces=namespace)
- if restriction is not None:
- base_type = restriction.get('base')
- elif type_definition.tag.endswith('complexType'):
- extension = type_definition.find('.//xs:extension',
- namespaces=namespace)
- if extension is not None:
- base_type = extension.get('base')
-
- nillable_elements_info[name] = (
- base_type or 'External or built-in type')
- else:
- # Type definition not found, might be external or built-in type
- nillable_elements_info[name] = 'External or built-in type'
-
-
-def write_results_to_csv(results_list, args, output_csv_path):
+ if type_attribute:
+ # Split the type attribute to handle namespace:typename format
+ type_parts = type_attribute.split(':')
+ # Take the last part as the type name
+ type_name = type_parts[-1]
+
+ # Attempt to find the type definition in the document
+ type_definition_xpath = (f'//xs:simpleType[@name="{type_name}"] | '
+ f'//xs:complexType[@name="{type_name}"]')
+ type_definition = tree.xpath(type_definition_xpath, namespaces=namespace)
+
+ if type_definition:
+ # Take the first match
+ type_definition = type_definition[0]
+ base_type = None
+ # For complexType with simpleContent or simpleType, find base attr
+
+ try:
+ restriction = type_definition.find('.//xs:restriction',
+ namespaces=namespace)
+ base_type = restriction.get('base')
+
+ except AttributeError:
+ extension = type_definition.find('.//xs:extension',
+ namespaces=namespace)
+ base_type = extension.get('base')
+
+ nillable_elements_info[name] = base_type or 'External or built-in type'
+ else:
+ # Type definition not found, might be external or built-in type
+ nillable_elements_info[name] = 'External or built-in type'
+
+
+def write_results_to_csv(results_list, new_columns, args, output_csv_path):
"""
Write results from a list of dictionaries to a CSV file.
@@ -758,30 +757,64 @@ def pad_column_values_and_headers(df):
rows = []
for result_dict in results_list:
- rows.append(result_dict['Results'])
+ rows.append(result_dict)
df = pd.DataFrame(rows)
+ if new_columns is not None:
+ new_columns_sorted = sorted(new_columns.items(), key=lambda x: x[1][0])
+
+ for col_name, (index, col_values) in new_columns_sorted:
+ # If the column already exists, remove it temporarily
+ if col_name in df.columns:
+ df = df.drop(columns=[col_name])
+
+ # Insert the column at the desired index
+ df.insert(index, col_name, col_values)
+
+ if (
+ df.map(lambda x: isinstance(x, str) and ('"' in x))
+ .any()
+ .any()
+ and not args.fixed_width
+ ):
+ print("Warning: scraped contents of labels contains quotes. This is "
+ "against PDS4 data standards. Index file and subsequent label file will "
+ "not be generated.")
+ sys.exit(1)
+
+ if args.simplify_xpaths:
+ original_headers = df.columns.tolist()
+ simplified_headers = simplify_xpaths(original_headers)
+ df.columns = simplified_headers
+
+ if args.clean_header_field_names:
+ clean_header_mapping = clean_headers(df)
+
if args.sort_by:
sort_values = str(args.sort_by).split(',')
try:
- df.sort_values(by=sort_values, inplace=True)
- except KeyError as bad_sort:
- print(f'Unknown sort key {bad_sort}. For a list of available sort keys, use '
- f'the --output-headers-file option.')
+ sort_dataframe(df, sort_values)
+ except ValueError as bad_sort:
+ print(bad_sort)
sys.exit(1)
- if args.clean_header_field_names:
- clean_headers(df)
-
if args.fixed_width:
padded_df = pad_column_values_and_headers(df)
+
print(f'Fixed-width index file generated at {output_csv_path}')
- padded_df.to_csv(output_csv_path, index=False, na_rep='')
+ padded_df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n',
+ quoting=csv.QUOTE_MINIMAL)
else:
print(f'Index file generated at {output_csv_path}')
- df.to_csv(output_csv_path, index=False, na_rep='')
+ df.to_csv(output_csv_path, index=False, na_rep='', lineterminator='\n',
+ quoting=csv.QUOTE_MINIMAL)
+
+ if args.clean_header_field_names:
+ return clean_header_mapping
+ else:
+ return None
def find_base_attribute(xsd_tree, target_name, new_namespaces):
@@ -810,33 +843,6 @@ def find_base_attribute(xsd_tree, target_name, new_namespaces):
}
namespaces.update(new_namespaces)
- def follow_base_type(base_type):
- """
- Recursively follows the base type definitions to find the final base type.
-
- Parameters:
- base_type (str): The initial base type to follow.
-
- Returns:
- str: The final base type.
- """
- while True:
- if 'ASCII' in base_type or 'UTF8' in base_type:
- return base_type
-
- next_query = (
- f".//xs:simpleType[@name='{base_type.split(':')[-1]}']"
- f"//xs:restriction/@base"
- )
- try:
- next_result = xsd_tree.xpath(next_query, namespaces=namespaces)
- except etree.XPathEvalError:
- break
- if not next_result:
- break
- base_type = next_result[0]
- return base_type
-
def get_base_type(query):
"""
Executes an XPath query to find the base type.
@@ -847,11 +853,8 @@ def get_base_type(query):
Returns:
list: The result of the XPath query.
"""
- try:
- result = xsd_tree.xpath(query, namespaces=namespaces)
- return result
- except etree.XPathEvalError:
- return None
+ result = xsd_tree.xpath(query, namespaces=namespaces)
+ return result
queries = [
f".//xs:complexType[@name='{target_name}']//xs:extension/@base",
@@ -901,13 +904,15 @@ def get_base_type(query):
f"/*[local-name()='extension']/*/*/*/@base"
]
+ base_type = None
for query in queries:
result = get_base_type(query)
if result:
base_type = result[0]
- return follow_base_type(base_type)
+ else:
+ continue
- return None
+ return base_type
def scrape_namespaces(tree):
@@ -927,6 +932,43 @@ def scrape_namespaces(tree):
return namespaces
+def sort_dataframe(df, sort_keys):
+ """
+ Sorts a DataFrame based on specified keys.
+
+ This function sorts the input DataFrame in place using the provided sort keys.
+ If an invalid key is provided, a `ValueError` is raised with a message indicating
+ the unknown key and suggesting how to obtain a list of valid keys.
+
+ Parameters:
+ df (pandas.DataFrame): The DataFrame to be sorted.
+ sort_keys (str or list of str): The column name(s) to sort the DataFrame by.
+ Can be a single string or a list of strings.
+
+ Raises:
+ ValueError: If any of the provided sort keys are not found in the DataFrame,
+ a `ValueError` is raised with a descriptive error message.
+
+ Example:
+ >>> df = pd.DataFrame({
+ ... 'name': ['Alice', 'Bob', 'Charlie'],
+ ... 'age': [25, 30, 22]
+ ... })
+ >>> sort_keys = ['age']
+ >>> sort_dataframe(df, sort_keys)
+ >>> print(df)
+ name age
+ 2 Charlie 22
+ 0 Alice 25
+ 1 Bob 30
+ """
+ try:
+ df.sort_values(by=sort_keys, inplace=True)
+ except KeyError as bad_sort:
+ raise ValueError(f'Unknown sort key {bad_sort}. For a list of available sort '
+ f'keys, use the --output-headers-file option.')
+
+
def get_creation_date(file_path):
"""
Returns the creation date of a file in ISO 8601 format.
@@ -945,7 +987,7 @@ def get_creation_date(file_path):
stat = os.stat(file_path)
try:
creation_time = stat.st_birthtime
- except AttributeError:
+ except AttributeError: # pragma: no coverage
# Fallback to the last modification time if birth time is not available
creation_time = stat.st_mtime
@@ -1099,6 +1141,46 @@ def generate_unique_filename(base_name):
return new_filename
+def simplify_xpaths(headers):
+ """
+ Simplifies a list of XPath headers by shortening each header to its tag and
+ namespace prefix, provided the tag is unique.
+
+ This function processes a list of XPath-like strings (headers) and attempts to
+ simplify them to their last tag component. If --simplify-xpaths is used, the XPath
+ headers will be shortened to the element's tag and namespace prefix. This is
+ contingent on the uniqueness of the XPath header; if more than one XPath header
+ shares a tag, a namespace and a predicate value, the XPath header will remain whole.
+
+ Parameters:
+ headers (list of str): A list of strings representing XPath headers.
+
+ Returns:
+ list of str: A list of strings where unique tags have replaced their
+ corresponding full XPath headers, and non-unique tags remain unchanged.
+ """
+ # Shorten each header to its last path component when that component is unique.
+ tags = []
+ matches = {}
+
+ # Step 1: Gather all possible tags from labels
+ for header in headers:
+ tag = header.split('/')[-1]
+ tags.append(tag)
+ matches[header] = tag
+
+ # Step 2: Count the number of instances of each tag
+ term_counts = Counter(tags)
+
+ # Step 3: If a tag occurs only once, shorten it.
+ for ind, header in enumerate(headers):
+ tag = header.split('/')[-1]
+ if term_counts[tag] == 1:
+ headers[ind] = tag
+
+ return headers
+
+
class MultilineFormatter(argparse.HelpFormatter):
"""Class to allow multi-line help messages with argparse.
@@ -1118,7 +1200,7 @@ def _fill_text(self, text, width, indent):
def main(cmd_line=None):
epilog_sfx = ''
- if __version__ != 'Version unspecified':
+ if __version__ != 'Version unspecified': # pragma: no coverage
epilog_sfx = f'|nVersion: {__version__}'
parser = argparse.ArgumentParser(
formatter_class=MultilineFormatter,
@@ -1187,7 +1269,8 @@ def main(cmd_line=None):
metavar='XPATHS_FILEPATH',
help='Optional text file specifying which XPaths to '
'scrape. If not specified, all XPaths found in '
- 'the label files are included.')
+ 'the label files are included. Only whole XPaths '
+ 'can be specified.')
limiting_results.add_argument('--output-headers-file', type=str,
metavar='HEADERS_FILEPATH',
@@ -1234,10 +1317,10 @@ def main(cmd_line=None):
# will determine which files will be scraped for.
nillable_elements_info = {}
- label_files = []
+ collected_files = set()
all_results = []
- tags = []
xsd_files = []
+ extra_file_info_ind = {}
output_csv_path = None
output_txt_path = None
@@ -1253,21 +1336,21 @@ def main(cmd_line=None):
for pattern in patterns:
files = directory_path.glob(pattern)
- if not files:
- verboseprint(f'No files matching {pattern} found in '
- f'directory: {directory_path}')
- label_files.extend(files)
+ prev_len = len(collected_files)
+ collected_files.update(files)
+ if len(collected_files) == prev_len:
+ print(f'No new files found for pattern: {pattern}')
- verboseprint(f'{len(label_files)} matching file(s) found')
+ verboseprint(f'{len(collected_files)} matching file(s) found')
- if label_files == []:
+ label_files = list(collected_files)
+ label_files.sort()
+ if len(label_files) == 0:
print(f'No files matching any patterns found in directory: {directory_path}')
sys.exit(1)
# Loading in additional patterns from --limit-xpaths-file, if applicable,
if args.limit_xpaths_file:
- verboseprint(
- f'Element file {args.limit_xpaths_file} used for additional patterns.')
with open(args.limit_xpaths_file, 'r') as limit_xpaths_file:
elements_to_scrape = [line.strip() for line in limit_xpaths_file]
verboseprint('Elements to scrape:')
@@ -1281,6 +1364,15 @@ def main(cmd_line=None):
else:
elements_to_scrape = None
+ if (
+ args.add_extra_file_info
+ and args.limit_xpaths_file
+ and elements_to_scrape is not None
+ ):
+ for x in elements_to_scrape:
+ if x in valid_add_extra_file_info:
+ extra_file_info_ind[x] = elements_to_scrape.index(x)
+
# For each file in label_files, load in schema files and namespaces for reference.
# Traverse the label file and scrape the desired contents. Place these contents
# into a dictionary to later parse into a csv file.
@@ -1300,7 +1392,7 @@ def main(cmd_line=None):
filepath = str(label_file.relative_to(args.directorypath)).replace('\\', '/')
# PDS4 compliant filepaths must be less than 255 characters.
- if len(filepath) > 255:
+ if len(filepath) > 255: # pragma: no coverage
print(f'Filepath {filepath} exceeds 255 character limit.')
sys.exit(1)
@@ -1319,6 +1411,7 @@ def main(cmd_line=None):
# improve readability. Each XPath's namespace is replaced with its prefix for
# faster reference. Duplicate XPaths are made unique to ensure all results are
# present in the final product.
+
for key in list(label_results):
process_headers(label_results, key, root, namespaces, prefixes)
@@ -1332,7 +1425,7 @@ def main(cmd_line=None):
new_parts = []
for part in parts:
if not part.endswith('>') and parts.index(part) != 1:
- part = part+'<1>'
+ part = f'{part}<1>'
new_parts.append(part)
else:
new_parts.append(part)
@@ -1342,7 +1435,6 @@ def main(cmd_line=None):
for key in list(label_results):
if 'cyfunction' in key:
del label_results[key]
-
# The XPath headers must be renumbered to reflect which instance of the element
# the column refers to. At this stage, duplicate XPaths may exist again due to
# the reformatting. These duplicates are corrected to preserve the contents of
@@ -1351,14 +1443,15 @@ def main(cmd_line=None):
for old_xpath, new_xpath in xpath_map.items():
label_results[new_xpath] = label_results.pop(old_xpath)
- correct_duplicates(label_results)
-
# Collect metadata about the label file. The label file's lid is scraped and
# broken into multiple parts. This metadata can then be requested as additional
# columns within the index file.
- lid = extract_logical_identifier(tree)
- if lid is None:
- lid = label_results.get('pds:logical_identifier', 'Missing_LID')
+ try:
+ lid = extract_logical_identifier(tree)
+ except AttributeError:
+ print(f'Label file {label_file} does not have a '
+ f'logical_identifier attribute.')
+ sys.exit(1)
# Attach extra columns if asked for.
bundle_lid = ':'.join(lid.split(':')[:4])
@@ -1371,8 +1464,16 @@ def main(cmd_line=None):
label_results = {**{ele: extras[ele] for ele in
args.add_extra_file_info}, **label_results}
- result_dict = {'Results': label_results}
- all_results.append(result_dict)
+ all_results.append(label_results)
+
+ for label_results in all_results:
+ if extra_file_info_ind != {}:
+ new_columns = {}
+ for key in extra_file_info_ind.keys():
+ values = [d[key] for d in all_results]
+ new_columns[key] = (extra_file_info_ind[key], values)
+ else:
+ new_columns = None
if args.add_extra_file_info and elements_to_scrape is not None:
elements_to_scrape = args.add_extra_file_info + elements_to_scrape
@@ -1381,87 +1482,64 @@ def main(cmd_line=None):
# of the --limit-xpaths-file input file. If this command is not used, the original
# dictionary will be returned. Glob patterns are processed sequentially, with the
# first pattern having the highest priority.
- for i in range(len(all_results)):
- label_results = all_results[i]['Results']
- label_results = filter_dict_by_glob_patterns(
+
+ for ind, label_results in enumerate(all_results):
+ label_results_new = filter_dict_by_glob_patterns(
label_results, elements_to_scrape, valid_add_extra_file_info, verboseprint)
- all_results[i]['Results'] = label_results
+ all_results[ind] = label_results_new
- if all(len(set(r['Results'])) == 0 for r in all_results):
+ if all(len(r) == 0 for r in all_results):
print('No results found: glob pattern(s) excluded all matches.')
sys.exit(1)
- # If --simplify-xpaths is used, the XPath headers will be shortened to the
- # element's tag and namespace prefix. This is contingent on the uniqueness of
- # the XPath header; if more than one XPath header shares a tag, a namespace and a
- # predicate value, the XPath header will remain whole.
if args.simplify_xpaths:
- for i in range(len(all_results)):
- label_results = all_results[i]['Results']
- tags = []
- names = []
-
- # Step 1: Gather all tags from keys
- for key in label_results:
- elements = key.split('/')
- tag = elements[-1]
- name = tag.split('<')[0]
- tags.append(tag)
- names.append(name)
-
- # Step 2: Find unique tags
- unique_tags = []
- for tag in tags:
- name = tag.split('<')[0]
- if tags.count(tag) == 1 and names.count(name) == 1:
- unique_tags.append(tag)
-
- # Step 3: Create a new dictionary to hold modified results
- new_label_results = {}
-
- # Step 4: Iterate over original dictionary to modify and copy to new
- # dictionary
- for key, value in list(label_results.items()):
- elements = key.split('/')
- tag = elements[-1]
- if tag in unique_tags:
- new_tag = tag.split('<')[0]
- verboseprint(f'XPath header {key} changed to {new_tag}')
- new_label_results[new_tag] = value
- else:
- new_label_results[key] = value
-
- all_results[i]['Results'] = new_label_results
+ original_headers = {}
+ for label_results in all_results:
+ for key in label_results.keys():
+ original_headers[key] = key.split('/')[-1]
if output_csv_path:
- write_results_to_csv(all_results, args, output_csv_path)
+ clean_header_mapping = write_results_to_csv(all_results, new_columns, args,
+ output_csv_path)
# To instead receive a list of the information available within a label or set of
# labels, you may use --output-headers-file. This takes all of the keys of the
# label_results dictionaries and places them in the headers file instead of the
# index file.
if output_txt_path:
+ if not args.output_index_file:
+ print('No index file generated because --output-headers-file was '
+ 'provided without --output-index-file.')
xpaths = []
for label in all_results:
- for values in label.values():
- for xpath in values:
- if xpath not in xpaths:
- xpaths.append(xpath)
+ for xpath in label:
+ if xpath not in xpaths:
+ xpaths.append(xpath)
+
+ if new_columns is not None:
+ # Sort new elements by index
+ new_elements_sorted = sorted(new_columns.items(), key=lambda x: x[1][0])
+
+ # Insert new elements into xpaths
+ for name, (index, value) in new_elements_sorted:
+ # Remove the name if it is already present so it is not listed twice
+ if name in xpaths:
+ xpaths.remove(name)
+ # Insert at the desired index
+ xpaths.insert(index, name)
# The headers file is now written to the given location. If cleaned header field
# names are requested, each header is cleaned before it is written to the file.
with open(output_txt_path, 'w') as output_fp:
+ if args.simplify_xpaths:
+ xpaths = simplify_xpaths(xpaths)
for item in xpaths:
if args.clean_header_field_names:
verboseprint(
'--clean-header-field-names active. Headers reformatted.')
- item = item.replace(
- ':', '_').replace('/', '__').replace('<', '_').replace('>', '')
+ item = header_cleaner(item)
output_fp.write("%s\n" % item)
print(f'XPath headers file generated at {output_txt_path}.')
- if not args.output_index_file:
- print('No index file generated because --output-headers-file was '
- 'provided without --output-index-file.')
# Generates the label for this index file, if --generate-label is used.
@@ -1506,13 +1584,17 @@ def main(cmd_line=None):
# file is fixed-width or delimited.
for header in headers:
whole_header = header
+ whole_header_length = len(whole_header)
if args.fixed_width:
header = header.strip()
+ if args.clean_header_field_names:
+ full_header = header
+ header = clean_header_mapping[header]
if (header in valid_add_extra_file_info and 'lid' in header):
true_type = 'pds:ASCII_LID'
elif header == 'filename':
true_type = 'pds:ASCII_File_Name'
- elif header == filepath:
+ elif header == 'filepath':
true_type = 'pds:ASCII_File_Specification_Name'
elif header == 'bundle':
true_type = 'pds:ASCII_Text_Preserved'
@@ -1520,29 +1602,17 @@ def main(cmd_line=None):
parts = header.split('/')
name = parts[-1].split('<')[0].split(':')[-1]
- true_type = None
-
- for xsd_file in xsd_files:
- xsd_tree = download_xsd_file(xsd_file)
- true_type = find_base_attribute(xsd_tree, name, namespaces)
- if true_type:
- break
-
- if not true_type:
- modified_name = name + "_WO_Units"
- for xsd_file in xsd_files:
- xsd_tree = download_xsd_file(xsd_file)
- true_type = find_base_attribute(xsd_tree, modified_name,
- namespaces)
- if true_type:
- break
-
- if true_type is None:
- true_type = ':inapplicable'
+ true_type = get_true_type(xsd_files, name, namespaces)
+
true_type = true_type.split(':')[-1]
field_number += 1
- header_length = len(header.encode('utf-8'))
- header_name = header
+
+ if args.clean_header_field_names:
+ header_length = len(full_header.encode('utf-8'))
+ header_name = full_header
+ else:
+ header_length = len(header.encode('utf-8'))
+ header_name = header
maximum_field_length = maximum_field_lengths[whole_header]
header_info.append({'name': header_name,
@@ -1552,8 +1622,11 @@ def main(cmd_line=None):
'field_length': maximum_field_length,
'maximum_field_length': maximum_field_length,
'offset': offset})
- offset += header_length + jump
- field_location = offset
+ if args.fixed_width:
+ offset += whole_header_length + jump
+ else:
+ offset += header_length + jump
+ field_location = offset + 1
# The creation date of the index file is stored for later reference.
creation_date = get_creation_date(index_file)
@@ -1563,8 +1636,8 @@ def main(cmd_line=None):
# .yaml file from --config-file
label_content = {
'logical_identifier': 'urn:nasa:pds:rms_metadata:document_opus:' + filename,
- 'creation_date_time': str(creation_date),
- 'TEMPFILE': index_file,
+ 'calculated_creation_date_time': str(creation_date),
+ 'index_file_name': index_file,
'Field_Content': header_info,
'fields': len(header_info),
'maximum_record_length': get_longest_row_length(index_file),
@@ -1597,5 +1670,5 @@ def main(cmd_line=None):
template.write(label_content, str(output_subdir / filename) + '.xml')
-if __name__ == '__main__':
+if __name__ == '__main__': # pragma: no coverage
main()
diff --git a/pyproject.toml b/pyproject.toml
index 2ac6305..f34e16a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "rms-pds4indextools"
dynamic = ["version"]
description = "pds4indextools"
readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
dependencies = [
"lxml",
"pandas",
@@ -28,7 +28,6 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Utilities",
"License :: OSI Approved :: Apache Software License",
- "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
diff --git a/test_files/expected/clean_header_field_names_success_2.csv b/test_files/expected/clean_header_field_names_success_2.csv
new file mode 100644
index 0000000..6304953
--- /dev/null
+++ b/test_files/expected/clean_header_field_names_success_2.csv
@@ -0,0 +1,2 @@
+pds_Product_Observational__pds_Identification_Area_1__pds_logical_identifier_1,pds_Product_Observational__pds_Identification_Area_1__pds_version_id_1,pds_Product_Observational__pds_Identification_Area_1__pds_title_1,pds_Product_Observational__pds_Identification_Area_1__pds_information_model_version_1,pds_Product_Observational__pds_Observing_System_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/cleaned_headers_label_success.csv b/test_files/expected/cleaned_headers_label_success.csv
new file mode 100644
index 0000000..efb79b3
--- /dev/null
+++ b/test_files/expected/cleaned_headers_label_success.csv
@@ -0,0 +1,3 @@
+pds_logical_identifier_1,pds_version_id_1,pds_title_1,pds_information_model_version_1,pds_author_list_1,pds_publication_year_1,pds_keyword_1,pds_keyword_2,pds_keyword_3,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_comment_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1,pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_comment_1,pds_Product_Observational__pds_Observing_System_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_2__pds_name_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_name_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_type_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Compone
nt_1__pds_Internal_Reference_1__pds_lid_reference_1,pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,"French, Richard G.",0003-01-01,kw1,kw2,kw3,SPK,ura111.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,SPK,earthstns_itrf93_040916.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,Another thing,Another thing,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host,,
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,"French, Richard G.",0003-01-01,kw1,,,SPK,ura111.bsp,These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.,,,,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,,,,,,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/cleaned_headers_label_success.xml b/test_files/expected/cleaned_headers_label_success.xml
new file mode 100644
index 0000000..621f7a2
--- /dev/null
+++ b/test_files/expected/cleaned_headers_label_success.xml
@@ -0,0 +1,226 @@
+
+
+
+
+
+ urn:nasa:pds:rms_metadata:document_opus:cleaned_headers_label
+ 1.1
+ Index File
+ 1.21.0.0
+ Product_Ancillary
+
+ Creative Common Public License CC0 1.0 (2024)
+ Creative Commons Zero (CC0) license information.
+
+ urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0
+ product_to_license
+
+
+
+
+
+ cleaned_headers_label.csv
+ index-table
+ 0002-02-02T00:00:00.00Z
+ 24837ed11b0e8ceb94102e1f22d95b31
+
+
+
+ 0
+ 2183
+ UTF-8 Text
+ Provides the column headers, separated by commas, for the data table.
+
+
+ 0
+ 3370
+ PDS DSV 1
+ 3
+ Line-Feed
+ Comma
+
+ 25
+ 0
+ 2182
+
+ pds_logical_identifier_1
+ 1
+ ASCII_LID
+ 52
+
+
+
+ pds_version_id_1
+ 2
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+ pds_title_1
+ 3
+ ASCII_Short_String_Collapsed
+ 33
+
+
+
+ pds_information_model_version_1
+ 4
+ ASCII_Short_String_Collapsed
+ 8
+
+
+
+ pds_author_list_1
+ 5
+ UTF8_Text_Preserved
+ 18
+
+
+
+ pds_publication_year_1
+ 6
+ ASCII_Date_YMD
+ 10
+
+
+
+ pds_keyword_1
+ 7
+ UTF8_Short_String_Collapsed
+ 3
+
+
+
+ pds_keyword_2
+ 8
+ UTF8_Short_String_Collapsed
+ 3
+
+
+
+ pds_keyword_3
+ 9
+ UTF8_Short_String_Collapsed
+ 3
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1
+ 10
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1
+ 11
+ ASCII_File_Name
+ 10
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_1__geom_comment_1
+ 12
+ ASCII_Text_Preserved
+ 171
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_kernel_type_1
+ 13
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_SPICE_Kernel_Identification_1__geom_spice_kernel_file_name_1
+ 14
+ ASCII_File_Name
+ 27
+
+
+
+ pds_Product_Observational__pds_Observation_Area_1__pds_Discipline_Area_1__geom_Geometry_1__geom_SPICE_Kernel_Files_2__geom_comment_1
+ 15
+ ASCII_Text_Preserved
+ 171
+
+
+
+ pds_Product_Observational__pds_Observing_System_1__pds_name_1
+ 16
+ UTF8_Short_String_Collapsed
+ 41
+
+
+
+ pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_name_1
+ 17
+ UTF8_Short_String_Collapsed
+ 15
+
+
+
+ pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_type_1
+ 18
+ ASCII_Short_String_Collapsed
+ 10
+
+
+
+ pds_Product_Observational__pds_Observing_System_2__pds_name_1
+ 19
+ UTF8_Short_String_Collapsed
+ 13
+
+
+
+ pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_name_1
+ 20
+ UTF8_Short_String_Collapsed
+ 13
+
+
+
+ pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_type_1
+ 21
+ ASCII_Short_String_Collapsed
+ 10
+
+
+
+ pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1
+ 22
+ ASCII_LID
+ 50
+
+
+
+ pds_Product_Observational__pds_Observing_System_2__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+ 23
+ ASCII_Short_String_Collapsed
+ 18
+
+
+
+ pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_lid_reference_1
+ 24
+ ASCII_LID
+ 50
+
+
+
+ pds_Product_Observational__pds_Observing_System_1__pds_Observing_System_Component_1__pds_Internal_Reference_1__pds_reference_type_1
+ 25
+ ASCII_Short_String_Collapsed
+ 18
+
+
+
+
+
+
diff --git a/test_files/expected/index_file_success.csv b/test_files/expected/index_file_success.csv
new file mode 100644
index 0000000..90ebe6e
--- /dev/null
+++ b/test_files/expected/index_file_success.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_references_success.csv b/test_files/expected/label_references_success.csv
new file mode 100644
index 0000000..0eb78a1
--- /dev/null
+++ b/test_files/expected/label_references_success.csv
@@ -0,0 +1,2 @@
+pds:logical_identifier<1>,pds:version_id<1>,pds:title<1>,pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:type<1>,pds:lid_reference<1>,pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_references_success.xml b/test_files/expected/label_references_success.xml
new file mode 100644
index 0000000..51e978f
--- /dev/null
+++ b/test_files/expected/label_references_success.xml
@@ -0,0 +1,158 @@
+
+
+
+
+
+ urn:nasa:pds:rms_metadata:document_opus:label_references
+ 1.0
+ Index file for my occultation bundle
+ 1.21.0.0
+ Product_Ancillary
+
+
+ 2024-01-01
+ 1.1
+ This is a lengthy description of what this modification
+changed in the bundle.
+There were lots of changes.
+
+
+
+ 2023-01-01
+ 1.
+ Initial release.
+
+
+
+ Creative Common Public License CC0 1.0 (2024)
+ Creative Commons Zero (CC0) license information.
+
+ urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0
+ product_to_license
+
+
+
+
+
+ urn:nasa:pds:cassini_iss_cruise:data_raw:body-geometry
+ data_to_resource
+ The index table of body surface geometry information associated with each observation.
+
+
+ urn:nasa:pds:cassini_iss_cruise:body-inventory
+ data_to_resource
+ An index listing every Saturn system body expected to fall within each field of view.
+
+
+ 10.1086/113662
+ Elliot et al. (1984). "Structure of the Uranian rings. I. Square-well model and particle-size constraints" Astron J. 89, 1587-1603.
+ reference material
+
+
+ urn:nasa:pds:insight-ifg-mars:data-ifg-raw:ifg-raw-sol0014-20181211t021721-20181211t150435-pt2hz::5.0
+ data_to_raw_source_product
+ Raw data used in processing
+
+
+ CO-S-UVIS-2-CUBE-V1.4:COUVIS_0056/DATA/D2016_245/EUV2016_245_17_49
+ data_to_raw_source_product
+ None
+ PDS RMS Node
+ The original PDS3 version of this product. The form of the reference is dataset_id:volume_id:directory_path:file_name.
+
+
+
+
+ label_references.csv
+ index-table
+ 0001-01-01T00:00:00.00Z
+ 85e4697006ea9a54e7eafa8cf4b9bb40
+
+
+
+ 0
+ 303
+ UTF-8 Text
+ Provides the column headers, separated by commas, for the data table.
+
+
+ 0
+ 542
+ PDS DSV 1
+ 2
+ Line-Feed
+ Comma
+
+ 9
+ 0
+ 302
+
+ pds:logical_identifier<1>
+ 1
+ ASCII_Short_String_Collapsed
+ 52
+
+
+
+ pds:version_id<1>
+ 2
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+ pds:title<1>
+ 3
+ ASCII_Short_String_Collapsed
+ 33
+
+
+
+ pds:information_model_version<1>
+ 4
+ ASCII_Short_String_Collapsed
+ 8
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
+ 5
+ UTF8_Short_String_Collapsed
+ 41
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
+ 6
+ UTF8_Short_String_Collapsed
+ 15
+
+
+
+ pds:type<1>
+ 7
+ ASCII_Short_String_Collapsed
+ 10
+
+
+
+ pds:lid_reference<1>
+ 8
+ ASCII_LID
+ 50
+
+
+
+ pds:reference_type<1>
+ 9
+ ASCII_Short_String_Collapsed
+ 18
+
+
+
+
+
+
diff --git a/test_files/expected/label_success_1.csv b/test_files/expected/label_success_1.csv
new file mode 100644
index 0000000..90ebe6e
--- /dev/null
+++ b/test_files/expected/label_success_1.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img,1.11.0.0,Cassini Orbiter Imaging Science Subsystem,Cassini Orbiter,Spacecraft,urn:nasa:pds:context:instrument_host:spacecraft.co,is_instrument_host
diff --git a/test_files/expected/label_success_1.xml b/test_files/expected/label_success_1.xml
new file mode 100644
index 0000000..8d53f30
--- /dev/null
+++ b/test_files/expected/label_success_1.xml
@@ -0,0 +1,114 @@
+
+
+
+
+
+ urn:nasa:pds:rms_metadata:document_opus:generated_label_1
+ 1.1
+ Index File
+ 1.21.0.0
+ Product_Ancillary
+
+ Creative Common Public License CC0 1.0 (2024)
+ Creative Commons Zero (CC0) license information.
+
+ urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0
+ product_to_license
+
+
+
+
+
+ generated_label_1.csv
+ index-table
+ 0001-01-01T00:00:00.00Z
+ a177a1160bf3780c01e3bd9e02be89f4
+
+
+
+ 0
+ 819
+ UTF-8 Text
+ Provides the column headers, separated by commas, for the data table.
+
+
+ 0
+ 1058
+ PDS DSV 1
+ 2
+ Line-Feed
+ Comma
+
+ 9
+ 0
+ 818
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>
+ 1
+ ASCII_Short_String_Collapsed
+ 52
+
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+ 2
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+ 3
+ ASCII_Short_String_Collapsed
+ 33
+
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>
+ 4
+ ASCII_Short_String_Collapsed
+ 8
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
+ 5
+ UTF8_Short_String_Collapsed
+ 41
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
+ 6
+ UTF8_Short_String_Collapsed
+ 15
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>
+ 7
+ ASCII_Short_String_Collapsed
+ 10
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>
+ 8
+ ASCII_LID
+ 50
+
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+ 9
+ ASCII_Short_String_Collapsed
+ 18
+
+
+
+
+
+
diff --git a/test_files/expected/label_success_2.csv b/test_files/expected/label_success_2.csv
new file mode 100644
index 0000000..d9f2dc4
--- /dev/null
+++ b/test_files/expected/label_success_2.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>,pds:Product_Observational/pds:Observing_System<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>,pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n ,1.0 ,Cassini ISS Image 1455200455n.img ,1.11.0.0 ,Cassini Orbiter Imaging Science Subsystem ,Cassini Orbiter ,Spacecraft ,urn:nasa:pds:context:instrument_host:spacecraft.co ,is_instrument_host
diff --git a/test_files/expected/label_success_2.xml b/test_files/expected/label_success_2.xml
new file mode 100644
index 0000000..553803e
--- /dev/null
+++ b/test_files/expected/label_success_2.xml
@@ -0,0 +1,113 @@
+
+
+
+
+
+ urn:nasa:pds:rms_metadata:document_opus:generated_label_2
+ 1.1
+ Index File
+ 1.21.0.0
+ Product_Ancillary
+
+ Creative Common Public License CC0 1.0 (2024)
+ Creative Commons Zero (CC0) license information.
+
+ urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0
+ product_to_license
+
+
+
+
+
+ generated_label_2.csv
+ index-table
+ 0002-02-02T00:00:00.00Z
+ 53d47b320936ac3fbba0852696065418
+
+
+
+ 0
+ 819
+ UTF-8 Text
+ Provides the column headers, separated by commas, for the data table.
+
+
+
+ 1638
+ 2
+ Line-Feed
+
+
+ 9
+ 0
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>
+ 1
+ 1
+ ASCII_Short_String_Collapsed
+ 78
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+ 2
+ 80
+ ASCII_Short_String_Collapsed
+ 70
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+ 3
+ 151
+ ASCII_Short_String_Collapsed
+ 65
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:information_model_version<1>
+ 4
+ 217
+ ASCII_Short_String_Collapsed
+ 85
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
+ 5
+ 303
+ UTF8_Short_String_Collapsed
+ 61
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
+ 6
+ 365
+ UTF8_Short_String_Collapsed
+ 95
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>
+ 7
+ 461
+ ASCII_Short_String_Collapsed
+ 95
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:lid_reference<1>
+ 8
+ 557
+ ASCII_LID
+ 130
+
+
+ pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:Internal_Reference<1>/pds:reference_type<1>
+ 9
+ 688
+ ASCII_Short_String_Collapsed
+ 131
+
+
+
+
+
diff --git a/test_files/expected/label_success_3.csv b/test_files/expected/label_success_3.csv
new file mode 100644
index 0000000..188177b
--- /dev/null
+++ b/test_files/expected/label_success_3.csv
@@ -0,0 +1,4 @@
+filename,filepath,lid,bundle,bundle_lid,pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+tester_label_1.xml,labels/tester_label_1.xml,urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,cassini_iss_saturn,urn:nasa:pds:cassini_iss_saturn,urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0
+tester_label_2.xml,labels/tester_label_2.xml,urn:nasa:pds:uranus_occ_u149_irtf_320cm:data:2200nm_counts-v-time_occult,uranus_occ_u149_irtf_320cm,urn:nasa:pds:uranus_occ_u149_irtf_320cm,urn:nasa:pds:uranus_occ_u149_irtf_320cm:data:2200nm_counts-v-time_occult,1.0
+tester_label_3.xml,labels/tester_label_3.xml,urn:nasa:pds:cassini_iss_cruise:data_raw:1357539630n,cassini_iss_cruise,urn:nasa:pds:cassini_iss_cruise,urn:nasa:pds:cassini_iss_cruise:data_raw:1357539630n,1.0
diff --git a/test_files/expected/label_success_3.xml b/test_files/expected/label_success_3.xml
new file mode 100644
index 0000000..6bb4f39
--- /dev/null
+++ b/test_files/expected/label_success_3.xml
@@ -0,0 +1,100 @@
+
+
+
+
+
+ urn:nasa:pds:rms_metadata:document_opus:generated_label_3
+ 1.1
+ Index File
+ 1.21.0.0
+ Product_Ancillary
+
+ Creative Common Public License CC0 1.0 (2024)
+ Creative Commons Zero (CC0) license information.
+
+ urn:nasa:pds:system_bundle:document_pds4_standards:creative_commons_1.0.0::1.0
+ product_to_license
+
+
+
+
+
+ generated_label_3.csv
+ index-table
+ 0001-01-01T00:00:00.00Z
+ 8b2eb69a284938d23748de7f53d2e45b
+
+
+
+ 0
+ 190
+ UTF-8 Text
+ Provides the column headers, separated by commas, for the data table.
+
+
+ 0
+ 864
+ PDS DSV 1
+ 4
+ Line-Feed
+ Comma
+
+ 7
+ 0
+ 261
+
+ filename
+ 1
+ ASCII_File_Name
+ 18
+
+
+
+ filepath
+ 2
+ ASCII_File_Specification_Name
+ 25
+
+
+
+ lid
+ 3
+ ASCII_LID
+ 72
+
+
+
+ bundle
+ 4
+ ASCII_Text_Preserved
+ 26
+
+
+
+ bundle_lid
+ 5
+ ASCII_LID
+ 39
+
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>
+ 6
+ ASCII_Short_String_Collapsed
+ 72
+
+
+
+ pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+ 7
+ ASCII_Short_String_Collapsed
+ 3
+
+
+
+
+
+
diff --git a/test_files/expected/limit_xpaths_file_success_1.csv b/test_files/expected/limit_xpaths_file_success_1.csv
new file mode 100644
index 0000000..08b5633
--- /dev/null
+++ b/test_files/expected/limit_xpaths_file_success_1.csv
@@ -0,0 +1,2 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>,pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n,1.0,Cassini ISS Image 1455200455n.img
diff --git a/test_files/expected/nested_label_success.txt b/test_files/expected/nested_label_success.txt
new file mode 100644
index 0000000..b0f1733
--- /dev/null
+++ b/test_files/expected/nested_label_success.txt
@@ -0,0 +1,25 @@
+pds:logical_identifier<1>
+pds:version_id<1>
+pds:title<1>
+pds:information_model_version<1>
+pds:author_list<1>
+pds:publication_year<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<1>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<1>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<2>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<2>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<3>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<3>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<4>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<1>/geom:SPICE_Kernel_Identification_Extra<4>/geom:spice_kernel_file_name<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<2>/geom:kernel_type<1>
+pds:Product_Observational/pds:Observation_Area<1>/pds:Discipline_Area<1>/geom:Geometry<1>/geom:SPICE_Kernel_Files<1>/geom:SPICE_Kernel_Identification<2>/geom:spice_kernel_file_name<1>
+geom:comment<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:type<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:Observing_System_Component<1>/pds:name<1>
+pds:Product_Observational/pds:Observing_System<2>/pds:Observing_System_Component<1>/pds:type<1>
+pds:lid_reference<1>
+pds:reference_type<1>
diff --git a/test_files/expected/simplify_xpaths_success_1.txt b/test_files/expected/simplify_xpaths_success_1.txt
index e179908..d39804c 100644
--- a/test_files/expected/simplify_xpaths_success_1.txt
+++ b/test_files/expected/simplify_xpaths_success_1.txt
@@ -1,9 +1,9 @@
-pds:logical_identifier
-pds:version_id
-pds:title
-pds:information_model_version
+pds:logical_identifier<1>
+pds:version_id<1>
+pds:title<1>
+pds:information_model_version<1>
pds:Product_Observational/pds:Observing_System<1>/pds:name<1>
pds:Product_Observational/pds:Observing_System<1>/pds:Observing_System_Component<1>/pds:name<1>
-pds:type
-pds:lid_reference
-pds:reference_type
+pds:type<1>
+pds:lid_reference<1>
+pds:reference_type<1>
diff --git a/test_files/expected/simplify_xpaths_success_3.txt b/test_files/expected/simplify_xpaths_success_3.txt
index 08fc585..0ac5ded 100644
--- a/test_files/expected/simplify_xpaths_success_3.txt
+++ b/test_files/expected/simplify_xpaths_success_3.txt
@@ -1,31 +1,31 @@
-rings:occultation_type
-rings:occultation_direction
-rings:time_series_direction
-rings:planetary_occultation_flag
-rings:data_quality_score
-rings:ring_plane
-rings:star_name
-rings:fresnel_scale
-rings:projected_star_diameter
-rings:sigma_projected_star_diameter
-rings:fractional_error_star_counts
-rings:time_constant_type
-rings:time_constant
-rings:sigma_time_constant
-rings:minimum_wavelength
-rings:maximum_wavelength
-rings:reference_time_utc
-rings:minimum_observed_event_time
-rings:maximum_observed_event_time
-rings:observed_event_start_tdb
-rings:observed_event_stop_tdb
-rings:earth_received_start_time_utc
-rings:earth_received_stop_time_utc
-rings:minimum_ring_radius
-rings:maximum_ring_radius
-rings:minimum_ring_longitude
-rings:maximum_ring_longitude
-rings:minimum_observed_ring_azimuth
-rings:maximum_observed_ring_azimuth
-rings:observed_ring_elevation
-rings:light_source_incidence_angle
+rings:occultation_type<1>
+rings:occultation_direction<1>
+rings:time_series_direction<1>
+rings:planetary_occultation_flag<1>
+rings:data_quality_score<1>
+rings:ring_plane<1>
+rings:star_name<1>
+rings:fresnel_scale<1>
+rings:projected_star_diameter<1>
+rings:sigma_projected_star_diameter<1>
+rings:fractional_error_star_counts<1>
+rings:time_constant_type<1>
+rings:time_constant<1>
+rings:sigma_time_constant<1>
+rings:minimum_wavelength<1>
+rings:maximum_wavelength<1>
+rings:reference_time_utc<1>
+rings:minimum_observed_event_time<1>
+rings:maximum_observed_event_time<1>
+rings:observed_event_start_tdb<1>
+rings:observed_event_stop_tdb<1>
+rings:earth_received_start_time_utc<1>
+rings:earth_received_stop_time_utc<1>
+rings:minimum_ring_radius<1>
+rings:maximum_ring_radius<1>
+rings:minimum_ring_longitude<1>
+rings:maximum_ring_longitude<1>
+rings:minimum_observed_ring_azimuth<1>
+rings:maximum_observed_ring_azimuth<1>
+rings:observed_ring_elevation<1>
+rings:light_source_incidence_angle<1>
diff --git a/test_files/expected/simplify_xpaths_success_4.txt b/test_files/expected/simplify_xpaths_success_4.txt
index a1d7d69..9422e08 100644
--- a/test_files/expected/simplify_xpaths_success_4.txt
+++ b/test_files/expected/simplify_xpaths_success_4.txt
@@ -1,64 +1,64 @@
-cassini:mission_phase_name
-cassini:spacecraft_clock_count_partition
-cassini:spacecraft_clock_start_count
-cassini:spacecraft_clock_stop_count
-cassini:limitations
-cassini:antiblooming_state_flag
-cassini:command_file_name
-cassini:command_sequence_number
-cassini:dark_strip_mean
-cassini:data_conversion_type
-cassini:delayed_readout_flag
-cassini:detector_temperature
-cassini:electronics_bias
-cassini:expected_maximum_full_well
-cassini:expected_maximum_DN_sat
-cassini:expected_packets
-cassini:exposure_duration
-cassini:filter_name_1
-cassini:filter_name_2
-cassini:filter_temperature
-cassini:flight_software_version_id
-cassini:gain_mode_id
-cassini:ground_software_version_id
-cassini:image_mid_time
-cassini:image_number
-cassini:image_time
-cassini:image_observation_type
-cassini:instrument_data_rate
-cassini:inst_cmprs_type
-cassini:inst_cmprs_param_malgo
-cassini:inst_cmprs_param_tb
-cassini:inst_cmprs_param_blocks
-cassini:inst_cmprs_param_quant
-cassini:inst_cmprs_rate_expected_bits
-cassini:inst_cmprs_rate_actual_bits
-cassini:inst_cmprs_ratio
-cassini:light_flood_state_flag
-cassini:method_description
-cassini:missing_lines
-cassini:missing_packet_flag
-cassini:optics_temperature_front
-cassini:optics_temperature_back
-cassini:order_number
-cassini:parallel_clock_voltage_index
-cassini:pds3_product_creation_time
-cassini:pds3_product_version_type
-cassini:pds3_target_desc
-cassini:pds3_target_list
-cassini:pds3_target_name
-cassini:pre-pds_version_number
-cassini:prepare_cycle_index
-cassini:readout_cycle_index
-cassini:received_packets
-cassini:sensor_head_electronics_temperature
-cassini:sequence_id
-cassini:sequence_number
-cassini:sequence_title
-cassini:shutter_mode_id
-cassini:shutter_state_id
-cassini:start_time_doy
-cassini:stop_time_doy
-cassini:telemetry_format_id
-cassini:valid_maximum_full_well
-cassini:valid_maximum_DN_sat
+cassini:mission_phase_name<1>
+cassini:spacecraft_clock_count_partition<1>
+cassini:spacecraft_clock_start_count<1>
+cassini:spacecraft_clock_stop_count<1>
+cassini:limitations<1>
+cassini:antiblooming_state_flag<1>
+cassini:command_file_name<1>
+cassini:command_sequence_number<1>
+cassini:dark_strip_mean<1>
+cassini:data_conversion_type<1>
+cassini:delayed_readout_flag<1>
+cassini:detector_temperature<1>
+cassini:electronics_bias<1>
+cassini:expected_maximum_full_well<1>
+cassini:expected_maximum_DN_sat<1>
+cassini:expected_packets<1>
+cassini:exposure_duration<1>
+cassini:filter_name_1<1>
+cassini:filter_name_2<1>
+cassini:filter_temperature<1>
+cassini:flight_software_version_id<1>
+cassini:gain_mode_id<1>
+cassini:ground_software_version_id<1>
+cassini:image_mid_time<1>
+cassini:image_number<1>
+cassini:image_time<1>
+cassini:image_observation_type<1>
+cassini:instrument_data_rate<1>
+cassini:inst_cmprs_type<1>
+cassini:inst_cmprs_param_malgo<1>
+cassini:inst_cmprs_param_tb<1>
+cassini:inst_cmprs_param_blocks<1>
+cassini:inst_cmprs_param_quant<1>
+cassini:inst_cmprs_rate_expected_bits<1>
+cassini:inst_cmprs_rate_actual_bits<1>
+cassini:inst_cmprs_ratio<1>
+cassini:light_flood_state_flag<1>
+cassini:method_description<1>
+cassini:missing_lines<1>
+cassini:missing_packet_flag<1>
+cassini:optics_temperature_front<1>
+cassini:optics_temperature_back<1>
+cassini:order_number<1>
+cassini:parallel_clock_voltage_index<1>
+cassini:pds3_product_creation_time<1>
+cassini:pds3_product_version_type<1>
+cassini:pds3_target_desc<1>
+cassini:pds3_target_list<1>
+cassini:pds3_target_name<1>
+cassini:pre-pds_version_number<1>
+cassini:prepare_cycle_index<1>
+cassini:readout_cycle_index<1>
+cassini:received_packets<1>
+cassini:sensor_head_electronics_temperature<1>
+cassini:sequence_id<1>
+cassini:sequence_number<1>
+cassini:sequence_title<1>
+cassini:shutter_mode_id<1>
+cassini:shutter_state_id<1>
+cassini:start_time_doy<1>
+cassini:stop_time_doy<1>
+cassini:telemetry_format_id<1>
+cassini:valid_maximum_full_well<1>
+cassini:valid_maximum_DN_sat<1>
diff --git a/test_files/labels/bad_lid_label.xml b/test_files/labels/bad_lid_label.xml
new file mode 100644
index 0000000..b6847a7
--- /dev/null
+++ b/test_files/labels/bad_lid_label.xml
@@ -0,0 +1,25 @@
+
+
+
+ 1.0
+ Cassini ISS Image 1455200455n.img
+ 1.11.0.0
+
+
+ Cassini Orbiter Imaging Science Subsystem
+
+ Cassini Orbiter
+ Spacecraft
+
+ urn:nasa:pds:context:instrument_host:spacecraft.co
+ is_instrument_host
+
+
+
+
diff --git a/test_files/labels/nested_label.xml b/test_files/labels/nested_label.xml
new file mode 100644
index 0000000..d6127be
--- /dev/null
+++ b/test_files/labels/nested_label.xml
@@ -0,0 +1,74 @@
+
+
+
+
+
+
+ urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n
+ 1.0
+ Cassini ISS Image 1455200455n.img
+ 1.11.0.0
+
+ French, Richard G.
+
+
+
+
+
+
+
+
+
+ SPK
+ ura111.bsp
+
+
+ SPK
+ vgr2.ura111.bsp
+
+
+ BPC
+ earth_720101_031229.bpc
+
+
+ LSK
+ naif0012.tls
+
+
+
+ SPK
+ earthstns_itrf93_040916.bsp
+
+ These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.
+
+
+
+
+
+ Cassini Orbiter Imaging Science Subsystem
+
+ Cassini Orbiter
+ Spacecraft
+
+
+
+ Another thing
+
+ Another thing
+ Spacecraft
+
+ urn:nasa:pds:context:instrument_host:spacecraft.co
+ is_instrument_host
+
+
+
+
\ No newline at end of file
diff --git a/test_files/labels/rf_tester_label_1.xml b/test_files/labels/rf_tester_label_1.xml
new file mode 100644
index 0000000..3f127bc
--- /dev/null
+++ b/test_files/labels/rf_tester_label_1.xml
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+ urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n
+ 1.0
+ Cassini ISS Image 1455200455n.img
+ 1.11.0.0
+
+ French, Richard G.
+
+ kw1
+ kw2
+ kw3
+
+
+
+
+
+
+
+ SPK
+ ura111.bsp
+
+ These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.
+
+
+
+ SPK
+ earthstns_itrf93_040916.bsp
+
+ These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.
+
+
+
+
+
+ Cassini Orbiter Imaging Science Subsystem
+
+ Cassini Orbiter
+ Spacecraft
+
+
+
+ Another thing
+
+ Another thing
+ Spacecraft
+
+ urn:nasa:pds:context:instrument_host:spacecraft.co
+ is_instrument_host
+
+
+
+
\ No newline at end of file
diff --git a/test_files/labels/rf_tester_label_2.xml b/test_files/labels/rf_tester_label_2.xml
new file mode 100644
index 0000000..8d62bbc
--- /dev/null
+++ b/test_files/labels/rf_tester_label_2.xml
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+ urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n
+ 1.0
+ Cassini ISS Image 1455200455n.img
+ 1.11.0.0
+
+ French, Richard G.
+
+ kw1
+
+
+
+
+
+
+
+ SPK
+ ura111.bsp
+
+ These kernel files were used in the generation of the products in the parent bundle. Some or all of them may not have been used directly in the generation of this product.
+
+
+
+
+
+ Cassini Orbiter Imaging Science Subsystem
+
+ Cassini Orbiter
+ Spacecraft
+
+ urn:nasa:pds:context:instrument_host:spacecraft.co
+ is_instrument_host
+
+
+
+
\ No newline at end of file
diff --git a/test_files/samples/element_extra_file_info.txt b/test_files/samples/element_extra_file_info.txt
new file mode 100644
index 0000000..731d690
--- /dev/null
+++ b/test_files/samples/element_extra_file_info.txt
@@ -0,0 +1,4 @@
+pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>
+pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>
+pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>
+!filename
\ No newline at end of file
diff --git a/test_files/samples/tester_config.yaml b/test_files/samples/tester_config.yaml
new file mode 100644
index 0000000..9097074
--- /dev/null
+++ b/test_files/samples/tester_config.yaml
@@ -0,0 +1,25 @@
+nillable:
+ pds:ASCII_Integer:
+ inapplicable: -9999
+ missing: -9988
+ unknown: -9977
+ anticipated: -9966
+
+ pds:ASCII_Real:
+ inapplicable: -9999.0
+ missing: -9988.0
+ unknown: -9977.0
+ anticipated: -9966.0
+
+ pds:ASCII_Short_String_Collapsed:
+ inapplicable: inapplicable_alt
+ missing: missing_alt
+ unknown: unknown_alt
+ anticipated: anticipated_alt
+
+label-contents:
+ version_id: 1.1
+ File_Area_Metadata:
+ creation_date_time: '0002-02-02T00:00:00.00Z'
+ File_Area_Ancillary:
+ creation_date_time: '0001-01-01T00:00:00.00Z'
diff --git a/test_files/samples/tester_config_label.yaml b/test_files/samples/tester_config_label.yaml
new file mode 100644
index 0000000..64b0d33
--- /dev/null
+++ b/test_files/samples/tester_config_label.yaml
@@ -0,0 +1,23 @@
+
+label-contents:
+ title: Index file for my occultation bundle
+ Citation_Information:
+ author_list:
+ Emilie Simpson,
+ Robert French,
+ Mia Mace
+ editor_list:
+ publication_year: 2024
+ doi:
+ keyword: [stellar, uranus, rings]
+ description:
+ Funding_Acknowledgement:
+ Modification_Detail:
+ - modification_date: '2024-01-01'
+ version_id: 1.1
+ description: This is a lengthy description of what this modification
+ changed in the bundle.
+ There were lots of changes.
+ - modification_date: '2023-01-01'
+ version_id: '1.0'
+ description: Initial release.
diff --git a/test_files/expected/tester_config.yaml b/test_files/samples/tester_config_nillable.yaml
similarity index 83%
rename from test_files/expected/tester_config.yaml
rename to test_files/samples/tester_config_nillable.yaml
index 3ff9bc6..4be242b 100644
--- a/test_files/expected/tester_config.yaml
+++ b/test_files/samples/tester_config_nillable.yaml
@@ -15,7 +15,4 @@ nillable:
inapplicable: inapplicable_alt
missing: missing_alt
unknown: unknown_alt
- anticipated: anticipated_alt
-
-label-contents:
- version_id: 1.1
\ No newline at end of file
+ anticipated: anticipated_alt
\ No newline at end of file
diff --git a/test_files/samples/tester_config_reference.yaml b/test_files/samples/tester_config_reference.yaml
new file mode 100644
index 0000000..266e540
--- /dev/null
+++ b/test_files/samples/tester_config_reference.yaml
@@ -0,0 +1,36 @@
+
+label-contents:
+ title: Index file for my occultation bundle
+ Modification_Detail:
+ - modification_date: '2024-01-01'
+ version_id: 1.1
+ description: |
+ This is a lengthy description of what this modification
+ changed in the bundle.
+ There were lots of changes.
+ - modification_date: '2023-01-01'
+ version_id: 1.0
+ description: Initial release.
+ Internal_Reference:
+ - lid_reference: urn:nasa:pds:cassini_iss_cruise:data_raw:body-geometry
+ reference_type: data_to_resource
+ comment: The index table of body surface geometry information associated with each observation.
+ - lid_reference: urn:nasa:pds:cassini_iss_cruise:body-inventory
+ reference_type: data_to_resource
+ comment: An index listing every Saturn system body expected to fall within each field of view.
+ External_Reference:
+ - doi: 10.1086/113662
+ reference_text: Elliot et al. (1984). "Structure of the Uranian rings. I. Square-well model and particle-size constraints" Astron J. 89, 1587-1603.
+ description: reference material
+ Source_Product_Internal:
+ - lidvid_reference: urn:nasa:pds:insight-ifg-mars:data-ifg-raw:ifg-raw-sol0014-20181211t021721-20181211t150435-pt2hz::5.0
+ reference_type: data_to_raw_source_product
+ comment: Raw data used in processing
+ Source_Product_External:
+ - external_source_product_identifier: CO-S-UVIS-2-CUBE-V1.4:COUVIS_0056/DATA/D2016_245/EUV2016_245_17_49
+ reference_type: data_to_raw_source_product
+ doi:
+ curating_facility: PDS RMS Node
+ description: The original PDS3 version of this product. The form of the reference is dataset_id:volume_id:directory_path:file_name.
+ File_Area_Ancillary:
+ creation_date_time: '0001-01-01T00:00:00.00Z'
\ No newline at end of file
diff --git a/tests/test_pds4_create_xml_index_blackbox.py b/tests/test_pds4_create_xml_index_blackbox.py
index 26e772f..c3add2d 100644
--- a/tests/test_pds4_create_xml_index_blackbox.py
+++ b/tests/test_pds4_create_xml_index_blackbox.py
@@ -8,368 +8,555 @@
# These two variables are the same for all tests, so we can either declare them as
# global variables, or get the ROOT_DIR at the setup stage before running each test
ROOT_DIR = Path(__file__).resolve().parent.parent
-test_files_dir = ROOT_DIR / 'test_files'
-samples_dir = test_files_dir / 'samples'
-expected_dir = test_files_dir / 'expected'
-labels_dir = test_files_dir / 'labels'
+TEST_FILES_DIR = ROOT_DIR / 'test_files'
+SAMPLES_DIR = TEST_FILES_DIR / 'samples'
+EXPECTED_DIR = TEST_FILES_DIR / 'expected'
+LABELS_DIR = TEST_FILES_DIR / 'labels'
+LABEL_NAME = LABELS_DIR.name
+
+
+def compare_files(path_to_file, golden_file):
+ # Fail early if the generated file was never written
+ assert os.path.isfile(path_to_file)
+
+ # Read both files as text and require their contents to match exactly
+ with open(path_to_file, 'r') as created:
+ formed = created.read()
+
+ with open(golden_file, 'r') as new:
+ expected = new.read()
+
+ assert formed == expected
@pytest.mark.parametrize(
- 'golden_file,new_file,cmd_line',
- [
- # Testing --limit-xpaths-file
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
- # Compare result to golden copy:
- # test_files/expected/limit_xpaths_file_success_1.txt
- (
- str(expected_dir / 'limit_xpaths_file_success_1.txt'),
- 'limit_xpaths_file.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_1.txt'),
- '--output-headers-file'
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_2.txt --output-headers-file limit_xpaths_file_2.txt
- # Compare result to golden copy:
- # test_files/expected/limit_xpaths_file_success_2.txt
- (
- str(expected_dir / 'limit_xpaths_file_success_2.txt'),
- 'limit_xpaths_file_2.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_2.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_2.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_duplicates.txt --output-headers-file elements_dupe_file_2.txt
- # Compare result to golden copy:
- # test_files/expected/limit_xpaths_file_success_2.txt
- (
- str(expected_dir / 'limit_xpaths_file_success_2.txt'),
- 'elements_dupe_file_2.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_2.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_duplicates.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_3.txt --output-headers-file limit_xpaths_file_3.txt
- # Compare result to golden copy:
- # test_files/expected/limit_xpaths_file_success_3.txt
- (
- str(expected_dir / 'limit_xpaths_file_success_3.txt'),
- 'limit_xpaths_file_3.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_3.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_4.txt --output-headers-file limit_xpaths_file_4.txt
- # Compare result to golden copy:
- # test_files/expected/limit_xpaths_file_success_4.txt
- (
- str(expected_dir / 'limit_xpaths_file_success_4.txt'),
- 'limit_xpaths_file_4.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_4.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Testing --simplify-xpaths
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --output-headers-file simplify_xpaths_1.txt
- # Compare result to golden copy:
- # test_files/expected/simplify_xpaths_success_1.txt
- (
- str(expected_dir / 'simplify_xpaths_success_1.txt'),
- 'simplify_xpaths_1.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- '--simplify-xpaths',
- '--output-headers-file',
- ]
- ),
-
- # Testing --simplify-xpaths
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_2.txt --output-headers-file simplify_xpaths_2.txt
- # Compare result to golden copy:
- # test_files/expected/simplify_xpaths_success_2.txt
- (
- str(expected_dir / 'simplify_xpaths_success_2.txt'),
- 'simplify_xpaths_2.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--simplify-xpaths',
- '--limit-xpaths-file',
- str(samples_dir / 'elements_xpath_simplify_2.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Testing --simplify-xpaths
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file simplify_xpaths_3.txt
- # Compare result to golden copy:
- # test_files/expected/simplify_xpaths_success_3.txt
- (
- str(expected_dir / 'simplify_xpaths_success_3.txt'),
- 'simplify_xpaths_3.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_2.xml')),
- '--simplify-xpaths',
- '--limit-xpaths-file',
- str(samples_dir / 'elements_xpath_simplify_3.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Testing --simplify-xpaths
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_4.txt --output-headers-file simplify_xpaths_4.txt
- # Compare result to golden copy:
- # test_files/expected/simplify_xpaths_success_4.txt
- (
- str(expected_dir / 'simplify_xpaths_success_4.txt'),
- 'simplify_xpaths_4.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--simplify-xpaths',
- '--limit-xpaths-file',
- str(samples_dir / 'elements_xpath_simplify_4.txt'),
- '--output-headers-file',
- ]
- ),
-
- # Testing --add-extra-file-info
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
- # Compare result to golden copy:
- # test_files/expected/extra_file_info_success_1.csv
- (
- str(expected_dir / 'extra_file_info_success_1.csv'),
- 'extra_file_info_1.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_2.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_1.txt'),
- '--add-extra-file-info',
- 'filename,filepath',
- '--output-index-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename --sort-by filename
- # --output-index-file extra_file_info_2.csv
- # Compare result to golden copy:
- # test_files/expected/extra_file_info_success_2.csv
- (
- str(expected_dir / 'extra_file_info_success_2.csv'),
- 'extra_file_info_2.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_5.txt'),
- '--add-extra-file-info',
- 'filename',
- '--sort-by',
- 'filename',
- '--output-index-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --output-index-file extra_file_info_3.csv
- # Compare result to golden copy:
- # test_files/expected/extra_file_info_success_3.csv
- (
- str(expected_dir / 'extra_file_info_success_3.csv'),
- 'extra_file_info_3.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'element_5.txt'),
- '--add-extra-file-info',
- 'filename,filepath,lid,bundle,bundle_lid',
- '--sort-by',
- 'filename',
- '--output-index-file',
- ]
- ),
-
- # Testing --clean-header-field-names
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-headers-file clean_header_field_names_1.txt
- # Compare result to golden copy:
- # test_files/expected/clean_header_field_names_success_1.txt
- (
- str(expected_dir / 'clean_header_field_names_success_1.txt'),
- 'clean_header_field_names_1.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- '--clean-header-field-names',
- '--output-headers-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --clean-header-field-names --output-headers-file clean_header_field_names_2.txt
- # Compare result to golden copy:
- # test_files/expected/clean_header_field_names_success_2.txt
- (
- str(expected_dir / 'clean_header_field_names_success_2.txt'),
- 'clean_header_field_names_2.txt',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'elements_clean_header_field_names.txt'),
- '--clean-header-field-names',
- '--output-headers-file',
- ]
- ),
-
- # Testing --sort by
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --sort-by 'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>' --output-index-file sort_by_1.csv
- # Compare result to golden copy:
- # test_files/expected/sort_by_success_1.csv
- (
- str(expected_dir / 'sort_by_success_1.csv'),
- 'sort_by_1.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'elements_clean_header_field_names.txt'),
- '--sort-by',
- 'pds:Product_Observational/pds:Identification_Area<1>/'
- 'pds:logical_identifier<1>',
- '--output-index-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --add-extra-file-info bundle_lid,filepath --sort-by bundle_lid --output-index-file sort_by_2.csv
- # Compare result to golden copy:
- # test_files/expected/sort_by_success_2.csv
- (
- str(expected_dir / 'sort_by_success_2.csv'),
- 'sort_by_2.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'elements_clean_header_field_names.txt'),
- '--add-extra-file-info',
- 'bundle_lid,filepath',
- '--sort-by',
- 'bundle_lid',
- '--output-index-file',
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "identical_label_*.xml" --limit-xpaths-file ../test_files/samples/identical_elements.txt --add-extra-file-info filename --sort-by filename --output-index-file identical_labels.csv
- # Compare result to golden copy:
- # test_files/expected/identical_labels_success.csv
- (
- str(expected_dir / 'identical_labels_success.csv'),
- 'identical_labels.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('identical_label_*.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'identical_elements.txt'),
- '--add-extra-file-info',
- 'filename',
- '--sort-by',
- 'filename',
- '--output-index-file'
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label.xml" --limit-xpaths-file ../test_files/samples/elements_nilled.txt --output-index-file nilled_elements.csv
- # Compare result to golden copy:
- # test_files/expected/nilled_element_success.csv
- (
- str(expected_dir / 'nilled_element_success.csv'),
- 'nilled_element.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('nilled_label.xml')),
- '--limit-xpaths-file',
- str(samples_dir / 'elements_nilled.txt'),
- '--output-index-file'
- ]
- ),
-
- # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --fixed-width --output-index-file fixed_width.csv
- # Compare result to golden copy:
- # test_files/expected/fixed_width_success.csv
- (
- str(expected_dir / 'fixed_width_success.csv'),
- 'fixed_width.csv',
- [
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- '--fixed-width',
- '--output-index-file'
- ]
- )
- ]
- )
-def test_success(golden_file, new_file, cmd_line):
+ 'golden_file,new_file_index,new_file_headers,cmd_line',
+ [
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml"
+ (
+ str(EXPECTED_DIR / 'index_file_success.csv'),
+ None, None,
+ []
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary
+ (
+ str(EXPECTED_DIR / 'index_file_success.csv'),
+ None, None,
+ [
+ '--generate-label',
+ 'ancillary'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config-file ../test_files/samples/tester_config_reference.yaml --simplify-xpaths --output-index-file label_references.csv
+ (
+ str(EXPECTED_DIR / 'label_references_success.csv'),
+ 'label_references.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--generate-label',
+ 'ancillary',
+ '--config-file',
+ str(SAMPLES_DIR / 'tester_config_reference.yaml'),
+ '--simplify-xpaths'
+ ]
+ ),
+
+ # Testing --limit-xpaths-file with two outputs
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt --output-index-file limit_xpaths_file.csv
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_1.csv (index) and limit_xpaths_file_success_1.txt (headers)
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_1.csv'),
+ 'limit_xpaths_file.csv', 'limit_xpaths_file.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_1.txt')
+ ]
+ ),
+
+ # Testing --limit-xpaths-file
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_1.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_1.txt'),
+ None, 'limit_xpaths_file.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_1.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "nonexistent.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --output-headers-file limit_xpaths_file_wack.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_1.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_1.txt'),
+ None, 'limit_xpaths_file_wack.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/nonexistent.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_1.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_2.txt --output-headers-file limit_xpaths_file_2.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_2.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_2.txt'),
+ None, 'limit_xpaths_file_2.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_2.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_2.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_duplicates.txt --output-headers-file elements_dupe_file_2.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_2.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_2.txt'),
+ None, 'elements_dupe_file_2.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_2.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_duplicates.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_3.txt --output-headers-file limit_xpaths_file_3.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_3.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_3.txt'),
+ None, 'limit_xpaths_file_3.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_3.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_4.txt --output-headers-file limit_xpaths_file_4.txt
+ # Compare result to golden copy:
+ # test_files/expected/limit_xpaths_file_success_4.txt
+ (
+ str(EXPECTED_DIR / 'limit_xpaths_file_success_4.txt'),
+ None, 'limit_xpaths_file_4.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_4.txt')
+ ]
+ ),
+
+ # Testing --simplify-xpaths
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --output-headers-file simplify_xpaths_1.txt
+ # Compare result to golden copy:
+ # test_files/expected/simplify_xpaths_success_1.txt
+ (
+ str(EXPECTED_DIR / 'simplify_xpaths_success_1.txt'),
+ None, 'simplify_xpaths_1.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--simplify-xpaths'
+ ]
+ ),
+
+ # Testing --simplify-xpaths
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_2.txt --output-headers-file simplify_xpaths_2.txt
+ # Compare result to golden copy:
+ # test_files/expected/simplify_xpaths_success_2.txt
+ (
+ str(EXPECTED_DIR / 'simplify_xpaths_success_2.txt'),
+ None, 'simplify_xpaths_2.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--simplify-xpaths',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_xpath_simplify_2.txt')
+ ]
+ ),
+
+ # Testing --simplify-xpaths
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file simplify_xpaths_3.txt
+ # Compare result to golden copy:
+ # test_files/expected/simplify_xpaths_success_3.txt
+ (
+ str(EXPECTED_DIR / 'simplify_xpaths_success_3.txt'),
+ None, 'simplify_xpaths_3.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_2.xml',
+ '--simplify-xpaths',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_xpath_simplify_3.txt')
+ ]
+ ),
+
+ # Testing --simplify-xpaths
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_3.xml" --simplify-xpaths --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_4.txt --output-headers-file simplify_xpaths_4.txt
+ # Compare result to golden copy:
+ # test_files/expected/simplify_xpaths_success_4.txt
+ (
+ str(EXPECTED_DIR / 'simplify_xpaths_success_4.txt'),
+ None, 'simplify_xpaths_4.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_3.xml',
+ '--simplify-xpaths',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_xpath_simplify_4.txt')
+ ]
+ ),
+
+ # Testing --add-extra-file-info
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/element_extra_file_info.txt --add-extra-file-info filename,filepath --output-index-file extra_file_info_1.csv
+ # Compare result to golden copy:
+ # test_files/expected/extra_file_info_success_1.csv
+ (
+ str(EXPECTED_DIR / 'extra_file_info_success_1.csv'),
+ 'extra_file_info_1.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_2.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_extra_file_info.txt'),
+ '--add-extra-file-info',
+ 'filename,filepath',
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename --sort-by filename
+ # --output-index-file extra_file_info_2.csv
+ # Compare result to golden copy:
+ # test_files/expected/extra_file_info_success_2.csv
+ (
+ str(EXPECTED_DIR / 'extra_file_info_success_2.csv'),
+ 'extra_file_info_2.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_5.txt'),
+ '--add-extra-file-info',
+ 'filename',
+ '--sort-by',
+ 'filename'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --output-index-file extra_file_info_3.csv
+ # Compare result to golden copy:
+ # test_files/expected/extra_file_info_success_3.csv
+ (
+ str(EXPECTED_DIR / 'extra_file_info_success_3.csv'),
+ 'extra_file_info_3.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_5.txt'),
+ '--add-extra-file-info',
+ 'filename,filepath,lid,bundle,bundle_lid',
+ '--sort-by',
+ 'filename'
+ ]
+ ),
+
+ # Testing --clean-header-field-names
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-headers-file clean_header_field_names_1.txt
+ # Compare result to golden copy:
+ # test_files/expected/clean_header_field_names_success_1.txt
+ (
+ str(EXPECTED_DIR / 'clean_header_field_names_success_1.txt'),
+ None, 'clean_header_field_names_1.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--clean-header-field-names'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --clean-header-field-names --output-index-file clean_header_field_names_2.csv
+ # Compare result to golden copy:
+ # test_files/expected/clean_header_field_names_success_2.csv
+ (
+ str(EXPECTED_DIR / 'clean_header_field_names_success_2.csv'),
+ 'clean_header_field_names_2.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--clean-header-field-names'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --clean-header-field-names --output-headers-file clean_header_field_names_2.txt
+ # Compare result to golden copy:
+ # test_files/expected/clean_header_field_names_success_2.txt
+ (
+ str(EXPECTED_DIR / 'clean_header_field_names_success_2.txt'),
+ None, 'clean_header_field_names_2.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+ '--clean-header-field-names'
+ ]
+ ),
+
+ # Testing --sort-by
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --sort-by 'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>' --output-index-file sort_by_1.csv
+ # Compare result to golden copy:
+ # test_files/expected/sort_by_success_1.csv
+ (
+ str(EXPECTED_DIR / 'sort_by_success_1.csv'),
+ 'sort_by_1.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+ '--sort-by',
+ 'pds:Product_Observational/pds:Identification_Area<1>/'
+ 'pds:logical_identifier<1>'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/elements_clean_header_field_names.txt --add-extra-file-info bundle_lid,filepath --sort-by bundle_lid --output-index-file sort_by_2.csv
+ # Compare result to golden copy:
+ # test_files/expected/sort_by_success_2.csv
+ (
+ str(EXPECTED_DIR / 'sort_by_success_2.csv'),
+ 'sort_by_2.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_clean_header_field_names.txt'),
+ '--add-extra-file-info',
+ 'bundle_lid,filepath',
+ '--sort-by',
+ 'bundle_lid'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "identical_label_*.xml" --limit-xpaths-file ../test_files/samples/identical_elements.txt --add-extra-file-info filename --sort-by filename --output-index-file identical_labels.csv
+ # Compare result to golden copy:
+ # test_files/expected/identical_labels_success.csv
+ (
+ str(EXPECTED_DIR / 'identical_labels_success.csv'),
+ 'identical_labels.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/identical_label_*.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'identical_elements.txt'),
+ '--add-extra-file-info',
+ 'filename',
+ '--sort-by',
+ 'filename'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "nilled_label.xml" --limit-xpaths-file ../test_files/samples/elements_nilled.txt --output-index-file nilled_element.csv
+ # Compare result to golden copy:
+ # test_files/expected/nilled_element_success.csv
+ (
+ str(EXPECTED_DIR / 'nilled_element_success.csv'),
+ 'nilled_element.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/nilled_label.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_nilled.txt')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --fixed-width --output-index-file fixed_width.csv
+ # Compare result to golden copy:
+ # test_files/expected/fixed_width_success.csv
+ (
+ str(EXPECTED_DIR / 'fixed_width_success.csv'),
+ 'fixed_width.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--fixed-width'
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "nested_label.xml" --simplify-xpaths --output-headers-file nested_label.txt
+ # Compare result to golden copy:
+ # test_files/expected/nested_label_success.txt
+ (
+ str(EXPECTED_DIR / 'nested_label_success.txt'),
+ None, 'nested_label.txt',
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/nested_label.xml',
+ '--simplify-xpaths',
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label ancillary --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_1.csv
+ # Compare result to golden copy:
+ # test_files/expected/label_success_1.csv
+ # test_files/expected/label_success_1.xml
+ (
+ str(EXPECTED_DIR / 'label_success_1.csv'),
+ 'generated_label_1.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--generate-label',
+ 'ancillary',
+ '--config-file',
+ str(SAMPLES_DIR / 'tester_config.yaml')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --generate-label metadata --fixed-width --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_2.csv
+ # Compare result to golden copy:
+ # test_files/expected/label_success_2.csv
+ # test_files/expected/label_success_2.xml
+ (
+ str(EXPECTED_DIR / 'label_success_2.csv'),
+ 'generated_label_2.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--generate-label',
+ 'metadata',
+ '--fixed-width',
+ '--config-file',
+ str(SAMPLES_DIR / 'tester_config.yaml')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_5.txt --add-extra-file-info filename,filepath,lid,bundle,bundle_lid --sort-by filename --generate-label ancillary --config-file ../test_files/samples/tester_config.yaml --output-index-file generated_label_3.csv
+ # Compare result to golden copy:
+ # test_files/expected/label_success_3.csv
+ # test_files/expected/label_success_3.xml
+ (
+ str(EXPECTED_DIR / 'label_success_3.csv'),
+ 'generated_label_3.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'element_5.txt'),
+ '--add-extra-file-info',
+ 'filename,filepath,lid,bundle,bundle_lid',
+ '--sort-by',
+ 'filename',
+ '--generate-label',
+ 'ancillary',
+ '--config-file',
+ str(SAMPLES_DIR / 'tester_config.yaml')
+ ]
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "rf_tester_label_*.xml" --generate-label metadata --config-file ../test_files/samples/tester_config.yaml --clean-header-field-names --simplify-xpaths --output-index-file cleaned_headers_label.csv
+ # Compare result to golden copy:
+ # test_files/expected/cleaned_headers_label_success.csv
+ # test_files/expected/cleaned_headers_label_success.xml
+ (
+ str(EXPECTED_DIR / 'cleaned_headers_label_success.csv'),
+ 'cleaned_headers_label.csv', None,
+ [
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/rf_tester_label_*.xml',
+ '--generate-label',
+ 'metadata',
+ '--config-file',
+ str(SAMPLES_DIR / 'tester_config.yaml'),
+ '--clean-header-field-names',
+ '--simplify-xpaths'
+ ]
+ ),
+ ]
+)
+def test_success(golden_file, new_file_index, new_file_headers, cmd_line):
# Create a temporary directory
- with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
+ with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
temp_dir_path = Path(temp_dir)
- # THE PATH TO THE NEW FILE
- path_to_file = temp_dir_path / new_file
- # Call main() function with the simulated command line arguments
- cmd_line.append(str(path_to_file))
- tools.main(cmd_line)
+ if new_file_index is None and new_file_headers is None:
+ cmd_line.append(str(LABELS_DIR))
+ cmd_line.append('tester_label_1.xml')
+ # Call main() function with the simulated command line arguments
+ tools.main(cmd_line)
+
+ path_to_index_file = ROOT_DIR / 'index.csv'
+
+ compare_files(path_to_index_file, golden_file)
+ os.remove(path_to_index_file)
+
+ else:
+ # THE PATH TO THE NEW FILE
+ if new_file_index:
+ path_to_file = temp_dir_path / new_file_index
+ path_to_label_file = ROOT_DIR / 'index.xml'
+ cmd_line.append('--output-index-file')
+ cmd_line.append(str(path_to_file))
+ # Call main() function with the simulated command line arguments
+ tools.main(cmd_line)
+
+ compare_files(path_to_file, golden_file)
- # Assert that the file now exists
- assert os.path.isfile(path_to_file)
+ if '--generate-label' in cmd_line:
+ label_path = str(path_to_file).replace('.csv', '.xml')
+ golden_label = str(golden_file).replace('.csv', '.xml')
+ assert os.path.isfile(label_path)
- # Open and compare the two files
- with open(path_to_file, 'rb') as created:
- formed = created.read()
+ compare_files(label_path, golden_label)
+ if os.path.isfile(path_to_label_file):
+ os.remove(path_to_label_file)
- with open(golden_file, 'rb') as new:
- expected = new.read()
+ if new_file_headers:
+ path_to_file = temp_dir_path / new_file_headers
+ golden_file = str(golden_file).replace('.csv', '.txt')
+ cmd_line.append('--output-headers-file')
+ cmd_line.append(str(path_to_file))
+ # Call main() function with the simulated command line arguments
+ tools.main(cmd_line)
- assert formed == expected
+ compare_files(path_to_file, golden_file)
@pytest.mark.parametrize(
@@ -377,70 +564,126 @@ def test_success(golden_file, new_file, cmd_line):
[
# Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info bad_element --output-headers-file hdout.txt
(
- str(test_files_dir),
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
'--limit-xpaths-file',
- str(samples_dir / 'element_1.txt'),
+ str(SAMPLES_DIR / 'element_1.txt'),
'--add-extra-file-info',
'bad_element',
'--output-headers-file',
+ 'hdout.txt'
),
# Executable command: pds4_create_xml_index ../test_files/labels "bad_directory/labels/tester_label_*.xml" --limit-xpaths-file ../test_files/samples/element_1.txt --add-extra-file-info filename --output-headers-file hdout.txt
(
- str(test_files_dir), # directory path
+ str(TEST_FILES_DIR), # directory path
'bad_directory/labels/tester_label_*.xml', # non-existent directory
'--limit-xpaths-file',
- str(samples_dir / 'element_1.txt'), # elements file
+ str(SAMPLES_DIR / 'element_1.txt'), # elements file
'--add-extra-file-info', # extra file info
'filename',
'--output-headers-file',
+ 'hdout.txt'
),
# Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" "tester_label_2.xml" "tester_label_3.xml" --limit-xpaths-file ../test_files/samples/element_empty.txt --output-headers-file hdout.txt
(
- str(test_files_dir), # directory path
- str(labels_dir.name / Path('tester_label_1.xml')),
- str(labels_dir.name / Path('tester_label_2.xml')),
- str(labels_dir.name / Path('tester_label_3.xml')),
+ str(TEST_FILES_DIR), # directory path
+ LABEL_NAME + '/tester_label_1.xml',
+ LABEL_NAME + '/tester_label_2.xml',
+ LABEL_NAME + '/tester_label_3.xml',
'--limit-xpaths-file',
- str(samples_dir / 'element_empty.txt'), # empty elements file
+ str(SAMPLES_DIR / 'element_empty.txt'), # empty elements file
'--output-headers-file',
+ 'hdout.txt'
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --simplify-xpaths --sort-by bad_sort --output-index-file hdout.csv
+ (
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--simplify-xpaths',
+ '--sort-by',
+ 'bad_sort',
+ '--output-index-file',
+ 'hdout.csv'
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "nonexistent.xml" --output-headers-file hdout.txt
+ (
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/nonexistent.xml',
+ '--output-headers-file',
+ 'hdout.txt',
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_1.xml" --limit-xpaths-file ../test_files/samples/elements_xpath_simplify_3.txt --output-headers-file hdout.txt
+ (
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_1.xml',
+ '--limit-xpaths-file',
+ str(SAMPLES_DIR / 'elements_xpath_simplify_3.txt'),
+ '--output-headers-file',
+ 'hdout.txt',
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "tester_label_*.xml" --generate-label ancillary --output-headers-file hdout.txt
+ (
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/tester_label_*.xml',
+ '--generate-label',
+ 'ancillary',
+ '--output-headers-file',
+ 'hdout.txt',
+ ),
+
+ # Executable command: pds4_create_xml_index ../test_files/labels "bad_lid_label.xml" --output-headers-file hdout.txt
+ (
+ str(TEST_FILES_DIR),
+ LABEL_NAME + '/bad_lid_label.xml',
+ '--output-headers-file',
+ 'hdout.txt',
)
+
]
)
def test_failures(cmd_line):
- # Call main() function with the simulated command line arguments
- with pytest.raises(SystemExit) as e:
- tools.main(cmd_line)
- assert e.type == SystemExit
- assert e.value.code != 0 # Check that the exit code indicates failure
+ try:
+ # Call main() function with the simulated command line arguments
+ with pytest.raises(SystemExit) as e:
+ tools.main(cmd_line)
+ assert e.type == SystemExit
+ assert e.value.code != 0 # Check that the exit code indicates failure
+ finally:
+ # Ensure hdout.txt is deleted regardless of test outcome
+ if os.path.isfile('hdout.txt'):
+ os.remove('hdout.txt')
@pytest.mark.parametrize(
- 'new_file,cmd_line',
+ 'NEW_FILE,cmd_line',
[
# Executable command: pds4_create_xml_index ../test_files/labels "nilled_label_bad.xml" --limit-xpaths-file ../test_files/samples/elements_nilled_bad.txt --output-index-file indexout.csv
(
'nillable.csv',
[
- str(test_files_dir), # directory path
- str(labels_dir.name / Path('nilled_label_bad.xml')),
+ str(TEST_FILES_DIR), # directory path
+ LABEL_NAME + '/nilled_label_bad.xml',
'--limit-xpaths-file',
- str(samples_dir / 'elements_nilled_bad.txt'),
+ str(SAMPLES_DIR / 'elements_nilled_bad.txt'),
'--output-index-file'
]
)
]
)
-def test_failure_message(capfd, new_file, cmd_line):
- with tempfile.TemporaryDirectory(dir=test_files_dir.parent) as temp_dir:
+def test_failure_message(capfd, NEW_FILE, cmd_line):
+ with tempfile.TemporaryDirectory(dir=TEST_FILES_DIR.parent) as temp_dir:
temp_dir_path = Path(temp_dir)
# THE PATH TO THE NEW FILE
- path_to_file = temp_dir_path / new_file
+ path_to_file = temp_dir_path / NEW_FILE
# Call main() function with the simulated command line arguments
cmd_line.append(str(path_to_file))
@@ -454,3 +697,8 @@ def test_failure_message(capfd, new_file, cmd_line):
expected_message = ("Non-nillable element in")
assert expected_message in captured.out or expected_message in captured.err
+
+
+def test_invalid_arguments():
+ with pytest.raises(SystemExit): # Assuming argparse will call sys.exit on failure
+ tools.main(["--invalid-option"])
diff --git a/tests/test_pds4_create_xml_index_whitebox.py b/tests/test_pds4_create_xml_index_whitebox.py
index c746b73..814f120 100644
--- a/tests/test_pds4_create_xml_index_whitebox.py
+++ b/tests/test_pds4_create_xml_index_whitebox.py
@@ -1,3 +1,4 @@
+import argparse
from datetime import datetime
from lxml import etree
import os
@@ -5,15 +6,17 @@
from pathlib import Path
import pytest
import pds4indextools.pds4_create_xml_index as tools
-from unittest import mock
+import textwrap as _textwrap
+from unittest.mock import patch
# These two variables are the same for all tests, so we can either declare them as
# global variables, or get the ROOT_DIR at the setup stage before running each test
ROOT_DIR = Path(__file__).resolve().parent.parent
-test_files_dir = ROOT_DIR / 'test_files'
-expected_dir = test_files_dir / 'expected'
-labels_dir = test_files_dir / 'labels'
+TEST_FILES_DIR = ROOT_DIR / 'test_files'
+SAMPLES_DIR = TEST_FILES_DIR / 'samples'
+EXPECTED_DIR = TEST_FILES_DIR / 'expected'
+LABELS_DIR = TEST_FILES_DIR / 'labels'
# Testing load_config_file()
@@ -64,7 +67,7 @@ def test_load_config_object():
# Tests that the config_object is loaded over.
config_object = tools.load_config_file(
- specified_config_files=[str(expected_dir/'tester_config.yaml'),])
+ specified_config_files=[str(SAMPLES_DIR / 'tester_config_nillable.yaml'),])
assert config_object['nillable']['pds:ASCII_Date_YMD']['inapplicable'] == '0001-01-01'
assert config_object['nillable']['pds:ASCII_Date_YMD']['missing'] == '0002-01-01'
@@ -90,14 +93,22 @@ def test_load_config_object():
assert (config_object['nillable']['pds:ASCII_Short_String_Collapsed']
['anticipated'] == 'anticipated_alt')
+ # Tests specified configuration files with one or the other
+ config_object = tools.load_config_file(
+ specified_config_files=[str(SAMPLES_DIR / 'tester_config_label.yaml'),])
+
+ assert config_object['label-contents']['version_id'] == '1.0'
+ assert (config_object['label-contents']['title'] ==
+ 'Index file for my occultation bundle')
+
# A bad default config file
with pytest.raises(SystemExit):
- tools.load_config_file(default_config_file=expected_dir/'non_existent_file.ini')
+ tools.load_config_file(default_config_file=EXPECTED_DIR / 'non_existent_file.ini')
# A bad specified config file
with pytest.raises(SystemExit):
tools.load_config_file(specified_config_files=list(
- str(expected_dir/'non_existent_file.ini')))
+ str(EXPECTED_DIR / 'non_existent_file.ini')))
# Testing default_value_for_nil()
@@ -146,6 +157,9 @@ def test_default_value_for_nil():
assert tools.default_value_for_nil(config_object, datetime_ymd_utc,
'anticipated') == '0004-01-01T12:00Z'
+ # Testing None
+ assert tools.default_value_for_nil(config_object, None, 'anticipated') is None
+
def test_default_value_for_nil_ascii_date_time_ymd_utc():
datetime_ymd_utc = 'pds:ASCII_Date_Time_YMD_UTC'
@@ -187,7 +201,7 @@ def test_split_into_elements():
# Testing process_schema_location()
def test_process_schema_location():
label_file = 'tester_label_1.xml'
- schema_files = tools.process_schema_location(labels_dir / label_file)
+ schema_files = tools.process_schema_location(LABELS_DIR / label_file)
assert (schema_files[0] ==
'https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1B00.xsd')
assert (schema_files[1] ==
@@ -207,7 +221,7 @@ def test_parse_label_file_exception_handling(capsys):
def test_extract_logical_identifier():
label_file = 'tester_label_1.xml'
- tree = etree.parse(str(labels_dir / label_file))
+ tree = etree.parse(str(LABELS_DIR / label_file))
assert (tools.extract_logical_identifier(tree) ==
'urn:nasa:pds:cassini_iss_saturn:data_raw:1455200455n')
@@ -237,10 +251,20 @@ def test_scrape_namespaces():
def test_get_longest_row_length():
- filename = expected_dir / 'extra_file_info_success_1.csv'
+ filename = EXPECTED_DIR / 'extra_file_info_success_1.csv'
result = tools.get_longest_row_length(filename)
assert result == 254
+ # Failure
+ with pytest.raises(OSError):
+ filename = (
+ '0eD8s3JGt9RmE5YnVpLZxkf2A1gNbWqQ7TXHlchyojFzPBrMOIKvaSuUwd4pC6JrXjmtbZVnLQW9'
+ 'gDKfpq7cHWnPoyT5sBM3YXIzlq06F4GDvw1MRaOJpEZU9kBX2AysnVrH6TQeY3G8oKPw5xfmLzN2'
+ 'hF7sJ9Qc8LbH4ErWaMKtVUXoPIjzpRy1D0qW4s3N7Km8HGaLFCvxl6eyP7UZjWopX4rBdQ2VME3G'
+ '9XtF8h2TsjvQnKwDYLb50O8xFI6gUJwpQmA7nrZ4EYkTXoR9CpMN8QG6fKjW5uVDl3oJ1wzBsPpT'
+ '2cFmLRe7Hg1SYkN8qQv9RcHjA0F3I4mU')
+ result = tools.get_longest_row_length(filename)
+
@pytest.fixture
def create_temp_file():
@@ -255,8 +279,283 @@ def create_temp_file():
@pytest.mark.parametrize('platform_name', ['Windows', 'Linux', 'Darwin'])
def test_get_creation_date(create_temp_file, platform_name):
# Mock platform.system() to simulate different platforms
- with mock.patch('platform.system', return_value=platform_name):
+ with patch('platform.system', return_value=platform_name):
creation_date = tools.get_creation_date(create_temp_file)
assert isinstance(creation_date, str)
# Assert that the returned date is in ISO 8601 format
assert datetime.fromisoformat(creation_date)
+
+
+def test_update_nillable_elements_from_xsd_file():
+    # Gather nillable elements from every distinct schema referenced by two labels.
+    label_files = [LABELS_DIR / 'tester_label_1.xml',  # not tied to the CWD
+                   LABELS_DIR / 'tester_label_2.xml']
+    seen_urls = set()
+    nillable_elements_info = {}
+
+    for label_file in label_files:
+        for url in tools.process_schema_location(label_file):
+            if url not in seen_urls:  # handle each schema only once
+                seen_urls.add(url)
+                tools.update_nillable_elements_from_xsd_file(url, nillable_elements_info)
+
+    assert nillable_elements_info == {
+        'start_time': 'pds:ASCII_Date_Time',
+        'start_date_time': 'pds:ASCII_Date_Time_YMD_UTC',
+        'stop_time': 'pds:ASCII_Date_Time',
+        'stop_date_time': 'pds:ASCII_Date_Time_YMD_UTC',
+        'publication_date': 'pds:ASCII_Date_YMD',
+        'stop_date': 'pds:ASCII_Date_YMD',
+        'reference_frame_id': 'pds:ASCII_Short_String_Collapsed',
+        'gain_mode_id': 'cassini:gain_mode_id_WO_Units',
+        'gain_mode_id_ir': 'pds:ASCII_Short_String_Collapsed',
+        'gain_mode_id_vis': 'pds:ASCII_Short_String_Collapsed',
+        'wavelength_range': 'pds:ASCII_Short_String_Collapsed',
+        'dsn_station_number': 'pds:ASCII_Integer'}
+
+
+def test_update_nillable_elements_from_xsd_file_with_edge_cases():
+    # Edge cases: a 'type' attribute that is missing or already in nillable_elements_info
+
+    # NOTE(review): both XSD literals were empty in this listing (markup lost), which
+    # makes etree.fromstring() fail; they are minimal reconstructions -- confirm.
+    xsd_content_missing_type = """
+    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+        <xs:element name="element_without_type" nillable="true">
+        </xs:element>
+    </xs:schema>
+    """
+    # Mock XSD content where type_attribute is already in nillable_elements_info
+    xsd_content_duplicate_type = """
+    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+        <xs:element name="start_time" type="pds:ASCII_Date_Time" nillable="true">
+        </xs:element>
+    </xs:schema>
+    """
+
+    # Parse the mock XSD contents into XML trees
+    tree_missing_type = etree.fromstring(xsd_content_missing_type)
+    tree_duplicate_type = etree.fromstring(xsd_content_duplicate_type)
+
+    # Mock the download_xsd_file function to return these trees based on input
+    with patch(
+        'pds4indextools.pds4_create_xml_index.download_xsd_file'
+    ) as mock_download:
+        # Define the behavior of the mock for each file
+        mock_download.side_effect = (
+            lambda url: tree_missing_type if 'missing_type' in url
+            else tree_duplicate_type
+        )
+
+        # Initialize the dictionary that will hold the nillable elements information
+        nillable_elements_info = {
+            'start_time': 'pds:ASCII_Date_Time'  # Simulate an existing entry
+        }
+
+        # Only the missing-type scenario is exercised here (duplicate-type tree is unused)
+        tools.update_nillable_elements_from_xsd_file(
+            'test_files/labels/missing_type.xsd', nillable_elements_info)
+        assert 'element_without_type' not in nillable_elements_info
+
+
+def test_clean_header_field_names():
+    # Headers containing ':' or '/', a purely numeric header, and a regular one.
+    frame = pd.DataFrame({
+        'column:1': [1, 2, 3],
+        'column/2': [4, 5, 6],
+        '3': [7, 8, 9],
+        'normal_column': [10, 11, 12],
+    })
+
+    # The return value is ignored: the renamed headers are read back from the frame.
+    tools.clean_headers(frame)
+    expected = {
+        'column_1': {0: 1, 1: 2, 2: 3},
+        'column__2': {0: 4, 1: 5, 2: 6},
+        '_column3': {0: 7, 1: 8, 2: 9},
+        'normal_column': {0: 10, 1: 11, 2: 12},
+    }
+    assert frame.to_dict() == expected
+
+
+def test_compute_max_field_lengths():
+    # Success: a known-good index file yields the expected length for each header.
+    csv_path = EXPECTED_DIR / 'extra_file_info_success_1.csv'
+    assert tools.compute_max_field_lengths(str(csv_path)) == {
+        'filename': 18,
+        'filepath': 25,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:logical_identifier<1>':
+            72,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:version_id<1>': 3,
+        'pds:Product_Observational/pds:Identification_Area<1>/pds:title<1>': 132
+    }
+
+    # Failure: 'fake_file.csv' is presumably absent from EXPECTED_DIR; the call is
+    # then expected to end in SystemExit, so nothing is bound to its result.
+    missing_path = EXPECTED_DIR / 'fake_file.csv'
+    with pytest.raises(SystemExit):
+        tools.compute_max_field_lengths(str(missing_path))
+
+
+def test_sort_dataframe_key_error():
+    # Sorting by a key that names no column must raise ValueError with this message.
+    people = pd.DataFrame({'name': ['Alice', 'Bob', 'Charlie'],
+                           'age': [30, 25, 35]})
+    missing_key = 'height'  # Non-existent column
+    expected_message = (f"Unknown sort key '{missing_key}'. For a list "
+                        "of available sort keys, use the "
+                        "--output-headers-file option.")
+
+    with pytest.raises(ValueError, match=expected_message):
+        tools.sort_dataframe(people, [missing_key])
+
+
+def test_validate_label_type():
+    valid_choices = {'ancillary': 'Product_Ancillary',
+                     'metadata': 'Product_Metadata_Supplemental'}
+
+    # Success: a known label type maps to the value stored for it in valid_choices.
+    assert tools.validate_label_type('ancillary', valid_choices) == 'Product_Ancillary'
+
+    # Failure: an unknown label type raises, so there is no return value to compare.
+    with pytest.raises(argparse.ArgumentTypeError):
+        tools.validate_label_type('bad_label_type', valid_choices)
+
+
+@patch('os.path.exists')
+def test_generate_unique_filename(mock_exists):
+    # Pretend the first two existence checks report a clash and the third does not.
+    collisions = [True, True, False]
+    mock_exists.side_effect = collisions
+
+    # Ask for a unique variant of the base filename.
+    unique_name = tools.generate_unique_filename("file.txt")
+
+    # Two clashes push the counter to 2.
+    assert unique_name == "file2.txt"
+
+    # os.path.exists must have been consulted once per scripted answer, i.e. three
+    # times in total.
+    assert mock_exists.call_count == len(collisions)
+
+
+def test_fill_text():
+    # Create an instance of MultilineFormatter
+    formatter = tools.MultilineFormatter(prog="test_prog")
+
+    # Two paragraphs joined by the '|n' multiline separator
+    paragraphs = ["This is a long text that should be wrapped.",
+                  "This is a new paragraph."]
+    input_text = "|n".join(paragraphs)
+    width = 40
+    indent = "    "  # 4 spaces
+
+    # Expected: each paragraph wrapped separately with the indent on every line,
+    # and a newline after each paragraph.
+    expected_output = "".join(
+        _textwrap.fill(paragraph, width, initial_indent=indent,
+                       subsequent_indent=indent) + '\n'
+        for paragraph in paragraphs
+    )
+
+    # Run the _fill_text method
+    result = formatter._fill_text(input_text, width, indent)
+
+    assert result == expected_output
+
+
+# get_true_type is exercised through ``tools`` with its three collaborators mocked.
+# Stacked @patch decorators inject mocks bottom-up, hence the argument order below.
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_in_first_file(mock_find_base_attribute, mock_scrape_namespaces,
+                                       mock_download_xsd_file):
+    scraped_namespaces = {"mock_namespace": "mock_value"}
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = dict(scraped_namespaces)
+    # The very first lookup succeeds; the trailing None must never be consumed.
+    mock_find_base_attribute.side_effect = ["mock_true_type", None]
+
+    result = tools.get_true_type(["file1.xsd", "file2.xsd"], "mock_tag",
+                                 {"existing_namespace": "value"})
+
+    # Only the first schema is downloaded, and the single lookup receives the
+    # namespaces produced by scrape_namespaces, not the ones passed in above.
+    assert result == "mock_true_type"
+    mock_download_xsd_file.assert_called_once_with("file1.xsd")
+    mock_find_base_attribute.assert_called_once_with("mock_xsd_tree", "mock_tag",
+                                                     scraped_namespaces)
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_in_second_file(mock_find_base_attribute, mock_scrape_namespaces,
+                                        mock_download_xsd_file):
+    # Setup mocks
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+
+    # Lookup results in call order:
+    #   first file, original tag  -> None
+    #   first file, modified tag  -> None
+    #   second file, original tag -> "mock_true_type"
+    lookup_results = [None, None, "mock_true_type"]
+    mock_find_base_attribute.side_effect = lookup_results
+
+    xsd_files = ["file1.xsd", "file2.xsd"]
+    tag = "mock_tag"
+    namespaces = {"existing_namespace": "value"}
+
+    result = tools.get_true_type(xsd_files, tag, namespaces)
+
+    # The search has to continue into the second file and stop once the type is
+    # found there: two downloads and three lookups, with no debug output needed.
+    assert result == "mock_true_type"
+    assert mock_download_xsd_file.call_count == 2
+    assert mock_find_base_attribute.call_count == 3
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_found_with_modified_tag(mock_find_base_attribute,
+                                           mock_scrape_namespaces,
+                                           mock_download_xsd_file):
+    # Setup mocks
+    scraped_namespaces = {"mock_namespace": "mock_value"}
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+    mock_scrape_namespaces.return_value = dict(scraped_namespaces)
+    # The first lookup misses and the second one hits.
+    mock_find_base_attribute.side_effect = [None, "mock_true_type"]
+
+    tag = "mock_tag"
+    result = tools.get_true_type(["file1.xsd"], tag, {"existing_namespace": "value"})
+
+    assert result == "mock_true_type"
+    # One of the lookups must have used the tag with "_WO_Units" appended, together
+    # with the scraped namespaces.
+    mock_find_base_attribute.assert_any_call("mock_xsd_tree", f"{tag}_WO_Units",
+                                             scraped_namespaces)
+
+
+@patch('pds4indextools.pds4_create_xml_index.download_xsd_file')
+@patch('pds4indextools.pds4_create_xml_index.scrape_namespaces')
+@patch('pds4indextools.pds4_create_xml_index.find_base_attribute')
+def test_true_type_not_found(mock_find_base_attribute, mock_scrape_namespaces,
+                             mock_download_xsd_file):
+    # Setup mocks: every lookup misses, whatever file or tag it is given.
+    mock_find_base_attribute.return_value = None
+    mock_scrape_namespaces.return_value = {"mock_namespace": "mock_value"}
+    mock_download_xsd_file.return_value = "mock_xsd_tree"
+
+    candidate_files = ["file1.xsd", "file2.xsd"]
+    result = tools.get_true_type(candidate_files, "mock_tag",
+                                 {"existing_namespace": "value"})
+
+    # An exhausted search returns None after downloading both files and making
+    # four lookups in total (presumably two tag variants per file).
+    assert result is None
+    assert mock_download_xsd_file.call_count == 2
+    assert mock_find_base_attribute.call_count == 4