Skip to content

Commit

Permalink
Update to collection versioning; 6.2.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
durack1 committed Feb 24, 2018
1 parent 74d42e0 commit f093710
Show file tree
Hide file tree
Showing 3 changed files with 387 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.py~
*.pyc
*.sh~
*.swp
~$*.xlsx
Expand Down
236 changes: 236 additions & 0 deletions src/CMIP6Lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 13:09:26 2018
@author: durack1
"""
#%% imports
import json
from durolib import getGitInfo

#%% Get repo metadata
def ascertainVersion(testVal_activity_id,testVal_experiment_id,testVal_frequency,
                     testVal_grid_label,testVal_institution_id,testVal_license,
                     testVal_mip_era,testVal_nominal_resolution,testVal_realm,
                     testVal_required_global_attributes,testVal_source_id,
                     testVal_source_type,testVal_sub_experiment_id,testVal_table_id,
                     commitMessage):
    """
    Work out the next CV collection version and record the commit message
    against every controlled vocabulary (CV) flagged as changed.

    The version is four integers joined with '.':
    versionMIPEra.versionCVStructure.versionCVContent.versionCVCommit

    - versionMIPEra and versionCVStructure are carried over unchanged
    - versionCVContent is incremented, and versionCVCommit reset to 0, when
      any CV other than source_id/institution_id is flagged
    - otherwise versionCVCommit is incremented when source_id and/or
      institution_id is flagged
    - with no flags set, all four numbers are returned unchanged

    Arguments:
    testVal_*     -- truthy when the corresponding CV has changed
    commitMessage -- stored as ['commitMessage'] of each flagged CV entry

    Returns [versionHistory, versions]: the updated history dictionary and
    the version string (e.g. '6.2.1.0').

    The history is read from 'versionHistory.json' in the current working
    directory; this function does not write anything back to disk.
    """
    # Load current history - context manager guarantees the handle is closed
    with open('versionHistory.json') as fileHandle:
        versionHistory = json.load(fileHandle)
    versionHistory = versionHistory.get('versionHistory')
    versionNumbers = versionHistory['versions']

    # versionMIPEra - first integer, identifies the MIP era of the collection
    versionMIPEra = versionNumbers.get('versionMIPEra')
    # versionCVStructure - structure/format counter, never changed here
    versionCVStructure = versionNumbers.get('versionCVStructure')
    versionCVContent = versionNumbers.get('versionCVContent')
    versionCVCommit = versionNumbers.get('versionCVCommit')

    # CVs whose change bumps versionCVContent (everything except
    # source_id/institution_id), paired with their change flag
    contentFlags = [
        ('activity_id', testVal_activity_id),
        ('experiment_id', testVal_experiment_id),
        ('frequency', testVal_frequency),
        ('grid_label', testVal_grid_label),
        ('license', testVal_license),
        ('mip_era', testVal_mip_era),
        ('nominal_resolution', testVal_nominal_resolution),
        ('realm', testVal_realm),
        ('required_global_attributes', testVal_required_global_attributes),
        ('source_type', testVal_source_type),
        ('sub_experiment_id', testVal_sub_experiment_id),
        ('table_id', testVal_table_id),
    ]
    # CVs whose change only bumps versionCVCommit
    commitFlags = [
        ('institution_id', testVal_institution_id),
        ('source_id', testVal_source_id),
    ]

    if any(flag for _, flag in contentFlags):
        versionCVContent = versionCVContent + 1
        versionCVCommit = 0
        flagged = contentFlags
    elif any(flag for _, flag in commitFlags):
        versionCVCommit = versionCVCommit + 1
        flagged = commitFlags
    else:
        flagged = []
    # NOTE(review): because of the elif above, when a content CV and
    # source_id/institution_id are flagged together the latter two do not
    # receive the commit message - behaviour kept as found, confirm intended

    # Record the commit message against each CV that triggered the bump
    for key, flag in flagged:
        if flag:
            versionHistory[key]['commitMessage'] = commitMessage

    # versions - Update
    versionNumbers['versionMIPEra'] = versionMIPEra
    versionNumbers['versionCVStructure'] = versionCVStructure
    versionNumbers['versionCVContent'] = versionCVContent
    versionNumbers['versionCVCommit'] = versionCVCommit
    versions = '.'.join(str(x) for x in [versionMIPEra,versionCVStructure,
                                         versionCVContent,versionCVCommit])

    return [versionHistory,versions]


def getFileHistory(filePath):
    """
    Collect git metadata for filePath into a dictionary.

    Calls getGitInfo(filePath); when that returns None, None is returned.
    Otherwise the first five entries of the result are copied into a dict
    with the keys author, creation_date, institution_id, latest_tag_point,
    note and previous_commit, after removing the label text
    ('author: ', 'date: ', 'latest_tagPoint: ', 'note: ', 'commit: ')
    from the respective entry. institution_id is always 'PCMDI'.
    """
    # Call getGitInfo
    versionInfo = getGitInfo(filePath)
    if versionInfo is None:
        return None

    # presumably versionInfo is ordered commit, note, latest_tagPoint, date,
    # author - inferred from the labels stripped below; verify in durolib
    version_metadata = {
        'author': versionInfo[4].replace('author: ',''),
        'creation_date': versionInfo[3].replace('date: ',''),
        'institution_id': 'PCMDI',
        'latest_tag_point': versionInfo[2].replace('latest_tagPoint: ',''),
        'note': versionInfo[1].replace('note: ',''),
        'previous_commit': versionInfo[0].replace('commit: ',''),
    }

    return version_metadata

def versionHistoryUpdate(key,commitMessage,timeStamp,MD5,versionHistory,
                         url='https://github.com/WCRP-CMIP/CMIP6_CVs/commit/'):
    """
    Record the details of a commit against one entry of versionHistory.

    Arguments:
    key            -- entry of versionHistory to update; it must already
                      exist, otherwise KeyError is raised
    commitMessage  -- stored as ['commitMessage']
    timeStamp      -- stored as ['timeStamp']
    MD5            -- stored as ['MD5'] and appended to url to form ['URL']
                      (presumably the git commit hash despite the name -
                      confirm against callers)
    versionHistory -- dictionary to update; modified in place
    url            -- prefix used to build ['URL']; defaults to the
                      CMIP6_CVs commit URL so existing callers are unaffected

    Returns the same versionHistory object that was passed in.
    """
    entry = versionHistory[key]
    entry['commitMessage'] = commitMessage
    entry['timeStamp'] = timeStamp
    entry['URL'] = ''.join([url,MD5])
    entry['MD5'] = MD5

    return versionHistory


#%% Clean functions
def cleanString(string):
    """
    Clean a string of common formatting errors.

    For a string argument the following are applied, in order:
    - leading/trailing whitespace is removed
    - leading/trailing ',' and '.' characters are removed
    - ' + ' and ' & ' are replaced with ' and '
    - runs of spaces are collapsed to a single space
    - 'None' is replaced with 'none'

    A non-string argument is reported with print and returned unchanged.
    """
    try:
        stringTypes = (str, unicode) # Python 2 - byte and unicode strings
    except NameError:
        stringTypes = (str,) # Python 3 - unicode no longer exists

    if not isinstance(string, stringTypes):
        print('Non-string argument, aborting..')
        print(string)
        return string

    string = string.strip() # Remove leading/trailing whitespace
    string = string.strip(',.') # Remove leading/trailing ',' and '.'
    string = string.replace(' + ', ' and ') # Replace +
    string = string.replace(' & ', ' and ') # Replace &
    # Collapse repeated spaces; loop so runs of any length end up single
    while '  ' in string:
        string = string.replace('  ', ' ')
    string = string.replace('None','none') # Replace None, none

    return string


def dictDepth(x):
    """
    Return the nesting depth of x.

    A non-empty dict or list counts one level plus the deepest of its
    values/items; anything else, including an empty dict or list, is 0.
    isinstance is used so that dict and list subclasses (e.g.
    collections.OrderedDict) are descended into rather than being
    treated as leaves.
    """
    if isinstance(x, dict) and x:
        return 1 + max(dictDepth(value) for value in x.values())
    if isinstance(x, list) and x:
        return 1 + max(dictDepth(item) for item in x)
    return 0


#You can walk a nested dictionary using recursion
def walk_dict(dictionary):
    """
    Recursively visit every nested dict inside dictionary.

    Template traversal: values that are not dicts are skipped without any
    action, and nothing is returned.
    """
    for name in dictionary:
        child = dictionary[name]
        if not isinstance(child, dict):
            # leaf value - placeholder, process child here if required
            continue
        walk_dict(child)


''' Notes
#import pyexcel_xlsx as pyx ; # requires openpyxl ('pip install openpyxl'), pyexcel-io ('git clone https://github.com/pyexcel/pyexcel-io')
# pyexcel-xlsx ('git clone https://github.com/pyexcel/pyexcel-xlsx'), and unidecode ('conda install unidecode')
#from string import replace
#from unidecode import unidecode
#import pdb
#import copy ; # Useful for copy.deepcopy() of dictionaries
# xlsx import
# Fields
# Alpha/json order, xlsx column, value
# 1 0 experiment_id string
# 2 1 activity_id list
# 3 8 additional_allowed_model_components list
# 4 13 description string
# 5 10 end_year string
# 6 2 experiment string
# 7 11 min_number_yrs_per_sim string
# 8 12 parent_activity_id list
# 9 6 parent_experiment_id list
# 10 7 required_model_components list
# 11 9 start_year string
# 12 5 sub_experiment string
# 13 4 sub_experiment_id string
# 14 3 tier string
os.chdir('/sync/git/CMIP6_CVs/src')
inFile = '170307_CMIP6_expt_list.xlsx'
data = pyx.get_data(inFile)
data = data['Sheet1']
headers = data[3]
experiment_id = {}
for count in range(4,len(data)):
if data[count] == []:
#print count,'blank field'
continue
row = data[count]
key = row[0] ; #replace(row[0],'_ ','_')
experiment_id[key] = {}
for count2,entry in enumerate(headers):
if count2 == 5:
continue ; # Skip sub_experiment
entry = replace(entry,'_ ','_') ; # clean up spaces
entry = replace(entry,' ', '_') ; # replace spaces with underscores
if count2 >= len(row):
experiment_id[key][entry] = ''
continue
value = row[count2]
if count2 in [1,4,6,7,8,12]:
if value == None:
pass
elif value == 'no parent':
pass
elif 'no parent,' in value:
value = ['no parent',replace(value,'no parent,','').strip()] ; # deal with multiple entries (including 'no parent')
pass
else:
value = replace(value,',','') ; # remove ','
value = value.split() ; # Change type to list
#print value
if type(value) == long:
experiment_id[key][entry] = str(value) ; #replace(str(value),' ','')
elif type(value) == list:
experiment_id[key][entry] = value
elif value == None:
experiment_id[key][entry] = '' ; # changed from none to preserve blank entries
else:
value = replace(value,' ',' ') ; # replace whitespace
value = replace(value,' ',' ') ; # replace whitespace
value = replace(value,' ',' ') ; # replace whitespace
experiment_id[key][entry] = unidecode(value) ; #replace(unidecode(value),' ','')
try:
unidecode(value)
except:
print count,count2,key,entry,value
del(inFile,data,headers,count,row,key,entry,value) ; gc.collect()
'''
Loading

0 comments on commit f093710

Please sign in to comment.