diff --git a/cmsdb/campaigns/run2_2018_JMEnano_v9/__init__.py b/cmsdb/campaigns/run2_2018_JMEnano_v9/__init__.py new file mode 100644 index 00000000..1d2351c1 --- /dev/null +++ b/cmsdb/campaigns/run2_2018_JMEnano_v9/__init__.py @@ -0,0 +1,32 @@ +# coding: utf-8 + +""" +Common, analysis independent definition of the 2018 data-taking campaign +with datasets at NanoAOD tier in version 9. +See https://python-order.readthedocs.io/en/latest/quickstart.html#analysis-campaign-and-config. + +Dataset ids are identical to those in DAS (https://cmsweb.cern.ch/das). +""" + +from order import Campaign + + +# +# campaign +# + +campaign_run2_2018_JMEnano_v9 = Campaign( + name="run2_2018_JMEnano_v9", + id=220181, + ecm=13, + bx=25, + aux={ + "year": 2018, + "tier": "NanoAOD", + "version": "9" + }, +) + +# trailing imports to load datasets +import cmsdb.campaigns.run2_2018_JMEnano_v9.data +import cmsdb.campaigns.run2_2018_JMEnano_v9.qcd diff --git a/cmsdb/campaigns/run2_2018_JMEnano_v9/data.py b/cmsdb/campaigns/run2_2018_JMEnano_v9/data.py new file mode 100644 index 00000000..9b915ad5 --- /dev/null +++ b/cmsdb/campaigns/run2_2018_JMEnano_v9/data.py @@ -0,0 +1,73 @@ +# coding: utf-8 + +""" +CMS datasets from the 2018 data-taking campaign +""" + +import cmsdb.processes as procs +from cmsdb.campaigns.run2_2018_JMEnano_v9 import campaign_run2_2018_JMEnano_v9 as cpn + + +# +# DiJet +# + +cpn.add_dataset( + name="data_jetht_a", + id=14260294, # from das + is_data=True, + processes=[procs.data_jetht], + keys=[ + "/JetHT/Run2018A-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD", + ], + n_files=281, + n_events=171484635, + aux={ + "era": "A", + }, +) + +cpn.add_dataset( + name="data_jetht_b", + id=14260659, + is_data=True, + processes=[procs.data_jetht], + keys=[ + "/JetHT/Run2018B-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD", + ], + n_files=128, + n_events=78255208, + aux={ + "era": "B", + }, +) + +cpn.add_dataset( + name="data_jetht_c", + id=14260590, + is_data=True, + processes=[procs.data_jetht], + 
keys=[ + "/JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD", + ], + n_files=138, + n_events=70027804, + aux={ + "era": "C", + }, +) + +cpn.add_dataset( + name="data_jetht_d", + id=14324490, + is_data=True, + processes=[procs.data_jetht], + keys=[ + "/JetHT/Run2018D-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD", + ], + n_files=538, + n_events=356967606, + aux={ + "era": "D", + }, +) diff --git a/cmsdb/campaigns/run2_2018_JMEnano_v9/qcd.py b/cmsdb/campaigns/run2_2018_JMEnano_v9/qcd.py new file mode 100644 index 00000000..12c467a2 --- /dev/null +++ b/cmsdb/campaigns/run2_2018_JMEnano_v9/qcd.py @@ -0,0 +1,113 @@ +# coding: utf-8 + +""" +QCD datasets for the 2018 data-taking campaign +""" + +import cmsdb.processes as procs +from cmsdb.campaigns.run2_2018_JMEnano_v9 import campaign_run2_2018_JMEnano_v9 as cpn + +# +# QCD HT-binned +# + +# HT-binned samples + +cpn.add_dataset( + name="qcd_ht50to100_madgraph", + id=14296686, + processes=[procs.qcd_ht50to100], + keys=[ + "/QCD_HT50to100_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=99, + n_events=38485273, +) + +cpn.add_dataset( + name="qcd_ht100to200_madgraph", + id=14286202, + processes=[procs.qcd_ht100to200], + keys=[ + "/QCD_HT100to200_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=175, + n_events=83416014, +) + +cpn.add_dataset( + name="qcd_ht200to300_madgraph", + id=14288453, + processes=[procs.qcd_ht200to300], + keys=[ + "/QCD_HT200to300_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=104, + n_events=57336623, +) + +cpn.add_dataset( + name="qcd_ht300to500_madgraph", + id=14296771, + processes=[procs.qcd_ht300to500], + keys=[ + 
"/QCD_HT300to500_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=235, + n_events=61491618, +) + +cpn.add_dataset( + name="qcd_ht500to700_madgraph", + id=14293400, + processes=[procs.qcd_ht500to700], + keys=[ + "/QCD_HT500to700_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=106, + n_events=49070152, +) + +cpn.add_dataset( + name="qcd_ht700to1000_madgraph", + id=14275579, + processes=[procs.qcd_ht700to1000], + keys=[ + "/QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=99, + n_events=48220920, +) + +cpn.add_dataset( + name="qcd_ht1000to1500_madgraph", + id=14296761, + processes=[procs.qcd_ht1000to1500], + keys=[ + "/QCD_HT1000to1500_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=67, + n_events=14127722, +) + +cpn.add_dataset( + name="qcd_ht1500to2000_madgraph", + id=14300066, + processes=[procs.qcd_ht1500to2000], + keys=[ + "/QCD_HT1500to2000_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=27, + n_events=10583770, +) + +cpn.add_dataset( + name="qcd_ht2000_madgraph", + id=14299583, + processes=[procs.qcd_ht2000], + keys=[ + "/QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM", # noqa + ], + n_files=15, + n_events=5202244, +) \ No newline at end of file diff --git a/cmsdb/processes/data.py b/cmsdb/processes/data.py index d1dba166..053d7db7 100644 --- a/cmsdb/processes/data.py +++ b/cmsdb/processes/data.py @@ -5,7 +5,7 @@ """ 
__all__ = [ - "data", "data_e", "data_mu", "data_tau", "data_met", "data_pho", + "data", "data_e", "data_mu", "data_tau", "data_met", "data_pho", "data_jetht", ] from order import Process @@ -57,3 +57,10 @@ is_data=True, label=r"Data $\gamma$", ) + +data_jetht = data.add_process( + name="data_jetht", + id=100, + is_data=True, + label=r"Data JetHT", +) diff --git a/scripts/get_das_info.py b/scripts/get_das_info.py new file mode 100644 index 00000000..fc3be3d4 --- /dev/null +++ b/scripts/get_das_info.py @@ -0,0 +1,71 @@ +# coding: utf-8 + +# USAGE: python get_das_info.py -d das_string +# e.g. /JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD + +from __future__ import annotations + +import subprocess +import json +from argparse import ArgumentParser +import law + +def get_das_info(das_strings: list[str], keys_of_interest: tuple | None = None): + for das_string in das_strings: + # set default keys of interest + keys_of_interest = keys_of_interest or ( + "name", "dataset_id", "nfiles", "nevents", + ) + + wildcard = "*" in das_string + datasets = [] + if not wildcard: + # keep a consistent structure + datasets.append(das_string) + else: + # using a wildcard leads to a different structure in the JSON format + cmd = f"dasgoclient -query='dataset={das_string}' -json" + code, out, _ = law.util.interruptable_popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + executable="/bin/bash", + ) + if code != 0: + raise Exception(f"dasgoclient query failed:\n{out}") + infos = json.loads(out) + for info in infos: + dataset_name = info.get('dataset',[])[0].get('name',"") + # print(dataset_name) # keep for debugging purposes + datasets.append(dataset_name) + + for dataset in datasets: + # call dasgoclient command + cmd = f"dasgoclient -query='dataset={dataset}' -json" + code, out, _ = law.util.interruptable_popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + executable="/bin/bash", + ) + if code != 0: + raise Exception(f"dasgoclient query failed:\n{out}") + infos = json.loads(out) + 
info_of_interest = {'name': dataset} + for info in infos: + dataset_info = info["dataset"][0] + # The JSON output for a single das_string contains multiple dictionaries with different info + # Avoid printing the same info twice by asking specifically for the key of interest + if "dataset_info" in info["das"]["services"][0]: + info_of_interest["dataset_id"] = dataset_info.get("dataset_id", "") + elif "filesummaries" in info["das"]["services"][0]: + info_of_interest["nfiles"] = dataset_info.get("nfiles", "") + info_of_interest["nevents"] = dataset_info.get("nevents", "") + print(json.dumps(info_of_interest, indent=4)) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument('-d', '--dataset', dest='dataset', nargs='+', help='das name') + args = parser.parse_args()
 + get_das_info(args.dataset)