Skip to content

Commit

Permalink
Merge pull request #11 from apaasch/master
Browse files Browse the repository at this point in the history
Add JMENano campaign
  • Loading branch information
riga authored Jun 19, 2023
2 parents 12d2ccf + 754f021 commit f987019
Show file tree
Hide file tree
Showing 5 changed files with 297 additions and 1 deletion.
32 changes: 32 additions & 0 deletions cmsdb/campaigns/run2_2018_JMEnano_v9/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# coding: utf-8

"""
Common, analysis independent definition of the 2018 data-taking campaign
with datasets at NanoAOD tier in version 9.
See https://python-order.readthedocs.io/en/latest/quickstart.html#analysis-campaign-and-config.
Dataset ids are identical to those in DAS (https://cmsweb.cern.ch/das).
"""

from order import Campaign


#
# campaign
#

# single campaign object on which the dataset modules below register themselves
campaign_run2_2018_JMEnano_v9 = Campaign(
    name="run2_2018_JMEnano_v9",
    id=220181,  # NOTE(review): looks like the convention is 2<year>1 — confirm uniqueness across campaigns
    ecm=13,  # presumably the center-of-mass energy in TeV — per order conventions
    bx=25,  # presumably the bunch spacing in ns
    aux={
        "year": 2018,
        "tier": "NanoAOD",
        "version": "9"
    },
)

# trailing imports to load datasets
# (placed after the Campaign definition on purpose: both modules import the
# campaign object from this package and add datasets to it as a side effect)
import cmsdb.campaigns.run2_2018_JMEnano_v9.data
import cmsdb.campaigns.run2_2018_JMEnano_v9.qcd
73 changes: 73 additions & 0 deletions cmsdb/campaigns/run2_2018_JMEnano_v9/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# coding: utf-8

"""
CMS JetHT datasets from the 2018 data-taking campaign
"""

import cmsdb.processes as procs
from cmsdb.campaigns.run2_2018_JMEnano_v9 import campaign_run2_2018_JMEnano_v9 as cpn


#
# JetHT
#

# All four datasets below are JetHT primary datasets, so they all point to the
# data_jetht process added alongside this campaign (processes/data.py declares
# data_jetht; a data_dijet/data_e process is not part of its public API).
# The era-B entry previously referenced procs.data_e — a copy-paste error.

cpn.add_dataset(
    name="data_jetht_a",
    id=14260294,  # from das
    is_data=True,
    processes=[procs.data_jetht],
    keys=[
        "/JetHT/Run2018A-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD",
    ],
    n_files=281,
    n_events=171484635,
    aux={
        "era": "A",
    },
)

cpn.add_dataset(
    name="data_jetht_b",
    id=14260659,  # from das
    is_data=True,
    processes=[procs.data_jetht],
    keys=[
        "/JetHT/Run2018B-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD",
    ],
    n_files=128,
    n_events=78255208,
    aux={
        "era": "B",
    },
)

cpn.add_dataset(
    name="data_jetht_c",
    id=14260590,  # from das
    is_data=True,
    processes=[procs.data_jetht],
    keys=[
        "/JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD",
    ],
    n_files=138,
    n_events=70027804,
    aux={
        "era": "C",
    },
)

cpn.add_dataset(
    name="data_jetht_d",
    id=14324490,  # from das
    is_data=True,
    processes=[procs.data_jetht],
    keys=[
        "/JetHT/Run2018D-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD",
    ],
    n_files=538,
    n_events=356967606,
    aux={
        "era": "D",
    },
)
113 changes: 113 additions & 0 deletions cmsdb/campaigns/run2_2018_JMEnano_v9/qcd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# coding: utf-8

"""
QCD datasets for the 2018 data-taking campaign
"""

import cmsdb.processes as procs
from cmsdb.campaigns.run2_2018_JMEnano_v9 import campaign_run2_2018_JMEnano_v9 as cpn


#
# QCD, HT-binned samples
#

cpn.add_dataset(
    name="qcd_ht50to100_madgraph",
    id=14296686,  # from das
    processes=[procs.qcd_ht50to100],
    keys=[
        "/QCD_HT50to100_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=99,
    n_events=38485273,
)

cpn.add_dataset(
    name="qcd_ht100to200_madgraph",
    id=14286202,  # from das
    processes=[procs.qcd_ht100to200],
    keys=[
        "/QCD_HT100to200_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=175,
    n_events=83416014,
)

cpn.add_dataset(
    name="qcd_ht200to300_madgraph",
    id=14288453,  # from das
    processes=[procs.qcd_ht200to300],
    keys=[
        "/QCD_HT200to300_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=104,
    n_events=57336623,
)

cpn.add_dataset(
    name="qcd_ht300to500_madgraph",
    id=14296771,  # from das
    processes=[procs.qcd_ht300to500],
    keys=[
        "/QCD_HT300to500_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=235,
    n_events=61491618,
)

cpn.add_dataset(
    name="qcd_ht500to700_madgraph",
    id=14293400,  # from das
    processes=[procs.qcd_ht500to700],
    keys=[
        "/QCD_HT500to700_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=106,
    n_events=49070152,
)

cpn.add_dataset(
    name="qcd_ht700to1000_madgraph",
    id=14275579,  # from das
    processes=[procs.qcd_ht700to1000],
    keys=[
        "/QCD_HT700to1000_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=99,
    n_events=48220920,
)

cpn.add_dataset(
    name="qcd_ht1000to1500_madgraph",
    id=14296761,  # from das
    processes=[procs.qcd_ht1000to1500],
    keys=[
        "/QCD_HT1000to1500_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=67,
    n_events=14127722,
)

cpn.add_dataset(
    name="qcd_ht1500to2000_madgraph",
    id=14300066,  # from das
    processes=[procs.qcd_ht1500to2000],
    keys=[
        "/QCD_HT1500to2000_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=27,
    n_events=10583770,
)

# note: the last bin is open-ended (HT > 2000), matching the "toInf" DAS key
cpn.add_dataset(
    name="qcd_ht2000_madgraph",
    id=14299583,  # from das
    processes=[procs.qcd_ht2000],
    keys=[
        "/QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM",  # noqa
    ],
    n_files=15,
    n_events=5202244,
)
9 changes: 8 additions & 1 deletion cmsdb/processes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

__all__ = [
"data", "data_e", "data_mu", "data_tau", "data_met", "data_pho",
"data", "data_e", "data_mu", "data_tau", "data_met", "data_pho", "data_jetht",
]

from order import Process
Expand Down Expand Up @@ -57,3 +57,10 @@
is_data=True,
label=r"Data $\gamma$",
)

# process for the JetHT primary dataset, added as a child of the inclusive
# "data" process (presumably data recorded via jet/HT triggers — TODO confirm)
data_jetht = data.add_process(
    name="data_jetht",
    id=100,  # NOTE(review): sibling data_* process ids are not visible here — confirm 100 does not clash
    is_data=True,
    label=r"Data JetHT",
)
71 changes: 71 additions & 0 deletions scripts/get_das_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# coding: utf-8

# USAGE: python get_das_info.py -d das_string
# e.g. python get_das_info.py -d /JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD

from __future__ import annotations

import subprocess
import json
from argparse import ArgumentParser
import law

def _query_das(dataset: str) -> list[dict]:
    """
    Run ``dasgoclient -query='dataset=<dataset>' -json`` for *dataset* and
    return the parsed JSON payload (a list of per-service records).

    The command is invoked with an argument list and without a shell so that
    *dataset* is never interpreted by a shell (the previous implementation
    interpolated user input into a ``shell=True`` command string).

    :raises Exception: if the dasgoclient call exits with a non-zero code.
    """
    result = subprocess.run(
        ["dasgoclient", f"-query=dataset={dataset}", "-json"],
        stdout=subprocess.PIPE,
    )
    if result.returncode != 0:
        raise Exception(f"dasgoclient query failed:\n{result.stdout}")
    return json.loads(result.stdout)


def get_das_info(das_strings: list[str], keys_of_interest: tuple | None = None):
    """
    Query DAS for each dataset name or wildcard pattern in *das_strings* and
    print one JSON summary per matching dataset.

    :param das_strings: DAS dataset names, optionally containing ``*`` wildcards
        (e.g. ``/JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD``).
    :param keys_of_interest: keys to include in the printed summary; defaults
        to ``("name", "dataset_id", "nfiles", "nevents")``. (Previously this
        argument was accepted but never applied; it now filters the output.)
    :raises Exception: if a dasgoclient query fails.
    """
    # the default is invariant, so resolve it once instead of per iteration
    keys_of_interest = keys_of_interest or (
        "name", "dataset_id", "nfiles", "nevents",
    )

    for das_string in das_strings:
        if "*" in das_string:
            # a wildcard query returns a different json structure:
            # one record per matching dataset, each carrying the dataset name
            datasets = [
                info.get("dataset", [{}])[0].get("name", "")
                for info in _query_das(das_string)
            ]
        else:
            # keep a consistent structure for the non-wildcard case
            datasets = [das_string]

        for dataset in datasets:
            # a single-dataset query yields multiple records, each filled by a
            # different DAS service; pick each field from the record that owns
            # it so values are not printed twice
            info_of_interest = {"name": dataset}
            for info in _query_das(dataset):
                dataset_info = info["dataset"][0]
                service = info["das"]["services"][0]
                if "dataset_info" in service:
                    info_of_interest["dataset_id"] = dataset_info.get("dataset_id", "")
                elif "filesummaries" in service:
                    info_of_interest["nfiles"] = dataset_info.get("nfiles", "")
                    info_of_interest["nevents"] = dataset_info.get("nevents", "")

            # reduce to the requested keys before printing
            info_of_interest = {
                key: value
                for key, value in info_of_interest.items()
                if key in keys_of_interest
            }
            print(json.dumps(info_of_interest, indent=4))


if __name__ == "__main__":
    # command line interface: accept one or more DAS names / patterns
    cli = ArgumentParser()
    cli.add_argument("-d", "--dataset", dest="dataset", nargs="+", help="das name")
    arguments = cli.parse_args()

    get_das_info(arguments.dataset)

0 comments on commit f987019

Please sign in to comment.