Fix ${{}} in runner (#175)
* Update docker.yml

* Update tests.yml

* use black

---------

Co-authored-by: pierre.delaunay <[email protected]>
Delaunay and pierre.delaunay authored Nov 21, 2023
1 parent 1f1bb16 commit c62e483
Showing 40 changed files with 1,973 additions and 1,159 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/docker.yml
@@ -53,10 +53,12 @@ jobs:
       uses: actions/checkout@v3

     - name: Get Image Tag Name
+      env:
+        GITHUB_REF_NAME_ENV: ${{ github.ref_name }}
       run: |
         REGEX="(.*)v(.*)\.(.*)\.(.*)"
         IMAGE_TAG="nightly"
-        if [[ "${{ github.ref_name }}" =~ $REGEX ]]; then
+        if [[ "${GITHUB_REF_NAME_ENV}" =~ $REGEX ]]; then
          IMAGE_TAG="${GITHUB_REF_NAME##*/}"
         fi
         echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV
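The change above (mirrored in tests.yml below) is GitHub's recommended hardening against script injection: a ${{ }} expression inside run: is substituted into the script text before the shell ever parses it, so an attacker-influenced value such as a branch name can escape its quoting; routing the value through env: hands it to the shell as plain data instead. A minimal standalone sketch of the pattern (workflow, job, and variable names are illustrative, not from this repository):

    name: ref-demo
    on: push
    jobs:
      demo:
        runs-on: ubuntu-latest
        steps:
          - name: Use the ref name safely
            env:
              REF_NAME: ${{ github.ref_name }}  # expanded by the runner, outside the script
            run: |
              # $REF_NAME is now an ordinary environment variable; quote it as data.
              echo "Building ref: ${REF_NAME}"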
9 changes: 5 additions & 4 deletions .github/workflows/tests.yml
@@ -47,6 +47,7 @@ jobs:
       MILABENCH_ARGS: ""
       MILABENCH_GPU_ARCH: "${{ matrix.arch }}"
       MILABENCH_DASH: "no"
+      MILABENCH_EXCLUDE: "${{ matrix.exclude }}"

     steps:
       - uses: actions/checkout@v3
@@ -60,7 +61,7 @@ jobs:

     - name: Pytorch Sanity
       run: |
-        if [[ "${{ matrix.arch }}" == "rocm" ]]; then
+        if [[ "${MILABENCH_GPU_ARCH}" == "rocm" ]]; then
           groups
           /opt/rocm/bin/rocminfo
         fi
@@ -96,16 +97,16 @@ jobs:
     - name: install benchmarks
       run: |
-        milabench install --exclude "${{ matrix.exclude }}"
+        milabench install --exclude "${MILABENCH_EXCLUDE}"
     - name: prepare benchmarks
       run: |
-        milabench prepare --exclude "${{ matrix.exclude }}"
+        milabench prepare --exclude "${MILABENCH_EXCLUDE}"
     - name: run benchmarks
       run: |
         export PATH="/opt/rocm/bin:$PATH"
-        milabench run --validations all --exclude "${{ matrix.exclude }}"
+        milabench run --validations all --exclude "${MILABENCH_EXCLUDE}"
     - name: Summary
       run: |
7 changes: 1 addition & 6 deletions benchmarks/dlrm/voirfile.py
@@ -47,12 +47,7 @@ def instrument_main(ov, options: Config):
     yield ov.phases.load_script

     # Loss
-    (
-        ov.probe("//run > L")
-        .throttle(1)["L"]
-        .map(float)
-        .give("loss")
-    )
+    (ov.probe("//run > L").throttle(1)["L"].map(float).give("loss"))

     # Compute Start & End + Batch
     ov.probe(
6 changes: 3 additions & 3 deletions benchmarks/flops/benchfile.py
@@ -5,15 +5,15 @@ class FlopsBenchmarch(Package):
     base_requirements = "requirements.in"
     prepare_script = "prepare.py"
     main_script = "main.py"

     def build_run_plan(self) -> "execs.Executor":
         import milabench.executors as execs

         main = self.dirs.code / self.main_script
         pack = execs.PackExecutor(self, *self.argv, lazy=True)
         # pack = execs.VoirExecutor(pack, cwd=main.parent)
         pack = execs.ActivatorExecutor(pack, use_stdout=True)
         return pack


 __pack__ = FlopsBenchmarch
93 changes: 40 additions & 53 deletions benchmarks/flops/main.py
@@ -22,34 +22,37 @@
 def _worker(state, queue, func, delay):
     import time

-    while state['running']:
+    while state["running"]:
         queue.put(func())
         time.sleep(delay)


 class Monitor:
     def __init__(self, delay, func):
         self.manager = multiprocessing.Manager()
         self.state = self.manager.dict()
-        self.state['running'] = True
+        self.state["running"] = True
         self.results = multiprocessing.Queue()
         self.process = multiprocessing.Process(
             target=_worker,
             args=(self.state, self.results, func, delay),
         )

     def start(self):
         self.process.start()

     def stop(self):
-        self.state['running'] = False
+        self.state["running"] = False
         self.process.join()
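
For orientation: Monitor forks a helper process that calls func every delay seconds and pushes each result onto a shared queue until stop() flips the running flag and joins the worker. A minimal usage sketch under the same definitions (the sampled function here is illustrative):

    import time

    def sample():
        # stand-in for a real probe, e.g. the GPU poller defined further down
        return {"t": time.time()}

    monitor = Monitor(0.5, sample)   # sample twice per second
    monitor.start()
    time.sleep(2)                    # let a few results accumulate
    monitor.stop()                   # stops and joins the worker process
    while not monitor.results.empty():
        print(monitor.results.get())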


-def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA):
+def modelflops(
+    model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA
+):
     # Not sure how much thop is correct in its computation
     # it says it return MAC but I feel its methods is wrong
     from thop import profile

     # MAC: Multiply–accumulate operation
     batch = torch.randn(*shape, dtype=dtype, device="cuda:0")

@@ -77,108 +80,92 @@ def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA):
     return (flops * repeat) / (end - start) / unit


 def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None):
     torch.cuda.empty_cache()
     a = torch.eye(n, dtype=dtype, device="cuda:0")
     x = torch.randn((m, n), dtype=dtype, device="cuda:0")
     y = torch.zeros_like(x)

     F = N * (2 * m * n * n + 2 * m * n * n)
-    for i in range(R):
+
+    for i in range(R):
         torch.cuda.synchronize()
         ts = -time.time()

         for _ in range(N):
             # No allocation in main loop using dual-out strategy
             y = torch.mm(x, a, out=y)
             x = torch.mm(y, a, out=x)

         torch.cuda.synchronize()
         ts += time.time()

         if log is not None:
-            log({
-                "task": "train",
-                "rate": F / ts / unit,
-                "units": "Tflops"
-            })
-
+            log({"task": "train", "rate": F / ts / unit, "units": "Tflops"})

     torch.cuda.empty_cache()
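
As a sanity check on the rate that f logs: one torch.mm of an (m, n) matrix with an (n, n) matrix costs roughly 2·m·n² floating-point operations, and the inner loop issues two such products N times, which is exactly what F = N * (2 * m * n * n + 2 * m * n * n) counts. A back-of-the-envelope in the same spirit (assuming TERA is 10**12; the timing value is made up):

    m, n, N = 5_000_000, 256, 100     # m and n from f's defaults; N is the --number argument
    flops_per_mm = 2 * m * n * n      # one (m, n) @ (n, n) matrix multiply
    F = N * 2 * flops_per_mm          # two mm calls per inner iteration
    ts = 1.7                          # hypothetical measured seconds for one window
    print(F / ts / 10**12, "Tflops")  # mirrors the logged "rate" field, ~77.1 here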


 def setupvoir():
     # wtf this do
     data_file = SmuggleWriter(sys.stdout)
     # data_file = sys.stdout

     def log(data):
         if data_file is not None:
             data["t"] = time.time()
             print(json.dumps(data), file=data_file)

             while not monitor.results.empty():
                 print(json.dumps(monitor.results.get()), file=data_file)

     def monitor_fn():
         data = {
             gpu["device"]: {
                 "memory": [
                     gpu["memory"]["used"],
                     gpu["memory"]["total"],
                 ],
                 "load": gpu["utilization"]["compute"],
                 "temperature": gpu["temperature"],
-                "power": gpu["power"]
+                "power": gpu["power"],
             }
             for gpu in get_gpu_info()["gpus"].values()
         }
         return {"task": "main", "gpudata": data, "t": time.time()}

     monitor = Monitor(0.5, monitor_fn)
     monitor.start()
     return log, monitor
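
Concretely, each call to log becomes one JSON line on the smuggled stdout stream, stamped with the current time, and any GPU samples the monitor queued since the last call ride along on the same stream. A hypothetical exchange (values invented for illustration):

    log, monitor = setupvoir()
    log({"task": "train", "rate": 77.1, "units": "Tflops"})
    # -> {"task": "train", "rate": 77.1, "units": "Tflops", "t": 1700600000.0}
    # -> {"task": "main", "gpudata": {...}, "t": ...}   (if the monitor queued samples)
    monitor.stop()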



 def main():
     dtypes = {
-        'bf16': torch.bfloat16,
-        'fp16': torch.float16,
-        'fp32': torch.float32,
+        "bf16": torch.bfloat16,
+        "fp16": torch.float16,
+        "fp32": torch.float32,
     }

     parser = ArgumentParser()
-    parser.add_argument('--repeat', type=int, default=100)
-    parser.add_argument('--number', type=int, default=100)
-    parser.add_argument('--m', type=int, default=256)
-    parser.add_argument('--n', type=int, default=256)
-    parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys())
-    parser.add_argument('--tf32', action='store_true', default=False)
+    parser.add_argument("--repeat", type=int, default=100)
+    parser.add_argument("--number", type=int, default=100)
+    parser.add_argument("--m", type=int, default=256)
+    parser.add_argument("--n", type=int, default=256)
+    parser.add_argument("--dtype", type=str, default="fp32", choices=dtypes.keys())
+    parser.add_argument("--tf32", action="store_true", default=False)

     args = parser.parse_args()

     torch.backends.cuda.matmul.allow_tf32 = False
     if args.tf32:
         torch.backends.cuda.matmul.allow_tf32 = True

     log, monitor = setupvoir()

-    f(
-        args.number,
-        args.repeat,
-        args.m,
-        args.n,
-        TERA,
-        dtypes[args.dtype],
-        log
-    )
+    f(args.number, args.repeat, args.m, args.n, TERA, dtypes[args.dtype], log)

     monitor.stop()

-if __name__ == "__main__":
-    main()
+
+if __name__ == "__main__":
+    main()
9 changes: 5 additions & 4 deletions benchmarks/llama/benchfile.py
@@ -11,12 +11,12 @@ class LLAMA(Package):
     def make_env(self):
         return {
             **super().make_env(),
-            "OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8))
+            "OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8)),
         }

     async def install(self):
         await super().install()

     def build_prepare_plan(self):
         return CmdExecutor(
             self,
@@ -36,7 +36,8 @@ def build_run_plan(self):
             *self.argv,
             "--cache",
             str(self.dirs.cache),
-            use_stdout=True
+            use_stdout=True,
         )


 __pack__ = LLAMA
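
The make_env override earlier in this file shows the pack-environment pattern: merge the parent environment with an OMP_NUM_THREADS pin read from the benchmark config, falling back to 8. A self-contained sketch of just that merge (the config and base environment are stand-ins, not milabench internals):

    config = {"cpus_per_gpu": 16}        # illustrative config entry
    base_env = {"PATH": "/usr/bin"}      # stand-in for super().make_env()
    env = {**base_env, "OMP_NUM_THREADS": str(config.get("cpus_per_gpu", 8))}
    print(env)                           # {'PATH': '/usr/bin', 'OMP_NUM_THREADS': '16'}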
(Diffs for the remaining changed files are not shown.)
