Fix ${{}} in runner #175

Merged
merged 4 commits into from
Nov 21, 2023
4 changes: 3 additions & 1 deletion .github/workflows/docker.yml
@@ -53,10 +53,12 @@ jobs:
uses: actions/checkout@v3

- name: Get Image Tag Name
env:
GITHUB_REF_NAME_ENV: ${{ github.ref_name }}
run: |
REGEX="(.*)v(.*)\.(.*)\.(.*)"
IMAGE_TAG="nightly"
if [[ "${{ github.ref_name }}" =~ $REGEX ]]; then
if [[ "${GITHUB_REF_NAME_ENV}" =~ $REGEX ]]; then
IMAGE_TAG="${GITHUB_REF_NAME##*/}"
fi
echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV
9 changes: 5 additions & 4 deletions .github/workflows/tests.yml
@@ -47,6 +47,7 @@ jobs:
MILABENCH_ARGS: ""
MILABENCH_GPU_ARCH: "${{ matrix.arch }}"
MILABENCH_DASH: "no"
MILABENCH_EXCLUDE: "${{ matrix.exclude }}"

steps:
- uses: actions/checkout@v3
@@ -60,7 +61,7 @@ jobs:

- name: Pytorch Sanity
run: |
if [[ "${{ matrix.arch }}" == "rocm" ]]; then
if [[ "${MILABENCH_GPU_ARCH}" == "rocm" ]]; then
groups
/opt/rocm/bin/rocminfo
fi
@@ -96,16 +97,16 @@ jobs:

- name: install benchmarks
run: |
milabench install --exclude "${{ matrix.exclude }}"
milabench install --exclude "${MILABENCH_EXCLUDE}"

- name: prepare benchmarks
run: |
milabench prepare --exclude "${{ matrix.exclude }}"
milabench prepare --exclude "${MILABENCH_EXCLUDE}"

- name: run benchmarks
run: |
export PATH="/opt/rocm/bin:$PATH"
milabench run --validations all --exclude "${{ matrix.exclude }}"
milabench run --validations all --exclude "${MILABENCH_EXCLUDE}"

- name: Summary
run: |
7 changes: 1 addition & 6 deletions benchmarks/dlrm/voirfile.py
@@ -47,12 +47,7 @@ def instrument_main(ov, options: Config):
yield ov.phases.load_script

# Loss
(
ov.probe("//run > L")
.throttle(1)["L"]
.map(float)
.give("loss")
)
(ov.probe("//run > L").throttle(1)["L"].map(float).give("loss"))

# Compute Start & End + Batch
ov.probe(
6 changes: 3 additions & 3 deletions benchmarks/flops/benchfile.py
@@ -5,15 +5,15 @@ class FlopsBenchmarch(Package):
base_requirements = "requirements.in"
prepare_script = "prepare.py"
main_script = "main.py"

def build_run_plan(self) -> "execs.Executor":
import milabench.executors as execs

main = self.dirs.code / self.main_script
pack = execs.PackExecutor(self, *self.argv, lazy=True)
# pack = execs.VoirExecutor(pack, cwd=main.parent)
pack = execs.ActivatorExecutor(pack, use_stdout=True)
return pack


__pack__ = FlopsBenchmarch
93 changes: 40 additions & 53 deletions benchmarks/flops/main.py
@@ -22,34 +22,37 @@
def _worker(state, queue, func, delay):
import time

while state['running']:
while state["running"]:
queue.put(func())
time.sleep(delay)



class Monitor:
def __init__(self, delay, func):
self.manager = multiprocessing.Manager()
self.state = self.manager.dict()
self.state['running'] = True
self.state["running"] = True
self.results = multiprocessing.Queue()
self.process = multiprocessing.Process(
target=_worker,
target=_worker,
args=(self.state, self.results, func, delay),
)

def start(self):
self.process.start()

def stop(self):
self.state['running'] = False
self.state["running"] = False
self.process.join()


def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA):
def modelflops(
model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA
):
# Not sure how correct thop's computation is;
# it says it returns MACs, but its method may be wrong
from thop import profile

# MAC: Multiply–accumulate operation
batch = torch.randn(*shape, dtype=dtype, device="cuda:0")

@@ -77,108 +80,92 @@ def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, un
return (flops * repeat) / (end - start) / unit



def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None):
torch.cuda.empty_cache()
a = torch.eye(n, dtype=dtype, device="cuda:0")
x = torch.randn((m, n), dtype=dtype, device="cuda:0")
y = torch.zeros_like(x)

F = N * (2 * m * n * n + 2 * m * n * n)
for i in range(R):

for i in range(R):
torch.cuda.synchronize()
ts = -time.time()

for _ in range(N):
# No allocation in main loop using dual-out strategy
y = torch.mm(x, a, out=y)
x = torch.mm(y, a, out=x)

torch.cuda.synchronize()
ts += time.time()

if log is not None:
log({
"task": "train",
"rate": F / ts / unit,
"units": "Tflops"
})

log({"task": "train", "rate": F / ts / unit, "units": "Tflops"})

torch.cuda.empty_cache()


def setupvoir():
# wtf this do
data_file = SmuggleWriter(sys.stdout)
# data_file = sys.stdout

def log(data):
if data_file is not None:
data["t"] = time.time()
print(json.dumps(data), file=data_file)

while not monitor.results.empty():
print(json.dumps(monitor.results.get()), file=data_file)

def monitor_fn():
data = {
gpu["device"]: {
"memory": [
gpu["memory"]["used"],
gpu["memory"]["used"],
gpu["memory"]["total"],
],
"load": gpu["utilization"]["compute"],
"temperature": gpu["temperature"],
"power": gpu["power"]
"power": gpu["power"],
}
for gpu in get_gpu_info()["gpus"].values()
}
return {"task": "main", "gpudata": data, "t": time.time()}

monitor = Monitor(0.5, monitor_fn)
monitor.start()
return log, monitor



def main():
dtypes = {
'bf16': torch.bfloat16,
'fp16': torch.float16,
'fp32': torch.float32,
"bf16": torch.bfloat16,
"fp16": torch.float16,
"fp32": torch.float32,
}

parser = ArgumentParser()
parser.add_argument('--repeat', type=int, default=100)
parser.add_argument('--number', type=int, default=100)
parser.add_argument('--m', type=int, default=256)
parser.add_argument('--n', type=int, default=256)
parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys())
parser.add_argument('--tf32', action='store_true', default=False)
parser.add_argument("--repeat", type=int, default=100)
parser.add_argument("--number", type=int, default=100)
parser.add_argument("--m", type=int, default=256)
parser.add_argument("--n", type=int, default=256)
parser.add_argument("--dtype", type=str, default="fp32", choices=dtypes.keys())
parser.add_argument("--tf32", action="store_true", default=False)

args = parser.parse_args()

torch.backends.cuda.matmul.allow_tf32 = False
if args.tf32:
torch.backends.cuda.matmul.allow_tf32 = True

log, monitor = setupvoir()

f(
args.number,
args.repeat,
args.m,
args.n,
TERA,
dtypes[args.dtype],
log
)
f(args.number, args.repeat, args.m, args.n, TERA, dtypes[args.dtype], log)

monitor.stop()

if __name__ == "__main__":
main()



if __name__ == "__main__":
main()
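A quick sanity check on the FLOP count used in benchmarks/flops/main.py above (a back-of-the-envelope estimate based only on the values visible in the diff): each pass of the inner loop in f() performs two (m, n) x (n, n) matrix multiplications, and a dense matmul of those shapes costs roughly 2*m*n*n floating-point operations, which is where the formula comes from:

F = N * (2*m*n*n + 2*m*n*n) = 4*N*m*n*n
with the argparse defaults N = 100, m = 256, n = 256: F = 4 * 100 * 256^3 ≈ 6.7e9 FLOPs per timed repetition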
9 changes: 5 additions & 4 deletions benchmarks/llama/benchfile.py
@@ -11,12 +11,12 @@ class LLAMA(Package):
def make_env(self):
return {
**super().make_env(),
"OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8))
"OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8)),
}

async def install(self):
await super().install()

def build_prepare_plan(self):
return CmdExecutor(
self,
@@ -36,7 +36,8 @@ def build_run_plan(self):
*self.argv,
"--cache",
str(self.dirs.cache),
use_stdout=True
use_stdout=True,
)


__pack__ = LLAMA