Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/pin_toolchain
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Feb 17, 2025
2 parents a4faeb7 + 580e622 commit 819c129
Show file tree
Hide file tree
Showing 160 changed files with 4,051 additions and 1,491 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ on:
jobs:
# Check license header
license-header-check:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
name: Check License Header
steps:
- uses: actions/checkout@v4
Expand Down
65 changes: 43 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,10 @@ ctor = "0.2.9"
dashmap = "6.0.1"
datafusion = { path = "datafusion/core", version = "45.0.0", default-features = false }
datafusion-catalog = { path = "datafusion/catalog", version = "45.0.0" }
datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0", default-features = false }
datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0" }
datafusion-common = { path = "datafusion/common", version = "45.0.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "45.0.0" }
datafusion-datasource = { path = "datafusion/datasource", version = "45.0.0", default-features = false }
datafusion-doc = { path = "datafusion/doc", version = "45.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "45.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "45.0.0" }
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,21 @@
![Commit Activity][commit-activity-badge]
[![Open Issues][open-issues-badge]][open-issues-url]
[![Discord chat][discord-badge]][discord-url]
[![Linkedin][linkedin-badge]][linkedin-url]

[crates-badge]: https://img.shields.io/crates/v/datafusion.svg
[crates-url]: https://crates.io/crates/datafusion
[license-badge]: https://img.shields.io/badge/license-Apache%20v2-blue.svg
[license-url]: https://github.com/apache/datafusion/blob/main/LICENSE.txt
[actions-badge]: https://github.com/apache/datafusion/actions/workflows/rust.yml/badge.svg
[actions-url]: https://github.com/apache/datafusion/actions?query=branch%3Amain
[discord-badge]: https://img.shields.io/discord/885562378132000778.svg?logo=discord&style=flat-square
[discord-badge]: https://img.shields.io/badge/Chat-Discord-purple
[discord-url]: https://discord.com/invite/Qw5gKqHxUM
[commit-activity-badge]: https://img.shields.io/github/commit-activity/m/apache/datafusion
[open-issues-badge]: https://img.shields.io/github/issues-raw/apache/datafusion
[open-issues-url]: https://github.com/apache/datafusion/issues
[linkedin-badge]: https://img.shields.io/badge/Follow-Linkedin-blue
[linkedin-url]: https://www.linkedin.com/company/apache-datafusion/

[Website](https://datafusion.apache.org/) |
[API Docs](https://docs.rs/datafusion/latest/datafusion/) |
Expand Down
188 changes: 188 additions & 0 deletions benchmarks/lineprotocol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


"""
Converts a given json to LineProtocol format that can be
visualised by grafana/other systems that support LineProtocol.
Usage example:
$ python3 lineprotocol.py sort.json
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=0,row_count=10838832,elapsed_ms=85626006 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=1,row_count=10838832,elapsed_ms=68694468 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=2,row_count=10838832,elapsed_ms=63392883 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=3,row_count=10838832,elapsed_ms=66388367 1691105678000000000
"""

# sort.json
"""
{
"queries": [
{
"iterations": [
{
"elapsed": 85626.006132,
"row_count": 10838832
},
{
"elapsed": 68694.467851,
"row_count": 10838832
},
{
"elapsed": 63392.883406,
"row_count": 10838832
},
{
"elapsed": 66388.367387,
"row_count": 10838832
}
],
"query": "sort utf8",
"start_time": 1691105678
}
],
"context": {
"arguments": [
"sort",
"--path",
"benchmarks/data",
"--scale-factor",
"1.0",
"--iterations",
"4",
"-o",
"sort.json"
],
"benchmark_version": "28.0.0",
"datafusion_version": "28.0.0",
"num_cpus": 8,
"start_time": 1691105678
}
}
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from typing import Dict, List, Any
from pathlib import Path
from argparse import ArgumentParser
import sys
# Rebind the module-level name ``print`` to ``sys.stdout.write``. Unlike the
# builtin, it accepts a single string and appends no newline, so callers must
# include "\n" themselves.
print = sys.stdout.write


@dataclass
class QueryResult:
    """Measurements recorded for one iteration of a query."""

    elapsed: float
    row_count: int

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> QueryResult:
        """Build a result from one decoded JSON iteration object.

        Only the ``"elapsed"`` and ``"row_count"`` keys are read; a missing
        key raises ``KeyError``.
        """
        elapsed = data["elapsed"]
        row_count = data["row_count"]
        return cls(elapsed, row_count)


@dataclass
class QueryRun:
    """All recorded iterations of a single benchmark query."""

    # Query label as stored in the results file (e.g. "sort utf8"); it is
    # emitted as a quoted string field, so it is a string, not an int.
    query: str
    iterations: List[QueryResult]
    # NOTE(review): presumably seconds since the Unix epoch -- confirm
    # against the benchmark writer.
    start_time: int

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> QueryRun:
        """Build a run from one decoded entry of the ``"queries"`` array.

        Reads the ``"query"``, ``"iterations"`` and ``"start_time"`` keys; a
        missing key raises ``KeyError``.
        """
        return cls(
            query=data["query"],
            # Go through QueryResult.load_from so that unrelated extra keys
            # in an iteration object are ignored instead of raising TypeError.
            iterations=[
                QueryResult.load_from(iteration)
                for iteration in data["iterations"]
            ],
            start_time=data["start_time"],
        )

    @property
    def execution_time(self) -> float:
        """Return the smallest ``elapsed`` value across all iterations.

        Raises:
            ValueError: if the run has no iterations.
        """
        # Explicit check rather than ``assert`` so it survives ``python -O``.
        if not self.iterations:
            raise ValueError(
                f"query {self.query!r} has no iterations to take a time from"
            )

        # Use minimum execution time to account for variations / other
        # things the system was doing
        return min(iteration.elapsed for iteration in self.iterations)


@dataclass
class Context:
    """Metadata describing the environment of one benchmark invocation."""

    benchmark_version: str
    datafusion_version: str
    num_cpus: int
    start_time: int
    arguments: List[str]
    # First entry of ``arguments``.
    name: str

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> Context:
        """Build a context from the decoded ``"context"`` JSON object.

        Each field except ``name`` is copied from the key of the same name;
        ``name`` is taken from the first element of ``"arguments"``.
        """
        copied_keys = (
            "benchmark_version",
            "datafusion_version",
            "num_cpus",
            "start_time",
            "arguments",
        )
        fields = {key: data[key] for key in copied_keys}
        return cls(name=fields["arguments"][0], **fields)


@dataclass
class BenchmarkRun:
    """A complete benchmark results file: run context plus per-query runs."""

    context: Context
    queries: List[QueryRun]

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> BenchmarkRun:
        """Build a run from the decoded top-level JSON object.

        Reads the ``"context"`` object and the ``"queries"`` array; a missing
        key raises ``KeyError``.
        """
        return cls(
            context=Context.load_from(data["context"]),
            queries=[QueryRun.load_from(result) for result in data["queries"]],
        )

    @classmethod
    def load_from_file(cls, path: Path) -> BenchmarkRun:
        """Read the JSON file at ``path`` and build a run from it."""
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return cls.load_from(json.load(f))


def _escape_field_string(value: Any) -> str:
    """Escape ``value`` for use inside a double-quoted string field."""
    # Backslashes first, so the ones added for quotes are not doubled again.
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def lineformat(
    baseline: Path,
) -> None:
    """Write the benchmark file at ``baseline`` to stdout as line protocol.

    One line is written per query iteration. The measurement is
    ``benchmark``; the tags come from the run context; the fields are the
    query label, the iteration index, the row count and the elapsed value;
    the timestamp is the query's ``start_time`` multiplied by 10**9.

    Args:
        baseline: path of the benchmark results JSON file.
    """
    run = BenchmarkRun.load_from_file(baseline)
    context = run.context
    benchmark_str = (
        f"benchmark,name={context.name},version={context.benchmark_version},"
        f"datafusion_version={context.datafusion_version},"
        f"num_cpus={context.num_cpus}"
    )
    # Write through sys.stdout directly: every line carries its own "\n" and
    # must not get a second one appended.
    write = sys.stdout.write
    for query in run.queries:
        # Quotes and backslashes in the label would otherwise terminate or
        # corrupt the quoted string field.
        query_str = f'query="{_escape_field_string(query.query)}"'
        timestamp = f"{query.start_time*10**9}"
        for iter_num, result in enumerate(query.iterations):
            # NOTE(review): elapsed is multiplied by 1000 but the field is
            # named elapsed_ms -- confirm the unit of the input value.
            write(
                f"{benchmark_str} {query_str},iteration={iter_num},"
                f"row_count={result.row_count},"
                f"elapsed_ms={result.elapsed*1000:.0f} {timestamp}\n"
            )

def main() -> None:
    """Parse the command line and print the given benchmark file as line protocol."""
    parser = ArgumentParser()
    parser.add_argument(
        "path",
        type=Path,
        help="Path to the benchmark file.",
    )
    options = parser.parse_args()

    # The positional argument is declared as "path", so argparse stores it
    # as ``options.path``; no ``baseline_path`` attribute exists.
    lineformat(options.path)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 819c129

Please sign in to comment.