Skip to content

Commit

Permalink
Merge branch 'main' into message_processor
Browse files Browse the repository at this point in the history
  • Loading branch information
TomTheBear authored Dec 11, 2024
2 parents 98ba5ef + 8837ff4 commit f7c7a52
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ jobs:
Release:
runs-on: ubuntu-latest
# We need the RPMs, so add dependency
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build]
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-jammy-build]

steps:
# See: https://github.com/actions/download-artifact
Expand Down
37 changes: 17 additions & 20 deletions .github/workflows/runonce.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,11 @@ jobs:
submodules: recursive
fetch-depth: 0

# Use dnf to install build dependencies
- name: Install build dependencies
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'

- name: RPM build MetricCollector
id: rpmbuild
Expand Down Expand Up @@ -126,13 +124,11 @@ jobs:
submodules: recursive
fetch-depth: 0

# Use dnf to install build dependencies
- name: Install build dependencies
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'

- name: RPM build MetricCollector
id: rpmbuild
Expand Down Expand Up @@ -163,12 +159,13 @@ jobs:
submodules: recursive
fetch-depth: 0
# Use official golang package
- name: Install Golang
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version

# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'

- name: DEB build MetricCollector
id: dpkg-build
run: |
Expand Down
24 changes: 20 additions & 4 deletions collectors/ipmiMetric.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)
Expand Down Expand Up @@ -54,15 +55,30 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
// Check if executables ipmitool or ipmisensors are found
p, err := exec.LookPath(m.config.IpmitoolPath)
if err == nil {
m.ipmitool = p
command := exec.Command(p)
err := command.Run()
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %v", p, err.Error()))
m.ipmitool = ""
} else {
m.ipmitool = p
}
}
p, err = exec.LookPath(m.config.IpmisensorsPath)
if err == nil {
m.ipmisensors = p
command := exec.Command(p)
err := command.Run()
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %v", p, err.Error()))
m.ipmisensors = ""
} else {
m.ipmisensors = p
}
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return errors.New("no IPMI reader found")
return errors.New("no usable IPMI reader found")
}

m.init = true
return nil
}
Expand Down Expand Up @@ -119,8 +135,8 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", string(errMsg)),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
}
}
Expand Down
12 changes: 12 additions & 0 deletions collectors/likwidMetric.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,21 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
}
defer watcher.Close()
if len(m.config.LockfilePath) > 0 {
// Check if the lock file exists
info, err := os.Stat(m.config.LockfilePath)
if os.IsNotExist(err) {
// Create the lock file if it does not exist
file, createErr := os.Create(m.config.LockfilePath)
if createErr != nil {
return true, fmt.Errorf("failed to create lock file: %v", createErr)
}
file.Close()
info, err = os.Stat(m.config.LockfilePath) // Recheck the file after creation
}
if err != nil {
return true, err
}
// Check file ownership
uid := info.Sys().(*syscall.Stat_t).Uid
if uid != uint32(os.Getuid()) {
usr, err := user.LookupId(fmt.Sprint(uid))
Expand All @@ -387,6 +398,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
return true, fmt.Errorf("Access to performance counters locked by %d", uid)
}
}
// Add the lock file to the watcher
err = watcher.Add(m.config.LockfilePath)
if err != nil {
cclog.ComponentError(m.name, err.Error())
Expand Down
7 changes: 5 additions & 2 deletions collectors/likwidMetric.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
{
"events" : {
"COUNTER0": "EVENT0",
"COUNTER1": "EVENT1",
"COUNTER1": "EVENT1"
},
"metrics" : [
{
Expand All @@ -27,7 +27,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
}
]
}
]
],
"globalmetrics" : [
{
"name": "global_sum",
Expand Down Expand Up @@ -132,6 +132,9 @@ In some cases LIKWID returns `0.0` for some events that are further used in proc

One might think this does not happen often, but commonly used metrics in the world of performance engineering, like Instructions-per-Cycle (IPC) or, more frequently, the actual CPU clock, are derived from events like `CPU_CLK_UNHALTED_CORE` (Intel), which do not increment in the halted state (as the name implies). There are different power management systems in a chip which can cause a hardware thread to enter such a state. Moreover, if no cycles are executed by the core, many other events are not incremented either (like `INSTR_RETIRED_ANY` for retired instructions, which is part of IPC).

### `lockfile_path` option
LIKWID can be configured with a lock file that restricts access to the performance monitoring registers (only the owner of the lock file is allowed to access the registers). When the `lockfile_path` option is set, the collector subscribes to changes to this file and stops monitoring if the owner of the lock file changes. This feature is useful when users should be able to perform their own hardware performance counter measurements through LIKWID or any other tool.

### `send_*_total_values` option

- `send_core_total_values`: Metrics, which are usually collected on a per hardware thread basis, are additionally summed up per CPU core.
Expand Down
4 changes: 2 additions & 2 deletions sinks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ This folder contains the SinkManager and sink implementations for the cc-metric-
The configuration file for the sinks is a JSON object that maps a freely chosen sink name to that sink's configuration. The `type` field in each configuration specifies which sink to initialize.

```json
[
{
"mystdout" : {
"type" : "stdout",
"meta_as_tags" : [
Expand All @@ -31,7 +31,7 @@ The configuration file for the sinks is a list of configurations. The `type` fie
"database" : "ccmetric",
"password" : "<jwt token>"
}
]
}
```


Expand Down

0 comments on commit f7c7a52

Please sign in to comment.