/* SPDX-License-Identifier: BSD-3-Clause */
/*
 * BRIEF DESCRIPTION
 *
 * Some General x86_64 operations.
 * NT means Non-Temporal (Intel's terminology)
 *
 * Copyright (c) 2018 NetApp, Inc. All rights reserved.
 *
 * See module.c for LICENSE details.
 */

#ifndef __ZUS_MOVENT_H
#define __ZUS_MOVENT_H

#define CACHELINE_SHIFT		(6)
#define CACHELINE_SIZE		(1UL << CACHELINE_SHIFT)

#include <emmintrin.h>

#include "zus.h"
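
/*
 * clflushopt and clwb are emitted below as raw opcode bytes (a 0x66 prefix
 * on the clflush/xsaveopt encodings) so the code assembles even with older
 * toolchains that do not know the newer mnemonics.
 */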
static inline void a_clflushopt(void *p)
{
	asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p));
}

static inline void a_clwb(void *p)
{
	asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)p));
}
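
/*
 * clflush executions are ordered with respect to writes to the flushed
 * line, so no explicit store barrier is needed after the loop below.
 */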
static inline void cl_flush(void *buf, uint32_t len)
{
	uint32_t i;

	len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1));
	for (i = 0; i < len; i += CACHELINE_SIZE)
		_mm_clflush(buf + i);
}

/*
 * clwb writes back cachelines concurrently and requires a store
 * barrier (sfence) to ensure completion.
 *
 * WARNING: don't use directly, will crash old unsupported CPUs!
 */
static inline void __cl_flush_wb(void *buf, uint32_t len)
{
	uint32_t i;

	len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1));
	for (i = 0; i < len; i += CACHELINE_SIZE)
		a_clwb(buf + i);
	_mm_sfence();
}

/*
 * clflushopt flushes cachelines concurrently and requires a store
 * barrier (sfence) to ensure completion.
 *
 * WARNING: don't use directly, will crash old unsupported CPUs!
 */
static inline void __cl_flush_opt(void *buf, uint32_t len)
{
	uint32_t i;

	len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1));
	for (i = 0; i < len; i += CACHELINE_SIZE)
		a_clflushopt(buf + i);
	_mm_sfence();
}
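
/*
 * These pointers are expected to be resolved once at startup to the best
 * flush primitive the CPU supports, so callers pay only an indirect call.
 * A minimal dispatch sketch (the init function and cpuid helpers below are
 * illustrative assumptions, not part of this header):
 *
 *	static void flush_dispatch_init(void)
 *	{
 *		if (cpu_supports_clwb())		// hypothetical feature test
 *			cl_flush_wb = __cl_flush_wb;
 *		if (cpu_supports_clflushopt())		// hypothetical feature test
 *			cl_flush_opt = __cl_flush_opt;
 *		// otherwise callers fall back to the plain cl_flush() above
 *	}
 */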
extern void (*cl_flush_opt)(void *buf, uint32_t len);
extern void (*cl_flush_wb)(void *buf, uint32_t len);

/* TODO use AVX-512 instructions if available PXS-245 */
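/*
 * Zeroes whole cachelines with movnti: each loop iteration stores eight
 * zeroed quadwords (64 bytes, i.e. one cacheline) directly to memory,
 * bypassing the CPU cache.
 */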
static inline void _memzero_nt_cachelines(void *dst, size_t cachelines)
{
	/* must use dummy outputs so not to clobber inputs */
	ulong dummy1, dummy2;

	asm volatile (
		"xor %%rax,%%rax\n"
		"1: movnti %%rax,(%0)\n"
		"movnti %%rax,1*8(%0)\n"
		"movnti %%rax,2*8(%0)\n"
		"movnti %%rax,3*8(%0)\n"
		"movnti %%rax,4*8(%0)\n"
		"movnti %%rax,5*8(%0)\n"
		"movnti %%rax,6*8(%0)\n"
		"movnti %%rax,7*8(%0)\n"
		"leaq 64(%0),%0\n"
		"dec %1\n"
		"jnz 1b\n"
		: "=D" (dummy1), "=d" (dummy2) :
		  "D" (dst), "d" (cachelines) : "memory", "rax");
}
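
/*
 * memzero_nt(): zero @len bytes at @dst.  The cacheline-aligned middle is
 * zeroed with non-temporal stores; any unaligned head and tail are handled
 * with memset() followed by cl_flush().  Illustrative use (buffer names are
 * the caller's own):
 *
 *	memzero_nt(new_block, block_bytes);
 *
 * Note: movnti stores are weakly ordered, so an sfence (or a persist
 * primitive that issues one) is expected before the zeroed range is relied
 * upon as durable.
 */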
static inline void memzero_nt(void *dst, size_t len)
{
	size_t cachelines, prefix_len;

	/* if dst is not cacheline aligned, fill with memset */
	if (unlikely((ulong)dst & (CACHELINE_SIZE-1))) {
		prefix_len = CACHELINE_SIZE - ((ulong)dst & (CACHELINE_SIZE-1));
		if (prefix_len > len)
			prefix_len = len;

		memset(dst, 0, prefix_len);
		cl_flush(dst, prefix_len);
		len -= prefix_len;
		dst += prefix_len;
	}

	cachelines = len >> CACHELINE_SHIFT;
	if (likely(cachelines))
		_memzero_nt_cachelines(dst, cachelines);

	/* fill remaining bytes with memset */
	len -= cachelines << CACHELINE_SHIFT;
	dst += cachelines << CACHELINE_SHIFT;
	if (unlikely(len > 0)) {
		memset(dst, 0, len);
		cl_flush(dst, len);
	}
}

/* zus: nvml_movnt.c */
void *pmem_memmove_persist(void *pmemdest, const void *src, size_t len);
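
/*
 * memcpy-style alias below: presumably safe because the memmove-based
 * routine also handles overlapping ranges (an assumption based on its
 * NVML/PMDK lineage), and, per its name, it persists the copied bytes
 * before returning.
 */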
#define memcpy_to_pmem pmem_memmove_persist

#endif /* ifndef __ZUS_MOVENT_H */