-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#ifndef _BLAZEPALMBARRACUDA_COMMON_H_ | ||
#define _BLAZEPALMBARRACUDA_COMMON_H_ | ||
|
||
#include "BlazePalmDetectionStruct.hlsl" | ||
|
||
// Maximum number of detections. This value must match MaxDetection | ||
// in PalmDetector.cs. | ||
#define MAX_DETECTION 64 | ||
|
||
// We can encode the geometric features of Detection into a float4x4 matrix. | ||
// This is handy for calculating weighted means of detections. | ||
|
||
// Packs the geometric fields of a detection into a 4x4 matrix (score and pad | ||
// are not included). Row 0 holds center.xy and extent.xy; rows 1-3 hold the | ||
// six key points, two per row, in index order. | ||
float4x4 DetectionToMatrix(in PalmDetection d) | ||
{ | ||
    float4 boxRow = float4(d.center, d.extent); | ||
    float4 keyRow0 = float4(d.keyPoints[0], d.keyPoints[1]); | ||
    float4 keyRow1 = float4(d.keyPoints[2], d.keyPoints[3]); | ||
    float4 keyRow2 = float4(d.keyPoints[4], d.keyPoints[5]); | ||
    return float4x4(boxRow, keyRow0, keyRow1, keyRow2); | ||
} | ||
|
||
// Inverse of DetectionToMatrix: unpacks the rows of m into the geometric | ||
// fields of a detection, stores the given score, and zeroes the padding. | ||
PalmDetection MatrixToDetection(float4x4 m, float score) | ||
{ | ||
    // Matrix rows: [center, extent], then two key points per row. | ||
    float4 boxRow = m[0]; | ||
    float4 keyRow0 = m[1]; | ||
    float4 keyRow1 = m[2]; | ||
    float4 keyRow2 = m[3]; | ||
|
||
    PalmDetection result; | ||
    result.center = boxRow.xy; | ||
    result.extent = boxRow.zw; | ||
    result.keyPoints[0] = keyRow0.xy; | ||
    result.keyPoints[1] = keyRow0.zw; | ||
    result.keyPoints[2] = keyRow1.xy; | ||
    result.keyPoints[3] = keyRow1.zw; | ||
    result.keyPoints[4] = keyRow2.xy; | ||
    result.keyPoints[5] = keyRow2.zw; | ||
    result.score = score; | ||
    result.pad = 0; | ||
    return result; | ||
} | ||
|
||
// Common math functions | ||
|
||
// Mirrors a point vertically: y becomes 1 - y, x is left untouched. | ||
float2 VFlip(float2 p) | ||
{ | ||
    p.y = 1 - p.y; | ||
    return p; | ||
} | ||
|
||
// Logistic function: 1 / (1 + e^-x). | ||
float Sigmoid(float x) | ||
{ | ||
    float decay = exp(-x); | ||
    return 1 / (1 + decay); | ||
} | ||
|
||
// Intersection-over-union of the boxes of two detections. Each box is taken | ||
// to span center - extent / 2 .. center + extent / 2 on both axes. | ||
float CalculateIOU(in PalmDetection d1, in PalmDetection d2) | ||
{ | ||
    float2 half1 = d1.extent / 2; | ||
    float2 half2 = d2.extent / 2; | ||
|
||
    // Corners of the overlap rectangle (empty when hi < lo on either axis). | ||
    float2 lo = max(d1.center - half1, d2.center - half2); | ||
    float2 hi = min(d1.center + half1, d2.center + half2); | ||
    float intersection = max(0, hi.x - lo.x) * max(0, hi.y - lo.y); | ||
|
||
    float areaFirst = d1.extent.x * d1.extent.y; | ||
    float areaSecond = d2.extent.x * d2.extent.y; | ||
    return intersection / (areaFirst + areaSecond - intersection); | ||
} | ||
|
||
#endif |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#ifndef _BLAZEPALMBARRACUDA_STRUCT_H_ | ||
#define _BLAZEPALMBARRACUDA_STRUCT_H_ | ||
|
||
// Detection structure: The layout of this structure must match the | ||
// one defined in Detection.cs. | ||
// One palm detection record: 20 floats in total (2 + 2 + 12 + 1 + 3). | ||
struct PalmDetection | ||
{ | ||
// Bounding box center. | ||
float2 center; | ||
// Bounding box size (width, height); the box spans center +/- extent / 2. | ||
float2 extent; | ||
// Six 2D key points. | ||
float2 keyPoints[6]; | ||
// Confidence score; the overlap-removal pass uses 0 to mark a removed entry. | ||
float score; | ||
// Always written as 0 by the code that fills this struct. | ||
// NOTE(review): presumably padding for the C# side layout -- confirm. | ||
float3 pad; | ||
}; | ||
|
||
#endif |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// | ||
// Function template for Postprocess1 (bounding box aggregation) | ||
// | ||
// Kernel body shared by all layers. The including file must define | ||
// CELLS_IN_ROW, ANCHOR_COUNT, INDEX_OFFSET and KERNEL_NAME beforehand. | ||
// Each thread handles one grid cell and appends every anchor of that cell | ||
// whose score exceeds _Threshold to _Output. | ||
[numthreads(CELLS_IN_ROW, CELLS_IN_ROW, 1)] | ||
void KERNEL_NAME(uint2 id : SV_DispatchThreadID) | ||
{ | ||
    // Reciprocal of the input image size, applied to all raw box values | ||
    const float invSize = 1 / _ImageSize; | ||
|
||
    // First input texture row that belongs to this cell | ||
    const uint baseRow = (id.y * CELLS_IN_ROW + id.x) * ANCHOR_COUNT + INDEX_OFFSET; | ||
|
||
    // Anchor point of this cell | ||
    const float2 anchor = (CELLS_IN_ROW - 0.5 - id) / CELLS_IN_ROW; | ||
|
||
    for (uint a = 0; a < ANCHOR_COUNT; a++) | ||
    { | ||
        // Texture row holding this anchor's values | ||
        const uint row = baseRow + a; | ||
|
||
        PalmDetection det; | ||
        det.score = Sigmoid(_Scores[uint2(0, row)]); | ||
        det.pad = 0; | ||
|
||
        // Bounding box: columns 0-1 are the center offset, 2-3 the size | ||
        float2 boxOffset = float2(_Boxes[uint2(0, row)], _Boxes[uint2(1, row)]); | ||
        float2 boxSize = float2(_Boxes[uint2(2, row)], _Boxes[uint2(3, row)]); | ||
        det.center = VFlip(anchor + boxOffset * invSize); | ||
        det.extent = boxSize * invSize; | ||
|
||
        // Key points: six (x, y) pairs starting at column 4 | ||
        [unroll] for (uint k = 0; k < 6; k++) | ||
        { | ||
            float px = _Boxes[uint2(4 + 2 * k + 0, row)]; | ||
            float py = _Boxes[uint2(4 + 2 * k + 1, row)]; | ||
            det.keyPoints[k] = VFlip(anchor + float2(px, py) * invSize); | ||
        } | ||
|
||
        // Keep only detections above the confidence threshold | ||
        if (det.score > _Threshold) _Output.Append(det); | ||
    } | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#pragma kernel Postprocess1Layer1 | ||
#pragma kernel Postprocess1Layer2 | ||
|
||
// | ||
// 1st postprocessor (bounding box aggregation) | ||
// | ||
|
||
#include "BlazePalmCommonShader.hlsl" | ||
|
||
// Input uniforms | ||
// _Scores: raw score per anchor, read at column 0 of the anchor's row and | ||
// passed through Sigmoid by the kernel. | ||
Texture2D<float> _Scores; | ||
// _Boxes: per-anchor row; columns 0-3 are read as box x, y, w, h and | ||
// columns 4-15 as six key point (x, y) pairs. | ||
Texture2D<float> _Boxes; | ||
// _ImageSize: the kernel multiplies all box values by 1 / _ImageSize. | ||
float _ImageSize; | ||
// _Threshold: detections are appended only when score > _Threshold. | ||
float _Threshold; | ||
|
||
// Output uniforms | ||
// _Output: receives the detections that pass the threshold. | ||
AppendStructuredBuffer<PalmDetection> _Output; | ||
|
||
// The kernel body lives in BlazePalmPostprocess1Kernel.hlsl and is | ||
// parameterized through the macros below; each #include emits one kernel. | ||
// | ||
// First layer: 8-pixel stride, two anchors | ||
// 16 x 16 cells x 2 anchors = 512 rows, starting at row 384, i.e. right | ||
// after the 8 x 8 x 6 = 384 rows read by the second layer. | ||
#define CELLS_IN_ROW 16 | ||
#define ANCHOR_COUNT 2 | ||
#define INDEX_OFFSET 384 | ||
#define KERNEL_NAME Postprocess1Layer1 | ||
#include "BlazePalmPostprocess1Kernel.hlsl" | ||
|
||
// Clear the parameters so they can be redefined for the next kernel. | ||
#undef CELLS_IN_ROW | ||
#undef ANCHOR_COUNT | ||
#undef KERNEL_NAME | ||
#undef INDEX_OFFSET | ||
|
||
// Second layer: 16-pixel stride, six anchors | ||
// 8 x 8 cells x 6 anchors = 384 rows, starting at row 0. | ||
#define CELLS_IN_ROW 8 | ||
#define ANCHOR_COUNT 6 | ||
#define INDEX_OFFSET 0 | ||
#define KERNEL_NAME Postprocess1Layer2 | ||
#include "BlazePalmPostprocess1Kernel.hlsl" |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#pragma kernel Postprocess2 | ||
|
||
// | ||
// 2nd postprocessor (overlap removal) | ||
// | ||
|
||
#include "BlazePalmCommonShader.hlsl" | ||
|
||
// Two detections are merged when their IOU is at least this value. | ||
#define IOU_THRESHOLD 0.3 | ||
|
||
// Input uniforms | ||
// _Input: candidate detections, consumed one at a time by the kernel. | ||
ConsumeStructuredBuffer<PalmDetection> _Input; | ||
// _Count: the uint at byte offset 0 is read as the number of candidates. | ||
// NOTE(review): presumably filled from _Input's counter on the C# side -- confirm. | ||
ByteAddressBuffer _Count; | ||
|
||
// Output uniforms | ||
// _Output: receives one blended detection per group of overlapping entries. | ||
AppendStructuredBuffer<PalmDetection> _Output; | ||
|
||
// Detection entry cache | ||
// Holds at most MAX_DETECTION entries; an entry's score is set to 0 once it | ||
// has been merged into another detection. | ||
groupshared PalmDetection _entry[MAX_DETECTION]; | ||
|
||
// Overlap removal. Reads the candidate detections, groups entries whose | ||
// boxes overlap (IOU >= IOU_THRESHOLD) with a seed entry, and appends one | ||
// detection per group to _Output: the score-weighted mean of the group's | ||
// geometry, carrying the highest score in the group. | ||
// Runs on a single thread; `id` is unused. | ||
[numthreads(1, 1, 1)] | ||
void Postprocess2(uint3 id : SV_DispatchThreadID) | ||
{ | ||
    // Number of candidates to process. Clamped to the cache capacity so an | ||
    // oversized count can never index past the end of _entry. | ||
    uint entry_count = min(_Count.Load(0), (uint)MAX_DETECTION); | ||
    if (entry_count == 0) return; | ||
|
||
    // Detection entry cache initialization | ||
    uint i; | ||
    for (i = 0; i < entry_count; i++) | ||
        _entry[i] = _Input.Consume(); | ||
|
||
    // Use every entry as a potential seed. The bound is entry_count (not | ||
    // entry_count - 1) so that a last entry which was not merged into an | ||
    // earlier group -- including the only entry when entry_count == 1 -- | ||
    // is still emitted instead of being silently dropped. | ||
    for (i = 0; i < entry_count; i++) | ||
    { | ||
        // Score 0 marks an entry already merged into an earlier group. | ||
        if (_entry[i].score == 0) continue; | ||
|
||
        // Variables for accumulation | ||
        float max_score = _entry[i].score; | ||
        float4x4 g_acc = DetectionToMatrix(_entry[i]) * max_score; | ||
        float s_acc = max_score; | ||
|
||
        for (uint j = i + 1; j < entry_count; j++) | ||
        { | ||
            if (_entry[j].score == 0) continue; | ||
|
||
            // Overlap test | ||
            if (CalculateIOU(_entry[i], _entry[j]) < IOU_THRESHOLD) continue; | ||
|
||
            // Accumulation (geometry weighted by score) | ||
            float score = _entry[j].score; | ||
            g_acc += DetectionToMatrix(_entry[j]) * score; | ||
            s_acc += score; | ||
            max_score = max(max_score, score); | ||
|
||
            // Remove the entry. | ||
            _entry[j].score = 0; | ||
        } | ||
|
||
        // Output the blended detection. s_acc is non-zero here because the | ||
        // seed's score was checked above. | ||
        _Output.Append(MatrixToDetection(g_acc / s_acc, max_score)); | ||
    } | ||
} |