// gating_lib.h
#include <stdio.h>
#include <stdlib.h>
#include <cublas_v2.h>
#include <cuda_profiler_api.h>
#include <iostream>
#include <fstream>
#include <float.h>
// Presumably the default number of experts selected per token (compare
// gating_param::num_experts_per_tok) -- TODO confirm against the implementation.
#define TOPK_DEFAULT 6
// Presumably the default number of routed experts (compare
// gating_param::n_routed_experts) -- TODO confirm against the implementation.
#define E_DEFAULT 160
// All 32 bits set. NOTE(review): the name suggests a sentinel marking padded
// entries; its use is not visible in this header -- confirm before relying on it.
#define MAX_PAD 0xffffffff
// Plain configuration record for the gating routines declared in this header.
// NOTE(review): presumably this is what gating_api() receives through its
// untyped `params` pointer -- confirm against the implementation.
// Field order and types define the binary layout; do not reorder.
struct gating_param {
    // --- problem shape (meanings inferred from names; TODO confirm) ---
    int   bsz;                     // presumably the batch size
    int   seq_len;                 // presumably the sequence length
    int   n_routed_experts;        // presumably the number of routed experts
    int   num_experts_per_tok;     // presumably the top-k experts chosen per token

    // --- weighting / auxiliary-loss options (TODO confirm semantics) ---
    bool  norm_topk_prob;          // presumably: normalise the selected top-k probabilities
    float routed_scaling_factor;
    float aux_loss_alpha;
    bool  seq_aux;

    // --- user-defined capacity settings ---
    int   expert_capacity_number;
    int   drop_policy;             // original note lists "probs", "position", or other;
                                   // the integer encoding is not shown in this header
    bool  drop_and_pad;
    int   opt_level;
};
// C-linkage entry point of the gating library (declaration only; the
// definition is not part of this header).
//
// NOTE(review): the parameter roles below are inferred from names alone --
// confirm against the implementation before relying on them.
//   temp_buf_in : untyped buffer, presumably caller-provided scratch space
//   params      : untyped pointer, presumably to a gating_param record
//   logits      : presumably the input router logits
//   topk_res    : presumably receives the selected expert indices
//   weight_out  : presumably receives the weights of the selected experts
//   loss        : presumably receives the auxiliary loss
//   gates_out   : presumably receives the gate probabilities
//   histc       : presumably receives a per-expert count histogram
// Returns an int; presumably a status code -- TODO confirm the convention.
extern "C" int gating_api(void*  temp_buf_in,
                          void*  params,
                          float* logits,
                          int*   topk_res,
                          float* weight_out,
                          float* loss,
                          float* gates_out,
                          int*   histc);
// C-linkage companion entry point (declaration only; the definition is not
// part of this header). NOTE(review): the "_bk" suffix and the grad_* names
// suggest this is the backward pass -- confirm against the implementation.
//
// Parameter roles are inferred from names alone -- TODO confirm:
//   grad_loss       : presumably the incoming gradient w.r.t. the loss
//   grad_weight_out : presumably the incoming gradient w.r.t. the output weights
//   histc, topk_res : presumably the histogram / top-k indices from the forward call
//   gates           : presumably the gate probabilities from the forward call
//   grad_input      : presumably receives the gradient w.r.t. the logits
//   routed_factor   : float scale; compare gating_param::routed_scaling_factor
//   loss_factor     : float scale; compare gating_param::aux_loss_alpha
//   m, e, k         : integer sizes; presumably token count, expert count and top-k
// Returns an int; presumably a status code -- TODO confirm the convention.
extern "C" int gating_api_bk(float* grad_loss,
                             float* grad_weight_out,
                             int*   histc,
                             int*   topk_res,
                             float* gates,
                             float* grad_input,
                             float  routed_factor,
                             float  loss_factor,
                             int    m,
                             int    e,
                             int    k);