-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstate.h
364 lines (321 loc) · 12.8 KB
/
state.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/*
* Copyright (c) 2011-2013, Los Alamos National Security, LLC.
* All rights Reserved.
*
* Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
* under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
* Laboratory (LANL), which is operated by Los Alamos National Security, LLC
* for the U.S. Department of Energy. The U.S. Government has rights to use,
* reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
* ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Los Alamos National Security, LLC, Los Alamos
* National Laboratory, LANL, the U.S. Government, nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
* NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
* SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* CLAMR -- LA-CC-11-094
* This research code is being developed as part of the
* 2011 X Division Summer Workshop for the express purpose
* of a collaborative code for development of ideas in
* the implementation of AMR codes for Exascale platforms
*
* AMR implementation of the Wave code previously developed
* as a demonstration code for regular grids on Exascale platforms
* as part of the Supercomputing Challenge and Los Alamos
* National Laboratory
*
* Authors: Bob Robey XCP-2 [email protected]
* Neal Davis [email protected], [email protected]
* David Nicholaeff [email protected], [email protected]
* Dennis Trujillo [email protected], [email protected]
*
*/
#ifndef STATE_H_
#define STATE_H_
#include <list>
#include "MallocPlus/MallocPlus.h"
#include "mesh/mesh.h"
#include "crux/crux.h"
#ifdef HAVE_OPENCL
#include "ezcl/ezcl.h"
#endif
#include "l7/l7.h"
/*
 * Integer status codes.
 * NOTE(review): presumably the values carried by the error_status argument of
 * State::print_rollback_log -- confirm at the call sites.
 */
#define STATUS_OK 0
#define STATUS_NAN 1
#define STATUS_MASS_LOSS 2
/*
 * Floating-point precision selection.
 *
 * Exactly one of the branches below is compiled, chosen in this priority:
 *   MINIMUM_PRECISION -- state and intermediates are float
 *   MIXED_PRECISION   -- state is float, intermediates are double
 *   FULL_PRECISION    -- state and intermediates are double (the default when
 *                        none of the three macros is defined)
 *
 * Each branch provides:
 *   state_t, real_t, real2_t         host types
 *   CONSERVATION_EPS                 NOTE(review): presumably the tolerance of
 *                                    the conservation check -- confirm at use site
 *   cl_state_t, cl_state4_t,
 *   cl_real_t, cl_real2_t, cl_real4_t   only when HAVE_OPENCL is defined
 *   MPI_STATE_T, MPI_REAL_T,
 *   L7_STATE_T, L7_REAL_T               only when HAVE_MPI is defined
 */
#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION)
#define FULL_PRECISION
#endif

/*
 * NO_CL_DOUBLE overrides any other choice and forces minimum precision.
 * The define is guarded so that a MINIMUM_PRECISION already supplied on the
 * command line (where it expands to 1) is not redefined with a different
 * replacement list.
 */
#ifdef NO_CL_DOUBLE
#undef FULL_PRECISION
#undef MIXED_PRECISION
#ifndef MINIMUM_PRECISION
#define MINIMUM_PRECISION
#endif
#endif

#if defined(MINIMUM_PRECISION)
typedef float state_t; // this is for physics state variables ncell in size
typedef float real_t; // this is used for intermediate calculations
typedef struct
{
float s0;
float s1;
} real2_t;
#define CONSERVATION_EPS 15.0
#ifdef HAVE_OPENCL
typedef cl_float cl_state_t; // for gpu physics state variables
typedef cl_float4 cl_state4_t; // for gpu physics state variables
typedef cl_float cl_real_t; // for intermediate gpu physics state variables
typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables
#define MPI_REAL_T MPI_FLOAT // for MPI communication for intermediate (real_t) values
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_FLOAT
#endif

#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
typedef float state_t; // physics state variables are stored as float
typedef double real_t; // intermediate calculations are done in double
typedef struct
{
double s0;
double s1;
} real2_t;
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_float cl_state_t; // for gpu physics state variables
typedef cl_float4 cl_state4_t; // for gpu physics state variables
typedef cl_double cl_real_t; // for intermediate gpu physics state variables
typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables
#define MPI_REAL_T MPI_DOUBLE // for MPI communication for intermediate (real_t) values
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_DOUBLE
#endif

#elif defined(FULL_PRECISION)
typedef double state_t; // physics state variables are stored as double
typedef double real_t; // intermediate calculations are done in double
typedef struct
{
double s0;
double s1;
} real2_t;
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_double cl_state_t; // for gpu physics state variables
typedef cl_double4 cl_state4_t; // for gpu physics state variables
typedef cl_double cl_real_t; // for intermediate gpu physics state variables
typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_DOUBLE // for MPI communication for physics state variables
#define MPI_REAL_T MPI_DOUBLE // for MPI communication for intermediate (real_t) values
#define L7_STATE_T L7_DOUBLE
#define L7_REAL_T L7_DOUBLE
#endif
#endif
/* Declared with C linkage, takes no arguments and returns nothing; the definition is not in this header. */
extern "C" void do_calc(void);
/*
 * Summation method selector.
 * NOTE(review): presumably the value passed as enhanced_precision_sum to
 * State::mass_sum -- confirm at the call sites.
 */
enum SUM_TYPE
{
SUM_REGULAR = 0, /* plain summation */
SUM_KAHAN = 1    /* by name, Kahan (compensated) summation -- confirm in the implementation */
};
/*
 * Rule selector taken by State::symmetry_check through its sign_rule
 * parameter. What each rule means is decided by that routine's
 * implementation, which is not visible in this header.
 *
 * No comma follows the last enumerator: a trailing comma is only accepted
 * from C++11 / C99 onwards, and the other enums in this file do not use one.
 */
enum SIGN_RULE {
DIAG_RULE,
X_RULE,
Y_RULE
};
/*
 * Timer categories. Each value is an index into State::cpu_timers and
 * State::gpu_timers; STATE_TIMER_SIZE is the element count of those arrays
 * and therefore has to remain the last enumerator.
 */
typedef enum state_timers
{
STATE_TIMER_APPLY_BCS = 0,
STATE_TIMER_SET_TIMESTEP,
STATE_TIMER_FINITE_DIFFERENCE,
STATE_TIMER_REFINE_POTENTIAL,
STATE_TIMER_CALC_MPOT,
STATE_TIMER_REZONE_ALL,
STATE_TIMER_MASS_SUM,
STATE_TIMER_READ,
STATE_TIMER_WRITE,
STATE_TIMER_SIZE /* number of categories, not a category itself */
} state_timer_category;
// NOTE(review): a header-scope using-directive exposes all of std to every
// file that includes this header. The declarations below use unqualified
// vector, so those names must be qualified before this line can be removed.
using namespace std;
/*
 * State -- holds the physics state arrays (H, U, V), a pointer to the Mesh
 * they belong to, optional OpenCL device buffers, and per-category timers.
 * Apart from the two inline timer accessors, only declarations appear here;
 * the member functions are defined elsewhere.
 * NOTE(review): only the copy constructor is blocked (private, at the end of
 * the class); copy assignment is not declared here, so it is not blocked the
 * same way.
 */
class State {
public:
/* NOTE(review): presumably the managers for the host and device state arrays -- confirm in the implementation. */
MallocPlus state_memory;
MallocPlus gpu_state_memory;
/* Mesh associated with this state. */
Mesh *mesh;
/* State variable arrays; the element type depends on the selected precision. */
state_t *H;
state_t *U;
state_t *V;
#ifdef HAVE_OPENCL
/* OpenCL handles; only present when built with HAVE_OPENCL. */
cl_mem dev_H;
cl_mem dev_U;
cl_mem dev_V;
cl_mem dev_mass_sum;
cl_mem dev_deltaT;
cl_event apply_BCs_event;
cl_mem dev_mpot;
//cl_mem dev_ioffset;
cl_mem dev_result;
#endif
/* One timer slot per state_timer_category. */
double cpu_timers[STATE_TIMER_SIZE];
long long gpu_timers[STATE_TIMER_SIZE];
// constructor -- allocates state arrays to size ncells
// NOTE(review): the signature takes a Mesh pointer, not ncells; confirm where the arrays are actually sized.
State(Mesh *mesh_in);
void init(int do_gpu_calc);
void terminate(void);
/* Memory routines for linked list of state arrays */
void allocate(size_t ncells);
void allocate_from_backup_file(FILE *fp);
void allocate_for_rollback(State *state_to_copy);
void resize(size_t ncells);
void memory_reset_ptrs(void);
#ifdef HAVE_OPENCL
void allocate_device_memory(size_t ncells);
#endif
/* NOTE(review): declared outside the HAVE_OPENCL guard, unlike allocate_device_memory above -- confirm this is intended. */
void resize_old_device_memory(size_t ncells);
/* Accessor routines */
/* Returns the stored CPU timer value for the given category, unscaled. */
double get_cpu_timer(state_timer_category category) {return(cpu_timers[category]); };
/* Convert nanoseconds to seconds: the stored counter is scaled by 1.0e-9
 * (assumes gpu_timers is accumulated in nanoseconds). */
double get_gpu_timer(state_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); };
/* Boundary routines -- not currently used */
void add_boundary_cells(void);
void apply_boundary_conditions(void);
void apply_boundary_conditions_local(void);
void apply_boundary_conditions_ghost(void);
void remove_boundary_cells(void);
/*******************************************************************
* set_timestep
* Input
* H, U, V -- from state object
* celltype, level, lev_delta
* Output
* mindeltaT returned
*******************************************************************/
double set_timestep(double g, double sigma);
#ifdef HAVE_OPENCL
double gpu_set_timestep(double sigma);
#endif
/*******************************************************************
* calc finite difference
* will add ghost region to H, U, V and fill at start of routine
* Input
* H, U, V -- from state object
* nlft, nrht, nbot, ntop, level, celltype -- from mesh object
* Output
* H, U, V
*******************************************************************/
void calc_finite_difference(double deltaT);
void calc_finite_difference_via_faces(double deltaT);
#ifdef HAVE_OPENCL
void gpu_calc_finite_difference(double deltaT);
#endif
/*******************************************************************
* calc refine potential -- state has responsibility to calc initial
* refinement potential array that is then passed to mesh for
* smoothing and enforcing refinement rules
* Input
* H, U, V -- from state object
* Output
* mpot
* ioffset
* count
*******************************************************************/
size_t calc_refine_potential(vector<int> &mpot, int &icount, int &jcount);
#ifdef HAVE_OPENCL
size_t gpu_calc_refine_potential(int &icount, int &jcount);
#endif
/*******************************************************************
* rezone all -- most of call is done in mesh
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
/* Note: mpot is taken by value here, so the vector is copied on each call. */
void rezone_all(int icount, int jcount, vector<int> mpot);
#ifdef HAVE_OPENCL
void gpu_rezone_all(int icount, int jcount, bool localStencil);
#endif
/*******************************************************************
* load balance -- most of call is done in mesh, but pointers are
* reset to newly allocated state arrays
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
#ifdef HAVE_MPI
void do_load_balance_local(size_t &numcells);
#ifdef HAVE_OPENCL
void gpu_do_load_balance_local(size_t &numcells);
#endif
#endif
/*******************************************************************
* mass sum -- Conservation of mass check
* Input
* H from state object
* Precision type for sum
* Output
* total mass is returned
*******************************************************************/
double mass_sum(int enhanced_precision_sum);
#ifdef HAVE_OPENCL
double gpu_mass_sum(int enhanced_precision_sum);
#endif
void fill_circle(double circ_radius, double fill_value, double background);
/* Note: iorder and sym_index below are taken by value, so each call copies the vector. */
void state_reorder(vector<int> iorder);
void symmetry_check(const char *string, vector<int> sym_index, double eps,
SIGN_RULE sign_rule, int &flag);
void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time);
/* state comparison routines */
#ifdef HAVE_OPENCL
void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells);
#endif
void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl);
#ifdef HAVE_OPENCL
void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl);
#endif
/* Timing report helpers */
void output_timer_block(mesh_device_types device_type, double elapsed_time,
double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio);
void timer_output(state_timer_category category, mesh_device_types device_type, int timer_level);
void print(void);
/* Checkpoint routines taking a Crux handle */
size_t get_checkpoint_size(void);
void store_checkpoint(Crux *crux);
void restore_checkpoint(Crux *crux);
// Added for a second print on every iteration: Brian Atkinson (5-29-14)
void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage);
void print_local(int ncycle);
void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan);
void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status);
private:
State(const State&); // To block copy constructor so copies are not made inadvertently
void print_object_info(void);
};
#endif // ifndef STATE_H_