1 /*
2 * Copyright (c) 2012-2021, The OSKAR Developers.
3 * See the LICENSE file at the top-level directory of this distribution.
4 */
5
6 #include "beam_pattern/oskar_beam_pattern.h"
7 #include "beam_pattern/private_beam_pattern.h"
8 #include "convert/oskar_convert_mjd_to_gast_fast.h"
9 #include "correlate/oskar_evaluate_auto_power.h"
10 #include "correlate/oskar_evaluate_cross_power.h"
11 #include "math/oskar_cmath.h"
12 #include "math/private_cond2_2x2.h"
13 #include "utility/oskar_device.h"
14 #include "utility/oskar_file_exists.h"
15 #include "utility/oskar_get_error_string.h"
16 #include "utility/oskar_get_memory_usage.h"
17 #include "oskar_version.h"
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include <fitsio.h>
23
24 #ifdef _OPENMP
25 #include <omp.h>
26 #endif
27
28 #ifdef __cplusplus
29 extern "C" {
30 #endif
31
32 static void* run_blocks(void* arg);
33 static void sim_chunks(oskar_BeamPattern* h, int i_chunk_start, int i_time,
34 int i_channel, int i_active, int device_id, int* status);
35 static void write_chunks(oskar_BeamPattern* h, int i_chunk_start, int i_time,
36 int i_channel, int i_active, int* status);
37 static void write_pixels(oskar_BeamPattern* h, int i_chunk, int i_time,
38 int i_channel, int num_pix, int channel_average, int time_average,
39 const oskar_Mem* in, int chunk_desc, int stokes_in, int* status);
40 static void complex_to_amp(const oskar_Mem* complex_in, const int offset,
41 const int stride, const int num_points, oskar_Mem* output, int* status);
42 static void complex_to_phase(const oskar_Mem* complex_in, const int offset,
43 const int stride, const int num_points, oskar_Mem* output, int* status);
44 static void complex_to_real(const oskar_Mem* complex_in, const int offset,
45 const int stride, const int num_points, oskar_Mem* output, int* status);
46 static void complex_to_imag(const oskar_Mem* complex_in, const int offset,
47 const int stride, const int num_points, oskar_Mem* output, int* status);
48 static void jones_to_ixr(const oskar_Mem* complex_in, const int offset,
49 const int num_points, oskar_Mem* output, int* status);
50 static void oskar_convert_linear_to_stokes(const int num_points,
51 const int offset_in, const oskar_Mem* linear, const int stokes_index,
52 oskar_Mem* output, int* status);
53 static void record_timing(oskar_BeamPattern* h);
54 static unsigned int disp_width(unsigned int value);
55
56
/* Arguments passed to each worker thread entry point (run_blocks).
 * Thread 0 performs file output; threads 1..num_threads-1 map to
 * compute devices (device_id = thread_id - 1). */
struct ThreadArgs
{
    oskar_BeamPattern* h;       /* Shared simulation handle (not owned). */
    int num_threads, thread_id; /* Total thread count and this thread's ID. */
};
typedef struct ThreadArgs ThreadArgs;
63
/*
 * Runs the beam pattern simulation described by the handle.
 *
 * Spawns one thread per compute device plus one dedicated writer thread
 * (see run_blocks), waits for them all to finish, records timing and
 * memory usage, then releases per-run resources and closes the log.
 *
 * Parameters:
 *   h       Beam pattern handle; must have a root output path set.
 *   status  OSKAR error status (checked on entry, updated on failure).
 */
void oskar_beam_pattern_run(oskar_BeamPattern* h, int* status)
{
    int i = 0;
    oskar_Timer* tmr = 0;
    oskar_Thread** threads = 0;
    ThreadArgs* args = 0;
    if (*status || !h) return;

    /* Check root name exists. */
    if (!h->root_path)
    {
        oskar_log_error(h->log, "No output file name specified.");
        *status = OSKAR_ERR_FILE_IO;
        return;
    }

    /* Initialise if required. */
    oskar_beam_pattern_check_init(h, status);
    tmr = oskar_timer_create(OSKAR_TIMER_NATIVE);
    oskar_timer_resume(tmr);

    /* Set up worker threads.
     * One thread per device, plus thread 0 for file writes. */
    const int num_threads = h->num_devices + 1;
    oskar_barrier_set_num_threads(h->barrier, num_threads);
    threads = (oskar_Thread**) calloc(num_threads, sizeof(oskar_Thread*));
    args = (ThreadArgs*) calloc(num_threads, sizeof(ThreadArgs));
    for (i = 0; i < num_threads; ++i)
    {
        args[i].h = h;
        args[i].num_threads = num_threads;
        args[i].thread_id = i;
    }

    /* Record memory usage. */
    if (!*status)
    {
#if defined(OSKAR_HAVE_CUDA) || defined(OSKAR_HAVE_OPENCL)
        oskar_log_section(h->log, 'M', "Initial memory usage");
        for (i = 0; i < h->num_gpus; ++i)
        {
            oskar_device_log_mem(h->dev_loc, 0, h->gpu_ids[i], h->log);
        }
#endif
        oskar_log_section(h->log, 'M', "Starting simulation...");
    }

    /* Set status code.
     * Workers share h->status rather than the caller's pointer. */
    h->status = *status;

    /* Start simulation timer. */
    oskar_timer_start(h->tmr_sim);

    /* Start the worker threads. */
    for (i = 0; i < num_threads; ++i)
    {
        threads[i] = oskar_thread_create(run_blocks, (void*)&args[i], 0);
    }

    /* Wait for worker threads to finish. */
    for (i = 0; i < num_threads; ++i)
    {
        oskar_thread_join(threads[i]);
        oskar_thread_free(threads[i]);
    }
    free(threads);
    free(args);

    /* Get status code. */
    *status = h->status;

    /* Record memory usage. */
    if (!*status)
    {
#if defined(OSKAR_HAVE_CUDA) || defined(OSKAR_HAVE_OPENCL)
        oskar_log_section(h->log, 'M', "Final memory usage");
        for (i = 0; i < h->num_gpus; ++i)
        {
            oskar_device_log_mem(h->dev_loc, 0, h->gpu_ids[i], h->log);
        }
#endif
        /* Record time taken. */
        oskar_log_set_value_width(h->log, 25);
        record_timing(h);
    }

    /* Finalise. */
    oskar_beam_pattern_reset_cache(h, status);

    /* Check for errors. */
    if (!*status)
    {
        oskar_log_message(h->log, 'M', 0, "Run completed in %.3f sec.",
                oskar_timer_elapsed(tmr));
    }
    else
    {
        oskar_log_error(h->log, "Run failed with code %i: %s.", *status,
                oskar_get_error_string(*status));
    }
    oskar_timer_free(tmr);

    /* Close the log. */
    oskar_log_close(h->log);
}
168
169
170 /* Private methods. */
171
/*
 * Worker thread entry point.
 *
 * All threads iterate the same (chunk, outer, inner) loop nest in lockstep,
 * synchronised by two barriers per iteration. Compute threads (thread_id
 * >= 1) simulate into the active double buffer while thread 0 writes the
 * previous iteration's results from the inactive buffer; the buffer parity
 * is h->i_global & 1. The argument is a ThreadArgs*; returns NULL.
 */
static void* run_blocks(void* arg)
{
    oskar_BeamPattern* h = 0;
    int i_inner = 0, i_outer = 0, num_inner = 0, num_outer = 0;
    int c = 0, t = 0, f = 0, *status = 0;

    /* Loop indices for previous iteration (accessed only by thread 0). */
    int cp = 0, tp = 0, fp = 0;

    /* Get thread function arguments. */
    h = ((ThreadArgs*)arg)->h;
    status = &(h->status);
    const int num_threads = ((ThreadArgs*)arg)->num_threads;
    const int thread_id = ((ThreadArgs*)arg)->thread_id;
    const int device_id = thread_id - 1;

#ifdef _OPENMP
    /* Disable any nested parallelism. */
    omp_set_nested(0);
    omp_set_num_threads(1);
#endif

    /* Bind this compute thread to its device, if it has one. */
    if (device_id >= 0 && device_id < h->num_gpus)
    {
        oskar_device_set(h->dev_loc, h->gpu_ids[device_id], status);
    }

    /* Set ranges of inner and outer loops based on averaging mode. */
    if (h->average_single_axis != 'T')
    {
        num_outer = h->num_time_steps;
        num_inner = h->num_channels; /* Channel on inner loop. */
    }
    else
    {
        num_outer = h->num_channels;
        num_inner = h->num_time_steps; /* Time on inner loop. */
    }

    /* Loop over image pixel chunks, running simulation and file writing one
     * chunk at a time. Simulation and file output are overlapped by using
     * double buffering, and a dedicated thread is used for file output.
     *
     * Thread 0 is used for file writes.
     * Threads 1 to n (mapped to compute devices) do the simulation.
     */
    for (c = 0; c < h->num_chunks; c += h->num_devices)
    {
        for (i_outer = 0; i_outer < num_outer; ++i_outer)
        {
            for (i_inner = 0; i_inner < num_inner; ++i_inner)
            {
                /* Set time and channel indices based on averaging mode. */
                if (h->average_single_axis != 'T')
                {
                    t = i_outer;
                    f = i_inner;
                }
                else
                {
                    f = i_outer;
                    t = i_inner;
                }
                /* num_threads == 1 means no writer thread exists, so the
                 * single thread must also do the simulation itself. */
                if (thread_id > 0 || num_threads == 1)
                {
                    sim_chunks(h, c, t, f, h->i_global & 1, device_id, status);
                }
                /* Nothing to write on the very first iteration. */
                if (thread_id == 0 && h->i_global > 0)
                {
                    write_chunks(h, cp, tp, fp, h->i_global & 1, status);
                }

                /* Barrier 1: Set indices of the previous chunk(s). */
                oskar_barrier_wait(h->barrier);
                if (thread_id == 0)
                {
                    cp = c;
                    tp = t;
                    fp = f;
                    h->i_global++;
                }

                /* Barrier 2: Check sim and write are done. */
                oskar_barrier_wait(h->barrier);
            }
        }
    }

    /* Write the very last chunk(s). */
    if (thread_id == 0)
    {
        write_chunks(h, cp, tp, fp, h->i_global & 1, status);
    }

    return 0;
}
268
269
/*
 * Simulates one pixel chunk of the beam pattern on a single compute device.
 *
 * The chunk index is i_chunk_start + device_id; out-of-range chunks are
 * skipped. Station beams are evaluated into d->jones_data (re-using the
 * beam of an already-evaluated station model type where duplication is
 * allowed), optional auto-/cross-power products are formed, and results
 * are copied into the host buffers selected by i_active (double buffering
 * consumed by write_chunks).
 *
 * Parameters:
 *   h, device_id  Simulation handle and device (index into h->d).
 *   i_chunk_start First chunk index of this round of devices.
 *   i_time        Time-step index (used for the mid-dump MJD).
 *   i_channel     Channel index (used for the frequency).
 *   i_active      Which half of the double buffer to fill (0 or 1).
 *   status        OSKAR error status.
 */
static void sim_chunks(oskar_BeamPattern* h, int i_chunk_start, int i_time,
        int i_channel, int i_active, int device_id, int* status)
{
    DeviceData* d = 0;
    int chunk_size = 0, i = 0;
    int num_models_evaluated = 0;
    int *models_evaluated = 0, *model_offsets = 0;
    if (*status) return;

    /* Get chunk index from GPU ID and chunk start,
     * and return immediately if it's out of range. */
    d = &h->d[device_id];
    const int* type_map = oskar_mem_int_const(
            oskar_telescope_station_type_map_const(d->tel), status);
    const int i_chunk = i_chunk_start + device_id;
    if (i_chunk >= h->num_chunks) return;

    /* Get time and frequency values. The MJD is taken at the centre of
     * the dump interval (i_time + 0.5). */
    oskar_timer_resume(d->tmr_compute);
    const double dt_dump = h->time_inc_sec / 86400.0;
    const double mjd = h->time_start_mjd_utc + dt_dump * (i_time + 0.5);
    const double gast_rad = oskar_convert_mjd_to_gast_fast(mjd);
    const double freq_hz = h->freq_start_hz + i_channel * h->freq_inc_hz;

    /* Work out the size of the chunk (the last chunk may be smaller). */
    chunk_size = h->max_chunk_size;
    if ((i_chunk + 1) * h->max_chunk_size > h->num_pixels)
    {
        chunk_size = h->num_pixels - i_chunk * h->max_chunk_size;
    }

    /* Copy pixel chunk coordinate data to GPU only if chunk is different. */
    if (i_chunk != d->previous_chunk_index)
    {
        const int offset = i_chunk * h->max_chunk_size;
        d->previous_chunk_index = i_chunk;
        oskar_mem_copy_contents(d->lon_rad, h->lon_rad,
                0, offset, chunk_size, status);
        oskar_mem_copy_contents(d->lat_rad, h->lat_rad,
                0, offset, chunk_size, status);
        oskar_mem_copy_contents(d->x, h->x, 0, offset, chunk_size, status);
        oskar_mem_copy_contents(d->y, h->y, 0, offset, chunk_size, status);
        oskar_mem_copy_contents(d->z, h->z, 0, offset, chunk_size, status);
    }

    /* Generate beam for this pixel chunk, for all active stations. */
    for (i = 0; i < h->num_active_stations; ++i)
    {
        const int offset = i * chunk_size;
        const oskar_Mem* const source_coords[] = {d->x, d->y, d->z};
        if (!oskar_telescope_allow_station_beam_duplication(d->tel))
        {
            /* Evaluate every station beam individually. */
            const oskar_Station* station =
                    oskar_telescope_station_const(d->tel, h->station_ids[i]);
            if (!station)
            {
                station = oskar_telescope_station_const(d->tel, 0);
            }
            oskar_station_beam(station,
                    d->work, h->source_coord_type, chunk_size,
                    source_coords, h->lon0, h->lat0,
                    oskar_telescope_phase_centre_coord_type(d->tel),
                    oskar_telescope_phase_centre_longitude_rad(d->tel),
                    oskar_telescope_phase_centre_latitude_rad(d->tel),
                    i_time, gast_rad, freq_hz,
                    offset, d->jones_data, status);
        }
        else
        {
            /* Evaluate each distinct station model type only once;
             * copy the already-computed beam for repeated types. */
            int j = 0, station_to_copy = -1;
            const int station_model_type = type_map[h->station_ids[i]];
            for (j = 0; j < num_models_evaluated; ++j)
            {
                if (models_evaluated[j] == station_model_type)
                {
                    station_to_copy = model_offsets[j];
                    break;
                }
            }
            if (station_to_copy >= 0)
            {
                oskar_mem_copy_contents(
                        d->jones_data, d->jones_data,
                        (size_t)(i * chunk_size),               /* Dest. */
                        (size_t)(station_to_copy * chunk_size), /* Source. */
                        (size_t)chunk_size, status);
            }
            else
            {
                int* tmp = 0;
                oskar_station_beam(
                        oskar_telescope_station_const(d->tel, station_model_type),
                        d->work, h->source_coord_type, chunk_size,
                        source_coords, h->lon0, h->lat0,
                        oskar_telescope_phase_centre_coord_type(d->tel),
                        oskar_telescope_phase_centre_longitude_rad(d->tel),
                        oskar_telescope_phase_centre_latitude_rad(d->tel),
                        i_time, gast_rad, freq_hz,
                        offset, d->jones_data, status);
                num_models_evaluated++;

                /* Grow the model-tracking arrays, checking each realloc()
                 * result so a failed allocation is never dereferenced and
                 * the original buffers remain valid for the free() below
                 * (CERT MEM32-C). */
                tmp = (int*) realloc(models_evaluated,
                        num_models_evaluated * sizeof(int));
                if (!tmp)
                {
                    *status = OSKAR_ERR_MEMORY_ALLOC_FAILURE;
                    break;
                }
                models_evaluated = tmp;
                tmp = (int*) realloc(model_offsets,
                        num_models_evaluated * sizeof(int));
                if (!tmp)
                {
                    *status = OSKAR_ERR_MEMORY_ALLOC_FAILURE;
                    break;
                }
                model_offsets = tmp;
                models_evaluated[num_models_evaluated - 1] = station_model_type;
                model_offsets[num_models_evaluated - 1] = i;
            }
        }
        if (d->auto_power[0])
        {
            /* Auto power for an unpolarised unit source (I=1, Q=U=V=0). */
            oskar_evaluate_auto_power(chunk_size,
                    offset, d->jones_data, 1.0, 0.0, 0.0, 0.0,
                    offset, d->auto_power[0], status);
        }
        if (d->auto_power[1])
        {
            /* Auto power for the user-defined test source Stokes vector. */
            oskar_evaluate_auto_power(chunk_size,
                    offset, d->jones_data,
                    h->test_source_stokes[0],
                    h->test_source_stokes[1],
                    h->test_source_stokes[2],
                    h->test_source_stokes[3],
                    offset, d->auto_power[1], status);
        }
    }
    free(models_evaluated);
    free(model_offsets);
    if (d->cross_power[0])
    {
        oskar_evaluate_cross_power(chunk_size, h->num_active_stations,
                d->jones_data, 1.0, 0.0, 0.0, 0.0,
                0, d->cross_power[0], status);
    }
    if (d->cross_power[1])
    {
        oskar_evaluate_cross_power(chunk_size, h->num_active_stations,
                d->jones_data,
                h->test_source_stokes[0],
                h->test_source_stokes[1],
                h->test_source_stokes[2],
                h->test_source_stokes[3],
                0, d->cross_power[1], status);
    }

    /* Copy the output data into host memory. */
    if (d->jones_data_cpu[i_active])
    {
        oskar_mem_copy_contents(d->jones_data_cpu[i_active], d->jones_data,
                0, 0, chunk_size * h->num_active_stations, status);
    }
    for (i = 0; i < 2; ++i)
    {
        if (d->auto_power[i])
        {
            oskar_mem_copy_contents(d->auto_power_cpu[i][i_active],
                    d->auto_power[i], 0, 0,
                    chunk_size * h->num_active_stations, status);
        }
        if (d->cross_power[i])
        {
            oskar_mem_copy_contents(d->cross_power_cpu[i][i_active],
                    d->cross_power[i], 0, 0, chunk_size, status);
        }
    }

    /* Progress message; the mutex serialises logging across devices. */
    oskar_mutex_lock(h->mutex);
    oskar_log_message(h->log, 'S', 1, "Chunk %*i/%i, "
            "Time %*i/%i, Channel %*i/%i [Device %i]",
            disp_width(h->num_chunks), i_chunk+1, h->num_chunks,
            disp_width(h->num_time_steps), i_time+1, h->num_time_steps,
            disp_width(h->num_channels), i_channel+1, h->num_channels,
            device_id);
    oskar_mutex_unlock(h->mutex);
    oskar_timer_pause(d->tmr_compute);
}
443
444
/*
 * Writes the inactive (previously-simulated) buffer of each device to the
 * output files, and maintains the running time/channel averages.
 *
 * For every device chunk: raw Jones data and per-Stokes auto/cross power
 * are written unaveraged; the same data are accumulated into time-,
 * channel-, and time-and-channel-average buffers, which are normalised,
 * written and cleared when the last time step and/or channel is reached.
 *
 * Called only from thread 0 (see run_blocks); i_active selects the buffer
 * half currently being FILLED, so this reads the other half (!i_active).
 */
static void write_chunks(oskar_BeamPattern* h, int i_chunk_start,
        int i_time, int i_channel, int i_active, int* status)
{
    int i = 0, chunk_sources = 0, stokes = 0;
    if (*status) return;

    /* Write inactive chunk(s) from all GPUs. */
    oskar_timer_resume(h->tmr_write);
    for (i = 0; i < h->num_devices; ++i)
    {
        DeviceData* d = &h->d[i];

        /* Get chunk index from GPU ID & chunk start. Stop if out of range. */
        const int i_chunk = i_chunk_start + i;
        if (i_chunk >= h->num_chunks || *status) break;

        /* Get the size of the chunk (the last chunk may be smaller). */
        chunk_sources = h->max_chunk_size;
        if ((i_chunk + 1) * h->max_chunk_size > h->num_pixels)
        {
            chunk_sources = h->num_pixels - i_chunk * h->max_chunk_size;
        }
        /* chunk_size spans all stations; chunk_sources is per-station
         * (cross power has no station axis, hence the two sizes below). */
        const int chunk_size = chunk_sources * h->num_active_stations;

        /* Write non-averaged raw data, if required. */
        write_pixels(h, i_chunk, i_time, i_channel, chunk_sources, 0, 0,
                d->jones_data_cpu[!i_active], JONES_DATA, -1, status);

        /* Loop over Stokes parameter types. */
        for (stokes = 0; stokes < 2; ++stokes)
        {
            /* Write non-averaged data, if required. */
            write_pixels(h, i_chunk, i_time, i_channel, chunk_sources, 0, 0,
                    d->auto_power_cpu[stokes][!i_active],
                    AUTO_POWER_DATA, stokes, status);
            write_pixels(h, i_chunk, i_time, i_channel, chunk_sources, 0, 0,
                    d->cross_power_cpu[stokes][!i_active],
                    CROSS_POWER_DATA, stokes, status);

            /* Time-average the data if required. */
            if (d->auto_power_time_avg[stokes])
            {
                oskar_mem_add(d->auto_power_time_avg[stokes],
                        d->auto_power_time_avg[stokes],
                        d->auto_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_size, status);
            }
            if (d->cross_power_time_avg[stokes])
            {
                oskar_mem_add(d->cross_power_time_avg[stokes],
                        d->cross_power_time_avg[stokes],
                        d->cross_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_sources, status);
            }

            /* Channel-average the data if required. */
            if (d->auto_power_channel_avg[stokes])
            {
                oskar_mem_add(d->auto_power_channel_avg[stokes],
                        d->auto_power_channel_avg[stokes],
                        d->auto_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_size, status);
            }
            if (d->cross_power_channel_avg[stokes])
            {
                oskar_mem_add(d->cross_power_channel_avg[stokes],
                        d->cross_power_channel_avg[stokes],
                        d->cross_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_sources, status);
            }

            /* Channel- and time-average the data if required. */
            if (d->auto_power_channel_and_time_avg[stokes])
            {
                oskar_mem_add(d->auto_power_channel_and_time_avg[stokes],
                        d->auto_power_channel_and_time_avg[stokes],
                        d->auto_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_size, status);
            }
            if (d->cross_power_channel_and_time_avg[stokes])
            {
                oskar_mem_add(d->cross_power_channel_and_time_avg[stokes],
                        d->cross_power_channel_and_time_avg[stokes],
                        d->cross_power_cpu[stokes][!i_active],
                        0, 0, 0, chunk_sources, status);
            }

            /* Write time-averaged data (normalise, write, reset) once the
             * final time step has been accumulated. */
            if (i_time == h->num_time_steps - 1)
            {
                if (d->auto_power_time_avg[stokes])
                {
                    oskar_mem_scale_real(d->auto_power_time_avg[stokes],
                            1.0 / h->num_time_steps, 0, chunk_size, status);
                    write_pixels(h, i_chunk, 0, i_channel, chunk_sources, 0, 1,
                            d->auto_power_time_avg[stokes],
                            AUTO_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(d->auto_power_time_avg[stokes],
                            status);
                }
                if (d->cross_power_time_avg[stokes])
                {
                    oskar_mem_scale_real(d->cross_power_time_avg[stokes],
                            1.0 / h->num_time_steps, 0, chunk_sources, status);
                    write_pixels(h, i_chunk, 0, i_channel, chunk_sources, 0, 1,
                            d->cross_power_time_avg[stokes],
                            CROSS_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(d->cross_power_time_avg[stokes],
                            status);
                }
            }

            /* Write channel-averaged data. */
            if (i_channel == h->num_channels - 1)
            {
                if (d->auto_power_channel_avg[stokes])
                {
                    oskar_mem_scale_real(d->auto_power_channel_avg[stokes],
                            1.0 / h->num_channels, 0, chunk_size, status);
                    write_pixels(h, i_chunk, i_time, 0, chunk_sources, 1, 0,
                            d->auto_power_channel_avg[stokes],
                            AUTO_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(d->auto_power_channel_avg[stokes],
                            status);
                }
                if (d->cross_power_channel_avg[stokes])
                {
                    oskar_mem_scale_real(d->cross_power_channel_avg[stokes],
                            1.0 / h->num_channels, 0, chunk_sources, status);
                    write_pixels(h, i_chunk, i_time, 0, chunk_sources, 1, 0,
                            d->cross_power_channel_avg[stokes],
                            CROSS_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(
                            d->cross_power_channel_avg[stokes], status);
                }
            }

            /* Write channel- and time-averaged data. */
            if ((i_time == h->num_time_steps - 1) &&
                    (i_channel == h->num_channels - 1))
            {
                if (d->auto_power_channel_and_time_avg[stokes])
                {
                    oskar_mem_scale_real(
                            d->auto_power_channel_and_time_avg[stokes],
                            1.0 / (h->num_channels * h->num_time_steps),
                            0, chunk_size, status);
                    write_pixels(h, i_chunk, 0, 0, chunk_sources, 1, 1,
                            d->auto_power_channel_and_time_avg[stokes],
                            AUTO_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(
                            d->auto_power_channel_and_time_avg[stokes],
                            status);
                }
                if (d->cross_power_channel_and_time_avg[stokes])
                {
                    oskar_mem_scale_real(
                            d->cross_power_channel_and_time_avg[stokes],
                            1.0 / (h->num_channels * h->num_time_steps),
                            0, chunk_sources, status);
                    write_pixels(h, i_chunk, 0, 0, chunk_sources, 1, 1,
                            d->cross_power_channel_and_time_avg[stokes],
                            CROSS_POWER_DATA, stokes, status);
                    oskar_mem_clear_contents(
                            d->cross_power_channel_and_time_avg[stokes],
                            status);
                }
            }
        }
    }
    oskar_timer_pause(h->tmr_write);
}
617
618
/*
 * Converts one chunk of complex data to pixel values and writes it to the
 * matching data products (FITS image plane and/or ASCII text file).
 *
 * Each configured data product is matched against the chunk by averaging
 * flags (time_average, channel_average), input Stokes index (stokes_in),
 * and chunk type (chunk_desc: JONES_DATA, AUTO_POWER_DATA or
 * CROSS_POWER_DATA). Raw-complex products bypass the pixel conversion.
 *
 * Does nothing if 'in' is NULL (product not enabled on this device).
 */
static void write_pixels(oskar_BeamPattern* h, int i_chunk, int i_time,
        int i_channel, int num_pix, int channel_average, int time_average,
        const oskar_Mem* in, int chunk_desc, int stokes_in, int* status)
{
    int i = 0;
    if (!in) return;

    /* Loop over data products. */
    const int num_pol = h->pol_mode == OSKAR_POL_MODE_FULL ? 4 : 1;
    for (i = 0; i < h->num_data_products; ++i)
    {
        fitsfile* f = 0;
        FILE* t = 0;
        int dp = 0, stokes_out = 0, i_station = 0, off = 0;

        /* Get data product info. */
        f = h->data_products[i].fits_file;
        t = h->data_products[i].text_file;
        dp = h->data_products[i].type;
        stokes_out = h->data_products[i].stokes_out;
        i_station = h->data_products[i].i_station;

        /* Check averaging mode and polarisation input type. */
        if (h->data_products[i].time_average != time_average ||
                h->data_products[i].channel_average != channel_average ||
                h->data_products[i].stokes_in != stokes_in)
        {
            continue;
        }

        /* Treat raw data output as special case, as it doesn't go via pix. */
        if (dp == RAW_COMPLEX && chunk_desc == JONES_DATA && t)
        {
            /* Alias (not copy) the station's slice of the Jones data. */
            oskar_Mem* station_data = 0;
            station_data = oskar_mem_create_alias(in, i_station * num_pix,
                    num_pix, status);
            oskar_mem_save_ascii(t, 1, 0, num_pix, status, station_data);
            oskar_mem_free(station_data, status);
            continue;
        }
        if (dp == CROSS_POWER_RAW_COMPLEX &&
                chunk_desc == CROSS_POWER_DATA && t)
        {
            oskar_mem_save_ascii(t, 1, 0, num_pix, status, in);
            continue;
        }

        /* Convert complex values to pixel data. */
        oskar_mem_clear_contents(h->pix, status);
        if (chunk_desc == JONES_DATA && dp == AMP)
        {
            /* Jones data is strided by polarisation: select the requested
             * element (XX/XY/YX/YY) via offset; stride skips the rest. */
            off = i_station * num_pix * num_pol;
            if (stokes_out == XX || stokes_out == -1)
            {
                complex_to_amp(in, off, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == XY)
            {
                complex_to_amp(in, off + 1, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == YX)
            {
                complex_to_amp(in, off + 2, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == YY)
            {
                complex_to_amp(in, off + 3, num_pol, num_pix, h->pix, status);
            }
            else
            {
                continue;
            }
        }
        else if (chunk_desc == JONES_DATA && dp == PHASE)
        {
            off = i_station * num_pix * num_pol;
            if (stokes_out == XX || stokes_out == -1)
            {
                complex_to_phase(in, off, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == XY)
            {
                complex_to_phase(in, off + 1, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == YX)
            {
                complex_to_phase(in, off + 2, num_pol, num_pix, h->pix, status);
            }
            else if (stokes_out == YY)
            {
                complex_to_phase(in, off + 3, num_pol, num_pix, h->pix, status);
            }
            else
            {
                continue;
            }
        }
        else if (chunk_desc == JONES_DATA && dp == IXR)
        {
            jones_to_ixr(in, i_station * num_pix, num_pix, h->pix, status);
        }
        else if (chunk_desc == AUTO_POWER_DATA ||
                chunk_desc == CROSS_POWER_DATA)
        {
            off = i_station * num_pix; /* Station offset. */
            /* Cross power has no station axis. */
            if (off < 0 || chunk_desc == CROSS_POWER_DATA) off = 0;
            /* Skip mismatched product/chunk combinations. */
            if (chunk_desc == CROSS_POWER_DATA && (dp & AUTO_POWER))
            {
                continue;
            }
            if (chunk_desc == AUTO_POWER_DATA && (dp & CROSS_POWER))
            {
                continue;
            }
            if (stokes_out >= I && stokes_out <= V)
            {
                oskar_convert_linear_to_stokes(num_pix, off, in,
                        stokes_out, h->ctemp, status);
            }
            else
            {
                continue;
            }
            if (dp & AMP)
            {
                complex_to_amp(h->ctemp, 0, 1, num_pix, h->pix, status);
            }
            else if (dp & PHASE)
            {
                complex_to_phase(h->ctemp, 0, 1, num_pix, h->pix, status);
            }
            else if (dp & REAL)
            {
                complex_to_real(h->ctemp, 0, 1, num_pix, h->pix, status);
            }
            else if (dp & IMAG)
            {
                complex_to_imag(h->ctemp, 0, 1, num_pix, h->pix, status);
            }
            else
            {
                continue;
            }
        }
        else continue;

        /* Check for FITS file. */
        if (f && h->width && h->height)
        {
            /* 1-based pixel coordinates of the chunk's first pixel in the
             * (width, height, channel, time) image cube.
             * NOTE(review): the OSKAR status pointer doubles as the CFITSIO
             * status argument here — both are plain int error codes and
             * CFITSIO skips the call when *status != 0; confirm intended. */
            long firstpix[4];
            firstpix[0] = 1 + (i_chunk * h->max_chunk_size) % h->width;
            firstpix[1] = 1 + (i_chunk * h->max_chunk_size) / h->width;
            firstpix[2] = 1 + i_channel;
            firstpix[3] = 1 + i_time;
            fits_write_pix(f, (h->prec == OSKAR_DOUBLE ? TDOUBLE : TFLOAT),
                    firstpix, num_pix, oskar_mem_void(h->pix), status);
        }

        /* Check for text file. */
        if (t) oskar_mem_save_ascii(t, 1, 0, num_pix, status, h->pix);
    }
}
781
782
/*
 * Converts strided complex data to amplitude, |z| = sqrt(re^2 + im^2).
 *
 * Reads num_points elements starting at 'offset' with the given element
 * stride from complex_in, writing contiguous real values to output.
 * The output precision selects the input accessor type.
 *
 * Uses hypot() instead of sqrt(x*x + y*y): the intermediate squares in
 * the original single-precision branch overflow for |x| > ~1.8e19, while
 * hypot() is specified to avoid intermediate overflow and underflow.
 */
static void complex_to_amp(const oskar_Mem* complex_in, const int offset,
        const int stride, const int num_points, oskar_Mem* output, int* status)
{
    int i = 0;
    if (oskar_mem_precision(output) == OSKAR_SINGLE)
    {
        float* out = 0;
        const float2* in = 0;
        in = oskar_mem_float2_const(complex_in, status) + offset;
        out = oskar_mem_float(output, status);
        for (i = 0; i < num_points; ++i)
        {
            const int j = i * stride;
            out[i] = (float) hypot(in[j].x, in[j].y);
        }
    }
    else
    {
        double* out = 0;
        const double2* in = 0;
        in = oskar_mem_double2_const(complex_in, status) + offset;
        out = oskar_mem_double(output, status);
        for (i = 0; i < num_points; ++i)
        {
            const int j = i * stride;
            out[i] = hypot(in[j].x, in[j].y);
        }
    }
}
816
817
/*
 * Converts strided complex data to phase (radians), via atan2(im, re).
 * Reads num_points elements starting at 'offset' with the given element
 * stride from complex_in; writes contiguous real values to output, whose
 * precision selects the input accessor type.
 */
static void complex_to_phase(const oskar_Mem* complex_in, const int offset,
        const int stride, const int num_points, oskar_Mem* output, int* status)
{
    int idx = 0;
    if (oskar_mem_precision(output) != OSKAR_SINGLE)
    {
        const double2* src = oskar_mem_double2_const(complex_in, status);
        double* dst = oskar_mem_double(output, status);
        src += offset;
        for (idx = 0; idx < num_points; ++idx)
        {
            const double2 z = src[idx * stride];
            dst[idx] = atan2(z.y, z.x);
        }
    }
    else
    {
        const float2* src = oskar_mem_float2_const(complex_in, status);
        float* dst = oskar_mem_float(output, status);
        src += offset;
        for (idx = 0; idx < num_points; ++idx)
        {
            const float2 z = src[idx * stride];
            dst[idx] = atan2(z.y, z.x);
        }
    }
}
847
848
/*
 * Extracts the real part of strided complex data.
 * Reads num_points elements starting at 'offset' with the given element
 * stride from complex_in; writes contiguous real values to output.
 */
static void complex_to_real(const oskar_Mem* complex_in, const int offset,
        const int stride, const int num_points, oskar_Mem* output, int* status)
{
    int k = 0;
    const int is_single = (oskar_mem_precision(output) == OSKAR_SINGLE);
    if (is_single)
    {
        const float2* src = oskar_mem_float2_const(complex_in, status) + offset;
        float* dst = oskar_mem_float(output, status);
        for (k = 0; k < num_points; ++k)
        {
            dst[k] = src[k * stride].x;
        }
    }
    else
    {
        const double2* src =
                oskar_mem_double2_const(complex_in, status) + offset;
        double* dst = oskar_mem_double(output, status);
        for (k = 0; k < num_points; ++k)
        {
            dst[k] = src[k * stride].x;
        }
    }
}
870
871
/*
 * Extracts the imaginary part of strided complex data.
 * Reads num_points elements starting at 'offset' with the given element
 * stride from complex_in; writes contiguous real values to output.
 */
static void complex_to_imag(const oskar_Mem* complex_in, const int offset,
        const int stride, const int num_points, oskar_Mem* output, int* status)
{
    int k = 0;
    const int is_single = (oskar_mem_precision(output) == OSKAR_SINGLE);
    if (is_single)
    {
        const float2* src = oskar_mem_float2_const(complex_in, status) + offset;
        float* dst = oskar_mem_float(output, status);
        for (k = 0; k < num_points; ++k)
        {
            dst[k] = src[k * stride].y;
        }
    }
    else
    {
        const double2* src =
                oskar_mem_double2_const(complex_in, status) + offset;
        double* dst = oskar_mem_double(output, status);
        for (k = 0; k < num_points; ++k)
        {
            dst[k] = src[k * stride].y;
        }
    }
}
893
894
/*
 * Computes a clamped intrinsic cross-polarisation ratio (IXR) figure from
 * 2x2 complex Jones matrices: ixr = ((cond + 1) / (cond - 1))^2, where
 * cond is the matrix 2-norm condition number.
 *
 * Reads num_points matrices starting at 'offset'; writes contiguous real
 * values to output. Returns silently (no status change) if the input is
 * not fully polarised complex matrix data.
 */
static void jones_to_ixr(const oskar_Mem* jones, const int offset,
        const int num_points, oskar_Mem* output, int* status)
{
    int idx = 0;

    /* Check for fully polarised data. */
    if (!oskar_mem_is_matrix(jones) || !oskar_mem_is_complex(jones)) return;

    if (oskar_mem_precision(output) != OSKAR_SINGLE)
    {
        const double4c* src = oskar_mem_double4c_const(jones, status) + offset;
        double* dst = oskar_mem_double(output, status);
        for (idx = 0; idx < num_points; ++idx)
        {
            const double cond = oskar_cond2_2x2_inline_d(src + idx);
            double ixr = (cond + 1.0) / (cond - 1.0);
            ixr *= ixr;
            if (ixr > 1e8) ixr = 1e8;
            dst[idx] = ixr;
        }
    }
    else
    {
        const float4c* src = oskar_mem_float4c_const(jones, status) + offset;
        float* dst = oskar_mem_float(output, status);
        for (idx = 0; idx < num_points; ++idx)
        {
            const float cond = oskar_cond2_2x2_inline_f(src + idx);
            float ixr = (cond + 1.0f) / (cond - 1.0f);
            ixr *= ixr;
            /* NOTE(review): single precision clamps at 1e6 but double
             * clamps at 1e8 — confirm the asymmetry is intentional. */
            if (ixr > 1e6) ixr = 1e6;
            dst[idx] = ixr;
        }
    }
}
934
935
/*
 * Expands to a switch over 'stokes_index' (captured from the enclosing
 * scope) that converts N complex linear-polarisation 2x2 elements to one
 * Stokes parameter. IN is an array of 4-complex structures whose members
 * .a/.b/.c/.d hold XX/XY/YX/YY; OUT is an array of complex values with
 * .x (real) and .y (imaginary). An unknown index leaves OUT untouched.
 */
#define LINEAR_TO_STOKES(N, IN, OUT) {\
    int i;\
    switch (stokes_index) {\
    case 0: /* I = 0.5 * (XX + YY) */\
        for (i = 0; i < N; ++i) {\
            OUT[i].x = 0.5 * (IN[i].a.x + IN[i].d.x);\
            OUT[i].y = 0.5 * (IN[i].a.y + IN[i].d.y);\
        }\
        break;\
    case 1: /* Q = 0.5 * (XX - YY) */\
        for (i = 0; i < N; ++i) {\
            OUT[i].x = 0.5 * (IN[i].a.x - IN[i].d.x);\
            OUT[i].y = 0.5 * (IN[i].a.y - IN[i].d.y);\
        }\
        break;\
    case 2: /* U = 0.5 * (XY + YX) */\
        for (i = 0; i < N; ++i) {\
            OUT[i].x = 0.5 * (IN[i].b.x + IN[i].c.x);\
            OUT[i].y = 0.5 * (IN[i].b.y + IN[i].c.y);\
        }\
        break;\
    case 3: /* V = -0.5i * (XY - YX) */\
        for (i = 0; i < N; ++i) {\
            OUT[i].x = 0.5 * (IN[i].b.y - IN[i].c.y);\
            OUT[i].y = -0.5 * (IN[i].b.x - IN[i].c.x);\
        }\
        break;\
    default:\
        break;\
    }\
    }
967
/*
 * Converts complex linear-polarisation products to one Stokes parameter
 * (0=I, 1=Q, 2=U, 3=V) using LINEAR_TO_STOKES, reading num_points matrix
 * elements from 'linear' starting at offset_in and writing complex scalars
 * to 'output'. Scalar (non-matrix) input is copied through for Stokes I
 * only; any other index on scalar input sets OSKAR_ERR_INVALID_ARGUMENT.
 *
 * Declared 'static' to match the file-scope prototype above: the original
 * definition omitted the storage class, which (while legal, since the
 * prior static declaration fixes internal linkage) was inconsistent and
 * misleading about the symbol's visibility.
 */
static void oskar_convert_linear_to_stokes(const int num_points,
        const int offset_in, const oskar_Mem* linear, const int stokes_index,
        oskar_Mem* output, int* status)
{
    if (*status) return;

    /* Both buffers must hold complex data. */
    if (!oskar_mem_is_complex(linear) || !oskar_mem_is_complex(output))
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return;
    }

    /* Scalar input: only Stokes I is defined (pass-through copy). */
    if (!oskar_mem_is_matrix(linear))
    {
        if (stokes_index == 0)
        {
            oskar_mem_copy_contents(output, linear, 0, offset_in, num_points,
                    status);
        }
        else
        {
            *status = OSKAR_ERR_INVALID_ARGUMENT;
        }
        return;
    }
    if (oskar_mem_is_double(linear))
    {
        double2* out = 0;
        const double4c* in = 0;
        out = oskar_mem_double2(output, status);
        in = oskar_mem_double4c_const(linear, status) + offset_in;
        LINEAR_TO_STOKES(num_points, in, out)
    }
    else
    {
        float2* out = 0;
        const float4c* in = 0;
        out = oskar_mem_float2(output, status);
        in = oskar_mem_float4c_const(linear, status) + offset_in;
        LINEAR_TO_STOKES(num_points, in, out)
    }
}
1008
1009
/*
 * Logs a timing summary for the completed run: total wall time, per-device
 * compute time, and total file-write time.
 */
static void record_timing(oskar_BeamPattern* h)
{
    int dev = 0;
    oskar_log_section(h->log, 'M', "Simulation timing");

    /* Overall wall-clock time for the simulation. */
    oskar_log_value(h->log, 'M', 0, "Total wall time", "%.3f s",
            oskar_timer_elapsed(h->tmr_sim));

    /* Per-device compute times. */
    for (dev = 0; dev < h->num_devices; ++dev)
    {
        oskar_Timer* compute_timer = h->d[dev].tmr_compute;
        oskar_log_value(h->log, 'M', 0, "Compute", "%.3f s [Device %i]",
                oskar_timer_elapsed(compute_timer), dev);
    }

    /* Time spent writing output files. */
    oskar_log_value(h->log, 'M', 0, "Write", "%.3f s",
            oskar_timer_elapsed(h->tmr_write));
}
1024
1025
/*
 * Returns the number of decimal digits needed to display 'value'
 * (1 for zero), used to align progress-message counters.
 *
 * The original ternary chain capped the result at 6, producing wrong
 * field widths for values >= 1,000,000; this loop form is correct for
 * the full range of unsigned int.
 */
static unsigned int disp_width(unsigned int value)
{
    unsigned int width = 1u;
    while (value >= 10u)
    {
        value /= 10u;
        ++width;
    }
    return width;
}
1032
1033
1034 #ifdef __cplusplus
1035 }
1036 #endif
1037