1!-----------------------------------------------------------------------------
2!
3!  Copyright (C) 1997-2013 Krzysztof M. Gorski, Eric Hivon,
4!                          Benjamin D. Wandelt, Anthony J. Banday,
5!                          Matthias Bartelmann, Hans K. Eriksen,
6!                          Frode K. Hansen, Martin Reinecke
7!
8!
9!  This file is part of HEALPix.
10!
11!  HEALPix is free software; you can redistribute it and/or modify
12!  it under the terms of the GNU General Public License as published by
13!  the Free Software Foundation; either version 2 of the License, or
14!  (at your option) any later version.
15!
16!  HEALPix is distributed in the hope that it will be useful,
17!  but WITHOUT ANY WARRANTY; without even the implied warranty of
18!  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19!  GNU General Public License for more details.
20!
21!  You should have received a copy of the GNU General Public License
22!  along with HEALPix; if not, write to the Free Software
23!  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24!
25!  For more information about HEALPix see http://healpix.sourceforge.net
26!
27!-----------------------------------------------------------------------------
28! template for routine SP/DP overloading for module alm_tools
29
30! K M A P   : map kind                 either SP or DP
31! K A L M C : alm kind (complex)       either SPC or DPC
32! K A L M   : alm related and cl kind  either SP or DP
33!
34! K L O A D : suffixe of routine name, to be replaced by either s (SP) or d (DP)
35  !   subroutine alm2map_sc
36  !   subroutine alm2map_spin
37  !   subroutine alm2map_sc_pre
38  !   subroutine alm2map_pol
39  !   subroutine alm2map_pol_pre1
40  !   subroutine alm2map_pol_pre2
41  !   subroutine alm2map_sc_der
42  !   subroutine alm2map_pol_der
43  !
44  !   subroutine map2alm_sc
45  !   subroutine map2alm_spin
46  !   subroutine map2alm_sc_pre
47  !   subroutine map2alm_pol
48  !   subroutine map2alm_pol_pre1
49  !   subroutine map2alm_pol_pre2
50  !
51  !   subroutine map2alm_iterative
52  !
53  !   subroutine alter_alm
54  !   subroutine create_alm
55  !   subroutine alm2cl
56  !
57  !   subroutine rotate_alm
58  ! ===========================
59! manage preprocessing variables:
60! libsharp is used, unless DONT_USE_SHARP is set
61#ifndef DONT_USE_SHARP
62#define USE_SHARP
63#endif
64  ! ===========================
65  !**************************************************************************
66  !
67  !             ALM2MAP
68  !
69  !**************************************************************************
70!     computes a map form its alm for the HEALPIX pixelisation
71!     map(theta,phi) = sum_l_m a_lm Y_lm(theta,phi)
72!                    = sum_m {e^(i*m*phi) sum_l a_lm*lambda_lm(theta)}
73!
74!     where Y_lm(theta,phi) = lambda(theta) * e^(i*m*phi)
75!
76!     * the recurrence of Ylm is the standard one (cf Num Rec)
77!     * the sum over m is done by FFT
78  !=======================================================================
79  subroutine alm2map_sc_wrapper_KLOAD(nsmax, nlmax, nmmax, alm, map, zbounds,plm)
80    !=======================================================================
81    !     gfortran gets confused between plm and zbounds
82    !=======================================================================
83    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
84    complex(KALMC), intent(IN),  dimension(1:1,0:nlmax,0:nmmax) :: alm
85    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
86    real(DP),     intent(IN),  dimension(1:2),        optional :: zbounds
87    real(DP),     intent(IN),  dimension(0:),         optional :: plm
88
89    if (present(plm)) then
90       if (present(zbounds)) then
91          if (       (zbounds(1)+1.0_dp) >  EPSILON(1.0_DP) &
92               &.or. (zbounds(2)-1.0_dp) < -EPSILON(1.0_DP)) then ! active zbounds
93             call fatal_error('ALM2MAP: ZBOUNDS and PLM not available simultaneously')
94          endif
95       else
96          call alm2map_sc_pre_KLOAD(nsmax, nlmax, nmmax, alm, map, plm)
97       endif
98    else
99       if (present(zbounds)) then
100          call alm2map_sc_KLOAD(nsmax, nlmax, nmmax, alm, map, zbounds=zbounds)
101       else
102          call alm2map_sc_KLOAD(nsmax, nlmax, nmmax, alm, map)
103       endif
104    endif
105    return
106  end subroutine alm2map_sc_wrapper_KLOAD
107  !=======================================================================
108  subroutine alm2map_sc_KLOAD(nsmax, nlmax, nmmax, alm, map, zbounds)
109    !=======================================================================
110    !     computes a map form its alm for the HEALPIX pixelisation
111    !      for the Temperature field
112    !=======================================================================
113    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
114    complex(KALMC), intent(IN),  dimension(1:1,0:nlmax,0:nmmax) :: alm
115    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
116    real(DP),     intent(IN),  dimension(1:2),         optional :: zbounds
117
118    real(DP), dimension(1:2)         :: zbounds_in
119#ifdef USE_SHARP
120    zbounds_in = (/-1.d0 , 1.d0/)
121    if (present(zbounds)) zbounds_in = zbounds
122
123    call sharp_hp_alm2map_x_KLOAD(nsmax,nlmax,nmmax,alm,map,zbounds_in)
124#else
125
126    integer(I4B) :: l, m, ith                          ! alm related
127    integer(I8B) :: istart_south, istart_north, npix   ! map related
128    integer(I4B) :: nrings, nphmx
129
130    integer(I4B)                              :: par_lm
131    real(DP)                                  :: b_er, b_ei, b_or, b_oi
132    real(DP),     dimension(:,:), allocatable :: dalm
133    real(DP),     dimension(:),   allocatable :: lam_lm
134    real(DP),     dimension(:),   allocatable :: mfac
135    real(DP),     dimension(:,:), allocatable :: recfac
136
137    integer(i4b)                                :: ll, l_min, l_start
138    complex(DPC), dimension(:,:), allocatable :: b_north, b_south
139    real(DP),     dimension(:),   allocatable :: ring
140    integer(i4b)             :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
141    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
142    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
143    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth
144
145    character(LEN=*), parameter :: code = 'ALM2MAP'
146    integer(I4B) :: status
147    !=======================================================================
148
149    if (present(zbounds)) then
150       call fatal_error(code//&
151            &': zbounds not supported if DONT_USE_SHARP is set at compilation.')
152    endif
153
154    ! Healpix definitions
155    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
156    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
157    nphmx  = 4*nsmax           ! maximum number of pixels/ring
158
159    !     --- allocates space for arrays ---
160    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
161    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
162
163    allocate(mfac(0:nmmax),stat = status)
164    call assert_alloc(status,code,'mfac')
165
166    allocate(b_north(0:nmmax, 0:chunksize-1),stat = status)
167    call assert_alloc(status,code,'b_north')
168
169    allocate(b_south(0:nmmax, 0:chunksize-1),stat = status)
170    call assert_alloc(status,code,'b_south')
171
172    if (.not.do_openmp()) then
173       allocate(recfac(0:1,0:nlmax),stat = status)
174       call assert_alloc(status,code,'recfac')
175       allocate(dalm(0:1,0:nlmax), stat = status)
176       call assert_alloc(status,code,'dalm')
177       allocate(lam_lm(0:nlmax), stat = status)
178       call assert_alloc(status,code,'lam_lm')
179       allocate(ring(0:nphmx-1),stat = status)
180       call assert_alloc(status,code,'ring')
181    endif
182    !     ------------ initiate variables and arrays ----------------
183
184    call gen_mfac(nmmax,mfac)
185
186    call init_rescale()
187    map = 0.0 ! set the whole map to zero
188
189    ! loop on chunks
190    do ichunk = 0, nchunks-1
191       lchk = ichunk * chunksize + 1
192       uchk = min(lchk+chunksize - 1, nrings)
193
194       do ith = lchk, uchk
195          ithl = ith - lchk !local index
196          ! get pixel location information
197          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
198       enddo
199       !        -----------------------------------------------------
200       !        for each theta, and each m, computes
201       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
202       !        ------------------------------------------------------
203       !        lambda_mm tends to go down when m increases (risk of underflow)
204       !        lambda_lm tends to go up   when l increases (risk of overflow)
205
206
207       b_north(:,:) = 0_dpc ! pad with zeros
208       b_south(:,:) = 0_dpc
209
210!$OMP parallel default(none) &
211!$OMP shared(nlmax, nmmax, lchk, uchk, rescale_tab, &
212!$OMP    cth, sth, mfac, alm, b_north, b_south) &
213!$OMP private(recfac, dalm, lam_lm, b_er, b_ei, b_or, b_oi, status, ll, l, m, ithl, l_min, l_start)
214
215       if (do_openmp()) then
216          allocate(recfac(0:1,0:nlmax),stat = status)
217          call assert_alloc(status,code,'recfac')
218          allocate(dalm(0:1,0:nlmax), stat = status)
219          call assert_alloc(status,code,'dalm')
220          allocate(lam_lm(0:nlmax), stat = status)
221          call assert_alloc(status,code,'lam_lm')
222       endif
223
224!$OMP do schedule(dynamic,1)
225       do m = 0, nmmax
226          ! generate recursion factors (recfac) for Ylm of degree m
227          call gen_recfac(nlmax, m, recfac)
228
229          ! extract needed alm under memory and CPU efficient form
230          do ll = m, nlmax
231             dalm(0,ll) =  real(alm(1,ll,m),kind=dp)
232             dalm(1,ll) = aimag(alm(1,ll,m))
233          enddo
234          do ithl = 0, uchk - lchk
235             l_min = l_min_ylm(m, sth(ithl))
236             if (nlmax >= l_min) then ! skip calculations when Ylm too small
237                ! compute lam_lm(theta) for all l>=m
238                call do_lam_lm(nlmax, m, cth(ithl), sth(ithl), mfac(m), recfac, lam_lm)
239                b_er = 0.0_dp ; b_ei = 0.0_dp ; b_or = 0.0_dp ; b_oi = 0.0_dp
240                ! do the product a_lm * Y_lm for all l>=m
241                l_start = max(m, l_min) ! even values of (l+m)
242                if (mod(m+l_start,2) == 1) l_start = l_start + 1
243                do l = l_start, nlmax, 2
244                   b_er = b_er + lam_lm(l) * dalm(0,l)
245                   b_ei = b_ei + lam_lm(l) * dalm(1,l)
246                enddo
247
248                l_start = max(m+1, l_min) ! odd values of (l+m)
249                if (mod(m+l_start,2) == 0) l_start = l_start + 1
250                do l = l_start, nlmax, 2
251                   b_or = b_or + lam_lm(l) * dalm(0,l)
252                   b_oi = b_oi + lam_lm(l) * dalm(1,l)
253                enddo
254
255                b_north(m,ithl) = cmplx(b_er + b_or, b_ei + b_oi, kind=DP) ! north: Even + Odd
256                b_south(m,ithl) = cmplx(b_er - b_or, b_ei - b_oi, kind=DP) ! south: Even - Odd
257             endif ! test on nlmax
258          enddo ! loop on rings (ithl)
259       enddo ! loop on m
260!$OMP end do
261       if (do_openmp()) then
262          deallocate (recfac,dalm,lam_lm)
263       endif
264!$OMP end parallel
265
266!$OMP parallel default(none) &
267!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
268!$OMP      lchk, uchk, b_north, b_south, nph, startpix, kphi0, map) &
269!$OMP  private(ithl, nphl, istart_north, istart_south, &
270!$OMP      ith, ring, status)
271       if (do_openmp()) then
272          allocate(ring(0:nphmx-1),stat = status)
273          call assert_alloc(status,code,'ring')
274       endif
275!$OMP do schedule(dynamic,1)
276          !        ---------------------------------------------------------------
277          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
278          !        ---------------------------------------------------------------
279       do ithl = 0, uchk - lchk
280          nphl = nph(ithl)
281          istart_north = startpix(ithl)
282          call ring_synthesis(nsmax,nlmax,nmmax,b_north(0,ithl),nphl,ring,kphi0(ithl))   ! north hemisph. + equator
283          !map(istart_north:istart_north+nphl-1) = ring(0:nphl-1)
284          call sub_ring2map(ring, map, istart_north, nphl)
285       enddo ! loop on ithl
286!$OMP end do
287!$OMP do schedule(dynamic,1)
288       do ithl = 0, uchk - lchk
289          nphl = nph(ithl)
290          istart_south = npix-startpix(ithl)-nphl
291          ith  = ithl + lchk
292          if (ith < nrings) then
293             call ring_synthesis(nsmax,nlmax,nmmax,b_south(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
294             !map(istart_south:istart_south+nphl-1) = ring(0:nphl-1)
295             call sub_ring2map(ring, map, istart_south, nphl)
296          endif
297       enddo ! loop on ithl
298!$OMP end do
299       if (do_openmp()) then
300          deallocate(ring)
301       endif
302!$OMP end parallel
303    enddo    ! loop on chunks
304
305    !     --------------------
306    !     free memory and exit
307    !     --------------------
308    if (.not.do_openmp()) then
309       deallocate (ring,recfac,lam_lm,dalm)
310    endif
311    deallocate(mfac)
312    deallocate(b_north, b_south)
313    return
314#endif
315  end subroutine alm2map_sc_KLOAD
316
317  !=======================================================================
318  subroutine alm2map_spin_KLOAD(nsmax, nlmax, nmmax, spin, alm, map, zbounds)
319    !=======================================================================
320    !     computes a map form its alm for the HEALPIX pixelisation
321    !      for the Temperature field
322    !=======================================================================
323    integer(I4B),   intent(IN)                   :: nsmax, nlmax, nmmax, spin
324    complex(KALMC), intent(IN),  dimension(1:,0:,0:) :: alm
325    real(KMAP),     intent(OUT), dimension(0:,1:) :: map
326    real(DP),       intent(IN),  dimension(1:2),   optional :: zbounds
327
328    real(DP), dimension(1:2)         :: zbounds_in
329    integer(I4B) :: l, m, ith                          ! alm related
330    integer(I8B) :: istart_south, istart_north, npix   ! map related
331    integer(I4B) :: nrings, nphmx
332
333    real(DP),     dimension(1:4)              :: b_even, b_odd
334    real(DP),     dimension(:,:), allocatable :: dalm
335    real(DP),     dimension(:,:), allocatable :: lam_lm_spin
336    real(DP),     dimension(:),   allocatable :: mfac, mfac_spin
337    real(DP),     dimension(:,:), allocatable :: recfac_spin
338
339    integer(i4b)                                :: ll, l_min, l_start
340    complex(DPC), dimension(:,:), allocatable :: b_north1, b_south1
341    complex(DPC), dimension(:,:), allocatable :: b_north2, b_south2
342    real(DP),     dimension(:),   allocatable :: ring
343    integer(i4b)  :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
344    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
345    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
346    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth
347    complex(DPC) :: junk_sum, junk_dif
348
349    character(LEN=*), parameter :: code = 'ALM2MAP_SPIN'
350    integer(I4B) :: status
351    logical(LGT) :: even_spin
352    integer(I4B) :: aspin
353
354#ifdef USE_SHARP
355    zbounds_in = (/-1.d0 , 1.d0/)
356    if (present(zbounds)) zbounds_in = zbounds
357
358    aspin = abs(spin)
359    if ((aspin>0).and.(aspin<=100)) then
360      call sharp_hp_alm2map_spin_x_KLOAD(nsmax,nlmax,nmmax,aspin, &
361        alm(1:2,0:nlmax,0:nmmax),map(0:12*nsmax*nsmax-1,1:2), zbounds_in)
362      return
363    endif
364#endif
365    !=======================================================================
366
367    if (present(zbounds)) then
368       call fatal_error(code//&
369            &': zbounds not supported if DONT_USE_SHARP is set at compilation.')
370    endif
371
372    ! Healpix definitions
373    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
374    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
375    nphmx  = 4*nsmax           ! maximum number of pixels/ring
376
377    !     --- allocates space for arrays ---
378    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
379    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
380
381    allocate(mfac(0:nmmax),stat = status)
382    call assert_alloc(status,code,'mfac')
383
384    allocate(mfac_spin(0:nmmax),stat = status)
385    call assert_alloc(status,code,'mfac_spin')
386
387    allocate(b_north1(0:nmmax, 0:chunksize-1),stat = status)
388    call assert_alloc(status,code,'b_north1')
389
390    allocate(b_south1(0:nmmax, 0:chunksize-1),stat = status)
391    call assert_alloc(status,code,'b_south1')
392
393    allocate(b_north2(0:nmmax, 0:chunksize-1),stat = status)
394    call assert_alloc(status,code,'b_north2')
395
396    allocate(b_south2(0:nmmax, 0:chunksize-1),stat = status)
397    call assert_alloc(status,code,'b_south2')
398
399    even_spin = (iand(abs(spin), 1) == 0)
400
401    if (.not.do_openmp()) then
402       allocate(ring(0:nphmx-1),stat = status)
403       call assert_alloc(status,code,'ring')
404       allocate(dalm(0:3,0:nlmax), stat = status)
405       call assert_alloc(status,code,'dalm')
406       allocate(recfac_spin(0:2,0:nlmax),stat = status)
407       call assert_alloc(status,code,'recfac_spin')
408       allocate(lam_lm_spin(1:2,0:nlmax), stat = status)
409       call assert_alloc(status,code,'lam_lm_spin')
410    endif
411    !     ------------ initiate variables and arrays ----------------
412
413    call gen_mfac(nmmax,mfac)
414    call gen_mfac_spin(nmmax, spin, mfac_spin)
415
416    call init_rescale()
417    map = 0.0_KMAP ! set the whole map to zero
418
419    ! loop on chunks
420    do ichunk = 0, nchunks-1
421       lchk = ichunk * chunksize + 1
422       uchk = min(lchk+chunksize - 1, nrings)
423
424       do ith = lchk, uchk
425          ithl = ith - lchk !local index
426          ! get pixel location information
427          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
428       enddo
429
430       b_north1(:,:) = 0_dpc ! pad with zeros
431       b_south1(:,:) = 0_dpc
432       b_north2(:,:) = 0_dpc ! pad with zeros
433       b_south2(:,:) = 0_dpc
434
435!$OMP parallel default(none) &
436!$OMP shared(nlmax, nmmax, spin, lchk, uchk, rescale_tab, even_spin, &
437!$OMP    cth, sth, mfac, mfac_spin, alm, b_north1, b_south1, b_north2, b_south2) &
438!$OMP private(recfac_spin, dalm, junk_sum, junk_dif, lam_lm_spin, b_even, b_odd, status, ll, l, m, ithl, l_min, l_start)
439
440       if (do_openmp()) then
441          allocate(dalm(0:3,0:nlmax), stat = status)
442          call assert_alloc(status,code,'dalm')
443          allocate(recfac_spin(0:2,0:nlmax),stat = status)
444          call assert_alloc(status,code,'recfac_spin')
445          allocate(lam_lm_spin(1:2,0:nlmax), stat = status)
446          call assert_alloc(status,code,'lam_lm_spin')
447       endif
448
449!$OMP do schedule(dynamic,1)
450       do m = 0, nmmax
451          ! generate recursion factors (recfac_spin) for Ylm,s of degree m and spin s
452          call gen_recfac_spin(nlmax, m, spin, recfac_spin)
453          ! extract needed alm under memory and CPU efficient form
454          ! input alms are   -(a[s]+ (-1)^s a[-s])/2    and -(a[s]- (-1)^s a[-s])/(2i)
455          ! turn them into    (a[s]+ a[-s])/2           and  (a[s]- a[-s])/2
456          ! these new definitions remove the need for (-1)^s factors from now on
457          if (even_spin) then
458             do ll = m, nlmax
459                dalm(0,ll) =   -real(alm(1,ll,m),kind=dp) ; dalm(1,ll) = -aimag(alm(1,ll,m))
460                dalm(2,ll) =  +aimag(alm(2,ll,m)) ;         dalm(3,ll) =  -real(alm(2,ll,m),kind=dp)
461             enddo
462          else
463             do ll = m, nlmax
464                dalm(0,ll) =  +aimag(alm(2,ll,m)) ;         dalm(1,ll) =  -real(alm(2,ll,m),kind=dp)
465                dalm(2,ll) =   -real(alm(1,ll,m),kind=dp) ; dalm(3,ll) = -aimag(alm(1,ll,m))
466             enddo
467          endif
468          ! map1 +/- i map2 = a[+/-s] Y [+/-s]
469          ! Y_even = (Y[s] + Y[-s] ) / 2  ; Y_odd  = (Y[s] - Y[-s] ) / 2
470          ! hsum = (a[s]+a[-s])/2, hdiff = (a[s]-a[-s])/2
471          ! map1 = Y_even * hsum    + Y_odd * hdiff
472          ! map2 = Y_even * hdiff/i + Y_odd * hsum /i
473          do ithl = 0, uchk - lchk
474             l_min = l_min_ylm(m, sth(ithl))
475             if (nlmax >= l_min) then ! skip calculations when Ylm too small
476                ! compute lam_lm(theta) for all l>=m
477                call do_lam_lm_spin(nlmax, m, spin, cth(ithl), sth(ithl), mfac(m), mfac_spin(m), recfac_spin, lam_lm_spin)
478                b_even(1:4) = 0.0_dp
479                b_odd(1:4)  = 0.0_dp
480                ! do the product a_lm * Y_lm for all l>=m
481                l_start = max(m, l_min) ! even values of (l+m)
482                if (mod(m+l_start,2) == 1) l_start = l_start + 1
483                do l = l_start, nlmax, 2
484                   b_even(1:2) = b_even(1:2) + lam_lm_spin(1,l) * dalm(0:1,l)
485                   b_even(3)   = b_even(3)   + lam_lm_spin(1,l) * dalm(3,l)
486                   b_even(4)   = b_even(4)   - lam_lm_spin(1,l) * dalm(2,l)
487                   b_odd(1:2)  = b_odd(1:2)  + lam_lm_spin(2,l) * dalm(2:3,l)
488                   b_odd(3)    = b_odd(3)    + lam_lm_spin(2,l) * dalm(1,l)
489                   b_odd(4)    = b_odd(4)    - lam_lm_spin(2,l) * dalm(0,l)
490                enddo
491
492                l_start = max(m+1, l_min) ! odd values of (l+m)
493                if (mod(m+l_start,2) == 0) l_start = l_start + 1
494                do l = l_start, nlmax, 2
495                   b_odd(1:2)  = b_odd(1:2)  + lam_lm_spin(1,l) * dalm(0:1,l)
496                   b_odd(3)    = b_odd(3)    + lam_lm_spin(1,l) * dalm(3,l)
497                   b_odd(4)    = b_odd(4)    - lam_lm_spin(1,l) * dalm(2,l)
498                   b_even(1:2) = b_even(1:2) + lam_lm_spin(2,l) * dalm(2:3,l)
499                   b_even(3)   = b_even(3)   + lam_lm_spin(2,l) * dalm(1,l)
500                   b_even(4)   = b_even(4)   - lam_lm_spin(2,l) * dalm(0,l)
501                enddo
502
503                b_north1(m,ithl) = cmplx(b_even(1) + b_odd(1),  b_even(2) + b_odd(2), kind=DP) ! north: Even + Odd
504                b_south1(m,ithl) = cmplx(b_even(1) - b_odd(1),  b_even(2) - b_odd(2), kind=DP) ! south: Even - Odd
505                b_north2(m,ithl) = cmplx(b_even(3) + b_odd(3),  b_even(4) + b_odd(4), kind=DP) ! north: Even + Odd
506                b_south2(m,ithl) = cmplx(b_even(3) - b_odd(3),  b_even(4) - b_odd(4), kind=DP) ! south: Even - Odd
507             endif ! test on nlmax
508          enddo ! loop on rings (ithl)    allocate(mfac(0:nmmax),stat = status)
509    call assert_alloc(status,code,'mfac')
510
511
512       enddo ! loop on m
513!$OMP end do
514       if (do_openmp()) then
515          deallocate (recfac_spin,dalm,lam_lm_spin)
516       endif
517!$OMP end parallel
518
519!$OMP parallel default(none) &
520!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
521!$OMP      lchk, uchk, b_north1, b_south1, b_north2, b_south2, nph, startpix, kphi0, map) &
522!$OMP  private(ithl, nphl, istart_north, istart_south, &
523!$OMP      ith, ring, status)
524       if (do_openmp()) then
525          allocate(ring(0:nphmx-1),stat = status)
526          call assert_alloc(status,code,'ring')
527       endif
528!$OMP do schedule(dynamic,1)
529          !        ---------------------------------------------------------------
530          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
531          !        ---------------------------------------------------------------
532       do ithl = 0, uchk - lchk
533          nphl = nph(ithl)
534          istart_north = startpix(ithl)
535          call ring_synthesis(nsmax,nlmax,nmmax,b_north1(0,ithl),nphl,ring,kphi0(ithl))   ! north hemisph. + equator
536          !map(istart_north:istart_north+nphl-1,1) = ring(0:nphl-1)
537          call sub_ring2map(ring, map, 1, istart_north, nphl)
538          !!print*,ithl,maxval(abs(ring(0:nphl-1)))
539          call ring_synthesis(nsmax,nlmax,nmmax,b_north2(0,ithl),nphl,ring,kphi0(ithl))   ! north hemisph. + equator
540          !map(istart_north:istart_north+nphl-1,2) = ring(0:nphl-1)
541          call sub_ring2map(ring, map, 2, istart_north, nphl)
542       enddo ! loop on ithl
543!$OMP end do
544!$OMP do schedule(dynamic,1)
545       do ithl = 0, uchk - lchk
546          nphl = nph(ithl)
547          istart_south = npix-startpix(ithl)-nphl
548          ith  = ithl + lchk
549          if (ith < nrings) then
550             call ring_synthesis(nsmax,nlmax,nmmax,b_south1(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
551             !map(istart_south:istart_south+nphl-1,1) = ring(0:nphl-1)
552             call sub_ring2map(ring, map, 1, istart_south, nphl)
553             call ring_synthesis(nsmax,nlmax,nmmax,b_south2(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
554             !map(istart_south:istart_south+nphl-1,2) = ring(0:nphl-1)
555             call sub_ring2map(ring, map, 2, istart_south, nphl)
556          endif
557       enddo ! loop on ithl
558!$OMP end do
559       if (do_openmp()) then
560          deallocate(ring)
561       endif
562!$OMP end parallel
563    enddo    ! loop on chunks
564
565    !     --------------------
566    !     free memory and exit
567    !     --------------------
568    if (.not.do_openmp()) then
569       deallocate (ring,recfac_spin,lam_lm_spin,dalm)
570    endif
571    deallocate(mfac, mfac_spin)
572    deallocate(b_north1, b_south1)
573    deallocate(b_north2, b_south2)
574    return
575  end subroutine alm2map_spin_KLOAD
576
577
578
579  !=======================================================================
580  subroutine alm2map_sc_pre_KLOAD(nsmax, nlmax, nmmax, alm, map, plm)
581    !=======================================================================
582    !     computes a map form its alm for the HEALPIX pixelisation
583    !      for the Temperature field, with precomputed Ylm
584    !=======================================================================
585    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
586    complex(KALMC), intent(IN),  dimension(1:1,0:nlmax,0:nmmax) :: alm
587    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
588    real(DP),     intent(IN),  dimension(0:)                  :: plm
589
590    integer(I4B) :: l, m, ith
591    integer(I8B) :: istart_south, istart_north, npix
592    integer(I4B) :: nrings, nphmx
593
594    integer(I8B) :: n_lm, n_plm, i_mm
595    complex(DPC), dimension(-1:1)             :: b_ns
596    real(DP),     dimension(:,:), allocatable :: dalm
597    integer(i4b)                              :: ll, l_min, l_start
598    real(DP)                                  :: cth
599    complex(DPC), dimension(:,:), allocatable :: b_north, b_south
600    real(DP),     dimension(:),   allocatable :: ring
601    integer(i4b)             :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
602    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
603    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
604    real(DP),     dimension(0:SMAXCHK-1) :: sth
605
606    character(LEN=*), parameter :: code = 'ALM2MAP'
607    integer(I4B) :: status
608    !=======================================================================
609
610    ! Healpix definitions
611    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
612    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
613    nphmx  = 4*nsmax           ! maximum number of pixels/ring
614    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
615    n_plm  = n_lm * nrings
616
617    !     --- allocates space for arrays ---
618    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
619    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
620
621    allocate(b_north(0:nmmax, 0:chunksize-1),stat = status)
622    call assert_alloc(status,code,'b_north')
623
624    allocate(b_south(0:nmmax, 0:chunksize-1),stat = status)
625    call assert_alloc(status,code,'b_south')
626
627    if (.not. do_openmp()) then
628       allocate(dalm(0:1,0:nlmax), stat = status)
629       call assert_alloc(status,code,'dalm')
630       allocate(ring(0:nphmx-1),stat = status)
631       call assert_alloc(status,code,'ring')
632    endif
633    !     ------------ initiate variables and arrays ----------------
634
635    map = 0.0 ! set the whole map to zero
636
637    ! loop on chunks
638    do ichunk = 0, nchunks-1
639       lchk = ichunk * chunksize + 1
640       uchk = min(lchk+chunksize - 1, nrings)
641
642       do ith = lchk, uchk
643          ithl = ith - lchk !local index
644          ! get pixel location information
645          call get_pixel_layout(nsmax, ith, cth, sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
646       enddo
647       !        -----------------------------------------------------
648       !        for each theta, and each m, computes
649       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
650       !        ------------------------------------------------------
651
652       b_north(:,:) = 0_dpc ! pad with zeros
653       b_south(:,:) = 0_dpc
654
655!$OMP parallel default(none) &
656!$OMP shared(nlmax, nmmax, sth, lchk, uchk, plm, alm, b_north, b_south, n_lm) &
657!$OMP private(dalm, b_ns, status, m, ith, ithl, l_min, l_start, l, ll, i_mm)
658
659       if (do_openmp()) then
660          allocate(dalm(0:1,0:nlmax), stat = status)
661          call assert_alloc(status,code,'dalm')
662       endif
663
664!$OMP do schedule(dynamic,1)
665       do m = 0, nmmax
666
667          ! extract needed alm under memory and CPU efficient form
668          do ll = m, nlmax
669             dalm(0,ll) =  real(alm(1,ll,m),kind=dp)
670             dalm(1,ll) = aimag(alm(1,ll,m))
671          enddo
672          do ithl = 0, uchk - lchk
673             l_min = l_min_ylm(m, sth(ithl))
674             if (nlmax >= l_min) then ! skip calculations when Ylm too small
675                ith = ithl + lchk
676                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax + 3 - m)*m)/2 ! location of Ym,m for ring ith
677
678                !           ---------- l = m ----------
679
680                if (m >= l_min) then
681                   b_ns( 1) = cmplx(plm(i_mm) * dalm(0,m), plm(i_mm) * dalm(1,m), kind=DP)
682                   b_ns(-1) = 0.0_dpc
683                else
684                   b_ns = 0.0_dpc
685                endif
686
687                !           ---------- l > m ----------
688                l_start = max(m+1, l_min) ! odd values of (l+m)
689                if (mod(m+l_start,2) == 0) l_start = l_start+1
690                do l = l_start, nlmax, 2
691                   b_ns(-1) = b_ns(-1) + &
692                        & cmplx(plm(i_mm+l-m) * dalm(0,l), plm(i_mm+l-m)*dalm(1,l), kind=DP)
693                enddo ! loop on l
694
695                l_start = max(m+2, l_min) ! even values of (l+m)
696                if (mod(m+l_start,2) == 1) l_start = l_start+1
697                do l = l_start, nlmax, 2
698                   b_ns(1)  = b_ns(1) + &
699                        & cmplx(plm(i_mm+l-m) * dalm(0,l), plm(i_mm+l-m)*dalm(1,l),kind=DP)
700                enddo ! loop on l
701
702                b_north(m,ithl) = b_ns(1) + b_ns(-1)
703                b_south(m,ithl) = b_ns(1) - b_ns(-1)
704             endif ! test on nlmax
705          enddo ! loop on rings (ithl)
706       enddo ! loop on m
707!$OMP end do
708       if (do_openmp()) then
709          deallocate (dalm)
710       endif
711!$OMP end parallel
712
713!$OMP parallel default(none) &
714!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
715!$OMP      lchk, uchk, b_north, b_south, nph, startpix, kphi0, map) &
716!$OMP  private(ithl, nphl, istart_north, istart_south, &
717!$OMP      ith, ring, status)
718       if (do_openmp()) then
719          allocate(ring(0:nphmx-1),stat = status)
720          call assert_alloc(status,code,'ring')
721       endif
722!$OMP do schedule(dynamic,1)
723       do ithl = 0, uchk - lchk
724          nphl = nph(ithl)
725          istart_north = startpix(ithl)
726          istart_south = npix-istart_north-nphl
727          ith  = ithl + lchk
728          !        ---------------------------------------------------------------
729          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
730          !        ---------------------------------------------------------------
731          call ring_synthesis(nsmax,nlmax,nmmax,b_north(0,ithl),nphl,ring,kphi0(ithl))   ! north hemisph. + equator
732          map(istart_north:istart_north+nphl-1) = ring(0:nphl-1)
733
734          if (ith < nrings) then
735             call ring_synthesis(nsmax,nlmax,nmmax,b_south(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
736             map(istart_south:istart_south+nphl-1) = ring(0:nphl-1)
737          endif
738       enddo ! loop on ithl
739!$OMP end do
740       if (do_openmp()) then
741          deallocate(ring)
742       endif
743!$OMP end parallel
744    enddo    ! loop on chunks
745
746    !     --------------------
747    !     free memory and exit
748    !     --------------------
749    if (.not.do_openmp()) then
750       deallocate (ring,dalm)
751    endif
752    deallocate(b_north, b_south)
753    return
754  end subroutine alm2map_sc_pre_KLOAD
755
756  !=======================================================================
757  subroutine alm2map_pol_wrapper_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, zbounds, plm)
758    !=======================================================================
759    !     gfortran gets confused between plm and zbounds
760    !=======================================================================
761    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
762    complex(KALMC), intent(IN),  dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
763    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
764    real(DP),     intent(IN),  dimension(1:2),          optional :: zbounds
765    real(DP),     intent(IN),  dimension(0:), optional  :: plm
766
767    if (present(plm)) then
768       if (present(zbounds)) then
769          if (       (zbounds(1)+1.0_dp) >  EPSILON(1.0_DP) &
770               &.or. (zbounds(2)-1.0_dp) < -EPSILON(1.0_DP)) then ! active zbounds
771             call fatal_error('ALM2MAP: ZBOUNDS and PLM not available simultaneously')
772          endif
773       else
774          call alm2map_pol_pre1_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, plm)
775       endif
776    else
777       if (present(zbounds)) then
778          call alm2map_pol_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, zbounds=zbounds)
779       else
780          call alm2map_pol_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU)
781       endif
782    endif
783    return
784  end subroutine alm2map_pol_wrapper_KLOAD
785
786  !=======================================================================
787  subroutine alm2map_pol_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, zbounds)
788    !=======================================================================
789    !     computes a map form its alm for the HEALPIX pixelisation
790    !      for the Temperature+Polarization field
791    !=======================================================================
792    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
793    complex(KALMC), intent(IN),  dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
794    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
795    real(DP),     intent(IN),  dimension(1:2),          optional :: zbounds
796
797    real(DP), dimension(1:2)         :: zbounds_in
798#ifdef USE_SHARP
799    zbounds_in = (/-1.d0 , 1.d0/)
800    if (present(zbounds)) zbounds_in = zbounds
801
802    call sharp_hp_alm2map_pol_x_KLOAD(nsmax,nlmax,nmmax,alm_TGC,map_TQU,zbounds_in)
803#else
804
805    integer(I4B) :: l, m, ith                    ! alm related
806    integer(I8B) :: istart_south, istart_north, npix   ! map related
807    integer(I4B) :: nrings, nphmx
808
809    real(DP),     dimension(-3:8)             :: b_ns
810    real(DP),     dimension(:,:), allocatable :: recfac, dalm, lam_lm
811    real(DP),     dimension(:),   allocatable :: lam_fact, mfac
812
813    integer(i4b)                                :: ll, l_min, l_start
814    real(DP),     dimension(:),     allocatable :: normal_l
815    complex(DPC), dimension(:,:,:), allocatable :: b_TQU
816    complex(DPC), dimension(:),     allocatable :: bsub
817    real(DP),     dimension(:),     allocatable :: ring
818    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl, i
819    integer(I8B), dimension(0:SMAXCHK) :: startpix
820    integer(I4B), dimension(0:SMAXCHK) :: nph, kphi0
821    real(DP),     dimension(0:SMAXCHK) :: cth, sth
822!    real(sp) :: time0, time1, time2, time3, time4, tt1, tt2
823
824    character(LEN=*), parameter :: code = 'ALM2MAP'
825    integer(I4B) :: status
826    !=======================================================================
827
828    if (present(zbounds)) then
829       call fatal_error(code//&
830            &': zbounds not supported if DONT_USE_SHARP is set at compilation.')
831    endif
832
833    ! Healpix definitions
834    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
835    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
836    nphmx  = 4*nsmax           ! maximum number of pixels/ring
837
838    !     --- allocates space for arrays ---
839    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
840    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
841
842    allocate(mfac(0:nmmax),stat = status)
843    call assert_alloc(status,code,'mfac')
844
845    allocate(normal_l(0:nlmax),stat = status)
846    call assert_alloc(status,code,'normal_l')
847
848    allocate(b_TQU(1:6, 0:nmmax, 0:chunksize-1),stat = status)
849    call assert_alloc(status,code,'b_TQU')
850
851    if (.not. do_openmp()) then
852       allocate(recfac(0:1,0:nlmax), dalm(0:5,0:nlmax), lam_fact(0:nlmax),stat = status)
853       call assert_alloc(status,code,'recfac, dalm & lam_fact')
854       allocate(lam_lm(1:3,0:nlmax),stat = status)
855       call assert_alloc(status,code,'lam_lm')
856       allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
857       call assert_alloc(status,code,'ring & bsub')
858    endif
859
860    !     ------------ initiate variables and arrays ----------------
861
862    call gen_mfac(nmmax,mfac)
863
864    call init_rescale()
865    map_TQU = 0.0 ! set the whole map to zero
866    ! generate Polarization normalisation
867    call gen_normpol(nlmax, normal_l)
868
869    ! loop on chunks
870    do ichunk = 0, nchunks-1
871       lchk = ichunk * chunksize + 1
872       uchk = min(lchk+chunksize - 1, nrings)
873
874       do ith = lchk, uchk
875          ithl = ith - lchk !local index
876          ! get pixel location information
877          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
878       enddo
879       !        -----------------------------------------------------
880       !        for each theta, and each m, computes
881       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
882       !        ------------------------------------------------------
883
884
885       b_TQU = 0_dpc ! pad with zeros
886
887!$OMP parallel default(none) &
888!$OMP shared(nlmax, nmmax, lchk, uchk, &
889!$OMP    rescale_tab, &
890!$OMP    cth, sth, mfac, normal_l, alm_TGC, b_TQU) &
891!$OMP private(recfac, dalm, lam_fact, lam_lm, b_ns, status, &
892!$OMP   m, l, ll, ithl, l_min, l_start)
893
894       if (do_openmp()) then
895          ! allocate thread safe arrays
896          allocate(recfac(0:1,0:nlmax), dalm(0:5,0:nlmax), lam_fact(0:nlmax),stat = status)
897          call assert_alloc(status,code,'recfac, dalm & lam_fact')
898          allocate(lam_lm(1:3,0:nlmax),stat = status)
899          call assert_alloc(status,code,'lam_lm')
900       endif
901
902!$OMP do schedule(dynamic,1)
903       do m = 0, nmmax
904          ! generate recursion factors (recfac) for Ylm of degree m
905          call gen_recfac(nlmax, m, recfac)
906          ! generate Ylm relation factor for degree m
907          call gen_lamfac(nlmax, m, lam_fact)
908
909          ! extract needed alm under memory and CPU efficient form
910          do ll = m, nlmax
911             dalm(0,ll) =  real(alm_TGC(1,ll,m),kind=dp) ! T, real
912             dalm(1,ll) = aimag(alm_TGC(1,ll,m))         ! T, imag
913             dalm(2,ll) =  real(alm_TGC(2,ll,m),kind=dp)*normal_l(ll) ! G, real
914             dalm(3,ll) = aimag(alm_TGC(2,ll,m))        *normal_l(ll)
915             dalm(4,ll) =  real(alm_TGC(3,ll,m),kind=dp)*normal_l(ll) ! C, real
916             dalm(5,ll) = aimag(alm_TGC(3,ll,m))        *normal_l(ll)
917          enddo
918          do ithl = 0, uchk - lchk
919             l_min =  l_min_ylm(m, sth(ithl)) ! lowest l of non-negligible Ylm
920             if (nlmax >= l_min) then
921                ! compute lam_lm(p,theta) for all l>=m
922                call do_lam_lm_pol(nlmax, m, cth(ithl), sth(ithl), mfac(m), recfac, lam_fact, lam_lm)
923                   !      T =   alm_T * Ylm
924                   !      Q = - alm_E * Wlm - i alm_B * Xlm
925                   !      U = i alm_E * Xlm -   alm_B * Wlm
926
927                b_ns = 0.0_dp
928                l_start = max(m, l_min) ! even values of (l+m)
929                if (mod(m+l_start,2) == 1) l_start = l_start + 1
930                do l = l_start, nlmax, 2
931                   b_ns(3: 4) = b_ns(3: 4) + lam_lm(1,l) * dalm(0:1,l) ! T even
932                   b_ns(5: 8) = b_ns(5: 8) - lam_lm(2,l) * dalm(2:5,l) ! Q, U  even
933                   b_ns(-1)   = b_ns(-1)   + lam_lm(3,l) * dalm(5,l) ! Q odd
934                   b_ns( 0)   = b_ns( 0)   - lam_lm(3,l) * dalm(4,l)
935                   b_ns( 1)   = b_ns( 1)   - lam_lm(3,l) * dalm(3,l) ! U odd
936                   b_ns( 2)   = b_ns( 2)   + lam_lm(3,l) * dalm(2,l)
937                enddo
938
939                l_start = max(m+1, l_min) ! odd values of (l+m)
940                if (mod(m+l_start,2) == 0) l_start = l_start + 1
941                do l = l_start, nlmax, 2
942                   b_ns(-3:-2) = b_ns(-3:-2) + lam_lm(1,l) * dalm(0:1,l) ! T odd
943                   b_ns(-1: 2) = b_ns(-1: 2) - lam_lm(2,l) * dalm(2:5,l) ! Q, U  odd
944                   b_ns( 5)    = b_ns( 5)    + lam_lm(3,l) * dalm(5,l) ! Q even
945                   b_ns( 6)    = b_ns( 6)    - lam_lm(3,l) * dalm(4,l)
946                   b_ns( 7)    = b_ns( 7)    - lam_lm(3,l) * dalm(3,l) ! U even
947                   b_ns( 8)    = b_ns( 8)    + lam_lm(3,l) * dalm(2,l)
948                enddo
949
950                b_TQU(1,m,ithl) = cmplx(b_ns(3) + b_ns(-3), b_ns(4) + b_ns(-2), kind = DP) ! T north
951                b_TQU(4,m,ithl) = cmplx(b_ns(3) - b_ns(-3), b_ns(4) - b_ns(-2), kind = DP) ! T south
952                b_TQU(2,m,ithl) = cmplx(b_ns(5) + b_ns(-1), b_ns(6) + b_ns( 0), kind = DP) ! Q n
953                b_TQU(5,m,ithl) = cmplx(b_ns(5) - b_ns(-1), b_ns(6) - b_ns( 0), kind = DP) ! Q s
954                b_TQU(3,m,ithl) = cmplx(b_ns(7) + b_ns( 1), b_ns(8) + b_ns( 2), kind = DP) ! U n
955                b_TQU(6,m,ithl) = cmplx(b_ns(7) - b_ns( 1), b_ns(8) - b_ns( 2), kind = DP) ! U s
956             endif ! test on nlmax
957          enddo ! loop on rings (ithl)
958       enddo ! loop on m
959!$OMP end do
960       if (do_openmp()) deallocate (recfac,dalm, lam_fact, lam_lm)
961!$OMP end parallel
962
963
964!$OMP parallel default(none) &
965!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
966!$OMP      lchk, uchk, b_TQU, nph, startpix, kphi0, map_TQU) &
967!$OMP  private(ithl, nphl, istart_north, istart_south, &
968!$OMP      ith, ring, bsub, i, status)
969
970       if (do_openmp()) then
971          allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
972          call assert_alloc(status,code,'ring & bsub')
973       endif
974
975!$OMP do schedule(dynamic,1)
976!        call wall_clock_time(time2)
977!        tt2 = tt2 + time2 - time3
978       do ithl = 0, uchk - lchk
979          nphl = nph(ithl)
980          istart_north = startpix(ithl)
981          istart_south = npix-istart_north-nphl
982          ith  = ithl + lchk
983          !        ---------------------------------------------------------------
984          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
985          !        ---------------------------------------------------------------
986          do i=1,3
987             bsub = b_TQU(i,:,ithl)
988             call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl))   ! north hemisph. + equator
989             !map_TQU(istart_north:istart_north+nphl-1, i) = ring(0:nphl-1)
990             call sub_ring2map(ring, map_TQU, i, istart_north, nphl)
991          enddo
992
993          if (ith < nrings) then
994             do i=1,3
995                bsub = b_TQU(3+i,:,ithl)
996                call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
997                !map_TQU(istart_south:istart_south+nphl-1, i) = ring(0:nphl-1)
998                call sub_ring2map(ring, map_TQU, i, istart_south, nphl)
999             enddo
1000          endif
1001       enddo ! loop on ithl
1002!$OMP end do
1003       if (do_openmp()) deallocate(ring, bsub)
1004!$OMP end parallel
1005
1006!        call wall_clock_time(time3)
1007!        tt1 = tt1 + time3 - time2
1008
1009    enddo    ! loop on chunks
1010!     print*,'FFT:',tt1
1011!     print*,'the rest:',tt2
1012
1013    !     --------------------
1014    !     free memory and exit
1015    !     --------------------
1016    if (.not. do_openmp()) then
1017       deallocate(recfac, dalm, lam_fact, lam_lm, ring, bsub)
1018    endif
1019    deallocate(mfac, normal_l)
1020    deallocate(b_TQU)
1021    return
1022#endif
1023  end subroutine alm2map_pol_KLOAD
1024
1025  !=======================================================================
1026  subroutine alm2map_pol_pre1_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, plm)
1027    !=======================================================================
1028    !     computes a map form its alm for the HEALPIX pixelisation
1029    !      for the Temperature+Polarization field
1030    !=======================================================================
1031    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
1032    complex(KALMC), intent(IN),  dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
1033    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
1034    real(DP),     intent(IN),  dimension(0:) :: plm
1035    !real(DP),     intent(IN),  dimension(1:2),          optional :: zbounds
1036
1037    integer(I4B) :: l, m, ith
1038    integer(I8B) :: istart_south, istart_north, npix, n_lm, n_plm, i_mm
1039    integer(I4B) :: nrings, nphmx
1040
1041    integer(I4B)                              :: par_lm
1042    real(DP)            :: lam_lm, cth_ring, lambda_w, lambda_x, normal_m, lam_lm1m
1043    real(DP)                 :: fm2, fl, flm1
1044    real(DP)                 :: a_w, b_w, a_x, fm_on_s2, two_on_s2, two_cth_ring
1045    real(DP),     dimension(-3:8)             :: b_ns
1046    real(DP),     dimension(:,:), allocatable :: dalm
1047    real(DP),     dimension(:),   allocatable :: lam_fact
1048
1049    integer(i4b)                                :: ll, l_min
1050    real(DP),     dimension(:),     allocatable :: normal_l
1051    complex(DPC), dimension(:,:,:), allocatable :: b_TQU
1052    complex(DPC), dimension(:),     allocatable :: bsub
1053    real(DP),     dimension(:),     allocatable :: ring
1054    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl, i
1055    integer(I8B), dimension(0:SMAXCHK) :: startpix
1056    integer(I4B), dimension(0:SMAXCHK) :: nph, kphi0
1057    real(DP),     dimension(0:SMAXCHK) :: cth, sth
1058    real(DP),     dimension(0:SMAXCHK) :: one_on_s2, c_on_s2
1059
1060    character(LEN=*), parameter :: code = 'ALM2MAP'
1061    integer(I4B) :: status
1062    !=======================================================================
1063
1064    ! Healpix definitions
1065    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
1066    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
1067    nphmx  = 4*nsmax           ! maximum number of pixels/ring
1068    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
1069    n_plm  = n_lm * nrings
1070
1071    !     --- allocates space for arrays ---
1072    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
1073    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
1074
1075    allocate(normal_l(0:nlmax),stat = status)
1076    call assert_alloc(status,code,'normal_l')
1077
1078    allocate(b_TQU(1:6, 0:nmmax, 0:chunksize-1),stat = status)
1079    call assert_alloc(status,code,'b_TQU')
1080
1081    if (.not. do_openmp()) then
1082       allocate(dalm(0:5,0:nlmax), lam_fact(0:nlmax),stat = status)
1083       call assert_alloc(status,code,'dalm & lam_fact')
1084       allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
1085       call assert_alloc(status,code,'ring & bsub')
1086    endif
1087
1088    !     ------------ initiate variables and arrays ----------------
1089
1090    map_TQU = 0.0 ! set the whole map to zero
1091    ! generate Polarization normalisation
1092    call gen_normpol(nlmax, normal_l)
1093
1094    ! loop on chunks
1095    do ichunk = 0, nchunks-1
1096       lchk = ichunk * chunksize + 1
1097       uchk = min(lchk+chunksize - 1, nrings)
1098
1099       do ith = lchk, uchk
1100          ithl = ith - lchk !local index
1101          ! get pixel location information
1102          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
1103          one_on_s2(ithl) =    1.0_dp / sth(ithl)**2
1104            c_on_s2(ithl) = cth(ithl) / sth(ithl)**2
1105       enddo
1106       !        -----------------------------------------------------
1107       !        for each theta, and each m, computes
1108       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
1109       !        ------------------------------------------------------
1110       !        lambda_mm tends to go down when m increases (risk of underflow)
1111       !        lambda_lm tends to go up   when l increases (risk of overflow)
1112
1113
1114       b_TQU = 0_dpc ! pad with zeros
1115
1116!$OMP parallel default(none) &
1117!$OMP shared(nlmax, nmmax, lchk, uchk, n_lm, ith, &
1118!$OMP    cth, sth, normal_l, alm_TGC, b_TQU, plm, one_on_s2, c_on_s2) &
1119!$OMP private(dalm, lam_fact, b_ns, status, i_mm, &
1120!$OMP   m, ll, fm2, normal_m, ithl, l_min, &
1121!$OMP   par_lm, lam_lm, lam_lm1m, lambda_w, lambda_x, &
1122!$OMP   cth_ring, fm_on_s2, two_on_s2, two_cth_ring, a_w, a_x, b_w,  &
1123!$OMP   l, fl, flm1)
1124
1125       if (do_openmp()) then
1126          ! allocate thread safe arrays
1127          allocate(dalm(0:5,0:nlmax), lam_fact(0:nlmax),stat = status)
1128          call assert_alloc(status,code,'dalm & lam_fact')
1129       endif
1130
1131!$OMP do schedule(dynamic,1)
1132       do m = 0, nmmax
1133          ! generate Ylm relation factor for degree m
1134          call gen_lamfac(nlmax, m, lam_fact)
1135          fm2 = real(m * m, kind = DP)
1136          normal_m = (2.0_dp * m) * (1 - m)
1137
1138          ! extract needed alm under memory and CPU efficient form
1139          do ll = m, nlmax
1140             dalm(0,ll) =  real(alm_TGC(1,ll,m),kind=dp) ! T, real
1141             dalm(1,ll) = aimag(alm_TGC(1,ll,m))         ! T, imag
1142             dalm(2,ll) =  real(alm_TGC(2,ll,m),kind=dp)*normal_l(ll) ! G, real
1143             dalm(3,ll) = aimag(alm_TGC(2,ll,m))        *normal_l(ll)
1144             dalm(4,ll) =  real(alm_TGC(3,ll,m),kind=dp)*normal_l(ll) ! C, real
1145             dalm(5,ll) = aimag(alm_TGC(3,ll,m))        *normal_l(ll)
1146          enddo
1147          do ithl = 0, uchk - lchk
1148             l_min =  l_min_ylm(m, sth(ithl)) ! lowest l of non-negligible Ylm
1149             if (nlmax >= l_min) then
1150                ith = ithl + lchk
1151                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax + 3 - m)*m)/2 ! location of Ym,m for ring ith
1152
1153                !           ---------- l = m ----------
1154                par_lm = 3  ! = 3 * (-1)^(l+m)
1155                lam_lm = plm(i_mm)
1156
1157                if (m >= l_min) then ! skip Ymm if too small
1158                   lambda_w =  (normal_m * lam_lm)  * ( 0.5_dp - one_on_s2(ithl) )
1159                   lambda_x =  (normal_m * lam_lm)  *            c_on_s2(ithl)
1160
1161                   !      T =   alm_T * Ylm
1162                   !      Q = - alm_E * Wlm - i alm_B * Xlm
1163                   !      U = i alm_E * Xlm -   alm_B * Wlm
1164                   b_ns(-3:-2) = 0.0_dp               ! T odd
1165                   b_ns(-1: 0) =   lambda_x * (/   dalm(5,m), - dalm(4,m) /) ! Q odd
1166                   b_ns( 1: 2) =   lambda_x * (/ - dalm(3,m),   dalm(2,m) /) ! U odd
1167                   b_ns( 3: 4) =   lam_lm   * dalm(0:1,m) ! T even
1168                   b_ns( 5: 6) = - lambda_w * dalm(2:3,m) ! Q even
1169                   b_ns( 7: 8) = - lambda_w * dalm(4:5,m) ! U even
1170                else
1171                   b_ns = 0.0_dp
1172                endif
1173
1174                !           ---------- l > m ----------
1175                cth_ring = cth(ithl)
1176                fm_on_s2     =      m * one_on_s2(ithl)
1177                two_on_s2    = 2.0_dp * one_on_s2(ithl)
1178                two_cth_ring = 2.0_dp * cth_ring
1179                b_w          =  c_on_s2(ithl)
1180                do l = m+1, nlmax
1181                   par_lm = - par_lm  ! = 3 * (-1)^(l+m)
1182                   if (l >= l_min) then
1183                      lam_lm1m = plm(i_mm+l-m-1) * lam_fact(l)
1184                      lam_lm   = plm(i_mm+l-m)
1185
1186                      fl = real(l, kind = DP)
1187                      flm1 = fl - 1.0_dp
1188                      a_w =  two_on_s2 * (fl - fm2)  + flm1 * fl
1189                      a_x =  two_cth_ring * flm1
1190                      lambda_w =                b_w * lam_lm1m - a_w * lam_lm
1191                      lambda_x = fm_on_s2 * (         lam_lm1m - a_x * lam_lm)
1192
1193                      b_ns(par_lm:  par_lm+1) = b_ns(par_lm:  par_lm+1) + lam_lm   * dalm(0:1,l) ! T even or odd
1194                      b_ns(par_lm+2:par_lm+5) = b_ns(par_lm+2:par_lm+5) - lambda_w * dalm(2:5,l) ! Q, U  even or odd
1195                      b_ns(2-par_lm) = b_ns(2-par_lm) + lambda_x * dalm(5,l) ! Q odd (or even)
1196                      b_ns(3-par_lm) = b_ns(3-par_lm) - lambda_x * dalm(4,l)
1197                      b_ns(4-par_lm) = b_ns(4-par_lm) - lambda_x * dalm(3,l) ! U odd (or even)
1198                      b_ns(5-par_lm) = b_ns(5-par_lm) + lambda_x * dalm(2,l)
1199                    endif
1200
1201                enddo ! loop on l
1202
1203                b_TQU(1,m,ithl) = cmplx(b_ns(3) + b_ns(-3), b_ns(4) + b_ns(-2), kind = DP) ! T north
1204                b_TQU(4,m,ithl) = cmplx(b_ns(3) - b_ns(-3), b_ns(4) - b_ns(-2), kind = DP) ! T south
1205                b_TQU(2,m,ithl) = cmplx(b_ns(5) + b_ns(-1), b_ns(6) + b_ns( 0), kind = DP) ! Q n
1206                b_TQU(5,m,ithl) = cmplx(b_ns(5) - b_ns(-1), b_ns(6) - b_ns( 0), kind = DP) ! Q s
1207                b_TQU(3,m,ithl) = cmplx(b_ns(7) + b_ns( 1), b_ns(8) + b_ns( 2), kind = DP) ! U n
1208                b_TQU(6,m,ithl) = cmplx(b_ns(7) - b_ns( 1), b_ns(8) - b_ns( 2), kind = DP) ! U s
1209             endif ! test on nlmax
1210          enddo ! loop on rings (ithl)
1211       enddo ! loop on m
1212!$OMP end do
1213       if (do_openmp()) deallocate (dalm, lam_fact)
1214!$OMP end parallel
1215
1216
1217!$OMP parallel default(none) &
1218!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
1219!$OMP      lchk, uchk, b_TQU, nph, startpix, kphi0, map_TQU) &
1220!$OMP  private(ithl, nphl, istart_north, istart_south, &
1221!$OMP      ith, ring, bsub, i, status)
1222
1223       if (do_openmp()) then
1224          allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
1225          call assert_alloc(status,code,'ring & bsub')
1226       endif
1227
1228!$OMP do schedule(dynamic,1)
1229       do ithl = 0, uchk - lchk
1230          nphl = nph(ithl)
1231          istart_north = startpix(ithl)
1232          istart_south = npix-istart_north-nphl
1233          ith  = ithl + lchk
1234          !        ---------------------------------------------------------------
1235          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
1236          !        ---------------------------------------------------------------
1237          do i=1,3
1238             bsub = b_TQU(i,:,ithl)
1239             call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl))   ! north hemisph. + equator
1240             map_TQU(istart_north:istart_north+nphl-1, i) = ring(0:nphl-1)
1241          enddo
1242
1243          if (ith < nrings) then
1244             do i=1,3
1245                bsub = b_TQU(3+i,:,ithl)
1246                call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
1247                map_TQU(istart_south:istart_south+nphl-1, i) = ring(0:nphl-1)
1248             enddo
1249          endif
1250       enddo ! loop on ithl
1251!$OMP end do
1252       if (do_openmp()) deallocate(ring, bsub)
1253!$OMP end parallel
1254
1255    enddo    ! loop on chunks
1256    !     --------------------
1257    !     free memory and exit
1258    !     --------------------
1259    if (.not. do_openmp()) then
1260       deallocate(dalm, lam_fact, ring, bsub)
1261    endif
1262    deallocate(normal_l)
1263    deallocate(b_TQU)
1264    return
1265  end subroutine alm2map_pol_pre1_KLOAD
1266
1267  !=======================================================================
1268  subroutine alm2map_pol_pre2_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, plm)
1269    !=======================================================================
1270    !     computes a map form its alm for the HEALPIX pixelisation
1271    !      for the Temperature+Polarization field
1272    !=======================================================================
1273    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
1274    complex(KALMC), intent(IN),  dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
1275    real(KMAP),   intent(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
1276    real(DP),     intent(IN),  dimension(0:,1:) :: plm
1277
1278    integer(I4B) :: l, m, ith
1279    integer(I8B) :: istart_south, istart_north, npix
1280    integer(I4B) :: nrings, nphmx
1281
1282    integer(I8B) :: n_lm, n_plm, i_mm, i_up
1283    real(DP),     dimension(-3:8)             :: b_ns
1284    real(DP),     dimension(:,:), allocatable :: dalm, plm_sub
1285
1286    integer(i4b)                                :: ll, l_min, l_start
1287    complex(DPC), dimension(:,:,:), allocatable :: b_TQU
1288    complex(DPC), dimension(:),     allocatable :: bsub
1289    real(DP),     dimension(:),     allocatable :: ring
1290    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl, i
1291    real(DP)                           :: cth
1292    integer(I4B), dimension(0:SMAXCHK) :: nph, kphi0
1293    integer(I8B), dimension(0:SMAXCHK) :: startpix
1294    real(DP),     dimension(0:SMAXCHK) :: sth
1295
1296    character(LEN=*), parameter :: code = 'ALM2MAP'
1297    integer(I4B) :: status
1298    !=======================================================================
1299
1300    ! Healpix definitions
1301    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
1302    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
1303    nphmx  = 4*nsmax           ! maximum number of pixels/ring
1304    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
1305    n_plm  = n_lm * nrings
1306
1307    !     --- allocates space for arrays ---
1308    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
1309    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
1310
1311    allocate(b_TQU(1:6, 0:nmmax, 0:chunksize-1),stat = status)
1312    call assert_alloc(status,code,'b_TQU')
1313
1314    if (.not. do_openmp()) then
1315       allocate(dalm(0:5,0:nlmax),plm_sub(1:3,0:nlmax), stat = status)
1316       call assert_alloc(status,code,'dalm & plm_sub')
1317       allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
1318       call assert_alloc(status,code,'ring & bsub')
1319    endif
1320
1321    !     ------------ initiate variables and arrays ----------------
1322    map_TQU = 0.0 ! set the whole map to zero
1323
1324    ! loop on chunks
1325    do ichunk = 0, nchunks-1
1326       lchk = ichunk * chunksize + 1
1327       uchk = min(lchk+chunksize - 1, nrings)
1328
1329       do ith = lchk, uchk
1330          ithl = ith - lchk !local index
1331          ! get pixel location information
1332          call get_pixel_layout(nsmax, ith, cth, sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
1333       enddo
1334       !        -----------------------------------------------------
1335       !        for each theta, and each m, computes
1336       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
1337       !        ------------------------------------------------------
1338       !        lambda_mm tends to go down when m increases (risk of underflow)
1339       !        lambda_lm tends to go up   when l increases (risk of overflow)
1340
1341
1342       b_TQU = 0_dpc ! pad with zeros
1343
1344!$OMP parallel default(none) &
1345!$OMP shared(nlmax, nmmax, lchk, uchk, cth, sth, alm_TGC, b_TQU, plm, n_lm, ith) &
1346!$OMP private(dalm, b_ns, plm_sub, status, m, ll, ithl, l_min, &
1347!$OMP   l_start, l, i_mm, i_up)
1348
1349       if (do_openmp()) then
1350          ! allocate thread safe arrays
1351          allocate(dalm(0:5,0:nlmax), plm_sub(1:3,0:nlmax), stat = status)
1352          call assert_alloc(status,code,'dalm & plm_sub')
1353       endif
1354
1355!$OMP do schedule(dynamic,1)
1356       do m = 0, nmmax
1357
1358          ! extract needed alm under memory and CPU efficient form
1359          do ll = m, nlmax
1360             dalm(0,ll) =  real(alm_TGC(1,ll,m),kind=dp) ! T, real
1361             dalm(1,ll) = aimag(alm_TGC(1,ll,m))         ! T, imag
1362             dalm(2,ll) =  real(alm_TGC(2,ll,m),kind=dp) ! G, real
1363             dalm(3,ll) = aimag(alm_TGC(2,ll,m))
1364             dalm(4,ll) =  real(alm_TGC(3,ll,m),kind=dp) ! C, real
1365             dalm(5,ll) = aimag(alm_TGC(3,ll,m))
1366          enddo
1367          do ithl = 0, uchk - lchk
1368             l_min =  l_min_ylm(m, sth(ithl)) ! lowest l of non-negligible Ylm
1369             if (nlmax >= l_min) then
1370                ith = ithl + lchk
1371                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax + 3 - m)*m)/2 ! location of Ym,m for ring ith
1372                i_up = i_mm + nlmax - m
1373                plm_sub(1,m:nlmax) = plm(i_mm:i_up,1)
1374                plm_sub(2,m:nlmax) = plm(i_mm:i_up,2)
1375                plm_sub(3,m:nlmax) = plm(i_mm:i_up,3)
1376                !           ---------- l = m ----------
1377
1378                if (m >= l_min) then ! skip Ymm if too small
1379                   !      T =   alm_T * Ylm
1380                   !      Q = - alm_E * Wlm - i alm_B * Xlm
1381                   !      U = i alm_E * Xlm -   alm_B * Wlm
1382                   b_ns(-3:-2) = 0.0_dp               ! T odd
1383                   b_ns(-1: 0) =   plm_sub(3,m) * (/   dalm(5,m), - dalm(4,m) /) ! Q odd
1384                   b_ns( 1: 2) =   plm_sub(3,m) * (/ - dalm(3,m),   dalm(2,m) /) ! U odd
1385                   b_ns( 3: 4) =   plm_sub(1,m)   * dalm(0:1,m) ! T even
1386                   b_ns( 5: 6) = - plm_sub(2,m) * dalm(2:3,m) ! Q even
1387                   b_ns( 7: 8) = - plm_sub(2,m) * dalm(4:5,m) ! U even
1388                else
1389                   b_ns = 0.0_dp
1390                endif
1391
1392                !           ---------- l > m ----------
1393!                 do l = m+1, nlmax
1394!                    par_lm = - par_lm  ! = 3 * (-1)^(l+m)
1395!                    if (l >= l_min) then
1396!                       b_ns(par_lm:  par_lm+1) = b_ns(par_lm:  par_lm+1) + plm_sub(1,l) * dalm(0:1,l) ! T even or odd
1397!                       b_ns(par_lm+2:par_lm+5) = b_ns(par_lm+2:par_lm+5) - plm_sub(2,l) * dalm(2:5,l) ! Q, U  even or odd
1398!                       b_ns(2-par_lm) = b_ns(2-par_lm) + plm_sub(3,l) * dalm(5,l) ! Q odd (or even)
1399!                       b_ns(3-par_lm) = b_ns(3-par_lm) - plm_sub(3,l) * dalm(4,l)
1400!                       b_ns(4-par_lm) = b_ns(4-par_lm) - plm_sub(3,l) * dalm(3,l) ! U odd (or even)
1401!                       b_ns(5-par_lm) = b_ns(5-par_lm) + plm_sub(3,l) * dalm(2,l)
1402!                    endif
1403
1404                l_start = max(m+1, l_min) ! odd values of (l+m)
1405                if (mod(m+l_start,2) == 0) l_start = l_start+1
1406                do l = l_start, nlmax, 2
1407                   b_ns(-3:-2) = b_ns(-3:-2) + plm_sub(1,l) * dalm(0:1,l) ! T odd
1408                   b_ns(-1: 2) = b_ns(-1: 2) - plm_sub(2,l) * dalm(2:5,l) ! Q, U  odd
1409                   b_ns(5)     = b_ns(5)     + plm_sub(3,l) * dalm(5,l) ! Q even
1410                   b_ns(6)     = b_ns(6)     - plm_sub(3,l) * dalm(4,l)
1411                   b_ns(7)     = b_ns(7)     - plm_sub(3,l) * dalm(3,l) ! U even
1412                   b_ns(8)     = b_ns(8)     + plm_sub(3,l) * dalm(2,l)
1413                enddo ! loop on l
1414
1415                l_start = max(m+2, l_min) ! even values of (l+m)
1416                if (mod(m+l_start,2) == 1) l_start = l_start+1
1417                do l = l_start, nlmax, 2
1418                   b_ns(3:4) = b_ns(3:4) + plm_sub(1,l) * dalm(0:1,l) ! T even
1419                   b_ns(5:8) = b_ns(5:8) - plm_sub(2,l) * dalm(2:5,l) ! Q, U  even
1420                   b_ns(-1)  = b_ns(-1)  + plm_sub(3,l) * dalm(5,l) ! Q odd
1421                   b_ns( 0)  = b_ns( 0)  - plm_sub(3,l) * dalm(4,l)
1422                   b_ns( 1)  = b_ns( 1)  - plm_sub(3,l) * dalm(3,l) ! U odd
1423                   b_ns( 2)  = b_ns( 2)  + plm_sub(3,l) * dalm(2,l)
1424                enddo ! loop on l
1425
1426                b_TQU(1,m,ithl) = cmplx(b_ns(3) + b_ns(-3), b_ns(4) + b_ns(-2), kind = DP) ! T north
1427                b_TQU(4,m,ithl) = cmplx(b_ns(3) - b_ns(-3), b_ns(4) - b_ns(-2), kind = DP) ! T south
1428                b_TQU(2,m,ithl) = cmplx(b_ns(5) + b_ns(-1), b_ns(6) + b_ns( 0), kind = DP) ! Q n
1429                b_TQU(5,m,ithl) = cmplx(b_ns(5) - b_ns(-1), b_ns(6) - b_ns( 0), kind = DP) ! Q s
1430                b_TQU(3,m,ithl) = cmplx(b_ns(7) + b_ns( 1), b_ns(8) + b_ns( 2), kind = DP) ! U n
1431                b_TQU(6,m,ithl) = cmplx(b_ns(7) - b_ns( 1), b_ns(8) - b_ns( 2), kind = DP) ! U s
1432             endif ! test on nlmax
1433          enddo ! loop on rings (ithl)
1434       enddo ! loop on m
1435!$OMP end do
1436       if (do_openmp()) deallocate (dalm)
1437!$OMP end parallel
1438
1439
1440!$OMP parallel default(none) &
1441!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
1442!$OMP      lchk, uchk, b_TQU, nph, startpix, kphi0, map_TQU) &
1443!$OMP  private(ithl, nphl, istart_north, istart_south, &
1444!$OMP      ith, ring, bsub, i, status)
1445
1446       if (do_openmp()) then
1447          allocate(ring(0:nphmx-1),bsub(0:nlmax),stat = status)
1448          call assert_alloc(status,code,'ring & bsub')
1449       endif
1450
1451!$OMP do schedule(dynamic,1)
1452       do ithl = 0, uchk - lchk
1453          nphl = nph(ithl)
1454          istart_north = startpix(ithl)
1455          istart_south = npix-istart_north-nphl
1456          ith  = ithl + lchk
1457          !        ---------------------------------------------------------------
1458          !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta) by FFT
1459          !        ---------------------------------------------------------------
1460          do i=1,3
1461             bsub = b_TQU(i,:,ithl)
1462             call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl))   ! north hemisph. + equator
1463             map_TQU(istart_north:istart_north+nphl-1, i) = ring(0:nphl-1)
1464          enddo
1465
1466          if (ith < nrings) then
1467             do i=1,3
1468                bsub = b_TQU(3+i,:,ithl)
1469                call ring_synthesis(nsmax,nlmax,nmmax,bsub,nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
1470                map_TQU(istart_south:istart_south+nphl-1, i) = ring(0:nphl-1)
1471             enddo
1472          endif
1473       enddo ! loop on ithl
1474!$OMP end do
1475       if (do_openmp()) deallocate(ring, bsub)
1476!$OMP end parallel
1477
1478    enddo    ! loop on chunks
1479    !     --------------------
1480    !     free memory and exit
1481    !     --------------------
1482    if (.not. do_openmp()) then
1483       deallocate(dalm, ring, bsub)
1484    endif
1485    deallocate(b_TQU)
1486    return
1487  end subroutine alm2map_pol_pre2_KLOAD
1488
1489
1490  !=======================================================================
1491  subroutine alm2map_sc_der_KLOAD(nsmax, nlmax, nmmax, alm, map, der1, der2, zbounds)
1492    !=======================================================================
1493    !     computes a map and its 1st and 2nd derivative
1494    !       from its alm for the HEALPIX pixelisation
1495    !      for the Temperature field
1496    !
1497    ! der1 = dT/d(theta), dT/d(phi)/sin(theta)
1498    ! der2 = d^2T/d(theta^2), d^2T/d(theta)/d(phi)/sin(theta), d^2T/d(phi^2)/sin(theta)^2
1499    !
1500    ! derivatives of Ylm are obtained from Qu.Th. of Ang.Mom. Varshalovich et al
1501    !=======================================================================
1502    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
1503    complex(KALMC), INTENT(IN),  dimension(1:1,0:nlmax,0:nmmax) :: alm
1504    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1)     :: map
1505    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:2) :: der1
1506    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3), optional :: der2
1507    real(DP),     intent(IN),  dimension(1:2),         optional :: zbounds
1508
1509    integer(I4B) :: l, m, ith, scalel, scalem ! alm related
1510    integer(I8B) :: istart_south, istart_north, npix   ! map related
1511    integer(I4B) :: nrings, nphmx
1512    real(DP), dimension(1:2)         :: zbounds_in
1513
1514    integer(I4B)                              :: par_lm
1515    real(DP)             :: lam_mm, lam_lm, lam_0, lam_1, lam_2, corfac, cth_ring
1516    real(DP)                  :: ovflow, unflow
1517    real(DP)                  :: f2m, fm2, fm, fllp1, lam_lm1m
1518    real(dp)                  :: cotanth, one_on_s1, one_on_s2
1519    real(DP), dimension(-1:2) :: b_ns
1520    real(DP), dimension(-1:2) :: b_ns_p, b_ns_t
1521    real(DP), dimension(-1:2) :: b_ns_pp, b_ns_tt, b_ns_tp
1522    real(DP), dimension( 0:1) :: factor, dfdt, d2fdt2
1523
1524    integer(i4b)                            :: ll, l_min
1525    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north,    b_south
1526    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north_t,  b_south_t
1527    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north_p,  b_south_p
1528    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north_tt, b_south_tt
1529    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north_tp, b_south_tp
1530    complex(DPC), dimension(:,:), ALLOCATABLE :: b_north_pp, b_south_pp
1531    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
1532    real(DP), dimension(:,:), ALLOCATABLE :: recfac, dalm
1533    real(DP), dimension(:),   ALLOCATABLE :: lam_fact, mfac
1534    real(DP), dimension(:),   ALLOCATABLE :: ring
1535
1536    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
1537    integer(I4B), dimension(0:SMAXCHK-1) :: nph,kphi0
1538    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth, one_on_s
1539    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
1540
1541    character(len=*), parameter :: code = 'ALM2MAP_DER'
1542    logical(lgt) :: do_d2
1543    integer(I4B) :: status
1544    !=======================================================================
1545    do_d2 = (present(der2)) ! do 2nd derivative
1546    zbounds_in = (/-1.d0 , 1.d0/)
1547    if (present(zbounds)) zbounds_in = zbounds
1548
1549    ! Healpix definitions
1550    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
1551    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
1552    nphmx  = 4*nsmax           ! maximum number of pixels/ring
1553
1554    !     --- allocates space for arrays ---
1555    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
1556    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
1557
1558    allocate(mfac(0:nmmax),stat = status)
1559    call assert_alloc(status,code,'mfac')
1560
1561    allocate(b_north(0:nmmax,0:chunksize-1), b_north_t(0:nmmax,0:chunksize-1), &
1562         &   b_north_p(0:nmmax,0:chunksize-1),stat = status)
1563    allocate(b_south(0:nmmax,0:chunksize-1), b_south_t(0:nmmax,0:chunksize-1), &
1564         &   b_south_p(0:nmmax,0:chunksize-1),stat = status)
1565    if (do_d2) then
1566       allocate(b_north_tt(0:nmmax,0:chunksize-1),b_north_tp(0:nmmax,0:chunksize-1), &
1567            &   b_north_pp(0:nmmax,0:chunksize-1),                    stat = status)
1568       allocate(b_south_tt(0:nmmax,0:chunksize-1),b_south_tp(0:nmmax,0:chunksize-1), &
1569            &   b_south_pp(0:nmmax,0:chunksize-1),                    stat = status)
1570    endif
1571    call assert_alloc(status,code,'b_north & b_south')
1572
1573    if (.not. do_openmp()) then
1574       allocate(recfac(0:1,0:nlmax), dalm(0:1,0:nlmax), lam_fact(0:nlmax),stat = status)
1575       call assert_alloc(status,code,'recfac, dalm & lam_fact')
1576       allocate(ring(0:nphmx-1),stat = status)
1577       call assert_alloc(status,code,'ring')
1578    endif
1579
1580    !     ------------ initiate arrays ----------------
1581    call gen_mfac(nmmax,mfac)
1582
1583    call init_rescale()
1584    ovflow = rescale_tab(1)
1585    unflow = rescale_tab(-1)
1586    map = 0.0 ! set the whole map to zero
1587    map = 0.0
1588    der1 = 0.0
1589    if (do_d2) der2 = 0.0
1590
1591    do ichunk = 0, nchunks-1
1592       lchk = ichunk * chunksize + 1
1593       uchk = min(lchk+chunksize - 1, nrings)
1594
1595       do ith = lchk, uchk
1596          ithl = ith - lchk !local index
1597          ! get pixel location information
1598          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
1599          one_on_s(ithl)  = 1.0_dp / sth(ithl)
1600          ! find out which rings are to be synthetized
1601          call select_rings(cth(ithl), zbounds_in, keep_north(ithl), keep_south(ithl), keep_it(ithl))
1602       enddo
1603       !        -----------------------------------------------------
1604       !        for each theta, and each m, computes
1605       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
1606       !        ------------------------------------------------------
1607       !        lambda_mm tends to go down when m increases (risk of underflow)
1608       !        lambda_lm tends to go up   when l increases (risk of overflow)
1609
1610       b_north    = 0_dpc ; b_south = 0_dpc    ! pad with zeros
1611       b_north_t  = 0_dpc ; b_south_t  = 0_dpc
1612       b_north_p  = 0_dpc ; b_south_p  = 0_dpc
1613       if (do_d2) then
1614          b_north_tt = 0_dpc ; b_south_tt = 0_dpc
1615          b_north_tp = 0_dpc ; b_south_tp = 0_dpc
1616          b_north_pp = 0_dpc ; b_south_pp = 0_dpc
1617       endif
1618
1619!$OMP parallel default(none) &
1620!$OMP shared(nlmax, nmmax, lchk, uchk, &
1621!$OMP    rescale_tab, ovflow, unflow, &
1622!$OMP    cth, sth, mfac, alm, one_on_s,  &
1623!$OMP      b_north, b_south, b_north_t, b_south_t, &
1624!$OMP      b_north_p,  b_south_p,  b_north_tt, b_south_tt, &
1625!$OMP      b_north_tp, b_south_tp, b_north_pp, b_south_pp, do_d2, &
1626!$OMP      keep_north, keep_south, keep_it ) &
1627!$OMP private(recfac, dalm, lam_fact, status, &
1628!$OMP   m, ll, fm, f2m, fm2, ithl, l_min, &
1629!$OMP   scalem, scalel, corfac, par_lm, &
1630!$OMP   lam_mm, lam_lm, lam_lm1m, lam_0, lam_1, lam_2, &
1631!$OMP   cth_ring, one_on_s1, one_on_s2, cotanth, factor, dfdt, d2fdt2, &
1632!$OMP   b_ns, b_ns_t, b_ns_p, b_ns_tt, b_ns_tp, b_ns_pp,   &
1633!$OMP   l, fllp1)
1634
1635       if (do_openmp()) then
1636          ! allocate thread safe arrays
1637          allocate(recfac(0:1,0:nlmax), dalm(0:1,0:nlmax), lam_fact(0:nlmax),stat = status)
1638          call assert_alloc(status,code,'recfac, dalm & lam_fact')
1639       endif
1640
1641!$OMP do schedule(dynamic,1)
1642       do m = 0, nmmax
1643          ! generate recursion factors (recfac) for Ylm of degree m
1644          call gen_recfac(nlmax, m, recfac)
1645          ! generate Ylm relation factor for degree m
1646          call gen_lamfac_der(nlmax, m, lam_fact)
1647          f2m = 2.0_dp * m
1648          fm2 = real(m*m, kind=dp)
1649          fm  = real(m,   kind=dp)
1650
1651          ! extract needed alm under memory and CPU efficient form
1652          do ll = m, nlmax
1653             dalm(0,ll) =  real(alm(1,ll,m),kind=dp)
1654             dalm(1,ll) = aimag(alm(1,ll,m))
1655          enddo
1656
1657          do ithl = 0, uchk - lchk
1658             l_min = l_min_ylm(m, sth(ithl))
1659             if (keep_it(ithl) .and. nlmax >= l_min) then ! skip calculations when Ylm too small, and skip irrelevant rings
1660                ! determine lam_mm
1661                call compute_lam_mm(mfac(m), m, sth(ithl), lam_mm, corfac, scalem)
1662
1663                cth_ring = cth(ithl)
1664                one_on_s2 = one_on_s(ithl)**2
1665                cotanth   = cth_ring * one_on_s(ithl)
1666                !           ---------- l = m ----------
1667                par_lm = 1  ! = (-1)^(l+m)
1668
1669                lam_lm = corfac * lam_mm
1670                if (m >= l_min) then
1671                   !-----------------------
1672                   ! f = Y_lm * a_lm
1673                   factor(0:1) = lam_lm * dalm(0:1,m)
1674                   b_ns( 1:2) = factor(0:1) ! even
1675                   b_ns(-1:0) = 0.0_dp ! odd
1676                   !------------------------- 1st derivatives
1677                   ! df/dtheta = (l/tan(theta)*Y_lm - fact/sin(theta)*Y_l-1m)*a_lm
1678                   dfdt(0:1)   =   (m * cotanth) * factor(0:1)
1679                   b_ns_t( 1:2) = 0.0_dp
1680                   b_ns_t(-1:0) = dfdt ! different theta-parity
1681                   ! df/dphi = i * m * Y_lm * a_lm
1682                   b_ns_p( 1:2) = m  * (/ -factor(1), factor(0) /)
1683                   b_ns_p(-1:0) = 0.0_dp
1684                   !------------------------- 2nd derivatives
1685                   if (do_d2) then
1686                      ! d^2f/dtheta^2    = [-(l(l+1) - m^2/sin^2(theta)) Y_lm - cotan(theta) dY_lm/dtheta] * a_lm
1687                      b_ns_tt( 1:2) = (fm2 * one_on_s2 - fm2 - fm) * factor - cotanth * dfdt
1688                      b_ns_tt(-1:0) = 0.0_dp
1689                      ! d^2f/dtheta/dphi = i * m * df/dtheta
1690                      b_ns_tp( 1:2) = 0.0_dp
1691                      b_ns_tp(-1:0) = m * (/ -dfdt(1), dfdt(0) /)! different theta-parity
1692                      ! d^2f/dphi^2      = -m^2 * Y_lm * a_lm
1693                      b_ns_pp( 1:2) = - fm2 * factor
1694                      b_ns_pp(-1:0) = 0.0_dp
1695                   endif
1696                   !-------------------------
1697                else
1698                   b_ns   = 0.0_dp
1699                   b_ns_t = 0.0_dp ; b_ns_p = 0.0_dp
1700                   b_ns_tt = 0.0_dp ; b_ns_tp = 0.0_dp ; b_ns_pp = 0.0_dp
1701                endif
1702
1703                !           ---------- l > m ----------
1704                lam_0 = 0.0_dp
1705                lam_1 = 1.0_dp
1706                scalel=0
1707                lam_2 = cth_ring * lam_1 * recfac(0,m)
1708                do l = m+1, nlmax
1709                   par_lm = - par_lm  ! = (-1)^(l+m)
1710                   fllp1 = real(l*l + l, kind=dp) ! l(l+1)
1711                   lam_lm1m = lam_lm * lam_fact(l) ! actual lambda_l-1,m
1712                   lam_lm = lam_2 * corfac * lam_mm
1713                   if (l >= l_min) then
1714                      !--------------------------
1715                      ! f = Y_lm * a_lm
1716                      factor(0:1) = lam_lm * dalm(0:1,l)
1717                      b_ns(par_lm:par_lm+1) = b_ns(par_lm:par_lm+1) + factor(0:1)
1718                      !-------------------------- 1st derivatives
1719                      ! df/dtheta = (l/tan(theta)*Y_lm - fact/sin(theta)*Y_l-1m)*a_lm
1720                      dfdt(0:1)   =   (l * cotanth) * factor(0:1) &
1721                           &        - (one_on_s(ithl) * lam_lm1m) * dalm(0:1,l)
1722                      b_ns_t(-par_lm:1-par_lm) = b_ns_t(-par_lm:1-par_lm) + dfdt(0:1)
1723                      ! df/dphi = i * m * Y_lm * a_lm
1724                      b_ns_p(par_lm  ) = b_ns_p(par_lm  ) - m * factor(1)
1725                      b_ns_p(par_lm+1) = b_ns_p(par_lm+1) + m * factor(0)
1726                      !-------------------------- 2nd derivatives
1727                      if (do_d2) then
1728                         ! d^2f/dtheta^2    = [-(l(l+1) - m^2/sin^2(theta)) Y_lm - cotan(theta) dY_lm/dtheta] * a_lm
1729                         d2fdt2(0:1) = (fm2*one_on_s2 - fllp1) * factor(0:1) - cotanth * dfdt(0:1)
1730                         b_ns_tt(par_lm:par_lm+1) = b_ns_tt(par_lm:par_lm+1) + d2fdt2(0:1)
1731                         ! d^2f/dtheta/dphi = i * m * df/dtheta
1732                         b_ns_tp( -par_lm) = b_ns_tp( -par_lm) - m * dfdt(1)
1733                         b_ns_tp(1-par_lm) = b_ns_tp(1-par_lm) + m * dfdt(0)
1734                         ! d^2f/dphi^2      = -m^2 * Y_lm * a_lm
1735                         b_ns_pp(par_lm:par_lm+1) = b_ns_pp(par_lm:par_lm+1) - fm2 * factor(0:1)
1736                      endif
1737                      !-------------------------
1738                   endif
1739                   lam_0 = lam_1 * recfac(1,l-1)
1740                   lam_1 = lam_2
1741                   lam_2 = (cth_ring * lam_1 - lam_0) * recfac(0,l)
1742                   if (abs(lam_2) > OVFLOW) then
1743                      lam_1 = lam_1*UNFLOW
1744                      lam_2 = lam_2*UNFLOW
1745                      scalel= scalel + 1
1746                      corfac = rescale_tab(max(scalel+scalem,RSMIN))
1747                   elseif (abs(lam_2) < UNFLOW) then
1748                      lam_1 = lam_1*OVFLOW
1749                      lam_2 = lam_2*OVFLOW
1750                      scalel= scalel - 1
1751                      corfac = rescale_tab(max(scalel+scalem,RSMIN))
1752                   endif
1753                enddo ! loop on l
1754
1755                one_on_s1 = one_on_s(ithl)
1756                if (keep_north(ithl)) then
1757                   b_north  (m,ithl) = cmplx(b_ns(1)   + b_ns(-1),   b_ns(2)   + b_ns(0),   kind=DP)
1758                   b_north_t(m,ithl) = cmplx(b_ns_t(1) + b_ns_t(-1), b_ns_t(2) + b_ns_t(0), kind=DP)
1759                   b_north_p(m,ithl) = cmplx(b_ns_p(1) + b_ns_p(-1), b_ns_p(2) + b_ns_p(0), kind=DP)*one_on_s1
1760                endif
1761                if (keep_south(ithl)) then
1762                   b_south  (m,ithl) = cmplx(b_ns(1)   - b_ns(-1),   b_ns(2)   - b_ns(0),   kind=DP)
1763                   b_south_t(m,ithl) = cmplx(b_ns_t(1) - b_ns_t(-1), b_ns_t(2) - b_ns_t(0), kind=DP)
1764                   b_south_p(m,ithl) = cmplx(b_ns_p(1) - b_ns_p(-1), b_ns_p(2) - b_ns_p(0), kind=DP)*one_on_s1
1765                endif
1766                if (do_d2 .and. keep_north(ithl)) then
1767                   b_north_tt(m,ithl) = cmplx(b_ns_tt(1) + b_ns_tt(-1), b_ns_tt(2) + b_ns_tt(0), kind=DP)
1768                   b_north_pp(m,ithl) = cmplx(b_ns_pp(1) + b_ns_pp(-1), b_ns_pp(2) + b_ns_pp(0), kind=DP)*one_on_s2
1769                   b_north_tp(m,ithl) = cmplx(b_ns_tp(1) + b_ns_tp(-1), b_ns_tp(2) + b_ns_tp(0), kind=DP)*one_on_s1
1770                endif
1771                if (do_d2 .and. keep_south(ithl)) then
1772                   b_south_tt(m,ithl) = cmplx(b_ns_tt(1) - b_ns_tt(-1), b_ns_tt(2) - b_ns_tt(0), kind=DP)
1773                   b_south_tp(m,ithl) = cmplx(b_ns_tp(1) - b_ns_tp(-1), b_ns_tp(2) - b_ns_tp(0), kind=DP)*one_on_s1
1774                   b_south_pp(m,ithl) = cmplx(b_ns_pp(1) - b_ns_pp(-1), b_ns_pp(2) - b_ns_pp(0), kind=DP)*one_on_s2
1775                endif
1776             endif ! test on nlmax
1777          enddo ! loop on rings (ithl)
1778       enddo ! loop on m
1779!$OMP end do
1780       if (do_openmp()) deallocate (recfac,dalm, lam_fact)
1781!$OMP end parallel
1782
1783!$OMP parallel default(none) &
1784!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
1785!$OMP      lchk, uchk, b_north, b_south, b_north_t, b_south_t, &
1786!$OMP      b_north_p,  b_south_p,  b_north_tt, b_south_tt, &
1787!$OMP      b_north_tp, b_south_tp, b_north_pp, b_south_pp, &
1788!$OMP      nph, startpix, kphi0, map, der1, der2, do_d2, &
1789!$OMP      keep_north, keep_south, keep_it ) &
1790!$OMP  private(ithl, nphl, istart_north, istart_south, &
1791!$OMP      ith, ring, status)
1792
1793       if (do_openmp()) then
1794          allocate(ring(0:nphmx-1), stat = status)
1795          call assert_alloc(status,code,'ring')
1796       endif
1797
1798!$OMP do schedule(dynamic,1)
1799
1800       do ithl = 0, uchk - lchk
1801          nphl = nph(ithl)
1802          istart_north = startpix(ithl)
1803          istart_south = npix-istart_north-nphl
1804          ith  = ithl + lchk
1805
1806       !        ---------------------------------------------------------------
1807       !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta)
1808       !        ---------------------------------------------------------------
1809          if (keep_north(ithl)) then
1810             call ring_synthesis(nsmax,nlmax,nmmax,b_north(0,ithl),   nphl,ring,kphi0(ithl))   ! north hemisph. + equator
1811             map(istart_north:istart_north+nphl-1) = ring(0:nphl-1)
1812
1813             call ring_synthesis(nsmax,nlmax,nmmax,b_north_t(0,ithl), nphl,ring,kphi0(ithl))
1814             der1(istart_north:istart_north+nphl-1,1) = ring(0:nphl-1)
1815             call ring_synthesis(nsmax,nlmax,nmmax,b_north_p(0,ithl), nphl,ring,kphi0(ithl))
1816             der1(istart_north:istart_north+nphl-1,2) = ring(0:nphl-1)
1817             if (do_d2) then
1818                call ring_synthesis(nsmax,nlmax,nmmax,b_north_tt(0,ithl),nphl,ring,kphi0(ithl))
1819                der2(istart_north:istart_north+nphl-1,1) = ring(0:nphl-1)
1820                call ring_synthesis(nsmax,nlmax,nmmax,b_north_tp(0,ithl),nphl,ring,kphi0(ithl))
1821                der2(istart_north:istart_north+nphl-1,2) = ring(0:nphl-1)
1822                call ring_synthesis(nsmax,nlmax,nmmax,b_north_pp(0,ithl),nphl,ring,kphi0(ithl))
1823                der2(istart_north:istart_north+nphl-1,3) = ring(0:nphl-1)
1824             endif
1825          endif
1826
1827          if (keep_south(ithl) .and. ith < nrings) then
1828             call ring_synthesis(nsmax,nlmax,nmmax,b_south(0,ithl),  nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
1829             map(istart_south:istart_south+nphl-1) = ring(0:nphl-1)
1830
1831             call ring_synthesis(nsmax,nlmax,nmmax,b_south_t(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
1832             der1(istart_south:istart_south+nphl-1,1) = ring(0:nphl-1)
1833             call ring_synthesis(nsmax,nlmax,nmmax,b_south_p(0,ithl),nphl,ring,kphi0(ithl)) ! south hemisph. w/o equat
1834             der1(istart_south:istart_south+nphl-1,2) = ring(0:nphl-1)
1835             if (do_d2) then
1836                call ring_synthesis(nsmax,nlmax,nmmax,b_south_tt(0,ithl),nphl,ring,kphi0(ithl))
1837                der2(istart_south:istart_south+nphl-1,1) = ring(0:nphl-1)
1838                call ring_synthesis(nsmax,nlmax,nmmax,b_south_tp(0,ithl),nphl,ring,kphi0(ithl))
1839                der2(istart_south:istart_south+nphl-1,2) = ring(0:nphl-1)
1840                call ring_synthesis(nsmax,nlmax,nmmax,b_south_pp(0,ithl),nphl,ring,kphi0(ithl))
1841                der2(istart_south:istart_south+nphl-1,3) = ring(0:nphl-1)
1842             endif
1843          endif
1844
1845       enddo    ! loop on ithl
1846!$OMP end do
1847       if (do_openmp()) deallocate(ring)
1848!$OMP end parallel
1849
1850    enddo    ! loop on chunks
1851
1852    !     --------------------
1853    !     free memory and exit
1854    !     --------------------
1855    if (.not. do_openmp()) then
1856       deallocate(recfac, dalm, lam_fact, ring)
1857    endif
1858
1859    deallocate(mfac)
1860    deallocate(b_north, b_north_t, b_north_p)
1861    deallocate(b_south, b_south_t, b_south_p)
1862    if (do_d2) then
1863       deallocate(b_north_tt, b_north_tp, b_north_pp)
1864       deallocate(b_south_tt, b_south_tp, b_south_pp)
1865    endif
1866
1867    return
1868  end subroutine alm2map_sc_der_KLOAD
1869
1870  !=======================================================================
1871  subroutine alm2map_pol_der_KLOAD(nsmax, nlmax, nmmax, alm_TGC, map_TQU, der1, der2, zbounds)
1872    !=======================================================================
1873    !     computes a map and its 1st and 2nd derivative
1874    !       from its alm for the HEALPIX pixelisation
1875    !      for the Temperature and polarisation fields
1876    !
1877    ! der1 = dX/d(theta), dX/d(phi)/sin(theta)
1878    ! der2 = d^2X/d(theta^2), d^2X/d(theta)/d(phi)/sin(theta), d^2X/d(phi^2)/sin(theta)^2
1879    ! where X = (T,Q,U)
1880    !
1881    ! derivatives of Ylm are obtained from Qu.Th. of Ang.Mom. Varshalovich et al
1882    !
1883    ! 2010-02-09: correction of a bug affecting
1884    ! dX/d(theta), dX/d(theta)/d(phi)/sin(theta), d^2X/d(theta)^2 for X = Q,U
1885    ! mainly at low ell
1886    !=======================================================================
1887    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
1888    complex(KALMC), INTENT(IN),  dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
1889    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:3)     :: map_TQU
1890    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:6) :: der1
1891    real(KMAP),   INTENT(OUT), dimension(0:(12_i8b*nsmax)*nsmax-1,1:9), optional :: der2
1892    real(DP),     intent(IN),  dimension(1:2),         optional :: zbounds
1893
1894    integer(I4B) :: l, m, ith ! alm related
1895    integer(I8B) :: istart_south, istart_north, npix   ! map related
1896    integer(I4B) :: nrings, nphmx
1897    real(DP), dimension(1:2)         :: zbounds_in
1898
1899    integer(I4B)                              :: par_lm, k, k0, k1, di
1900    real(DP)             :: lam_mm, cth_ring
1901    real(DP)                  :: f2m, fm2, fm, fllp1, lam_lm1m, f2, f3, fl
1902    real(dp)                  :: cotanth, one_on_s1, one_on_s2
1903    real(dp)                  :: a0, xp, at, aq, derW, derX, derY
1904    real(dp)                  :: b0t, b0p, bx, der2W, der2X, der2Y
1905    real(DP), dimension(-3:8) :: b_ns
1906    real(DP), dimension(-3:8) :: b_ns_p, b_ns_t
1907    real(DP), dimension(-3:8) :: b_ns_pp, b_ns_tt, b_ns_tp
1908    real(DP), dimension( 0:1) :: factor
1909
1910    integer(i4b)                            :: ll, l_min
1911    complex(DPC), dimension(:,:,:), ALLOCATABLE :: b_d1, b_d2
1912    complex(DPC), dimension(:), allocatable :: bline
1913    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
1914    real(DP), dimension(:,:), ALLOCATABLE :: recfac, dalm, lam_lm
1915    real(DP), dimension(:),   ALLOCATABLE :: lam_fact, mfac, lam_fact_der, normal_l
1916    real(DP), dimension(:),   ALLOCATABLE :: ring
1917
1918    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
1919    integer(I4B), dimension(0:SMAXCHK-1) :: nph,kphi0
1920    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth, one_on_s
1921    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
1922
1923    character(len=*), parameter :: code = 'ALM2MAP_DER'
1924    logical(lgt) :: do_d2
1925    integer(I4B) :: status
1926    !=======================================================================
1927    do_d2 = (present(der2)) ! do 2nd derivative
1928    zbounds_in = (/-1.d0 , 1.d0/)
1929    if (present(zbounds)) zbounds_in = zbounds
1930
1931    ! Healpix definitions
1932    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
1933    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
1934    nphmx  = 4*nsmax           ! maximum number of pixels/ring
1935
1936    !     --- allocates space for arrays ---
1937    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
1938    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
1939
1940    allocate(mfac(0:nmmax),stat = status)
1941    call assert_alloc(status,code,'mfac')
1942
1943    allocate(normal_l(0:nlmax),stat = status)
1944    call assert_alloc(status,code,'normal_l')
1945
1946    allocate(b_d1(0:17,0:nmmax,0:chunksize-1), stat = status)
1947    if (do_d2) then
1948       allocate(b_d2(0:17,0:nmmax,0:chunksize-1),  stat = status)
1949    endif
1950    call assert_alloc(status,code,'b_d1 & b_d2')
1951
1952    if (.not. do_openmp()) then
1953       allocate(recfac(0:1,0:nlmax), dalm(0:5,0:nlmax), lam_fact(0:nlmax), lam_fact_der(0:nlmax), stat = status)
1954       call assert_alloc(status,code,'recfac, dalm & lam_fact')
1955       allocate(ring(0:nphmx-1), bline(0:nmmax),stat = status)
1956       call assert_alloc(status,code,'ring & bline')
1957       allocate(lam_lm(1:3,0:nlmax),stat = status)
1958       call assert_alloc(status,code,'lam_lm')
1959    endif
1960
1961    !     ------------ initiate arrays ----------------
1962    call gen_mfac(nmmax,mfac)
1963
1964    call init_rescale()
1965
1966    ! generate Polarization normalisation
1967    call gen_normpol(nlmax, normal_l)
1968
1969    map_TQU = 0.0 ! set the whole map to zero
1970    der1 = 0.0
1971    if (do_d2) der2 = 0.0
1972
1973    do ichunk = 0, nchunks-1
1974       lchk = ichunk * chunksize + 1
1975       uchk = min(lchk+chunksize - 1, nrings)
1976
1977       do ith = lchk, uchk
1978          ithl = ith - lchk !local index
1979          ! get pixel location information
1980          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
1981          one_on_s(ithl)  = 1.0_dp / sth(ithl)
1982          ! find out which rings are to be synthetized
1983          call select_rings(cth(ithl), zbounds_in, keep_north(ithl), keep_south(ithl), keep_it(ithl))
1984       enddo
1985       !        -----------------------------------------------------
1986       !        for each theta, and each m, computes
1987       !        b(m,theta) = sum_over_l>m (lambda_l_m(theta) * a_l_m)
1988       !        ------------------------------------------------------
1989       !        lambda_mm tends to go down when m increases (risk of underflow)
1990       !        lambda_lm tends to go up   when l increases (risk of overflow)
1991
1992       b_d1   = 0_dpc   ! pad with zeros
1993       if (do_d2) b_d2 = 0_dpc
1994
1995!$OMP parallel default(none) &
1996!$OMP shared(nlmax, nmmax, lchk, uchk, &
1997!$OMP    rescale_tab, normal_l, &
1998!$OMP    cth, sth, mfac, alm_TGC, one_on_s,  &
1999!$OMP      b_d1, b_d2, do_d2, &
2000!$OMP      keep_north, keep_south, keep_it ) &
2001!$OMP private(recfac, dalm, lam_fact, lam_fact_der, status, &
2002!$OMP   m, ll, fm, f2m, fm2, ithl, l_min, k, k0, k1, &
2003!$OMP   par_lm, lam_lm, lam_lm1m, &
2004!$OMP   cth_ring, one_on_s1, one_on_s2, cotanth, factor, &
2005!$OMP   b_ns, b_ns_t, b_ns_p, b_ns_tt, b_ns_tp, b_ns_pp,   &
2006!$OMP   l, fllp1, fl, a0, xp, at, aq, derW, derX, derY, f2, f3, b0t, b0p, bx, der2W, der2X, der2Y)
2007
2008       if (do_openmp()) then
2009          ! allocate thread safe arrays
2010          allocate(recfac(0:1,0:nlmax), dalm(0:5,0:nlmax), lam_fact(0:nlmax), lam_fact_der(0:nlmax), &
2011               & stat = status)
2012          call assert_alloc(status,code,'recfac, dalm & lam_fact')
2013          allocate(lam_lm(1:3,0:nlmax),stat = status)
2014          call assert_alloc(status,code,'lam_lm')
2015       endif
2016
2017!$OMP do schedule(dynamic,1)
2018       do m = 0, nmmax
2019          ! generate recursion factors (recfac) for Ylm of degree m
2020          call gen_recfac(nlmax, m, recfac)
2021          ! generate Ylm relation factor for degree m
2022          call gen_lamfac_der(nlmax, m, lam_fact_der)
2023          call gen_lamfac    (nlmax, m, lam_fact)
2024          f2m = 2.0_dp * m
2025          fm2 = real(m*m, kind=dp)
2026          fm  = real(m,   kind=dp)
2027
2028          ! extract needed alm under memory and CPU efficient form
2029          do ll = m, nlmax
2030             dalm(0,ll) =  real(alm_TGC(1,ll,m),kind=dp)
2031             dalm(1,ll) = aimag(alm_TGC(1,ll,m))
2032             dalm(2,ll) =  real(alm_TGC(2,ll,m),kind=dp)*normal_l(ll) ! G, real
2033             dalm(3,ll) = aimag(alm_TGC(2,ll,m))        *normal_l(ll)
2034             dalm(4,ll) =  real(alm_TGC(3,ll,m),kind=dp)*normal_l(ll) ! C, real
2035             dalm(5,ll) = aimag(alm_TGC(3,ll,m))        *normal_l(ll)
2036          enddo
2037
2038          do ithl = 0, uchk - lchk
2039             l_min = l_min_ylm(m, sth(ithl))
2040             if (keep_it(ithl) .and. nlmax >= l_min) then ! skip calculations when Ylm too small, and skip irrelevant rings
2041
2042                ! compute lam_lm(p,theta) for all l>=m
2043                call do_lam_lm_pol(nlmax, m, cth(ithl), sth(ithl), mfac(m), recfac, lam_fact, lam_lm)
2044
2045                cth_ring = cth(ithl)
2046                one_on_s1 = one_on_s(ithl)
2047                one_on_s2 = one_on_s(ithl)**2
2048                cotanth   = cth_ring * one_on_s(ithl)
2049
2050                b_ns   = 0.0_dp
2051                b_ns_t = 0.0_dp  ; b_ns_p  = 0.0_dp
2052                b_ns_tt = 0.0_dp ; b_ns_tp = 0.0_dp ; b_ns_pp = 0.0_dp
2053                do l = l_min, nlmax
2054                   fl    = real(l, kind=dp) ! l
2055                   fllp1 = real(l*l + l, kind=dp) ! l(l+1)
2056                   par_lm = 3 ! = (-1)^(l+m)
2057                   if (mod(l+m,2) == 1) par_lm = -par_lm
2058
2059                   !--------------------------
2060                   ! f = Y_lm * a_lm
2061                   factor(0:1) = lam_lm(1,l) * dalm(0:1,l)
2062                   b_ns(par_lm:par_lm+1)   = b_ns(par_lm:  par_lm+1) + factor(0:1) ! T even
2063                   b_ns(par_lm+2:par_lm+5) = b_ns(par_lm+2:par_lm+5) - lam_lm(2,l) * dalm(2:5,l) ! Q, U  even
2064                   b_ns(2-par_lm)          = b_ns(2-par_lm)          + lam_lm(3,l) * dalm(5,l) ! Q odd
2065                   b_ns(3-par_lm)          = b_ns(3-par_lm)          - lam_lm(3,l) * dalm(4,l)
2066                   b_ns(4-par_lm)          = b_ns(4-par_lm)          - lam_lm(3,l) * dalm(3,l) ! U odd
2067                   b_ns(5-par_lm)          = b_ns(5-par_lm)          + lam_lm(3,l) * dalm(2,l)
2068                   !-------------------------- 1st derivatives
2069                   if (l > 0) then
2070                      ! df/dphi = i * m * Y_lm * a_lm
2071                      f2 = m * lam_lm(2,l)
2072                      f3 = m * lam_lm(3,l)
2073                      b_ns_p(par_lm  ) = b_ns_p(par_lm  ) - m * factor(1)
2074                      b_ns_p(par_lm+1) = b_ns_p(par_lm+1) + m * factor(0)
2075                      b_ns_p(par_lm+2) = b_ns_p(par_lm+2) + f2 * dalm(3,l)
2076                      b_ns_p(par_lm+3) = b_ns_p(par_lm+3) - f2 * dalm(2,l)
2077                      b_ns_p(par_lm+4) = b_ns_p(par_lm+4) + f2 * dalm(5,l)
2078                      b_ns_p(par_lm+5) = b_ns_p(par_lm+5) - f2 * dalm(4,l)
2079                      b_ns_p(2-par_lm) = b_ns_p(2-par_lm) + f3 * dalm(4,l) ! Q odd
2080                      b_ns_p(3-par_lm) = b_ns_p(3-par_lm) + f3 * dalm(5,l)
2081                      b_ns_p(4-par_lm) = b_ns_p(4-par_lm) - f3 * dalm(2,l) ! U odd
2082                      b_ns_p(5-par_lm) = b_ns_p(5-par_lm) - f3 * dalm(3,l)
2083
2084                      ! dY_lm/dtheta = (l/tan(theta)*Y_lm                         -fact/sin(theta)*Y_l-1m)
2085                      ! dW_lm/dtheta = (l/tan(theta)*W_lm - S*m/l/sin(theta)*X_lm -fact/sin(theta)*sqrt(1-S^2/l^2)*W_l-1m
2086                      ! dX_lm/dtheta = (l/tan(theta)*X_lm - S*m/l/sin(theta)*W_lm -fact/sin(theta)*sqrt(1-S^2/l^2)*X_l-1m
2087                      a0 = fl * cotanth          ! l/tan(theta)
2088                      at = lam_fact_der(l) * one_on_s1 ! sqrt((2l+1)/(2l-1)*(l^2-m^2))/sin(theta)
2089                      derY   = a0 * lam_lm(1,l) - at * lam_lm(1,l-1)
2090                      b_ns_t( -par_lm:1-par_lm) = b_ns_t( -par_lm:1-par_lm) + derY * dalm(0:1,l) ! T odd
2091                   endif
2092                   if (l > 1) then
2093                      xp = (2*m)*one_on_s1/fl  ! spin m / (l sin(theta))
2094                      aq = at * sqrt(1.0_dp - 4.0_dp/(fl*fl)) ! at * sqrt(l^2-spin^2)/l
2095                      aq = aq * normal_l(l-1) / normal_l(l) ! correct for Ypol renormalisation ! 2010-02-09
2096                      derW   = a0 * lam_lm(2,l) - aq * lam_lm(2,l-1) + xp * lam_lm(3,l)
2097                      derX   = a0 * lam_lm(3,l) - aq * lam_lm(3,l-1) + xp * lam_lm(2,l)
2098                      b_ns_t(2-par_lm:5-par_lm) = b_ns_t(2-par_lm:5-par_lm) - derW * dalm(2:5,l) ! Q, U  odd
2099                      b_ns_t(2+par_lm)          = b_ns_t(2+par_lm)          + derX * dalm(5,l) ! Q even
2100                      b_ns_t(3+par_lm)          = b_ns_t(3+par_lm)          - derX * dalm(4,l)
2101                      b_ns_t(4+par_lm)          = b_ns_t(4+par_lm)          - derX * dalm(3,l) ! U even
2102                      b_ns_t(5+par_lm)          = b_ns_t(5+par_lm)          + derX * dalm(2,l)
2103                   endif
2104                   !-------------------------- 2nd derivatives
2105                   if (do_d2 .and. l > 0) then
2106                      ! d^2 Y/dtheta^2    = -[l(l+1) - (m^2)/sin^2(theta)] Y                                     - cotan(theta) dY/dtheta
2107                      ! d^2 W/dtheta^2    = -[l(l+1) - (m^2+s^2)/sin^2(theta)] W - 2ms cos(theta)/sin^2(theta) X - cotan(theta) dW/dtheta
2108                      ! d^2 X/dtheta^2    = -[l(l+1) - (m^2+s^2)/sin^2(theta)] X - 2ms cos(theta)/sin^2(theta) W - cotan(theta) dX/dtheta
2109                      ! s = -2
2110                      b0t = fm2*one_on_s2 - fllp1
2111                      der2Y = b0t * lam_lm(1,l)                    - cotanth * derY
2112                      b_ns_tt(par_lm:par_lm+1)   = b_ns_tt(par_lm:par_lm+1)   + der2Y * dalm(0:1,l)
2113                      b_ns_tp( -par_lm)          = b_ns_tp( -par_lm)          - (fm * derY) * dalm(1,l)
2114                      b_ns_tp(1-par_lm)          = b_ns_tp(1-par_lm)          + (fm * derY) * dalm(0,l)
2115                      b_ns_pp(par_lm:par_lm+1)   = b_ns_pp(par_lm:par_lm+1)   - fm2 * factor(0:1)
2116
2117                      if (l > 1) then
2118                         b0p = b0t + 4._dp*one_on_s2
2119                         bx  = 4*m * cth_ring * one_on_s2
2120                         der2W = b0p * lam_lm(2,l) + bx * lam_lm(3,l) - cotanth * derW
2121                         der2X = b0p * lam_lm(3,l) + bx * lam_lm(2,l) - cotanth * derX
2122                         b_ns_tt(par_lm+2:par_lm+5) = b_ns_tt(par_lm+2:par_lm+5) - der2W * dalm(2:5,l)
2123                         b_ns_tt(2-par_lm)          = b_ns_tt(2-par_lm)          + der2X * dalm(5,l) ! Q odd
2124                         b_ns_tt(3-par_lm)          = b_ns_tt(3-par_lm)          - der2X * dalm(4,l)
2125                         b_ns_tt(4-par_lm)          = b_ns_tt(4-par_lm)          - der2X * dalm(3,l) ! U odd
2126                         b_ns_tt(5-par_lm)          = b_ns_tt(5-par_lm)          + der2X * dalm(2,l)
2127                         ! d^2f/dtheta/dphi = i * m * df/dtheta
2128                         b_ns_tp(par_lm+2:par_lm+3) = b_ns_tp(par_lm+2:par_lm+3) + (fm * derX) * dalm(4:5,l)
2129                         b_ns_tp(par_lm+4:par_lm+5) = b_ns_tp(par_lm+4:par_lm+5) - (fm * derX) * dalm(2:3,l)
2130                         b_ns_tp(2-par_lm)          = b_ns_tp(2-par_lm)          + (fm * derW) * dalm(3,l)
2131                         b_ns_tp(3-par_lm)          = b_ns_tp(3-par_lm)          - (fm * derW) * dalm(2,l)
2132                         b_ns_tp(4-par_lm)          = b_ns_tp(4-par_lm)          + (fm * derW) * dalm(5,l)
2133                         b_ns_tp(5-par_lm)          = b_ns_tp(5-par_lm)          - (fm * derW) * dalm(4,l)
2134                         ! d^2f/dphi^2      = -m^2 * Y_lm * a_lm
2135                         b_ns_pp(par_lm+2:par_lm+5) = b_ns_pp(par_lm+2:par_lm+5) +(fm2 * lam_lm(2,l))* dalm(2:5,l) ! Q, U  even
2136                         b_ns_pp(2-par_lm)          = b_ns_pp(2-par_lm)          -(fm2 * lam_lm(3,l))* dalm(5,l) ! Q odd
2137                         b_ns_pp(3-par_lm)          = b_ns_pp(3-par_lm)          +(fm2 * lam_lm(3,l))* dalm(4,l)
2138                         b_ns_pp(4-par_lm)          = b_ns_pp(4-par_lm)          +(fm2 * lam_lm(3,l))* dalm(3,l) ! U odd
2139                         b_ns_pp(5-par_lm)          = b_ns_pp(5-par_lm)          -(fm2 * lam_lm(3,l))* dalm(2,l)
2140                      endif
2141                   endif
2142                   !-------------------------
2143                enddo ! loop on l
2144                do k=0,2 ! loop on T,Q,U
2145                   k0 = 2*k
2146                   k1 = k0+1
2147                   if (keep_north(ithl)) then
2148                      ! fields
2149                      b_d1(0+k,m,ithl)   = cmplx(b_ns(k0+3) + b_ns(k0-3), &
2150                           &                     b_ns(k1+3) + b_ns(k1-3), kind=DP) ! north=Even+Odd
2151                      ! dfield/dtheta
2152                      b_d1(6+k,m,ithl)   = cmplx(b_ns_t(k0+3) + b_ns_t(k0-3), &
2153                           &                     b_ns_t(k1+3) + b_ns_t(k1-3), kind=DP) ! north=Even+Odd
2154                      ! dfield/dphi/sin(theta)
2155                      b_d1(12+k,m,ithl)  = cmplx(b_ns_p(k0+3) + b_ns_p(k0-3), &
2156                           &                     b_ns_p(k1+3) + b_ns_p(k1-3), kind=DP)*one_on_s1
2157                   endif
2158                   if (keep_south(ithl)) then
2159                      ! fields
2160                      b_d1(3+k,m,ithl)   = cmplx(b_ns(k0+3) - b_ns(k0-3), &
2161                           &                     b_ns(k1+3) - b_ns(k1-3), kind=DP) ! south=Even-Odd
2162                      ! dfield/dtheta
2163                      b_d1(9+k,m,ithl)   = cmplx(b_ns_t(k0+3) - b_ns_t(k0-3), &
2164                           &                     b_ns_t(k1+3) - b_ns_t(k1-3), kind=DP) ! south=Even-Odd
2165                      ! dfield/dphi/sin(theta)
2166                      b_d1(15+k,m,ithl)  = cmplx(b_ns_p(k0+3) - b_ns_p(k0-3), &
2167                           &                     b_ns_p(k1+3) - b_ns_p(k1-3), kind=DP)*one_on_s1
2168                   endif
2169                enddo
2170                if (do_d2) then
2171                   do k=0,2 ! loop on T,Q,U
2172                      k0 = 2*k
2173                      k1 = k0+1
2174                      if (keep_north(ithl)) then
2175                         ! dfield/dtheta^2
2176                         b_d2(0+k,m,ithl)   = cmplx(b_ns_tt(k0+3) + b_ns_tt(k0-3), &
2177                              &                     b_ns_tt(k1+3) + b_ns_tt(k1-3), kind=DP)
2178                         ! dfield/dtheta/dphi/sin(theta)
2179                         b_d2(6+k,m,ithl)   = cmplx(b_ns_tp(k0+3) + b_ns_tp(k0-3), &
2180                              &                     b_ns_tp(k1+3) + b_ns_tp(k1-3), kind=DP)*one_on_s1
2181                         ! dfield/dphi&2/sin(theta)^2
2182                         b_d2(12+k,m,ithl)  = cmplx(b_ns_pp(k0+3) + b_ns_pp(k0-3), &
2183                           &                     b_ns_pp(k1+3) + b_ns_pp(k1-3), kind=DP)*one_on_s2
2184                      endif
2185                      if (keep_south(ithl)) then
2186                         ! dfield/dtheta^2
2187                         b_d2(3+k,m,ithl)   = cmplx(b_ns_tt(k0+3) - b_ns_tt(k0-3), &
2188                              &                     b_ns_tt(k1+3) - b_ns_tt(k1-3), kind=DP)
2189                         ! dfield/dtheta/dphi/sin(theta)
2190                         b_d2(9+k,m,ithl)   = cmplx(b_ns_tp(k0+3) - b_ns_tp(k0-3), &
2191                              &                     b_ns_tp(k1+3) - b_ns_tp(k1-3), kind=DP)*one_on_s1
2192                         ! dfield/dphi&2/sin(theta)^2
2193                         b_d2(15+k,m,ithl)  = cmplx(b_ns_pp(k0+3) - b_ns_pp(k0-3), &
2194                              &                     b_ns_pp(k1+3) - b_ns_pp(k1-3), kind=DP)*one_on_s2
2195                      endif
2196                   enddo
2197                endif
2198             endif ! test on nlmax
2199          enddo ! loop on rings (ithl)
2200       enddo ! loop on m
2201!$OMP end do
2202       if (do_openmp()) deallocate (recfac,dalm, lam_fact, lam_fact_der)
2203!$OMP end parallel
2204
2205!$OMP parallel default(none) &
2206!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
2207!$OMP      lchk, uchk, b_d1, b_d2, &
2208!$OMP      nph, startpix, kphi0, map_TQU, der1, der2, do_d2, &
2209!$OMP      keep_north, keep_south, keep_it ) &
2210!$OMP  private(ithl, nphl, istart_north, istart_south, &
2211!$OMP      ith, ring, bline, status, k0, di)
2212
2213       if (do_openmp()) then
2214          allocate(ring(0:nphmx-1), stat = status)
2215          call assert_alloc(status,code,'ring')
2216          allocate(bline(0:nmmax),stat = status)
2217          call assert_alloc(status,code,'bline')
2218       endif
2219
2220!$OMP do schedule(dynamic,1)
2221
2222       do ithl = 0, uchk - lchk
2223          nphl = nph(ithl)
2224          istart_north = startpix(ithl)
2225          istart_south = npix-istart_north-nphl
2226          ith  = ithl + lchk
2227
2228       !        ---------------------------------------------------------------
2229       !        sum_m  b(m,theta)*exp(i*m*phi)   -> f(phi,theta)
2230       !        ---------------------------------------------------------------
2231          if (keep_north(ithl)) then
2232             do k0=0,2
2233                bline = b_d1(k0, 0:nmmax, ithl)
2234                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! north hemisph. + equator
2235                map_TQU(istart_north:istart_north+nphl-1,k0+1) = ring(0:nphl-1)
2236             enddo
2237             do k0=0,2
2238                bline = b_d1(6+k0, 0:nmmax, ithl)
2239                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! north hemisph. + equator
2240                der1(istart_north:istart_north+nphl-1,k0+1) = ring(0:nphl-1)
2241             enddo
2242             do k0=0,2
2243                bline = b_d1(12+k0, 0:nmmax, ithl)
2244                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! north hemisph. + equator
2245                der1(istart_north:istart_north+nphl-1,k0+4) = ring(0:nphl-1)
2246             enddo
2247          endif
2248
2249          if (keep_south(ithl) .and. ith < nrings) then
2250             do k0=0,2
2251                bline = b_d1(3+k0, 0:nmmax, ithl)
2252                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! south hemisph. w/o equat
2253                map_TQU(istart_south:istart_south+nphl-1,k0+1) = ring(0:nphl-1)
2254             enddo
2255             do k0=0,2
2256                bline = b_d1(9+k0, 0:nmmax, ithl)
2257                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! south hemisph. w/o equat
2258                der1(istart_south:istart_south+nphl-1,k0+1) = ring(0:nphl-1)
2259             enddo
2260             do k0=0,2
2261                bline = b_d1(15+k0, 0:nmmax, ithl)
2262                call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! south hemisph. w/o equat
2263                der1(istart_south:istart_south+nphl-1,k0+4) = ring(0:nphl-1)
2264             enddo
2265          endif
2266          if (do_d2) then
2267             if (keep_north(ithl)) then
2268                do di = 0,2
2269                   do k0=0,2
2270                      bline = b_d2(k0+6*di, 0:nmmax, ithl)
2271                      call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! north hemisph. + equator
2272                      der2(istart_north:istart_north+nphl-1,k0+3*di+1) = ring(0:nphl-1)
2273                   enddo
2274                enddo
2275             endif
2276             if (keep_south(ithl) .and. ith < nrings) then
2277                do di = 0,2
2278                   do k0=0,2
2279                      bline = b_d2(k0+6*di+3, 0:nmmax, ithl)
2280                      call ring_synthesis(nsmax,nlmax,nmmax,bline,   nphl,ring,kphi0(ithl))   ! south hemisph. w/o equat
2281                      der2(istart_south:istart_south+nphl-1,k0+3*di+1) = ring(0:nphl-1)
2282                   enddo
2283                enddo
2284             endif
2285          endif
2286       enddo    ! loop on ithl
2287!$OMP end do
2288       if (do_openmp()) then
2289          deallocate(ring, bline)
2290       endif
2291!$OMP end parallel
2292
2293    enddo    ! loop on chunks
2294
2295    !     --------------------
2296    !     free memory and exit
2297    !     --------------------
2298    if (.not. do_openmp()) then
2299       deallocate(recfac, dalm, lam_fact, lam_fact_der, ring, bline)
2300    endif
2301
2302    deallocate(mfac)
2303    deallocate(b_d1)
2304    if (do_d2) then
2305       deallocate(b_d2)
2306    endif
2307
2308    return
2309  end subroutine alm2map_pol_der_KLOAD
2310
2311  !**************************************************************************
2312  !
2313  !             MAP2ALM
2314  !
2315  !**************************************************************************
2316  !=======================================================================
2317  !  map2alm(nsmax, nlmax, nmmax, map, alm [, zbound, w8ring ,plm])
2318  !
2319  !     computes the a(l,m) from a map (temperature only or polarized)
2320  !           for the HEALPIX pixelisation
2321  !
2322  !     For the Temperature field
2323  !     a(l,m) = int T(theta,phi) Y_lm(theta,phi)^* dtheta*dphi
2324  !            = int dtheta lambda_lm(theta)
2325  !                  * int dphi T(theta,phi) e^(-i*m*phi)
2326  !     For polarizaton
2327  !     alm_G = \int ( - Q Wlm - i U Xlm )
2328  !     alm_C = \int ( - U Wlm + i Q Xlm )
2329  !
2330  !     where Y_lm(theta,phi) = lambda(theta) * e^(i*m*phi)
2331  !
2332  !     * the recurrence of Ylm is the standard one (cf Num Rec)
2333  !     * the integral over phi is done by FFT
2334  !
2335  !     zbounds: cut apply (in cos_theta)
2336  !     w8ring: ring dependent weigthing scheme to improve quadrature
2337  !     plm: precomputed Ylm
2338  !=======================================================================
2339
2340  ! interface with legacy code
2341  !=======================================================================
2342  subroutine map2alm_old_sc_KLOAD(nsmax, nlmax, nmmax, map, alm, cos_theta_cut, w8ring, plm)
2343    !=======================================================================
2344    integer(I4B), intent(IN)                    :: nsmax, nlmax, nmmax
2345    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
2346    complex(KALMC), intent(OUT), dimension(1:1,0:nlmax,0:nmmax) :: alm
2347    real(DP),     intent(IN)                          :: cos_theta_cut
2348    real(DP),     intent(IN),  dimension(1:2*nsmax,1) :: w8ring
2349    real(DP),     intent(IN),  dimension(0:), optional          :: plm
2350    !
2351    real(DP),     dimension(1:2) :: zbounds
2352
2353    call warning_oldbounds(cos_theta_cut, zbounds)
2354    if (present(plm)) then
2355       call map2alm(nsmax, nlmax, nmmax, map, alm, zbounds, w8ring, plm)
2356    else
2357       call map2alm(nsmax, nlmax, nmmax, map, alm, zbounds, w8ring)
2358    endif
2359
2360    return
2361  end subroutine map2alm_old_sc_KLOAD
2362  !=======================================================================
2363  subroutine map2alm_old_pol_KLOAD(nsmax, nlmax, nmmax, map_TQU, alm_TGC, cos_theta_cut, w8ring, plm)
2364    !=======================================================================
2365    integer(I4B), intent(IN)                    :: nsmax, nlmax, nmmax
2366    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
2367    complex(KALMC), intent(OUT), dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
2368    real(DP),     intent(IN)                          :: cos_theta_cut
2369    real(DP),     intent(IN),  dimension(1:2*nsmax,1:3) :: w8ring
2370    real(DP),     intent(IN),  dimension(0:), optional          :: plm
2371    !
2372    real(DP),     dimension(1:2) :: zbounds
2373
2374    call warning_oldbounds(cos_theta_cut, zbounds)
2375    if (present(plm)) then
2376       call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds, w8ring, plm)
2377    else
2378       call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds, w8ring)
2379    endif
2380    return
2381  end subroutine map2alm_old_pol_KLOAD
2382
2383  !=======================================================================
2384  subroutine map2alm_old_pol2_KLOAD(nsmax, nlmax, nmmax, map_TQU, alm_TGC, cos_theta_cut, w8ring, plm)
2385    !=======================================================================
2386    integer(I4B), intent(IN)                    :: nsmax, nlmax, nmmax
2387    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
2388    complex(KALMC), intent(OUT), dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
2389    real(DP),     intent(IN)                          :: cos_theta_cut
2390    real(DP),     intent(IN),  dimension(1:2*nsmax,1:3) :: w8ring
2391    real(DP),     intent(IN),  dimension(0:,1:)         :: plm
2392    !
2393    real(DP),     dimension(1:2) :: zbounds
2394
2395    call warning_oldbounds(cos_theta_cut, zbounds)
2396    call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds, w8ring, plm)
2397
2398    return
2399  end subroutine map2alm_old_pol2_KLOAD
2400
2401
2402  !=======================================================================
2403  subroutine map2alm_sc_KLOAD(nsmax, nlmax, nmmax, map, alm, zbounds, w8ring)
2404    !=======================================================================
2405    !     computes the a(l,m) from a Temperature map for the HEALPIX pixelisation
2406    !        all from scratch
2407    !=======================================================================
2408    integer(I4B), intent(IN)                    :: nsmax, nlmax, nmmax
2409    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
2410    complex(KALMC), intent(OUT), dimension(1:1,0:nlmax,0:nmmax) :: alm
2411    real(DP),     intent(IN),  dimension(1:2),         optional :: zbounds
2412    real(DP),     intent(IN),  dimension(1:2*nsmax,1), optional :: w8ring
2413
2414    real(DP), dimension(1:2)         :: zbounds_in
2415    real(DP), dimension(1:2*nsmax,1) :: w8ring_in
2416
2417#ifdef USE_SHARP
2418    zbounds_in = (/-1.d0 , 1.d0/)
2419    if (present(zbounds)) zbounds_in = zbounds
2420    w8ring_in  = 1.d0
2421    if (present(w8ring))  w8ring_in  = w8ring
2422
2423    call sharp_hp_map2alm_x_KLOAD(nsmax,nlmax,nmmax,map,alm,zbounds_in,w8ring_in)
2424#else
2425
2426    integer(I4B) :: l, m, ith, scalem, scalel   ! alm related
2427    integer(I8B) :: istart_south, istart_north, npix  ! map related
2428    integer(I4B) :: nrings, nphmx
2429    real(DP)     :: omega_pix
2430
2431    integer(I4B)                              :: par_lm
2432    real(DP)              :: lam_mm, lam_lm, lam_0, lam_1, lam_2, corfac, cth_ring
2433    real(DP)                      :: ovflow, unflow
2434    real(DP),     dimension(-1:2)             :: phas_sd
2435    real(DP),     dimension(:,:), allocatable :: dalm
2436    real(DP),     dimension(:),   allocatable :: mfac
2437    real(DP),     dimension(:,:), allocatable :: recfac
2438
2439    integer(I4B)                              :: l_min
2440    complex(DPC)                              :: php, phm
2441    complex(DPC), dimension(:,:), allocatable :: phas_n, phas_s
2442    real(DP),     dimension(:),   allocatable :: ring
2443    integer(I4B)                   :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
2444    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
2445    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
2446    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth
2447    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
2448
2449    character(LEN=*), PARAMETER :: code = 'MAP2ALM'
2450    integer(I4B) :: status
2451    !=======================================================================
2452
2453    zbounds_in = (/-1.d0 , 1.d0/)
2454    if (present(zbounds)) zbounds_in = zbounds
2455    w8ring_in  = 1.d0
2456    if (present(w8ring))  w8ring_in  = w8ring
2457
2458    ! Healpix definitions
2459    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
2460    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
2461    nphmx  = 4*nsmax           ! maximum number of pixels/ring
2462    omega_pix = FOURPI / real(npix, kind=DP)  ! pixel area (identical for all pixels)
2463
2464    !     --- allocates space for arrays ---
2465    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
2466    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
2467
2468    allocate(mfac(0:nmmax),stat = status)
2469    call assert_alloc(status,code,'mfac')
2470
2471    allocate(phas_n(0:nmmax,0:chunksize-1),stat = status)
2472    call assert_alloc(status,code,'phas_n')
2473
2474    allocate(phas_s(0:nmmax,0:chunksize-1),stat = status)
2475    call assert_alloc(status,code,'phas_s')
2476
2477    if (.not.do_openmp()) then
2478       allocate(ring(0:nphmx-1),stat = status)
2479       call assert_alloc(status,code,'ring')
2480       allocate(recfac(0:1,0:nlmax),stat = status)
2481       call assert_alloc(status,code,'recfac')
2482       allocate(dalm(1:2,0:nlmax),stat = status)
2483       call assert_alloc(status,code,'dalm')
2484    endif
2485
2486    !     ------------ initiate variables and arrays ----------------
2487
2488    call gen_mfac(nmmax,mfac)
2489
2490    call init_rescale()
2491    ovflow = rescale_tab(1)
2492    unflow = rescale_tab(-1)
2493    alm = 0.0 ! set the whole alm array to zero
2494
2495    ! loop on chunks
2496    do ichunk = 0, nchunks-1
2497       lchk = ichunk * chunksize + 1
2498       uchk = min(lchk+chunksize - 1, nrings)
2499
2500       do ith = lchk, uchk
2501          ithl = ith - lchk !local index
2502          ! get pixel location information
2503          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
2504          ! find out which rings are to be analysed
2505          call select_rings(cth(ithl), zbounds_in, keep_north(ithl), keep_south(ithl), keep_it(ithl))
2506       enddo
2507
2508       !-----------------------------------------------------------------------
2509       !  computes the integral in phi : phas_m(theta)
2510       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
2511       !-----------------------------------------------------------------------
2512       phas_n = 0_dpc
2513       phas_s = 0_dpc
2514
2515!$OMP parallel default(none) &
2516!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
2517!$OMP      lchk, uchk, nph, startpix, kphi0, w8ring_in, phas_n, phas_s, &
2518!$OMP      keep_north, keep_south, map) &
2519!$OMP  private(ithl, nphl, istart_north, istart_south, ith, ring, status)
2520
2521       if (do_openmp()) then
2522          allocate(ring(0:nphmx-1),stat = status)
2523          call assert_alloc(status,code,'ring')
2524       endif
2525!$OMP do schedule(dynamic,1)
2526       do ith = lchk, uchk
2527          ithl = ith - lchk !local index
2528          nphl = nph(ithl)
2529          istart_north = startpix(ithl)
2530          istart_south = npix-istart_north-nphl
2531          ! do Fourier Transform on rings
2532          if (keep_north(ithl)) then
2533!             ring(0:nphl-1) = map(istart_north:istart_north+nphl-1) * w8ring_in(ith,1)
2534             call sub_map2ring(map, istart_north, nphl, w8ring_in(ith,1), ring)
2535             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_n(0,ithl), kphi0(ithl))
2536          endif
2537
2538          if (ith < nrings .and. keep_south(ithl)) then
2539!              ring(0:nphl-1) = map(istart_south:istart_south+nphl-1) * w8ring_in(ith,1)
2540             call sub_map2ring(map, istart_south, nphl, w8ring_in(ith, 1), ring)
2541             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_s(0,ithl), kphi0(ithl))
2542          endif
2543       enddo ! loop on ring
2544!$OMP end do
2545       if (do_openmp()) then
2546          deallocate(ring)
2547       endif
2548!$OMP end parallel
2549       !-----------------------------------------------------------------------
2550       !              computes the a_lm by integrating over theta
2551       !                  lambda_lm(theta) * phas_m(theta)
2552       !                         for each m and l
2553       !-----------------------------------------------------------------------
2554
2555!$OMP parallel default(none) &
2556!$OMP shared(nlmax, nmmax, lchk, uchk, rescale_tab, ovflow, unflow, &
2557!$OMP    cth, sth, mfac, alm, phas_n, phas_s, keep_it, omega_pix) &
2558!$OMP private(recfac, dalm, phas_sd, status, m, ithl, l_min, &
2559!$OMP   scalem, scalel, corfac, par_lm, lam_mm, lam_lm, lam_0, lam_1, lam_2, &
2560!$OMP   cth_ring, l, php, phm)
2561
2562       if (do_openmp()) then
2563          allocate(recfac(0:1,0:nlmax),stat = status)
2564          call assert_alloc(status,code,'recfac')
2565          allocate(dalm(1:2,0:nlmax),stat = status)
2566          call assert_alloc(status,code,'dalm')
2567       endif
2568
2569!$OMP do schedule(dynamic,1)
2570       do m = 0, nmmax
2571          ! generate recursion factors (recfac) for Ylm of degree m
2572          call gen_recfac(nlmax, m, recfac)
2573
2574          ! introduce double precision vector to perform summation over ith for each l
2575          dalm(1:2, m:nlmax ) = 0.0_dp
2576
2577          do ithl = 0, uchk - lchk
2578             l_min = l_min_ylm(m, sth(ithl))
2579             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
2580                ! determine lam_mm
2581                call compute_lam_mm(mfac(m), m, sth(ithl), lam_mm, corfac, scalem)
2582
2583                !           ---------- l = m ----------
2584                par_lm = 1
2585
2586                php = phas_n(m,ithl) + phas_s(m,ithl) ! sum  (if (l+m) even)
2587                phm = phas_n(m,ithl) - phas_s(m,ithl) ! diff (if (l+m) odd)
2588                phas_sd(-1:0) =  (/ real(phm, kind=dp), aimag(phm) /)
2589                phas_sd( 1:2) =  (/ real(php, kind=dp), aimag(php) /)
2590
2591                if (m >= l_min) then
2592                   lam_lm = corfac * lam_mm !Actual lam_mm
2593                   dalm(1:2, m) = dalm(1:2, m) + lam_lm * phas_sd(par_lm:par_lm+1)
2594                endif
2595
2596                !           ---------- l > m ----------
2597                lam_0 = 0.0_dp
2598                lam_1 = 1.0_dp
2599                scalel=0
2600                cth_ring = cth(ithl)
2601                lam_2 = cth_ring * lam_1 * recfac(0,m)
2602                do l = m+1, nlmax
2603                   par_lm = - par_lm  ! = (-1)^(l+m)
2604                   if (l >= l_min) then
2605                      lam_lm = lam_2 * corfac * lam_mm
2606                      dalm(1:2, l) = dalm(1:2, l) &
2607                           &       + lam_lm * phas_sd(par_lm:par_lm+1)
2608                   endif
2609
2610                   lam_0 = lam_1 * recfac(1,l-1)
2611                   lam_1 = lam_2
2612                   lam_2 = (cth_ring * lam_1 - lam_0) * recfac(0,l)
2613                   if (abs(lam_2) > OVFLOW) then
2614                      lam_1 = lam_1*UNFLOW
2615                      lam_2 = lam_2*UNFLOW
2616                      scalel= scalel + 1
2617                      corfac= rescale_tab(max(scalem+scalel,RSMIN))
2618                   elseif (abs(lam_2) < UNFLOW) then
2619                      lam_1 = lam_1*OVFLOW
2620                      lam_2 = lam_2*OVFLOW
2621                      scalel= scalel - 1
2622                      corfac= rescale_tab(max(scalem+scalel,RSMIN))
2623                   endif
2624
2625                enddo ! loop on l
2626             endif ! test on cut sky and nlmax
2627          enddo ! loop on ithl
2628          do l = m, nlmax
2629             alm(1, l, m) = alm(1, l, m) + cmplx(dalm(1, l), dalm(2, l), kind=DP) * omega_pix
2630          enddo
2631       enddo ! loop on m
2632!$OMP end do
2633       if (do_openmp()) then
2634          deallocate (recfac,dalm)
2635       endif
2636!$OMP end parallel
2637    enddo ! loop on chunks
2638
2639    !     --------------------
2640    !     free memory and exit
2641    !     --------------------
2642    deallocate(phas_n,phas_s)
2643    deallocate(mfac)
2644    if (.not.do_openmp()) then
2645       deallocate (ring,recfac,dalm)
2646    endif
2647    RETURN
2648#endif
2649  END subroutine map2alm_sc_KLOAD
2650  !=======================================================================
2651  subroutine map2alm_spin_KLOAD(nsmax, nlmax, nmmax, spin, map, alm, zbounds, w8ring)
2652    !=======================================================================
2653    !     computes the a(l,m) from a Temperature map for the HEALPIX pixelisation
2654    !        all from scratch
2655    !=======================================================================
2656    integer(I4B),   intent(IN)                    :: nsmax, nlmax, nmmax, spin
2657    real(KMAP),     intent(IN),  dimension(0:, 1:) :: map
2658    complex(KALMC), intent(OUT), dimension(1:,0:,0:) :: alm
2659    real(DP),       intent(IN),  dimension(1:2),         optional :: zbounds
2660    real(DP),       intent(IN),  dimension(1:2*nsmax,1:2), optional :: w8ring
2661
2662    real(DP), dimension(1:2)         :: zbounds_in
2663    real(DP), dimension(1:2*nsmax,1:2) :: w8ring_in
2664    integer(I4B) :: l, m, ith, scalem, scalel   ! alm related
2665    integer(I8B) :: istart_south, istart_north, npix  ! map related
2666    integer(I4B) :: nrings, nphmx
2667    real(DP)     :: omega_pix
2668
2669    real(DP)              :: corfac
2670    real(DP),     dimension(-1:2)             :: phas_sd1, phas_sd2
2671    real(DP),     dimension(:,:), allocatable :: dalm
2672    real(DP),     dimension(:,:), allocatable :: lam_lm_spin
2673    real(DP),     dimension(:),   allocatable :: mfac, mfac_spin
2674    real(DP),     dimension(:,:), allocatable :: recfac_spin
2675
2676    integer(I4B)                              :: l_min, l_start
2677    complex(DPC)                              :: php1, phm1, php2, phm2
2678    complex(DPC), dimension(:,:), allocatable :: phas_n1, phas_s1,phas_n2, phas_s2
2679    real(DP),     dimension(:),   allocatable :: ring
2680    integer(I4B)                   :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
2681    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
2682    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
2683    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth
2684    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
2685
2686    character(LEN=*), PARAMETER :: code = 'MAP2ALM_SPIN'
2687    integer(I4B) :: status
2688    logical(LGT) :: even_spin
2689    integer(I4B) :: aspin
2690    !=======================================================================
2691
2692    zbounds_in = (/-1.d0 , 1.d0/)
2693    if (present(zbounds)) zbounds_in = zbounds
2694    w8ring_in  = 1.d0
2695    if (present(w8ring))  w8ring_in  = w8ring
2696
2697#ifdef USE_SHARP
2698    aspin = abs(spin)
2699    if ((aspin>0).and.(aspin<=100)) then
2700      call sharp_hp_map2alm_spin_x_KLOAD(nsmax,nlmax,nmmax,aspin, &
2701        map(0:12*nsmax*nsmax-1,1:2),alm(1:2,0:nlmax,0:nmmax),zbounds_in,w8ring_in)
2702      return
2703    endif
2704#endif
2705
2706    ! Healpix definitions
2707    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
2708    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
2709    nphmx  = 4*nsmax           ! maximum number of pixels/ring
2710    omega_pix = FOURPI / real(npix, kind=DP)  ! pixel area (identical for all pixels)
2711
2712    !     --- allocates space for arrays ---
2713    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
2714    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
2715
2716    allocate(mfac(0:nmmax),stat = status)
2717    call assert_alloc(status,code,'mfac')
2718
2719    allocate(mfac_spin(0:nmmax),stat = status)
2720    call assert_alloc(status,code,'mfac_spin')
2721
2722    allocate(phas_n1(0:nmmax,0:chunksize-1),stat = status)
2723    call assert_alloc(status,code,'phas_n1')
2724
2725    allocate(phas_s1(0:nmmax,0:chunksize-1),stat = status)
2726    call assert_alloc(status,code,'phas_s1')
2727
2728    allocate(phas_n2(0:nmmax,0:chunksize-1),stat = status)
2729    call assert_alloc(status,code,'phas_n2')
2730
2731    allocate(phas_s2(0:nmmax,0:chunksize-1),stat = status)
2732    call assert_alloc(status,code,'phas_s2')
2733
2734    even_spin = (iand(abs(spin), 1) == 0)
2735
2736    if (.not.do_openmp()) then
2737       allocate(ring(0:nphmx-1),stat = status)
2738       call assert_alloc(status,code,'ring')
2739       allocate(dalm(1:4,0:nlmax),stat = status)
2740       call assert_alloc(status,code,'dalm')
2741       allocate(recfac_spin(0:2,0:nlmax),stat = status)
2742       call assert_alloc(status,code,'recfac_spin')
2743       allocate(lam_lm_spin(1:2,0:nlmax), stat = status)
2744       call assert_alloc(status,code,'lam_lm_spin')
2745    endif
2746
2747    !     ------------ initiate variables and arrays ----------------
2748
2749    call gen_mfac(nmmax,mfac)
2750    call gen_mfac_spin(nmmax, spin, mfac_spin)
2751
2752    call init_rescale()
2753    alm = 0.0 ! set the whole alm array to zero
2754
2755    ! loop on chunks
2756    do ichunk = 0, nchunks-1
2757       lchk = ichunk * chunksize + 1
2758       uchk = min(lchk+chunksize - 1, nrings)
2759
2760       do ith = lchk, uchk
2761          ithl = ith - lchk !local index
2762          ! get pixel location information
2763          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
2764          ! find out which rings are to be analysed
2765          call select_rings(cth(ithl), zbounds_in, keep_north(ithl), keep_south(ithl), keep_it(ithl))
2766       enddo
2767
2768       !-----------------------------------------------------------------------
2769       !  computes the integral in phi : phas_m(theta)
2770       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
2771       !-----------------------------------------------------------------------
2772       phas_n1 = 0_dpc
2773       phas_s1 = 0_dpc
2774       phas_n2 = 0_dpc
2775       phas_s2 = 0_dpc
2776
2777!$OMP parallel default(none) &
2778!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
2779!$OMP      lchk, uchk, nph, startpix, kphi0, w8ring_in, phas_n1, phas_s1,  phas_n2, phas_s2, &
2780!$OMP      keep_north, keep_south, map) &
2781!$OMP  private(ithl, nphl, istart_north, istart_south, ith, ring, status)
2782
2783       if (do_openmp()) then
2784          allocate(ring(0:nphmx-1),stat = status)
2785          call assert_alloc(status,code,'ring')
2786       endif
2787!$OMP do schedule(dynamic,1)
2788       do ith = lchk, uchk
2789          ithl = ith - lchk !local index
2790          nphl = nph(ithl)
2791          istart_north = startpix(ithl)
2792          istart_south = npix-istart_north-nphl
2793          ! do Fourier Transform on rings
2794          if (keep_north(ithl)) then
2795             !ring(0:nphl-1) = map(istart_north:istart_north+nphl-1, 1) * w8ring_in(ith,1)
2796             call sub_map2ring(map, 1, istart_north, nphl, w8ring_in(ith, 1), ring)
2797             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_n1(0,ithl), kphi0(ithl))
2798             !ring(0:nphl-1) = map(istart_north:istart_north+nphl-1, 2) * w8ring_in(ith,2)
2799             call sub_map2ring(map, 2, istart_north, nphl, w8ring_in(ith, 2), ring)
2800             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_n2(0,ithl), kphi0(ithl))
2801          endif
2802
2803          if (ith < nrings .and. keep_south(ithl)) then
2804             !ring(0:nphl-1) = map(istart_south:istart_south+nphl-1, 1) * w8ring_in(ith,1)
2805             call sub_map2ring(map, 1, istart_south, nphl, w8ring_in(ith, 1), ring)
2806             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_s1(0,ithl), kphi0(ithl))
2807             !ring(0:nphl-1) = map(istart_south:istart_south+nphl-1, 2) * w8ring_in(ith,2)
2808             call sub_map2ring(map, 2, istart_south, nphl, w8ring_in(ith, 2), ring)
2809             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_s2(0,ithl), kphi0(ithl))
2810          endif
2811       enddo ! loop on ring
2812!$OMP end do
2813       if (do_openmp()) then
2814          deallocate(ring)
2815       endif
2816!$OMP end parallel
2817       !-----------------------------------------------------------------------
2818       !              computes the a_lm by integrating over theta
2819       !                  lambda_lm(theta) * phas_m(theta)
2820       !                         for each m and l
2821       !-----------------------------------------------------------------------
2822
2823!$OMP parallel default(none) &
2824!$OMP shared(nlmax, nmmax, spin, lchk, uchk, rescale_tab, even_spin, &
2825!$OMP    cth, sth, mfac, mfac_spin, alm, phas_n1, phas_s1, phas_n2, phas_s2, keep_it, omega_pix) &
2826!$OMP private(recfac_spin, dalm, phas_sd1, phas_sd2, status, m, ithl, l_min, &
2827!$OMP   lam_lm_spin, l, l_start, php1, phm1, php2, phm2)
2828
2829       if (do_openmp()) then
2830          allocate(dalm(1:4,0:nlmax),stat = status)
2831          call assert_alloc(status,code,'dalm')
2832          allocate(recfac_spin(0:2,0:nlmax),stat = status)
2833          call assert_alloc(status,code,'recfac_spin')
2834          allocate(lam_lm_spin(1:2,0:nlmax), stat = status)
2835          call assert_alloc(status,code,'lam_lm_spin')
2836       endif
2837
2838!$OMP do schedule(dynamic,1)
2839       do m = 0, nmmax
2840          ! generate recursion factors (recfac_spin) for Ylm of degree m
2841          call gen_recfac_spin(nlmax, m, spin, recfac_spin)
2842
2843          ! a[+/-s]  = (map1 +/- i map2) Y [+/-s]^*
2844          ! Y_even = (Y[s] + Y[-s] ) / 2  ; Y_odd  = (Y[s] - Y[-s] ) / 2
2845          ! hsum = (a[s]+a[-s])/2, hdiff = (a[s]-a[-s])/2
2846          ! hsum  = Y_even^*     * map1 + Y_odd^* * i * map2
2847          ! hdiff = Y_even^* * i * map2 + Y_odd^*     * map1
2848          ! introduce double precision vector to perform summation over ith for each l
2849          dalm(1:4, m:nlmax ) = 0.0_dp
2850
2851          do ithl = 0, uchk - lchk
2852             l_min = l_min_ylm(m, sth(ithl))
2853             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
2854                ! compute lam_lm(theta) for all l>=m
2855                call do_lam_lm_spin(nlmax, m, spin, cth(ithl), sth(ithl), mfac(m), mfac_spin(m), recfac_spin, lam_lm_spin)
2856
2857                php1 = phas_n1(m,ithl) + phas_s1(m,ithl) ! sum  (if (l+m) even)
2858                phm1 = phas_n1(m,ithl) - phas_s1(m,ithl) ! diff (if (l+m) odd)
2859                php2 = phas_n2(m,ithl) + phas_s2(m,ithl) ! sum  (if (l+m) even)
2860                phm2 = phas_n2(m,ithl) - phas_s2(m,ithl) ! diff (if (l+m) odd)
2861
2862                phas_sd1(-1:2) =  (/ real(phm1, kind=dp), aimag(phm1), real(php1, kind=dp), aimag(php1) /)
2863                phas_sd2(-1:2) =  (/ real(phm2, kind=dp), aimag(phm2), real(php2, kind=dp), aimag(php2) /)
2864
2865                l_start = max(m, l_min) ! even values of (l+m)
2866                if (mod(m+l_start,2) == 1) l_start = l_start + 1
2867                do l = l_start, nlmax, 2
2868                   dalm(1, l) = dalm(1, l) + lam_lm_spin(1,l) * phas_sd1(1) - lam_lm_spin(2,l) * phas_sd2(0)
2869                   dalm(2, l) = dalm(2, l) + lam_lm_spin(1,l) * phas_sd1(2) + lam_lm_spin(2,l) * phas_sd2(-1)
2870                   dalm(3, l) = dalm(3, l) - lam_lm_spin(1,l) * phas_sd2(2) + lam_lm_spin(2,l) * phas_sd1(-1)
2871                   dalm(4, l) = dalm(4, l) + lam_lm_spin(1,l) * phas_sd2(1) + lam_lm_spin(2,l) * phas_sd1(0)
2872                enddo
2873                l_start = max(m+1, l_min) ! odd values of (l+m)
2874                if (mod(m+l_start,2) == 0) l_start = l_start + 1
2875                do l = l_start, nlmax, 2
2876                   dalm(1, l) = dalm(1, l) + lam_lm_spin(1,l) * phas_sd1(-1) - lam_lm_spin(2,l) * phas_sd2(2)
2877                   dalm(2, l) = dalm(2, l) + lam_lm_spin(1,l) * phas_sd1(0)  + lam_lm_spin(2,l) * phas_sd2(1)
2878                   dalm(3, l) = dalm(3, l) - lam_lm_spin(1,l) * phas_sd2(0)  + lam_lm_spin(2,l) * phas_sd1(1)
2879                   dalm(4, l) = dalm(4, l) + lam_lm_spin(1,l) * phas_sd2(-1) + lam_lm_spin(2,l) * phas_sd1(2)
2880                enddo
2881
2882             endif ! test on cut sky and nlmax
2883          enddo ! loop on ithl
2884          ! internal alms are    (a[s]+ a[-s])/2           and  (a[s]- a[-s])/2
2885          ! turn them into      -(a[s]+ (-1)^s a[-s])/2    and -(a[s]- (-1)^s a[-s])/(2i)
2886          if (even_spin) then
2887             do l = m, nlmax
2888                alm(1, l, m) = alm(1, l, m) - cmplx(dalm(1, l),  dalm(2, l), kind=DP) * omega_pix
2889                alm(2, l, m) = alm(2, l, m) - cmplx(dalm(4, l), -dalm(3, l), kind=DP) * omega_pix
2890             enddo
2891          else
2892             do l = m, nlmax
2893                alm(1, l, m) = alm(1, l, m) - cmplx(dalm(3, l),  dalm(4, l), kind=DP) * omega_pix
2894                alm(2, l, m) = alm(2, l, m) - cmplx(dalm(2, l), -dalm(1, l), kind=DP) * omega_pix
2895             enddo
2896          endif
2897       enddo ! loop on m
2898!$OMP end do
2899       if (do_openmp()) then
2900          deallocate (recfac_spin,dalm,lam_lm_spin)
2901       endif
2902!$OMP end parallel
2903    enddo ! loop on chunks
2904
2905    !     --------------------
2906    !     free memory and exit
2907    !     --------------------
2908    deallocate(phas_n1,phas_s1,phas_n2,phas_s2)
2909    deallocate(mfac,mfac_spin)
2910    if (.not.do_openmp()) then
2911       deallocate (ring,recfac_spin,dalm,lam_lm_spin)
2912    endif
2913    RETURN
2914  END subroutine map2alm_spin_KLOAD
2915
2916  !=======================================================================
2917  subroutine map2alm_sc_pre_KLOAD(nsmax, nlmax, nmmax, map, alm, zbounds, w8ring, plm)
2918    !=======================================================================
2919    !     computes the a(l,m) from a Temperature map for the HEALPIX pixelisation
2920    !       with precomputed Ylm(theta)
2921    !=======================================================================
2922    integer(I4B), intent(IN)                    :: nsmax, nlmax, nmmax
2923    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1) :: map
2924    complex(KALMC), intent(OUT), dimension(1:1,0:nlmax,0:nmmax) :: alm
2925    real(DP),     intent(IN),  dimension(1:2)                 :: zbounds
2926    real(DP),     intent(IN),  dimension(1:2*nsmax,1) :: w8ring
2927    real(DP),     intent(IN),  dimension(0:)          :: plm
2928
2929    integer(I8B) :: istart_south, istart_north, npix
2930    integer(I4B) :: nrings, nphmx
2931    real(DP)     :: omega_pix
2932
2933    integer(I8B)                              :: n_lm, n_plm, i_mm
2934    integer(I4B)                              :: l, m, ith
2935    real(DP),     dimension(-1:2)             :: phas_sd
2936    real(DP),     dimension(:,:), allocatable :: dalm
2937    integer(I4B)                              :: l_min, l_start
2938    complex(DPC)                              :: php, phm
2939    complex(DPC), dimension(:,:), allocatable :: phas_n, phas_s
2940    real(DP),     dimension(:),   allocatable :: ring
2941    real(DP)                                  :: cth
2942    integer(I4B)                   :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl
2943    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
2944    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
2945    real(DP),     dimension(0:SMAXCHK-1) :: sth
2946    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
2947
2948    character(LEN=*), PARAMETER :: code = 'MAP2ALM'
2949    integer(I4B) :: status
2950    !=======================================================================
2951
2952    ! Healpix definitions
2953    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
2954    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
2955    nphmx  = 4*nsmax           ! maximum number of pixels/ring
2956    omega_pix = FOURPI / real(npix, DP)  ! pixel area (identical for all pixels)
2957    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
2958    n_plm  = n_lm * nrings
2959
2960    !     --- allocates space for arrays ---
2961    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
2962    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
2963
2964    allocate(phas_n(0:nmmax,0:chunksize-1),stat = status)
2965    call assert_alloc(status,code,'phas_n')
2966
2967    allocate(phas_s(0:nmmax,0:chunksize-1),stat = status)
2968    call assert_alloc(status,code,'phas_s')
2969
2970    if (.not.do_openmp()) then
2971       allocate(ring(0:nphmx-1),stat = status)
2972       call assert_alloc(status,code,'ring')
2973       allocate(dalm(1:2,0:nlmax),stat = status)
2974       call assert_alloc(status,code,'dalm')
2975    endif
2976    !     ------------ initiate variables and arrays ----------------
2977    alm = 0.0 ! set the whole alm array to zero
2978
2979    ! loop on chunks
2980    do ichunk = 0, nchunks-1
2981       lchk = ichunk * chunksize + 1
2982       uchk = min(lchk+chunksize - 1, nrings)
2983
2984       do ith = lchk, uchk
2985          ithl = ith - lchk !local index
2986          ! get pixel location information
2987          call get_pixel_layout(nsmax, ith, cth, sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
2988          ! find out which rings are to be analysed
2989          call select_rings(cth, zbounds, keep_north(ithl), keep_south(ithl), keep_it(ithl))
2990       enddo
2991
2992       !-----------------------------------------------------------------------
2993       !  computes the integral in phi : phas_m(theta)
2994       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
2995       !-----------------------------------------------------------------------
2996       phas_n = 0_dpc
2997       phas_s = 0_dpc
2998
2999!$OMP parallel default(none) &
3000!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
3001!$OMP      lchk, uchk, nph, startpix, kphi0, w8ring, phas_n, phas_s, &
3002!$OMP      keep_north, keep_south, map) &
3003!$OMP  private(ithl, nphl, istart_north, istart_south, ith, ring, status)
3004
3005       if (do_openmp()) then
3006          allocate(ring(0:nphmx-1),stat = status)
3007          call assert_alloc(status,code,'ring')
3008       endif
3009!$OMP do schedule(dynamic,1)
3010       do ith = lchk, uchk
3011          ithl = ith - lchk !local index
3012          nphl = nph(ithl)
3013          istart_north = startpix(ithl)
3014          istart_south = npix-istart_north-nphl
3015          ! do Fourier Transform on rings
3016          if (keep_north(ithl)) then
3017             ring(0:nphl-1) = map(istart_north:istart_north+nphl-1) * w8ring(ith,1)
3018             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_n(0,ithl), kphi0(ithl))
3019          endif
3020
3021          if (ith < nrings .and. keep_south(ithl)) then
3022             ring(0:nphl-1) = map(istart_south:istart_south+nphl-1) * w8ring(ith,1)
3023             call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_s(0,ithl), kphi0(ithl))
3024          endif
3025       enddo ! loop on ring
3026!$OMP end do
3027       if (do_openmp()) then
3028          deallocate(ring)
3029       endif
3030!$OMP end parallel
3031       !-----------------------------------------------------------------------
3032       !              computes the a_lm by integrating over theta
3033       !                  lambda_lm(theta) * phas_m(theta)
3034       !                         for each m and l
3035       !-----------------------------------------------------------------------
3036
3037!$OMP parallel default(none) &
3038!$OMP shared(nlmax, nmmax, lchk, uchk, ith, &
3039!$OMP    sth, alm, phas_n, phas_s, keep_it, plm, n_lm, omega_pix) &
3040!$OMP private(dalm, phas_sd, status, m, ithl, l_min, l_start, l, php, phm, i_mm)
3041
3042       if (do_openmp()) then
3043          allocate(dalm(1:2,0:nlmax),stat = status)
3044          call assert_alloc(status,code,'dalm')
3045       endif
3046
3047!$OMP do schedule(dynamic,1)
3048       do m = 0, nmmax
3049
3050          ! introduce double precision vector to perform summation over ith for each l
3051          dalm(1:2, m:nlmax ) = 0.0_dp
3052
3053          do ithl = 0, uchk - lchk
3054             l_min = l_min_ylm(m, sth(ithl))
3055             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
3056                ith = ithl + lchk
3057                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax+3-m)*m)/2 ! location of Ym,m for ring ith
3058
3059                php = phas_n(m,ithl) + phas_s(m,ithl) ! sum  (if (l+m) even)
3060                phm = phas_n(m,ithl) - phas_s(m,ithl) ! diff (if (l+m) odd)
3061                phas_sd(-1:0) =  (/ real(phm, kind=dp), aimag(phm) /)
3062                phas_sd( 1:2) =  (/ real(php, kind=dp), aimag(php) /)
3063
3064                !           ---------- l >= m ----------
3065                l_start = max(m, l_min) ! even values of (l+m)
3066                if (mod(m+l_start,2) == 1) l_start = l_start+1
3067                do l = l_start, nlmax, 2
3068                   dalm(1,l) = dalm(1,l) + plm(i_mm+l-m) * phas_sd(1)
3069                   dalm(2,l) = dalm(2,l) + plm(i_mm+l-m) * phas_sd(2)
3070                enddo ! loop on l
3071
3072                l_start = max(m+1, l_min) ! odd values of (l+m)
3073                if (mod(m+l_start,2) == 0) l_start = l_start+1
3074                do l = l_start, nlmax, 2
3075                   dalm(1,l) = dalm(1,l) + plm(i_mm+l-m) * phas_sd(-1)
3076                   dalm(2,l) = dalm(2,l) + plm(i_mm+l-m) * phas_sd(0)
3077                enddo ! loop on l
3078
3079
3080             endif ! test on cut sky and nlmax
3081          enddo ! loop on ithl
3082          do l = m, nlmax
3083             alm(1, l, m) = alm(1, l, m) + cmplx(dalm(1, l), dalm(2, l), kind=DP) * omega_pix
3084          enddo
3085       enddo ! loop on m
3086!$OMP end do
3087       if (do_openmp()) then
3088          deallocate (dalm)
3089       endif
3090!$OMP end parallel
3091    enddo ! loop on chunks
3092
3093    !     --------------------
3094    !     free memory and exit
3095    !     --------------------
3096    if (.not.do_openmp()) then
3097       deallocate(dalm, ring)
3098    endif
3099    deallocate(phas_n,phas_s)
3100    RETURN
3101  END subroutine map2alm_sc_pre_KLOAD
3102
3103
3104
3105  !=======================================================================
3106  subroutine map2alm_pol_KLOAD(nsmax, nlmax, nmmax, map_TQU,&
3107       &                     alm_TGC, zbounds, w8ring_TQU)
3108    !=======================================================================
3109    !     computes the a(l,m) from a polarized map for the HEALPIX pixelisation
3110    !       all from scratch
3111    !=======================================================================
3112    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
3113    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
3114    complex(KALMC), intent(OUT), dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
3115    real(DP),     intent(IN),  dimension(1:2),          optional :: zbounds
3116    real(DP),     intent(IN), dimension(1:2*nsmax,1:3), optional :: w8ring_TQU
3117
3118    real(DP), dimension(1:2)         :: zbounds_in
3119    real(DP), dimension(1:2*nsmax,3) :: w8ring_in
3120#ifdef USE_SHARP
3121    zbounds_in = (/-1.d0 , 1.d0/)
3122    if (present(zbounds)) zbounds_in = zbounds
3123    w8ring_in  = 1.d0
3124    if (present(w8ring_TQU))  w8ring_in  = w8ring_TQU
3125
3126    call sharp_hp_map2alm_pol_x_KLOAD(nsmax,nlmax,nmmax,map_TQU,alm_TGC,zbounds_in,w8ring_in)
3127#else
3128
3129    integer(I4B) :: l, m, ith, scalel, scalem, nrings, nphmx
3130    integer(I8B) :: istart_south, istart_north, npix
3131    real(DP)     :: omega_pix
3132
3133    integer(I4B)                              :: par_lm
3134    real(DP)            :: lam_mm, lam_lm, lam_0, lam_1, lam_2, corfac, cth_ring
3135    real(DP)                 :: lambda_w, lambda_x, normal_m, lam_lm1m
3136    real(DP)                 :: fm2, fl, flm1
3137    real(DP)                 :: a_w, b_w, a_x, fm_on_s2, two_on_s2, two_cth_ring
3138    real(DP)                 :: ovflow, unflow
3139    real(DP),     dimension(-3:8)             :: phas_sd
3140    real(DP),     dimension(-3:6)             :: phas_sdx
3141    real(DP),     dimension(:,:), allocatable :: recfac, dalm
3142    real(DP),     dimension(:),   allocatable :: lam_fact, mfac
3143
3144    integer(i4b)                              :: l_min
3145    real(DP),     dimension(:),   allocatable :: normal_l
3146    complex(DPC)                              :: php, phm
3147    complex(DPC), dimension(:,:,:), allocatable :: phas_ns
3148    real(DP),     dimension(:),   allocatable :: ring
3149    integer(i4b)    :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl, i, ii
3150    integer(I8B), dimension(0:SMAXCHK) :: startpix
3151    integer(I4B), dimension(0:SMAXCHK) :: nph, kphi0
3152    real(DP),     dimension(0:SMAXCHK) :: cth, sth
3153    real(DP),     dimension(0:SMAXCHK) :: one_on_s2, c_on_s2
3154    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
3155!    real(sp) :: time0, time1, time2, time3, time4, tt1, tt2
3156
3157    character(LEN=*), parameter :: code = 'MAP2ALM'
3158    integer(I4B) :: status
3159    !=======================================================================
3160
3161    zbounds_in = (/-1.d0 , 1.d0/)
3162    if (present(zbounds)) zbounds_in = zbounds
3163    w8ring_in  = 1.d0
3164    if (present(w8ring_TQU))  w8ring_in  = w8ring_TQU
3165
3166    ! Healpix definitions
3167    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
3168    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
3169    nphmx  = 4*nsmax           ! maximum number of pixels/ring
3170    omega_pix = FOURPI / real(npix, kind=DP)
3171
3172    !     --- allocates space for arrays ---
3173    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
3174    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
3175
3176    allocate(normal_l(0:nlmax),stat = status)
3177    call assert_alloc(status,code,'normal_l')
3178
3179    allocate(mfac(0:nmmax),stat = status)
3180    call assert_alloc(status,code,'mfac')
3181
3182    allocate(phas_ns(0:nlmax,0:5,0:chunksize-1),stat = status)
3183    call assert_alloc(status,code,'phas_ns')
3184
3185    if (.not.do_openmp()) then
3186       allocate(ring(0:nphmx-1),stat = status)
3187       call assert_alloc(status,code,'ring')
3188       allocate(recfac(0:1,0:nlmax),stat = status)
3189       call assert_alloc(status,code,'recfac')
3190       allocate(dalm(0:5,0:nlmax), stat = status)
3191       call assert_alloc(status,code,'dalm')
3192       allocate(lam_fact(0:nlmax),stat = status)
3193       call assert_alloc(status,code,'lam_fact')
3194    endif
3195    !     ------------ initiate variables and arrays ----------------
3196
3197    call gen_mfac(nmmax,mfac)
3198
3199    call init_rescale()
3200    ovflow = rescale_tab(1)
3201    unflow = rescale_tab(-1)
3202    alm_TGC = 0.0 ! set the whole alm array to zero
3203
3204    ! generate Polarization normalisation
3205    call gen_normpol(nlmax, normal_l)
3206
3207!    tt1 = 0
3208    ! loop on chunks
3209    do ichunk = 0, nchunks-1
3210       lchk = ichunk * chunksize + 1
3211       uchk = min(lchk+chunksize - 1, nrings)
3212
3213!       call wall_clock_time(time1)
3214       do ith = lchk, uchk
3215          ithl = ith - lchk !local index
3216          ! get pixel location information
3217          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
3218          one_on_s2(ithl) =    1.0_dp / sth(ithl)**2
3219            c_on_s2(ithl) = cth(ithl) / sth(ithl)**2
3220          ! find out which rings are to be analysed
3221          call select_rings(cth(ithl), zbounds_in, keep_north(ithl), keep_south(ithl), keep_it(ithl))
3222       enddo
3223
3224       !-----------------------------------------------------------------------
3225       !  computes the integral in phi : phas_m(theta)
3226       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
3227       !-----------------------------------------------------------------------
3228       phas_ns = 0_dpc
3229
3230!$OMP parallel default(none) &
3231!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
3232!$OMP      lchk, uchk,nph, startpix, kphi0, w8ring_in, phas_ns, &
3233!$OMP      keep_north, keep_south, map_TQU) &
3234!$OMP  private(ithl, nphl, istart_north, istart_south, &
3235!$OMP      ith, ring, i, ii, status)
3236       if (do_openmp()) then
3237          allocate(ring(0:nphmx-1),stat = status)
3238          call assert_alloc(status,code,'ring')
3239       endif
3240!$OMP do schedule(dynamic,1)
3241       ! do Fourier Transform on rings
3242       do ith = lchk, uchk
3243          ithl = ith - lchk !local index
3244          nphl = nph(ithl)
3245          istart_north = startpix(ithl)
3246          istart_south = npix-istart_north-nphl
3247          if (keep_north(ithl)) then
3248             do i=1,3
3249                ii = 2*(i-1) ! 0,2,4
3250!                ring(0:nphl-1) = map_TQU(istart_north:istart_north+nphl-1,i) * w8ring_in(ith,i)
3251                call sub_map2ring(map_TQU, i, istart_north, nphl, w8ring_in(ith,i), ring)
3252                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3253             enddo
3254          endif
3255
3256          if (ith < nrings .and. keep_south(ithl)) then
3257             do i=1,3
3258                ii = 2*i - 1 ! 1,3,5
3259!                ring(0:nphl-1) = map_TQU(istart_south:istart_south+nphl-1,i) * w8ring_in(ith,i)
3260                call sub_map2ring(map_TQU, i, istart_south, nphl, w8ring_in(ith,i), ring)
3261                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3262             enddo
3263          endif
3264       enddo
3265!$OMP end do
3266       if (do_openmp()) then
3267          deallocate (ring)
3268       endif
3269!$OMP end parallel
3270
3271!       call wall_clock_time(time2)
3272!       tt1 = tt1 + time2 - time1
3273
3274!$OMP parallel default(none) &
3275!$OMP shared(nlmax, nmmax, lchk, uchk, &
3276!$OMP    rescale_tab, ovflow, unflow, omega_pix, &
3277!$OMP    cth, sth, keep_it, mfac, normal_l, alm_TGC, phas_ns, one_on_s2, c_on_s2) &
3278!$OMP private(recfac, dalm, lam_fact, phas_sd, phas_sdx, phm, php, status, &
3279!$OMP   m, fm2, normal_m, ithl, l_min, &
3280!$OMP   scalem, scalel, corfac, par_lm, &
3281!$OMP   lam_mm, lam_lm, lam_lm1m, lambda_w, lambda_x, lam_0, lam_1, lam_2, &
3282!$OMP   cth_ring, fm_on_s2, two_on_s2, two_cth_ring, a_w, a_x, b_w,  &
3283!$OMP   l, fl, flm1, i)
3284
3285       if (do_openmp()) then
3286          allocate(recfac(0:1,0:nlmax),stat = status)
3287          call assert_alloc(status,code,'recfac')
3288          allocate(dalm(0:5,0:nlmax), stat = status)
3289          call assert_alloc(status,code,'dalm')
3290          allocate(lam_fact(0:nlmax),stat = status)
3291          call assert_alloc(status,code,'lam_fact')
3292       endif
3293!$OMP do schedule(dynamic,1)
3294
3295       do m = 0, nmmax
3296          ! generate recursion factors (recfac) for Ylm of degree m
3297          call gen_recfac(nlmax, m, recfac)
3298          ! generate Ylm relation factor for degree m
3299          call gen_lamfac(nlmax, m, lam_fact)
3300          fm2 = real(m * m, kind = DP)
3301          normal_m = (2.0_dp * m) * (1 - m)
3302
3303          ! introduce double precision vector to perform summation over ith for each l
3304          dalm(0:5, m:nlmax) = 0.0_dp
3305
3306          do ithl = 0, uchk - lchk
3307             l_min = l_min_ylm(m, sth(ithl)) ! lower l bound of non-negligible Ylm
3308             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
3309                ! determine lam_mm
3310                call compute_lam_mm(mfac(m), m, sth(ithl), lam_mm, corfac, scalem)
3311
3312                do i=0,2 ! loop on T, Q, U
3313                   phm = phas_ns(m, 2*i, ithl) - phas_ns(m, 2*i+1, ithl) ! N-S: (l+m) odd
3314                   phas_sd(-3+2*i)   =  real(phm, kind=dp)
3315                   phas_sd(-3+2*i+1) = aimag(phm)
3316                   php = phas_ns(m, 2*i, ithl) + phas_ns(m, 2*i+1, ithl) ! N+S: (l+m) even
3317                   phas_sd( 3+2*i)   =  real(php, kind=dp)
3318                   phas_sd( 3+2*i+1) = aimag(php)
3319                enddo
3320                phas_sdx(-3: 0) = (/ phas_sd(8), - phas_sd(7), - phas_sd(6),   phas_sd(5) /)
3321                phas_sdx( 3: 6) = (/ phas_sd(2), - phas_sd(1), - phas_sd(0),   phas_sd(-1) /)
3322
3323                !           ---------- l = m ----------
3324                par_lm = 3  ! = 3 * (-1)^(l+m)
3325                lam_lm = corfac * lam_mm            ! Actual lam_mm
3326
3327                if (m >= l_min) then
3328                   lambda_w =  (normal_m * lam_lm)  * ( 0.5_dp - one_on_s2(ithl) )
3329                   lambda_x =  (normal_m * lam_lm)  *            c_on_s2(ithl)
3330
3331                   ! alm_T = \int T Ylm
3332                   ! alm_G = \int ( - Q Wlm - i U Xlm )
3333                   ! alm_C = \int ( - U Wlm + i Q Xlm )
3334                   dalm(0:1, m) = dalm(0:1, m) + lam_lm * phas_sd(par_lm:par_lm+1)
3335                   dalm(2:5, m) = dalm(2:5, m)  &
3336                        &       - lambda_w * phas_sd(par_lm+2:par_lm+5) &
3337                        &       + lambda_x * phas_sdx(par_lm:par_lm+3)
3338                endif
3339
3340                !           ---------- l > m ----------
3341                lam_0 = 0.0_dp
3342                lam_1 = 1.0_dp
3343                scalel=0
3344                cth_ring = cth(ithl)
3345                lam_2 = cth_ring * lam_1 * recfac(0,m)
3346                fm_on_s2     =      m * one_on_s2(ithl)
3347                two_on_s2    = 2.0_dp * one_on_s2(ithl)
3348                two_cth_ring = 2.0_dp * cth_ring
3349                b_w          =  c_on_s2(ithl)
3350                do l = m+1, nlmax
3351                   par_lm   = - par_lm  ! = 3 * (-1)^(l+m)
3352                   lam_lm1m = lam_lm * lam_fact(l) ! must be incremented, even if not used
3353                   lam_lm   = lam_2 * corfac * lam_mm
3354                   if (l >= l_min) then
3355
3356                      fl = real(l, kind = DP)
3357                      flm1 = fl - 1.0_dp
3358                      a_w =  two_on_s2 * (fl - fm2)  + flm1 * fl
3359                      a_x =  two_cth_ring * flm1
3360                      lambda_w =                b_w * lam_lm1m - a_w * lam_lm
3361                      lambda_x = fm_on_s2 * (         lam_lm1m - a_x * lam_lm)
3362
3363                      dalm(0:1, l) = dalm(0:1, l) + lam_lm * phas_sd(par_lm:par_lm+1)
3364                      dalm(2:5, l) = dalm(2:5, l)  &
3365                           &       - lambda_w * phas_sd(par_lm+2:par_lm+5) &
3366                           &       + lambda_x * phas_sdx(par_lm:par_lm+3)
3367                   endif
3368
3369                   lam_0 = lam_1 * recfac(1,l-1)
3370                   lam_1 = lam_2
3371                   lam_2 = (cth_ring * lam_1 - lam_0) * recfac(0,l)
3372                   if (abs(lam_2) > OVFLOW) then
3373                      lam_1 = lam_1*UNFLOW
3374                      lam_2 = lam_2*UNFLOW
3375                      scalel= scalel + 1
3376                      corfac = rescale_tab(max(scalel+scalem,RSMIN))
3377                   elseif (abs(lam_2) < UNFLOW) then
3378                      lam_1 = lam_1*OVFLOW
3379                      lam_2 = lam_2*OVFLOW
3380                      scalel= scalel - 1
3381                      corfac = rescale_tab(max(scalel+scalem,RSMIN))
3382                   endif
3383
3384                enddo ! loop on l
3385             endif ! test on cut sky
3386          enddo ! loop on ithl
3387          do l = m, nlmax
3388             alm_TGC(1, l, m) = alm_TGC(1, l, m) + cmplx(dalm(0, l), dalm(1, l), kind=DP) * omega_pix
3389             alm_TGC(2, l, m) = alm_TGC(2, l, m) + cmplx(dalm(2, l), dalm(3, l), kind=DP) * normal_l(l) * omega_pix
3390             alm_TGC(3, l, m) = alm_TGC(3, l, m) + cmplx(dalm(4, l), dalm(5, l), kind=DP) * normal_l(l) * omega_pix
3391          enddo
3392       enddo ! loop on m
3393!$OMP end do
3394       if (do_openmp()) then
3395          deallocate (recfac,dalm,lam_fact)
3396       endif
3397!$OMP end parallel
3398    enddo    ! loop on chunks
3399
3400!    print*,'FFT',tt1
3401    !     --------------------
3402    !     free memory and exit
3403    !     --------------------
3404    if (.not.do_openmp()) then
3405       deallocate (recfac,dalm,lam_fact,ring)
3406    endif
3407    deallocate(mfac, normal_l)
3408    deallocate(phas_ns)
3409    return
3410#endif
3411  end subroutine map2alm_pol_KLOAD
3412
3413  !=======================================================================
3414  subroutine map2alm_pol_pre1_KLOAD(nsmax, nlmax, nmmax, map_TQU,&
3415       &                     alm_TGC, zbounds, w8ring_TQU, plm)
3416    !=======================================================================
3417    !     computes the a(l,m) from a polarized map for the HEALPIX pixelisation
3418    !       with precomputed Ylm_T(theta)
3419    !=======================================================================
3420    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
3421    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
3422    complex(KALMC), intent(OUT), dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
3423    real(DP),     intent(IN),  dimension(1:2)                 :: zbounds
3424    real(DP),     intent(IN), dimension(1:2*nsmax,1:3) :: w8ring_TQU
3425    real(DP),     intent(IN), dimension(0:)            :: plm
3426
3427    integer(I4B) :: l, m, ith
3428    integer(I8B) :: istart_south, istart_north, npix, n_lm, n_plm, i_mm
3429    integer(I4B) :: nrings, nphmx
3430    real(DP)     :: omega_pix
3431
3432    integer(I4B)                              :: par_lm
3433    real(DP)            :: lam_lm, cth_ring
3434    real(DP)                 :: lambda_w, lambda_x, normal_m, lam_lm1m
3435    real(DP)                 :: fm2, fl, flm1
3436    real(DP)                 :: a_w, b_w, a_x, fm_on_s2, two_on_s2, two_cth_ring
3437    real(DP),     dimension(-3:8)             :: phas_sd
3438    real(DP),     dimension(-3:6)             :: phas_sdx
3439    real(DP),     dimension(:,:), allocatable :: dalm
3440    real(DP),     dimension(:),   allocatable :: lam_fact
3441
3442    integer(i4b)                              :: l_min
3443    real(DP),     dimension(:),   allocatable :: normal_l
3444    complex(DPC)                              :: php, phm
3445    complex(DPC), dimension(:,:,:), allocatable :: phas_ns
3446    real(DP),     dimension(:),   allocatable :: ring
3447    integer(i4b)        :: nchunks, chunksize, ichunk, lchk, uchk, ithl, i, ii
3448    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
3449    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
3450    real(DP),     dimension(0:SMAXCHK-1) :: cth, sth
3451    real(DP),     dimension(0:SMAXCHK-1) :: one_on_s2, c_on_s2
3452    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
3453!    real(sp) :: time0, time1, time2, time3, time4, tt1, tt2
3454    integer(I4B)                       :: nphl
3455
3456    character(LEN=*), parameter :: code = 'MAP2ALM'
3457    integer(I4B) :: status
3458    !=======================================================================
3459
3460    ! Healpix definitions
3461    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
3462    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
3463    nphmx  = 4*nsmax           ! maximum number of pixels/ring
3464    omega_pix = FOURPI / real(npix, kind=DP)
3465    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
3466    n_plm  = n_lm * nrings
3467
3468    !     --- allocates space for arrays ---
3469    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
3470    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
3471
3472    allocate(normal_l(0:nlmax),stat = status)
3473    call assert_alloc(status,code,'normal_l')
3474
3475    allocate(phas_ns(0:nlmax,0:5,0:chunksize-1),stat = status)
3476    call assert_alloc(status,code,'phas_ns')
3477
3478    if (.not.do_openmp()) then
3479       allocate(ring(0:nphmx-1),stat = status)
3480       call assert_alloc(status,code,'ring')
3481       allocate(dalm(0:5,0:nlmax), stat = status)
3482       call assert_alloc(status,code,'dalm')
3483       allocate(lam_fact(0:nlmax),stat = status)
3484       call assert_alloc(status,code,'lam_fact')
3485    endif
3486    !     ------------ initiate variables and arrays ----------------
3487
3488    alm_TGC = 0.0 ! set the whole alm array to zero
3489
3490    ! generate Polarization normalisation
3491    call gen_normpol(nlmax, normal_l)
3492
3493    ! loop on chunks
3494    do ichunk = 0, nchunks-1
3495       lchk = ichunk * chunksize + 1
3496       uchk = min(lchk+chunksize - 1, nrings)
3497
3498!       call wall_clock_time(time1)
3499       do ith = lchk, uchk
3500          ithl = ith - lchk !local index
3501          ! get pixel location information
3502          call get_pixel_layout(nsmax, ith, cth(ithl), sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
3503          one_on_s2(ithl) =    1.0_dp / sth(ithl)**2
3504            c_on_s2(ithl) = cth(ithl) / sth(ithl)**2
3505          ! find out which rings are to be analysed
3506          call select_rings(cth(ithl), zbounds, keep_north(ithl), keep_south(ithl), keep_it(ithl))
3507       enddo
3508
3509       !-----------------------------------------------------------------------
3510       !  computes the integral in phi : phas_m(theta)
3511       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
3512       !-----------------------------------------------------------------------
3513       phas_ns = 0_dpc
3514
3515!$OMP parallel default(none) &
3516!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
3517!$OMP      lchk, uchk,nph, startpix, kphi0, w8ring_TQU, phas_ns, &
3518!$OMP      keep_north, keep_south, map_TQU) &
3519!$OMP  private(ithl, nphl, istart_north, istart_south, &
3520!$OMP      ith, ring, i, ii, status)
3521       if (do_openmp()) then
3522          allocate(ring(0:nphmx-1),stat = status)
3523          call assert_alloc(status,code,'ring')
3524       endif
3525!$OMP do schedule(dynamic,1)
3526       ! do Fourier Transform on rings
3527       do ith = lchk, uchk
3528          ithl = ith - lchk !local index
3529          nphl = nph(ithl)
3530          istart_north = startpix(ithl)
3531          istart_south = npix-istart_north-nphl
3532          if (keep_north(ithl)) then
3533             do i=1,3
3534                ii = 2*(i-1) ! 0,2,4
3535                ring(0:nphl-1) = map_TQU(istart_north:istart_north+nphl-1,i) * w8ring_TQU(ith,i)
3536                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3537             enddo
3538          endif
3539
3540          if (ith < nrings .and. keep_south(ithl)) then
3541             do i=1,3
3542                ii = 2*i - 1 ! 1,3,5
3543                ring(0:nphl-1) = map_TQU(istart_south:istart_south+nphl-1,i) * w8ring_TQU(ith,i)
3544                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3545             enddo
3546          endif
3547       enddo
3548!$OMP end do
3549       if (do_openmp()) then
3550          deallocate (ring)
3551       endif
3552!$OMP end parallel
3553
3554!$OMP parallel default(none) &
3555!$OMP shared(nlmax, nmmax, lchk, uchk, omega_pix, n_lm, plm, ith, &
3556!$OMP    cth, sth, keep_it, normal_l, alm_TGC, phas_ns, one_on_s2, c_on_s2) &
3557!$OMP private(dalm, lam_fact, phas_sd, phas_sdx, phm, php, status, &
3558!$OMP   m, fm2, normal_m, ithl, l_min, par_lm, i_mm, &
3559!$OMP   lam_lm, lam_lm1m, lambda_w, lambda_x, &
3560!$OMP   cth_ring, fm_on_s2, two_on_s2, two_cth_ring, a_w, a_x, b_w,  &
3561!$OMP   l, fl, flm1, i)
3562
3563       if (do_openmp()) then
3564          allocate(dalm(0:5,0:nlmax), stat = status)
3565          call assert_alloc(status,code,'dalm')
3566          allocate(lam_fact(0:nlmax),stat = status)
3567          call assert_alloc(status,code,'lam_fact')
3568       endif
3569!$OMP do schedule(dynamic,1)
3570
3571       do m = 0, nmmax
3572          ! generate Ylm relation factor for degree m
3573          call gen_lamfac(nlmax, m, lam_fact)
3574          fm2 = real(m * m, kind = DP)
3575          normal_m = (2.0_dp * m) * (1 - m)
3576
3577          ! introduce double precision vector to perform summation over ith for each l
3578          dalm(0:5, m:nlmax) = 0.0_dp
3579
3580          do ithl = 0, uchk - lchk
3581             l_min = l_min_ylm(m, sth(ithl)) ! lower l bound of non-negligible Ylm
3582             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
3583                ith = ithl + lchk
3584                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax + 3 - m)*m)/2 ! location of Ym,m for ring ith
3585
3586                do i=0,2 ! loop on T, Q, U
3587                   phm = phas_ns(m, 2*i, ithl) - phas_ns(m, 2*i+1, ithl) ! N-S: (l+m) odd
3588                   phas_sd(-3+2*i)   =  real(phm, kind=dp)
3589                   phas_sd(-3+2*i+1) = aimag(phm)
3590                   php = phas_ns(m, 2*i, ithl) + phas_ns(m, 2*i+1, ithl) ! N+S: (l+m) even
3591                   phas_sd( 3+2*i)   =  real(php, kind=dp)
3592                   phas_sd( 3+2*i+1) = aimag(php)
3593                enddo
3594                phas_sdx(-3: 0) = (/ phas_sd(8), - phas_sd(7), - phas_sd(6),   phas_sd(5) /)
3595                phas_sdx( 3: 6) = (/ phas_sd(2), - phas_sd(1), - phas_sd(0),   phas_sd(-1) /)
3596
3597                !           ---------- l = m ----------
3598                par_lm = 3  ! = 3 * (-1)^(l+m)
3599                lam_lm = plm(i_mm)
3600
3601                if (m >= l_min) then
3602                   lambda_w =  (normal_m * lam_lm)  * ( 0.5_dp - one_on_s2(ithl) )
3603                   lambda_x =  (normal_m * lam_lm)  *            c_on_s2(ithl)
3604
3605                   ! alm_T = \int T Ylm
3606                   ! alm_G = \int ( - Q Wlm - i U Xlm )
3607                   ! alm_C = \int ( - U Wlm + i Q Xlm )
3608                   dalm(0:1, m) = dalm(0:1, m) + lam_lm * phas_sd(par_lm:par_lm+1)
3609                   dalm(2:5, m) = dalm(2:5, m)  &
3610                        &       - lambda_w * phas_sd(par_lm+2:par_lm+5) &
3611                        &       + lambda_x * phas_sdx(par_lm:par_lm+3)
3612                endif
3613
3614                !           ---------- l > m ----------
3615                cth_ring = cth(ithl)
3616                fm_on_s2     =      m * one_on_s2(ithl)
3617                two_on_s2    = 2.0_dp * one_on_s2(ithl)
3618                two_cth_ring = 2.0_dp * cth_ring
3619                b_w          =  c_on_s2(ithl)
3620                do l = m+1, nlmax
3621                   par_lm = - par_lm  ! = 3 * (-1)^(l+m)
3622                   if (l >= l_min) then
3623                      lam_lm1m = plm(i_mm+l-m-1) * lam_fact(l)
3624                      lam_lm   = plm(i_mm+l-m)
3625
3626                      fl = real(l, kind = DP)
3627                      flm1 = fl - 1.0_dp
3628                      a_w =  two_on_s2 * (fl - fm2)  + flm1 * fl
3629                      a_x =  two_cth_ring * flm1
3630                      lambda_w =                b_w * lam_lm1m - a_w * lam_lm
3631                      lambda_x = fm_on_s2 * (         lam_lm1m - a_x * lam_lm)
3632
3633                      dalm(0:1, l) = dalm(0:1, l) + lam_lm * phas_sd(par_lm:par_lm+1)
3634                      dalm(2:5, l) = dalm(2:5, l)  &
3635                           &       - lambda_w * phas_sd(par_lm+2:par_lm+5) &
3636                           &       + lambda_x * phas_sdx(par_lm:par_lm+3)
3637                   endif
3638
3639                enddo ! loop on l
3640             endif ! test on cut sky
3641          enddo ! loop on ithl
3642          do l = m, nlmax
3643             alm_TGC(1, l, m) = alm_TGC(1, l, m) + cmplx(dalm(0, l), dalm(1, l), kind=DP) * omega_pix
3644             alm_TGC(2, l, m) = alm_TGC(2, l, m) + cmplx(dalm(2, l), dalm(3, l), kind=DP) * normal_l(l) * omega_pix
3645             alm_TGC(3, l, m) = alm_TGC(3, l, m) + cmplx(dalm(4, l), dalm(5, l), kind=DP) * normal_l(l) * omega_pix
3646          enddo
3647       enddo ! loop on m
3648!$OMP end do
3649       if (do_openmp()) then
3650          deallocate (dalm,lam_fact)
3651       endif
3652!$OMP end parallel
3653    enddo    ! loop on chunks
3654
3655!    print*,'FFT',tt1
3656    !     --------------------
3657    !     free memory and exit
3658    !     --------------------
3659    if (.not.do_openmp()) then
3660       deallocate (dalm,lam_fact,ring)
3661    endif
3662    deallocate(normal_l, phas_ns)
3663    return
3664  end subroutine map2alm_pol_pre1_KLOAD
3665
3666  !=======================================================================
3667  subroutine map2alm_pol_pre2_KLOAD(nsmax, nlmax, nmmax, map_TQU,&
3668       &                     alm_TGC, zbounds, w8ring_TQU, plm)
3669    !=======================================================================
3670    !     computes the a(l,m) from a polarized map for the HEALPIX pixelisation
3671    !       with precomputed Ylm_T(theta) an dYlm_P(theta)
3672    !=======================================================================
3673    integer(I4B), intent(IN)                   :: nsmax, nlmax, nmmax
3674    real(KMAP),   intent(IN),  dimension(0:(12_i8b*nsmax)*nsmax-1,1:3) :: map_TQU
3675    complex(KALMC), intent(OUT), dimension(1:3,0:nlmax,0:nmmax) :: alm_TGC
3676    real(DP),     intent(IN),  dimension(1:2)                 :: zbounds
3677    real(DP),     intent(IN), dimension(1:2*nsmax,1:3) :: w8ring_TQU
3678    real(DP),     intent(IN), dimension(0:,1:)         :: plm
3679
3680    integer(I4B) :: l, m, ith
3681    integer(I8B) :: istart_south, istart_north, npix, n_lm, n_plm, i_mm, i_up
3682    integer(I4B) :: nrings, nphmx
3683    real(DP)     :: omega_pix
3684
3685    real(DP),     dimension(-3:8)             :: phas_sd
3686    real(DP),     dimension(-3:6)             :: phas_sdx
3687    real(DP),     dimension(:,:), allocatable :: dalm, plm_sub
3688
3689    integer(i4b)                              :: l_min, l_start
3690    complex(DPC)                              :: php, phm
3691    complex(DPC), dimension(:,:,:), allocatable :: phas_ns
3692    real(DP),     dimension(:),   allocatable :: ring
3693    integer(i4b)    :: nchunks, chunksize, ichunk, lchk, uchk, ithl, nphl, i, ii
3694    integer(I8B), dimension(0:SMAXCHK-1) :: startpix
3695    integer(I4B), dimension(0:SMAXCHK-1) :: nph, kphi0
3696    real(DP)                           :: cth
3697    real(DP),     dimension(0:SMAXCHK-1) :: sth
3698    logical(LGT), dimension(0:SMAXCHK-1) :: keep_north, keep_south, keep_it
3699
3700    character(LEN=*), parameter :: code = 'MAP2ALM'
3701    integer(I4B) :: status
3702    !=======================================================================
3703
3704    ! Healpix definitions
3705    nrings = 2*nsmax           ! number of isolatitude rings on N. hemisphere + equat
3706    npix   = (12_I8B*nsmax)*nsmax    ! total number of pixels on the sphere
3707    nphmx  = 4*nsmax           ! maximum number of pixels/ring
3708    omega_pix = FOURPI / real(npix, DP)
3709    n_lm   = ((nmmax+1_I8B)*(2*nlmax-nmmax+2))/2 !number of (l,m) with m in[0,M] and l in [m,L]
3710    n_plm  = n_lm * nrings
3711
3712    !     --- allocates space for arrays ---
3713    nchunks   = nrings/SMAXCHK + 1  ! number of chunks
3714    chunksize = (nrings+nchunks-1)/nchunks ! <= SMAXCHK
3715
3716    allocate(phas_ns(0:nlmax,0:5,0:chunksize-1),stat = status)
3717    call assert_alloc(status,code,'phas_ns')
3718
3719    if (.not.do_openmp()) then
3720          allocate(dalm(0:5,0:nlmax), plm_sub(1:3,0:nlmax), stat = status)
3721          call assert_alloc(status,code,'dalm & plm_sub')
3722          allocate(ring(0:nphmx-1),stat = status)
3723          call assert_alloc(status,code,'ring')
3724    endif
3725    !     ------------ initiate variables and arrays ----------------
3726
3727    alm_TGC = 0.0 ! set the whole alm array to zero
3728
3729    ! loop on chunks
3730    do ichunk = 0, nchunks-1
3731       lchk = ichunk * chunksize + 1
3732       uchk = min(lchk+chunksize - 1, nrings)
3733
3734       do ith = lchk, uchk
3735          ithl = ith - lchk !local index
3736          ! get pixel location information
3737          call get_pixel_layout(nsmax, ith, cth, sth(ithl), nph(ithl), startpix(ithl), kphi0(ithl))
3738          ! find out which rings are to be analysed
3739          call select_rings(cth, zbounds, keep_north(ithl), keep_south(ithl), keep_it(ithl))
3740       enddo
3741
3742       !-----------------------------------------------------------------------
3743       !  computes the integral in phi : phas_m(theta)
3744       !  for each parallele from Pole to Equator (use symmetry for S. Hemisphere)
3745       !-----------------------------------------------------------------------
3746       phas_ns = 0_dpc
3747
3748!$OMP parallel default(none) &
3749!$OMP  shared(nsmax, nlmax, nmmax, npix, nrings, nphmx, &
3750!$OMP      lchk, uchk,nph, startpix, kphi0, w8ring_TQU, phas_ns, &
3751!$OMP      keep_north, keep_south, map_TQU) &
3752!$OMP  private(ithl, nphl, istart_north, istart_south, &
3753!$OMP      ith, ring, i, ii, status)
3754       if (do_openmp()) then
3755          allocate(ring(0:nphmx-1),stat = status)
3756          call assert_alloc(status,code,'ring')
3757       endif
3758!$OMP do schedule(dynamic,1)
3759       ! do Fourier Transform on rings
3760       do ith = lchk, uchk
3761          ithl = ith - lchk !local index
3762          nphl = nph(ithl)
3763          istart_north = startpix(ithl)
3764          istart_south = npix-istart_north-nphl
3765          if (keep_north(ithl)) then
3766             do i=1,3
3767                ii = 2*(i-1) ! 0,2,4
3768                ring(0:nphl-1) = map_TQU(istart_north:istart_north+nphl-1,i) * w8ring_TQU(ith,i)
3769                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3770             enddo
3771          endif
3772
3773          if (ith < nrings .and. keep_south(ithl)) then
3774             do i=1,3
3775                ii = 2*i - 1 ! 1,3,5
3776                ring(0:nphl-1) = map_TQU(istart_south:istart_south+nphl-1,i) * w8ring_TQU(ith,i)
3777                call ring_analysis(nsmax, nlmax, nmmax, ring, nphl, phas_ns(0,ii,ithl), kphi0(ithl))
3778             enddo
3779          endif
3780       enddo
3781!$OMP end do
3782       if (do_openmp()) then
3783          deallocate (ring)
3784       endif
3785!$OMP end parallel
3786
3787!$OMP parallel default(none) &
3788!$OMP shared(nlmax, nmmax, lchk, uchk, omega_pix, n_lm, ith, &
3789!$OMP    sth, keep_it, alm_TGC, phas_ns, plm) &
3790!$OMP private(dalm, phas_sd, phas_sdx, plm_sub, phm, php, status, &
3791!$OMP   m, ithl, l_min, l_start, l, i_mm, i_up, i)
3792       if (do_openmp()) then
3793          allocate(dalm(0:5,0:nlmax), plm_sub(1:3,0:nlmax), stat = status)
3794          call assert_alloc(status,code,'dalm & plm_sub')
3795       endif
3796!$OMP do schedule(dynamic,1)
3797
3798       do m = 0, nmmax
3799
3800          ! introduce double precision vector to perform summation over ith for each l
3801          dalm(0:5, m:nlmax) = 0.0_dp
3802
3803          do ithl = 0, uchk - lchk
3804             l_min = l_min_ylm(m, sth(ithl)) ! lower l bound of non-negligible Ylm
3805             if (keep_it(ithl) .and. nlmax >= l_min) then ! avoid un-necessary calculations (EH, 09-2001)
3806                ith = ithl + lchk
3807                i_mm = n_lm * (ith-1) + ((2_I8B*nlmax + 3 - m)*m)/2 ! location of Ym,m for ring ith
3808                i_up = i_mm + nlmax - m
3809                plm_sub(1,m:nlmax) = plm(i_mm:i_up,1)
3810                plm_sub(2,m:nlmax) = plm(i_mm:i_up,2)
3811                plm_sub(3,m:nlmax) = plm(i_mm:i_up,3)
3812
3813                do i=0,2 ! loop on T, Q, U
3814                   phm = phas_ns(m, 2*i, ithl) - phas_ns(m, 2*i+1, ithl) ! N-S: (l+m) odd
3815                   phas_sd(-3+2*i)   =  real(phm, kind=dp)
3816                   phas_sd(-3+2*i+1) = aimag(phm)
3817                   php = phas_ns(m, 2*i, ithl) + phas_ns(m, 2*i+1, ithl) ! N+S: (l+m) even
3818                   phas_sd( 3+2*i)   =  real(php, kind=dp)
3819                   phas_sd( 3+2*i+1) = aimag(php)
3820                enddo
3821                phas_sdx(-3: 0) = (/ phas_sd(8), - phas_sd(7), - phas_sd(6),   phas_sd(5) /)
3822                phas_sdx( 3: 6) = (/ phas_sd(2), - phas_sd(1), - phas_sd(0),   phas_sd(-1) /)
3823
3824                !           ---------- l = m ----------
3825                if (m >= l_min) then
3826
3827                   ! alm_T = \int T Ylm
3828                   ! alm_G = \int ( - Q Wlm - i U Xlm )
3829                   ! alm_C = \int ( - U Wlm + i Q Xlm )
3830                   dalm(0:1, m) = dalm(0:1, m) + plm_sub(1,m) * phas_sd(3:4)
3831                   dalm(2:5, m) = dalm(2:5, m)  &
3832                        &       - plm_sub(2,m) * phas_sd(5:8) &
3833                        &       + plm_sub(3,m) * phas_sdx(3:6)
3834                endif
3835
3836                !           ---------- l > m ----------
3837                l_start = max(m+1, l_min) ! odd values of (l+m)
3838                if (mod(m+l_start,2) == 0) l_start = l_start+1
3839                do l = l_start, nlmax, 2
3840                   dalm(0:1,l) = dalm(0:1,l) + plm_sub(1,l) * phas_sd(-3:-2)
3841                   dalm(2:5,l) = dalm(2:5,l) - plm_sub(2,l) * phas_sd(-1:2) &
3842                        &                    + plm_sub(3,l) * phas_sdx(-3:0)
3843                enddo ! loop on l
3844
3845                l_start = max(m+2, l_min) ! even values of (l+m)
3846                if (mod(m+l_start,2) == 1) l_start = l_start+1
3847                do l = l_start, nlmax, 2
3848                   dalm(0:1,l) = dalm(0:1,l) + plm_sub(1,l) * phas_sd(3:4)
3849                   dalm(2:5,l) = dalm(2:5,l) - plm_sub(2,l) * phas_sd(5:8) &
3850                        &                    + plm_sub(3,l) * phas_sdx(3:6)
3851!                   dalm(2:5,l) = dalm(2:5,l) + plm_sub(2,l) * phas_sd(5:8) &
3852!                        &                    - plm_sub(3,l) * phas_sdx(3:6)
3853                enddo ! loop on l
3854
3855             endif ! test on cut sky
3856          enddo ! loop on ithl
3857          do l = m, nlmax
3858             alm_TGC(1, l, m) = alm_TGC(1, l, m) + cmplx(dalm(0, l), dalm(1, l), kind=DP) * omega_pix
3859             alm_TGC(2, l, m) = alm_TGC(2, l, m) + cmplx(dalm(2, l), dalm(3, l), kind=DP) * omega_pix
3860             alm_TGC(3, l, m) = alm_TGC(3, l, m) + cmplx(dalm(4, l), dalm(5, l), kind=DP) * omega_pix
3861          enddo
3862       enddo ! loop on m
3863!$OMP end do
3864       if (do_openmp()) then
3865          deallocate (dalm, plm_sub)
3866       endif
3867!$OMP end parallel
3868    enddo    ! loop on chunks
3869
3870    !     --------------------
3871    !     free memory and exit
3872    !     --------------------
3873    deallocate(phas_ns)
3874    if (.not.do_openmp()) then
3875        deallocate (ring, dalm, plm_sub)
3876    endif
3877    return
3878  end subroutine map2alm_pol_pre2_KLOAD
3879
3880
3881  !=======================================================================
3882  subroutine map2alm_iterative_old_KLOAD(nsmax, nlmax, nmmax, iter_order, map_TQU, alm_TGC, &
3883       &   zbounds, w8ring, plm, mask)
3884    ! 2008-07-10: bug correction on map2alm for T only map: only pass temperature weights
3885    ! 2008-11-06: adjust w8ring_in 2nd dimension to actual w8ring
3886    ! 2008-11-27: patch to ignore dimension of non presen optional array (plm)
3887    ! 2008-12-03: use pointer toward map and plm section (avoid stack overloading)
3888    ! 2015-03-06: replaced implicit loop with explicit one because of ifort 15.0.2
3889  !=======================================================================
3890
3891    use statistics, only: tstats, compute_statistics
3892    use pix_tools,  only: nside2npix
3893    implicit none
3894    integer(I4B),   intent(IN)                       :: nsmax, nlmax, nmmax, iter_order
3895    real(KMAP),     intent(INOUT),  dimension(0:,1:), target    :: map_TQU
3896    complex(KALMC), intent(INOUT), dimension(1:,0:,0:) :: alm_TGC
3897    real(DP),       intent(IN),  dimension(1:2),    optional :: zbounds
3898    real(DP),       intent(IN),  dimension(1:,1:), optional :: w8ring
3899    real(DP),       intent(IN),  dimension(0:,1:), optional, target :: plm
3900    real(KMAP),     intent(IN),  dimension(0:,1:),   optional :: mask
3901    ! local variables
3902    real(DP), dimension(1:2)         :: zbounds_in
3903    real(DP), dimension(1:2*nsmax,3) :: w8ring_in
3904    integer(I4B) :: n_maps, n_alms, n_pols, n_masks, n_plms, n_w8_2
3905    integer(I4B) :: iter, status, polar, i, lbm
3906    integer(I4B) :: npixtot, p
3907    integer(I8B) :: n1_plm
3908    real(kind=DP), dimension(1:3) :: rms
3909    logical(LGT) :: do_mask
3910    type(tstats) :: map_stat
3911    real(KMAP),     allocatable, dimension(:,:)   :: map_TQU_in
3912    complex(KALMC),allocatable,  dimension(:,:,:) :: alm_TGC_out
3913
3914    character(len=*), parameter :: code = 'map2alm_iterative_old'
3915    real(KMAP), pointer, dimension(:) :: p_map_1
3916    real(DP),   pointer, dimension(:) :: p_plm_1
3917    !=======================================================================
3918
3919    n_maps = size(map_TQU,2)
3920    if (n_maps /=1 .and. n_maps /=3) then
3921       print*,'invalid 2nd dim for map: ',n_maps
3922       print*,'(should be 1 or 3) '
3923       call fatal_error(code)
3924    endif
3925    npixtot = nside2npix(nsmax)
3926    if (size(map_TQU,1) /= npixtot) then
3927       print*,'map 1st dim does not match nside ',size(map_TQU,1),npixtot,nsmax
3928       call fatal_error(code)
3929    endif
3930
3931    n_alms = size(alm_TGC,1)
3932    if (n_alms /=1 .and. n_alms /=3) then
3933       print*,'invalid 1st dim for alm: ',n_alms
3934       print*,'(should be 1 or 3) '
3935       call fatal_error(code)
3936    endif
3937    if (size(alm_TGC, 2) /= (nlmax+1) .or. size(alm_TGC, 3) /= (nmmax+1)) then
3938       print*,'alm dimensions do not match l_max, m_max'
3939       print*,size(alm_TGC, 2),  (nlmax+1)
3940       print*,size(alm_TGC, 3), (nmmax+1)
3941       call fatal_error(code)
3942    endif
3943    polar = 0
3944    n_pols = min(n_maps, n_alms)
3945    if (n_pols == 3) polar=1
3946
3947    n_plms = 0
3948    if (present(plm)) then
3949       n1_plm =  (nmmax+1_I8B) * (2*nlmax-nmmax+2_I8B) * nsmax
3950       if (size(plm,1) >= n1_plm) n_plms = size(plm,2) ! 1 or 3
3951       if (n_plms /= 0 .and. n_plms /= 1 .and. n_plms /= 3) then
3952          if (abs(n_plms) > 1000) then ! edited 2008-11-27
3953             n_plms = 0 ! ignore Plm on compilers returning stupid values for dangling arrays
3954          else
3955             print*,'invalid 2nd dim for plm: ',n_plms
3956             print*,'(should be 1 or 3) '
3957             call fatal_error(code)
3958          endif
3959       endif
3960    endif
3961    if (polar == 0) n_plms = min(n_plms,1)
3962
3963    zbounds_in = (/-1.d0 , 1.d0/)
3964    if (present(zbounds)) zbounds_in = zbounds
3965
3966    n_w8_2 = 0
3967    w8ring_in  = 1.d0
3968    if (present(w8ring)) then
3969       if (size(w8ring,1) >= 2*nsmax) n_w8_2 = size(w8ring, 2)
3970       if (n_w8_2 /= 0 .and. n_w8_2 /= 1 .and. n_w8_2 /= 3) then
3971          print*,'invalid 2nd dim for w8ring: ',n_w8_2
3972          print*,'(should be 1 or 3) '
3973          call fatal_error(code)
3974       endif
3975       if (n_w8_2 /= 0) then
3976          w8ring_in(1:2*nsmax,1:n_w8_2)  = w8ring(1:2*nsmax,1:n_w8_2) ! edited 2008-11-06
3977       endif
3978    endif
3979
3980    do_mask = .false.
3981    n_masks = 0
3982    if (present(mask)) then
3983       do_mask = (size(mask,1) == npixtot)
3984       n_masks = size(mask,2)
3985       lbm = lbound(mask, dim=1)
3986       if (n_masks > 3) then
3987          print*,'Invalid 2nd dimension for mask:',n_masks
3988          print*,'Should be 1, 2 or 3'
3989          call fatal_error(code)
3990       endif
3991    endif
3992
3993
3994    ! -------------------------------------
3995    ! multiply map by mask
3996    ! -------------------------------------
3997    if (do_mask) then
3998       do i=1,n_pols
3999         ! map_TQU(0:,i) = map_TQU(0:,i) * mask(lbm:, min(i, n_masks))
4000          do p=0, npixtot-1 ! explicit loop because of ifort 15.0.2 2015-03-06
4001             map_TQU(p,i) = map_TQU(p,i) * mask(lbm+p, min(i, n_masks))
4002          enddo
4003       enddo
4004    endif
4005
4006    ! -------------------------------------
4007    ! allocate arrays required for iterations
4008    ! -------------------------------------
4009    if (iter_order > 0) then
4010       ALLOCATE(alm_TGC_out(1:n_pols, 0:nlmax, 0:nmmax),stat = status)
4011       call assert_alloc(status,code,"alm_TGC_out")
4012
4013       ALLOCATE(map_TQU_in(0:npixtot-1,1:n_pols),stat = status)
4014       call assert_alloc(status,code,"map_TQU_in")
4015
4016       alm_TGC_out = 0.0_KALMC      ! final alm
4017       map_TQU_in  = map_TQU        ! input map (masked)
4018       alm_TGC     = 0.0_KALMC      ! intermediate alm
4019    endif
4020
4021    if ( iter_order < 0) then
4022       print*,'Iteration order must be non-negative'
4023       call fatal_error(code)
4024    endif
4025
4026    nullify(p_map_1, p_plm_1) ! for sanity
4027    ! -------------------------------------
4028    ! start iterative (Jacobi) analysis
4029    ! a^(n) = a^(n-1) + A w ( m - S a^(n-1) )
4030    ! a^(0) = A w m
4031    ! with w = mask * quadrature_weights
4032    ! -------------------------------------
4033    do iter = 0, iter_order
4034       ! map -> alm
4035       select case (n_plms+polar*10)
4036       case(0) ! Temperature only
4037          p_map_1 => map_TQU(:,1)
4038          call map2alm(nsmax, nlmax, nmmax, p_map_1, alm_TGC, zbounds_in, w8ring_in(:,1:1))
4039       case(1) ! T with precomputed Ylm
4040          p_map_1 => map_TQU(:,1)
4041          p_plm_1 => plm(:,1)
4042          call map2alm(nsmax, nlmax, nmmax, p_map_1, alm_TGC, zbounds_in, w8ring_in(:,1:1), p_plm_1)
4043       case(10) ! T+P
4044          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_in, w8ring_in)
4045       case(11) ! T+P with precomputed Ylm_T
4046          p_plm_1 => plm(:,1)
4047          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_in, w8ring_in, p_plm_1)
4048       case(13) ! T+P with precomputed Ylm
4049          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_in, w8ring_in, plm)
4050       case default
4051          print*,'Invalid configuration'
4052          call fatal_error(code)
4053       end select
4054
4055       ! if no iteration: exit
4056       if (iter_order == 0) exit
4057
4058       ! if iterate, show residual amplitude
4059       if (iter == 0 .and. iter_order > 0) then
4060          PRINT *,"       Iterated map analysis "
4061          PRINT *,"       input map StDev"
4062          do i=1,n_pols
4063             call compute_statistics(map_TQU(:,i), map_stat)
4064             rms (i) = map_stat%rms
4065          enddo
4066          write(*,9111) iter,rms(1:n_pols)
4067          PRINT *,"       residual map StDev"
4068       endif
40699111   format(i3,3(g20.5))
4070
4071       ! alm_2 = alm_2 + alm : increment alms
4072       map_TQU = 0.0_KMAP
4073       alm_TGC_out = alm_TGC_out + alm_TGC
4074
4075       ! if final iteration: exit
4076       if (iter == iter_order) exit
4077
4078       ! alm_2 -> map : reconstructed map
4079       select case (n_plms+polar*10)
4080       case(0)
4081          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, p_map_1, zbounds=zbounds_in)
4082       case(1)
4083          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, p_map_1, plm=p_plm_1)
4084       case(10)
4085          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, zbounds=zbounds_in)
4086       case(11)
4087          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, plm=p_plm_1)
4088       case(13)
4089          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, plm=plm)
4090       case default
4091          print*,'Invalid configuration'
4092          call fatal_error(code)
4093       end select
4094
4095       ! map = map_2 - map : residual map
4096       ! multiply newly synthetized MAP by MASK in order to allow comparison with input MAP.
4097       if (do_mask) then
4098          do i=1,n_pols
4099             map_TQU(0:,i) = map_TQU(0:,i) * mask(lbm:, min(i, n_masks))
4100          enddo
4101       endif
4102       map_TQU = map_TQU_in - map_TQU
4103
4104       do i=1,n_pols
4105          call compute_statistics(map_TQU(:,i), map_stat)
4106          rms (i) = map_stat%rms
4107       enddo
4108       write(*,9111) iter,rms(1:n_pols)
4109    enddo
4110
4111    ! -------------------------------------
4112    ! clean up and exit
4113    ! -------------------------------------
4114    if (iter_order > 0) then
4115       alm_TGC = alm_TGC_out ! final alms
4116       map_TQU = map_TQU_in ! input map (masked)
4117       deallocate(alm_TGC_out, map_TQU_in)
4118    endif
4119
4120    if (associated(p_map_1)) nullify(p_map_1)
4121    if (associated(p_plm_1)) nullify(p_plm_1)
4122
4123    return
4124  end subroutine map2alm_iterative_old_KLOAD
4125
4126  !=======================================================================
4127  subroutine map2alm_iterative_KLOAD(nsmax, nlmax, nmmax, iter_order, map_TQU, alm_TGC, &
4128       &   zbounds, w8ring, plm, mask)
4129    ! 2008-07-10: bug correction on map2alm for T only map: only pass temperature weights
4130    ! 2008-11-06: adjust w8ring_in 2nd dimension to actual w8ring
4131    ! 2008-11-27: patch to ignore dimension of non presen optional array (plm)
4132    ! 2008-12-03: use pointer toward map and plm section (avoid stack overloading)
4133    ! 2015-03-06: replaced implicit loop with explicit one because of ifort 15.0.2
4134    ! 2018-11:    apply the mask (and zbounds) only on input map
4135  !=======================================================================
4136
4137    use statistics, only: tstats, compute_statistics
4138    use pix_tools,  only: nside2npix, apply_mask
4139    use long_intrinsic, only: long_size
4140    implicit none
4141    integer(I4B),   intent(IN)                       :: nsmax, nlmax, nmmax, iter_order
4142    real(KMAP),     intent(INOUT),  dimension(0:,1:), target    :: map_TQU
4143    complex(KALMC), intent(INOUT), dimension(1:,0:,0:) :: alm_TGC
4144    real(DP),       intent(IN),  dimension(1:2),    optional :: zbounds
4145    real(DP),       intent(IN),  dimension(1:,1:), optional :: w8ring
4146    real(DP),       intent(IN),  dimension(0:,1:), optional, target :: plm
4147    real(KMAP),     intent(IN),  dimension(0:,1:),   optional :: mask
4148    ! local variables
4149    real(DP), dimension(1:2)         :: zbounds_in, zbounds_full, zbounds_run
4150    real(DP), dimension(1:2*nsmax,3) :: w8ring_in
4151    integer(I4B) :: n_maps, n_alms, n_pols, n_masks, n_plms, n_w8_2
4152    integer(I4B) :: iter, status, polar, i, lbm, order
4153    integer(I8B) :: npixtot, p
4154    integer(I8B) :: n1_plm
4155    real(kind=DP), dimension(1:3) :: rms
4156    logical(LGT) :: do_mask, do_bounds
4157    type(tstats) :: map_stat
4158    real(KMAP),     allocatable, dimension(:,:)   :: map_TQU_in
4159    complex(KALMC),allocatable,  dimension(:,:,:) :: alm_TGC_out
4160
4161    character(len=*), parameter :: code = 'map2alm_iterative'
4162    real(KMAP), pointer, dimension(:) :: p_map_1
4163    real(DP),   pointer, dimension(:) :: p_plm_1
4164    !=======================================================================
4165
4166    n_maps = size(map_TQU,2)
4167    if (n_maps /=1 .and. n_maps /=3) then
4168       print*,'invalid 2nd dim for map: ',n_maps
4169       print*,'(should be 1 or 3) '
4170       call fatal_error(code)
4171    endif
4172    npixtot = nside2npix(nsmax)
4173    if (long_size(map_TQU,1) /= npixtot) then
4174       print*,'map 1st dim does not match nside ',size(map_TQU,1),npixtot,nsmax
4175       call fatal_error(code)
4176    endif
4177
4178    n_alms = size(alm_TGC,1)
4179    if (n_alms /=1 .and. n_alms /=3) then
4180       print*,'invalid 1st dim for alm: ',n_alms
4181       print*,'(should be 1 or 3) '
4182       call fatal_error(code)
4183    endif
4184    if (size(alm_TGC, 2) /= (nlmax+1) .or. size(alm_TGC, 3) /= (nmmax+1)) then
4185       print*,'alm dimensions do not match l_max, m_max'
4186       print*,size(alm_TGC, 2),  (nlmax+1)
4187       print*,size(alm_TGC, 3), (nmmax+1)
4188       call fatal_error(code)
4189    endif
4190    polar = 0
4191    n_pols = min(n_maps, n_alms)
4192    if (n_pols == 3) polar=1
4193
4194    n_plms = 0
4195    if (present(plm)) then
4196       n1_plm =  (nmmax+1_I8B) * (2*nlmax-nmmax+2_I8B) * nsmax
4197       if (size(plm,1) >= n1_plm) n_plms = size(plm,2) ! 1 or 3
4198       if (n_plms /= 0 .and. n_plms /= 1 .and. n_plms /= 3) then
4199          if (abs(n_plms) > 1000) then ! edited 2008-11-27
4200             n_plms = 0 ! ignore Plm on compilers returning stupid values for dangling arrays
4201          else
4202             print*,'invalid 2nd dim for plm: ',n_plms
4203             print*,'(should be 1 or 3) '
4204             call fatal_error(code)
4205          endif
4206       endif
4207    endif
4208    if (polar == 0) n_plms = min(n_plms,1)
4209
4210    zbounds_full = (/-1.d0 , 1.d0/)
4211    zbounds_in   = zbounds_full
4212    do_bounds = .false.
4213    if (present(zbounds)) then
4214       zbounds_in = zbounds
4215       do_bounds = .true.
4216    endif
4217    zbounds_run = zbounds_in
4218
4219    n_w8_2 = 0
4220    w8ring_in  = 1.d0
4221    if (present(w8ring)) then
4222       if (size(w8ring,1) >= 2*nsmax) n_w8_2 = size(w8ring, 2)
4223       if (n_w8_2 /= 0 .and. n_w8_2 /= 1 .and. n_w8_2 /= 3) then
4224          print*,'invalid 2nd dim for w8ring: ',n_w8_2
4225          print*,'(should be 1 or 3) '
4226          call fatal_error(code)
4227       endif
4228       if (n_w8_2 /= 0) then
4229          w8ring_in(1:2*nsmax,1:n_w8_2)  = w8ring(1:2*nsmax,1:n_w8_2) ! edited 2008-11-06
4230       endif
4231    endif
4232
4233    do_mask = .false.
4234    n_masks = 0
4235    if (present(mask)) then
4236       do_mask = (long_size(mask,1) == npixtot)
4237       n_masks = size(mask,2)
4238       lbm = lbound(mask, dim=1)
4239       if (n_masks > 3) then
4240          print*,'Invalid 2nd dimension for mask:',n_masks
4241          print*,'Should be 1, 2 or 3'
4242          call fatal_error(code)
4243       endif
4244    endif
4245
4246
4247    ! -------------------------------------
4248    ! multiply map by mask and apply zbounds
4249    ! -------------------------------------
4250    order = 1 ! ring ordered
4251    if (do_bounds .and. iter_order > 0) then
4252       if (do_mask) then
4253          call apply_mask(map_TQU, order, mask=mask, zbounds=zbounds_in)
4254       else
4255          call apply_mask(map_TQU, order,            zbounds=zbounds_in)
4256       endif
4257    else ! no iteration or no bounds, apply mask (if any) here, keep zbounds (if any) for map2alm
4258       if (do_mask) then
4259          call apply_mask(map_TQU, order, mask=mask)
4260       endif
4261    endif
4262
4263    ! -------------------------------------
4264    ! allocate arrays required for iterations
4265    ! -------------------------------------
4266    if (iter_order > 0) then
4267       ALLOCATE(alm_TGC_out(1:n_pols, 0:nlmax, 0:nmmax),stat = status)
4268       call assert_alloc(status,code,"alm_TGC_out")
4269
4270       ALLOCATE(map_TQU_in(0:npixtot-1,1:n_pols),stat = status)
4271       call assert_alloc(status,code,"map_TQU_in")
4272
4273       alm_TGC_out = 0.0_KALMC      ! final alm
4274       map_TQU_in  = map_TQU        ! input map (masked)
4275       alm_TGC     = 0.0_KALMC      ! intermediate alm
4276    endif
4277
4278    if ( iter_order < 0) then
4279       print*,'Iteration order must be non-negative'
4280       call fatal_error(code)
4281    endif
4282
4283    nullify(p_map_1, p_plm_1) ! for sanity
4284    ! -------------------------------------
4285    ! start iterative (Jacobi) analysis
4286    ! a^(n) = a^(n-1) + A ( w m - S a^(n-1) )
4287    ! a^(0) = A w m
4288    ! with w = mask and zbounds
4289    ! A = alm2map with quadrature weights
4290    ! -------------------------------------
4291    do iter = 0, iter_order
4292       ! map -> alm
4293       select case (n_plms+polar*10)
4294       case(0) ! Temperature only
4295          p_map_1 => map_TQU(:,1)
4296          call map2alm(nsmax, nlmax, nmmax, p_map_1, alm_TGC, zbounds_run, w8ring_in(:,1:1))
4297       case(1) ! T with precomputed Ylm
4298          p_map_1 => map_TQU(:,1)
4299          p_plm_1 => plm(:,1)
4300          call map2alm(nsmax, nlmax, nmmax, p_map_1, alm_TGC, zbounds_run, w8ring_in(:,1:1), p_plm_1)
4301       case(10) ! T+P
4302          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_run, w8ring_in)
4303       case(11) ! T+P with precomputed Ylm_T
4304          p_plm_1 => plm(:,1)
4305          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_run, w8ring_in, p_plm_1)
4306       case(13) ! T+P with precomputed Ylm
4307          call map2alm(nsmax, nlmax, nmmax, map_TQU, alm_TGC, zbounds_run, w8ring_in, plm)
4308       case default
4309          print*,'Invalid configuration'
4310          call fatal_error(code)
4311       end select
4312
4313       ! if no iteration: exit
4314       if (iter_order == 0) exit
4315
4316       ! in case of iterations, analyze full sky map
4317       zbounds_run = zbounds_full
4318
4319       ! if iterate, show residual amplitude
4320       if (iter == 0 .and. iter_order > 0) then
4321          PRINT *,"       Iterated map analysis "
4322          PRINT *,"       input map StDev"
4323          do i=1,n_pols
4324             call compute_statistics(map_TQU(:,i), map_stat)
4325             rms (i) = map_stat%rms
4326          enddo
4327          write(*,9111) iter,rms(1:n_pols)
4328          PRINT *,"       residual map StDev"
4329       endif
43309111   format(i3,3(g20.5))
4331
4332       ! alm_2 = alm_2 + alm : increment alms
4333       map_TQU = 0.0_KMAP
4334       alm_TGC_out = alm_TGC_out + alm_TGC
4335
4336       ! if final iteration: exit
4337       if (iter == iter_order) exit
4338
4339       ! alm_2 -> map : reconstructed map
4340       select case (n_plms+polar*10)
4341       case(0)
4342          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, p_map_1, zbounds=zbounds_full)
4343       case(1)
4344          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, p_map_1, plm=p_plm_1)
4345       case(10)
4346          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, zbounds=zbounds_full)
4347       case(11)
4348          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, plm=p_plm_1)
4349       case(13)
4350          call alm2map(nsmax, nlmax, nmmax, alm_TGC_out, map_TQU, plm=plm)
4351       case default
4352          print*,'Invalid configuration'
4353          call fatal_error(code)
4354       end select
4355
4356       map_TQU = map_TQU_in - map_TQU ! residual map
4357
4358       do i=1,n_pols
4359          call compute_statistics(map_TQU(:,i), map_stat)
4360          rms (i) = map_stat%rms
4361       enddo
4362       write(*,9111) iter,rms(1:n_pols)
4363    enddo
4364
4365    ! -------------------------------------
4366    ! clean up and exit
4367    ! -------------------------------------
4368    if (iter_order > 0) then
4369       alm_TGC = alm_TGC_out ! final alms
4370       map_TQU = map_TQU_in ! input map (masked)
4371       deallocate(alm_TGC_out, map_TQU_in)
4372    endif
4373
4374    if (associated(p_map_1)) nullify(p_map_1)
4375    if (associated(p_plm_1)) nullify(p_plm_1)
4376
4377    return
4378  end subroutine map2alm_iterative_KLOAD
4379
4380
4381  !**************************************************************************
4382  !
4383  !             ALM treatment
4384  !
4385  !**************************************************************************
4386
4387  !=======================================================================
4388  subroutine alter_alm_KLOAD(nsmax, nlmax, nmmax, fwhm_arcmin, alm, beam_file, window)
4389    !=======================================================================
4390    !     multiply the alm by a gaussian function of FWHM fwhm_arcmin,
4391    !        or a beam_file or an arbitrary window
4392    !     --> equivalent to a convolution in real sphere
4393    !=======================================================================
4394    integer(I4B),     intent(in)                         :: nlmax,nsmax,nmmax
4395    real(KALM),       intent(in)                         :: fwhm_arcmin
4396    complex(KALMC),   intent(inout), dimension(1:,0:,0:) :: alm
4397    character(LEN=*), intent(IN),  optional              :: beam_file
4398    real(KALM),       intent(IN),  optional, dimension(0:,1:) :: window
4399
4400    real(DP), dimension(:,:),  allocatable :: beamw
4401    integer(I4B)                           :: status
4402
4403    integer(I4B)                           :: m, i, nl, nda, nlw, ndw, j
4404    character(len=*), parameter            :: code = "alter_alm"
4405
4406    !-----------------------------------------------------------------------
4407
4408    nda = size(alm,1)
4409
4410    if (present(window)) then
4411       nlw = size(window,1)
4412       ndw = size(window,2)
4413       nl  = min(nlw, nlmax+1)
4414       do m=0, nmmax
4415          do i = 1, nda
4416             j = min(ndw, i) ! if no polarization window, replicate temperature
4417             alm(i, m:nl-1, m) = alm(i, m:nl-1, m) * window(m:nl-1, j)
4418          enddo
4419       enddo
4420       ! set to 0 alms not altered
4421       if (nlw <= nlmax) then
4422          alm(1:nda, nlw:nlmax, 0:nmmax) = 0.0_KALMC
4423          print*,code //' set to 0 alm with l in range ',nlw,nlmax
4424       endif
4425    else
4426       allocate(beamw(0:nlmax,1:nda), stat = status)
4427       call assert_alloc(status,code,'beamw')
4428       call generate_beam(real(fwhm_arcmin,kind=dp), nlmax, beamw, beam_file)
4429       do m = 0, nmmax
4430          do i = 1, nda
4431             alm(i, m:nlmax, m) = alm(i, m:nlmax, m) * beamw(m:nlmax, i)
4432          enddo
4433       enddo
4434       deallocate(beamw)
4435    endif
4436
4437    return
4438  end subroutine alter_alm_KLOAD
4439
4440  !=======================================================================
4441  subroutine create_alm_v12_KLOAD &
4442       &     (nsmax, nlmax, nmmax, polar, filename, iseed, fwhm_arcmin, &
4443       &        alm_TGC, header_PS, windowfile, units, beam_file)
4444    !=======================================================================
4445    use rngmod, only: rand_init, planck_rng
4446    INTEGER(I4B), INTENT(IN) :: nsmax, nlmax, nmmax
4447
4448    INTEGER(I4B),       INTENT(IN) :: polar
4449    CHARACTER(LEN=*),   INTENT(IN)      :: filename
4450    INTEGER(I4B),       INTENT(INOUT)   :: iseed
4451    REAL(KALM),           INTENT(IN)      :: fwhm_arcmin
4452    COMPLEX(KALMC),       INTENT(OUT), &
4453         & DIMENSION(1:1+2*polar,0:nlmax,0:nmmax) :: alm_TGC
4454    CHARACTER(LEN=80),  INTENT(OUT),             DIMENSION(1:):: header_PS
4455    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: windowfile
4456    CHARACTER(LEN=80),  INTENT(OUT),   OPTIONAL, DIMENSION(1:):: units
4457    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: beam_file
4458    !
4459    type(planck_rng)       :: rng_handle
4460
4461
4462    print*,'============================================================================='
4463    print*,'WARNING: create_alm calling sequence has changed'
4464    print*,' from'
4465    print*,'  call create_alm (nsmax, nlmax, nmmax, polar, filename, ISEED, fwhm_arcmin,'
4466    print*,'                    alm_TGC, header_PS [, windowfile, units, beam_file])'
4467    print*,' to'
4468    print*,'  call create_alm (nsmax, nlmax, nmmax, polar, filename, RNG_HANDLE, fwhm_arcmin,'
4469    print*,'                    alm_TGC, header_PS [, windowfile, units, beam_file])'
4470    print*,'  '
4471    print*,' see documentation for details'
4472    print*,'============================================================================='
4473    if (iseed < 0) then
4474       call rand_init(rng_handle, iseed) ! start new sequence
4475       call  create_alm &
4476            &     (nsmax, nlmax, nmmax, polar, filename, rng_handle, fwhm_arcmin, &
4477            &        alm_TGC, header_PS, windowfile, units, beam_file)
4478       iseed = abs(iseed)
4479    else
4480       print*,'ERROR: old calling sequence can only be used with a new seed (ISEED < 0).'
4481       print*,' see documentation for details on new interface'
4482       call fatal_error('create_alm_v12')
4483    endif
4484
4485    return
4486  end subroutine create_alm_v12_KLOAD
4487   !=======================================================================
4488  subroutine create_alm_old_KLOAD &
4489       &     (nsmax, nlmax, nmmax, polar, filename, rng_handle, fwhm_arcmin, &
4490       &        alm_TGC, header_PS, windowfile, units, beam_file)
4491    !=======================================================================
4492    !     creates the a_lm from the power spectrum,
4493    !     assuming they are gaussian  and complex
4494    !     with a variance given by C(l)
4495    !
4496    !     the input file FILENAME should contain :
4497    !       l, C_temp(l), [ C_grad(l), C_curl(l), C_temp_grad(l) ]
4498    !
4499    !     with *consecutive* l's (missing C(l) are set to 0.)
4500    !
4501    !     because the map is real we have : a_l-m = (-)^m conjug(a_lm)
4502    !     so we actually compute them only for m >= 0
4503    !
4504    !
4505    !     RNG_HANDLE : planck_rng structure
4506    !
4507    !     FWHM_ARCMIN
4508    !
4509    !     ALM_TGC (Complex array) : either (1:1, 0:nlmax, 0:nmmax)
4510    !       for temperature only (if POLAR=0)
4511    !                               or     (1:3, 0:nlmax, 0:nmmax)
4512    !       for temperature + polarisation (if POLAR =1)
4513    !       (respectively Temp, Grad or Electric component
4514    !        and Curl or Magnetic one)
4515    !
4516    !=======================================================================
4517    use fitstools, only: fits2cl
4518!    use ran_tools, only: randgauss_boxmuller
4519    use rngmod, only: rand_gauss, planck_rng
4520    USE head_fits, ONLY : add_card, merge_headers, get_card
4521
4522    INTEGER(I4B), INTENT(IN) :: nsmax, nlmax, nmmax
4523    INTEGER(I4B),       INTENT(IN) :: polar
4524    type(planck_rng),   intent(inout)                         :: rng_handle
4525    CHARACTER(LEN=*),   INTENT(IN)      :: filename
4526    REAL(KALM),           INTENT(IN)      :: fwhm_arcmin
4527    COMPLEX(KALMC),       INTENT(OUT), &
4528         & DIMENSION(1:1+2*polar,0:nlmax,0:nmmax) :: alm_TGC
4529    CHARACTER(LEN=80),  INTENT(OUT),             DIMENSION(1:):: header_PS
4530    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: windowfile
4531    CHARACTER(LEN=80),  INTENT(OUT),   OPTIONAL, DIMENSION(1:):: units
4532    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: beam_file
4533
4534    INTEGER(I4B) :: i, l, m, npw, l_max
4535    INTEGER(I4B) :: ncl, nlheader, count_tt, count_pn
4536    INTEGER(I4B) :: status
4537    REAL(DP) ::  hsqrt2, quadrupole
4538    REAL(DP) ::  rms_tt, rms_g1, rms_g2, rms_cc
4539    REAL(DP) ::  zeta1_r, zeta1_i, zeta2_r, zeta2_i, zeta3_r, zeta3_i
4540
4541    LOGICAL(LGT) ::  polarisation
4542
4543    REAL(DP), DIMENSION(:,:), ALLOCATABLE :: pixlw, beamw
4544    REAL(DP), DIMENSION(0:nlmax)          :: cls_tt, cls_tg, cls_gg, cls_cc
4545    REAL(DP), DIMENSION(0:nlmax,1:4)      :: cl_in
4546!    REAL(DP), DIMENSION(:),   ALLOCATABLE :: fact_norm
4547
4548    CHARACTER(LEN=*), PARAMETER :: code = 'CREATE_ALM'
4549    CHARACTER(LEN=*), PARAMETER :: version = '2.0.0'
4550    CHARACTER(LEN=20)                            ::  string_quad
4551    CHARACTER(LEN=80), DIMENSION(1:180)          :: header_file
4552    CHARACTER(LEN=80), DIMENSION(:), allocatable :: units_power
4553    CHARACTER(LEN=80) :: temptype, com_tt
4554    CHARACTER(LEN=80) :: polnorm, com_pn
4555
4556    !=======================================================================
4557    ! Is polarisation required ?
4558    if (polar .eq. 0) then
4559       polarisation = .false.
4560    elseif (polar .eq. 1) then
4561       polarisation = .true.
4562    else
4563       print*,'wrong choice of polar'
4564       call fatal_error
4565    endif
4566
4567    ! set maximum multipole (depends on user choice and pixel window function)
4568    l_max = nlmax
4569    npw = 4*nsmax + 1
4570    if (present(windowfile)) then
4571       l_max = min(l_max, npw - 1)
4572    else
4573       npw = l_max + 1
4574    endif
4575    if (l_max < nlmax) then
4576       print*,'a_lm are only generated for 0 < l <= ',l_max
4577    endif
4578    !-----------------------------------------------------------------------
4579
4580    !     ----- set the alm array to zero ------
4581    if (polarisation) then
4582       alm_TGC(1:3, 0:nlmax, 0:nmmax) = CMPLX(0.0,0.0, kind=KALM)
4583    else
4584       alm_TGC(1  , 0:nlmax, 0:nmmax) = CMPLX(0.0,0.0, kind=KALM)
4585    endif
4586    !     --------------------------------------
4587    cls_tt = 0.0_dp
4588    cls_gg = 0.0_dp
4589    cls_cc = 0.0_dp
4590    cls_tg = 0.0_dp
4591    cl_in  = 0.0_dp
4592
4593    !     --- reads the C(l) file ---
4594    ncl = 1
4595    if (polarisation) ncl = 4
4596    nlheader = SIZE(header_file)
4597    allocate(units_power(1:ncl), stat = status)
4598    call assert_alloc(status,code,'units_power')
4599
4600    call fits2cl(filename, cl_in(0:l_max,1:ncl), l_max, ncl, &
4601         &       header_file, units=units_power)
4602
4603    !    ---- check input power spectra consistency  ----
4604    do i=1,ncl
4605       do l=0,l_max
4606          if (i < 4) then ! auto correlation
4607             if (cl_in(l,i) < 0.0) then
4608                print*,code,'> Negative input power spectrum at l =', l,', index = ',i
4609                print*,code,'> ',cl_in(l,i)
4610                call fatal_error
4611             endif
4612          else ! cross correlation
4613             if (abs(cl_in(l,4)) > sqrt(cl_in(l,1))*sqrt(cl_in(l,2)) ) then
4614                print *,code,'> Inconsistent T, E and ExT ower spectrum terms at l = ',l
4615                print*,code,'> ',cl_in(l,1),cl_in(l,2),cl_in(l,4)
4616             endif
4617          endif
4618       enddo
4619    enddo
4620
4621    ! 2009-01-07: removed fact_norm, was creating problem with PGF90
4622    !     we always expect 4 columns, in the order T, G(=E), C(=B) and TG(=TE)
4623    cls_tt(0:l_max) = cl_in(0:l_max,1)
4624    cls_gg(0:l_max) = cl_in(0:l_max,2)
4625    cls_cc(0:l_max) = cl_in(0:l_max,3)
4626    cls_tg(0:l_max) = cl_in(0:l_max,4)
4627
4628    ! converts units for power spectrum into units for maps
4629    if (present(units)) then
4630       call pow2alm_units(units_power, units)
4631    endif
4632
4633    ! finds out temperature type (THERMO or ANTENNA)
4634    ! THERMO is the default
4635    call get_card(header_file,"TEMPTYPE",temptype,com_tt,count=count_tt)
4636    if (count_tt < 1) then
4637       temptype = "THERMO"
4638       com_tt = " temperature : either THERMO or ANTENNA"
4639       print*,'   Will assume '//temptype
4640    endif
4641
4642    if (polar > 0) then
4643       call get_card(header_file,"POLNORM",polnorm,com_pn,count=count_pn)
4644       if (count_pn < 1) then
4645          print*,' ********************************************************* '
4646          print*,'            The convention for normalization '
4647          print*,'        of the input polarization power spectra '
4648          print*,'                      is unknown. '
4649          print*,' The code will proceed as if the convention was that of CMBFAST'
4650          print*,'  See the Healpix PDF of Html documentation for details'
4651          print*,' ********************************************************* '
4652       endif
4653    endif
4654
4655    quadrupole = cls_tt(2)
4656    !     --- creates the header relative to power spectrum ---
4657    header_PS = ''
4658    call add_card(header_PS)
4659    call add_card(header_PS,'HISTORY',' alm generated by ' &
4660         &        //code//'_'//version//' from following power spectrum')
4661    call add_card(header_PS)
4662    call add_card(header_PS,'COMMENT','----------------------------------------------------')
4663    call add_card(header_PS,'COMMENT','Planck Power Spectrum Description Specific Keywords')
4664    call add_card(header_PS,'COMMENT','----------------------------------------------------')
4665    call add_card(header_PS,'COMMENT','Input power spectrum in : ')
4666    call add_card(header_PS,'COMMENT',TRIM(filename))
4667    call add_card(header_PS)
4668    call add_card(header_PS,'COMMENT','Quadrupole')
4669    write(string_quad,'(1pe15.6)') quadrupole
4670    call add_card(header_PS,'COMMENT','  C(2) = '//string_quad//' '//trim(units_power(1)))
4671    call add_card(header_PS)
4672    call add_card(header_PS,"TEMPTYPE",temptype,com_tt)
4673    call add_card(header_PS)
4674    ! ------- insert header read in power spectrum file -------
4675    call merge_headers(header_file, header_PS)
4676    deallocate(units_power)
4677
4678    !     --- smoothes the initial power spectrum ---
4679    !       beam (gaussian or external file) + pixel (external file)
4680
4681    ! define beam
4682    allocate(beamw(0:l_max,1:3), stat = status)
4683    call assert_alloc(status,code,'beamw')
4684    call generate_beam(real(fwhm_arcmin,kind=dp), l_max, beamw, beam_file)
4685!     call gaussbeam(real(fwhm_arcmin,kind=dp), l_max, beamw)
4686
4687    ! get the pixel window function
4688    allocate(pixlw(0:npw-1,1:3), stat = status)
4689    call assert_alloc(status,code,'pixlw')
4690    if(present(windowfile)) then
4691       call pixel_window(pixlw, windowfile=windowfile)
4692    else
4693       pixlw(:,:)  = 1.0_dp
4694    endif
4695    ! multiply beam and pixel windows
4696    beamw(0:l_max,1:3) = beamw(0:l_max,1:3) * pixlw(0:l_max,1:3)
4697
4698    cls_tt(0:l_max) = cls_tt(0:l_max) * beamw(0:l_max,1)**2
4699    cls_tg(0:l_max) = cls_tg(0:l_max) * beamw(0:l_max,1)*beamw(0:l_max,2)
4700    cls_gg(0:l_max) = cls_gg(0:l_max) * beamw(0:l_max,2)**2
4701    cls_cc(0:l_max) = cls_cc(0:l_max) * beamw(0:l_max,3)**2
4702    deallocate(pixlw)
4703    deallocate(beamw)
4704
4705
4706    !     --- generates randomly the alm according to their power spectrum ---
4707    !     alm_T = zeta1 * rms_tt
4708    !     alm_G = zeta1 * rms_g1 + zeta2 * rms_g2
4709    !     alm_C = zeta3 * rms_cc
4710    hsqrt2 = SQRT2 / 2.0_dp
4711
4712    do l = 0, l_max
4713       rms_tt = 0.0_dp
4714       rms_g1 = 0.0_dp
4715       if (cls_tt(l) .ne. 0) then
4716          rms_tt   = sqrt(cls_tt(l))
4717          rms_g1   = cls_tg(l) / rms_tt
4718       endif
4719
4720       !        ------ m = 0 ------
4721!        zeta1_r = randgauss_boxmuller(iseed)  ! gaussian deviate based on ran_mwc
4722       zeta1_r = rand_gauss(rng_handle)
4723       zeta1_i = 0.0_dp
4724       alm_TGC(1, l, 0)   = CMPLX(zeta1_r, zeta1_i, kind=KALM) * rms_tt ! T
4725       if (polarisation) then
4726          alm_TGC(2, l, 0) = CMPLX(zeta1_r, zeta1_i, kind=KALM) * rms_g1 ! G
4727       endif
4728
4729       !        ------ m > 0 ------
4730       do m = 1,l
4731!          zeta1_r = randgauss_boxmuller(iseed) * hsqrt2
4732!          zeta1_i = randgauss_boxmuller(iseed) * hsqrt2
4733          zeta1_r = rand_gauss(rng_handle) * hsqrt2
4734          zeta1_i = rand_gauss(rng_handle) * hsqrt2
4735          alm_TGC(1, l, m) = CMPLX(zeta1_r, zeta1_i, kind=KALM) * rms_tt
4736          if (polarisation) then
4737             alm_TGC(2, l, m) = CMPLX(zeta1_r, zeta1_i, kind=KALM) * rms_g1
4738          endif
4739       enddo
4740    enddo
4741
4742    !     the coefficient generation is separated so that the Temperature
4743    !     coeff. are the same (for the same seed) weither the polarisation is set or not
4744
4745    if (polarisation) then
4746       do l = 0, l_max
4747          rms_g2 = 0.0_dp
4748          rms_cc = 0.0_dp
4749          if (cls_tt(l) .ne. 0) then
4750             rms_g2 = cls_gg(l) - (cls_tg(l)/cls_tt(l))*cls_tg(l) ! to avoid underflow
4751             ! test for consistency but make sure it is not due to round off error
4752             if (rms_g2 <= 0.0) then
4753                if (abs(rms_g2) > abs(1.e-8*cls_gg(l))) then
4754                   print*,code,'> Inconsistent TT, GG and TG spectra at l=',l
4755                   call fatal_error
4756                else ! only round off error, keep going
4757                   rms_g2 = 0.0
4758                endif
4759             endif
4760             rms_g2 = SQRT( rms_g2 )
4761             rms_cc = SQRT( cls_cc(l) )
4762          endif
4763
4764          !           ------ m = 0 ------
4765!          zeta2_r = randgauss_boxmuller(iseed)
4766          zeta2_r = rand_gauss(rng_handle)
4767          zeta2_i = 0.0_dp
4768!          zeta3_r = randgauss_boxmuller(iseed)
4769          zeta3_r = rand_gauss(rng_handle)
4770          zeta3_i = 0.0_dp
4771          alm_TGC(2, l, 0) = alm_TGC(2, l, 0) &
4772               &           + CMPLX(zeta2_r, zeta2_i, kind=KALM) * rms_g2 ! G
4773          alm_TGC(3, l, 0) = CMPLX(zeta3_r, zeta3_i, kind=KALM) * rms_cc ! C
4774
4775          !           ------ m > 0 ------
4776          do m = 1,l
4777!              zeta2_r = randgauss_boxmuller(iseed) * hsqrt2
4778!              zeta2_i = randgauss_boxmuller(iseed) * hsqrt2
4779!              zeta3_r = randgauss_boxmuller(iseed) * hsqrt2
4780!              zeta3_i = randgauss_boxmuller(iseed) * hsqrt2
4781             zeta2_r = rand_gauss(rng_handle) * hsqrt2
4782             zeta2_i = rand_gauss(rng_handle) * hsqrt2
4783             zeta3_r = rand_gauss(rng_handle) * hsqrt2
4784             zeta3_i = rand_gauss(rng_handle) * hsqrt2
4785             alm_TGC(2, l, m) = alm_TGC(2, l, m) &
4786                  &                 + CMPLX(zeta2_r, zeta2_i, kind=KALM) * rms_g2 ! G
4787             alm_TGC(3, l, m) = CMPLX(zeta3_r, zeta3_i, kind=KALM) * rms_cc ! C
4788          enddo
4789       enddo
4790    endif
4791
4792    return
4793  end subroutine create_alm_old_KLOAD
4794
4795   !=======================================================================
4796  subroutine create_alm_KLOAD &
4797       &     (nsmax, nlmax, nmmax, polar, filename, rng_handle, fwhm_arcmin, &
4798       &        alm_TGC, header_PS, windowfile, units, beam_file)
4799    !=======================================================================
4800    !     creates the a_lm from the power spectrum,
4801    !     assuming they are gaussian  and complex
4802    !     with a variance given by C(l)
4803    !
4804    !     the input file FILENAME should contain :
4805    !       l, C_temp(l), [ C_grad(l), C_curl(l), C_temp_grad(l), [C_temp_curl(l), C_grad_curl(l)] ]
4806    !
4807    !     with *consecutive* l's (missing C(l) are set to 0.)
4808    !
4809    !     because the map is real we have : a_l-m = (-)^m conjug(a_lm)
4810    !     so we actually compute them only for m >= 0
4811    !
4812    !
4813    !     RNG_HANDLE : planck_rng structure
4814    !
4815    !     FWHM_ARCMIN
4816    !
4817    !     ALM_TGC (Complex array) : either (1:1, 0:nlmax, 0:nmmax)
4818    !       for temperature only (if POLAR=0)
4819    !                               or     (1:3, 0:nlmax, 0:nmmax)
4820    !       for temperature + polarisation (if POLAR =1 or POLAR =2)
4821    !       (respectively Temp, Grad or Electric component
4822    !        and Curl or Magnetic one)
4823    !
4824    ! 2014-05-28: support of POLAR=2, in which the unconventional
4825    ! TC (=TB) and GC (=EB) spectra (if available in the input FITS file),
4826    ! are taken into account
4827    !
4828    !=======================================================================
4829    use fitstools,  only: fits2cl, getsize_fits
4830    use rngmod,     only: rand_gauss, planck_rng
4831    use head_fits,  only: add_card, merge_headers, get_card
4832    use misc_utils, only: string
4833
4834    INTEGER(I4B),       INTENT(IN)      :: nsmax, nlmax, nmmax
4835    INTEGER(I4B),       INTENT(IN)      :: polar
4836    type(planck_rng),   intent(inout)   :: rng_handle
4837    CHARACTER(LEN=*),   INTENT(IN)      :: filename
4838    REAL(KALM),         INTENT(IN)      :: fwhm_arcmin
4839    COMPLEX(KALMC),     INTENT(OUT), &
4840         & DIMENSION(1:1+2*min(polar,1),0:nlmax,0:nmmax) :: alm_TGC
4841    CHARACTER(LEN=80),  INTENT(OUT),             DIMENSION(1:):: header_PS
4842    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: windowfile
4843    CHARACTER(LEN=80),  INTENT(OUT),   OPTIONAL, DIMENSION(1:):: units
4844    CHARACTER(LEN=*),   INTENT(IN),    OPTIONAL               :: beam_file
4845
4846    INTEGER(I4B) :: i, l, m, npw, l_max
4847    INTEGER(I4B) :: ncl, nlheader, count_tt, count_pn
4848    INTEGER(I4B) :: status
4849    REAL(DP)     ::  hsqrt2, quadrupole
4850    REAL(DP)     ::  rms_tt, rms_g1, rms_g2, rms_c1, rms_c2, rms_c3
4851    real(DP)     ::  var_g2, var_c3
4852    REAL(DP)     ::  zeta1_r, zeta1_i, zeta2_r, zeta2_i, zeta3_r, zeta3_i
4853    real(DP), parameter :: ZERO = 0.0_DP
4854    complex(KALMC) :: zeta1, zeta2, zeta3
4855    integer(I4B) :: junk, ncl_file
4856
4857    LOGICAL(LGT) ::  polarisation, bcoupling
4858
4859    REAL(DP), DIMENSION(:,:), ALLOCATABLE :: pixlw, beamw
4860    REAL(DP), DIMENSION(0:nlmax)          :: cls_tt, cls_gg, cls_cc, cls_tg
4861    REAL(DP), DIMENSION(0:nlmax)          :: cls_tc, cls_gc
4862    REAL(DP), DIMENSION(0:nlmax,1:6)      :: cl_in
4863!    REAL(DP), DIMENSION(:),   ALLOCATABLE :: fact_norm
4864
4865    CHARACTER(LEN=*), PARAMETER :: code = 'CREATE_ALM'
4866    CHARACTER(LEN=*), PARAMETER :: version = '2.1.0'
4867    CHARACTER(LEN=20)                            ::  string_quad
4868    CHARACTER(LEN=80), DIMENSION(1:180)          :: header_file
4869    CHARACTER(LEN=80), DIMENSION(:), allocatable :: units_power
4870    CHARACTER(LEN=80) :: temptype, com_tt
4871    CHARACTER(LEN=80) :: polnorm, com_pn
4872
4873
4874    !=======================================================================
4875    ! Is polarisation required ?
4876    if (polar == 0) then
4877       polarisation = .false.
4878       bcoupling    = .false.
4879    elseif (polar == 1) then
4880       polarisation = .true.
4881       bcoupling    = .false.
4882    elseif (polar == 2) then
4883       polarisation = .true.
4884       bcoupling    = .true.
4885    else
4886       print*,code//'> wrong choice of polar: '//trim(string(polar))
4887       print*,code//'> must be 0, 1 or 2'
4888       call fatal_error
4889    endif
4890
4891    ! set maximum multipole (depends on user choice and pixel window function)
4892    l_max = nlmax
4893    npw = 4*nsmax + 1
4894    if (present(windowfile)) then
4895       l_max = min(l_max, npw - 1)
4896    else
4897       npw = l_max + 1
4898    endif
4899    if (l_max < nlmax) then
4900       print*,code//'> WARNING: a_lm are only generated for 0 <= l <= '//trim(string(l_max))
4901    endif
4902    !-----------------------------------------------------------------------
4903
4904    !     ----- set the alm array to zero ------
4905    if (polarisation) then
4906       alm_TGC(1:3, 0:nlmax, 0:nmmax) = CMPLX(0.0,0.0, kind=KALM)
4907    else
4908       alm_TGC(1  , 0:nlmax, 0:nmmax) = CMPLX(0.0,0.0, kind=KALM)
4909    endif
4910    !     --------------------------------------
4911    cls_tt = ZERO
4912    cls_gg = ZERO
4913    cls_cc = ZERO
4914    cls_tg = ZERO
4915    cls_tc = ZERO
4916    cls_gc = ZERO
4917    cl_in  = ZERO
4918
4919    !     --- reads the C(l) file ---
4920    ncl = 1
4921    if (polarisation) ncl = 4
4922    if (polarisation .and. bcoupling) ncl = 6
4923
4924    junk = getsize_fits(filename, nmaps=ncl_file)
4925    if (ncl_file < ncl) then
4926       print*,code//'> Only '//trim(string(ncl_file))//' spectra found in '//trim(filename)
4927       print*,code//'> Expected at least '//trim(string(ncl))
4928       print*,code//'> WARNING: absent (cross-)spectra will be set to 0'
4929       ncl = ncl_file
4930       !call fatal_error()
4931    endif
4932    polarisation = (ncl >= 4)
4933    bcoupling    = (ncl == 6)
4934
4935    nlheader = SIZE(header_file)
4936    allocate(units_power(1:ncl), stat = status)
4937    call assert_alloc(status,code,'units_power')
4938
4939    call fits2cl(filename, cl_in(0:l_max,1:ncl), l_max, ncl, &
4940         &       header_file, units=units_power)
4941
4942    !    ---- check input power spectra consistency  ----
4943    do i=1,ncl
4944       do l=0,l_max
4945          if (i < 4) then ! TT, EE, BB auto correlation
4946             if (cl_in(l,i) < 0.0) then
4947                print*,code//'> Negative input auto power spectrum at l =', l,', index = ',i
4948                print*,code//'> ',cl_in(l,i)
4949                call fatal_error
4950             endif
4951          elseif (i == 4) then ! TE cross correlation
4952             if (abs(cl_in(l,4)) > sqrt(cl_in(l,1))*sqrt(cl_in(l,2)) ) then
4953                print*,code//'> Inconsistent TT, EE and TE power spectrum terms at l = ',l
4954                print*,code//'> ',cl_in(l,1),cl_in(l,2),cl_in(l,4)
4955             endif
4956          elseif (i == 5) then ! TB cross correlation
4957             if (abs(cl_in(l,5)) > sqrt(cl_in(l,1))*sqrt(cl_in(l,3)) ) then
4958                print*,code//'> Inconsistent TT, BB and TB power spectrum terms at l = ',l
4959                print*,code//'> ',cl_in(l,1),cl_in(l,3),cl_in(l,5)
4960             endif
4961          elseif (i == 6) then ! EB cross correlation
4962             if (abs(cl_in(l,6)) > sqrt(cl_in(l,2))*sqrt(cl_in(l,3)) ) then
4963                print*,code//'> Inconsistent EE, BB and EB power spectrum terms at l = ',l
4964                print*,code//'> ',cl_in(l,2),cl_in(l,3),cl_in(l,6)
4965             endif
4966          endif
4967       enddo
4968    enddo
4969
4970    ! 2009-01-07: removed fact_norm, was creating problem with PGF90
4971    !     we always expect 4 or 6 columns, in the order T, G(=E), C(=B) and TG(=TE), TC(=TB), GC(=EB)
4972    cls_tt(0:l_max) = cl_in(0:l_max,1)
4973    if (polarisation) then
4974       cls_gg(0:l_max) = cl_in(0:l_max,2)
4975       cls_cc(0:l_max) = cl_in(0:l_max,3)
4976       cls_tg(0:l_max) = cl_in(0:l_max,4)
4977       if (bcoupling) then
4978          cls_tc(0:l_max) = cl_in(0:l_max,5)
4979          cls_gc(0:l_max) = cl_in(0:l_max,6)
4980       endif
4981    endif
4982
4983    ! converts units for power spectrum into units for maps
4984    if (present(units)) then
4985       call pow2alm_units(units_power, units)
4986    endif
4987
4988    ! finds out temperature type (THERMO or ANTENNA)
4989    ! THERMO is the default
4990    call get_card(header_file,"TEMPTYPE",temptype,com_tt,count=count_tt)
4991    if (count_tt < 1) then
4992       temptype = "THERMO"
4993       com_tt = " temperature : either THERMO or ANTENNA"
4994       print*,'   Will assume '//temptype
4995    endif
4996
4997    if (polar > 0) then
4998       call get_card(header_file,"POLNORM",polnorm,com_pn,count=count_pn)
4999       if (count_pn < 1) then
5000          print*,' ********************************************************* '
5001          print*,'            The convention for normalization '
5002          print*,'        of the input polarization power spectra '
5003          print*,'                      is unknown. '
5004          print*,' The code will proceed as if the convention was that of CMBFAST'
5005          print*,'  See the Healpix PDF of Html documentation for details'
5006          print*,' ********************************************************* '
5007       endif
5008    endif
5009
5010    quadrupole = cls_tt(2)
5011    !     --- creates the header relative to power spectrum ---
5012    header_PS = ''
5013    call add_card(header_PS)
5014    call add_card(header_PS,'HISTORY',' alm generated by ' &
5015         &        //code//'_'//version//' from following power spectrum')
5016    call add_card(header_PS)
5017    call add_card(header_PS,'COMMENT','----------------------------------------------------')
5018    call add_card(header_PS,'COMMENT','Planck Power Spectrum Description Specific Keywords')
5019    call add_card(header_PS,'COMMENT','----------------------------------------------------')
5020    call add_card(header_PS,'COMMENT','Input power spectrum in : ')
5021    call add_card(header_PS,'COMMENT',TRIM(filename))
5022    call add_card(header_PS)
5023    call add_card(header_PS,'COMMENT','Quadrupole')
5024    write(string_quad,'(1pe15.6)') quadrupole
5025    call add_card(header_PS,'COMMENT','  C(2) = '//string_quad//' '//trim(units_power(1)))
5026    call add_card(header_PS)
5027    call add_card(header_PS,"TEMPTYPE",temptype,com_tt)
5028    call add_card(header_PS)
5029    ! ------- insert header read in power spectrum file -------
5030    call merge_headers(header_file, header_PS)
5031    deallocate(units_power)
5032
5033    !     --- smoothes the initial power spectrum ---
5034    !       beam (gaussian or external file) + pixel (external file)
5035
5036    ! define beam
5037    allocate(beamw(0:l_max,1:3), stat = status)
5038    call assert_alloc(status,code,'beamw')
5039    call generate_beam(real(fwhm_arcmin,kind=dp), l_max, beamw, beam_file)
5040!     call gaussbeam(real(fwhm_arcmin,kind=dp), l_max, beamw)
5041
5042    ! get the pixel window function
5043    allocate(pixlw(0:npw-1,1:3), stat = status)
5044    call assert_alloc(status,code,'pixlw')
5045    if(present(windowfile)) then
5046       call pixel_window(pixlw, windowfile=windowfile)
5047    else
5048       pixlw(:,:)  = 1.0_dp
5049    endif
5050    ! multiply beam and pixel windows
5051    beamw(0:l_max,1:3) = beamw(0:l_max,1:3) * pixlw(0:l_max,1:3)
5052
5053    cls_tt(0:l_max) = cls_tt(0:l_max) * beamw(0:l_max,1)**2
5054    cls_gg(0:l_max) = cls_gg(0:l_max) * beamw(0:l_max,2)**2
5055    cls_cc(0:l_max) = cls_cc(0:l_max) * beamw(0:l_max,3)**2
5056    cls_tg(0:l_max) = cls_tg(0:l_max) * beamw(0:l_max,1)*beamw(0:l_max,2)
5057    cls_tc(0:l_max) = cls_tc(0:l_max) * beamw(0:l_max,1)*beamw(0:l_max,3)
5058    cls_gc(0:l_max) = cls_gc(0:l_max) * beamw(0:l_max,2)*beamw(0:l_max,3)
5059    deallocate(pixlw)
5060    deallocate(beamw)
5061
5062
5063    !     --- generates randomly the alm according to their power spectra ---
5064    !         (Cholesky decomposition of spectra covariance matrix)
5065    !     alm_T = zeta1 * rms_tt
5066    !     alm_G = zeta1 * rms_g1 + zeta2 * rms_g2
5067    !     alm_C = zeta1 * rms_c1 + zeta2 * rms_c2 + zeta3 * rms_c3
5068    !
5069    ! rms_tt = sqrt(TT), rms_g1 = TE/rms_tt, rms_c1 = TB/rms_tt
5070    ! rms_g2 = sqrt( EE - (TE)^2/TT )
5071    ! rms_c2 = ( EB - TE TB/TT ) / rms_g2
5072    ! rms_c3 = sqrt( BB - rms_c1^2 - rms_c2^2 )
5073
5074    hsqrt2 = SQRT2 / 2.0_dp
5075
5076    do l = 0, l_max
5077       rms_tt = ZERO
5078       rms_g1 = ZERO
5079       rms_c1 = ZERO
5080       if (cls_tt(l) > ZERO) then
5081          rms_tt   = sqrt(cls_tt(l))
5082          rms_g1   = cls_tg(l) / rms_tt
5083          rms_c1   = cls_tc(l) / rms_tt
5084       endif
5085
5086       !        ------ m = 0 ------
5087       zeta1_r = rand_gauss(rng_handle)
5088       zeta1_i = ZERO
5089       zeta1   = CMPLX(zeta1_r, zeta1_i, kind=KALM)
5090       alm_TGC(1, l, 0)                   = zeta1 * rms_tt ! T->T
5091       if (polarisation) alm_TGC(2, l, 0) = zeta1 * rms_g1 ! T->E
5092       if (bcoupling)    alm_TGC(3, l, 0) = zeta1 * rms_c1 ! T->B
5093
5094       !        ------ m > 0 ------
5095       do m = 1,l
5096          zeta1_r = rand_gauss(rng_handle) * hsqrt2
5097          zeta1_i = rand_gauss(rng_handle) * hsqrt2
5098          zeta1   = CMPLX(zeta1_r, zeta1_i, kind=KALM)
5099          alm_TGC(1, l, m)                   = zeta1 * rms_tt
5100          if (polarisation) alm_TGC(2, l, m) = zeta1 * rms_g1
5101          if (bcoupling)    alm_TGC(3, l, m) = zeta1 * rms_c1
5102       enddo
5103    enddo
5104
5105    !     the coefficient generation is separated so that the Temperature
5106    !     coeff. are the same (for the same seed) whether the polarisation is set or not
5107
5108    if (polarisation) then
5109       do l = 0, l_max
5110          rms_g2 = ZERO
5111          rms_c1 = ZERO
5112          rms_c2 = ZERO
5113          rms_c3 = ZERO
5114          if (cls_tt(l) > ZERO) then
5115             var_g2 = cls_gg(l) - (cls_tg(l)/cls_tt(l))*cls_tg(l) ! to avoid underflow
5116             ! test for consistency but make sure it is not due to round off error
5117             if (var_g2 <= ZERO) then
5118                if (abs(var_g2) > abs(1.e-8*cls_gg(l))) then
5119                   print*,code//'> Inconsistent TT, GG and TG spectra at l=',l
5120                   call fatal_error
5121                else ! only round off error, keep going
5122                   var_g2 = ZERO
5123                endif
5124             endif
5125             rms_c1 = cls_tc(l) / sqrt( cls_tt(l) )
5126             if (var_g2 > ZERO) then
5127                rms_g2 = sqrt( var_g2 )
5128                rms_c2 = ( cls_gc(l) - cls_tc(l) * (cls_tg(l) / cls_tt(l)) ) / rms_g2
5129             endif
5130             var_c3 = cls_cc(l) - rms_c1**2 - rms_c2**2
5131             if (var_c3 <= ZERO) then
5132                if (abs(var_c3) > abs(1.e-8*cls_cc(l))) then
5133                   print*,code//'> Inconsistent spectra at l=',l
5134                   call fatal_error
5135                else ! only round off error, keep going
5136                   var_c3 = ZERO
5137                endif
5138             endif
5139             rms_c3 = sqrt( var_c3 )
5140          endif
5141
5142          !           ------ m = 0 ------
5143          zeta2_r = rand_gauss(rng_handle)
5144          zeta2_i = ZERO
5145          zeta3_r = rand_gauss(rng_handle)
5146          zeta3_i = ZERO
5147          zeta2   = CMPLX(zeta2_r, zeta2_i, kind=KALM)
5148          zeta3   = CMPLX(zeta3_r, zeta3_i, kind=KALM)
5149          alm_TGC(2, l, 0) = alm_TGC(2, l, 0) + zeta2 * rms_g2 ! E->E
5150          alm_TGC(3, l, 0) = alm_TGC(3, l, 0) + zeta3 * rms_c3 ! B->B
5151          if (bcoupling) then
5152             alm_TGC(3, l, 0) = alm_TGC(3, l, 0) + zeta2 * rms_c2 ! E->B
5153          endif
5154
5155          !           ------ m > 0 ------
5156          do m = 1,l
5157             zeta2_r = rand_gauss(rng_handle) * hsqrt2
5158             zeta2_i = rand_gauss(rng_handle) * hsqrt2
5159             zeta3_r = rand_gauss(rng_handle) * hsqrt2
5160             zeta3_i = rand_gauss(rng_handle) * hsqrt2
5161             zeta2   = CMPLX(zeta2_r, zeta2_i, kind=KALM)
5162             zeta3   = CMPLX(zeta3_r, zeta3_i, kind=KALM)
5163             alm_TGC(2, l, m) = alm_TGC(2, l, m) + zeta2 * rms_g2
5164             alm_TGC(3, l, m) = alm_TGC(3, l, m) + zeta3 * rms_c3
5165             if (bcoupling) then
5166                alm_TGC(3, l, m) = alm_TGC(3, l, m) + zeta2 * rms_c2
5167             endif
5168          enddo
5169       enddo
5170    endif
5171
5172    return
5173  end subroutine create_alm_KLOAD
5174
5175  !========================================================
5176  subroutine sub_alm2cl_KLOAD(alm1, i1, alm2, i2, cl, i3)
5177    !========================================================
5178    integer(I4B),                        intent(in) :: i1, i2, i3
5179    complex(KALMC), dimension(1:,0:,0:), intent(in) :: alm1, alm2
5180    real(KALM),     dimension(0:,1:),    intent(out):: cl
5181
5182    integer(I4B) :: nlmax, nmmax, l, mm, j1, j2, j3
5183    complex(DPC) :: dc
5184    real(DP), parameter :: two = 2.000000000000000000_dp
5185    real(DP), parameter :: one = 1.000000000000000000_dp
5186    !========================================================
5187
5188    nlmax = min( size(alm1, 2), size(alm2, 2), size(cl, 1)) - 1
5189    nmmax = min( size(alm1, 3), size(alm2, 3)) - 1
5190    j1 = size(alm1, 1)
5191    j2 = size(alm2, 1)
5192    j3 = size(cl,   2)
5193
5194    if (i1 > j1 .or. i2 > j2 .or. i3 > j3) then
5195       call fatal_error('invalid index in alm -> C(l)')
5196    endif
5197    do l = 0, nlmax
5198       mm = min(l, nmmax)
5199       dc =          sum(      alm1(i1,l,1:mm)*conjg(alm2(i2,l,1:mm)))
5200       dc = (dc + conjg(dc)) + alm1(i1,l,0)   *      alm2(i2,l,0)
5201       cl(l,i3) = real(dc, kind=DP) / (two*l + one)
5202    enddo
5203
5204    return
5205  end subroutine sub_alm2cl_KLOAD
5206  !========================================================
5207  subroutine alm2cl2_KLOAD(nlmax, nmmax, alm1, alm2, cl, symmetric)
5208  !========================================================
5209    ! computes C(l) from a_lm1, a_lm2, in the order
5210    ! TT, [EE, TE, BB, [TB, EB, [ET, BT, BE]]]
5211    ! TE= alm1_T * alm2_E
5212    ! unless symmetric is set : TE = (alm1_T * alm2_E + alm1_E * alm2_T)/2
5213    !=======================================================
5214    integer(I4B),                        intent(in) :: nlmax, nmmax
5215    complex(KALMC), dimension(1:,0:,0:), intent(in) :: alm1, alm2
5216    real(KALM)    , dimension(0:, 1: ),  intent(out):: cl
5217    logical(LGT),   optional,            intent(in) :: symmetric
5218    !
5219    integer(I4B) :: ncl, na1, na2, k1, k2
5220    real(DP), parameter :: half = 0.500000000000000000_dp
5221    logical(LGT) :: polarisation, bcoupling, do_sym, asympol
5222
5223    real(KALM), allocatable, dimension(:,:) :: cl_work
5224    !========================================================
5225
5226    ncl = size(cl, 2)
5227    na1 = size(alm1, 1)
5228    na2 = size(alm2, 1)
5229    polarisation = (na1 >= 3 .and. na2 >= 3 .and. ncl >=4)
5230    bcoupling    = (ncl >=6) .and. polarisation
5231    asympol      = (ncl >=9) .and. polarisation
5232    do_sym = .false.
5233    cl = 0.0_KALM
5234    if (present(symmetric)) do_sym = symmetric
5235    if (polarisation .and. do_sym) then
5236       print*,'Symmetric TE C(l)'
5237    endif
5238    if (polarisation) then
5239       allocate(cl_work(0:nlmax, 1:2))
5240    endif
5241
5242    ! TT power spectrum
5243    k1 = 1_i4b
5244    call sub_alm2cl(alm1, k1, alm2, k1, cl, k1)
5245
5246    if (polarisation) then
5247       ! GG or EE power spectrum
5248       k1 = 2_i4b
5249       call sub_alm2cl(alm1, k1, alm2, k1, cl, k1)
5250
5251       ! CC or BB power spectrum
5252       k1 = 3_i4b
5253       call sub_alm2cl(alm1, k1, alm2, k1, cl, k1)
5254
5255       ! TG or TE power spectrum
5256       call sub_alm2cl(alm1, 1_i4b, alm2, 2_i4b, cl_work, 1_i4b)
5257       call sub_alm2cl(alm1, 2_i4b, alm2, 1_i4b, cl_work, 2_i4b)
5258       k1 = 4  ;   k2 = k1 +3
5259                    cl(0:,k1) =  cl_work(0:,1)
5260       if (asympol) cl(0:,k2) =                cl_work(0:,2)
5261       if (do_sym ) cl(0:,k1) = (cl_work(0:,1)+cl_work(0:,2)) * half
5262
5263       if (bcoupling) then
5264          ! TC or TB power spectrum
5265          call sub_alm2cl(alm1, 1_i4b, alm2, 3_i4b, cl_work, 1_i4b)
5266          call sub_alm2cl(alm1, 3_i4b, alm2, 1_i4b, cl_work, 2_i4b)
5267          k1 = 5  ;   k2 = k1 +3
5268                       cl(0:,k1) =  cl_work(0:,1)
5269          if (asympol) cl(0:,k2) =                cl_work(0:,2)
5270          if (do_sym ) cl(0:,k1) = (cl_work(0:,1)+cl_work(0:,2)) * half
5271
5272
5273          ! GC or EB power spectrum
5274          call sub_alm2cl(alm1, 2_i4b, alm2, 3_i4b, cl_work, 1_i4b)
5275          call sub_alm2cl(alm1, 3_i4b, alm2, 2_i4b, cl_work, 2_i4b)
5276          k1 = 6  ;   k2 = k1 +3
5277                       cl(0:,k1) =  cl_work(0:,1)
5278          if (asympol) cl(0:,k2) =                cl_work(0:,2)
5279          if (do_sym ) cl(0:,k1) = (cl_work(0:,1)+cl_work(0:,2)) * half
5280       endif
5281    endif
5282
5283    if (allocated(cl_work)) deallocate(cl_work)
5284
5285    return
5286  end subroutine alm2cl2_KLOAD
5287  !========================================================
5288  subroutine alm2cl1_KLOAD(nlmax, nmmax, alm1, cl)
5289  !========================================================
5290    ! computes C(l) from a_lm, in the order
5291    ! TT, [EE, TE, BB, [TB, EB]]
5292    !=======================================================
5293    integer(I4B),                      intent(in) :: nlmax, nmmax
5294    complex(KALMC), dimension(1:,0:,0:), intent(in) :: alm1
5295    real(KALM)    , dimension(0:, 1: ),  intent(out):: cl
5296    !
5297    integer(I4B) :: ncl
5298    !========================================================
5299
5300    ncl = size(cl, 2)
5301    cl = 0.0_KALM
5302    call alm2cl(nlmax, nmmax, alm1, alm1, cl)
5303
5304    return
5305  end subroutine alm2cl1_KLOAD
5306
5307  !========================================================
5308  subroutine rotate_alm_KLOAD(lmax, alm, psi, theta, phi)
5309    !=========================================================
5310    !Input: Complex array alm(p,l,m) with (l,m) in [0,lmax]^2, and p in [1,nd]
5311    !Euler rotation angles psi, theta, phi in radians
5312    !Output: Rotated array alm(p, l,m)
5313    !
5314    ! Euler angle convention  is right handed, active rotation
5315    ! psi is the first rotation about the z-axis (vertical), in [-2pi,2pi]
5316    ! then theta about the ORIGINAL (unrotated) y-axis, in [-2pi,2pi]
5317    ! then phi  about the ORIGINAL (unrotated) z-axis (vertical), in [-2pi,2pi]
5318    !
5319    ! Equivalently
5320    ! phi is the first rotation about the z-axis (vertical)
5321    ! then theta  about the NEW   y-axis (line of nodes)
5322    ! then psi    about the FINAL z-axis (figure axis)
5323    ! ---
5324    ! the recursion on the Wigner d matrix is inspired from the very stable
5325    ! double sided one described in Risbo (1996, J. of Geodesy, 70, 383)
5326    ! based on equation (4.4.1) in Edmonds (1957).
5327    ! the Risbo's scatter scheme has been repladed by a gather scheme for
5328    ! better computing efficiency
5329    ! the size of the matrix is divided by 2 using Edmonds Eq.(4.2.5)
5330    ! to speed up calculations
5331    ! the loop on j has been unrolled for further speed-up
5332    ! EH, March--April 2005
5333    !=========================================================
5334    integer(I4B),   intent(in) :: lmax
5335    complex(KALMC), intent(inout), dimension(1:,0:,0:) :: alm
5336    real(DP),       intent(in) :: psi, theta, phi
5337    ! local variables
5338    complex(DPC), dimension(0:lmax) :: exppsi, expphi
5339    complex(DPC), dimension(:,:), allocatable :: alm1, alm2
5340    real(DP),     dimension(:,:), allocatable :: d, dd
5341    real(DP),     dimension(:),   allocatable :: sqt, rsqt
5342    real(DP),     dimension(:),   allocatable :: tsign
5343    integer(I4B) :: status
5344    integer(I4B) :: mm, ll, na1, na2, nd
5345    integer(I4B) :: i, j, k, kd, hj
5346    real(DP)     :: p, q, pj, qj, fj, temp
5347    character(len=*), parameter :: code = 'ROTATE_ALM'
5348    !==========================================================
5349
5350    if (abs(psi) > 2.d0*PI .or. abs(phi) > 2.d0*PI .or. abs(theta) > 2.d0*PI) then
5351       write(*,'(a,3(g12.4))') code,psi,theta,phi
5352       call fatal_error(code//': angles should be in Radians')
5353    endif
5354
5355    nd = size(alm,1)
5356    na1 = size(alm,2)
5357    na2 = size(alm,3)
5358    if (na1 < (lmax+1) .or. na2 < (lmax+1)) then
5359       call fatal_error(code//': unconsistent alm array size and lmax')
5360    endif
5361
5362    allocate(d (-1:2*lmax,   -1:lmax),   stat = status)
5363    call assert_alloc(status,code,'d')
5364    allocate(dd(-1:2*lmax, -1:lmax), stat = status)
5365    call assert_alloc(status,code,'dd')
5366    allocate(sqt(0:2*lmax), rsqt(0:2*lmax), stat = status)
5367    call assert_alloc(status,code,'sqt & rsqt')
5368    allocate(alm1(1:nd,0:lmax), alm2(1:nd,0:lmax), stat = status)
5369    call assert_alloc(status,code,'alm1 & alm2')
5370    allocate(tsign(0:lmax+1), stat = status)
5371    call assert_alloc(status,code,'tsign')
5372
5373    do i=0, lmax,2
5374       tsign(i)   =  1.0_dp
5375       tsign(i+1) = -1.0_dp
5376    enddo
5377    !     initialization of square-root  table
5378    do i=0,2*lmax
5379       sqt(i) = SQRT(DBLE(i))
5380    enddo
5381
5382    ! initialisation of exponential table
5383    exppsi(0)=cmplx(1, 0, kind=DPC)
5384    expphi(0)=cmplx(1, 0, kind=DPC)
5385
5386    do i=1,lmax
5387       exppsi(i)= cmplx(cos(psi*i), -sin(psi*i), kind=DPC)
5388       expphi(i)= cmplx(cos(phi*i), -sin(phi*i), kind=DPC)
5389    enddo
5390
5391    ! Note: theta has the correct sign.
5392    p = sin(theta/2.d0)
5393    q = cos(theta/2.d0)
5394
5395    d  = 0.0_dp ! very important for gather scheme
5396    dd = 0.0_dp
5397    do ll=0,lmax
5398
5399       ! ------ build d-matrix of order l ------
5400       if (ll == 0) then
5401          d(0,0) = 1.d0
5402          goto 2000
5403       endif
5404       if (ll == 1) then
5405          !     initialize d-matrix degree 1/2
5406          dd(0,0)  =  q
5407          dd(1,0)  = -p
5408          dd(0,1)  =  p
5409          dd(1,1)  =  q
5410          goto 1000
5411       endif
5412
5413       !  l - 1 --> l - 1/2
5414       j = 2*ll - 1
5415       rsqt(0:j) = sqt(j:0:-1)
5416       fj = DBLE(j)
5417       qj = q / fj
5418       pj = p / fj
5419!$OMP parallel default(none) &
5420!$OMP   shared(j, fj, d, dd, rsqt, sqt, q, p, qj, pj) &
5421!$OMP   private(k)
5422!$OMP do schedule(dynamic,100)
5423       do k = 0, j/2 ! keep only m' <= -1/2
5424          dd(0:j,k) = rsqt(0:j) * ( d(0:j,k)      * (sqt(j-k)  * qj)   &
5425               &                  + d(0:j,k-1)    * (sqt(k)    * pj) ) &
5426               &    +  sqt(0:j) * ( d(-1:j-1,k-1) * (sqt(k)    * qj)   &
5427               &                  - d(-1:j-1,k)   * (sqt(j-k)  * pj) )
5428       enddo ! loop on k
5429!$OMP end do
5430!$OMP end parallel
5431       ! l=half-integer, reconstruct m'= 1/2 by symmetry
5432       hj = ll-1
5433       if (mod(ll,2) == 0) then
5434          do k = 0, j-1, 2
5435             dd(k,   ll) =   dd(j-k,   hj)
5436             dd(k+1, ll) = - dd(j-k-1, hj)
5437          enddo
5438       else
5439          do k = 0, j-1, 2
5440             dd(k,   ll) = - dd(j-k,   hj)
5441             dd(k+1, ll) =   dd(j-k-1, hj)
5442          enddo
5443       endif
5444
54451000   continue
5446
5447       !  l - 1/2 --> l
5448       j = 2*ll
5449       rsqt(0:j) = sqt(j:0:-1)
5450       fj = DBLE(j)
5451       qj = q / fj
5452       pj = p / fj
5453!$OMP parallel default(none) &
5454!$OMP   shared(j, fj, d, dd, rsqt, sqt, q, p, qj, pj) &
5455!$OMP   private(k)
5456!$OMP do schedule(dynamic,100)
5457       do k = 0, j/2 ! keep only m' <= 0
5458          d (0:j,k) = rsqt(0:j) * ( dd(0:j,k)      * (sqt(j-k)  * qj)   &
5459               &                  + dd(0:j,k-1)    * (sqt(k)    * pj) ) &
5460               &    +  sqt(0:j) * ( dd(-1:j-1,k-1) * (sqt(k)    * qj)   &
5461               &                  - dd(-1:j-1,k)   * (sqt(j-k)  * pj) )
5462       enddo ! loop on k
5463!$OMP end do
5464!$OMP end parallel
5465
54662000   continue
5467       ! ------- apply rotation matrix -------
5468       do kd = 1, nd
5469          alm1(kd,0:ll)  = alm(kd,ll,0:ll) * exppsi(0:ll)
5470       enddo
5471
5472       ! m = 0
5473       do kd = 1, nd
5474          alm2(kd,0:ll) = alm1(kd,0) * d(ll:2*ll,ll)
5475       enddo
5476
5477!$OMP parallel default(none) &
5478!$OMP   shared(d, alm1, alm2, tsign, nd, ll) &
5479!$OMP   private(mm, kd)
5480!$OMP do schedule(dynamic,100)
5481       do mm = 0, ll
5482          do kd = 1, nd
5483             alm2(kd, mm) = alm2(kd,mm) + sum(alm1(kd,1:ll) *                d(ll-1:0:-1,ll-mm)) &
5484                  &                +conjg(sum(alm1(kd,1:ll) * (tsign(1:ll) * d(ll+1:2*ll,ll-mm))))
5485          enddo
5486       enddo
5487!$OMP end do
5488!$OMP end parallel
5489
5490       ! new alm for ll
5491       do kd = 1,nd
5492          alm(kd,ll,0:ll) = alm2(kd,0:ll)*expphi(0:ll)
5493       enddo
5494
5495    enddo ! loop on ll
5496
5497    deallocate(d)
5498    deallocate(dd)
5499    deallocate(sqt, rsqt)
5500    deallocate(alm1, alm2)
5501
5502  end subroutine rotate_alm_KLOAD
5503
5504
5505
5506  !**************************************************************************
5507  !
5508  !             extract rings from map, or put rings into map
5509  !
5510  !**************************************************************************
5511  !==============================================================================
5512  subroutine sub_map2ring_1d_KLOAD(map, start_pix, np_in_ring, weight, ring, partial, pixels)
5513    !==============================================================================
5514    ! extract one ring from a 1-dim RING ordered map, applying weight
5515    ! or build a ring from a list of pixels and signal (if partial is set and pixels is defined)
5516    !==============================================================================
5517    real(KMAP),   intent(IN),  dimension(0:)     :: map
5518    integer(I8B), intent(IN)                     :: start_pix
5519    integer(I4B), intent(IN)                     :: np_in_ring
5520    real(DP),     intent(IN)                     :: weight
5521    real(DP),     intent(OUT), dimension(0:)     :: ring
5522    logical(LGT), intent(IN),                 optional :: partial
5523    integer(I8B), intent(IN),  dimension(0:), optional :: pixels
5524
5525    logical(LGT) :: partial_in
5526    integer(I8B) :: npix, ipix, end_pix, p
5527
5528    !----------------------------------------------------------
5529
5530    partial_in = .false.
5531    npix = 0_i8b
5532    end_pix = start_pix + np_in_ring - 1
5533
5534    if (present(partial)) then
5535       if (partial) then
5536          if (present(pixels)) npix = size(pixels)
5537          partial_in = (npix > 0)
5538       endif
5539    endif
5540
5541    if (partial_in) then ! cut sky case
5542       ring(0:np_in_ring - 1) = 0.0_dp
5543       do ipix=0, npix-1
5544          p = pixels(ipix)
5545          if (p >= start_pix .and. p <= end_pix) then
5546             ring(p - start_pix) = map(ipix) * weight
5547          endif
5548       enddo
5549
5550    else ! full sky case
5551       ring(0:np_in_ring-1) = map(start_pix: end_pix) * weight
5552    endif
5553
5554    return
5555
5556  end subroutine sub_map2ring_1d_KLOAD
5557  !==============================================================================
5558  subroutine sub_map2ring_nd_KLOAD(map, idim, start_pix, np_in_ring, weight, ring, partial, pixels)
5559    !==============================================================================
5560    ! extract one ring from a N-dim RING ordered map, applying weight
5561    ! or build a ring from a list of pixels and signal (if partial is set and pixels is defined)
5562    !==============================================================================
5563
5564    real(KMAP),   intent(IN),  dimension(0:,1:)  :: map
5565    integer(I8B), intent(IN)                     :: start_pix
5566    integer(I4B), intent(IN)                     :: np_in_ring, idim
5567    real(DP),     intent(IN)                     :: weight
5568    real(DP),     intent(OUT), dimension(0:)     :: ring
5569    logical(LGT), intent(IN),                 optional :: partial
5570    integer(I8B), intent(IN),  dimension(0:), optional :: pixels
5571
5572    logical(LGT) :: partial_in
5573    integer(I8B) :: npix, ipix, end_pix, p
5574
5575    !----------------------------------------------------------
5576
5577    partial_in = .false.
5578    npix = 0_i8b
5579    end_pix = start_pix + np_in_ring - 1
5580
5581    if (present(partial)) then
5582       if (partial) then
5583          if (present(pixels)) npix = size(pixels)
5584          partial_in = (npix > 0)
5585       endif
5586    endif
5587
5588    if (partial_in) then ! cut sky case
5589       ring(0:np_in_ring - 1) = 0.0_dp
5590       do ipix=0, npix-1
5591          p = pixels(ipix)
5592          if (p >= start_pix .and. p <= end_pix) then
5593             ring(p - start_pix) = map(ipix, idim) * weight
5594          endif
5595       enddo
5596
5597    else ! full sky case
5598       ring(0:np_in_ring-1) = map(start_pix: end_pix, idim) * weight
5599    endif
5600
5601    return
5602
5603  end subroutine sub_map2ring_nd_KLOAD
5604
5605  !==============================================================================
5606  subroutine sub_ring2map_1d_KLOAD(ring, map, start_pix, np_in_ring, partial, pixels)
5607    !==============================================================================
5608    ! puts one ring into a 1-dim RING ordered map
5609    ! or extract signal corresponding to a list of pixels from a ring (if partial is set and pixels is defined)
5610    !==============================================================================
5611    real(DP),     intent(IN),  dimension(0:)     :: ring
5612    real(KMAP),   intent(OUT), dimension(0:)     :: map
5613    integer(I8B), intent(IN)                     :: start_pix
5614    integer(I4B), intent(IN)                     :: np_in_ring
5615    logical(LGT), intent(IN),                 optional :: partial
5616    integer(I8B), intent(IN),  dimension(0:), optional :: pixels
5617
5618    logical(LGT) :: partial_in
5619    integer(I8B) :: npix, ipix, end_pix, p
5620
5621    !----------------------------------------------------------
5622
5623    partial_in = .false.
5624    npix = 0_i8b
5625    end_pix = start_pix + np_in_ring - 1
5626
5627    if (present(partial)) then
5628       if (partial) then
5629          if (present(pixels)) npix = size(pixels)
5630          partial_in = (npix > 0)
5631       endif
5632    endif
5633
5634    if (partial_in) then ! cut sky case
5635       do ipix=0, npix-1
5636          p = pixels(ipix)
5637          if (p >= start_pix .and. p <= end_pix) then
5638             map(ipix) = ring(p - start_pix)
5639          endif
5640       enddo
5641
5642    else ! full sky case
5643       map(start_pix: end_pix) = ring(0:np_in_ring-1)
5644    endif
5645
5646    return
5647
5648  end subroutine sub_ring2map_1d_KLOAD
5649
5650  !==============================================================================
5651  subroutine sub_ring2map_nd_KLOAD(ring, map, idim, start_pix, np_in_ring, partial, pixels)
5652    !==============================================================================
5653    ! puts one ring into a n-dim RING ordered map
5654    ! or extract signal corresponding to a list of pixels from a ring (if partial is set and pixels is defined)
5655    !==============================================================================
5656    real(DP),     intent(IN),  dimension(0:)     :: ring
5657    real(KMAP),   intent(OUT), dimension(0:,1:)  :: map
5658    integer(I8B), intent(IN)                     :: start_pix
5659    integer(I4B), intent(IN)                     :: np_in_ring, idim
5660    logical(LGT), intent(IN),                 optional :: partial
5661    integer(I8B), intent(IN),  dimension(0:), optional :: pixels
5662
5663    logical(LGT) :: partial_in
5664    integer(I8B) :: npix, ipix, end_pix, p
5665
5666    !----------------------------------------------------------
5667
5668    partial_in = .false.
5669    npix = 0_i8b
5670    end_pix = start_pix + np_in_ring - 1
5671
5672    if (present(partial)) then
5673       if (partial) then
5674          if (present(pixels)) npix = size(pixels)
5675          partial_in = (npix > 0)
5676       endif
5677    endif
5678
5679    if (partial_in) then ! cut sky case
5680       do ipix=0, npix-1
5681          p = pixels(ipix)
5682          if (p >= start_pix .and. p <= end_pix) then
5683             map(ipix, idim) = ring(p - start_pix)
5684          endif
5685       enddo
5686
5687    else ! full sky case
5688       map(start_pix: end_pix, idim) = ring(0:np_in_ring-1)
5689    endif
5690
5691    return
5692
5693  end subroutine sub_ring2map_nd_KLOAD
5694