1 |
/*************************************************************************** |
2 |
************************************************************************** |
3 |
|
4 |
S2kit 1.0 |
5 |
|
6 |
A lite version of Spherical Harmonic Transform Kit |
7 |
|
8 |
Peter Kostelec, Dan Rockmore |
9 |
{geelong,rockmore}@cs.dartmouth.edu |
10 |
|
11 |
Contact: Peter Kostelec |
12 |
geelong@cs.dartmouth.edu |
13 |
|
14 |
Copyright 2004 Peter Kostelec, Dan Rockmore |
15 |
|
16 |
This file is part of S2kit. |
17 |
|
18 |
S2kit is free software; you can redistribute it and/or modify |
19 |
it under the terms of the GNU General Public License as published by |
20 |
the Free Software Foundation; either version 2 of the License, or |
21 |
(at your option) any later version. |
22 |
|
23 |
S2kit is distributed in the hope that it will be useful, |
24 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
25 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26 |
GNU General Public License for more details. |
27 |
|
28 |
You should have received a copy of the GNU General Public License |
29 |
along with S2kit; if not, write to the Free Software |
30 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
31 |
|
32 |
See the accompanying LICENSE file for details. |
33 |
|
34 |
************************************************************************ |
35 |
************************************************************************/ |
36 |
|
37 |
|
38 |
|
39 |
/******************************************************************** |
40 |
|
41 |
FST_semi_memo.c - routines to perform convolutions on the |
42 |
2-sphere using a combination of semi-naive and naive algorithms. |
43 |
|
44 |
ASSUMES THAT ALL PRECOMPUTED-DATA IS IN MEMORY, AND NOT TO BE |
45 |
READ FROM THE DISK. |
46 |
|
47 |
The primary functions in this package are |
48 |
|
49 |
1) FST_semi_memo() - computes the spherical harmonic expansion. |
50 |
2) InvFST_semi_memo() - computes the inverse spherical harmonic transform. |
51 |
3) FZT_semi_memo() - computes the zonal harmonic transform. |
52 |
4) TransMult() - Multiplies harmonic coefficients using Driscoll-Healy |
53 |
result. Dual of convolution in "time" domain. |
54 |
5) Conv2Sphere_semi_memo() - Convolves two functins defined on the 2-sphere, |
55 |
using seminaive transform |
56 |
|
57 |
and one utility function: |
58 |
|
59 |
1) seanindex(): Given bandwidth bw, seanindex(m,l,bw) will give the |
60 |
position of the coefficient f-hat(m,l) in the one-row array |
61 |
|
62 |
For descriptions on calling these functions, see the documentation |
63 |
preceding each function. |
64 |
|
65 |
*/ |
66 |
|
67 |
#include <math.h> |
68 |
#include <stdio.h> |
69 |
#include <stdlib.h> |
70 |
#include <string.h> |
71 |
|
72 |
#include "fftw3.h" |
73 |
|
74 |
#include "makeweights.h" |
75 |
#include "cospmls.h" |
76 |
#include "primitive.h" |
77 |
#include "naive_synthesis.h" |
78 |
#include "seminaive.h" |
79 |
#include "FST_semi_memo.h" |
80 |
|
81 |
|
82 |
|
83 |
/************************************************************************/ |
84 |
|
85 |
|
86 |
/***************************************************************** |
87 |
|
88 |
Given bandwidth bw, seanindex(m,l,bw) will give the position of the |
89 |
coefficient f-hat(m,l) in the one-row array that Sean stores the spherical |
90 |
coefficients. This is needed to help preserve the symmetry that the |
91 |
coefficients have: (l = degree, m = order, and abs(m) <= l) |
92 |
|
93 |
f-hat(l,-m) = (-1)^m * conjugate( f-hat(l,m) ) |
94 |
|
95 |
Thanks for your help Mark! |
96 |
|
97 |
******************************************************************/ |
98 |
|
99 |
|
100 |
int seanindex(int m, |
101 |
int l, |
102 |
int bw) |
103 |
{ |
104 |
int bigL; |
105 |
|
106 |
bigL = bw - 1; |
107 |
|
108 |
if( m >= 0 ) |
109 |
return( m * ( bigL + 1 ) - ( ( m * (m - 1) ) /2 ) + ( l - m ) ); |
110 |
else |
111 |
return( ( ( bigL * ( bigL + 3 ) ) /2 ) + 1 + |
112 |
( ( bigL + m ) * ( bigL + m + 1 ) / 2 ) + ( l - abs( m ) ) ); |
113 |
} |
114 |
|
115 |
|
116 |
/************************************************************************/ |
117 |
/* performs a spherical harmonic transform using the semi-naive |
118 |
and naive algorithms |
119 |
|
120 |
bw -> bandwidth of problem |
121 |
size -> size = 2*bw -> dimension of input array (recall that |
122 |
sampling is done at twice the bandwidth) |
123 |
|
124 |
The inputs rdata and idata are expected to be pointers to |
125 |
size x size arrays. The array rdata contains the real parts |
126 |
of the function samples, and idata contains the imaginary |
127 |
parts. |
128 |
|
129 |
rcoeffs and icoeffs are expected to be pointers to bw x bw arrays, |
130 |
and will contain the harmonic coefficients in a "linearized" form. |
131 |
The array rcoeffs contains the real parts of the coefficients, |
132 |
and icoeffs contains the imaginary parts. |
133 |
|
134 |
spharmonic_pml_table should be a (double **) pointer to |
135 |
the result of a call to Spharmonic_Pml_Table. Because this |
136 |
table is re-used in the inverse transform, and because for |
137 |
timing purposes the computation of the table is not included, |
138 |
it is passed in as an argument. Also, at some point this |
139 |
code may be used as par of a series of convolutions, so |
140 |
reducing repetitive computation is prioritized. |
141 |
|
142 |
spharmonic_pml_table will be an array of (double *) pointers |
143 |
the array being of length TableSize(m,bw) |
144 |
|
145 |
workspace needs to be a double pointer to an array of size |
146 |
(8 * bw^2) + (7 * bw). |
147 |
|
148 |
cutoff -> what order to switch from semi-naive to naive |
149 |
algorithm. |
150 |
|
151 |
dataformat =0 -> samples are complex, =1 -> samples real |
152 |
|
153 |
|
154 |
Output Ordering of coeffs f(m,l) is |
155 |
f(0,0) f(0,1) f(0,2) ... f(0,bw-1) |
156 |
f(1,1) f(1,2) ... f(1,bw-1) |
157 |
etc. |
158 |
f(bw-2,bw-2), f(bw-2,bw-1) |
159 |
f(bw-1,bw-1) |
160 |
f(-(bw-1),bw-1) |
161 |
f(-(bw-2),bw-2) f(-(bw-2),bw-1) |
162 |
etc. |
163 |
f(-2,2) ... f(-2,bw-1) |
164 |
f(-1,1) f(-1,2) ... f(-1,bw-1) |
165 |
|
166 |
|
167 |
This only requires an array of size (bw*bw). If zero-padding |
168 |
is used to make the indexing nice, then you need a an |
169 |
(2bw-1) * bw array - but that is not done here. |
170 |
Because of the amount of space necessary for doing |
171 |
large transforms, it is important not to use any |
172 |
more than necessary. |
173 |
|
174 |
*/ |
175 |
|
176 |
void FST_semi_memo(double *rdata, double *idata, |
177 |
double *rcoeffs, double *icoeffs, |
178 |
int bw, |
179 |
double **seminaive_naive_table, |
180 |
double *workspace, |
181 |
int dataformat, |
182 |
int cutoff, |
183 |
fftw_plan *dctPlan, |
184 |
fftw_plan *fftPlan, |
185 |
double *weights ) |
186 |
{ |
187 |
int size, m, i, j; |
188 |
int dummy0, dummy1 ; |
189 |
double *rres, *ires; |
190 |
double *rdataptr, *idataptr; |
191 |
double *fltres, *scratchpad; |
192 |
double *eval_pts; |
193 |
double pow_one; |
194 |
double tmpA, tmpSize ; |
195 |
|
196 |
size = 2*bw ; |
197 |
tmpSize = 1./ ((double ) size); |
198 |
tmpA = sqrt( 2. * M_PI ) ; |
199 |
|
200 |
rres = workspace; /* needs (size * size) = (4 * bw^2) */ |
201 |
ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */ |
202 |
fltres = ires + (size * size) ; /* needs bw */ |
203 |
eval_pts = fltres + bw; /* needs (2*bw) */ |
204 |
scratchpad = eval_pts + (2*bw); /* needs (4 * bw) */ |
205 |
|
206 |
/* total workspace is (8 * bw^2) + (7 * bw) */ |
207 |
|
208 |
/* do the FFTs along phi */ |
209 |
fftw_execute_split_dft( *fftPlan, |
210 |
rdata, idata, |
211 |
rres, ires ) ; |
212 |
/* |
213 |
normalize |
214 |
|
215 |
note that I'm getting the sqrt(2pi) in there at |
216 |
this point ... to account for the fact that the spherical |
217 |
harmonics are of norm 1: I need to account for |
218 |
the fact that the associated Legendres are |
219 |
of norm 1 |
220 |
*/ |
221 |
tmpSize *= tmpA ; |
222 |
for( j = 0 ; j < size*size ; j ++ ) |
223 |
{ |
224 |
rres[j] *= tmpSize ; |
225 |
ires[j] *= tmpSize ; |
226 |
} |
227 |
|
228 |
/* point to start of output data buffers */ |
229 |
rdataptr = rcoeffs; |
230 |
idataptr = icoeffs; |
231 |
|
232 |
for (m=0; m<bw; m++) { |
233 |
/* |
234 |
fprintf(stderr,"m = %d\n",m); |
235 |
*/ |
236 |
|
237 |
/*** test to see if before cutoff or after ***/ |
238 |
if (m < cutoff){ |
239 |
|
240 |
/* do the real part */ |
241 |
SemiNaiveReduced(rres+(m*size), |
242 |
bw, |
243 |
m, |
244 |
fltres, |
245 |
scratchpad, |
246 |
seminaive_naive_table[m], |
247 |
weights, |
248 |
dctPlan); |
249 |
|
250 |
/* now load real part of coefficients into output space */ |
251 |
memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); |
252 |
|
253 |
rdataptr += bw-m; |
254 |
|
255 |
/* do imaginary part */ |
256 |
SemiNaiveReduced(ires+(m*size), |
257 |
bw, |
258 |
m, |
259 |
fltres, |
260 |
scratchpad, |
261 |
seminaive_naive_table[m], |
262 |
weights, |
263 |
dctPlan); |
264 |
|
265 |
/* now load imaginary part of coefficients into output space */ |
266 |
memcpy(idataptr, fltres, sizeof(double) * (bw - m)); |
267 |
|
268 |
idataptr += bw-m; |
269 |
|
270 |
} |
271 |
else{ |
272 |
/* do real part */ |
273 |
Naive_AnalysisX(rres+(m*size), |
274 |
bw, |
275 |
m, |
276 |
weights, |
277 |
fltres, |
278 |
seminaive_naive_table[m], |
279 |
scratchpad ); |
280 |
memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); |
281 |
rdataptr += bw-m; |
282 |
|
283 |
/* do imaginary part */ |
284 |
Naive_AnalysisX(ires+(m*size), |
285 |
bw, |
286 |
m, |
287 |
weights, |
288 |
fltres, |
289 |
seminaive_naive_table[m], |
290 |
scratchpad ); |
291 |
memcpy(idataptr, fltres, sizeof(double) * (bw - m)); |
292 |
idataptr += bw-m; |
293 |
} |
294 |
} |
295 |
|
296 |
/*** now do upper coefficients ****/ |
297 |
|
298 |
/* now if the data is real, we don't have to compute the |
299 |
coefficients whose order is less than 0, i.e. since |
300 |
the data is real, we know that |
301 |
f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)), |
302 |
so use that to get the rest of the coefficients |
303 |
|
304 |
dataformat =0 -> samples are complex, =1 -> samples real |
305 |
|
306 |
*/ |
307 |
|
308 |
if( dataformat == 0 ){ |
309 |
|
310 |
/* note that m is greater than bw here, but this is for |
311 |
purposes of indexing the input data arrays. |
312 |
The "true" value of m as a parameter for Pml is |
313 |
size - m */ |
314 |
|
315 |
for (m=bw+1; m<size; m++) { |
316 |
/* |
317 |
fprintf(stderr,"m = %d\n",-(size-m)); |
318 |
*/ |
319 |
|
320 |
if ( (size-m) < cutoff ) |
321 |
{ |
322 |
/* do real part */ |
323 |
SemiNaiveReduced(rres+(m*size), |
324 |
bw, |
325 |
size-m, |
326 |
fltres, |
327 |
scratchpad, |
328 |
seminaive_naive_table[size-m], |
329 |
weights, |
330 |
dctPlan ); |
331 |
|
332 |
/* now load real part of coefficients into output space */ |
333 |
if ((m % 2) != 0) { |
334 |
for (i=0; i<m-bw; i++) |
335 |
rdataptr[i] = -fltres[i]; |
336 |
} |
337 |
else { |
338 |
memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); |
339 |
} |
340 |
rdataptr += m-bw; |
341 |
|
342 |
/* do imaginary part */ |
343 |
SemiNaiveReduced(ires+(m*size), |
344 |
bw, |
345 |
size-m, |
346 |
fltres, |
347 |
scratchpad, |
348 |
seminaive_naive_table[size-m], |
349 |
weights, |
350 |
dctPlan); |
351 |
|
352 |
/* now load imag part of coefficients into output space */ |
353 |
if ((m % 2) != 0) { |
354 |
for (i=0; i<m-bw; i++) |
355 |
idataptr[i] = -fltres[i]; |
356 |
} |
357 |
else { |
358 |
memcpy(idataptr, fltres, sizeof(double) * (m - bw)); |
359 |
} |
360 |
idataptr += m-bw; |
361 |
} |
362 |
else |
363 |
{ |
364 |
Naive_AnalysisX(rres+(m*size), |
365 |
bw, |
366 |
size-m, |
367 |
weights, |
368 |
fltres, |
369 |
seminaive_naive_table[size-m], |
370 |
scratchpad); |
371 |
|
372 |
/* now load real part of coefficients into output space */ |
373 |
if ((m % 2) != 0) { |
374 |
for (i=0; i<m-bw; i++) |
375 |
rdataptr[i] = -fltres[i]; |
376 |
} |
377 |
else { |
378 |
memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); |
379 |
} |
380 |
rdataptr += m-bw; |
381 |
|
382 |
/* do imaginary part */ |
383 |
Naive_AnalysisX(ires+(m*size), |
384 |
bw, |
385 |
size-m, |
386 |
weights, |
387 |
fltres, |
388 |
seminaive_naive_table[size-m], |
389 |
scratchpad); |
390 |
|
391 |
/* now load imag part of coefficients into output space */ |
392 |
if ((m % 2) != 0) { |
393 |
for (i=0; i<m-bw; i++) |
394 |
idataptr[i] = -fltres[i]; |
395 |
} |
396 |
else { |
397 |
memcpy(idataptr, fltres, sizeof(double) * (m - bw)); |
398 |
} |
399 |
idataptr += m-bw; |
400 |
|
401 |
} |
402 |
|
403 |
} |
404 |
} |
405 |
else /**** if the data is real ****/ |
406 |
{ |
407 |
pow_one = 1.0; |
408 |
for(i = 1; i < bw; i++){ |
409 |
pow_one *= -1.0; |
410 |
for( j = i; j < bw; j++){ |
411 |
/* |
412 |
SEANINDEXP(dummy0,i,j,bw); |
413 |
SEANINDEXN(dummy1,-i,j,bw); |
414 |
*/ |
415 |
|
416 |
dummy0 = seanindex(i, j, bw); |
417 |
dummy1 = seanindex(-i, j, bw); |
418 |
|
419 |
rcoeffs[dummy1] = |
420 |
pow_one * rcoeffs[dummy0]; |
421 |
icoeffs[dummy1] = |
422 |
-pow_one * icoeffs[dummy0]; |
423 |
} |
424 |
} |
425 |
} |
426 |
|
427 |
} |
428 |
|
429 |
/************************************************************************/ |
430 |
/* Inverse spherical harmonic transform. |
431 |
|
432 |
bw -> bandwidth of problem |
433 |
size = 2*bw |
434 |
|
435 |
Inputs rcoeffs and icoeffs are harmonic coefficients stored |
436 |
in (bw * bw) arrays in the order spec'ed above. |
437 |
|
438 |
rdata and idata are (size x size) arrays with the transformed result. |
439 |
|
440 |
transpose_spharmonic_pml_table should be the (double **) |
441 |
result of a call to Transpose_Spharmonic_Pml_Table() |
442 |
|
443 |
workspace is (8 * bw^2) + (10 * bw) |
444 |
|
445 |
*/ |
446 |
|
447 |
/* dataformat =0 -> samples are complex, =1 -> samples real */ |
448 |
|
449 |
void InvFST_semi_memo(double *rcoeffs, double *icoeffs, |
450 |
double *rdata, double *idata, |
451 |
int bw, |
452 |
double **transpose_seminaive_naive_table, |
453 |
double *workspace, |
454 |
int dataformat, |
455 |
int cutoff, |
456 |
fftw_plan *idctPlan, |
457 |
fftw_plan *ifftPlan ) |
458 |
{ |
459 |
int size, m, i, n; |
460 |
double *rdataptr, *idataptr; |
461 |
double *rfourdata, *ifourdata; |
462 |
double *rinvfltres, *iminvfltres, *scratchpad; |
463 |
double *sin_values, *eval_pts; |
464 |
double tmpA ; |
465 |
|
466 |
size = 2*bw ; |
467 |
|
468 |
rfourdata = workspace; /* needs (size * size) */ |
469 |
ifourdata = rfourdata + (size * size); /* needs (size * size) */ |
470 |
rinvfltres = ifourdata + (size * size); /* needs (2 * bw) */ |
471 |
iminvfltres = rinvfltres + (2 * bw); /* needs (2 * bw) */ |
472 |
sin_values = iminvfltres + (2 * bw); /* needs (2 * bw) */ |
473 |
eval_pts = sin_values + (2 * bw); /* needs (2 * bw) */ |
474 |
scratchpad = eval_pts + (2 * bw); /* needs (2 * bw) */ |
475 |
|
476 |
/* total workspace = (8 * bw^2) + (10 * bw) */ |
477 |
|
478 |
/* load up the sin_values array */ |
479 |
n = 2*bw; |
480 |
|
481 |
ArcCosEvalPts(n, eval_pts); |
482 |
for (i=0; i<n; i++) |
483 |
sin_values[i] = sin(eval_pts[i]); |
484 |
|
485 |
|
486 |
/* Now do all of the inverse Legendre transforms */ |
487 |
rdataptr = rcoeffs; |
488 |
idataptr = icoeffs; |
489 |
|
490 |
for (m=0; m<bw; m++) |
491 |
{ |
492 |
/* |
493 |
fprintf(stderr,"m = %d\n",m); |
494 |
*/ |
495 |
|
496 |
if(m < cutoff) |
497 |
{ |
498 |
/* do real part first */ |
499 |
InvSemiNaiveReduced(rdataptr, |
500 |
bw, |
501 |
m, |
502 |
rinvfltres, |
503 |
transpose_seminaive_naive_table[m], |
504 |
sin_values, |
505 |
scratchpad, |
506 |
idctPlan ); |
507 |
|
508 |
/* now do imaginary part */ |
509 |
|
510 |
InvSemiNaiveReduced(idataptr, |
511 |
bw, |
512 |
m, |
513 |
iminvfltres, |
514 |
transpose_seminaive_naive_table[m], |
515 |
sin_values, |
516 |
scratchpad, |
517 |
idctPlan); |
518 |
|
519 |
/* will store normal, then tranpose before doing inverse fft */ |
520 |
memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size); |
521 |
memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size); |
522 |
|
523 |
/* move to next set of coeffs */ |
524 |
rdataptr += bw-m; |
525 |
idataptr += bw-m; |
526 |
|
527 |
} |
528 |
else |
529 |
{ |
530 |
|
531 |
/* first do the real part */ |
532 |
Naive_SynthesizeX(rdataptr, |
533 |
bw, |
534 |
m, |
535 |
rinvfltres, |
536 |
transpose_seminaive_naive_table[m]); |
537 |
|
538 |
/* now do the imaginary */ |
539 |
Naive_SynthesizeX(idataptr, |
540 |
bw, |
541 |
m, |
542 |
iminvfltres, |
543 |
transpose_seminaive_naive_table[m]); |
544 |
|
545 |
/* will store normal, then tranpose before doing inverse fft */ |
546 |
memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size); |
547 |
memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size); |
548 |
|
549 |
/* move to next set of coeffs */ |
550 |
|
551 |
rdataptr += bw-m; |
552 |
idataptr += bw-m; |
553 |
|
554 |
} |
555 |
} |
556 |
/* closes m loop */ |
557 |
|
558 |
/* now fill in zero values where m = bw (from problem definition) */ |
559 |
memset(rfourdata + (bw * size), 0, sizeof(double) * size); |
560 |
memset(ifourdata + (bw * size), 0, sizeof(double) * size); |
561 |
|
562 |
/* now if the data is real, we don't have to compute the |
563 |
coefficients whose order is less than 0, i.e. since |
564 |
the data is real, we know that |
565 |
invf-hat(l,-m) = conjugate(invf-hat(l,m)), |
566 |
so use that to get the rest of the real data |
567 |
|
568 |
dataformat =0 -> samples are complex, =1 -> samples real |
569 |
|
570 |
*/ |
571 |
|
572 |
if(dataformat == 0){ |
573 |
|
574 |
/* now do negative m values */ |
575 |
|
576 |
for (m=bw+1; m<size; m++) |
577 |
{ |
578 |
/* |
579 |
fprintf(stderr,"m = %d\n",-(size-m)); |
580 |
*/ |
581 |
|
582 |
if ( (size-m) < cutoff ) |
583 |
{ |
584 |
/* do real part first */ |
585 |
InvSemiNaiveReduced(rdataptr, |
586 |
bw, |
587 |
size - m, |
588 |
rinvfltres, |
589 |
transpose_seminaive_naive_table[size - m], |
590 |
sin_values, |
591 |
scratchpad, |
592 |
idctPlan); |
593 |
|
594 |
/* now do imaginary part */ |
595 |
InvSemiNaiveReduced(idataptr, |
596 |
bw, |
597 |
size - m, |
598 |
iminvfltres, |
599 |
transpose_seminaive_naive_table[size - m], |
600 |
sin_values, |
601 |
scratchpad, |
602 |
idctPlan ); |
603 |
|
604 |
/* will store normal, then tranpose before doing inverse fft */ |
605 |
if ((m % 2) != 0) |
606 |
for(i=0; i< size; i++){ |
607 |
rinvfltres[i] = -rinvfltres[i]; |
608 |
iminvfltres[i] = -iminvfltres[i]; |
609 |
} |
610 |
|
611 |
memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size); |
612 |
memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size); |
613 |
|
614 |
/* move to next set of coeffs */ |
615 |
rdataptr += bw-(size-m); |
616 |
idataptr += bw-(size-m); |
617 |
} |
618 |
else |
619 |
{ |
620 |
/* first do the real part */ |
621 |
Naive_SynthesizeX(rdataptr, |
622 |
bw, |
623 |
size-m, |
624 |
rinvfltres, |
625 |
transpose_seminaive_naive_table[size-m]); |
626 |
|
627 |
/* now do the imaginary */ |
628 |
Naive_SynthesizeX(idataptr, |
629 |
bw, |
630 |
size-m, |
631 |
iminvfltres, |
632 |
transpose_seminaive_naive_table[size-m]); |
633 |
|
634 |
/* will store normal, then tranpose before doing inverse fft */ |
635 |
if ((m % 2) != 0) |
636 |
for(i=0; i< size; i++){ |
637 |
rinvfltres[i] = -rinvfltres[i]; |
638 |
iminvfltres[i] = -iminvfltres[i]; |
639 |
} |
640 |
|
641 |
memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size); |
642 |
memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size); |
643 |
|
644 |
/* move to next set of coeffs */ |
645 |
rdataptr += bw-(size-m); |
646 |
idataptr += bw-(size-m); |
647 |
|
648 |
} |
649 |
|
650 |
} /* closes m loop */ |
651 |
} |
652 |
else { |
653 |
for(m = bw + 1; m < size; m++){ |
654 |
|
655 |
memcpy(rfourdata+(m*size), rfourdata+((size-m)*size), |
656 |
sizeof(double) * size); |
657 |
memcpy(ifourdata+(m*size), ifourdata+((size-m)*size), |
658 |
sizeof(double) * size); |
659 |
for(i = 0; i < size; i++) |
660 |
ifourdata[(m*size)+i] *= -1.0; |
661 |
} |
662 |
} |
663 |
|
664 |
/* normalize */ |
665 |
tmpA = 1./(sqrt(2.*M_PI) ); |
666 |
for(i=0;i<4*bw*bw;i++) |
667 |
{ |
668 |
rfourdata[i] *= tmpA ; |
669 |
ifourdata[i] *= tmpA ; |
670 |
} |
671 |
|
672 |
|
673 |
fftw_execute_split_dft( *ifftPlan, |
674 |
ifourdata, rfourdata, |
675 |
idata, rdata ); |
676 |
/* amscray */ |
677 |
|
678 |
} |
679 |
|
680 |
/************************************************************************/ |
681 |
/* |
682 |
Zonal Harmonic transform using seminaive algorithm - used in convolutions |
683 |
|
684 |
bw -> bandwidth of problem |
685 |
|
686 |
size = 2 * bw |
687 |
|
688 |
rdata and idata should be pointers to size x size arrays. |
689 |
rres and ires should be pointers to double arrays of size bw. |
690 |
|
691 |
cos_pml_table contains Legendre coefficients of P(0,l) functions |
692 |
and is result of CosPmlTableGen for m = 0; |
693 |
FZT_semi only computes spherical harmonics for m=0. |
694 |
|
695 |
dataformat =0 -> samples are complex, =1 -> samples real |
696 |
|
697 |
workspace needed is (12 * bw) |
698 |
|
699 |
*/ |
700 |
|
701 |
void FZT_semi_memo(double *rdata, double *idata, |
702 |
double *rres, double *ires, |
703 |
int bw, |
704 |
double *cos_pml_table, |
705 |
double *workspace, |
706 |
int dataformat, |
707 |
fftw_plan *dctPlan, |
708 |
double *weights ) |
709 |
{ |
710 |
int i, j, size; |
711 |
double *r0, *i0, dsize; |
712 |
double tmpreal, tmpimag; |
713 |
double tmpA ; |
714 |
double *scratchpad ; |
715 |
|
716 |
size = 2*bw ; |
717 |
|
718 |
/* assign memory */ |
719 |
r0 = workspace; /* needs (2 * bw) */ |
720 |
i0 = r0 + (2 * bw); /* needs (2 * bw) */ |
721 |
scratchpad = i0 + (2 * bw); /* needs (4 * bw) */ |
722 |
|
723 |
/* total workspace = 13*bw */ |
724 |
|
725 |
dsize = 1.0 / ((double) size); |
726 |
tmpA = sqrt( 2.* M_PI ); |
727 |
dsize *= tmpA ; |
728 |
|
729 |
/* compute the m = 0 components */ |
730 |
for (i=0; i<size; i++) { |
731 |
tmpreal = 0.0; |
732 |
tmpimag = 0.0; |
733 |
|
734 |
for(j=0; j<size; j++) { |
735 |
tmpreal += rdata[(i*size)+j]; |
736 |
tmpimag += idata[(i*size)+j]; |
737 |
} |
738 |
/* normalize */ |
739 |
r0[i] = tmpreal*dsize; |
740 |
i0[i] = tmpimag*dsize; |
741 |
} |
742 |
|
743 |
/* do the real part */ |
744 |
SemiNaiveReduced(r0, |
745 |
bw, |
746 |
0, |
747 |
rres, |
748 |
scratchpad, |
749 |
cos_pml_table, |
750 |
weights, |
751 |
dctPlan); |
752 |
|
753 |
if(dataformat == 0) /* do imaginary part */ |
754 |
SemiNaiveReduced(i0, |
755 |
bw, |
756 |
0, |
757 |
ires, |
758 |
scratchpad, |
759 |
cos_pml_table, |
760 |
weights, |
761 |
dctPlan); |
762 |
else /* otherwise set coefficients = 0 */ |
763 |
memset(ires, 0, sizeof(double) * size); |
764 |
|
765 |
} |
766 |
|
767 |
/************************************************************************/ |
768 |
/* |
769 |
multiplies harmonic coefficients of a function and a filter. |
770 |
See convolution theorem of Driscoll and Healy for details. |
771 |
|
772 |
bw -> bandwidth of problem |
773 |
size = 2*bw |
774 |
|
775 |
datacoeffs should be output of an FST, filtercoeffs the |
776 |
output of an FZT. There should be (bw * bw) datacoeffs, |
777 |
and bw filtercoeffs. |
778 |
rres and ires should point to arrays of dimension bw * bw. |
779 |
|
780 |
*/ |
781 |
|
782 |
void TransMult(double *rdatacoeffs, double *idatacoeffs, |
783 |
double *rfiltercoeffs, double *ifiltercoeffs, |
784 |
double *rres, double *ires, |
785 |
int bw) |
786 |
{ |
787 |
|
788 |
int m, l, size; |
789 |
double *rdptr, *idptr, *rrptr, *irptr; |
790 |
|
791 |
size = 2*bw ; |
792 |
|
793 |
rdptr = rdatacoeffs; |
794 |
idptr = idatacoeffs; |
795 |
rrptr = rres; |
796 |
irptr = ires; |
797 |
|
798 |
for (m=0; m<bw; m++) { |
799 |
for (l=m; l<bw; l++) { |
800 |
compmult(rfiltercoeffs[l], ifiltercoeffs[l], |
801 |
rdptr[l-m], idptr[l-m], |
802 |
rrptr[l-m], irptr[l-m]); |
803 |
|
804 |
rrptr[l-m] *= sqrt(4*M_PI/(2*l+1)); |
805 |
irptr[l-m] *= sqrt(4*M_PI/(2*l+1)); |
806 |
|
807 |
} |
808 |
rdptr += bw-m; idptr += bw-m; |
809 |
rrptr += bw-m; irptr += bw-m; |
810 |
} |
811 |
for (m=bw+1; m<size; m++) { |
812 |
for (l=size-m; l<bw; l++){ |
813 |
compmult(rfiltercoeffs[l], ifiltercoeffs[l], |
814 |
rdptr[l-size+m], idptr[l-size+m], |
815 |
rrptr[l-size+m], irptr[l-size+m]); |
816 |
|
817 |
rrptr[l-size+m] *= sqrt(4*M_PI/(2*l+1)); |
818 |
irptr[l-size+m] *= sqrt(4*M_PI/(2*l+1)); |
819 |
|
820 |
} |
821 |
rdptr += m-bw; idptr += m-bw; |
822 |
rrptr += m-bw; irptr += m-bw; |
823 |
} |
824 |
|
825 |
} |
826 |
|
827 |
/************************************************************************/ |
828 |
/* Here's the big banana |
829 |
Convolves two functions defined on the 2-sphere. |
830 |
Uses seminaive algorithms for spherical harmonic transforms |
831 |
|
832 |
size = 2*bw |
833 |
|
834 |
Inputs: |
835 |
|
836 |
rdata, idata - (size * size) arrays containing real and |
837 |
imaginary parts of sampled function. |
838 |
rfilter, ifilter - (size * size) arrays containing real and |
839 |
imaginary parts of sampled filter function. |
840 |
rres, ires - (size * size) arrays containing real and |
841 |
imaginary parts of result function. |
842 |
|
843 |
|
844 |
Suggestion - if you want to do multiple convolutions, |
845 |
don't keep allocating and freeing space with every call, |
846 |
or keep recomputing the spharmonic_pml tables. |
847 |
Allocate workspace once before you call this function, then |
848 |
just set up pointers as first step of this procedure rather |
849 |
than mallocing. And do the same with the FST, FZT, and InvFST functions. |
850 |
|
851 |
ASSUMPTIONS: |
852 |
1. data is strictly REAL |
853 |
2. will do semi-naive algorithm for ALL orders -> change the cutoff |
854 |
value if you want it to be different |
855 |
|
856 |
Memory requirements for Conv2Sphere |
857 |
|
858 |
Need space for spharmonic tables and local workspace and |
859 |
scratchpad space for FST_semi |
860 |
|
861 |
Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) + |
862 |
Reduced_SpharmonicTableSize(bw,cutoff) |
863 |
|
864 |
Then the workspace needs to be this large: |
865 |
|
866 |
2 * legendreSize + |
867 |
8 * (bw*bw) + 10*bw + |
868 |
4 * (bw*bw) + 2*bw |
869 |
|
870 |
for a total of |
871 |
|
872 |
2 * legendreSize + |
873 |
12 * (bw*bw) + 12*bw ; |
874 |
|
875 |
|
876 |
|
877 |
*/ |
878 |
void Conv2Sphere_semi_memo(double *rdata, double *idata, |
879 |
double *rfilter, double *ifilter, |
880 |
double *rres, double *ires, |
881 |
int bw, |
882 |
double *workspace) |
883 |
{ |
884 |
int size, spharmonic_bound ; |
885 |
int legendreSize, cutoff ; |
886 |
double *frres, *fires, *filtrres, *filtires, *trres, *tires; |
887 |
double **spharmonic_pml_table, **transpose_spharmonic_pml_table; |
888 |
double *spharmonic_result_space, *transpose_spharmonic_result_space; |
889 |
double *scratchpad; |
890 |
|
891 |
/* fftw */ |
892 |
int rank, howmany_rank ; |
893 |
fftw_iodim dims[1], howmany_dims[1]; |
894 |
|
895 |
/* forward transform stuff */ |
896 |
fftw_plan dctPlan, fftPlan ; |
897 |
double *weights ; |
898 |
|
899 |
/* inverse transform stuff */ |
900 |
fftw_plan idctPlan, ifftPlan ; |
901 |
|
902 |
size =2*bw ; |
903 |
cutoff = bw ; |
904 |
legendreSize = Reduced_Naive_TableSize(bw,cutoff) + |
905 |
Reduced_SpharmonicTableSize(bw,cutoff) ; |
906 |
|
907 |
/* assign space */ |
908 |
|
909 |
spharmonic_bound = legendreSize ; |
910 |
|
911 |
spharmonic_result_space = workspace; /* needs legendreSize */ |
912 |
|
913 |
transpose_spharmonic_result_space = |
914 |
spharmonic_result_space + legendreSize ; /* needs legendreSize */ |
915 |
|
916 |
frres = transpose_spharmonic_result_space + |
917 |
legendreSize ; /* needs (bw*bw) */ |
918 |
fires = frres + (bw*bw); /* needs (bw*bw) */ |
919 |
trres = fires + (bw*bw); /* needs (bw*bw) */ |
920 |
tires = trres + (bw*bw); /* needs (bw*bw) */ |
921 |
filtrres = tires + (bw*bw); /* needs bw */ |
922 |
filtires = filtrres + bw; /* needs bw */ |
923 |
scratchpad = filtires + bw; /* needs (8*bw^2)+(10*bw) */ |
924 |
|
925 |
/* allocate space, and compute, the weights for this bandwidth */ |
926 |
weights = (double *) malloc(sizeof(double) * 4 * bw); |
927 |
makeweights( bw, weights ); |
928 |
|
929 |
/* make the fftw plans */ |
930 |
|
931 |
/* make DCT plans -> note that I will be using the GURU |
932 |
interface to execute these plans within the routines*/ |
933 |
|
934 |
/* forward DCT */ |
935 |
dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, |
936 |
FFTW_REDFT10, FFTW_ESTIMATE ) ; |
937 |
|
938 |
/* inverse DCT */ |
939 |
idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, |
940 |
FFTW_REDFT01, FFTW_ESTIMATE ); |
941 |
|
942 |
/* |
943 |
fft "preamble" ; |
944 |
note that this plan places the output in a transposed array |
945 |
*/ |
946 |
rank = 1 ; |
947 |
dims[0].n = 2*bw ; |
948 |
dims[0].is = 1 ; |
949 |
dims[0].os = 2*bw ; |
950 |
howmany_rank = 1 ; |
951 |
howmany_dims[0].n = 2*bw ; |
952 |
howmany_dims[0].is = 2*bw ; |
953 |
howmany_dims[0].os = 1 ; |
954 |
|
955 |
/* forward fft */ |
956 |
fftPlan = fftw_plan_guru_split_dft( rank, dims, |
957 |
howmany_rank, howmany_dims, |
958 |
rdata, idata, |
959 |
workspace, workspace+(4*bw*bw), |
960 |
FFTW_ESTIMATE ); |
961 |
|
962 |
/* |
963 |
now plan for inverse fft - note that this plans assumes |
964 |
that I'm working with a transposed array, e.g. the inputs |
965 |
for a length 2*bw transform are placed every 2*bw apart, |
966 |
the output will be consecutive entries in the array |
967 |
*/ |
968 |
rank = 1 ; |
969 |
dims[0].n = 2*bw ; |
970 |
dims[0].is = 2*bw ; |
971 |
dims[0].os = 1 ; |
972 |
howmany_rank = 1 ; |
973 |
howmany_dims[0].n = 2*bw ; |
974 |
howmany_dims[0].is = 1 ; |
975 |
howmany_dims[0].os = 2*bw ; |
976 |
|
977 |
/* inverse fft */ |
978 |
ifftPlan = fftw_plan_guru_split_dft( rank, dims, |
979 |
howmany_rank, howmany_dims, |
980 |
rdata, idata, |
981 |
workspace, workspace+(4*bw*bw), |
982 |
FFTW_ESTIMATE ); |
983 |
|
984 |
|
985 |
/* precompute the associated Legendre fcts */ |
986 |
spharmonic_pml_table = |
987 |
Spharmonic_Pml_Table(bw, |
988 |
spharmonic_result_space, |
989 |
scratchpad); |
990 |
|
991 |
transpose_spharmonic_pml_table = |
992 |
Transpose_Spharmonic_Pml_Table(spharmonic_pml_table, |
993 |
bw, |
994 |
transpose_spharmonic_result_space, |
995 |
scratchpad); |
996 |
FST_semi_memo(rdata, idata, |
997 |
frres, fires, |
998 |
bw, |
999 |
spharmonic_pml_table, |
1000 |
scratchpad, |
1001 |
1, |
1002 |
bw, |
1003 |
&dctPlan, |
1004 |
&fftPlan, |
1005 |
weights ); |
1006 |
|
1007 |
FZT_semi_memo(rfilter, ifilter, |
1008 |
filtrres, filtires, |
1009 |
bw, |
1010 |
spharmonic_pml_table[0], |
1011 |
scratchpad, |
1012 |
1, |
1013 |
&dctPlan, |
1014 |
weights ); |
1015 |
|
1016 |
TransMult(frres, fires, filtrres, filtires, trres, tires, bw); |
1017 |
|
1018 |
InvFST_semi_memo(trres, tires, |
1019 |
rres, ires, |
1020 |
bw, |
1021 |
transpose_spharmonic_pml_table, |
1022 |
scratchpad, |
1023 |
1, |
1024 |
bw, |
1025 |
&idctPlan, |
1026 |
&ifftPlan ); |
1027 |
|
1028 |
free( weights ) ; |
1029 |
|
1030 |
/*** |
1031 |
have to free the memory that was allocated in |
1032 |
Spharmonic_Pml_Table() and |
1033 |
Transpose_Spharmonic_Pml_Table() |
1034 |
***/ |
1035 |
|
1036 |
free(spharmonic_pml_table); |
1037 |
free(transpose_spharmonic_pml_table); |
1038 |
|
1039 |
/* destroy plans */ |
1040 |
fftw_destroy_plan( ifftPlan ) ; |
1041 |
fftw_destroy_plan( fftPlan ) ; |
1042 |
fftw_destroy_plan( idctPlan ) ; |
1043 |
fftw_destroy_plan( dctPlan ) ; |
1044 |
} |