ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/SHAPES/FST_semi_memo.c
Revision: 1287
Committed: Wed Jun 23 20:18:48 2004 UTC (20 years, 10 months ago) by chrisfen
Content type: text/plain
File size: 27350 byte(s)
Log Message:
Major progress towards inclusion of spherical harmonic transform capability - still having some build issues...

File Contents

# Content
1 /***************************************************************************
2 **************************************************************************
3
4 S2kit 1.0
5
6 A lite version of Spherical Harmonic Transform Kit
7
8 Peter Kostelec, Dan Rockmore
9 {geelong,rockmore}@cs.dartmouth.edu
10
11 Contact: Peter Kostelec
12 geelong@cs.dartmouth.edu
13
14 Copyright 2004 Peter Kostelec, Dan Rockmore
15
16 This file is part of S2kit.
17
18 S2kit is free software; you can redistribute it and/or modify
19 it under the terms of the GNU General Public License as published by
20 the Free Software Foundation; either version 2 of the License, or
21 (at your option) any later version.
22
23 S2kit is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 GNU General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with S2kit; if not, write to the Free Software
30 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31
32 See the accompanying LICENSE file for details.
33
34 ************************************************************************
35 ************************************************************************/
36
37
38
39 /********************************************************************
40
41 FST_semi_memo.c - routines to perform convolutions on the
42 2-sphere using a combination of semi-naive and naive algorithms.
43
44 ASSUMES THAT ALL PRECOMPUTED-DATA IS IN MEMORY, AND NOT TO BE
45 READ FROM THE DISK.
46
47 The primary functions in this package are
48
49 1) FST_semi_memo() - computes the spherical harmonic expansion.
50 2) InvFST_semi_memo() - computes the inverse spherical harmonic transform.
51 3) FZT_semi_memo() - computes the zonal harmonic transform.
52 4) TransMult() - Multiplies harmonic coefficients using Driscoll-Healy
53 result. Dual of convolution in "time" domain.
54 5) Conv2Sphere_semi_memo() - Convolves two functins defined on the 2-sphere,
55 using seminaive transform
56
57 and one utility function:
58
59 1) seanindex(): Given bandwidth bw, seanindex(m,l,bw) will give the
60 position of the coefficient f-hat(m,l) in the one-row array
61
62 For descriptions on calling these functions, see the documentation
63 preceding each function.
64
65 */
66
67 #include <math.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71
72 #include "fftw3.h"
73
74 #include "makeweights.h"
75 #include "cospmls.h"
76 #include "primitive.h"
77 #include "naive_synthesis.h"
78 #include "seminaive.h"
79 #include "FST_semi_memo.h"
80
81
82
83 /************************************************************************/
84
85
86 /*****************************************************************
87
88 Given bandwidth bw, seanindex(m,l,bw) will give the position of the
89 coefficient f-hat(m,l) in the one-row array that Sean stores the spherical
90 coefficients. This is needed to help preserve the symmetry that the
91 coefficients have: (l = degree, m = order, and abs(m) <= l)
92
93 f-hat(l,-m) = (-1)^m * conjugate( f-hat(l,m) )
94
95 Thanks for your help Mark!
96
97 ******************************************************************/
98
99
100 int seanindex(int m,
101 int l,
102 int bw)
103 {
104 int bigL;
105
106 bigL = bw - 1;
107
108 if( m >= 0 )
109 return( m * ( bigL + 1 ) - ( ( m * (m - 1) ) /2 ) + ( l - m ) );
110 else
111 return( ( ( bigL * ( bigL + 3 ) ) /2 ) + 1 +
112 ( ( bigL + m ) * ( bigL + m + 1 ) / 2 ) + ( l - abs( m ) ) );
113 }
114
115
116 /************************************************************************/
117 /* performs a spherical harmonic transform using the semi-naive
118 and naive algorithms
119
120 bw -> bandwidth of problem
121 size -> size = 2*bw -> dimension of input array (recall that
122 sampling is done at twice the bandwidth)
123
124 The inputs rdata and idata are expected to be pointers to
125 size x size arrays. The array rdata contains the real parts
126 of the function samples, and idata contains the imaginary
127 parts.
128
129 rcoeffs and icoeffs are expected to be pointers to bw x bw arrays,
130 and will contain the harmonic coefficients in a "linearized" form.
131 The array rcoeffs contains the real parts of the coefficients,
132 and icoeffs contains the imaginary parts.
133
134 spharmonic_pml_table should be a (double **) pointer to
135 the result of a call to Spharmonic_Pml_Table. Because this
136 table is re-used in the inverse transform, and because for
137 timing purposes the computation of the table is not included,
138 it is passed in as an argument. Also, at some point this
139 code may be used as par of a series of convolutions, so
140 reducing repetitive computation is prioritized.
141
142 spharmonic_pml_table will be an array of (double *) pointers
143 the array being of length TableSize(m,bw)
144
145 workspace needs to be a double pointer to an array of size
146 (8 * bw^2) + (7 * bw).
147
148 cutoff -> what order to switch from semi-naive to naive
149 algorithm.
150
151 dataformat =0 -> samples are complex, =1 -> samples real
152
153
154 Output Ordering of coeffs f(m,l) is
155 f(0,0) f(0,1) f(0,2) ... f(0,bw-1)
156 f(1,1) f(1,2) ... f(1,bw-1)
157 etc.
158 f(bw-2,bw-2), f(bw-2,bw-1)
159 f(bw-1,bw-1)
160 f(-(bw-1),bw-1)
161 f(-(bw-2),bw-2) f(-(bw-2),bw-1)
162 etc.
163 f(-2,2) ... f(-2,bw-1)
164 f(-1,1) f(-1,2) ... f(-1,bw-1)
165
166
167 This only requires an array of size (bw*bw). If zero-padding
168 is used to make the indexing nice, then you need a an
169 (2bw-1) * bw array - but that is not done here.
170 Because of the amount of space necessary for doing
171 large transforms, it is important not to use any
172 more than necessary.
173
174 */
175
176 void FST_semi_memo(double *rdata, double *idata,
177 double *rcoeffs, double *icoeffs,
178 int bw,
179 double **seminaive_naive_table,
180 double *workspace,
181 int dataformat,
182 int cutoff,
183 fftw_plan *dctPlan,
184 fftw_plan *fftPlan,
185 double *weights )
186 {
187 int size, m, i, j;
188 int dummy0, dummy1 ;
189 double *rres, *ires;
190 double *rdataptr, *idataptr;
191 double *fltres, *scratchpad;
192 double *eval_pts;
193 double pow_one;
194 double tmpA, tmpSize ;
195
196 size = 2*bw ;
197 tmpSize = 1./ ((double ) size);
198 tmpA = sqrt( 2. * M_PI ) ;
199
200 rres = workspace; /* needs (size * size) = (4 * bw^2) */
201 ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */
202 fltres = ires + (size * size) ; /* needs bw */
203 eval_pts = fltres + bw; /* needs (2*bw) */
204 scratchpad = eval_pts + (2*bw); /* needs (4 * bw) */
205
206 /* total workspace is (8 * bw^2) + (7 * bw) */
207
208 /* do the FFTs along phi */
209 fftw_execute_split_dft( *fftPlan,
210 rdata, idata,
211 rres, ires ) ;
212 /*
213 normalize
214
215 note that I'm getting the sqrt(2pi) in there at
216 this point ... to account for the fact that the spherical
217 harmonics are of norm 1: I need to account for
218 the fact that the associated Legendres are
219 of norm 1
220 */
221 tmpSize *= tmpA ;
222 for( j = 0 ; j < size*size ; j ++ )
223 {
224 rres[j] *= tmpSize ;
225 ires[j] *= tmpSize ;
226 }
227
228 /* point to start of output data buffers */
229 rdataptr = rcoeffs;
230 idataptr = icoeffs;
231
232 for (m=0; m<bw; m++) {
233 /*
234 fprintf(stderr,"m = %d\n",m);
235 */
236
237 /*** test to see if before cutoff or after ***/
238 if (m < cutoff){
239
240 /* do the real part */
241 SemiNaiveReduced(rres+(m*size),
242 bw,
243 m,
244 fltres,
245 scratchpad,
246 seminaive_naive_table[m],
247 weights,
248 dctPlan);
249
250 /* now load real part of coefficients into output space */
251 memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
252
253 rdataptr += bw-m;
254
255 /* do imaginary part */
256 SemiNaiveReduced(ires+(m*size),
257 bw,
258 m,
259 fltres,
260 scratchpad,
261 seminaive_naive_table[m],
262 weights,
263 dctPlan);
264
265 /* now load imaginary part of coefficients into output space */
266 memcpy(idataptr, fltres, sizeof(double) * (bw - m));
267
268 idataptr += bw-m;
269
270 }
271 else{
272 /* do real part */
273 Naive_AnalysisX(rres+(m*size),
274 bw,
275 m,
276 weights,
277 fltres,
278 seminaive_naive_table[m],
279 scratchpad );
280 memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
281 rdataptr += bw-m;
282
283 /* do imaginary part */
284 Naive_AnalysisX(ires+(m*size),
285 bw,
286 m,
287 weights,
288 fltres,
289 seminaive_naive_table[m],
290 scratchpad );
291 memcpy(idataptr, fltres, sizeof(double) * (bw - m));
292 idataptr += bw-m;
293 }
294 }
295
296 /*** now do upper coefficients ****/
297
298 /* now if the data is real, we don't have to compute the
299 coefficients whose order is less than 0, i.e. since
300 the data is real, we know that
301 f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)),
302 so use that to get the rest of the coefficients
303
304 dataformat =0 -> samples are complex, =1 -> samples real
305
306 */
307
308 if( dataformat == 0 ){
309
310 /* note that m is greater than bw here, but this is for
311 purposes of indexing the input data arrays.
312 The "true" value of m as a parameter for Pml is
313 size - m */
314
315 for (m=bw+1; m<size; m++) {
316 /*
317 fprintf(stderr,"m = %d\n",-(size-m));
318 */
319
320 if ( (size-m) < cutoff )
321 {
322 /* do real part */
323 SemiNaiveReduced(rres+(m*size),
324 bw,
325 size-m,
326 fltres,
327 scratchpad,
328 seminaive_naive_table[size-m],
329 weights,
330 dctPlan );
331
332 /* now load real part of coefficients into output space */
333 if ((m % 2) != 0) {
334 for (i=0; i<m-bw; i++)
335 rdataptr[i] = -fltres[i];
336 }
337 else {
338 memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
339 }
340 rdataptr += m-bw;
341
342 /* do imaginary part */
343 SemiNaiveReduced(ires+(m*size),
344 bw,
345 size-m,
346 fltres,
347 scratchpad,
348 seminaive_naive_table[size-m],
349 weights,
350 dctPlan);
351
352 /* now load imag part of coefficients into output space */
353 if ((m % 2) != 0) {
354 for (i=0; i<m-bw; i++)
355 idataptr[i] = -fltres[i];
356 }
357 else {
358 memcpy(idataptr, fltres, sizeof(double) * (m - bw));
359 }
360 idataptr += m-bw;
361 }
362 else
363 {
364 Naive_AnalysisX(rres+(m*size),
365 bw,
366 size-m,
367 weights,
368 fltres,
369 seminaive_naive_table[size-m],
370 scratchpad);
371
372 /* now load real part of coefficients into output space */
373 if ((m % 2) != 0) {
374 for (i=0; i<m-bw; i++)
375 rdataptr[i] = -fltres[i];
376 }
377 else {
378 memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
379 }
380 rdataptr += m-bw;
381
382 /* do imaginary part */
383 Naive_AnalysisX(ires+(m*size),
384 bw,
385 size-m,
386 weights,
387 fltres,
388 seminaive_naive_table[size-m],
389 scratchpad);
390
391 /* now load imag part of coefficients into output space */
392 if ((m % 2) != 0) {
393 for (i=0; i<m-bw; i++)
394 idataptr[i] = -fltres[i];
395 }
396 else {
397 memcpy(idataptr, fltres, sizeof(double) * (m - bw));
398 }
399 idataptr += m-bw;
400
401 }
402
403 }
404 }
405 else /**** if the data is real ****/
406 {
407 pow_one = 1.0;
408 for(i = 1; i < bw; i++){
409 pow_one *= -1.0;
410 for( j = i; j < bw; j++){
411 /*
412 SEANINDEXP(dummy0,i,j,bw);
413 SEANINDEXN(dummy1,-i,j,bw);
414 */
415
416 dummy0 = seanindex(i, j, bw);
417 dummy1 = seanindex(-i, j, bw);
418
419 rcoeffs[dummy1] =
420 pow_one * rcoeffs[dummy0];
421 icoeffs[dummy1] =
422 -pow_one * icoeffs[dummy0];
423 }
424 }
425 }
426
427 }
428
429 /************************************************************************/
430 /* Inverse spherical harmonic transform.
431
432 bw -> bandwidth of problem
433 size = 2*bw
434
435 Inputs rcoeffs and icoeffs are harmonic coefficients stored
436 in (bw * bw) arrays in the order spec'ed above.
437
438 rdata and idata are (size x size) arrays with the transformed result.
439
440 transpose_spharmonic_pml_table should be the (double **)
441 result of a call to Transpose_Spharmonic_Pml_Table()
442
443 workspace is (8 * bw^2) + (10 * bw)
444
445 */
446
447 /* dataformat =0 -> samples are complex, =1 -> samples real */
448
449 void InvFST_semi_memo(double *rcoeffs, double *icoeffs,
450 double *rdata, double *idata,
451 int bw,
452 double **transpose_seminaive_naive_table,
453 double *workspace,
454 int dataformat,
455 int cutoff,
456 fftw_plan *idctPlan,
457 fftw_plan *ifftPlan )
458 {
459 int size, m, i, n;
460 double *rdataptr, *idataptr;
461 double *rfourdata, *ifourdata;
462 double *rinvfltres, *iminvfltres, *scratchpad;
463 double *sin_values, *eval_pts;
464 double tmpA ;
465
466 size = 2*bw ;
467
468 rfourdata = workspace; /* needs (size * size) */
469 ifourdata = rfourdata + (size * size); /* needs (size * size) */
470 rinvfltres = ifourdata + (size * size); /* needs (2 * bw) */
471 iminvfltres = rinvfltres + (2 * bw); /* needs (2 * bw) */
472 sin_values = iminvfltres + (2 * bw); /* needs (2 * bw) */
473 eval_pts = sin_values + (2 * bw); /* needs (2 * bw) */
474 scratchpad = eval_pts + (2 * bw); /* needs (2 * bw) */
475
476 /* total workspace = (8 * bw^2) + (10 * bw) */
477
478 /* load up the sin_values array */
479 n = 2*bw;
480
481 ArcCosEvalPts(n, eval_pts);
482 for (i=0; i<n; i++)
483 sin_values[i] = sin(eval_pts[i]);
484
485
486 /* Now do all of the inverse Legendre transforms */
487 rdataptr = rcoeffs;
488 idataptr = icoeffs;
489
490 for (m=0; m<bw; m++)
491 {
492 /*
493 fprintf(stderr,"m = %d\n",m);
494 */
495
496 if(m < cutoff)
497 {
498 /* do real part first */
499 InvSemiNaiveReduced(rdataptr,
500 bw,
501 m,
502 rinvfltres,
503 transpose_seminaive_naive_table[m],
504 sin_values,
505 scratchpad,
506 idctPlan );
507
508 /* now do imaginary part */
509
510 InvSemiNaiveReduced(idataptr,
511 bw,
512 m,
513 iminvfltres,
514 transpose_seminaive_naive_table[m],
515 sin_values,
516 scratchpad,
517 idctPlan);
518
519 /* will store normal, then tranpose before doing inverse fft */
520 memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
521 memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
522
523 /* move to next set of coeffs */
524 rdataptr += bw-m;
525 idataptr += bw-m;
526
527 }
528 else
529 {
530
531 /* first do the real part */
532 Naive_SynthesizeX(rdataptr,
533 bw,
534 m,
535 rinvfltres,
536 transpose_seminaive_naive_table[m]);
537
538 /* now do the imaginary */
539 Naive_SynthesizeX(idataptr,
540 bw,
541 m,
542 iminvfltres,
543 transpose_seminaive_naive_table[m]);
544
545 /* will store normal, then tranpose before doing inverse fft */
546 memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
547 memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
548
549 /* move to next set of coeffs */
550
551 rdataptr += bw-m;
552 idataptr += bw-m;
553
554 }
555 }
556 /* closes m loop */
557
558 /* now fill in zero values where m = bw (from problem definition) */
559 memset(rfourdata + (bw * size), 0, sizeof(double) * size);
560 memset(ifourdata + (bw * size), 0, sizeof(double) * size);
561
562 /* now if the data is real, we don't have to compute the
563 coefficients whose order is less than 0, i.e. since
564 the data is real, we know that
565 invf-hat(l,-m) = conjugate(invf-hat(l,m)),
566 so use that to get the rest of the real data
567
568 dataformat =0 -> samples are complex, =1 -> samples real
569
570 */
571
572 if(dataformat == 0){
573
574 /* now do negative m values */
575
576 for (m=bw+1; m<size; m++)
577 {
578 /*
579 fprintf(stderr,"m = %d\n",-(size-m));
580 */
581
582 if ( (size-m) < cutoff )
583 {
584 /* do real part first */
585 InvSemiNaiveReduced(rdataptr,
586 bw,
587 size - m,
588 rinvfltres,
589 transpose_seminaive_naive_table[size - m],
590 sin_values,
591 scratchpad,
592 idctPlan);
593
594 /* now do imaginary part */
595 InvSemiNaiveReduced(idataptr,
596 bw,
597 size - m,
598 iminvfltres,
599 transpose_seminaive_naive_table[size - m],
600 sin_values,
601 scratchpad,
602 idctPlan );
603
604 /* will store normal, then tranpose before doing inverse fft */
605 if ((m % 2) != 0)
606 for(i=0; i< size; i++){
607 rinvfltres[i] = -rinvfltres[i];
608 iminvfltres[i] = -iminvfltres[i];
609 }
610
611 memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
612 memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
613
614 /* move to next set of coeffs */
615 rdataptr += bw-(size-m);
616 idataptr += bw-(size-m);
617 }
618 else
619 {
620 /* first do the real part */
621 Naive_SynthesizeX(rdataptr,
622 bw,
623 size-m,
624 rinvfltres,
625 transpose_seminaive_naive_table[size-m]);
626
627 /* now do the imaginary */
628 Naive_SynthesizeX(idataptr,
629 bw,
630 size-m,
631 iminvfltres,
632 transpose_seminaive_naive_table[size-m]);
633
634 /* will store normal, then tranpose before doing inverse fft */
635 if ((m % 2) != 0)
636 for(i=0; i< size; i++){
637 rinvfltres[i] = -rinvfltres[i];
638 iminvfltres[i] = -iminvfltres[i];
639 }
640
641 memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
642 memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
643
644 /* move to next set of coeffs */
645 rdataptr += bw-(size-m);
646 idataptr += bw-(size-m);
647
648 }
649
650 } /* closes m loop */
651 }
652 else {
653 for(m = bw + 1; m < size; m++){
654
655 memcpy(rfourdata+(m*size), rfourdata+((size-m)*size),
656 sizeof(double) * size);
657 memcpy(ifourdata+(m*size), ifourdata+((size-m)*size),
658 sizeof(double) * size);
659 for(i = 0; i < size; i++)
660 ifourdata[(m*size)+i] *= -1.0;
661 }
662 }
663
664 /* normalize */
665 tmpA = 1./(sqrt(2.*M_PI) );
666 for(i=0;i<4*bw*bw;i++)
667 {
668 rfourdata[i] *= tmpA ;
669 ifourdata[i] *= tmpA ;
670 }
671
672
673 fftw_execute_split_dft( *ifftPlan,
674 ifourdata, rfourdata,
675 idata, rdata );
676 /* amscray */
677
678 }
679
680 /************************************************************************/
681 /*
682 Zonal Harmonic transform using seminaive algorithm - used in convolutions
683
684 bw -> bandwidth of problem
685
686 size = 2 * bw
687
688 rdata and idata should be pointers to size x size arrays.
689 rres and ires should be pointers to double arrays of size bw.
690
691 cos_pml_table contains Legendre coefficients of P(0,l) functions
692 and is result of CosPmlTableGen for m = 0;
693 FZT_semi only computes spherical harmonics for m=0.
694
695 dataformat =0 -> samples are complex, =1 -> samples real
696
697 workspace needed is (12 * bw)
698
699 */
700
701 void FZT_semi_memo(double *rdata, double *idata,
702 double *rres, double *ires,
703 int bw,
704 double *cos_pml_table,
705 double *workspace,
706 int dataformat,
707 fftw_plan *dctPlan,
708 double *weights )
709 {
710 int i, j, size;
711 double *r0, *i0, dsize;
712 double tmpreal, tmpimag;
713 double tmpA ;
714 double *scratchpad ;
715
716 size = 2*bw ;
717
718 /* assign memory */
719 r0 = workspace; /* needs (2 * bw) */
720 i0 = r0 + (2 * bw); /* needs (2 * bw) */
721 scratchpad = i0 + (2 * bw); /* needs (4 * bw) */
722
723 /* total workspace = 13*bw */
724
725 dsize = 1.0 / ((double) size);
726 tmpA = sqrt( 2.* M_PI );
727 dsize *= tmpA ;
728
729 /* compute the m = 0 components */
730 for (i=0; i<size; i++) {
731 tmpreal = 0.0;
732 tmpimag = 0.0;
733
734 for(j=0; j<size; j++) {
735 tmpreal += rdata[(i*size)+j];
736 tmpimag += idata[(i*size)+j];
737 }
738 /* normalize */
739 r0[i] = tmpreal*dsize;
740 i0[i] = tmpimag*dsize;
741 }
742
743 /* do the real part */
744 SemiNaiveReduced(r0,
745 bw,
746 0,
747 rres,
748 scratchpad,
749 cos_pml_table,
750 weights,
751 dctPlan);
752
753 if(dataformat == 0) /* do imaginary part */
754 SemiNaiveReduced(i0,
755 bw,
756 0,
757 ires,
758 scratchpad,
759 cos_pml_table,
760 weights,
761 dctPlan);
762 else /* otherwise set coefficients = 0 */
763 memset(ires, 0, sizeof(double) * size);
764
765 }
766
767 /************************************************************************/
768 /*
769 multiplies harmonic coefficients of a function and a filter.
770 See convolution theorem of Driscoll and Healy for details.
771
772 bw -> bandwidth of problem
773 size = 2*bw
774
775 datacoeffs should be output of an FST, filtercoeffs the
776 output of an FZT. There should be (bw * bw) datacoeffs,
777 and bw filtercoeffs.
778 rres and ires should point to arrays of dimension bw * bw.
779
780 */
781
782 void TransMult(double *rdatacoeffs, double *idatacoeffs,
783 double *rfiltercoeffs, double *ifiltercoeffs,
784 double *rres, double *ires,
785 int bw)
786 {
787
788 int m, l, size;
789 double *rdptr, *idptr, *rrptr, *irptr;
790
791 size = 2*bw ;
792
793 rdptr = rdatacoeffs;
794 idptr = idatacoeffs;
795 rrptr = rres;
796 irptr = ires;
797
798 for (m=0; m<bw; m++) {
799 for (l=m; l<bw; l++) {
800 compmult(rfiltercoeffs[l], ifiltercoeffs[l],
801 rdptr[l-m], idptr[l-m],
802 rrptr[l-m], irptr[l-m]);
803
804 rrptr[l-m] *= sqrt(4*M_PI/(2*l+1));
805 irptr[l-m] *= sqrt(4*M_PI/(2*l+1));
806
807 }
808 rdptr += bw-m; idptr += bw-m;
809 rrptr += bw-m; irptr += bw-m;
810 }
811 for (m=bw+1; m<size; m++) {
812 for (l=size-m; l<bw; l++){
813 compmult(rfiltercoeffs[l], ifiltercoeffs[l],
814 rdptr[l-size+m], idptr[l-size+m],
815 rrptr[l-size+m], irptr[l-size+m]);
816
817 rrptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
818 irptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
819
820 }
821 rdptr += m-bw; idptr += m-bw;
822 rrptr += m-bw; irptr += m-bw;
823 }
824
825 }
826
827 /************************************************************************/
828 /* Here's the big banana
829 Convolves two functions defined on the 2-sphere.
830 Uses seminaive algorithms for spherical harmonic transforms
831
832 size = 2*bw
833
834 Inputs:
835
836 rdata, idata - (size * size) arrays containing real and
837 imaginary parts of sampled function.
838 rfilter, ifilter - (size * size) arrays containing real and
839 imaginary parts of sampled filter function.
840 rres, ires - (size * size) arrays containing real and
841 imaginary parts of result function.
842
843
844 Suggestion - if you want to do multiple convolutions,
845 don't keep allocating and freeing space with every call,
846 or keep recomputing the spharmonic_pml tables.
847 Allocate workspace once before you call this function, then
848 just set up pointers as first step of this procedure rather
849 than mallocing. And do the same with the FST, FZT, and InvFST functions.
850
851 ASSUMPTIONS:
852 1. data is strictly REAL
853 2. will do semi-naive algorithm for ALL orders -> change the cutoff
854 value if you want it to be different
855
856 Memory requirements for Conv2Sphere
857
858 Need space for spharmonic tables and local workspace and
859 scratchpad space for FST_semi
860
861 Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
862 Reduced_SpharmonicTableSize(bw,cutoff)
863
864 Then the workspace needs to be this large:
865
866 2 * legendreSize +
867 8 * (bw*bw) + 10*bw +
868 4 * (bw*bw) + 2*bw
869
870 for a total of
871
872 2 * legendreSize +
873 12 * (bw*bw) + 12*bw ;
874
875
876
877 */
878 void Conv2Sphere_semi_memo(double *rdata, double *idata,
879 double *rfilter, double *ifilter,
880 double *rres, double *ires,
881 int bw,
882 double *workspace)
883 {
884 int size, spharmonic_bound ;
885 int legendreSize, cutoff ;
886 double *frres, *fires, *filtrres, *filtires, *trres, *tires;
887 double **spharmonic_pml_table, **transpose_spharmonic_pml_table;
888 double *spharmonic_result_space, *transpose_spharmonic_result_space;
889 double *scratchpad;
890
891 /* fftw */
892 int rank, howmany_rank ;
893 fftw_iodim dims[1], howmany_dims[1];
894
895 /* forward transform stuff */
896 fftw_plan dctPlan, fftPlan ;
897 double *weights ;
898
899 /* inverse transform stuff */
900 fftw_plan idctPlan, ifftPlan ;
901
902 size =2*bw ;
903 cutoff = bw ;
904 legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
905 Reduced_SpharmonicTableSize(bw,cutoff) ;
906
907 /* assign space */
908
909 spharmonic_bound = legendreSize ;
910
911 spharmonic_result_space = workspace; /* needs legendreSize */
912
913 transpose_spharmonic_result_space =
914 spharmonic_result_space + legendreSize ; /* needs legendreSize */
915
916 frres = transpose_spharmonic_result_space +
917 legendreSize ; /* needs (bw*bw) */
918 fires = frres + (bw*bw); /* needs (bw*bw) */
919 trres = fires + (bw*bw); /* needs (bw*bw) */
920 tires = trres + (bw*bw); /* needs (bw*bw) */
921 filtrres = tires + (bw*bw); /* needs bw */
922 filtires = filtrres + bw; /* needs bw */
923 scratchpad = filtires + bw; /* needs (8*bw^2)+(10*bw) */
924
925 /* allocate space, and compute, the weights for this bandwidth */
926 weights = (double *) malloc(sizeof(double) * 4 * bw);
927 makeweights( bw, weights );
928
929 /* make the fftw plans */
930
931 /* make DCT plans -> note that I will be using the GURU
932 interface to execute these plans within the routines*/
933
934 /* forward DCT */
935 dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
936 FFTW_REDFT10, FFTW_ESTIMATE ) ;
937
938 /* inverse DCT */
939 idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
940 FFTW_REDFT01, FFTW_ESTIMATE );
941
942 /*
943 fft "preamble" ;
944 note that this plan places the output in a transposed array
945 */
946 rank = 1 ;
947 dims[0].n = 2*bw ;
948 dims[0].is = 1 ;
949 dims[0].os = 2*bw ;
950 howmany_rank = 1 ;
951 howmany_dims[0].n = 2*bw ;
952 howmany_dims[0].is = 2*bw ;
953 howmany_dims[0].os = 1 ;
954
955 /* forward fft */
956 fftPlan = fftw_plan_guru_split_dft( rank, dims,
957 howmany_rank, howmany_dims,
958 rdata, idata,
959 workspace, workspace+(4*bw*bw),
960 FFTW_ESTIMATE );
961
962 /*
963 now plan for inverse fft - note that this plans assumes
964 that I'm working with a transposed array, e.g. the inputs
965 for a length 2*bw transform are placed every 2*bw apart,
966 the output will be consecutive entries in the array
967 */
968 rank = 1 ;
969 dims[0].n = 2*bw ;
970 dims[0].is = 2*bw ;
971 dims[0].os = 1 ;
972 howmany_rank = 1 ;
973 howmany_dims[0].n = 2*bw ;
974 howmany_dims[0].is = 1 ;
975 howmany_dims[0].os = 2*bw ;
976
977 /* inverse fft */
978 ifftPlan = fftw_plan_guru_split_dft( rank, dims,
979 howmany_rank, howmany_dims,
980 rdata, idata,
981 workspace, workspace+(4*bw*bw),
982 FFTW_ESTIMATE );
983
984
985 /* precompute the associated Legendre fcts */
986 spharmonic_pml_table =
987 Spharmonic_Pml_Table(bw,
988 spharmonic_result_space,
989 scratchpad);
990
991 transpose_spharmonic_pml_table =
992 Transpose_Spharmonic_Pml_Table(spharmonic_pml_table,
993 bw,
994 transpose_spharmonic_result_space,
995 scratchpad);
996 FST_semi_memo(rdata, idata,
997 frres, fires,
998 bw,
999 spharmonic_pml_table,
1000 scratchpad,
1001 1,
1002 bw,
1003 &dctPlan,
1004 &fftPlan,
1005 weights );
1006
1007 FZT_semi_memo(rfilter, ifilter,
1008 filtrres, filtires,
1009 bw,
1010 spharmonic_pml_table[0],
1011 scratchpad,
1012 1,
1013 &dctPlan,
1014 weights );
1015
1016 TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
1017
1018 InvFST_semi_memo(trres, tires,
1019 rres, ires,
1020 bw,
1021 transpose_spharmonic_pml_table,
1022 scratchpad,
1023 1,
1024 bw,
1025 &idctPlan,
1026 &ifftPlan );
1027
1028 free( weights ) ;
1029
1030 /***
1031 have to free the memory that was allocated in
1032 Spharmonic_Pml_Table() and
1033 Transpose_Spharmonic_Pml_Table()
1034 ***/
1035
1036 free(spharmonic_pml_table);
1037 free(transpose_spharmonic_pml_table);
1038
1039 /* destroy plans */
1040 fftw_destroy_plan( ifftPlan ) ;
1041 fftw_destroy_plan( fftPlan ) ;
1042 fftw_destroy_plan( idctPlan ) ;
1043 fftw_destroy_plan( dctPlan ) ;
1044 }