ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/SHAPES/FST_semi_memo.c
Revision: 1287
Committed: Wed Jun 23 20:18:48 2004 UTC (20 years ago) by chrisfen
Content type: text/plain
File size: 27350 byte(s)
Log Message:
Major progress towards inclusion of spherical harmonic transform capability - still having some build issues...

File Contents

# User Rev Content
1 chrisfen 1287 /***************************************************************************
2     **************************************************************************
3    
4     S2kit 1.0
5    
6     A lite version of Spherical Harmonic Transform Kit
7    
8     Peter Kostelec, Dan Rockmore
9     {geelong,rockmore}@cs.dartmouth.edu
10    
11     Contact: Peter Kostelec
12     geelong@cs.dartmouth.edu
13    
14     Copyright 2004 Peter Kostelec, Dan Rockmore
15    
16     This file is part of S2kit.
17    
18     S2kit is free software; you can redistribute it and/or modify
19     it under the terms of the GNU General Public License as published by
20     the Free Software Foundation; either version 2 of the License, or
21     (at your option) any later version.
22    
23     S2kit is distributed in the hope that it will be useful,
24     but WITHOUT ANY WARRANTY; without even the implied warranty of
25     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26     GNU General Public License for more details.
27    
28     You should have received a copy of the GNU General Public License
29     along with S2kit; if not, write to the Free Software
30     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31    
32     See the accompanying LICENSE file for details.
33    
34     ************************************************************************
35     ************************************************************************/
36    
37    
38    
39     /********************************************************************
40    
41     FST_semi_memo.c - routines to perform convolutions on the
42     2-sphere using a combination of semi-naive and naive algorithms.
43    
44     ASSUMES THAT ALL PRECOMPUTED-DATA IS IN MEMORY, AND NOT TO BE
45     READ FROM THE DISK.
46    
47     The primary functions in this package are
48    
49     1) FST_semi_memo() - computes the spherical harmonic expansion.
50     2) InvFST_semi_memo() - computes the inverse spherical harmonic transform.
51     3) FZT_semi_memo() - computes the zonal harmonic transform.
52     4) TransMult() - Multiplies harmonic coefficients using Driscoll-Healy
53     result. Dual of convolution in "time" domain.
54     5) Conv2Sphere_semi_memo() - Convolves two functins defined on the 2-sphere,
55     using seminaive transform
56    
57     and one utility function:
58    
59     1) seanindex(): Given bandwidth bw, seanindex(m,l,bw) will give the
60     position of the coefficient f-hat(m,l) in the one-row array
61    
62     For descriptions on calling these functions, see the documentation
63     preceding each function.
64    
65     */
66    
67     #include <math.h>
68     #include <stdio.h>
69     #include <stdlib.h>
70     #include <string.h>
71    
72     #include "fftw3.h"
73    
74     #include "makeweights.h"
75     #include "cospmls.h"
76     #include "primitive.h"
77     #include "naive_synthesis.h"
78     #include "seminaive.h"
79     #include "FST_semi_memo.h"
80    
81    
82    
83     /************************************************************************/
84    
85    
86     /*****************************************************************
87    
88     Given bandwidth bw, seanindex(m,l,bw) will give the position of the
89     coefficient f-hat(m,l) in the one-row array that Sean stores the spherical
90     coefficients. This is needed to help preserve the symmetry that the
91     coefficients have: (l = degree, m = order, and abs(m) <= l)
92    
93     f-hat(l,-m) = (-1)^m * conjugate( f-hat(l,m) )
94    
95     Thanks for your help Mark!
96    
97     ******************************************************************/
98    
99    
100     int seanindex(int m,
101     int l,
102     int bw)
103     {
104     int bigL;
105    
106     bigL = bw - 1;
107    
108     if( m >= 0 )
109     return( m * ( bigL + 1 ) - ( ( m * (m - 1) ) /2 ) + ( l - m ) );
110     else
111     return( ( ( bigL * ( bigL + 3 ) ) /2 ) + 1 +
112     ( ( bigL + m ) * ( bigL + m + 1 ) / 2 ) + ( l - abs( m ) ) );
113     }
114    
115    
116     /************************************************************************/
117     /* performs a spherical harmonic transform using the semi-naive
118     and naive algorithms
119    
120     bw -> bandwidth of problem
121     size -> size = 2*bw -> dimension of input array (recall that
122     sampling is done at twice the bandwidth)
123    
124     The inputs rdata and idata are expected to be pointers to
125     size x size arrays. The array rdata contains the real parts
126     of the function samples, and idata contains the imaginary
127     parts.
128    
129     rcoeffs and icoeffs are expected to be pointers to bw x bw arrays,
130     and will contain the harmonic coefficients in a "linearized" form.
131     The array rcoeffs contains the real parts of the coefficients,
132     and icoeffs contains the imaginary parts.
133    
134     spharmonic_pml_table should be a (double **) pointer to
135     the result of a call to Spharmonic_Pml_Table. Because this
136     table is re-used in the inverse transform, and because for
137     timing purposes the computation of the table is not included,
138     it is passed in as an argument. Also, at some point this
139     code may be used as par of a series of convolutions, so
140     reducing repetitive computation is prioritized.
141    
142     spharmonic_pml_table will be an array of (double *) pointers
143     the array being of length TableSize(m,bw)
144    
145     workspace needs to be a double pointer to an array of size
146     (8 * bw^2) + (7 * bw).
147    
148     cutoff -> what order to switch from semi-naive to naive
149     algorithm.
150    
151     dataformat =0 -> samples are complex, =1 -> samples real
152    
153    
154     Output Ordering of coeffs f(m,l) is
155     f(0,0) f(0,1) f(0,2) ... f(0,bw-1)
156     f(1,1) f(1,2) ... f(1,bw-1)
157     etc.
158     f(bw-2,bw-2), f(bw-2,bw-1)
159     f(bw-1,bw-1)
160     f(-(bw-1),bw-1)
161     f(-(bw-2),bw-2) f(-(bw-2),bw-1)
162     etc.
163     f(-2,2) ... f(-2,bw-1)
164     f(-1,1) f(-1,2) ... f(-1,bw-1)
165    
166    
167     This only requires an array of size (bw*bw). If zero-padding
168     is used to make the indexing nice, then you need a an
169     (2bw-1) * bw array - but that is not done here.
170     Because of the amount of space necessary for doing
171     large transforms, it is important not to use any
172     more than necessary.
173    
174     */
175    
176     void FST_semi_memo(double *rdata, double *idata,
177     double *rcoeffs, double *icoeffs,
178     int bw,
179     double **seminaive_naive_table,
180     double *workspace,
181     int dataformat,
182     int cutoff,
183     fftw_plan *dctPlan,
184     fftw_plan *fftPlan,
185     double *weights )
186     {
187     int size, m, i, j;
188     int dummy0, dummy1 ;
189     double *rres, *ires;
190     double *rdataptr, *idataptr;
191     double *fltres, *scratchpad;
192     double *eval_pts;
193     double pow_one;
194     double tmpA, tmpSize ;
195    
196     size = 2*bw ;
197     tmpSize = 1./ ((double ) size);
198     tmpA = sqrt( 2. * M_PI ) ;
199    
200     rres = workspace; /* needs (size * size) = (4 * bw^2) */
201     ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */
202     fltres = ires + (size * size) ; /* needs bw */
203     eval_pts = fltres + bw; /* needs (2*bw) */
204     scratchpad = eval_pts + (2*bw); /* needs (4 * bw) */
205    
206     /* total workspace is (8 * bw^2) + (7 * bw) */
207    
208     /* do the FFTs along phi */
209     fftw_execute_split_dft( *fftPlan,
210     rdata, idata,
211     rres, ires ) ;
212     /*
213     normalize
214    
215     note that I'm getting the sqrt(2pi) in there at
216     this point ... to account for the fact that the spherical
217     harmonics are of norm 1: I need to account for
218     the fact that the associated Legendres are
219     of norm 1
220     */
221     tmpSize *= tmpA ;
222     for( j = 0 ; j < size*size ; j ++ )
223     {
224     rres[j] *= tmpSize ;
225     ires[j] *= tmpSize ;
226     }
227    
228     /* point to start of output data buffers */
229     rdataptr = rcoeffs;
230     idataptr = icoeffs;
231    
232     for (m=0; m<bw; m++) {
233     /*
234     fprintf(stderr,"m = %d\n",m);
235     */
236    
237     /*** test to see if before cutoff or after ***/
238     if (m < cutoff){
239    
240     /* do the real part */
241     SemiNaiveReduced(rres+(m*size),
242     bw,
243     m,
244     fltres,
245     scratchpad,
246     seminaive_naive_table[m],
247     weights,
248     dctPlan);
249    
250     /* now load real part of coefficients into output space */
251     memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
252    
253     rdataptr += bw-m;
254    
255     /* do imaginary part */
256     SemiNaiveReduced(ires+(m*size),
257     bw,
258     m,
259     fltres,
260     scratchpad,
261     seminaive_naive_table[m],
262     weights,
263     dctPlan);
264    
265     /* now load imaginary part of coefficients into output space */
266     memcpy(idataptr, fltres, sizeof(double) * (bw - m));
267    
268     idataptr += bw-m;
269    
270     }
271     else{
272     /* do real part */
273     Naive_AnalysisX(rres+(m*size),
274     bw,
275     m,
276     weights,
277     fltres,
278     seminaive_naive_table[m],
279     scratchpad );
280     memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
281     rdataptr += bw-m;
282    
283     /* do imaginary part */
284     Naive_AnalysisX(ires+(m*size),
285     bw,
286     m,
287     weights,
288     fltres,
289     seminaive_naive_table[m],
290     scratchpad );
291     memcpy(idataptr, fltres, sizeof(double) * (bw - m));
292     idataptr += bw-m;
293     }
294     }
295    
296     /*** now do upper coefficients ****/
297    
298     /* now if the data is real, we don't have to compute the
299     coefficients whose order is less than 0, i.e. since
300     the data is real, we know that
301     f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)),
302     so use that to get the rest of the coefficients
303    
304     dataformat =0 -> samples are complex, =1 -> samples real
305    
306     */
307    
308     if( dataformat == 0 ){
309    
310     /* note that m is greater than bw here, but this is for
311     purposes of indexing the input data arrays.
312     The "true" value of m as a parameter for Pml is
313     size - m */
314    
315     for (m=bw+1; m<size; m++) {
316     /*
317     fprintf(stderr,"m = %d\n",-(size-m));
318     */
319    
320     if ( (size-m) < cutoff )
321     {
322     /* do real part */
323     SemiNaiveReduced(rres+(m*size),
324     bw,
325     size-m,
326     fltres,
327     scratchpad,
328     seminaive_naive_table[size-m],
329     weights,
330     dctPlan );
331    
332     /* now load real part of coefficients into output space */
333     if ((m % 2) != 0) {
334     for (i=0; i<m-bw; i++)
335     rdataptr[i] = -fltres[i];
336     }
337     else {
338     memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
339     }
340     rdataptr += m-bw;
341    
342     /* do imaginary part */
343     SemiNaiveReduced(ires+(m*size),
344     bw,
345     size-m,
346     fltres,
347     scratchpad,
348     seminaive_naive_table[size-m],
349     weights,
350     dctPlan);
351    
352     /* now load imag part of coefficients into output space */
353     if ((m % 2) != 0) {
354     for (i=0; i<m-bw; i++)
355     idataptr[i] = -fltres[i];
356     }
357     else {
358     memcpy(idataptr, fltres, sizeof(double) * (m - bw));
359     }
360     idataptr += m-bw;
361     }
362     else
363     {
364     Naive_AnalysisX(rres+(m*size),
365     bw,
366     size-m,
367     weights,
368     fltres,
369     seminaive_naive_table[size-m],
370     scratchpad);
371    
372     /* now load real part of coefficients into output space */
373     if ((m % 2) != 0) {
374     for (i=0; i<m-bw; i++)
375     rdataptr[i] = -fltres[i];
376     }
377     else {
378     memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
379     }
380     rdataptr += m-bw;
381    
382     /* do imaginary part */
383     Naive_AnalysisX(ires+(m*size),
384     bw,
385     size-m,
386     weights,
387     fltres,
388     seminaive_naive_table[size-m],
389     scratchpad);
390    
391     /* now load imag part of coefficients into output space */
392     if ((m % 2) != 0) {
393     for (i=0; i<m-bw; i++)
394     idataptr[i] = -fltres[i];
395     }
396     else {
397     memcpy(idataptr, fltres, sizeof(double) * (m - bw));
398     }
399     idataptr += m-bw;
400    
401     }
402    
403     }
404     }
405     else /**** if the data is real ****/
406     {
407     pow_one = 1.0;
408     for(i = 1; i < bw; i++){
409     pow_one *= -1.0;
410     for( j = i; j < bw; j++){
411     /*
412     SEANINDEXP(dummy0,i,j,bw);
413     SEANINDEXN(dummy1,-i,j,bw);
414     */
415    
416     dummy0 = seanindex(i, j, bw);
417     dummy1 = seanindex(-i, j, bw);
418    
419     rcoeffs[dummy1] =
420     pow_one * rcoeffs[dummy0];
421     icoeffs[dummy1] =
422     -pow_one * icoeffs[dummy0];
423     }
424     }
425     }
426    
427     }
428    
429     /************************************************************************/
430     /* Inverse spherical harmonic transform.
431    
432     bw -> bandwidth of problem
433     size = 2*bw
434    
435     Inputs rcoeffs and icoeffs are harmonic coefficients stored
436     in (bw * bw) arrays in the order spec'ed above.
437    
438     rdata and idata are (size x size) arrays with the transformed result.
439    
440     transpose_spharmonic_pml_table should be the (double **)
441     result of a call to Transpose_Spharmonic_Pml_Table()
442    
443     workspace is (8 * bw^2) + (10 * bw)
444    
445     */
446    
447     /* dataformat =0 -> samples are complex, =1 -> samples real */
448    
449     void InvFST_semi_memo(double *rcoeffs, double *icoeffs,
450     double *rdata, double *idata,
451     int bw,
452     double **transpose_seminaive_naive_table,
453     double *workspace,
454     int dataformat,
455     int cutoff,
456     fftw_plan *idctPlan,
457     fftw_plan *ifftPlan )
458     {
459     int size, m, i, n;
460     double *rdataptr, *idataptr;
461     double *rfourdata, *ifourdata;
462     double *rinvfltres, *iminvfltres, *scratchpad;
463     double *sin_values, *eval_pts;
464     double tmpA ;
465    
466     size = 2*bw ;
467    
468     rfourdata = workspace; /* needs (size * size) */
469     ifourdata = rfourdata + (size * size); /* needs (size * size) */
470     rinvfltres = ifourdata + (size * size); /* needs (2 * bw) */
471     iminvfltres = rinvfltres + (2 * bw); /* needs (2 * bw) */
472     sin_values = iminvfltres + (2 * bw); /* needs (2 * bw) */
473     eval_pts = sin_values + (2 * bw); /* needs (2 * bw) */
474     scratchpad = eval_pts + (2 * bw); /* needs (2 * bw) */
475    
476     /* total workspace = (8 * bw^2) + (10 * bw) */
477    
478     /* load up the sin_values array */
479     n = 2*bw;
480    
481     ArcCosEvalPts(n, eval_pts);
482     for (i=0; i<n; i++)
483     sin_values[i] = sin(eval_pts[i]);
484    
485    
486     /* Now do all of the inverse Legendre transforms */
487     rdataptr = rcoeffs;
488     idataptr = icoeffs;
489    
490     for (m=0; m<bw; m++)
491     {
492     /*
493     fprintf(stderr,"m = %d\n",m);
494     */
495    
496     if(m < cutoff)
497     {
498     /* do real part first */
499     InvSemiNaiveReduced(rdataptr,
500     bw,
501     m,
502     rinvfltres,
503     transpose_seminaive_naive_table[m],
504     sin_values,
505     scratchpad,
506     idctPlan );
507    
508     /* now do imaginary part */
509    
510     InvSemiNaiveReduced(idataptr,
511     bw,
512     m,
513     iminvfltres,
514     transpose_seminaive_naive_table[m],
515     sin_values,
516     scratchpad,
517     idctPlan);
518    
519     /* will store normal, then tranpose before doing inverse fft */
520     memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
521     memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
522    
523     /* move to next set of coeffs */
524     rdataptr += bw-m;
525     idataptr += bw-m;
526    
527     }
528     else
529     {
530    
531     /* first do the real part */
532     Naive_SynthesizeX(rdataptr,
533     bw,
534     m,
535     rinvfltres,
536     transpose_seminaive_naive_table[m]);
537    
538     /* now do the imaginary */
539     Naive_SynthesizeX(idataptr,
540     bw,
541     m,
542     iminvfltres,
543     transpose_seminaive_naive_table[m]);
544    
545     /* will store normal, then tranpose before doing inverse fft */
546     memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
547     memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
548    
549     /* move to next set of coeffs */
550    
551     rdataptr += bw-m;
552     idataptr += bw-m;
553    
554     }
555     }
556     /* closes m loop */
557    
558     /* now fill in zero values where m = bw (from problem definition) */
559     memset(rfourdata + (bw * size), 0, sizeof(double) * size);
560     memset(ifourdata + (bw * size), 0, sizeof(double) * size);
561    
562     /* now if the data is real, we don't have to compute the
563     coefficients whose order is less than 0, i.e. since
564     the data is real, we know that
565     invf-hat(l,-m) = conjugate(invf-hat(l,m)),
566     so use that to get the rest of the real data
567    
568     dataformat =0 -> samples are complex, =1 -> samples real
569    
570     */
571    
572     if(dataformat == 0){
573    
574     /* now do negative m values */
575    
576     for (m=bw+1; m<size; m++)
577     {
578     /*
579     fprintf(stderr,"m = %d\n",-(size-m));
580     */
581    
582     if ( (size-m) < cutoff )
583     {
584     /* do real part first */
585     InvSemiNaiveReduced(rdataptr,
586     bw,
587     size - m,
588     rinvfltres,
589     transpose_seminaive_naive_table[size - m],
590     sin_values,
591     scratchpad,
592     idctPlan);
593    
594     /* now do imaginary part */
595     InvSemiNaiveReduced(idataptr,
596     bw,
597     size - m,
598     iminvfltres,
599     transpose_seminaive_naive_table[size - m],
600     sin_values,
601     scratchpad,
602     idctPlan );
603    
604     /* will store normal, then tranpose before doing inverse fft */
605     if ((m % 2) != 0)
606     for(i=0; i< size; i++){
607     rinvfltres[i] = -rinvfltres[i];
608     iminvfltres[i] = -iminvfltres[i];
609     }
610    
611     memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
612     memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
613    
614     /* move to next set of coeffs */
615     rdataptr += bw-(size-m);
616     idataptr += bw-(size-m);
617     }
618     else
619     {
620     /* first do the real part */
621     Naive_SynthesizeX(rdataptr,
622     bw,
623     size-m,
624     rinvfltres,
625     transpose_seminaive_naive_table[size-m]);
626    
627     /* now do the imaginary */
628     Naive_SynthesizeX(idataptr,
629     bw,
630     size-m,
631     iminvfltres,
632     transpose_seminaive_naive_table[size-m]);
633    
634     /* will store normal, then tranpose before doing inverse fft */
635     if ((m % 2) != 0)
636     for(i=0; i< size; i++){
637     rinvfltres[i] = -rinvfltres[i];
638     iminvfltres[i] = -iminvfltres[i];
639     }
640    
641     memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
642     memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
643    
644     /* move to next set of coeffs */
645     rdataptr += bw-(size-m);
646     idataptr += bw-(size-m);
647    
648     }
649    
650     } /* closes m loop */
651     }
652     else {
653     for(m = bw + 1; m < size; m++){
654    
655     memcpy(rfourdata+(m*size), rfourdata+((size-m)*size),
656     sizeof(double) * size);
657     memcpy(ifourdata+(m*size), ifourdata+((size-m)*size),
658     sizeof(double) * size);
659     for(i = 0; i < size; i++)
660     ifourdata[(m*size)+i] *= -1.0;
661     }
662     }
663    
664     /* normalize */
665     tmpA = 1./(sqrt(2.*M_PI) );
666     for(i=0;i<4*bw*bw;i++)
667     {
668     rfourdata[i] *= tmpA ;
669     ifourdata[i] *= tmpA ;
670     }
671    
672    
673     fftw_execute_split_dft( *ifftPlan,
674     ifourdata, rfourdata,
675     idata, rdata );
676     /* amscray */
677    
678     }
679    
680     /************************************************************************/
681     /*
682     Zonal Harmonic transform using seminaive algorithm - used in convolutions
683    
684     bw -> bandwidth of problem
685    
686     size = 2 * bw
687    
688     rdata and idata should be pointers to size x size arrays.
689     rres and ires should be pointers to double arrays of size bw.
690    
691     cos_pml_table contains Legendre coefficients of P(0,l) functions
692     and is result of CosPmlTableGen for m = 0;
693     FZT_semi only computes spherical harmonics for m=0.
694    
695     dataformat =0 -> samples are complex, =1 -> samples real
696    
697     workspace needed is (12 * bw)
698    
699     */
700    
701     void FZT_semi_memo(double *rdata, double *idata,
702     double *rres, double *ires,
703     int bw,
704     double *cos_pml_table,
705     double *workspace,
706     int dataformat,
707     fftw_plan *dctPlan,
708     double *weights )
709     {
710     int i, j, size;
711     double *r0, *i0, dsize;
712     double tmpreal, tmpimag;
713     double tmpA ;
714     double *scratchpad ;
715    
716     size = 2*bw ;
717    
718     /* assign memory */
719     r0 = workspace; /* needs (2 * bw) */
720     i0 = r0 + (2 * bw); /* needs (2 * bw) */
721     scratchpad = i0 + (2 * bw); /* needs (4 * bw) */
722    
723     /* total workspace = 13*bw */
724    
725     dsize = 1.0 / ((double) size);
726     tmpA = sqrt( 2.* M_PI );
727     dsize *= tmpA ;
728    
729     /* compute the m = 0 components */
730     for (i=0; i<size; i++) {
731     tmpreal = 0.0;
732     tmpimag = 0.0;
733    
734     for(j=0; j<size; j++) {
735     tmpreal += rdata[(i*size)+j];
736     tmpimag += idata[(i*size)+j];
737     }
738     /* normalize */
739     r0[i] = tmpreal*dsize;
740     i0[i] = tmpimag*dsize;
741     }
742    
743     /* do the real part */
744     SemiNaiveReduced(r0,
745     bw,
746     0,
747     rres,
748     scratchpad,
749     cos_pml_table,
750     weights,
751     dctPlan);
752    
753     if(dataformat == 0) /* do imaginary part */
754     SemiNaiveReduced(i0,
755     bw,
756     0,
757     ires,
758     scratchpad,
759     cos_pml_table,
760     weights,
761     dctPlan);
762     else /* otherwise set coefficients = 0 */
763     memset(ires, 0, sizeof(double) * size);
764    
765     }
766    
767     /************************************************************************/
768     /*
769     multiplies harmonic coefficients of a function and a filter.
770     See convolution theorem of Driscoll and Healy for details.
771    
772     bw -> bandwidth of problem
773     size = 2*bw
774    
775     datacoeffs should be output of an FST, filtercoeffs the
776     output of an FZT. There should be (bw * bw) datacoeffs,
777     and bw filtercoeffs.
778     rres and ires should point to arrays of dimension bw * bw.
779    
780     */
781    
782     void TransMult(double *rdatacoeffs, double *idatacoeffs,
783     double *rfiltercoeffs, double *ifiltercoeffs,
784     double *rres, double *ires,
785     int bw)
786     {
787    
788     int m, l, size;
789     double *rdptr, *idptr, *rrptr, *irptr;
790    
791     size = 2*bw ;
792    
793     rdptr = rdatacoeffs;
794     idptr = idatacoeffs;
795     rrptr = rres;
796     irptr = ires;
797    
798     for (m=0; m<bw; m++) {
799     for (l=m; l<bw; l++) {
800     compmult(rfiltercoeffs[l], ifiltercoeffs[l],
801     rdptr[l-m], idptr[l-m],
802     rrptr[l-m], irptr[l-m]);
803    
804     rrptr[l-m] *= sqrt(4*M_PI/(2*l+1));
805     irptr[l-m] *= sqrt(4*M_PI/(2*l+1));
806    
807     }
808     rdptr += bw-m; idptr += bw-m;
809     rrptr += bw-m; irptr += bw-m;
810     }
811     for (m=bw+1; m<size; m++) {
812     for (l=size-m; l<bw; l++){
813     compmult(rfiltercoeffs[l], ifiltercoeffs[l],
814     rdptr[l-size+m], idptr[l-size+m],
815     rrptr[l-size+m], irptr[l-size+m]);
816    
817     rrptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
818     irptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
819    
820     }
821     rdptr += m-bw; idptr += m-bw;
822     rrptr += m-bw; irptr += m-bw;
823     }
824    
825     }
826    
827     /************************************************************************/
828     /* Here's the big banana
829     Convolves two functions defined on the 2-sphere.
830     Uses seminaive algorithms for spherical harmonic transforms
831    
832     size = 2*bw
833    
834     Inputs:
835    
836     rdata, idata - (size * size) arrays containing real and
837     imaginary parts of sampled function.
838     rfilter, ifilter - (size * size) arrays containing real and
839     imaginary parts of sampled filter function.
840     rres, ires - (size * size) arrays containing real and
841     imaginary parts of result function.
842    
843    
844     Suggestion - if you want to do multiple convolutions,
845     don't keep allocating and freeing space with every call,
846     or keep recomputing the spharmonic_pml tables.
847     Allocate workspace once before you call this function, then
848     just set up pointers as first step of this procedure rather
849     than mallocing. And do the same with the FST, FZT, and InvFST functions.
850    
851     ASSUMPTIONS:
852     1. data is strictly REAL
853     2. will do semi-naive algorithm for ALL orders -> change the cutoff
854     value if you want it to be different
855    
856     Memory requirements for Conv2Sphere
857    
858     Need space for spharmonic tables and local workspace and
859     scratchpad space for FST_semi
860    
861     Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
862     Reduced_SpharmonicTableSize(bw,cutoff)
863    
864     Then the workspace needs to be this large:
865    
866     2 * legendreSize +
867     8 * (bw*bw) + 10*bw +
868     4 * (bw*bw) + 2*bw
869    
870     for a total of
871    
872     2 * legendreSize +
873     12 * (bw*bw) + 12*bw ;
874    
875    
876    
877     */
878     void Conv2Sphere_semi_memo(double *rdata, double *idata,
879     double *rfilter, double *ifilter,
880     double *rres, double *ires,
881     int bw,
882     double *workspace)
883     {
884     int size, spharmonic_bound ;
885     int legendreSize, cutoff ;
886     double *frres, *fires, *filtrres, *filtires, *trres, *tires;
887     double **spharmonic_pml_table, **transpose_spharmonic_pml_table;
888     double *spharmonic_result_space, *transpose_spharmonic_result_space;
889     double *scratchpad;
890    
891     /* fftw */
892     int rank, howmany_rank ;
893     fftw_iodim dims[1], howmany_dims[1];
894    
895     /* forward transform stuff */
896     fftw_plan dctPlan, fftPlan ;
897     double *weights ;
898    
899     /* inverse transform stuff */
900     fftw_plan idctPlan, ifftPlan ;
901    
902     size =2*bw ;
903     cutoff = bw ;
904     legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
905     Reduced_SpharmonicTableSize(bw,cutoff) ;
906    
907     /* assign space */
908    
909     spharmonic_bound = legendreSize ;
910    
911     spharmonic_result_space = workspace; /* needs legendreSize */
912    
913     transpose_spharmonic_result_space =
914     spharmonic_result_space + legendreSize ; /* needs legendreSize */
915    
916     frres = transpose_spharmonic_result_space +
917     legendreSize ; /* needs (bw*bw) */
918     fires = frres + (bw*bw); /* needs (bw*bw) */
919     trres = fires + (bw*bw); /* needs (bw*bw) */
920     tires = trres + (bw*bw); /* needs (bw*bw) */
921     filtrres = tires + (bw*bw); /* needs bw */
922     filtires = filtrres + bw; /* needs bw */
923     scratchpad = filtires + bw; /* needs (8*bw^2)+(10*bw) */
924    
925     /* allocate space, and compute, the weights for this bandwidth */
926     weights = (double *) malloc(sizeof(double) * 4 * bw);
927     makeweights( bw, weights );
928    
929     /* make the fftw plans */
930    
931     /* make DCT plans -> note that I will be using the GURU
932     interface to execute these plans within the routines*/
933    
934     /* forward DCT */
935     dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
936     FFTW_REDFT10, FFTW_ESTIMATE ) ;
937    
938     /* inverse DCT */
939     idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
940     FFTW_REDFT01, FFTW_ESTIMATE );
941    
942     /*
943     fft "preamble" ;
944     note that this plan places the output in a transposed array
945     */
946     rank = 1 ;
947     dims[0].n = 2*bw ;
948     dims[0].is = 1 ;
949     dims[0].os = 2*bw ;
950     howmany_rank = 1 ;
951     howmany_dims[0].n = 2*bw ;
952     howmany_dims[0].is = 2*bw ;
953     howmany_dims[0].os = 1 ;
954    
955     /* forward fft */
956     fftPlan = fftw_plan_guru_split_dft( rank, dims,
957     howmany_rank, howmany_dims,
958     rdata, idata,
959     workspace, workspace+(4*bw*bw),
960     FFTW_ESTIMATE );
961    
962     /*
963     now plan for inverse fft - note that this plans assumes
964     that I'm working with a transposed array, e.g. the inputs
965     for a length 2*bw transform are placed every 2*bw apart,
966     the output will be consecutive entries in the array
967     */
968     rank = 1 ;
969     dims[0].n = 2*bw ;
970     dims[0].is = 2*bw ;
971     dims[0].os = 1 ;
972     howmany_rank = 1 ;
973     howmany_dims[0].n = 2*bw ;
974     howmany_dims[0].is = 1 ;
975     howmany_dims[0].os = 2*bw ;
976    
977     /* inverse fft */
978     ifftPlan = fftw_plan_guru_split_dft( rank, dims,
979     howmany_rank, howmany_dims,
980     rdata, idata,
981     workspace, workspace+(4*bw*bw),
982     FFTW_ESTIMATE );
983    
984    
985     /* precompute the associated Legendre fcts */
986     spharmonic_pml_table =
987     Spharmonic_Pml_Table(bw,
988     spharmonic_result_space,
989     scratchpad);
990    
991     transpose_spharmonic_pml_table =
992     Transpose_Spharmonic_Pml_Table(spharmonic_pml_table,
993     bw,
994     transpose_spharmonic_result_space,
995     scratchpad);
996     FST_semi_memo(rdata, idata,
997     frres, fires,
998     bw,
999     spharmonic_pml_table,
1000     scratchpad,
1001     1,
1002     bw,
1003     &dctPlan,
1004     &fftPlan,
1005     weights );
1006    
1007     FZT_semi_memo(rfilter, ifilter,
1008     filtrres, filtires,
1009     bw,
1010     spharmonic_pml_table[0],
1011     scratchpad,
1012     1,
1013     &dctPlan,
1014     weights );
1015    
1016     TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
1017    
1018     InvFST_semi_memo(trres, tires,
1019     rres, ires,
1020     bw,
1021     transpose_spharmonic_pml_table,
1022     scratchpad,
1023     1,
1024     bw,
1025     &idctPlan,
1026     &ifftPlan );
1027    
1028     free( weights ) ;
1029    
1030     /***
1031     have to free the memory that was allocated in
1032     Spharmonic_Pml_Table() and
1033     Transpose_Spharmonic_Pml_Table()
1034     ***/
1035    
1036     free(spharmonic_pml_table);
1037     free(transpose_spharmonic_pml_table);
1038    
1039     /* destroy plans */
1040     fftw_destroy_plan( ifftPlan ) ;
1041     fftw_destroy_plan( fftPlan ) ;
1042     fftw_destroy_plan( idctPlan ) ;
1043     fftw_destroy_plan( dctPlan ) ;
1044     }