ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/group/trunk/SHAPES/FST_semi_memo.c
(Generate patch)

Comparing trunk/SHAPES/FST_semi_memo.c (file contents):
Revision 1287 by chrisfen, Wed Jun 23 20:18:48 2004 UTC vs.
Revision 1294 by chrisfen, Thu Jun 24 13:44:53 2004 UTC

# Line 422 | Line 422 | void FST_semi_memo(double *rdata, double *idata,
422              -pow_one * icoeffs[dummy0];
423          }
424        }
425    }
426  
427 }
428
429 /************************************************************************/
430 /* Inverse spherical harmonic transform.
431
432   bw -> bandwidth of problem
433   size = 2*bw
434
435   Inputs rcoeffs and icoeffs are harmonic coefficients stored
436   in (bw * bw) arrays in the order spec'ed above.
437
438   rdata and idata are (size x size) arrays with the transformed result.
439
440   transpose_spharmonic_pml_table should be the (double **)
441   result of a call to Transpose_Spharmonic_Pml_Table()
442
443   workspace is (8 * bw^2) + (10 * bw)
444
445 */
446
447 /*      dataformat =0 -> samples are complex, =1 -> samples real */
448
449 void InvFST_semi_memo(double *rcoeffs, double *icoeffs,
450                      double *rdata, double *idata,
451                      int bw,
452                      double **transpose_seminaive_naive_table,
453                      double *workspace,
454                      int dataformat,
455                      int cutoff,
456                      fftw_plan *idctPlan,
457                      fftw_plan *ifftPlan )
458 {
459  int size, m, i, n;
460  double *rdataptr, *idataptr;
461  double *rfourdata, *ifourdata;
462  double *rinvfltres, *iminvfltres, *scratchpad;
463  double *sin_values, *eval_pts;
464  double tmpA ;
465
466  size = 2*bw ;
467
468  rfourdata = workspace;                  /* needs (size * size) */
469  ifourdata = rfourdata + (size * size);  /* needs (size * size) */
470  rinvfltres = ifourdata + (size * size); /* needs (2 * bw) */
471  iminvfltres = rinvfltres + (2 * bw);    /* needs (2 * bw) */
472  sin_values = iminvfltres + (2 * bw);    /* needs (2 * bw) */
473  eval_pts = sin_values + (2 * bw);       /* needs (2 * bw) */
474  scratchpad = eval_pts + (2 * bw);       /* needs (2 * bw) */
475  
476  /* total workspace = (8 * bw^2) + (10 * bw) */
477
478  /* load up the sin_values array */
479  n = 2*bw;
480
481  ArcCosEvalPts(n, eval_pts);
482  for (i=0; i<n; i++)
483    sin_values[i] = sin(eval_pts[i]);
484
485
486  /* Now do all of the inverse Legendre transforms */
487  rdataptr = rcoeffs;
488  idataptr = icoeffs;
489
490  for (m=0; m<bw; m++)
491    {
492      /*
493        fprintf(stderr,"m = %d\n",m);
494      */
495    
496      if(m < cutoff)
497        {
498          /* do real part first */
499          InvSemiNaiveReduced(rdataptr,
500                              bw,
501                              m,
502                              rinvfltres,
503                              transpose_seminaive_naive_table[m],
504                              sin_values,
505                              scratchpad,
506                              idctPlan );
507      
508          /* now do imaginary part */
509      
510          InvSemiNaiveReduced(idataptr,
511                              bw,
512                              m,
513                              iminvfltres,
514                              transpose_seminaive_naive_table[m],
515                              sin_values,
516                              scratchpad,
517                              idctPlan);
518          
519          /* will store normal, then tranpose before doing inverse fft */
520          memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
521          memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
522      
523          /* move to next set of coeffs */
524          rdataptr += bw-m;
525          idataptr += bw-m;
526      
527        }
528      else
529        {
530
531          /* first do the real part */    
532          Naive_SynthesizeX(rdataptr,
533                            bw,
534                            m,
535                            rinvfltres,
536                            transpose_seminaive_naive_table[m]);
537          
538          /* now do the imaginary */    
539          Naive_SynthesizeX(idataptr,
540                            bw,
541                            m,
542                            iminvfltres,
543                            transpose_seminaive_naive_table[m]);
544
545          /* will store normal, then tranpose before doing inverse fft    */
546          memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size);
547          memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size);
548        
549          /* move to next set of coeffs */
550        
551          rdataptr += bw-m;
552          idataptr += bw-m;
553        
554        }
555    }
556  /* closes m loop */
557
558  /* now fill in zero values where m = bw (from problem definition) */
559  memset(rfourdata + (bw * size), 0, sizeof(double) * size);
560  memset(ifourdata + (bw * size), 0, sizeof(double) * size);
561  
562  /* now if the data is real, we don't have to compute the
563     coefficients whose order is less than 0, i.e. since
564     the data is real, we know that
565     invf-hat(l,-m) = conjugate(invf-hat(l,m)),
566     so use that to get the rest of the real data
567    
568     dataformat =0 -> samples are complex, =1 -> samples real
569    
570  */
571
572  if(dataformat == 0){
573    
574    /* now do negative m values */
575    
576    for (m=bw+1; m<size; m++)
577      {
578        /*
579          fprintf(stderr,"m = %d\n",-(size-m));
580        */
581        
582        if ( (size-m) < cutoff )
583          {
584            /* do real part first */
585            InvSemiNaiveReduced(rdataptr,
586                                bw,
587                                size - m,
588                                rinvfltres,
589                                transpose_seminaive_naive_table[size - m],
590                                sin_values,
591                                scratchpad,
592                                idctPlan);
593        
594            /* now do imaginary part */
595            InvSemiNaiveReduced(idataptr,
596                                bw,
597                                size - m,
598                                iminvfltres,
599                                transpose_seminaive_naive_table[size - m],
600                                sin_values,
601                                scratchpad,
602                                idctPlan );
603
604            /* will store normal, then tranpose before doing inverse fft    */
605            if ((m % 2) != 0)
606              for(i=0; i< size; i++){
607                rinvfltres[i] = -rinvfltres[i];
608                iminvfltres[i] = -iminvfltres[i];
609              }
610        
611            memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
612            memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
613                
614            /* move to next set of coeffs */
615            rdataptr += bw-(size-m);
616            idataptr += bw-(size-m);
617          }
618        else
619          {
620            /* first do the real part */
621            Naive_SynthesizeX(rdataptr,
622                              bw,
623                              size-m,
624                              rinvfltres,
625                              transpose_seminaive_naive_table[size-m]);
626          
627            /* now do the imaginary */  
628            Naive_SynthesizeX(idataptr,
629                              bw,
630                              size-m,
631                              iminvfltres,
632                              transpose_seminaive_naive_table[size-m]);
633
634            /* will store normal, then tranpose before doing inverse fft    */
635            if ((m % 2) != 0)
636              for(i=0; i< size; i++){
637                rinvfltres[i] = -rinvfltres[i];
638                iminvfltres[i] = -iminvfltres[i];
639              }
640        
641            memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size);
642            memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size);
643        
644            /* move to next set of coeffs */
645            rdataptr += bw-(size-m);
646            idataptr += bw-(size-m);
647
648          }
649
650      } /* closes m loop */
651  }
652  else {
653    for(m = bw + 1; m < size; m++){
654      
655      memcpy(rfourdata+(m*size), rfourdata+((size-m)*size),
656             sizeof(double) * size);
657      memcpy(ifourdata+(m*size), ifourdata+((size-m)*size),
658             sizeof(double) * size);
659      for(i = 0; i < size; i++)
660        ifourdata[(m*size)+i] *= -1.0;
425      }
662  }
663
664  /* normalize */
665  tmpA = 1./(sqrt(2.*M_PI) );
666  for(i=0;i<4*bw*bw;i++)
667    {
668      rfourdata[i] *= tmpA ;
669      ifourdata[i] *= tmpA ;
670    }
671
672
673  fftw_execute_split_dft( *ifftPlan,
674                          ifourdata, rfourdata,
675                          idata, rdata );
676  /* amscray */
426    
678 }
679
680 /************************************************************************/
681 /*
682  Zonal Harmonic transform using seminaive algorithm - used in convolutions
683
684  bw -> bandwidth of problem
685
686  size = 2 * bw
687
688  rdata and idata should be pointers to size x size arrays.
689  rres and ires should be pointers to double arrays of size bw.
690  
691  cos_pml_table contains Legendre coefficients of P(0,l) functions
692  and is result of CosPmlTableGen for m = 0;
693  FZT_semi only computes spherical harmonics for m=0.
694
695  dataformat =0 -> samples are complex, =1 -> samples real
696  
697  workspace needed is (12 * bw)
698
699 */
700
701 void FZT_semi_memo(double *rdata, double *idata,
702                   double *rres, double *ires,
703                   int bw,
704                   double *cos_pml_table,
705                   double *workspace,
706                   int dataformat,
707                   fftw_plan *dctPlan,
708                   double *weights )
709 {
710  int i, j, size;
711  double *r0, *i0, dsize;
712  double tmpreal, tmpimag;
713  double tmpA ;
714  double *scratchpad ;
715
716  size = 2*bw ;
717
718  /* assign memory */
719  r0 = workspace;             /* needs (2 * bw) */
720  i0 = r0 + (2 * bw);         /* needs (2 * bw) */
721  scratchpad = i0 + (2 * bw);   /* needs (4 * bw) */
722
723  /* total workspace = 13*bw */
724
725  dsize = 1.0 / ((double) size);
726  tmpA = sqrt( 2.* M_PI );
727  dsize *= tmpA ;
728
729  /* compute the m = 0 components */
730  for (i=0; i<size; i++) {
731    tmpreal = 0.0;
732    tmpimag = 0.0;
733
734    for(j=0; j<size; j++) {
735      tmpreal += rdata[(i*size)+j];
736      tmpimag += idata[(i*size)+j];
737    }
738    /* normalize */
739    r0[i] = tmpreal*dsize;
740    i0[i] = tmpimag*dsize;
741  }
742
743  /* do the real part */
744  SemiNaiveReduced(r0,
745                   bw,
746                   0,
747                   rres,
748                   scratchpad,
749                   cos_pml_table,
750                   weights,
751                   dctPlan);
752
753  if(dataformat == 0)   /* do imaginary part */
754    SemiNaiveReduced(i0,
755                     bw,
756                     0,
757                     ires,
758                     scratchpad,
759                     cos_pml_table,
760                     weights,
761                     dctPlan);
762  else                 /* otherwise set coefficients = 0 */
763    memset(ires, 0, sizeof(double) * size);
764
427   }
428  
767 /************************************************************************/
768 /*
769  multiplies harmonic coefficients of a function and a filter.
770  See convolution theorem of Driscoll and Healy for details.
771
772  bw -> bandwidth of problem
773  size = 2*bw
774
775  datacoeffs should be output of an FST, filtercoeffs the
776  output of an FZT.  There should be (bw * bw) datacoeffs,
777  and bw filtercoeffs.
778  rres and ires should point to arrays of dimension bw * bw.
779
780 */
781
782 void TransMult(double *rdatacoeffs, double *idatacoeffs,
783               double *rfiltercoeffs, double *ifiltercoeffs,
784               double *rres, double *ires,
785               int bw)
786 {
787
788  int m, l, size;
789  double *rdptr, *idptr, *rrptr, *irptr;
790
791  size = 2*bw ;
792
793  rdptr = rdatacoeffs;
794  idptr = idatacoeffs;
795  rrptr = rres;
796  irptr = ires;
797
798  for (m=0; m<bw; m++) {
799    for (l=m; l<bw; l++) {
800      compmult(rfiltercoeffs[l], ifiltercoeffs[l],
801               rdptr[l-m], idptr[l-m],
802               rrptr[l-m], irptr[l-m]);
803
804      rrptr[l-m] *= sqrt(4*M_PI/(2*l+1));
805      irptr[l-m] *= sqrt(4*M_PI/(2*l+1));
806
807    }
808    rdptr += bw-m; idptr += bw-m;
809    rrptr += bw-m; irptr += bw-m;
810  }
811  for (m=bw+1; m<size; m++) {
812    for (l=size-m; l<bw; l++){
813      compmult(rfiltercoeffs[l], ifiltercoeffs[l],
814               rdptr[l-size+m], idptr[l-size+m],
815               rrptr[l-size+m], irptr[l-size+m]);
816
817      rrptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
818      irptr[l-size+m] *= sqrt(4*M_PI/(2*l+1));
819
820    }
821    rdptr += m-bw; idptr += m-bw;
822    rrptr += m-bw; irptr += m-bw;
823  }
824
825 }
826
827 /************************************************************************/
828 /* Here's the big banana
829   Convolves two functions defined on the 2-sphere.
830   Uses seminaive algorithms for spherical harmonic transforms
831
832   size = 2*bw
833
834   Inputs:
835
836   rdata, idata - (size * size) arrays containing real and
837                  imaginary parts of sampled function.
838   rfilter, ifilter - (size * size) arrays containing real and
839                      imaginary parts of sampled filter function.
840   rres, ires - (size * size) arrays containing real and
841                  imaginary parts of result function.
842
843
844   Suggestion - if you want to do multiple convolutions,
845   don't keep allocating and freeing space with every call,
846   or keep recomputing the spharmonic_pml tables.
847   Allocate workspace once before you call this function, then
848   just set up pointers as first step of this procedure rather
849   than mallocing.  And do the same with the FST, FZT, and InvFST functions.
850
851   ASSUMPTIONS:
852   1. data is strictly REAL
853   2. will do semi-naive algorithm for ALL orders -> change the cutoff
854      value if you want it to be different
855
856   Memory requirements for Conv2Sphere
857
858   Need space for spharmonic tables and local workspace and
859   scratchpad space for FST_semi
860
861   Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
862                      Reduced_SpharmonicTableSize(bw,cutoff)
863
864   Then the workspace needs to be this large:
865
866   2 * legendreSize  +
867   8 * (bw*bw)  + 10*bw +
868   4 * (bw*bw) + 2*bw
869
870   for a total of
871
872   2 * legendreSize  +
873   12 * (bw*bw) + 12*bw ;
874  
875
876
877 */
878 void Conv2Sphere_semi_memo(double *rdata, double *idata,
879                           double *rfilter, double *ifilter,
880                           double *rres, double *ires,
881                           int bw,
882                           double *workspace)
883 {
884  int size, spharmonic_bound ;
885  int legendreSize, cutoff ;
886  double *frres, *fires, *filtrres, *filtires, *trres, *tires;
887  double **spharmonic_pml_table, **transpose_spharmonic_pml_table;
888  double *spharmonic_result_space, *transpose_spharmonic_result_space;
889  double *scratchpad;
890
891  /* fftw */
892  int rank, howmany_rank ;
893  fftw_iodim dims[1], howmany_dims[1];
894
895  /* forward transform stuff */
896  fftw_plan dctPlan, fftPlan ;
897  double *weights ;
898
899  /* inverse transform stuff */
900  fftw_plan idctPlan, ifftPlan ;
901
902  size =2*bw ;
903  cutoff = bw ;
904  legendreSize = Reduced_Naive_TableSize(bw,cutoff) +
905    Reduced_SpharmonicTableSize(bw,cutoff) ;
906
907  /* assign space */
908
909  spharmonic_bound = legendreSize ;
910
911  spharmonic_result_space = workspace;          /* needs legendreSize */
912
913  transpose_spharmonic_result_space =
914    spharmonic_result_space +  legendreSize ;   /* needs legendreSize */
915
916  frres = transpose_spharmonic_result_space +
917    legendreSize ;                              /* needs (bw*bw) */
918  fires = frres + (bw*bw);                      /* needs (bw*bw) */
919  trres = fires + (bw*bw);                      /* needs (bw*bw) */
920  tires = trres + (bw*bw);                      /* needs (bw*bw) */
921  filtrres = tires + (bw*bw);                   /* needs bw */
922  filtires = filtrres + bw;                     /* needs bw */
923  scratchpad = filtires + bw;                   /* needs (8*bw^2)+(10*bw) */
924
925  /* allocate space, and compute, the weights for this bandwidth */
926  weights = (double *) malloc(sizeof(double) * 4 * bw);
927  makeweights( bw, weights );
928
929  /* make the fftw plans */
930
931  /* make DCT plans -> note that I will be using the GURU
932     interface to execute these plans within the routines*/
933
934  /* forward DCT */
935  dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
936                              FFTW_REDFT10, FFTW_ESTIMATE ) ;
937      
938  /* inverse DCT */
939  idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
940                               FFTW_REDFT01, FFTW_ESTIMATE );
941  
942  /*
943    fft "preamble" ;
944    note that this plan places the output in a transposed array
945  */
946  rank = 1 ;
947  dims[0].n = 2*bw ;
948  dims[0].is = 1 ;
949  dims[0].os = 2*bw ;
950  howmany_rank = 1 ;
951  howmany_dims[0].n = 2*bw ;
952  howmany_dims[0].is = 2*bw ;
953  howmany_dims[0].os = 1 ;
954
955  /* forward fft */
956  fftPlan = fftw_plan_guru_split_dft( rank, dims,
957                                      howmany_rank, howmany_dims,
958                                      rdata, idata,
959                                      workspace, workspace+(4*bw*bw),
960                                      FFTW_ESTIMATE );
961
962  /*
963    now plan for inverse fft - note that this plans assumes
964    that I'm working with a transposed array, e.g. the inputs
965    for a length 2*bw transform are placed every 2*bw apart,
966    the output will be consecutive entries in the array
967  */
968  rank = 1 ;
969  dims[0].n = 2*bw ;
970  dims[0].is = 2*bw ;
971  dims[0].os = 1 ;
972  howmany_rank = 1 ;
973  howmany_dims[0].n = 2*bw ;
974  howmany_dims[0].is = 1 ;
975  howmany_dims[0].os = 2*bw ;
976
977  /* inverse fft */
978  ifftPlan = fftw_plan_guru_split_dft( rank, dims,
979                                       howmany_rank, howmany_dims,
980                                       rdata, idata,
981                                       workspace, workspace+(4*bw*bw),
982                                       FFTW_ESTIMATE );
983
984
985  /* precompute the associated Legendre fcts */
986  spharmonic_pml_table =
987    Spharmonic_Pml_Table(bw,
988                         spharmonic_result_space,
989                         scratchpad);
990
991  transpose_spharmonic_pml_table =
992    Transpose_Spharmonic_Pml_Table(spharmonic_pml_table,
993                                   bw,
994                                   transpose_spharmonic_result_space,
995                                   scratchpad);
996  FST_semi_memo(rdata, idata,
997                frres, fires,
998                bw,
999                spharmonic_pml_table,
1000                scratchpad,
1001                1,
1002                bw,
1003                &dctPlan,
1004                &fftPlan,
1005                weights );
1006
1007  FZT_semi_memo(rfilter, ifilter,
1008                filtrres, filtires,
1009                bw,
1010                spharmonic_pml_table[0],
1011                scratchpad,
1012                1,
1013                &dctPlan,
1014                weights );
1015
1016  TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
1017
1018  InvFST_semi_memo(trres, tires,
1019                   rres, ires,
1020                   bw,
1021                   transpose_spharmonic_pml_table,
1022                   scratchpad,
1023                   1,
1024                   bw,
1025                   &idctPlan,
1026                   &ifftPlan );
1027
1028  free( weights ) ;
1029
1030  /***
1031      have to free the memory that was allocated in
1032      Spharmonic_Pml_Table() and
1033      Transpose_Spharmonic_Pml_Table()
1034  ***/
1035
1036  free(spharmonic_pml_table);
1037  free(transpose_spharmonic_pml_table);
1038
1039  /* destroy plans */
1040  fftw_destroy_plan( ifftPlan ) ;
1041  fftw_destroy_plan( fftPlan ) ;
1042  fftw_destroy_plan( idctPlan ) ;
1043  fftw_destroy_plan( dctPlan ) ;
1044 }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines