Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

fftw-int.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 1997-1999 Massachusetts Institute of Technology
00003  *
00004  * This program is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  *
00018  */
00019 
00020 /* fftw-int.h -- internal (private) definitions */
00021 /* $Id: fftw-int.h,v 1.1 2004/05/21 21:02:53 maxx Exp $ */
00022 
00023 #ifndef FFTW_INT_H
00024 #define FFTW_INT_H
00025 #include <config.h>
00026 #include <fftw.h>
00027 
00028 #ifdef __cplusplus
00029 extern "C" {
00030 #endif                          /* __cplusplus */
00031 
00032 /****************************************************************************/
00033 /*                            Private Functions                             */
00034 /****************************************************************************/
00035 
00036 extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d);
00037 extern void fftw_destroy_twiddle(fftw_twiddle *tw);
00038 
00039 extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *);
00040 extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *,
00041                                  fftw_plan_node *, int, int,
00042                                  fftw_recurse_kind recurse_kind);
00043 
00044 extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
00045                                           fftw_direction dir, int flags);
00046 extern fftw_plan *fftwnd_new_plan_array(int rank);
00047 extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
00048                                               int rank, const int *n,
00049                                               fftw_direction dir, int flags);
00050 extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
00051                                                int rank, const int *n,
00052                                                const int *n_after,
00053                                                fftw_direction dir, int flags,
00054                                                fftw_complex *in, int istride,
00055                                                fftw_complex *out, int ostride);
00056 extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies);
00057 
00058 extern void fftwnd_aux(fftwnd_plan p, int cur_dim,
00059                        fftw_complex *in, int istride,
00060                        fftw_complex *out, int ostride,
00061                        fftw_complex *work);
00062 extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
00063                                int howmany,
00064                                fftw_complex *in, int istride, int idist,
00065                                fftw_complex *out, int ostride, int odist,
00066                                fftw_complex *work);
00067 
00068 /* wisdom prototypes */
00069 enum fftw_wisdom_category {
00070      FFTW_WISDOM, RFFTW_WISDOM
00071 };
00072 
00073 extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
00074                               enum fftw_wisdom_category category,
00075                               int istride, int ostride,
00076                               enum fftw_node_type *type,
00077                               int *signature,
00078                               fftw_recurse_kind *recurse_kind, int replace_p);
00079 extern void fftw_wisdom_add(int n, int flags, fftw_direction dir,
00080                             enum fftw_wisdom_category cat,
00081                             int istride, int ostride,
00082                             enum fftw_node_type type,
00083                             int signature,
00084                             fftw_recurse_kind recurse_kind);
00085 
00086 /* Private planner functions: */
00087 extern double fftw_estimate_node(fftw_plan_node *p);
00088 extern fftw_plan_node *fftw_make_node_notw(int size,
00089                                         const fftw_codelet_desc *config);
00090 extern fftw_plan_node *fftw_make_node_real2hc(int size,
00091                                         const fftw_codelet_desc *config);
00092 extern fftw_plan_node *fftw_make_node_hc2real(int size,
00093                                         const fftw_codelet_desc *config);
00094 extern fftw_plan_node *fftw_make_node_twiddle(int n,
00095                                          const fftw_codelet_desc *config,
00096                                               fftw_plan_node *recurse,
00097                                               int flags);
00098 extern fftw_plan_node *fftw_make_node_hc2hc(int n,
00099                                             fftw_direction dir,
00100                                          const fftw_codelet_desc *config,
00101                                             fftw_plan_node *recurse,
00102                                             int flags);
00103 extern fftw_plan_node *fftw_make_node_generic(int n, int size,
00104                                               fftw_generic_codelet *codelet,
00105                                               fftw_plan_node *recurse,
00106                                               int flags);
00107 extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
00108                                                fftw_direction dir,
00109                                                fftw_rgeneric_codelet * codelet,
00110                                                fftw_plan_node *recurse,
00111                                                int flags);
00112 extern int fftw_factor(int n);
00113 extern fftw_plan_node *fftw_make_node(void);
00114 extern fftw_plan fftw_make_plan(int n, fftw_direction dir,
00115                                 fftw_plan_node *root, int flags,
00116                                 enum fftw_node_type wisdom_type,
00117                                 int wisdom_signature,
00118                                 fftw_recurse_kind recurse_kind,
00119                                 int vector_size);
00120 extern void fftw_use_plan(fftw_plan p);
00121 extern void fftw_use_node(fftw_plan_node *p);
00122 extern void fftw_destroy_plan_internal(fftw_plan p);
00123 extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2);
00124 extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags,
00125                              int vector_size);
00126 extern void fftw_insert(fftw_plan *table, fftw_plan this_plan);
00127 extern void fftw_make_empty_table(fftw_plan *table);
00128 extern void fftw_destroy_table(fftw_plan *table);
00129 extern void fftw_complete_twiddle(fftw_plan_node *p, int n);
00130 
00131 extern fftw_plan_node *fftw_make_node_rader(int n, int size,
00132                                             fftw_direction dir,
00133                                             fftw_plan_node *recurse,
00134                                             int flags);
00135 extern fftw_rader_data *fftw_rader_top;
00136 
00137 /* undocumented debugging hook */
00138 typedef void (*fftw_plan_hook_ptr) (fftw_plan plan);
00139 extern DL_IMPORT(fftw_plan_hook_ptr) fftw_plan_hook;
00140 extern DL_IMPORT(fftw_plan_hook_ptr) rfftw_plan_hook;
00141 
00142 /****************************************************************************/
00143 /*                          Overflow-safe multiply                          */
00144 /****************************************************************************/
00145 
00146 /* The Rader routines do a lot of operations of the form (x * y) % p, which
00147    are vulnerable to overflow problems for large p.  To get around this,
00148    we either use "long long" arithmetic (if it is available and double
00149    the size of int), or default to a subroutine defined in twiddle.c. */
00150 
00151 #if defined(FFTW_ENABLE_UNSAFE_MULMOD)
00152 #  define MULMOD(x,y,p) (((x) * (y)) % (p))    /* multiplies in the operands' own type; no overflow protection */
00153 #elif defined(LONGLONG_IS_TWOINTS)
00154 #  define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \
00155                                 % ((long long) (p))))    /* operands widened to long long before multiplying */
00156 #else
00157 #  define USE_FFTW_SAFE_MULMOD    /* records that MULMOD goes through fftw_safe_mulmod() */
00158 #  define MULMOD(x,y,p) fftw_safe_mulmod(x,y,p)
00159 extern int fftw_safe_mulmod(int x, int y, int p);
00160 #endif                          /* MULMOD selection */
00161 
00162 /****************************************************************************/
00163 /*                           Floating Point Types                           */
00164 /****************************************************************************/
00165 
00166 /*
00167  * We use these definitions to make it easier for people to change
00168  * FFTW to use long double and similar types. You shouldn't have to
00169  * change this just to use float or double. 
00170  */
00171 
00172 /*
00173  * Change this if your floating-point constants need to be expressed
00174  * in a special way.  For example, if fftw_real is long double, you
00175  * will need to append L to your fp constants to make them of the
00176  * same precision.  Do this by changing "x" below to "x##L". 
00177  */
00178 #define FFTW_KONST(x) ((fftw_real) x)
00179 
00180 /*
00181  * Ordinarily, we use the standard sin/cos functions to compute trig.
00182  * constants.  You'll need to change these if fftw_real has more
00183  * than double precision.
00184  */
00185 #define FFTW_TRIG_SIN sin
00186 #define FFTW_TRIG_COS cos
00187 typedef double FFTW_TRIG_REAL;  /* the argument type for sin and cos */
00188 
00189 #define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388)
00190 
00191 /****************************************************************************/
00192 /*                               gcc/x86 hacks                              */
00193 /****************************************************************************/
00194 
00195 /*
00196  * gcc 2.[78].x and x86 specific hacks.  These macros align the stack
00197  * pointer so that the double precision temporary variables in the
00198  * codelets will be aligned to a multiple of 8 bytes (*way* faster on
00199  * pentium and pentiumpro)
00200  */
00201 #ifdef __GNUC__
00202 #  ifdef __i386__
00203 #    ifdef FFTW_ENABLE_I386_HACKS
00204 #      ifndef FFTW_GCC_ALIGNS_STACK
00205 #      ifndef FFTW_ENABLE_FLOAT
00206 #        define FFTW_USING_I386_HACKS
00207 #        define HACK_ALIGN_STACK_EVEN {                                    \
00208            if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);  \
00209          }
00210 /* ODD is the inverse test: it allocates 4 bytes when the address from alloca(0) is already a multiple of 8 */
00211 #        define HACK_ALIGN_STACK_ODD {                                     \
00212            if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \
00213          }
00214 
00215 #      endif /* ! FFTW_ENABLE_FLOAT */
00216 #      endif /* ! FFTW_GCC_ALIGNS_STACK */
00217 #    endif /* FFTW_ENABLE_I386_HACKS */
00218 /* debugging aid: abort() when a local double does not sit on an 8-byte boundary */
00219 #    ifdef FFTW_DEBUG_ALIGNMENT
00220 #      define ASSERT_ALIGNED_DOUBLE {                                      \
00221          double __foo;                                                       \
00222          if ((((long) &__foo) & 0x7)) abort();                               \
00223        }
00224 #    endif /* FFTW_DEBUG_ALIGNMENT */
00225 
00226 #  endif /* __i386__ */
00227 #endif /* __GNUC__ */
00228 
00229 #ifndef HACK_ALIGN_STACK_EVEN
00230 #  define HACK_ALIGN_STACK_EVEN {}
00231 #endif
00232 #ifndef HACK_ALIGN_STACK_ODD
00233 #  define HACK_ALIGN_STACK_ODD {}
00234 #endif
00235 #ifndef ASSERT_ALIGNED_DOUBLE
00236 #  define ASSERT_ALIGNED_DOUBLE {}
00237 #endif
00238 
00239 /****************************************************************************/
00240 /*                                  Timers                                  */
00241 /****************************************************************************/
00242 
00243 /*
00244  * Here, you can use all the nice timers available in your machine.
00245  */
00246 
00247 /*
00248  *
00249  Things you should define to include your own clock:
00250  
00251  fftw_time -- the data type used to store a time
00252  
00253  extern fftw_time fftw_get_time(void); 
00254  -- a function returning the current time.  (We have
00255  implemented this as a macro in most cases.)
00256  
00257  extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
00258  -- returns the time difference (t1 - t2).
00259  If t1 < t2, it may simply return zero (although this
00260  is not required).  (We have implemented this as a macro
00261  in most cases.)
00262  
00263  extern double fftw_time_to_sec(fftw_time t);
00264  -- returns the time t expressed in seconds, as a double.
00265  (Implemented as a macro in most cases.)
00266  
00267  FFTW_TIME_MIN -- a double-precision macro holding the minimum
00268  time interval (in seconds) for accurate time measurements.
00269  This should probably be at least 100 times the precision of
00270  your clock (we use even longer intervals, to be conservative).
00271  This will determine how long the planner takes to measure
00272  the speeds of different possible plans.
00273  
00274  Bracket all of your definitions with an appropriate #ifdef so that
00275  they will be enabled on your machine.  If you do add your own
00276  high-precision timer code, let us know (at fftw@fftw.org).
00277  
00278  Only declarations should go in this file.  Any function definitions
00279  that you need should go into timer.c.
00280  */
00281 
00282 /*
00283  * define a symbol so that we know that we have the fftw_time_diff
00284  * function/macro (it did not exist prior to FFTW 1.2) 
00285  */
00286 #define FFTW_HAS_TIME_DIFF
00287 
00288 /**********************************************
00289  *              SOLARIS
00290  **********************************************/
00291 #if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T)
00292 
00293 /* we use the nanosecond virtual timer */
00294 #ifdef HAVE_SYS_TIME_H
00295 #include <sys/time.h>
00296 #endif
00297 
00298 typedef hrtime_t fftw_time;
00299 
00300 #define fftw_get_time() gethrtime()
00301 #define fftw_time_diff(t1,t2) ((t1) - (t2))
00302 #define fftw_time_to_sec(t) (((double) (t)) / 1.0e9)  /* hrtime_t nanoseconds -> seconds; (t) parenthesized so expression arguments convert as a whole */
00303 
00304 /*
00305  * a measurement is valid if it runs for at least
00306  * FFTW_TIME_MIN seconds.
00307  */
00308 #define FFTW_TIME_MIN (1.0e-4)  /* for Solaris nanosecond timer */
00309 #define FFTW_TIME_REPEAT 8
00310 
00311 /**********************************************
00312  *        Pentium time stamp counter
00313  **********************************************/
00314 #elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER)
00315 
00316 /*
00317  * Use internal Pentium register (time stamp counter). Resolution
00318  * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz).
00319  * (This code was contributed by Wolfgang Reimer)
00320  */
00321 
00322 #ifndef FFTW_CYCLES_PER_SEC
00323 #error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter"
00324 #endif
00325 
00326 typedef unsigned long long fftw_time;
00327 /* Read the CPU time stamp counter and return its raw 64-bit value. */
00328 static __inline__ fftw_time read_tsc(void)
00329 {
00330      fftw_time ret;
00331      /* "=A" ties the 64-bit output to the EDX:EAX register pair on i386 */
00332      __asm__ __volatile__("rdtsc": "=A" (ret)); 
00333      /* no input, nothing else clobbered */
00334      return ret;
00335 }
00336 
00337 #define fftw_get_time()  read_tsc()
00338 #define fftw_time_diff(t1,t2) ((t1) - (t2))
00339 #define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC)
00340 #define FFTW_TIME_MIN (1.0e-4)  /* for Pentium TSC register */
00341 
00342 /************* generic systems having gettimeofday ************/
00343 #elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY)
00344 #ifdef HAVE_SYS_TIME_H
00345 #include <sys/time.h>
00346 #endif
00347 #ifdef HAVE_UNISTD_H
00348 #include <unistd.h>
00349 #endif
00350 #define FFTW_USE_GETTIMEOFDAY
00351 
00352 typedef struct timeval fftw_time;
00353 
00354 extern fftw_time fftw_gettimeofday_get_time(void);
00355 extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2);
00356 #define fftw_get_time() fftw_gettimeofday_get_time()
00357 #define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2)
00358 #define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6)
00359 
00360 #ifndef FFTW_TIME_MIN
00361 /* this should be fine on any system claiming a microsecond timer */
00362 #define FFTW_TIME_MIN (1.0e-2)
00363 #endif
00364 
00365 /**********************************************
00366  *              MACINTOSH
00367  **********************************************/
00368 #elif defined(HAVE_MAC_TIMER)
00369 
00370 /*
00371  * By default, use the microsecond-timer in the Mac Time Manager.
00372  * Alternatively, by defining HAVE_MAC_PCI_TIMER, you
00373  * can use the nanosecond timer available *only* on PCI PowerMacs. 
00374  * WARNING: the nanosecond timer was just a little experiment;
00375  * I haven't gotten it to work reliably.  Tips/patches are welcome.
00376  */
00377 #ifndef HAVE_MAC_PCI_TIMER      /* use time manager */
00378 
00379 /*
00380  * Use Macintosh Time Manager routines (maximum resolution is about 20
00381  * microseconds). 
00382  */
00383 typedef struct fftw_time_struct {
00384      unsigned long hi, lo;
00385 } fftw_time;
00386 
00387 extern fftw_time get_Mac_microseconds(void);
00388 
00389 #define fftw_get_time() get_Mac_microseconds()
00390 
00391 /* define as a function instead of a macro: */
00392 extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
00393 /* NOTE(review): assumes hi is the upper 32-bit word of a 64-bit microsecond count, so it is weighted by 2^32 (not 2^32 - 1) -- confirm against get_Mac_microseconds() */
00394 #define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)
00395 
00396 /* very conservative, since timer should be accurate to 20e-6: */
00397 /* (although this seems not to be the case in practice) */
00398 #define FFTW_TIME_MIN (5.0e-2)  /* for MacOS Time Manager timer */
00399 
00400 #else                           /* use nanosecond timer */
00401 
00402 /* Use the nanosecond timer available on PCI PowerMacs. */
00403 
00404 #include <DriverServices.h>
00405 
00406 typedef AbsoluteTime fftw_time;
00407 #define fftw_get_time() UpTime()
00408 #define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2)
00409 #define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9)
00410 
00411 /* Extremely conservative minimum time: */
00412 /* for MacOS PCI PowerMac nanosecond timer */
00413 #define FFTW_TIME_MIN (5.0e-3)  
00414 
00415 #endif                          /* use nanosecond timer */
00416 
00417 /**********************************************
00418  *              WINDOWS
00419  **********************************************/
00420 #elif defined(HAVE_WIN32_TIMER)
00421 
00422 #include <time.h>
00423 
00424 typedef unsigned long fftw_time;
00425 extern unsigned long GetPerfTime(void);
00426 extern double GetPerfSec(double ticks);
00427 
00428 #define fftw_get_time() GetPerfTime()
00429 #define fftw_time_diff(t1,t2) ((t1) - (t2))
00430 #define fftw_time_to_sec(t) GetPerfSec(t)
00431 
00432 #define FFTW_TIME_MIN (5.0e-2)  /* for Win32 timer */
00433 
00434 /**********************************************
00435  *              CRAY
00436  **********************************************/
00437 #elif defined(_CRAYMPP)         /* Cray MPP system */
00438 
00439 double SECONDR(void);           /* 
00440                                  * I think you have to link with -lsci to
00441                                  * get this 
00442                                  */
00443 
00444 typedef double fftw_time;
00445 #define fftw_get_time() SECONDR()
00446 #define fftw_time_diff(t1,t2) ((t1) - (t2))
00447 #define fftw_time_to_sec(t) (t)
00448 
00449 #define FFTW_TIME_MIN (1.0e-1)  /* for Cray MPP SECONDR timer */
00450 
00451 /**********************************************
00452  *          VANILLA UNIX/ISO C SYSTEMS
00453  **********************************************/
00454 /* last resort: use good old Unix clock() */
00455 #else
00456 
00457 #include <time.h>
00458 
00459 typedef clock_t fftw_time;
00460 
00461 #ifndef CLOCKS_PER_SEC
00462 #ifdef sun
00463 /* stupid sunos4 prototypes */
00464 #define CLOCKS_PER_SEC 1000000
00465 extern long clock(void);
00466 #else                           /* not sun, we don't know CLOCKS_PER_SEC */
00467 #error Please define CLOCKS_PER_SEC
00468 #endif
00469 #endif
00470 
00471 #define fftw_get_time() clock()
00472 #define fftw_time_diff(t1,t2) ((t1) - (t2))
00473 #define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)
00474 
00475 /*
00476  * ***VERY*** conservative constant: this says that a
00477  * measurement must run for 200ms in order to be valid.
00478  * You had better check the manual of your machine
00479  * to discover if it can do better than this
00480  */
00481 #define FFTW_TIME_MIN (2.0e-1)  /* for default clock() timer */
00482 
00483 #endif                          /* UNIX clock() */
00484 
00485 /* take FFTW_TIME_REPEAT measurements... */
00486 #ifndef FFTW_TIME_REPEAT
00487 #define FFTW_TIME_REPEAT 4
00488 #endif
00489 
00490 /* but do not run for more than FFTW_TIME_LIMIT seconds while measuring one FFT */
00491 #ifndef FFTW_TIME_LIMIT
00492 #define FFTW_TIME_LIMIT 2.0
00493 #endif
00494 
00495 #ifdef __cplusplus
00496 }                               /* extern "C" */
00497 
00498 #endif                          /* __cplusplus */
00499 
00500 #endif                          /* FFTW_INT_H */

Generated on Wed Dec 15 21:20:28 2004 for vuVolume by  doxygen 1.3.9.1