

#ifndef NTL_FFT__H
#define NTL_FFT__H

#include <NTL/ZZ.h>
#include <NTL/vector.h>
#include <NTL/vec_long.h>
#include <NTL/SmartPtr.h>
#include <NTL/LazyTable.h>

NTL_OPEN_NNS

#define NTL_FFTFudge (4)
// Fudge factor used when selecting the number of FFT primes
// needed for polynomial multiplication in ZZ_pX and zz_pX.
// Set at 4, this allows for two FFT reps to be added or
// subtracted once before performing CRT, and still leaves
// a reasonable margin for error.
// Don't change this!

#define NTL_FFTMaxRootBnd (NTL_SP_NBITS-2)
// Absolute maximum root bound for FFT primes: two less than
// NTL_SP_NBITS, which is defined elsewhere (presumably the bit
// length of single-precision moduli -- confirm).
// Don't change this!

#if (NTL_FFTMaxRootBnd < 25)
#define NTL_FFTMaxRoot  NTL_FFTMaxRootBnd
#else
#define NTL_FFTMaxRoot (25)
#endif
// Root bound actually used for FFT primes: the absolute bound
// NTL_FFTMaxRootBnd, capped at 25.  The cap avoids large tables
// and excess precomputation, and keeps the number of FFT primes
// needed small.
// With the cap in effect we can multiply polynomials of degree
// less than 2^24.
// The cap can be increased, with a slight performance penalty.




// A pair of precomputed vectors; this is the element type of the
// FFTMultipliers table that is handed to the table-driven FFT routine.
class FFTVectorPair {
public:
   // NOTE(review): presumably precomputed powers of the root of unity
   // for one FFT level -- confirm against FFT.c
   Vec<long> wtab_precomp;
   // NOTE(review): presumably the mulmod preconditioning values that go
   // with wtab_precomp, entry for entry -- confirm against FFT.c
   Vec<mulmod_precon_t> wqinvtab_precomp;
};

// Table of FFTVectorPair entries with room for NTL_FFTMaxRoot+1 slots
// (presumably one slot per transform level 0..NTL_FFTMaxRoot -- confirm).
typedef LazyTable<FFTVectorPair, NTL_FFTMaxRoot+1> FFTMultipliers;


// Bundles the two multiplier tables that belong to one FFT prime.
class FFTMulTabs {
public:
   // passed to FFT() by FFTFwd, together with RootTable
   FFTMultipliers MulTab;
   // passed to FFT() by FFTRev, together with RootInvTable
   FFTMultipliers InvMulTab;
};

class zz_pInfoT; // forward reference, defined in lzz_p.h
// Only the name is needed in this header: FFTPrimeInfo holds a
// SmartPtr<zz_pInfoT> member.


// Per-prime data for one FFT prime: the prime itself plus the
// precomputed tables that the inline FFT wrappers in this header
// (FFTFwd, FFTRev, FFTMulTwoInv) read from.
struct FFTPrimeInfo {
   long q;   // the prime itself
   double qinv;   // 1/((double) q)

   SmartPtr<zz_pInfoT> zz_p_context; 
   // pointer to corresponding zz_p context, which points back to this 
   // object in the case of a non-user FFT prime

   Vec<long> RootTable;
   //   RootTable[j] = w^{2^{MaxRoot-j}},
   //                  where w is a primitive 2^MaxRoot root of unity
   //                  for q
   // Used by FFTFwd.

   Vec<long> RootInvTable;
   // RootInvTable[j] = 1/RootTable[j] mod q
   // Used by FFTRev.

   Vec<long> TwoInvTable;
   // TwoInvTable[j] = 1/2^j mod q
   // Used by FFTMulTwoInv to scale a length-2^k vector by 1/2^k.

   Vec<mulmod_precon_t> TwoInvPreconTable;
   // mulmod preconditioning data:
   // TwoInvPreconTable[j] is passed alongside TwoInvTable[j]
   // to VectorMulModPrecon.

   UniquePtr< FFTMulTabs > bigtab;
   // optional multiplier tables; when NTL_FFT_BIGTAB is defined and
   // this pointer is non-null, FFTFwd/FFTRev call the table-driven
   // FFT overload instead of the plain one.

};

void InitFFTPrimeInfo(FFTPrimeInfo& info, long q, long w, bool bigtab);
// Initializes info (defined out of line).
// NOTE(review): presumably q is the FFT prime, w the corresponding root
// of unity, and bigtab requests allocation of info.bigtab -- confirm
// against the definition in FFT.c.

#define NTL_FFT_BIGTAB_LIMIT (350)
// Big tables are only used for the first NTL_FFT_BIGTAB_LIMIT primes.
// TODO: maybe we should have a similar limit for the degree of
// the convolution as well.
// FIXME: This is currently only used in FFT.c -- maybe move there?



#define NTL_MAX_FFTPRIMES (20000)
// For a thread-safe implementation, it is most convenient to
// impose a reasonable upper bound on the number of FFT primes.
// Without this restriction, a growing table would have to be
// relocated in one thread, leaving dangling pointers in 
// another thread.  Each entry in the table is just a pointer,
// so this does not incur too much space overhead.
// One could also implement a 2D-table, which would allocate
// rows on demand, thus reducing wasted space at the price
// of extra arithmetic to actually index into the table.
// This may be an option to consider at some point.

// At the current setting of 20000, on 64-bit machines with 50-bit
// FFT primes, this allows for polynomials with 20000*50/2 = 500K-bit 
// coefficients, while the table itself takes 160KB.


// Table type for the FFT primes: a LazyTable of FFTPrimeInfo with
// room for at most NTL_MAX_FFTPRIMES entries.
typedef LazyTable<FFTPrimeInfo, NTL_MAX_FFTPRIMES> FFTTablesType;

extern FFTTablesType FFTTables;
// a truly GLOBAL variable, shared among all threads
// FFTTables[i] is dereferenced as a pointer to the FFTPrimeInfo
// of the i-th FFT prime by the index-based helpers in this header.


inline long GetFFTPrime(long i)
{
   return FFTTables[i]->q;
}

inline double GetFFTPrimeInv(long i)
{
   return FFTTables[i]->qinv;
}



long CalcMaxRoot(long p);
// calculates max power of two supported by this FFT prime.
// NOTE(review): presumably p is the prime and the result is an
// exponent k (2^k-point transforms) -- confirm against FFT.c.

void UseFFTPrime(long index);
// allocates and initializes information for FFT prime number index
// NOTE(review): presumably this is what populates FFTTables[index]
// -- confirm against FFT.c.


void FFT(long* A, const long* a, long k, long q, const long* root);
// the low-level FFT routine.
// computes a 2^k point FFT modulo q, using the table root for the roots.
// a is the input and A the output; FFTRev1 in this header relies on a
// subsequent in-place step, but whether A == a is allowed HERE is not
// visible from this header -- confirm against FFT.c.

void FFT(long* A, const long* a, long k, long q, const long* root, const FFTMultipliers& tab);
// Overload of the low-level FFT routine that additionally takes a table
// of multipliers; FFTFwd/FFTRev call it when NTL_FFT_BIGTAB is defined
// and the prime's bigtab pointer is non-null.


inline
void FFTFwd(long* A, const long *a, long k, const FFTPrimeInfo& info)
// Slightly higher level interface: 2^k point FFT of a into A modulo
// info.q, using info.RootTable for the roots.  When NTL_FFT_BIGTAB is
// defined and info.bigtab is set, the table-driven FFT overload is
// used with info.bigtab->MulTab.
{
   const long *root = &info.RootTable[0];

#ifdef NTL_FFT_BIGTAB
   if (info.bigtab) {
      FFT(A, a, k, info.q, root, info.bigtab->MulTab);
      return;
   }
#endif

   // plain transform: big tables disabled or not available for this prime
   FFT(A, a, k, info.q, root);
}

inline 
void FFTFwd(long* A, const long *a, long k, long i)
{
   FFTFwd(A, a, k, *FFTTables[i]);
}

inline
void FFTRev(long* A, const long *a, long k, const FFTPrimeInfo& info)
// Slightly higher level interface: 2^k point FFT of a into A modulo
// info.q, using info.RootInvTable (the inverse roots).  When
// NTL_FFT_BIGTAB is defined and info.bigtab is set, the table-driven
// FFT overload is used with info.bigtab->InvMulTab.
// No scaling by 1/2^k is done here; see FFTMulTwoInv / FFTRev1.
{
   const long *root_inv = &info.RootInvTable[0];

#ifdef NTL_FFT_BIGTAB
   if (info.bigtab) {
      FFT(A, a, k, info.q, root_inv, info.bigtab->InvMulTab);
      return;
   }
#endif

   // plain transform: big tables disabled or not available for this prime
   FFT(A, a, k, info.q, root_inv);
}

inline
void FFTRev(long* A, const long *a, long k, long i)
{
   FFTRev(A, a, k, *FFTTables[i]);
}

inline
void FFTMulTwoInv(long* A, const long *a, long k, const FFTPrimeInfo& info)
{
   VectorMulModPrecon(1L << k, A, a, info.TwoInvTable[k], info.q, 
                      info.TwoInvPreconTable[k]);
}

inline
void FFTMulTwoInv(long* A, const long *a, long k, long i)
{
   FFTMulTwoInv(A, a, k, *FFTTables[i]);
}

inline 
void FFTRev1(long* A, const long *a, long k, const FFTPrimeInfo& info)
// FFTRev + FFTMulTwoInv:
// transforms a into A with the inverse roots, then scales the
// 2^k results by 1/2^k mod info.q.
{
   FFTRev(A, a, k, info);
   // scaling is done in place: A is both source and destination
   FFTMulTwoInv(A, A, k, info);
}

inline 
void FFTRev1(long* A, const long *a, long k, long i)
{
   FFTRev1(A, a, k, *FFTTables[i]);
}


long IsFFTPrime(long n, long& w);
// tests if n is an "FFT prime" and returns corresponding root
// NOTE(review): presumably the return value is nonzero on success and
// the root is delivered through w -- confirm against FFT.c.




NTL_CLOSE_NNS

#endif
