///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///
/// =========================================================================
// direct solver CHOLMOD, seq & dis implementations
//
// Note: why a dis implementation based on cholmod?
//
// Because when dis_ext_nnz == 0, the matrix is block diagonal.
// In that case cholmod is better than mumps, which initializes stuff
// for the distributed case.
// This could happen e.g. for a block-diagonal mass matrix "Pkd".
// This also occurs when nproc==1.
//
#include "rheolef/config.h"
#ifdef _RHEOLEF_HAVE_CHOLMOD
#include "solver_cholmod.h"

namespace rheolef {
using namespace std;

// =========================================================================
// cholmod utilities
// =========================================================================
// Int=int or long: see cholmod_l_demo.c
//                  and cholmod_internal.h with #define DLONG
// Build a CHOLMOD triplet (coordinate format) copy of the csr matrix `a`.
//
// Every stored entry of `a` is copied as (row, column, value); the triplet is
// flagged unsymmetric (stype 0), so no triangular part is dropped here.
//
// a      : the matrix to copy
// common : CHOLMOD workspace/status object used for the allocation
// return : a triplet allocated by CHOLMOD on `common`
//          (this file releases it with cholmod_free_triplet)
//
// Stops via check_macro when the triplet allocation fails.
template<class M>
static
cholmod_triplet*
load_triplet_int (const csr<double,M>& a, cholmod_common *common)
{
    // all iterator types follow the memory model M of the argument
    typedef typename csr<double,M>::size_type           size_type;
    typedef typename csr<double,M>::const_iterator      const_iterator;
    typedef typename csr<double,M>::const_data_iterator const_data_iterator;

    common->status = CHOLMOD_OK;
    const size_t nrow = a.nrow();
    const size_t ncol = a.ncol();
    const size_t nnz  = a.nnz();
    const Int STYPE_UNSYMMETRIC = 0;

    // 1) quick return for empty matrix
    if (nrow == 0 || ncol == 0 || nnz == 0) {
        return cholmod_allocate_triplet (nrow, ncol, 0, STYPE_UNSYMMETRIC, CHOLMOD_REAL, common);
    }
    // 2) allocate the triplets
    cholmod_triplet* T = cholmod_allocate_triplet (nrow, ncol, nnz, STYPE_UNSYMMETRIC, CHOLMOD_REAL, common);
    check_macro (T != 0 && common->status >= CHOLMOD_OK, "allocation problem");

    // 3) copy triplets
    // TODO: could T->i, T->j, T->x point directly into the storage of a ?
    //       T->i has no counterpart in a: it would have to be allocated
    //       T->j : possible, is size_t* instead of int* or long* but could be fixed
    //       T->x : possible, as double*
    //  but rheolef uses a table of (j,val) pairs instead of two separate tables: no.
    Int*    Ti = (Int*)(T->i);
    Int*    Tj = (Int*)(T->j);
    double* Tx = (double*)(T->x);
    const_iterator ia = a.begin();
    size_type q = 0; // running index in the triplet arrays
    for (size_type i = 0, n = a.nrow(); i < n; i++) {
        for (const_data_iterator p = ia[i]; p < ia[i+1]; p++, q++) {
            Ti[q] = i;
            Tj[q] = (*p).first;
            Tx[q] = (*p).second;
        }
    }
    T->nnz   = a.nnz();
    T->stype = STYPE_UNSYMMETRIC;
    T->dtype = DTYPE;
    return T;
}
template<class M>
static
void
proxy_dense (const vec<double,M>& x, cholmod_dense& x_chol)
{
  x_chol.nrow  = x.size();
  x_chol.ncol  = 1;
  x_chol.nzmax = x.size();
  x_chol.d     = x.size();
  x_chol.x     = (void*)(x.begin().operator->()); // const_cast
  x_chol.z     = 0;
  x_chol.xtype = CHOLMOD_REAL;
  x_chol.dtype = DTYPE;
} 
// =========================================================================
// the class interface
// =========================================================================
// Default constructor: default solver options and no factorization yet.
// The factor and the two solve workspaces start as null pointers;
// solve() checks that l_chol has been set before using it.
template<class T, class M>
solver_cholmod_rep<T,M>::solver_cholmod_rep ()
  : solver_abstract_rep<T,M> (solver_option_type()),
    common (),
    l_chol (0),
    y_work (0),
    e_work (0)
{
  _init();
}
// Build a solver for the matrix `a`: start CHOLMOD, then factorize `a`.
//
// a   : the matrix to factorize (see update_values)
// opt : solver options, forwarded to the solver_abstract_rep base class
template<class T, class M>
solver_cholmod_rep<T,M>::solver_cholmod_rep (const csr<T,M>& a, const solver_option_type& opt)
 : solver_abstract_rep<T,M>(opt), // forward the caller's options, not a default set
   common(), l_chol(0), y_work(0), e_work(0)
{
  _init();
  update_values (a);
}
// Shared constructor step: start CHOLMOD on the `common` member.
// The matching cholmod_finish call is in the destructor.
template<class T, class M>
void solver_cholmod_rep<T,M>::_init ()
{
  cholmod_start (&common);
}
// Release everything CHOLMOD allocated for this solver: the two solve
// workspaces, the factor, then the `common` object itself.
// Finally checks that CHOLMOD's allocation counter is back to zero.
template<class T, class M>
solver_cholmod_rep<T,M>::~solver_cholmod_rep ()
{
  if (e_work != 0) {
    cholmod_free_dense (&e_work, &common);
  }
  if (y_work != 0) {
    cholmod_free_dense (&y_work, &common);
  }
  if (l_chol != 0) {
    cholmod_free_factor (&l_chol, &common);
  }
  cholmod_finish (&common);
#ifdef TO_CLEAN
  cholmod_print_common ("common", &common);
#endif // TO_CLEAN
  // a non-zero counter means some CHOLMOD block was never freed
  check_macro (common.malloc_count == 0, "some memory may be deallocated (internal error)");
}
// (Re)compute the Cholesky factorization of `a`.
//
// Any previous factor is released first. The matrix is copied to a CHOLMOD
// triplet, converted to compressed-column form, analyzed and factorized;
// the temporary triplet and sparse copies are released before returning.
//
// a : the matrix to factorize; it is used as a symmetric matrix, and only
//     the upper triangular part of the copy is read by CHOLMOD (see below)
//
// Stops via check_macro when a CHOLMOD step fails, or when the
// factorization does not complete with status CHOLMOD_OK (e.g. the matrix
// is not positive definite). In that case l_chol is left null.
template<class T, class M>
void
solver_cholmod_rep<T,M>::update_values (const csr<T,M>& a)
{
  if (l_chol) cholmod_free_factor (&l_chol, &common);

  // 1) csr -> triplet -> compressed-column
  cholmod_triplet* a_triplet = load_triplet_int (a, &common);
  check_macro (a_triplet != 0, "update_values: triplet allocation failed");
  cholmod_sparse* a_chol = cholmod_triplet_to_sparse (a_triplet, 0, &common);
  cholmod_free_triplet (&a_triplet, &common);
  check_macro (a_chol != 0, "update_values: triplet to sparse conversion failed");

  // 2) the triplet holds the full matrix with stype 0 (unsymmetric).
  // With stype 0, CHOLMOD analyzes and factorizes A*A' instead of A, and the
  // solve would then return the solution of (A*A')x=b. Flag the copy as
  // symmetric with the upper part stored: the lower part is then ignored.
  a_chol->stype = 1;

  // 3) symbolic analysis, then numerical factorization;
  // the status is read before cholmod_free_sparse, which may reset it
  l_chol = cholmod_analyze (a_chol, &common);
  const bool analyzed   = (l_chol != 0);
  const bool factorized = analyzed
                       && cholmod_factorize (a_chol, l_chol, &common) != 0
                       && common.status == CHOLMOD_OK;
  cholmod_free_sparse (&a_chol, &common);

  // never keep a partial factor: solve() only tests l_chol != 0
  if (analyzed && !factorized) cholmod_free_factor (&l_chol, &common);

  check_macro (analyzed,   "update_values: cholmod_analyze failed");
  check_macro (factorized, "update_values: cholmod_factorize failed (HINT: matrix may be not symmetric positive definite)");
}
// Solve the linear system for the right-hand side `b`, using the factor
// computed by update_values.
//
// b      : right-hand side
// return : the solution, with the same ownership as b;
//          b itself when b is empty
//
// Stops via check_macro when no factor is available or when CHOLMOD
// reports a failure.
template<class T, class M>
vec<T,M>
solver_cholmod_rep<T,M>::solve (const vec<T,M>& b) const
{
  check_macro (l_chol != 0, "solve: solver may be initialized (HINT: call solver.update(matrix))");
  if (b.size() == 0) return b;
  vec<T,M> x(b.ownership(), 0);

  // b and x are handed to CHOLMOD as views on their own storage:
  // the solution is written directly into x, without a copy
  cholmod_dense pb_chol; proxy_dense (b, pb_chol); cholmod_dense* b_chol = &pb_chol;
  cholmod_dense px_chol; proxy_dense (x, px_chol); cholmod_dense* x_chol = &px_chol;

  // y_work and e_work are workspaces kept between calls; the destructor frees them
  const int solved = cholmod_solve2 (CHOLMOD_A, l_chol, b_chol, 0, &x_chol, 0, &y_work, &e_work, &common);
  check_macro (solved != 0, "solve: cholmod_solve2 failed");

  // NOTE(review): CHOLMOD is expected to reuse *X_Handle when its shape matches;
  // if it had allocated a new block instead, x would not hold the solution
  check_macro (x_chol == &px_chol, "solve: cholmod_solve2 reallocated the solution (internal error)");
  return x;
}
// Solve with the transposed matrix: simply delegates to solve(b).
// NOTE(review): presumably valid because the factorized matrix is
// symmetric, so the transposed system is the same one - confirm.
template<class T, class M>
vec<T,M>
solver_cholmod_rep<T,M>::trans_solve (const vec<T,M>& b) const
{
  vec<T,M> x = solve (b);
  return x;
}
// ----------------------------------------------------------------------------
// instantiation in library
// ----------------------------------------------------------------------------
// TODO: code is only valid here for T=double

// explicit instantiation for the sequential memory model
template class solver_cholmod_rep<double,sequential>;

// explicit instantiation for the distributed memory model, only when MPI is available
#ifdef _RHEOLEF_HAVE_MPI
template class solver_cholmod_rep<double,distributed>;
#endif // _RHEOLEF_HAVE_MPI

} // namespace rheolef
#endif // _RHEOLEF_HAVE_CHOLMOD
