00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "LinAlg.h"
00017 #include <math.h>
00018 #if defined(WIN32)
00019 #include <malloc.h>
00020 #else
00021 #include <alloca.h>
00022 #endif
00023
00024 namespace linalg
00025 {
00026 using namespace linalg;
00027
00028
00029
00030
00031
00032
00033 void LazyTransposedMatrix::fill_in(Matrix& m) const
00034 {
00035 register const REAL * rsp = proto.elements;
00036 register REAL * tp = m.elements;
00037
00038
00039
00040
00041 while( tp < m.elements + m.nelems )
00042 {
00043 register const REAL * sp = rsp++;
00044
00045 while( sp < proto.elements + proto.nelems )
00046 *tp++ = *sp, sp += proto.nrows;
00047 }
00048 assert( tp == m.elements + m.nelems &&
00049 rsp == proto.elements + proto.nrows );
00050
00051 }
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064 Matrix& Matrix::operator *= (const Matrix& source)
00065 {
00066 is_valid();
00067 source.is_valid();
00068
00069 if( row_lwb != source.col_lwb || ncols != source.nrows ||
00070 col_lwb != source.col_lwb || ncols != source.ncols )
00071 info(), source.info(),
00072 _error("matrices above are unsuitable for the inplace multiplication");
00073
00074
00075 #ifdef WIN32
00076 REAL * const one_row = (REAL *)malloc(ncols*sizeof(REAL));
00077 #else
00078 REAL * const one_row = (REAL *)alloca(ncols*sizeof(REAL));
00079 #endif
00080 const REAL * one_row_end = &one_row[ncols];
00081
00082 register REAL * trp = elements;
00083 for(; trp < &elements[nrows]; trp++)
00084 {
00085 register REAL *wrp, *orp;
00086 for(wrp=trp,orp=one_row; orp < one_row_end;)
00087 *orp++ = *wrp, wrp += nrows;
00088
00089 register REAL *scp=source.elements;
00090 for(wrp=trp; wrp < elements+nelems; wrp += nrows)
00091 {
00092 register double sum = 0;
00093 for(orp=one_row; orp < one_row_end;)
00094 sum += *orp++ * *scp++;
00095 *wrp = sum;
00096 }
00097 }
00098
00099 return *this;
00100 }
00101
00102
00103
00104
00105 void Matrix::mult(const Matrix& A, const Matrix& B)
00106 {
00107 A.is_valid();
00108 B.is_valid();
00109 is_valid();
00110
00111 if( A.ncols != B.nrows || A.col_lwb != B.row_lwb )
00112 A.info(), B.info(),
00113 _error("matrices above cannot be multiplied");
00114 if( nrows != A.nrows || ncols != B.ncols ||
00115 row_lwb != A.row_lwb || col_lwb != B.col_lwb )
00116 A.info(),B.info(),info(),
00117 _error("product A*B is incompatible with the given matrix");
00118
00119 register REAL * arp;
00120 REAL * bcp = B.elements;
00121 register REAL * cp = elements;
00122 while( cp < elements + nelems )
00123 {
00124 for(arp = A.elements; arp < A.elements + A.nrows; )
00125 {
00126 register double cij = 0;
00127 register REAL * bccp = bcp;
00128 while( arp < A.elements + A.nelems )
00129 cij += *bccp++ * *arp, arp += A.nrows;
00130 *cp++ = cij;
00131 arp -= A.nelems - 1;
00132 }
00133 bcp += B.nrows;
00134 }
00135
00136 assert( cp == elements + nelems && bcp == B.elements + B.nelems );
00137 }
00138
00139 #if 0
00140
00141
00142
00143
00144 void Matrix::_AtmultB(const Matrix& A, const Matrix& B)
00145 {
00146 A.is_valid();
00147 B.is_valid();
00148
00149 if( A.nrows != B.nrows || A.row_lwb != B.row_lwb )
00150 A.info(), B.info(),
00151 _error("matrices above are unsuitable for A'B multiplication");
00152
00153 allocate(A.ncols,B.ncols,A.col_lwb,B.col_lwb);
00154
00155 register REAL * acp;
00156 REAL * bcp = B.elements;
00157 register REAL * cp = elements;
00158 while( cp < elements + nelems )
00159 {
00160 for(acp = A.elements; acp<A.elements + A.nelems;)
00161 {
00162 register double cij = 0;
00163 register REAL * bccp = bcp;
00164 for(register int i=0; i<A.nrows; i++)
00165 cij += *bccp++ * *acp++;
00166 *cp++ = cij;
00167 }
00168 bcp += B.nrows;
00169 }
00170
00171 assert( cp == elements + nelems && bcp == B.elements + B.nelems );
00172 }
00173 #endif
00174 }