1 /++
2 $(H1 Lubeck - Linear Algebra)
3 
4 Authors: Ilya Yaroshenko, Lars Tandle Kyllingstad (SciD author)
5 +/
6 module lubeck;
7 
8 import cblas : Diag;
9 import mir.blas;
10 import mir.internal.utility : realType, isComplex;
11 import mir.lapack;
12 import mir.math.common;
13 import mir.ndslice.allocation;
14 import mir.ndslice.dynamic: transposed;
15 import mir.ndslice.slice;
16 import mir.ndslice.topology;
17 import mir.ndslice.traits : isMatrix;
18 import mir.utility;
19 import std.meta;
20 import std.traits;
21 import std.typecons: Flag, Yes, No;
22 public import mir.lapack: lapackint;
23 
24 version(LDC)
25     import ldc.attributes: fastmath;
26 else
27     enum { fastmath };
28 
29 private template IterationType(Iterator)
30 {
31     alias T = Unqual!(typeof(Iterator.init[0]));
32     static if (isIntegral!T || is(T == real))
33         alias IterationType = double;
34     else
35     static if (is(T == creal))
36         alias IterationType = cdouble;
37     else
38     {
39         static assert(
40             is(T == double) ||
41             is(T == float) ||
42             is(T == cdouble) ||
43             is(T == cfloat));
44         alias IterationType = T;
45     }
46 }
47 
48 /++
49 Gets the type that can be used with Blas routines that all types can be implicitly converted to. 
50 
51 +/
52 alias BlasType(Iterators...) =
53     CommonType!(staticMap!(IterationType, Iterators));
54 
55 /++
56 General matrix-matrix multiplication. Allocates result to an uninitialized slice using GC.
57 Params:
58     a = m(rows) x k(cols) matrix
59     b = k(rows) x n(cols) matrix
60 Result: 
61     m(rows) x n(cols)
62 +/
63 Slice!(BlasType!(IteratorA, IteratorB)*, 2)
64     mtimes(IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
65         Slice!(IteratorA, 2, kindA) a,
66         Slice!(IteratorB, 2, kindB) b)
67 {
68     assert(a.length!1 == b.length!0);
69 
70     // reallocate data if required
71     alias A = BlasType!IteratorA;
72     alias B = BlasType!IteratorB;
73     alias C = CommonType!(A, B);
74     static if (!is(Unqual!IteratorA == C*))
75         return .mtimes(a.as!C.slice, b);
76     else
77     static if (!is(Unqual!IteratorB == C*))
78         return .mtimes(a, b.as!C.slice);
79     else
80     {
81         static if (kindA != Contiguous)
82             if (a._stride!0 != 1 && a._stride!1 != 1
83                 || a._stride!0 <= 0
84                 || a._stride!1 <= 0)
85                 return .mtimes(a.slice, b);
86 
87         static if (kindB != Contiguous)
88             if (b._stride!0 != 1 && b._stride!1 != 1
89                 || b._stride!0 <= 0
90                 || b._stride!1 <= 0)
91                 return .mtimes(a, b.slice);
92 
93         auto c = uninitSlice!C(a.length!0, b.length!1);
94 
95         if (a.length!1 == 1 && b.length!0 == 1)
96         {
97             c[] = cast(C) 0;
98             ger(cast(C)1, a.front!1, b.front, c);
99         }
100         else
101         {
102             gemm(cast(C)1, a, b, cast(C)0, c);
103         }
104         return c;
105     }
106 }
107 
108 ///
109 unittest
110 {
111     import mir.ndslice;
112 
113     auto a =
114         [-5,  1,  7, 7, -4,
115          -1, -5,  6, 3, -3,
116          -5, -2, -3, 6,  0].sliced(3, 5);
117 
118     auto b = slice!double(5, 4);
119     b[] =
120         [[-5, -3,  3,  1],
121          [ 4,  3,  6,  4],
122          [-4, -2, -2,  2],
123          [-1,  9,  4,  8],
124          [ 9, 8,  3, -2]];
125 
126     assert(mtimes(a, b) ==
127         [[-42,  35,  -7, 77],
128          [-69, -21, -42, 21],
129          [ 23,  69,   3, 29]]
130         );
131 }
132 
133 /// ger specialized case in mtimes
134 unittest
135 {
136     import mir.ndslice;
137 
138     // from https://github.com/kaleidicassociates/lubeck/issues/8
139     {
140         auto a = [1.0f, 2.0f].sliced(2, 1);
141         auto b = [1.0f, 2.0f].sliced(2, 1);
142         assert(mtimes(a, b.transposed) == [[1, 2], [2, 4]]);
143     }
144     {
145         auto a = [1.0, 2.0].sliced(1, 2);
146         auto b = [1.0, 2.0].sliced(1, 2);
147         assert(mtimes(a.transposed, b) == [[1, 2], [2, 4]]);
148     }
149 }
150 
151 ///
152 unittest
153 {
154     import mir.ndslice;
155 
156     // from https://github.com/kaleidicassociates/lubeck/issues/3
157     Slice!(float*, 2) a = slice!float(1, 1);
158     Slice!(float*, 2, Universal) b1 = slice!float(16, 1).transposed;
159     Slice!(float*, 2) b2 = slice!float(1, 16);
160 
161     a[] = 3;
162     b1[] = 4;
163     b2[] = 4;
164 
165     // Confirm that this message does not appear
166     // Outputs: ** On entry to SGEMM  parameter number  8 had an illegal value
167     assert(a.mtimes(b1) == a.mtimes(b2));
168 }
169 
170 /++
171 General matrix-matrix multiplication. Allocates result to an uninitialized slice using GC.
172 Params:
173     a = m(rows) x k(cols) matrix
174     b = k(rows) x 1(cols) vector
175 Result:
176     m(rows) x 1(cols)
177 +/
178 Slice!(BlasType!(IteratorA, IteratorB)*)
179     mtimes(IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
180         Slice!(IteratorA, 2, kindA) a,
181         Slice!(IteratorB, 1, kindB) b)
182 {
183     assert(a.length!1 == b.length!0);
184 
185     // reallocate data if required
186     alias A = BlasType!IteratorA;
187     alias B = BlasType!IteratorB;
188     alias C = CommonType!(A, B);
189     static if (!is(Unqual!IteratorA == C*))
190         return .mtimes(a.as!C.slice, b);
191     else
192     static if (!is(Unqual!IteratorB == C*))
193         return .mtimes(a, b.as!C.slice);
194     else
195     {
196         static if (kindA != Contiguous)
197             if (a._stride!0 != 1 && a._stride!1 != 1
198                 || a._stride!0 <= 0
199                 || a._stride!1 <= 0)
200                 return .mtimes(a.slice, b);
201 
202         static if (kindB != Contiguous)
203             if (b._stride!1 <= 0)
204                 return .mtimes(a, b.slice);
205 
206         auto c = uninitSlice!C(a.length!0);
207         gemv(cast(C)1, a, b, cast(C)0, c);
208         return c;
209     }
210 }
211 
212 /++
213 General matrix-matrix multiplication.
214 Params:
215     a = 1(rows) x k(cols) vector
216     b = k(rows) x n(cols) matrix
217 Result:
218     1(rows) x n(cols)
219 +/
220 Slice!(BlasType!(IteratorA, IteratorB)*)
221     mtimes(IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
222         Slice!(IteratorB, 1, kindB) a,
223         Slice!(IteratorA, 2, kindA) b,
224         )
225 {
226     return .mtimes(b.universal.transposed, a);
227 }
228 
229 ///
230 unittest
231 {
232     import mir.ndslice;
233 
234     auto a =
235         [-5,  1,  7, 7, -4,
236          -1, -5,  6, 3, -3,
237          -5, -2, -3, 6,  0]
238             .sliced(3, 5)
239             .universal
240             .transposed;
241 
242     auto b = slice!double(5);
243     b[] = [-5, 4,-4,-1, 9];
244 
245     assert(mtimes(b, a) == [-42, -69, 23]);
246 }
247 
248 /++
249 Vector-vector multiplication (dot product).
250 Params:
251     a = 1(rows) x k(cols) vector
252     b = k(rows) x 1(cols) matrix
253 Result:
254     scalar
255 +/
256 CommonType!(BlasType!IteratorA, BlasType!IteratorB)
257     mtimes(IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
258         Slice!(IteratorB, 1, kindB) a,
259         Slice!(IteratorA, 1, kindA) b,
260         )
261 {
262     alias A = BlasType!IteratorA;
263     alias B = BlasType!IteratorB;
264     alias C = CommonType!(A, B);
265     static if (is(IteratorB == C*) && is(IteratorA == C*))
266     {
267         return dot(a, b);
268     }
269     else
270     {
271         auto c = cast(typeof(return)) 0;
272         import mir.algorithm.iteration: reduce;
273         return c.reduce!"a + b * c"(a.as!(typeof(return)), b.as!(typeof(return)));
274     }
275 }
276 
277 ///
278 unittest
279 {
280     import mir.ndslice;
281 
282     auto a = [1, 2, 4].sliced;
283     auto b = [3, 4, 2].sliced;
284     assert(a.mtimes(b) == 19);
285 }
286 
287 /++
288 Calculates the inverse of a matrix.
289 +/
290 auto inv(Iterator, SliceKind kind)(Slice!(Iterator, 2, kind) a)
291 in
292 {
293     assert (a.length!0 == a.length!1, "matrix must be square");
294 }
295 body
296 {
297     alias T = BlasType!Iterator;
298 
299     auto m = a.as!T.slice.canonical;
300     auto ipiv = m.length.uninitSlice!lapackint;
301 
302     auto info = getrf!T(m, ipiv);
303     if (info == 0)
304     {
305         info = getri(m, ipiv, m.getri_wq.uninitSlice!T);
306     }
307 
308     import std.exception: enforce;
309     enforce(info == 0, "inv: matrix is singular");
310     return m;
311 }
312 
313 ///
314 unittest
315 {
316     import mir.ndslice;
317 
318     auto a =  [
319         1, 0, 2,
320         2, 2, 0,
321         0, 1, 1]
322         .sliced(3, 3);
323     
324     enum : double { _13 = 1.0/3.0, _16 = 1.0/6.0, _23 = 2.0/3.0 }
325     auto ans = [
326         _13, _13, -_23,
327         -_13,_16, _23,
328         _13, -_16, _13]
329         .sliced(a.shape);
330 
331     import mir.algorithm.iteration: equal;
332     import std.math: approxEqual;
333     assert(equal!((a, b) => a.approxEqual(b, 1e-10L, 1e-10L))(a.inv, ans));
334     assert(equal!((a, b) => a.approxEqual(b, 1e-10L, 1e-10L))(a.as!cdouble.inv.as!double, ans));
335 }
336 
337 ///
338 unittest
339 {
340     import mir.ndslice.topology: iota;
341 
342     try
343     {
344         auto m = [3, 3].iota.inv;
345         assert (false, "Matrix should be detected as singular");
346     }
347     catch (Exception e)
348     {
349         assert (true);
350     }
351 }
352 
353 ///
354 struct SvdResult(T)
355 {
356     ///
357     Slice!(T*, 2) u;
358     ///
359     Slice!(T*) sigma;
360     ///
361     Slice!(T*, 2) vt;
362 }
363 
364 /++
365 Computes the singular value decomposition.
366 
367 Params:
368     matrix = input `M x N` matrix
369     slim = If true the first `min(M,N)` columns of `u` and the first
370         `min(M,N)` rows of `vt` are returned in the ndslices `u` and `vt`.
371 Returns: $(LREF SvdResult). Results are allocated by the GC.
372 +/
373 auto svd(
374         Flag!"allowDestroy" allowDestroy = No.allowDestroy,
375         string algorithm = "gesvd",
376         SliceKind kind,
377         Iterator
378     )(
379         Slice!(Iterator, 2, kind) matrix,
380         Flag!"slim" slim = No.slim,
381     )
382     if (algorithm == "gesvd" || algorithm == "gesdd")
383 {
384     import lapack;
385     alias T = BlasType!Iterator;
386     static if (allowDestroy && kind != Universal && is(Iterstor == T*))
387         alias a = matrix.canonical;
388     else
389         auto a = matrix.as!T.slice.canonical;
390 
391     auto m = cast(lapackint)a.length!1;
392     auto n = cast(lapackint)a.length!0;
393 
394     auto s = uninitSlice!T(min(m, n));
395     auto u = uninitSlice!T(slim ? s.length : m, m);
396     auto vt = uninitSlice!T(n, slim ? s.length : n);
397 
398     static if (algorithm == "gesvd")
399     {
400         auto jobu = slim ? 'S' : 'A';
401         auto jobvt = slim ? 'S' : 'A';
402         auto work = gesvd_wq(jobu, jobvt, a, u.canonical, vt.canonical).uninitSlice!T;
403         auto info = gesvd(jobu, jobvt, a, s, u.canonical, vt.canonical, work);
404         enum msg = "svd: DBDSQR did not converge";
405     }
406     else // gesdd
407     {
408         auto iwork = uninitSlice!lapackint(s.length * 8);
409         auto jobz = slim ? 'S' : 'A';
410         auto work = gesdd_wq(jobz, a, u.canonical, vt.canonical).uninitSlice!T;
411         auto info = gesdd(jobz, a, s, u.canonical, vt.canonical, work, iwork);
412         enum msg = "svd: DBDSDC did not converge, updating process failed";
413     }
414 
415     import std.exception: enforce;
416     enforce(info == 0, msg);
417     return SvdResult!T(vt, s, u); //transposed
418 }
419 
420 ///
421 unittest
422 {
423     import mir.ndslice;
424 
425     auto a =  [
426          7.52,  -1.10,  -7.95,   1.08,
427         -0.76,   0.62,   9.34,  -7.10,
428          5.13,   6.62,  -5.66,   0.87,
429         -4.75,   8.52,   5.75,   5.30,
430          1.33,   4.91,  -5.49,  -3.52,
431         -2.40,  -6.77,   2.34,   3.95]
432         .sliced(6, 4);
433 
434     auto r = a.svd;
435 
436     auto sigma = slice!double(a.shape, 0);
437     sigma.diagonal[] = r.sigma;
438     auto m = r.u.mtimes(sigma).mtimes(r.vt);
439 
440     import mir.algorithm.iteration: equal;
441     import std.math: approxEqual;
442     assert(equal!((a, b) => a.approxEqual(b, 1e-8, 1e-8))(a, m));
443 }
444 
445 ///
446 unittest
447 {
448     import std.typecons: Yes;
449     import mir.ndslice;
450 
451     auto a =  [
452          7.52,  -1.10,  -7.95,   1.08,
453         -0.76,   0.62,   9.34,  -7.10,
454          5.13,   6.62,  -5.66,   0.87,
455         -4.75,   8.52,   5.75,   5.30,
456          1.33,   4.91,  -5.49,  -3.52,
457         -2.40,  -6.77,   2.34,   3.95]
458         .sliced(6, 4);
459 
460     auto r = a.svd(Yes.slim);
461     assert(r.u.shape == [6, 4]);
462     assert(r.vt.shape == [4, 4]);
463 }
464 
465 /++
466 Solve systems of linear equations AX = B for X.
467 Computes minimum-norm solution to a linear least squares problem
468 if A is not a square matrix.
469 +/
470 Slice!(BlasType!(IteratorA, IteratorB)*, 2)
471     mldivide
472     (IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
473         Slice!(IteratorA, 2, kindA) a,
474         Slice!(IteratorB, 2, kindB) b)
475 {
476     import std.exception: enforce;
477     import std.conv: to;
478 
479     assert(a.length!0 == b.length!0);
480 
481     alias A = BlasType!IteratorA;
482     alias B = BlasType!IteratorB;
483     alias C = CommonType!(A, B);
484 
485     auto a_ = a.universal.transposed.as!C.slice.canonical;
486     auto b_ = b.universal.transposed.as!C.slice.canonical;
487 
488     if (a.length!0 == a.length!1)
489     {
490         auto ipiv = a_.length.uninitSlice!lapackint;
491         auto info = gesv(a_, ipiv, b_);
492     }
493     else
494     {
495         static if(!isComplex!C)
496         {
497             size_t liwork = void;
498             auto lwork = gelsd_wq(a_, b_, liwork);
499             auto s = min(a_.length!0, a_.length!1).uninitSlice!C;
500             auto work = lwork.uninitSlice!C;
501             auto iwork = liwork.uninitSlice!lapackint;
502             size_t rank;
503             C rcond = -1;
504 
505             auto info = gelsd(a_, b_, s, rcond, rank, work, iwork);
506         }
507         else
508         {
509             size_t liwork = void;
510             size_t lrwork = void;
511             auto lwork = gelsd_wq(a_, b_, lrwork, liwork);
512             auto s = min(a_.length!0, a_.length!1).uninitSlice!(realType!C);
513             auto work = lwork.uninitSlice!C;
514             auto iwork = liwork.uninitSlice!lapackint;
515             auto rwork = lrwork.uninitSlice!(realType!C);
516             size_t rank;
517             realType!C rcond = -1;
518 
519             auto info = gelsd!C(a_, b_, s, rcond, rank, work, rwork, iwork);
520         }
521 
522         enforce(info == 0, to!string(info) ~ " off-diagonal elements of an intermediate bidiagonal form did not converge to zero.");
523         b_ = b_[0 .. $, 0 .. a_.length!0];
524     }
525 
526     return b_.universal.transposed.slice;
527 }
528 
529 /// ditto
530 Slice!(BlasType!(IteratorA, IteratorB)*)
531     mldivide
532     (IteratorA, SliceKind kindA, IteratorB, SliceKind kindB)(
533         Slice!(IteratorA, 2, kindA) a,
534         Slice!(IteratorB, 1, kindB) b)
535 {
536     return a.mldivide(b.repeat(1).unpack.universal.transposed).front!1.assumeContiguous;
537 }
538 
539 /// AX=B
540 unittest
541 {
542     import std.meta: AliasSeq;
543     import mir.ndslice;
544 
545     foreach(C; AliasSeq!(double, cdouble))
546     {
547         static if(is(C == cdouble))
548             enum transform = "a+0i";
549         else
550             enum transform = "a";
551 
552         auto a = [
553             1, -1,  1,
554             2,  2, -4,
555             -1,  5,  0].sliced(3, 3).map!transform;
556         auto b = [
557             2.0,  0,
558             -6  , -6,
559             9  ,  1].sliced(3, 2).map!transform;
560         auto t = [
561             1.0, -1,
562             2  ,  0,
563             3  ,  1].sliced(3, 2).map!transform;
564 
565         auto x = mldivide(a, b);
566         assert(x == t);
567     }
568 }
569 
570 /// Ax=B
571 unittest
572 {
573     import std.meta: AliasSeq;
574     import mir.ndslice;
575 
576     foreach(C; AliasSeq!(double, cdouble))
577     {
578         static if(is(C == cdouble))
579             enum transform = "a+0i";
580         else
581             enum transform = "a";
582 
583         auto a = [
584             1, -1,  1,
585             2,  2, -4,
586             -1,  5,  0].sliced(3, 3).map!transform;
587         auto b = [
588             2.0,
589             -6  ,
590             9  ].sliced(3).map!transform;
591         auto t = [
592             1.0,
593             2  ,
594             3  ].sliced(3).map!transform;
595 
596         auto x = mldivide(a, b);
597         assert(x == t);
598     }
599 }
600 
601 /// Least-Squares Solution of Underdetermined System
602 unittest
603 {
604     import std.meta: AliasSeq;
605     import mir.ndslice;
606 
607     foreach(C; AliasSeq!(double, )) //cdouble fails for DMD>=2085
608     {
609         static if(is(C == cdouble))
610             enum transform = "a+0i";
611         else
612             enum transform = "a";
613 
614         auto a = [
615             -0.57,  -1.28,  -0.39,   0.25,
616             -1.93,   1.08,  -0.31,  -2.14,
617             2.30,   0.24,   0.40,  -0.35,
618             -1.93,   0.64,  -0.66,   0.08,
619             0.15,   0.30,   0.15,  -2.13,
620             -0.02,   1.03,  -1.43,   0.50,
621             ].sliced(6, 4).map!transform;
622 
623         auto b = [
624         -2.67,
625         -0.55,
626         3.34,
627         -0.77,
628         0.48,
629         4.10,
630         ].sliced.map!transform;
631 
632         auto x = [
633             1.5339,
634             1.8707,
635             -1.5241,
636             0.0392].sliced.map!transform;
637 
638         import std.math: approxEqual;
639         static if(is(C == cdouble))
640         {
641             assert(a.mldivide(b).map!"a.re".approxEqual(x.map!"a.re"));
642             assert(a.mldivide(b).map!"a.im".approxEqual(x.map!"a.im"));
643         }
644         else
645         {
646             assert(a.mldivide(b).approxEqual(x));
647         }
648     }
649 }
650 
651 /// Principal component analises result.
652 struct PcaResult(T)
653 {
654     /// Principal component coefficients, also known as loadings.
655     Slice!(T*, 2) coeff;
656     /// Principal component scores.
657     Slice!(T*, 2) score;
658     /// Principal component variances.
659     Slice!(T*) latent;
660 }
661 
662 /++
663 Principal component analysis of raw data.
664 
665 Params:
666     matrix = input `M x N` matrix, where 'M (rows)>= N(cols)'
667     cc = Flag to centern columns. True by default.
668 Returns: $(LREF PcaResult)
669 +/
670 auto pca(Flag!"allowDestroy" allowDestroy = No.allowDestroy, Iterator, SliceKind kind)(Slice!(Iterator, 2, kind) matrix, in Flag!"centerColumns" cc = Yes.centerColumns)
671 in
672 {
673     assert(matrix.length!0 >= matrix.length!1);
674 }
675 body
676 {
677     import mir.math.sum: sum;
678     import mir.algorithm.iteration: maxIndex, eachUploPair;
679     import mir.utility: swap;
680 
681     alias T = BlasType!Iterator;
682     SvdResult!T svdResult;
683     if (cc)
684     {
685         static if (allowDestroy && kind != Universal && is(Iterstor == T*))
686             alias m = matrix;
687         else
688             auto m = matrix.as!T.slice;
689         foreach (col; m.universal.transposed)
690             col[] -= col.sum!"kb2" / col.length;
691         svdResult = m.svd!(Yes.allowDestroy)(Yes.slim);
692     }
693     else
694     {
695         svdResult = matrix.svd!(allowDestroy)(Yes.slim);
696     }
697     with (svdResult)
698     {
699         foreach (row; u)
700             row[] *= sigma;
701         T c = max(0, ptrdiff_t(matrix.length) - cast(bool) cc);
702         foreach (ref s; sigma)
703             s = s * s / c;
704         foreach(size_t i; 0 .. sigma.length)
705         {
706             auto col = vt[i];
707             if (col[col.map!fabs.maxIndex] < 0)
708             {
709                 col[] *= -1;
710                 u[0 .. $, i] *= -1;
711             }
712         }
713         vt.eachUploPair!swap;
714         return PcaResult!T(vt, u, sigma);
715     }
716 }
717 
718 ///
719 unittest
720 {
721     import mir.ndslice;
722 
723     import std.math: approxEqual;
724     import mir.algorithm.iteration: equal;
725 
726     auto ingedients = [
727          7,  26,   6,  60,
728          1,  29,  15,  52,
729         11,  56,   8,  20,
730         11,  31,   8,  47,
731          7,  52,   6,  33,
732         11,  55,   9,  22,
733          3,  71,  17,   6,
734          1,  31,  22,  44,
735          2,  54,  18,  22,
736         21,  47,   4,  26,
737          1,  40,  23,  34,
738         11,  66,   9,  12,
739         10,  68,   8,  12].sliced(13, 4);
740 
741     auto res = ingedients.pca;
742 
743     auto coeff = [
744         -0.067799985695474,  -0.646018286568728,   0.567314540990512,   0.506179559977705,
745         -0.678516235418647,  -0.019993340484099,  -0.543969276583817,   0.493268092159297,
746          0.029020832106229,   0.755309622491133,   0.403553469172668,   0.515567418476836,
747          0.730873909451461,  -0.108480477171676,  -0.468397518388289,   0.484416225289198,
748     ].sliced(4, 4);
749 
750     auto score = [
751          36.821825999449700,  -6.870878154227367,  -4.590944457629745,   0.396652582713912,
752          29.607273420710964,   4.610881963526308,  -2.247578163663940,  -0.395843536696492,
753         -12.981775719737618,  -4.204913183175938,   0.902243082694698,  -1.126100587210615,
754          23.714725720918022,  -6.634052554708721,   1.854742000806314,  -0.378564808384691,
755          -0.553191676624597,  -4.461732123178686,  -6.087412652325177,   0.142384896047281,
756         -10.812490833309816,  -3.646571174544059,   0.912970791674604,  -0.134968810314680,
757         -32.588166608817929,   8.979846284936063,  -1.606265913996588,   0.081763927599947,
758          22.606395499005586,  10.725906457369449,   3.236537714483416,   0.324334774646368,
759          -9.262587237675838,   8.985373347478788,  -0.016909578102172,  -0.543746175981799,
760          -3.283969329640680, -14.157277337500918,   7.046512994833761,   0.340509860960606,
761           9.220031117829379,  12.386080787220454,   3.428342878284624,   0.435152769664895,
762         -25.584908517429557,  -2.781693148152386,  -0.386716066864491,   0.446817950545605,
763         -26.903161834677597,  -2.930971165042989,  -2.445522630195304,   0.411607156409658,
764     ].sliced(13, 4);
765 
766     auto latent = [5.177968780739053, 0.674964360487231, 0.124054300480810, 0.002371532651878].sliced;
767     latent[] *= 100;
768 
769     assert(equal!approxEqual(res.coeff, coeff));
770     assert(equal!approxEqual(res.score, score));
771     assert(equal!approxEqual(res.latent, latent));
772 }
773 
774 /++
775 Computes Moore-Penrose pseudoinverse of matrix.
776 
777 Params:
778     matrix = Input `M x N` matrix.
779     tolerance = The computation is based on SVD and any singular values less than tolerance are treated as zero.
780 Returns: Moore-Penrose pseudoinverse matrix
781 +/
782 Slice!(BlasType!Iterator*, 2)
783     pinv(Flag!"allowDestroy" allowDestroy = No.allowDestroy, Iterator, SliceKind kind)(Slice!(Iterator, 2, kind) matrix, BlasType!Iterator tolerance = BlasType!Iterator.nan)
784 {
785     import mir.algorithm.iteration: find, each;
786     import std.math: nextUp;
787 
788     auto svd = matrix.svd!allowDestroy(Yes.slim);
789     if (tolerance != tolerance)
790     {
791         auto n = svd.sigma.front;
792         auto eps = n.nextUp - n;
793         tolerance = max(matrix.length!0, matrix.length!1) * eps;
794     }
795     auto st = svd.sigma.find!(a => !(a >= tolerance));
796     static if (is(typeof(st) : sizediff_t))
797         alias si = st;
798     else
799         auto si = st[0];
800     auto s = svd.sigma[0 .. $ - si];
801     s.each!"a = 1 / a";
802     svd.vt[0 .. s.length].pack!1.map!"a".zip(s).each!"a.a[] *= a.b";
803     auto v = svd.vt[0 .. s.length].universal.transposed;
804     auto ut = svd.u.universal.transposed[0 .. s.length];
805     return v.mtimes(ut);
806 }
807 
808 ///
809 unittest
810 {
811     import mir.ndslice;
812 
813     auto a = [
814         64,  2,  3, 61, 60,  6,
815          9, 55, 54, 12, 13, 51,
816         17, 47, 46, 20, 21, 43,
817         40, 26, 27, 37, 36, 30,
818         32, 34, 35, 29, 28, 38,
819         41, 23, 22, 44, 45, 19,
820         49, 15, 14, 52, 53, 11,
821          8, 58, 59,  5,  4, 62].sliced(8, 6);
822 
823     auto b = a.pinv;
824 
825     auto result = [
826         0.0177, -0.0165, -0.0164,  0.0174,  0.0173, -0.0161, -0.0160,  0.0170,
827        -0.0121,  0.0132,  0.0130, -0.0114, -0.0112,  0.0124,  0.0122, -0.0106,
828        -0.0055,  0.0064,  0.0060, -0.0043, -0.0040,  0.0049,  0.0045, -0.0028,
829        -0.0020,  0.0039,  0.0046, -0.0038, -0.0044,  0.0064,  0.0070, -0.0063,
830        -0.0086,  0.0108,  0.0115, -0.0109, -0.0117,  0.0139,  0.0147, -0.0141,
831         0.0142, -0.0140, -0.0149,  0.0169,  0.0178, -0.0176, -0.0185,  0.0205];
832 
833     import std.math: approxEqual;
834 
835     assert(b.field.approxEqual(result, 1e-2, 1e-2));
836 }
837 
838 /++
839 Covariance matrix.
840 
841 Params:
842     matrix = matrix whose rows represent observations and whose columns represent random variables.
843 Reuturns:
844     Normalized by `N-1` covariance matrix.
845 +/
846 Slice!(BlasType!Iterator*, 2)
847     cov(Iterator, SliceKind kind)(Slice!(Iterator, 2, kind) matrix)
848 {
849     import mir.math.sum: sum;
850     import mir.algorithm.iteration: each, eachUploPair;
851     alias A = BlasType!Iterator;
852     static if (kind == Contiguous)
853         auto mc = matrix.canonical;
854     else
855         alias mc = matrix;
856     auto m = mc.shape.uninitSlice!A.canonical;
857     auto s = m;
858 
859     auto factor = 1 / A(m.length!0 - 1).sqrt;
860     while(m.length!1)
861     {
862         auto shift = - mc.front!1.sum!A / m.length!0;
863         m.front!1.each!((ref a, b) { a = (b + shift) * factor; })(mc.front!1);
864         mc.popFront!1;
865         m.popFront!1;
866     }
867 
868     auto alpha = cast(A) 1;
869     auto beta = cast(A) 0;
870     auto c = [s.length!1, s.length!1].uninitSlice!A;
871     syrk(Uplo.Upper, alpha, s.universal.transposed, beta, c);
872 
873     c.eachUploPair!"b = a";
874     return c;
875 }
876 
877 ///
878 unittest
879 {
880     import mir.ndslice;
881 
882     import std.stdio;
883     import mir.ndslice;
884 
885     auto c = 8.magic[0..$-1].cov;
886 
887     import std.math: approxEqual;
888     assert(c.field.approxEqual([
889          350.0000, -340.6667, -331.3333,  322.0000,  312.6667, -303.3333, -294.0000,  284.6667,
890         -340.6667,  332.4762,  324.2857, -316.0952, -307.9048,  299.7143,  291.5238, -283.3333,
891         -331.3333,  324.2857,  317.2381, -310.1905, -303.1429,  296.0952,  289.0476, -282.0000,
892          322.0000, -316.0952, -310.1905,  304.2857,  298.3810, -292.4762, -286.5714,  280.6667,
893          312.6667, -307.9048, -303.1429,  298.3810,  293.6190, -288.8571, -284.0952,  279.3333,
894         -303.3333,  299.7143,  296.0952, -292.4762, -288.8571,  285.2381,  281.6190, -278.0000,
895         -294.0000,  291.5238,  289.0476, -286.5714, -284.0952,  281.6190,  279.1429, -276.6667,
896          284.6667, -283.3333, -282.0000,  280.6667,  279.3333, -278.0000, -276.6667,  275.3333]));
897 }
898 
899 /++
900 Matrix determinant.
901 +/
902 auto detSymmetric(Iterator, SliceKind kind)(char store, Slice!(Iterator, 2, kind) a)
903 in
904 {
905     assert(store == 'U' || store == 'L');
906     assert (a.length!0 == a.length!1, "matrix must be square");
907     assert (a.length!0, "matrix must not be empty");
908 }
909 body
910 {
911     import mir.algorithm.iteration: each;
912     import mir.ndslice.topology: diagonal;
913     import mir.math.numeric: Prod;
914 
915     alias T = BlasType!Iterator;
916 
917     auto packed = uninitSlice!T(a.length * (a.length + 1) / 2);
918     auto ipiv = a.length.uninitSlice!lapackint;
919     int sign;
920     Prod!T prod;
921     if (store == 'L')
922     {
923         auto pck = packed.stairs!"+"(a.length);
924         auto gen = a.stairs!"+";
925         pck.each!"a[] = b"(gen);
926         auto info = sptrf(pck, ipiv);
927         if (info > 0)
928             return cast(T) 0;
929         for (size_t j; j < ipiv.length; j++)
930         {
931             auto i = ipiv[j];
932             // 1x1 block at m[k,k]
933             if (i > 0)
934             {
935                 prod.put(pck[j].back);
936                 sign ^= i != j + 1; // i.e. row interchanged with another
937             }
938             else
939             {
940                 i = -i;
941                 auto offDiag = pck[j + 1][$ - 2];
942                 auto blockDet = pck[j].back * pck[j + 1].back - offDiag * offDiag;
943                 prod.put(blockDet);
944                 sign ^= i != j + 1 && i != j + 2; // row interchanged with other
945                 j++;
946             }
947         }
948     }
949     else
950     {
951         auto pck = packed.stairs!"-"(a.length);
952         auto gen = a.stairs!"-";
953         pck.each!"a[] = b"(gen);
954         auto info = sptrf(pck, ipiv);
955         if (info > 0)
956             return cast(T) 0;
957         for (size_t j; j < ipiv.length; j++)
958         {
959             auto i = ipiv[j];
960             // 1x1 block at m[k,k]
961             if (i > 0)
962             {
963                 prod.put(pck[j].front);
964                 sign ^= i != j + 1; // i.e. row interchanged with another
965             }
966             else
967             {
968                 i = -i;
969                 auto offDiag = pck[j][1];
970                 auto blockDet = pck[j].front * pck[j + 1].front - offDiag * offDiag;
971                 prod.put(blockDet);
972                 sign ^= i != j + 1 && i != j + 2; // row interchanged with other
973                 j++;
974             }
975         }
976     }
977     if(sign & 1)
978         prod.x = -prod.x;
979     return prod.value;
980 }
981 
982 /// ditto
983 auto det(Iterator, SliceKind kind)(Slice!(Iterator, 2, kind) a)
984 in
985 {
986     assert (a.length!0 == a.length!1, "matrix must be square");
987 }
988 body
989 {
990     import mir.ndslice.topology: diagonal, zip, iota;
991     import mir.math.numeric: Prod;
992 
993     alias T = BlasType!Iterator;
994 
995     auto m = a.as!T.slice.canonical;
996     auto ipiv = a.length.uninitSlice!lapackint;
997 
998     // LU factorization
999     auto info = m.getrf(ipiv);
1000 
1001     // If matrix is singular, determinant is zero.
1002     if (info > 0)
1003     {
1004         return cast(T) 0;
1005     }
1006 
1007     // The determinant is the product of the diagonal entries
1008     // of the upper triangular matrix. The array ipiv contains
1009     // the pivots.
1010     int sign;
1011     Prod!T prod;
1012     foreach (tup; m.diagonal.zip(ipiv, [ipiv.length].iota(1)))
1013     {
1014         prod.put(tup.a);
1015         sign ^= tup.b != tup.c; // i.e. row interchanged with another
1016     }
1017     if(sign & 1)
1018         prod.x = -prod.x;
1019     return prod.value;
1020 }
1021 
1022 ///
1023 unittest
1024 {
1025     import mir.ndslice;
1026     import mir.math;
1027 
1028     // Check for zero-determinant shortcut.
1029     auto ssing = [4, 2, 2, 1].sliced(2, 2);
1030     auto ssingd = det(ssing);
1031     assert (det(ssing) == 0);
1032     assert (detSymmetric('L', ssing) == 0);
1033 
1034     // General dense matrix.
1035     int dn = 101;
1036     auto d = uninitSlice!double(dn, dn);
1037     foreach (k; 0 .. dn)
1038     foreach (l; 0 .. dn)
1039         d[k,l] = 0.5 * (k == l ? (k + 1) * (k + 1) + 1 : 2 * (k + 1) * (l + 1));
1040 
1041     auto dd = det(d);
1042     import std.math: approxEqual;
1043     assert (approxEqual(dd, 3.539152633479803e289, double.epsilon.sqrt));
1044 
1045     // Symmetric packed matrix
1046     auto spa = [ 1.0, -2, 3, 4, 5, -6, -7, -8, -9, 10].sliced.stairs!"+"(4);
1047     auto sp = [spa.length, spa.length].uninitSlice!double;
1048     import mir.algorithm.iteration: each;
1049     sp.stairs!"+".each!"a[] = b"(spa);
1050     assert (detSymmetric('L', sp).approxEqual(5874.0, double.epsilon.sqrt));
1051     assert (detSymmetric('U', sp.universal.transposed).approxEqual(5874.0, double.epsilon.sqrt));
1052 }
1053 
1054 /++
1055 Eigenvalues and eigenvectors POD.
1056 
1057 See_also: $(LREF eigSymmetric).
1058 +/
1059 struct EigSymmetricResult(T)
1060 {
1061     /// Eigenvalues
1062     Slice!(T*) values;
1063     /// Eigenvectors stored in rows
1064     Slice!(T*, 2) vectors;
1065 }
1066 
1067 /++
1068 Eigenvalues and eigenvectors of symmetric matrix.
1069 
1070 Returns:
1071     $(LREF EigSymmetricResult)
1072 +/
1073 auto eigSymmetric(Flag!"computeVectors" cv = Yes.computeVectors, Iterator, SliceKind kind)(char store, Slice!(Iterator, 2, kind) a)
1074 in
1075 {
1076     assert(store == 'U' || store == 'L');
1077     assert (a.length!0 == a.length!1, "matrix must be square");
1078     assert (a.length!0, "matrix must not be empty");
1079 }
1080 body
1081 {
1082     import mir.algorithm.iteration: each;
1083     import mir.ndslice.topology: diagonal;
1084     import mir.math.numeric: Prod;
1085 
1086     alias T = BlasType!Iterator;
1087 
1088     auto packed = [a.length * (a.length + 1) / 2].uninitSlice!T;
1089     auto w = [a.length].uninitSlice!T;
1090     static if (cv)
1091     {
1092         auto z = [a.length, a.length].uninitSlice!T;
1093     }
1094     else
1095     {
1096         T[1] _vData = void;
1097         auto z = _vData.sliced(1, 1);
1098     }
1099     auto work = [a.length * 3].uninitSlice!T;
1100     size_t info = void;
1101     auto jobz = cv ? 'V' : 'N';
1102     if (store == 'L')
1103     {
1104         auto pck = packed.stairs!"+"(a.length);
1105         auto gen = a.stairs!"+";
1106         pck.each!"a[] = b"(gen);
1107         info = spev!T(jobz, pck, w, z.canonical, work);
1108     }
1109     else
1110     {
1111         auto pck = packed.stairs!"-"(a.length);
1112         auto gen = a.stairs!"-";
1113         pck.each!"a[] = b"(gen);
1114         info = spev!T(jobz, pck, w, z.canonical, work);
1115     }
1116     import std.exception: enforce;
1117     import std.format: format;
1118     enforce (info == 0, format("The algorithm failed to converge." ~
1119         "%s off-diagonal elements of an intermediate tridiagonal form did not converge to zero.", info));
1120     static if (cv)
1121     {
1122         return EigSymmetricResult!T(w, z);
1123     }
1124     else
1125     {
1126         return EigSymmetricResult!T(w);
1127     }
1128 }
1129 
1130 ///
1131 unittest
1132 {
1133     import mir.ndslice;
1134 
1135     import mir.ndslice.slice: sliced;
1136     import mir.ndslice.topology: universal, map;
1137     import mir.ndslice.dynamic: transposed;
1138     import std.math: approxEqual;
1139 
1140     auto a = [
1141         1.0000, 0.5000, 0.3333, 0.2500,
1142         0.5000, 1.0000, 0.6667, 0.5000,
1143         0.3333, 0.6667, 1.0000, 0.7500,
1144         0.2500, 0.5000, 0.7500, 1.0000].sliced(4, 4);
1145 
1146     auto eigr = eigSymmetric('L', a);
1147 
1148     assert(eigr.values.approxEqual([0.2078,0.4078,0.8482,2.5362]));
1149 
1150     auto test = [
1151          0.0693, -0.4422, -0.8105, 0.3778,
1152         -0.3618,  0.7420, -0.1877, 0.5322,
1153          0.7694,  0.0486,  0.3010, 0.5614,
1154         -0.5219, -0.5014,  0.4662, 0.5088]
1155             .sliced(4, 4)
1156             .universal
1157             .transposed;
1158 
1159     foreach (i; 0 .. 4)
1160         assert(eigr.vectors[i].approxEqual(test[i]) ||
1161             eigr.vectors[i].map!"-a".approxEqual(test[i]));
1162 }
1163 
1164 version (unittest)
1165 {
1166 /++
1167 Swaps rows of input matrix
1168 Params
1169     ipiv = pivot points
1170 Returns:
1171     a = shifted matrix
1172 +/
1173     private void moveRows(Iterator, SliceKind kind)
1174                         (Slice!(Iterator, 2, kind) a,
1175                         Slice!(lapackint*) ipiv)
1176     {
1177         import mir.algorithm.iteration: each;
1178         foreach_reverse(i;0..ipiv.length)
1179         {
1180             if(ipiv[i] == i + 1)
1181                 continue;
1182             each!swap(a[i], a[ipiv[i] - 1]);
1183         }
1184     }
1185 }
1186 
1187 unittest
1188 {
1189     import mir.ndslice;
1190 
1191     auto A = 
1192            [ 9,  9,  9,
1193              8,  8,  8,
1194              7,  7,  7,
1195              6,  6,  6,
1196              5,  5,  5,
1197              4,  4,  4,
1198              3,  3,  3,
1199              2,  2,  2,
1200              1,  1,  1,
1201              0,  0,  0 ]
1202              .sliced(10, 3)
1203              .as!double.slice;
1204     auto ipiv = [ lapackint(10), 9, 8, 7, 6, 6, 7, 8, 9, 10 ].sliced(10);
1205     moveRows(A, ipiv);
1206 
1207     auto B = 
1208            [ 0,  0,  0,
1209              1,  1,  1,
1210              2,  2,  2,
1211              3,  3,  3,
1212              4,  4,  4,
1213              5,  5,  5,
1214              6,  6,  6,
1215              7,  7,  7,
1216              8,  8,  8,
1217              9,  9,  9 ]
1218              .sliced(10, 3)
1219              .as!double.slice;
1220 
1221     import mir.algorithm.iteration: equal;
1222     assert(equal!((a, b) => fabs(a - b) < 1e-12)(B, A));
1223 }
1224 
1225 unittest
1226 {
1227     auto A = 
1228            [ 1,  1,  1,
1229              2,  2,  2,
1230              3,  3,  3,
1231              4,  4,  4,
1232              5,  5,  5,
1233              6,  6,  6,
1234              7,  7,  7,
1235              8,  8,  8,
1236              9,  9,  9,
1237              0,  0,  0 ]
1238              .sliced(10, 3)
1239              .as!double.slice;
1240     auto ipiv = [ lapackint(2), 3, 4, 5, 6, 7, 8, 9, 10, 10 ].sliced(10);
1241     moveRows(A, ipiv);
1242     
1243     auto B = 
1244            [ 0,  0,  0,
1245              1,  1,  1,
1246              2,  2,  2,
1247              3,  3,  3,
1248              4,  4,  4,
1249              5,  5,  5,
1250              6,  6,  6,
1251              7,  7,  7,
1252              8,  8,  8,
1253              9,  9,  9 ]
1254              .sliced(10, 3)
1255              .as!double.slice;
1256 
1257     import mir.algorithm.iteration: equal;
1258     assert(equal!((a, b) => fabs(a - b) < 1e-12)(B, A));
1259 }
1260 
1261 ///LUResult consist lu factorization.
1262 struct LUResult(T)
1263 {
1264     /++
1265     Matrix in witch lower triangular is L part of factorization
1266     (diagonal elements of L are not stored), upper triangular
1267     is U part of factorization.
1268     +/
1269     Slice!(T*, 2, Canonical) lut;
1270     /++
1271     The pivot indices, for 1 <= i <= min(M,N), row i of the matrix
1272     was interchanged with row ipiv(i).
1273     +/
1274     Slice!(lapackint*) ipiv;
1275     ///L part of the factorization.
1276     auto l() @property
1277     {
1278         import mir.algorithm.iteration: eachUpper;
1279         auto l = lut.transposed[0..lut.length!1, 0..min(lut.length!0, lut.length!1)].slice.canonical;
1280         l.eachUpper!"a = 0";
1281         l.diagonal[] = 1;
1282         return l;
1283     }
1284     ///U part of the factorization.
1285     auto u() @property
1286     {
1287         import mir.algorithm.iteration: eachLower;
1288         auto u = lut.transposed[0..min(lut.length!0, lut.length!1), 0..lut.length!0].slice.canonical;
1289         u.eachLower!"a = 0";
1290         return u;
1291     }
1292 
1293     ///
1294     auto solve(Flag!"allowDestroy" allowDestroy = No.allowDestroy, Iterator, size_t N, SliceKind kind)(
1295         char trans,
1296         Slice!(Iterator, N, kind) b)
1297     {
1298         return luSolve!(allowDestroy)(trans, lut, ipiv, b);
1299     }
1300 }
1301 
1302 /++
1303 Computes LU factorization of a general 'M x N' matrix 'A' using partial
1304 pivoting with row interchanges.
1305 The factorization has the form:
1306     \A = P * L * U
1307 Where P is a permutation matrix, L is lower triangular with unit
1308 diagonal elements (lower trapezoidal if m > n), and U is upper
1309 triangular (upper trapezoidal if m < n).
1310 Params:
1311     allowDestroy = flag to delete the source matrix.
1312     a = input 'M x N' matrix for factorization.
1313 Returns: $(LREF LUResalt)
1314 +/
1315 auto luDecomp(Flag!"allowDestroy" allowDestroy = No.allowDestroy,
1316               Iterator, SliceKind kind)
1317              (Slice!(Iterator, 2, kind) a)
1318 {
1319     alias T = BlasType!Iterator;
1320     auto ipiv = uninitSlice!lapackint(min(a.length!0, a.length!1));
1321     auto b = a.transposed;
1322     auto m = (allowDestroy && b._stride!1 == 1) ? b.assumeCanonical : a.transposed.as!T.slice.canonical;
1323     
1324     getrf(m, ipiv);
1325     return LUResult!T(m, ipiv);
1326 }
1327 
1328 /++
1329 Solves a system of linear equations
1330     \A * X = B, or
1331     \A**T * X = B
1332 with a general 'N x N' matrix 'A' using the LU factorization computed by luDecomp.
1333 Params:
1334     allowDestroy = flag to delete the source matrix.
1335     lut = factorization of matrix 'A', A = P * L * U.
1336     ipiv = the pivot indices from luDecomp.
1337     b = the right hand side matrix B.
1338     trans = specifies the form of the system of equations:
1339           'N': A * X = B (No transpose)
1340           'T': A**T * X = B (Transpose)
1341           'C': A**T * X = B (Conjugate transpose = Transpose)
1342 Returns:
1343     Return solve of the system linear equations.
1344 +/
1345 auto luSolve(Flag!"allowDestroy" allowDestroy = No.allowDestroy, SliceKind kindB, size_t N, IteratorB, IteratorLU)(
1346     char trans,
1347     Slice!(IteratorLU, 2, Canonical) lut,
1348     Slice!(lapackint*) ipiv,
1349     Slice!(IteratorB, N, kindB) b,
1350     )
1351 in
1352 {
1353     assert(lut.length!0 == lut.length!1, "matrix must be squared");
1354     assert(ipiv.length == lut.length, "size of ipiv must be equal to the number of rows a");
1355     assert(lut.length!1 == b.length!0, "number of columns a should be equal to the number of rows b");
1356 }
1357 body
1358 {
1359     alias LU = BlasType!IteratorLU;
1360     alias B = BlasType!IteratorB;
1361     alias T = CommonType!(LU, B);
1362     static if(is(T* == IteratorLU))
1363         auto lut_ = lut;
1364     else
1365         auto lut_ = lut.as!T.slice.canonical;
1366     
1367     //convect vector to matrix.
1368     static if(N == 1)
1369         auto k = b.sliced(1, b.length);
1370     else
1371         auto k = b.transposed;
1372 
1373     static if(is(IteratorB == T*))
1374         auto m = (allowDestroy && k._stride!1 == 1) ? k.assumeCanonical : k.as!T.slice.canonical;
1375     else
1376         auto m = k.as!T.slice.canonical;
1377     getrs!T(trans, lut_, m, ipiv);
1378     return m.transposed;
1379 }
1380 
1381 unittest
1382 {
1383     auto A =
1384         [ 1,  4, -3,  5,  6,
1385          -2,  8,  5,  7,  8,
1386           3,  4,  7,  9,  1,
1387           2,  4,  6,  3,  2,
1388           6,  8,  3,  5,  2 ]
1389             .sliced(5, 5)
1390             .as!double.slice
1391             .canonical;
1392     auto B =
1393         [ 1,  3,  4,  8,  0,  0,  0,
1394           2,  1,  7,  1,  0,  0,  0,
1395           3,  5,  7,  7,  0,  0,  0,
1396           4,  4,  9,  8,  0,  0,  0,
1397           5,  5,  8,  1,  0,  0,  0 ]
1398             .sliced(5, 7)
1399             .as!double.slice
1400             .universal;
1401 
1402     auto B_ = B[0..$, 0..4];
1403     auto LU = A.luDecomp();
1404     auto m = luSolve('N', LU.lut, LU.ipiv, B_);
1405 
1406     import std.math: approxEqual;
1407     import mir.algorithm.iteration: equal;
1408     assert(equal!approxEqual(mtimes(A, m), B_));
1409 }
1410 
1411 ///
1412 unittest
1413 {
1414     import mir.math;
1415     import mir.ndslice;
1416 
1417     auto A =
1418         [ 1,  4, -3,  5,  6,
1419          -2,  8,  5,  7,  8,
1420           3,  4,  7,  9,  1,
1421           2,  4,  6,  3,  2,
1422           6,  8,  3,  5,  2 ]
1423             .sliced(5, 5)
1424             .as!double.slice
1425             .canonical;
1426     
1427     import mir.random.variable;
1428     import mir.random.algorithm;
1429     auto B = randomSlice(uniformVar(-100, 100), 5, 100);
1430     
1431     auto LU = A.luDecomp();
1432     auto X = LU.solve('N', B);
1433 
1434     import mir.algorithm.iteration: equal;
1435     assert(equal!((a, b) => fabs(a - b) < 1e-12)(mtimes(A, X), B));
1436 }
1437 
1438 unittest
1439 {
1440     auto A =
1441         [ 1,  4, -3,  5,  6,
1442          -2,  8,  5,  7,  8,
1443           3,  4,  7,  9,  1,
1444           2,  4,  6,  3,  2,
1445           6,  8,  3,  5,  2 ]
1446             .sliced(5, 5)
1447             .as!double.slice
1448             .canonical;
1449     auto B =
1450         [ 2,  8,  3,  5,  8,
1451           8,  1,  4,  9,  86,
1452           1,  6,  7,  1,  67,
1453           6,  1,  5,  4,  45,
1454           1,  2,  3,  1,  11 ]
1455             .sliced(5, 5)
1456             .as!double.slice
1457             .universal;
1458     auto C = B.slice;
1459 
1460     auto LU = A.luDecomp();
1461     auto m = luSolve!(Yes.allowDestroy)('N', LU.lut, LU.ipiv, B.transposed);
1462     auto m2 = LU.solve('N', C);
1463 
1464     import std.math: approxEqual;
1465     import mir.algorithm.iteration: equal;
1466     assert(equal!approxEqual(mtimes(A, m), C.transposed));
1467     assert(equal!approxEqual(mtimes(A, m2), C));
1468 }
1469 
1470 unittest
1471 {
1472     auto A =
1473         [ 1,  4, -3,  5,  6,
1474          -2,  8,  5,  7,  8,
1475           3,  4,  7,  9,  1,
1476           2,  4,  6,  3,  2,
1477           6,  8,  3,  5,  2 ]
1478             .sliced(5, 5)
1479             .as!float.slice
1480             .canonical;
1481     auto B = [ 1,  2,  3,  4,  5 ].sliced(5).as!double.slice;
1482     auto C = B.slice.sliced(5, 1);
1483 
1484     auto LU = A.luDecomp();
1485     auto m = luSolve!(Yes.allowDestroy)('N', LU.lut, LU.ipiv, B);
1486 
1487     import std.math: approxEqual;
1488     import mir.algorithm.iteration: equal;
1489     assert(equal!approxEqual(mtimes(A, m), C));
1490 }
1491 
1492 unittest
1493 {
1494     auto A =
1495         [ 1,  4, -3,  5,  6,
1496          -2,  8,  5,  7,  8,
1497           3,  4,  7,  9,  1,
1498           2,  4,  6,  3,  2,
1499           6,  8,  3,  5,  2 ]
1500             .sliced(5, 5)
1501             .as!double.slice
1502             .canonical;
1503     auto B = [ 1,  15,  4,  5,  8,
1504                3,  20,  1,  9,  11 ].sliced(5, 2).as!float.slice;
1505 
1506     auto LU = A.luDecomp();
1507     auto m = luSolve('N', LU.lut, LU.ipiv, B);
1508 
1509     import std.math: approxEqual;
1510     import mir.algorithm.iteration: equal;
1511     assert(equal!approxEqual(mtimes(A, m), B));
1512 }
1513 
1514 unittest
1515 {
1516     auto A =
1517         [ 11,  14, -31,  53,  62,
1518          -92,  83,  52,  74,  83,
1519           31,  45,  73,  96,  17,
1520           23,  14,  65,  35,  26,
1521           62,  28,  34,  51,  25 ]
1522             .sliced(5, 5)
1523             .as!float.slice
1524             .universal;
1525     auto B =
1526         [ 6,  1,  3,  1,  11,
1527          12,  5,  7,  6,  78,
1528           8,  4,  1,  5,  54,
1529           3,  1,  8,  1,  45,
1530           1,  6,  8,  6,  312 ]
1531             .sliced(5, 5)
1532             .as!double.slice;
1533     auto B2 = B.slice;
1534     auto C = B.slice;
1535 
1536     auto LU = luDecomp(A.transposed);
1537     auto m = luSolve!(Yes.allowDestroy)('T', LU.lut, LU.ipiv, B);
1538     auto m2 = luSolve!(Yes.allowDestroy)('N', LU.lut, LU.ipiv, B2);
1539 
1540     import std.math: approxEqual;
1541     import mir.algorithm.iteration: equal;
1542     assert(equal!approxEqual(mtimes(A, m), C));
1543     assert(equal!approxEqual(mtimes(A.transposed, m2), C));
1544 }
1545 
1546 unittest
1547 {
1548     auto A =
1549         [ 54,  93,  14,  44,  33,
1550           51,  85,  28,  81,  75,
1551           89,  17,  15,  44,  58,
1552           75,  80,  18,  35,  14,
1553           21,  48,  72,  21,  88 ]
1554             .sliced(5, 5)
1555             .as!double.slice
1556             .universal;
1557     auto B =
1558         [ 5,  7,  8,  3,  78,
1559           1,  2,  5,  4,  5,
1560           2,  4,  1,  5,  15,
1561           1,  1,  4,  1,  154,
1562           1,  3,  1,  8,  17 ]
1563             .sliced(5, 5)
1564             .as!float.slice
1565             .canonical;
1566 
1567     auto LU = A.luDecomp();
1568     auto m = luSolve('N', LU.lut, LU.ipiv, B);
1569 
1570     import std.math: approxEqual;
1571     import mir.algorithm.iteration: equal;
1572     assert(equal!approxEqual(mtimes(A, m), B));
1573 }
1574 
1575 unittest
1576 {
1577     
1578     auto B =
1579         [ 1,  4, -3,
1580          -2,  8,  5,
1581           3,  4,  7,
1582           2,  4,  6 ]
1583             .sliced(4, 3)
1584             .as!double.slice;
1585 
1586     auto LU = B.luDecomp!(Yes.allowDestroy)();
1587     LU.l; LU.u;
1588     auto res = mtimes(LU.l, LU.u);
1589     moveRows(res, LU.ipiv);
1590 
1591     import mir.algorithm.iteration: equal;
1592     import std.math: approxEqual;
1593     assert(res.equal!approxEqual(B));
1594 }
1595 
1596 unittest
1597 {
1598     import mir.ndslice;
1599 
1600     auto B =
1601         [ 3, -7, -2,  2,
1602          -3,  5,  1,  0,
1603           6, -4,  0, -5,
1604          -9,  5, -5, 12 ]
1605             .sliced(4, 4)
1606             .as!double.slice
1607             .canonical;
1608     auto C = B.transposed.slice;
1609 
1610     auto LU = B.transposed.luDecomp!(Yes.allowDestroy)();
1611     auto res = mtimes(LU.l, LU.u);
1612     moveRows(res, LU.ipiv);
1613 
1614     import mir.algorithm.iteration: equal;
1615     import std.math: approxEqual;
1616     assert(res.equal!approxEqual(C));
1617 }
1618 
1619 ///Consist LDL factorization;
1620 struct LDLResult(T)
1621 {
1622     /++
1623     uplo = 'U': Upper triangle is stored;
1624          'L': lower triangle is stored.
1625     +/
1626     char uplo;
1627     /++
1628     Matrix in witch lower triangular matrix is 'L' part of
1629     factorization, diagonal is 'D' part.
1630     +/
1631     Slice!(T*, 2, Canonical) matrix;
1632     /++
1633     The pivot indices.
1634     If ipiv(k) > 0, then rows and columns k and ipiv(k) were
1635     interchanged and D(k, k) is a '1 x 1' diagonal block.
1636     If ipiv(k) = ipiv(k + 1) < 0, then rows and columns k+1 and
1637     -ipiv(k) were interchanged and D(k:k+1, k:k+1) is a '2 x 2'
1638     diagonal block.
1639     +/
1640     Slice!(lapackint*) ipiv;
1641     /++
1642     Return solves a system of linear equations
1643         \A * X = B,
1644     using LDL factorization.
1645     +/
1646     auto solve(Flag!"allowDestroy" allowDestroy = No.allowDestroy, SliceKind kindB, size_t N, IteratorB)(
1647         Slice!(IteratorB, N, kindB) b)
1648     {
1649         return ldlSolve!(allowDestroy)(uplo, matrix, ipiv, b);
1650     }
1651 }
1652 
1653 /++
1654 Computes the factorization of a real symmetric matrix A using the
1655 Bunch-Kaufman diagonal pivoting method.
1656 The for of the factorization is:
1657     \A = L*D*L**T
1658 Where L is product if permutation and unit lower triangular matrices,
1659 and D is symmetric and block diagonal with '1 x 1' and '2 x 2'
1660 diagonal blocks.
1661 Params:
1662     allowDestroy = flag to delete the source matrix.
1663     a = input symmetric 'n x n' matrix for factorization.
1664     uplo = 'U': Upper triangle is stored;
1665          'L': lower triangle is stored.
1666 Returns:$(LREF LDLResult)
1667 +/
1668 auto ldlDecomp(Flag!"allowDestroy" allowDestroy = No.allowDestroy, Iterator, SliceKind kind)(
1669     char uplo,
1670     Slice!(Iterator, 2, kind) a)
1671 in
1672 {
1673     assert(a.length!0 == a.length!1, "matrix must be squared");
1674 }
1675 body
1676 {
1677     alias T = BlasType!Iterator;
1678     auto work = [T.sizeof * a.length].uninitSlice!T;
1679     auto ipiv = a.length.uninitSlice!lapackint;
1680     auto m = (allowDestroy && a._stride!1 == 1) ? a.assumeCanonical : a.transposed.as!T.slice.canonical;
1681 
1682     sytrf!T(uplo, m, ipiv, work);
1683     return LDLResult!T(uplo, m, ipiv);
1684 }
1685 
1686 /++
1687 Solves a system of linear equations \A * X = B with symmetric matrix 'A' using the
1688 factorization
1689 \A = U * D * U**T, or
1690 \A = L * D * L**T
1691 computed by ldlDecomp.
1692 Params:
1693     allowDestroy = flag to delete the source matrix.
1694     a = 'LD' or 'UD' matrix computed by ldlDecomp.
1695     ipiv = details of the interchanges and the block structure of D as determined by ldlDecomp.
1696     b = the right hand side matrix.
1697     uplo = specifies whether the details of the factorization are stored as an upper or
1698            lower triangular matrix:
1699          'U': Upper triangular, form is \A = U * D * U**T;
1700          'L': Lower triangular, form is \A = L * D * L**T.
1701 Returns:
1702     The solution matrix.
1703 +/
1704 auto ldlSolve(Flag!"allowDestroy" allowDestroy = No.allowDestroy, SliceKind kindB, size_t N, IteratorB, IteratorA)(
1705     char uplo,
1706     Slice!(IteratorA, 2, Canonical) a,
1707     Slice!(lapackint*) ipiv,
1708     Slice!(IteratorB, N, kindB) b)
1709 in
1710 {
1711     assert(a.length!0 == a.length!1, "matrix must be squared");
1712     assert(ipiv.length == a.length, "size of ipiv must be equal to the number of rows a");
1713     assert(a.length!1 == b.length!0, "number of columns a should be equal to the number of rows b");
1714 }
1715 body
1716 {
1717     alias A = BlasType!IteratorA;
1718     alias B = BlasType!IteratorB;
1719     alias T = CommonType!(A, B);
1720     static if(is(T* == IteratorA))
1721         auto a_ = a;
1722     else
1723         auto a_ = a.as!T.slice.canonical;
1724     
1725     //convect vector to matrix.
1726     static if(N == 1)
1727         auto k = b.sliced(1, b.length);
1728     else
1729         auto k = b.transposed;
1730 
1731     auto work = [T.sizeof * a.length].uninitSlice!T;
1732     static if(is(IteratorB == T*))
1733         auto m = (allowDestroy && k._stride!1 == 1) ? k.assumeCanonical : k.as!T.slice.canonical;
1734     else
1735         auto m = k.as!T.slice.canonical;
1736     sytrs2!T(a_, m, ipiv, work, uplo);
1737     return m.transposed;
1738 }
1739 
1740 ///
1741 unittest
1742 {
1743     import mir.ndslice;
1744 
1745     auto A =
1746         [ 2.07,  3.87,  4.20, -1.15,
1747           3.87, -0.21,  1.87,  0.63,
1748           4.20,  1.87,  1.15,  2.06,
1749          -1.15,  0.63,  2.06, -1.81 ]
1750             .sliced(4, 4)
1751             .as!double.slice
1752             .canonical;
1753 
1754     import mir.random.variable;
1755     import mir.random.algorithm;
1756     auto B = randomSlice(uniformVar(-100, 100), 4, 100);
1757 
1758     auto LDL = ldlDecomp('L', A);
1759     auto X = LDL.solve(B);
1760 
1761     import std.math: approxEqual;
1762     import mir.algorithm.iteration: equal;
1763     assert(equal!approxEqual(mtimes(A, X), B));
1764 }
1765 
1766 unittest
1767 {
1768     auto A =
1769         [ 9, -1,  2,
1770          -1,  8, -5,
1771           2, -5,  7 ]
1772             .sliced(3, 3)
1773             .as!float.slice
1774             .canonical;
1775     auto A_ = A.slice;
1776 
1777     auto B =
1778         [ 5,  7,  1,
1779           1,  8,  5,
1780           9,  3,  2 ]
1781           .sliced(3, 3)
1782           .as!double.slice
1783           .canonical;
1784     auto B_ = B.slice;
1785 
1786     auto LDL = ldlDecomp!(Yes.allowDestroy)('L', A);
1787     auto X = ldlSolve!(Yes.allowDestroy)(LDL.uplo, A, LDL.ipiv, B.transposed);
1788 
1789     import std.math: approxEqual;
1790     import mir.algorithm.iteration: equal;
1791     assert(equal!approxEqual(mtimes(A_, X), B_.transposed));
1792 }
1793 
1794 unittest
1795 {
1796     auto A =
1797         [10, 20, 30,
1798          20, 45, 80,
1799          30, 80, 171 ]
1800             .sliced(3, 3)
1801             .as!double.slice
1802             .canonical;
1803     auto B = [ 1, 4, 7 ].sliced(3).as!float.slice.canonical;
1804     auto B_ = B.sliced(3, 1);
1805 
1806     auto LDL = ldlDecomp('L', A);
1807     auto X = LDL.solve(B);
1808     
1809     import std.math: approxEqual;
1810     import mir.algorithm.iteration: equal;
1811     assert(equal!approxEqual(mtimes(A, X), B_));
1812 }
1813 
1814 struct choleskyResult(T)
1815 {
1816     /++
1817     If uplo = 'L': lower triangle of 'matrix' is stored.
1818     If uplo = 'U': upper triangle of 'matrix' is stored.
1819     +/
1820     char uplo;
1821     /++
1822     if uplo = Lower, the leading 'N x N' lower triangular part of A
1823     contains the lower triangular part of the matrix A, and the
1824     strictly upper triangular part if A is not referenced.
1825     if uplo = Upper, the leading 'N x N' upper triangular part of A
1826     contains the upper triangular part of the matrix A, and the
1827     strictly lower triangular part if A is not referenced.
1828     +/
1829     Slice!(T*, 2, Canonical) matrix;
1830     /++
1831     Return solves a system of linear equations
1832         \A * X = B,
1833     using Cholesky factorization.
1834     +/
1835     auto solve(Flag!"allowDestroy" allowDestroy = No.allowDestroy,
1836                Iterator, size_t N, SliceKind kind)
1837               (Slice!(Iterator, N, kind) b)
1838     {
1839         return choleskySolve!(allowDestroy)(uplo, matrix, b);
1840     }
1841 }
1842 /++
1843 Computs Cholesky decomposition of symmetric positive definite matrix 'A'.
1844 The factorization has the form:
1845     \A = U**T * U, if UPLO = Upper, or
1846     \A = L * L**T, if UPLO = Lower
1847 Where U is an upper triangular matrix and L is lower triangular.
1848 Params:
1849     allowDestroy = flag to delete the source matrix.
1850     a = symmetric 'N x N' matrix.
1851     uplo = if uplo is Upper, then upper triangle of A is stored, else
1852     lower.
1853 Returns: $(LREF choleskyResult)
1854 +/
1855 auto choleskyDecomp( Flag!"allowDestroy" allowDestroy = No.allowDestroy, Iterator, SliceKind kind)(
1856     char uplo,
1857     Slice!(Iterator, 2, kind) a)
1858 in
1859 {
1860     assert(a.length!0 == a.length!1, "matrix must be squared");
1861 }
1862 body
1863 {
1864     alias T = BlasType!Iterator;
1865     auto m = (allowDestroy && a._stride!1 == 1) ? a.assumeCanonical : a.as!T.slice.canonical;
1866     potrf!T(uplo, m);
1867     return choleskyResult!T(uplo, m);
1868 }
1869 
1870 /++
1871     Solves a system of linear equations A * X = B with a symmetric matrix A using the
1872     Cholesky factorization:
1873     \A = U**T * U or
1874     \A = L * L**T
1875     computed by choleskyDecomp.
1876 Params:
1877     allowDestroy = flag to delete the source matrix.
1878     c = the triangular factor 'U' or 'L' from the Cholesky factorization
1879         \A = U**T * U or
1880         \A = L * L**T,
1881     as computed by choleskyDecomp.
1882     b = the right hand side matrix.
1883     uplo = 'U': Upper triangle of A is stored;
1884          'L': Lower triangle of A is stored.
1885 Returns:
1886     The solution matrix X.
1887 +/
1888 auto choleskySolve(Flag!"allowDestroy" allowDestroy = No.allowDestroy, SliceKind kindB, size_t N, IteratorB, IteratorC)(
1889     char uplo,
1890     Slice!(IteratorC, 2, Canonical) c,
1891     Slice!(IteratorB, N, kindB) b)
1892 in
1893 {
1894     assert(c.length!0 == c.length!1, "matrix must be squared");
1895     assert(c.length!1 == b.length!0, "number of columns a should be equal to the number of rows b");
1896 }
1897 body
1898 {
1899     alias B = BlasType!IteratorB;
1900     alias C = BlasType!IteratorC;
1901     alias T = CommonType!(B, C);
1902     static if(is(T* == IteratorC))
1903         auto c_ = c;
1904     else
1905         auto c_ = c.as!T.slice.canonical;
1906 
1907     //convect vector to matrix.
1908     static if(N == 1)
1909         auto k = b.sliced(1, b.length);
1910     else
1911         auto k = b.transposed;
1912 
1913     static if(is(IteratorB == T*))
1914         auto m = (allowDestroy && k._stride!1 == 1) ? k.assumeCanonical : k.as!T.slice.canonical;
1915     else
1916         auto m = k.as!T.slice.canonical;
1917     potrs!T(uplo, c_, m);
1918     return m.transposed;
1919 }
1920 
1921 ///
1922 unittest
1923 {
1924     import mir.ndslice;
1925 
1926     auto A =
1927            [ 25, 15, -5,
1928              15, 18,  0,
1929              -5,  0, 11 ]
1930              .sliced(3, 3)
1931              .as!double.slice;
1932     
1933     import mir.random.variable;
1934     import mir.random.algorithm;
1935     auto B = randomSlice(uniformVar(-100, 100), 3, 100);
1936 
1937     auto C = choleskyDecomp('L', A);
1938     auto X = C.solve(B);
1939 
1940     import std.math: approxEqual;
1941     import mir.algorithm.iteration: equal;
1942     assert(equal!approxEqual(mtimes(A, X), B));
1943 }
1944 
1945 unittest
1946 {
1947     auto A =
1948             [ 1,  1,  3,
1949               1,  5,  5,
1950               3,  5, 19 ]
1951              .sliced(3, 3)
1952              .as!double.slice
1953              .universal;
1954     auto B = [ 10,  157,  80 ].sliced(3).as!float.slice;
1955     auto C_ = B.slice.sliced(3, 1);
1956 
1957     auto C = choleskyDecomp('U', A);
1958     auto X = choleskySolve!(Yes.allowDestroy)(C.uplo, C.matrix, B);
1959 
1960     import std.math: approxEqual;
1961     import mir.algorithm.iteration: equal;
1962     assert(equal!approxEqual(mtimes(A, X), C_));
1963 }
1964 
1965 unittest
1966 {
1967     auto A =
1968             [ 6,  15,  55,
1969              15,  55, 225,
1970              55, 225, 979 ]
1971              .sliced(3, 3)
1972              .as!float.slice
1973              .canonical;
1974     auto B =
1975             [ 7,  3,
1976               2,  1,
1977               1,  8 ]
1978               .sliced(3, 2)
1979               .as!double.slice
1980               .universal;
1981 
1982     auto C = choleskyDecomp('L', A);
1983     auto X = choleskySolve(C.uplo, C.matrix, B);
1984 
1985     import std.math: approxEqual;
1986     import mir.algorithm.iteration: equal;
1987     assert(equal!approxEqual(mtimes(A, X), B));
1988 }
1989 
1990 
1991 ///
1992 struct QRResult(T)
1993 {
1994     /++
1995     Matrix in witch the elements on and above the diagonal of the array contain the min(M, N) x N
1996     upper trapezoidal matrix 'R' (R is upper triangular if m >= n). The elements below the
1997     diagonal, with the array tau, represent the orthogonal matrix 'Q' as product of min(m, n).
1998     +/
1999     Slice!(T*, 2, Canonical) matrix;
2000     ///The scalar factors of the elementary reflectors
2001     Slice!(T*) tau;
2002     /++
2003     Solve the least squares problem:
2004         \min ||A * X - B||
2005     Using the QR factorization:
2006         \A = Q * R
2007     computed by qrDecomp.
2008     +/
2009     auto solve(Flag!"allowDestroy" allowDestroy = No.allowDestroy,
2010                Iterator, size_t N, SliceKind kind)
2011               (Slice!(Iterator, N, kind) b)
2012     {
2013         return qrSolve!(allowDestroy)(matrix, tau, b);
2014     }
2015     
2016     /++
2017     Extract the Q matrix
2018     +/
2019     auto Q(Flag!"allowDestroy" allowDestroy = No.allowDestroy)
2020     {
2021         auto work = [matrix.length].uninitSlice!T;
2022 
2023         auto m = (allowDestroy && matrix._stride!1 == 1) ? matrix.assumeCanonical : matrix.as!T.slice.canonical;
2024 
2025         static if(is(T == double) || is(T == float))
2026             orgqr!T(m, tau, work);
2027         else
2028             ungqr!T(m, tau, work);
2029         return m.transposed;
2030     }
2031     
2032     /++
2033     Extract the R matrix
2034     +/
2035     auto R()
2036     {
2037         import mir.algorithm.iteration: eachLower;
2038 
2039         auto r = [tau.length, tau.length].uninitSlice!T;
2040         if (matrix.shape[0] == matrix.shape[1]) {
2041             r[] = matrix.transposed.slice;
2042         } else {
2043             r[] = matrix[0..tau.length, 0..tau.length].transposed.slice;
2044         }
2045         r.eachLower!("a = cast(" ~ T.stringof ~ ")0");
2046         return r.universal;
2047     }
2048 
2049     /++
2050     Reconstruct the original matrix given a QR decomposition
2051     +/
2052     auto reconstruct()
2053     {
2054         auto q = Q();
2055         auto r = R();
2056         return reconstruct(q, r);
2057     }
2058 
2059     /++
2060     Reconstruct the original matrix given a QR decomposition
2061     +/
2062     auto reconstruct(T, U)(T q, U r)
2063         if (isMatrix!T && isMatrix!U)
2064     {
2065         return mtimes(q, r).universal;
2066     }
2067 }
2068 
2069 ///
2070 unittest
2071 {
2072     import mir.ndslice;
2073 
2074     auto A =
2075             [ 1,  1,  0,
2076               1,  0,  1,
2077               0,  1,  1 ]
2078               .sliced(3, 3)
2079               .as!double.slice;
2080 
2081     auto Q_test =
2082             [ -0.7071068,  0.4082483,  -0.5773503,
2083               -0.7071068, -0.4082483,   0.5773503,
2084                        0,  0.8164966,   0.5773503]
2085               .sliced(3, 3)
2086               .as!double.slice;
2087 
2088     auto R_test =
2089             [ -1.414214,  -0.7071068,   -0.7071068,
2090                       0,   1.2247449,    0.4082483,
2091                       0,           0,    1.1547005]
2092               .sliced(3, 3)
2093               .as!double.slice;
2094 
2095     auto val = qrDecomp(A);
2096 
2097     //saving these values to doublecheck they don't change later
2098     auto val_matrix = val.matrix.slice;
2099     auto val_tau = val.tau.slice;
2100 
2101     import std.math: approxEqual;
2102     import mir.ndslice : equal;
2103     
2104     auto r = val.R;
2105     assert(equal!approxEqual(val.R, R_test));
2106 
2107     auto q = val.Q;
2108     assert(equal!approxEqual(val.Q, Q_test));
2109 
2110     //double-checking values do not change
2111     assert(equal!approxEqual(val_matrix, val.matrix));
2112     assert(equal!approxEqual(val_tau, val.tau));
2113 
2114     auto a = val.reconstruct;
2115     assert(equal!approxEqual(A, a));
2116 }
2117 
2118 unittest
2119 {
2120     auto A =
2121             [  3,  -6,
2122                4,  -8,
2123                0,   1]
2124               .sliced(3, 2)
2125               .as!double.slice;
2126 
2127     auto Q_check =
2128             [ -0.6,  0,
2129               -0.8,  0,
2130                0.0, -1]
2131               .sliced(3, 2)
2132               .as!double.slice;
2133 
2134     auto R_check =
2135             [ -5,  10,
2136                0,  -1]
2137               .sliced(2, 2)
2138               .as!double.slice;
2139 
2140     auto C = qrDecomp(A);
2141     auto q = C.Q;
2142     auto r = C.R;
2143     auto A_reconstructed = C.reconstruct(q, r);
2144 
2145     import std.math: approxEqual;
2146     import mir.algorithm.iteration: equal;
2147     assert(equal!approxEqual(q, Q_check));
2148     assert(equal!approxEqual(r, R_check));
2149     assert(equal!approxEqual(A_reconstructed, A));
2150 }
2151 
2152 /++
2153 Computes a QR factorization of matrix 'a'.
2154 Params:
2155     allowDestroy = flag to delete the source matrix.
2156     a = initial matrix
2157 Returns: $(LREF QRResult)
2158 +/
2159 auto qrDecomp(Flag!"allowDestroy" allowDestroy = No.allowDestroy,
2160               Iterator, SliceKind kind)
2161              (Slice!(Iterator, 2, kind) a)
2162 {
2163     alias T = BlasType!Iterator;
2164     auto work = [T.sizeof * a.length].uninitSlice!T;
2165     auto tau = (cast(int) min(a.length!0, a.length!1)).uninitSlice!T;
2166     auto m = (allowDestroy && a._stride!1 == 1) ? a.assumeCanonical : a.transposed.as!T.slice.canonical;
2167 
2168     geqrf!T(m, tau, work);
2169     return QRResult!T(m, tau);
2170 }
2171 
2172 /++
2173 Solve the least squares problem:
2174     \min ||A * X - B||
2175 Using the QR factorization:
2176     \A = Q * R
2177 computed by qrDecomp.
2178 Params:
2179     allowDestroy = flag to delete the source matrix.
2180     a = detalis of the QR factorization of the original matrix as returned by qrDecomp.
2181     tau = details of the orhtogonal matrix Q.
2182     b = right hand side matrix.
2183 Returns: solution matrix.
2184 +/
2185 auto qrSolve(Flag!"allowDestroy" allowDestroy = No.allowDestroy,
2186              SliceKind kindB, size_t N, IteratorB, IteratorA, IteratorT)
2187             (Slice!(IteratorA, 2, Canonical) a,
2188              Slice!(IteratorT) tau,
2189              Slice!(IteratorB, N, kindB) b
2190             )
2191 in
2192 {
2193     assert(a.length!1 == b.length!0, "number of columns a should be equal to the number of rows b");
2194 }
2195 body
2196 {
2197     alias A = BlasType!IteratorA;
2198     alias B = BlasType!IteratorB;
2199     alias T = CommonType!(A, B);
2200     static if(is(T* == IteratorA))
2201         auto a_ = a;
2202     else
2203         auto a_ = a.as!T.slice.canonical;
2204     static if(is(T* == IteratorT))
2205         auto tau_ = tau;
2206     else
2207         auto tau_ = tau.as!T.slice.canonical;
2208 
2209     //convect vector to matrix.
2210     static if(N == 1)
2211         auto k = b.sliced(1, b.length);
2212     else
2213         auto k = b.transposed;
2214 
2215     static if(is(IteratorB == T*))
2216         auto m = (allowDestroy && k._stride!1 == 1) ? k.assumeCanonical : k.as!T.slice.canonical;
2217     else
2218         auto m = k.as!T.slice.canonical;
2219     auto work = [m.length!0].uninitSlice!T;
2220     static if(is(T == double) || is(T == float))
2221         ormqr!T('L', 'T', a_, tau_, m, work);
2222     else
2223         unmqr!T('L', 'C', a_, tau_, m, work);
2224 
2225     if (a_.length!0 != a_.length!1) {
2226         a_ = a_[0..tau.length, 0..tau.length];
2227         m = m.selectFront!1(tau.length);
2228     }
2229     trsm!T(Side.Right, Uplo.Lower, Diag.NonUnit, cast(T) 1.0, a_, m);
2230 
2231     return m.transposed;
2232 }
2233 
2234 ///
2235 unittest
2236 {
2237     import mir.ndslice;
2238 
2239     auto A =
2240             [ 3,  1, -1,  2,
2241              -5,  1,  3, -4,
2242               2,  0,  1, -1,
2243               1, -5,  3, -3 ]
2244               .sliced(4, 4)
2245               .as!double.slice;
2246 
2247     import mir.random.variable;
2248     import mir.random.algorithm;
2249     auto B = randomSlice(uniformVar(-100, 100), 4, 100);
2250 
2251     auto C = qrDecomp(A);
2252     auto X = C.solve(B);
2253 
2254     import std.math: approxEqual;
2255     import mir.algorithm.iteration: equal;
2256     assert(equal!approxEqual(mtimes(A, X), B));
2257 }
2258 
2259 unittest
2260 {
2261     auto A =
2262             [ 3,  1, -1,  2,
2263              -5,  1,  3, -4,
2264               2,  0,  1, -1,
2265               1, -5,  3, -3 ]
2266               .sliced(4, 4)
2267               .as!float.slice;
2268     auto B = [ 6, -12, 1, 3 ].sliced(4).as!double.slice.canonical;
2269     auto B_ = B.slice.sliced(B.length, 1);
2270     auto C = qrDecomp(A);
2271     auto X = qrSolve(C.matrix, C.tau, B);
2272 
2273     import std.math: approxEqual;
2274     import mir.algorithm.iteration: equal;
2275     assert(equal!approxEqual(mtimes(A, X), B_));
2276 }
2277 
2278 unittest
2279 {
2280     auto A =
2281             [ 1,  1,  0,
2282               1,  0,  1,
2283               0,  1,  1 ]
2284               .sliced(3, 3)
2285               .as!double.slice;
2286 
2287     auto B =
2288             [ 7,  6,  98,
2289               4,  8,  17,
2290               5,  3,  24 ]
2291               .sliced(3, 3)
2292               .as!float.slice;
2293     auto C = qrDecomp(A);
2294     auto X = qrSolve(C.matrix, C.tau, B);
2295 
2296     import std.math: approxEqual;
2297     import mir.algorithm.iteration: equal;
2298     
2299     assert(equal!approxEqual(mtimes(A, X), B));
2300 }
2301 
2302 unittest
2303 {
2304     auto A =
2305             [ 1,  1,  0,
2306               1,  0,  1,
2307               0,  1,  1 ]
2308               .sliced(3, 3)
2309               .as!cdouble.slice;
2310 
2311     auto B =
2312             [ 15,  78,  11,
2313               21,  47,  71,
2314               81,  11,  81 ]
2315               .sliced(3, 3)
2316               .as!cfloat.slice;
2317     auto C = qrDecomp(A);
2318     auto X = qrSolve(C.matrix, C.tau, B);
2319     auto res = mtimes(A, X);
2320 
2321     import mir.algorithm.iteration: equal;
2322     assert(equal!((a, b) => fabs(a - b) < 1e-12)(res, B));
2323 }
2324 
2325 unittest
2326 {
2327     auto A =
2328             [  3,  -6,
2329                4,  -8,
2330                0,   1]
2331               .sliced(3, 2)
2332               .as!double.slice;
2333 
2334     auto B = [-1, 7, 2]
2335                .sliced(3)
2336                .as!double.slice.canonical;
2337 
2338     auto X_check = [5, 2]
2339                     .sliced(2, 1)
2340                     .as!double.slice;
2341 
2342     auto C = qrDecomp(A);
2343     auto X = qrSolve(C.matrix, C.tau, B);
2344 
2345     import std.math: approxEqual;
2346     import mir.algorithm.iteration: equal;
2347     assert(equal!approxEqual(X, X_check));
2348 }