35 #include "qrm_common.h"
84 integer :: qrm_nth, nth, thn, info,
i
87 integer :: pnl_id, upd_id, act_id, asm_id, clean_id, sched_id
91 real(kind(1.d0)),
allocatable :: work(:)
93 real(kind(1.d0)) :: flops, times(6), t1, t2
95 type(qrm_adata_type
),
pointer :: adata
96 type(dqrm_fdata_type
),
pointer :: fdata
100 character(len=*),
parameter :: name=
'qrm_factorization_core'
114 adata => qrm_mat%adata
115 fdata => qrm_mat%fdata
137 do i=adata%nleaves, 1, -1
139 fdata%front_list(adata%leaves(
i))%status = qrm_activable_
145 #if defined (_OPENMP)
146 call omp_set_num_threads(1)
147 qrm_nth = qrm_mat%icntl(qrm_nthreads_)
156 #if defined (_OPENMP)
157 nth = omp_get_num_threads()
158 thn = omp_get_thread_num()
169 __qrm_prnt_dbg(
'("Num threads: ",i3)')nth
177 __qrm_check_ret(name,
'qrm_alloc',9998)
190 if(qrm_err_stack%nelem .gt. 0) goto 9998
199 if(.not. got_task) cycle taskloop
209 call
panel(task, thn, work, flops)
212 call
update(task, thn, work, flops)
221 call
clean(task, thn)
240 if(qrm_err_stack%nelem .gt. 0)
then
252 if(err_act .eq. qrm_abort_)
then
269 type(dqrm_front_type
),
pointer :: front
281 if(f .eq. 0)
exit active_fronts
282 front => fdata%front_list(f)
285 if(.not. omp_test_lock(front%lock)) cycle active_fronts
289 call omp_unset_lock(front%lock)
301 if(f .eq. 0)
exit ready_fronts
302 front => fdata%front_list(f)
304 if (found)
exit ready_fronts
326 type(dqrm_front_type
) :: front
329 type(dqrm_front_type
),
pointer :: father, cfront
330 integer :: p, c,
i, j, childpt, child
335 if( (front%status .le. 0) .or. &
336 &(front%status .eq. qrm_done_)) &
340 if(minval(front%ptable) .eq. qrm_done_)
then
345 front%status = qrm_busy_
352 if(maxval(front%ptable) .lt. 0)
return
356 if (front%lastpnl .lt. front%np)
then
359 if (front%ptable(front%lastpnl+1) .eq. front%lastpnl)
then
362 tsk =
qrm_task_type(qrm_task_pnl_, front%num, front%lastpnl+1, 0,.false.)
369 front%ptable(front%lastpnl+1) = qrm_busy_
377 do p=1, front%lastpnl
380 if(front%ptable(c) .eq. qrm_busy_) cycle
382 if(front%ptable(c) .eq. p-1)
then
390 front%ptable(c) = qrm_busy_
398 p = adata%parent(front%num)
400 father => fdata%front_list(p)
402 if(father%status .eq. qrm_active_)
then
403 do c=front%npiv/front%nb+1, front%nc
404 if (front%ptable(c) .eq. min(c, front%np))
then
412 front%ptable(c) = qrm_busy_
413 father%status = qrm_busy_
441 type(dqrm_front_type
) :: front
449 if(front%status .eq. qrm_activable_)
then
454 front%status = qrm_busy_
479 if(fdata%done .eq. fdata%nfronts)
then
497 subroutine panel(task, thn, work, flops)
506 real(kind(1.d0)) :: t1, t2
509 real(kind(1.d0)) :: work(:)
510 real(kind(1.d0)) :: flops
513 integer :: f, np, nc, nb, &
514 & m, n, k, i, info, j, ib, fcol, lcol, ii, jj
517 front => fdata%front_list(task%front)
518 f = adata%parent(front%num)
520 call
qrm_get(qrm_mat,
'qrm_ib', ib)
523 __qrm_prnt_dbg(
'(i3," =--> Panel : ",i4," -- ",i4)')thn,task%front,task%pnl
530 fcol = (task%pnl-1)*front%nb+1
531 lcol = min(task%pnl*front%nb,front%n)
535 do i=fcol, min(task%pnl*front%nb,front%ne), ib
536 n = min(ib, lcol-
i+1)
537 m = max(front%stair(min(
i+n-1,front%ne))-
i+1,0)
541 call dgeqrf(m, n, front%front(
i,
i), front%m, front%tau(
i), work(1), &
544 call dlarft(
'f',
'c', m, k, front%front(
i,
i), front%m, front%tau(
i), &
545 & front%t(1,
i), front%ib )
551 call dlarfb(
'l',
't',
'f',
'c', m, n, k, front%front(
i,
i), &
552 & front%m, front%t(1,
i), front%ib, &
553 & front%front(
i,j), front%m, work(1), front%nb)
567 if(task%pnl .lt. front%nc)
then
568 if(front%ptable(task%pnl+1) .eq. task%pnl-1)
then
569 tsk =
qrm_task_type(qrm_task_upd_, front%num, task%pnl, task%pnl+1, .false.)
571 front%ptable(task%pnl+1) = qrm_busy_
578 front%ptable(task%pnl) = qrm_done_
580 if ((task%pnl .le. front%npiv/front%nb) .or. &
581 & (front%npiv .eq. front%ne))
then
583 front%ptable(task%pnl) = qrm_done_
585 front%ptable(task%pnl) = task%pnl
589 if(task%pnl .eq. front%nc)
then
590 front%status = qrm_factorized_
592 front%lastpnl=task%pnl
607 subroutine update(task, thn, work, flops)
619 real(kind(1.d0)) :: work(:)
621 real(kind(1.d0)) :: flops
623 real(kind(1.d0)) :: t1, t2
627 integer :: f,
i, j, m, n, k, ib, fcol, lcol
629 front => fdata%front_list(task%front)
630 f = adata%parent(front%num)
633 __qrm_prnt_dbg(
'(i3," =--> Update : ",i4," -- ",i4,2x,i4)')thn,task%front,task%pnl,task%col
640 call
qrm_get(qrm_mat,
'qrm_ib', ib)
642 fcol = (task%pnl-1)*front%nb+1
643 lcol = min(task%pnl*front%nb, front%ne)
644 j = (task%col-1)*front%nb+1
645 n = min(front%nb, front%n-j+1)
648 k = min(ib, lcol-
i+1)
649 m = max(front%stair(min(
i+k-1,front%ne))-
i+1,0)
652 call dlarfb(
'l',
't',
'f',
'c', m, n , k, front%front(
i,
i), &
653 & front%m, front%t(1,
i), front%ib, &
654 & front%front(
i,j), front%m, work(1), front%nb)
668 if((f .eq. 0) .and. (task%pnl .eq. front%np))
then
671 front%ptable(task%col) = qrm_done_
672 else if ((task%pnl .eq. front%np) .and. &
673 & (front%npiv .eq. front%ne))
then
674 front%ptable(task%col) = qrm_done_
677 front%ptable(task%col) = task%pnl
679 i = minval(front%ptable)
680 if(
i .ge. front%np) front%status = qrm_factorized_
682 if((task%col .eq. (task%pnl+1)) .and. (task%col .le. front%np))
then
683 tsk =
qrm_task_type(qrm_task_pnl_, front%num, task%col, 0, .false.)
685 front%ptable(task%col) = qrm_busy_
711 real(kind(1.d0)) :: flops
714 integer :: msize,
i, f
717 character(len=*),
parameter :: name=
'activate'
721 front => fdata%front_list(task%front)
724 call omp_set_lock(front%lock)
728 __qrm_prnt_dbg(
'(i3," =--> Activate: ",i4)')thn,task%front
736 __qrm_check_ret(name,
'qrm_activate_front',9999)
740 if((front%m .eq. 0) .or. (front%n .eq. 0))
then
741 front%status = qrm_done_
743 fdata%done = fdata%done+1
746 front%status = qrm_active_
754 f = adata%parent(front%num)
756 front => fdata%front_list(f)
758 if (front%status .eq. qrm_ready_)
then
760 front%status = qrm_activable_
771 if(err_act .eq. qrm_abort_)
then
798 integer :: f,
i, j, nb, row, col, frow, fcol, b
799 integer :: nc, mc, ptr
801 integer,
allocatable :: tmp(:)
804 character(len=*),
parameter :: name=
'assemble'
808 front => fdata%front_list(task%front)
809 f = adata%parent(front%num)
810 father => fdata%front_list(f)
813 __qrm_prnt_dbg(
'(i3," =--> Assemble: ",i4," -- ",i4)')thn,task%front,task%col
821 __qrm_check_ret(name,
'qrm_aalloc',9999)
824 do col = (task%col-1)*front%nb+1, min(front%n, task%col*front%nb)
825 if(col .le. front%npiv) cycle
826 fcol = front%colmap(col-front%npiv)
827 do row = front%npiv+1, min(col, min(front%m, front%n))
828 frow = front%rowmap(row - front%npiv)
829 father%front(frow, fcol) = front%front(row,col)
831 b = (fcol-1)/father%nb+1
836 father%ptable = father%ptable+tmp
837 father%status = qrm_active_
847 front%ptable(task%col) = qrm_done_
855 if(err_act .eq. qrm_abort_)
then
880 integer :: f, mn, h,
i, j
883 character(len=*),
parameter :: name=
'clean'
887 front => fdata%front_list(task%front)
890 __qrm_prnt_dbg(
'(i3," =--> Clean : ",i4)')thn,task%front
900 __qrm_check_ret(name,
'qrm_clean_front',9999)
910 fdata%done = fdata%done+1
914 front%status = qrm_done_
922 if(err_act .eq. qrm_abort_)
then
This module contains all the error management routines and data.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
This type defines the handle for the queues attached to a family of threads.
subroutine clean(task, thn)
This module contains all the facilities for visualizing the execution profile of a parallel code...
A data type meant to to define a queue.
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
Generif interface for the ::dqrm_pgeti, ::dqrm_pgetr and.
This module contains the interfaces of all non-typed routines.
subroutine assemble(task, thn)
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This module contains all the facilities for front queues.
logical function front_sched_act(front)
This module contains the definition of the basic sparse matrix type and of the associated methods...
logical function front_sched_ops(front)
subroutine dqrm_factorization_core(qrm_mat)
This is the main factorization routine. It performs the factorization of all the fronts that have bee...
subroutine dqrm_activate_front(qrm_mat, fnum, flops)
This routine activates a front.
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
subroutine update(task, thn, work, flops)
subroutine activate(task, thn, flops)
This module contains the definition of all the data related to the factorization phase.
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
This module contains all the generic interfaces for the typed routines in the factorization phase...
This type defines a computational task.
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
subroutine dqrm_clean_front(qrm_mat, fnum)
This routine performs the cleaning of a front.
subroutine panel(task, thn, work, flops)
Generic interface for the ::qrm_count_realflops ::qrm_count_pureflops.
This type defines the data structure used to store a matrix.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
subroutine check_facto_over()
This type defines a data structure containing all the data related to a front.
subroutine qrm_par_mem_finalize()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
This module contains the definition of a task type that is used for scheduling tasks during the facto...
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_queue_free(q)
Frees a queue.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.