35 #include "qrm_common.h"
84 integer :: qrm_nth, nth, thn, info,
i
87 integer :: pnl_id, upd_id, act_id, asm_id, clean_id, sched_id
91 _qrm_data,
allocatable :: work(:)
93 real(kind(1.d0)) :: flops, times(6), t1, t2
95 type(qrm_adata_type
),
pointer :: adata
96 type(_qrm_fdata_type
),
pointer :: fdata
100 character(len=*),
parameter :: name=
'qrm_factorization_core'
114 adata => qrm_mat%adata
115 fdata => qrm_mat%fdata
137 do i=adata%nleaves, 1, -1
139 fdata%front_list(adata%leaves(
i))%status = qrm_activable_
145 #if defined (_OPENMP)
146 call omp_set_num_threads(1)
147 qrm_nth = qrm_mat%icntl(qrm_nthreads_)
156 #if defined (_OPENMP)
157 nth = omp_get_num_threads()
158 thn = omp_get_thread_num()
169 __qrm_prnt_dbg(
'("Num threads: ",i3)')nth
177 __qrm_check_ret(name,
'qrm_alloc',9998)
190 if(qrm_err_stack%nelem .gt. 0) goto 9998
199 if(.not. got_task) cycle taskloop
209 call
panel(task, thn, work, flops)
212 call
update(task, thn, work, flops)
221 call
clean(task, thn)
240 if(qrm_err_stack%nelem .gt. 0)
then
252 if(err_act .eq. qrm_abort_)
then
269 type(_qrm_front_type
),
pointer :: front
281 if(f .eq. 0)
exit active_fronts
282 front => fdata%front_list(f)
285 if(.not. omp_test_lock(front%lock)) cycle active_fronts
289 call omp_unset_lock(front%lock)
301 if(f .eq. 0)
exit ready_fronts
302 front => fdata%front_list(f)
304 if (found)
exit ready_fronts
326 type(_qrm_front_type
) :: front
329 type(_qrm_front_type
),
pointer :: father, cfront
330 integer :: p, c,
i, j, childpt, child
335 if( (front%status .le. 0) .or. &
336 &(front%status .eq. qrm_done_)) &
340 if(minval(front%ptable) .eq. qrm_done_)
then
345 front%status = qrm_busy_
352 if(maxval(front%ptable) .lt. 0)
return
356 if (front%lastpnl .lt. front%np)
then
359 if (front%ptable(front%lastpnl+1) .eq. front%lastpnl)
then
362 tsk =
qrm_task_type(qrm_task_pnl_, front%num, front%lastpnl+1, 0,.false.)
369 front%ptable(front%lastpnl+1) = qrm_busy_
377 do p=1, front%lastpnl
380 if(front%ptable(c) .eq. qrm_busy_) cycle
382 if(front%ptable(c) .eq. p-1)
then
390 front%ptable(c) = qrm_busy_
398 p = adata%parent(front%num)
400 father => fdata%front_list(p)
402 if(father%status .eq. qrm_active_)
then
403 do c=front%npiv/front%nb+1, front%nc
404 if (front%ptable(c) .eq. min(c, front%np))
then
412 front%ptable(c) = qrm_busy_
413 father%status = qrm_busy_
441 type(_qrm_front_type
) :: front
449 if(front%status .eq. qrm_activable_)
then
454 front%status = qrm_busy_
479 if(fdata%done .eq. fdata%nfronts)
then
497 subroutine panel(task, thn, work, flops)
506 real(kind(1.d0)) :: t1, t2
510 real(kind(1.d0)) :: flops
513 integer :: f, np, nc, nb, &
514 & m, n, k, i, info, j, ib, fcol, lcol, ii, jj
517 front => fdata%front_list(task%front)
518 f = adata%parent(front%num)
520 call
qrm_get(qrm_mat,
'qrm_ib', ib)
523 __qrm_prnt_dbg(
'(i3," =--> Panel : ",i4," -- ",i4)')thn,task%front,task%pnl
530 fcol = (task%pnl-1)*front%nb+1
531 lcol = min(task%pnl*front%nb,front%n)
535 do i=fcol, min(task%pnl*front%nb,front%ne), ib
536 n = min(ib, lcol-
i+1)
537 m = max(front%stair(min(
i+n-1,front%ne))-
i+1,0)
541 call _xgeqrf(m, n, front%front(
i,
i), front%m, front%tau(
i), work(1), &
544 call _xlarft(
'f',
'c', m, k, front%front(
i,
i), front%m, front%tau(
i), &
545 & front%t(1,
i), front%ib )
551 call _xlarfb(
'l',
't',
'f',
'c', m, n, k, front%front(
i,
i), &
552 & front%m, front%t(1,
i), front%ib, &
553 & front%front(
i,j), front%m, work(1), front%nb)
567 if(task%pnl .lt. front%nc)
then
568 if(front%ptable(task%pnl+1) .eq. task%pnl-1)
then
569 tsk =
qrm_task_type(qrm_task_upd_, front%num, task%pnl, task%pnl+1, .false.)
571 front%ptable(task%pnl+1) = qrm_busy_
578 front%ptable(task%pnl) = qrm_done_
580 if ((task%pnl .le. front%npiv/front%nb) .or. &
581 & (front%npiv .eq. front%ne))
then
583 front%ptable(task%pnl) = qrm_done_
585 front%ptable(task%pnl) = task%pnl
589 if(task%pnl .eq. front%nc)
then
590 front%status = qrm_factorized_
592 front%lastpnl=task%pnl
607 subroutine update(task, thn, work, flops)
621 real(kind(1.d0)) :: flops
623 real(kind(1.d0)) :: t1, t2
627 integer :: f,
i, j, m, n, k, ib, fcol, lcol
629 front => fdata%front_list(task%front)
630 f = adata%parent(front%num)
633 __qrm_prnt_dbg(
'(i3," =--> Update : ",i4," -- ",i4,2x,i4)')thn,task%front,task%pnl,task%col
640 call
qrm_get(qrm_mat,
'qrm_ib', ib)
642 fcol = (task%pnl-1)*front%nb+1
643 lcol = min(task%pnl*front%nb, front%ne)
644 j = (task%col-1)*front%nb+1
645 n = min(front%nb, front%n-j+1)
648 k = min(ib, lcol-
i+1)
649 m = max(front%stair(min(
i+k-1,front%ne))-
i+1,0)
652 call _xlarfb(
'l',
't',
'f',
'c', m, n , k, front%front(
i,
i), &
653 & front%m, front%t(1,
i), front%ib, &
654 & front%front(
i,j), front%m, work(1), front%nb)
668 if((f .eq. 0) .and. (task%pnl .eq. front%np))
then
671 front%ptable(task%col) = qrm_done_
672 else if ((task%pnl .eq. front%np) .and. &
673 & (front%npiv .eq. front%ne))
then
674 front%ptable(task%col) = qrm_done_
677 front%ptable(task%col) = task%pnl
679 i = minval(front%ptable)
680 if(
i .ge. front%np) front%status = qrm_factorized_
682 if((task%col .eq. (task%pnl+1)) .and. (task%col .le. front%np))
then
683 tsk =
qrm_task_type(qrm_task_pnl_, front%num, task%col, 0, .false.)
685 front%ptable(task%col) = qrm_busy_
711 real(kind(1.d0)) :: flops
714 integer :: msize,
i, f
717 character(len=*),
parameter :: name=
'activate'
721 front => fdata%front_list(task%front)
724 call omp_set_lock(front%lock)
728 __qrm_prnt_dbg(
'(i3," =--> Activate: ",i4)')thn,task%front
736 __qrm_check_ret(name,
'qrm_activate_front',9999)
740 if((front%m .eq. 0) .or. (front%n .eq. 0))
then
741 front%status = qrm_done_
743 fdata%done = fdata%done+1
746 front%status = qrm_active_
754 f = adata%parent(front%num)
756 front => fdata%front_list(f)
758 if (front%status .eq. qrm_ready_)
then
760 front%status = qrm_activable_
771 if(err_act .eq. qrm_abort_)
then
798 integer :: f,
i, j, nb, row, col, frow, fcol, b
799 integer :: nc, mc, ptr
801 integer,
allocatable :: tmp(:)
804 character(len=*),
parameter :: name=
'assemble'
808 front => fdata%front_list(task%front)
809 f = adata%parent(front%num)
810 father => fdata%front_list(f)
813 __qrm_prnt_dbg(
'(i3," =--> Assemble: ",i4," -- ",i4)')thn,task%front,task%col
821 __qrm_check_ret(name,
'qrm_aalloc',9999)
824 do col = (task%col-1)*front%nb+1, min(front%n, task%col*front%nb)
825 if(col .le. front%npiv) cycle
826 fcol = front%colmap(col-front%npiv)
827 do row = front%npiv+1, min(col, min(front%m, front%n))
828 frow = front%rowmap(row - front%npiv)
829 father%front(frow, fcol) = front%front(row,col)
831 b = (fcol-1)/father%nb+1
836 father%ptable = father%ptable+tmp
837 father%status = qrm_active_
847 front%ptable(task%col) = qrm_done_
855 if(err_act .eq. qrm_abort_)
then
880 integer :: f, mn, h,
i, j
883 character(len=*),
parameter :: name=
'clean'
887 front => fdata%front_list(task%front)
890 __qrm_prnt_dbg(
'(i3," =--> Clean : ",i4)')thn,task%front
900 __qrm_check_ret(name,
'qrm_clean_front',9999)
910 fdata%done = fdata%done+1
914 front%status = qrm_done_
922 if(err_act .eq. qrm_abort_)
then
This module contains all the error management routines and data.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
This module contains the definition of all the data related to the factorization phase.
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
This type defines the handle for the queues attached to a family of threads.
subroutine clean(task, thn)
This module contains all the facilities for visualizing the execution profile of a parallel code...
A data type meant to to define a queue.
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
This module contains all the generic interfaces for the typed routines in the factorization phase...
This module contains the interfaces of all non-typed routines.
subroutine assemble(task, thn)
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This module contains all the facilities for front queues.
logical function front_sched_act(front)
logical function front_sched_ops(front)
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
subroutine update(task, thn, work, flops)
subroutine activate(task, thn, flops)
subroutine _qrm_clean_front(qrm_mat, fnum)
This routine performs the cleaning of a front.
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
This type defines a computational task.
subroutine _qrm_factorization_core(qrm_mat)
This is the main factorization routine. It performs the factorization of all the fronts that have bee...
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
subroutine panel(task, thn, work, flops)
This type defines the data structure used to store a matrix.
Generic interface for the ::qrm_count_realflops ::qrm_count_pureflops.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
This module contains the definition of the basic sparse matrix type and of the associated methods...
subroutine check_facto_over()
subroutine qrm_par_mem_finalize()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Generif interface for the ::_qrm_pgeti, ::_qrm_pgetr and.
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
subroutine _qrm_activate_front(qrm_mat, fnum, flops)
This routine activates a front.
This module contains the definition of a task type that is used for scheduling tasks during the facto...
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_queue_free(q)
Frees a queue.
This type defines a data structure containing all the data related to a front.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.