QR_MUMPS
 All Classes Files Functions Variables Enumerations Enumerator Pages
dqrm_apply_qt.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
44 subroutine dqrm_apply_qt(qrm_mat, b)
45 
46  use dqrm_spmat_mod
47  use dqrm_rfpf_mod
48  use qrm_mem_mod
49  use qrm_queue_mod
50  use qrm_task_mod
51  use qrm_error_mod
52  use qrm_common_mod
53  implicit none
54 
55  type(dqrm_spmat_type), target :: qrm_mat
56  real(kind(1.d0)) :: b(:,:)
57 
58 
59  integer :: qrm_nth, nth, thn, info, f, dones
60  integer :: i, c
61  type(qrm_task_type) :: task
62  type(qrm_task_queue_handle) :: tq_h
63  type(qrm_queue) :: ready_q
64  type(dqrm_front_type), pointer :: front
65  integer, allocatable :: status(:)
66  type(qrm_adata_type), pointer :: adata
67  type(dqrm_fdata_type), pointer :: fdata
68  logical :: got_task
69 #if defined (_OPENMP)
70  integer(kind=omp_lock_kind), allocatable :: locks(:)
71  integer(kind=omp_lock_kind) :: dlock
72 #endif
73 
74  ! error management
75  integer :: err_act
76  character(len=*), parameter :: name='qrm_aply_qt'
77 
78  call qrm_err_act_save(err_act)
79 
80  __qrm_prnt_dbg('("Applying Q^T")')
81 
82  ! simplify
83  adata => qrm_mat%adata
84  fdata => qrm_mat%fdata
85 
86  ! initialize queue
87  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
88 
89  ! initialize to safe values for the case where openmp is not used
90  ! the status and locks arrays are needed because multiple solves may
91  ! be run in parallel on the same fdata
92  call qrm_aalloc(status, qrm_mat%adata%nnodes)
93  __qrm_check_ret(name,'qrm_aalloc',9999)
94 #if defined (_OPENMP)
95  allocate(locks(adata%nnodes))
96  call omp_init_lock(dlock)
97 #endif
98 
99  do f = 1, adata%nnodes
100  status(f) = 0
101 #if defined (_OPENMP)
102  call omp_init_lock(locks(f))
103 #endif
104  do i=adata%childptr(f), adata%childptr(f+1)-1
105  c = adata%child(i)
106  if(adata%small(c) .eq. 0) status(f) = status(f)-1
107  end do
108  end do
109 
110  ! push all the leaves in the ready queue
111  do i=adata%nleaves, 1, -1
112  call qrm_queue_push(ready_q, adata%leaves(i))
113  status(adata%leaves(i)) = qrm_ready_
114  end do
115 
116  if(adata%ncsing .gt. 0) then
117  dones = 1
118  else
119  dones = 0
120  end if
121 
122 #if defined (_OPENMP)
123  call omp_set_num_threads(1)
124  qrm_nth=qrm_mat%icntl(qrm_nthreads_)
125 #endif
126 
127  !$omp parallel &
128  !$omp & num_threads(qrm_nth) &
129  !$omp & private(got_task, task, nth, thn, info) &
130  !$omp & shared(ready_q, status, locks, dlock, dones, tq_h)
131 
132 #if defined (_OPENMP)
133  nth = omp_get_num_threads()
134  thn = omp_get_thread_num()
135 #else
136  nth = 1
137  thn = 0
138 #endif
139 
140  info = 0
141 
142  call qrm_par_mem_init()
143  call qrm_init_task_queue(tq_h)
144 
145  taskloop: do
146  if(qrm_err_stack%nelem .gt. 0) goto 9998
147 
148  ! fill up the queue if there are too few tasks
149  if(qrm_task_queue_card(tq_h) .lt. nth) then
150  call fill_queue_qt( ready_q, tq_h )
151  end if
152 
153  got_task = qrm_get_task(tq_h, task)
154 
155  if(.not. got_task) cycle taskloop ! queue is empty
156 
157  select case(task%id)
158  case(qrm_task_exit_)
159  !$omp barrier
160  ! done, exit
161  exit
162  case(qrm_task_app_)
163  ! apply a set of Householder vectors
164  call apply_qt(task, thn, ready_q)
165  end select
166  end do taskloop
167 
168 9998 continue
169  call qrm_clean_task_queue(tq_h)
170  call qrm_par_mem_finalize()
171  !$omp end parallel
172 
173 
174  call qrm_adealloc(status)
175 #if defined (_OPENMP)
176  deallocate(locks)
177 #endif
178  call qrm_queue_free(ready_q)
179 
180  if(qrm_err_stack%nelem .gt. 0) then
181  call qrm_err_push(21, name)
182  goto 9999
183  end if
184 
185  call qrm_err_act_restore(err_act)
186  return
187 
188 9999 continue ! error management
189  call qrm_err_act_restore(err_act)
190  if(err_act .eq. qrm_abort_) then
191  call qrm_err_check()
192  end if
193  return
194 
195 contains
196 
197 
198 
199 !==========================================================================================
200 !==========================================================================================
201  subroutine fill_queue_qt( ready_q, tq_h )
202  implicit none
203 
204  type(dqrm_front_type), pointer :: front
205  integer :: f
206  integer :: thn
207  logical :: found
208  type(qrm_task_type) :: tsk
209  type(qrm_task_queue_handle) :: tq_h
210  type(qrm_queue) :: ready_q
211 
212 #if defined (_OPENMP)
213  thn = omp_get_thread_num()
214 #else
215  thn = 0
216 #endif
217 
218  found = .false.
219  f = 0
220  do
221 
222  f = qrm_queue_next(ready_q, f)
223  if(f .eq. 0) exit
224 
225  front => fdata%front_list(f)
226 
227  !$ if(.not. omp_test_lock(locks(f))) cycle
228  ! !$ call omp_set_lock(locks(f))
229  if(status(f) .eq. qrm_ready_) then
230  tsk = qrm_task_type(qrm_task_app_, front%num, 0, 0, .false.)
231 
232  if(qrm_sched_task(tq_h, tsk, 'h')) then
233  ! mark the column as assigned
234  found = .true.
235  status(f) = qrm_busy_
236  call qrm_queue_rm(ready_q, f)
237  end if
238 
239  end if
240  !$ call omp_unset_lock(locks(f))
241  end do
242 
243  ! if nothing was found above, then check if the facto is over
244  ! otherwise return
245  if(found) return
246 
247  call check_applyqt_over( tq_h )
248 
249  return
250  end subroutine fill_queue_qt
251 
252 
253 
254 
255 !==========================================================================================
256 !==========================================================================================
257  subroutine check_applyqt_over( tq_h )
258  ! checks whether the apply_qt is over and eventually schedules a
259  ! termination task
260  implicit none
261 
262  type(qrm_task_type) :: tsk
263  logical :: found
264  type(qrm_task_queue_handle) :: tq_h
265 
266  ! all the fronts are done
267  !$ call omp_set_lock( dlock )
268  if(dones .eq. fdata%nfronts) then
269  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
270  found = qrm_sched_task(tq_h, tsk, 't')
271  end if
272  !$ call omp_unset_lock( dlock )
273 
274  return
275  end subroutine check_applyqt_over
276 
277 
278 
279 
280 !==========================================================================================
281 !==========================================================================================
282  subroutine apply_qt(task, thn, ready_q)
283  implicit none
284  type(qrm_task_type) :: task
285  integer :: thn
286 
287  type(dqrm_front_type), pointer :: front
288  integer :: f, p, c, info
289  type(qrm_queue) :: ready_q
290 
291  front => null()
292 
293  ! to make things easier
294  front => qrm_mat%fdata%front_list(task%front)
295  f = qrm_mat%adata%parent(task%front)
296  info = 0
297 
298  ! sweep over the children to check whether there are small subtrees
299  ! that have to be treated
300  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
301  c = adata%child(p)
302  if(adata%small(c) .eq. 1) call do_subtree_qt(c, info)
303  if(info .ne. 0) goto 9997
304  end do
305 
306  call front_qt(front, info)
307  if(info .ne. 0) goto 9997
308  status(task%front) = qrm_done_
309 
310  !$ call omp_set_lock( dlock )
311  dones = dones+1
312  !$ call omp_unset_lock( dlock )
313 
314  if(f .ne. 0) then
315  !$ call omp_set_lock( locks(f) )
316  status(f) = status(f)+1
317  if(status(f) .eq. qrm_ready_) call qrm_queue_push(ready_q, f)
318  !$ call omp_unset_lock( locks(f) )
319  end if
320 
321 9997 continue
322  return
323  end subroutine apply_qt
324 
325 
326 
327 !==========================================================================================
328 !==========================================================================================
329  subroutine do_subtree_qt(fnum, info)
330  implicit none
331 
332  integer :: fnum, info
333 
334  type(dqrm_front_type), pointer :: front
335  integer :: node, f
336 
337  info = 0
338 
339  node = fnum
340 
341  subtree: do
342 
343  front => fdata%front_list(node)
344 
345  if (status(node) .eq. qrm_ready_) then
346  ! the front is ready to be assembled and factorized
347 
348  ! factorize
349  call front_qt(front, info)
350  if(info .ne. 0) goto 9998
351 
352  !$ call omp_set_lock( dlock )
353  dones = dones+1
354  !$ call omp_unset_lock( dlock )
355 
356  f = adata%parent(node)
357  if(f .ne. 0) then
358  !$ call omp_set_lock( locks(f) )
359  status(f) = status(f)+1
360  !$ call omp_unset_lock( locks(f) )
361  end if
362 
363  status(node) = qrm_done_
364 
365  if(node .eq. fnum) exit subtree ! we're done
366 
367  node = adata%parent(node)
368  if(node .eq. 0) exit subtree
369 
370  else
371  ! go down the subtree
372  node = adata%child(adata%childptr(node+1)+status(node))
373  cycle subtree
374  end if
375 
376 
377  end do subtree
378 
379 9998 continue
380  return
381  end subroutine do_subtree_qt
382 
383 
384 
385 
386 !==========================================================================================
387 !==========================================================================================
388  subroutine front_qt(front, info)
389  use dqrm_utils_mod
390  use dqrm_rfpf_mod
391  use dqrm_spmat_mod
392  use qrm_mem_mod
393  use qrm_common_mod
394  implicit none
395 
396  type(dqrm_front_type) :: front
397  integer :: info
398 
399  integer :: pv1, c, k, m, pv2, n, i, j, pk, p, cnt, jp
400  integer :: f, thn
401  ! TODO: optimize this by allocating once in the subtree
402  real(kind(1.d0)), allocatable :: work(:,:), in_b(:,:), t(:,:)
403 
404  ! error management
405  character(len=*), parameter :: name='front_qt'
406 
407  f = adata%parent(front%num)
408 
409  ! shortcut
410  if (min(front%m, front%n) .le. 0) goto 9999
411 
412 #if defined (_OPENMP)
413  call omp_set_lock( locks(front%num) )
414  thn = omp_get_thread_num()
415 #else
416  thn = 0
417 #endif
418 
419  ! write(*,'(i3," =--> Apply Q'' : ",i4)')thn, front%num
420 
421  ! allocate everything that's needed
422  n = size(b,2)
423  call qrm_aalloc(in_b, front%m, n, info=info)
424  ! call qrm_aalloc(t, front%nb, front%nb, info=info)
425  call qrm_aalloc(work, n, front%nb, info)
426  __qrm_check_ret(name,'qrm_aalloc',9999)
427 
428  in_b = b(front%rows,:)
429 
430  ! do all the computations here
431  cnt=1
432  j = 0
433  p = 1
434 
435 #if defined (RFPF)
436  ! pv1 = 1
437  ! do c = 1, front%ne, front%nb
438  ! k = min(front%nb, front%ne-c+1)
439  ! m = max(front%stair(c+k-1)-c-k+1,0)
440 
441  ! if(m .le. 0) then
442  ! pv2 = 1
443  ! else
444  ! pv2 = pv1+(k*(k+1))/2
445  ! end if
446 
447  ! call drprft('f', 'c', m, k, front%h(pv1), front%h(pv2), m, front%tau(c), t(1,1), front%nb)
448 
449  ! call drprfb('l', 't', 'f', 'c', m+k, n, k, front%h(pv1), front%h(pv2), m, &
450  ! & t(1,1), front%nb, in_b(c,1), front%m, work(1,1), n)
451  ! pv1 = pv1+(k*(k+1))/2 + k*m
452  ! end do
453 #else
454 
455  outer: do jp = 1, front%n, front%nb
456  pk = min(front%nb, front%ne-jp+1)
457  if(pk .le. 0) exit outer
458 
459  inner: do j = jp, jp+pk-1, front%ib
460  k = min(front%ib, jp+pk - j)
461  if(k .le. 0) exit inner
462  m = max(front%stair(j+k-1),j+k-1) - j+1
463  call dlarfb('l', 't', 'f', 'c', m, n, k, front%h(cnt), m, front%h(cnt), &
464  & m, in_b(j,1), front%m, work(1,1), n)
465  cnt = cnt+m*k
466  end do inner
467  end do outer
468 #endif
469 
470  ! scatter front%b into b
471  b(front%rows,:) = in_b
472 
473  ! cleanup
474  call qrm_adealloc(in_b)
475  ! call qrm_adealloc(t)
476  call qrm_adealloc(work)
477  __qrm_check_ret(name,'qrm_adelloc',9999)
478 
479 9999 continue
480 #if defined (_OPENMP)
481  call omp_unset_lock( locks(front%num) )
482 #endif
483  return
484 
485  end subroutine front_qt
486 
487 
488 
489 
490 end subroutine dqrm_apply_qt
491 
492 
This module contains all the error management routines and data.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
subroutine dqrm_apply_qt(qrm_mat, b)
This function applies Q' to a vector/matrix.
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
This type defines the handle for the queues attached to a family of threads.
A data type meant to to define a queue.
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
This module contains the interfaces of all non-typed routines.
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
subroutine front_qt(front, info)
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This module contains all the facilities for front queues.
subroutine do_subtree_qt(fnum, info)
This module contains the definition of the basic sparse matrix type and of the associated methods...
This module contains generic interfaces for a number of auxiliary tools.
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
subroutine fill_queue_qt(ready_q, tq_h)
subroutine check_applyqt_over(tq_h)
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
This type defines a computational task.
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
This type defines the data structure used to store a matrix.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
subroutine qrm_par_mem_finalize()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
int i
Definition: secs.c:40
This module contains an implementation of some operations on triangular/trapezoidal matrices stored i...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
subroutine apply_qt(task, thn, ready_q)
This module contains the definition of a task type that is used for scheduling tasks during the facto...
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_queue_free(q)
Frees a queue.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.