QR_MUMPS
 All Classes Files Functions Variables Enumerations Enumerator Pages
dqrm_solve_r.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
45 subroutine dqrm_solve_r(qrm_mat, b, x)
46 
47  use dqrm_spmat_mod
48  use dqrm_rfpf_mod
49  use qrm_mem_mod
50  use qrm_common_mod
51  use qrm_task_mod
52  use qrm_queue_mod
53  use dqrm_solve_mod, protect=>dqrm_solve_r
54  use qrm_error_mod
55  implicit none
56 
57  type(dqrm_spmat_type), target :: qrm_mat
58  real(kind(1.d0)), intent(inout) :: b(:,:)
59  real(kind(1.d0)), intent(out) :: x(:,:)
60 
61  integer :: nth, thn, info, f, dones
62  integer :: i, p
63  type(qrm_task_type) :: task
64  type(qrm_task_queue_handle) :: tq_h
65  type(qrm_queue) :: ready_q
66  type(dqrm_front_type), pointer :: front
67  integer, allocatable :: status(:)
68  type(qrm_adata_type), pointer :: adata
69  type(dqrm_fdata_type), pointer :: fdata
70  logical :: got_task
71 #if defined (_OPENMP)
72  integer(kind=omp_lock_kind), allocatable :: locks(:)
73  integer(kind=omp_lock_kind) :: dlock
74 #endif
75 
76  ! error management
77  integer :: err_act
78  character(len=*), parameter :: name='qrm_solve_r'
79 
80  call qrm_err_act_save(err_act)
81 
82  __qrm_prnt_dbg('("Solving for R")')
83 
84  ! simplify
85  adata => qrm_mat%adata
86  fdata => qrm_mat%fdata
87 
88  ! initialize queue
89  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
90 
91  ! initialize to safe values for the case where openmp is not used
92  ! the status and locks arrays are needed because multiple solves may
93  ! be run in parallel on the same fdata
94  call qrm_aalloc(status, qrm_mat%adata%nnodes)
95  __qrm_check_ret(name,'qrm_aalloc',9999)
96  !$ allocate(locks(adata%nnodes))
97 
98  do f = 1, adata%nnodes
99  status(f) = qrm_ready_
100  !$ call omp_init_lock(locks(f))
101  p = adata%parent(f)
102  if(p .eq. 0 .and. (adata%rc(f).ge.0)) call qrm_queue_push(ready_q, f)
103  end do
104 
105  !$ call omp_init_lock(dlock)
106 
107  if(adata%ncsing .gt. 0) then
108  dones = 1
109  else
110  dones = 0
111  end if
112 
113  !$ call omp_set_num_threads(1)
114 
115  !$ nth = qrm_mat%icntl(qrm_nthreads_)
116  !$omp parallel &
117  !$omp & num_threads(nth) &
118  !$omp & private(got_task, task, nth, thn, info) &
119  !$omp & shared(ready_q, status, locks, dlock, dones, tq_h)
120 
121 #if defined (_OPENMP)
122  nth = omp_get_num_threads()
123  thn = omp_get_thread_num()
124 #else
125  nth = 1
126  thn = 0
127 #endif
128 
129  info = 0
130 
131  call qrm_par_mem_init()
132  call qrm_init_task_queue(tq_h)
133 
134  taskloop: do
135  if(qrm_err_stack%nelem .gt. 0) goto 9998
136 
137  ! fill up the queue if there are too few tasks
138  if(qrm_task_queue_card(tq_h) .lt. nth) then
139  call fill_queue_r( )
140  end if
141 
142  got_task = qrm_get_task(tq_h, task)
143 
144  if(.not. got_task) cycle taskloop ! queue is empty
145 
146  select case(task%id)
147  case(qrm_task_exit_)
148  !$omp barrier
149  ! done, exit
150  exit
151  case(qrm_task_sol_)
152  ! apply a set of Householder vectors
153  call solve_r(task, thn)
154  end select
155  end do taskloop
156 
157 9998 continue
158  call qrm_clean_task_queue(tq_h)
159  call qrm_par_mem_finalize()
160  !$omp end parallel
161 
162 
163  call qrm_adealloc(status)
164  !$ deallocate(locks)
165  call qrm_queue_free(ready_q)
166 
167  if(qrm_err_stack%nelem .gt. 0) then
168  call qrm_err_push(22, name)
169  goto 9999
170  end if
171 
172  ! check if there is a singleton block and eventually solve for it
173  if (qrm_mat%adata%ncsing .gt. 0) then
174  call dqrm_solve_sing_front(qrm_mat, b, x, 'n')
175  __qrm_check_ret(name,'qrm_solve_sing_front',9999)
176  end if
177 
178  call qrm_err_act_restore(err_act)
179  return
180 
181 9999 continue ! error management
182  call qrm_err_act_restore(err_act)
183  if(err_act .eq. qrm_abort_) then
184  call qrm_err_check()
185  end if
186  return
187 
188 contains
189 
190 
191 
192 !==========================================================================================
193 !==========================================================================================
194  subroutine fill_queue_r( )
195  implicit none
196 
197  type(dqrm_front_type), pointer :: front
198  integer :: f
199  integer :: thn
200  logical :: found
201  type(qrm_task_type) :: tsk
202 
203 #if defined(_OPENMP)
204  thn=omp_get_thread_num()
205 #else
206  thn=0
207 #endif
208 
209  found = .false.
210 
211  f = 0
212  do
213 
214  f = qrm_queue_next(ready_q, f)
215  if(f .eq. 0) exit
216 
217  front => fdata%front_list(f)
218 
219 #if defined(_OPENMP)
220  if(.not. omp_test_lock(locks(f))) cycle
221 #endif
222  if(status(f) .eq. qrm_ready_) then
223  call qrm_queue_rm(ready_q, f)
224  tsk = qrm_task_type(qrm_task_sol_, front%num, 0, 0, .false.)
225  if(qrm_sched_task(tq_h, tsk, 'h')) then
226  ! mark the column as assigned
227  found = .true.
228  status(f) = qrm_busy_
229  end if
230  end if
231 #if defined(_OPENMP)
232  call omp_unset_lock(locks(f))
233 #endif
234  end do
235 
236  ! if nothing was found above, then check if the facto is over
237  ! otherwise return
238  if(found) return
239 
240  call check_solver_over( )
241 
242  return
243  end subroutine fill_queue_r
244 
245 
246 
247 
248 !==========================================================================================
249 !==========================================================================================
250  subroutine check_solver_over( )
251  ! checks whether the apply_qt is over and eventually schedules a
252  ! termination task
253  implicit none
254 
255  type(qrm_task_type) :: tsk
256  logical :: found
257 
258  ! all the fronts are done
259  !$ call omp_set_lock( dlock )
260  if(dones .eq. fdata%nfronts) then
261  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
262  found = qrm_sched_task(tq_h, tsk, 't')
263  end if
264  !$ call omp_unset_lock( dlock )
265 
266  return
267  end subroutine check_solver_over
268 
269 
270 
271 
272 
273 
274 !==========================================================================================
275 !==========================================================================================
276  subroutine solve_r(task, thn)
277  implicit none
278  type(qrm_task_type) :: task
279  integer :: thn
280 
281  type(dqrm_front_type), pointer :: front
282  integer :: f, p, c, info
283 
284  front => null()
285 
286  ! to make things easier
287  front => qrm_mat%fdata%front_list(task%front)
288  info = 0
289 
290  call front_r(front, info)
291  status(task%front) = qrm_done_
292  call qrm_queue_rm(ready_q, front%num)
293 
294 #if defined(_OPENMP)
295  call omp_set_lock( dlock )
296 #endif
297  dones = dones+1
298 #if defined(_OPENMP)
299  call omp_unset_lock( dlock )
300 #endif
301 
302  ! sweep over the children. Small children are treated immediately,
303  ! the others are pushed on the ready_q
304  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
305  c = adata%child(p)
306  if(adata%small(c) .eq. 1) then
307  call do_subtree_r(c, info)
308  if(info .ne. 0) goto 9997
309  else
310  call qrm_queue_push(ready_q, c)
311  end if
312  end do
313 
314 
315 9997 continue
316  return
317  end subroutine solve_r
318 
319 
320 
321 
322 
323 
324 !==========================================================================================
325 !==========================================================================================
326  subroutine do_subtree_r(fnum, info)
327  implicit none
328 
329  integer :: fnum, info
330 
331  type(dqrm_front_type), pointer :: front
332  integer :: node, c, acc, thn, p
333  type(qrm_queue) :: sub_q
334 
335  info = 0
336  acc = 0
337 
338 #if defined(_OPENMP)
339  thn = omp_get_thread_num()
340 #else
341  thn = 0
342 #endif
343 
344  call qrm_queue_init(sub_q, adata%nnodes, qrm_fifo_)
345  call qrm_queue_push(sub_q, fnum)
346 
347  do
348  node = qrm_queue_pop(sub_q)
349 
350  if(node .eq. 0) exit
351 
352  front => qrm_mat%fdata%front_list(node)
353 
354  call front_r(front, info)
355  status(node) = qrm_done_
356 
357  !$ call omp_set_lock( dlock )
358  dones = dones+1
359  !$ call omp_unset_lock( dlock )
360 
361  ! sweep over the children. Small children are treated immediately,
362  ! the others are pushed on the ready_q
363  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
364  c = adata%child(p)
365  call qrm_queue_push(sub_q, c)
366  acc = acc+1
367  end do
368 
369  end do
370 
371  call qrm_queue_free(sub_q)
372 
373  return
374 
375  end subroutine do_subtree_r
376 
377 
378 
379 
380 
381 
382 !==========================================================================================
383 !==========================================================================================
384  subroutine front_r(front, info)
385 
386  use dqrm_utils_mod
387  use dqrm_rfpf_mod
388  use dqrm_spmat_mod
389  use qrm_mem_mod
390  use qrm_common_mod
391  implicit none
392 
393  type(dqrm_front_type) :: front
394  integer :: info
395 
396  integer :: k, m, pk, jp, n, i, thn, cnt, j, pv1, pv2, r
397 
398  ! TODO: optimize this by allocating once in the subtree
399  real(kind(1.d0)), allocatable :: in_x(:,:)
400 
401  ! error management
402  character(len=*), parameter :: name='front_r'
403 
404  ! shortcut
405  if (min(front%m, front%n) .le.0) goto 9999
406  if (front%npiv .le. 0) goto 9999
407 
408 #if defined(_OPENMP)
409  thn = omp_get_thread_num()
410 #else
411  thn = 0
412 #endif
413  ! !$ call omp_set_lock( locks(front%num) )
414 
415  ! write(*,'(i3," =--> Solve R : ",i4)')thn, front%num
416  call qrm_aalloc(in_x, front%n, size(x,2), info)
417  __qrm_check_ret(name,'qrm_aalloc',9999)
418 
419  in_x(1:front%npiv,:) = b(front%rows(1:front%npiv),:)
420  in_x(front%npiv+1:front%n,:) = x(front%cols(front%npiv+1:front%n),:)
421 
422 
423 #if defined (RFPF)
424  ! pv1 = 1
425 
426  ! ! set r to be the starting row of the R factor
427  ! r = ((front%npiv-1)/front%nb)*front%nb+1
428  ! do
429  ! if(r .lt. 1) exit
430  ! ! k is the size of the panel
431  ! k = min(front%nb,front%npiv-r+1)
432  ! ! n is the col size of the rectangular part of the panel
433  ! n = front%n - r-k+1
434  ! ! pv1 is the position of the first element of the panel in the
435  ! ! front%r array (where the rfpf part starts)
436  ! pv1 = (front%n-r+1)*(r-1)+r*(r-1)/2+1
437  ! ! pv2 is the position in the front%r array where the
438  ! ! rectangular part of this panel starts
439  ! if(n .le. 0) then
440  ! pv2 = 1
441  ! else
442  ! pv2 = pv1+(k*(k+1))/2
443  ! end if
444  ! if(n .gt. 0) call dgemm('n', 'n', k, size(x,2), n, -1.d0, front%r(pv2), &
445  ! &k, in_x(r+k,1), front%n, 1.d0, in_x(r,1), front%n)
446  ! call drpsm('l', 'u', 'n', 'n', k, size(x,2), 1.d0, front%r(pv1), in_x(r,1), front%n)
447  ! ! go backwards one panel
448  ! r = r - front%nb
449  ! end do
450 #else
451  n = size(x,2)
452  cnt=size(front%r)+1
453  outer: do jp = front%npiv - mod(front%npiv, front%nb)+1, 1, -front%nb
454  pk = min(front%nb, front%npiv-jp+1)
455  if(pk .le. 0) cycle
456  ! write(*,*)'solve',jp,pk
457 
458  inner: do j = jp+pk-mod(pk,front%ib), jp, -front%ib
459  m = min(front%ib, jp+pk - j)
460  if(m .le. 0) cycle
461  k = front%n-j-m+1
462  cnt = cnt-m*(front%n-j+1)
463  ! write(*,*)size(front%r), cnt, j, k+m, m
464  if(k.gt.0) call dgemm('n', 'n', m, n, k, -1.d0, front%r(cnt+m*m), m, &
465  & in_x(j+m,1), front%n, 1.d0, in_x(j,1), front%n)
466  call dtrsm('l', 'u', 'n', 'n', m, n, 1.d0, front%r(cnt), &
467  & m, in_x(j,1), front%n)
468  end do inner
469  end do outer
470 #endif
471 
472  x(front%cols(1:front%npiv),:) = in_x(1:front%npiv,:)
473 
474  call qrm_adealloc(in_x)
475  __qrm_check_ret(name,'qrm_adealloc',9999)
476  ! !$ call omp_unset_lock( locks(front%num) )
477 
478 9999 continue !error
479  return
480 
481  end subroutine front_r
482 
483 end subroutine dqrm_solve_r
This module contains all the error management routines and data.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
subroutine check_solver_over()
This type defines the handle for the queues attached to a family of threads.
subroutine dqrm_solve_sing_front(qrm_mat, b, x, trans)
This function handles the front containing the singletons during the solve for R or R'...
This module contains all the interfaces for the typed routines in the solve phase.
subroutine solve_r(task, thn)
A data type meant to to define a queue.
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
This module contains the interfaces of all non-typed routines.
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This module contains all the facilities for front queues.
This module contains the definition of the basic sparse matrix type and of the associated methods...
This module contains generic interfaces for a number of auxiliary tools.
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
subroutine dqrm_solve_r(qrm_mat, b, x)
This function solves for R against multiple vectors.
subroutine front_r(front, info)
subroutine fill_queue_r()
This type defines a computational task.
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
integer function qrm_queue_pop(q)
Pops an element from a queue.
This type defines the data structure used to store a matrix.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
subroutine qrm_par_mem_finalize()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
subroutine do_subtree_r(fnum, info)
int i
Definition: secs.c:40
This module contains an implementation of some operations on triangular/trapezoidal matrices stored i...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
This module contains the definition of a task type that is used for scheduling tasks during the facto...
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_queue_free(q)
Frees a queue.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.