QR_MUMPS
 All Classes Files Functions Variables Enumerations Enumerator Pages
qrm_solve_r.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
45 subroutine _qrm_solve_r(qrm_mat, b, x)
46 
47  use _qrm_spmat_mod
48  use _qrm_rfpf_mod
49  use qrm_mem_mod
50  use qrm_common_mod
51  use qrm_task_mod
52  use qrm_queue_mod
53  use _qrm_solve_mod, protect=>_qrm_solve_r
54  use qrm_error_mod
55  implicit none
56 
57  type(_qrm_spmat_type), target :: qrm_mat
58  _qrm_data, intent(inout) :: b(:,:)
59  _qrm_data, intent(out) :: x(:,:)
60 
61  integer :: nth, thn, info, f, dones
62  integer :: i, p
63  type(qrm_task_type) :: task
64  type(qrm_task_queue_handle) :: tq_h
65  type(qrm_queue) :: ready_q
66  type(_qrm_front_type), pointer :: front
67  integer, allocatable :: status(:)
68  type(qrm_adata_type), pointer :: adata
69  type(_qrm_fdata_type), pointer :: fdata
70  logical :: got_task
71 #if defined (_OPENMP)
72  integer(kind=omp_lock_kind), allocatable :: locks(:)
73  integer(kind=omp_lock_kind) :: dlock
74 #endif
75 
76  ! error management
77  integer :: err_act
78  character(len=*), parameter :: name='qrm_solve_r'
79 
80  call qrm_err_act_save(err_act)
81 
82  __qrm_prnt_dbg('("Solving for R")')
83 
84  ! simplify
85  adata => qrm_mat%adata
86  fdata => qrm_mat%fdata
87 
88  ! initialize queue
89  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
90 
91  ! initialize to safe values for the case where openmp is not used
92  ! the status and locks arrays are needed because multiple solves may
93  ! be run in parallel on the same fdata
94  call qrm_aalloc(status, qrm_mat%adata%nnodes)
95  __qrm_check_ret(name,'qrm_aalloc',9999)
96  !$ allocate(locks(adata%nnodes))
97 
98  do f = 1, adata%nnodes
99  status(f) = qrm_ready_
100  !$ call omp_init_lock(locks(f))
101  p = adata%parent(f)
102  if(p .eq. 0 .and. (adata%rc(f).ge.0)) call qrm_queue_push(ready_q, f)
103  end do
104 
105  !$ call omp_init_lock(dlock)
106 
107  if(adata%ncsing .gt. 0) then
108  dones = 1
109  else
110  dones = 0
111  end if
112 
113  !$ call omp_set_num_threads(1)
114 
115  !$ nth = qrm_mat%icntl(qrm_nthreads_)
116  !$omp parallel &
117  !$omp & num_threads(nth) &
118  !$omp & private(got_task, task, nth, thn, info) &
119  !$omp & shared(ready_q, status, locks, dlock, dones, tq_h)
120 
121 #if defined (_OPENMP)
122  nth = omp_get_num_threads()
123  thn = omp_get_thread_num()
124 #else
125  nth = 1
126  thn = 0
127 #endif
128 
129  info = 0
130 
131  call qrm_par_mem_init()
132  call qrm_init_task_queue(tq_h)
133 
134  taskloop: do
135  if(qrm_err_stack%nelem .gt. 0) goto 9998
136 
137  ! fill up the queue if there are too few tasks
138  if(qrm_task_queue_card(tq_h) .lt. nth) then
139  call fill_queue_r( )
140  end if
141 
142  got_task = qrm_get_task(tq_h, task)
143 
144  if(.not. got_task) cycle taskloop ! queue is empty
145 
146  select case(task%id)
147  case(qrm_task_exit_)
148  !$omp barrier
149  ! done, exit
150  exit
151  case(qrm_task_sol_)
152  ! apply a set of Householder vectors
153  call solve_r(task, thn)
154  end select
155  end do taskloop
156 
157 9998 continue
158  call qrm_clean_task_queue(tq_h)
159  call qrm_par_mem_finalize()
160  !$omp end parallel
161 
162 
163  call qrm_adealloc(status)
164  !$ deallocate(locks)
165  call qrm_queue_free(ready_q)
166 
167  if(qrm_err_stack%nelem .gt. 0) then
168  call qrm_err_push(22, name)
169  goto 9999
170  end if
171 
172  ! check if there is a singleton block and eventually solve for it
173  if (qrm_mat%adata%ncsing .gt. 0) then
174  call _qrm_solve_sing_front(qrm_mat, b, x, 'n')
175  __qrm_check_ret(name,'qrm_solve_sing_front',9999)
176  end if
177 
178  call qrm_err_act_restore(err_act)
179  return
180 
181 9999 continue ! error management
182  call qrm_err_act_restore(err_act)
183  if(err_act .eq. qrm_abort_) then
184  call qrm_err_check()
185  end if
186  return
187 
188 contains
189 
190 
191 
192 !==========================================================================================
193 !==========================================================================================
194  subroutine fill_queue_r( )
195  implicit none
196 
197  type(_qrm_front_type), pointer :: front
198  integer :: f
199  integer :: thn
200  logical :: found
201  type(qrm_task_type) :: tsk
202 
203 #if defined(_OPENMP)
204  thn=omp_get_thread_num()
205 #else
206  thn=0
207 #endif
208 
209  found = .false.
210 
211  f = 0
212  do
213 
214  f = qrm_queue_next(ready_q, f)
215  if(f .eq. 0) exit
216 
217  front => fdata%front_list(f)
218 
219 #if defined(_OPENMP)
220  if(.not. omp_test_lock(locks(f))) cycle
221 #endif
222  if(status(f) .eq. qrm_ready_) then
223  call qrm_queue_rm(ready_q, f)
224  tsk = qrm_task_type(qrm_task_sol_, front%num, 0, 0, .false.)
225  if(qrm_sched_task(tq_h, tsk, 'h')) then
226  ! mark the column as assigned
227  found = .true.
228  status(f) = qrm_busy_
229  end if
230  end if
231 #if defined(_OPENMP)
232  call omp_unset_lock(locks(f))
233 #endif
234  end do
235 
236  ! if nothing was found above, then check if the facto is over
237  ! otherwise return
238  if(found) return
239 
240  call check_solver_over( )
241 
242  return
243  end subroutine fill_queue_r
244 
245 
246 
247 
248 !==========================================================================================
249 !==========================================================================================
250  subroutine check_solver_over( )
251  ! checks whether the apply_qt is over and eventually schedules a
252  ! termination task
253  implicit none
254 
255  type(qrm_task_type) :: tsk
256  logical :: found
257 
258  ! all the fronts are done
259  !$ call omp_set_lock( dlock )
260  if(dones .eq. fdata%nfronts) then
261  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
262  found = qrm_sched_task(tq_h, tsk, 't')
263  end if
264  !$ call omp_unset_lock( dlock )
265 
266  return
267  end subroutine check_solver_over
268 
269 
270 
271 
272 
273 
274 !==========================================================================================
275 !==========================================================================================
276  subroutine solve_r(task, thn)
277  implicit none
278  type(qrm_task_type) :: task
279  integer :: thn
280 
281  type(_qrm_front_type), pointer :: front
282  integer :: f, p, c, info
283 
284  front => null()
285 
286  ! to make things easier
287  front => qrm_mat%fdata%front_list(task%front)
288  info = 0
289 
290  call front_r(front, info)
291  status(task%front) = qrm_done_
292  call qrm_queue_rm(ready_q, front%num)
293 
294 #if defined(_OPENMP)
295  call omp_set_lock( dlock )
296 #endif
297  dones = dones+1
298 #if defined(_OPENMP)
299  call omp_unset_lock( dlock )
300 #endif
301 
302  ! sweep over the children. Small children are treated immediately,
303  ! the others are pushed on the ready_q
304  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
305  c = adata%child(p)
306  if(adata%small(c) .eq. 1) then
307  call do_subtree_r(c, info)
308  if(info .ne. 0) goto 9997
309  else
310  call qrm_queue_push(ready_q, c)
311  end if
312  end do
313 
314 
315 9997 continue
316  return
317  end subroutine solve_r
318 
319 
320 
321 
322 
323 
324 !==========================================================================================
325 !==========================================================================================
326  subroutine do_subtree_r(fnum, info)
327  implicit none
328 
329  integer :: fnum, info
330 
331  type(_qrm_front_type), pointer :: front
332  integer :: node, c, acc, thn, p
333  type(qrm_queue) :: sub_q
334 
335  info = 0
336  acc = 0
337 
338 #if defined(_OPENMP)
339  thn = omp_get_thread_num()
340 #else
341  thn = 0
342 #endif
343 
344  call qrm_queue_init(sub_q, adata%nnodes, qrm_fifo_)
345  call qrm_queue_push(sub_q, fnum)
346 
347  do
348  node = qrm_queue_pop(sub_q)
349 
350  if(node .eq. 0) exit
351 
352  front => qrm_mat%fdata%front_list(node)
353 
354  call front_r(front, info)
355  status(node) = qrm_done_
356 
357  !$ call omp_set_lock( dlock )
358  dones = dones+1
359  !$ call omp_unset_lock( dlock )
360 
361  ! sweep over the children. Small children are treated immediately,
362  ! the others are pushed on the ready_q
363  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
364  c = adata%child(p)
365  call qrm_queue_push(sub_q, c)
366  acc = acc+1
367  end do
368 
369  end do
370 
371  call qrm_queue_free(sub_q)
372 
373  return
374 
375  end subroutine do_subtree_r
376 
377 
378 
379 
380 
381 
382 !==========================================================================================
383 !==========================================================================================
384  subroutine front_r(front, info)
385 
386  use _qrm_utils_mod
387  use _qrm_rfpf_mod
388  use _qrm_spmat_mod
389  use qrm_mem_mod
390  use qrm_common_mod
391  implicit none
392 
393  type(_qrm_front_type) :: front
394  integer :: info
395 
396  integer :: k, m, pk, jp, n, i, thn, cnt, j, pv1, pv2, r
397 
398  ! TODO: optimize this by allocating once in the subtree
399  _qrm_data, allocatable :: in_x(:,:)
400 
401  ! error management
402  character(len=*), parameter :: name='front_r'
403 
404  ! shortcut
405  if (min(front%m, front%n) .le.0) goto 9999
406  if (front%npiv .le. 0) goto 9999
407 
408 #if defined(_OPENMP)
409  thn = omp_get_thread_num()
410 #else
411  thn = 0
412 #endif
413  ! !$ call omp_set_lock( locks(front%num) )
414 
415  ! write(*,'(i3," =--> Solve R : ",i4)')thn, front%num
416  call qrm_aalloc(in_x, front%n, size(x,2), info)
417  __qrm_check_ret(name,'qrm_aalloc',9999)
418 
419  in_x(1:front%npiv,:) = b(front%rows(1:front%npiv),:)
420  in_x(front%npiv+1:front%n,:) = x(front%cols(front%npiv+1:front%n),:)
421 
422 
423 #if defined (RFPF)
424  ! pv1 = 1
425 
426  ! ! set r to be the starting row of the R factor
427  ! r = ((front%npiv-1)/front%nb)*front%nb+1
428  ! do
429  ! if(r .lt. 1) exit
430  ! ! k is the size of the panel
431  ! k = min(front%nb,front%npiv-r+1)
432  ! ! n is the col size of the rectangular part of the panel
433  ! n = front%n - r-k+1
434  ! ! pv1 is the position of the first element of the panel in the
435  ! ! front%r array (where the rfpf part starts)
436  ! pv1 = (front%n-r+1)*(r-1)+r*(r-1)/2+1
437  ! ! pv2 is the position in the front%r array where the
438  ! ! rectangular part of this panel starts
439  ! if(n .le. 0) then
440  ! pv2 = 1
441  ! else
442  ! pv2 = pv1+(k*(k+1))/2
443  ! end if
444  ! if(n .gt. 0) call _xgemm('n', 'n', k, size(x,2), n, -_qrm_one, front%r(pv2), &
445  ! &k, in_x(r+k,1), front%n, _qrm_one, in_x(r,1), front%n)
446  ! call _xrpsm('l', 'u', 'n', 'n', k, size(x,2), _qrm_one, front%r(pv1), in_x(r,1), front%n)
447  ! ! go backwards one panel
448  ! r = r - front%nb
449  ! end do
450 #else
451  n = size(x,2)
452  cnt=size(front%r)+1
453  outer: do jp = front%npiv - mod(front%npiv, front%nb)+1, 1, -front%nb
454  pk = min(front%nb, front%npiv-jp+1)
455  if(pk .le. 0) cycle
456  ! write(*,*)'solve',jp,pk
457 
458  inner: do j = jp+pk-mod(pk,front%ib), jp, -front%ib
459  m = min(front%ib, jp+pk - j)
460  if(m .le. 0) cycle
461  k = front%n-j-m+1
462  cnt = cnt-m*(front%n-j+1)
463  ! write(*,*)size(front%r), cnt, j, k+m, m
464  if(k.gt.0) call _xgemm('n', 'n', m, n, k, -_qrm_one, front%r(cnt+m*m), m, &
465  & in_x(j+m,1), front%n, _qrm_one, in_x(j,1), front%n)
466  call _xtrsm('l', 'u', 'n', 'n', m, n, _qrm_one, front%r(cnt), &
467  & m, in_x(j,1), front%n)
468  end do inner
469  end do outer
470 #endif
471 
472  x(front%cols(1:front%npiv),:) = in_x(1:front%npiv,:)
473 
474  call qrm_adealloc(in_x)
475  __qrm_check_ret(name,'qrm_adealloc',9999)
476  ! !$ call omp_unset_lock( locks(front%num) )
477 
478 9999 continue !error
479  return
480 
481  end subroutine front_r
482 
483 end subroutine _qrm_solve_r
This module contains all the error management routines and data.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
subroutine check_solver_over()
This type defines the handle for the queues attached to a family of threads.
subroutine solve_r(task, thn)
This module contains an implementation of some operations on triangular/trapezoidal matrices stored i...
A data type meant to to define a queue.
subroutine _qrm_solve_r(qrm_mat, b, x)
This function solves for R against multiple vectors.
Definition: qrm_solve_r.F90:45
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
This module contains the interfaces of all non-typed routines.
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This module contains all the facilities for front queues.
subroutine _qrm_solve_sing_front(qrm_mat, b, x, trans)
This function handles the front containing the singletons during the solve for R or R'...
This module contains generic interfaces for a number of auxiliary tools.
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
subroutine front_r(front, info)
subroutine fill_queue_r()
This type defines a computational task.
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
This module contains all the interfaces for the typed routines in the solve phase.
This type defines the data structure used to store a matrix.
integer function qrm_queue_pop(q)
Pops an element from a queue.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
This module contains the definition of the basic sparse matrix type and of the associated methods...
subroutine qrm_par_mem_finalize()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
subroutine do_subtree_r(fnum, info)
int i
Definition: secs.c:40
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
This module contains the definition of a task type that is used for scheduling tasks during the facto...
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_queue_free(q)
Frees a queue.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.