1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #ifndef COMMON_THREAD
40 #define COMMON_THREAD
41 
/* Basic thread debugging: SMP_DEBUG is forced off here; when it is defined, blas_queue_t gains an `int num` field. */
43 #undef SMP_DEBUG
44 
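/* Thread timing debugging: TIMING_DEBUG is forced off here; when it is defined, blas_queue_t gains an `unsigned int clocks` field. */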
46 #undef TIMING_DEBUG
47 
/* Global parameters: only declared in this header, defined in another
   translation unit. */
extern int blas_cpu_number,	/* value handed back by num_cpu_avail() in SMP_SERVER builds */
	   blas_num_threads,	/* not used in this header; presumably the configured thread count -- TODO confirm */
	   blas_omp_linked;	/* not used in this header; presumably an OpenMP-related flag -- TODO confirm */
52 
/* Single-bit flags in the upper part of the 16-bit mode word.
   NOTE(review): presumably OR-ed into the `mode` argument/field used by
   the threaded drivers -- confirm against callers. */
#define BLAS_LEGACY	(1U << 15)	/* 0x8000 */
#define BLAS_PTHREAD	(1U << 14)	/* 0x4000 */
#define BLAS_NODE	(1U << 13)	/* 0x2000 */

/* Precision selector: BLAS_PREC masks the two lowest bits, which hold
   one of the SINGLE / DOUBLE / XDOUBLE values. */
#define BLAS_PREC	0x3U
#define BLAS_SINGLE	0x0U
#define BLAS_DOUBLE	0x1U
#define BLAS_XDOUBLE	0x2U

/* Real/complex selector: bit 2, outside the BLAS_PREC mask. */
#define BLAS_REAL	0x0U
#define BLAS_COMPLEX	0x4U
63 
/* Two-bit TRANSA field, bits 4-5.  The _N/_T/_R/_C values are 0..3
   shifted into place.
   NOTE(review): presumably N = no transpose, T = transpose,
   R = conjugate, C = conjugate transpose -- confirm against users. */
#define BLAS_TRANSA_SHIFT     4
#define BLAS_TRANSA	(0x3U << BLAS_TRANSA_SHIFT)	/* field mask, 0x0030 */
#define BLAS_TRANSA_N	(0x0U << BLAS_TRANSA_SHIFT)
#define BLAS_TRANSA_T	(0x1U << BLAS_TRANSA_SHIFT)
#define BLAS_TRANSA_R	(0x2U << BLAS_TRANSA_SHIFT)
#define BLAS_TRANSA_C	(0x3U << BLAS_TRANSA_SHIFT)

/* Two-bit TRANSB field, bits 8-9, encoded the same way as TRANSA. */
#define BLAS_TRANSB_SHIFT     8
#define BLAS_TRANSB	(0x3U << BLAS_TRANSB_SHIFT)	/* field mask, 0x0300 */
#define BLAS_TRANSB_N	(0x0U << BLAS_TRANSB_SHIFT)
#define BLAS_TRANSB_T	(0x1U << BLAS_TRANSB_SHIFT)
#define BLAS_TRANSB_R	(0x2U << BLAS_TRANSB_SHIFT)
#define BLAS_TRANSB_C	(0x3U << BLAS_TRANSB_SHIFT)

/* Single-bit RSIDE (bit 10) and UPLO (bit 11) flags. */
#define BLAS_RSIDE_SHIFT     10
#define BLAS_RSIDE      (0x1U << BLAS_RSIDE_SHIFT)	/* 0x0400 */
#define BLAS_UPLO_SHIFT      11
#define BLAS_UPLO       (0x1U << BLAS_UPLO_SHIFT)	/* 0x0800 */
82 
/* Status codes (plain int constants).  The values are 0, 1, 2 and 4;
   3 is not assigned.
   NOTE(review): presumably stored in blas_queue_t.status -- confirm
   against the code that sets it. */
#define BLAS_STATUS_NOTYET      0
#define BLAS_STATUS_QUEUED      1
#define BLAS_STATUS_RUNNING     2
#define BLAS_STATUS_FINISHED    4
87 
88 typedef struct blas_queue {
89 
90   void *routine;
91   BLASLONG position;
92   BLASLONG assigned;
93 
94   blas_arg_t *args;
95   void *range_m;
96   void *range_n;
97   void *sa, *sb;
98 
99   struct blas_queue *next;
100 
101 #if defined( __WIN32__) || defined(__CYGWIN32__)
102   CRITICAL_SECTION lock;
103   HANDLE finish;
104 #else
105   pthread_mutex_t	 lock;
106   pthread_cond_t	 finished;
107 #endif
108 
109   int mode, status;
110 
111 #ifdef CONSISTENT_FPCSR
112   unsigned int sse_mode, x87_mode;
113 #endif
114 
115 #ifdef SMP_DEBUG
116   int    num;
117 #endif
118 #ifdef TIMING_DEBUG
119   unsigned int clocks;
120 #endif
121 } blas_queue_t;
122 
123 #ifdef SMP_SERVER
124 
125 extern int blas_server_avail;
126 
num_cpu_avail(int level)127 static __inline int num_cpu_avail(int level) {
128 
129   if ((blas_cpu_number == 1)
130 
131 #ifdef USE_OPENMP
132       || omp_in_parallel()
133 #endif
134       ) return 1;
135 
136   return blas_cpu_number;
137 
138 }
139 
/*
 * Clear the pointer members sa, sb and next of *queue to NULL.
 * No other member is written; `queue` itself must be non-NULL.
 */
static __inline void blas_queue_init(blas_queue_t *queue) {
  queue->next = NULL;
  queue->sb   = NULL;
  queue->sa   = NULL;
}
146 
147 int blas_thread_init(void);
148 int BLASFUNC(blas_thread_shutdown)(void);
149 int exec_blas(BLASLONG, blas_queue_t *);
150 int exec_blas_async(BLASLONG, blas_queue_t *);
151 int exec_blas_async_wait(BLASLONG, blas_queue_t *);
152 
153 #else
154 int exec_blas_async(BLASLONG num_cpu, blas_param_t *param, pthread_t *);
155 int exec_blas_async_wait(BLASLONG num_cpu, pthread_t *blas_threads);
156 int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
157 #endif
158 
159 #ifndef ASSEMBLER
160 
161 int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
162 		       void *a, BLASLONG lda,
163 		       void *b, BLASLONG ldb,
164 		       void *c, BLASLONG ldc, int (*function)(), int threads);
165 
166 int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
167 
168 int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
169 
170 int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
171 
172 int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
173 
174 int trsm_thread(int mode, BLASLONG m, BLASLONG n,
175 		double alpha_r, double alpha_i,
176 		void *a, BLASLONG lda,
177 		void *c, BLASLONG ldc, int (*function)(), void *buffer);
178 
179 int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
180 
181 int beta_thread(int mode, BLASLONG m, BLASLONG n,
182 		double alpha_r, double alpha_i,
183 		void *c, BLASLONG ldc, int (*fuction)());
184 
185 int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
186 		 void *offsetA, BLASLONG lda,
187 		 void *offsetB, BLASLONG jb,
188 		 void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
189 
190 #endif  /* ENDIF ASSEMBLER */
191 
192 #endif
193