Skip to content

Commit 2b05948

Browse files
ct-clmsn, ctaylor, and devinamatthews
authored
blis support for hpx (#682)
Implement a threading backend via HPX. HPX is an asynchronous many-task runtime system used in high-performance computing applications. The runtime implements the ISO C++ parallelism specification and provides a user-space thread implementation. This PR provides BLIS with a threading backend implemented using HPX and resolves feature request #681. The configuration script, makefiles, and testsuite have been updated to support an HPX build option. The addition of HPX support also gives other developers an exemplar for integrating other C++ threading backends into BLIS. Co-authored-by: ctaylor <[email protected]> Co-authored-by: Devin Matthews <[email protected]>
1 parent e1ea25d commit 2b05948

35 files changed

+2648
-2083
lines changed

Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,16 @@ else
552552
@echo "Compiling $$@" $(call get-frame-text-for,$(1))
553553
@$(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@
554554
endif
555+
556+
ifneq ($(findstring hpx,$(THREADING_MODEL)),)
557+
$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
558+
ifeq ($(ENABLE_VERBOSE),yes)
559+
$(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@
560+
else
561+
@echo "Compiling $$@" $(call get-frame-cxxtext-for,$(1))
562+
@$(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@
563+
endif
564+
endif
555565
endef
556566

557567
# first argument: a kernel set (name) being targeted (e.g. haswell).

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ writing complex kernels.
286286

287287
* **Advanced multithreading support.** BLIS allows multiple levels of
288288
symmetric multithreading for nearly all level-3 operations. (Currently, users
289-
may choose to obtain parallelism via either OpenMP or POSIX threads). This
289+
may choose to obtain parallelism via OpenMP, POSIX threads, or HPX). This
290290
means that matrices may be partitioned in multiple dimensions simultaneously to
291291
attain scalable, high-performance parallelism on multicore and many-core
292292
architectures. The key to this innovation is a thread-specific control tree

blastest/src/cblat1.c

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ static real c_b52 = 0.f;
6868
/* ===================================================================== */
6969
/* Main program */ int main(void)
7070
{
71+
#ifdef BLIS_ENABLE_HPX
72+
char* program = "cblat1";
73+
bli_thread_initialize_hpx( 1, &program );
74+
#endif
75+
7176
/* Initialized data */
7277

7378
static real sfac = 9.765625e-4f;
@@ -136,7 +141,12 @@ static real c_b52 = 0.f;
136141
}
137142
s_stop("", (ftnlen)0);
138143

139-
return 0;
144+
#ifdef BLIS_ENABLE_HPX
145+
return bli_thread_finalize_hpx();
146+
#else
147+
// Return peacefully.
148+
return 0;
149+
#endif
140150
} /* main */
141151

142152
/* Subroutine */ int header_(void)
@@ -230,23 +240,23 @@ static real c_b52 = 0.f;
230240
complex q__1;
231241

232242
/* Builtin functions */
233-
integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen),
243+
integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen),
234244
e_wsle(void);
235245
/* Subroutine */ int s_stop(char *, ftnlen);
236246

237247
/* Local variables */
238248
integer i__;
239249
complex cx[8];
240250
integer np1, len;
241-
extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
242-
integer *), ctest_(integer *, complex *, complex *, complex *,
251+
extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
252+
integer *), ctest_(integer *, complex *, complex *, complex *,
243253
real *);
244254
complex mwpcs[5], mwpct[5];
245255
extern real scnrm2_(integer *, complex *, integer *);
246256
extern /* Subroutine */ int itest1_(integer *, integer *), stest1_(real *,
247257
real *, real *, real *);
248258
extern integer icamax_(integer *, complex *, integer *);
249-
extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
259+
extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
250260
*);
251261
extern real scasum_(integer *, complex *, integer *);
252262

@@ -465,7 +475,7 @@ static real c_b52 = 0.f;
465475
complex q__1;
466476

467477
/* Builtin functions */
468-
integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen),
478+
integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen),
469479
e_wsle(void);
470480
/* Subroutine */ int s_stop(char *, ftnlen);
471481

@@ -481,23 +491,23 @@ static real c_b52 = 0.f;
481491
#else
482492
complex cdotc_(
483493
#endif
484-
integer *, complex *, integer
494+
integer *, complex *, integer
485495
*, complex *, integer *);
486-
extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
496+
extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
487497
complex *, integer *);
488498
extern /* Complex */
489499
#ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL
490500
void cdotu_(complex *,
491501
#else
492502
complex cdotu_(
493503
#endif
494-
integer *, complex *, integer
504+
integer *, complex *, integer
495505
*, complex *, integer *);
496-
extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
497-
complex *, integer *), ctest_(integer *, complex *, complex *,
506+
extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
507+
complex *, integer *), ctest_(integer *, complex *, complex *,
498508
complex *, real *);
499509
integer ksize;
500-
extern /* Subroutine */ int caxpy_(integer *, complex *, complex *,
510+
extern /* Subroutine */ int caxpy_(integer *, complex *, complex *,
501511
integer *, complex *, integer *);
502512

503513
/* Fortran I/O blocks */
@@ -691,7 +701,7 @@ complex cdotu_(
691701
sfac)
692702
{
693703
real scomp[1], strue[1];
694-
extern /* Subroutine */ int stest_(integer *, real *, real *, real *,
704+
extern /* Subroutine */ int stest_(integer *, real *, real *, real *,
695705
real *);
696706

697707
/* ************************* STEST1 ***************************** */
@@ -733,7 +743,7 @@ real sdiff_(real *sa, real *sb)
733743
return ret_val;
734744
} /* sdiff_ */
735745

736-
/* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue,
746+
/* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue,
737747
complex *csize, real *sfac)
738748
{
739749
/* System generated locals */
@@ -745,7 +755,7 @@ real sdiff_(real *sa, real *sb)
745755
/* Local variables */
746756
integer i__;
747757
real scomp[20], ssize[20], strue[20];
748-
extern /* Subroutine */ int stest_(integer *, real *, real *, real *,
758+
extern /* Subroutine */ int stest_(integer *, real *, real *, real *,
749759
real *);
750760

751761
/* **************************** CTEST ***************************** */

0 commit comments

Comments
 (0)