Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mac_mpich.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
runs-on: macos-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Set up dependencies
run: |
# brew install gcc
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac_openmpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
runs-on: macos-latest
timeout-minutes: 90
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Set up dependencies
run: |
# brew install gcc
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu_mpich.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Set up dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu_openmpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 90
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Set up dependencies
run: |
sudo apt-get update
Expand Down
12 changes: 6 additions & 6 deletions src/drivers/ncmpio/ncmpio_close.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ ncmpio_close(void *ncdp)

if (max_npairs_put > 0) { /* put npairs > 0 */
put_time = ncp->ina_time_init + ncp->ina_time_flatten;
ntimers = 4;
ntimers = 5;
for (i=0; i<ntimers; i++) {
tt[i] = ncp->ina_time_put[i];
put_time += tt[i];
Expand All @@ -163,12 +163,12 @@ ncmpio_close(void *ncdp)
MPI_Reduce(tt, max_t, ntimers+3, MPI_DOUBLE, MPI_MAX, 0, ncp->comm);
put_time = max_t[ntimers+2];
if (ncp->rank == 0)
printf("%s: INA put timing %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f = %5.2f\n",
__func__, max_t[ntimers],max_t[ntimers+1],max_t[0],max_t[1],max_t[2],max_t[3],put_time);
printf("%s: INA put timing %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f = %5.2f\n",
__func__, max_t[ntimers],max_t[ntimers+1],max_t[0],max_t[1],max_t[2],max_t[3],max_t[4],put_time);
}
if (max_npairs_get > 0) { /* get npairs > 0 */
get_time = ncp->ina_time_init + ncp->ina_time_flatten;
ntimers = 4;
ntimers = 5;
for (i=0; i<ntimers; i++) {
tt[i] = ncp->ina_time_get[i];
get_time += tt[i];
Expand All @@ -179,8 +179,8 @@ ncmpio_close(void *ncdp)

MPI_Reduce(tt, max_t, ntimers+3, MPI_DOUBLE, MPI_MAX, 0, ncp->comm);
if (ncp->rank == 0)
printf("%s: INA get timing %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f = %5.2f\n",
__func__, max_t[ntimers],max_t[ntimers+1],max_t[0],max_t[1],max_t[2],max_t[3],max_t[ntimers+2]);
printf("%s: INA get timing %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f = %5.2f\n",
__func__, max_t[ntimers],max_t[ntimers+1],max_t[0],max_t[1],max_t[2],max_t[3],max_t[4],max_t[ntimers+2]);
}
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/drivers/ncmpio/ncmpio_file_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ printf("%s at %d: buf_view count=%lld off=%lld %lld len=%lld %lld\n",__func__,__
int wkl[21];
#endif
for (i=0; i<buf_view.count; i++) {
in_ptr = (char*)buf + (buf_view.off[i] - buf_view.off[0]);
in_ptr = (char*)buf + buf_view.off[i];
#if 0
memcpy(wkl, in_ptr, buf_view.len[i]);
ncmpii_in_swapn(wkl, buf_view.len[i]/4, 4);
Expand Down
107 changes: 75 additions & 32 deletions src/drivers/ncmpio/ncmpio_intra_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1571,7 +1571,7 @@ int ina_put(NC *ncp,
{
int i, j, err, mpireturn, status=NC_NOERR;
char *recv_buf=NULL, *wr_buf = NULL;
MPI_Aint npairs=0, *meta=NULL, *count=NULL;
MPI_Aint npairs=0, *meta=NULL, *count=NULL, *bufAddr=NULL;
MPI_Offset wr_amnt=0;
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count *off_ptr, *len_ptr;
Expand Down Expand Up @@ -1738,20 +1738,24 @@ int ina_put(NC *ncp,

if (do_sort && indv_sorted) {
/* Interleaved offsets are found but individual offsets are already
* sorted. In this case, heap_merge() is called to merge all
* offsets into one single sorted offset list. Note count[] is
* initialized and will be used in heap_merge()
* sorted. This is commonly seen from the checkerboard domain
* partitioning pattern. In this case, heap_merge() must be called
* to merge all individually already-sorted offsets into one single
* sorted offset list. Note count[] is initialized and will be used
* in heap_merge()
*/
count = (MPI_Aint*) NCI_Malloc(sizeof(MPI_Aint) *ncp->num_nonaggrs);
count = (MPI_Aint*) NCI_Malloc(sizeof(MPI_Aint)*ncp->num_nonaggrs);
for (i=0; i<ncp->num_nonaggrs; i++) count[i] = meta[i*3];
}

/* Construct an array of buffer addresses containing a mapping of the
* buffer used to receive write data from non-aggregators and the
* buffer used to write to file. bufAddr[] is calculated based on the
* assumption that the write buffer is contiguous.
* assumption that the write buffer of this aggregator is contiguous,
* i.e. buf_view.is_contig being 1. For non-aggregators, their write
* data will always be received into a contiguous buffer.
*/
MPI_Aint *bufAddr = (MPI_Aint*)NCI_Malloc(sizeof(MPI_Aint) * npairs);
bufAddr = (MPI_Aint*)NCI_Malloc(sizeof(MPI_Aint) * npairs);
bufAddr[0] = 0;
for (i=1; i<npairs; i++)
bufAddr[i] = bufAddr[i-1] + len_ptr[i-1];
Expand Down Expand Up @@ -1876,17 +1880,21 @@ if (fake_overlap == 0) assert(npairs == i+1);

if (recv_buf != buf) {
/* Pack this aggregator's write data into front of recv_buf */
if (buf_view.is_contig && buf_view.type == MPI_BYTE)
memcpy(recv_buf, buf, buf_view.size);
else {
#ifdef HAVE_MPI_LARGE_COUNT
MPI_Count pos=0;
MPI_Count num = (buf_view.is_contig) ? buf_view.size : 1;
MPI_Pack_c(buf, num, buf_view.type, recv_buf, buf_view.size, &pos,
MPI_COMM_SELF);
MPI_Count pos=0;
MPI_Count num = (buf_view.is_contig) ? buf_view.size : 1;
MPI_Pack_c(buf, num, buf_view.type, recv_buf, buf_view.size,
&pos, MPI_COMM_SELF);
#else
int pos=0;
MPI_Count num = (buf_view.is_contig) ? buf_view.size : 1;
MPI_Pack(buf, num, buf_view.type, recv_buf, buf_view.size, &pos,
MPI_COMM_SELF);
int pos=0;
MPI_Count num = (buf_view.is_contig) ? buf_view.size : 1;
MPI_Pack(buf, num, buf_view.type, recv_buf, buf_view.size,
&pos, MPI_COMM_SELF);
#endif
}
}

#if defined(PNETCDF_PROFILING) && (PNETCDF_PROFILING == 1)
Expand Down Expand Up @@ -1932,6 +1940,12 @@ if (fake_overlap == 0) assert(npairs == i+1);
}
NCI_Free(req);

#if defined(PNETCDF_PROFILING) && (PNETCDF_PROFILING == 1)
endT = MPI_Wtime();
if (ncp->rank == ncp->my_aggr) ncp->ina_time_put[3] += endT - startT;
startT = endT;
#endif

/* Now all write data has been collected into recv_buf. In case of any
* overlap, we must coalesce recv_buf into wr_buf using off_ptr[],
* len_ptr[], and bufAddr[]. For overlapped regions, requests with
Expand All @@ -1942,13 +1956,47 @@ if (fake_overlap == 0) assert(npairs == i+1);
* wr_buf, a contiguous buffer, wr_buf, which will later be used in a
* call to MPI-IO/PNCIO file write.
*/
if (!do_sort && wr_amnt == recv_amnt)
if (!do_sort && wr_amnt == recv_amnt) {
wr_buf = recv_buf;

if (wr_buf != buf) {
/* If write data has been packed in wr_buf, a contiguous buffer,
* update buf_view before passing it to the MPI-IO/PNCIO file
* write.
*/
buf_view.size = wr_amnt;
buf_view.type = MPI_BYTE;
buf_view.is_contig = 1;
}
/* else case is when user's buffer, buf, can be used to write */
}
#if 0
/* Note copying write data into a contiguous buffer in most cases will
* run faster in MPI-IO and PNCIO.
*/
else if (buf_view.is_contig && !overlap) {
/* Note we can reuse bufAddr[] and len_ptr[] as buf_view.off and
* buf_view.len only when buf_view.is_contig is true, because
* bufAddr[] is constructed based on the assumption that the write
* buffer is contiguous.
*/
wr_buf = recv_buf;
buf_view.size = wr_amnt;
buf_view.type = MPI_BYTE;
buf_view.is_contig = (npairs <= 1);
buf_view.off = (MPI_Offset*)bufAddr; /* based on recv_buf */
buf_view.len = len_ptr;
buf_view.count = npairs;
}
#endif
else {
/* do_sort means buffer's offsets and lengths have been moved
* around in order to make file offset-length pairs monotonically
* non-decreasing. We need to copy write data into a temporary
* buffer, wr_buf, and write it to the file.
* non-decreasing. We need to re-arrange the write buffer
* accordingly by copying write data into a temporary buffer,
* wr_buf, and write it to the file. Copying write data into a
* contiguous buffer in most cases will run faster in MPI-IO and
* PNCIO.
*/
wr_buf = NCI_Malloc(wr_amnt);
ptr = wr_buf;
Expand All @@ -1957,18 +2005,23 @@ if (fake_overlap == 0) assert(npairs == i+1);
memcpy(ptr, recv_buf + bufAddr[j], len_ptr[j]);
ptr += len_ptr[j];
}
/* Write data has been packed in wr_buf, a contiguous buffer,
* update buf_view before passing it to the MPI-IO/PNCIO file
* write.
*/
buf_view.size = wr_amnt;
buf_view.type = MPI_BYTE;
buf_view.is_contig = 1;

if (recv_buf != buf) NCI_Free(recv_buf);
}

NCI_Free(bufAddr);
} /* if (npairs > 0) */

NCI_Free(meta);

#if defined(PNETCDF_PROFILING) && (PNETCDF_PROFILING == 1)
endT = MPI_Wtime();
if (ncp->rank == ncp->my_aggr) ncp->ina_time_put[3] += endT - startT;
if (ncp->rank == ncp->my_aggr) ncp->ina_time_put[4] += endT - startT;
#endif

/* set the fileview */
Expand All @@ -1983,22 +2036,12 @@ if (fake_overlap == 0) assert(npairs == i+1);
ncp->maxmem_put[4] = MAX(ncp->maxmem_put[4], mem_max);
#endif

if (wr_buf != buf) {
/* If write data has been packed in wr_buf, a contiguous buffer,
* buf_view must be updated before passing it to the MPI-IO/PNCIO file
* write.
*/
buf_view.size = wr_amnt;
buf_view.type = MPI_BYTE;
buf_view.is_contig = 1;
}
/* else case is when the user's buffer, buf, can be used to write */

/* carry out write request to file */
err = ncmpio_read_write(ncp, NC_REQ_WR, 0, buf_view, wr_buf);
if (status == NC_NOERR) status = err;

if (wr_buf != buf) NCI_Free(wr_buf);
if (bufAddr != NULL) NCI_Free(bufAddr);

/* Must free offsets and lengths now, as they may be realloc-ed in
* ina_collect_md()
Expand Down
25 changes: 25 additions & 0 deletions src/utils/ncmpidiff/ncmpidiff.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@
#define uint64 unsigned long long
#endif

/* One-shot banner state used when reporting differences.
 *
 * first_diff : non-zero until the first difference has been reported;
 *              main() sets it to 1 before the comparison starts.
 * cmd_opts   : text printed ahead of the first reported difference;
 *              main() fills it with "ncmpidiff" (prefixed by "Rank <r>: "
 *              when running on more than one process) followed by the
 *              command-line arguments.
 */
static char cmd_opts[1024];
static int first_diff;

/* Print cmd_opts on its own line the first time a difference is about to be
 * reported, then clear first_diff so later differences do not repeat it.
 * The macro expands to a complete braced if-statement and is meant to be
 * used in statement position without a trailing semicolon.
 */
#define PRINT_CMD_OPTS              \
    if (first_diff != 0) {          \
        first_diff = 0;             \
        puts(cmd_opts);             \
    }

#define OOM_ERROR { \
fprintf(stderr, "Error: calloc() out of memory at line %d\n",__LINE__); \
Expand Down Expand Up @@ -89,6 +97,7 @@
strcat(msg, str); \
sprintf(str, "value \"%s\" vs \"%s\"\n", b1, b2); \
strcat(msg, str); \
PRINT_CMD_OPTS \
printf("%s", msg); \
numHeadDIFF++; \
} \
Expand Down Expand Up @@ -119,6 +128,7 @@
sprintf(str, "value %g vs %g (difference = %e)\n", \
(double)b1[pos],(double)b2[pos],(double)(b1[pos]-b2[pos])); \
strcat(msg, str); \
PRINT_CMD_OPTS \
printf("%s", msg); \
numHeadDIFF++; \
} \
Expand Down Expand Up @@ -148,6 +158,7 @@
strcat(msg, str); \
sprintf(str, "value \"%s\" vs \"%s\"\n", b1, b2); \
strcat(msg, str); \
PRINT_CMD_OPTS \
printf("%s", msg); \
numHeadDIFF++; \
} \
Expand Down Expand Up @@ -178,6 +189,7 @@
sprintf(str, "value %g vs %g (difference = %e)\n", \
(double)b1[pos],(double)b2[pos],(double)(b1[pos]-b2[pos])); \
strcat(msg, str); \
PRINT_CMD_OPTS \
printf("%s", msg); \
numHeadDIFF++; \
} \
Expand Down Expand Up @@ -225,6 +237,7 @@
if (pos != varsize || worst != -1) { /* diff is found */ \
double v1, v2; \
if (ndims[0] == 0) { /* scalar variable */ \
PRINT_CMD_OPTS \
if (worst == -1) \
printf("DIFF: scalar variable \"%s\" of type \"%s\"\n", \
name[0], get_type(xtype[0])); \
Expand All @@ -245,6 +258,7 @@
diffStart[_i] = pos % shape[_i] + start[_i]; \
pos /= shape[_i]; \
} \
PRINT_CMD_OPTS \
if (worst == -1) \
printf("DIFF: variable \"%s\" of type \"%s\" at element ["OFFFMT, \
name[0], get_type(xtype[0]), diffStart[0]); \
Expand Down Expand Up @@ -378,6 +392,16 @@ int main(int argc, char **argv)
MPI_Comm_size(comm, &nprocs);
MPI_Comm_rank(comm, &rank);

if (nprocs == 1)
sprintf(cmd_opts, "ncmpidiff", rank);
else
sprintf(cmd_opts, "Rank %d: ncmpidiff", rank);

for (i=1; i<argc; i++) {
strcat(cmd_opts, " ");
strcat(cmd_opts, argv[i]);
}

verbose = 0;
quiet = 0;
check_header = 0;
Expand All @@ -386,6 +410,7 @@ int main(int argc, char **argv)
var_list.names = NULL;
var_list.nvars = 0;
check_tolerance = 0;
first_diff = 1;

while ((c = getopt(argc, argv, "bhqt:v:")) != -1) {
char *str, *ptr;
Expand Down
Loading
Loading