//	BLMPI.cc
/*	This source file is part of G4beamline, http://g4beamline.muonsinc.com
	Copyright (C) 2003,2004,2005,2006 by Tom Roberts, all rights reserved.

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.
	http://www.gnu.org/copyleft/gpl.html
*/

// This is the only G4beamline file that uses MPI routines.
// It needs G4BL_MPI defined, or it is all a big no-op.

#ifdef G4BL_MPI
#include <mpi.h>
#endif

#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "G4ApplicationState.hh"
#include "G4StateManager.hh"
#include "G4PrimaryTransformer.hh"
#include "G4Event.hh"
#include "G4Track.hh"
#include "G4ParticleTable.hh"
#include "G4DynamicParticle.hh"

#include "BLAssert.hh"
#include "BLMPI.hh"
#include "BLParam.hh"
#include "BLManager.hh"
#include "BLRunManager.hh"
#include "BLBeam.hh"
#include "BLNTuple.hh"
#include "BLRealTimeMonitor.hh"
#include "BLTime.hh"

// MPI parameter values
int MPI_PackTracks=10;
int MPI_PackNTuples=100;
int MPI_ProbeSleep=1;

// class BLMPI static values
int BLMPI::rank=1;	// makes isRank1() return true if not MPI
int BLMPI::nodes=0;
int BLMPI::nComplete=0;
double BLMPI::startupTime=0.0;
double BLMPI::startupWaitTime=0.0;
double BLMPI::computeTime=0.0;
double BLMPI::computeWaitTime=0.0;

#ifdef G4BL_MPI // extends to the end of this file, except for dummy routines

/**	enum MPITags defines the tags for MPI messages.
 *	Collective routines use no tag.
 *	NTuple handles are also tags >= FIRST_NTUPLE_HANDLE.
 **/
enum MPITags { CREATE_NTUPLE=1274, NTUPLE_HANDLE, NTUPLE_ROW,
	GET_GLOBAL_PARAMETERS, GLOBAL_PARAMETERS, GET_BEAM_TRACKS,
	BEAM_TRACKS, TRACK, NEXT_TIME_STEP, COMPUTATION_COMPLETE };

const int FIRST_NTUPLE_HANDLE=10000;
const int MAX_NTUPLE_DOUBLES=4096;	// Max # doubles in an NTuple message

/**	struct Msg is a base class for MPI messages.
 *	There must be no virtual functions in any Msg struct.
 *
 *	Each derived struct has datatype(), sendTo0() or send(), and recv()
 *	functions.
 **/
struct Msg {
	/// Constructs an MPI_Datatype for a struct consisting of chars,
	/// ints, and floats, in that order. Be sure the # chars is a
	/// multiple of 4.
	static MPI_Datatype charIntFloat(int nchar, int nint, int nfloat);
	/// Sends 1 struct of type datatype to rank 0; blocks until sent.
	static void sendTo0(void *data, MPI_Datatype datatype, int tag) {
		MPI_Send(data,1,datatype,0,tag,MPI_COMM_WORLD);
	}
	/// Receives a message with specified tag from any source.
	/// The source can be obtained from status.
	static void recv(void *data, MPI_Datatype datatype, int tag,
						MPI_Status &status) {
		MPI_Recv(data,1,datatype,MPI_ANY_SOURCE,tag,
						MPI_COMM_WORLD,&status);
	}
	/// Sends an NTuple row to rank 0 (nData can be an integer multiple
	/// of the # floats in each row).
	static void sendNTupleRow(float *data, int nData, int handle) {
		MPI_Send(data,nData,MPI_FLOAT,0,handle,MPI_COMM_WORLD);
	}
};
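// How the Msg wire format works (descriptive note, derived from the code
// below): charIntFloat() builds and caches an MPI_Datatype describing a
// struct whose members are laid out as all chars, then all ints, then all
// floats, in that order, with no padding between the groups. For example,
// struct Track below corresponds to charIntFloat(0,4,8):
//	4 ints   -> PDGid, eventID, trackID, parentID
//	8 floats -> position[3], momentum[3], time, weight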
struct CreateNTuple : public Msg {
	char type[32];
	char category[128];
	char name[128];
	char fields[256];
	char filename[128];
	MPI_Datatype datatype()
		{ return charIntFloat(32+128+128+256+128,0,0); }
	void sendTo0() { Msg::sendTo0(this,datatype(),CREATE_NTUPLE); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),CREATE_NTUPLE,status); }
};

struct NTupleHandle : public Msg {
	int handle;
	MPI_Datatype datatype() { return charIntFloat(0,1,0); }
	void send(int dest)
		{ MPI_Send(this,1,datatype(),dest,NTUPLE_HANDLE,MPI_COMM_WORLD); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),NTUPLE_HANDLE,status); }
};

struct GlobalParameters : public Msg {
	int maxTracks;	// max # tracks in a BEAM_TRACKS message
	int nodeTracks;	// max # tracks for each node (collective mode)
	MPI_Datatype datatype() { return charIntFloat(0,2,0); }
	void send(int dest)
		{ MPI_Send(this,1,datatype(),dest,GLOBAL_PARAMETERS,
							MPI_COMM_WORLD); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),GLOBAL_PARAMETERS,status); }
};

struct Track : public Msg {
	int PDGid;
	int eventID;
	int trackID;	// external value
	int parentID;	// external value
	float position[3];	// mm
	float momentum[3];	// MeV/c
	float time;	// ns
	float weight;
	MPI_Datatype datatype() { return charIntFloat(0,4,8); }
	int tag() { return TRACK; }
	void send(int dest)
		{ MPI_Send(this,1,datatype(),dest,TRACK,MPI_COMM_WORLD); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),TRACK,status); }
};

struct GetBeamTracks : public Msg {
	int ignored;
	MPI_Datatype datatype() { return charIntFloat(0,1,0); }
	void sendTo0() { Msg::sendTo0(this,datatype(),GET_BEAM_TRACKS); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),GET_BEAM_TRACKS,status); }
};

struct BeamTracks : public Msg {
	Track *tracks;
	int nTracks;
	BeamTracks() { tracks=0; nTracks=0; }
	BeamTracks(int n) { tracks = new Track[n]; nTracks = n; }
	~BeamTracks() { if(tracks) delete[] tracks; tracks=0; nTracks=0; }
	MPI_Datatype datatype() { return tracks->datatype(); }
	void send(int dest, int n)
		{ MPI_Send(tracks,n,tracks->datatype(),dest,BEAM_TRACKS,
							MPI_COMM_WORLD); }
	void recv(MPI_Status &status)	// get # Tracks from status
		{ MPI_Recv(tracks,nTracks,tracks->datatype(),MPI_ANY_SOURCE,
				BEAM_TRACKS,MPI_COMM_WORLD,&status); }
};

struct NextTimeStep : public Msg {
	float suggestedDeltaT;	// ns
	MPI_Datatype datatype() { return charIntFloat(0,0,1); }
	void send(int dest)
		{ MPI_Send(this,1,datatype(),dest,NEXT_TIME_STEP,MPI_COMM_WORLD); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),NEXT_TIME_STEP,status); }
};

struct ComputationComplete : public Msg {
	float startupTime;
	float startupWaitTime;
	float computeTime;
	float computeWaitTime;
	MPI_Datatype datatype() { return charIntFloat(0,0,4); }
	void sendTo0() { Msg::sendTo0(this,datatype(),COMPUTATION_COMPLETE); }
	void recv(MPI_Status &status)
		{ Msg::recv(this,datatype(),COMPUTATION_COMPLETE,status); }
};
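// Overview of the message flow (see mainRankZero()/mainRankNonZero() below):
//	worker -> rank 0:  GetBeamTracks (request a batch of beam tracks)
//	rank 0 -> worker:  BeamTracks, up to MPI_PackTracks Tracks;
//			   a zero-length BeamTracks means "no more beam"
//	worker -> rank 0:  CreateNTuple and NTuple-row messages while tracking
//	worker -> rank 0:  ComputationComplete (its timing summary)
// Each worker requests its next batch before tracking the current one,
// overlapping communication with computation.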
/**	class MPINTuple implements a generic MPI NTuple.
 **/
class MPINTuple : public BLNTuple {
	int handle;
	int nfields;
	G4String fields;
	float *buffer;
	int iBuffer;	// current position in buffer[]
	int nBuffer;	// # rows in buffer[]
public:
	MPINTuple(int _handle, G4String _name, G4String _fields);
	~MPINTuple() { close(); }
	virtual void appendRow(float data[], int n);
	virtual bool readRow(float data[], int ndata) { return false; }
	virtual int getNData() { return nfields; }
	virtual G4String getFieldNames() { return fields; }
	virtual void annotate(G4String line) { }
	void flush() { }
	void close() {
		if(iBuffer > 0)
			Msg::sendNTupleRow(buffer,nfields*iBuffer,handle);
		iBuffer = 0;
	}
	void doSummary() { }
};

/**	class MPINTupleHandler implements the handler for MPI NTuples.
 *	NOTE: this class forces itself as the handler for all NTuple types.
 *	So it must be explicitly constructed in MPI mode (i.e. no default
 *	instance exists).
 **/
class MPINTupleHandler : public BLNTupleHandler {
public:
	MPINTupleHandler() {
		BLNTuple::registerHandler("MPI",this);
		BLNTuple::setForceHandler(this);
	}
	BLNTuple *create(G4String type, G4String category, G4String name,
				G4String fields, G4String filename);
	BLNTuple *read(G4String type, G4String category, G4String name,
			G4String fields, G4String filename) { return 0; }
};

// utility routine declarations

/// handleNTupleMessage() will receive and handle any message related to
/// NTuples in rank 0. Returns true if the message was handled, false
/// otherwise.
static bool handleNTupleMessage(MPI_Status &status);

/// sendBeamTracks() is used by rank 0 to send a BEAM_TRACKS message to dest.
static void sendBeamTracks(int dest);

/// processOneTrackAndAllSecondaries() is used in rank nonzero processes to
/// track a Track and any secondaries it might create (recursively).
static void processOneTrackAndAllSecondaries(Track &track);

// monitor the real-time usage. States:
//	0	startup
//	1	startup-wait
//	2	compute
//	3	compute-wait
BLRealTimeMonitor monitor;
long long startTime=0;

/**	mainRankZero() is the main program for the rank 0 process.
 *	Note this is called after the input.file is read.
 **/
void BLMPI::mainRankZero()
{
	MPI_Status status;
	BLRunManager *runManager = BLRunManager::getObject();
	BLManager::getObject()->setState(BEAM);
	runManager->beginRun();
	//monitor.setState(2); // wait for compute mode until a GET_BEAM_TRACKS
	for(;;) {
		monitor.incrState(1);
		// blocking probe for any message from anybody
		if(MPI_ProbeSleep <= 0) {
			MPI_Probe(MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,
								&status);
		} else {
			for(;;) {
				int flag;
				MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,
					MPI_COMM_WORLD,&flag,&status);
				if(flag != 0) break;
				BLTime::sleepms(MPI_ProbeSleep);
			}
		}
		monitor.incrState(-1);
		//printf("Rank 0 received message tag=%d source=%d\n",
		//		status.MPI_TAG,status.MPI_SOURCE);
		if(handleNTupleMessage(status)) continue;
		switch(status.MPI_TAG) {
		case GET_BEAM_TRACKS:
			monitor.setState(2);	// compute
			{ struct GetBeamTracks get;
			  get.recv(status);
			  sendBeamTracks(status.MPI_SOURCE);
			}
			break;
		case COMPUTATION_COMPLETE:
			{ struct ComputationComplete complete;
			  complete.recv(status);
			  startupTime += complete.startupTime;
			  startupWaitTime += complete.startupWaitTime;
			  computeTime += complete.computeTime;
			  computeWaitTime += complete.computeWaitTime;
			  if(++nComplete == nodes-1) goto exit;
			}
			break;
		default:
			BLAssert(false && "Illegal message tag");
		}
	}
exit:
	runManager->endRun();
	monitor.setState(-1);
	double t = ((double)BLTime::timeus() - (double)startTime)/1.0E6;
	printf("MPI COMPUTATION COMPLETE  elapsed time=%.3f sec.  %d workers\n",
							t,nodes-1);
	printf("  Rank0 Startup:%-9.3f Wait:%-9.3f Compute:%-9.3f Wait:%-9.3f\n",
		monitor.getTime(0),monitor.getTime(1),monitor.getTime(2),
		monitor.getTime(3));
	printf("  1-N   Startup:%-9.3f Wait:%-9.3f Compute:%-9.3f Wait:%-9.3f\n",
		startupTime,startupWaitTime,computeTime,computeWaitTime);
	printf("  Avg.  Startup:%-9.3f Wait:%-9.3f Compute:%-9.3f Wait:%-9.3f\n",
		startupTime/(nodes-1),startupWaitTime/(nodes-1),
		computeTime/(nodes-1),computeWaitTime/(nodes-1));
	startupTime += monitor.getTime(0);
	startupWaitTime += monitor.getTime(1);
	computeTime += monitor.getTime(2);
	computeWaitTime += monitor.getTime(3);
	printf("  Total Startup:%-9.3f Wait:%-9.3f Compute:%-9.3f Wait:%-9.3f\n",
		startupTime,startupWaitTime,computeTime,computeWaitTime);
}
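// Typical usage (a sketch -- the exact command depends on the local MPI
// installation, and the executable name is an assumption here):
//	mpirun -np 9 g4beamline input.file
// gives one rank-0 server (beam generation, NTuple writing) plus 8 tracking
// workers; BLMPI::init() requires nodes > 1.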
/**	mainRankNonZero() is the main program for ranks other than 0.
 *	Note this is called after the input.file is read.
 **/
void BLMPI::mainRankNonZero()
{
	BLRunManager *runManager = BLRunManager::getObject();
	BLManager::getObject()->setState(BEAM);
	runManager->beginRun();
	monitor.setState(2);	// compute

	GetBeamTracks get;
	BeamTracks tracks(MPI_PackTracks);
	get.sendTo0();
	for(;;) {
		MPI_Status status;
		monitor.incrState(1);
		tracks.recv(status);
		int count;
		MPI_Get_count(&status,tracks.datatype(),&count);
		if(count == 0) {	// zero-length message: no more beam
			monitor.incrState(-1);
			break;
		}
		get.sendTo0();	// request the next batch before tracking
		monitor.incrState(-1);
		for(int i=0; i<count; ++i) {
			runManager->beginEvent(tracks.tracks[i].eventID);
			BLBeam::setRandomSeedToTrack(tracks.tracks[i].eventID);
			processOneTrackAndAllSecondaries(tracks.tracks[i]);
			runManager->endEvent();
		}
	}
	runManager->endRun();
	monitor.setState(-1);

	ComputationComplete complete;
	complete.startupTime = monitor.getTime(0);
	complete.startupWaitTime = monitor.getTime(1);
	complete.computeTime = monitor.getTime(2);
	complete.computeWaitTime = monitor.getTime(3);
	complete.sendTo0();
}

void BLMPI::init(int *argc, char ***argv)
{
	if(!isMPI()) return;

	startTime = BLTime::timeus();
	monitor.setState(0);	// startup

	MPI_Init(argc,argv);
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);
	MPI_Comm_size(MPI_COMM_WORLD,&nodes);
	BLAssert(nodes > 1);

	if(rank != 0) {
		// silence workers' stdout (freopen is the portable way to
		// redirect a standard stream)
		freopen("/dev/null","w",stdout);
	}

	// This prevents rank 0 from creating real NTuples, except in response
	// to messages from other ranks.
	// It also causes other ranks to create NTuples via MPI messages.
	new MPINTupleHandler();

	// set default values of MPI parameters
	Param.setParam("MPI_PackTracks",10);
	Param.setParam("MPI_PackNTuples",100);
	Param.setParam("MPI_ProbeSleep",1);

#ifdef G4BL_FORCEMPI
	printf("MPI MODE, G4BL_FORCEMPI rank=%d nodes=%d\n",rank,nodes);
#else
	printf("MPI MODE rank=%d nodes=%d\n",rank,nodes);
#endif
}
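// The three MPI_* parameters above are ordinary G4beamline parameters, so
// they can be set in the input file (defaults are set in init()):
//	MPI_PackTracks	max # Tracks per BEAM_TRACKS message
//	MPI_PackNTuples	max # NTuple rows buffered per row message (capped
//			so one message holds at most MAX_NTUPLE_DOUBLES values)
//	MPI_ProbeSleep	ms slept between MPI_Iprobe polls in rank 0;
//			<= 0 means use a blocking MPI_Probe instead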
bool BLMPI::isMPI()
{
#ifdef G4BL_FORCEMPI
	return true;
#else
	// simple GUESS how to detect Open MPI -- works on Mac OS X (Leopard)
	return getenv("OMPI_MCA_universe") != 0;
#endif
}

bool BLMPI::isRank0()
{
	return rank == 0;
}

bool BLMPI::isRank1()
{
	return rank == 1;
}

void BLMPI::main()
{
	if(!isMPI()) return;

	MPI_PackTracks = Param.getInt("MPI_PackTracks");
	MPI_ProbeSleep = Param.getInt("MPI_ProbeSleep");

	if(rank == 0)
		mainRankZero();
	else
		mainRankNonZero();

	BLNTuple::summary();
	BLNTuple::closeAll();
	BLManager::getObject()->handleCallbacks(2);

	MPI_Finalize();
	fflush(stdout);
	_exit(0);
}

MPI_Datatype Msg::charIntFloat(int nchar, int nint, int nfloat)
{
	static std::map<int,MPI_Datatype> known;
	BLAssert(nchar < 1024 && nint < 1024 && nfloat < 1024 &&
							(nchar&3) == 0);
	int key = (nchar<<20) | (nint<<10) | nfloat;
	MPI_Datatype v = known[key];
	if(v != 0) return v;

	int len[3];
	MPI_Aint offset[3];
	MPI_Datatype type[3];
	int count=0, addr=0;
	if(nchar > 0) {
		len[count] = nchar;
		offset[count] = addr;
		type[count++] = MPI_CHAR;
		addr += nchar * sizeof(char);
	}
	if(nint > 0) {
		len[count] = nint;
		offset[count] = addr;
		type[count++] = MPI_INT;
		addr += nint * sizeof(int);
	}
	if(nfloat > 0) {
		len[count] = nfloat;
		offset[count] = addr;
		type[count++] = MPI_FLOAT;
		addr += nfloat * sizeof(float);
	}
	// MPI_Type_struct is the MPI-1 name; newer MPI spells it
	// MPI_Type_create_struct
	MPI_Type_struct(count,len,offset,type,&v);
	MPI_Type_commit(&v);
	known[key] = v;
	return v;
}

bool handleNTupleMessage(MPI_Status &status)
{
	static std::map<G4String,int> known;
	static std::map<int,BLNTuple*> ntupleMap;
	static int nextHandle=FIRST_NTUPLE_HANDLE;

	if(status.MPI_TAG >= FIRST_NTUPLE_HANDLE) {
		static float *data=0;
		static int ndata=0;
		int count;
		MPI_Get_count(&status,MPI_FLOAT,&count);
		if(count > ndata) {
			if(data) delete[] data;
			data = new float[count];
			ndata = count;
		}
		MPI_Recv(data,count,MPI_FLOAT,status.MPI_SOURCE,
			status.MPI_TAG,MPI_COMM_WORLD,&status);
		BLNTuple *ntuple = ntupleMap[status.MPI_TAG];
		if(ntuple) {
			// a message can pack multiple rows of k floats each
			int k=ntuple->getNData();
			for(int j=0; j<count; j+=k)
				ntuple->appendRow(&data[j],k);
		} else {
			printf("Unknown NTuple handle %d nData=%d\n",
						status.MPI_TAG,count);
		}
		return true;
	} else if(status.MPI_TAG == CREATE_NTUPLE) {
		CreateNTuple c;
		c.recv(status);
		G4String name = c.category;
		if(name.size() > 0) name += "/";
		name += c.name;
		NTupleHandle h;
		h.handle = known[name];
		if(h.handle == 0) {
			// temporarily remove the forced handler so a real
			// NTuple gets created in rank 0
			BLNTupleHandler *prev = BLNTuple::getForceHandler();
			BLNTuple::setForceHandler(0);
			BLNTuple *ntuple = BLNTuple::create(c.type,c.category,
						c.name,c.fields,c.filename);
			BLNTuple::setForceHandler(prev);
			BLAssert(ntuple != 0);
			h.handle = nextHandle++;
			known[name] = h.handle;
			ntupleMap[h.handle] = ntuple;
		}
		monitor.incrState(1);
		h.send(status.MPI_SOURCE);
		monitor.incrState(-1);
		return true;
	}
	return false;
}
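// Note on NTuple handles: rank 0 assigns handles sequentially starting at
// FIRST_NTUPLE_HANDLE, keyed by "category/name", so the same NTuple created
// on every worker maps to a single real NTuple (and file) in rank 0. The
// handle doubles as the MPI tag for that NTuple's row messages.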
void sendBeamTracks(int dest)
{
	static int nev=0;
	static BeamTracks msg(MPI_PackTracks);
	static BLManager *manager=0;
	static BLRunManager *runManager=0;
	if(manager == 0) {
		manager = BLManager::getObject();
		runManager = BLRunManager::getObject();
	}

	int n;
	for(n=0; n<MPI_PackTracks; ++n) {
		G4Event *event;
		G4Track *track;
		if(!runManager->getNextBeamEventAndTrack(&event,&track))
			break;
		manager->incrEventsProcessed(event->GetEventID());
		msg.tracks[n].PDGid = track->GetDefinition()->GetPDGEncoding();
		msg.tracks[n].eventID = event->GetEventID();
		msg.tracks[n].trackID = manager->getPrimaryTrackID();
		msg.tracks[n].parentID = manager->getPrimaryParentID();
		G4ThreeVector pos = track->GetPosition();
		msg.tracks[n].position[0] = pos[0];
		msg.tracks[n].position[1] = pos[1];
		msg.tracks[n].position[2] = pos[2];
		G4ThreeVector mom = track->GetMomentum();
		msg.tracks[n].momentum[0] = mom[0];
		msg.tracks[n].momentum[1] = mom[1];
		msg.tracks[n].momentum[2] = mom[2];
		msg.tracks[n].time = track->GetGlobalTime();
		msg.tracks[n].weight = track->GetWeight();
		delete event;
		delete track;
	}
	monitor.incrState(1);
	msg.send(dest,n);	// n=0 is OK, indicates computation is complete
	monitor.incrState(-1);
}

void processOneTrackAndAllSecondaries(Track &track)
{
	static BLManager *manager=0;
	static BLRunManager *runManager=0;
	if(manager == 0) {
		manager = BLManager::getObject();
		runManager = BLRunManager::getObject();
	}

	G4ThreeVector pos(track.position[0],track.position[1],
						track.position[2]);
	G4ThreeVector mom(track.momentum[0],track.momentum[1],
						track.momentum[2]);
	G4ParticleDefinition *particle =
		G4ParticleTable::GetParticleTable()->FindParticle(track.PDGid);
	G4DynamicParticle *dyn = new G4DynamicParticle(particle,mom);
	G4Track *g4track = new G4Track(dyn,track.time,pos);
	g4track->SetTrackID(track.trackID);
	g4track->SetParentID(track.parentID);
	BLManager::getObject()->setExternalTrackID(g4track,track.trackID,
						track.parentID);
	g4track->SetWeight(track.weight);

	manager->setEventID(track.eventID);
	runManager->processOneTrack(g4track);
	runManager->processAllSecondariesAndDeferredTracks();
}

MPINTuple::MPINTuple(int _handle, G4String _name, G4String _fields)
	: BLNTuple(_name,_fields)
{
	handle = _handle;
	fields = _fields;
	// # fields = # colon-separated names in the fields string
	nfields = 1;
	for(unsigned i=0; i<fields.size(); ++i)
		if(fields[i] == ':') ++nfields;
	// buffer at most MPI_PackNTuples rows, capped by the message limit
	nBuffer = MPI_PackNTuples;
	if(nBuffer > MAX_NTUPLE_DOUBLES/nfields)
		nBuffer = MAX_NTUPLE_DOUBLES/nfields;
	buffer = new float[nBuffer*nfields];
	iBuffer = 0;
}

void MPINTuple::appendRow(float data[], int n)
{
	if(handle == 0) return;
	int j = iBuffer*nfields;
	for(int i=0; i<n; ++i)
		buffer[j+i] = data[i];
	if(++iBuffer >= nBuffer) {
		Msg::sendNTupleRow(buffer,iBuffer*nfields,handle);
		iBuffer = 0;
	}
}

BLNTuple *MPINTupleHandler::create(G4String type, G4String category,
			G4String name, G4String fields, G4String filename)
{
	if(BLMPI::isRank0()) {
		// rank 0 cannot send a message yet, but doesn't need NTuple-s,
		// so create a do-nothing NTuple
		return new MPINTuple(0,name,fields);
	}

	// send a CreateNTuple message to rank 0
	CreateNTuple c;
	strcpy(c.type,type);
	strcpy(c.category,category);
	strcpy(c.name,name);
	strcpy(c.fields,fields);
	strcpy(c.filename,filename);
	c.sendTo0();

	// wait for and receive the NTupleHandle response
	MPI_Status status;
	NTupleHandle h;
	h.recv(status);

	return new MPINTuple(h.handle,name,fields);
}

#else // G4BL_MPI

void BLMPI::init(int *argc, char ***argv) { }

bool BLMPI::isMPI() { return false; }

bool BLMPI::isRank0() { return false; }

bool BLMPI::isRank1() { return true; }

void BLMPI::main() { }

#endif // G4BL_MPI