@@@@@ @ @ @ @ @ @ @@@ @ @@ @@@@ @ @ @ @@@@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @@ @@@@@ @ @ @ @ @ @ @ @ @ @ @ @ @@ @ @ @ @@@@@ @@@ @ @@@@ @ @ @ @@@@ @ @ @ @ @ @ @@@@ @@@@ @ @@ @@@@ @ @ @@@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @@ @ @ @ @ @@@@@ @ @@@@ @ @ @ @ @ @ @ @ @ @ @@@@ @ @@@@ @@ @@ @@@ @@ ************************************************************************* * * * _ _ _ _ ____ _ _ * * | |____ _| | | |_ __ (_)/ ___| |_ _ ___| |_ ___ _ __ * * | '_ \ \ /\ / / | | | '_ \| | | | | | | / __| __/ _ \ '__| * * | |_) \ V V /| |_| | | | | | |___| | |_| \__ \ || __/ | * * |_.__/ \_/\_/ \___/|_| |_|_|\____|_|\__,_|___/\__\___|_| * * * * * * Author: juergen.oehlschlaeger@kit.edu 12 June 2014 * ************************************************************************* Instruction, how to run parallel CORSIKA on the bwunicluster at the KIT with parallelization using the MPI system: path: corsika.conex/ (i.e. main path of a corsika distribution) (same as on the hc3 parallel processor system). (1) reset all environment optimization flags during compilation and linking by typing `. src/parallel/unsetoptflags.sh` ( - use command `env | grep FLAG` to display current environment variables - ); it avoids higher compiler optimization of the MPI system for the parallel application; load mpi module by script bwunicluster.sh and list current modules; then run ./coconut to create a CORSIKA executable with parallelization by the MPI system by selecting `p - PARALLEL treatment of subshowers` and then `2 - Library to be used with MPI system`; `mpi_corsika74345Linux_QGSII_gheisha_runner` is automatically moved to the subdirectory run/ of the main path of the corsika distribution; then switch to path: corsika.trunk/run/ (or also $WORK) (2) rename the executable to `mpi_corsika74345_stnd_QGSII4_gheisha_runner` to distinguish `standard` or `thinning` simulations, and some more possible options like `history`, `augerhit`, `stackin` and others. 
(3) prepare the corsika steering file `parallel-001239` for the parallel run with the keyword PARALLEL by using the fortran utility program `acreinphc3.f`, i.e. `./acreinphc3`, or modify an existing one; the best ratio for the first parameter of the keyword PARALLEL is 1/1000 of the second (named ecutmax), which directly influences the total number of simulated parts of the complete corsika run; each processor should calculate about 4 parts of the run, see utility `pllselectecut.c` for energy-dependent examples (../src/parallel/). the keyword CUTFILE must not be used and is also invalid in a regular parallel run; for an example see appendix (A-3). (4) prepare the submit script `jobwcl-001239` with the organization of the (new) subdirectory csk001239/ by using the fortran utility program `acreinphc3.f`, i.e. `./acreinphc3`, or modify a copy of an existing one to the new run number; the last line of `jobwcl-001239` is the complete submit command to the job queueing system, see appendix (A-4); submit it by: msub jobwcl-001239 ; (5) after the successful parallel corsika simulation the subdirectory csk001239/ will contain a file `time.txt` of about 90 bytes length; next switch to this subdirectory and execute the postprocessing script `./postprocessnew.sh` which writes additional information to the new job protocol files `Job001239_*.[err,out]` and to the new extended file `time.txt001239`; the last line of `Job001239_*.out` now contains the total amount of Gigabytes of all DAT001239-files as a formatted number, where the next to last line is the word `EXIT` from the steering file `parallel-001239`; see appendix (A-5); (6) optionally run the script `showanalyhc3.sh` after a complete standard (not thinned) corsika simulation; see appendix (A-6); switch back to path: $WORK (7) next run script `showparallel.sh` (no argument necessary) to create the new current table of all available parallel simulations in this working path, see appendix (A-7). 
************************************************************************* (A-3) parallel steering file ============================ See also CORSIKA user guide. RUNNR 1239 PARALLEL 3000. 3000000. 1 F NSHOW 1 EVTNR 1 SEED 2160 0 0 SEED 2161 0 0 SEED 2162 0 0 SEED 2163 0 0 SEED 2164 0 0 SEED 2165 0 0 PRMPAR 14 ERANGE 3.8283E+09 3.8283E+09 THETAP 23.45 23.45 PHIP 26.56 26.56 OBSLEV 1452.e2 870.000 g/cm^2 MAGNET 19.51 -14.18 Auger MAXPRT 1 ECTMAP 1.E11 ECUTS 0.1000 0.1000 2.5e-4 2.5e-4 RADNKG 200.E2 HADFLG 0 0 0 0 0 2 ELMFLG T T QGSJET T 0 QGSSIG T MUADDI T MUMULT T STEPFC 1. * AUGSCT 20 55. 200. ! data reduction to 2% * AUGSCT 20 55. 300. ! data reduction to 1% HILOW 88. DIRECT csk001239/ HOST bwc.uni USER you EXIT steering file creation ====================== #!/bin/bash # - - - - compile and link fortran program: ifort -C -check bounds acreinphc3.f -o acreinphc3 # - - - - execute fortran program: ./acreinphc3 # - - - - make jobhc3-001239 files executable: chmod +x jobhc3-* ************************************************************************* (A-2) loading mpi ================= #!/bin/bash # # bwunicload.sh: # ============== # loading mpi module at bwunicluster.scc.kit.edu: # ------------------------------------------------------------------------ # cd ~jl5949/corsika.conex/ # usage: ./bwunicluster.sh # ------------------------------------------------------------------------ # juergen.oehlschlaeger@kit.edu # ------------------------------------------------------------------------ # module load mpi module list ************************************************************************* (A-4) job submit script ======================= #!/bin/bash # # parallel corsika air shower simulation: # --------------------------------------- #MSUB -l nodes=1:ppn=16 #MSUB -l walltime=00:30:00 #MSUB -l pmem=2000mb #MSUB -q develop # MSUB -l nodes=5:ppn=16 # MSUB -l walltime=06:50:00 # MSUB -l pmem=2000mb # MSUB -q multinode # MSUB -e job_uc1_jobwcl-000180_%j.err # MSUB -o 
job_uc1_jobwcl-000180_%j.out # MSUB -m n #MSUB -e job_uc1_jobwcl-000333_%j.err #MSUB -o job_uc1_jobwcl-000333_%j.out #MSUB -m n # module load mpi 2>&1 # cd /work/kit/ikp/jl5949 # if [ ! -e csk000333/ ] ; then /bin/mkdir csk000333/ else /bin/rm -f csk000333/* fi /bin/cp jobwcl-000333 csk000333/ /bin/cp parallel-000333 csk000333/ /bin/cp summ* csk000333/ /bin/cp totaltimenew* csk000333/ /bin/cp postprocessnew* csk000333/ /bin/cp showanalyhc3* csk000333/ # mpirun ./mpi_corsika74072_stnd_QGSII4_gheisha_runner parallel-000333 ************************************************************************* (A-5) time statistic file (after ./postprocessnew.sh) ========================= START TIME STOP TIME TIME (min) 1359112626.006491 1359114041.460160 23.590894 LONGEST JOB: MPIID = 17 and Time = 707.726820 Total number of jobs = 207 Maximum size of group = 17 TOTAL CPU TIME (days) = 0.438553 time.txt001239 ************************************************************************* (A-6) analysis script ===================== #!/bin/bash # # = = = = = job_submit -p1 -cp -t660 -m1000 showanalyhc3.sh001239 # # create file list and run `showanalyhc3` program: # --------------------------------------------------------------- # ls -1 DAT001239* | grep t -v | grep n -v > showanalyhc3.i001239 # # names of sub paths csk00????; # gfortran -fbounds-check showanalyhc3.f -o showanalyhc3 # f77 -fbounds-check showanalyhc3.f -o showanalyhc3 # ifort -C -check bounds showanalyhc3.f -o showanalyhc3 # ./showanalyhc3 < showanalyhc3.i001239 > showanalyhc3.out001239 mv fort.9 showanalyhc3.fort001239 ************************************************************************* (A-7) show infos of all parallel simulations ============================================ #!/bin/bash # # create the tabular of available parallel corsika simulations: # ------------------------------------------------------------------------ # Primary lg(E) theta phi runtsk sizeGBy procs # T(days) ecutmax t(min) files RATIO obslev 
Xmagn Zmagn # _corsika_executable_ # < ecutha ecutmu ecutel ecutga thilev wmax lg(thirad) > # ------------------------------------------------------------------------ # usage: ./showparallel.sh # ------------------------------------------------------------------------ # juergen.oehlschlaeger@kit.edu # ------------------------------------------------------------------------ # ls -1 csk00*/Job00*.out > showparallel.jobinfos # # names of subdirectories csk00????; # hc3 job protocols Job00????_%jobid.err, Job00????_%jobid.out; # gfortran -fbounds-check showparallel.f -o showparallel # ifort -C -check bounds showparallel.f -o showparallel # ./showparallel < showparallel.jobinfos > showparallel.hc3-work-jobinfos *************************************************************************