# http://www.gridway.org/documentation/stable/gwdeveloperguide/c41.htm
# assume CPU_SMP, SIZE_MEM_MB, SIZE_DISK_MB, are per-node
# Is there any way to specify node properties in wsgram?
# For example, 'fastcpu' or 'himem' on NCSA IA64?
# The problem with one machine dict per node type is finding unions.
# DataStar has 176 16GB, 1.5GHz nodes and 96 32GB, 1.7GHz nodes.
# If a job needs 256 16GB, 1.5GHz nodes, it would not see DS...
# Need to allow combos with same HOSTNAME? Would normally create
# a stanza with 272 16GB, 1.5GHz nodes, to include the fast, fat ones,
# but what if a user wants only the slow, skinny ones?
# For combining, which job_template should be used?
# For each machine_dict, have a node_pool_list. Each node_pool_list
# should have an attributes dict. But how to construct the job script
# from the node_pool_list info?
# Do not allow users to specify exact matches for int or float;
# rather, interpret these as minima (see the matching sketch appended
# at the end of this file).

machine_dict_list = [
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.ncsa.teragrid.org',],
            'NODECOUNT' : [631,],
            'CPU_MODEL' : ['ia64',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['debug', 'dque', 'long', 'big', 'gpfs-wan'],
            'QUEUE_NODECOUNT' : [128, 400, 400, 600, 400],
            'QUEUE_MAXTIME' : [1800, 86400, 345600, 172800, 86400],
            'QUEUE_MAXCOUNT' : [384, 384, 384, 384, 384],
            'QUEUE_MAXRUNNINGJOBS' : [128, 128, 128, 128, 128],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__:fastcpu
#MCP qtype pbs
#MCP submit_host tg-login.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 0
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
#/usr/local/mpich/mpich-gm-1.2.6..14b-intel-r2/bin/mpirun -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.ncsa.teragrid.org',],
            'NODECOUNT' : [128,],
            'CPU_MODEL' : ['ia64',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [1300,],
            'CPU_MEMORY_GB' : [6,],
            'QUEUE_NAME' : ['debug', 'dque', 'long', 'big', 'gpfs-wan'],
            'QUEUE_NODECOUNT' : [128, 400, 400, 600, 400],
            'QUEUE_MAXTIME' : [1800, 86400, 345600, 172800, 86400],
            'QUEUE_MAXCOUNT' : [384, 384, 384, 384, 384],
            'QUEUE_MAXRUNNINGJOBS' : [128, 128, 128, 128, 128],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__:himem
#MCP qtype pbs
#MCP submit_host tg-login.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 0
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
#/usr/local/mpich/mpich-gm-1.2.6..14b-intel-r2/bin/mpirun -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.ncsa.teragrid.org',],
            'NODECOUNT' : [887,],
            'CPU_MODEL' : ['ia64',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [1300, 1500],
            'CPU_MEMORY_GB' : [2, 6],
            'QUEUE_NAME' : ['debug', 'dque', 'long', 'big', 'gpfs-wan'],
            'QUEUE_NODECOUNT' : [128, 400, 400, 600, 400],
            'QUEUE_MAXTIME' : [1800, 86400, 345600, 172800, 86400],
            'QUEUE_MAXCOUNT' : [384, 384, 384, 384, 384],
            'QUEUE_MAXRUNNINGJOBS' : [128, 128, 128, 128, 128],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#MCP qtype pbs
#MCP submit_host tg-login.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 0
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
#/usr/local/mpich/mpich-gm-1.2.6..14b-intel-r2/bin/mpirun -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login1.sdsc.teragrid.org',],
            'NODECOUNT' : [262,],
            'CPU_MODEL' : ['ia64',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['dque',],
            'QUEUE_NODECOUNT' : [262,],
            'QUEUE_MAXTIME' : [64800,],
            'QUEUE_MAXCOUNT' : [6,],
            'QUEUE_MAXRUNNINGJOBS' : [262,],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
: """ def getwallclock_string(wallclock_seconds) : walldays, wallremainder = divmod(wallclock_seconds, 24*60*60) wall_timestruct = time.gmtime(float(wallremainder)) wall_hours = int(time.strftime("%H", wall_timestruct)) wall_hours = walldays * 24 + wall_hours wallclock_time_string = time.strftime("%M:%S", wall_timestruct) wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string) return wallclock_string """, 'job_template' : """#!__MCP_SHELL__ #PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__ #MCP qtype pbs #MCP submit_host tg-login1.sdsc.teragrid.org #MCP username __MCP_USERNAME__ #MCP scratch_dir __MCP_SCRATCH_DIR__ #MCP submit_return_pattern (?P\\d+).dtf-mgmt1.sdsc.teragrid.org #MCP cluster_id 1 cd __MCP_JOB_DIR__ NPROCS=`wc -l < $PBS_NODEFILE` __MCP_PARALLEL_RUN__ -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__ __MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__ cd """, }, { 'submit_host' : '', 'attributes' : { 'HOSTNAME' : ['tg-login1.sdsc.teragrid.org',], 'NODECOUNT' : [262,], 'CPU_MODEL' : ['ia64',], 'CPU_SMP' : [2,], 'CPU_MHZ' : [1500,], 'CPU_MEMORY_GB' : [2,], 'QUEUE_NAME' : ['dque',], 'QUEUE_NODECOUNT' : [262,], 'QUEUE_MAXTIME' : [64800,], 'QUEUE_MAXCOUNT' : [6,], 'QUEUE_MAXRUNNINGJOBS' : [262,], 'STATUS' : ['production',], }, 'argument_prefix' : '', 'argument_suffix' : '', 'required_subs_list' : [ '__MCP_NODES__', 'wallclock_seconds', '__MCP_TOTAL_CPUS__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__', '__MCP_QUEUE__', ], 'getwallclock_string' : """ def getwallclock_string(wallclock_seconds) : import math wallclock_string = "%u" % (int(math.ceil(wallclock_seconds/60)),) return wallclock_string """, 'job_template' : """ __MCP_EXECUTABLE__ __MCP_JOB_DIR__ __MCP_ARGUMENTS__ __MCP_TOTAL_CPUS__ __MCP_NODES__ __MCP_QUEUE__ __MCP_WALLCLOCK__ mpi """, }, { 'submit_host' : '', 'attributes' : { 'HOSTNAME' : ['queenbee.loni-lsu.teragrid.org',], 'NODECOUNT' : [668,], 'CPU_MODEL' : ['x86_64',], 'CPU_SMP' : [8,], 'CPU_MHZ' : [2330,], 'CPU_MEMORY_GB' : [8,], 'QUEUE_NAME' : ['checkpt','workq'], 'QUEUE_NODECOUNT' : [256,256,], 'QUEUE_MAXTIME' : [172800,172800,], 'QUEUE_MAXCOUNT' : [512,512,], 'QUEUE_MAXRUNNINGJOBS' : [512,512,], 'STATUS' : ['production',], }, 'argument_prefix' : ' ', 'argument_suffix' : '', 'required_subs_list' : [ 'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__', '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__', ], 'getwallclock_string' : """ def getwallclock_string(wallclock_seconds) : walldays, wallremainder = divmod(wallclock_seconds, 48*60*60) wall_timestruct = time.gmtime(float(wallremainder)) wall_hours = int(time.strftime("%H", wall_timestruct)) wall_hours = walldays * 24 + wall_hours wallclock_time_string = time.strftime("%M:%S", wall_timestruct) wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string) return wallclock_string """, 'job_template' : """#!__MCP_SHELL__ #PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__ #MCP qtype pbs #MCP submit_host queenbee.loni-lsu.teragrid.org #MCP username __MCP_USERNAME__ #MCP scratch_dir __MCP_SCRATCH_DIR__ #MCP cluster_id 2 export WORK_DIR=__MCP_JOB_DIR__ cd __MCP_JOB_DIR__ NPROCS=`wc -l < $PBS_NODEFILE` # For MVAPICH2 jobs, start the mpd daemon on each allocated node. 
export MPDSNP=`uniq $PBS_NODEFILE |wc -l| cut -d'/' -f1`
cat $PBS_NODEFILE | uniq > $WORK_DIR/mpd_nodefile_$USER
export MPD_NODEFILE=$WORK_DIR/mpd_nodefile_$USER
mpdboot -v -n $MPDSNP -f $MPD_NODEFILE
mpdtrace -l
rm $MPD_NODEFILE
__MCP_SERIAL_RUN__ mpdallexit
# run mvapich2 jobs
__MCP_PARALLEL_RUN__ -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
# stop mpd daemons
mpdallexit
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['login-abe.ncsa.teragrid.org',],
            'NODECOUNT' : [600,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [1,],
            'QUEUE_NAME' : ['debug', 'normal', 'long'],
            'QUEUE_NODECOUNT' : [16, 600, 600],
            'QUEUE_MAXTIME' : [1800, 172800, 604800],
            'QUEUE_MAXCOUNT' : [100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [16, 600, 600],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#MCP qtype pbs
#MCP submit_host login-abe.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 3
cd __MCP_JOB_DIR__
mvapich2-start-mpd
NPROCS=`wc -l < $PBS_NODEFILE`
export NP=`wc -l ${PBS_NODEFILE} | cut -d'/' -f1`
export MV2_SRQ_SIZE=4000
#/usr/local/mvapich2-1.2-intel-ofed-1.2.5.5/bin/mpirun -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
mpdallexit
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['login-abe.ncsa.teragrid.org',],
            'NODECOUNT' : [600,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['debug', 'normal', 'long'],
            'QUEUE_NODECOUNT' : [16, 600, 600],
            'QUEUE_MAXTIME' : [1800, 172800, 604800],
            'QUEUE_MAXCOUNT' : [100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [16, 600, 600],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__:himem
#MCP qtype pbs
#MCP submit_host login-abe.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 3
cd __MCP_JOB_DIR__
mvapich2-start-mpd
NPROCS=`wc -l < $PBS_NODEFILE`
export NP=`wc -l ${PBS_NODEFILE} | cut -d'/' -f1`
export MV2_SRQ_SIZE=4000
#/usr/local/mvapich2-1.2-intel-ofed-1.2.5.5/bin/mpirun -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
mpdallexit
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['login-abe.ncsa.teragrid.org',],
            'NODECOUNT' : [1200,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [1, 2],
            'QUEUE_NAME' : ['debug', 'normal', 'wide', 'long'],
            'QUEUE_NODECOUNT' : [16, 600, 1196, 600],
            'QUEUE_MAXTIME' : [1800, 172800, 172800, 604800],
            'QUEUE_MAXCOUNT' : [100, 100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [16, 600, 1196, 600],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#MCP qtype pbs
#MCP submit_host login-abe.ncsa.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 3
cd __MCP_JOB_DIR__
mvapich2-start-mpd
NPROCS=`wc -l < $PBS_NODEFILE`
export NP=`wc -l ${PBS_NODEFILE} | cut -d'/' -f1`
export MV2_SRQ_SIZE=4000
#/usr/local/mvapich2-1.2-intel-ofed-1.2.5.5/bin/mpirun -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NP __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
mpdallexit
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.frost.ncar.teragrid.org',],
            'NODECOUNT' : [1024,],
            'CPU_MODEL' : ['PowerPC-440',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [700,],
            'CPU_MEMORY_GB' : [256,],
            'QUEUE_NAME' : ['',],
            'QUEUE_NODECOUNT' : [1024,],
            'QUEUE_MAXTIME' : [64800,],
            'QUEUE_MAXCOUNT' : [1024,],
            'QUEUE_MAXRUNNINGJOBS' : [1024,],
            'STATUS' : ['experimental',],
        },
        'argument_prefix' : '',
        'argument_suffix' : '',
        'required_subs_list' : [
            '__MCP_NODES__', 'wallclock_seconds', '__MCP_TOTAL_CPUS__', '__MCP_USERNAME__',
            '__MCP_SCRATCH_DIR__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__', '__MCP_ACCOUNT__',
            '__MCP_QUEUE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    import math
    wallclock_string = "%u" % (int(math.ceil(wallclock_seconds/60)),)
    return wallclock_string
""",
        'job_template' : """
__MCP_EXECUTABLE__ __MCP_JOB_DIR__ __MCP_ARGUMENTS__ __MCP_TOTAL_CPUS__ __MCP_NODES__ __MCP_ACCOUNT__ __MCP_WALLCLOCK__ mpi
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.lonestar.tacc.teragrid.org',],
            'NODECOUNT' : [1460,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [4,],
            'CPU_MHZ' : [2660,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['serial', 'normal', 'high', 'hero', 'development'],
            'QUEUE_NODECOUNT' : [1460, 1460, 1460, 1460, 1460],
            'QUEUE_MAXTIME' : [43200, 172800, 172800, 172800, 1800],
            'QUEUE_MAXCOUNT' : [1, 128, 128, 1460, 16],
            'QUEUE_MAXRUNNINGJOBS' : [1460, 1460, 1460, 1460, 1460],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_CPUS__', '__MCP_ACCOUNT__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 48*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#BSUB -W __MCP_WALLCLOCK__
#BSUB -n __MCP_CPUS__
#BSUB -P __MCP_ACCOUNT__
#MCP qtype lsf
#MCP submit_host tg-login.lonestar.tacc.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP submit_command bsub <
#MCP submit_return_pattern Job <(?P\d+)> is submitted
#MCP queue_line_command bjobs
#MCP queue_line_pattern ^\\d+\\s+\\S+\\s+(?P\\S+)\\s+\\S+\\s+\\S+
#MCP kill_command bkill
#MCP jobvanishes no
#MCP run_string RUN DONE
#MCP canceled_string RUN DONE EXIT
#MCP cluster_id 6
cd $LS_SUBCWD
# run mvapich2 jobs
__MCP_PARALLEL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.purdue.teragrid.org',],
            'NODECOUNT' : [624,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [2330,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['standby', 'rcac-a', 'standby-8', 'steele_hold'],
            'QUEUE_NODECOUNT' : [624, 192, 400, 624],
            'QUEUE_MAXTIME' : [14400, 2592000, 28800,],
            'QUEUE_MAXCOUNT' : [100, 100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [624, 192, 400, 624],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_ACCOUNT__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#PBS -A __MCP_ACCOUNT__
#MCP qtype pbs
#MCP submit_host tg-login.purdue.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 5
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.purdue.teragrid.org',],
            'NODECOUNT' : [180,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [2330,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['standby', 'rcac-b', 'standby-8', 'steele_hold'],
            'QUEUE_NODECOUNT' : [180, 112, 180, 180],
            'QUEUE_MAXTIME' : [14400, 2592000, 28800,],
            'QUEUE_MAXCOUNT' : [100, 100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [180, 112, 180, 180],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_ACCOUNT__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#PBS -A __MCP_ACCOUNT__
#MCP qtype pbs
#MCP submit_host tg-login.purdue.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 5
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.purdue.teragrid.org',],
            'NODECOUNT' : [48,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [2330,],
            'CPU_MEMORY_GB' : [4,],
            'QUEUE_NAME' : ['standby', 'rcac-c', 'standby-8', 'steele_hold'],
            'QUEUE_NODECOUNT' : [48, 8, 48, 48],
            'QUEUE_MAXTIME' : [14400, 2592000, 28800,],
            'QUEUE_MAXCOUNT' : [100, 100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [48, 8, 48, 48],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_ACCOUNT__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#PBS -A __MCP_ACCOUNT__
#MCP qtype pbs
#MCP submit_host tg-login.purdue.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 5
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.purdue.teragrid.org',],
            'NODECOUNT' : [41,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [2330,],
            'CPU_MEMORY_GB' : [4,],
            'QUEUE_NAME' : ['standby', 'rcac-d', 'standby-8', 'steele_hold'],
            'QUEUE_NODECOUNT' : [41, 16, 41, 41],
            'QUEUE_MAXTIME' : [14400, 2592000, 28800,],
            'QUEUE_MAXCOUNT' : [100, 100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [41, 16, 41, 41],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_ACCOUNT__', '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#PBS -A __MCP_ACCOUNT__
#MCP qtype pbs
#MCP submit_host tg-login.purdue.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 5
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.purdue.teragrid.org',],
            'NODECOUNT' : [893,],
            'CPU_MODEL' : ['x86_64',],
            'CPU_SMP' : [8,],
            'CPU_MHZ' : [2330,],
            'CPU_MEMORY_GB' : [2, 4],
            'QUEUE_NAME' : ['standby', 'steele_hold', 'standby-8'],
            'QUEUE_NODECOUNT' : [893, 893, 893],
            'QUEUE_MAXTIME' : [14400, 28800,],
            'QUEUE_MAXCOUNT' : [100, 100, 100],
            'QUEUE_MAXRUNNINGJOBS' : [893, 893, 893],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_ACCOUNT__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#PBS -A __MCP_ACCOUNT__
#PBS -q __MCP_QUEUE__
#MCP qtype pbs
#MCP submit_host tg-login.purdue.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 5
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
__MCP_PARALLEL_RUN__ $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
    {
        'submit_host' : '',
        'attributes' : {
            'HOSTNAME' : ['tg-login.uc.teragrid.org',],
            'NODECOUNT' : [64,],
            'CPU_MODEL' : ['ia64',],
            'CPU_SMP' : [2,],
            'CPU_MHZ' : [1500,],
            'CPU_MEMORY_GB' : [2,],
            'QUEUE_NAME' : ['dque',],
            'QUEUE_NODECOUNT' : [64,],
            'QUEUE_MAXTIME' : [86400,],
            'QUEUE_MAXCOUNT' : [64,],
            'QUEUE_MAXRUNNINGJOBS' : [64,],
            'STATUS' : ['production',],
        },
        'argument_prefix' : ' ',
        'argument_suffix' : '',
        'required_subs_list' : [
            'wallclock_seconds', '__MCP_SHELL__', '__MCP_PARALLEL_RUN__', '__MCP_SERIAL_RUN__',
            '__MCP_NODES__', '__MCP_CPUS_PER_NODE__', '__MCP_USERNAME__', '__MCP_SCRATCH_DIR__',
            '__MCP_JOB_DIR__', '__MCP_EXECUTABLE__',
        ],
        'getwallclock_string' : """
def getwallclock_string(wallclock_seconds) :
    walldays, wallremainder = divmod(wallclock_seconds, 24*60*60)
    wall_timestruct = time.gmtime(float(wallremainder))
    wall_hours = int(time.strftime("%H", wall_timestruct))
    wall_hours = walldays * 24 + wall_hours
    wallclock_time_string = time.strftime("%M:%S", wall_timestruct)
    wallclock_string = "%u:%s" % (wall_hours, wallclock_time_string)
    return wallclock_string
""",
        'job_template' : """#!__MCP_SHELL__
#PBS -l walltime=__MCP_WALLCLOCK__,nodes=__MCP_NODES__:ppn=__MCP_CPUS_PER_NODE__
#MCP qtype pbs
#MCP submit_host tg-login.uc.teragrid.org
#MCP username __MCP_USERNAME__
#MCP scratch_dir __MCP_SCRATCH_DIR__
#MCP cluster_id 7
cd __MCP_JOB_DIR__
NPROCS=`wc -l < $PBS_NODEFILE`
#/soft/mpich-gm-1.2.5..10-intel-r2a/bin/mpirun -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_PARALLEL_RUN__ -v -machinefile $PBS_NODEFILE -np $NPROCS __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
__MCP_SERIAL_RUN__ __MCP_EXECUTABLE__ __MCP_ARGUMENTS__
cd
""",
    },
]
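
# Sketch (not part of the production configuration): one possible way to match
# a job's requirements against the 'attributes' dicts above, following the note
# at the top of this file that int/float requirements should be interpreted as
# minima rather than exact matches.  The requirement keys used in the example,
# and the rule that string-valued requirements must appear verbatim in the
# attribute's value list, are assumptions for illustration only.

def find_matching_machines(requirements, machine_dicts=machine_dict_list):
    """Return the machine dicts whose attributes satisfy `requirements`.

    Numeric requirements (int or float) are treated as minima: a machine
    matches if any value listed for that attribute is >= the requirement.
    String requirements (e.g. CPU_MODEL, STATUS) must match a listed value
    exactly.
    """
    matches = []
    for machine in machine_dicts:
        attributes = machine['attributes']
        ok = True
        for key, wanted in requirements.items():
            values = attributes.get(key, [])
            if isinstance(wanted, (int, float)):
                # interpret numeric requirements as minima, per the note above
                ok = any(isinstance(v, (int, float)) and v >= wanted for v in values)
            else:
                # strings must match one of the listed values exactly
                ok = wanted in values
            if not ok:
                break
        if ok:
            matches.append(machine)
    return matches

# Example (hypothetical requirement set): production ia64 machines with at
# least 256 nodes and at least 2 GB of memory per node.
# candidates = find_matching_machines({'CPU_MODEL': 'ia64', 'NODECOUNT': 256,
#                                      'CPU_MEMORY_GB': 2, 'STATUS': 'production'})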
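
# Sketch (an assumption about the consumer, not the actual submission code):
# how a caller might render a 'job_template' from one of the entries above.
# It checks 'required_subs_list', executes the stored 'getwallclock_string'
# source to turn 'wallclock_seconds' into the __MCP_WALLCLOCK__ string, and
# then does plain string substitution of the __MCP_*__ placeholders.  The
# handling of 'argument_prefix'/'argument_suffix' around __MCP_ARGUMENTS__ is
# a guess for illustration.

import time  # the stored getwallclock_string functions refer to the time module

def render_job_script(machine, subs):
    """Fill in a machine's job_template from a dict of substitutions.

    `subs` maps placeholder names (e.g. '__MCP_NODES__') and
    'wallclock_seconds' to their values.
    """
    missing = [name for name in machine['required_subs_list'] if name not in subs]
    if missing:
        raise ValueError("missing required substitutions: %s" % ", ".join(missing))

    # Execute the stored function source to obtain getwallclock_string().
    namespace = {'time': time}
    exec(machine['getwallclock_string'], namespace)
    wallclock = namespace['getwallclock_string'](subs.get('wallclock_seconds', 0))
    script = machine['job_template'].replace('__MCP_WALLCLOCK__', wallclock)

    # Wrap the user arguments with the per-machine prefix/suffix (assumed usage).
    arguments = (machine['argument_prefix']
                 + str(subs.get('__MCP_ARGUMENTS__', ''))
                 + machine['argument_suffix'])
    script = script.replace('__MCP_ARGUMENTS__', arguments)

    # Substitute the remaining __MCP_*__ placeholders.
    for name, value in subs.items():
        if name.startswith('__MCP_'):
            script = script.replace(name, str(value))
    return script

# Example (hypothetical values):
# script = render_job_script(machine_dict_list[0],
#                            {'wallclock_seconds': 3600,
#                             '__MCP_SHELL__': '/bin/bash',
#                             '__MCP_PARALLEL_RUN__': 'mpirun',
#                             '__MCP_SERIAL_RUN__': '#',
#                             '__MCP_NODES__': 4, '__MCP_CPUS_PER_NODE__': 2,
#                             '__MCP_USERNAME__': 'someuser',
#                             '__MCP_SCRATCH_DIR__': '/scratch/someuser',
#                             '__MCP_JOB_DIR__': '/scratch/someuser/job1',
#                             '__MCP_EXECUTABLE__': './a.out',
#                             '__MCP_ARGUMENTS__': '-n 100'})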