#! /bin/csh -f
#
#  Use qsub to start a PBS (or similar) TRANSP batch job
#  to run "runtr $1 $2"...
#
#     $1  -- runid
#     $2  -- runtr argument (default "all")
#     $3  -- job size selector (default "pppl_serial" equivalent to "1x1")
#            alternative: <Nnodes>x<NperNode>, e.g. "4x2" means an 8 proc job
#            built on 4 2-processor nodes.

    # Print the usage summary and quit with status 1 unless between 1 and 7
    # arguments were supplied.  The defaults quoted here mirror the values
    # assigned further down in this script (2000mb per cpu; walltime
    # 80:00:00, or 36:00:00 when NERSC is defined).
    if ( $#argv < 1 || $#argv > 7 ) then
        echo "trqsub syntax: trqsub <runid> [<runtr-argument> [<job-size>]] [options...]"
        echo "   <runid> -- TRANSP runid."
        echo '   <runtr-argument> -- runtr function, e.g. "all" or "rs" or "lrs".'
        echo '   <job-size> -- the default is "pppl_serial" or "1x1"'
        echo '                 choose "4x2" for an 8p job on 4 2p nodes.'
        echo "   <-q queue> -- if you want other than the default queue."
        echo "   <-log log_level> -- set log file log_level. Could be"
        echo '                       "info", "warn", "err" or "nomsg";'
        echo '                       default is "warn".'
        echo " "
        echo "  memory request is 2000mb * <number of processors> "
        echo "  unless overridden by TRQSUB_MEM_LIMIT environment variable."
        echo " "
        echo "  walltime request is 80:00:00 (80 hours), or 36:00:00 (36 hours)"
        echo "  when the NERSC environment variable is set,"
        echo "  unless overridden by TRQSUB_TIME_LIMIT environment variable."
        echo " "
        echo " To prevent being queried for the number of processors for each"
        echo " component in pbs_mpi_qsub, set the environment variables"
        echo "    PMQ_NBI_NPROCS, PMQ_TOR_NPROCS, PMQ_PTR_NPROCS"
        echo " "
        exit 1
    endif

#
# CLF: directory searched for the pppl...tcsh job scripts -- TRANSP_LOCATION
# when that variable is defined, otherwise the pbs subdirectory of CODESYSDIR.
if ( ! $?TRANSP_LOCATION ) then
    set CMD_PATH = $CODESYSDIR/pbs
else
    set CMD_PATH = $TRANSP_LOCATION
endif

# Mail option string: empty unless at least one of the PMQ_*_NPROCS
# environment variables is defined.
set mop = ""
if ( $?PMQ_NBI_NPROCS || $?PMQ_TOR_NPROCS || $?PMQ_PTR_NPROCS ) then
    set mop = "-m a"   # mail on abort only
endif

#  Output directory: the current directory, except that a path starting
#  with /local/ is rewritten to /l/<hostname up to the first dot>/...
    set loc_host = `hostname | sed 's#\..*##'`
    set loc_cwd = `pwd`

    # Strip a leading /local/; the path only changes if that prefix was there.
    set cwd_sans_local = `echo $loc_cwd | sed 's#^/local/##'`
    if ( $cwd_sans_local != $loc_cwd ) then
        set loc_cwd = "/l/$loc_host/$cwd_sans_local"
    endif

    # ---- command line ----------------------------------------------------
    # The first argument is always the runid.  The remaining arguments are
    # scanned left to right:
    #    -q <queue>    sets PBSQUE (left undefined when the option is absent)
    #    -log <level>  sets logfile_level
    #    anything else is taken as the runtr argument the first time and as
    #                  the job size afterwards
    set runid = $1
    echo " runid: $runid "

    # Defaults used when the corresponding argument is not given.
    set runtr_arg = "all"
    set jobsize = "pppl_serial"
    @ nnodes = 1
    @ npernode = 1
    set logfile_level = 1

    @ iarg = 1
    set have_runtr_arg = "N"
    while ( $iarg < $#argv )
        @ iarg++
        if ("${argv[$iarg]}" == "-q" ) then
            @ iarg++
            # A trailing "-q" would otherwise index argv past its end and
            # stop the script with a bare subscript error.
            if ( $iarg > $#argv ) then
                echo "trqsub: option -q requires a queue name."
                exit 1
            endif
            set PBSQUE = ${argv[$iarg]}
            echo pbsque = $PBSQUE
        else if ("${argv[$iarg]}" == "-log" ) then
            @ iarg++
            # Same guard for a trailing "-log".
            if ( $iarg > $#argv ) then
                echo "trqsub: option -log requires a log level."
                exit 1
            endif
            set logfile_level = ${argv[$iarg]}

            # Accept either the number or the name of a level.  The value is
            # kept exactly as typed; only an unrecognized value is replaced
            # (by 1).
            if ( "$logfile_level" == "0" || "$logfile_level" == "info" ) then
              echo "trqsub: logfile_level = 0 (info)"
            else if ( "$logfile_level" == "1" || "$logfile_level" == "warn" ) then
              echo "trqsub: logfile_level = 1 (warn)"
            else if ( "$logfile_level" == "2" || "$logfile_level" == "err" ) then
              echo "trqsub: logfile_level = 2 (error)"
            else if ( "$logfile_level" == "3" || "$logfile_level" == "nomsg" ) then
              echo "trqsub: logfile_level = 3 (nomsg)"
            else
              echo "trqsub: logfile_level of $logfile_level unrecognized, reset to 1 (warn)"
              set logfile_level = 1
            endif
        else
           if ( $have_runtr_arg == "N" ) then
              set runtr_arg = ${argv[$iarg]}
              set have_runtr_arg = "Y"
           else
              set jobsize = ${argv[$iarg]}
           endif
        endif
    end

    # Exported so that it can be handed on to the batch job as LOG_LEVEL.
    setenv LOG_LEVEL $logfile_level

    echo " runtr_arg: $runtr_arg "
    echo " jobsize: $jobsize "

    # ---- job size --------------------------------------------------------
    # Anything other than "pppl_serial" must have the form
    # <#nodes>x<#perNode>.  ier counts the problems found; any problem ends
    # the script with status 3.  The limits named in the messages are the
    # ones tested here: 1:3200 nodes and {1,2,4,8,16,24,32,48,64} per node.
    @ ier = 0
    if ( "$jobsize" != "pppl_serial" ) then
        # parse job size: text before the first "x" and text after the last "x"
        set try_nnodes = `echo $jobsize | sed 's#x.*##'`
        set try_nper = `echo $jobsize | sed 's#.*x##'`
        if ( "$try_nnodes" == "$jobsize" ) then
            # nothing was stripped, so the specifier contains no "x"
            @ ier++
        else
            # Trial assignment inside backquotes; $status is checked
            # afterwards to detect a value that is not an integer.
            set test = `@ nnodes = $try_nnodes`
            if ( $status != 0 ) then
                @ ier++
                echo " <#nodes> in <#nodes>x<#perNode> not integer: $try_nnodes in $jobsize"
            else
                @ nnodes = $try_nnodes
                if ( $nnodes < 1 || $nnodes > 3200 ) then
                    @ ier++
                    echo " $nnodes <#nodes> in <#nodes>x<#perNode> not in range 1:3200 "
                endif
            endif
            set test = `@ npernode = $try_nper`
            if ( $status != 0 ) then
                @ ier++
                echo " <#perNode> in <#nodes>x<#perNode> not integer: $try_nper in $jobsize"
            else
                @ npernode  = $try_nper
                if ( $npernode != 1  && $npernode != 2   && \
                     $npernode != 4  && $npernode != 8   && \
                     $npernode != 16 && $npernode != 24  && \
                     $npernode != 32 && $npernode != 48  && \
                     $npernode != 64) then
                    @ ier++
                    echo " $npernode <#perNode> in <#nodes>x<#perNode> not one of {1,2,4,8,16,24,32,48,64}"
                endif
            endif
        endif
    endif

    if ( $ier > 0 ) then
        echo "trqsub: ill formed job size specifier."
        echo "  form of specify should be NxM or <#nodes>x<#perNode>"
        echo "  for example 4x2 to specify an 8p job on 4 2p nodes."
        echo "  <#nodes> can be in the range 1:3200"
        echo "  <#perNode> must be selected from the set {1,2,4,8,16,24,32,48,64}."
        echo " "
        echo "full command syntax with job size specifier:"
        echo "  trqsub <runid> all <#nodes>x<#perNode> [-q <PBS-queue-name>]"
        exit 3
    endif

    # Total number of processes requested.  A request that amounts to one
    # process (e.g. "1x1") is handled as the serial job.
    @ nprocs = $nnodes * $npernode
    if ( $nprocs == 1 ) then
        echo "jobsize 1x1 => serial job."
        set jobsize = "pppl_serial"
    endif

    # <runid>tr.mk has to be present in the current directory; if it is
    # not, list the preparation steps and quit with status 1.
    if ( ! -f ${runid}tr.mk ) then
        echo "trqsub: not found: ${runid}tr.mk"
        echo "  Before using this script:"
        echo "     (a) go to appropriate tokamak subdirectory of WORKDIR, e.g.:"
        echo "         $WORKDIR/NSTX "
        echo "     (b) provide a namelist for the run: ${runid}TR.DAT"
        echo "     (c) run   pretr   interactively for this run ${runid}."
        exit 1
    endif

#  A PBS job name cannot start with a numerical digit, so the runid is
#  prefixed with "q"; a non-serial job also gets "_<jobsize>" appended.

    if ( "$jobsize" == "pppl_serial" ) then
        set jobname = q$runid
    else
        set jobname = q${runid}_$jobsize
    endif

    echo " jobname: $jobname "

    # The PBS log goes into the output directory; a log left over from an
    # earlier submission is kept under the same name with "~" appended.
    set joblog = $loc_cwd/${jobname}.joblog
    if ( -f $joblog ) then
        mv $joblog ${joblog}~
        echo "(renamed old $joblog to $joblog~)"
    endif
    echo " joblog: $joblog "
    echo " (this is the pbs log, separate from ${runid}tr.log)"
    echo " "

    # Look for qstat on the command path.  Any reply from `which` that is
    # not exactly one word is taken to mean PBS is not available; the reply
    # is shown and the script quits with status 5.
    set which_reply = `which qstat`
    if ( $#which_reply != 1 ) then
        echo $which_reply
        echo "  PBS appears to be unavailable; exiting... "
        exit 5
    endif

    # Walltime request: TRQSUB_TIME_LIMIT when defined; otherwise 36:00:00
    # if NERSC is defined and 80:00:00 if it is not.
    if ( $?TRQSUB_TIME_LIMIT ) then
      set time_limit = $TRQSUB_TIME_LIMIT
    else
      if ( $?NERSC ) then
         set time_limit="36:00:00"
      else
         set time_limit="80:00:00"
      endif
      echo " %trqsub walltime default, no TRQSUB_TIME_LIMIT, $time_limit"
    endif

    # Memory request: TRQSUB_MEM_LIMIT is used as given when defined.
    # Otherwise the default is a per-cpu figure which, for a non-serial job,
    # is converted to a job total by calc_memtot.
    if ( $?TRQSUB_MEM_LIMIT ) then
      set mem_limit = $TRQSUB_MEM_LIMIT
    else
#MG      set mem_limit = "6000mb"
      set mem_limit = "2000mb"
#MG      set mem_limit = "4000mb"
      echo " %trqsub memory default, no TRQSUB_MEM_LIMIT, $mem_limit per cpu"
      if ( "$jobsize" != "pppl_serial" ) then
        # Keep the per-cpu value in its own variable: mem_limit is
        # overwritten by the substitution below, so the error message would
        # otherwise not show what calc_memtot was actually called with.
        set mem_per_cpu = $mem_limit
        set mem_limit = `calc_memtot $mem_per_cpu $nnodes $npernode`
        if ( $status ) then
          echo " ?trqsub: error in: calc_memtot $mem_per_cpu $nnodes $npernode"
          exit 1
        endif
      endif
    endif

    # How RUNTR_ARG reaches the job.  Without NERSC defined it is named,
    # together with LOG_LEVEL, in a -v list (RUN_ARGS) and EV stays empty.
    # With NERSC defined RUN_ARGS is empty, EV is "-V" and RUNTR_ARG is put
    # into the environment instead.
    if ( ! $?NERSC ) then
       set EV = ""
       set RUN_ARGS = "-v RUNTR_ARG=$runtr_arg,LOG_LEVEL=$LOG_LEVEL"
    else
# Edison can't handle -v
       set EV = "-V"
       set RUN_ARGS = ""
       setenv RUNTR_ARG $runtr_arg
    endif

    # ---- submission --------------------------------------------------------
    # A serial job is handed to qsub directly with the pppl_serial.tcsh job
    # script (or a per-run edited copy of it); any other job size goes
    # through pbs_mpi_qsub.  In both cases a marker file
    # <runid>zz.runtrx:sque / :mque holding the date is written once the
    # submission command returns status 0.
    if ( "$jobsize" == "pppl_serial" ) then
       set pbs_cmd = $CMD_PATH/pppl_serial.tcsh
       #
       # if tshare gfortran, uncomment SWAPGCC
       # (the ###SWAPGCC markers are deleted in a per-run copy of the script)
       if ( $?SWAPGCC ) then
          cat $pbs_cmd | sed s"/###SWAPGCC//" > ${runid}_serial.tcsh
          set pbs_cmd = ${runid}_serial.tcsh
       endif
       #
       # if run mpi genray and cql3d subprocess from serial transp:
       # look in the namelist for a line that starts with
       # "ncql3d_pserve = -1", drop anything after a "!" and keep the text
       # that follows the "=".  An empty result means no such line was found.
       set cql3d_pserve=`grep -i '^\ *ncql3d_pserve\ *=\ *-\ *1' ${runid}TR.DAT |cut -d! -f1 |sed 's/.*= *//' | sed 's/^\s+|\s+$//g'`
       echo " %cql3d_pserve: $cql3d_pserve "

       if ( "$cql3d_pserve" != '' ) then
          echo "    %cql3d_pserve: subprocess parallel"
          if ( $?PMQ_CQL3D_NPROCS ) then
             # NOTE(review): in this case only ncql3d_procs is set; the job
             # script is not edited to request more nodes -- confirm that
             # this is intended.
             set ncql3d_procs = $PMQ_CQL3D_NPROCS
          else
             # Ask for the node count and the processes per node; their
             # product is the number of CQL3D processes.
             echo " "
             echo -n "    To use MPI World Communicator for CQL3D, enter ncql3d_nodes: "
             set ncql3d_nodes = $<
             echo -n "    To use MPI World Communicator for CQL3D, enter ncql3d_ppn (procs per node): "
             set ncql3d_ppn = $<
             @ ncql3d_procs = $ncql3d_nodes * $ncql3d_ppn
             echo "    %ncql3d_procs: $ncql3d_procs "
             if ( $ncql3d_procs > 1 ) then
                echo "    CQL3D  $ncql3d_nodes "
                echo "    CQL3D  $ncql3d_ppn "
                # Activate the "###PBS -l nodes=1:ppn=1" line of the job
                # script with the requested geometry.  The result goes
                # through tmp_serial.tcsh because $pbs_cmd may already be
                # ${runid}_serial.tcsh (SWAPGCC case above).
                cat $pbs_cmd | sed s"/###PBS -l nodes=1:ppn=1/#PBS -l nodes=`echo $ncql3d_nodes`:ppn=`echo $ncql3d_ppn`/" > tmp_serial.tcsh
                cat tmp_serial.tcsh | sed s"/This job is/CQL3D/g" > ${runid}_serial.tcsh
                set pbs_cmd = ${runid}_serial.tcsh
             endif
          endif
       else
          echo "    %cql3d_pserve: subprocess serial "
       endif

       # The echoed text shows the command exactly as it is run below.
       # NOTE(review): $mop (set near the top of this script) is not passed
       # to qsub here; confirm whether it was meant to be.
       if ($?PBSQUE) then

           echo  "    qsub -N $jobname $RUN_ARGS -o $joblog \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
              -q $PBSQUE $pbs_cmd "
           qsub -N $jobname $RUN_ARGS -o $joblog \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
              -q $PBSQUE $pbs_cmd
           set qstat = $status
       else
           echo  "    qsub -N $jobname $RUN_ARGS -o $joblog \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
              $pbs_cmd "
           # The commented-out option lines inside the command end in a
           # backslash on purpose: the command relies on csh carrying on
           # past such comment lines, so $pbs_cmd below is still its last
           # argument.
           qsub -N $jobname $RUN_ARGS -o $joblog \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
#             -W x=excludenodes:dawson010 \
#              -l nodes=dawson102:ppn=16+dawson111:ppn=16 \
                 $pbs_cmd
# example how to exclude some nodes -W x=excludenodes:dawson101:dawson102
           set qstat = $status
       endif
       if ( $qstat == 0 ) then
           echo "PPPL serial TRANSP job requested (to check -- use qstat command)."
           # replace any earlier status marker of this run by a "sque" marker
           rm -f ${runid}zz.runtrx:* >& /dev/null
           set zdate = `date`
           echo $zdate > ${runid}zz.runtrx:sque
       else
           echo " qsub error, failed to submit job."
       endif
    else
       # Variables handed to the MPI job: the -v list is extended by the
       # TRANSP_EXEC_* settings that are defined; with NERSC defined
       # RUNTR_ARG goes into the environment and no -v list is used.
       if ( $?NERSC ) then
          setenv RUNTR_ARG $runtr_arg
# Edison can't handle -v
          set    RUN_ARGS =""
       else
         set RUN_ARGS = "-v RUNTR_ARG=$runtr_arg,LOG_LEVEL=$LOG_LEVEL"
         if ($?TRANSP_EXEC_METHOD) then
           set RUN_ARGS = "$RUN_ARGS,TRANSP_EXEC_METHOD=$TRANSP_EXEC_METHOD"
         endif
         if ($?TRANSP_EXEC_VERBOSE) then
           set RUN_ARGS = "$RUN_ARGS,TRANSP_EXEC_VERBOSE=$TRANSP_EXEC_VERBOSE"
         endif
         if ($?TRANSP_EXEC_NAME) then
           set RUN_ARGS = "$RUN_ARGS,TRANSP_EXEC_NAME=$TRANSP_EXEC_NAME"
         endif
       endif
       # As in the serial case, the commented-out option lines inside the
       # commands below end in a backslash so that the command continues
       # past them.
       if ($?PBSQUE) then
           pbs_mpi_qsub pppl_trmpi $nnodes $npernode \
              -runid $runid \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
#             -W x=excludenodes:dawson010 \
              -N $jobname $RUN_ARGS \
              -o $joblog -q $PBSQUE
           set qstat = $status
       else
           pbs_mpi_qsub pppl_trmpi $nnodes $npernode \
              -runid $runid \
              -l mem=$mem_limit -l walltime=$time_limit $EV \
#              -l nodes=dawson102:ppn=16+dawson111:ppn=16 \
#              -l nodes=dawson095:ppn=32+dawson096:ppn=32 \
#             -W x=excludenodes:dawson010 \
              -N $jobname $RUN_ARGS \
              -o $joblog
           set qstat = $status
       endif
       if ( $qstat == 0 ) then
           echo "PPPL MPI TRANSP job requested (to check -- use qstat command)."
           # replace any earlier status marker of this run by an "mque" marker
           rm -f ${runid}zz.runtrx:* >& /dev/null
           set zdate = `date`
           echo $zdate > ${runid}zz.runtrx:mque
       else
           echo " qsub error, failed to submit job."
       endif
    endif
