#!/bin/bash
#-----------------------------------------------------------------------
# File    : djc_prob
# Contents: probabilistic network induction
#           on artificial Danish Jersey Cattle data
# Author  : Christian Borgelt
# History : 17.12.1999 file created
#           16.01.2000 simulated annealing added from file djc_sian
#           05.03.2002 shell changed from csh to sh
#           10.04.2002 all induction loops moved into functions
#-----------------------------------------------------------------------

# Generate ten pairs of random databases from djc.net.
#   $1 - base value for the -s option of the training databases
#   $2 - base value for the -s option of the test databases
# For run i (0..9) gendb is called with -s<base*(i+1)> and writes
# train<i>.tab resp. test<i>.tab.  (-s is presumably the random seed;
# gendb itself is not part of this file.)  Diagnostics are discarded.
function generate ()
{                               # --- generate random databases
  local i                       # do not clobber a caller's loop counter
  for (( i = 0; i < 10; i++ )); do
    gendb "-s$(( $1*(i+1) ))" djc.net "train$i.tab" 2> /dev/null
    gendb "-s$(( $2*(i+1) ))" djc.net "test$i.tab"  2> /dev/null
  done
}  # generate()

#-----------------------------------------------------------------------

function collect ()
{                               # --- collect evaluation results
  # Condense the report read from stdin into the first part of a result
  # row: network name, number of conditions, additional and missing
  # conditions, number of parameters and the average value.
  # No newline is written, so more fields can be appended to the row.
  gawk '
    # for every kind of report line keep the most recent value;
    # only net, con, add, mis, par and avg are printed at the end
    /evaluation of/         { fig["net"] = $3  }
    /number of attributes/  { fig["att"] = $NF }
    /number of conditions/  { fig["con"] = $NF }
    /number of parameters/  { fig["par"] = $NF }
    /number of tuples/      { fig["tpl"] = $NF }
    /impossible tuples/     { fig["imp"] = $4  }
    /minimum/               { fig["min"] = $NF }
    /average/               { fig["avg"] = $NF }
    /maximum/               { fig["max"] = $NF }
    /additional conditions/ { fig["add"] = $NF }
    /missing    conditions/ { fig["mis"] = $NF }
    END {
      printf("%-12s %3d %3d %3d %5d %10g",
             fig["net"], fig["con"], fig["add"], fig["mis"],
             fig["par"], fig["avg"]);
    }'
}  # collect()

#-----------------------------------------------------------------------

function average ()
{                               # --- average evaluation results
  # Reads the result rows in prob.tmp (name followed by six numbers)
  # and prints, for each run of consecutive rows carrying the same
  # name, one line with the name and the mean of every column.
  gawk '
    # print the column means of the group gathered so far
    function emit() {
      printf("%-10s %6.1f %6.1f %6.1f %7.1f %10.1f %10.1f\n", name,
             sum[2]/cnt, sum[3]/cnt, sum[4]/cnt,
             sum[5]/cnt, sum[6]/cnt, sum[7]/cnt);
    }
    BEGIN { name = ""; }
    {
      if ($1 == name) {         # same network: add to running sums
        for (k = 2; k <= 7; k++) sum[k] += $k;
        cnt++;
      } else {                  # new network: flush, then start over
        if (cnt > 0) emit();
        name = $1; cnt = 1;
        for (k = 2; k <= 7; k++) sum[k] = $k;
      }
    }
    END { if (cnt > 0) emit(); }' prob.tmp
}  # average()

#-----------------------------------------------------------------------

# Evaluate a network file and append one result row to prob.tmp.
#   $1 - network file; it is deleted once both evaluations are done
#   $2 - run index selecting train<$2>.tab and test<$2>.tab
# The row consists of the fields printed by collect() for the training
# data (evaluated with the additional options -c djc.net) followed by
# the "average" value reported for the test data and a newline.
# Arguments are quoted so that a name containing blanks stays one word.
function evaluate ()
{                               # --- evaluate a given network
  neval -L1 -c djc.net "$1" "train$2.tab" 2> /dev/null \
    | collect >> prob.tmp
  neval -L1 "$1" "test$2.tab" 2> /dev/null \
    | gawk '/average/ { printf(" %10.1f\n", $NF); }' >> prob.tmp
  rm -f -- "$1"                 # network file is no longer needed
}  # evaluate()

#-----------------------------------------------------------------------

# Induce a network on each of the ten training databases, evaluate it,
# and append the averaged results to prob.res (also shown on stdout).
#   $1 - value for the -s option of ines (callers pass names such as
#        owst, topord; presumably the search method)
#   $2 - value for the -e option of ines (callers pass measure names)
#   $3 - name of the network file to write; also the row label
#   $4 - optional extra option(s) for ines; may be empty or missing
function induce ()
{                               # --- induce and evaluate networks
  local i                       # do not clobber a caller's loop counter
  rm -f prob.tmp                # start with an empty result table
  for (( i = 0; i < 10; i++ )); do
    # $4 is deliberately unquoted: an empty value has to disappear
    # instead of being passed to ines as an empty argument.
    # shellcheck disable=SC2086
    ines -x "-s$1" "-e$2" djc.dom "train$i.tab" "$3" $4 2> /dev/null
    evaluate "$3" "$i"
  done
  average | tee -a prob.res
}  # induce()

#-----------------------------------------------------------------------

# Build and evaluate a network of fixed structure for all ten runs and
# append the averaged results to prob.res (also shown on stdout).
#   $1 - name of the network file / row label; "indep" makes ines read
#        djc.dom (the empty network), any other name makes it read
#        djc.net (the original network)
function fixed ()
{                               # --- evaluate empty/original network
  local i src                   # keep helper variables out of global scope
  if [[ "$1" == indep ]]; then src="djc.dom"
                          else src="djc.net"; fi
  rm -f prob.tmp                # start with an empty result table
  for (( i = 0; i < 10; i++ )); do
    ines -x "$src" "train$i.tab" "$1" 2> /dev/null
    evaluate "$1" "$i"
  done
  average | tee -a prob.res
}  # fixed()

#-----------------------------------------------------------------------

# Run the "owst" induction once per evaluation measure.
# Writes a section header to prob.res (and stdout), then calls
# induce() with the measure name doubling as the network/row name.
function owst ()
{                               # --- optimum weight spanning tree cons.
  local m                       # measure; kept out of global scope
  echo "---owst------------------------------------------------------" \
       | tee -a prob.res
  for m in infgain infsgr1 chi2; do
    induce owst "$m" "$m"
  done
}  # owst()

#-----------------------------------------------------------------------

# Run the "extst" induction once per evaluation measure.
# Writes a section header to prob.res (and stdout), then calls
# induce() with the measure name doubling as the network/row name.
function extst ()
{                               # --- optimum weight spanning tree ext.
  local m                       # measure; kept out of global scope
  echo "---extst-----------------------------------------------------" \
       | tee -a prob.res
  for m in infgain infsgr1 chi2; do
    induce extst "$m" "$m"
  done
}  # extst()

#-----------------------------------------------------------------------
 
# Run the "topord" induction once per evaluation measure.
# Writes a section header to prob.res (and stdout), then calls
# induce() for every measure.  The row name is always the measure
# name; "bdeu" is not passed to ines as such but is run as measure
# "bdm" with the extra option -p-20 (meaning of -p not visible here).
function topord ()
{                               # --- selection on topological order
  local m mm x                  # row name, measure for ines, extra option
  echo "---topord----------------------------------------------------" \
       | tee -a prob.res
  for m in infgain infgr infsgr1 gini chi2 bdm bdeu rdlrel; do
    case "$m" in
      bdeu) mm="bdm" x="-p-20" ;;
      *)    mm="$m"  x=""      ;;
    esac
    # ${x:+...} drops the argument entirely when no option is needed
    induce topord "$mm" "$m" ${x:+"$x"}
  done
}  # topord()

#-----------------------------------------------------------------------

# Run the "noloop" induction once per evaluation measure.
# Writes a section header to prob.res (and stdout), then calls
# induce() for every measure.  The row name is always the measure
# name; "bdeu" is not passed to ines as such but is run as measure
# "bdm" with the extra option -p-20 (meaning of -p not visible here).
function noloop ()
{                               # --- selection avoiding directed loops
  local m mm x                  # row name, measure for ines, extra option
  echo "---noloop----------------------------------------------------" \
       | tee -a prob.res
  for m in infgain infgr infsgr1 gini chi2 bdm bdeu rdlrel; do
    case "$m" in
      bdeu) mm="bdm" x="-p-20" ;;
      *)    mm="$m"  x=""      ;;
    esac
    # ${x:+...} drops the argument entirely when no option is needed
    induce noloop "$mm" "$m" ${x:+"$x"}
  done
}  # noloop()

#-----------------------------------------------------------------------

# Run the "sian" induction twice, with -w0 and with -w1.
# Writes a section header to prob.res (and stdout).  For each -w
# value a network is induced on all ten training databases, evaluated,
# and the averaged results are appended to prob.res.  The networks are
# named sian_no (-w0) and sian_yes (-w1).  The -S option receives the
# digit 1 followed by the run index, i.e. 10..19 (presumably a seed).
function sian ()
{                               # --- hypertree simulated annealing
  local p i out                 # -w value, run index, network name
  echo "---sian------------------------------------------------------" \
       | tee -a prob.res
  for p in 0 1; do
    if (( p == 0 )); then out="sian_no"
                     else out="sian_yes"; fi
    rm -f prob.tmp              # start with an empty result table
    for (( i = 0; i < 10; i++ )); do
      ines -x -ssian "-w$p" "-S1$i" djc.dom "train$i.tab" "$out" \
           2> /dev/null
      evaluate "$out" "$i"
    done
    average | tee -a prob.res
  done
}  # sian()

#-----------------------------------------------------------------------

function cleanup ()
{                               # --- clean up temporary files
  # Remove the intermediate result table and the generated databases
  # train0.tab .. train9.tab and test0.tab .. test9.tab; files that do
  # not exist are silently skipped.
  rm -f prob.tmp train[0-9].tab test[0-9].tab
}  # cleanup()

#-----------------------------------------------------------------------

# --- table header: starts a fresh prob.res and is echoed to stdout
{
  printf '%s\n' \
    "network      cond    add   miss  params      train       test"
  printf '%s\n' \
    "-------------------------------------------------------------"
} | tee prob.res

# --- data and reference networks
generate 13 17          # random databases (train base 13, test base 17)
fixed indep             # empty    network
fixed orig              # original network

# --- induction experiments (extst and noloop are switched off)
owst                    # optimum weight spanning tree construction
#extst                   # optimum weight spanning tree extension
topord                  # condition selection on topological order
#noloop                  # condition selection avoiding directed loops
sian                    # hypertree simulated annealing

# --- remove intermediate files; prob.res is kept
cleanup
