#!/usr/bin/env bash
VERSION=22-282

#  (C) Copyright 2005-2022 Stijn van Dongen
#
#  This file is part of MCL.  You can redistribute and/or modify MCL under the
#  terms of the GNU General Public License; either version 3 of the License or
#  (at your option) any later version.  You should have received a copy of the
#  GPL along with MCL, in the file COPYING.

#  Add tab-completion to this script with
# function _clxdo_complete()
# {  local tag="${COMP_WORDS[1]}"
#    if [[ $COMP_CWORD == 1 ]]; then
#      COMPREPLY=( $(clxdo -h | cut -f 2 -d ' ' | grep "$tag") )
#    else
#      COMPREPLY=( $(ls ${COMP_WORDS[-1]}*) )
#    fi
# }
# complete -F _clxdo_complete clxdo


# Record the argument count and the requested mode up front: the mode arms
# shift the positional parameters, and require_num reads both as globals.
nargs=$#
# A missing or empty mode falls back to the help listing.
mode=${1:--h}
# Use 2 for MCLXIOVERBOSITY unless the caller already has it set.
export MCLXIOVERBOSITY="${MCLXIOVERBOSITY-2}"

# From here on any failing simple command aborts the script.
set -e

# require_num <num> <usage>
#   Verify that at least <num> arguments follow the mode on the command line.
#   Globals:   nargs (read) - total argument count including the mode
#              mode  (read) - name of the selected mode, used in the message
#   Arguments: $1 - number of arguments the mode needs
#              $2 - usage text shown when too few were given
#   Outputs:   on failure, a one-line usage message on stderr (so it is not
#              mistaken for data when the mode's output is piped)
#   Returns:   0 when enough arguments are present, 1 otherwise; because the
#              script runs under 'set -e' a non-zero return aborts it.
function require_num() {
   local num=$1
   local usage=$2
   if (( num+1 > nargs )); then
      echo "mode $mode needs arguments: $usage" >&2
      return 1
   fi
}


    case "$mode" in


################################

    version)
      # Print the script version and stop.
      printf 'clxdo version: %s\n' "$VERSION"
      exit
      ;;


################################

    -h)
      # Print the mode overview, sorted alphabetically, then stop.
      sort <<EOU
clxdo gra          <cls-file>                    #  show cluster sizes
clxdo grabig       <num> <cls-file>              #  <num> largest sizes
clxdo grabar       <num> <cls-file>              #  largest sizes counting up to <num>
clxdo gragq        <num> <cls-file>              #  all sizes at least <num>
clxdo gralog       <cls-file>                    #  show cluster size distribution on log scale
clxdo check_symmetry       <mx-file>             #  check symmetry. OUTPUTS DIFFERENCE EDGES
clxdo check_symmetry_present  <mx-file>          #  check symmetry of presence. OUTPUTS DIFFERENCE EDGES
clxdo dist_pct <STDIN clm dist output>           #  convert clm dist output to percentages
clxdo grok    <cls-file> <mx-file> <nsm-name> <ccm-name> #  node-cls and cls-cls
clxdo randomize_cls  <cls-file>                  #  randomize clustering
clxdo annot2tab  <annot-file> <dict-file>        #  make dict file from annot file
clxdo gen_root <num>                             #  generate root clustering on <num> nodes
clxdo gen_singletons <num>                       #  generate singletons clustering on <num> nodes
clxdo cls_annot_summary <cls-annot-dump-file>
clxdo tab_grep <tab-file> <identifier-file>      # get mcl identifiers for external identifiers
clxdo dump_cls <cls-file> <dict-file>            # dump clustering as lines of labels
clxdo coarsen <cls-file> <mx-file>               # create graph on clusters
clxdo mxsum <mx-file>+                           # sum matrices (output on STDOUT)
clxdo multiplex_coarse_cls <coarse-clustering> <fine-clustering>
clxdo factor_table_from_cls <node-to-factormatrix> <node-clustering> <factor-dictionary>
clxdo make_map <source-cls> <dest-cls>           # make map file between clusterings
clxdo diff_table <tbl-file> <tbl-file>           # prints max(abs(a-b))
clxdo diff_mx <tbl-file> <tbl-file>              # dumps all edges from a-b
clxdo version                                    # version
EOU
      exit
      ;;



################################

    gen_singletons)
      # Generate the singletons clustering on <num> nodes: perl emits the
      # pairs "i i" for i = 0 .. num-1 and mcxload reads them from STDIN.
      # The count is handed to perl through the environment.
      require_num 1 "<num>"
      export CLXDO_VAL1=$2
      perl -e 'for ($i = 0; $i < $ENV{CLXDO_VAL1}; $i++) { print "$i $i\n"; }' \
        | mcxload -123 - --stream-split
      ;;



################################

    mxsum)
# Sum two or more matrix files with an mcxi program fed on STDIN; the result
# is written to '/-' (STDOUT according to the help text).
require_num 2 "<mx>+"
shift 1
# Prefix every remaining argument with '/' so that each file name appears as
# a /name token in the mcxi program below (presumably mcxi's string
# syntax -- confirm against the mcxi manual).
mxlist=("${@/#/\/}")
# NOTE(review): nlist is assigned here but never read in this script.
nlist=${#mxlist[*]}
# The here-document delimiter is unquoted, so the shell expands
# ${mxlist[@]} into the space-separated list of /name tokens before mcxi
# sees the program. The program then appears to load the top name into .sum
# and keep loading and adding while the next item is a string -- TODO confirm.
      mcxi <<EOC
0 vb
   ${mxlist[@]}
	 lm .sum def
   { type /str eq }
   { lm .sum add .sum def }
   while
   .sum /- wm
EOC

      ;;



################################

    gen_root)
      # Generate the root clustering on <num> nodes: perl emits the pairs
      # "0 i" for i = 0 .. num-1 and mcxload reads them from STDIN.
      # The count is handed to perl through the environment.
      require_num 1 "<num>"
      export CLXDO_VAL1=$2
      perl -e 'for ($i = 0; $i < $ENV{CLXDO_VAL1}; $i++) { print "0 $i\n"; }' \
        | mcxload -123 - -123-maxc 1 --stream-split
      ;;


################################

    grabig)
      # For every clustering file print its name, left-aligned in a padded
      # column, followed by its <num> largest cluster sizes on one line.
      #   CLXDO_WIDTH       overrides the computed column width
      #   CLXDO_GRABIG_TAG  optional text printed right after the file name
      # All expansions are quoted so file names containing whitespace or
      # glob characters are passed through intact.
      require_num 2 "<num> <cls-file>+"
      export CLXDO_VAL1=$2
      shift 2
      # Column width: longest file name plus two spaces of padding.
      width=0
      for mx in "$@"; do
         if (( ${#mx} > width )); then
            width=${#mx}
         fi
      done
      ((width+=2))
      thewidth=${CLXDO_WIDTH-$width}
      for mx in "$@"; do
         printf "%-${thewidth}s" "$mx"
         if [[ -n ${CLXDO_GRABIG_TAG-""} ]]; then
            # The tag stays the printf format (as before) so that escapes
            # such as \t inside it are still interpreted.
            printf "$CLXDO_GRABIG_TAG"
         fi
         # One line per cluster -> field count minus one is the cluster size.
         mcxdump -imx "$mx" --no-values --dump-lines -o - \
            | perl -ane 'print (@F-1); print "\n";' \
            | sort -nr | head -n "$CLXDO_VAL1" | tr '\n' ' '
         echo
      done
      ;;


################################

    gragq)
      # For every clustering file print, largest first on one line, all
      # cluster sizes that are at least <num>. The threshold reaches perl
      # through the environment variable CLXDO_GRAGQ.
      # Expansions are quoted so unusual file names survive intact.
      require_num 2 "<num> <cls-file>+"
      export CLXDO_GRAGQ=$2
      shift 2
      for mx in "$@"; do
         # size = fields - 1, so "@F > num" is the same test as "size >= num".
         mcxdump -imx "$mx" --no-values --dump-lines -o - \
            | perl -ane '$x=@F-1; print "$x\n" if @F>$ENV{CLXDO_GRAGQ};' \
            | sort -nr | tr '\n' ' '
         echo
      done
      ;;


################################

    grabar)
      # For every clustering file print the largest cluster sizes, in
      # descending order, until their running total reaches <num>. The bound
      # reaches perl through the environment variable CLXDO_GRABAR.
      # Expansions are quoted so unusual file names survive intact.
      require_num 2 "<num> <cls-file>+"
      export CLXDO_GRABAR=$2
      shift 2
      for mx in "$@"; do
         mcxdump -imx "$mx" --no-values --dump-lines -o - \
            | perl -ane 'print (@F-1); print "\n";' \
            | sort -nr \
            | perl -ane 'chomp; $y+=$_; print "$_\n"; last if $y >= $ENV{CLXDO_GRABAR};' \
            | tr '\n' ' '
         echo
      done
      ;;


################################

    gra)
      # For every clustering file print all cluster sizes in ascending
      # order on a single line.
      # Expansions are quoted so unusual file names survive intact.
      require_num 1 "<cls-file>+"
      shift 1
      for mx in "$@"; do
         # One dump line per cluster -> field count minus one is its size.
         mcxdump -imx "$mx" --no-values --dump-lines -o - \
            | perl -ane 'print (@F-1); print "\n";' \
            | sort -n | tr '\n' ' '
         echo
      done
      ;;


################################

    gralog)
      # For every clustering file show the cluster size distribution on a
      # log scale. Each size s is binned at int(0.99 + log(s)/log(10**0.5)),
      # the bin accumulates the number of nodes (s itself), and the END block
      # prints the cumulative percentage of nodes per bin above a ruler line.
      # The perl program is supplied as a file via process substitution; its
      # here-document is quoted so the shell leaves it untouched.
      # Shell expansions are quoted so unusual file names survive intact.
      require_num 1 "<cls-file>+"
      shift 1
      for mx in "$@"; do
         mcxdump -imx "$mx" --no-values --dump-lines -o - \
            | perl -ane 'print (@F-1); print "\n";' \
            | sort -n | perl -an <(cat <<'EOF'
BEGIN { @counts = (); $sum = 0; }
chomp; my $index = int(0.99 + log($_)/log(10**0.5));
$counts[$index] += $_; $sum += $_;
END {
  my @pct = ();
  my $line1 = "    ";
  for (my $i=0; $i<@counts; $i++) {
    $psum += $counts[$i];
    $line1 .= sprintf("%4.0f", 100.0 * $psum / $sum);
  }
  for (my $i=0; $i<1+@counts/2;$i++) {
    $line2 .= '-------' . $i;
  }
  $line1 .= ' ' x (length($line2) - length($line1));
  print "$line1  percentage of nodes in clusters\n";
  print "$line2  of size <= 10^i and 10^(i+0.5)\n";
}
EOF
)
      done
      ;;


################################

    check_symmetry)
      # Check symmetry of a matrix: the mcxi program combines the matrix
      # with -1 times what `tp` yields (presumably its transpose -- confirm)
      # and the result is dumped, i.e. the difference edges are printed.
      # The file token is quoted to prevent word splitting and globbing.
      require_num 1 "<mx-file>"
      mx=$2
      mcxi "/$mx" lm tp -1 mul add /- wm | mcxdump
      ;;


################################

    check_symmetry_present)
      # Same pipeline as check_symmetry with an extra `ch` step right after
      # loading; per the help text this checks symmetry of edge presence
      # rather than of values (exact `ch` semantics: see mcxi manual).
      # The file token is quoted to prevent word splitting and globbing.
      require_num 1 "<mx-file>"
      mx=$2
      mcxi "/$mx" lm ch tp -1 mul add /- wm | mcxdump
      ;;


################################

    grok)
      # Reads a clustering and a matrix and writes two result matrices:
      # <nsm-write> and <ccm-write> ("node-cls and cls-cls" per the help
      # text). The operator sequence is kept exactly as it was.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 4 "<cls-read> <mx-read> <nsm-write> <ccm-write>"
      cls=$2
      mx=$3
      nsm=$4
      ccm=$5
      mcxi "/$cls" lm tp "/$mx" lm st mul dup st "/$nsm" wm pop exch mul st "/$ccm" wm
      ;;


################################

    make_map)
      # Make a map file between two clusterings (per the help text); the
      # mcxi program loads <mx-dest>, applies `tp`, loads <mx-source>,
      # multiplies and writes the result to '/-'.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<mx-source> <mx-dest>"
      source=$2
      dest=$3
      mcxi "/$dest" lm tp "/$source" lm mul /- wm
      ;;


################################

    factor_table_from_cls)
      # Multiply the factor matrix with the clustering in mcxi and dump the
      # product as a table with a header line, using <factor-tab> as the
      # -tabr dictionary for mcxdump.
      # File arguments are quoted to prevent word splitting and globbing.
      require_num 3 "<factor-mx> <cls-file> <factor-tab>"
      factormatrix=$2
      clustering=$3
      factortab=$4
      mcxi "/$factormatrix" lm "/$clustering" lm mul /- wm \
         | mcxdump -tabr "$factortab" --dump-table --header
      ;;


################################

    dump_cls)
      # Dump a clustering as lines of labels (per the help text), with
      # <dict-file> passed to mcxdump as the -tabr dictionary.
      # File arguments are quoted so names with whitespace or glob
      # characters reach mcxdump intact.
      require_num 2 "<cls-file> <dict-file>"
      cls=$2
      dict=$3
      mcxdump -imx "$cls" -tabr "$dict" --dump-rlines --no-values
      ;;


################################

    coarsen)
      # Create a graph on clusters (per the help text) from a clustering and
      # a matrix; the result is written to '/-'. This variant applies `st`
      # to both the `tp` result and the loaded matrix.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<cls-file> <mx-file>"
      cls=$2
      mx=$3
      mcxi "/$cls" lm tp exch st "/$mx" lm st exch mul mul /- wm
      ;;

    coarsen2)
      # Variant of coarsen (not listed in the help output): identical except
      # that `st` is applied only to the loaded matrix.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<cls-file> <mx-file>"
      cls=$2
      mx=$3
      mcxi "/$cls" lm tp exch "/$mx" lm st exch mul mul /- wm
      ;;

    coarsen3)
      # Variant of coarsen (not listed in the help output): identical except
      # that no `st` step is applied at all.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<cls-file> <mx-file>"
      cls=$2
      mx=$3
      mcxi "/$cls" lm tp exch "/$mx" lm exch mul mul /- wm
      ;;


################################

    multiplex_coarse_cls)
      # Combine a coarse and a fine clustering with mcxi and write the
      # result to '/-'. The coarse clustering is loaded twice; the operator
      # sequence is kept exactly as it was.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<cls-coarse> <cls-fine>"
      coarse=$2
      fine=$3
      mcxi "/$fine" lm tp "/$coarse" lm mul tp "/$coarse" lm exch mul /- wm
      ;;


################################

    cls_annot_summary)
      # Summarise an annotated cluster dump: for every input line print a
      # running line index, then one tab-indented "<field> <count>" row per
      # distinct field, sorted by field. Fields starting with '?' are all
      # counted under the single key "_".
      # The file argument is quoted so unusual names reach perl intact.
      require_num 1 "<cls-annot-dump-file>"
      cls=$2
      perl -ane 'BEGIN { $, = " "; } my %a = (); for (@F) { if (/^\?/) { $a{_}++; } else { $a{$_}++; } } print $id++, "\n"; print map { "\t$_ $a{$_}\n" } (sort keys %a);' "$cls"
      ;;


################################

    tab_grep)
      # Look up external identifiers in a tab file. The tab file is read
      # into a map keyed by its second field with the first field as value;
      # for every line of <identifier-file> that is a key in the map,
      # "<value>TAB<identifier>" is printed.
      # Fixes: the open() failure message used $fnid, which is never set
      # inside perl, so the file name was missing from the error; it now
      # names the tab file and the OS error. File arguments are quoted.
      require_num 2 "<tab-file> <identifier-file>"
      fntab=$2
      fnid=$3
      perl -e '$fntab = shift; open(I, "$fntab")|| die "cannot open $fntab: $!\n"; my %map = map { chomp; reverse(split) } <I>; while(<>) { chomp; print "$map{$_}\t$_\n" if defined($map{$_}); }' "$fntab" "$fnid"
      ;;


################################

    annot2tab)
      # Make a dict file from an annot file. The annot file is read first:
      # its first field is the key and the remaining fields, joined with
      # ':', the annotation. Then for every tab-file line whose second field
      # has an annotation, "<first field>TAB<annotation>" is printed.
      # File arguments are quoted so unusual names reach perl intact.
      require_num 2 "<annot-file> <tab-file>"
      fnannot=$2
      fntab=$3
      perl -ane 'BEGIN { ($fannot, $ftab) = @ARGV; $" = ":"; } if ($ARGV eq $fannot) { my $f = shift @F; $annot{$f} = "@F"; } else { print "$F[0]\t$annot{$F[1]}\n" if defined($annot{$F[1]}) && $ARGV eq $ftab; }' "$fnannot" "$fntab"
      ;;


################################

   diff_mx)
      # Dump all edges of a-b: load both matrices, scale the second by -1,
      # add, and pipe the result through mcxdump.
      # File tokens are quoted to prevent word splitting and globbing.
      require_num 2 "<mx-file> <mx-file>"
      mcxi "/$2" lm "/$3" lm -1 mul add /- wm | mcxdump
      ;;


################################
# Example clm dist output line: d=216540  d1=168247   d2=48293 nn=1211662  c1=72917

   dist_pct)
# Filter from STDIN to STDOUT; takes no file arguments. Every input line must
# contain nn=<digits>, otherwise perl dies with "no nn". On each line every
# d1=<digits> and d2=<digits> value (a plain d= is not matched) is replaced by
# that value as a percentage of nn, formatted with two decimals and a
# trailing '%'.
perl -pe '/nn=(\d+)/ || die "no nn"; $nn=100/$1; s/(d[12]=)(\d+)/$1 . sprintf("%.2f", $2*$nn) . "%"/ge;'
      ;;


################################

   diff_table)
      # Print max(abs(a-b)) for two tables, each read by R with a header
      # line and row names in the first column.
      # "$@" is quoted so file names with whitespace reach R as single
      # arguments; the here-document delimiter is quoted because the R code
      # is literal and must never be expanded by the shell.
      require_num 2 "<table-file> <table-file>"
      shift 1
      R --vanilla --quiet --slave --args "$@" <<'EOR'
args <- commandArgs(trailingOnly=TRUE)
a <- as.matrix(read.table(args[1], header=T, row.names=1))
b <- as.matrix(read.table(args[2], header=T, row.names=1))
print(max(abs(a-b)))
EOR
      ;;


################################

   add_table)
      # Sum one or more tab-separated tables element-wise in R and write the
      # total to STDOUT without row or column names. R quits with status 1
      # when the dimensions of the tables differ.
      # "$@" is quoted so file names with whitespace reach R as single
      # arguments; the here-document delimiter is quoted because the R code
      # is literal and must never be expanded by the shell.
      require_num 1 "<table-file>+"
      shift 1
      R --vanilla --quiet --slave --args "$@" <<'EOR'
total <- NULL
for (a in commandArgs(trailingOnly=TRUE)) {
   fobj <- file(a, "rt")
   if (is.null(fobj)) { quit(status = 1) }
   res <- read.table(fobj, sep="\t", quote="", comment.char="", as.is=T)
   if (is.null(res)) { quit(status = 1) }
   close(fobj)
   if (is.null(total)) {
      total <- as.matrix(res)
   } else {
      if (ncol(total) != ncol(res) || nrow(total) != nrow(res)) {
         print("error: dimensions do not match")
         quit(status = 1)
      }
      total <- total + as.matrix(res)
   }
}
write.table(total, sep="\t", row.names=FALSE, col.names=FALSE)
EOR
      ;;


################################

    randomize_cls)
      # Randomize a clustering while keeping its cluster sizes: the sizes
      # come from `clxdo gra`, perl shuffles the indices 0..sum-1 and prints
      # one line of indices per cluster, and mcxload reads those lines.
      # Note: this re-invokes `clxdo` by name, so it must be found in PATH.
      # The file argument is quoted so unusual names are passed on intact.
      require_num 1 "<cls-file>"
      clin=$2
      clxdo gra "$clin" | perl -ne 'use List::Util "shuffle"; $, = " "; chomp; my @v = split; map { $sum += $_ } @v; @w = shuffle(0..$sum-1); $o = 0; while (@v) { $oo = $o; $o += shift @v; print map { $w[$_] } ($oo..$o-1); print "\n"; };' \
         | mcxload -235-ai -

      ;;

   *)
   # Unrecognised mode: report it on stderr (so it cannot be mistaken for
   # data by a consumer of STDOUT) and fail explicitly with status 1 rather
   # than relying on `false` together with `set -e`.
   echo "unknown mode $mode, please use $0 -h" >&2
   exit 1

####
   esac


