#!/bin/bash
#
# Copyright (c) 2006 Mael Hillereau
#
# This file is part of alphabib.
#
# alphabib is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# alphabib is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with alphabib; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# cxBib -- Addition of alphabetical headers into BibTeX '.bbl' files.
# Author:        Mael Hillereau <mael.hillereau@free.fr>
# Created:       12.apr.2006
# Last modified: 14.may.2006
# Script identity: release number, maintainer contact, and the name the
# script was invoked under (shown in the usage text and in error hints).
version="1.0"
author="Mael Hillereau <mael.hillereau@free.fr>"
# Strip the directory part with a parameter expansion instead of an unquoted
# `basename $0`, which mis-parses invocation paths containing whitespace.
name=${0##*/}

# Default LaTeX fragments spliced into the generated '\alphabibitem' macro.
# Single quotes keep every backslash literal, so each value reads exactly as
# TeX will see it.
defaultpenalty='\penalty-200'
defaultskip='\bigskip'
defaultlabel='\Large\bfseries#1\hfill'
defaultcontent='\ \\[-1ex]\hrule\medskip'

# Bookmark code actually emitted; filled in by the -b option.
pdfbookmarks=''
# Bookmark code without an explicit level: the first header issues
# \subpdfbookmark and defines \firstpdfbookmark, later headers issue
# \currentpdfbookmark. The pieces are concatenated into one line.
pdfbookmarkscodewithoutlevel='\expandafter\ifx\csname firstpdfbookmark\endcsname\relax'\
'\subpdfbookmark{#1}{alphabib.#1}'\
'\def\firstpdfbookmark{}'\
'\else'\
'\currentpdfbookmark{#1}{alphabib.#1}'\
'\fi'

# Level given with -l, and the two halves wrapped around it to build an
# explicit '\pdfbookmark[<level>]{#1}{alphabib.#1}' call.
pdfbookmarkslevel=''
pdfbookmarkscodewithlevelB='\pdfbookmark['
pdfbookmarkscodewithlevelE=']{#1}{alphabib.#1}'

# Switched to 1 by the -s option.
novon=0

# Print an error report followed by a hint pointing at the -h option.
# Arguments: $1 - message text; backslash escapes such as \n and \t in it
#                 are expanded.
# Globals:   name (read) - program name shown in the hint.
# The function only prints; exiting is left to the caller.
function error()
{
  local report="Error:\t$1\n\tYou may try '$name -h' for further help.\n"
  printf '%b' "$report"
}

# Print the command-line help on standard output.
# Globals: name (read) - program name substituted into the text.
# The text is a single here-document; it fixes the "OTIONS" heading and the
# "begining" misspelling of the previous help text.
function usage()
{
  cat <<EOF

SYNTAX
  $name [-hsv] [-b [-l level]] <input-file.bbl> [<output-file.bbl>]

DESCRIPTION
  Addition of alphabetical headers into BibTeX '.bbl' files. Input and output
  files may be identical, which is equivalent to no output file given. Input
  file must be a '.bbl' file generated by BibTeX (so you have to run bibtex
  before running $name).

OPTIONS
  -b
    This option will cause insertion of bookmarks into PDF files. Of course, it
    requires using 'hyperref' package. Bookmarks level is set automatically if
    option -l is not given.
  -h
    Displays this help.
  -l
    This option allows specifying level of bookmarks into PDF files. It must be
    used with -b option.
  -s
    Choose this option if your BibTeX style sorts entries using the 'von' part
    of names. For instance, 'alpha.bst' behaves like this (e.g. 'Ludwig van
    Beethoven' will result in a key beginning with 'vB' and will thus be sorted
    like 'VB' would).
  -v
    Displays version information.

See the manual for more detailed information.

EOF
}

# Delete the working copy that sits next to the input file.
# Globals: inputfile (read) - path whose '.working' sibling is removed.
# A missing working copy is not an error (rm -f).
function cleanfiles()
{
  # '--' stops option parsing, so an input name starting with '-' is still
  # treated as a file name rather than as rm options.
  rm -f -- "${inputfile}.working"
}

# Classify a single character and report the class through the exit status.
# Arguments: $1 - the character to classify.
# Returns:   0 - lowercase letter
#            1 - uppercase letter
#            2 - a backslash
#            3 - anything else (including an empty or longer string)
function testletter()
{
  local class=3
  if [[ $1 == [[:lower:]] ]]; then
    class=0
  elif [[ $1 == [[:upper:]] ]]; then
    class=1
  elif [[ $1 == '\' ]]; then
    class=2
  fi
  return "$class"
}

# Startup banner, printed on every run before the options are parsed.
printf '%s\n' \
  "This is cxBib version $version" \
  "Bash script written by $author"

# Command-line switches: -b (PDF bookmarks), -h (help), -l <level>
# (bookmark level), -s (sort on the 'von' part), -v (version).
while getopts "bhl:sv" arg; do
  case "$arg" in
    b)
      printf '%s\n' "PDF bookmarks option selected."
      pdfbookmarks=$pdfbookmarkscodewithoutlevel
      ;;
    h)
      usage
      exit 0
      ;;
    l)
      printf 'Bookmarks level %s selected.\n' "$OPTARG"
      pdfbookmarkslevel=$OPTARG
      ;;
    s)
      printf '%s\n' "Assume that sorting is performed using the 'von' part."
      novon=1
      ;;
    v)
      # Nothing to print here: the version banner is emitted before option
      # parsing starts.
      exit 0
      ;;
    *)
      # getopts reports '?' for an unknown switch or a missing argument.
      error "Problem while processing options."
      exit 1
      ;;
  esac
done
# Drop the parsed switches so that $1/$2 are the file arguments.
shift "$((OPTIND - 1))"

# Positional arguments: <input-file> [<output-file>]. With a single argument
# the output file is the input file itself.
case $# in
  1)
    outputfile=$1
    ;;
  2)
    outputfile=$2
    ;;
  *)
    error "Invalid number of arguments!"
    exit 1
    ;;
esac
# The input must be an existing regular file.
if [[ ! -f $1 ]]; then
  error "'$1' is not a valid file!"
  exit 2
fi
inputfile=$1

# -l is only meaningful together with -b: reject it when no bookmark code
# has been selected.
if [[ -n $pdfbookmarkslevel && -z $pdfbookmarks ]]; then
  error "-l option used without -b option!"
  exit 1
fi
# With an explicit level, replace the automatic bookmark code by
# '<prefix><level><suffix>'.
if [[ -n $pdfbookmarkslevel ]]; then
  pdfbookmarks="${pdfbookmarkscodewithlevelB}${pdfbookmarkslevel}${pdfbookmarkscodewithlevelE}"
fi

# Make a working copy of the input file; every later read goes through
# "<input>.working", which is what allows the output file to be the input
# file itself.
# The paths are quoted and preceded by '--' so that names containing spaces
# or starting with '-' are copied correctly. '-i' is kept: cp asks before
# overwriting an already existing '.working' file.
if ! cp -i -- "$inputfile" "$inputfile.working"; then
  error "Unable to create a temporary file. You must have write permission into\n\tthe current directory!"
  exit 3
fi

# Check whether the input holds a 'thebibliography' environment: both the
# \begin and the \end line must be found. Each grep status is recorded
# (0 means found); grep output and diagnostics are discarded.
bbibenv=0
ebibenv=0
grep -n -e '\\begin{thebibliography}' "$inputfile.working" > /dev/null 2>&1 || bbibenv=$?
grep -n -e '\\end{thebibliography}' "$inputfile.working" > /dev/null 2>&1 || ebibenv=$?
if (( bbibenv != 0 || ebibenv != 0 )); then
  error "'$inputfile' is not a valid BibTeX file: 'thebibliography' not found!"
  cleanfiles
  exit 4
fi

# Scan the working copy.
# nblines: number of lines in the file. Reading from stdin makes wc print
# only the count, and $(( )) drops any padding blanks some wc versions add.
nblines=$(( $(wc -l < "$inputfile.working") ))
# rawentries: every line holding a \bibitem, in 'grep -n' form
# "<line-number>:<text>". Spaces inside a line are replaced by '%' and the
# newlines by spaces, so the result is one space-separated word per entry.
rawentries=$(grep -n -e '.*\\bibitem' "$inputfile.working" | tr " " "%" | tr "\n" " ")
if [[ -z $rawentries ]]; then
  # No \bibitem at all: nothing to add headers to.
  error "'$inputfile' is not a valid BibTeX file: No entry found!"
  cleanfiles
  exit 4
fi

# Message used whenever a \bibitem label cannot be reduced to an initial.
unknownformmesage="Sorry, your BibTeX style is not supported!\n\tIf it should be (see supported styles in the manual), please send a bug\n\treport to the maintainer."
nbentries=0
tmpstr=""
tmpint=1
# Optional argument of \bibitem, i.e. the text between '[' and ']'.
bibitemre='^.*\\bibitem\[([^]]*)\].*$'
# First uppercase letter of a label.
initialre='^[^[:upper:]]*([[:upper:]])'

# Split the space-separated entry list into an array. Unlike an unquoted
# `for x in $rawentries`, this does not expose the '[...]' labels to
# pathname expansion.
read -r -a entrylist <<< "$rawentries"

# For every entry, fill three parallel arrays indexed by entry number:
#   line[i]   - line number of the \bibitem
#   next[i]   - line number of the following \bibitem (set for the last
#               entry after the loop)
#   letter[i] - uppercase initial under which the entry is filed
for rawentry in "${entrylist[@]}"; do
  # grep -n output is "<line>:<text>": keep what precedes the first ':'
  line[nbentries]=${rawentry%%:*}
  # this entry starts where the previous one ends
  if (( nbentries != 0 )); then
    next[nbentries-1]=${line[nbentries]}
  fi

  # usually entries look like '\bibitem[A...' with 'A' the initial, so take
  # the optional argument of \bibitem (empty when there is none)
  if [[ $rawentry =~ $bibitemre ]]; then
    tmpstr=${BASH_REMATCH[1]}
  else
    tmpstr=""
  fi
  letter[nbentries]=${tmpstr:0:1}
  testletter "${letter[nbentries]}"
  tmpint=$?

  if (( tmpint == 2 )); then
    # The label starts with a macro: expect '\foo\bar A...' or
    # '\foo\bar{A...'. Spaces were turned into '%' when the list was built,
    # so look for the first run of letters following '%' or '{'.
    tmpstr=$(printf '%s\n' "$tmpstr" | grep -E -o "[%{][[:alpha:]]+" | head -n 1 | grep -o "[^%{].*")
    if [[ -z $tmpstr ]]; then
      # the bib entry is not in a known form
      error "$unknownformmesage"
      cleanfiles
      exit 4
    fi
    letter[nbentries]=${tmpstr:0:1}
    testletter "${letter[nbentries]}"
    tmpint=$?
  fi

  if (( tmpint == 0 )); then
    # lowercase first letter
    if (( novon == 0 )); then
      # skip the lowercase prefix: the initial is the first capital letter
      if [[ $tmpstr =~ $initialre ]]; then
        letter[nbentries]=${BASH_REMATCH[1]}
      else
        # the message must be passed as ONE argument, otherwise only its
        # first word would be reported
        error "$unknownformmesage"
        cleanfiles
        exit 4
      fi
    else
      # -s given: keep the first letter itself, converted to uppercase
      letter[nbentries]=$(printf '%s\n' "${letter[nbentries]}" | tr 'a-z' 'A-Z')
    fi
  elif (( tmpint != 1 )); then
    # not a letter at all, so nothing more can be done
    error "$unknownformmesage"
    cleanfiles
    exit 4
  fi
  nbentries=$((nbentries + 1))
done

# the last entry is bounded by the number of lines of the file
next[nbentries-1]=$nblines
echo "$nbentries bib entries found in input file '$inputfile'."

# Start the output file with the preamble, i.e. every line of the working
# copy that precedes the first \bibitem. This truncates any existing output
# file. The command substitution drops trailing blank lines of the preamble
# and printf adds back a single newline.
# Paths are quoted so that file names containing spaces work.
endpreamb=$(( ${line[0]} - 1 ))
preamb=$(head -n "$endpreamb" "$inputfile.working")
printf '%s\n' "$preamb" > "$outputfile"

# Append the '\alphabibitem' macro definition to the output file as one line
# (every line break below is a shell continuation inside the string).
# The macro takes the header letter as #1 and, in order:
#   - skips, except before the first header: \alphabibitemskip if the
#     document defines it, else the default skip;
#   - sets a penalty: \alphabibitempenalty if defined, else the default one.
#     The two branches used to be swapped, so a user definition was never
#     used and the default only applied when a user definition existed;
#   - emits the PDF bookmark code (empty unless -b was given);
#   - typesets the header with \alphabibitemlabel / \alphabibitemcontent
#     when defined, else with the default label / content.
# printf '%s\n' writes the backslashes literally whatever the echo settings
# of the shell are; the output path is quoted.
printf '%s\n' "\def\alphabibitem#1{\
\expandafter\ifx\csname firstalphabibitem\endcsname\relax\
\def\firstalphabibitem{}\
\else\
\expandafter\ifx\csname alphabibitemskip\endcsname\relax\
$defaultskip\
\else\
\alphabibitemskip\
\fi\
\fi\
\expandafter\ifx\csname alphabibitempenalty\endcsname\relax\
$defaultpenalty\
\else\
\alphabibitempenalty\
\fi\
$pdfbookmarks\
\expandafter\ifx\csname alphabibitemlabel\endcsname\relax\
\expandafter\ifx\csname alphabibitemcontent\endcsname\relax\
\item[$defaultlabel]$defaultcontent\
\else\
\item[$defaultlabel]\alphabibitemcontent{#1}\
\fi\
\else\
\expandafter\ifx\csname alphabibitemcontent\endcsname\relax\
\item[\alphabibitemlabel{#1}]$defaultcontent\
\else\
\item[\alphabibitemlabel{#1}]\alphabibitemcontent{#1}\
\fi\
\fi}" >> "$outputfile"

# Copy every entry to the output file, preceded by an '\alphabibitem{X}'
# header each time the initial changes. Each new initial is also echoed on
# standard output as progress information.
# The loop runs over the entry count instead of re-splitting the raw entry
# list, and all paths are quoted so that file names with spaces work.
precletter="_"   # no initial can equal this, so the first entry gets a header
for (( n = 0; n < nbentries; n++ )); do
  currentletter=${letter[n]}
  currentline=${line[n]}
  nextline=${next[n]}
  # the entry spans lines currentline .. nextline-1: take the tail of the
  # file starting at currentline, then keep its first (nextline-currentline)
  # lines
  last=$(( nblines - currentline + 1 ))
  first=$(( nextline - currentline ))
  bibentry=$(tail -n "$last" "$inputfile.working" | head -n "$first")
  if [[ $currentletter != "$precletter" ]]; then
    printf '%s ' "$currentletter"
    printf '\n\\alphabibitem{%s}\n' "$currentletter" >> "$outputfile"
    precletter=$currentletter
  fi
  # one blank line, then the entry; blank lines that ended the entry were
  # dropped by the command substitution
  printf '\n%s\n' "$bibentry" >> "$outputfile"
done

# Close the environment. The entry loop never copies the last line of the
# input (presumably the original '\end{thebibliography}'), so it is written
# here. The output path is quoted so that names with spaces work.
printf '\n\\end{thebibliography}\n' >> "$outputfile"

# delete temp files
cleanfiles

# '%s' prints the file name as is, without interpreting backslashes in it
printf "\nOutput written in '%s'.\n" "$outputfile"
exit 0
