=begin
    Ruby support library for dhelp database access

    Copyright (C) 2005  Esteban Manchado Velzquez

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
=end

require 'bdb'
require 'pathname'
require 'fileutils'

# Dhelp-related classes
module Dhelp
  # Names of the document formats this library handles; formats not listed
  # here are skipped when collecting files. Frozen so the shared list cannot
  # be modified by accident.
  SUPPORTED_FORMATS = %w(html text pdf postscript dvi).freeze

  # Library-specific exception hierarchy. Note that, inside this module, the
  # bare name "Exception" refers to Dhelp::Exception (a RuntimeError), not to
  # Ruby's top-level ::Exception.
  class Exception < RuntimeError; end   # Base class Dhelp::Exception
  # Raised by Indexer#index when running the index++ command fails
  class IndexerError < Exception; end
  # Raised by DocDirDatabase#info_for_path when the directory has no entry
  class KeyNotFoundError < Exception; end
  # Raised by DocBaseDocumentPool.new when given unsupported options
  class InvalidOptionError < Exception; end


  # C struct wrapper: maps a fixed-layout C struct onto an easy to use Ruby
  # class. For each C struct you want to support, create a subclass with calls
  # to the class methods "pack_fmt" and "field_list". See the Array#pack
  # documentation for the "pack_fmt" parameter. "field_list" is just a list of
  # struct field names (positional according to the "pack_fmt").
  #
  #  class SomeStructClass < CStructWrapper
  #    pack_fmt    'Z100'
  #    field_list  %w(dir)
  #  end
  #
  # Every declared field can then be read through a method of the same name,
  # e.g. SomeStructClass.new(raw_string).dir
  class CStructWrapper
    # Class methods
    class <<self
      # Get/Set the pack format (an Array#pack template String). Returns the
      # current format, or nil if none has been declared.
      def pack_fmt(fmt = nil)
        @fmt = fmt if fmt
        @fmt
      end

      # Get/Set the field list. It can be set as an array, or as several
      # parameters. Always returns the field names as an Array of Symbols
      # (empty when no field list has been declared yet).
      def field_list(*args)
        case args.size
        when 0
          # Just get the value, do nothing
        when 1
          first = args.first
          @field_list = first.kind_of?(Array) ? first : [first]
        else
          @field_list = args
        end
        (@field_list || []).map {|f| f.to_sym}
      end
    end

    # Builds a wrapper either from raw packed data (String) or from a Hash
    # mapping field names to values. Hash keys may be Symbols or Strings.
    # Raises ArgumentError for any other kind of argument.
    def initialize(data)
      case data
      when String
        @data = data
      when Hash
        values = keys.map do |f|
          # Symbol keys take precedence; fall back to the String spelling
          data.has_key?(f) ? data[f] : data[f.to_s]
        end
        @data = values.pack(pack_fmt)
      else
        raise ArgumentError, "Argument must be either String or Hash"
      end
    end

    # Handy shortcut methods
    def keys;     self.class.field_list; end
    def pack_fmt; self.class.pack_fmt;   end

    # Returns the value of the field named f (String or Symbol), unpacked
    # from the raw data. Raises ArgumentError if the field is not declared.
    def get_field(f)
      i = keys.index(f.to_sym)
      if i
        @data.unpack(pack_fmt)[i]
      else
        raise ArgumentError, "Unknown field '#{f}'"
      end
    end

    # Returns a Hash object with all the C struct fields (Symbol keys)
    def to_hash
      h = {}
      keys.each do |f|
        h[f] = get_field(f)
      end
      h
    end

    # Returns the data in C format
    def to_raw_data
      @data
    end

    # Catches missing methods, to get the field values
    def method_missing(meth, *args)
      if keys.include? meth.to_sym
        get_field(meth)
      else
        super
      end
    end

    # Keeps respond_to? in sync with the accessors method_missing provides
    def respond_to_missing?(meth, include_private = false)
      keys.include?(meth.to_sym) || super
    end
  end



  # One format entry of a doc-base document: a format name, a list of files
  # and, optionally, an index file.
  class DocBaseDocumentFormat
    attr_reader :format

    # format:: format name
    # index::  index file; may be nil or empty
    # files::  whitespace-separated list of file entries; may be nil
    def initialize(format, index, files)
      @format = format
      @index  = index
      @files  = files
    end

    # The index file, or the first listed file when no index was given
    def index
      return files.first if @index.to_s == ""
      @index
    end

    # The file entries as an Array, split on whitespace
    def files
      raw_list = @files.to_s
      raw_list.strip.split(/\s+/)
    end
  end


  # doc-base registered document. Parses the control file at the given path:
  # "Field: value" lines set a field, other non-blank lines continue the
  # previous field, and blank lines separate blocks. Every block after the
  # first becomes a DocBaseDocumentFormat in #formats.
  class DocBaseDocument
    attr_reader :path, :document, :title, :author, :formats

    # Fields the parser stores. Any other field is ignored, so that a stray
    # field name (say "Path" or "Formats") cannot overwrite parser state.
    KNOWN_FIELDS = %w(document title author abstract section
                      format index files).freeze

    # Reads and parses the file at path. Raises whatever File.readlines
    # raises when the file can't be read.
    def initialize(path)
      @path     = path
      # Fields missing from the file stay nil (except document, which
      # defaults to the empty string)
      @document = ""
      @title = @author = @abstract = @section = nil
      @formats  = []
      @format   = ""
      @index = @files = nil
      state         = :main_block
      last_property = nil
      File.readlines(@path).each do |line|
        case line
        when /^(\w+):\s*(.*)/
          prop, contents = $1.downcase, $2.to_s
          # The first field after a blank line opens a new format block
          state = :format_block if state == :noblock
          append_to_field(prop, contents)
          last_property = prop
        when /^\s*$/
          flush_format_block if state == :format_block
          state = :noblock
        else
          # Continuation of the previous field; ignored if there is none
          append_to_field(last_property, line.chomp) if last_property
        end
      end

      # The file may end without a trailing blank line
      flush_format_block if state == :format_block
    end

    # Section name with every "/"-separated component capitalized. Returns
    # the empty string if the document has no section.
    def section
      @section.to_s.split("/").map {|ss| ss.capitalize}.join("/")
    end

    # Abstract text, stripped; empty string if the document has none
    def abstract
      # Could be nil
      @abstract.to_s.strip
    end

    # Convenience method to return the files of all supported formats: one
    # list of files per supported format (format names are compared
    # case-insensitively).
    def files
      supported = formats.find_all do |d|
        SUPPORTED_FORMATS.include? d.format.to_s.downcase
      end
      supported.map {|d| d.files}
    end

    private

    # Appends text to the instance variable backing the given (lowercase)
    # field name; does nothing for fields not in KNOWN_FIELDS
    def append_to_field(name, text)
      return unless KNOWN_FIELDS.include?(name)
      ivar = "@#{name}"
      instance_variable_set(ivar, instance_variable_get(ivar).to_s + text)
    end

    # Stores the format block collected so far and resets its fields
    def flush_format_block
      @formats << DocBaseDocumentFormat.new(@format, @index, @files)
      @format = @index = @files = nil
    end
  end


  # Pool of doc-base registered documents. It looks for doc-base registered
  # documents in a list of directories (by default /usr/share/doc-base), and
  # allows iterating over a collection of DocBaseDocument objects.
  class DocBaseDocumentPool
    # Supported options:
    # :dirs:: directory (or Array of directories) to scan
    # :skip:: file path (or Array of paths) to leave out
    # Raises InvalidOptionError when given any other option.
    def initialize(opts={})
      supported_options = [:dirs, :skip]
      @opts = {:dirs => ['/usr/share/doc-base'], :skip => []}.merge(opts)
      # Both options accept a single value as well as a list
      @opts[:dirs] = [@opts[:dirs]] unless @opts[:dirs].kind_of? Array
      @opts[:skip] = [@opts[:skip]] unless @opts[:skip].kind_of? Array

      unsupported_options = @opts.keys - supported_options
      unless unsupported_options.empty?
        raise InvalidOptionError, "Invalid option(s): '#{unsupported_options.join(", ")}'"
      end
    end

    # Iterates over the documents, passing a DocBaseDocument object to the
    # block. Hidden entries, subdirectories and skipped paths are ignored.
    # Returns an enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?
      @opts[:dirs].each do |dir|
        Dir.entries(dir).each do |entry|
          path = File.join(dir, entry)
          next if @opts[:skip].include? path
          next if entry[0..0] == '.' or File.directory? path
          yield DocBaseDocument.new(path)
        end
      end
    end

    # Iterates over the sections, passing both the section name and the list of
    # documents in that section (sorted by title) to the block. The sections
    # are ordered alphabetically, but sending all subsections first. Returns
    # an enumerator when called without a block.
    def each_section
      return enum_for(:each_section) unless block_given?
      item_list = {}
      each do |doc|
        item_list[doc.section] ||= []
        item_list[doc.section] << doc
      end

      # Deeper sections (more "/" separators) first, then by name
      ordered_sections = item_list.keys.sort_by {|name| [-name.count('/'), name]}
      ordered_sections.each do |sec|
        # to_s: a document without a title must not break the sort
        yield sec, item_list[sec].sort_by {|doc| doc.title.to_s}
      end
    end

    # Returns a Hash containing the sections (keys) and list of documents per
    # section and subsections (values). The format is like this:
    #
    # pool.section_tree # => {"App"  => {:documents   => [ ... ],
    #                                    :subsections => {"Tools" => {...}}},
    #                         "Text" => {:documents   => [ ... ]}}
    #
    # Only the first "/" splits a section name: a section "A/B/C" is stored
    # as the subsection "B/C" of "A".
    def section_tree
      root = {}
      each do |doc|
        section_name = doc.section
        section_hash = root
        # Create parent sections if needed
        if section_name =~ /(.+?)\/(.+)/
          parent, subsection = $1, $2
          root[parent] ||= {:documents   => [],
                            :subsections => {}}
          section_name = subsection
          section_hash = root[parent][:subsections]
        end
        section_hash[section_name] ||= {:documents   => [],
                                        :subsections => {}}
        section_hash[section_name][:documents] << doc
      end
      root
    end
  end


  # Key record of DocDirDatabase: a single string field, "dir", packed with
  # the Array#pack template 'Z100'
  class DocDirDatabaseKey < CStructWrapper
    field_list  :dir
    pack_fmt    'Z100'
  end


  # Value record of DocDirDatabase: two string fields, "doc_id" and "title",
  # packed with the Array#pack template 'Z50 Z1000'
  class DocDirDatabaseValue < CStructWrapper
    field_list  :doc_id, :title
    pack_fmt    'Z50 Z1000'
  end


  # Database for doc-base directories. Each record associates a base
  # directory with the id and title of the doc-base document registered for
  # it. Keys and values are stored as packed DocDirDatabaseKey and
  # DocDirDatabaseValue records.
  class DocDirDatabase < BDB::Hash
    # Opens the database, by default read-only and at
    # /var/lib/dhelp/doc-base_dirs. The given options are merged over the
    # default settings before being handed to BDB::Hash.open.
    def self.open(flags   = BDB::RDONLY,
                  options = {},
                  mode    = 0644,
                  name    = '/var/lib/dhelp/doc-base_dirs',
                  subname = nil)
      settings = {"ffactor"   => 8,
                  "nelem"     => 1,
                  "cachesize" => 5000,
                  "hash"      => nil,
                  "lorder"    => 0}.merge(options)
      super(name, subname, flags, mode, settings)
    end

    # Walks the whole database, yielding the directory, doc_id and title of
    # every record
    def each
      super do |raw_key, raw_value|
        record = DocDirDatabaseValue.new(raw_value)
        entry  = DocDirDatabaseKey.new(raw_key)
        yield entry.dir, record.doc_id, record.title
      end
    end

    # Stores a record linking the given directory to a doc-base document id
    # and its title
    def add(dir, doc_id, title)
      packed_key   = DocDirDatabaseKey.new(:dir => dir).to_raw_data
      packed_value = DocDirDatabaseValue.new(:doc_id => doc_id,
                                             :title  => title).to_raw_data
      put(packed_key, packed_value)
    end

    # Looks up the doc-base document registered for the given directory and
    # returns a two-element array, [doc_id, title]. Raises KeyNotFoundError
    # if the directory has no record.
    def info_for_path(dir)
      packed_key   = DocDirDatabaseKey.new(:dir => dir).to_raw_data
      packed_value = get(packed_key)
      raise KeyNotFoundError, "Can't find information for path #{dir}" if packed_value.nil?
      record = DocDirDatabaseValue.new(packed_value)
      [record.doc_id, record.title]
    end
  end



  # Indexer class. So far it only takes care of doc-base documents: it feeds
  # their files to the external index++ command and records, for every
  # directory holding them, the owning document id and title.
  class Indexer
    # user_opts may override any of:
    # :index_file::  path of the index file
    # :config_file:: configuration file passed to index++
    # :indexpp_cmd:: path of the index++ binary
    # :search_dirs:: directories scanned for doc-base documents
    def initialize(user_opts={})
      @opts = {:index_file   => "/var/lib/dhelp/documents.index",
               :config_file  => "/usr/share/dhelp/swish++.conf",
               :indexpp_cmd  => "/usr/bin/index++",
               :search_dirs  => ['/usr/share/doc-base']}.merge(user_opts)
      @pool = DocBaseDocumentPool.new(:dirs => @opts[:search_dirs])
    end

    # Returns the index file
    def index_file; @opts[:index_file]; end

    # Returns the index++ binary path
    def indexpp_cmd; @opts[:indexpp_cmd]; end

    # Returns the index++ command-line options as a String. The only key
    # read from user_opts is :incremental (true by default).
    def indexpp_options(user_opts={})
      opts = {:incremental  => true}.merge(user_opts)
      "--config-file #{@opts[:config_file]} --index-file #{index_file}" +
        (opts[:incremental] ? " --incremental" : "")
    end

    # Index the list of given dirs/files. Directories are indexed recursively.
    # There is only one valid key for the user_opts hash: :incremental, which
    # adds the contents of the given paths to the index, instead of replacing
    # it with the indexed contents of paths.
    #
    # Returns nil without doing anything when an incremental run is requested
    # but no index exists yet, or when the index file exists but is empty.
    # Raises IndexerError if index++ can't be fed or reports failure.
    def index(paths, user_opts={})
      opts = {:incremental  => true}.merge(user_opts)
      # NOTE(review): this string is run through the shell, and the
      # configured paths are interpolated without any escaping
      cmd = "#{indexpp_cmd} #{indexpp_options(opts)} -"

      # If the index doesn't exist yet and we're doing incremental, just exit
      return if opts[:incremental] && !File.exist?(index_file)

      # If it's already indexing (empty index file), just exit
      return if File.zero?(index_file)

      begin
        # The paths to index are sent to the command's standard input
        File.popen(cmd, "w") do |f|
          paths.each do |dir|
            f.puts dir
          end
        end
      rescue Errno::EPIPE
        raise IndexerError, "Broken pipe indexing #{paths.to_a.join(', ')}, using #{cmd}"
      end

      unless $? && $?.success?
        raise IndexerError, "Couldn't index #{paths.to_a.join(', ')} using #{cmd}"
      end

      # When using incremental indexing (default), a new index is created (with
      # the extension ".new")
      if opts[:incremental]
        FileUtils.mv "#{index_file}.new", index_file
      end
    end

    # Indexes all the documents registered in the doc-base database. It also
    # registers some basic information about each doc-base document and the
    # parent directories they store their files in
    def reindex_all
      index_paths = []
      doc_dir_db = DocDirDatabase.open(BDB::CREATE)
      begin
        @pool.each do |doc|
          doc.formats.each do |format|
            next unless SUPPORTED_FORMATS.include? format.format.to_s.downcase
            format.files.each do |glob_path|
              # Dirnames can be globs too (like /usr/share/doc/foo/*/*.html)
              dirs  = Dir.glob(File.dirname(glob_path))
              files = Dir.glob(glob_path)
              # Add an entry with a reference to the document title
              dirs.each do |dir|
                doc_dir_db.add(dir, doc.document, doc.title) if File.directory? dir
              end
              # Collect the files
              index_paths.concat(files.select {|f| File.file? f})
            end
          end
        end
      ensure
        # Release the database handle even if scanning fails
        doc_dir_db.close
      end

      index(index_paths, :incremental => false)
    end
  end
end
