require "web/escape"
module Web
  class Agent
    class Robots
      class Rule
        def initialize
          @agents = []
          @rules = []
          @extensions = []
        end
        attr_accessor :agents,:rules,:extensions

        def to_s
          s = ""
          agents.each {|a|
            s << "User-agent: #{a}\r\n"
          }
          @rules.each {|type,value|
            case type
            when :disallow
              s << "Disallow: #{value}\r\n"
            when :allow
              s << "Allow: #{value}\r\n"
            end
          }
          extensions.each {|key,value|
            s << "#{key}: #{value}\r\n"
          }
          s << "\r\n"
        end

        def allowed?(path)
          return true if path=~/\A\/robots.txt\z/i
          flag = true
          path = Web::unescape(path.gsub(/%2f/i,"%252f")).downcase
          @rules.each {|type,r|
            r = Web::unescape(r.gsub(/%2f/i,"%252f")).downcase
            case type
            when :disallow
              if r==""
                flag = true
                break
              elsif path.index(r)==0
                flag = false
                break
              end
            when :allow
              if path.index(r)==0
                flag = true
                break
              end
            end
          }
          flag
        end
      end
      
      # Starts out with an empty list of records; #parse fills it in.
      def initialize
        @rules = Array.new
      end
      
      # Reads robots.txt text line by line from +io+ (anything responding to
      # #gets) and appends one Rule per record to @rules.
      #
      # The parser is a three-state machine:
      # * :start - outside a record; everything except a User-agent line is
      #   skipped.
      # * :agent - collecting the User-agent lines that open a record.
      # * :rule  - collecting Disallow / Allow / extension lines until a blank
      #   line closes the record.
      #
      # Field names are matched case-insensitively, so "User-Agent:" and
      # "disallow:" are recognised. Agent names are stored lower-cased.
      #
      # After reading, the records are reordered so that the record naming the
      # "*" agent comes last (only the last such record is kept), which lets a
      # first-match lookup prefer specific agents over the wildcard.
      #
      # Returns the reordered array of records.
      # Raises RuntimeError when a line in the :rule state is not a blank line,
      # a comment, or a recognised field.
      def parse(io)
        blank_line   = /\A\s*\z/
        comment_line = /\A\s*\#.*\z/
        # Left unanchored: whatever follows the agent token on the first
        # User-agent line of a record (e.g. a trailing comment) is ignored.
        first_agent  = /\AUser-agent:\s*([^\x00-\x20\x7f-\xff\(\)<>@,;:\\"\/\[\]\?={}]+)/in
        next_agent   = /\AUser-agent:\s*([^\x00-\x20\x7f-\xff\(\)<>@,;:\\"\/\[\]\?={}]+)(?:\s*\#.*)?\z/in
        disallow     = /\ADisallow:\s*((?:[a-zA-Z0-9~$\-_.+!*'\(\),:@&=]|%[0-9a-fA-F][0-9a-fA-F])*(?:\/(?:[a-zA-Z0-9~$\-_.+!*'\(\),:@&=]|%[0-9a-fA-F][0-9a-fA-F])*)*)(?:\s*\#.*)?\z/i
        allow        = /\AAllow:\s*((?:[a-zA-Z0-9~$\-_.+!*'\(\),:@&=]|%[0-9a-fA-F][0-9a-fA-F])*(?:\/(?:[a-zA-Z0-9~$\-_.+!*'\(\),:@&=]|%[0-9a-fA-F][0-9a-fA-F])*)*)(?:\s*\#.*)?\z/i
        extension    = /\A([^\x00-\x20\x7f-\xff\(\)<>@,;:\\"\/\[\]\?={}]+):\s*([^\x00-\x20\x7f-\xff\#]+)(?:\s*\#.*)?\z/n

        rule = nil
        state = :start
        while text = io.gets
          text.chomp!
          case state
          when :start
            case text
            when blank_line, comment_line
              # nothing to record outside a record
            when first_agent
              rule = Rule.new
              rule.agents << $1.downcase
              state = :agent
            end
          when :agent
            case text
            when blank_line
              state = :start
            when comment_line
              # comments do not end the agent list
            when next_agent
              rule.agents << $1.downcase
            when disallow
              rule.rules << [:disallow, $1]
              state = :rule
            when allow
              rule.rules << [:allow, $1]
              state = :rule
            when extension
              rule.extensions << [$1, $2]
              state = :rule
            end
          when :rule
            case text
            when blank_line
              # A blank line terminates the current record.
              @rules << rule
              rule = nil
              state = :start
            when comment_line
              # comments do not end the record
            when disallow
              rule.rules << [:disallow, $1]
            when allow
              rule.rules << [:allow, $1]
            when extension
              rule.extensions << [$1, $2]
            else
              raise "unparsable robots.txt line: #{text.inspect}"
            end
          end
        end
        # Input ended without a closing blank line: keep the pending record.
        @rules << rule if rule

        # Move the wildcard record behind all agent-specific records.
        specific = []
        wildcard = nil
        @rules.each do |record|
          if record.agents.include?("*")
            wildcard = record
          else
            specific << record
          end
        end
        specific << wildcard if wildcard
        @rules = specific
      end
      
      def to_s
        s = ""
        @rules.each {|r|
          s << r.to_s
        }
        s
      end

      # Returns the first stored record that applies to +user_agent+, or nil.
      # A record applies when one of its agent names is a substring of the
      # lower-cased +user_agent+, or when it names the "*" wildcard.
      def find_rule(user_agent)
        needle = user_agent.downcase
        @rules.find do |candidate|
          candidate.agents.any? { |name| needle.include?(name) || name == "*" }
        end
      end
      
      # True when +user_agent+ may fetch +path+. With no applicable record
      # everything is allowed; otherwise the record's own allowed? decides.
      def allowed?(user_agent, path)
        record = find_rule(user_agent)
        return true unless record

        record.allowed?(path)
      end
      
      # True when +user_agent+ must not fetch +path+: the negation of the
      # applicable record's allowed?, or false when no record applies.
      def disallowed?(user_agent, path)
        record = find_rule(user_agent)
        record ? !record.allowed?(path) : false
      end
    end # Robots
  end # Agent
end # Web
