#!/usr/bin/env ruby
#
# Copyright (c) 2006-2007 Gordon Gremme <gordon@gremme.org>
# Copyright (c) 2006-2007 Center for Bioinformatics, University of Hamburg
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

$:.push(File.dirname($0))
require 'gff3'

class Scanner
  def initialize(input)
    @input = input
    @line_buf = nil
  end
  def peek
    if @line_buf then
      return @line_buf
    end
    @line_buf = @input.gets
  end
  def gets
    if @line_buf then
      buf = @line_buf
      @line_buf = nil
      return buf
    end
    @input.gets
  end
  def lineno
    @input.lineno
  end
  def filename
    @input.filename
  end
end

class GMAPParser
  def initialize(sequences)
    @sequences = sequences
    @current_accession = nil
    @current_strand = nil
  end
  def parse(scanner)
    while scanner.peek
      if scanner.gets =~ /^>/ then
        if parse_paths(scanner) then
          parse_alignment(scanner)
        end
      end
    end
  end
  private
  def parse_paths(scanner)
    line = scanner.gets
    if line =~ /^Paths/ then
      # XXX: change parser to handle multiple paths
      if line =~ /^Paths \(1\):/ then
        while line = scanner.gets do
          if line =~ /Genomic pos:/ then
            if line =~ /^.*\(([+-])/ then
              @current_strand = $1
            else
              raise "could not parse strand on line #{scanner.lineno} of " +
                    "file #{scanner.filename}"
            end
          elsif line =~ /Accessions:/ then
            accession_parts = line.split(':')
            @current_accession = accession_parts[1].strip
            return true
          end
        end
      elsif line !~ /^Paths \(0\):/
        STDERR.puts "warning: skipping multiple path alignment on line " +
                    "#{scanner.lineno} of file #{scanner.filename}"
        return false
      end
    else
      raise "expecting 'Paths' on line #{scanner.lineno} of file " +
            "#{scanner.filename}"
    end
  end
  def parse_alignment(scanner)
    while line = scanner.gets do
      if line =~ /^Alignments:/ then
        # skip two lines
        scanner.gets
        scanner.gets
        exons = []
        while (line = scanner.gets) =~ /:/ do
          # process all alignment lines
          line =~ /^.*:(\d+)-(\d+)/
          exon = Range.new($1.to_i, $2.to_i)
          if exon.begin > exon.end then
            exon = Range.new(exon.end, exon.begin)
          end
          exons.push(exon)
        end
        if exons.size == 0 then
          raise "could not parse exon up to line #{scanner.lineno} in file " +
                "#{scanner.filename}"
        end
        # construct gene
        exon_border = []
        exon_border.push(exons.first.begin)
        exon_border.push(exons.first.end)
        exon_border.push(exons.last.begin)
        exon_border.push(exons.last.end)
        gene_range = Range.new(exon_border.min, exon_border.max)
        raise if gene_range.begin > gene_range.end
        if current_sequence = @sequences[@current_accession] then
          # the current sequence exists already -> make sure it is maximal
          current_sequence.update_range(gene_range.begin, gene_range.end)
        else
          # the current sequence does not exist -> add it
          @sequences[@current_accession] = Sequence.new(gene_range.begin,
                                                        gene_range.end)
        end
        current_sequence =  @sequences[@current_accession]
        gene = Gene.new(gene_range, @current_strand)
        exons.each { |exon| gene.add_exon(exon) }
        current_sequence.add_gene(gene)
        return
      end
    end
  end
end

# read input
sequences = {}
gmap_parser = GMAPParser.new(sequences)
gmap_parser.parse(Scanner.new(ARGF))

# output
gff3_output(sequences, "gmap")
