#!/usr/bin/make -rRf
# Run the ABySS paired-end assembler.
# Written by Shaun Jackman <sjackman@bcgsc.ca>.

# Run every recipe with bash, with pipefail enabled so that a pipeline
# fails when any of its stages fails.
SHELL := /bin/bash -o pipefail

# On Mac OS X this environment variable has to be defined in order to
# read compressed files.
DYLD_FORCE_FLAT_NAMESPACE := 1
export DYLD_FORCE_FLAT_NAMESPACE

# Sun Grid Engine (SGE) integration: take the defaults of name, k and
# np from the job's JOB_NAME, SGE_TASK_ID and NSLOTS variables.
ifdef JOB_NAME
name ?= ${JOB_NAME}
endif
ifdef SGE_TASK_ID
k ?= ${SGE_TASK_ID}
endif
# np is left unset when NSLOTS is exactly 1.
ifdef NSLOTS
ifneq (${NSLOTS}, 1)
np ?= ${NSLOTS}
endif
endif

# Integrate with Portable Batch System (PBS): take the defaults of
# name, k and np from PBS_JOBNAME, PBS_ARRAYID and PBS_NODEFILE.
ifdef PBS_JOBNAME
name?=$(PBS_JOBNAME)
endif
ifdef PBS_ARRAYID
k?=$(PBS_ARRAYID)
endif
ifdef PBS_NODEFILE
# Count the lines of the node file once, while the makefile is parsed.
# A recursive assignment would fork wc again on every expansion of
# NSLOTS or np. strip removes any padding that wc prints around the
# number, so that the comparison with 1 below is reliable.
NSLOTS:=$(strip $(shell wc -l <$(PBS_NODEFILE)))
# np is left unset when the node file has exactly one line.
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with IBM LoadLeveler: take the defaults of name, k and np
# from LOADL_JOB_NAME, LOADL_STEP_ID and LOADL_HOSTFILE.
ifdef LOADL_JOB_NAME
name?=$(LOADL_JOB_NAME)
endif
ifdef LOADL_STEP_ID
k?=$(LOADL_STEP_ID)
endif
ifdef LOADL_HOSTFILE
# Count the lines of the host file once, while the makefile is parsed.
# A recursive assignment would fork wc again on every expansion of
# NSLOTS or np. strip removes any padding that wc prints around the
# number, so that the comparison with 1 below is reliable.
NSLOTS:=$(strip $(shell wc -l <$(LOADL_HOSTFILE)))
# np is left unset when the host file has exactly one line.
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Launcher used to start ABYSS-P when np is set.
mpirun?=$(shell which mpirun)

# When ABYSS is not found on PATH, fall back to the directory that
# holds this makefile.
path?=$(shell if ! which ABYSS >/dev/null 2>/dev/null; then \
	dirname $(MAKEFILE_LIST); fi)
# Test the expanded value rather than using ifdef: ifdef does not
# expand its argument, so it is true even when the shell command above
# prints nothing. PATH would then gain an empty leading entry, which
# the shell treats as the current directory.
ifneq ($(strip $(path)),)
PATH:=$(path):$(PATH)
endif

# The assembly name is mandatory.
ifndef name
$(error missing argument name)
endif

# map applies the function named by its first argument to every word
# of its second argument; deref returns the value of the variable named
# by its argument. They are defined unconditionally because rules
# further down call map even when lib was not supplied by the user.
map=$(foreach a,$(2),$(call $(1),$(a)))
deref=$($1)

ifdef lib
# Every word of lib names a variable that holds the read files of one
# library; in defaults to the files of all of those libraries.
in?=$(call map, deref, $(lib))
else
ifdef in
# Only in was given: treat it as a single library named after the
# assembly.
lib?=$(name)
$(lib)?=$(in)
endif
endif
# pe and mp default to the libraries in lib.
pe?=$(lib)
mp?=$(pe)

# Normalise the whitespace of the read file lists. override makes the
# assignment take effect even for values given on the command line.
ifdef in
override in := $(strip ${in})
endif
ifdef se
override se := $(strip ${se})
endif

# Mandatory parameters: some reads (via in, lib or se) and k.
ifeq (${in}${se},)
$(error missing argument in, lib or se)
endif
ifndef k
$(error missing argument k)
endif

# ABYSS parameters
# k and q (default 3) are always passed to the assembler.
q ?= 3
abyssopt += -k$(k) -q$(q)
# The l argument is rejected outright.
ifdef l
$(error argument l is deprecated)
endif
# Optional switches, forwarded only when the variable has a value.
ifdef e
abyssopt += -e$(e)
endif
ifdef E
abyssopt += -E$(E)
endif
ifdef t
abyssopt += -t$(t)
endif
ifdef c
abyssopt += -c$(c)
endif
# b is forwarded to PopBubbles as well.
ifdef b
abyssopt += -b$(b)
pbopt += -b$(b)
endif
# $* refers to the stem of the pattern rule whose recipe uses abyssopt.
abyssopt += $(v) --coverage-hist=coverage.hist -s $*-bubbles.fa

# Common paired-end parameters
# j is passed as -j to the tools run by the recipes below.
ifdef PE_HOSTFILE
# Look up the second column of the PE_HOSTFILE line whose first column
# is this host's name.
hostname?=$(shell hostname -f)
j?=$(shell awk '$$1 == "$(hostname)" {print $$2}' $(PE_HOSTFILE))
# Expand the lookup once, here. Left recursive, every recipe that uses
# j would fork hostname and awk again.
j:=$(j)
endif
# Fall back to np, and to 2 when np is empty as well.
ifeq ($j,)
j:=$(np)
endif
ifeq ($j,)
j:=2
endif

# AdjList parameters
m ?= 30

# PopBubbles parameters
p ?= 0.9
pbopt += -p$(p)

# Select the aligner program: aligner=X selects abyss-X unless align is
# given explicitly.
aligner ?= map
align ?= abyss-$(aligner)

# fixmate parameters: the output of abyss-kaligner goes through
# ParseAligns, which is also given k; any other aligner goes through
# abyss-fixmate.
ifneq ($(align),abyss-kaligner)
fixmate ?= abyss-fixmate
else
fixmate ?= ParseAligns
fmopt = -k$(k)
endif

# DistanceEst parameters
s ?= 200
n ?= 10
libs = $(pe) $(mp)
# Give every library its own <lib>_s and <lib>_n variables, defaulting
# to the current values of s and n.
$(foreach i,$(libs),$(eval $(i)_s ?= $(s))$(eval $(i)_n ?= $(n)))

# SimpleGraph parameters
ifdef d
sgopt += -d$(d)
endif

# PathConsensus parameters
ifdef a
pcopt += -a$(a)
endif
pcopt += -p$(p)

# Do not export the read lists or the library variables to the
# environment of the commands run by make.
unexport in se $(lib) $(pe) $(mp)

# Default goal: the single-end contigs when in is empty, otherwise the
# paired-end contigs.
ifeq ($(in),)
default: se-contigs se-dot
else
default: pe-contigs pe-dot
endif
# Build the scaffolds as well whenever mp is not empty.
ifneq ($(mp),)
default: scaffolds
endif

# Short phony names for the files produced at each stage.

se-contigs: $(name)-se-contigs.fa

se-dot: $(name)-3.dot

pe-index: $(name)-3.fa.fm

pe-sam: $(patsubst %,%-3.sam.gz,$(pe))

pe-bam: $(patsubst %,%-3.bam.bai,$(pe))

pe-contigs: $(name)-contigs.fa

pe-dot: $(name)-contigs.dot

mp-index: $(name)-6.fa.fm

mp-sam: $(patsubst %,%-6.sam.gz,$(mp))

mp-bam: $(patsubst %,%-6.bam.bai,$(mp))

scaffolds: $(name)-scaffolds.fa

# Everything the default goal builds, plus the final BAM file.
all: default bam

# Delete the files matching these patterns from the working directory.
clean:
	rm -f *.adj *.dot *.sam.gz *.hist *.dist *.path *.path[123]

.PHONY: all bam clean default \
	se-contigs se-dot \
	pe-index pe-sam pe-bam pe-contigs pe-dot \
	mp-index mp-sam mp-bam scaffolds
# Remove the target of a recipe that fails.
.DELETE_ON_ERROR:
# Treat every target as secondary so that intermediate files are kept.
.SECONDARY:

# Utilities

# Run abyss-index on a FASTA file to produce its .fm file.
%.fa.fm: %.fa
	abyss-index $(v) $<

# Convert a gzipped SAM file to BAM.
%.bam: %.sam.gz
	samtools view -Sb $< -o $@

# Run samtools index on a BAM file.
%.bam.bai: %.bam
	samtools index $<

# Single-end assembly

# Assemble the reads in in and se: with ABYSS when np is not set,
# otherwise with ABYSS-P started through mpirun on np processes.
%-1.fa:
ifndef np
	ABYSS $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
else
	$(mpirun) -np $(np) ABYSS-P $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
endif

# Run AdjList on the assembled contigs.
%-1.adj: %-1.fa
	AdjList $(v) -k$(k) -m$(m) $< >$@

# Remove shim contigs
# The graph is written to the target; standard output is saved as
# <stem>-1.path.

%-2.adj: %-1.adj
	abyss-filtergraph $(v) -k$(k) -g $@ $^ >$*-1.path

# Pop bubbles.
# One run of PopBubbles produces both <stem>-2.path and <stem>-3.adj.

%-2.path %-3.adj: %-1.fa %-2.adj
	PopBubbles $(v) -j$(j) -k$(k) $(pbopt) $(POPBUBBLES_OPTIONS) -g $*-3.adj $^ >$*-2.path

# Merge the contigs, then have awk write <stem>-indel.fa from
# <stem>-2.path and <stem>-1.fa.

%-3.fa: %-1.fa %-2.adj %-2.path
	MergeContigs -k$(k) -o $@ $^
	awk '!/^>/ {x[">" $$1]=1; next} {getline s} $$1 in x {print $$0 "\n" s}' \
		$*-2.path $*-1.fa >$*-indel.fa

# The single-end contigs are a symbolic link to the -3 assembly.

%-se-contigs.fa: %-3.fa
	ln -sf $< $@

# Estimate distances between contigs
#
# In every rule of this section the stem ($*) is a library name, and
# $($*) is the value of the variable of that name, which is passed to
# the aligner together with $(name)-3.fa.
#
# NOTE(review): several pattern rules below build the same targets
# (%-3.hist and %-3.dist). Which one make selects depends on which
# prerequisites exist or can be built, so keep the rules in this order.

# Align one library, pipe the result through ${fixmate} (given
# -h $*-3.hist) and sort, and store the output as gzipped SAM.
%-3.sam.gz %-3.hist: $(name)-3.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-3.hist \
		|sort -snk3 -k4 \
		|gzip >$*-3.sam.gz

# Same pipeline, but the output is stored as BAM.
%-3.bam %-3.hist: $(name)-3.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-3.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-3.bam

# Run DistanceEst on a stored gzipped SAM file and its histogram.
# -s and -n come from the per-library variables <lib>_s and <lib>_n.
%-3.dist: %-3.sam.gz %-3.hist
	gunzip -c $< \
	|DistanceEst $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-3.hist

# Run DistanceEst on a stored BAM file and its histogram.
%-3.dist: %-3.bam %-3.hist
	samtools view -h $< \
	|DistanceEst $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-3.hist

# Align and run DistanceEst in a single pipeline, without storing the
# alignments. $*-3.hist is passed to both ${fixmate} and DistanceEst.
%-3.dist: $(name)-3.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-3.hist \
		|sort -snk3 -k4 \
		|DistanceEst $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-3.hist

# The distance estimate file of every pe library.
dist = $(patsubst %,%-3.dist,$(pe))

# Unless pe is exactly the single library $(name), build
# $(name)-3.dist and $(name)-3.bam by combining the per-library files.
ifneq ($(name)-3.dist, $(dist))
$(name)-3.dist: $(name)-3.fa $(dist)
	abyss-todot $(v) --dist -e $^ >$@

$(name)-3.bam: $(patsubst %,%-3.bam,$(pe))
	samtools merge -r $@ $^
endif

# Find overlaps between contigs
# One run of Overlap produces both <stem>-4.fa and <stem>-4.adj.

%-4.fa %-4.adj: %-3.fa %-3.adj %-3.dist
	Overlap $(v) $(OVERLAP_OPTIONS) -k$(k) -g $*-4.adj -o $*-4.fa $^

# Paired-end assembly
# The paths are built in three steps, each one reading the -4 graph
# and, after the first step, the output of the step before it.

%-4.path1: %-4.adj %-3.dist
	SimpleGraph $(v) $(sgopt) $(SIMPLEGRAPH_OPTIONS) -j$(j) -k$(k) -o $@ $^

%-4.path2: %-4.adj %-4.path1
	MergePaths $(v) $(MERGEPATHS_OPTIONS) -j$(j) -k$(k) -o $@ $^

%-4.path3: %-4.adj %-4.path2
	PathOverlap --assemble $(v) -k$(k) $^ >$@

# Two ways of building <stem>-6.fa, selected by whether cs is set.
# NOTE(review): cs presumably stands for colour-space reads - confirm.
ifndef cs

# Concatenate the FASTA prerequisites and pipe them to PathConsensus,
# which also receives the -4 graph and paths and writes the -5 files.
%-5.path %-5.fa %-5.adj: %-3.fa %-4.fa %-4.adj %-4.path3
	cat $(filter %.fa,$^) \
		|PathConsensus $(v) -k$(k) $(pcopt) -o $*-5.path -s $*-5.fa -g $*-5.adj - $(filter-out %.fa,$^)

# Concatenate the three FASTA prerequisites and merge them along the
# -5 graph and paths.
%-6.fa: %-3.fa %-4.fa %-5.fa %-5.adj %-5.path
	cat $(filter %.fa,$^) |MergeContigs $(v) -k$(k) -o $@ - $(filter-out %.fa,$^)

else

# PathConsensus is not run: the -5 graph and paths are symbolic links
# to their -4 counterparts.
%-5.adj %-5.path: %-4.adj %-4.path3
	ln -sf $*-4.adj $*-5.adj
	ln -sf $*-4.path3 $*-5.path

# Concatenate the two FASTA prerequisites and merge them along the -4
# graph and paths.
%-cs.fa: %-3.fa %-4.fa %-4.adj %-4.path3
	cat $(filter %.fa,$^) |MergeContigs $(v) -k$(k) -o $@ - $(filter-out %.fa,$^)

# Align the reads to the merged contigs with KAligner and pipe the
# alignments to Consensus.
%-6.fa: %-cs.fa
	KAligner $(v) --seq -m -j$(j) -k$(k) $(in) $(se) $< \
		|Consensus $(v) -o $@ $<

endif

# The paired-end contigs are a symbolic link to the -6 assembly.
%-contigs.fa: %-6.fa
	ln -sf $< $@

# Convert the -3 graph to dot format.
%-3.dot: %-3.adj
	abyss-todot $(v) -k$(k) $< >$@

# Write the -6 graph in dot format from the -5 graph and paths.
%-6.dot: %-5.adj %-5.path
	PathOverlap --overlap $(v) --dot -k$(k) $^ >$@

# The graph of the paired-end contigs is a symbolic link to the -6 graph.
%-contigs.dot: %-6.dot
	ln -sf $< $@

# Estimate distances between contigs
#
# In every rule of this section the stem ($*) is a library name, and
# $($*) is the value of the variable of that name, which is passed to
# the aligner together with $(name)-6.fa.
#
# NOTE(review): several pattern rules below build the same targets
# (%-6.hist and %-6.dist.dot). Which one make selects depends on which
# prerequisites exist or can be built, so keep the rules in this order.

# Align one library, pipe the result through ${fixmate} (given
# -h $*-6.hist) and sort, and store the output as gzipped SAM.
%-6.sam.gz %-6.hist: $(name)-6.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-6.hist \
		|sort -snk3 -k4 \
		|gzip >$*-6.sam.gz

# Same pipeline, but the output is stored as BAM.
%-6.bam %-6.hist: $(name)-6.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-6.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-6.bam

# Run DistanceEst --dot on a stored gzipped SAM file and its histogram.
# -s and -n come from the per-library variables <lib>_s and <lib>_n.
%-6.dist.dot: %-6.sam.gz %-6.hist
	gunzip -c $< \
	|DistanceEst --dot $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-6.hist

# Run DistanceEst --dot on a stored BAM file and its histogram.
%-6.dist.dot: %-6.bam %-6.hist
	samtools view -h $< \
	|DistanceEst --dot $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-6.hist

# Align and run DistanceEst --dot in a single pipeline, without storing
# the alignments. $*-6.hist is passed to both ${fixmate} and DistanceEst.
%-6.dist.dot: $(name)-6.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) $(strip $($*)) $< \
		|${fixmate} $v ${fmopt} -h $*-6.hist \
		|sort -snk3 -k4 \
		|DistanceEst --dot $v -j$j -k$k -s$($*_s) -n$($*_n) -o $@ $*-6.hist

# Scaffold
# NOTE(review): the three recipes below always pass -v, whereas the
# other rules pass $(v) - confirm whether that is intended.

# Run abyss-scaffold on the -6 graph and the distance estimates of
# every mp library, using the global s and n.
%-6.path1: $(name)-6.dot $(patsubst %,%-6.dist.dot,$(mp))
	abyss-scaffold -v -k$(k) -s$(s) -n$(n) -g $@.dot $(SCAFFOLD_OPTIONS) $^ >$@

# Run PathConsensus on the scaffold paths; the file given with -s is
# /dev/null.
%-6.path2: %-6.fa %-6.dot %-6.path1
	PathConsensus -v -k$(k) -p1 -s /dev/null -o $@ $^

# Merge the contigs along the paths; <stem>-scaffolds.dot is given
# with -g.
%-scaffolds.fa: %-6.fa %-6.dot %-6.path2
	MergeContigs -v -k$(k) -g $*-scaffolds.dot -o $@ $^

# Final BAM file

# Select the assembly that bam aligns the reads to: the scaffolds when
# mp is not empty, otherwise the paired-end contigs when in is not
# empty, otherwise the single-end contigs.
# The expanded values are tested rather than using ifdef, because ifdef
# does not expand its argument: mp defaults to the text $(pe), which
# made ifdef mp true even when no library is defined, so the other two
# branches were never reached.
ifneq ($(mp),)
bam: $(name)-scaffolds.bam.bai
else
ifneq ($(in),)
bam: $(name)-contigs.bam.bai
else
bam: $(name)-se-contigs.bam.bai
endif
endif

# Align the reads of every library in lib, pe and mp (sort removes
# duplicate names; map and deref expand each name to the value of the
# variable of that name) to the assembly, pipe the result through
# ${fixmate} and sort, and store the output as BAM.
$(name)-contigs.bam $(name)-scaffolds.bam: %.bam: %.fa
	${align} $v -j$j -k$k $(ALIGNER_OPTIONS) \
		$(call map, deref, $(sort $(lib) $(pe) $(mp))) $< \
		|${fixmate} $v ${fmopt} \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $@
