From a3beca4ed2562879ee626b0480aa18135bd1001a Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Tue, 29 Oct 2024 17:03:19 +0000 Subject: [PATCH] various optimizations to compute time, add more profiling options to make test --- Makefile | 16 +++++-- pkg/custom_slice.go | 50 +++++++++++++------- pkg/types.go | 112 ++++++++++++++++++-------------------------- pkg/utils.go | 87 +++++++++++++--------------------- pkg/wfa.go | 20 ++++---- 5 files changed, 132 insertions(+), 153 deletions(-) diff --git a/Makefile b/Makefile index b4c5a27..60d06bd 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,19 @@ build: clean clean: @echo "======================== Cleaning Project ======================" go clean - rm -f dist/wfa.wasm + rm -f dist/wfa.wasm cover.prof cpu.prof mem.prof test.test test: @echo "======================== Running Tests =========================" - go test -v -cover -coverpkg=./pkg/ -coverprofile coverage ./test/ + go test -v -cover -coverpkg=./pkg/ -coverprofile cover.prof -cpuprofile cpu.prof -memprofile mem.prof ./test/ @echo "======================= Coverage Report ========================" - go tool cover -func=coverage - @rm -f coverage \ No newline at end of file + go tool cover -func=cover.prof + @rm -f cover.prof + @echo "==================== CPU Performance Report ====================" + go tool pprof -top cpu.prof + @rm -f cpu.prof + @echo "=================== Memory Performance Report ==================" + go tool pprof -top mem.prof + @rm -f mem.prof + + @rm -f test.test \ No newline at end of file diff --git a/pkg/custom_slice.go b/pkg/custom_slice.go index f213fe1..ea65e12 100644 --- a/pkg/custom_slice.go +++ b/pkg/custom_slice.go @@ -16,16 +16,12 @@ func (a *IntegerSlice[T]) TranslateIndex(idx int) int { func (a *IntegerSlice[T]) Valid(idx int) bool { actualIdx := a.TranslateIndex(idx) - if actualIdx < len(a.valid) { // idx is in the slice - return a.valid[actualIdx] - } else { // idx is out of the slice - return false - } + return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] } func (a *IntegerSlice[T]) Get(idx int) T { actualIdx := a.TranslateIndex(idx) - if actualIdx < len(a.valid) { // idx is in the slice + if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice return a.data[actualIdx] } else { // idx is out of the slice return a.defaultValue @@ -36,12 +32,12 @@ func (a *IntegerSlice[T]) Set(idx int, value T) { actualIdx := a.TranslateIndex(idx) if actualIdx >= len(a.valid) { // idx is outside the slice // expand data array to actualIdx - newData := make([]T, actualIdx+1) + newData := make([]T, 2*actualIdx+1) copy(newData, a.data) a.data = newData // expand valid array to actualIdx - newValid := make([]bool, actualIdx+1) + newValid := make([]bool, 2*actualIdx+1) copy(newValid, a.valid) a.valid = newValid } @@ -50,6 +46,20 @@ func (a *IntegerSlice[T]) Set(idx int, value T) { a.valid[actualIdx] = true } +func (a *IntegerSlice[T]) Preallocate(lo int, hi int) { + actualLo := a.TranslateIndex(lo) + actualHi := a.TranslateIndex(hi) + size := max(actualHi, actualLo) + + // expand data array to actualIdx + newData := make([]T, size+1) + a.data = newData + + // expand valid array to actualIdx + newValid := make([]bool, size+1) + a.valid = newValid +} + type PositiveSlice[T any] struct { data []T valid []bool @@ -62,16 +72,12 @@ func (a *PositiveSlice[T]) TranslateIndex(idx int) int { func (a *PositiveSlice[T]) Valid(idx int) bool { actualIdx := a.TranslateIndex(idx) - if actualIdx >= 0 && actualIdx < len(a.valid) { // idx is in the slice - return a.valid[actualIdx] - } else { // idx is out of the slice - return false - } + return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] } func (a *PositiveSlice[T]) Get(idx int) T { actualIdx := a.TranslateIndex(idx) - if actualIdx >= 0 && actualIdx < len(a.valid) { // idx is in the slice + if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice return a.data[actualIdx] } else { // idx is out of the slice return a.defaultValue @@ -82,12 +88,12 @@ func (a *PositiveSlice[T]) Set(idx int, value T) { actualIdx := a.TranslateIndex(idx) if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice // expand data array to actualIdx - newData := make([]T, actualIdx+1) + newData := make([]T, 2*actualIdx+1) copy(newData, a.data) a.data = newData // expand valid array to actualIdx - newValid := make([]bool, actualIdx+1) + newValid := make([]bool, 2*actualIdx+1) copy(newValid, a.valid) a.valid = newValid } @@ -95,3 +101,15 @@ func (a *PositiveSlice[T]) Set(idx int, value T) { a.data[actualIdx] = value a.valid[actualIdx] = true } + +func (a *PositiveSlice[T]) Preallocate(hi int) { + size := hi + + // expand data array to actualIdx + newData := make([]T, size+1) + a.data = newData + + // expand valid array to actualIdx + newValid := make([]bool, size+1) + a.valid = newValid +} diff --git a/pkg/types.go b/pkg/types.go index 46ffbcb..ac605d1 100644 --- a/pkg/types.go +++ b/pkg/types.go @@ -5,6 +5,11 @@ import ( "math" ) +type Result struct { + Score int + CIGAR string +} + type Penalty struct { M int X int @@ -32,14 +37,14 @@ type WavefrontComponent struct { A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront } -func NewWavefrontComponent() WavefrontComponent { +func NewWavefrontComponent(preallocateSize int) WavefrontComponent { // new wavefront component = { // lo = [0] // hi = [0] // W = [] // A = [] // } - return WavefrontComponent{ + w := WavefrontComponent{ lo: &PositiveSlice[int]{ data: []int{0}, valid: []bool{true}, @@ -48,98 +53,73 @@ func NewWavefrontComponent() WavefrontComponent { data: []int{0}, valid: []bool{true}, }, - W: &PositiveSlice[*IntegerSlice[int]]{}, - A: &PositiveSlice[*IntegerSlice[traceback]]{}, + W: &PositiveSlice[*IntegerSlice[int]]{ + defaultValue: &IntegerSlice[int]{ + data: []int{}, + valid: []bool{}, + }, + }, + A: &PositiveSlice[*IntegerSlice[traceback]]{ + defaultValue: &IntegerSlice[traceback]{ + data: []traceback{}, + valid: []bool{}, + }, + }, } + + w.lo.Preallocate(preallocateSize) + w.hi.Preallocate(preallocateSize) + w.W.Preallocate(preallocateSize) + w.A.Preallocate(preallocateSize) + + return w } // get value for wavefront=score, diag=k => returns ok, value func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) { - // if W[score][k] is valid - if w.W.Valid(score) && w.W.Get(score).Valid(k) { - // return W[score][k] - return true, w.W.Get(score).Get(k) - } else { - return false, 0 - } + return w.W.Valid(score) && w.W.Get(score).Valid(k), w.W.Get(score).Get(k) } // set value for wavefront=score, diag=k func (w *WavefrontComponent) SetVal(score int, k int, val int) { - // if W[score] is valid - if w.W.Valid(score) { - // W[score][k] = val - w.W.Get(score).Set(k, val) - } else { - // W[score] = [] - w.W.Set(score, &IntegerSlice[int]{}) - // W[score][k] = val - w.W.Get(score).Set(k, val) - } + w.W.Get(score).Set(k, val) } // get alignment traceback for wavefront=score, diag=k => returns ok, value func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) { - // if W[score][k] is valid - if w.A.Valid(score) && w.A.Get(score).Valid(k) { - // return W[score][k] - return true, w.A.Get(score).Get(k) - } else { - return false, 0 - } + return w.A.Valid(score) && w.A.Get(score).Valid(k), w.A.Get(score).Get(k) } // set alignment traceback for wavefront=score, diag=k func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) { - // if A[score] is valid - if w.A.Valid(score) { - // A[score][k] = val - w.A.Get(score).Set(k, val) - } else { - // W[score] = [] - w.A.Set(score, &IntegerSlice[traceback]{}) - // W[score][k] = val - w.A.Get(score).Set(k, val) - } + w.A.Get(score).Set(k, val) } // get hi for wavefront=score -func (w *WavefrontComponent) GetHi(score int) (bool, int) { - // if hi[score] is valid - if w.hi.Valid(score) { - // return hi[score] - return true, w.hi.Get(score) +func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) { + // if lo[score] and hi[score] are valid + if w.lo.Valid(score) && w.hi.Valid(score) { + // return lo[score] hi[score] + return true, w.lo.Get(score), w.hi.Get(score) } else { - return false, 0 + return false, 0, 0 } } // set hi for wavefront=score -func (w *WavefrontComponent) SetHi(score int, hi int) { - // hi[score] = hi - w.hi.Set(score, hi) -} - -// get lo for wavefront=score -func (w *WavefrontComponent) GetLo(score int) (bool, int) { - // if lo[score] is valid - if w.lo.Valid(score) { - // return lo[score] - return true, w.lo.Get(score) - } else { - return false, 0 - } -} - -// set hi for wavefront=score -func (w *WavefrontComponent) SetLo(score int, lo int) { +func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) { // lo[score] = lo w.lo.Set(score, lo) -} + // hi[score] = hi + w.hi.Set(score, hi) -type Result struct { - Score int - CIGAR string + // preemptively setup w.A + w.A.Set(score, &IntegerSlice[traceback]{}) + w.A.Get(score).Preallocate(lo, hi) + + // preemptively setup w.W + w.W.Set(score, &IntegerSlice[int]{}) + w.W.Get(score).Preallocate(lo, hi) } func (w *WavefrontComponent) String(score int) string { diff --git a/pkg/utils.go b/pkg/utils.go index c95c63d..49b4c5e 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -5,14 +5,12 @@ import ( "unicode/utf8" ) -func SafeMin(valids []bool, values []int) (bool, int) { - ok, idx := SafeArgMin(valids, values) - return ok, values[idx] +func SafeMin(values []int, idx int) int { + return values[idx] } -func SafeMax(valids []bool, values []int) (bool, int) { - ok, idx := SafeArgMax(valids, values) - return ok, values[idx] +func SafeMax(values []int, idx int) int { + return values[idx] } func SafeArgMax(valids []bool, values []int) (bool, int) { @@ -66,50 +64,34 @@ func Splice(s string, c rune, idx int) string { return s[:idx] + string(c) + s[idx:] } -func NextLo(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) int { +func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) { x := penalties.X o := penalties.O e := penalties.E - a_ok, a := M.GetLo(score - x) - b_ok, b := M.GetLo(score - o - e) - c_ok, c := I.GetLo(score - e) - d_ok, d := D.GetLo(score - e) + a_ok, a_lo, a_hi := M.GetLoHi(score - x) + b_ok, b_lo, b_hi := M.GetLoHi(score - o - e) + c_ok, c_lo, c_hi := I.GetLoHi(score - e) + d_ok, d_lo, d_hi := D.GetLoHi(score - e) - ok, lo := SafeMin( + ok_lo, idx := SafeArgMin( []bool{a_ok, b_ok, c_ok, d_ok}, - []int{a, b, c, d}, + []int{a_lo, b_lo, c_lo, d_lo}, ) - lo-- - if ok { - M.SetLo(score, lo) - I.SetLo(score, lo) - D.SetLo(score, lo) - } - return lo -} + lo := SafeMin([]int{a_lo, b_lo, c_lo, d_lo}, idx) - 1 -func NextHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) int { - x := penalties.X - o := penalties.O - e := penalties.E - - a_ok, a := M.GetHi(score - x) - b_ok, b := M.GetHi(score - o - e) - c_ok, c := I.GetHi(score - e) - d_ok, d := D.GetHi(score - e) - - ok, hi := SafeMax( + ok_hi, idx := SafeArgMax( []bool{a_ok, b_ok, c_ok, d_ok}, - []int{a, b, c, d}, + []int{a_hi, b_hi, c_hi, d_hi}, ) - hi++ - if ok { - M.SetHi(score, hi) - I.SetHi(score, hi) - D.SetHi(score, hi) + hi := SafeMax([]int{a_hi, b_hi, c_hi, d_hi}, idx) + 1 + + if ok_lo && ok_hi { + M.SetLoHi(score, lo, hi) + I.SetLoHi(score, lo, hi) + D.SetLoHi(score, lo, hi) } - return hi + return lo, hi } func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) { @@ -119,13 +101,10 @@ func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalti a_ok, a := M.GetVal(score-o-e, k-1) b_ok, b := I.GetVal(score-e, k-1) - ok, nextIVal := SafeMax([]bool{a_ok, b_ok}, []int{a, b}) - if ok { - I.SetVal(score, k, nextIVal+1) // important that the +1 is here - } - ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b}) + nextIVal := SafeMax([]int{a, b}, nextITraceback) + 1 // important that the +1 is here if ok { + I.SetVal(score, k, nextIVal) I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback]) } } @@ -137,13 +116,13 @@ func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalti a_ok, a := M.GetVal(score-o-e, k+1) b_ok, b := D.GetVal(score-e, k+1) - ok, nextDVal := SafeMax([]bool{a_ok, b_ok}, []int{a, b}) - if ok { - D.SetVal(score, k, nextDVal) // nothing special - } - - ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b}) + ok, nextDTraceback := SafeArgMax( + []bool{a_ok, b_ok}, + []int{a, b}, + ) + nextDVal := SafeMax([]int{a, b}, nextDTraceback) // nothing special if ok { + D.SetVal(score, k, nextDVal) D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback]) } } @@ -156,13 +135,11 @@ func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, sco b_ok, b := I.GetVal(score, k) c_ok, c := D.GetVal(score, k) - ok, nextMVal := SafeMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c}) + ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c}) + nextMVal := SafeMax([]int{a, b, c}, nextMTraceback) + if ok { M.SetVal(score, k, nextMVal) - } - - ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c}) - if ok { M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback]) } } diff --git a/pkg/wfa.go b/pkg/wfa.go index 1e442d6..876cf10 100644 --- a/pkg/wfa.go +++ b/pkg/wfa.go @@ -6,13 +6,13 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result { A_k := m - n A_offset := m score := 0 - M := NewWavefrontComponent() + estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4 + M := NewWavefrontComponent(estimatedScore) + M.SetLoHi(0, 0, 0) M.SetVal(0, 0, 0) - M.SetHi(0, 0) - M.SetLo(0, 0) M.SetTraceback(0, 0, End) - I := NewWavefrontComponent() - D := NewWavefrontComponent() + I := NewWavefrontComponent(estimatedScore) + D := NewWavefrontComponent(estimatedScore) for { WFExtend(M, s1, n, s2, m, score) @@ -36,8 +36,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result { } func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) { - _, lo := M.GetLo(score) - _, hi := M.GetHi(score) + _, lo, hi := M.GetLoHi(score) for k := lo; k <= hi; k++ { // v = M[score][k] - k // h = M[score][k] @@ -58,11 +57,8 @@ func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score in } func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) { - // get this score's lo - lo := NextLo(M, I, D, score, penalties) - - // get this score's hi - hi := NextHi(M, I, D, score, penalties) + // get this score's lo, hi + lo, hi := NextLoHi(M, I, D, score, penalties) for k := lo; k <= hi; k++ { NextI(M, I, score, k, penalties)