various optimizations to reduce compute time,

add more profiling options to the `make test` target
This commit is contained in:
Arthur Lu 2024-10-29 17:03:19 +00:00
parent aed3ac6f8f
commit a3beca4ed2
5 changed files with 132 additions and 153 deletions

View File

@ -7,11 +7,19 @@ build: clean
clean: clean:
@echo "======================== Cleaning Project ======================" @echo "======================== Cleaning Project ======================"
go clean go clean
rm -f dist/wfa.wasm rm -f dist/wfa.wasm cover.prof cpu.prof mem.prof test.test
test: test:
@echo "======================== Running Tests =========================" @echo "======================== Running Tests ========================="
go test -v -cover -coverpkg=./pkg/ -coverprofile coverage ./test/ go test -v -cover -coverpkg=./pkg/ -coverprofile cover.prof -cpuprofile cpu.prof -memprofile mem.prof ./test/
@echo "======================= Coverage Report ========================" @echo "======================= Coverage Report ========================"
go tool cover -func=coverage go tool cover -func=cover.prof
@rm -f coverage @rm -f cover.prof
@echo "==================== CPU Performance Report ===================="
go tool pprof -top cpu.prof
@rm -f cpu.prof
@echo "=================== Memory Performance Report =================="
go tool pprof -top mem.prof
@rm -f mem.prof
@rm -f test.test

View File

@ -16,16 +16,12 @@ func (a *IntegerSlice[T]) TranslateIndex(idx int) int {
func (a *IntegerSlice[T]) Valid(idx int) bool { func (a *IntegerSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx < len(a.valid) { // idx is in the slice return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
return a.valid[actualIdx]
} else { // idx is out of the slice
return false
}
} }
func (a *IntegerSlice[T]) Get(idx int) T { func (a *IntegerSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx < len(a.valid) { // idx is in the slice if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
return a.data[actualIdx] return a.data[actualIdx]
} else { // idx is out of the slice } else { // idx is out of the slice
return a.defaultValue return a.defaultValue
@ -36,12 +32,12 @@ func (a *IntegerSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx >= len(a.valid) { // idx is outside the slice if actualIdx >= len(a.valid) { // idx is outside the slice
// expand data array to actualIdx // expand data array to actualIdx
newData := make([]T, actualIdx+1) newData := make([]T, 2*actualIdx+1)
copy(newData, a.data) copy(newData, a.data)
a.data = newData a.data = newData
// expand valid array to actualIdx // expand valid array to actualIdx
newValid := make([]bool, actualIdx+1) newValid := make([]bool, 2*actualIdx+1)
copy(newValid, a.valid) copy(newValid, a.valid)
a.valid = newValid a.valid = newValid
} }
@ -50,6 +46,20 @@ func (a *IntegerSlice[T]) Set(idx int, value T) {
a.valid[actualIdx] = true a.valid[actualIdx] = true
} }
// Preallocate grows the backing storage so that both lo and hi (after
// TranslateIndex) are addressable without a later reallocation in Set.
//
// Entries that are already stored are kept: existing values and validity
// flags are copied into the enlarged arrays, and the call is a no-op when the
// slice is already large enough. Newly added slots hold the zero value of T
// and are flagged invalid.
func (a *IntegerSlice[T]) Preallocate(lo int, hi int) {
	// the larger translated index decides how many slots are required
	size := max(a.TranslateIndex(lo), a.TranslateIndex(hi)) + 1
	if size <= len(a.valid) {
		// already big enough; never shrink or throw away stored entries
		return
	}
	// grow the value array, carrying over what is already there
	newData := make([]T, size)
	copy(newData, a.data)
	a.data = newData
	// grow the validity flags in lockstep with the values
	newValid := make([]bool, size)
	copy(newValid, a.valid)
	a.valid = newValid
}
type PositiveSlice[T any] struct { type PositiveSlice[T any] struct {
data []T data []T
valid []bool valid []bool
@ -62,16 +72,12 @@ func (a *PositiveSlice[T]) TranslateIndex(idx int) int {
func (a *PositiveSlice[T]) Valid(idx int) bool { func (a *PositiveSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx >= 0 && actualIdx < len(a.valid) { // idx is in the slice return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
return a.valid[actualIdx]
} else { // idx is out of the slice
return false
}
} }
func (a *PositiveSlice[T]) Get(idx int) T { func (a *PositiveSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx >= 0 && actualIdx < len(a.valid) { // idx is in the slice if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
return a.data[actualIdx] return a.data[actualIdx]
} else { // idx is out of the slice } else { // idx is out of the slice
return a.defaultValue return a.defaultValue
@ -82,12 +88,12 @@ func (a *PositiveSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx) actualIdx := a.TranslateIndex(idx)
if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice
// expand data array to actualIdx // expand data array to actualIdx
newData := make([]T, actualIdx+1) newData := make([]T, 2*actualIdx+1)
copy(newData, a.data) copy(newData, a.data)
a.data = newData a.data = newData
// expand valid array to actualIdx // expand valid array to actualIdx
newValid := make([]bool, actualIdx+1) newValid := make([]bool, 2*actualIdx+1)
copy(newValid, a.valid) copy(newValid, a.valid)
a.valid = newValid a.valid = newValid
} }
@ -95,3 +101,15 @@ func (a *PositiveSlice[T]) Set(idx int, value T) {
a.data[actualIdx] = value a.data[actualIdx] = value
a.valid[actualIdx] = true a.valid[actualIdx] = true
} }
// Preallocate swaps in freshly allocated backing arrays with hi+1 slots each.
//
// Every slot starts as the zero value of T with its validity flag false.
// NOTE(review): the previous arrays are replaced, not copied, so anything
// stored before this call is no longer reachable through the slice — confirm
// callers only rely on this for slices whose contents may be discarded.
func (a *PositiveSlice[T]) Preallocate(hi int) {
	n := hi + 1
	// value storage
	a.data = make([]T, n)
	// validity flags, all false
	a.valid = make([]bool, n)
}

View File

@ -5,6 +5,11 @@ import (
"math" "math"
) )
// Result is the value returned by WFAlign.
type Result struct {
	// Score is the integer score reported for the alignment.
	// NOTE(review): presumably the total penalty of the best alignment — confirm in WFAlign.
	Score int
	// CIGAR is the alignment description string.
	// NOTE(review): presumably only populated when WFAlign is called with doCIGAR=true — confirm.
	CIGAR string
}
type Penalty struct { type Penalty struct {
M int M int
X int X int
@ -32,14 +37,14 @@ type WavefrontComponent struct {
A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront
} }
func NewWavefrontComponent() WavefrontComponent { func NewWavefrontComponent(preallocateSize int) WavefrontComponent {
// new wavefront component = { // new wavefront component = {
// lo = [0] // lo = [0]
// hi = [0] // hi = [0]
// W = [] // W = []
// A = [] // A = []
// } // }
return WavefrontComponent{ w := WavefrontComponent{
lo: &PositiveSlice[int]{ lo: &PositiveSlice[int]{
data: []int{0}, data: []int{0},
valid: []bool{true}, valid: []bool{true},
@ -48,98 +53,73 @@ func NewWavefrontComponent() WavefrontComponent {
data: []int{0}, data: []int{0},
valid: []bool{true}, valid: []bool{true},
}, },
W: &PositiveSlice[*IntegerSlice[int]]{}, W: &PositiveSlice[*IntegerSlice[int]]{
A: &PositiveSlice[*IntegerSlice[traceback]]{}, defaultValue: &IntegerSlice[int]{
data: []int{},
valid: []bool{},
},
},
A: &PositiveSlice[*IntegerSlice[traceback]]{
defaultValue: &IntegerSlice[traceback]{
data: []traceback{},
valid: []bool{},
},
},
} }
w.lo.Preallocate(preallocateSize)
w.hi.Preallocate(preallocateSize)
w.W.Preallocate(preallocateSize)
w.A.Preallocate(preallocateSize)
return w
} }
// get value for wavefront=score, diag=k => returns ok, value // get value for wavefront=score, diag=k => returns ok, value
func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) { func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) {
// if W[score][k] is valid return w.W.Valid(score) && w.W.Get(score).Valid(k), w.W.Get(score).Get(k)
if w.W.Valid(score) && w.W.Get(score).Valid(k) {
// return W[score][k]
return true, w.W.Get(score).Get(k)
} else {
return false, 0
}
} }
// set value for wavefront=score, diag=k // set value for wavefront=score, diag=k
func (w *WavefrontComponent) SetVal(score int, k int, val int) { func (w *WavefrontComponent) SetVal(score int, k int, val int) {
// if W[score] is valid w.W.Get(score).Set(k, val)
if w.W.Valid(score) {
// W[score][k] = val
w.W.Get(score).Set(k, val)
} else {
// W[score] = []
w.W.Set(score, &IntegerSlice[int]{})
// W[score][k] = val
w.W.Get(score).Set(k, val)
}
} }
// get alignment traceback for wavefront=score, diag=k => returns ok, value // get alignment traceback for wavefront=score, diag=k => returns ok, value
func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) { func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) {
// if W[score][k] is valid return w.A.Valid(score) && w.A.Get(score).Valid(k), w.A.Get(score).Get(k)
if w.A.Valid(score) && w.A.Get(score).Valid(k) {
// return W[score][k]
return true, w.A.Get(score).Get(k)
} else {
return false, 0
}
} }
// set alignment traceback for wavefront=score, diag=k // set alignment traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) { func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) {
// if A[score] is valid w.A.Get(score).Set(k, val)
if w.A.Valid(score) {
// A[score][k] = val
w.A.Get(score).Set(k, val)
} else {
// W[score] = []
w.A.Set(score, &IntegerSlice[traceback]{})
// W[score][k] = val
w.A.Get(score).Set(k, val)
}
} }
// get hi for wavefront=score // get hi for wavefront=score
func (w *WavefrontComponent) GetHi(score int) (bool, int) { func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
// if hi[score] is valid // if lo[score] and hi[score] are valid
if w.hi.Valid(score) { if w.lo.Valid(score) && w.hi.Valid(score) {
// return hi[score] // return lo[score] hi[score]
return true, w.hi.Get(score) return true, w.lo.Get(score), w.hi.Get(score)
} else { } else {
return false, 0 return false, 0, 0
} }
} }
// set hi for wavefront=score // set hi for wavefront=score
func (w *WavefrontComponent) SetHi(score int, hi int) { func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
// hi[score] = hi
w.hi.Set(score, hi)
}
// get lo for wavefront=score
func (w *WavefrontComponent) GetLo(score int) (bool, int) {
// if lo[score] is valid
if w.lo.Valid(score) {
// return lo[score]
return true, w.lo.Get(score)
} else {
return false, 0
}
}
// set hi for wavefront=score
func (w *WavefrontComponent) SetLo(score int, lo int) {
// lo[score] = lo // lo[score] = lo
w.lo.Set(score, lo) w.lo.Set(score, lo)
} // hi[score] = hi
w.hi.Set(score, hi)
type Result struct { // preemptively setup w.A
Score int w.A.Set(score, &IntegerSlice[traceback]{})
CIGAR string w.A.Get(score).Preallocate(lo, hi)
// preemptively setup w.W
w.W.Set(score, &IntegerSlice[int]{})
w.W.Get(score).Preallocate(lo, hi)
} }
func (w *WavefrontComponent) String(score int) string { func (w *WavefrontComponent) String(score int) string {

View File

@ -5,14 +5,12 @@ import (
"unicode/utf8" "unicode/utf8"
) )
func SafeMin(valids []bool, values []int) (bool, int) { func SafeMin(values []int, idx int) int {
ok, idx := SafeArgMin(valids, values) return values[idx]
return ok, values[idx]
} }
func SafeMax(valids []bool, values []int) (bool, int) { func SafeMax(values []int, idx int) int {
ok, idx := SafeArgMax(valids, values) return values[idx]
return ok, values[idx]
} }
func SafeArgMax(valids []bool, values []int) (bool, int) { func SafeArgMax(valids []bool, values []int) (bool, int) {
@ -66,50 +64,34 @@ func Splice(s string, c rune, idx int) string {
return s[:idx] + string(c) + s[idx:] return s[:idx] + string(c) + s[idx:]
} }
func NextLo(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) int { func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) {
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a := M.GetLo(score - x) a_ok, a_lo, a_hi := M.GetLoHi(score - x)
b_ok, b := M.GetLo(score - o - e) b_ok, b_lo, b_hi := M.GetLoHi(score - o - e)
c_ok, c := I.GetLo(score - e) c_ok, c_lo, c_hi := I.GetLoHi(score - e)
d_ok, d := D.GetLo(score - e) d_ok, d_lo, d_hi := D.GetLoHi(score - e)
ok, lo := SafeMin( ok_lo, idx := SafeArgMin(
[]bool{a_ok, b_ok, c_ok, d_ok}, []bool{a_ok, b_ok, c_ok, d_ok},
[]int{a, b, c, d}, []int{a_lo, b_lo, c_lo, d_lo},
) )
lo-- lo := SafeMin([]int{a_lo, b_lo, c_lo, d_lo}, idx) - 1
if ok {
M.SetLo(score, lo)
I.SetLo(score, lo)
D.SetLo(score, lo)
}
return lo
}
func NextHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) int { ok_hi, idx := SafeArgMax(
x := penalties.X
o := penalties.O
e := penalties.E
a_ok, a := M.GetHi(score - x)
b_ok, b := M.GetHi(score - o - e)
c_ok, c := I.GetHi(score - e)
d_ok, d := D.GetHi(score - e)
ok, hi := SafeMax(
[]bool{a_ok, b_ok, c_ok, d_ok}, []bool{a_ok, b_ok, c_ok, d_ok},
[]int{a, b, c, d}, []int{a_hi, b_hi, c_hi, d_hi},
) )
hi++ hi := SafeMax([]int{a_hi, b_hi, c_hi, d_hi}, idx) + 1
if ok {
M.SetHi(score, hi) if ok_lo && ok_hi {
I.SetHi(score, hi) M.SetLoHi(score, lo, hi)
D.SetHi(score, hi) I.SetLoHi(score, lo, hi)
D.SetLoHi(score, lo, hi)
} }
return hi return lo, hi
} }
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) { func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) {
@ -119,13 +101,10 @@ func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalti
a_ok, a := M.GetVal(score-o-e, k-1) a_ok, a := M.GetVal(score-o-e, k-1)
b_ok, b := I.GetVal(score-e, k-1) b_ok, b := I.GetVal(score-e, k-1)
ok, nextIVal := SafeMax([]bool{a_ok, b_ok}, []int{a, b})
if ok {
I.SetVal(score, k, nextIVal+1) // important that the +1 is here
}
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b}) ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b})
nextIVal := SafeMax([]int{a, b}, nextITraceback) + 1 // important that the +1 is here
if ok { if ok {
I.SetVal(score, k, nextIVal)
I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback]) I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback])
} }
} }
@ -137,13 +116,13 @@ func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalti
a_ok, a := M.GetVal(score-o-e, k+1) a_ok, a := M.GetVal(score-o-e, k+1)
b_ok, b := D.GetVal(score-e, k+1) b_ok, b := D.GetVal(score-e, k+1)
ok, nextDVal := SafeMax([]bool{a_ok, b_ok}, []int{a, b}) ok, nextDTraceback := SafeArgMax(
if ok { []bool{a_ok, b_ok},
D.SetVal(score, k, nextDVal) // nothing special []int{a, b},
} )
nextDVal := SafeMax([]int{a, b}, nextDTraceback) // nothing special
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b})
if ok { if ok {
D.SetVal(score, k, nextDVal)
D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback]) D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback])
} }
} }
@ -156,13 +135,11 @@ func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, sco
b_ok, b := I.GetVal(score, k) b_ok, b := I.GetVal(score, k)
c_ok, c := D.GetVal(score, k) c_ok, c := D.GetVal(score, k)
ok, nextMVal := SafeMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c}) ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c})
nextMVal := SafeMax([]int{a, b, c}, nextMTraceback)
if ok { if ok {
M.SetVal(score, k, nextMVal) M.SetVal(score, k, nextMVal)
}
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c})
if ok {
M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback]) M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback])
} }
} }

View File

@ -6,13 +6,13 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
A_k := m - n A_k := m - n
A_offset := m A_offset := m
score := 0 score := 0
M := NewWavefrontComponent() estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4
M := NewWavefrontComponent(estimatedScore)
M.SetLoHi(0, 0, 0)
M.SetVal(0, 0, 0) M.SetVal(0, 0, 0)
M.SetHi(0, 0)
M.SetLo(0, 0)
M.SetTraceback(0, 0, End) M.SetTraceback(0, 0, End)
I := NewWavefrontComponent() I := NewWavefrontComponent(estimatedScore)
D := NewWavefrontComponent() D := NewWavefrontComponent(estimatedScore)
for { for {
WFExtend(M, s1, n, s2, m, score) WFExtend(M, s1, n, s2, m, score)
@ -36,8 +36,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) { func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
_, lo := M.GetLo(score) _, lo, hi := M.GetLoHi(score)
_, hi := M.GetHi(score)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ {
// v = M[score][k] - k // v = M[score][k] - k
// h = M[score][k] // h = M[score][k]
@ -58,11 +57,8 @@ func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score in
} }
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) { func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) {
// get this score's lo // get this score's lo, hi
lo := NextLo(M, I, D, score, penalties) lo, hi := NextLoHi(M, I, D, score, penalties)
// get this score's hi
hi := NextHi(M, I, D, score, penalties)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ {
NextI(M, I, score, k, penalties) NextI(M, I, score, k, penalties)