9 Commits

Author SHA1 Message Date
63a375995c minor code cleanup, use uint64 for wavefront values 2025-05-07 21:20:13 +00:00
2351faf2d7 cleanup code 2025-05-01 18:04:42 +00:00
a446bbd923 fix readme,
update go mod
2025-03-12 00:09:18 +00:00
f1407fd045 remove unused PositiveSlice Preallocate 2024-11-12 19:04:02 +00:00
ce635cc2b1 remove wavefront component preallocation 2024-11-12 18:53:03 +00:00
f5d2528e20 fix comment 2024-11-12 18:47:25 +00:00
6f78825876 optimize build size by avoiding fmt,
pack lo/hi values into Wavefront
2024-11-12 18:44:12 +00:00
a878da42a3 update README 2024-11-08 22:49:37 +00:00
bd720f06fb minor optimization switching to wavefront pointers 2024-11-08 21:51:17 +00:00
10 changed files with 181 additions and 165 deletions

View File

@@ -1,4 +1,4 @@
.PHONY: build clean test .PHONY: build clean test dev-init
build: clean build: clean
@echo "======================== Building Binary =======================" @echo "======================== Building Binary ======================="
@@ -24,3 +24,7 @@ test:
@rm -f mem.prof @rm -f mem.prof
@rm -f test.test @rm -f test.test
dev-init:
apt install minify
go get -t wfa/test

View File

@@ -1,11 +1,13 @@
# Using WFA-JS # Using WFA-JS
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/tronnet/WFA-JS/releases) to your project. Add to your script: Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/alu/WFA-JS/releases) to your project. Add to your script:
``` ```
import wfa from "./wfa.js" import wfa from "./wfa.js"
await wfa("<path to wasm>") await wfa("<path to wasm>")
console.log(wfAlign(...)) let result = wfAlign(...)
operations = DecodeCIGAR(result.CIGAR)
// ...
``` ```
Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure. Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure.

10
go.mod
View File

@@ -1,15 +1,15 @@
module wfa module wfa
go 1.23.2 go 1.23.6
require ( require (
github.com/schollz/progressbar/v3 v3.17.0 github.com/schollz/progressbar/v3 v3.17.1
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
) )
require ( require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
golang.org/x/sys v0.26.0 // indirect golang.org/x/sys v0.27.0 // indirect
golang.org/x/term v0.25.0 // indirect golang.org/x/term v0.26.0 // indirect
) )

49
main.go
View File

@@ -1,7 +1,6 @@
package main package main
import ( import (
"fmt"
"syscall/js" "syscall/js"
wfa "wfa/pkg" wfa "wfa/pkg"
) )
@@ -15,32 +14,47 @@ func main() {
func wfAlign(this js.Value, args []js.Value) interface{} { func wfAlign(this js.Value, args []js.Value) interface{} {
if len(args) != 4 { if len(args) != 4 {
fmt.Println("invalid number of args, requires 4: s1, s2, penalties, doCIGAR") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "invalid number of args, requires 4: s1, s2, penalties, doCIGAR",
}
return js.ValueOf(resultMap)
} }
if args[0].Type() != js.TypeString { if args[0].Type() != js.TypeString {
fmt.Println("s1 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s1 should be a string",
}
return js.ValueOf(resultMap)
} }
s1 := args[0].String() s1 := args[0].String()
if args[1].Type() != js.TypeString { if args[1].Type() != js.TypeString {
fmt.Println("s2 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s2 should be a string",
}
return js.ValueOf(resultMap)
} }
s2 := args[1].String() s2 := args[1].String()
if args[2].Type() != js.TypeObject { if args[2].Type() != js.TypeObject {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() { if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
m := args[2].Get("m").Int() m := args[2].Get("m").Int()
@@ -56,8 +70,11 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
} }
if args[3].Type() != js.TypeBoolean { if args[3].Type() != js.TypeBoolean {
fmt.Println("doCIGAR should be a boolean") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "doCIGAR should be a boolean",
}
return js.ValueOf(resultMap)
} }
doCIGAR := args[3].Bool() doCIGAR := args[3].Bool()
@@ -65,8 +82,10 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
// Call the actual func. // Call the actual func.
result := wfa.WFAlign(s1, s2, penalties, doCIGAR) result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
resultMap := map[string]interface{}{ resultMap := map[string]interface{}{
"ok": true,
"score": result.Score, "score": result.Score,
"CIGAR": result.CIGAR, "CIGAR": result.CIGAR,
"error": "",
} }
return js.ValueOf(resultMap) return js.ValueOf(resultMap)
@@ -74,12 +93,12 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
func DecodeCIGAR(this js.Value, args []js.Value) interface{} { func DecodeCIGAR(this js.Value, args []js.Value) interface{} {
if len(args) != 1 { if len(args) != 1 {
fmt.Println("invalid number of args, requires 1: CIGAR") println("invalid number of args, requires 1: CIGAR")
return nil return nil
} }
if args[0].Type() != js.TypeString { if args[0].Type() != js.TypeString {
fmt.Println("CIGAR should be a string") println("CIGAR should be a string")
return nil return nil
} }

View File

@@ -6,50 +6,31 @@ type PositiveSlice[T any] struct {
defaultValue T defaultValue T
} }
func (a *PositiveSlice[T]) TranslateIndex(idx int) int {
return idx
}
func (a *PositiveSlice[T]) Valid(idx int) bool { func (a *PositiveSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx) return 0 <= idx && idx < len(a.valid) && a.valid[idx]
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
} }
func (a *PositiveSlice[T]) Get(idx int) T { func (a *PositiveSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx) if 0 <= idx && idx < len(a.valid) && a.valid[idx] { // idx is in the slice
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice return a.data[idx]
return a.data[actualIdx]
} else { // idx is out of the slice } else { // idx is out of the slice
return a.defaultValue return a.defaultValue
} }
} }
func (a *PositiveSlice[T]) Set(idx int, value T) { func (a *PositiveSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx) if idx >= len(a.valid) { // idx is outside the slice
if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice // expand data array to 2*idx
// expand data array to actualIdx newData := make([]T, 2*idx+1)
newData := make([]T, 2*actualIdx+1)
copy(newData, a.data) copy(newData, a.data)
a.data = newData a.data = newData
// expand valid array to actualIdx // expand valid array to 2*idx
newValid := make([]bool, 2*actualIdx+1) newValid := make([]bool, 2*idx+1)
copy(newValid, a.valid) copy(newValid, a.valid)
a.valid = newValid a.valid = newValid
} }
a.data[actualIdx] = value a.data[idx] = value
a.valid[actualIdx] = true a.valid[idx] = true
}
func (a *PositiveSlice[T]) Preallocate(hi int) {
size := hi
// expand data array to actualIdx
newData := make([]T, size+1)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, size+1)
a.valid = newValid
} }

View File

@@ -1,3 +1,5 @@
//go:build debug
package wfa package wfa
import ( import (
@@ -12,11 +14,13 @@ func (w *WavefrontComponent) String(score int) string {
max_hi := math.MinInt max_hi := math.MinInt
for i := 0; i <= score; i++ { for i := 0; i <= score; i++ {
if w.lo.Valid(i) && w.lo.Get(i) < min_lo { valid := w.W.Valid(i)
min_lo = w.lo.Get(i) lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
if valid && lo < min_lo {
min_lo = lo
} }
if w.hi.Valid(i) && w.hi.Get(i) > max_hi { if valid && hi > max_hi {
max_hi = w.hi.Get(i) max_hi = hi
} }
} }
@@ -40,9 +44,7 @@ func (w *WavefrontComponent) String(score int) string {
for i := 0; i <= score; i++ { for i := 0; i <= score; i++ {
s = s + "[" s = s + "["
lo := w.lo.Get(i) lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
hi := w.hi.Get(i)
// print out wavefront matrix
for k := min_lo; k <= max_hi; k++ { for k := min_lo; k <= max_hi; k++ {
valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k)) valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k))
if valid { if valid {

View File

@@ -25,49 +25,60 @@ const (
End End
) )
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 28 bits for the diag distance // bitpacked wavefront lo/hi values with 32 bits each
// technically this restricts to solutions within 268 million score but that should be sufficient for most cases type WavefrontLoHi uint64
type WavefrontValue uint32
// TODO: add 64 bit packed value in case more than 268 million score is needed func PackWavefrontLoHi(lo int, hi int) WavefrontLoHi {
loBM := int64(int32(lo)) & 0x0000_0000_FFFF_FFFF
hiBM := int64(int64(hi) << 32)
return WavefrontLoHi(hiBM | loBM)
}
func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
loBM := int(int32(lohi & 0x0000_0000_FFFF_FFFF))
hiBM := int(int32(lohi & 0xFFFF_FFFF_0000_0000 >> 32))
return loBM, hiBM
}
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 60 bits for the diag distance
type WavefrontValue uint64
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue // PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
func PackWavefrontValue(value uint32, traceback Traceback) WavefrontValue { func PackWavefrontValue(value uint64, traceback Traceback) WavefrontValue {
valueBM := value & 0x0FFF_FFFF validBM := uint64(0x8000_0000_0000_0000)
tracebackBM := uint32(traceback&0x0000_0007) << 28 tracebackBM := uint64(traceback&0x0000_0007) << 60
return WavefrontValue(0x8000_0000 | valueBM | tracebackBM) valueBM := uint64(value) & 0x0FFF_FFFF_FFFF_FFFF
return WavefrontValue(validBM | tracebackBM | valueBM)
} }
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback // UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint32, Traceback) { func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint64, Traceback) {
valueBM := uint32(wfv & 0x0FFF_FFFF) validBM := wfv&0x8000_0000_0000_0000 != 0
tracebackBM := uint8(wfv & 0x7000_0000 >> 28) tracebackBM := uint8(wfv & 0x7000_0000_0000_0000 >> 60)
validBM := wfv&0x8000_0000 != 0 valueBM := uint64(wfv & 0x0000_0000_FFFF_FFFF)
return validBM, valueBM, Traceback(tracebackBM) return validBM, valueBM, Traceback(tracebackBM)
} }
// Wavefront: stores a single wavefront, stores wavefront's lo value and hi is naturally lo + len(data) // Wavefront: stores a single wavefront, stores wavefront's lo value and hi is naturally lo + len(data)
type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8 type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8
data []WavefrontValue data []WavefrontValue
lo int lohi WavefrontLoHi
} }
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive) // NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
func NewWavefront(lo int, hi int) *Wavefront { func NewWavefront(lo int, hi int) *Wavefront {
a := &Wavefront{} a := &Wavefront{}
a.lohi = PackWavefrontLoHi(lo, hi)
a.lo = lo size := hi - lo
size := a.TranslateIndex(hi) a.data = make([]WavefrontValue, size+1)
newData := make([]WavefrontValue, size+1)
a.data = newData
return a return a
} }
// TranslateIndex: utility function for getting the data index given a diagonal // TranslateIndex: utility function for getting the data index given a diagonal
func (a *Wavefront) TranslateIndex(diagonal int) int { func (a *Wavefront) TranslateIndex(diagonal int) int {
return diagonal - a.lo lo := int(int32(a.lohi & 0x0000_0000_FFFF_FFFF))
return diagonal - lo
} }
// Get: returns WavefrontValue for given diagonal // Get: returns WavefrontValue for given diagonal
@@ -95,27 +106,17 @@ func (a *Wavefront) Set(diagonal int, value WavefrontValue) {
// WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi // WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi
type WavefrontComponent struct { type WavefrontComponent struct {
lo *PositiveSlice[int] // lo for each wavefront W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
hi *PositiveSlice[int] // hi for each wavefront
W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
} }
// NewWavefrontComponent: returns initialized WavefrontComponent // NewWavefrontComponent: returns initialized WavefrontComponent
func NewWavefrontComponent(preallocateSize int) WavefrontComponent { func NewWavefrontComponent() *WavefrontComponent {
// new wavefront component = { // new wavefront component = {
// lo = [0] // lo = [0]
// hi = [0] // hi = [0]
// W = [] // W = []
// } // }
w := WavefrontComponent{ w := &WavefrontComponent{
lo: &PositiveSlice[int]{
data: []int{0},
valid: []bool{true},
},
hi: &PositiveSlice[int]{
data: []int{0},
valid: []bool{true},
},
W: &PositiveSlice[*Wavefront]{ W: &PositiveSlice[*Wavefront]{
defaultValue: &Wavefront{ defaultValue: &Wavefront{
data: []WavefrontValue{0}, data: []WavefrontValue{0},
@@ -123,42 +124,27 @@ func NewWavefrontComponent(preallocateSize int) WavefrontComponent {
}, },
} }
w.lo.Preallocate(preallocateSize)
w.hi.Preallocate(preallocateSize)
w.W.Preallocate(preallocateSize)
return w return w
} }
// GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback // GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint32, Traceback) { func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint64, Traceback) {
return UnpackWavefrontValue(w.W.Get(score).Get(k)) return UnpackWavefrontValue(w.W.Get(score).Get(k))
} }
// SetVal: set value, traceback for wavefront=score, diag=k // SetVal: set value, traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetVal(score int, k int, val uint32, tb Traceback) { func (w *WavefrontComponent) SetVal(score int, k int, val uint64, tb Traceback) {
w.W.Get(score).Set(k, PackWavefrontValue(val, tb)) w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
} }
// GetLoHi: get lo and hi for wavefront=score // GetLoHi: get lo and hi for wavefront=score
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) { func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
// if lo[score] and hi[score] are valid lo, hi := UnpackWavefrontLoHi(w.W.Get(score).lohi)
if w.lo.Valid(score) && w.hi.Valid(score) { return w.W.Valid(score), lo, hi
// return lo[score] hi[score]
return true, w.lo.Get(score), w.hi.Get(score)
} else {
return false, 0, 0
}
} }
// SetLoHi: set lo and hi for wavefront=score // SetLoHi: set lo and hi for wavefront=score
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) { func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
// lo[score] = lo
w.lo.Set(score, lo)
// hi[score] = hi
w.hi.Set(score, hi)
// preemptively setup w.W
b := NewWavefront(lo, hi) b := NewWavefront(lo, hi)
w.W.Set(score, b) w.W.Set(score, b)
} }

View File

@@ -7,6 +7,7 @@ import (
"golang.org/x/exp/constraints" "golang.org/x/exp/constraints"
) )
// convert an unsigned into to string
func UIntToString(num uint) string { // num assumed to be positive func UIntToString(num uint) string { // num assumed to be positive
var builder strings.Builder var builder strings.Builder
@@ -25,6 +26,7 @@ func UIntToString(num uint) string { // num assumed to be positive
return string(str) return string(str)
} }
// decode runlength encoded string such as CIGARs
func RunLengthDecode(encoded string) string { func RunLengthDecode(encoded string) string {
decoded := strings.Builder{} decoded := strings.Builder{}
length := len(encoded) length := len(encoded)
@@ -51,32 +53,17 @@ func RunLengthDecode(encoded string) string {
return decoded.String() return decoded.String()
} }
// given the min index, return the item in values at that index
func SafeMin[T constraints.Integer](values []T, idx int) T { func SafeMin[T constraints.Integer](values []T, idx int) T {
return values[idx] return values[idx]
} }
// given the max index, return the item in values at that index
func SafeMax[T constraints.Integer](values []T, idx int) T { func SafeMax[T constraints.Integer](values []T, idx int) T {
return values[idx] return values[idx]
} }
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) { // given array of values and corresponding array of valid flags, find the min of value which is valid or return false if there does not exist any
hasValid := false
maxIndex := 0
maxValue := math.MinInt
for i := 0; i < len(valids); i++ {
if valids[i] && int(values[i]) > maxValue {
hasValid = true
maxIndex = i
maxValue = int(values[i])
}
}
if hasValid {
return true, maxIndex
} else {
return false, 0
}
}
func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) { func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
hasValid := false hasValid := false
minIndex := 0 minIndex := 0
@@ -95,7 +82,23 @@ func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
} }
} }
func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) { // given array of values and corresponding array of valid flags, find the max of value which is valid or return false if there does not exist any
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
hasValid := false
maxIndex := 0
maxValue := math.MinInt
for i := range valids {
if valids[i] && int(values[i]) > maxValue {
hasValid = true
maxIndex = i
maxValue = int(values[i])
}
}
return hasValid, maxIndex
}
// set the lext lo and hi bounds for wavefronts M, I, D
func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
@@ -125,35 +128,38 @@ func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent,
return lo, hi return lo, hi
} }
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next I wavefront
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a, _ := M.GetVal(score-o-e, k-1) a_ok, a, _ := M.GetVal(score-o-e, k-1)
b_ok, b, _ := I.GetVal(score-e, k-1) b_ok, b, _ := I.GetVal(score-e, k-1)
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b}) ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
nextIVal := SafeMax([]uint32{a, b}, nextITraceback) + 1 // important that the +1 is here nextIVal := SafeMax([]uint64{a, b}, nextITraceback) + 1 // important that the +1 is here
if ok { if ok {
I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback]) I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
} }
} }
func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next D wavefront
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a, _ := M.GetVal(score-o-e, k+1) a_ok, a, _ := M.GetVal(score-o-e, k+1)
b_ok, b, _ := D.GetVal(score-e, k+1) b_ok, b, _ := D.GetVal(score-e, k+1)
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b}) ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
nextDVal := SafeMax([]uint32{a, b}, nextDTraceback) nextDVal := SafeMax([]uint64{a, b}, nextDTraceback)
if ok { if ok {
D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback]) D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
} }
} }
func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next M wavefront
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
x := penalties.X x := penalties.X
a_ok, a, _ := M.GetVal(score-x, k) a_ok, a, _ := M.GetVal(score-x, k)
@@ -161,8 +167,8 @@ func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, sco
b_ok, b, _ := I.GetVal(score, k) b_ok, b, _ := I.GetVal(score, k)
c_ok, c, _ := D.GetVal(score, k) c_ok, c, _ := D.GetVal(score, k)
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint32{a, b, c}) ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint64{a, b, c})
nextMVal := SafeMax([]uint32{a, b, c}, nextMTraceback) nextMVal := SafeMax([]uint64{a, b, c}, nextMTraceback)
if ok { if ok {
M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback]) M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
} }

View File

@@ -4,23 +4,23 @@ import (
"strings" "strings"
) )
// WFAlign takes strings s1, s2, penalties, and returns the score and CIGAR if doCIGAR is true
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result { func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
n := len(s1) n := len(s1)
m := len(s2) m := len(s2)
A_k := m - n A_k := m - n // diagonal where both sequences end
A_offset := uint32(m) A_offset := uint64(m) // offset along a_k diagonal corresponding to end
score := 0 score := 0
estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4 M := NewWavefrontComponent()
M := NewWavefrontComponent(estimatedScore)
M.SetLoHi(0, 0, 0) M.SetLoHi(0, 0, 0)
M.SetVal(0, 0, 0, End) M.SetVal(0, 0, 0, End)
I := NewWavefrontComponent(estimatedScore) I := NewWavefrontComponent()
D := NewWavefrontComponent(estimatedScore) D := NewWavefrontComponent()
for { for {
WFExtend(M, s1, n, s2, m, score) WFExtend(M, s1, n, s2, m, score)
ok, val, _ := M.GetVal(score, A_k) ok, val, _ := M.GetVal(score, A_k)
if ok && val >= A_offset { if ok && val >= A_offset { // exit when M_(s,a_k) >= A_offset, ie the wavefront has reached the end
break break
} }
score = score + 1 score = score + 1
@@ -28,7 +28,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
CIGAR := "" CIGAR := ""
if doCIGAR { if doCIGAR { // if doCIGAR, then perform backtrace, otherwise just return the score
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2) CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
} }
@@ -38,40 +38,41 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
} }
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) { func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
_, lo, hi := M.GetLoHi(score) _, lo, hi := M.GetLoHi(score)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ { // for each diagonal in current wavefront
// v = M[score][k] - k // v = M[score][k] - k
// h = M[score][k] // h = M[score][k]
ok, hu, _ := M.GetVal(score, k) ok, uh, tb := M.GetVal(score, k)
h := int(hu) // exit early if M_(s,l) is invalid
v := h - k
// exit early if v or h are invalid
if !ok { if !ok {
continue continue
} }
for v < n && h < m && s1[v] == s2[h] { h := int(uh)
_, val, tb := M.GetVal(score, k) v := h - k
M.SetVal(score, k, val+1, tb) // in the paper, we do v++, h++, M_(s,k)++
// however, note that h = M_(s,k) so instead we just do v++, h++ and set M_(s,k) at the end
// this saves a some memory reads and writes
for v < n && h < m && s1[v] == s2[h] { // extend diagonal for the next set of matches
v++ v++
h++ h++
} }
M.SetVal(score, k, uint64(h), tb)
} }
} }
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) { func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) {
// get this score's lo, hi // get this score's lo, hi
lo, hi := NextLoHi(M, I, D, score, penalties) lo, hi := NextLoHi(M, I, D, score, penalties)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ { // for each diagonal, extend the matrices for the next wavefronts
NextI(M, I, score, k, penalties) NextI(M, I, score, k, penalties)
NextD(M, D, score, k, penalties) NextD(M, D, score, k, penalties)
NextM(M, I, D, score, k, penalties) NextM(M, I, D, score, k, penalties)
} }
} }
func WFBacktrace(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint32, s1 string, s2 string) string { func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint64, s1 string, s2 string) string {
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E

View File

@@ -12,6 +12,7 @@ import (
wfa "wfa/pkg" wfa "wfa/pkg"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
"golang.org/x/exp/constraints"
) )
const testJsonPath = "tests.json" const testJsonPath = "tests.json"
@@ -29,14 +30,14 @@ type TestCase struct {
Solutions string `json:"solutions"` Solutions string `json:"solutions"`
} }
func randRange(min, max int) uint32 { func randRange[T constraints.Integer](min, max int) T {
return uint32(rand.IntN(max-min) + min) return T(rand.IntN(max-min) + min)
} }
func TestWavefrontPacking(t *testing.T) { func TestWavefrontPacking(t *testing.T) {
for range 1000 { for range 1000 {
val := randRange(0, 1000) val := randRange[uint64](0, 1000)
tb := wfa.Traceback(randRange(0, 7)) tb := wfa.Traceback(randRange[uint64](0, 7))
v := wfa.PackWavefrontValue(val, tb) v := wfa.PackWavefrontValue(val, tb)
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v) valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
@@ -47,6 +48,20 @@ func TestWavefrontPacking(t *testing.T) {
} }
} }
func TestLoHiPacking(t *testing.T) {
for range 1000 {
lo := randRange[int](-1000, 1000)
hi := randRange[int](-1000, 1000)
v := wfa.PackWavefrontLoHi(lo, hi)
gotLo, gotHi := wfa.UnpackWavefrontLoHi(v)
if gotLo != lo || gotHi != hi {
t.Errorf(`test WavefrontPack/Unpack, lo: %d, hi: %d, packedval: %x, gotlo: %d, gothi: %d`, lo, hi, v, gotLo, gotHi)
}
}
}
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int { func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
unpackedCIGAR := wfa.RunLengthDecode(CIGAR) unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
previousOp := '~' previousOp := '~'