14 Commits

Author SHA1 Message Date
63a375995c minor code cleanup, use uint64 for wavefront values 2025-05-07 21:20:13 +00:00
2351faf2d7 cleanup code 2025-05-01 18:04:42 +00:00
a446bbd923 fix readme,
update go mod
2025-03-12 00:09:18 +00:00
f1407fd045 remove unused PositiveSlice Preallocate 2024-11-12 19:04:02 +00:00
ce635cc2b1 remove wavefront component preallocation 2024-11-12 18:53:03 +00:00
f5d2528e20 fix comment 2024-11-12 18:47:25 +00:00
6f78825876 optimize build size by avoiding fmt,
pack lo/hi values into Wavefront
2024-11-12 18:44:12 +00:00
a878da42a3 update README 2024-11-08 22:49:37 +00:00
bd720f06fb minor optimization switching to wavefront pointers 2024-11-08 21:51:17 +00:00
1bbf38aaab fix DecodeCIGAR argument hint 2024-11-07 20:58:17 +00:00
cde429cb80 fix issue in WFBacktrace and change format to proper CIGAR,
add test to ensure CIGAR correctness in the case of different traceback results,
add DecodeCIGAR function to exports
2024-11-07 19:01:01 +00:00
3da3ddf10c major optimization by packing wavefront values 2024-11-05 18:28:28 +00:00
8679c51fb0 update go mod,
move Wavefront String method to debug file,
minor optimizations
2024-11-05 05:35:46 +00:00
2c7adbef06 implement missing finalizeRef implementation from go wasm_exec.js 2024-11-01 17:23:04 +00:00
11 changed files with 598 additions and 383 deletions

View File

@@ -1,9 +1,9 @@
.PHONY: build clean test .PHONY: build clean test dev-init
build: clean build: clean
@echo "======================== Building Binary =======================" @echo "======================== Building Binary ======================="
minify wfa.js > dist/wfa.js minify wfa.js > dist/wfa.js
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -no-debug -opt=2 -target=wasm -o dist/wfa.wasm . GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -panic=trap -no-debug -opt=2 -target=wasm -o dist/wfa.wasm .
clean: clean:
@echo "======================== Cleaning Project ======================" @echo "======================== Cleaning Project ======================"
@@ -24,3 +24,7 @@ test:
@rm -f mem.prof @rm -f mem.prof
@rm -f test.test @rm -f test.test
dev-init:
apt install minify
go get -t wfa/test

View File

@@ -1,11 +1,13 @@
# Using WFA-JS # Using WFA-JS
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/tronnet/WFA-JS/releases) to your project. Add to your script: Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/alu/WFA-JS/releases) to your project. Add to your script:
``` ```
import wfa from "./wfa.js" import wfa from "./wfa.js"
await wfa("<path to wasm>") await wfa("<path to wasm>")
console.log(wfAlign(...)) let result = wfAlign(...)
operations = DecodeCIGAR(result.CIGAR)
// ...
``` ```
Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure. Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure.

12
go.mod
View File

@@ -1,11 +1,15 @@
module wfa module wfa
go 1.23.2 go 1.23.6
require (
github.com/schollz/progressbar/v3 v3.17.1
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
)
require ( require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.16.1 // indirect golang.org/x/sys v0.27.0 // indirect
golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.26.0 // indirect
golang.org/x/term v0.25.0 // indirect
) )

64
main.go
View File

@@ -1,7 +1,6 @@
package main package main
import ( import (
"fmt"
"syscall/js" "syscall/js"
wfa "wfa/pkg" wfa "wfa/pkg"
) )
@@ -9,37 +8,53 @@ import (
func main() { func main() {
c := make(chan bool) c := make(chan bool)
js.Global().Set("wfAlign", js.FuncOf(wfAlign)) js.Global().Set("wfAlign", js.FuncOf(wfAlign))
js.Global().Set("DecodeCIGAR", js.FuncOf(DecodeCIGAR))
<-c <-c
} }
func wfAlign(this js.Value, args []js.Value) interface{} { func wfAlign(this js.Value, args []js.Value) interface{} {
if len(args) != 4 { if len(args) != 4 {
fmt.Println("invalid number of args, requires 4: s1, s2, penalties, doCIGAR") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "invalid number of args, requires 4: s1, s2, penalties, doCIGAR",
}
return js.ValueOf(resultMap)
} }
if args[0].Type() != js.TypeString { if args[0].Type() != js.TypeString {
fmt.Println("s1 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s1 should be a string",
}
return js.ValueOf(resultMap)
} }
s1 := args[0].String() s1 := args[0].String()
if args[1].Type() != js.TypeString { if args[1].Type() != js.TypeString {
fmt.Println("s2 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s2 should be a string",
}
return js.ValueOf(resultMap)
} }
s2 := args[1].String() s2 := args[1].String()
if args[2].Type() != js.TypeObject { if args[2].Type() != js.TypeObject {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() { if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
m := args[2].Get("m").Int() m := args[2].Get("m").Int()
@@ -55,8 +70,11 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
} }
if args[3].Type() != js.TypeBoolean { if args[3].Type() != js.TypeBoolean {
fmt.Println("doCIGAR should be a boolean") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "doCIGAR should be a boolean",
}
return js.ValueOf(resultMap)
} }
doCIGAR := args[3].Bool() doCIGAR := args[3].Bool()
@@ -64,9 +82,29 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
// Call the actual func. // Call the actual func.
result := wfa.WFAlign(s1, s2, penalties, doCIGAR) result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
resultMap := map[string]interface{}{ resultMap := map[string]interface{}{
"ok": true,
"score": result.Score, "score": result.Score,
"CIGAR": result.CIGAR, "CIGAR": result.CIGAR,
"error": "",
} }
return js.ValueOf(resultMap) return js.ValueOf(resultMap)
} }
func DecodeCIGAR(this js.Value, args []js.Value) interface{} {
if len(args) != 1 {
println("invalid number of args, requires 1: CIGAR")
return nil
}
if args[0].Type() != js.TypeString {
println("CIGAR should be a string")
return nil
}
CIGAR := args[0].String()
decoded := wfa.RunLengthDecode(CIGAR)
return js.ValueOf(decoded)
}

View File

@@ -1,115 +1,36 @@
package wfa package wfa
type IntegerSlice[T any] struct {
data []T
valid []bool
defaultValue T
}
func (a *IntegerSlice[T]) TranslateIndex(idx int) int {
if idx >= 0 { // 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
return 2 * idx
} else { // -1 -> 1, -2 -> 3, -3 -> 5, ...
return (-2 * idx) - 1
}
}
func (a *IntegerSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx)
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
}
func (a *IntegerSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx)
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
return a.data[actualIdx]
} else { // idx is out of the slice
return a.defaultValue
}
}
func (a *IntegerSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx)
if actualIdx >= len(a.valid) { // idx is outside the slice
// expand data array to actualIdx
newData := make([]T, 2*actualIdx+1)
copy(newData, a.data)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, 2*actualIdx+1)
copy(newValid, a.valid)
a.valid = newValid
}
a.data[actualIdx] = value
a.valid[actualIdx] = true
}
func (a *IntegerSlice[T]) Preallocate(lo int, hi int) {
actualLo := a.TranslateIndex(lo)
actualHi := a.TranslateIndex(hi)
size := max(actualHi, actualLo)
// expand data array to actualIdx
newData := make([]T, size+1)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, size+1)
a.valid = newValid
}
type PositiveSlice[T any] struct { type PositiveSlice[T any] struct {
data []T data []T
valid []bool valid []bool
defaultValue T defaultValue T
} }
func (a *PositiveSlice[T]) TranslateIndex(idx int) int {
return idx
}
func (a *PositiveSlice[T]) Valid(idx int) bool { func (a *PositiveSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx) return 0 <= idx && idx < len(a.valid) && a.valid[idx]
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
} }
func (a *PositiveSlice[T]) Get(idx int) T { func (a *PositiveSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx) if 0 <= idx && idx < len(a.valid) && a.valid[idx] { // idx is in the slice
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice return a.data[idx]
return a.data[actualIdx]
} else { // idx is out of the slice } else { // idx is out of the slice
return a.defaultValue return a.defaultValue
} }
} }
func (a *PositiveSlice[T]) Set(idx int, value T) { func (a *PositiveSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx) if idx >= len(a.valid) { // idx is outside the slice
if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice // expand data array to 2*idx
// expand data array to actualIdx newData := make([]T, 2*idx+1)
newData := make([]T, 2*actualIdx+1)
copy(newData, a.data) copy(newData, a.data)
a.data = newData a.data = newData
// expand valid array to actualIdx // expand valid array to 2*idx
newValid := make([]bool, 2*actualIdx+1) newValid := make([]bool, 2*idx+1)
copy(newValid, a.valid) copy(newValid, a.valid)
a.valid = newValid a.valid = newValid
} }
a.data[actualIdx] = value a.data[idx] = value
a.valid[actualIdx] = true a.valid[idx] = true
}
func (a *PositiveSlice[T]) Preallocate(hi int) {
size := hi
// expand data array to actualIdx
newData := make([]T, size+1)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, size+1)
a.valid = newValid
} }

81
pkg/debug.go Normal file
View File

@@ -0,0 +1,81 @@
//go:build debug
package wfa
import (
"fmt"
"math"
)
func (w *WavefrontComponent) String(score int) string {
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
s := "<"
min_lo := math.MaxInt
max_hi := math.MinInt
for i := 0; i <= score; i++ {
valid := w.W.Valid(i)
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
if valid && lo < min_lo {
min_lo = lo
}
if valid && hi > max_hi {
max_hi = hi
}
}
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\t<"
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\n"
for i := 0; i <= score; i++ {
s = s + "["
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
for k := min_lo; k <= max_hi; k++ {
valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k))
if valid {
s = s + fmt.Sprintf("%02d", val)
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\t["
// print out traceback matrix
for k := min_lo; k <= max_hi; k++ {
valid, _, tb := UnpackWavefrontValue(w.W.Get(i).Get(k))
if valid {
s = s + traceback_str[tb]
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\n"
}
return s
}

View File

@@ -1,10 +1,5 @@
package wfa package wfa
import (
"fmt"
"math"
)
type Result struct { type Result struct {
Score int Score int
CIGAR string CIGAR string
@@ -17,10 +12,10 @@ type Penalty struct {
E int E int
} }
type traceback byte type Traceback byte
const ( const (
OpenIns traceback = iota OpenIns Traceback = iota
ExtdIns ExtdIns
OpenDel OpenDel
ExtdDel ExtdDel
@@ -30,165 +25,126 @@ const (
End End
) )
type WavefrontComponent struct { // bitpacked wavefront lo/hi values with 32 bits each
lo *PositiveSlice[int] // lo for each wavefront type WavefrontLoHi uint64
hi *PositiveSlice[int] // hi for each wavefront
W *PositiveSlice[*IntegerSlice[int]] // wavefront diag distance for each wavefront func PackWavefrontLoHi(lo int, hi int) WavefrontLoHi {
A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront loBM := int64(int32(lo)) & 0x0000_0000_FFFF_FFFF
hiBM := int64(int64(hi) << 32)
return WavefrontLoHi(hiBM | loBM)
} }
func NewWavefrontComponent(preallocateSize int) WavefrontComponent { func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
loBM := int(int32(lohi & 0x0000_0000_FFFF_FFFF))
hiBM := int(int32(lohi & 0xFFFF_FFFF_0000_0000 >> 32))
return loBM, hiBM
}
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 60 bits for the diag distance
type WavefrontValue uint64
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
func PackWavefrontValue(value uint64, traceback Traceback) WavefrontValue {
validBM := uint64(0x8000_0000_0000_0000)
tracebackBM := uint64(traceback&0x0000_0007) << 60
valueBM := uint64(value) & 0x0FFF_FFFF_FFFF_FFFF
return WavefrontValue(validBM | tracebackBM | valueBM)
}
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint64, Traceback) {
validBM := wfv&0x8000_0000_0000_0000 != 0
tracebackBM := uint8(wfv & 0x7000_0000_0000_0000 >> 60)
valueBM := uint64(wfv & 0x0000_0000_FFFF_FFFF)
return validBM, valueBM, Traceback(tracebackBM)
}
// Wavefront: stores a single wavefront, stores wavefront's lo value and hi is naturally lo + len(data)
type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8
data []WavefrontValue
lohi WavefrontLoHi
}
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
func NewWavefront(lo int, hi int) *Wavefront {
a := &Wavefront{}
a.lohi = PackWavefrontLoHi(lo, hi)
size := hi - lo
a.data = make([]WavefrontValue, size+1)
return a
}
// TranslateIndex: utility function for getting the data index given a diagonal
func (a *Wavefront) TranslateIndex(diagonal int) int {
lo := int(int32(a.lohi & 0x0000_0000_FFFF_FFFF))
return diagonal - lo
}
// Get: returns WavefrontValue for given diagonal
func (a *Wavefront) Get(diagonal int) WavefrontValue {
actualIdx := a.TranslateIndex(diagonal)
if 0 <= actualIdx && actualIdx < len(a.data) { // idx is in the slice
return a.data[actualIdx]
} else { // idx is out of the slice
return 0
}
}
// Set: the diagonal to a WavefrontValue
func (a *Wavefront) Set(diagonal int, value WavefrontValue) {
actualIdx := a.TranslateIndex(diagonal)
/* in theory idx is always in bounds because the wavefront is preallocated
if actualIdx < 0 || actualIdx >= len(a.data) {
return
}
*/
a.data[actualIdx] = value
}
// WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi
type WavefrontComponent struct {
W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
}
// NewWavefrontComponent: returns initialized WavefrontComponent
func NewWavefrontComponent() *WavefrontComponent {
// new wavefront component = { // new wavefront component = {
// lo = [0] // lo = [0]
// hi = [0] // hi = [0]
// W = [] // W = []
// A = []
// } // }
w := WavefrontComponent{ w := &WavefrontComponent{
lo: &PositiveSlice[int]{ W: &PositiveSlice[*Wavefront]{
data: []int{0}, defaultValue: &Wavefront{
valid: []bool{true}, data: []WavefrontValue{0},
},
hi: &PositiveSlice[int]{
data: []int{0},
valid: []bool{true},
},
W: &PositiveSlice[*IntegerSlice[int]]{
defaultValue: &IntegerSlice[int]{
data: []int{},
valid: []bool{},
},
},
A: &PositiveSlice[*IntegerSlice[traceback]]{
defaultValue: &IntegerSlice[traceback]{
data: []traceback{},
valid: []bool{},
}, },
}, },
} }
w.lo.Preallocate(preallocateSize)
w.hi.Preallocate(preallocateSize)
w.W.Preallocate(preallocateSize)
w.A.Preallocate(preallocateSize)
return w return w
} }
// get value for wavefront=score, diag=k => returns ok, value // GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) { func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint64, Traceback) {
return w.W.Valid(score) && w.W.Get(score).Valid(k), w.W.Get(score).Get(k) return UnpackWavefrontValue(w.W.Get(score).Get(k))
} }
// set value for wavefront=score, diag=k // SetVal: set value, traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetVal(score int, k int, val int) { func (w *WavefrontComponent) SetVal(score int, k int, val uint64, tb Traceback) {
w.W.Get(score).Set(k, val) w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
} }
// get alignment traceback for wavefront=score, diag=k => returns ok, value // GetLoHi: get lo and hi for wavefront=score
func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) {
return w.A.Valid(score) && w.A.Get(score).Valid(k), w.A.Get(score).Get(k)
}
// set alignment traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) {
w.A.Get(score).Set(k, val)
}
// get hi for wavefront=score
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) { func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
// if lo[score] and hi[score] are valid lo, hi := UnpackWavefrontLoHi(w.W.Get(score).lohi)
if w.lo.Valid(score) && w.hi.Valid(score) { return w.W.Valid(score), lo, hi
// return lo[score] hi[score]
return true, w.lo.Get(score), w.hi.Get(score)
} else {
return false, 0, 0
}
} }
// set hi for wavefront=score // SetLoHi: set lo and hi for wavefront=score
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) { func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
// lo[score] = lo b := NewWavefront(lo, hi)
w.lo.Set(score, lo) w.W.Set(score, b)
// hi[score] = hi
w.hi.Set(score, hi)
// preemptively setup w.A
w.A.Set(score, &IntegerSlice[traceback]{})
w.A.Get(score).Preallocate(lo, hi)
// preemptively setup w.W
w.W.Set(score, &IntegerSlice[int]{})
w.W.Get(score).Preallocate(lo, hi)
}
func (w *WavefrontComponent) String(score int) string {
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
s := "<"
min_lo := math.MaxInt
max_hi := math.MinInt
for i := 0; i <= score; i++ {
if w.lo.Valid(i) && w.lo.Get(i) < min_lo {
min_lo = w.lo.Get(i)
}
if w.hi.Valid(i) && w.hi.Get(i) > max_hi {
max_hi = w.hi.Get(i)
}
}
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\t<"
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\n"
for i := 0; i <= score; i++ {
s = s + "["
lo := w.lo.Get(i)
hi := w.hi.Get(i)
// print out wavefront matrix
for k := min_lo; k <= max_hi; k++ {
if w.W.Valid(i) && w.W.Get(i).Valid(k) {
s = s + fmt.Sprintf("%02d", w.W.Get(i).Get(k))
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\t["
// print out traceback matrix
for k := min_lo; k <= max_hi; k++ {
if w.A.Valid(i) && w.A.Get(i).Valid(k) {
s = s + traceback_str[w.A.Get(i).Get(k)]
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\n"
}
return s
} }

View File

@@ -2,44 +2,77 @@ package wfa
import ( import (
"math" "math"
"unicode/utf8" "strings"
"golang.org/x/exp/constraints"
) )
func SafeMin(values []int, idx int) int { // convert an unsigned into to string
return values[idx] func UIntToString(num uint) string { // num assumed to be positive
var builder strings.Builder
for num > 0 {
digit := num % 10
builder.WriteRune(rune('0' + digit))
num /= 10
}
// Reverse the string as we built it in reverse order
str := []rune(builder.String())
for i, j := 0, len(str)-1; i < j; i, j = i+1, j-1 {
str[i], str[j] = str[j], str[i]
}
return string(str)
} }
func SafeMax(values []int, idx int) int { // decode runlength encoded string such as CIGARs
return values[idx] func RunLengthDecode(encoded string) string {
} decoded := strings.Builder{}
length := len(encoded)
i := 0
func SafeArgMax(valids []bool, values []int) (bool, int) { for i < length {
hasValid := false // If the current character is a digit, we need to extract the run length
maxIndex := 0 runLength := 0
maxValue := math.MinInt for i < length && encoded[i] >= '0' && encoded[i] <= '9' {
for i := 0; i < len(valids); i++ { runLength = runLength*10 + int(encoded[i]-'0')
if valids[i] && values[i] > maxValue { i++
hasValid = true }
maxIndex = i
maxValue = values[i] // The next character will be the character to repeat
if i < length {
char := encoded[i]
for j := 0; j < runLength; j++ {
decoded.WriteByte(char)
}
i++ // Move past the character
} }
} }
if hasValid {
return true, maxIndex return decoded.String()
} else {
return false, 0
}
} }
func SafeArgMin(valids []bool, values []int) (bool, int) { // given the min index, return the item in values at that index
func SafeMin[T constraints.Integer](values []T, idx int) T {
return values[idx]
}
// given the max index, return the item in values at that index
func SafeMax[T constraints.Integer](values []T, idx int) T {
return values[idx]
}
// given array of values and corresponding array of valid flags, find the min of value which is valid or return false if there does not exist any
func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
hasValid := false hasValid := false
minIndex := 0 minIndex := 0
minValue := math.MaxInt minValue := math.MaxInt
for i := 0; i < len(valids); i++ { for i := 0; i < len(valids); i++ {
if valids[i] && values[i] < minValue { if valids[i] && int(values[i]) < minValue {
hasValid = true hasValid = true
minIndex = i minIndex = i
minValue = values[i] minValue = int(values[i])
} }
} }
if hasValid { if hasValid {
@@ -49,22 +82,23 @@ func SafeArgMin(valids []bool, values []int) (bool, int) {
} }
} }
func Reverse(s string) string { // given array of values and corresponding array of valid flags, find the max of value which is valid or return false if there does not exist any
size := len(s) func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
buf := make([]byte, size) hasValid := false
for start := 0; start < size; { maxIndex := 0
r, n := utf8.DecodeRuneInString(s[start:]) maxValue := math.MinInt
start += n for i := range valids {
utf8.EncodeRune(buf[size-start:], r) if valids[i] && int(values[i]) > maxValue {
hasValid = true
maxIndex = i
maxValue = int(values[i])
}
} }
return string(buf) return hasValid, maxIndex
} }
func Splice(s string, c rune, idx int) string { // set the lext lo and hi bounds for wavefronts M, I, D
return s[:idx] + string(c) + s[idx:] func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
}
func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) {
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
@@ -94,52 +128,48 @@ func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent,
return lo, hi return lo, hi
} }
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next I wavefront
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a := M.GetVal(score-o-e, k-1) a_ok, a, _ := M.GetVal(score-o-e, k-1)
b_ok, b := I.GetVal(score-e, k-1) b_ok, b, _ := I.GetVal(score-e, k-1)
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b}) ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
nextIVal := SafeMax([]int{a, b}, nextITraceback) + 1 // important that the +1 is here nextIVal := SafeMax([]uint64{a, b}, nextITraceback) + 1 // important that the +1 is here
if ok { if ok {
I.SetVal(score, k, nextIVal) I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback])
} }
} }
func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next D wavefront
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a := M.GetVal(score-o-e, k+1) a_ok, a, _ := M.GetVal(score-o-e, k+1)
b_ok, b := D.GetVal(score-e, k+1) b_ok, b, _ := D.GetVal(score-e, k+1)
ok, nextDTraceback := SafeArgMax( ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
[]bool{a_ok, b_ok}, nextDVal := SafeMax([]uint64{a, b}, nextDTraceback)
[]int{a, b},
)
nextDVal := SafeMax([]int{a, b}, nextDTraceback) // nothing special
if ok { if ok {
D.SetVal(score, k, nextDVal) D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback])
} }
} }
func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { // set the traceback and diag value for the next M wavefront
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
x := penalties.X x := penalties.X
a_ok, a := M.GetVal(score-x, k) a_ok, a, _ := M.GetVal(score-x, k)
a++ // important to have +1 here a++ // important to have +1 here
b_ok, b := I.GetVal(score, k) b_ok, b, _ := I.GetVal(score, k)
c_ok, c := D.GetVal(score, k) c_ok, c, _ := D.GetVal(score, k)
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c})
nextMVal := SafeMax([]int{a, b, c}, nextMTraceback)
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint64{a, b, c})
nextMVal := SafeMax([]uint64{a, b, c}, nextMTraceback)
if ok { if ok {
M.SetVal(score, k, nextMVal) M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback])
} }
} }

View File

@@ -1,23 +1,26 @@
package wfa package wfa
import (
"strings"
)
// WFAlign takes strings s1, s2, penalties, and returns the score and CIGAR if doCIGAR is true
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result { func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
n := len(s1) n := len(s1)
m := len(s2) m := len(s2)
A_k := m - n A_k := m - n // diagonal where both sequences end
A_offset := m A_offset := uint64(m) // offset along a_k diagonal corresponding to end
score := 0 score := 0
estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4 M := NewWavefrontComponent()
M := NewWavefrontComponent(estimatedScore)
M.SetLoHi(0, 0, 0) M.SetLoHi(0, 0, 0)
M.SetVal(0, 0, 0) M.SetVal(0, 0, 0, End)
M.SetTraceback(0, 0, End) I := NewWavefrontComponent()
I := NewWavefrontComponent(estimatedScore) D := NewWavefrontComponent()
D := NewWavefrontComponent(estimatedScore)
for { for {
WFExtend(M, s1, n, s2, m, score) WFExtend(M, s1, n, s2, m, score)
ok, val := M.GetVal(score, A_k) ok, val, _ := M.GetVal(score, A_k)
if ok && val >= A_offset { if ok && val >= A_offset { // exit when M_(s,a_k) >= A_offset, ie the wavefront has reached the end
break break
} }
score = score + 1 score = score + 1
@@ -25,8 +28,8 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
CIGAR := "" CIGAR := ""
if doCIGAR { if doCIGAR { // if doCIGAR, then perform backtrace, otherwise just return the score
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, s1, s2) CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
} }
return Result{ return Result{
@@ -35,108 +38,162 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
} }
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) { func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
_, lo, hi := M.GetLoHi(score) _, lo, hi := M.GetLoHi(score)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ { // for each diagonal in current wavefront
// v = M[score][k] - k // v = M[score][k] - k
// h = M[score][k] // h = M[score][k]
ok, h := M.GetVal(score, k) ok, uh, tb := M.GetVal(score, k)
v := h - k // exit early if M_(s,l) is invalid
// exit early if v or h are invalid
if !ok { if !ok {
continue continue
} }
for v < n && h < m && s1[v] == s2[h] { h := int(uh)
_, val := M.GetVal(score, k) v := h - k
M.SetVal(score, k, val+1) // in the paper, we do v++, h++, M_(s,k)++
// however, note that h = M_(s,k) so instead we just do v++, h++ and set M_(s,k) at the end
// this saves a some memory reads and writes
for v < n && h < m && s1[v] == s2[h] { // extend diagonal for the next set of matches
v++ v++
h++ h++
} }
M.SetVal(score, k, uint64(h), tb)
} }
} }
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) { func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) {
// get this score's lo, hi // get this score's lo, hi
lo, hi := NextLoHi(M, I, D, score, penalties) lo, hi := NextLoHi(M, I, D, score, penalties)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ { // for each diagonal, extend the matrices for the next wavefronts
NextI(M, I, score, k, penalties) NextI(M, I, score, k, penalties)
NextD(M, D, score, k, penalties) NextD(M, D, score, k, penalties)
NextM(M, I, D, score, k, penalties) NextM(M, I, D, score, k, penalties)
} }
} }
func WFBacktrace(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty, A_k int, s1 string, s2 string) string { func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint64, s1 string, s2 string) string {
traceback_CIGAR := []string{"I", "I", "D", "D", "X", "", "", ""}
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
CIGAR_rev := ""
tb_s := score tb_s := score
tb_k := A_k tb_k := A_k
_, current_traceback := M.GetTraceback(tb_s, tb_k)
done := false done := false
_, current_dist, current_traceback := M.GetVal(tb_s, tb_k)
Ops := []rune{'~'}
Counts := []uint{0}
idx := 0
for !done { for !done {
CIGAR_rev = CIGAR_rev + traceback_CIGAR[current_traceback]
switch current_traceback { switch current_traceback {
case OpenIns: case OpenIns:
if Ops[idx] == 'I' {
Counts[idx]++
} else {
Ops = append(Ops, 'I')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - o - e tb_s = tb_s - o - e
tb_k = tb_k - 1 tb_k = tb_k - 1
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
case ExtdIns: case ExtdIns:
if Ops[idx] == 'I' {
Counts[idx]++
} else {
Ops = append(Ops, 'I')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - e tb_s = tb_s - e
tb_k = tb_k - 1 tb_k = tb_k - 1
_, current_traceback = I.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = I.GetVal(tb_s, tb_k)
case OpenDel: case OpenDel:
if Ops[idx] == 'D' {
Counts[idx]++
} else {
Ops = append(Ops, 'D')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - o - e tb_s = tb_s - o - e
tb_k = tb_k + 1 tb_k = tb_k + 1
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
case ExtdDel: case ExtdDel:
if Ops[idx] == 'D' {
Counts[idx]++
} else {
Ops = append(Ops, 'D')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - e tb_s = tb_s - e
tb_k = tb_k + 1 tb_k = tb_k + 1
_, current_traceback = D.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = D.GetVal(tb_s, tb_k)
case Sub: case Sub:
tb_s = tb_s - x tb_s = tb_s - x
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := M.GetVal(tb_s, tb_k)
if int(current_dist-next_dist)-1 > 0 {
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist)-1)
idx++
}
if Ops[idx] == 'X' {
Counts[idx]++
} else {
Ops = append(Ops, 'X')
Counts = append(Counts, 1)
idx++
}
current_dist = next_dist
current_traceback = next_traceback
case Ins: case Ins:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = I.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := I.GetVal(tb_s, tb_k)
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist))
idx++
current_dist = next_dist
current_traceback = next_traceback
case Del: case Del:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = D.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := D.GetVal(tb_s, tb_k)
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist))
idx++
current_dist = next_dist
current_traceback = next_traceback
case End: case End:
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist))
idx++
done = true done = true
} }
} }
CIGAR_part := Reverse(CIGAR_rev) CIGAR := strings.Builder{}
c := 0 for i := len(Ops) - 1; i > 0; i-- {
i := 0 CIGAR.WriteString(UIntToString(Counts[i]))
j := 0 CIGAR.WriteRune(Ops[i])
for i < len(s1) && j < len(s2) {
if s1[i] == s2[j] {
//CIGAR_part.splice(c, 0, "M")
CIGAR_part = Splice(CIGAR_part, 'M', c)
c++
i++
j++
} else if CIGAR_part[c] == 'X' {
c++
i++
j++
} else if CIGAR_part[c] == 'I' {
c++
j++
} else if CIGAR_part[c] == 'D' {
c++
i++
}
} }
return CIGAR_part return CIGAR.String()
} }

View File

@@ -3,6 +3,8 @@ package tests
import ( import (
"bufio" "bufio"
"encoding/json" "encoding/json"
"log"
"math/rand/v2"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@@ -10,6 +12,7 @@ import (
wfa "wfa/pkg" wfa "wfa/pkg"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
"golang.org/x/exp/constraints"
) )
const testJsonPath = "tests.json" const testJsonPath = "tests.json"
@@ -27,6 +30,103 @@ type TestCase struct {
Solutions string `json:"solutions"` Solutions string `json:"solutions"`
} }
func randRange[T constraints.Integer](min, max int) T {
return T(rand.IntN(max-min) + min)
}
func TestWavefrontPacking(t *testing.T) {
for range 1000 {
val := randRange[uint64](0, 1000)
tb := wfa.Traceback(randRange[uint64](0, 7))
v := wfa.PackWavefrontValue(val, tb)
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
if !valid || gotVal != val || gotTB != tb {
t.Errorf(`test WavefrontPack/Unpack, val: %d, tb: %d, packedval: %x, gotok: %t, gotval: %d, gottb: %d\n`, val, tb, v, valid, gotVal, gotTB)
}
}
}
func TestLoHiPacking(t *testing.T) {
for range 1000 {
lo := randRange[int](-1000, 1000)
hi := randRange[int](-1000, 1000)
v := wfa.PackWavefrontLoHi(lo, hi)
gotLo, gotHi := wfa.UnpackWavefrontLoHi(v)
if gotLo != lo || gotHi != hi {
t.Errorf(`test WavefrontPack/Unpack, lo: %d, hi: %d, packedval: %x, gotlo: %d, gothi: %d`, lo, hi, v, gotLo, gotHi)
}
}
}
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
previousOp := '~'
score := 0
for _, Op := range unpackedCIGAR {
if Op == 'M' {
score = score + penalties.M
} else if Op == 'X' {
score = score + penalties.X
} else if (Op == 'I' && previousOp != 'I') || (Op == 'D' && previousOp != 'D') {
score = score + penalties.O + penalties.E
} else if (Op == 'I' && previousOp == 'I') || (Op == 'D' && previousOp == 'D') {
score = score + penalties.E
}
previousOp = Op
}
return score
}
func CheckCIGARCorrectness(s1 string, s2 string, CIGAR string) bool {
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
i := 0
j := 0
s1Aligned := strings.Builder{}
alignment := strings.Builder{}
s2Aligned := strings.Builder{}
for c := 0; c < len(unpackedCIGAR); c++ {
Op := unpackedCIGAR[c]
if Op == 'M' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune('|')
s2Aligned.WriteByte(s2[j])
i++
j++
} else if Op == 'X' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune(' ')
s2Aligned.WriteByte(s2[j])
i++
j++
} else if Op == 'I' {
s1Aligned.WriteRune('-')
alignment.WriteRune(' ')
s2Aligned.WriteByte(s2[j])
j++
} else if Op == 'D' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune('|')
s2Aligned.WriteRune('-')
i++
}
}
if i == len(s1) && j == len(s2) {
return true
} else {
log.Printf("\n%s\n%s\n%s\n i=%d, j=%d, |s1|=%d, |s2|=%d\n", s1Aligned.String(), alignment.String(), s2Aligned.String(), i, j, len(s1), len(s2))
return false
}
}
func TestWFA(t *testing.T) { func TestWFA(t *testing.T) {
content, _ := os.ReadFile(testJsonPath) content, _ := os.ReadFile(testJsonPath)
@@ -54,7 +154,9 @@ func TestWFA(t *testing.T) {
for solutions.Scan() { for solutions.Scan() {
solution := solutions.Text() solution := solutions.Text()
expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0]) expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0])
expectedCIGAR := strings.Split(solution, "\t")[1]
sequences.Scan() sequences.Scan()
s1 := sequences.Text() s1 := sequences.Text()
@@ -64,11 +166,27 @@ func TestWFA(t *testing.T) {
s2 := sequences.Text() s2 := sequences.Text()
s2 = s2[1:] s2 = s2[1:]
x := wfa.WFAlign(s1, s2, testPenalties, false) x := wfa.WFAlign(s1, s2, testPenalties, true)
gotScore := x.Score gotScore := x.Score
gotCIGAR := x.CIGAR
if gotScore != -1*expectedScore { if gotScore != -1*expectedScore {
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d\n`, testName, idx, s1, s2, gotScore, expectedScore) t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d`, testName, idx, s1, s2, gotScore, expectedScore)
os.Exit(1)
}
if gotCIGAR != expectedCIGAR {
checkScore := GetScoreFromCIGAR(gotCIGAR, testPenalties)
CIGARCorrectness := CheckCIGARCorrectness(s1, s2, gotCIGAR)
if checkScore != gotScore && checkScore != -1*expectedScore { // nonequivalent alignment
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
t.Errorf(`test: %s#%d, recalculated score: %d`, testName, idx, checkScore)
os.Exit(1)
}
if !CIGARCorrectness {
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
os.Exit(1)
}
} }
idx++ idx++

12
wfa.js
View File

@@ -300,10 +300,14 @@
// func finalizeRef(v ref) // func finalizeRef(v ref)
"syscall/js.finalizeRef": (v_ref) => { "syscall/js.finalizeRef": (v_ref) => {
// Note: TinyGo does not support finalizers so this should never be const id = mem().getUint32(unboxValue(v_ref), true);
// called. this._goRefCounts[id]--;
//console.error('syscall/js.finalizeRef not implemented'); if (this._goRefCounts[id] === 0) {
// for whatever reason this is called by wfajs but doesnt impact the results at all?? const v = this._values[id];
this._values[id] = null;
this._ids.delete(v);
this._idPool.push(id);
}
}, },
// func stringVal(value string) ref // func stringVal(value string) ref