Compare commits

...

11 Commits
v2.0.1 ... main

Author SHA1 Message Date
f1407fd045 remove unused PositiveSlice Preallocate 2024-11-12 19:04:02 +00:00
ce635cc2b1 remove wavefront component preallocation 2024-11-12 18:53:03 +00:00
f5d2528e20 fix comment 2024-11-12 18:47:25 +00:00
6f78825876 optimize build size by avoiding fmt,
pack lo/hi values into Wavefront
2024-11-12 18:44:12 +00:00
a878da42a3 update README 2024-11-08 22:49:37 +00:00
bd720f06fb minor optimization switching to wavefront pointers 2024-11-08 21:51:17 +00:00
1bbf38aaab fix DecodeCIGAR argument hint 2024-11-07 20:58:17 +00:00
cde429cb80 fix issue in WFBacktrace and change format to proper CIGAR,
add test to ensure CIGAR correctness in the case of different traceback results,
add DecodeCIGAR function to exports
2024-11-07 19:01:01 +00:00
3da3ddf10c major optimization by packing wavefront values 2024-11-05 18:28:28 +00:00
8679c51fb0 update go mod,
move Wavefront String method to debug file,
minor optimizations
2024-11-05 05:35:46 +00:00
2c7adbef06 implement missing finalizeRef implementation from go wasm_exec.js 2024-11-01 17:23:04 +00:00
11 changed files with 558 additions and 342 deletions

View File

@ -3,7 +3,7 @@
build: clean build: clean
@echo "======================== Building Binary =======================" @echo "======================== Building Binary ======================="
minify wfa.js > dist/wfa.js minify wfa.js > dist/wfa.js
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -no-debug -opt=2 -target=wasm -o dist/wfa.wasm . GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -panic=trap -no-debug -opt=2 -target=wasm -o dist/wfa.wasm .
clean: clean:
@echo "======================== Cleaning Project ======================" @echo "======================== Cleaning Project ======================"

View File

@ -5,7 +5,9 @@ Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/tronnet/
``` ```
import wfa from "./wfa.js" import wfa from "./wfa.js"
await wfa("<path to wasm>") await wfa("<path to wasm>")
console.log(wfAlign(...)) let result = wfAlign(...)
operations = DecodeCIGAR(result.CIGAR)
// ...
``` ```
Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure. Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure.

10
go.mod
View File

@ -2,10 +2,14 @@ module wfa
go 1.23.2 go 1.23.2
require (
github.com/schollz/progressbar/v3 v3.17.1
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
)
require ( require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.16.1 // indirect golang.org/x/sys v0.27.0 // indirect
golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.26.0 // indirect
golang.org/x/term v0.25.0 // indirect
) )

64
main.go
View File

@ -1,7 +1,6 @@
package main package main
import ( import (
"fmt"
"syscall/js" "syscall/js"
wfa "wfa/pkg" wfa "wfa/pkg"
) )
@ -9,37 +8,53 @@ import (
func main() { func main() {
c := make(chan bool) c := make(chan bool)
js.Global().Set("wfAlign", js.FuncOf(wfAlign)) js.Global().Set("wfAlign", js.FuncOf(wfAlign))
js.Global().Set("DecodeCIGAR", js.FuncOf(DecodeCIGAR))
<-c <-c
} }
func wfAlign(this js.Value, args []js.Value) interface{} { func wfAlign(this js.Value, args []js.Value) interface{} {
if len(args) != 4 { if len(args) != 4 {
fmt.Println("invalid number of args, requires 4: s1, s2, penalties, doCIGAR") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "invalid number of args, requires 4: s1, s2, penalties, doCIGAR",
}
return js.ValueOf(resultMap)
} }
if args[0].Type() != js.TypeString { if args[0].Type() != js.TypeString {
fmt.Println("s1 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s1 should be a string",
}
return js.ValueOf(resultMap)
} }
s1 := args[0].String() s1 := args[0].String()
if args[1].Type() != js.TypeString { if args[1].Type() != js.TypeString {
fmt.Println("s2 should be a string") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "s2 should be a string",
}
return js.ValueOf(resultMap)
} }
s2 := args[1].String() s2 := args[1].String()
if args[2].Type() != js.TypeObject { if args[2].Type() != js.TypeObject {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() { if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
fmt.Println("penalties should be a map with key values m, x, o, e") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "penalties should be a map with key values m, x, o, e",
}
return js.ValueOf(resultMap)
} }
m := args[2].Get("m").Int() m := args[2].Get("m").Int()
@ -55,8 +70,11 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
} }
if args[3].Type() != js.TypeBoolean { if args[3].Type() != js.TypeBoolean {
fmt.Println("doCIGAR should be a boolean") resultMap := map[string]interface{}{
return nil "ok": false,
"error": "doCIGAR should be a boolean",
}
return js.ValueOf(resultMap)
} }
doCIGAR := args[3].Bool() doCIGAR := args[3].Bool()
@ -64,9 +82,29 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
// Call the actual func. // Call the actual func.
result := wfa.WFAlign(s1, s2, penalties, doCIGAR) result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
resultMap := map[string]interface{}{ resultMap := map[string]interface{}{
"ok": true,
"score": result.Score, "score": result.Score,
"CIGAR": result.CIGAR, "CIGAR": result.CIGAR,
"error": "",
} }
return js.ValueOf(resultMap) return js.ValueOf(resultMap)
} }
func DecodeCIGAR(this js.Value, args []js.Value) interface{} {
if len(args) != 1 {
println("invalid number of args, requires 1: CIGAR")
return nil
}
if args[0].Type() != js.TypeString {
println("CIGAR should be a string")
return nil
}
CIGAR := args[0].String()
decoded := wfa.RunLengthDecode(CIGAR)
return js.ValueOf(decoded)
}

View File

@ -1,65 +1,5 @@
package wfa package wfa
type IntegerSlice[T any] struct {
data []T
valid []bool
defaultValue T
}
func (a *IntegerSlice[T]) TranslateIndex(idx int) int {
if idx >= 0 { // 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
return 2 * idx
} else { // -1 -> 1, -2 -> 3, -3 -> 5, ...
return (-2 * idx) - 1
}
}
func (a *IntegerSlice[T]) Valid(idx int) bool {
actualIdx := a.TranslateIndex(idx)
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
}
func (a *IntegerSlice[T]) Get(idx int) T {
actualIdx := a.TranslateIndex(idx)
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
return a.data[actualIdx]
} else { // idx is out of the slice
return a.defaultValue
}
}
func (a *IntegerSlice[T]) Set(idx int, value T) {
actualIdx := a.TranslateIndex(idx)
if actualIdx >= len(a.valid) { // idx is outside the slice
// expand data array to actualIdx
newData := make([]T, 2*actualIdx+1)
copy(newData, a.data)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, 2*actualIdx+1)
copy(newValid, a.valid)
a.valid = newValid
}
a.data[actualIdx] = value
a.valid[actualIdx] = true
}
func (a *IntegerSlice[T]) Preallocate(lo int, hi int) {
actualLo := a.TranslateIndex(lo)
actualHi := a.TranslateIndex(hi)
size := max(actualHi, actualLo)
// expand data array to actualIdx
newData := make([]T, size+1)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, size+1)
a.valid = newValid
}
type PositiveSlice[T any] struct { type PositiveSlice[T any] struct {
data []T data []T
valid []bool valid []bool
@ -101,15 +41,3 @@ func (a *PositiveSlice[T]) Set(idx int, value T) {
a.data[actualIdx] = value a.data[actualIdx] = value
a.valid[actualIdx] = true a.valid[actualIdx] = true
} }
func (a *PositiveSlice[T]) Preallocate(hi int) {
size := hi
// expand data array to actualIdx
newData := make([]T, size+1)
a.data = newData
// expand valid array to actualIdx
newValid := make([]bool, size+1)
a.valid = newValid
}

81
pkg/debug.go Normal file
View File

@ -0,0 +1,81 @@
//go:build debug
package wfa
import (
"fmt"
"math"
)
func (w *WavefrontComponent) String(score int) string {
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
s := "<"
min_lo := math.MaxInt
max_hi := math.MinInt
for i := 0; i <= score; i++ {
valid := w.W.Valid(i)
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
if valid && lo < min_lo {
min_lo = lo
}
if valid && hi > max_hi {
max_hi = hi
}
}
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\t<"
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\n"
for i := 0; i <= score; i++ {
s = s + "["
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
for k := min_lo; k <= max_hi; k++ {
valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k))
if valid {
s = s + fmt.Sprintf("%02d", val)
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\t["
// print out traceback matrix
for k := min_lo; k <= max_hi; k++ {
valid, _, tb := UnpackWavefrontValue(w.W.Get(i).Get(k))
if valid {
s = s + traceback_str[tb]
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\n"
}
return s
}

View File

@ -1,10 +1,5 @@
package wfa package wfa
import (
"fmt"
"math"
)
type Result struct { type Result struct {
Score int Score int
CIGAR string CIGAR string
@ -17,10 +12,10 @@ type Penalty struct {
E int E int
} }
type traceback byte type Traceback byte
const ( const (
OpenIns traceback = iota OpenIns Traceback = iota
ExtdIns ExtdIns
OpenDel OpenDel
ExtdDel ExtdDel
@ -30,165 +25,132 @@ const (
End End
) )
type WavefrontComponent struct { // bitpacked wavefront lo/hi values with 32 bits each
lo *PositiveSlice[int] // lo for each wavefront type WavefrontLoHi uint64
hi *PositiveSlice[int] // hi for each wavefront
W *PositiveSlice[*IntegerSlice[int]] // wavefront diag distance for each wavefront func PackWavefrontLoHi(lo int, hi int) WavefrontLoHi {
A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront loBM := int64(int32(lo)) & 0x0000_0000_FFFF_FFFF
hiBM := int64(int64(hi) << 32)
return WavefrontLoHi(hiBM | loBM)
} }
func NewWavefrontComponent(preallocateSize int) WavefrontComponent { func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
loBM := int(int32(lohi & 0x0000_0000_FFFF_FFFF))
hiBM := int(int32(lohi & 0xFFFF_FFFF_0000_0000 >> 32))
return loBM, hiBM
}
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 28 bits for the diag distance
// technically this restricts to alignments with less than 268 million characters but that should be sufficient for most cases
type WavefrontValue uint32
// TODO: add 64 bit packed value in case more than 268 million characters are needed
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
func PackWavefrontValue(value uint32, traceback Traceback) WavefrontValue {
validBM := uint32(0x8000_0000)
tracebackBM := uint32(traceback&0x0000_0007) << 28
valueBM := value & 0x0FFF_FFFF
return WavefrontValue(validBM | tracebackBM | valueBM)
}
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint32, Traceback) {
validBM := wfv&0x8000_0000 != 0
tracebackBM := uint8(wfv & 0x7000_0000 >> 28)
valueBM := uint32(wfv & 0x0FFF_FFFF)
return validBM, valueBM, Traceback(tracebackBM)
}
// Wavefront: stores a single wavefront, stores wavefront's lo value and hi is naturally lo + len(data)
type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8
data []WavefrontValue
lohi WavefrontLoHi
}
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
func NewWavefront(lo int, hi int) *Wavefront {
a := &Wavefront{}
a.lohi = PackWavefrontLoHi(lo, hi)
size := hi - lo
newData := make([]WavefrontValue, size+1)
a.data = newData
return a
}
// TranslateIndex: utility function for getting the data index given a diagonal
func (a *Wavefront) TranslateIndex(diagonal int) int {
lo := int(int32(a.lohi & 0x0000_0000_FFFF_FFFF))
return diagonal - lo
}
// Get: returns WavefrontValue for given diagonal
func (a *Wavefront) Get(diagonal int) WavefrontValue {
actualIdx := a.TranslateIndex(diagonal)
if 0 <= actualIdx && actualIdx < len(a.data) { // idx is in the slice
return a.data[actualIdx]
} else { // idx is out of the slice
return 0
}
}
// Set: the diagonal to a WavefrontValue
func (a *Wavefront) Set(diagonal int, value WavefrontValue) {
actualIdx := a.TranslateIndex(diagonal)
/* in theory idx is always in bounds because the wavefront is preallocated
if actualIdx < 0 || actualIdx >= len(a.data) {
return
}
*/
a.data[actualIdx] = value
}
// WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi
type WavefrontComponent struct {
W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
}
// NewWavefrontComponent: returns initialized WavefrontComponent
func NewWavefrontComponent() *WavefrontComponent {
// new wavefront component = { // new wavefront component = {
// lo = [0] // lo = [0]
// hi = [0] // hi = [0]
// W = [] // W = []
// A = []
// } // }
w := WavefrontComponent{ w := &WavefrontComponent{
lo: &PositiveSlice[int]{ W: &PositiveSlice[*Wavefront]{
data: []int{0}, defaultValue: &Wavefront{
valid: []bool{true}, data: []WavefrontValue{0},
},
hi: &PositiveSlice[int]{
data: []int{0},
valid: []bool{true},
},
W: &PositiveSlice[*IntegerSlice[int]]{
defaultValue: &IntegerSlice[int]{
data: []int{},
valid: []bool{},
},
},
A: &PositiveSlice[*IntegerSlice[traceback]]{
defaultValue: &IntegerSlice[traceback]{
data: []traceback{},
valid: []bool{},
}, },
}, },
} }
w.lo.Preallocate(preallocateSize)
w.hi.Preallocate(preallocateSize)
w.W.Preallocate(preallocateSize)
w.A.Preallocate(preallocateSize)
return w return w
} }
// get value for wavefront=score, diag=k => returns ok, value // GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) { func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint32, Traceback) {
return w.W.Valid(score) && w.W.Get(score).Valid(k), w.W.Get(score).Get(k) return UnpackWavefrontValue(w.W.Get(score).Get(k))
} }
// set value for wavefront=score, diag=k // SetVal: set value, traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetVal(score int, k int, val int) { func (w *WavefrontComponent) SetVal(score int, k int, val uint32, tb Traceback) {
w.W.Get(score).Set(k, val) w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
} }
// get alignment traceback for wavefront=score, diag=k => returns ok, value // GetLoHi: get lo and hi for wavefront=score
func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) {
return w.A.Valid(score) && w.A.Get(score).Valid(k), w.A.Get(score).Get(k)
}
// set alignment traceback for wavefront=score, diag=k
func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) {
w.A.Get(score).Set(k, val)
}
// get hi for wavefront=score
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) { func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
// if lo[score] and hi[score] are valid lo, hi := UnpackWavefrontLoHi(w.W.Get(score).lohi)
if w.lo.Valid(score) && w.hi.Valid(score) { return w.W.Valid(score), lo, hi
// return lo[score] hi[score]
return true, w.lo.Get(score), w.hi.Get(score)
} else {
return false, 0, 0
}
} }
// set hi for wavefront=score // SetLoHi: set lo and hi for wavefront=score
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) { func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
// lo[score] = lo b := NewWavefront(lo, hi)
w.lo.Set(score, lo) w.W.Set(score, b)
// hi[score] = hi
w.hi.Set(score, hi)
// preemptively setup w.A
w.A.Set(score, &IntegerSlice[traceback]{})
w.A.Get(score).Preallocate(lo, hi)
// preemptively setup w.W
w.W.Set(score, &IntegerSlice[int]{})
w.W.Get(score).Preallocate(lo, hi)
}
func (w *WavefrontComponent) String(score int) string {
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
s := "<"
min_lo := math.MaxInt
max_hi := math.MinInt
for i := 0; i <= score; i++ {
if w.lo.Valid(i) && w.lo.Get(i) < min_lo {
min_lo = w.lo.Get(i)
}
if w.hi.Valid(i) && w.hi.Get(i) > max_hi {
max_hi = w.hi.Get(i)
}
}
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\t<"
for k := min_lo; k <= max_hi; k++ {
s = s + fmt.Sprintf("%02d", k)
if k < max_hi {
s = s + "|"
}
}
s = s + ">\n"
for i := 0; i <= score; i++ {
s = s + "["
lo := w.lo.Get(i)
hi := w.hi.Get(i)
// print out wavefront matrix
for k := min_lo; k <= max_hi; k++ {
if w.W.Valid(i) && w.W.Get(i).Valid(k) {
s = s + fmt.Sprintf("%02d", w.W.Get(i).Get(k))
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\t["
// print out traceback matrix
for k := min_lo; k <= max_hi; k++ {
if w.A.Valid(i) && w.A.Get(i).Valid(k) {
s = s + traceback_str[w.A.Get(i).Get(k)]
} else if k < lo || k > hi {
s = s + "--"
} else {
s = s + " "
}
if k < max_hi {
s = s + "|"
}
}
s = s + "]\n"
}
return s
} }

View File

@ -2,26 +2,72 @@ package wfa
import ( import (
"math" "math"
"unicode/utf8" "strings"
"golang.org/x/exp/constraints"
) )
func SafeMin(values []int, idx int) int { func UIntToString(num uint) string { // num assumed to be positive
var builder strings.Builder
for num > 0 {
digit := num % 10
builder.WriteRune(rune('0' + digit))
num /= 10
}
// Reverse the string as we built it in reverse order
str := []rune(builder.String())
for i, j := 0, len(str)-1; i < j; i, j = i+1, j-1 {
str[i], str[j] = str[j], str[i]
}
return string(str)
}
func RunLengthDecode(encoded string) string {
decoded := strings.Builder{}
length := len(encoded)
i := 0
for i < length {
// If the current character is a digit, we need to extract the run length
runLength := 0
for i < length && encoded[i] >= '0' && encoded[i] <= '9' {
runLength = runLength*10 + int(encoded[i]-'0')
i++
}
// The next character will be the character to repeat
if i < length {
char := encoded[i]
for j := 0; j < runLength; j++ {
decoded.WriteByte(char)
}
i++ // Move past the character
}
}
return decoded.String()
}
func SafeMin[T constraints.Integer](values []T, idx int) T {
return values[idx] return values[idx]
} }
func SafeMax(values []int, idx int) int { func SafeMax[T constraints.Integer](values []T, idx int) T {
return values[idx] return values[idx]
} }
func SafeArgMax(valids []bool, values []int) (bool, int) { func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
hasValid := false hasValid := false
maxIndex := 0 maxIndex := 0
maxValue := math.MinInt maxValue := math.MinInt
for i := 0; i < len(valids); i++ { for i := 0; i < len(valids); i++ {
if valids[i] && values[i] > maxValue { if valids[i] && int(values[i]) > maxValue {
hasValid = true hasValid = true
maxIndex = i maxIndex = i
maxValue = values[i] maxValue = int(values[i])
} }
} }
if hasValid { if hasValid {
@ -31,15 +77,15 @@ func SafeArgMax(valids []bool, values []int) (bool, int) {
} }
} }
func SafeArgMin(valids []bool, values []int) (bool, int) { func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
hasValid := false hasValid := false
minIndex := 0 minIndex := 0
minValue := math.MaxInt minValue := math.MaxInt
for i := 0; i < len(valids); i++ { for i := 0; i < len(valids); i++ {
if valids[i] && values[i] < minValue { if valids[i] && int(values[i]) < minValue {
hasValid = true hasValid = true
minIndex = i minIndex = i
minValue = values[i] minValue = int(values[i])
} }
} }
if hasValid { if hasValid {
@ -49,22 +95,7 @@ func SafeArgMin(valids []bool, values []int) (bool, int) {
} }
} }
func Reverse(s string) string { func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
size := len(s)
buf := make([]byte, size)
for start := 0; start < size; {
r, n := utf8.DecodeRuneInString(s[start:])
start += n
utf8.EncodeRune(buf[size-start:], r)
}
return string(buf)
}
func Splice(s string, c rune, idx int) string {
return s[:idx] + string(c) + s[idx:]
}
func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) {
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
@ -94,52 +125,45 @@ func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent,
return lo, hi return lo, hi
} }
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) { func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a := M.GetVal(score-o-e, k-1) a_ok, a, _ := M.GetVal(score-o-e, k-1)
b_ok, b := I.GetVal(score-e, k-1) b_ok, b, _ := I.GetVal(score-e, k-1)
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b}) ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
nextIVal := SafeMax([]int{a, b}, nextITraceback) + 1 // important that the +1 is here nextIVal := SafeMax([]uint32{a, b}, nextITraceback) + 1 // important that the +1 is here
if ok { if ok {
I.SetVal(score, k, nextIVal) I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback])
} }
} }
func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
a_ok, a := M.GetVal(score-o-e, k+1) a_ok, a, _ := M.GetVal(score-o-e, k+1)
b_ok, b := D.GetVal(score-e, k+1) b_ok, b, _ := D.GetVal(score-e, k+1)
ok, nextDTraceback := SafeArgMax( ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
[]bool{a_ok, b_ok}, nextDVal := SafeMax([]uint32{a, b}, nextDTraceback)
[]int{a, b},
)
nextDVal := SafeMax([]int{a, b}, nextDTraceback) // nothing special
if ok { if ok {
D.SetVal(score, k, nextDVal) D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback])
} }
} }
func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) { func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
x := penalties.X x := penalties.X
a_ok, a := M.GetVal(score-x, k) a_ok, a, _ := M.GetVal(score-x, k)
a++ // important to have +1 here a++ // important to have +1 here
b_ok, b := I.GetVal(score, k) b_ok, b, _ := I.GetVal(score, k)
c_ok, c := D.GetVal(score, k) c_ok, c, _ := D.GetVal(score, k)
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c})
nextMVal := SafeMax([]int{a, b, c}, nextMTraceback)
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint32{a, b, c})
nextMVal := SafeMax([]uint32{a, b, c}, nextMTraceback)
if ok { if ok {
M.SetVal(score, k, nextMVal) M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback])
} }
} }

View File

@ -1,22 +1,24 @@
package wfa package wfa
import (
"strings"
)
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result { func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
n := len(s1) n := len(s1)
m := len(s2) m := len(s2)
A_k := m - n A_k := m - n
A_offset := m A_offset := uint32(m)
score := 0 score := 0
estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4 M := NewWavefrontComponent()
M := NewWavefrontComponent(estimatedScore)
M.SetLoHi(0, 0, 0) M.SetLoHi(0, 0, 0)
M.SetVal(0, 0, 0) M.SetVal(0, 0, 0, End)
M.SetTraceback(0, 0, End) I := NewWavefrontComponent()
I := NewWavefrontComponent(estimatedScore) D := NewWavefrontComponent()
D := NewWavefrontComponent(estimatedScore)
for { for {
WFExtend(M, s1, n, s2, m, score) WFExtend(M, s1, n, s2, m, score)
ok, val := M.GetVal(score, A_k) ok, val, _ := M.GetVal(score, A_k)
if ok && val >= A_offset { if ok && val >= A_offset {
break break
} }
@ -26,7 +28,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
CIGAR := "" CIGAR := ""
if doCIGAR { if doCIGAR {
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, s1, s2) CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
} }
return Result{ return Result{
@ -35,12 +37,13 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
} }
} }
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) { func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
_, lo, hi := M.GetLoHi(score) _, lo, hi := M.GetLoHi(score)
for k := lo; k <= hi; k++ { for k := lo; k <= hi; k++ {
// v = M[score][k] - k // v = M[score][k] - k
// h = M[score][k] // h = M[score][k]
ok, h := M.GetVal(score, k) ok, hu, _ := M.GetVal(score, k)
h := int(hu)
v := h - k v := h - k
// exit early if v or h are invalid // exit early if v or h are invalid
@ -48,15 +51,15 @@ func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score in
continue continue
} }
for v < n && h < m && s1[v] == s2[h] { for v < n && h < m && s1[v] == s2[h] {
_, val := M.GetVal(score, k) _, val, tb := M.GetVal(score, k)
M.SetVal(score, k, val+1) M.SetVal(score, k, val+1, tb)
v++ v++
h++ h++
} }
} }
} }
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) { func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) {
// get this score's lo, hi // get this score's lo, hi
lo, hi := NextLoHi(M, I, D, score, penalties) lo, hi := NextLoHi(M, I, D, score, penalties)
@ -67,76 +70,128 @@ func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, sc
} }
} }
func WFBacktrace(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty, A_k int, s1 string, s2 string) string { func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint32, s1 string, s2 string) string {
traceback_CIGAR := []string{"I", "I", "D", "D", "X", "", "", ""}
x := penalties.X x := penalties.X
o := penalties.O o := penalties.O
e := penalties.E e := penalties.E
CIGAR_rev := ""
tb_s := score tb_s := score
tb_k := A_k tb_k := A_k
_, current_traceback := M.GetTraceback(tb_s, tb_k)
done := false done := false
_, current_dist, current_traceback := M.GetVal(tb_s, tb_k)
Ops := []rune{'~'}
Counts := []uint{0}
idx := 0
for !done { for !done {
CIGAR_rev = CIGAR_rev + traceback_CIGAR[current_traceback]
switch current_traceback { switch current_traceback {
case OpenIns: case OpenIns:
if Ops[idx] == 'I' {
Counts[idx]++
} else {
Ops = append(Ops, 'I')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - o - e tb_s = tb_s - o - e
tb_k = tb_k - 1 tb_k = tb_k - 1
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
case ExtdIns: case ExtdIns:
if Ops[idx] == 'I' {
Counts[idx]++
} else {
Ops = append(Ops, 'I')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - e tb_s = tb_s - e
tb_k = tb_k - 1 tb_k = tb_k - 1
_, current_traceback = I.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = I.GetVal(tb_s, tb_k)
case OpenDel: case OpenDel:
if Ops[idx] == 'D' {
Counts[idx]++
} else {
Ops = append(Ops, 'D')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - o - e tb_s = tb_s - o - e
tb_k = tb_k + 1 tb_k = tb_k + 1
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
case ExtdDel: case ExtdDel:
if Ops[idx] == 'D' {
Counts[idx]++
} else {
Ops = append(Ops, 'D')
Counts = append(Counts, 1)
idx++
}
tb_s = tb_s - e tb_s = tb_s - e
tb_k = tb_k + 1 tb_k = tb_k + 1
_, current_traceback = D.GetTraceback(tb_s, tb_k) _, current_dist, current_traceback = D.GetVal(tb_s, tb_k)
case Sub: case Sub:
tb_s = tb_s - x tb_s = tb_s - x
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = M.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := M.GetVal(tb_s, tb_k)
if int(current_dist-next_dist)-1 > 0 {
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist)-1)
idx++
}
if Ops[idx] == 'X' {
Counts[idx]++
} else {
Ops = append(Ops, 'X')
Counts = append(Counts, 1)
idx++
}
current_dist = next_dist
current_traceback = next_traceback
case Ins: case Ins:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = I.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := I.GetVal(tb_s, tb_k)
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist))
idx++
current_dist = next_dist
current_traceback = next_traceback
case Del: case Del:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
_, current_traceback = D.GetTraceback(tb_s, tb_k) _, next_dist, next_traceback := D.GetVal(tb_s, tb_k)
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist-next_dist))
idx++
current_dist = next_dist
current_traceback = next_traceback
case End: case End:
Ops = append(Ops, 'M')
Counts = append(Counts, uint(current_dist))
idx++
done = true done = true
} }
} }
CIGAR_part := Reverse(CIGAR_rev) CIGAR := strings.Builder{}
c := 0 for i := len(Ops) - 1; i > 0; i-- {
i := 0 CIGAR.WriteString(UIntToString(Counts[i]))
j := 0 CIGAR.WriteRune(Ops[i])
for i < len(s1) && j < len(s2) {
if s1[i] == s2[j] {
//CIGAR_part.splice(c, 0, "M")
CIGAR_part = Splice(CIGAR_part, 'M', c)
c++
i++
j++
} else if CIGAR_part[c] == 'X' {
c++
i++
j++
} else if CIGAR_part[c] == 'I' {
c++
j++
} else if CIGAR_part[c] == 'D' {
c++
i++
}
} }
return CIGAR_part return CIGAR.String()
} }

View File

@ -3,6 +3,8 @@ package tests
import ( import (
"bufio" "bufio"
"encoding/json" "encoding/json"
"log"
"math/rand/v2"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@ -10,6 +12,7 @@ import (
wfa "wfa/pkg" wfa "wfa/pkg"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
"golang.org/x/exp/constraints"
) )
const testJsonPath = "tests.json" const testJsonPath = "tests.json"
@ -27,6 +30,103 @@ type TestCase struct {
Solutions string `json:"solutions"` Solutions string `json:"solutions"`
} }
func randRange[T constraints.Integer](min, max int) T {
return T(rand.IntN(max-min) + min)
}
func TestWavefrontPacking(t *testing.T) {
for range 1000 {
val := randRange[uint32](0, 1000)
tb := wfa.Traceback(randRange[uint32](0, 7))
v := wfa.PackWavefrontValue(val, tb)
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
if !valid || gotVal != val || gotTB != tb {
t.Errorf(`test WavefrontPack/Unpack, val: %d, tb: %d, packedval: %x, gotok: %t, gotval: %d, gottb: %d\n`, val, tb, v, valid, gotVal, gotTB)
}
}
}
func TestLoHiPacking(t *testing.T) {
for range 1000 {
lo := randRange[int](-1000, 1000)
hi := randRange[int](-1000, 1000)
v := wfa.PackWavefrontLoHi(lo, hi)
gotLo, gotHi := wfa.UnpackWavefrontLoHi(v)
if gotLo != lo || gotHi != hi {
t.Errorf(`test WavefrontPack/Unpack, lo: %d, hi: %d, packedval: %x, gotlo: %d, gothi: %d`, lo, hi, v, gotLo, gotHi)
}
}
}
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
previousOp := '~'
score := 0
for _, Op := range unpackedCIGAR {
if Op == 'M' {
score = score + penalties.M
} else if Op == 'X' {
score = score + penalties.X
} else if (Op == 'I' && previousOp != 'I') || (Op == 'D' && previousOp != 'D') {
score = score + penalties.O + penalties.E
} else if (Op == 'I' && previousOp == 'I') || (Op == 'D' && previousOp == 'D') {
score = score + penalties.E
}
previousOp = Op
}
return score
}
func CheckCIGARCorrectness(s1 string, s2 string, CIGAR string) bool {
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
i := 0
j := 0
s1Aligned := strings.Builder{}
alignment := strings.Builder{}
s2Aligned := strings.Builder{}
for c := 0; c < len(unpackedCIGAR); c++ {
Op := unpackedCIGAR[c]
if Op == 'M' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune('|')
s2Aligned.WriteByte(s2[j])
i++
j++
} else if Op == 'X' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune(' ')
s2Aligned.WriteByte(s2[j])
i++
j++
} else if Op == 'I' {
s1Aligned.WriteRune('-')
alignment.WriteRune(' ')
s2Aligned.WriteByte(s2[j])
j++
} else if Op == 'D' {
s1Aligned.WriteByte(s1[i])
alignment.WriteRune('|')
s2Aligned.WriteRune('-')
i++
}
}
if i == len(s1) && j == len(s2) {
return true
} else {
log.Printf("\n%s\n%s\n%s\n i=%d, j=%d, |s1|=%d, |s2|=%d\n", s1Aligned.String(), alignment.String(), s2Aligned.String(), i, j, len(s1), len(s2))
return false
}
}
func TestWFA(t *testing.T) { func TestWFA(t *testing.T) {
content, _ := os.ReadFile(testJsonPath) content, _ := os.ReadFile(testJsonPath)
@ -54,7 +154,9 @@ func TestWFA(t *testing.T) {
for solutions.Scan() { for solutions.Scan() {
solution := solutions.Text() solution := solutions.Text()
expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0]) expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0])
expectedCIGAR := strings.Split(solution, "\t")[1]
sequences.Scan() sequences.Scan()
s1 := sequences.Text() s1 := sequences.Text()
@ -64,11 +166,27 @@ func TestWFA(t *testing.T) {
s2 := sequences.Text() s2 := sequences.Text()
s2 = s2[1:] s2 = s2[1:]
x := wfa.WFAlign(s1, s2, testPenalties, false) x := wfa.WFAlign(s1, s2, testPenalties, true)
gotScore := x.Score gotScore := x.Score
gotCIGAR := x.CIGAR
if gotScore != -1*expectedScore { if gotScore != -1*expectedScore {
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d\n`, testName, idx, s1, s2, gotScore, expectedScore) t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d`, testName, idx, s1, s2, gotScore, expectedScore)
os.Exit(1)
}
if gotCIGAR != expectedCIGAR {
checkScore := GetScoreFromCIGAR(gotCIGAR, testPenalties)
CIGARCorrectness := CheckCIGARCorrectness(s1, s2, gotCIGAR)
if checkScore != gotScore && checkScore != -1*expectedScore { // nonequivalent alignment
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
t.Errorf(`test: %s#%d, recalculated score: %d`, testName, idx, checkScore)
os.Exit(1)
}
if !CIGARCorrectness {
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
os.Exit(1)
}
} }
idx++ idx++

12
wfa.js
View File

@ -300,10 +300,14 @@
// func finalizeRef(v ref) // func finalizeRef(v ref)
"syscall/js.finalizeRef": (v_ref) => { "syscall/js.finalizeRef": (v_ref) => {
// Note: TinyGo does not support finalizers so this should never be const id = mem().getUint32(unboxValue(v_ref), true);
// called. this._goRefCounts[id]--;
//console.error('syscall/js.finalizeRef not implemented'); if (this._goRefCounts[id] === 0) {
// for whatever reason this is called by wfajs but doesnt impact the results at all?? const v = this._values[id];
this._values[id] = null;
this._ids.delete(v);
this._idPool.push(id);
}
}, },
// func stringVal(value string) ref // func stringVal(value string) ref