Fix O(n^2) path lookup using a binary search

This commit is contained in:
comp500 2023-03-24 02:37:45 +00:00
parent d79556259b
commit 9fc9ee5e09

View File

@ -3,6 +3,7 @@ package core
import ( import (
"errors" "errors"
"fmt" "fmt"
"golang.org/x/exp/slices"
"io" "io"
"io/fs" "io/fs"
"os" "os"
@ -79,33 +80,45 @@ func (in *Index) resortIndex() {
}) })
} }
// markFound stores hash on the index entry at position i and marks that entry
// as found. The entry's HashFormat is left empty when format equals the
// index-wide HashFormat; otherwise it is set to format.
func (in *Index) markFound(i int, format, hash string) {
	entry := &in.Files[i]
	entry.Hash = hash

	// Only record a per-file format when it differs from the index-wide one.
	entry.HashFormat = format
	if format == in.HashFormat {
		entry.HashFormat = ""
	}

	// Mark this file as found
	entry.fileExistsTemp = true
}
func (in *Index) updateFileHashGiven(path, format, hash string, mod bool) error { func (in *Index) updateFileHashGiven(path, format, hash string, mod bool) error {
// Find in index // Find in index
found := false
relPath, err := filepath.Rel(filepath.Dir(in.indexFile), path) relPath, err := filepath.Rel(filepath.Dir(in.indexFile), path)
if err != nil { if err != nil {
return err return err
} }
slashPath := filepath.ToSlash(relPath) slashPath := filepath.ToSlash(relPath)
// TODO: make this not a linear scan for every file update
for k, v := range in.Files { // Binary search for slashPath in the files list
if v.File == slashPath { i, found := slices.BinarySearchFunc(in.Files, IndexFile{File: slashPath}, func(a IndexFile, b IndexFile) int {
found = true return strings.Compare(a.File, b.File)
// Update hash })
in.Files[k].Hash = hash if found {
if in.HashFormat == format { in.markFound(i, format, hash)
in.Files[k].HashFormat = "" // There may be other entries with the same file path but different alias!
} else { // Search back and forth to find them:
in.Files[k].HashFormat = format j := i
} for j > 0 && in.Files[j-1].File == slashPath {
// Mark this file as found j = j - 1
in.Files[k].fileExistsTemp = true in.markFound(j, format, hash)
in.Files[k].File = slashPath
// Don't break out of loop, as there may be aliased versions that
// also need to be updated
} }
} j = i
if !found { for j < len(in.Files)-1 && in.Files[j+1].File == slashPath {
j = j + 1
in.markFound(j, format, hash)
}
} else {
newFile := IndexFile{ newFile := IndexFile{
File: slashPath, File: slashPath,
Hash: hash, Hash: hash,
@ -271,6 +284,8 @@ func (in *Index) Refresh() error {
for i := range in.Files { for i := range in.Files {
in.Files[i].File = path.Clean(in.Files[i].File) in.Files[i].File = path.Clean(in.Files[i].File)
} }
// Resort index (required by updateFile)
in.resortIndex()
for _, v := range fileList { for _, v := range fileList {
start := time.Now() start := time.Now()
@ -303,6 +318,8 @@ func (in *Index) Refresh() error {
// RefreshFile calculates the hash for a given path and updates it in the index (also sorts the index) // RefreshFile calculates the hash for a given path and updates it in the index (also sorts the index)
func (in *Index) RefreshFile(path string) error { func (in *Index) RefreshFile(path string) error {
// Resort index first (required by updateFile)
in.resortIndex()
err := in.updateFile(path) err := in.updateFile(path)
if err != nil { if err != nil {
return err return err
@ -335,6 +352,8 @@ func (in *Index) RefreshFileWithHash(path, format, hash string, mod bool) error
if viper.GetBool("no-internal-hashes") { if viper.GetBool("no-internal-hashes") {
hash = "" hash = ""
} }
// Resort index first (required by updateFile)
in.resortIndex()
err := in.updateFileHashGiven(path, format, hash, mod) err := in.updateFileHashGiven(path, format, hash, mod)
if err != nil { if err != nil {
return err return err