Continued work on download system (cache + cf meta now implemented)

This commit is contained in:
comp500 2022-05-18 22:11:28 +01:00
parent 30bc6d81bb
commit e73fa8c48a
5 changed files with 441 additions and 200 deletions

View File

@ -2,19 +2,21 @@ package core
import (
"encoding/json"
"errors"
"fmt"
"golang.org/x/exp/slices"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
"sync"
)
// DownloadSession is the interface through which a prepared set of downloads
// is inspected, executed and persisted.
type DownloadSession interface {
	// GetManualDownloads returns the manual downloads recorded for the session.
	GetManualDownloads() []ManualDownload
	// StartDownloads begins processing the session's download tasks and returns
	// the channel on which each CompletedDownload is delivered.
	StartDownloads() chan CompletedDownload
	// SaveIndex writes the session's cache index to disk.
	SaveIndex() error
}
type CompletedDownload struct {
@ -23,8 +25,8 @@ type CompletedDownload struct {
Hashes map[string]string
// Error indicates if/why downloading this file failed
Error error
// Warning indicates a message to show to the user regarding this file (download was successful, but had a problem)
Warning error
// Warnings indicates messages to show to the user regarding this file (download was successful, but had a problem)
Warnings []error
}
type downloadSessionInternal struct {
@ -43,207 +45,333 @@ type downloadTask struct {
hash string
}
func (d downloadSessionInternal) GetManualDownloads() []ManualDownload {
func (d *downloadSessionInternal) GetManualDownloads() []ManualDownload {
// TODO: set destpaths
return d.manualDownloads
}
func (d downloadSessionInternal) StartDownloads(workers int) chan CompletedDownload {
tasks := make(chan downloadTask)
func (d *downloadSessionInternal) StartDownloads() chan CompletedDownload {
downloads := make(chan CompletedDownload)
var indexLock sync.RWMutex
for i := 0; i < workers; i++ {
go func() {
for task := range tasks {
// Lookup file in index
indexLock.RLock()
// Map hash stored in mod to cache hash format
storedHashFmtList, hasStoredHashFmt := d.cacheIndex.Hashes[task.hashFormat]
cacheHashFmtList := d.cacheIndex.Hashes[cacheHashFormat]
if hasStoredHashFmt {
hashIdx := slices.Index(storedHashFmtList, task.hash)
if hashIdx > -1 {
// Found in index; try using it!
cacheFileHash := cacheHashFmtList[hashIdx]
cacheFilePath := filepath.Join(d.cacheFolder, cacheFileHash[:2], cacheFileHash[2:])
// Find hashes already stored in the index
hashes := make(map[string]string)
hashesToObtain := slices.Clone(d.hashesToObtain)
for hashFormat, hashList := range d.cacheIndex.Hashes {
if len(hashList) > hashIdx {
hashes[hashFormat] = hashList[hashIdx]
}
}
indexLock.RUnlock()
// Assuming the file already exists, attempt to open it
file, err := os.Open(cacheFilePath)
if err == nil {
// Calculate hashes
if len(hashesToObtain) > 0 {
// TODO: this code needs to add more hashes to the index
err = teeHashes(cacheFileHash, cacheHashFormat, d.hashesToObtain, hashes, io.Discard, file)
if err != nil {
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to read hashes of file %s from cache: %w", cacheFilePath, err),
}
continue
}
}
downloads <- CompletedDownload{
File: file,
DestFilePath: task.destFilePath,
Hashes: hashes,
}
continue
} else if !os.IsNotExist(err) {
// Some other error trying to open the file!
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to read file %s from cache: %w", cacheFilePath, err),
}
continue
}
}
}
indexLock.RUnlock()
// Create temp file to download to
tempFile, err := ioutil.TempFile(filepath.Join(d.cacheFolder, "temp"), "download-tmp")
if err != nil {
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to create temporary file for download: %w", err),
}
continue
}
hashes := make(map[string]string)
hashes[task.hashFormat] = task.hash
// TODO: do download
var file *os.File
indexLock.Lock()
// Update hashes in the index and open file
hashIdx := slices.Index(cacheHashFmtList, hashes[cacheHashFormat])
if hashIdx < 0 {
// Doesn't exist in the index; add as a new value
hashIdx = len(cacheHashFmtList)
cacheFileHash := cacheHashFmtList[hashIdx]
cacheFilePath := filepath.Join(d.cacheFolder, cacheFileHash[:2], cacheFileHash[2:])
// Create the containing directory
err = os.MkdirAll(filepath.Dir(cacheFilePath), 0755)
if err != nil {
_ = tempFile.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to create directories for file %s in cache: %w", cacheFilePath, err),
}
continue
}
// Create destination file
file, err = os.Create(cacheFilePath)
if err != nil {
_ = tempFile.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to write file %s to cache: %w", cacheFilePath, err),
}
continue
}
// Seek back to start of temp file
_, err = tempFile.Seek(0, 0)
if err != nil {
_ = file.Close()
_ = tempFile.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to seek temp file %s in cache: %w", tempFile.Name(), err),
}
continue
}
// Copy temporary file to cache
_, err = io.Copy(file, tempFile)
if err != nil {
_ = file.Close()
_ = tempFile.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to seek temp file %s in cache: %w", tempFile.Name(), err),
}
continue
}
} else {
// Exists in the index and should exist on disk; open for reading
cacheFileHash := cacheHashFmtList[hashIdx]
cacheFilePath := filepath.Join(d.cacheFolder, cacheFileHash[:2], cacheFileHash[2:])
file, err = os.Open(cacheFilePath)
if err != nil {
_ = tempFile.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to write file %s to cache: %w", cacheFilePath, err),
}
continue
}
}
// Close temporary file, as we are done with it
err = tempFile.Close()
if err != nil {
_ = file.Close()
indexLock.Unlock()
downloads <- CompletedDownload{
Error: fmt.Errorf("failed to close temporary file for download: %w", err),
}
continue
}
var warning error
for hashFormat, hashList := range d.cacheIndex.Hashes {
if hashIdx >= len(hashList) {
// Add empty values to make hashList fit hashIdx
hashList = append(hashList, make([]string, (hashIdx-len(hashList))+1)...)
d.cacheIndex.Hashes[hashFormat] = hashList
}
// Replace if it doesn't already exist
if hashList[hashIdx] == "" {
hashList[hashIdx] = hashes[hashFormat]
} else if hash, ok := hashes[hashFormat]; ok && hashList[hashIdx] != hash {
// Warn if the existing hash is inconsistent!
warning = fmt.Errorf("inconsistent %s hash for %s overwritten - value %s (expected %s)", hashFormat,
file.Name(), hashList[hashIdx], hash)
hashList[hashIdx] = hashes[hashFormat]
}
}
indexLock.Unlock()
for _, task := range d.downloadTasks {
// Get handle for mod
cacheHandle := d.cacheIndex.GetHandleFromHash(task.hashFormat, task.hash)
if cacheHandle != nil {
download, err := reuseExistingFile(cacheHandle, d.hashesToObtain, task.destFilePath)
if err != nil {
downloads <- CompletedDownload{
File: file,
DestFilePath: task.destFilePath,
Hashes: hashes,
Warning: warning,
Error: err,
}
} else {
downloads <- download
}
}()
}
go func() {
for _, v := range d.downloadTasks {
tasks <- v
continue
}
}()
download, err := downloadNewFile(&task, d.cacheFolder, d.hashesToObtain, &d.cacheIndex)
if err != nil {
downloads <- CompletedDownload{
Error: err,
}
} else {
downloads <- download
}
}
return downloads
}
func teeHashes(validateHash string, validateHashFormat string, hashesToObtain []string, hashes map[string]string,
// SaveIndex serialises the cache index to JSON and writes it to index.json
// inside the cache folder.
func (d *downloadSessionInternal) SaveIndex() error {
	serialised, marshalErr := json.Marshal(d.cacheIndex)
	if marshalErr != nil {
		return fmt.Errorf("failed to serialise index: %w", marshalErr)
	}

	indexPath := filepath.Join(d.cacheFolder, "index.json")
	if writeErr := ioutil.WriteFile(indexPath, serialised, 0644); writeErr != nil {
		return fmt.Errorf("failed to write index: %w", writeErr)
	}
	return nil
}
// reuseExistingFile serves a download from a file that is already in the cache.
// It opens the cached file and, if any of hashesToObtain are not yet known for
// the handle, reads the file through teeHashes to compute them, rewinds the
// file and writes the new hashes into the index. The returned CompletedDownload
// holds the open file positioned at its start.
func reuseExistingFile(cacheHandle *CacheIndexHandle, hashesToObtain []string, destFilePath string) (CompletedDownload, error) {
	// Already stored; try using it!
	cached, openErr := cacheHandle.Open()
	if openErr != nil {
		return CompletedDownload{}, fmt.Errorf("failed to read file %s from cache: %w", cacheHandle.Path(), openErr)
	}

	if missing := cacheHandle.GetRemainingHashes(hashesToObtain); len(missing) > 0 {
		if hashErr := teeHashes(missing, cacheHandle.Hashes, io.Discard, cached); hashErr != nil {
			_ = cached.Close()
			return CompletedDownload{}, fmt.Errorf("failed to read hashes of file %s from cache: %w", cacheHandle.Path(), hashErr)
		}
		// Hashing consumed the file; rewind so the caller reads it from the start
		if _, seekErr := cached.Seek(0, 0); seekErr != nil {
			_ = cached.Close()
			return CompletedDownload{}, fmt.Errorf("failed to seek file %s in cache: %w", cacheHandle.Path(), seekErr)
		}
		cacheHandle.UpdateIndex()
	}

	result := CompletedDownload{
		File:         cached,
		DestFilePath: destFilePath,
		Hashes:       cacheHandle.Hashes,
	}
	return result, nil
}
// downloadNewFile downloads the file for task into a temporary file under
// cacheFolder/temp while hashing it, then registers the hashes in index and
// either moves the temporary file into the cache or, if the index already
// holds a file with one of those hashes, opens the existing cached file.
//
// The data comes from task.url when it is set, otherwise from
// task.metaDownloaderData.DownloadFile. On every path where the temporary file
// does not end up in the cache it is closed and removed, so failed or
// redundant downloads do not accumulate in the temp folder.
func downloadNewFile(task *downloadTask, cacheFolder string, hashesToObtain []string, index *CacheIndex) (CompletedDownload, error) {
	// Create temp file to download to
	tempFile, err := ioutil.TempFile(filepath.Join(cacheFolder, "temp"), "download-tmp")
	if err != nil {
		return CompletedDownload{}, fmt.Errorf("failed to create temporary file for download: %w", err)
	}
	// discardTemp is best-effort cleanup; its errors are ignored because the
	// caller is already returning a more relevant error (or the file is unneeded).
	discardTemp := func() {
		_ = tempFile.Close()
		_ = os.Remove(tempFile.Name())
	}

	hashesToObtain, hashes := getHashListsForDownload(hashesToObtain, task.hashFormat, task.hash)
	if len(hashesToObtain) > 0 {
		var data io.ReadCloser
		if task.url != "" {
			resp, err := http.Get(task.url)
			// TODO: content type, user-agent?
			if err != nil {
				discardTemp()
				return CompletedDownload{}, fmt.Errorf("failed to download %s: %w", task.url, err)
			}
			if resp.StatusCode != 200 {
				_ = resp.Body.Close()
				discardTemp()
				return CompletedDownload{}, fmt.Errorf("failed to download %s: invalid status code %v", task.url, resp.StatusCode)
			}
			data = resp.Body
		} else {
			data, err = task.metaDownloaderData.DownloadFile()
			if err != nil {
				discardTemp()
				return CompletedDownload{}, err
			}
		}

		err = teeHashes(hashesToObtain, hashes, tempFile, data)
		_ = data.Close()
		if err != nil {
			discardTemp()
			return CompletedDownload{}, fmt.Errorf("failed to download file for %s: %w", task.destFilePath, err)
		}
	}

	// Create handle with calculated hashes
	cacheHandle, alreadyExists := index.NewHandleFromHashes(hashes)
	// Update index stored hashes
	warnings := cacheHandle.UpdateIndex()

	var file *os.File
	if alreadyExists {
		// The cache already holds this file, so the fresh copy is redundant
		err = tempFile.Close()
		_ = os.Remove(tempFile.Name())
		if err != nil {
			return CompletedDownload{}, fmt.Errorf("failed to close temporary file %s: %w", tempFile.Name(), err)
		}
		file, err = cacheHandle.Open()
		if err != nil {
			return CompletedDownload{}, fmt.Errorf("failed to read file %s from cache: %w", cacheHandle.Path(), err)
		}
	} else {
		// Automatically closes tempFile
		file, err = cacheHandle.CreateFromTemp(tempFile)
		if err != nil {
			discardTemp()
			return CompletedDownload{}, fmt.Errorf("failed to move file %s to cache: %w", cacheHandle.Path(), err)
		}
	}

	return CompletedDownload{
		File:         file,
		DestFilePath: task.destFilePath,
		Hashes:       hashes,
		Warnings:     warnings,
	}, nil
}
// selectPreferredHash picks, from hashes, the entry whose format appears latest
// in preferredHashList. It returns the hash value first and its format second;
// both are empty when none of the listed formats is present in hashes.
func selectPreferredHash(hashes map[string]string) (currHash string, currHashFormat string) {
	// Walk the list backwards so the first hit is the latest-listed format
	for i := len(preferredHashList) - 1; i >= 0; i-- {
		format := preferredHashList[i]
		if value, found := hashes[format]; found {
			return value, format
		}
	}
	return "", ""
}
// getHashListsForDownload prepares the inputs for hashing a fresh download.
// It returns a new list that starts with cacheHashFormat followed by every
// entry of hashesToObtain other than validateHashFormat and cacheHashFormat
// (the input slice is not modified), and a new hashes map containing only the
// validate format and its hash. Note that cacheHashFormat is always in the
// returned list, even when it is also the validate format.
func getHashListsForDownload(hashesToObtain []string, validateHashFormat string, validateHash string) ([]string, map[string]string) {
	known := map[string]string{validateHashFormat: validateHash}

	wanted := []string{cacheHashFormat}
	for _, format := range hashesToObtain {
		switch format {
		case validateHashFormat, cacheHashFormat:
			// Already covered: either supplied by the caller or listed first
			continue
		}
		wanted = append(wanted, format)
	}
	return wanted, known
}
// teeHashes copies src to dst while hashing the data. The copy is validated
// against the entry of hashes chosen by selectPreferredHash; if the calculated
// value differs, an error is returned. On success the hash for every format in
// hashesToObtain is stored into the hashes map.
//
// An error is also returned when hashes contains no preferred format, when a
// hash implementation cannot be obtained, or when copying fails.
func teeHashes(hashesToObtain []string, hashes map[string]string,
	dst io.Writer, src io.Reader) error {
	// Select the best hash from the hashes map to validate against.
	// selectPreferredHash returns the hash value first, then its format.
	validateHash, validateHashFormat := selectPreferredHash(hashes)
	if validateHashFormat == "" {
		return errors.New("failed to find preferred hash for file")
	}

	// Create writers for all the hashers
	mainHasher, err := GetHashImpl(validateHashFormat)
	if err != nil {
		return fmt.Errorf("failed to get hash format %s: %w", validateHashFormat, err)
	}
	hashers := make(map[string]HashStringer, len(hashesToObtain))
	// Room for every requested hasher plus mainHasher and dst
	allWriters := make([]io.Writer, 0, len(hashesToObtain)+2)
	for _, v := range hashesToObtain {
		if _, seen := hashers[v]; seen {
			// A format listed twice only needs to be hashed once
			continue
		}
		hasher, err := GetHashImpl(v)
		if err != nil {
			return fmt.Errorf("failed to get hash format %s: %w", v, err)
		}
		hashers[v] = hasher
		allWriters = append(allWriters, hasher)
	}
	allWriters = append(allWriters, mainHasher, dst)

	// Copy source to all writers (all hashers and dst)
	w := io.MultiWriter(allWriters...)
	_, err = io.Copy(w, src)
	if err != nil {
		return fmt.Errorf("failed to read file: %w", err)
	}

	calculatedHash := mainHasher.HashToString(mainHasher.Sum(nil))

	// Check if the hash of the downloaded file matches the expected hash
	if calculatedHash != validateHash {
		return fmt.Errorf(
			"%s hash of downloaded file does not match with expected hash!\n download hash: %s\n expected hash: %s\n",
			validateHashFormat, calculatedHash, validateHash)
	}

	for hashFormat, v := range hashers {
		hashes[hashFormat] = v.HashToString(v.Sum(nil))
	}

	return nil
}
// cacheHashFormat is the hash format whose values are used to derive the
// on-disk paths of cached files (see CacheIndexHandle.Path).
const cacheHashFormat = "sha256"
// CacheIndex records the known hashes of the files in the download cache.
// Hashes maps a hash format to a list of hash values; the same position in
// every list refers to the same cached file, and an empty string means the
// hash in that format is not known for that file.
type CacheIndex struct {
	Version uint32
	Hashes  map[string][]string
	// cachePath is the folder under which cached files are stored.
	cachePath string
	// nextHashIdx is the list position handed to the next newly created handle.
	nextHashIdx int
}
// CacheIndexHandle refers to a single file entry of a CacheIndex.
type CacheIndexHandle struct {
	// index is the CacheIndex this handle belongs to.
	index *CacheIndex
	// hashIdx is the entry's position within each hash list of the index.
	hashIdx int
	// Hashes holds the hashes known for this entry, keyed by hash format.
	Hashes map[string]string
}
// GetHandleFromHash looks up hash in the index list for hashFormat. When it is
// found, the returned handle carries every non-empty hash stored at the same
// position in the other formats' lists. It returns nil when the format is not
// in the index or the hash is not in its list.
func (c *CacheIndex) GetHandleFromHash(hashFormat string, hash string) *CacheIndexHandle {
	knownHashes, formatKnown := c.Hashes[hashFormat]
	if !formatKnown {
		return nil
	}
	idx := slices.Index(knownHashes, hash)
	if idx < 0 {
		return nil
	}

	entry := make(map[string]string)
	for format, list := range c.Hashes {
		if idx >= len(list) {
			// This format's list does not reach the entry
			continue
		}
		if stored := list[idx]; stored != "" {
			entry[format] = stored
		}
	}
	return &CacheIndexHandle{
		index:   c,
		hashIdx: idx,
		Hashes:  entry,
	}
}
// NewHandleFromHashes returns a handle for the file described by hashes. If
// any of the given hashes is already in the index, the existing entry's handle
// is returned with all given hashes merged into its Hashes map, and the second
// result is true. Otherwise a handle for a new entry at nextHashIdx is
// returned (using the given map directly), nextHashIdx is advanced, and the
// second result is false.
func (c *CacheIndex) NewHandleFromHashes(hashes map[string]string) (*CacheIndexHandle, bool) {
	for format, value := range hashes {
		existing := c.GetHandleFromHash(format, value)
		if existing == nil {
			continue
		}
		// Add hashes to handle
		for mergeFormat, mergeValue := range hashes {
			existing.Hashes[mergeFormat] = mergeValue
		}
		return existing, true
	}

	fresh := &CacheIndexHandle{
		index:   c,
		hashIdx: c.nextHashIdx,
		Hashes:  hashes,
	}
	c.nextHashIdx++
	return fresh, false
}
// GetRemainingHashes returns, in their original order, the formats from
// hashesToObtain that have no entry in the handle's Hashes map. The result is
// nil when nothing is missing.
func (h *CacheIndexHandle) GetRemainingHashes(hashesToObtain []string) []string {
	var missing []string
	for _, format := range hashesToObtain {
		if _, known := h.Hashes[format]; known {
			continue
		}
		missing = append(missing, format)
	}
	return missing
}
// Path returns the location of the handle's file in the cache: the cache path,
// a directory named after the first two characters of the entry's
// cacheHashFormat hash, and a file named after the rest of that hash.
func (h *CacheIndexHandle) Path() string {
	name := h.index.Hashes[cacheHashFormat][h.hashIdx]
	dirPart, filePart := name[:2], name[2:]
	return filepath.Join(h.index.cachePath, dirPart, filePart)
}
// Open opens the handle's cached file (at Path) for reading.
func (h *CacheIndexHandle) Open() (*os.File, error) {
	cachedPath := h.Path()
	return os.Open(cachedPath)
}
// CreateFromTemp moves the temporary file temp to the handle's cache path,
// creating the containing directory if needed, and returns the cached file
// opened for reading.
//
// temp is always closed by this call, including when an error is returned. It
// is closed before the rename because an open file cannot be renamed on
// Windows.
func (h *CacheIndexHandle) CreateFromTemp(temp *os.File) (*os.File, error) {
	destPath := h.Path()

	if err := temp.Close(); err != nil {
		return nil, err
	}
	if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
		return nil, err
	}
	if err := os.Rename(temp.Name(), destPath); err != nil {
		return nil, err
	}
	return os.Open(destPath)
}
// UpdateIndex writes the handle's hashes into the index at the handle's
// position, growing a format's list with empty strings when it is too short.
// An empty slot is simply filled. A slot holding a different value is
// overwritten and a warning describing the inconsistency is appended to the
// result.
func (h *CacheIndexHandle) UpdateIndex() (warnings []error) {
	// Add hashes to index
	for format, wanted := range h.Hashes {
		list := h.index.Hashes[format]
		if shortfall := h.hashIdx - len(list) + 1; shortfall > 0 {
			// Add empty values to make the list reach hashIdx
			list = append(list, make([]string, shortfall)...)
			h.index.Hashes[format] = list
		}

		switch stored := list[h.hashIdx]; {
		case stored == "":
			// Nothing recorded yet; store the value
			list[h.hashIdx] = wanted
		case stored != wanted:
			// Warn if the existing hash is inconsistent!
			warnings = append(warnings, fmt.Errorf("inconsistent %s hash for %s overwritten - value %s (expected %s)",
				format, h.Path(), stored, wanted))
			list[h.hashIdx] = wanted
		}
	}
	return warnings
}
func CreateDownloadSession(mods []*Mod, hashesToObtain []string) (DownloadSession, error) {
@ -281,6 +409,8 @@ func CreateDownloadSession(mods []*Mod, hashesToObtain []string) (DownloadSessio
if !hasCacheHashFmt {
cacheIndex.Hashes[cacheHashFormat] = make([]string, 0)
}
cacheIndex.cachePath = cachePath
cacheIndex.nextHashIdx = len(cacheIndex.Hashes[cacheHashFormat])
// TODO: move in/ files?
@ -336,5 +466,5 @@ func CreateDownloadSession(mods []*Mod, hashesToObtain []string) (DownloadSessio
// TODO: index housekeeping? i.e. remove deleted files, remove old files (LRU?)
return downloadSession, nil
return &downloadSession, nil
}

View File

@ -33,6 +33,14 @@ func GetHashImpl(hashType string) (HashStringer, error) {
return nil, fmt.Errorf("hash implementation %s not found", hashType)
}
// preferredHashList lists hash formats in order of increasing preference:
// selectPreferredHash scans the whole list without stopping early, so when
// several formats are available the one listed latest here is chosen.
var preferredHashList = []string{
"murmur2",
"md5",
"sha1",
"sha256",
"sha512",
}
type HashStringer interface {
hash.Hash
HashToString([]byte) string

View File

@ -45,7 +45,7 @@ type MetaDownloader interface {
// MetaDownloaderData specifies the per-Mod metadata retrieved for downloading
type MetaDownloaderData interface {
	// GetManualDownload reports whether the file must be downloaded manually
	// and, if so, the details of that manual download.
	GetManualDownload() (bool, ManualDownload)
	// DownloadFile starts the download and returns a reader for the file's
	// contents; the caller is responsible for closing it.
	DownloadFile() (io.ReadCloser, error)
}
type ManualDownload struct {

View File

@ -2,9 +2,11 @@ package curseforge
import (
"errors"
"fmt"
"github.com/spf13/viper"
"golang.org/x/exp/slices"
"io"
"net/http"
"path/filepath"
"regexp"
"strconv"
@ -448,20 +450,116 @@ func parseExportData(from map[string]interface{}) (cfExportData, error) {
// cfDownloader is a stateless type whose GetFilesMetadata method retrieves
// download metadata for CurseForge mods.
type cfDownloader struct{}
// GetFilesMetadata retrieves download metadata for the given CurseForge mods.
// The result has one entry per mod, in the same order as mods.
//
// Mod information is requested for all projects at once. A mod whose project
// does not allow distribution is recorded as a manual download, and once its
// file information is known the website URL is extended to point at the file
// and the file name is filled in. All other mods get the file's download URL.
// File information is taken from the mod's latest files when the wanted file
// is among them, otherwise it is requested separately.
//
// An error is returned when a mod has no CurseForge update data, when an API
// request fails, or when a response refers to a mod that was not requested.
func (c cfDownloader) GetFilesMetadata(mods []*core.Mod) ([]core.MetaDownloaderData, error) {
	if len(mods) == 0 {
		return []core.MetaDownloaderData{}, nil
	}

	downloaderData := make([]core.MetaDownloaderData, len(mods))
	// indexMap maps a project ID to the position of its mod in mods
	indexMap := make(map[int]int, len(mods))
	projectMetadata := make([]cfUpdateData, len(mods))
	modIDs := make([]int, len(mods))
	for i, v := range mods {
		updateData, ok := v.GetParsedUpdateData("curseforge")
		if !ok {
			return nil, fmt.Errorf("failed to read CurseForge update metadata from %s", v.Name)
		}
		project, ok := updateData.(cfUpdateData)
		if !ok {
			// Report unexpected data as an error instead of panicking
			return nil, fmt.Errorf("failed to read CurseForge update metadata from %s", v.Name)
		}
		indexMap[project.ProjectID] = i
		projectMetadata[i] = project
		modIDs[i] = project.ProjectID
	}

	modData, err := cfDefaultClient.getModInfoMultiple(modIDs)
	if err != nil {
		return nil, fmt.Errorf("failed to get CurseForge mod metadata: %w", err)
	}

	handleFileInfo := func(modID int, fileInfo modFileInfo) {
		idx := indexMap[modID]
		// If metadata already exists (i.e. opted-out) update it with more metadata
		if meta, ok := downloaderData[idx].(*cfDownloadMetadata); ok {
			if meta.noDistribution {
				meta.websiteUrl = meta.websiteUrl + "/files/" + strconv.Itoa(fileInfo.ID)
				meta.fileName = fileInfo.FileName
				// Keep the manual-download entry; do not replace it below
				return
			}
		}
		downloaderData[idx] = &cfDownloadMetadata{
			url: fileInfo.DownloadURL,
		}
	}

	fileIDsToLookup := make([]int, 0)
	for _, mod := range modData {
		idx, ok := indexMap[mod.ID]
		if !ok {
			return nil, fmt.Errorf("unknown mod ID in response: %v (for %v)", mod.ID, mod.Name)
		}
		if !mod.AllowModDistribution {
			downloaderData[idx] = &cfDownloadMetadata{
				noDistribution: true, // Inverted so the default value is not this (probably doesn't matter)
				name:           mod.Name,
				websiteUrl:     mod.Links.WebsiteURL,
			}
		}

		fileID := projectMetadata[idx].FileID
		fileInfoFound := false
		// First look in latest files
		for _, fileInfo := range mod.LatestFiles {
			if fileInfo.ID == fileID {
				fileInfoFound = true
				handleFileInfo(mod.ID, fileInfo)
				break
			}
		}

		if !fileInfoFound {
			fileIDsToLookup = append(fileIDsToLookup, fileID)
		}
	}

	if len(fileIDsToLookup) > 0 {
		fileData, err := cfDefaultClient.getFileInfoMultiple(fileIDsToLookup)
		if err != nil {
			return nil, fmt.Errorf("failed to get CurseForge file metadata: %w", err)
		}
		for _, fileInfo := range fileData {
			if _, ok := indexMap[fileInfo.ModID]; !ok {
				return nil, fmt.Errorf("unknown mod ID in response: %v from file %v (for %v)", fileInfo.ModID, fileInfo.ID, fileInfo.FileName)
			}
			handleFileInfo(fileInfo.ModID, fileInfo)
		}
	}

	return downloaderData, nil
}
// cfDownloadMetadata is the download metadata for a single CurseForge file.
type cfDownloadMetadata struct {
	// url is the address DownloadFile fetches the file from.
	url string
	// noDistribution marks the file as requiring a manual download.
	noDistribution bool
	// name, fileName and websiteUrl describe the manual download; they are
	// only reported when noDistribution is set.
	name       string
	fileName   string
	websiteUrl string
}
func (m *cfDownloadMetadata) RequiresManualDownload() bool {
return !m.allowsDistribution
func (m *cfDownloadMetadata) GetManualDownload() (bool, core.ManualDownload) {
if !m.noDistribution {
return false, core.ManualDownload{}
}
return true, core.ManualDownload{
Name: m.name,
FileName: m.fileName,
URL: m.websiteUrl,
}
}
func (m *cfDownloadMetadata) DownloadFile(writer io.Writer) error {
// TODO implement me
panic("implement me")
func (m *cfDownloadMetadata) DownloadFile() (io.ReadCloser, error) {
resp, err := http.Get(m.url)
// TODO: content type, user-agent?
if err != nil {
return nil, fmt.Errorf("failed to download %s: %w", m.url, err)
}
if resp.StatusCode != 200 {
_ = resp.Body.Close()
return nil, fmt.Errorf("failed to download %s: invalid status code %v", m.url, resp.StatusCode)
}
return resp.Body, nil
}

View File

@ -147,7 +147,11 @@ type modInfo struct {
FileType int `json:"releaseType"`
Modloader int `json:"modLoader"`
} `json:"latestFilesIndexes"`
ModLoaders []string `json:"modLoaders"`
ModLoaders []string `json:"modLoaders"`
AllowModDistribution bool `json:"allowModDistribution"`
Links struct {
WebsiteURL string `json:"websiteUrl"`
} `json:"links"`
}
func (c *cfApiClient) getModInfo(modID int) (modInfo, error) {
@ -203,6 +207,7 @@ func (c *cfApiClient) getModInfoMultiple(modIDs []int) ([]modInfo, error) {
// modFileInfo is a subset of the deserialised JSON response from the Curse API for mod files
type modFileInfo struct {
ID int `json:"id"`
ModID int `json:"modId"`
FileName string `json:"fileName"`
FriendlyName string `json:"displayName"`
Date time.Time `json:"fileDate"`