portal/storage/storage.go

693 lines
17 KiB
Go

package storage
import (
"bytes"
"context"
"encoding/hex"
"errors"
"fmt"
"git.lumeweb.com/LumeWeb/libs5-go/encoding"
"git.lumeweb.com/LumeWeb/libs5-go/types"
"git.lumeweb.com/LumeWeb/portal/account"
"git.lumeweb.com/LumeWeb/portal/api/middleware"
"git.lumeweb.com/LumeWeb/portal/cron"
"git.lumeweb.com/LumeWeb/portal/db/models"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/go-co-op/gocron/v2"
"github.com/google/uuid"
"github.com/spf13/viper"
tusd "github.com/tus/tusd/v2/pkg/handler"
"github.com/tus/tusd/v2/pkg/s3store"
"go.sia.tech/renterd/api"
busClient "go.sia.tech/renterd/bus/client"
workerClient "go.sia.tech/renterd/worker/client"
"go.uber.org/fx"
"go.uber.org/zap"
"gorm.io/gorm"
"io"
"lukechampine.com/blake3"
"net/http"
"net/url"
"strings"
"time"
)
type TusPreUploadCreateCallback func(hook tusd.HookEvent) (tusd.HTTPResponse, tusd.FileInfoChanges, error)
type TusPreFinishResponseCallback func(hook tusd.HookEvent) (tusd.HTTPResponse, error)
type StorageServiceParams struct {
fx.In
Config *viper.Viper
Logger *zap.Logger
Db *gorm.DB
Accounts *account.AccountServiceImpl
Cron *cron.CronServiceImpl
}
var Module = fx.Module("storage",
fx.Provide(
NewStorageService,
),
)
type StorageServiceImpl struct {
busClient *busClient.Client
workerClient *workerClient.Client
tus *tusd.Handler
tusStore tusd.DataStore
s3Client *s3.Client
config *viper.Viper
logger *zap.Logger
db *gorm.DB
accounts *account.AccountServiceImpl
cron *cron.CronServiceImpl
}
func (s *StorageServiceImpl) Tus() *tusd.Handler {
return s.tus
}
func (s *StorageServiceImpl) Start() error {
return nil
}
func NewStorageService(params StorageServiceParams) *StorageServiceImpl {
return &StorageServiceImpl{
config: params.Config,
logger: params.Logger,
db: params.Db,
accounts: params.Accounts,
cron: params.Cron,
}
}
func (s StorageServiceImpl) PutFileSmall(file io.ReadSeeker, bucket string, generateProof bool) ([]byte, error) {
hash, err := s.GetHashSmall(file)
hashStr, err := encoding.NewMultihash(s.getPrefixedHash(hash)).ToBase64Url()
if err != nil {
return nil, err
}
_, err = file.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
err = s.createBucketIfNotExists(bucket)
if err != nil {
return nil, err
}
_, err = s.workerClient.UploadObject(context.Background(), file, bucket, hashStr, api.UploadObjectOptions{})
if err != nil {
return nil, err
}
return hash[:], nil
}
func (s StorageServiceImpl) PutFile(file io.Reader, bucket string, hash []byte) error {
hashStr, err := encoding.NewMultihash(s.getPrefixedHash(hash)).ToBase64Url()
err = s.createBucketIfNotExists(bucket)
if err != nil {
return err
}
_, err = s.workerClient.UploadObject(context.Background(), file, bucket, hashStr, api.UploadObjectOptions{})
if err != nil {
return err
}
return nil
}
func (s *StorageServiceImpl) BuildUploadBufferTus(basePath string, preUploadCb TusPreUploadCreateCallback, preFinishCb TusPreFinishResponseCallback) (*tusd.Handler, tusd.DataStore, *s3.Client, error) {
customResolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) {
if service == s3.ServiceID {
return aws.Endpoint{
URL: s.config.GetString("core.storage.s3.endpoint"),
SigningRegion: s.config.GetString("core.storage.s3.region"),
}, nil
}
return aws.Endpoint{}, &aws.EndpointNotFoundError{}
})
cfg, err := config.LoadDefaultConfig(context.TODO(),
config.WithRegion("us-east-1"),
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
s.config.GetString("core.storage.s3.accessKey"),
s.config.GetString("core.storage.s3.secretKey"),
"",
)),
config.WithEndpointResolverWithOptions(customResolver),
)
if err != nil {
return nil, nil, nil, nil
}
s3Client := s3.NewFromConfig(cfg)
store := s3store.New(s.config.GetString("core.storage.s3.bufferBucket"), s3Client)
locker := NewMySQLLocker(s.db, s.logger)
composer := tusd.NewStoreComposer()
store.UseIn(composer)
composer.UseLocker(locker)
handler, err := tusd.NewHandler(tusd.Config{
BasePath: basePath,
StoreComposer: composer,
DisableDownload: true,
NotifyCompleteUploads: true,
NotifyTerminatedUploads: true,
NotifyCreatedUploads: true,
RespectForwardedHeaders: true,
PreUploadCreateCallback: preUploadCb,
})
return handler, store, s3Client, err
}
func (s *StorageServiceImpl) init() error {
addr := s.config.GetString("core.sia.url")
passwd := s.config.GetString("core.sia.key")
addrURL, err := url.Parse(addr)
if err != nil {
return err
}
addrURL.Path = "/api/worker"
s.workerClient = workerClient.New(addrURL.String(), passwd)
addrURL.Path = "/api/bus"
s.busClient = busClient.New(addrURL.String(), passwd)
preUpload := func(hook tusd.HookEvent) (tusd.HTTPResponse, tusd.FileInfoChanges, error) {
blankResp := tusd.HTTPResponse{}
blankChanges := tusd.FileInfoChanges{}
hash, ok := hook.Upload.MetaData["hash"]
if !ok {
return blankResp, blankChanges, errors.New("missing hash")
}
decodedHash, err := encoding.MultihashFromBase64Url(hash)
if err != nil {
return blankResp, blankChanges, err
}
exists, _ := s.FileExists(decodedHash.HashBytes())
if exists {
return blankResp, blankChanges, errors.New("file already exists")
}
exists, _ = s.TusUploadExists(decodedHash.HashBytes())
if exists {
return blankResp, blankChanges, errors.New("file is already being uploaded")
}
return blankResp, blankChanges, nil
}
tus, store, s3client, err := s.BuildUploadBufferTus("/s5/upload/tus", preUpload, nil)
if err != nil {
return err
}
s.tus = tus
s.tusStore = store
s.s3Client = s3client
s.cron.RegisterService(s)
go s.tusWorker()
return nil
}
func (s *StorageServiceImpl) LoadInitialTasks(cron cron.CronService) error {
return nil
}
func (s *StorageServiceImpl) createBucketIfNotExists(bucket string) error {
_, err := s.busClient.Bucket(context.Background(), bucket)
if err == nil {
return nil
}
if err != nil {
if !errors.Is(err, api.ErrBucketNotFound) {
return err
}
}
err = s.busClient.CreateBucket(context.Background(), bucket, api.CreateBucketOptions{
Policy: api.BucketPolicy{
PublicReadAccess: false,
},
})
if err != nil {
return err
}
return nil
}
func (s *StorageServiceImpl) FileExists(hash []byte) (bool, models.Upload) {
hashStr := hex.EncodeToString(hash)
var upload models.Upload
result := s.db.Model(&models.Upload{}).Where(&models.Upload{Hash: hashStr}).First(&upload)
return result.RowsAffected > 0, upload
}
func (s *StorageServiceImpl) GetHashSmall(file io.ReadSeeker) ([]byte, error) {
buf := bytes.NewBuffer(nil)
_, err := io.Copy(buf, file)
if err != nil {
return nil, err
}
hash := blake3.Sum256(buf.Bytes())
return hash[:], nil
}
func (s *StorageServiceImpl) GetHash(file io.Reader) ([]byte, int64, error) {
hasher := blake3.New(64, nil)
totalBytes, err := io.Copy(hasher, file)
if err != nil {
return nil, 0, err
}
hash := hasher.Sum(nil)
return hash[:32], totalBytes, nil
}
func (s *StorageServiceImpl) CreateUpload(hash []byte, mime string, uploaderID uint, uploaderIP string, size uint64, protocol string) (*models.Upload, error) {
hashStr := hex.EncodeToString(hash)
upload := &models.Upload{
Hash: hashStr,
MimeType: mime,
UserID: uploaderID,
UploaderIP: uploaderIP,
Protocol: protocol,
Size: size,
}
result := s.db.Create(upload)
if result.Error != nil {
return nil, result.Error
}
return upload, nil
}
func (s *StorageServiceImpl) tusWorker() {
for {
select {
case info := <-s.tus.CreatedUploads:
hash, ok := info.Upload.MetaData["hash"]
errorResponse := tusd.HTTPResponse{StatusCode: 400, Header: nil}
if !ok {
s.logger.Error("Missing hash in metadata")
continue
}
uploaderID, ok := info.Context.Value(middleware.S5AuthUserIDKey).(uint64)
if !ok {
errorResponse.Body = "Missing user id in context"
info.Upload.StopUpload(errorResponse)
s.logger.Error("Missing user id in context")
continue
}
uploaderIP := info.HTTPRequest.RemoteAddr
decodedHash, err := encoding.MultihashFromBase64Url(hash)
if err != nil {
errorResponse.Body = "Could not decode hash"
info.Upload.StopUpload(errorResponse)
s.logger.Error("Could not decode hash", zap.Error(err))
continue
}
_, err = s.CreateTusUpload(decodedHash.HashBytes(), info.Upload.ID, uint(uploaderID), uploaderIP, info.Context.Value("protocol").(string))
if err != nil {
errorResponse.Body = "Could not create tus upload"
info.Upload.StopUpload(errorResponse)
s.logger.Error("Could not create tus upload", zap.Error(err))
continue
}
case info := <-s.tus.UploadProgress:
err := s.TusUploadProgress(info.Upload.ID)
if err != nil {
s.logger.Error("Could not update tus upload", zap.Error(err))
continue
}
case info := <-s.tus.TerminatedUploads:
err := s.DeleteTusUpload(info.Upload.ID)
if err != nil {
s.logger.Error("Could not delete tus upload", zap.Error(err))
continue
}
case info := <-s.tus.CompleteUploads:
if !(!info.Upload.SizeIsDeferred && info.Upload.Offset == info.Upload.Size) {
continue
}
err := s.TusUploadCompleted(info.Upload.ID)
if err != nil {
s.logger.Error("Could not complete tus upload", zap.Error(err))
continue
}
err = s.ScheduleTusUpload(info.Upload.ID, 0)
if err != nil {
s.logger.Error("Could not schedule tus upload", zap.Error(err))
continue
}
}
}
}
func (s *StorageServiceImpl) TusUploadExists(hash []byte) (bool, models.TusUpload) {
hashStr := hex.EncodeToString(hash)
var upload models.TusUpload
result := s.db.Model(&models.TusUpload{}).Where(&models.TusUpload{Hash: hashStr}).First(&upload)
return result.RowsAffected > 0, upload
}
func (s *StorageServiceImpl) CreateTusUpload(hash []byte, uploadID string, uploaderID uint, uploaderIP string, protocol string) (*models.TusUpload, error) {
hashStr := hex.EncodeToString(hash)
upload := &models.TusUpload{
Hash: hashStr,
UploadID: uploadID,
UploaderID: uploaderID,
UploaderIP: uploaderIP,
Uploader: models.User{},
Protocol: protocol,
}
result := s.db.Create(upload)
if result.Error != nil {
return nil, result.Error
}
return upload, nil
}
func (s *StorageServiceImpl) TusUploadProgress(uploadID string) error {
find := &models.TusUpload{UploadID: uploadID}
var upload models.TusUpload
result := s.db.Model(&models.TusUpload{}).Where(find).First(&upload)
if result.RowsAffected == 0 {
return errors.New("upload not found")
}
result = s.db.Model(&models.TusUpload{}).Where(find).Update("updated_at", time.Now())
if result.Error != nil {
return result.Error
}
return nil
}
func (s *StorageServiceImpl) TusUploadCompleted(uploadID string) error {
find := &models.TusUpload{UploadID: uploadID}
var upload models.TusUpload
result := s.db.Model(&models.TusUpload{}).Where(find).First(&upload)
if result.RowsAffected == 0 {
return errors.New("upload not found")
}
result = s.db.Model(&models.TusUpload{}).Where(find).Update("completed", true)
return nil
}
func (s *StorageServiceImpl) DeleteTusUpload(uploadID string) error {
result := s.db.Where(&models.TusUpload{UploadID: uploadID}).Delete(&models.TusUpload{})
if result.Error != nil {
return result.Error
}
return nil
}
func (s *StorageServiceImpl) ScheduleTusUpload(uploadID string, attempt int) error {
find := &models.TusUpload{UploadID: uploadID}
var upload models.TusUpload
result := s.db.Model(&models.TusUpload{}).Where(find).First(&upload)
if result.RowsAffected == 0 {
return errors.New("upload not found")
}
job, task := s.buildNewTusUploadTask(&upload)
if attempt > 0 {
job = gocron.OneTimeJob(gocron.OneTimeJobStartDateTime(time.Now().Add(time.Duration(attempt) * time.Minute)))
}
_, err := s.cron.Scheduler().NewJob(job, task, gocron.WithEventListeners(gocron.AfterJobRunsWithError(func(jobID uuid.UUID, jobName string, err error) {
s.logger.Error("Error running job", zap.Error(err))
err = s.ScheduleTusUpload(uploadID, attempt+1)
if err != nil {
s.logger.Error("Error rescheduling job", zap.Error(err))
}
}),
gocron.AfterJobRuns(func(jobID uuid.UUID, jobName string) {
s.logger.Info("Job finished", zap.String("jobName", jobName), zap.String("uploadID", uploadID))
err := s.DeleteTusUpload(uploadID)
if err != nil {
s.logger.Error("Error deleting tus upload", zap.Error(err))
}
})))
if err != nil {
return err
}
return nil
}
func (s *StorageServiceImpl) buildNewTusUploadTask(upload *models.TusUpload) (job gocron.JobDefinition, task gocron.Task) {
job = gocron.OneTimeJob(gocron.OneTimeJobStartImmediately())
task = gocron.NewTask(
func(upload *models.TusUpload) error {
ctx := context.Background()
tusUpload, err := s.tusStore.GetUpload(ctx, upload.UploadID)
if err != nil {
s.logger.Error("Could not get upload", zap.Error(err))
return err
}
reader, err := tusUpload.GetReader(ctx)
if err != nil {
s.logger.Error("Could not get tus file", zap.Error(err))
return err
}
hash, byteCount, err := s.GetHash(reader)
if err != nil {
s.logger.Error("Could not compute hash", zap.Error(err))
return err
}
dbHash, err := hex.DecodeString(upload.Hash)
if err != nil {
s.logger.Error("Could not decode hash", zap.Error(err))
return err
}
if !bytes.Equal(hash, dbHash) {
s.logger.Error("Hashes do not match", zap.Any("upload", upload), zap.Any("hash", hash), zap.Any("dbHash", dbHash))
return err
}
reader, err = tusUpload.GetReader(ctx)
if err != nil {
s.logger.Error("Could not get tus file", zap.Error(err))
return err
}
var mimeBuf [512]byte
_, err = reader.Read(mimeBuf[:])
if err != nil {
s.logger.Error("Could not read mime", zap.Error(err))
return err
}
mimeType := http.DetectContentType(mimeBuf[:])
upload.MimeType = mimeType
if tx := s.db.Save(upload); tx.Error != nil {
s.logger.Error("Could not update tus upload", zap.Error(tx.Error))
return tx.Error
}
reader, err = tusUpload.GetReader(ctx)
if err != nil {
s.logger.Error("Could not get tus file", zap.Error(err))
return err
}
err = s.PutFile(reader, upload.Protocol, dbHash)
if err != nil {
s.logger.Error("Could not upload file", zap.Error(err))
return err
}
s3InfoId, _ := splitS3Ids(upload.UploadID)
_, err = s.s3Client.DeleteObjects(ctx, &s3.DeleteObjectsInput{
Bucket: aws.String(s.config.GetString("core.storage.s3.bufferBucket")),
Delete: &s3types.Delete{
Objects: []s3types.ObjectIdentifier{
{
Key: aws.String(s3InfoId),
},
{
Key: aws.String(s3InfoId + ".info"),
},
},
Quiet: aws.Bool(true),
},
})
if err != nil {
s.logger.Error("Could not delete upload metadata", zap.Error(err))
return err
}
newUpload, err := s.CreateUpload(dbHash, mimeType, upload.UploaderID, upload.UploaderIP, uint64(byteCount), upload.Protocol)
if err != nil {
s.logger.Error("Could not create upload", zap.Error(err))
return err
}
err = s.accounts.PinByID(newUpload.ID, upload.UploaderID)
if err != nil {
s.logger.Error("Could not pin upload", zap.Error(err))
return err
}
return nil
}, upload)
return job, task
}
func (s *StorageServiceImpl) getPrefixedHash(hash []byte) []byte {
return append([]byte{byte(types.HashTypeBlake3)}, hash...)
}
func splitS3Ids(id string) (objectId, multipartId string) {
index := strings.Index(id, "+")
if index == -1 {
return
}
objectId = id[:index]
multipartId = id[index+1:]
return
}
func (s *StorageServiceImpl) GetFile(hash []byte, start int64) (io.ReadCloser, int64, error) {
if exists, tusUpload := s.TusUploadExists(hash); exists {
if tusUpload.Completed {
upload, err := s.tusStore.GetUpload(context.Background(), tusUpload.UploadID)
if err != nil {
return nil, 0, err
}
info, _ := upload.GetInfo(context.Background())
ctx := context.Background()
if start > 0 {
endPosition := start + info.Size - 1
rangeHeader := fmt.Sprintf("bytes=%d-%d", start, endPosition)
ctx = context.WithValue(ctx, "range", rangeHeader)
}
reader, err := upload.GetReader(ctx)
return reader, info.Size, err
}
}
exists, upload := s.FileExists(hash)
if !exists {
return nil, 0, errors.New("file does not exist")
}
hashStr, err := encoding.NewMultihash(s.getPrefixedHash(hash)).ToBase64Url()
if err != nil {
return nil, 0, err
}
var partialRange api.DownloadRange
if start > 0 {
partialRange = api.DownloadRange{
Offset: start,
Length: int64(upload.Size) - start + 1,
Size: int64(upload.Size),
}
}
object, err := s.workerClient.GetObject(context.Background(), upload.Protocol, hashStr, api.DownloadObjectOptions{
Range: partialRange,
})
if err != nil {
return nil, 0, err
}
return object.Content, int64(upload.Size), nil
}
func (s *StorageServiceImpl) NewFile(hash []byte) *FileImpl {
return NewFile(hash, s)
}