// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package routing

import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"path"
	"strings"

	"github.com/matrix-org/dendrite/clientapi/jsonerror"
	"github.com/matrix-org/dendrite/mediaapi/fileutils"
	"github.com/matrix-org/dendrite/mediaapi/storage"
	"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
	"github.com/matrix-org/dendrite/mediaapi/types"
	"github.com/matrix-org/dendrite/setup/config"
	userapi "github.com/matrix-org/dendrite/userapi/api"
	"github.com/matrix-org/gomatrixserverlib"
	"github.com/matrix-org/util"
	log "github.com/sirupsen/logrus"
)

// uploadRequest metadata included in or derivable from an upload request
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such
type uploadRequest struct {
	// MediaMetadata describes the media being uploaded. It is first filled in
	// from the request (origin, Content-Length, Content-Type, filename and
	// uploading user) and is amended during the upload with the media ID,
	// content hash and size.
	MediaMetadata *types.MediaMetadata
	// Logger is the request-scoped logger. It carries an "Origin" field from
	// creation and gains a "media_id" field once a media ID has been assigned.
	Logger *log.Entry
}

// uploadResponse defines the format of the JSON response
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
	// ContentURI is the URI of the stored media, formatted by Upload as
	// "mxc://<server name>/<media ID>".
	ContentURI string `json:"content_uri"`
}

// Upload implements POST /upload
// This endpoint involves uploading potentially significant amounts of data to the homeserver.
56// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. 57// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. 58// TODO: We should time out requests if they have not received any data within a configured timeout period. 59func Upload(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device, db storage.Database, activeThumbnailGeneration *types.ActiveThumbnailGeneration) util.JSONResponse { 60 r, resErr := parseAndValidateRequest(req, cfg, dev) 61 if resErr != nil { 62 return *resErr 63 } 64 65 if resErr = r.doUpload(req.Context(), req.Body, cfg, db, activeThumbnailGeneration); resErr != nil { 66 return *resErr 67 } 68 69 return util.JSONResponse{ 70 Code: http.StatusOK, 71 JSON: uploadResponse{ 72 ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.Matrix.ServerName, r.MediaMetadata.MediaID), 73 }, 74 } 75} 76 77// parseAndValidateRequest parses the incoming upload request to validate and extract 78// all the metadata about the media being uploaded. 
79// Returns either an uploadRequest or an error formatted as a util.JSONResponse 80func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device) (*uploadRequest, *util.JSONResponse) { 81 r := &uploadRequest{ 82 MediaMetadata: &types.MediaMetadata{ 83 Origin: cfg.Matrix.ServerName, 84 FileSizeBytes: types.FileSizeBytes(req.ContentLength), 85 ContentType: types.ContentType(req.Header.Get("Content-Type")), 86 UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), 87 UserID: types.MatrixUserID(dev.UserID), 88 }, 89 Logger: util.GetLogger(req.Context()).WithField("Origin", cfg.Matrix.ServerName), 90 } 91 92 if resErr := r.Validate(*cfg.MaxFileSizeBytes); resErr != nil { 93 return nil, resErr 94 } 95 96 return r, nil 97} 98 99func (r *uploadRequest) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) { 100 for { 101 // First try generating a meda ID. We'll do this by 102 // generating some random bytes and then hex-encoding. 103 mediaIDBytes := make([]byte, 32) 104 _, err := rand.Read(mediaIDBytes) 105 if err != nil { 106 return "", fmt.Errorf("rand.Read: %w", err) 107 } 108 mediaID := types.MediaID(hex.EncodeToString(mediaIDBytes)) 109 // Then we will check if this media ID already exists in 110 // our database. If it does then we had best generate a 111 // new one. 112 existingMetadata, err := db.GetMediaMetadata(ctx, mediaID, r.MediaMetadata.Origin) 113 if err != nil { 114 return "", fmt.Errorf("db.GetMediaMetadata: %w", err) 115 } 116 if existingMetadata != nil { 117 // The media ID was already used - repeat the process 118 // and generate a new one instead. 119 continue 120 } 121 // The media ID was not already used - let's return that. 
122 return mediaID, nil 123 } 124} 125 126func (r *uploadRequest) doUpload( 127 ctx context.Context, 128 reqReader io.Reader, 129 cfg *config.MediaAPI, 130 db storage.Database, 131 activeThumbnailGeneration *types.ActiveThumbnailGeneration, 132) *util.JSONResponse { 133 r.Logger.WithFields(log.Fields{ 134 "UploadName": r.MediaMetadata.UploadName, 135 "FileSizeBytes": r.MediaMetadata.FileSizeBytes, 136 "ContentType": r.MediaMetadata.ContentType, 137 }).Info("Uploading file") 138 139 // The file data is hashed and the hash is used as the MediaID. The hash is useful as a 140 // method of deduplicating files to save storage, as well as a way to conduct 141 // integrity checks on the file data in the repository. 142 // Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK. 143 // 144 // TODO: This has a bad API shape where you either need to call: 145 // fileutils.RemoveDir(tmpDir, r.Logger) 146 // or call: 147 // r.storeFileAndMetadata(ctx, tmpDir, ...) 148 // before you return from doUpload else we will leak a temp file. We could make this nicer with a `WithTransaction` style of 149 // nested function to guarantee either storage or cleanup. 
150 if *cfg.MaxFileSizeBytes > 0 { 151 if *cfg.MaxFileSizeBytes+1 <= 0 { 152 r.Logger.WithFields(log.Fields{ 153 "MaxFileSizeBytes": *cfg.MaxFileSizeBytes, 154 }).Warnf("Configured MaxFileSizeBytes overflows int64, defaulting to %d bytes", config.DefaultMaxFileSizeBytes) 155 cfg.MaxFileSizeBytes = &config.DefaultMaxFileSizeBytes 156 } 157 reqReader = io.LimitReader(reqReader, int64(*cfg.MaxFileSizeBytes)+1) 158 } 159 160 hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, reqReader, cfg.AbsBasePath) 161 if err != nil { 162 r.Logger.WithError(err).WithFields(log.Fields{ 163 "MaxFileSizeBytes": *cfg.MaxFileSizeBytes, 164 }).Warn("Error while transferring file") 165 return &util.JSONResponse{ 166 Code: http.StatusBadRequest, 167 JSON: jsonerror.Unknown("Failed to upload"), 168 } 169 } 170 171 // Check if temp file size exceeds max file size configuration 172 if bytesWritten > types.FileSizeBytes(*cfg.MaxFileSizeBytes) { 173 fileutils.RemoveDir(tmpDir, r.Logger) // delete temp file 174 return requestEntityTooLargeJSONResponse(*cfg.MaxFileSizeBytes) 175 } 176 177 // Look up the media by the file hash. If we already have the file but under a 178 // different media ID then we won't upload the file again - instead we'll just 179 // add a new metadata entry that refers to the same file. 180 existingMetadata, err := db.GetMediaMetadataByHash( 181 ctx, hash, r.MediaMetadata.Origin, 182 ) 183 if err != nil { 184 fileutils.RemoveDir(tmpDir, r.Logger) 185 r.Logger.WithError(err).Error("Error querying the database by hash.") 186 resErr := jsonerror.InternalServerError() 187 return &resErr 188 } 189 if existingMetadata != nil { 190 // The file already exists, delete the uploaded temporary file. 191 defer fileutils.RemoveDir(tmpDir, r.Logger) 192 // The file already exists. Make a new media ID up for it. 
193 mediaID, merr := r.generateMediaID(ctx, db) 194 if merr != nil { 195 r.Logger.WithError(merr).Error("Failed to generate media ID for existing file") 196 resErr := jsonerror.InternalServerError() 197 return &resErr 198 } 199 200 // Then amend the upload metadata. 201 r.MediaMetadata = &types.MediaMetadata{ 202 MediaID: mediaID, 203 Origin: r.MediaMetadata.Origin, 204 ContentType: r.MediaMetadata.ContentType, 205 FileSizeBytes: r.MediaMetadata.FileSizeBytes, 206 CreationTimestamp: r.MediaMetadata.CreationTimestamp, 207 UploadName: r.MediaMetadata.UploadName, 208 Base64Hash: hash, 209 UserID: r.MediaMetadata.UserID, 210 } 211 } else { 212 // The file doesn't exist. Update the request metadata. 213 r.MediaMetadata.FileSizeBytes = bytesWritten 214 r.MediaMetadata.Base64Hash = hash 215 r.MediaMetadata.MediaID, err = r.generateMediaID(ctx, db) 216 if err != nil { 217 fileutils.RemoveDir(tmpDir, r.Logger) 218 r.Logger.WithError(err).Error("Failed to generate media ID for new upload") 219 resErr := jsonerror.InternalServerError() 220 return &resErr 221 } 222 } 223 224 r.Logger = r.Logger.WithField("media_id", r.MediaMetadata.MediaID) 225 r.Logger.WithFields(log.Fields{ 226 "Base64Hash": r.MediaMetadata.Base64Hash, 227 "UploadName": r.MediaMetadata.UploadName, 228 "FileSizeBytes": r.MediaMetadata.FileSizeBytes, 229 "ContentType": r.MediaMetadata.ContentType, 230 }).Info("File uploaded") 231 232 return r.storeFileAndMetadata( 233 ctx, tmpDir, cfg.AbsBasePath, db, cfg.ThumbnailSizes, 234 activeThumbnailGeneration, cfg.MaxThumbnailGenerators, 235 ) 236} 237 238func requestEntityTooLargeJSONResponse(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse { 239 return &util.JSONResponse{ 240 Code: http.StatusRequestEntityTooLarge, 241 JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), 242 } 243} 244 245// Validate validates the uploadRequest fields 246func (r *uploadRequest) 
Validate(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse { 247 if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > types.FileSizeBytes(maxFileSizeBytes) { 248 return requestEntityTooLargeJSONResponse(maxFileSizeBytes) 249 } 250 if strings.HasPrefix(string(r.MediaMetadata.UploadName), "~") { 251 return &util.JSONResponse{ 252 Code: http.StatusBadRequest, 253 JSON: jsonerror.Unknown("File name must not begin with '~'."), 254 } 255 } 256 // TODO: Validate filename - what are the valid characters? 257 if r.MediaMetadata.UserID != "" { 258 // TODO: We should put user ID parsing code into gomatrixserverlib and use that instead 259 // (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 ) 260 // It should be a struct (with pointers into a single string to avoid copying) and 261 // we should update all refs to use UserID types rather than strings. 262 // https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92 263 if _, _, err := gomatrixserverlib.SplitID('@', string(r.MediaMetadata.UserID)); err != nil { 264 return &util.JSONResponse{ 265 Code: http.StatusBadRequest, 266 JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"), 267 } 268 } 269 } 270 return nil 271} 272 273// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database 274// See getPathFromMediaMetadata in fileutils for details of the final path. 275// The order of operations is important as it avoids metadata entering the database before the file 276// is ready, and if we fail to move the file, it never gets added to the database. 277// Returns a util.JSONResponse error and cleans up directories in case of error. 
278func (r *uploadRequest) storeFileAndMetadata( 279 ctx context.Context, 280 tmpDir types.Path, 281 absBasePath config.Path, 282 db storage.Database, 283 thumbnailSizes []config.ThumbnailSize, 284 activeThumbnailGeneration *types.ActiveThumbnailGeneration, 285 maxThumbnailGenerators int, 286) *util.JSONResponse { 287 finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) 288 if err != nil { 289 r.Logger.WithError(err).Error("Failed to move file.") 290 return &util.JSONResponse{ 291 Code: http.StatusBadRequest, 292 JSON: jsonerror.Unknown("Failed to upload"), 293 } 294 } 295 if duplicate { 296 r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") 297 } 298 299 if err = db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil { 300 r.Logger.WithError(err).Warn("Failed to store metadata") 301 // If the file is a duplicate (has the same hash as an existing file) then 302 // there is valid metadata in the database for that file. As such we only 303 // remove the file if it is not a duplicate. 304 if !duplicate { 305 fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger) 306 } 307 return &util.JSONResponse{ 308 Code: http.StatusBadRequest, 309 JSON: jsonerror.Unknown("Failed to upload"), 310 } 311 } 312 313 go func() { 314 busy, err := thumbnailer.GenerateThumbnails( 315 context.Background(), finalPath, thumbnailSizes, r.MediaMetadata, 316 activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger, 317 ) 318 if err != nil { 319 r.Logger.WithError(err).Warn("Error generating thumbnails") 320 } 321 if busy { 322 r.Logger.Warn("Maximum number of active thumbnail generators reached. Skipping pre-generation.") 323 } 324 }() 325 326 return nil 327} 328