1// Copyright 2017 Vector Creations Ltd
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package routing
16
17import (
18	"context"
19	"crypto/rand"
20	"encoding/hex"
21	"fmt"
22	"io"
23	"net/http"
24	"net/url"
25	"path"
26	"strings"
27
28	"github.com/matrix-org/dendrite/clientapi/jsonerror"
29	"github.com/matrix-org/dendrite/mediaapi/fileutils"
30	"github.com/matrix-org/dendrite/mediaapi/storage"
31	"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
32	"github.com/matrix-org/dendrite/mediaapi/types"
33	"github.com/matrix-org/dendrite/setup/config"
34	userapi "github.com/matrix-org/dendrite/userapi/api"
35	"github.com/matrix-org/gomatrixserverlib"
36	"github.com/matrix-org/util"
37	log "github.com/sirupsen/logrus"
38)
39
// uploadRequest holds the metadata included in, or derivable from, an upload request.
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The members come from HTTP request metadata such as headers and query parameters, or are derived from such metadata.
//
// MediaMetadata describes the media being uploaded: it is first populated by
// parseAndValidateRequest and later amended by doUpload (media ID, hash, size).
// Logger is the request-scoped logger used by the upload methods; doUpload
// replaces it with one that also carries the media_id field.
type uploadRequest struct {
	MediaMetadata *types.MediaMetadata
	Logger        *log.Entry
}
47
// uploadResponse defines the format of the JSON response body that Upload
// returns when an upload succeeds.
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
	ContentURI string `json:"content_uri"` // mxc://<server name>/<media ID>, as built by Upload
}
53
54// Upload implements POST /upload
55// This endpoint involves uploading potentially significant amounts of data to the homeserver.
56// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
57// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
58// TODO: We should time out requests if they have not received any data within a configured timeout period.
59func Upload(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device, db storage.Database, activeThumbnailGeneration *types.ActiveThumbnailGeneration) util.JSONResponse {
60	r, resErr := parseAndValidateRequest(req, cfg, dev)
61	if resErr != nil {
62		return *resErr
63	}
64
65	if resErr = r.doUpload(req.Context(), req.Body, cfg, db, activeThumbnailGeneration); resErr != nil {
66		return *resErr
67	}
68
69	return util.JSONResponse{
70		Code: http.StatusOK,
71		JSON: uploadResponse{
72			ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.Matrix.ServerName, r.MediaMetadata.MediaID),
73		},
74	}
75}
76
77// parseAndValidateRequest parses the incoming upload request to validate and extract
78// all the metadata about the media being uploaded.
79// Returns either an uploadRequest or an error formatted as a util.JSONResponse
80func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI, dev *userapi.Device) (*uploadRequest, *util.JSONResponse) {
81	r := &uploadRequest{
82		MediaMetadata: &types.MediaMetadata{
83			Origin:        cfg.Matrix.ServerName,
84			FileSizeBytes: types.FileSizeBytes(req.ContentLength),
85			ContentType:   types.ContentType(req.Header.Get("Content-Type")),
86			UploadName:    types.Filename(url.PathEscape(req.FormValue("filename"))),
87			UserID:        types.MatrixUserID(dev.UserID),
88		},
89		Logger: util.GetLogger(req.Context()).WithField("Origin", cfg.Matrix.ServerName),
90	}
91
92	if resErr := r.Validate(*cfg.MaxFileSizeBytes); resErr != nil {
93		return nil, resErr
94	}
95
96	return r, nil
97}
98
99func (r *uploadRequest) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
100	for {
101		// First try generating a meda ID. We'll do this by
102		// generating some random bytes and then hex-encoding.
103		mediaIDBytes := make([]byte, 32)
104		_, err := rand.Read(mediaIDBytes)
105		if err != nil {
106			return "", fmt.Errorf("rand.Read: %w", err)
107		}
108		mediaID := types.MediaID(hex.EncodeToString(mediaIDBytes))
109		// Then we will check if this media ID already exists in
110		// our database. If it does then we had best generate a
111		// new one.
112		existingMetadata, err := db.GetMediaMetadata(ctx, mediaID, r.MediaMetadata.Origin)
113		if err != nil {
114			return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
115		}
116		if existingMetadata != nil {
117			// The media ID was already used - repeat the process
118			// and generate a new one instead.
119			continue
120		}
121		// The media ID was not already used - let's return that.
122		return mediaID, nil
123	}
124}
125
126func (r *uploadRequest) doUpload(
127	ctx context.Context,
128	reqReader io.Reader,
129	cfg *config.MediaAPI,
130	db storage.Database,
131	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
132) *util.JSONResponse {
133	r.Logger.WithFields(log.Fields{
134		"UploadName":    r.MediaMetadata.UploadName,
135		"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
136		"ContentType":   r.MediaMetadata.ContentType,
137	}).Info("Uploading file")
138
139	// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
140	// method of deduplicating files to save storage, as well as a way to conduct
141	// integrity checks on the file data in the repository.
142	// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
143	//
144	// TODO: This has a bad API shape where you either need to call:
145	//   fileutils.RemoveDir(tmpDir, r.Logger)
146	// or call:
147	//   r.storeFileAndMetadata(ctx, tmpDir, ...)
148	// before you return from doUpload else we will leak a temp file. We could make this nicer with a `WithTransaction` style of
149	// nested function to guarantee either storage or cleanup.
150	if *cfg.MaxFileSizeBytes > 0 {
151		if *cfg.MaxFileSizeBytes+1 <= 0 {
152			r.Logger.WithFields(log.Fields{
153				"MaxFileSizeBytes": *cfg.MaxFileSizeBytes,
154			}).Warnf("Configured MaxFileSizeBytes overflows int64, defaulting to %d bytes", config.DefaultMaxFileSizeBytes)
155			cfg.MaxFileSizeBytes = &config.DefaultMaxFileSizeBytes
156		}
157		reqReader = io.LimitReader(reqReader, int64(*cfg.MaxFileSizeBytes)+1)
158	}
159
160	hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, reqReader, cfg.AbsBasePath)
161	if err != nil {
162		r.Logger.WithError(err).WithFields(log.Fields{
163			"MaxFileSizeBytes": *cfg.MaxFileSizeBytes,
164		}).Warn("Error while transferring file")
165		return &util.JSONResponse{
166			Code: http.StatusBadRequest,
167			JSON: jsonerror.Unknown("Failed to upload"),
168		}
169	}
170
171	// Check if temp file size exceeds max file size configuration
172	if bytesWritten > types.FileSizeBytes(*cfg.MaxFileSizeBytes) {
173		fileutils.RemoveDir(tmpDir, r.Logger) // delete temp file
174		return requestEntityTooLargeJSONResponse(*cfg.MaxFileSizeBytes)
175	}
176
177	// Look up the media by the file hash. If we already have the file but under a
178	// different media ID then we won't upload the file again - instead we'll just
179	// add a new metadata entry that refers to the same file.
180	existingMetadata, err := db.GetMediaMetadataByHash(
181		ctx, hash, r.MediaMetadata.Origin,
182	)
183	if err != nil {
184		fileutils.RemoveDir(tmpDir, r.Logger)
185		r.Logger.WithError(err).Error("Error querying the database by hash.")
186		resErr := jsonerror.InternalServerError()
187		return &resErr
188	}
189	if existingMetadata != nil {
190		// The file already exists, delete the uploaded temporary file.
191		defer fileutils.RemoveDir(tmpDir, r.Logger)
192		// The file already exists. Make a new media ID up for it.
193		mediaID, merr := r.generateMediaID(ctx, db)
194		if merr != nil {
195			r.Logger.WithError(merr).Error("Failed to generate media ID for existing file")
196			resErr := jsonerror.InternalServerError()
197			return &resErr
198		}
199
200		// Then amend the upload metadata.
201		r.MediaMetadata = &types.MediaMetadata{
202			MediaID:           mediaID,
203			Origin:            r.MediaMetadata.Origin,
204			ContentType:       r.MediaMetadata.ContentType,
205			FileSizeBytes:     r.MediaMetadata.FileSizeBytes,
206			CreationTimestamp: r.MediaMetadata.CreationTimestamp,
207			UploadName:        r.MediaMetadata.UploadName,
208			Base64Hash:        hash,
209			UserID:            r.MediaMetadata.UserID,
210		}
211	} else {
212		// The file doesn't exist. Update the request metadata.
213		r.MediaMetadata.FileSizeBytes = bytesWritten
214		r.MediaMetadata.Base64Hash = hash
215		r.MediaMetadata.MediaID, err = r.generateMediaID(ctx, db)
216		if err != nil {
217			fileutils.RemoveDir(tmpDir, r.Logger)
218			r.Logger.WithError(err).Error("Failed to generate media ID for new upload")
219			resErr := jsonerror.InternalServerError()
220			return &resErr
221		}
222	}
223
224	r.Logger = r.Logger.WithField("media_id", r.MediaMetadata.MediaID)
225	r.Logger.WithFields(log.Fields{
226		"Base64Hash":    r.MediaMetadata.Base64Hash,
227		"UploadName":    r.MediaMetadata.UploadName,
228		"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
229		"ContentType":   r.MediaMetadata.ContentType,
230	}).Info("File uploaded")
231
232	return r.storeFileAndMetadata(
233		ctx, tmpDir, cfg.AbsBasePath, db, cfg.ThumbnailSizes,
234		activeThumbnailGeneration, cfg.MaxThumbnailGenerators,
235	)
236}
237
238func requestEntityTooLargeJSONResponse(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
239	return &util.JSONResponse{
240		Code: http.StatusRequestEntityTooLarge,
241		JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
242	}
243}
244
245// Validate validates the uploadRequest fields
246func (r *uploadRequest) Validate(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
247	if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > types.FileSizeBytes(maxFileSizeBytes) {
248		return requestEntityTooLargeJSONResponse(maxFileSizeBytes)
249	}
250	if strings.HasPrefix(string(r.MediaMetadata.UploadName), "~") {
251		return &util.JSONResponse{
252			Code: http.StatusBadRequest,
253			JSON: jsonerror.Unknown("File name must not begin with '~'."),
254		}
255	}
256	// TODO: Validate filename - what are the valid characters?
257	if r.MediaMetadata.UserID != "" {
258		// TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
259		//       (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
260		//       It should be a struct (with pointers into a single string to avoid copying) and
261		//       we should update all refs to use UserID types rather than strings.
262		// https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
263		if _, _, err := gomatrixserverlib.SplitID('@', string(r.MediaMetadata.UserID)); err != nil {
264			return &util.JSONResponse{
265				Code: http.StatusBadRequest,
266				JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"),
267			}
268		}
269	}
270	return nil
271}
272
273// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
274// See getPathFromMediaMetadata in fileutils for details of the final path.
275// The order of operations is important as it avoids metadata entering the database before the file
276// is ready, and if we fail to move the file, it never gets added to the database.
277// Returns a util.JSONResponse error and cleans up directories in case of error.
278func (r *uploadRequest) storeFileAndMetadata(
279	ctx context.Context,
280	tmpDir types.Path,
281	absBasePath config.Path,
282	db storage.Database,
283	thumbnailSizes []config.ThumbnailSize,
284	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
285	maxThumbnailGenerators int,
286) *util.JSONResponse {
287	finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
288	if err != nil {
289		r.Logger.WithError(err).Error("Failed to move file.")
290		return &util.JSONResponse{
291			Code: http.StatusBadRequest,
292			JSON: jsonerror.Unknown("Failed to upload"),
293		}
294	}
295	if duplicate {
296		r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
297	}
298
299	if err = db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil {
300		r.Logger.WithError(err).Warn("Failed to store metadata")
301		// If the file is a duplicate (has the same hash as an existing file) then
302		// there is valid metadata in the database for that file. As such we only
303		// remove the file if it is not a duplicate.
304		if !duplicate {
305			fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
306		}
307		return &util.JSONResponse{
308			Code: http.StatusBadRequest,
309			JSON: jsonerror.Unknown("Failed to upload"),
310		}
311	}
312
313	go func() {
314		busy, err := thumbnailer.GenerateThumbnails(
315			context.Background(), finalPath, thumbnailSizes, r.MediaMetadata,
316			activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger,
317		)
318		if err != nil {
319			r.Logger.WithError(err).Warn("Error generating thumbnails")
320		}
321		if busy {
322			r.Logger.Warn("Maximum number of active thumbnail generators reached. Skipping pre-generation.")
323		}
324	}()
325
326	return nil
327}
328