1 
2 //#define LOCAL_DEBUG
3 #include "debug.h"
4 
5 #include "job.h"
6 #include <cstdio>
7 #include <stdexcept>
8 #include <limits>
9 #include <queue>
10 using namespace std;
11 
12 #include "conn.h"
13 #include "acfg.h"
14 #include "fileitem.h"
15 #include "dlcon.h"
16 #include "sockio.h"
17 #include "fileio.h" // for ::stat and related macros
18 #include <dirent.h>
19 #include <algorithm>
20 #include "maintenance.h"
21 
22 #include <errno.h>
23 
24 #if 0 // defined(DEBUG)
25 #define CHUNKDEFAULT true
26 #warning testing chunk mode
27 #else
28 #define CHUNKDEFAULT false
29 #endif
30 
31 
32 namespace acng
33 {
34 
// Protocol token of HTTP/1.1; job::PrepareDownload compares the third token of the
// request line against it to decide whether the client speaks HTTP/1.1.
mstring sHttp11("HTTP/1.1");
36 
// Pseudo descriptor handed out by file items which do not have a real file behind them.
#define SPECIAL_FD -42

/*
 * Tells whether fd may be used by the job: either a regular (non-negative)
 * descriptor or the SPECIAL_FD placeholder.
 */
inline bool IsValidFD(int fd)
{
	if (fd == SPECIAL_FD)
		return true;
	return fd >= 0;
}
39 
40 tTraceData traceData;
dump_trace()41 void cfg::dump_trace()
42 {
43 	log::err("Paths with uncertain content types:");
44 	lockguard g(traceData);
45 	for (const auto& s : traceData)
46 		log::err(s);
47 }
/*
 * Accessor for the trace data collection; returns a reference to the
 * file-level traceData object defined in this translation unit.
 */
tTraceData& tTraceData::getInstance()
{
	return traceData;
}
52 
53 
/*
 * Unlike the regular store-and-forward file item handler, this one does not store anything to
 * the hard disk. Instead, it uses the download buffer and lets the job object send the data
 * straight from it to the client.
 *
 * Hand-over as implemented below: StoreFileData() publishes a pointer/length pair for the
 * current chunk and then blocks in wait() until SendData() has reported consumed bytes or the
 * item status has left the regular range. SendData() in turn waits until a chunk is published,
 * writes from it to the client descriptor and wakes the other side with notifyAll().
 */
class tPassThroughFitem : public fileitem
{
protected:

	// start of the not yet forwarded part of the current chunk; nullptr while no chunk is published
	const char *m_pData;
	// m_nConsumable: bytes of the current chunk still waiting to be sent;
	// m_nConsumed: bytes of the current chunk already written by SendData()
	size_t m_nConsumable, m_nConsumed;

public:
	// s: identifier stored as the relative path of this item; storing data is disabled
	tPassThroughFitem(std::string s) :
	m_pData(nullptr), m_nConsumable(0), m_nConsumed(0)
	{
		m_sPathRel = s;
		m_bAllowStoreData=false;
		m_nSizeChecked = m_nSizeSeen = 0;
	};
	// Resets the size counters and reports the item as initialized; the flag is ignored.
	virtual FiStatus Setup(bool) override
	{
		m_nSizeChecked = m_nSizeSeen = 0;
		return m_status = FIST_INITED;
	}
	// There is no real file; hand out the placeholder descriptor instead.
	virtual int GetFileFd() override
			{ return SPECIAL_FD; }; // something, don't care for now
	// Takes over the response header under the item lock and marks the header as received.
	// All other arguments are ignored. Always returns true.
	virtual bool DownloadStartedStoreHeader(const header & h, size_t, const char *,
			bool, bool&) override
	{
		setLockGuard;
		m_head=h;
		m_status=FIST_DLGOTHEAD;
		return true;
	}
	/*
	 * Producer side: offers a chunk of downloaded data to the consumer.
	 *
	 * data/size: the chunk; size == 0 marks the end of the transfer (status becomes
	 * FIST_COMPLETE and true is returned without waiting).
	 *
	 * Returns false if the item is not in a state between FIST_DLGOTHEAD and FIST_COMPLETE
	 * or if it ran into an error state while waiting; otherwise the consumed byte count
	 * converted to bool.
	 *
	 * NOTE(review): the wait loop ends as soon as m_nConsumed is non-zero, i.e. after the
	 * first (possibly partial) write in SendData(). It looks like the rest of the chunk is
	 * dropped in that case because m_pData/m_nConsumable are reset below - confirm against
	 * the caller in the download agent.
	 */
	virtual bool StoreFileData(const char *data, unsigned int size) override
	{
		lockuniq g(this);

		LOGSTART2("tPassThroughFitem::StoreFileData", "status: " << (int) m_status);

		// something might care, most likely... also about BOUNCE action
		notifyAll();

		m_nIncommingCount += size;

		dbgline;
		if (m_status > fileitem::FIST_COMPLETE || m_status < FIST_DLGOTHEAD)
			return false;

		if (size == 0)
			m_status = FIST_COMPLETE;
		else
		{
			dbgline;
			m_status = FIST_DLRECEIVING;
			m_nSizeChecked += size;
			// publish the chunk for SendData()
			m_pData = data;
			m_nConsumable=size;
			m_nConsumed=0;
			// block until the consumer took something or the status left the regular range
			while(0 == m_nConsumed && m_status <= FIST_COMPLETE)
				wait(g);

			dbgline;
			// let the downloader abort?
			if(m_status >= FIST_DLERROR)
				return false;

			dbgline;
			// withdraw the chunk, the caller's buffer is not referenced beyond this point
			m_nConsumable=0;
			m_pData=nullptr;
			return m_nConsumed;
		}
		return true;
	}
	/*
	 * Consumer side: writes data of the currently published chunk to out_fd.
	 *
	 * out_fd: descriptor to write to; the second (file descriptor) argument is unused.
	 * nSendPos: advanced by the number of bytes written.
	 * nMax2SendNow: upper limit for this call.
	 *
	 * Returns the number of bytes written (0 if write() failed with EAGAIN or EINTR), or a
	 * negative value on error. A failing write() additionally puts the item into
	 * FIST_DLERROR with a 500 status line.
	 *
	 * NOTE(review): when the transfer completed without any data (m_nSizeChecked == 0) the
	 * loop is left with m_pData == nullptr, so the first check returns -1 and the
	 * "return 0" below appears to be unreachable - verify the intended order of the checks.
	 */
	ssize_t SendData(int out_fd, int, off_t &nSendPos, size_t nMax2SendNow) override
	{
		lockuniq g(this);

		// wait for a published chunk, an error state, or an empty but completed transfer
		while(0 == m_nConsumable && m_status<=FIST_COMPLETE
				&& ! (m_nSizeChecked==0 && m_status==FIST_COMPLETE))
		{
			wait(g);
		}
		if (m_status >= FIST_DLERROR || !m_pData)
			return -1;

		if(!m_nSizeChecked)
			return 0;

		auto r = write(out_fd, m_pData, min(nMax2SendNow, m_nConsumable));
		if (r < 0 && (errno == EAGAIN || errno == EINTR)) // harmless
			r = 0;
		if(r<0)
		{
			m_status=FIST_DLERROR;
			m_head.frontLine="HTTP/1.1 500 Data passing error";
		}
		else if(r>0)
		{
			// account for the written part of the chunk
			m_nConsumable-=r;
			m_pData+=r;
			m_nConsumed+=r;
			nSendPos+=r;
		}
		// wake up StoreFileData() which may be waiting for progress
		notifyAll();
		return r;
	}
};
163 
164 // base class for a fileitem replacement with custom storage for local data
165 class tGeneratedFitemBase : public fileitem
166 {
167 public:
GetFileFd()168 	virtual int GetFileFd() override
169 	{ return SPECIAL_FD; }; // something, don't care for now
170 
171 	tSS m_data;
172 
tGeneratedFitemBase(const string & sFitemId,const char * szFrontLineMsg)173 	tGeneratedFitemBase(const string &sFitemId, const char *szFrontLineMsg) : m_data(256)
174 	{
175 		m_status=FIST_COMPLETE;
176 		m_sPathRel=sFitemId;
177 		m_head.type = header::ANSWER;
178 		m_head.frontLine = "HTTP/1.1 ";
179 		m_head.frontLine += (szFrontLineMsg ? szFrontLineMsg : "500 Internal Failure");
180 		m_head.set(header::CONTENT_TYPE, WITHLEN("text/html") );
181 	}
SendData(int out_fd,int,off_t & nSendPos,size_t nMax2SendNow)182 	ssize_t SendData(int out_fd, int, off_t &nSendPos, size_t nMax2SendNow)
183 	override
184 	{
185 		if (m_status > FIST_COMPLETE || out_fd<0)
186 			return -1;
187 		auto r = m_data.syswrite(out_fd, nMax2SendNow);
188 		if(r>0) nSendPos+=r;
189 		return r;
190 	}
seal()191 	inline void seal()
192 	{
193 		// seal the item
194 		m_nSizeChecked = m_data.size();
195 		m_head.set(header::CONTENT_LENGTH, m_nSizeChecked);
196 	}
197 	// never used to store data
DownloadStartedStoreHeader(const header &,size_t,const char *,bool,bool &)198 	bool DownloadStartedStoreHeader(const header &, size_t, const char *, bool, bool&)
199 	override
200 	{return false;};
StoreFileData(const char *,unsigned int)201 	bool StoreFileData(const char *, unsigned int) override {return false;};
HeadRef()202 	header & HeadRef() { return m_head; }
203 };
204 
/*
 * Creates a job for one client request.
 *
 * h: the request header, stored in m_pReqHead (released with delete in ~job()).
 * pParent: the connection this job belongs to, stored in m_pParentCon.
 *
 * The job starts in STATE_SEND_MAIN_HEAD without an open file descriptor, with no
 * requested range (both range bounds are -1) and with zeroed transfer counters.
 */
job::job(header *h, conn *pParent) :
	m_filefd(-1),
	m_pParentCon(pParent),
	m_bChunkMode(CHUNKDEFAULT),
	m_bClientWants2Close(false),
	m_bIsHttp11(false),
	m_bNoDownloadStarted(false),
	m_state(STATE_SEND_MAIN_HEAD),
	m_backstate(STATE_TODISCON),
	m_pReqHead(h),
	m_nSendPos(0),
	m_nCurrentRangeLast(MAX_VAL(off_t)-1),
	m_nAllDataCount(0),
	m_nChunkRemainingBytes(0),
	m_type(rex::FILE_INVALID),
	m_nReqRangeFrom(-1), m_nReqRangeTo(-1)
{
	LOGSTART2("job::job", "job creating, " << m_pReqHead->frontLine << " and this: " << uintptr_t(this));
}
224 
225 static const string miscError(" [HTTP error, code: ");
226 
~job()227 job::~job()
228 {
229 	LOGSTART("job::~job");
230 	int stcode = 200;
231 	if(m_pItem) stcode = m_pItem.getFiPtr()->GetHeader().getStatus();
232 
233 	bool bErr=m_sFileLoc.empty() || stcode >= 400;
234 
235 	m_pParentCon->LogDataCounts(
236 			m_sFileLoc + (bErr ? (miscError + ltos(stcode) + ']') : sEmptyString),
237 			m_pReqHead->h[header::XFORWARDEDFOR],
238 			(m_pItem ? m_pItem.getFiPtr()->GetTransferCount() : 0),
239 			m_nAllDataCount, bErr);
240 
241 	checkforceclose(m_filefd);
242 	delete m_pReqHead;
243 }
244 
245 
PrepareLocalDownload(const string & visPath,const string & fsBase,const string & fsSubpath)246 inline void job::PrepareLocalDownload(const string &visPath,
247 		const string &fsBase, const string &fsSubpath)
248 {
249 	mstring absPath = fsBase+SZPATHSEP+fsSubpath;
250 	Cstat stbuf(absPath);
251 	if (!stbuf)
252 	{
253 		switch(errno)
254 		{
255 		case EACCES:
256 			SetErrorResponse("403 Permission denied");
257 			break;
258 		case EBADF:
259 		case EFAULT:
260 		case ENOMEM:
261 		case EOVERFLOW:
262 		default:
263 			//aclog::err("Internal error");
264 			SetErrorResponse("500 Internal server error");
265 			break;
266 		case ELOOP:
267 			SetErrorResponse("500 Infinite link recursion");
268 			break;
269 		case ENAMETOOLONG:
270 			SetErrorResponse("500 File name too long");
271 			break;
272 		case ENOENT:
273 		case ENOTDIR:
274 			SetErrorResponse("404 File or directory not found");
275 			break;
276 		}
277 		return;
278 	}
279 
280 	if(S_ISDIR(stbuf.st_mode))
281 	{
282 		// unconfuse the browser
283 		if(!endsWithSzAr(visPath, SZPATHSEPUNIX))
284 		{
285 			class dirredirect : public tGeneratedFitemBase
286 			{
287 			public:	dirredirect(const string &visPath)
288 			: tGeneratedFitemBase(visPath, "301 Moved Permanently")
289 				{
290 					m_head.set(header::LOCATION, visPath+"/");
291 					m_data << "<!DOCTYPE html>\n<html lang=\"en\"><head><title>301 Moved Permanently</title></head><body><h1>Moved Permanently</h1>"
292 					"<p>The document has moved <a href=\""+visPath+"/\">here</a>.</p></body></html>";
293 
294 					seal();
295 				}
296 			};
297 			m_pItem.RegisterFileitemLocalOnly(new dirredirect(visPath));
298 			return;
299 		}
300 
301 		class listing: public tGeneratedFitemBase
302 		{
303 		public:
304 			listing(const string &visPath) :
305 				tGeneratedFitemBase(visPath, "200 OK")
306 			{
307 				seal(); // for now...
308 			}
309 		};
310 		listing *p=new listing(visPath);
311 		m_pItem.RegisterFileitemLocalOnly(p); // assign to smart pointer ASAP, operations might throw
312 		tSS & page = p->m_data;
313 
314 		page << "<!DOCTYPE html>\n<html lang=\"en\"><head><title>Index of "
315 				<< visPath << "</title></head>"
316 		"<body><h1>Index of " << visPath << "</h1>"
317 		"<table><tr><th>&nbsp;</th><th>Name</th><th>Last modified</th><th>Size</th></tr>"
318 		"<tr><th colspan=\"4\"><hr></th></tr>";
319 
320 		DIR *dir = opendir(absPath.c_str());
321 		if (!dir) // weird, whatever... ignore...
322 			page<<"ERROR READING DIRECTORY";
323 		else
324 		{
325 			// quick hack with sorting by custom keys, good enough here
326 			priority_queue<tStrPair, std::vector<tStrPair>, std::greater<tStrPair>> sortHeap;
327 			for(struct dirent *pdp(0);0!=(pdp=readdir(dir));)
328 			{
329 				if (0!=::stat(mstring(absPath+SZPATHSEP+pdp->d_name).c_str(), &stbuf))
330 					continue;
331 
332 				bool bDir=S_ISDIR(stbuf.st_mode);
333 
334 				char datestr[32]={0};
335 				struct tm tmtimebuf;
336 				strftime(datestr, sizeof(datestr)-1,
337 						"%d-%b-%Y %H:%M", localtime_r(&stbuf.st_mtime, &tmtimebuf));
338 
339 				string line;
340 				if(bDir)
341 					line += "[DIR]";
342 				else if(startsWithSz(cfg::GetMimeType(pdp->d_name), "image/"))
343 					line += "[IMG]";
344 				else
345 					line += "[&nbsp;&nbsp;&nbsp;]";
346 				line += string("</td><td><a href=\"") + pdp->d_name
347 						+ (bDir? "/\">" : "\">" )
348 						+ pdp->d_name
349 						+"</a></td><td>"
350 						+ datestr
351 						+ "</td><td align=\"right\">"
352 						+ (bDir ? string("-") : offttosH(stbuf.st_size));
353 				sortHeap.push(make_pair(string(bDir?"a":"b")+pdp->d_name, line));
354 				//dbgprint((mstring)line);
355 			}
356 			closedir(dir);
357 			while(!sortHeap.empty())
358 			{
359 				page.add(WITHLEN("<tr><td valign=\"top\">"));
360 				page << sortHeap.top().second;
361 				page.add(WITHLEN("</td></tr>\r\n"));
362 				sortHeap.pop();
363 			}
364 
365 		}
366 		page << "<tr><td colspan=\"4\">" <<GetFooter();
367 		page << "</td></tr></table></body></html>";
368 		p->seal();
369 		return;
370 	}
371 	if(!S_ISREG(stbuf.st_mode))
372 	{
373 		SetErrorResponse("403 Unsupported data type");
374 		return;
375 	}
376 	/*
377 	 * This variant of file item handler sends a local file. The
378 	 * header data is generated as needed, the relative cache path variable
379 	 * is reused for the real path.
380 	 */
381 	class tLocalGetFitem : public fileitem_with_storage
382 	{
383 	public:
384 		tLocalGetFitem(string sLocalPath, struct stat &stdata) :
385 			fileitem_with_storage(sLocalPath)
386 		{
387 			m_bAllowStoreData=false;
388 			m_status=FIST_COMPLETE;
389 			m_nSizeChecked=m_nSizeSeen=stdata.st_size;
390 			m_bCheckFreshness=false;
391 			m_head.type=header::ANSWER;
392 			m_head.frontLine="HTTP/1.1 200 OK";
393 			m_head.set(header::CONTENT_LENGTH, stdata.st_size);
394 			m_head.prep(header::LAST_MODIFIED, 26);
395 			if(m_head.h[header::LAST_MODIFIED])
396 				FormatTime(m_head.h[header::LAST_MODIFIED], 26, stdata.st_mtim.tv_sec);
397 			cmstring &sMimeType=cfg::GetMimeType(sLocalPath);
398 			if(!sMimeType.empty())
399 				m_head.set(header::CONTENT_TYPE, sMimeType);
400 		};
401 		virtual int GetFileFd() override
402 		{
403 			int fd=open(m_sPathRel.c_str(), O_RDONLY);
404 		#ifdef HAVE_FADVISE
405 			// optional, experimental
406 			if(fd>=0)
407 				posix_fadvise(fd, 0, m_nSizeChecked, POSIX_FADV_SEQUENTIAL);
408 		#endif
409 			return fd;
410 		}
411 	};
412 	m_pItem.RegisterFileitemLocalOnly(new tLocalGetFitem(absPath, stbuf));
413 }
414 
ParseRange()415 inline bool job::ParseRange()
416 {
417 
418 	/*
419 	 * Range: bytes=453291-
420 	 * ...
421 	 * Content-Length: 7271829
422 	 * Content-Range: bytes 453291-7725119/7725120
423 	 */
424 
425 	const char *pRange = m_pReqHead->h[header::RANGE];
426 	// working around a bug in old curl versions
427 	if (!pRange)
428 		pRange = m_pReqHead->h[header::CONTENT_RANGE];
429 	if (pRange)
430 	{
431 		int nRangeItems = sscanf(pRange, "bytes=" OFF_T_FMT
432 		"-" OFF_T_FMT, &m_nReqRangeFrom, &m_nReqRangeTo);
433 		// working around bad (old curl style) requests
434 		if (nRangeItems <= 0)
435 		{
436 			nRangeItems = sscanf(pRange, "bytes "
437 			OFF_T_FMT "-" OFF_T_FMT, &m_nReqRangeFrom, &m_nReqRangeTo);
438 		}
439 
440 		if (nRangeItems < 1) // weird...
441 			m_nReqRangeFrom = m_nReqRangeTo = -2;
442 		else
443 			return true;
444 	}
445 	return false;
446 }
447 
PrepareDownload(LPCSTR headBuf)448 void job::PrepareDownload(LPCSTR headBuf) {
449 
450     LOGSTART("job::PrepareDownload");
451 
452 #ifdef DEBUGLOCAL
453     cfg::localdirs["stuff"]="/tmp/stuff";
454     log::err(m_pReqHead->ToString());
455 #endif
456 
457     string sReqPath, sPathResidual;
458     tHttpUrl theUrl; // parsed URL
459 
460 	// resolve to an internal repo location and maybe backends later
461 	cfg::tRepoResolvResult repoMapping;
462 
463     fileitem::FiStatus fistate(fileitem::FIST_FRESH);
464     bool bPtMode(false);
465     bool bForceFreshnessChecks(false); // force update of the file, i.e. on dynamic index files?
466     tSplitWalk tokenizer(& m_pReqHead->frontLine, SPACECHARS);
467 
468     if(m_pReqHead->type!=header::GET && m_pReqHead->type!=header::HEAD)
469     	goto report_invpath;
470     if(!tokenizer.Next() || !tokenizer.Next()) // at path...
471     	goto report_invpath;
472     UrlUnescapeAppend(tokenizer, sReqPath);
473     if(!tokenizer.Next()) // at proto
474     	goto report_invpath;
475     m_bIsHttp11 = (sHttp11 == tokenizer.str());
476 
477     USRDBG( "Decoded request URI: " << sReqPath);
478 
479 	// a sane default? normally, close-mode for http 1.0, keep for 1.1.
480 	// But if set by the client then just comply!
481 	m_bClientWants2Close =!m_bIsHttp11;
482 	if(m_pReqHead && m_pReqHead->h[header::CONNECTION])
483 		m_bClientWants2Close = !strncasecmp(m_pReqHead->h[header::CONNECTION], "close", 5);
484 
485     // "clever" file system browsing attempt?
486 	if(rex::Match(sReqPath, rex::NASTY_PATH)
487 			|| stmiss != sReqPath.find(MAKE_PTR_0_LEN("/_actmp"))
488 			|| startsWithSz(sReqPath, "/_"))
489 		goto report_notallowed;
490 
491     try
492 	{
493 		if (startsWithSz(sReqPath, "/HTTPS///"))
494 			sReqPath.replace(0, 6, PROT_PFX_HTTPS);
495 		// special case: proxy-mode AND special prefix are there
496 		if(0==strncasecmp(sReqPath.c_str(), WITHLEN("http://https///")))
497 			sReqPath.replace(0, 13, PROT_PFX_HTTPS);
498 
499 		if(!theUrl.SetHttpUrl(sReqPath, false))
500 		{
501 			m_eMaintWorkType=tSpecialRequest::workUSERINFO;
502 			return;
503 		}
504 		LOG("refined path: " << theUrl.sPath << "\n on host: " << theUrl.sHost);
505 
506 		// extract the actual port from the URL
507 		char *pEnd(0);
508 		unsigned nPort = 80;
509 		LPCSTR sPort=theUrl.GetPort().c_str();
510 		if(!*sPort)
511 		{
512 			nPort = (uint) strtoul(sPort, &pEnd, 10);
513 			if('\0' != *pEnd || pEnd == sPort || nPort > TCP_PORT_MAX || !nPort)
514 				goto report_invport;
515 		}
516 
517 		if(cfg::pUserPorts)
518 		{
519 			if(!cfg::pUserPorts->test(nPort))
520 				goto report_invport;
521 		}
522 		else if(nPort != 80)
523 			goto report_invport;
524 
525 		// kill multiple slashes
526 		for(tStrPos pos=0; stmiss != (pos = theUrl.sPath.find("//", pos, 2)); )
527 			theUrl.sPath.erase(pos, 1);
528 
529 		bPtMode=rex::MatchUncacheable(theUrl.ToURI(false), rex::NOCACHE_REQ);
530 
531 		LOG("input uri: "<<theUrl.ToURI(false)<<" , dontcache-flag? " << bPtMode
532 				<< ", admin-page: " << cfg::reportpage);
533 
534 		if(!cfg::reportpage.empty() || theUrl.sHost == "style.css")
535 		{
536 			m_eMaintWorkType = tSpecialRequest::DispatchMaintWork(sReqPath,
537 					m_pReqHead->h[header::AUTHORIZATION]);
538 			if(m_eMaintWorkType != tSpecialRequest::workNotSpecial)
539 			{
540 				m_sFileLoc = sReqPath;
541 				return;
542 			}
543 		}
544 
545 		using namespace rex;
546 
547 		{
548 			tStrMap::const_iterator it = cfg::localdirs.find(theUrl.sHost);
549 			if (it != cfg::localdirs.end())
550 			{
551 				PrepareLocalDownload(sReqPath, it->second, theUrl.sPath);
552 				ParseRange();
553 				return;
554 			}
555 		}
556 
557 		// entered directory but not defined as local? Then 404 it with hints
558 		if(!theUrl.sPath.empty() && endsWithSzAr(theUrl.sPath, "/"))
559 		{
560 			LOG("generic user information page for " << theUrl.sPath);
561 			m_eMaintWorkType=tSpecialRequest::workUSERINFO;
562 			return;
563 		}
564 
565 		m_type = GetFiletype(theUrl.sPath);
566 
567 		if ( m_type == FILE_INVALID )
568 		{
569 			if(!cfg::patrace)
570 			{
571 				// just for the log
572 				m_sFileLoc = theUrl.sPath;
573 				goto report_notallowed;
574 			}
575 
576 			// ok, collect some information helpful to the user
577 			m_type = FILE_VOLATILE;
578 			lockguard g(traceData);
579 			traceData.insert(theUrl.sPath);
580 		}
581 
582 		// got something valid, has type now, trace it
583 		USRDBG("Processing new job, "<<m_pReqHead->frontLine);
584 
585 		cfg::GetRepNameAndPathResidual(theUrl, repoMapping);
586 		if(repoMapping.psRepoName && !repoMapping.psRepoName->empty())
587 			m_sFileLoc=*repoMapping.psRepoName+SZPATHSEP+repoMapping.sRestPath;
588 		else
589 			m_sFileLoc=theUrl.sHost+theUrl.sPath;
590 
591 		bForceFreshnessChecks = ( ! cfg::offlinemode && m_type == FILE_VOLATILE);
592 
593 		m_pItem.PrepareRegisteredFileItemWithStorage(m_sFileLoc, bForceFreshnessChecks);
594 	}
595 	catch(std::out_of_range&) // better safe...
596 	{
597     	goto report_invpath;
598     }
599 
600     if(!m_pItem)
601     {
602     	USRDBG("Error creating file item for " << m_sFileLoc);
603     	goto report_overload;
604     }
605 
606     if(cfg::DegradedMode())
607        goto report_degraded;
608 
609     fistate = m_pItem.getFiPtr()->Setup(bForceFreshnessChecks);
610 	LOG("Got initial file status: " << (int) fistate);
611 
612 	if (bPtMode && fistate != fileitem::FIST_COMPLETE)
613 		fistate = _SwitchToPtItem();
614 
615 	ParseRange();
616 
617 	// might need to update the filestamp because nothing else would trigger it
618 	if(cfg::trackfileuse && fistate >= fileitem::FIST_DLGOTHEAD && fistate < fileitem::FIST_DLERROR)
619 		m_pItem.getFiPtr()->UpdateHeadTimestamp();
620 
621 	if(fistate==fileitem::FIST_COMPLETE)
622 		return; // perfect, done here
623 
624 	// early optimization for requests which want a defined file part which we already have
625 	// no matter whether the file is complete or not
626 	// handles different cases: GET with range, HEAD with range, HEAD with no range
627 	if((m_nReqRangeFrom>=0 && m_nReqRangeTo>=0)
628 			|| (m_pReqHead->type==header::HEAD && 0!=(m_nReqRangeTo=-1)))
629 	{
630 		auto p(m_pItem.getFiPtr());
631 		lockguard g(p.get());
632 		if(m_pItem.getFiPtr()->CheckUsableRange_unlocked(m_nReqRangeTo))
633 		{
634 			LOG("Got a partial request for incomplete download; range is available");
635 			m_bNoDownloadStarted=true;
636 			return;
637 		}
638 	}
639 
640     if(cfg::offlinemode) { // make sure there will be no problems later in SendData or prepare a user message
641     	// error or needs download but freshness check was disabled, so it's really not complete.
642     	goto report_offlineconf;
643     }
644     dbgline;
645     if( fistate < fileitem::FIST_DLGOTHEAD) // needs a downloader
646     {
647     	dbgline;
648     	if(!m_pParentCon->SetupDownloader(m_pReqHead->h[header::XFORWARDEDFOR]))
649     	{
650     		USRDBG( "Error creating download handler for "<<m_sFileLoc);
651     		goto report_overload;
652     	}
653 
654     	dbgline;
655 try
656 		{
657     		auto bHaveRedirects=(repoMapping.repodata && !repoMapping.repodata->m_backends.empty());
658 
659     		if (cfg::forcemanaged && !bHaveRedirects)
660 						goto report_notallowed;
661 
662 				if (!bPtMode)
663 				{
664 					// XXX: this only checks the first found backend server, what about others?
665 					auto testUri= bHaveRedirects
666 							? repoMapping.repodata->m_backends.front().ToURI(false)
667 									+ repoMapping.sRestPath
668 							: theUrl.ToURI(false);
669 					if (rex::MatchUncacheable(testUri, rex::NOCACHE_TGT))
670 						fistate = _SwitchToPtItem();
671 				}
672 
673 					if (m_pParentCon->m_pDlClient->AddJob(m_pItem.getFiPtr(),
674 							bHaveRedirects ? nullptr : &theUrl, repoMapping.repodata,
675 							bHaveRedirects ? &repoMapping.sRestPath : nullptr,
676 									(LPCSTR) ( bPtMode ? headBuf : nullptr),
677 							cfg::redirmax))
678 				{
679 					ldbg("Download job enqueued for " << m_sFileLoc);
680 				}
681 				else
682 				{
683 					ldbg("PANIC! Error creating download job for " << m_sFileLoc);
684 					goto report_overload;
685 				}
686 		}
687 		catch(std::bad_alloc&) // OOM, may this ever happen here?
688 		{
689 			USRDBG( "Out of memory");
690 			goto report_overload;
691 		};
692 	}
693 
694 	return;
695 
696 report_overload:
697 	SetErrorResponse("503 Server overload, try later");
698     return ;
699 
700 report_notallowed:
701 	SetErrorResponse("403 Forbidden file type or location");
702     return ;
703 
704 report_offlineconf:
705 	SetErrorResponse("503 Unable to download in offline mode");
706 	return;
707 
708 report_invpath:
709 	SetErrorResponse("403 Invalid path specification");
710     return ;
711 
712 report_degraded:
713 	SetErrorResponse("403 Cache server in degraded mode");
714 	return ;
715 
716 report_invport:
717 	SetErrorResponse("403 Configuration error (confusing proxy mode) or prohibited port (see AllowUserPorts)");
718     return ;
719 }
720 
// Error exit used inside job::SendData: if payload was already sent, the connection can
// only be dropped; otherwise an error response is prepared and sent in the next cycle.
#define THROW_ERROR(x) { if(m_nAllDataCount) return R_DISCON; SetErrorResponse(x); return R_AGAIN; }
/*
 * Performs one step of sending the response to the client and is expected to be
 * called repeatedly as long as it returns R_AGAIN.
 *
 * confd: descriptor of the client connection.
 *
 * Returns
 *  - R_AGAIN: call again (more to send, or data not available yet),
 *  - R_DONE: the job is finished and the client did not ask to close the connection,
 *  - R_DISCON: the connection shall be closed (maintenance task done, client asked
 *    for it, or an unrecoverable error).
 *
 * First the status of the file item is examined (waiting under its lock until a
 * header and data at the current send position are available), then the state
 * machine in m_state is run. STATE_SEND_BUFFER flushes m_sendbuf and afterwards
 * continues with the state remembered in m_backstate.
 */
job::eJobResult job::SendData(int confd)
{
	LOGSTART("job::SendData");

	// maintenance requests are handled completely by the special request code
	if(m_eMaintWorkType)
	{
		tSpecialRequest::RunMaintWork(m_eMaintWorkType, m_sFileLoc, confd);
		return R_DISCON; // just stop and close connection
	}

	off_t nGoodDataSize(0);
	fileitem::FiStatus fistate(fileitem::FIST_DLERROR);
	header respHead; // template of the response header, e.g. as found in cache

	if (confd<0)
		return R_DISCON; // shouldn't be here

	if (m_pItem)
	{
		lockuniq g(m_pItem.getFiPtr().get());

		// wait until the item is usable for the current send position
		for(;;)
		{
			fistate=m_pItem.getFiPtr()->GetStatusUnlocked(nGoodDataSize);

			LOG((int) fistate);
			if (fistate > fileitem::FIST_COMPLETE)
			{
				// error state: turn the item's header into an error response,
				// unless payload was already sent
				const header &h = m_pItem.getFiPtr()->GetHeaderUnlocked();
				g.unLock(); // item lock must be released in order to replace it!
				if(m_nAllDataCount)
					return R_DISCON;
				if(h.h[header::XORIG])
					m_sOrigUrl=h.h[header::XORIG];
				// must be something like "HTTP/1.1 403 Forbidden"
				if(h.frontLine.length()>9 && h.getStatus()!=200)
					SetErrorResponse(h.frontLine.c_str()+9, h.h[header::LOCATION]);
				else // good ungood? confused somewhere?!
				{
					ldbg("good ungood, consused?" << h.frontLine)
					SetErrorResponse("500 Unknown error");
				}
				return R_AGAIN;
			}
			/*
			 * Detect the same special case as above. There is no download agent to change
			 * the state so no need to wait.
			*/
			if(m_bNoDownloadStarted)
			{
				fistate = fileitem::FIST_DLRECEIVING;
				break;
			}
			// or wait for the dl source to get data at the position we need to start from
			LOG("sendstate: " << (int) fistate << " , sendpos: " << m_nSendPos << nGoodDataSize);
			if(fistate==fileitem::FIST_COMPLETE || (m_nSendPos < nGoodDataSize && fistate>=fileitem::FIST_DLGOTHEAD))
				break;

			dbgline;
			m_pItem.getFiPtr()->wait(g);

			dbgline;
		}

		// work on a copy of the header, the lock is released at the end of this scope
		respHead = m_pItem.getFiPtr()->GetHeaderUnlocked();

		if(respHead.h[header::XORIG])
			m_sOrigUrl=respHead.h[header::XORIG];

	}
	else if(m_state != STATE_SEND_BUFFER)
	{
		// without file item only a prepared buffer can be sent
		ASSERT(!"no FileItem assigned and no sensible way to continue");
		return R_DISCON;
	}
// Common exit for the final states: disconnect if the client asked for it, otherwise done.
#define returnSomething(msg) { LOG(msg); if(m_bClientWants2Close) return R_DISCON; \
		LOG("Reporting job done"); return R_DONE; };

	for(;;) // left by returning
	{
		try // for bad_alloc in members
		{
			switch(m_state)
			{
				case(STATE_SEND_MAIN_HEAD):
				{
					// build the response header; it is sent through STATE_SEND_BUFFER in the next call
					ldbg("STATE_FRESH");
					if(fistate < fileitem::FIST_DLGOTHEAD) // be sure about that
						return R_AGAIN;
					m_state=STATE_SEND_BUFFER;
					m_backstate=STATE_HEADER_SENT; // could be changed while creating header
					const char *szErr = BuildAndEnqueHeader(fistate, nGoodDataSize, respHead);
					if(szErr) THROW_ERROR(szErr);
					USRDBG("Response header to be sent in the next cycle: \n" << m_sendbuf );
					return R_AGAIN;
				}
				case(STATE_HEADER_SENT):
				{
					// header is out, get the data source and choose the transfer mode
					ldbg("STATE_HEADER_SENT");

					if(fistate < fileitem::FIST_DLGOTHEAD)
					{
						ldbg("ERROR condition detected: starts activity while downloader not ready")
						return R_AGAIN;
					}

					m_filefd=m_pItem.getFiPtr()->GetFileFd();
					if(!IsValidFD(m_filefd)) THROW_ERROR("503 IO error");

					m_state=m_bChunkMode ? STATE_SEND_CHUNK_HEADER : STATE_SEND_PLAIN_DATA;
					ldbg("next state will be: " << (int) m_state);
					continue;
				}
				case(STATE_SEND_PLAIN_DATA):
				{
					ldbg("STATE_SEND_PLAIN_DATA, max. " << nGoodDataSize);

					// eof?
#define GOTOENDE { m_state=STATE_FINISHJOB ; continue; }
					if(m_nSendPos>=nGoodDataSize)
					{
						if(fistate>=fileitem::FIST_COMPLETE)
							GOTOENDE;
						LOG("Cannot send more, not enough fresh data yet");
						return R_AGAIN;
					}

					// limited by the available data and by the end of the requested range
					size_t nMax2SendNow=min(nGoodDataSize-m_nSendPos, m_nCurrentRangeLast+1-m_nSendPos);
					ldbg("~sendfile: on "<< m_nSendPos << " up to : " << nMax2SendNow);
					int n = m_pItem.getFiPtr()->SendData(confd, m_filefd, m_nSendPos, nMax2SendNow);
					ldbg("~sendfile: " << n << " new m_nSendPos: " << m_nSendPos);

					if(n>0)
						m_nAllDataCount+=n;

					// shortcuts
					if(m_nSendPos>m_nCurrentRangeLast ||
							(fistate==fileitem::FIST_COMPLETE && m_nSendPos==nGoodDataSize))
						GOTOENDE;

					if(n<0)
						THROW_ERROR("400 Client error");

					return R_AGAIN;
				}
				case(STATE_SEND_CHUNK_HEADER):
				{
					// next chunk covers everything available beyond the send position;
					// a chunk of size zero terminates the chunked transfer
					m_nChunkRemainingBytes=nGoodDataSize-m_nSendPos;
					ldbg("STATE_SEND_CHUNK_HEADER for " << m_nChunkRemainingBytes);
					if(!m_nChunkRemainingBytes && fistate < fileitem::FIST_COMPLETE)
					{
						ldbg("No data to send YET, will try later");
						return R_AGAIN;
					}
					m_sendbuf << tSS::hex << m_nChunkRemainingBytes << tSS::dec
							<< (m_nChunkRemainingBytes ? "\r\n" : "\r\n\r\n");

					m_state=STATE_SEND_BUFFER;
					m_backstate=m_nChunkRemainingBytes ? STATE_SEND_CHUNK_DATA : STATE_FINISHJOB;
					continue;
				}
				case(STATE_SEND_CHUNK_DATA):
				{
					ldbg("STATE_SEND_CHUNK_DATA");

					if(m_nChunkRemainingBytes==0)
						GOTOENDE; // done
					int n = m_pItem.getFiPtr()->SendData(confd, m_filefd, m_nSendPos, m_nChunkRemainingBytes);
					if(n<0)
						THROW_ERROR("400 Client error");
					m_nChunkRemainingBytes-=n;
					m_nAllDataCount+=n;
					if(m_nChunkRemainingBytes<=0)
					{ // append final newline
						m_sendbuf<<"\r\n";
						m_state=STATE_SEND_BUFFER;
						m_backstate=STATE_SEND_CHUNK_HEADER;
						continue;
					}
					return R_AGAIN;
				}
				case(STATE_SEND_BUFFER):
				{
					/* Sends data from the local buffer, used for page or chunk headers.
					* -> DELICATE STATE, should not be interrupted by exception or throw
					* to another state path uncontrolled. */
					try
					{
						ldbg("prebuf sending: "<< m_sendbuf.c_str());
						// MSG_MORE unless the connection is closed right after this buffer
						auto r=send(confd, m_sendbuf.rptr(), m_sendbuf.size(),
								m_backstate == STATE_TODISCON ? 0 : MSG_MORE);
						if (r<0)
						{
							if (errno==EAGAIN || errno==EINTR || errno == ENOBUFS)
								return R_AGAIN;
							return R_DISCON;
						}
						m_nAllDataCount+=r;
						m_sendbuf.drop(r);
						if(m_sendbuf.empty())
						{
							USRDBG("Returning to last state, " << (int) m_backstate);
							m_state=m_backstate;
							continue;
						}
					}
					catch(...)
					{
						return R_DISCON;
					}
					return R_AGAIN;
				}
				case(STATE_ALLDONE):
				returnSomething("STATE_ALLDONE?");
				case (STATE_ERRORCONT):
				returnSomething("STATE_ERRORCONT?");
				case(STATE_FINISHJOB):
				returnSomething("STATE_FINISHJOB?");
				case(STATE_TODISCON):
				default:
					return R_DISCON;
			}
		}
		catch(bad_alloc&) {
			// TODO: report memory failure?
			return R_DISCON;
		}
		//ASSERT(!"UNREACHED");
	}
	//ASSERT(!"UNREACHEABLE");
	return R_DISCON;
}
954 
955 
BuildAndEnqueHeader(const fileitem::FiStatus & fistate,const off_t & nGooddataSize,header & respHead)956 inline const char * job::BuildAndEnqueHeader(const fileitem::FiStatus &fistate,
957 		const off_t &nGooddataSize, header& respHead)
958 {
959 	LOGSTART("job::BuildAndEnqueHeader");
960 
961 	if(respHead.type != header::ANSWER)
962 	{
963 		LOG(respHead.ToString());
964 		return "500 Rotten Data";
965 	}
966 
967 	// make sure that header has consistent state and there is data to send which is expected by the client
968 	int httpstatus = respHead.getStatus();
969 	LOG("State: " << httpstatus);
970 
971 	// nothing to send for special codes (like not-unmodified) or w/ GUARANTEED empty body
972 	bool bHasSendableData = !BODYFREECODE(httpstatus)
973 			&& (!respHead.h[header::CONTENT_LENGTH] // not set, maybe chunked data
974 			                || atoofft(respHead.h[header::CONTENT_LENGTH])); // set, to non-0
975 
976 	tSS &sb=m_sendbuf;
977 	sb.clear();
978 	sb << respHead.frontLine <<"\r\n";
979 
980 	bool bGotLen(false);
981 
982 	if(bHasSendableData)
983 	{
984 		LOG("has sendable content");
985 		if( ! respHead.h[header::CONTENT_LENGTH]
986 #ifdef DEBUG // might have been defined before for testing purposes
987 		                 || m_bChunkMode
988 #endif
989 				)
990 		{
991 			// unknown length but must have data, will have to improvise: prepare chunked transfer
992 			if ( ! m_bIsHttp11 ) // you cannot process this? go away
993 				return "505 HTTP version not supported for this file";
994 			m_bChunkMode=true;
995 			sb<<"Transfer-Encoding: chunked\r\n";
996 		}
997 		else if(200==httpstatus) // state: good data response with known length, can try some optimizations
998 		{
999 			LOG("has known content length, optimizing response...");
1000 
1001 			// Handle If-Modified-Since and Range headers;
1002 			// we deal with them equally but need to know which to use
1003 			const char *pIfmo = m_pReqHead->h[header::RANGE] ?
1004 					m_pReqHead->h[header::IFRANGE] : m_pReqHead->h[header::IF_MODIFIED_SINCE];
1005 			const char *pLastMo = respHead.h[header::LAST_MODIFIED];
1006 
1007 			// consider contents "fresh" for non-volatile data, or when "our" special client is there, or the client simply doesn't care
1008 			bool bDataIsFresh = (m_type != rex::FILE_VOLATILE
1009 				|| m_pReqHead->h[header::ACNGFSMARK] || !pIfmo);
1010 
1011 			auto tm1=tm(), tm2=tm();
1012 			bool bIfModSeenAndChecked=false;
1013 			if(pIfmo && header::ParseDate(pIfmo, &tm1) && header::ParseDate(pLastMo, &tm2))
1014 			{
1015 				time_t a(mktime(&tm1)), b(mktime(&tm2));
1016 				LOG("if-mo-since: " << a << " vs. last-mo: " << b);
1017 				bIfModSeenAndChecked = (a==b);
1018 			}
1019 
1020 			// is it fresh? or is this relevant? or is range mode forced?
1021 			if(  bDataIsFresh || bIfModSeenAndChecked)
1022 			{
1023 				off_t nContLen=atoofft(respHead.h[header::CONTENT_LENGTH]);
1024 
1025 				// Client requested with Range* spec?
1026 				if(m_nReqRangeFrom >=0)
1027 				{
1028 					if(m_nReqRangeTo<0 || m_nReqRangeTo>=nContLen) // open-end? set the end to file length. Also when request range would be too large
1029 						m_nReqRangeTo=nContLen-1;
1030 
1031 					// or simply don't care within that rage
1032 					bool bPermitPartialStart = (
1033 							fistate >= fileitem::FIST_DLGOTHEAD
1034 							&& fistate <= fileitem::FIST_COMPLETE
1035 							&& nGooddataSize >= ( m_nReqRangeFrom - cfg::maxredlsize));
1036 
1037 					/*
1038 					 * make sure that our client doesn't just hang here while the download thread is
1039 					 * fetching from 0 to start position for many minutes. If the resumed position
1040 					 * is beyond of what we already have, fall back to 200 (complete download).
1041 					 */
1042 					if(fistate==fileitem::FIST_COMPLETE
1043 							// or can start sending within this range (positive range-from)
1044 							|| bPermitPartialStart	// don't care since found special hint from acngfs (kludge...)
1045 							|| m_pReqHead->h[header::ACNGFSMARK] )
1046 					{
1047 						// detect errors, out-of-range case
1048 						if(m_nReqRangeFrom>=nContLen || m_nReqRangeTo<m_nReqRangeFrom)
1049 							return "416 Requested Range Not Satisfiable";
1050 
1051 						m_nSendPos = m_nReqRangeFrom;
1052 						m_nCurrentRangeLast = m_nReqRangeTo;
1053 						// replace with partial-response header
1054 						sb.clear();
1055 						sb << "HTTP/1.1 206 Partial Response\r\nContent-Length: "
1056 						 << (m_nCurrentRangeLast-m_nSendPos+1) <<
1057 								"\r\nContent-Range: bytes "<< m_nSendPos
1058 								<< "-" << m_nCurrentRangeLast << "/" << nContLen << "\r\n";
1059 						bGotLen=true;
1060 					}
1061 				}
1062 				else if(bIfModSeenAndChecked)
1063 				{
1064 					// file is fresh, and user sent if-mod-since -> fine
1065 					return "304 Not Modified";
1066 				}
1067 			}
1068 		}
1069 		// has cont-len available but this header was not set yet in the code above
1070 		if( !bGotLen && !m_bChunkMode)
1071 			sb<<"Content-Length: "<<respHead.h[header::CONTENT_LENGTH]<<"\r\n";
1072 
1073 		// OK, has data for user and has set content-length and/or range or chunked transfer mode, now add various meta headers...
1074 
1075 		if(respHead.h[header::LAST_MODIFIED])
1076 			sb<<"Last-Modified: "<<respHead.h[header::LAST_MODIFIED]<<"\r\n";
1077 
1078 		sb<<"Content-Type: ";
1079 		if(respHead.h[header::CONTENT_TYPE])
1080 			sb<<respHead.h[header::CONTENT_TYPE]<<"\r\n";
1081 		else
1082 			sb<<"application/octet-stream\r\n";
1083 	}
1084 	else
1085 	{
1086 		sb<<"Content-Length: 0\r\n";
1087 
1088 		m_backstate=STATE_ALLDONE;
1089 	}
1090 
1091 	sb << header::GenInfoHeaders();
1092 
1093 	if(respHead.h[header::LOCATION])
1094 		sb<<"Location: "<<respHead.h[header::LOCATION]<<"\r\n";
1095 
1096 	if(!m_sOrigUrl.empty())
1097 		sb<<"X-Original-Source: "<< m_sOrigUrl <<"\r\n";
1098 
1099 	// whatever the client wants
1100 	sb<<"Connection: "<<(m_bClientWants2Close?"close":"Keep-Alive")<<"\r\n";
1101 
1102 	sb<<"\r\n";
1103 	LOG("response prepared:" << sb);
1104 
1105 	if(m_pReqHead->type==header::HEAD)
1106 		m_backstate=STATE_ALLDONE; // simulated head is prepared but don't send stuff
1107 
1108 	return 0;
1109 }
1110 
_SwitchToPtItem()1111 fileitem::FiStatus job::_SwitchToPtItem()
1112 {
1113 	// Changing to local pass-through file item
1114 	LOGSTART("job::_SwitchToPtItem");
1115 	// exception-safe sequence
1116 	m_pItem.RegisterFileitemLocalOnly(new tPassThroughFitem(m_sFileLoc));
1117 	return m_pItem.getFiPtr()->Setup(true);
1118 }
1119 
1120 
SetErrorResponse(const char * errorLine,const char * szLocation,const char * bodytext)1121 void job::SetErrorResponse(const char * errorLine, const char *szLocation, const char *bodytext)
1122 {
1123 	LOGSTART2("job::SetErrorResponse", errorLine << " ; for " << m_sOrigUrl);
1124 	class erroritem: public tGeneratedFitemBase
1125 	{
1126 	public:
1127 		erroritem(const string &sId, const char *szError, const char *bodytext)
1128 			: tGeneratedFitemBase(sId, szError)
1129 		{
1130 			if(BODYFREECODE(m_head.getStatus()))
1131 				return;
1132 			// otherwise do something meaningful
1133 			m_data <<"<!DOCTYPE html>\n<html lang=\"en\"><head><title>" << (bodytext ? bodytext : szError)
1134 				<< "</title>\n</head>\n<body><h1>"
1135 				<< (bodytext ? bodytext : szError) << "</h1></body></html>";
1136 			m_head.set(header::CONTENT_TYPE, "text/html");
1137 			seal();
1138 		}
1139 	};
1140 
1141 	erroritem *p = new erroritem("noid", errorLine, bodytext);
1142 	p->HeadRef().set(header::LOCATION, szLocation);
1143 	m_pItem.RegisterFileitemLocalOnly(p);
1144 	//aclog::err(tSS() << "fileitem is now " << uintptr_t(m_pItem.get()));
1145 	m_state=STATE_SEND_MAIN_HEAD;
1146 }
1147 
1148 }
1149