1 /*********************************************************************
2  *   Copyright 2016, UCAR/Unidata
3  *   See netcdf/COPYRIGHT file for copying and redistribution conditions.
4  *********************************************************************/
5 
6 #include "ncdispatch.h"
7 #include "ncd4dispatch.h"
8 #include "d4includes.h"
9 #include "d4read.h"
10 #include "d4curlfunctions.h"
11 
12 #ifdef _MSC_VER
13 #include <process.h>
14 #include <direct.h>
15 #endif
16 
17 #ifdef HAVE_SYS_STAT_H
18 #include <sys/stat.h>
19 #endif
20 
21 /**************************************************/
22 /* Forward */
23 
24 static void applyclientmetacontrols(NCD4meta* meta);
25 static int constrainable(NCURI*);
26 static void freeCurl(NCD4curl*);
27 static void freeInfo(NCD4INFO*);
28 static int paramcheck(NCD4INFO*, const char* key, const char* subkey);
29 static const char* getparam(NCD4INFO* info, const char* key);
30 static int set_curl_properties(NCD4INFO*);
31 
32 /**************************************************/
33 /* Constants */
34 
/* Separator characters consulted by paramcheck() when it tests the
   boundary of a subkey found inside a parameter value. */
static const char* checkseps = "+,:;";
36 
37 /**************************************************/
38 int
NCD4_open(const char * path,int mode,int basepe,size_t * chunksizehintp,void * mpidata,NC_Dispatch * dispatch,NC * nc)39 NCD4_open(const char * path, int mode,
40           int basepe, size_t *chunksizehintp,
41           void *mpidata, NC_Dispatch *dispatch, NC *nc)
42 {
43     int ret = NC_NOERR;
44     NCD4INFO* d4info = NULL;
45     const char* value;
46     NCD4meta* meta;
47 
48     if(path == NULL)
49 	return THROW(NC_EDAPURL);
50 
51     assert(dispatch != NULL);
52 
53     /* Setup our NC and NCDAPCOMMON state*/
54 
55     d4info = (NCD4INFO*)calloc(1,sizeof(NCD4INFO));
56     if(d4info == NULL) {ret = NC_ENOMEM; goto done;}
57 
58     nc->dispatchdata = d4info;
59     nc->int_ncid = nc__pseudofd(); /* create a unique id */
60     d4info->controller = (NC*)nc;
61 
62     /* Parse url and params */
63     if(ncuriparse(nc->path,&d4info->uri) != NCU_OK)
64 	{ret = NC_EDAPURL; goto done;}
65 
66     /* Load auth info from rc file */
67     if((ret = NC_authsetup(&d4info->auth, d4info->uri)))
68 	goto done;
69     NCD4_curl_protocols(d4info);
70 
71     if(!constrainable(d4info->uri))
72 	SETFLAG(d4info->controls.flags,NCF_UNCONSTRAINABLE);
73 
74     /* fail if we are unconstrainable but have constraints */
75     if(FLAGSET(d4info->controls.flags,NCF_UNCONSTRAINABLE)) {
76 	if(d4info->uri->query != NULL) {
77 	    nclog(NCLOGWARN,"Attempt to constrain an unconstrainable data source: %s",
78 		   d4info->uri->query);
79 	    ret = THROW(NC_EDAPCONSTRAINT);
80 	    goto done;
81 	}
82     }
83 
84     /* process control client parameters */
85     NCD4_applyclientparamcontrols(d4info);
86 
87     /* Use libsrc4 code (netcdf-4) for storing metadata */
88     {
89 	char tmpname[NC_MAX_NAME];
90 
91         /* Create fake file name: exact name must be unique,
92            but is otherwise irrelevant because we are using NC_DISKLESS
93         */
94 	if(strlen(d4info->controls.substratename) > 0)
95             snprintf(tmpname,sizeof(tmpname),"%s",d4info->controls.substratename);
96 	else
97             snprintf(tmpname,sizeof(tmpname),"tmp_%d",nc->int_ncid);
98 
99         /* Now, use the file to create the hidden substrate netcdf file.
100 	   We want this hidden file to always be NC_NETCDF4, so we need to
101            force default format temporarily in case user changed it.
102 	   Since diskless is enabled, create file in-memory.
103 	*/
104 	{
105 	    int new = NC_NETCDF4;
106 	    int old = 0;
107 	    int ncid = 0;
108 	    int ncflags = NC_NETCDF4|NC_CLOBBER;
109 	    ncflags |= NC_DISKLESS;
110 	    if(FLAGSET(d4info->controls.debugflags,NCF_DEBUG_COPY)) {
111 		/* Cause data to be dumped to real file */
112 		ncflags |= NC_WRITE;
113 		ncflags &= ~(NC_DISKLESS); /* use real file */
114 	    }
115 	    nc_set_default_format(new,&old); /* save and change */
116             ret = nc_create(tmpname,ncflags,&ncid);
117 	    nc_set_default_format(old,&new); /* restore */
118 	    d4info->substrate.realfile = ((ncflags & NC_DISKLESS) == 0);
119 	    d4info->substrate.filename = strdup(tmpname);
120 	    if(d4info->substrate.filename == NULL) ret = NC_ENOMEM;
121 	    d4info->substrate.nc4id = ncid;
122 	}
123         if(ret != NC_NOERR) goto done;
124 	/* Avoid fill */
125 	nc_set_fill(getnc4id(nc),NC_NOFILL,NULL);
126     }
127 
128     /* Turn on logging; only do this after oc_open*/
129     if((value = ncurilookup(d4info->uri,"log")) != NULL) {
130 	ncloginit();
131         if(nclogopen(value))
132 	    ncsetlogging(1);
133 	ncloginit();
134         if(nclogopen(value))
135 	    ncsetlogging(1);
136     }
137 
138     /* Setup a curl connection */
139     {
140         CURL* curl = NULL; /* curl handle*/
141 	d4info->curl = (NCD4curl*)calloc(1,sizeof(NCD4curl));
142 	if(d4info->curl == NULL)
143 	    {ret = NC_ENOMEM; goto done;}
144 	/* create the connection */
145         if((ret=NCD4_curlopen(&curl))!= NC_NOERR) goto done;
146 	d4info->curl->curl = curl;
147         /* Load misc rc properties */
148         NCD4_get_rcproperties(d4info);
149         if((ret=set_curl_properties(d4info))!= NC_NOERR) goto done;
150         /* Set the one-time curl flags */
151         if((ret=NCD4_set_flags_perlink(d4info))!= NC_NOERR) goto done;
152 #if 1 /* temporarily make per-link */
153         if((ret=NCD4_set_flags_perfetch(d4info))!= NC_NOERR) goto done;
154 #endif
155     }
156 
157     d4info->curl->packet = ncbytesnew();
158     ncbytessetalloc(d4info->curl->packet,DFALTPACKETSIZE); /*initial reasonable size*/
159 
160     /* fetch the dmr + data*/
161     {
162 	int inmem = FLAGSET(d4info->controls.flags,NCF_ONDISK) ? 0 : 1;
163         if((ret = NCD4_readDAP(d4info,inmem))) goto done;
164     }
165 
166     /* if the url goes astray to a random web page, then try to just dump it */
167     {
168 	char* response = ncbytescontents(d4info->curl->packet);
169 	size_t responselen = ncbyteslength(d4info->curl->packet);
170 
171         /* Apply some heuristics to see what we have.
172            The leading byte will have the chunk flags, which should
173            be less than 0x0f (for now). However, it will not be zero if
174            the data was little-endian
175 	*/
176         if(responselen == 0 || response[0] >= ' ') {
177 	    /* does not look like a chunk, so probable server failure */
178 	    if(responselen == 0)
179 	        nclog(NCLOGERR,"Empty DAP4 response");
180 	    else {/* probable html response */
181 		nclog(NCLOGERR,"Unexpected DAP response:");
182 		nclog(NCLOGERR,"==============================");
183 		nclogtext(NCLOGERR,response);
184 		nclog(NCLOGERR,"==============================\n");
185 	    }
186 	    ret = NC_EDAPSVC;
187   	    fflush(stderr);
188 	    goto done;
189 	}
190     }
191 
192     /* Build the meta data */
193     if((d4info->substrate.metadata=NCD4_newmeta(ncbyteslength(d4info->curl->packet),
194         ncbytescontents(d4info->curl->packet)))==NULL)
195 	{ret = NC_ENOMEM; goto done;}
196     meta = d4info->substrate.metadata;
197     meta->controller = d4info;
198     meta->ncid = getnc4id(nc); /* Transfer netcdf ncid */
199 
200     /* process meta control parameters */
201     applyclientmetacontrols(meta);
202 
203     /* Infer the mode */
204     if((ret=NCD4_infermode(meta))) goto done;
205 
206     if((ret=NCD4_dechunk(meta))) goto done;
207 
208 #ifdef D4DUMPDMR
209   {
210     fprintf(stderr,"=============\n");
211     fputs(d4info->substrate.metadata->serial.dmr,stderr);
212     fprintf(stderr,"\n=============\n");
213     fflush(stderr);
214   }
215 #endif
216 
217     if((ret = NCD4_parse(d4info->substrate.metadata))) goto done;
218 #ifdef D4DEBUGMETA
219   {
220     fprintf(stderr,"\n/////////////\n");
221     NCbytes* buf = ncbytesnew();
222     NCD4_print(d4info->substrate.metadata,buf);
223     ncbytesnull(buf);
224     fputs(ncbytescontents(buf),stderr);
225     ncbytesfree(buf);
226     fprintf(stderr,"\n/////////////\n");
227     fflush(stderr);
228   }
229 #endif
230     if((ret = NCD4_metabuild(d4info->substrate.metadata,d4info->substrate.metadata->ncid))) goto done;
231     if(ret != NC_NOERR && ret != NC_EVARSIZE) goto done;
232     if((ret = NCD4_processdata(d4info->substrate.metadata))) goto done;
233 
234     return THROW(ret);
235 
236 done:
237     if(ret) {
238 	freeInfo(d4info);
239         nc->dispatchdata = NULL;
240     }
241     return THROW(ret);
242 }
243 
244 int
NCD4_close(int ncid,void * ignore)245 NCD4_close(int ncid, void* ignore)
246 {
247     int ret = NC_NOERR;
248     NC* nc;
249     NCD4INFO* d4info;
250     int substrateid;
251 
252     ret = NC_check_id(ncid, (NC**)&nc);
253     if(ret != NC_NOERR) goto done;
254     d4info = (NCD4INFO*)nc->dispatchdata;
255     substrateid = makenc4id(nc,ncid);
256 
257     /* We call abort rather than close to avoid trying to write anything,
258        except if we are debugging
259      */
260     if(FLAGSET(d4info->controls.debugflags,NCF_DEBUG_COPY)) {
261         /* Dump the data into the substrate */
262 	if((ret = NCD4_debugcopy(d4info)))
263 	    goto done;
264         ret = nc_close(substrateid);
265     } else {
266         ret = nc_abort(substrateid);
267     }
268 
269     freeInfo(d4info);
270 
271 done:
272     return THROW(ret);
273 }
274 
/* Abort is implemented as a plain close with no extra argument. */
int
NCD4_abort(int ncid)
{
    int status = NCD4_close(ncid,NULL);
    return status;
}
280 
281 /**************************************************/
282 
283 /* Reclaim an NCD4INFO instance */
284 static void
freeInfo(NCD4INFO * d4info)285 freeInfo(NCD4INFO* d4info)
286 {
287     if(d4info == NULL) return;
288     d4info->controller = NULL; /* break link */
289     nullfree(d4info->rawurltext);
290     nullfree(d4info->urltext);
291     ncurifree(d4info->uri);
292     freeCurl(d4info->curl);
293     nullfree(d4info->data.memory);
294     nullfree(d4info->data.ondiskfilename);
295     if(d4info->data.ondiskfile != NULL)
296 	fclose(d4info->data.ondiskfile);
297     nullfree(d4info->fileproto.filename);
298     if(d4info->substrate.realfile
299 	&& !FLAGSET(d4info->controls.debugflags,NCF_DEBUG_COPY)) {
300 	/* We used real file, so we need to delete the temp file
301            unless we are debugging.
302 	   Assume caller has done nc_close|nc_abort on the ncid.
303            Note that in theory, this should not be necessary since
304            AFAIK the substrate file is still in def mode, and
305            when aborted, it should be deleted. But that is not working
306            for some reason, so we delete it ourselves.
307 	*/
308 #if 0
309 	if(d4info->substrate.filename != NULL) {
310 	    unlink(d4info->substrate.filename);
311 	}
312 #endif
313     }
314     nullfree(d4info->substrate.filename); /* always reclaim */
315     NCD4_reclaimMeta(d4info->substrate.metadata);
316     NC_authclear(&d4info->auth);
317     nclistfree(d4info->blobs);
318     free(d4info);
319 }
320 
321 static void
freeCurl(NCD4curl * curl)322 freeCurl(NCD4curl* curl)
323 {
324     if(curl == NULL) return;
325     NCD4_curlclose(curl->curl);
326     ncbytesfree(curl->packet);
327     nullfree(curl->errdata.code);
328     nullfree(curl->errdata.message);
329     free(curl);
330 }
331 
332 /* Define the set of protocols known to be constrainable */
333 static const char* constrainableprotocols[] = {"http", "https",NULL};
334 
335 static int
constrainable(NCURI * durl)336 constrainable(NCURI* durl)
337 {
338    const char** protocol = constrainableprotocols;
339    for(;*protocol;protocol++) {
340 	if(strcmp(durl->protocol,*protocol)==0)
341 	    return 1;
342    }
343    return 0;
344 }
345 
346 /*
347 Set curl properties for link based on rc files etc.
348 */
349 static int
set_curl_properties(NCD4INFO * d4info)350 set_curl_properties(NCD4INFO* d4info)
351 {
352     int ret = NC_NOERR;
353 
354     if(d4info->auth.curlflags.useragent == NULL) {
355 	char* agent;
356         size_t len = strlen(DFALTUSERAGENT) + strlen(VERSION);
357 	len++; /*strlcat nul*/
358 	agent = (char*)malloc(len+1);
359 	strncpy(agent,DFALTUSERAGENT,len);
360 	strlcat(agent,VERSION,len);
361         d4info->auth.curlflags.useragent = agent;
362     }
363 
364     /* Some servers (e.g. thredds and columbia) appear to require a place
365        to put cookies in order for some security functions to work
366     */
367     if(d4info->auth.curlflags.cookiejar != NULL
368        && strlen(d4info->auth.curlflags.cookiejar) == 0) {
369 	free(d4info->auth.curlflags.cookiejar);
370 	d4info->auth.curlflags.cookiejar = NULL;
371     }
372 
373     if(d4info->auth.curlflags.cookiejar == NULL) {
374 	/* If no cookie file was defined, define a default */
375         char* path = NULL;
376         char* newpath = NULL;
377         int len;
378 	errno = 0;
379 	/* Create the unique cookie file name */
380         len =
381 	  strlen(ncrc_globalstate.tempdir)
382 	  + 1 /* '/' */
383 	  + strlen("ncd4cookies");
384         path = (char*)malloc(len+1);
385         if(path == NULL) return NC_ENOMEM;
386 	snprintf(path,len,"%s/nc4cookies",ncrc_globalstate.tempdir);
387 	/* Create the unique cookie file name */
388         newpath = NC_mktmp(path);
389         free(path);
390 	if(newpath == NULL) {
391 	    fprintf(stderr,"Cannot create cookie file\n");
392 	    goto fail;
393 	}
394 	d4info->auth.curlflags.cookiejar = newpath;
395 	d4info->auth.curlflags.cookiejarcreated = 1;
396 	errno = 0;
397     }
398     assert(d4info->auth.curlflags.cookiejar != NULL);
399 
400     /* Make sure the cookie jar exists and can be read and written */
401     {
402 	FILE* f = NULL;
403 	char* fname = d4info->auth.curlflags.cookiejar;
404 	/* See if the file exists already */
405         f = fopen(fname,"r");
406 	if(f == NULL) {
407 	    /* Ok, create it */
408 	    f = fopen(fname,"w+");
409 	    if(f == NULL) {
410 	        fprintf(stderr,"Cookie file cannot be read and written: %s\n",fname);
411 	        {ret= NC_EPERM; goto fail;}
412 	    }
413 	} else { /* test if file can be written */
414 	    fclose(f);
415 	    f = fopen(fname,"r+");
416 	    if(f == NULL) {
417 	        fprintf(stderr,"Cookie file is cannot be written: %s\n",fname);
418 	        {ret = NC_EPERM; goto fail;}
419 	    }
420 	}
421 	if(f != NULL) fclose(f);
422     }
423 
424     return THROW(ret);
425 
426 fail:
427     return THROW(ret);
428 }
429 
430 void
NCD4_applyclientparamcontrols(NCD4INFO * info)431 NCD4_applyclientparamcontrols(NCD4INFO* info)
432 {
433     const char* value;
434 
435     /* clear the flags */
436     CLRFLAG(info->controls.flags,NCF_CACHE);
437     CLRFLAG(info->controls.flags,NCF_SHOWFETCH);
438     CLRFLAG(info->controls.flags,NCF_NC4);
439     CLRFLAG(info->controls.flags,NCF_NCDAP);
440     CLRFLAG(info->controls.flags,NCF_FILLMISMATCH);
441 
442     /* Turn on any default on flags */
443     SETFLAG(info->controls.flags,DFALT_ON_FLAGS);
444     SETFLAG(info->controls.flags,(NCF_NC4|NCF_NCDAP));
445 
446     if(paramcheck(info,"show","fetch"))
447 	SETFLAG(info->controls.flags,NCF_SHOWFETCH);
448 
449     if(paramcheck(info,"translate","nc4"))
450 	info->controls.translation = NCD4_TRANSNC4;
451 
452     /* Look at the debug flags */
453     if(paramcheck(info,"debug","copy"))
454 	SETFLAG(info->controls.debugflags,NCF_DEBUG_COPY); /* => close */
455 
456     value = getparam(info,"substratename");
457     if(value != NULL)
458 	strncpy(info->controls.substratename,value,NC_MAX_NAME);
459 
460     info->controls.opaquesize = DFALTOPAQUESIZE;
461     value = getparam(info,"opaquesize");
462     if(value != NULL) {
463 	long long len = 0;
464 	if(sscanf(value,"%lld",&len) != 1 || len == 0)
465 	    nclog(NCLOGWARN,"bad [opaquesize] tag: %s",value);
466 	else
467 	    info->controls.opaquesize = (size_t)len;
468     }
469 
470     value = getparam(info,"fillmismatch");
471     if(value != NULL)
472 	SETFLAG(info->controls.flags,NCF_FILLMISMATCH);
473 
474     value = getparam(info,"nofillmismatch");
475     if(value != NULL)
476 	CLRFLAG(info->controls.debugflags,NCF_FILLMISMATCH);
477 }
478 
479 static void
applyclientmetacontrols(NCD4meta * meta)480 applyclientmetacontrols(NCD4meta* meta)
481 {
482     NCD4INFO* info = meta->controller;
483     const char* value = getparam(info,"checksummode");
484     if(value != NULL) {
485         if(strcmp(value,"ignore")==0)
486 	    meta->ignorechecksums = 1;
487     }
488 }
489 
490 /* Search for substring in value of param. If substring == NULL; then just
491    check if param is defined.
492 */
493 static int
paramcheck(NCD4INFO * info,const char * key,const char * subkey)494 paramcheck(NCD4INFO* info, const char* key, const char* subkey)
495 {
496     const char* value;
497     char* p;
498 
499     value = getparam(info, key);
500     if(value == NULL)
501 	return 0;
502     if(subkey == NULL) return 1;
503     p = strstr(value,subkey);
504     if(p == NULL) return 0;
505     p += strlen(subkey);
506     if(*p != '\0' && strchr(checkseps,*p) == NULL) return 0;
507     return 1;
508 }
509 
510 /*
511 Given a parameter key, return its value or NULL if not defined.
512 */
513 static const char*
getparam(NCD4INFO * info,const char * key)514 getparam(NCD4INFO* info, const char* key)
515 {
516     const char* value;
517 
518     if(info == NULL || key == NULL) return NULL;
519     if((value=ncurilookup(info->uri,key)) == NULL)
520 	return NULL;
521     return value;
522 }
523 
524