1 /*
2  THIS COMMENT MAY NOT BE UP TO DATE. Sep 19 2011.
3 
4  The idea is as follows. We use the private field in the xmlDocPtr
5  object to store information about nodes that are out in the wild,
6  i.e. that have been returned to R across the .Call() interface.
7  Each time a node is returned, we increment the number of references
8  to that node by incrementing a table in the xmlDocPtr.
9  Each time these R objects are garbage collected, we decrement the
 reference count.  When the number of references to that node goes to 0,
11  we remove that entry from the table. When all the node entries
12  are removed and the document itself is no longer being pointed to,
13  we free the document.
14 
15  What about circularity? Does it occur?
16  What happens when we reparent a node?
17 
18  What happens when we put a node into an R object
19   e.g.  x = node[[2]]
20         y[[3]] = x
21   Will R's garbage collection handle this for us?
22 
23 */
24 
25 
26 /*
27   This now contains the code related to our memory management.
28  */
29 
30 #include "Utils.h"
31 #include "NodeGC.h"
32 
33 
34 
/*
  Release one reference to the document held by the external pointer `ref`.

  If the pointer is NULL, or the document is flagged by
  IS_NOT_OUR_DOC_TO_TOUCH(), nothing but the external pointer is touched.
  Otherwise the counter stored in doc->_private is decremented; while it
  is still non-zero the document is kept.  When it reaches zero the
  counter is released and the document is freed.  A document whose
  _private field is NULL is NOT freed here.

  In every case the external pointer is cleared before returning.
 */
void R_xmlFreeDoc(SEXP ref)
{
    xmlDocPtr doc = (xmlDocPtr) R_ExternalPtrAddr(ref);
    int *counter;

    /* Nothing to manage: just disconnect the R object. */
    if(!doc || IS_NOT_OUR_DOC_TO_TOUCH(doc)) {
        R_ClearExternalPtr(ref);
        return;
    }

    counter = doc->_private;

    if(counter) {
        --(*counter);
        if(*counter != 0) {
            /* Nodes from this document are still referenced elsewhere. */
#ifdef R_XML_DEBUG
            fprintf(stderr, "Not freeing XML document %p (%s); still has %d references in the wild\n", doc, doc->URL ? doc->URL : "?", *counter);
#endif
            R_ClearExternalPtr(ref);
            return;
        }
    }

#ifdef R_XML_DEBUG
    {
        const xmlChar *url = doc->URL ? doc->URL : (doc->name ? doc->name : (const xmlChar *)"?? (internally created)");
        fprintf(stderr, "Cleaning up document %p, %s, has children %d\n", (void *) doc, url, (int) (doc->children != NULL));
    }
#endif

    /* Only documents that carried a counter are actually freed. */
    if(counter) {
        free(counter);
        doc->_private = NULL;

#ifdef R_XML_DEBUG
        fprintf(stderr, "Freeing the XML doc %p\n", doc);
#endif
        xmlFreeDoc(doc);
        R_numXMLDocsFreed++;
    }

    R_ClearExternalPtr(ref);
}
71 
72 
73 SEXP
RS_XML_freeDoc(SEXP ref)74 RS_XML_freeDoc(SEXP ref)
75 {
76     R_xmlFreeDoc(ref);
77     return(R_NilValue);
78 }
79 
80 
81 SEXP
RS_XML_forceFreeDoc(SEXP ref)82 RS_XML_forceFreeDoc(SEXP ref)
83 {
84     xmlDocPtr doc;
85     doc = (xmlDocPtr) R_ExternalPtrAddr(ref);
86     xmlFreeDoc(doc);
87     return(R_NilValue);
88 }
89 
90 
91 
92 
93 /* This is a finalizer that removes the nodes and disassociates the
94    node and the document and then frees the document structure.
95 
96    Does xmlFreeDoc() deal with the URL and name fields in the doc?
97 
98 XXX With the nodes and document under garbage collection, do we really
99 need this?
100 */
/*
  Free the document held by the external pointer `ref`, but first detach
  its first child so that node is not freed along with the document.

  A document with no children is simply freed; the previous code
  dereferenced the (NULL) child pointer in that case.

  NOTE(review): only the first child is unlinked; any further top-level
  siblings are still attached when xmlFreeDoc() is called.  Only the
  detached node's own doc field is reset, not those of its descendants.

  The external pointer is always cleared before returning.
 */
void R_xmlFreeDocLeaveChildren(SEXP ref)
{
    xmlDocPtr doc = (xmlDocPtr) R_ExternalPtrAddr(ref);

    if(doc) {
        xmlNodePtr first = doc->children;
#ifdef R_XML_DEBUG
        const xmlChar *url = doc->URL ? doc->URL : (doc->name ? doc->name : (const xmlChar *) "?? (internally created)");
        fprintf(stderr, "Cleaning up document but not children: %p, %s\n", (void *) doc, url);
#endif
        if(first) {
            /* Disassociate the node from the document before freeing it. */
            xmlUnlinkNode(first);
            first->doc = NULL;
        }
        xmlFreeDoc(doc);
        R_numXMLDocsFreed++;
    }
    R_ClearExternalPtr(ref);
}
120 
121 
122 
/* Sentinel objects used by the memory-management code in this file.

   R_XML_MemoryMgrMarker: marker value; presumably what
   R_MEMORY_MANAGER_MARKER (stored in slot [1] of a reference counter)
   refers to - confirm against NodeGC.h.

   R_XML_NoMemoryMgmt: its ADDRESS is stored in a document's _private
   field by R_xmlSetNoMemoryMgmt() and compared against by
   R_XML_getManageMemory() and clearNodeMemoryManagement(). */
int R_XML_MemoryMgrMarker = 1010101011;
int R_XML_NoMemoryMgmt = 111111111;
125 
126 /*
127   This returns a value that indicates whether we should
128   add a finalizer and put the XML node under a C finalizer
129   to reduce the reference count.
130   user is an R object that should be an integer vector of length
131   1 and should be 0, 1 or NA  (effectively a logical)
  If it is NA, we consult the document object in which the node
  is located (or NULL if not part of a document). This document
  object can have a value in the _private field (the address of
  R_XML_NoMemoryMgmt) that tells us not to manage its nodes.
136  */
137 int
R_XML_getManageMemory(SEXP user,xmlDocPtr doc,xmlNodePtr node)138 R_XML_getManageMemory(SEXP user, xmlDocPtr doc, xmlNodePtr node)
139 {
140 
141     int manage;
142 
143     if(TYPEOF(user) == STRSXP || TYPEOF(user) == EXTPTRSXP)
144 	return(0);
145 
146     manage = INTEGER(user)[0]; // TYPEOF(user) == INTSXP ? INTEGER(user)[0] : INTEGER(asInteger(user))[0];
147     if(manage == R_NaInt) {
148         if(!doc)
149 	  manage = 1;
150         else
151 	  manage = doc->_private != &R_XML_NoMemoryMgmt;
152     }
153 #ifdef R_XML_DEBUG
154     if(manage)
155 	fprintf(stderr, "getManageMemory (%p) %d  (type = %d, name = %s)\n", doc, manage, node->type, node->name);fflush(stderr);
156 #endif
157     return(manage);
158 }
159 
160 SEXP
R_xmlSetNoMemoryMgmt(SEXP r_doc)161 R_xmlSetNoMemoryMgmt(SEXP r_doc)
162 {
163     xmlDocPtr doc;
164     doc = (xmlDocPtr) R_ExternalPtrAddr(r_doc);
165     doc->_private = &R_XML_NoMemoryMgmt;
166     return(NULL_USER_OBJECT);
167 }
168 
169 void
initDocRefCounter(xmlDocPtr doc)170 initDocRefCounter(xmlDocPtr doc)
171 {
172     int *val;
173     if(doc->_private)
174 	return;
175 
176     doc->_private = calloc(2, sizeof(int));
177     val = (int *) doc->_private;
178     val[1] = R_MEMORY_MANAGER_MARKER;
179 }
180 
181 void
incrementDocRefBy(xmlDocPtr doc,int num)182 incrementDocRefBy(xmlDocPtr doc, int num)
183 {
184     int *val;
185     if(!doc || IS_NOT_OUR_DOC_TO_TOUCH(doc))
186 	return;
187 
188     if(!doc->_private) {
189 	initDocRefCounter(doc);
190     }
191 
192     val = (int *) doc->_private;
193 
194     (*val) += num;
195 }
196 
197 void
incrementDocRef(xmlDocPtr doc)198 incrementDocRef(xmlDocPtr doc)
199 {
200   incrementDocRefBy(doc, 1);
201 }
202 
/* Value of the int that n->_private points to, or 0 when _private is
   NULL.  The argument and the whole expansion are parenthesised so the
   macro can be used inside larger expressions.  Note that `n` is
   evaluated twice. */
#define GET_NODE_COUNT(n) \
   ((n)->_private ? *((int *) (n)->_private) : 0)
205 
206 
207 
/*
  Sum of the reference counts of `node` and all of its descendants.

  Returns 0 for a NULL node or one flagged by IS_NOT_OUR_NODE_TO_TOUCH()
  (in which case its children are not visited either).  The child list
  is only read after the NULL check; previously node->children was
  evaluated before it.
 */
int getNodeCount(xmlNodePtr node)
{
  int val;
  xmlNodePtr p;

  if(!node || IS_NOT_OUR_NODE_TO_TOUCH(node))
    return(0);

  val = GET_NODE_COUNT(node);
  for(p = node->children; p; p = p->next)
    val += getNodeCount(p);

  return(val);
}
223 
224 
225 void
internal_incrementNodeRefCount(xmlNodePtr node)226 internal_incrementNodeRefCount(xmlNodePtr node)
227 {
228     int *val;
229     if(!node || IS_NOT_OUR_NODE_TO_TOUCH(node) || !node->_private)
230 	return;
231     val = (int *) node->_private;
232     (*val)++;
233 }
234 
235 
236 SEXP
R_getXMLRefCount(SEXP rnode)237 R_getXMLRefCount(SEXP rnode)
238 {
239     xmlNodePtr node = (xmlNodePtr) R_ExternalPtrAddr(rnode);
240     if(!node || IS_NOT_OUR_NODE_TO_TOUCH(node) || !node->_private)
241 	return(ScalarInteger(-1));
242     return(ScalarInteger(*((int *) node->_private)));
243 }
244 
245 int
checkDescendantsInR(xmlNodePtr node,int process)246 checkDescendantsInR(xmlNodePtr node, int process)
247 {
248     xmlNodePtr p;
249     if(!node && (process || IS_NOT_OUR_NODE_TO_TOUCH(node)))
250 	return(0);
251 
252     if(node->_private)
253 	return(1);
254 
255     p = node->children;
256     while(p) {
257 	if(checkDescendantsInR(p, 0))
258 	    return(1);
259 	p = p->next;
260     }
261     return(0);
262 }
263 
/*
  Drop one reference from `node` and release whatever can be released.

  Returns 1 if this call freed something (the owning document, the node
  itself, or the top of the tree the node lives in), and 0 otherwise.
  Nothing is done (0 is returned) for a NULL node, a node flagged by
  IS_NOT_OUR_NODE_TO_TOUCH(), or a node with no _private counter.
 */
int
internal_decrementNodeRefCount(xmlNodePtr node)
{
    int *val, status = 0;
       /* */
    if(!node || IS_NOT_OUR_NODE_TO_TOUCH(node))
                                 /* if node->_private == NULL, should
				  * we free this node?, i.e. if it is
				  * not in a parent or a document.
                                  No! Basically we shouldn't get here
                                  if we have not set the _private. We
                                  set the finalizer having set the _private */
	return(status);

    if(!node->_private)
	return(status);


    /*  Get the value of the reference count and decrement it by 1.
        If we are now at 0, then we can potentially free this node.
        Certainly, if we are at 0, we should remove the reference
        count memory altogether.
        Now that _we_ no longer need the node, perhaps we can free it.
        But we have to make certain that we don't free it if
         a) it is a child of another node or
         b) if it is within a document and that document is still "in  play".
              To determine if the document is "in play" we look at its
              reference count.
              We decrement it by one since we added one to it for this
              node.
              If that makes the document's reference count 0, then we
              free it.

     */
    val = (int *) node->_private;
    (*val)--;
#ifdef R_XML_DEBUG
    fprintf(stderr, "decremented node (%s, %d) to %d (%p)   %s\n", node->name, node->type, *val, node, *val == 0 ? "***" : "");fflush(stderr);
#endif
    if(*val == 0) {

	/* Last reference to this node is gone: drop its counter. */
	free(node->_private);
        node->_private = NULL;
	if(node->doc && !IS_NOT_OUR_DOC_TO_TOUCH(node->doc)) {
	    /* Case 1: the node belongs to a document we manage.  Give
	       back the reference this node held on the document. */
	    val = (int *) node->doc->_private;
	    if(val) (*val)--;
	    if(!val || *val == 0) {
		/* Consolidate with R_xmlFreeDoc */
		/* The document is freed both when its count reaches 0
		   and when it has no counter at all. */
#ifdef R_XML_DEBUG
		fprintf(stderr, "releasing document (for node) %p %s (%s)\n", node->doc, node->doc->URL ? node->doc->URL : "?", val ? "has zero count" : "no count");fflush(stderr);
#endif
		if(val)
                    free(node->doc->_private);
		node->doc->_private = NULL;
		xmlFreeDoc(node->doc);
                status = 1;
		R_numXMLDocsFreed++;
	    }
	} else if(!node->parent) {
	    /* Case 2: If the node is not in a tree by having a parent, then
	     * check the children and if they aren't being referenced
               by an R variable, we can free those too. */
            int hold;
	    hold = checkDescendantsInR(node, 1);
            if(!hold) {
#ifdef R_XML_DEBUG
		fprintf(stderr, "Calling xmlFreeNode() for %p (type = %d)\n", node, node->type);fflush(stderr);
#endif
   	       xmlFreeNode(node);
	       status = 1;
	    }
	} else {
            /* Case 3: So we have a parent.  But what if that parent is not
               being held as an R variable. We need to free the node.
               We need to make this smarter to see what parts of the
               tree we can remove.  For instance, we might be holding
               onto this one, but not the parent, but that parent has
               a second child which is being held onto.

               So we go to the top of the node tree and check for its descendants
            */
	    int hold;
            xmlNodePtr p = node->parent;
	    while(p->parent)
                p = p->parent;

	    hold = checkDescendantsInR(p, 0);
	    if(!hold) {
#ifdef R_XML_DEBUG
		/* NOTE(review): the message prints `node`, but it is `p`
		   (the top of the tree) that is released below. */
		fprintf(stderr, "Calling xmlFree() for %p\n", node);fflush(stderr);
#endif
		/* NOTE(review): xmlFree() is called on the top-most
		   ancestor only; nothing here releases the nodes below
		   it.  The original XXX asks whether xmlFreeNode() was
		   intended.  Also confirm that `p` cannot be a document
		   node we were told not to touch. */
		xmlFree(p); //XXX xmlFree() or xmlFreeNode() ?
		status = 1;
	    }

	}
    }

    return(status);
}
364 
365 
366 
367 void
decrementNodeRefCount(SEXP rnode)368 decrementNodeRefCount(SEXP rnode)
369 {
370     xmlNodePtr node = (xmlNodePtr) R_ExternalPtrAddr(rnode);
371     int status;
372     status = internal_decrementNodeRefCount(node);
373     if(status)
374 	R_ClearExternalPtr(rnode);
375 }
376 
377 
378 int
clearNodeMemoryManagement(xmlNodePtr node)379 clearNodeMemoryManagement(xmlNodePtr node)
380 {
381     xmlNodePtr tmp;
382     int ctr = 0;
383     if(node->_private) {
384 	int a, b;
385 	// This compares and int and a pointer.  Changed to be like NodeGC.h
386 	// int isOurs = (a = node->_private != &R_XML_NoMemoryMgmt) && (b = ((int *)(node->_private))[1] == (int *) &R_XML_MemoryMgrMarker);
387 	int isOurs = (a = node->_private != &R_XML_NoMemoryMgmt) && (b = ((int *)(node->_private))[1] == R_MEMORY_MANAGER_MARKER);
388 	if(isOurs) {
389 #if R_XML_DEBUG
390  fprintf(stderr, "Removing memory management from %p, %s\n", node, node->name);fflush(stderr);
391 #endif
392    	   free(node->_private);
393    	   ctr++;
394 	}
395 	node->_private = NULL;
396     }
397 
398     tmp = node->children;
399     while(tmp) {
400         if(tmp)
401    	   ctr += clearNodeMemoryManagement(tmp);
402 	tmp = tmp->next;
403     }
404     return(ctr);
405 }
406 
407 SEXP
R_clearNodeMemoryManagement(SEXP r_node)408 R_clearNodeMemoryManagement(SEXP r_node)
409 {
410    xmlNodePtr node = (xmlNodePtr) R_ExternalPtrAddr(r_node);
411    int val;
412 
413    if(!node)
414        return(ScalarInteger(-1));
415 
416    val = clearNodeMemoryManagement(node);
417 
418    return(ScalarInteger(val));
419 }
420 
421 
422 
423 SEXP
R_xmlRefCountEnabled()424 R_xmlRefCountEnabled()
425 {
426   int ans =
427 #ifdef XML_REF_COUNT_NODES
428       1;
429 #else
430       0;
431 #endif
432       return(ScalarLogical(ans));
433 }
434