1 /* grefstring.c: Reference counted strings
2  *
3  * Copyright 2018  Emmanuele Bassi
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /**
20  * SECTION:refstring
21  * @Title: Reference counted strings
22  * @Short_description: Strings with reference counted memory management
23  *
24  * Reference counted strings are normal C strings that have been augmented
25  * with a reference counter to manage their resources. You allocate a new
26  * reference counted string and acquire and release references as needed,
27  * instead of copying the string among callers; when the last reference on
28  * the string is released, the resources allocated for it are freed.
29  *
30  * Typically, reference counted strings can be used when parsing data from
31  * files and storing them into data structures that are passed to various
32  * callers:
33  *
34  * |[<!-- language="C" -->
35  * PersonDetails *
36  * person_details_from_data (const char *data)
37  * {
38  *   // Use g_autoptr() to simplify error cases
39  *   g_autoptr(GRefString) full_name = NULL;
40  *   g_autoptr(GRefString) address =  NULL;
41  *   g_autoptr(GRefString) city = NULL;
42  *   g_autoptr(GRefString) state = NULL;
43  *   g_autoptr(GRefString) zip_code = NULL;
44  *
45  *   // parse_person_details() is defined elsewhere; returns refcounted strings
46  *   if (!parse_person_details (data, &full_name, &address, &city, &state, &zip_code))
47  *     return NULL;
48  *
49  *   if (!validate_zip_code (zip_code))
50  *     return NULL;
51  *
52  *   // add_address_to_cache() and add_full_name_to_cache() are defined
53  *   // elsewhere; they add strings to various caches, using refcounted
54  *   // strings to avoid copying data over and over again
55  *   add_address_to_cache (address, city, state, zip_code);
56  *   add_full_name_to_cache (full_name);
57  *
58  *   // person_details_new() is defined elsewhere; it takes a reference
59  *   // on each string
60  *   PersonDetails *res = person_details_new (full_name,
61  *                                            address,
62  *                                            city,
63  *                                            state,
64  *                                            zip_code);
65  *
66  *   return res;
67  * }
68  * ]|
69  *
70  * In the example above, we have multiple functions taking the same strings
71  * for different uses; with typical C strings, we'd have to copy the strings
72  * every time the life time rules of the data differ from the life time of
73  * the string parsed from the original buffer. With reference counted strings,
74  * each caller can take a reference on the data, and keep it as long as it
75  * needs to own the string.
76  *
 * Reference counted strings can also be "interned" inside a global table
 * owned by GLib; while an interned string has at least one reference,
 * creating a new interned reference counted string with the same contents
 * will return a new reference to the existing string instead of creating
 * a new reference counted string instance. Once the string loses its last
 * reference, it will be automatically removed from the global interned
 * strings table.
83  *
84  * Since: 2.58
85  */
86 
87 #include "config.h"
88 
89 #include "grefstring.h"
90 
91 #include "ghash.h"
92 #include "gmessages.h"
93 #include "grcbox.h"
94 #include "gthread.h"
95 
96 #include <string.h>
97 
98 /* A global table of refcounted strings; the hash table does not own
99  * the strings, just a pointer to them. Strings are interned as long
100  * as they are alive; once their reference count drops to zero, they
101  * are removed from the table
102  */
103 G_LOCK_DEFINE_STATIC (interned_ref_strings);
104 static GHashTable *interned_ref_strings;
105 
106 /**
107  * g_ref_string_new:
108  * @str: (not nullable): a NUL-terminated string
109  *
110  * Creates a new reference counted string and copies the contents of @str
111  * into it.
112  *
113  * Returns: (transfer full) (not nullable): the newly created reference counted string
114  *
115  * Since: 2.58
116  */
117 char *
g_ref_string_new(const char * str)118 g_ref_string_new (const char *str)
119 {
120   char *res;
121   gsize len;
122 
123   g_return_val_if_fail (str != NULL, NULL);
124 
125   len = strlen (str);
126 
127   res = (char *) g_atomic_rc_box_dup (sizeof (char) * len + 1, str);
128 
129   return res;
130 }
131 
132 /**
133  * g_ref_string_new_len:
134  * @str: (not nullable): a string
135  * @len: length of @str to use, or -1 if @str is nul-terminated
136  *
137  * Creates a new reference counted string and copies the contents of @str
138  * into it, up to @len bytes.
139  *
140  * Since this function does not stop at nul bytes, it is the caller's
141  * responsibility to ensure that @str has at least @len addressable bytes.
142  *
143  * Returns: (transfer full) (not nullable): the newly created reference counted string
144  *
145  * Since: 2.58
146  */
147 char *
g_ref_string_new_len(const char * str,gssize len)148 g_ref_string_new_len (const char *str, gssize len)
149 {
150   char *res;
151 
152   g_return_val_if_fail (str != NULL, NULL);
153 
154   if (len < 0)
155     return g_ref_string_new (str);
156 
157   /* allocate then copy as str[len] may not be readable */
158   res = (char *) g_atomic_rc_box_alloc ((gsize) len + 1);
159   memcpy (res, str, len);
160   res[len] = '\0';
161 
162   return res;
163 }
164 
165 /* interned_str_equal: variant of g_str_equal() that compares
166  * pointers as well as contents; this avoids running strcmp()
167  * on arbitrarily long strings, as it's more likely to have
168  * g_ref_string_new_intern() being called on the same refcounted
169  * string instance, than on a different string with the same
170  * contents
171  */
172 static gboolean
interned_str_equal(gconstpointer v1,gconstpointer v2)173 interned_str_equal (gconstpointer v1,
174                     gconstpointer v2)
175 {
176   const char *str1 = v1;
177   const char *str2 = v2;
178 
179   if (v1 == v2)
180     return TRUE;
181 
182   return strcmp (str1, str2) == 0;
183 }
184 
185 /**
186  * g_ref_string_new_intern:
187  * @str: (not nullable): a NUL-terminated string
188  *
189  * Creates a new reference counted string and copies the content of @str
190  * into it.
191  *
192  * If you call this function multiple times with the same @str, or with
193  * the same contents of @str, it will return a new reference, instead of
194  * creating a new string.
195  *
196  * Returns: (transfer full) (not nullable): the newly created reference
197  *   counted string, or a new reference to an existing string
198  *
199  * Since: 2.58
200  */
201 char *
g_ref_string_new_intern(const char * str)202 g_ref_string_new_intern (const char *str)
203 {
204   char *res;
205 
206   g_return_val_if_fail (str != NULL, NULL);
207 
208   G_LOCK (interned_ref_strings);
209 
210   if (G_UNLIKELY (interned_ref_strings == NULL))
211     interned_ref_strings = g_hash_table_new (g_str_hash, interned_str_equal);
212 
213   res = g_hash_table_lookup (interned_ref_strings, str);
214   if (res != NULL)
215     {
216       /* We acquire the reference while holding the lock, to
217        * avoid a potential race between releasing the lock on
218        * the hash table and another thread releasing the reference
219        * on the same string
220        */
221       g_atomic_rc_box_acquire (res);
222       G_UNLOCK (interned_ref_strings);
223       return res;
224     }
225 
226   res = g_ref_string_new (str);
227   g_hash_table_add (interned_ref_strings, res);
228   G_UNLOCK (interned_ref_strings);
229 
230   return res;
231 }
232 
233 /**
234  * g_ref_string_acquire:
235  * @str: a reference counted string
236  *
237  * Acquires a reference on a string.
238  *
239  * Returns: the given string, with its reference count increased
240  *
241  * Since: 2.58
242  */
243 char *
g_ref_string_acquire(char * str)244 g_ref_string_acquire (char *str)
245 {
246   g_return_val_if_fail (str != NULL, NULL);
247 
248   return g_atomic_rc_box_acquire (str);
249 }
250 
251 static void
remove_if_interned(gpointer data)252 remove_if_interned (gpointer data)
253 {
254   char *str = data;
255 
256   G_LOCK (interned_ref_strings);
257 
258   if (G_LIKELY (interned_ref_strings != NULL))
259     {
260       g_hash_table_remove (interned_ref_strings, str);
261 
262       if (g_hash_table_size (interned_ref_strings) == 0)
263         g_clear_pointer (&interned_ref_strings, g_hash_table_destroy);
264     }
265 
266   G_UNLOCK (interned_ref_strings);
267 }
268 
269 /**
270  * g_ref_string_release:
271  * @str: a reference counted string
272  *
273  * Releases a reference on a string; if it was the last reference, the
274  * resources allocated by the string are freed as well.
275  *
276  * Since: 2.58
277  */
278 void
g_ref_string_release(char * str)279 g_ref_string_release (char *str)
280 {
281   g_return_if_fail (str != NULL);
282 
283   g_atomic_rc_box_release_full (str, remove_if_interned);
284 }
285 
286 /**
287  * g_ref_string_length:
288  * @str: a reference counted string
289  *
290  * Retrieves the length of @str.
291  *
292  * Returns: the length of the given string, in bytes
293  *
294  * Since: 2.58
295  */
296 gsize
g_ref_string_length(char * str)297 g_ref_string_length (char *str)
298 {
299   g_return_val_if_fail (str != NULL, 0);
300 
301   return g_atomic_rc_box_get_size (str) - 1;
302 }
303