1 // Copyright 2010 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Author: jdtang@google.com (Jonathan Tang)
16 
17 #include "string_buffer.h"
18 
19 #include <assert.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <strings.h>
23 
24 #include "string_piece.h"
25 #include "util.h"
26 
27 struct GumboInternalParser;
28 
// Initial capacity (in chars) given to every buffer set up by
// gumbo_string_buffer_init.
// Size chosen via statistical analysis of ~60K websites.
// 99% of text nodes and 98% of attribute names/values fit in this initial size.
static const size_t kDefaultStringBufferSize = 5;
32 
// Ensures |buffer| can hold |additional_chars| more chars past its current
// length, growing the backing storage when it cannot.
//
// Capacity normally grows by repeated doubling.  When growth is required, a
// new block is obtained from gumbo_parser_allocate, the first buffer->length
// chars are copied into it, and the old block is handed to
// gumbo_parser_deallocate.  buffer->length is never modified here.
static void maybe_resize_string_buffer(struct GumboInternalParser* parser,
    size_t additional_chars, GumboStringBuffer* buffer) {
  const size_t max_size = (size_t) -1;
  // Unsigned wraparound in this sum is deliberate: gumbo_string_buffer_reserve
  // passes (min_capacity - length), which may itself have wrapped, and the
  // modular sum still yields min_capacity.
  size_t new_length = buffer->length + additional_chars;
  size_t new_capacity = buffer->capacity;
  if (new_capacity == 0 && new_length > 0) {
    // Doubling zero never makes progress; seed the growth sequence instead of
    // spinning forever.
    new_capacity = 1;
  }
  while (new_capacity < new_length) {
    if (new_capacity > max_size / 2) {
      // Doubling would wrap around size_t (eventually reaching 0 and looping
      // forever), so settle for exactly the size that was asked for.
      new_capacity = new_length;
      break;
    }
    new_capacity *= 2;
  }
  if (new_capacity != buffer->capacity) {
    char* new_data = gumbo_parser_allocate(parser, new_capacity);
    if (buffer->length > 0) {
      // Skip the copy for an empty buffer so memcpy is never handed a
      // possibly-null source pointer.
      memcpy(new_data, buffer->data, buffer->length);
    }
    gumbo_parser_deallocate(parser, buffer->data);
    buffer->data = new_data;
    buffer->capacity = new_capacity;
  }
}
48 
// Prepares |output| as an empty buffer: length 0, capacity
// kDefaultStringBufferSize, with storage of that size obtained from
// gumbo_parser_allocate.
void gumbo_string_buffer_init(
    struct GumboInternalParser* parser, GumboStringBuffer* output) {
  const size_t initial_capacity = kDefaultStringBufferSize;
  char* storage = gumbo_parser_allocate(parser, initial_capacity);
  output->capacity = initial_capacity;
  output->length = 0;
  output->data = storage;
}
55 
// Grows |output| (if needed) so that its capacity is at least |min_capacity|.
// The length of the buffer is left untouched.
void gumbo_string_buffer_reserve(struct GumboInternalParser* parser,
    size_t min_capacity, GumboStringBuffer* output) {
  // The resize helper works in terms of "extra chars beyond the current
  // length", so express the request that way.  If min_capacity is below the
  // current length this difference wraps around, but the helper adds the
  // length back with the same modular arithmetic and ends up with
  // min_capacity again.
  const size_t extra_chars = min_capacity - output->length;
  maybe_resize_string_buffer(parser, extra_chars, output);
}
60 
// Appends the UTF-8 encoding of code point |c| to |output|, growing the buffer
// first if necessary.  No range validation is performed on |c|.
void gumbo_string_buffer_append_codepoint(
    struct GumboInternalParser* parser, int c, GumboStringBuffer* output) {
  // |trailing| counts only the continuation bytes that follow the lead byte,
  // i.e. one less than the full encoded size; |lead_bits| is the marker OR-ed
  // into the lead byte for that size.
  int trailing;
  int lead_bits;
  if (c > 0xffff) {
    trailing = 3;
    lead_bits = 0xf0;
  } else if (c > 0x7ff) {
    trailing = 2;
    lead_bits = 0xe0;
  } else if (c > 0x7f) {
    trailing = 1;
    lead_bits = 0xc0;
  } else {
    trailing = 0;
    lead_bits = 0;
  }

  maybe_resize_string_buffer(parser, trailing + 1, output);

  // Write through a cursor positioned at the current end of the buffer; the
  // pointer is taken after the resize because the storage may have moved.
  char* cursor = output->data + output->length;
  *cursor++ = lead_bits | (c >> (trailing * 6));
  // Emit the remaining payload six bits at a time, most significant first.
  // For a single-byte encoding the shift starts negative and the loop is
  // skipped entirely.
  for (int shift = (trailing - 1) * 6; shift >= 0; shift -= 6) {
    *cursor++ = 0x80 | (0x3f & (c >> shift));
  }
  output->length += trailing + 1;
}
86 
// Appends the str->length chars starting at str->data to the end of |output|,
// growing the buffer first if necessary.
void gumbo_string_buffer_append_string(struct GumboInternalParser* parser,
    GumboStringPiece* str, GumboStringBuffer* output) {
  const size_t piece_length = str->length;
  maybe_resize_string_buffer(parser, piece_length, output);
  // Compute the destination only after the resize, since the storage may have
  // been replaced.
  char* tail = output->data + output->length;
  memcpy(tail, str->data, piece_length);
  output->length += piece_length;
}
93 
// Returns a NUL-terminated copy of the buffer's current contents in storage
// freshly obtained from gumbo_parser_allocate.  |input| itself is not
// modified.
char* gumbo_string_buffer_to_string(
    struct GumboInternalParser* parser, GumboStringBuffer* input) {
  const size_t content_length = input->length;
  // One extra char for the terminator, which the buffer does not store.
  char* copy = gumbo_parser_allocate(parser, content_length + 1);
  memcpy(copy, input->data, content_length);
  copy[content_length] = '\0';
  return copy;
}
101 
// Resets |input| to empty by zeroing its length.  The allocated storage and
// capacity are kept as they are.
void gumbo_string_buffer_clear(
    struct GumboInternalParser* parser, GumboStringBuffer* input) {
  (void) parser;  // Not needed: nothing is allocated or released here.
  input->length = 0;
}
106 
// Hands the buffer's storage to gumbo_parser_deallocate.  The fields of
// |buffer| are not reset, so buffer->data must not be used afterwards.
void gumbo_string_buffer_destroy(
    struct GumboInternalParser* parser, GumboStringBuffer* buffer) {
  char* storage = buffer->data;
  gumbo_parser_deallocate(parser, storage);
}
111