1 /*!
2  * ws: a node.js websocket client
3  * Copyright(c) 2011 Einar Otto Stangvik <einaros@gmail.com>
4  * MIT Licensed
5  */
6 
7 #include <v8.h>
8 #include <node.h>
9 #include <node_buffer.h>
10 #include <node_object_wrap.h>
11 #include <stdlib.h>
12 #include <wchar.h>
13 #include <stdio.h>
14 #include "nan.h"
15 
16 using namespace v8;
17 using namespace node;
18 
/*
 * Code point limits used when checking decoded values.
 * The replacement lists are fully parenthesized so the macros behave as a
 * single operand in any expression (e.g. as the argument of sizeof).
 */
#define UNI_SUR_HIGH_START   ((uint32_t) 0xD800)      /* first surrogate code point */
#define UNI_SUR_LOW_END      ((uint32_t) 0xDFFF)      /* last surrogate code point */
#define UNI_REPLACEMENT_CHAR ((uint32_t) 0x0000FFFD)  /* U+FFFD */
#define UNI_MAX_LEGAL_UTF32  ((uint32_t) 0x0010FFFF)  /* highest code point */

/*
 * Indexed by the first byte of a sequence: how many further bytes that lead
 * byte announces. Entries 0xF8..0xFF still announce 4 or 5 trailing bytes;
 * such sequences are rejected later because isLegalUTF8 refuses any length
 * above 4.
 */
static const uint8_t trailingBytesForUTF8[256] = {
  /* 0x00 - 0x1F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x20 - 0x3F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x40 - 0x5F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x60 - 0x7F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x80 - 0x9F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0xA0 - 0xBF */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0xC0 - 0xDF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xE0 - 0xFF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Indexed by the number of trailing bytes. is_valid_utf8 sums the raw bytes
 * of a sequence (shifting left by 6 between bytes) and then subtracts the
 * matching entry, which cancels the marker bits of the lead byte and the
 * 0x80 marker of every continuation byte, leaving the code point.
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000, 0x00003080, 0x000E2080,
  0x03C82080, 0xFA082080, 0x82082080
};
39 
/*
 * Checks the byte-level shape of a single UTF-8 sequence.
 *
 * source: first byte of the sequence; `length` bytes are read from it.
 * length: number of bytes in the sequence, lead byte included.
 *
 * Returns 1 when the sequence is acceptable, 0 otherwise. Any length outside
 * 1..4 is rejected (RFC 3629 makes 5- and 6-byte forms illegal).
 *
 * The function does not verify that `length` matches what the lead byte
 * announces; the caller is expected to derive `length` from the lead byte.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
  if (length < 1 || length > 4)
    return 0;

  const uint8_t lead = source[0];

  /* Third and fourth bytes, when present, must be plain continuation bytes. */
  for (int i = length - 1; i >= 2; --i) {
    if (source[i] < 0x80 || source[i] > 0xBF)
      return 0;
  }

  if (length >= 2) {
    /*
     * The second byte is a continuation byte as well, but four lead bytes
     * narrow its range. The lower bound of 0x80 is enforced for every lead
     * byte, including 0xED and 0xF4, so a non-continuation byte can never
     * slip through in second position.
     */
    uint8_t lowest = 0x80;
    uint8_t highest = 0xBF;
    switch (lead) {
      case 0xE0: lowest = 0xA0;  break;  /* excludes overlong 3-byte forms */
      case 0xED: highest = 0x9F; break;  /* excludes U+D800..U+DFFF */
      case 0xF0: lowest = 0x90;  break;  /* excludes overlong 4-byte forms */
      case 0xF4: highest = 0x8F; break;  /* excludes values above U+10FFFF */
      default:                   break;
    }
    const uint8_t second = source[1];
    if (second < lowest || second > highest)
      return 0;
  }

  /* 0x80..0xBF are continuation bytes, 0xC0/0xC1 only start overlong forms. */
  if (lead >= 0x80 && lead < 0xC2)
    return 0;

  /* Lead bytes above 0xF4 would encode values beyond U+10FFFF. */
  if (lead > 0xF4)
    return 0;

  return 1;
}
67 
is_valid_utf8(size_t len,char * value)68 int is_valid_utf8 (size_t len, char *value)
69 {
70   /* is the string valid UTF-8? */
71   for (unsigned int i = 0; i < len; i++) {
72     uint32_t ch = 0;
73     uint8_t  extrabytes = trailingBytesForUTF8[(uint8_t) value[i]];
74 
75     if (extrabytes + i >= len)
76       return 0;
77 
78     if (isLegalUTF8 ((uint8_t *) (value + i), extrabytes + 1) == 0) return 0;
79 
80     switch (extrabytes) {
81       case 5 : ch += (uint8_t) value[i++]; ch <<= 6;
82       case 4 : ch += (uint8_t) value[i++]; ch <<= 6;
83       case 3 : ch += (uint8_t) value[i++]; ch <<= 6;
84       case 2 : ch += (uint8_t) value[i++]; ch <<= 6;
85       case 1 : ch += (uint8_t) value[i++]; ch <<= 6;
86       case 0 : ch += (uint8_t) value[i];
87     }
88 
89     ch -= offsetsFromUTF8[extrabytes];
90 
91     if (ch <= UNI_MAX_LEGAL_UTF32) {
92       if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
93         return 0;
94     } else {
95       return 0;
96     }
97   }
98 
99   return 1;
100 }
101 
102 class Validation : public ObjectWrap
103 {
104 public:
105 
Initialize(v8::Handle<v8::Object> target)106   static void Initialize(v8::Handle<v8::Object> target)
107   {
108     NanScope();
109     Local<FunctionTemplate> t = NanNew<FunctionTemplate>(New);
110     t->InstanceTemplate()->SetInternalFieldCount(1);
111     NODE_SET_METHOD(t, "isValidUTF8", Validation::IsValidUTF8);
112     target->Set(NanSymbol("Validation"), t->GetFunction());
113   }
114 
115 protected:
116 
NAN_METHOD(New)117   static NAN_METHOD(New)
118   {
119     NanScope();
120     Validation* validation = new Validation();
121     validation->Wrap(args.This());
122     NanReturnValue(args.This());
123   }
124 
NAN_METHOD(IsValidUTF8)125   static NAN_METHOD(IsValidUTF8)
126   {
127     NanScope();
128     if (!Buffer::HasInstance(args[0])) {
129       return NanThrowTypeError("First argument needs to be a buffer");
130     }
131     Local<Object> buffer_obj = args[0]->ToObject();
132     char *buffer_data = Buffer::Data(buffer_obj);
133     size_t buffer_length = Buffer::Length(buffer_obj);
134     NanReturnValue(is_valid_utf8(buffer_length, buffer_data) == 1 ? NanTrue() : NanFalse());
135   }
136 };
137 
init(Handle<Object> target)138 extern "C" void init (Handle<Object> target)
139 {
140   NanScope();
141   Validation::Initialize(target);
142 }
143 
144 NODE_MODULE(validation, init)
145 
146