1 // Copyright 2016 The Servo Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
//! A crate to create static string caches at compile time.
10 //!
11 //! # Examples
12 //!
13 //! With static atoms:
14 //!
15 //! In `Cargo.toml`:
16 //!
17 //! ```toml
18 //! [package]
19 //! build = "build.rs"
20 //!
21 //! [dependencies]
22 //! string_cache = "0.8"
23 //!
24 //! [build-dependencies]
25 //! string_cache_codegen = "0.5"
26 //! ```
27 //!
28 //! In `build.rs`:
29 //!
30 //! ```no_run
31 //! extern crate string_cache_codegen;
32 //!
33 //! use std::env;
34 //! use std::path::Path;
35 //!
36 //! fn main() {
37 //!     string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!")
38 //!         .atoms(&["foo", "bar"])
39 //!         .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))
40 //!         .unwrap()
41 //! }
42 //! ```
43 //!
44 //! In `lib.rs`:
45 //!
46 //! ```ignore
47 //! extern crate string_cache;
48 //!
49 //! mod foo {
50 //!     include!(concat!(env!("OUT_DIR"), "/foo_atom.rs"));
51 //! }
52 //! ```
53 //!
54 //! The generated code will define a `FooAtom` type and a `foo_atom!` macro.
//! The macro can be used in expressions or patterns, with strings listed in `build.rs`.
56 //! For example:
57 //!
58 //! ```ignore
59 //! fn compute_something(input: &foo::FooAtom) -> u32 {
60 //!     match *input {
61 //!         foo_atom!("foo") => 1,
62 //!         foo_atom!("bar") => 2,
63 //!         _ => 3,
64 //!     }
65 //! }
66 //! ```
67 //!
68 
69 #![recursion_limit = "128"]
70 
71 use quote::quote;
72 use std::collections::HashSet;
73 use std::fs::File;
74 use std::io::{self, BufWriter, Write};
75 use std::path::Path;
76 
/// A builder for a static atom set and relevant macros
///
/// The builder collects the strings that should become static atoms, plus
/// optional documentation for the generated items, and then writes the
/// generated Rust code to a writer or a file.
#[derive(Debug, Clone)]
pub struct AtomType {
    /// Path of the generated atom type within the crate, e.g. `foo::FooAtom`.
    path: String,
    /// Optional documentation attached to the generated atom type alias.
    atom_doc: Option<String>,
    /// Optional documentation attached to the generated static set struct.
    static_set_doc: Option<String>,
    /// Name of the generated macro, stored without its trailing `!`.
    macro_name: String,
    /// Optional documentation attached to the generated macro.
    macro_doc: Option<String>,
    /// The strings that will be part of the static set.
    atoms: HashSet<String>,
}
86 
87 impl AtomType {
88     /// Constructs a new static atom set builder
89     ///
90     /// `path` is a path within a crate of the atom type that will be created.
91     /// e.g. `"FooAtom"` at the crate root or `"foo::Atom"` if the generated code
92     /// is included in a `foo` module.
93     ///
94     /// `macro_name` must end with `!`.
95     ///
96     /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate:
97     ///
98     /// ```ignore
99     /// pub type FooAtom = ::string_cache::Atom<FooAtomStaticSet>;
100     /// pub struct FooAtomStaticSet;
101     /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet {
102     ///     // ...
103     /// }
104     /// #[macro_export]
105     /// macro_rules foo_atom {
106     ///    // Expands to: $crate::foo::FooAtom { … }
107     /// }
108     /// ```
new(path: &str, macro_name: &str) -> Self109     pub fn new(path: &str, macro_name: &str) -> Self {
110         assert!(macro_name.ends_with("!"), "`macro_name` must end with '!'");
111         AtomType {
112             path: path.to_owned(),
113             macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(),
114             atom_doc: None,
115             static_set_doc: None,
116             macro_doc: None,
117             atoms: HashSet::new(),
118         }
119     }
120 
121     /// Add some documentation to the generated Atom type alias.
122     ///
123     /// This can help the user know that the type uses interned strings.
124     ///
125     /// Note that `docs` should not contain the `///` at the front of normal docs.
with_atom_doc(&mut self, docs: &str) -> &mut Self126     pub fn with_atom_doc(&mut self, docs: &str) -> &mut Self {
127         self.atom_doc = Some(docs.to_owned());
128         self
129     }
130 
131     /// Add some documentation to the generated static set.
132     ///
133     /// This can help the user know that this type is zero-sized and just references a static
134     /// lookup table, or point them to the `Atom` type alias for more info.
135     ///
136     /// Note that `docs` should not contain the `///` at the front of normal docs.
with_static_set_doc(&mut self, docs: &str) -> &mut Self137     pub fn with_static_set_doc(&mut self, docs: &str) -> &mut Self {
138         self.static_set_doc = Some(docs.to_owned());
139         self
140     }
141 
142     /// Add some documentation to the generated macro.
143     ///
144     /// Note that `docs` should not contain the `///` at the front of normal docs.
with_macro_doc(&mut self, docs: &str) -> &mut Self145     pub fn with_macro_doc(&mut self, docs: &str) -> &mut Self {
146         self.macro_doc = Some(docs.to_owned());
147         self
148     }
149 
150     /// Adds an atom to the builder
atom(&mut self, s: &str) -> &mut Self151     pub fn atom(&mut self, s: &str) -> &mut Self {
152         self.atoms.insert(s.to_owned());
153         self
154     }
155 
156     /// Adds multiple atoms to the builder
atoms<I>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: AsRef<str>,157     pub fn atoms<I>(&mut self, iter: I) -> &mut Self
158     where
159         I: IntoIterator,
160         I::Item: AsRef<str>,
161     {
162         self.atoms
163             .extend(iter.into_iter().map(|s| s.as_ref().to_owned()));
164         self
165     }
166 
167     /// Write generated code to `destination`.
write_to<W>(&mut self, mut destination: W) -> io::Result<()> where W: Write,168     pub fn write_to<W>(&mut self, mut destination: W) -> io::Result<()>
169     where
170         W: Write,
171     {
172         destination.write_all(
173             self.to_tokens()
174                 .to_string()
175                 // Insert some newlines to make the generated code slightly easier to read.
176                 .replace(" [ \"", "[\n\"")
177                 .replace("\" , ", "\",\n")
178                 .replace(" ( \"", "\n( \"")
179                 .replace("; ", ";\n")
180                 .as_bytes(),
181         )
182     }
183 
to_tokens(&mut self) -> proc_macro2::TokenStream184     fn to_tokens(&mut self) -> proc_macro2::TokenStream {
185         // `impl Default for Atom` requires the empty string to be in the static set.
186         // This also makes sure the set in non-empty,
187         // which would cause divisions by zero in rust-phf.
188         self.atoms.insert(String::new());
189 
190         let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
191         let hash_state = phf_generator::generate_hash(&atoms);
192         let phf_generator::HashState { key, disps, map } = hash_state;
193         let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
194         let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect();
195         let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
196         let indices = 0..atoms.len() as u32;
197 
198         let hashes: Vec<u32> = atoms
199             .iter()
200             .map(|string| {
201                 let hash = phf_shared::hash(string, &key);
202                 (hash.g ^ hash.f1) as u32
203             })
204             .collect();
205 
206         let mut path_parts = self.path.rsplitn(2, "::");
207         let type_name = path_parts.next().unwrap();
208         let module = match path_parts.next() {
209             Some(m) => format!("$crate::{}", m),
210             None => format!("$crate"),
211         };
212         let atom_doc = match self.atom_doc {
213             Some(ref doc) => quote!(#[doc = #doc]),
214             None => quote!(),
215         };
216         let static_set_doc = match self.static_set_doc {
217             Some(ref doc) => quote!(#[doc = #doc]),
218             None => quote!(),
219         };
220         let macro_doc = match self.macro_doc {
221             Some(ref doc) => quote!(#[doc = #doc]),
222             None => quote!(),
223         };
224         let new_term =
225             |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site());
226         let static_set_name = new_term(&format!("{}StaticSet", type_name));
227         let type_name = new_term(type_name);
228         let macro_name = new_term(&*self.macro_name);
229         let module = module.parse::<proc_macro2::TokenStream>().unwrap();
230         let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase());
231         let const_names: Vec<_> = atoms
232             .iter()
233             .map(|atom| {
234                 let mut name = atom_prefix.clone();
235                 for c in atom.chars() {
236                     name.push_str(&format!("_{:02X}", c as u32))
237                 }
238                 new_term(&name)
239             })
240             .collect();
241 
242         quote! {
243             #atom_doc
244             pub type #type_name = ::string_cache::Atom<#static_set_name>;
245 
246             #static_set_doc
247             #[derive(PartialEq, Eq, PartialOrd, Ord)]
248             pub struct #static_set_name;
249 
250             impl ::string_cache::StaticAtomSet for #static_set_name {
251                 fn get() -> &'static ::string_cache::PhfStrSet {
252                     static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {
253                         key: #key,
254                         disps: &[#((#disps0, #disps1)),*],
255                         atoms: &[#(#atoms),*],
256                         hashes: &[#(#hashes),*]
257                     };
258                     &SET
259                 }
260                 fn empty_string_index() -> u32 {
261                     #empty_string_index
262                 }
263             }
264 
265             #(
266                 pub const #const_names: #type_name = #type_name::pack_static(#indices);
267             )*
268 
269             #macro_doc
270             #[macro_export]
271             macro_rules! #macro_name {
272                 #(
273                     (#atoms) => { #module::#const_names };
274                 )*
275             }
276         }
277     }
278 
279     /// Create a new file at `path` and write generated code there.
280     ///
281     /// Typical usage:
282     /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))`
write_to_file(&mut self, path: &Path) -> io::Result<()>283     pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> {
284         self.write_to(BufWriter::new(File::create(path)?))
285     }
286 }
287