1 //  ascii_check implementation  ------------------------------------------------//
2 
3 //  Copyright Marshall Clow 2007.
4 //  Based on the tab-check checker by Beman Dawes
5 //
6 //  Distributed under the Boost Software License, Version 1.0.
7 //  (See accompanying file LICENSE_1_0.txt or copy at
8 //  http://www.boost.org/LICENSE_1_0.txt)
9 //  √ -- this is a test.
10 
11 #include "ascii_check.hpp"
12 #include <functional>
13 
14 namespace boost
15 {
16   namespace inspect
17   {
18 
19     static const string gPunct ( "$_{}[]#()<>%:;.?*+-/ˆ&|~!=,\\\"'@^`" );
20 
21    // Legal characters for a source file are defined in section 2.2 of the standard
22    // I have added '@', '^', and '`' to the "legal" chars because they are commonly
23    //    used in comments, and they are strictly ASCII.
24    struct non_ascii : public std::unary_function<char, bool> {
25    public:
non_asciiboost::inspect::non_ascii26       non_ascii () {}
~non_asciiboost::inspect::non_ascii27       ~non_ascii () {}
operator ()boost::inspect::non_ascii28       bool operator () ( char c ) const
29       {
30          if ( c == ' ' ) return false;
31          if ( c >= 'a' && c <= 'z' ) return false;
32          if ( c >= 'A' && c <= 'Z' ) return false;
33          if ( c >= '0' && c <= '9' ) return false;
34       // Horizontal/Vertical tab, newline, and form feed
35          if ( c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f' ) return false;
36          return gPunct.find ( c ) == string::npos;
37       }
38    };
39 
40    struct is_CRLF : public std::unary_function<char, bool> {
41    public:
is_CRLFboost::inspect::is_CRLF42       is_CRLF () {}
~is_CRLFboost::inspect::is_CRLF43       ~is_CRLF () {}
operator ()boost::inspect::is_CRLF44       bool operator () ( char c ) const
45       {
46          return c == '\015' || c == '\012';
47       }
48    };
49 
50    const char *kCRLF = "\012\015";
51 
52 // Given a position in the file, extract and return the line
find_line(const std::string & contents,std::string::const_iterator iter_pos)53    std::string find_line ( const std::string &contents, std::string::const_iterator iter_pos )
54    {
55       std::size_t pos = iter_pos - contents.begin ();
56 
57    // Search backwards for a CR or LR
58       std::size_t start_pos = contents.find_last_of ( kCRLF, pos );
59       std::string::const_iterator line_start = contents.begin () + ( start_pos == std::string::npos ? 0 : start_pos + 1 );
60 
61 
62    // Search forwards for a CR or LF
63       std::size_t end_pos = contents.find_first_of ( kCRLF, pos + 1 );
64       std::string::const_iterator line_end;
65       if ( end_pos == std::string::npos )
66          line_end = contents.end ();
67       else
68          line_end = contents.begin () + end_pos - 1;
69 
70       return std::string ( line_start, line_end );
71    }
72 
ascii_check()73    ascii_check::ascii_check() : m_files_with_errors(0)
74    {
75      register_signature( ".c" );
76      register_signature( ".cpp" );
77      register_signature( ".cxx" );
78      register_signature( ".h" );
79      register_signature( ".hpp" );
80      register_signature( ".hxx" );
81      register_signature( ".ipp" );
82    }
83 
inspect(const string & library_name,const path & full_path,const string & contents)84    void ascii_check::inspect(
85       const string & library_name,
86       const path & full_path,   // example: c:/foo/boost/filesystem/path.hpp
87       const string & contents )     // contents of file to be inspected
88     {
89       if (contents.find( "boostinspect:" "noascii" ) != string::npos) return;
90       string::const_iterator bad_char = std::find_if ( contents.begin (), contents.end (), non_ascii ());
91       if ( bad_char != contents.end ())
92       {
93         ++m_files_with_errors;
94         int ln = std::count( contents.begin(), bad_char, '\n' ) + 1;
95         string the_line = find_line ( contents, bad_char );
96         error( library_name, full_path, "Non-ASCII: " + the_line, ln );
97       }
98     }
99   } // namespace inspect
100 } // namespace boost
101 
102 
103