1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //! Low-level column reader and writer APIs.
19 //!
20 //! This API is designed for reading and writing column values, definition and repetition
21 //! levels directly.
22 //!
23 //! # Example of writing and reading data
24 //!
25 //! Data has the following format:
26 //! ```text
27 //! +---------------+
28 //! |         values|
29 //! +---------------+
30 //! |[1, 2]         |
31 //! |[3, null, null]|
32 //! +---------------+
33 //! ```
34 //!
35 //! The example uses the column writer and reader APIs to write raw values with their
36 //! definition and repetition levels, then reads them back to verify the round trip.
37 //!
38 //! ```rust,no_run
39 //! use std::{fs, path::Path, rc::Rc};
40 //!
41 //! use parquet::{
42 //!     column::{reader::ColumnReader, writer::ColumnWriter},
43 //!     file::{
44 //!         properties::WriterProperties,
45 //!         reader::{FileReader, SerializedFileReader},
46 //!         writer::{FileWriter, SerializedFileWriter},
47 //!     },
48 //!     schema::parser::parse_message_type,
49 //! };
50 //!
51 //! let path = Path::new("/path/to/column_sample.parquet");
52 //!
53 //! // Writing data using column writer API.
54 //!
55 //! let message_type = "
56 //!   message schema {
57 //!     optional group values (LIST) {
58 //!       repeated group list {
59 //!         optional INT32 element;
60 //!       }
61 //!     }
62 //!   }
63 //! ";
64 //! let schema = Rc::new(parse_message_type(message_type).unwrap());
65 //! let props = Rc::new(WriterProperties::builder().build());
66 //! let file = fs::File::create(path).unwrap();
67 //! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
68 //! let mut row_group_writer = writer.next_row_group().unwrap();
69 //! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
70 //!     match col_writer {
71 //!         // You can also use the `get_typed_column_writer` method to extract a typed writer.
72 //!         ColumnWriter::Int32ColumnWriter(ref mut typed_writer) => {
73 //!             typed_writer
74 //!                 .write_batch(&[1, 2, 3], Some(&[3, 3, 3, 2, 2]), Some(&[0, 1, 0, 1, 1]))
75 //!                 .unwrap();
76 //!         }
77 //!         _ => {}
78 //!     }
79 //!     row_group_writer.close_column(col_writer).unwrap();
80 //! }
81 //! writer.close_row_group(row_group_writer).unwrap();
82 //! writer.close().unwrap();
83 //!
84 //! // Reading data using column reader API.
85 //!
86 //! let file = fs::File::open(path).unwrap();
87 //! let reader = SerializedFileReader::new(file).unwrap();
88 //! let metadata = reader.metadata();
89 //!
90 //! let mut res = Ok((0, 0));
91 //! let mut values = vec![0; 8];
92 //! let mut def_levels = vec![0; 8];
93 //! let mut rep_levels = vec![0; 8];
94 //!
95 //! for i in 0..metadata.num_row_groups() {
96 //!     let row_group_reader = reader.get_row_group(i).unwrap();
97 //!     let row_group_metadata = metadata.row_group(i);
98 //!
99 //!     for j in 0..row_group_metadata.num_columns() {
100 //!         let mut column_reader = row_group_reader.get_column_reader(j).unwrap();
101 //!         match column_reader {
102 //!             // You can also use the `get_typed_column_reader` method to extract a typed reader.
103 //!             ColumnReader::Int32ColumnReader(ref mut typed_reader) => {
104 //!                 res = typed_reader.read_batch(
105 //!                     8, // batch size
106 //!                     Some(&mut def_levels),
107 //!                     Some(&mut rep_levels),
108 //!                     &mut values,
109 //!                 );
110 //!             }
111 //!             _ => {}
112 //!         }
113 //!     }
114 //! }
115 //!
116 //! assert_eq!(res, Ok((3, 5)));
117 //! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
118 //! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
119 //! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
120 //! ```
121 
// Public submodules of the column API. The module-level example above imports
// `reader::ColumnReader` and `writer::ColumnWriter` from the latter two.
// NOTE(review): `page` presumably holds the page-level abstractions used by the
// reader and writer — confirm against the module itself.
122 pub mod page;
123 pub mod reader;
124 pub mod writer;
125