// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Low-level column reader and writer APIs.
//!
//! This API is designed for reading and writing column values, definition levels and
//! repetition levels directly.
//!
//! # Example of writing and reading data
//!
//! Data has the following format:
//! ```text
//! +---------------+
//! |         values|
//! +---------------+
//! |[1, 2]         |
//! |[3, null, null]|
//! +---------------+
//! ```
//!
//! The example uses the column writer and reader APIs to write raw values, definition
//! levels and repetition levels, and then reads them back to verify write/read
//! correctness.
//!
//! Only the three non-null elements (`1`, `2`, `3`) are passed as values. Each of the
//! five list slots, including the two nulls, gets one definition level and one
//! repetition level.
//!
//! ```rust,no_run
//! use std::{fs, path::Path, rc::Rc};
//!
//! use parquet::{
//!     column::{reader::ColumnReader, writer::ColumnWriter},
//!     file::{
//!         properties::WriterProperties,
//!         reader::{FileReader, SerializedFileReader},
//!         writer::{FileWriter, SerializedFileWriter},
//!     },
//!     schema::parser::parse_message_type,
//! };
//!
//! let path = Path::new("/path/to/column_sample.parquet");
//!
//! // Writing data using column writer API.
//!
//! let message_type = "
//!   message schema {
//!     optional group values (LIST) {
//!       repeated group list {
//!         optional INT32 element;
//!       }
//!     }
//!   }
//! ";
//! let schema = Rc::new(parse_message_type(message_type).unwrap());
//! let props = Rc::new(WriterProperties::builder().build());
//! let file = fs::File::create(path).unwrap();
//! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
//! let mut row_group_writer = writer.next_row_group().unwrap();
//! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
//!     match col_writer {
//!         // You can also use `get_typed_column_writer` method to extract typed writer.
//!         ColumnWriter::Int32ColumnWriter(ref mut typed_writer) => {
//!             // Arguments: values, definition levels, repetition levels.
//!             // In this data set, definition level 3 marks a present element and 2 a
//!             // null element; repetition level 0 marks the first slot of a row and 1
//!             // a further slot of the same list.
//!             typed_writer
//!                 .write_batch(&[1, 2, 3], Some(&[3, 3, 3, 2, 2]), Some(&[0, 1, 0, 1, 1]))
//!                 .unwrap();
//!         }
//!         _ => {}
//!     }
//!     row_group_writer.close_column(col_writer).unwrap();
//! }
//! writer.close_row_group(row_group_writer).unwrap();
//! writer.close().unwrap();
//!
//! // Reading data using column reader API.
//!
//! let file = fs::File::open(path).unwrap();
//! let reader = SerializedFileReader::new(file).unwrap();
//! let metadata = reader.metadata();
//!
//! // `res` keeps the result of the most recent `read_batch` call; the buffers are
//! // sized to the batch size (8) and start out zero-filled.
//! let mut res = Ok((0, 0));
//! let mut values = vec![0; 8];
//! let mut def_levels = vec![0; 8];
//! let mut rep_levels = vec![0; 8];
//!
//! for i in 0..metadata.num_row_groups() {
//!     let row_group_reader = reader.get_row_group(i).unwrap();
//!     let row_group_metadata = metadata.row_group(i);
//!
//!     for j in 0..row_group_metadata.num_columns() {
//!         let mut column_reader = row_group_reader.get_column_reader(j).unwrap();
//!         match column_reader {
//!             // You can also use `get_typed_column_reader` method to extract typed reader.
//!             ColumnReader::Int32ColumnReader(ref mut typed_reader) => {
//!                 res = typed_reader.read_batch(
//!                     8, // batch size
//!                     Some(&mut def_levels),
//!                     Some(&mut rep_levels),
//!                     &mut values,
//!                 );
//!             }
//!             _ => {}
//!         }
//!     }
//! }
//!
//! // 3 values and 5 levels come back, matching what was written; the unused tail of
//! // each 8-slot buffer keeps its initial zeros.
//! assert_eq!(res, Ok((3, 5)));
//! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
//! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
//! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
//! ```

// NOTE(review): presumably the page-level types shared by the readers and writers;
// confirm against the module's own documentation.
pub mod page;
// Home of the `ColumnReader` enum matched on in the example above.
pub mod reader;
// Home of the `ColumnWriter` enum matched on in the example above.
pub mod writer;