calamine/
lib.rs

1//! Rust Excel/OpenDocument reader
2//!
3//! # Status
4//!
5//! **calamine** is a pure Rust library to read Excel and OpenDocument Spreadsheet files.
6//!
//! It reads both cell values and VBA projects.
8//!
9//! # Examples
10//! ```
11//! use calamine::{Reader, open_workbook, Xlsx, Data};
12//!
13//! // opens a new workbook
14//! # let path = format!("{}/tests/issue3.xlsm", env!("CARGO_MANIFEST_DIR"));
15//! let mut workbook: Xlsx<_> = open_workbook(path).expect("Cannot open file");
16//!
17//! // Read whole worksheet data and provide some statistics
18//! if let Ok(range) = workbook.worksheet_range("Sheet1") {
19//!     let total_cells = range.get_size().0 * range.get_size().1;
20//!     let non_empty_cells: usize = range.used_cells().count();
21//!     println!("Found {} cells in 'Sheet1', including {} non empty cells",
22//!              total_cells, non_empty_cells);
23//!     // alternatively, we can manually filter rows
24//!     assert_eq!(non_empty_cells, range.rows()
25//!         .flat_map(|r| r.iter().filter(|&c| c != &Data::Empty)).count());
26//! }
27//!
28//! // Check if the workbook has a vba project
29//! if let Some(Ok(mut vba)) = workbook.vba_project() {
30//!     let vba = vba.to_mut();
31//!     let module1 = vba.get_module("Module 1").unwrap();
32//!     println!("Module 1 code:");
33//!     println!("{}", module1);
34//!     for r in vba.get_references() {
35//!         if r.is_missing() {
36//!             println!("Reference {} is broken or not accessible", r.name);
37//!         }
38//!     }
39//! }
40//!
41//! // You can also get defined names definition (string representation only)
42//! for name in workbook.defined_names() {
43//!     println!("name: {}, formula: {}", name.0, name.1);
44//! }
45//!
46//! // Now get all formula!
47//! let sheets = workbook.sheet_names().to_owned();
48//! for s in sheets {
49//!     println!("found {} formula in '{}'",
50//!              workbook
51//!                 .worksheet_formula(&s)
52//!                 .expect("error while getting formula")
53//!                 .rows().flat_map(|r| r.iter().filter(|f| !f.is_empty()))
54//!                 .count(),
55//!              s);
56//! }
57//! ```
58#![deny(missing_docs)]
59
60#[macro_use]
61mod utils;
62
63mod auto;
64mod cfb;
65mod datatype;
66mod formats;
67mod ods;
68mod xls;
69mod xlsb;
70mod xlsx;
71
72mod de;
73mod errors;
74pub mod vba;
75
76use serde::de::{Deserialize, DeserializeOwned, Deserializer};
77use std::borrow::Cow;
78use std::cmp::{max, min};
79use std::fmt;
80use std::fs::File;
81use std::io::{BufReader, Read, Seek};
82use std::ops::{Index, IndexMut};
83use std::path::Path;
84
85pub use crate::auto::{open_workbook_auto, open_workbook_auto_from_rs, Sheets};
86pub use crate::datatype::{Data, DataRef, DataType, ExcelDateTime, ExcelDateTimeType};
87pub use crate::de::{DeError, RangeDeserializer, RangeDeserializerBuilder, ToCellDeserializer};
88pub use crate::errors::Error;
89pub use crate::ods::{Ods, OdsError};
90pub use crate::xls::{Xls, XlsError, XlsOptions};
91pub use crate::xlsb::{Xlsb, XlsbError};
92pub use crate::xlsx::{Xlsx, XlsxError};
93
94use crate::vba::VbaProject;
95
96// https://msdn.microsoft.com/en-us/library/office/ff839168.aspx
/// All the error values that can show up in a worksheet cell
/// in place of a regular value
#[derive(Debug, Clone, PartialEq)]
pub enum CellErrorType {
    /// Division by 0 error
    Div0,
    /// Unavailable value error
    NA,
    /// Invalid name error
    Name,
    /// Null value error
    Null,
    /// Number error
    Num,
    /// Invalid cell reference error
    Ref,
    /// Value error
    Value,
    /// Getting data
    GettingData,
}

impl fmt::Display for CellErrorType {
    /// Writes the error with its spreadsheet notation (e.g. `#DIV/0!`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let notation = match self {
            Self::Div0 => "#DIV/0!",
            Self::NA => "#N/A",
            Self::Name => "#NAME?",
            Self::Null => "#NULL!",
            Self::Num => "#NUM!",
            Self::Ref => "#REF!",
            Self::Value => "#VALUE!",
            Self::GettingData => "#DATA!",
        };
        f.write_str(notation)
    }
}
133
/// Dimensions info
///
/// Both corners are inclusive: `contains` accepts positions equal to `end`
/// and `len` counts the `end` row and column.
#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)]
pub struct Dimensions {
    /// start: (row, col)
    pub start: (u32, u32),
    /// end: (row, col)
    pub end: (u32, u32),
}

#[allow(clippy::len_without_is_empty)]
impl Dimensions {
    /// create dimensions info with start position and end position
    pub fn new(start: (u32, u32), end: (u32, u32)) -> Self {
        Self { start, end }
    }
    /// check if a position is in it
    pub fn contains(&self, row: u32, col: u32) -> bool {
        (self.start.0..=self.end.0).contains(&row) && (self.start.1..=self.end.1).contains(&col)
    }
    /// Number of cells covered by these dimensions (rows * columns)
    ///
    /// The computation assumes `start <= end` on both coordinates. The result
    /// saturates at `u64::MAX` when both sides span the whole `u32` range.
    pub fn len(&self) -> u64 {
        // Widen to u64 *before* adding 1: a side covering 0..=u32::MAX holds
        // 2^32 entries, which does not fit in a u32.
        let rows = u64::from(self.end.0 - self.start.0) + 1;
        let cols = u64::from(self.end.1 - self.start.1) + 1;
        rows.saturating_mul(cols)
    }
}
158
/// Common file metadata
///
/// Depending on file type, some extra information may be stored
/// in the Reader implementations
#[derive(Debug, Default)]
pub struct Metadata {
    /// Metadata of the sheets, read by `Reader::sheet_names` and
    /// `Reader::sheets_metadata`
    sheets: Vec<Sheet>,
    /// Defined names, returned as-is by `Reader::defined_names`
    /// (the crate-level example prints each pair as name / formula)
    names: Vec<(String, String)>,
}
169
/// Type of sheet
///
/// Only Excel formats support this. Default value for ODS is `SheetType::WorkSheet`.
///
/// References:
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/b9ec509a-235d-424e-871d-f8e721106501>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xlsb/1edadf56-b5cd-4109-abe7-76651bbe2722>
/// - [ECMA-376 Part 1](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/) 12.3.2, 12.3.7 and 12.3.24
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SheetType {
    /// WorkSheet
    WorkSheet,
    /// DialogSheet
    DialogSheet,
    /// MacroSheet
    MacroSheet,
    /// ChartSheet
    ChartSheet,
    /// VBA module
    Vba,
}
189
/// Visibility state of a sheet
///
/// References:
/// - <http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1417896_253892949>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/b9ec509a-235d-424e-871d-f8e721106501>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xlsb/74cb1d22-b931-4bf8-997d-17517e2416e9>
/// - [ECMA-376 Part 1](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/) 18.18.68
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SheetVisible {
    /// Visible
    Visible,
    /// Hidden
    Hidden,
    /// The sheet is hidden and cannot be displayed using the user interface. It is supported only by Excel formats.
    VeryHidden,
}
205
/// Metadata of a single sheet: its name, type and visibility
#[derive(Debug, Clone, PartialEq)]
pub struct Sheet {
    /// Name
    pub name: String,
    /// Type
    ///
    /// Only Excel formats support this. Default value for ODS is `SheetType::WorkSheet`.
    pub typ: SheetType,
    /// Visibility state, see [`SheetVisible`]
    pub visible: SheetVisible,
}
217
/// Selects the row to use as header
///
/// Unless told otherwise, the first non-empty row is used as header.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum HeaderRow {
    /// First non-empty row
    FirstNonEmptyRow,
    /// Index of the header row
    Row(u32),
}

impl Default for HeaderRow {
    /// The default header row is the first non-empty one.
    fn default() -> Self {
        Self::FirstNonEmptyRow
    }
}
234
// FIXME: `Seek` is only really needed by the `Xls` reader. Because of the present API, requiring it
// for every `Reader` limits the kinds of readers the other formats can be read from.
237/// A trait to share spreadsheets reader functions across different `FileType`s
238pub trait Reader<RS>: Sized
239where
240    RS: Read + Seek,
241{
242    /// Error specific to file type
243    type Error: std::fmt::Debug + From<std::io::Error>;
244
245    /// Creates a new instance.
246    fn new(reader: RS) -> Result<Self, Self::Error>;
247
248    /// Set header row (i.e. first row to be read)
249    /// If `header_row` is `None`, the first non-empty row will be used as header row
250    fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self;
251
252    /// Gets `VbaProject`
253    fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>>;
254
255    /// Initialize
256    fn metadata(&self) -> &Metadata;
257
258    /// Read worksheet data in corresponding worksheet path
259    fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, Self::Error>;
260
261    /// Fetch all worksheet data & paths
262    fn worksheets(&mut self) -> Vec<(String, Range<Data>)>;
263
264    /// Read worksheet formula in corresponding worksheet path
265    fn worksheet_formula(&mut self, _: &str) -> Result<Range<String>, Self::Error>;
266
267    /// Get all sheet names of this workbook, in workbook order
268    ///
269    /// # Examples
270    /// ```
271    /// use calamine::{Xlsx, open_workbook, Reader};
272    ///
273    /// # let path = format!("{}/tests/issue3.xlsm", env!("CARGO_MANIFEST_DIR"));
274    /// let mut workbook: Xlsx<_> = open_workbook(path).unwrap();
275    /// println!("Sheets: {:#?}", workbook.sheet_names());
276    /// ```
277    fn sheet_names(&self) -> Vec<String> {
278        self.metadata()
279            .sheets
280            .iter()
281            .map(|s| s.name.to_owned())
282            .collect()
283    }
284
285    /// Fetch all sheets metadata
286    fn sheets_metadata(&self) -> &[Sheet] {
287        &self.metadata().sheets
288    }
289
290    /// Get all defined names (Ranges names etc)
291    fn defined_names(&self) -> &[(String, String)] {
292        &self.metadata().names
293    }
294
295    /// Get the nth worksheet. Shortcut for getting the nth
296    /// sheet_name, then the corresponding worksheet.
297    fn worksheet_range_at(&mut self, n: usize) -> Option<Result<Range<Data>, Self::Error>> {
298        let name = self.sheet_names().get(n)?.to_string();
299        Some(self.worksheet_range(&name))
300    }
301
302    /// Get all pictures, tuple as (ext: String, data: Vec<u8>)
303    #[cfg(feature = "picture")]
304    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>>;
305}
306
307/// A trait to share spreadsheets reader functions across different `FileType`s
308pub trait ReaderRef<RS>: Reader<RS>
309where
310    RS: Read + Seek,
311{
312    /// Get worksheet range where shared string values are only borrowed.
313    ///
314    /// This is implemented only for [`calamine::Xlsb`] and [`calamine::Xlsx`], as Xls and Ods formats
315    /// do not support lazy iteration.
316    fn worksheet_range_ref<'a>(&'a mut self, name: &str)
317        -> Result<Range<DataRef<'a>>, Self::Error>;
318
319    /// Get the nth worksheet range where shared string values are only borrowed. Shortcut for getting the nth
320    /// sheet_name, then the corresponding worksheet.
321    ///
322    /// This is implemented only for [`calamine::Xlsb`] and [`calamine::Xlsx`], as Xls and Ods formats
323    /// do not support lazy iteration.
324    fn worksheet_range_at_ref(&mut self, n: usize) -> Option<Result<Range<DataRef>, Self::Error>> {
325        let name = self.sheet_names().get(n)?.to_string();
326        Some(self.worksheet_range_ref(&name))
327    }
328}
329
330/// Convenient function to open a file with a BufReader<File>
331pub fn open_workbook<R, P>(path: P) -> Result<R, R::Error>
332where
333    R: Reader<BufReader<File>>,
334    P: AsRef<Path>,
335{
336    let file = BufReader::new(File::open(path)?);
337    R::new(file)
338}
339
/// Convenient function to open a workbook from any reader implementing `Read + Seek`
///
/// Unlike [`open_workbook`], no file is opened here: `rs` is handed to `R::new` as is.
pub fn open_workbook_from_rs<R, RS>(rs: RS) -> Result<R, R::Error>
where
    RS: Read + Seek,
    R: Reader<RS>,
{
    R::new(rs)
}
348
/// A trait to constrain cells
///
/// A cell type must be cloneable, comparable for equality and have a default
/// value. `Range::new` fills new ranges with that default value, and
/// `Range::used_cells` skips every value equal to it.
pub trait CellType: Default + Clone + PartialEq {}

// Types usable as cell values in a `Range`
impl CellType for Data {}
impl<'a> CellType for DataRef<'a> {}
impl CellType for String {}
impl CellType for usize {} // for tests
356
357/// A struct to hold cell position and value
358#[derive(Debug, Clone)]
359pub struct Cell<T: CellType> {
360    /// Position for the cell (row, column)
361    pos: (u32, u32),
362    /// Value for the cell
363    val: T,
364}
365
366impl<T: CellType> Cell<T> {
367    /// Creates a new `Cell`
368    pub fn new(position: (u32, u32), value: T) -> Cell<T> {
369        Cell {
370            pos: position,
371            val: value,
372        }
373    }
374
375    /// Gets `Cell` position
376    pub fn get_position(&self) -> (u32, u32) {
377        self.pos
378    }
379
380    /// Gets `Cell` value
381    pub fn get_value(&self) -> &T {
382        &self.val
383    }
384}
385
/// A struct which represents a rectangular selection of cells
#[derive(Debug, Default, Clone)]
pub struct Range<T> {
    /// Top left cell position (row, column)
    start: (u32, u32),
    /// Bottom right cell position (row, column), included in the range
    end: (u32, u32),
    /// Cell values stored row after row (index = row * width + column)
    inner: Vec<T>,
}
393
394impl<T: CellType> Range<T> {
395    /// Creates a new non-empty `Range`
396    ///
397    /// When possible, prefer the more efficient `Range::from_sparse`
398    ///
399    /// # Panics
400    ///
401    /// Panics if start.0 > end.0 or start.1 > end.1
402    #[inline]
403    pub fn new(start: (u32, u32), end: (u32, u32)) -> Range<T> {
404        assert!(start <= end, "invalid range bounds");
405        Range {
406            start,
407            end,
408            inner: vec![T::default(); ((end.0 - start.0 + 1) * (end.1 - start.1 + 1)) as usize],
409        }
410    }
411
412    /// Creates a new empty range
413    #[inline]
414    pub fn empty() -> Range<T> {
415        Range {
416            start: (0, 0),
417            end: (0, 0),
418            inner: Vec::new(),
419        }
420    }
421
422    /// Get top left cell position (row, column)
423    #[inline]
424    pub fn start(&self) -> Option<(u32, u32)> {
425        if self.is_empty() {
426            None
427        } else {
428            Some(self.start)
429        }
430    }
431
432    /// Get bottom right cell position (row, column)
433    #[inline]
434    pub fn end(&self) -> Option<(u32, u32)> {
435        if self.is_empty() {
436            None
437        } else {
438            Some(self.end)
439        }
440    }
441
442    /// Get column width
443    #[inline]
444    pub fn width(&self) -> usize {
445        if self.is_empty() {
446            0
447        } else {
448            (self.end.1 - self.start.1 + 1) as usize
449        }
450    }
451
452    /// Get column height
453    #[inline]
454    pub fn height(&self) -> usize {
455        if self.is_empty() {
456            0
457        } else {
458            (self.end.0 - self.start.0 + 1) as usize
459        }
460    }
461
462    /// Get size in (height, width) format
463    #[inline]
464    pub fn get_size(&self) -> (usize, usize) {
465        (self.height(), self.width())
466    }
467
    /// Is range empty
    ///
    /// A range is empty when it stores no cell value at all.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
473
474    /// Creates a `Range` from a coo sparse vector of `Cell`s.
475    ///
476    /// Coordinate list (COO) is the natural way cells are stored
477    /// Inner size is defined only by non empty.
478    ///
479    /// cells: `Vec` of non empty `Cell`s, sorted by row
480    ///
481    /// # Panics
482    ///
483    /// panics when a `Cell` row is lower than the first `Cell` row or
484    /// bigger than the last `Cell` row.
485    pub fn from_sparse(cells: Vec<Cell<T>>) -> Range<T> {
486        if cells.is_empty() {
487            Range::empty()
488        } else {
489            // search bounds
490            let row_start = cells.first().unwrap().pos.0;
491            let row_end = cells.last().unwrap().pos.0;
492            let mut col_start = u32::MAX;
493            let mut col_end = 0;
494            for c in cells.iter().map(|c| c.pos.1) {
495                if c < col_start {
496                    col_start = c;
497                }
498                if c > col_end {
499                    col_end = c
500                }
501            }
502            let cols = (col_end - col_start + 1) as usize;
503            let rows = (row_end - row_start + 1) as usize;
504            let len = cols.saturating_mul(rows);
505            let mut v = vec![T::default(); len];
506            v.shrink_to_fit();
507            for c in cells {
508                let row = (c.pos.0 - row_start) as usize;
509                let col = (c.pos.1 - col_start) as usize;
510                let idx = row.saturating_mul(cols) + col;
511                if let Some(v) = v.get_mut(idx) {
512                    *v = c.val;
513                }
514            }
515            Range {
516                start: (row_start, col_start),
517                end: (row_end, col_end),
518                inner: v,
519            }
520        }
521    }
522
523    /// Set inner value from absolute position
524    ///
525    /// # Remarks
526    ///
527    /// Will try to resize inner structure if the value is out of bounds.
528    /// For relative positions, use Index trait
529    ///
530    /// Try to avoid this method as much as possible and prefer initializing
531    /// the `Range` with `from_sparse` constructor.
532    ///
533    /// # Panics
534    ///
535    /// If absolute_position > Cell start
536    ///
537    /// # Examples
538    /// ```
539    /// use calamine::{Range, Data};
540    ///
541    /// let mut range = Range::new((0, 0), (5, 2));
542    /// assert_eq!(range.get_value((2, 1)), Some(&Data::Empty));
543    /// range.set_value((2, 1), Data::Float(1.0));
544    /// assert_eq!(range.get_value((2, 1)), Some(&Data::Float(1.0)));
545    /// ```
546    pub fn set_value(&mut self, absolute_position: (u32, u32), value: T) {
547        assert!(
548            self.start.0 <= absolute_position.0 && self.start.1 <= absolute_position.1,
549            "absolute_position out of bounds"
550        );
551
552        // check if we need to change range dimension (strangely happens sometimes ...)
553        match (
554            self.end.0 < absolute_position.0,
555            self.end.1 < absolute_position.1,
556        ) {
557            (false, false) => (), // regular case, position within bounds
558            (true, false) => {
559                let len = (absolute_position.0 - self.end.0 + 1) as usize * self.width();
560                self.inner.extend_from_slice(&vec![T::default(); len]);
561                self.end.0 = absolute_position.0;
562            }
563            // missing some rows
564            (e, true) => {
565                let height = if e {
566                    (absolute_position.0 - self.start.0 + 1) as usize
567                } else {
568                    self.height()
569                };
570                let width = (absolute_position.1 - self.start.1 + 1) as usize;
571                let old_width = self.width();
572                let mut data = Vec::with_capacity(width * height);
573                let empty = vec![T::default(); width - old_width];
574                for sce in self.inner.chunks(old_width) {
575                    data.extend_from_slice(sce);
576                    data.extend_from_slice(&empty);
577                }
578                data.extend_from_slice(&vec![T::default(); width * (height - self.height())]);
579                if e {
580                    self.end = absolute_position
581                } else {
582                    self.end.1 = absolute_position.1
583                }
584                self.inner = data;
585            } // missing some columns
586        }
587
588        let pos = (
589            absolute_position.0 - self.start.0,
590            absolute_position.1 - self.start.1,
591        );
592        let idx = pos.0 as usize * self.width() + pos.1 as usize;
593        self.inner[idx] = value;
594    }
595
596    /// Get cell value from **absolute position**.
597    ///
598    /// If the `absolute_position` is out of range, returns `None`, else returns the cell value.
599    /// The coordinate format is (row, column).
600    ///
601    /// # Warnings
602    ///
603    /// For relative positions, use Index trait
604    ///
605    /// # Remarks
606    ///
607    /// Absolute position is in *sheet* referential while relative position is in *range* referential.
608    ///
609    /// For instance if we consider range *C2:H38*:
610    /// - `(0, 0)` absolute is "A1" and thus this function returns `None`
611    /// - `(0, 0)` relative is "C2" and is returned by the `Index` trait (i.e `my_range[(0, 0)]`)
612    ///
613    /// # Examples
614    /// ```
615    /// use calamine::{Range, Data};
616    ///
617    /// let range: Range<usize> = Range::new((1, 0), (5, 2));
618    /// assert_eq!(range.get_value((0, 0)), None);
619    /// assert_eq!(range[(0, 0)], 0);
620    /// ```
621    pub fn get_value(&self, absolute_position: (u32, u32)) -> Option<&T> {
622        let p = absolute_position;
623        if p.0 >= self.start.0 && p.0 <= self.end.0 && p.1 >= self.start.1 && p.1 <= self.end.1 {
624            return self.get((
625                (absolute_position.0 - self.start.0) as usize,
626                (absolute_position.1 - self.start.1) as usize,
627            ));
628        }
629        None
630    }
631
632    /// Get cell value from **relative position**.
633    ///
634    /// Unlike using the Index trait, this will not panic but rather yield `None` if out of range.
635    /// Otherwise, returns the cell value. The coordinate format is (row, column).
636    ///
637    pub fn get(&self, relative_position: (usize, usize)) -> Option<&T> {
638        let (row, col) = relative_position;
639        let (height, width) = self.get_size();
640        if col >= width || row >= height {
641            None
642        } else {
643            self.inner.get(row * width + col)
644        }
645    }
646
647    /// Get an iterator over inner rows
648    ///
649    /// # Examples
650    /// ```
651    /// use calamine::{Range, Data};
652    ///
653    /// let range: Range<Data> = Range::new((0, 0), (5, 2));
654    /// // with rows item row: &[Data]
655    /// assert_eq!(range.rows().map(|r| r.len()).sum::<usize>(), 18);
656    /// ```
657    pub fn rows(&self) -> Rows<'_, T> {
658        if self.inner.is_empty() {
659            Rows { inner: None }
660        } else {
661            let width = self.width();
662            Rows {
663                inner: Some(self.inner.chunks(width)),
664            }
665        }
666    }
667
668    /// Get an iterator over used cells only
669    pub fn used_cells(&self) -> UsedCells<'_, T> {
670        UsedCells {
671            width: self.width(),
672            inner: self.inner.iter().enumerate(),
673        }
674    }
675
676    /// Get an iterator over all cells in this range
677    pub fn cells(&self) -> Cells<'_, T> {
678        Cells {
679            width: self.width(),
680            inner: self.inner.iter().enumerate(),
681        }
682    }
683
    /// Build a `RangeDeserializer` over this range.
    ///
    /// This is a shortcut for `RangeDeserializerBuilder::new().from_range(self)`,
    /// i.e. a builder left in its initial configuration.
    ///
    /// # Example
    ///
    /// ```
    /// # use calamine::{Reader, Error, open_workbook, Xlsx, RangeDeserializerBuilder};
    /// fn main() -> Result<(), Error> {
    ///     let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
    ///     let mut sheet = workbook.worksheet_range("Sheet1")?;
    ///     let mut iter = sheet.deserialize()?;
    ///
    ///     if let Some(result) = iter.next() {
    ///         let (label, value): (String, f64) = result?;
    ///         assert_eq!(label, "celsius");
    ///         assert_eq!(value, 22.2222);
    ///
    ///         Ok(())
    ///     } else {
    ///         return Err(From::from("expected at least one record but got none"));
    ///     }
    /// }
    /// ```
    pub fn deserialize<'a, D>(&'a self) -> Result<RangeDeserializer<'a, T, D>, DeError>
    where
        T: ToCellDeserializer<'a>,
        D: DeserializeOwned,
    {
        RangeDeserializerBuilder::new().from_range(self)
    }
714
715    /// Build a new `Range` out of this range
716    ///
717    /// # Remarks
718    ///
719    /// Cells within this range will be cloned, cells out of it will be set to Empty
720    ///
721    /// # Example
722    ///
723    /// ```
724    /// # use calamine::{Range, Data};
725    /// let mut a = Range::new((1, 1), (3, 3));
726    /// a.set_value((1, 1), Data::Bool(true));
727    /// a.set_value((2, 2), Data::Bool(true));
728    ///
729    /// let b = a.range((2, 2), (5, 5));
730    /// assert_eq!(b.get_value((2, 2)), Some(&Data::Bool(true)));
731    /// assert_eq!(b.get_value((3, 3)), Some(&Data::Empty));
732    ///
733    /// let c = a.range((0, 0), (2, 2));
734    /// assert_eq!(c.get_value((0, 0)), Some(&Data::Empty));
735    /// assert_eq!(c.get_value((1, 1)), Some(&Data::Bool(true)));
736    /// assert_eq!(c.get_value((2, 2)), Some(&Data::Bool(true)));
737    /// ```
738    pub fn range(&self, start: (u32, u32), end: (u32, u32)) -> Range<T> {
739        let mut other = Range::new(start, end);
740        let (self_start_row, self_start_col) = self.start;
741        let (self_end_row, self_end_col) = self.end;
742        let (other_start_row, other_start_col) = other.start;
743        let (other_end_row, other_end_col) = other.end;
744
745        // copy data from self to other
746        let start_row = max(self_start_row, other_start_row);
747        let end_row = min(self_end_row, other_end_row);
748        let start_col = max(self_start_col, other_start_col);
749        let end_col = min(self_end_col, other_end_col);
750
751        if start_row > end_row || start_col > end_col {
752            return other;
753        }
754
755        let self_width = self.width();
756        let other_width = other.width();
757
758        // change referential
759        //
760        // we want to copy range: start_row..(end_row + 1)
761        // In self referential it is (start_row - self_start_row)..(end_row + 1 - self_start_row)
762        let self_row_start = (start_row - self_start_row) as usize;
763        let self_row_end = (end_row + 1 - self_start_row) as usize;
764        let self_col_start = (start_col - self_start_col) as usize;
765        let self_col_end = (end_col + 1 - self_start_col) as usize;
766
767        let other_row_start = (start_row - other_start_row) as usize;
768        let other_row_end = (end_row + 1 - other_start_row) as usize;
769        let other_col_start = (start_col - other_start_col) as usize;
770        let other_col_end = (end_col + 1 - other_start_col) as usize;
771
772        {
773            let self_rows = self
774                .inner
775                .chunks(self_width)
776                .take(self_row_end)
777                .skip(self_row_start);
778
779            let other_rows = other
780                .inner
781                .chunks_mut(other_width)
782                .take(other_row_end)
783                .skip(other_row_start);
784
785            for (self_row, other_row) in self_rows.zip(other_rows) {
786                let self_cols = &self_row[self_col_start..self_col_end];
787                let other_cols = &mut other_row[other_col_start..other_col_end];
788                other_cols.clone_from_slice(self_cols);
789            }
790        }
791
792        other
793    }
794}
795
796impl<T: CellType + fmt::Display> Range<T> {
797    /// Get range headers.
798    ///
799    /// # Examples
800    /// ```
801    /// use calamine::{Range, Data};
802    ///
803    /// let mut range = Range::new((0, 0), (5, 2));
804    /// range.set_value((0, 0), Data::String(String::from("a")));
805    /// range.set_value((0, 1), Data::Int(1));
806    /// range.set_value((0, 2), Data::Bool(true));
807    /// let headers = range.headers();
808    /// assert_eq!(
809    ///     headers,
810    ///     Some(vec![
811    ///         String::from("a"),
812    ///         String::from("1"),
813    ///         String::from("true")
814    ///     ])
815    /// );
816    /// ```
817    pub fn headers(&self) -> Option<Vec<String>> {
818        self.rows()
819            .next()
820            .map(|row| row.iter().map(ToString::to_string).collect())
821    }
822}
823
824impl<T: CellType> Index<usize> for Range<T> {
825    type Output = [T];
826    fn index(&self, index: usize) -> &[T] {
827        let width = self.width();
828        &self.inner[index * width..(index + 1) * width]
829    }
830}
831
832impl<T: CellType> Index<(usize, usize)> for Range<T> {
833    type Output = T;
834    fn index(&self, index: (usize, usize)) -> &T {
835        let (height, width) = self.get_size();
836        assert!(index.1 < width && index.0 < height, "index out of bounds");
837        &self.inner[index.0 * width + index.1]
838    }
839}
840
841impl<T: CellType> IndexMut<usize> for Range<T> {
842    fn index_mut(&mut self, index: usize) -> &mut [T] {
843        let width = self.width();
844        &mut self.inner[index * width..(index + 1) * width]
845    }
846}
847
848impl<T: CellType> IndexMut<(usize, usize)> for Range<T> {
849    fn index_mut(&mut self, index: (usize, usize)) -> &mut T {
850        let (height, width) = self.get_size();
851        assert!(index.1 < width && index.0 < height, "index out of bounds");
852        &mut self.inner[index.0 * width + index.1]
853    }
854}
855
856/// A struct to iterate over all cells
857#[derive(Clone, Debug)]
858pub struct Cells<'a, T: CellType> {
859    width: usize,
860    inner: std::iter::Enumerate<std::slice::Iter<'a, T>>,
861}
862
863impl<'a, T: 'a + CellType> Iterator for Cells<'a, T> {
864    type Item = (usize, usize, &'a T);
865    fn next(&mut self) -> Option<Self::Item> {
866        self.inner.next().map(|(i, v)| {
867            let row = i / self.width;
868            let col = i % self.width;
869            (row, col, v)
870        })
871    }
872    fn size_hint(&self) -> (usize, Option<usize>) {
873        self.inner.size_hint()
874    }
875}
876
877impl<'a, T: 'a + CellType> DoubleEndedIterator for Cells<'a, T> {
878    fn next_back(&mut self) -> Option<Self::Item> {
879        self.inner.next_back().map(|(i, v)| {
880            let row = i / self.width;
881            let col = i % self.width;
882            (row, col, v)
883        })
884    }
885}
886
887impl<'a, T: 'a + CellType> ExactSizeIterator for Cells<'a, T> {}
888
889/// A struct to iterate over used cells
890#[derive(Clone, Debug)]
891pub struct UsedCells<'a, T: CellType> {
892    width: usize,
893    inner: std::iter::Enumerate<std::slice::Iter<'a, T>>,
894}
895
896impl<'a, T: 'a + CellType> Iterator for UsedCells<'a, T> {
897    type Item = (usize, usize, &'a T);
898    fn next(&mut self) -> Option<Self::Item> {
899        self.inner
900            .by_ref()
901            .find(|&(_, v)| v != &T::default())
902            .map(|(i, v)| {
903                let row = i / self.width;
904                let col = i % self.width;
905                (row, col, v)
906            })
907    }
908    fn size_hint(&self) -> (usize, Option<usize>) {
909        let (_, up) = self.inner.size_hint();
910        (0, up)
911    }
912}
913
914impl<'a, T: 'a + CellType> DoubleEndedIterator for UsedCells<'a, T> {
915    fn next_back(&mut self) -> Option<Self::Item> {
916        self.inner
917            .by_ref()
918            .rfind(|&(_, v)| v != &T::default())
919            .map(|(i, v)| {
920                let row = i / self.width;
921                let col = i % self.width;
922                (row, col, v)
923            })
924    }
925}
926
927/// An iterator to read `Range` struct row by row
928#[derive(Clone, Debug)]
929pub struct Rows<'a, T: CellType> {
930    inner: Option<std::slice::Chunks<'a, T>>,
931}
932
933impl<'a, T: 'a + CellType> Iterator for Rows<'a, T> {
934    type Item = &'a [T];
935    fn next(&mut self) -> Option<Self::Item> {
936        self.inner.as_mut().and_then(std::iter::Iterator::next)
937    }
938    fn size_hint(&self) -> (usize, Option<usize>) {
939        self.inner
940            .as_ref()
941            .map_or((0, Some(0)), std::iter::Iterator::size_hint)
942    }
943}
944
945impl<'a, T: 'a + CellType> DoubleEndedIterator for Rows<'a, T> {
946    fn next_back(&mut self) -> Option<Self::Item> {
947        self.inner
948            .as_mut()
949            .and_then(std::iter::DoubleEndedIterator::next_back)
950    }
951}
952
953impl<'a, T: 'a + CellType> ExactSizeIterator for Rows<'a, T> {}
954
955/// Struct with the key elements of a table
956pub struct Table<T> {
957    pub(crate) name: String,
958    pub(crate) sheet_name: String,
959    pub(crate) columns: Vec<String>,
960    pub(crate) data: Range<T>,
961}
962impl<T> Table<T> {
963    /// Get the name of the table
964    pub fn name(&self) -> &str {
965        &self.name
966    }
967    /// Get the name of the sheet that table exists within
968    pub fn sheet_name(&self) -> &str {
969        &self.sheet_name
970    }
971    /// Get the names of the columns in the order they occur
972    pub fn columns(&self) -> &[String] {
973        &self.columns
974    }
975    /// Get a range representing the data from the table (excludes column headers)
976    pub fn data(&self) -> &Range<T> {
977        &self.data
978    }
979}
980
981impl<T: CellType> From<Table<T>> for Range<T> {
982    fn from(table: Table<T>) -> Range<T> {
983        table.data
984    }
985}
986
987/// A helper function to deserialize cell values as `i64`,
988/// useful when cells may also contain invalid values (i.e. strings).
989/// It applies the [`as_i64`] method to the cell value, and returns
990/// `Ok(Some(value_as_i64))` if successful or `Ok(None)` if unsuccessful,
991/// therefore never failing. This function is intended to be used with Serde's
992/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
993pub fn deserialize_as_i64_or_none<'de, D>(deserializer: D) -> Result<Option<i64>, D::Error>
994where
995    D: Deserializer<'de>,
996{
997    let data = Data::deserialize(deserializer)?;
998    Ok(data.as_i64())
999}
1000
1001/// A helper function to deserialize cell values as `i64`,
1002/// useful when cells may also contain invalid values (i.e. strings).
1003/// It applies the [`as_i64`] method to the cell value, and returns
1004/// `Ok(Ok(value_as_i64))` if successful or `Ok(Err(value_to_string))` if unsuccessful,
1005/// therefore never failing. This function is intended to be used with Serde's
1006/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1007pub fn deserialize_as_i64_or_string<'de, D>(
1008    deserializer: D,
1009) -> Result<Result<i64, String>, D::Error>
1010where
1011    D: Deserializer<'de>,
1012{
1013    let data = Data::deserialize(deserializer)?;
1014    Ok(data.as_i64().ok_or_else(|| data.to_string()))
1015}
1016
1017/// A helper function to deserialize cell values as `f64`,
1018/// useful when cells may also contain invalid values (i.e. strings).
1019/// It applies the [`as_f64`] method to the cell value, and returns
1020/// `Ok(Some(value_as_f64))` if successful or `Ok(None)` if unsuccessful,
1021/// therefore never failing. This function is intended to be used with Serde's
1022/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1023pub fn deserialize_as_f64_or_none<'de, D>(deserializer: D) -> Result<Option<f64>, D::Error>
1024where
1025    D: Deserializer<'de>,
1026{
1027    let data = Data::deserialize(deserializer)?;
1028    Ok(data.as_f64())
1029}
1030
1031/// A helper function to deserialize cell values as `f64`,
1032/// useful when cells may also contain invalid values (i.e. strings).
1033/// It applies the [`as_f64`] method to the cell value, and returns
1034/// `Ok(Ok(value_as_f64))` if successful or `Ok(Err(value_to_string))` if unsuccessful,
1035/// therefore never failing. This function is intended to be used with Serde's
1036/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1037pub fn deserialize_as_f64_or_string<'de, D>(
1038    deserializer: D,
1039) -> Result<Result<f64, String>, D::Error>
1040where
1041    D: Deserializer<'de>,
1042{
1043    let data = Data::deserialize(deserializer)?;
1044    Ok(data.as_f64().ok_or_else(|| data.to_string()))
1045}
1046
/// Deserializes a cell and converts it to a `chrono::NaiveDate`, yielding
/// `None` when the cell cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its `as_date`
/// conversion. A cell holding an unconvertible value (e.g. a string) produces
/// `Ok(None)` instead of an error; an `Err` is returned only when
/// deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_date_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveDate>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_date())
}
1063
/// Deserializes a cell and converts it to a `chrono::NaiveDate`, falling back
/// to the cell's string representation when it cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its `as_date`
/// conversion. Success produces `Ok(Ok(value))`; a cell holding an
/// unconvertible value (e.g. a string) produces `Ok(Err(text))`, where `text`
/// is the cell rendered with `to_string`. The outer `Err` is returned only
/// when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_date_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveDate, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    let converted = match cell.as_date() {
        Some(value) => Ok(value),
        None => Err(cell.to_string()),
    };
    Ok(converted)
}
1080
/// Deserializes a cell and converts it to a `chrono::NaiveTime`, yielding
/// `None` when the cell cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its `as_time`
/// conversion. A cell holding an unconvertible value (e.g. a string) produces
/// `Ok(None)` instead of an error; an `Err` is returned only when
/// deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_time_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveTime>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_time())
}
1097
/// Deserializes a cell and converts it to a `chrono::NaiveTime`, falling back
/// to the cell's string representation when it cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its `as_time`
/// conversion. Success produces `Ok(Ok(value))`; a cell holding an
/// unconvertible value (e.g. a string) produces `Ok(Err(text))`, where `text`
/// is the cell rendered with `to_string`. The outer `Err` is returned only
/// when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_time_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveTime, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    let converted = match cell.as_time() {
        Some(value) => Ok(value),
        None => Err(cell.to_string()),
    };
    Ok(converted)
}
1114
/// Deserializes a cell and converts it to a `chrono::Duration`, yielding
/// `None` when the cell cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its
/// `as_duration` conversion. A cell holding an unconvertible value (e.g. a
/// string) produces `Ok(None)` instead of an error; an `Err` is returned only
/// when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_duration_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::Duration>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_duration())
}
1131
/// Deserializes a cell and converts it to a `chrono::Duration`, falling back
/// to the cell's string representation when it cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its
/// `as_duration` conversion. Success produces `Ok(Ok(value))`; a cell holding
/// an unconvertible value (e.g. a string) produces `Ok(Err(text))`, where
/// `text` is the cell rendered with `to_string`. The outer `Err` is returned
/// only when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_duration_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::Duration, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    let converted = match cell.as_duration() {
        Some(value) => Ok(value),
        None => Err(cell.to_string()),
    };
    Ok(converted)
}
1148
/// Deserializes a cell and converts it to a `chrono::NaiveDateTime`, yielding
/// `None` when the cell cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its
/// `as_datetime` conversion. A cell holding an unconvertible value (e.g. a
/// string) produces `Ok(None)` instead of an error; an `Err` is returned only
/// when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_datetime_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveDateTime>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_datetime())
}
1165
/// Deserializes a cell and converts it to a `chrono::NaiveDateTime`, falling
/// back to the cell's string representation when it cannot be converted.
///
/// The cell is deserialized as [`Data`] and then run through its
/// `as_datetime` conversion. Success produces `Ok(Ok(value))`; a cell holding
/// an unconvertible value (e.g. a string) produces `Ok(Err(text))`, where
/// `text` is the cell rendered with `to_string`. The outer `Err` is returned
/// only when deserializing the cell itself fails.
///
/// Intended for use with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_datetime_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveDateTime, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    let converted = match cell.as_datetime() {
        Some(value) => Ok(value),
        None => Err(cell.to_string()),
    };
    Ok(converted)
}