calamine/lib.rs
1//! Rust Excel/OpenDocument reader
2//!
3//! # Status
4//!
5//! **calamine** is a pure Rust library to read Excel and OpenDocument Spreadsheet files.
6//!
//! It reads both cell values and VBA projects.
8//!
9//! # Examples
10//! ```
11//! use calamine::{Reader, open_workbook, Xlsx, Data};
12//!
13//! // opens a new workbook
14//! # let path = format!("{}/tests/issue3.xlsm", env!("CARGO_MANIFEST_DIR"));
15//! let mut workbook: Xlsx<_> = open_workbook(path).expect("Cannot open file");
16//!
17//! // Read whole worksheet data and provide some statistics
18//! if let Ok(range) = workbook.worksheet_range("Sheet1") {
19//! let total_cells = range.get_size().0 * range.get_size().1;
20//! let non_empty_cells: usize = range.used_cells().count();
21//! println!("Found {} cells in 'Sheet1', including {} non empty cells",
22//! total_cells, non_empty_cells);
23//! // alternatively, we can manually filter rows
24//! assert_eq!(non_empty_cells, range.rows()
25//! .flat_map(|r| r.iter().filter(|&c| c != &Data::Empty)).count());
26//! }
27//!
28//! // Check if the workbook has a vba project
29//! if let Some(Ok(mut vba)) = workbook.vba_project() {
30//! let vba = vba.to_mut();
31//! let module1 = vba.get_module("Module 1").unwrap();
32//! println!("Module 1 code:");
33//! println!("{}", module1);
34//! for r in vba.get_references() {
35//! if r.is_missing() {
36//! println!("Reference {} is broken or not accessible", r.name);
37//! }
38//! }
39//! }
40//!
41//! // You can also get defined names definition (string representation only)
42//! for name in workbook.defined_names() {
43//! println!("name: {}, formula: {}", name.0, name.1);
44//! }
45//!
//! // Now get all formulas!
47//! let sheets = workbook.sheet_names().to_owned();
48//! for s in sheets {
49//! println!("found {} formula in '{}'",
50//! workbook
51//! .worksheet_formula(&s)
52//! .expect("error while getting formula")
53//! .rows().flat_map(|r| r.iter().filter(|f| !f.is_empty()))
54//! .count(),
55//! s);
56//! }
57//! ```
58#![deny(missing_docs)]
59
60#[macro_use]
61mod utils;
62
63mod auto;
64mod cfb;
65mod datatype;
66mod formats;
67mod ods;
68mod xls;
69mod xlsb;
70mod xlsx;
71
72mod de;
73mod errors;
74pub mod vba;
75
76use serde::de::{Deserialize, DeserializeOwned, Deserializer};
77use std::borrow::Cow;
78use std::cmp::{max, min};
79use std::fmt;
80use std::fs::File;
81use std::io::{BufReader, Read, Seek};
82use std::ops::{Index, IndexMut};
83use std::path::Path;
84
85pub use crate::auto::{open_workbook_auto, open_workbook_auto_from_rs, Sheets};
86pub use crate::datatype::{Data, DataRef, DataType, ExcelDateTime, ExcelDateTimeType};
87pub use crate::de::{DeError, RangeDeserializer, RangeDeserializerBuilder, ToCellDeserializer};
88pub use crate::errors::Error;
89pub use crate::ods::{Ods, OdsError};
90pub use crate::xls::{Xls, XlsError, XlsOptions};
91pub use crate::xlsb::{Xlsb, XlsbError};
92pub use crate::xlsx::{Xlsx, XlsxError};
93
94use crate::vba::VbaProject;
95
// https://msdn.microsoft.com/en-us/library/office/ff839168.aspx
/// The error values a worksheet cell can hold in place of regular data.
#[derive(Debug, Clone, PartialEq)]
pub enum CellErrorType {
    /// Division by 0 error (`#DIV/0!`)
    Div0,
    /// Unavailable value error (`#N/A`)
    NA,
    /// Invalid name error (`#NAME?`)
    Name,
    /// Null value error (`#NULL!`)
    Null,
    /// Number error (`#NUM!`)
    Num,
    /// Invalid cell reference error (`#REF!`)
    Ref,
    /// Value error (`#VALUE!`)
    Value,
    /// Getting data (`#DATA!`)
    GettingData,
}

impl fmt::Display for CellErrorType {
    /// Writes the spreadsheet-style marker of the error, e.g. `#DIV/0!`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let marker = match self {
            Self::Div0 => "#DIV/0!",
            Self::NA => "#N/A",
            Self::Name => "#NAME?",
            Self::Null => "#NULL!",
            Self::Num => "#NUM!",
            Self::Ref => "#REF!",
            Self::Value => "#VALUE!",
            Self::GettingData => "#DATA!",
        };
        f.write_str(marker)
    }
}
133
/// Dimensions info
///
/// Rectangular bounds given as two `(row, col)` corners, both inclusive.
#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)]
pub struct Dimensions {
    /// start: (row, col)
    pub start: (u32, u32),
    /// end: (row, col)
    pub end: (u32, u32),
}

// Bounds are inclusive, so `len` is never 0 for well-formed dimensions and an
// `is_empty` method would carry no information.
#[allow(clippy::len_without_is_empty)]
impl Dimensions {
    /// Creates dimensions info from a start position and an end position.
    pub fn new(start: (u32, u32), end: (u32, u32)) -> Self {
        Self { start, end }
    }

    /// Checks whether the position `(row, col)` lies within the bounds (inclusive).
    pub fn contains(&self, row: u32, col: u32) -> bool {
        (self.start.0..=self.end.0).contains(&row) && (self.start.1..=self.end.1).contains(&col)
    }

    /// Number of cells covered by the dimensions.
    ///
    /// Expects `start <= end` on both axes. The `+ 1` for the inclusive bound is
    /// applied after widening to `u64`, so a span covering the whole `u32` range
    /// no longer overflows. The product saturates at `u64::MAX` in the single
    /// case (both axes spanning the full `u32` range) where it would not fit.
    pub fn len(&self) -> u64 {
        let rows = u64::from(self.end.0 - self.start.0) + 1;
        let cols = u64::from(self.end.1 - self.start.1) + 1;
        rows.saturating_mul(cols)
    }
}
158
159/// Common file metadata
160///
161/// Depending on file type, some extra information may be stored
162/// in the Reader implementations
163#[derive(Debug, Default)]
164pub struct Metadata {
165 sheets: Vec<Sheet>,
166 /// Map of sheet names/sheet path within zip archive
167 names: Vec<(String, String)>,
168}
169
/// Type of sheet
///
/// Only Excel formats support this. Default value for ODS is `SheetType::WorkSheet`.
///
/// References:
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/b9ec509a-235d-424e-871d-f8e721106501>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xlsb/1edadf56-b5cd-4109-abe7-76651bbe2722>
/// - [ECMA-376 Part 1](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/) 12.3.2, 12.3.7 and 12.3.24
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SheetType {
    /// WorkSheet
    WorkSheet,
    /// DialogSheet
    DialogSheet,
    /// MacroSheet
    MacroSheet,
    /// ChartSheet
    ChartSheet,
    /// VBA module
    Vba,
}
189
/// Visibility state of a sheet
///
/// References:
/// - <http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1417896_253892949>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/b9ec509a-235d-424e-871d-f8e721106501>
/// - <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xlsb/74cb1d22-b931-4bf8-997d-17517e2416e9>
/// - [ECMA-376 Part 1](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/) 18.18.68
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SheetVisible {
    /// Visible
    Visible,
    /// Hidden
    Hidden,
    /// The sheet is hidden and cannot be displayed using the user interface.
    /// It is supported only by Excel formats.
    VeryHidden,
}
205
206/// Metadata of sheet
207#[derive(Debug, Clone, PartialEq)]
208pub struct Sheet {
209 /// Name
210 pub name: String,
211 /// Type
212 /// Only Excel formats support this. Default value for ODS is SheetType::WorkSheet.
213 pub typ: SheetType,
214 /// Visible
215 pub visible: SheetVisible,
216}
217
/// Row to use as header
///
/// By default, the first non-empty row is used as header.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum HeaderRow {
    /// First non-empty row
    FirstNonEmptyRow,
    /// Index of the header row
    Row(u32),
}

impl Default for HeaderRow {
    /// Returns [`HeaderRow::FirstNonEmptyRow`].
    fn default() -> Self {
        Self::FirstNonEmptyRow
    }
}
234
235// FIXME `Reader` must only be seek `Seek` for `Xls::xls`. Because of the present API this limits
236// the kinds of readers (other) data in formats can be read from.
237/// A trait to share spreadsheets reader functions across different `FileType`s
238pub trait Reader<RS>: Sized
239where
240 RS: Read + Seek,
241{
242 /// Error specific to file type
243 type Error: std::fmt::Debug + From<std::io::Error>;
244
245 /// Creates a new instance.
246 fn new(reader: RS) -> Result<Self, Self::Error>;
247
248 /// Set header row (i.e. first row to be read)
249 /// If `header_row` is `None`, the first non-empty row will be used as header row
250 fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self;
251
252 /// Gets `VbaProject`
253 fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>>;
254
255 /// Initialize
256 fn metadata(&self) -> &Metadata;
257
258 /// Read worksheet data in corresponding worksheet path
259 fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, Self::Error>;
260
261 /// Fetch all worksheet data & paths
262 fn worksheets(&mut self) -> Vec<(String, Range<Data>)>;
263
264 /// Read worksheet formula in corresponding worksheet path
265 fn worksheet_formula(&mut self, _: &str) -> Result<Range<String>, Self::Error>;
266
267 /// Get all sheet names of this workbook, in workbook order
268 ///
269 /// # Examples
270 /// ```
271 /// use calamine::{Xlsx, open_workbook, Reader};
272 ///
273 /// # let path = format!("{}/tests/issue3.xlsm", env!("CARGO_MANIFEST_DIR"));
274 /// let mut workbook: Xlsx<_> = open_workbook(path).unwrap();
275 /// println!("Sheets: {:#?}", workbook.sheet_names());
276 /// ```
277 fn sheet_names(&self) -> Vec<String> {
278 self.metadata()
279 .sheets
280 .iter()
281 .map(|s| s.name.to_owned())
282 .collect()
283 }
284
285 /// Fetch all sheets metadata
286 fn sheets_metadata(&self) -> &[Sheet] {
287 &self.metadata().sheets
288 }
289
290 /// Get all defined names (Ranges names etc)
291 fn defined_names(&self) -> &[(String, String)] {
292 &self.metadata().names
293 }
294
295 /// Get the nth worksheet. Shortcut for getting the nth
296 /// sheet_name, then the corresponding worksheet.
297 fn worksheet_range_at(&mut self, n: usize) -> Option<Result<Range<Data>, Self::Error>> {
298 let name = self.sheet_names().get(n)?.to_string();
299 Some(self.worksheet_range(&name))
300 }
301
302 /// Get all pictures, tuple as (ext: String, data: Vec<u8>)
303 #[cfg(feature = "picture")]
304 fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>>;
305}
306
307/// A trait to share spreadsheets reader functions across different `FileType`s
308pub trait ReaderRef<RS>: Reader<RS>
309where
310 RS: Read + Seek,
311{
312 /// Get worksheet range where shared string values are only borrowed.
313 ///
314 /// This is implemented only for [`calamine::Xlsb`] and [`calamine::Xlsx`], as Xls and Ods formats
315 /// do not support lazy iteration.
316 fn worksheet_range_ref<'a>(&'a mut self, name: &str)
317 -> Result<Range<DataRef<'a>>, Self::Error>;
318
319 /// Get the nth worksheet range where shared string values are only borrowed. Shortcut for getting the nth
320 /// sheet_name, then the corresponding worksheet.
321 ///
322 /// This is implemented only for [`calamine::Xlsb`] and [`calamine::Xlsx`], as Xls and Ods formats
323 /// do not support lazy iteration.
324 fn worksheet_range_at_ref(&mut self, n: usize) -> Option<Result<Range<DataRef>, Self::Error>> {
325 let name = self.sheet_names().get(n)?.to_string();
326 Some(self.worksheet_range_ref(&name))
327 }
328}
329
330/// Convenient function to open a file with a BufReader<File>
331pub fn open_workbook<R, P>(path: P) -> Result<R, R::Error>
332where
333 R: Reader<BufReader<File>>,
334 P: AsRef<Path>,
335{
336 let file = BufReader::new(File::open(path)?);
337 R::new(file)
338}
339
340/// Convenient function to open a file with a BufReader<File>
341pub fn open_workbook_from_rs<R, RS>(rs: RS) -> Result<R, R::Error>
342where
343 RS: Read + Seek,
344 R: Reader<RS>,
345{
346 R::new(rs)
347}
348
349/// A trait to constrain cells
350pub trait CellType: Default + Clone + PartialEq {}
351
352impl CellType for Data {}
353impl<'a> CellType for DataRef<'a> {}
354impl CellType for String {}
355impl CellType for usize {} // for tests
356
357/// A struct to hold cell position and value
358#[derive(Debug, Clone)]
359pub struct Cell<T: CellType> {
360 /// Position for the cell (row, column)
361 pos: (u32, u32),
362 /// Value for the cell
363 val: T,
364}
365
366impl<T: CellType> Cell<T> {
367 /// Creates a new `Cell`
368 pub fn new(position: (u32, u32), value: T) -> Cell<T> {
369 Cell {
370 pos: position,
371 val: value,
372 }
373 }
374
375 /// Gets `Cell` position
376 pub fn get_position(&self) -> (u32, u32) {
377 self.pos
378 }
379
380 /// Gets `Cell` value
381 pub fn get_value(&self) -> &T {
382 &self.val
383 }
384}
385
/// A struct which represents a rectangular selection of cells
#[derive(Debug, Default, Clone)]
pub struct Range<T> {
    /// Top left corner (row, column)
    start: (u32, u32),
    /// Bottom right corner (row, column)
    end: (u32, u32),
    /// Cell values, stored row after row; empty for an empty range
    inner: Vec<T>,
}
393
394impl<T: CellType> Range<T> {
395 /// Creates a new non-empty `Range`
396 ///
397 /// When possible, prefer the more efficient `Range::from_sparse`
398 ///
399 /// # Panics
400 ///
401 /// Panics if start.0 > end.0 or start.1 > end.1
402 #[inline]
403 pub fn new(start: (u32, u32), end: (u32, u32)) -> Range<T> {
404 assert!(start <= end, "invalid range bounds");
405 Range {
406 start,
407 end,
408 inner: vec![T::default(); ((end.0 - start.0 + 1) * (end.1 - start.1 + 1)) as usize],
409 }
410 }
411
412 /// Creates a new empty range
413 #[inline]
414 pub fn empty() -> Range<T> {
415 Range {
416 start: (0, 0),
417 end: (0, 0),
418 inner: Vec::new(),
419 }
420 }
421
422 /// Get top left cell position (row, column)
423 #[inline]
424 pub fn start(&self) -> Option<(u32, u32)> {
425 if self.is_empty() {
426 None
427 } else {
428 Some(self.start)
429 }
430 }
431
432 /// Get bottom right cell position (row, column)
433 #[inline]
434 pub fn end(&self) -> Option<(u32, u32)> {
435 if self.is_empty() {
436 None
437 } else {
438 Some(self.end)
439 }
440 }
441
442 /// Get column width
443 #[inline]
444 pub fn width(&self) -> usize {
445 if self.is_empty() {
446 0
447 } else {
448 (self.end.1 - self.start.1 + 1) as usize
449 }
450 }
451
452 /// Get column height
453 #[inline]
454 pub fn height(&self) -> usize {
455 if self.is_empty() {
456 0
457 } else {
458 (self.end.0 - self.start.0 + 1) as usize
459 }
460 }
461
462 /// Get size in (height, width) format
463 #[inline]
464 pub fn get_size(&self) -> (usize, usize) {
465 (self.height(), self.width())
466 }
467
468 /// Is range empty
469 #[inline]
470 pub fn is_empty(&self) -> bool {
471 self.inner.is_empty()
472 }
473
474 /// Creates a `Range` from a coo sparse vector of `Cell`s.
475 ///
476 /// Coordinate list (COO) is the natural way cells are stored
477 /// Inner size is defined only by non empty.
478 ///
479 /// cells: `Vec` of non empty `Cell`s, sorted by row
480 ///
481 /// # Panics
482 ///
483 /// panics when a `Cell` row is lower than the first `Cell` row or
484 /// bigger than the last `Cell` row.
485 pub fn from_sparse(cells: Vec<Cell<T>>) -> Range<T> {
486 if cells.is_empty() {
487 Range::empty()
488 } else {
489 // search bounds
490 let row_start = cells.first().unwrap().pos.0;
491 let row_end = cells.last().unwrap().pos.0;
492 let mut col_start = u32::MAX;
493 let mut col_end = 0;
494 for c in cells.iter().map(|c| c.pos.1) {
495 if c < col_start {
496 col_start = c;
497 }
498 if c > col_end {
499 col_end = c
500 }
501 }
502 let cols = (col_end - col_start + 1) as usize;
503 let rows = (row_end - row_start + 1) as usize;
504 let len = cols.saturating_mul(rows);
505 let mut v = vec![T::default(); len];
506 v.shrink_to_fit();
507 for c in cells {
508 let row = (c.pos.0 - row_start) as usize;
509 let col = (c.pos.1 - col_start) as usize;
510 let idx = row.saturating_mul(cols) + col;
511 if let Some(v) = v.get_mut(idx) {
512 *v = c.val;
513 }
514 }
515 Range {
516 start: (row_start, col_start),
517 end: (row_end, col_end),
518 inner: v,
519 }
520 }
521 }
522
523 /// Set inner value from absolute position
524 ///
525 /// # Remarks
526 ///
527 /// Will try to resize inner structure if the value is out of bounds.
528 /// For relative positions, use Index trait
529 ///
530 /// Try to avoid this method as much as possible and prefer initializing
531 /// the `Range` with `from_sparse` constructor.
532 ///
533 /// # Panics
534 ///
535 /// If absolute_position > Cell start
536 ///
537 /// # Examples
538 /// ```
539 /// use calamine::{Range, Data};
540 ///
541 /// let mut range = Range::new((0, 0), (5, 2));
542 /// assert_eq!(range.get_value((2, 1)), Some(&Data::Empty));
543 /// range.set_value((2, 1), Data::Float(1.0));
544 /// assert_eq!(range.get_value((2, 1)), Some(&Data::Float(1.0)));
545 /// ```
546 pub fn set_value(&mut self, absolute_position: (u32, u32), value: T) {
547 assert!(
548 self.start.0 <= absolute_position.0 && self.start.1 <= absolute_position.1,
549 "absolute_position out of bounds"
550 );
551
552 // check if we need to change range dimension (strangely happens sometimes ...)
553 match (
554 self.end.0 < absolute_position.0,
555 self.end.1 < absolute_position.1,
556 ) {
557 (false, false) => (), // regular case, position within bounds
558 (true, false) => {
559 let len = (absolute_position.0 - self.end.0 + 1) as usize * self.width();
560 self.inner.extend_from_slice(&vec![T::default(); len]);
561 self.end.0 = absolute_position.0;
562 }
563 // missing some rows
564 (e, true) => {
565 let height = if e {
566 (absolute_position.0 - self.start.0 + 1) as usize
567 } else {
568 self.height()
569 };
570 let width = (absolute_position.1 - self.start.1 + 1) as usize;
571 let old_width = self.width();
572 let mut data = Vec::with_capacity(width * height);
573 let empty = vec![T::default(); width - old_width];
574 for sce in self.inner.chunks(old_width) {
575 data.extend_from_slice(sce);
576 data.extend_from_slice(&empty);
577 }
578 data.extend_from_slice(&vec![T::default(); width * (height - self.height())]);
579 if e {
580 self.end = absolute_position
581 } else {
582 self.end.1 = absolute_position.1
583 }
584 self.inner = data;
585 } // missing some columns
586 }
587
588 let pos = (
589 absolute_position.0 - self.start.0,
590 absolute_position.1 - self.start.1,
591 );
592 let idx = pos.0 as usize * self.width() + pos.1 as usize;
593 self.inner[idx] = value;
594 }
595
596 /// Get cell value from **absolute position**.
597 ///
598 /// If the `absolute_position` is out of range, returns `None`, else returns the cell value.
599 /// The coordinate format is (row, column).
600 ///
601 /// # Warnings
602 ///
603 /// For relative positions, use Index trait
604 ///
605 /// # Remarks
606 ///
607 /// Absolute position is in *sheet* referential while relative position is in *range* referential.
608 ///
609 /// For instance if we consider range *C2:H38*:
610 /// - `(0, 0)` absolute is "A1" and thus this function returns `None`
611 /// - `(0, 0)` relative is "C2" and is returned by the `Index` trait (i.e `my_range[(0, 0)]`)
612 ///
613 /// # Examples
614 /// ```
615 /// use calamine::{Range, Data};
616 ///
617 /// let range: Range<usize> = Range::new((1, 0), (5, 2));
618 /// assert_eq!(range.get_value((0, 0)), None);
619 /// assert_eq!(range[(0, 0)], 0);
620 /// ```
621 pub fn get_value(&self, absolute_position: (u32, u32)) -> Option<&T> {
622 let p = absolute_position;
623 if p.0 >= self.start.0 && p.0 <= self.end.0 && p.1 >= self.start.1 && p.1 <= self.end.1 {
624 return self.get((
625 (absolute_position.0 - self.start.0) as usize,
626 (absolute_position.1 - self.start.1) as usize,
627 ));
628 }
629 None
630 }
631
632 /// Get cell value from **relative position**.
633 ///
634 /// Unlike using the Index trait, this will not panic but rather yield `None` if out of range.
635 /// Otherwise, returns the cell value. The coordinate format is (row, column).
636 ///
637 pub fn get(&self, relative_position: (usize, usize)) -> Option<&T> {
638 let (row, col) = relative_position;
639 let (height, width) = self.get_size();
640 if col >= width || row >= height {
641 None
642 } else {
643 self.inner.get(row * width + col)
644 }
645 }
646
647 /// Get an iterator over inner rows
648 ///
649 /// # Examples
650 /// ```
651 /// use calamine::{Range, Data};
652 ///
653 /// let range: Range<Data> = Range::new((0, 0), (5, 2));
654 /// // with rows item row: &[Data]
655 /// assert_eq!(range.rows().map(|r| r.len()).sum::<usize>(), 18);
656 /// ```
657 pub fn rows(&self) -> Rows<'_, T> {
658 if self.inner.is_empty() {
659 Rows { inner: None }
660 } else {
661 let width = self.width();
662 Rows {
663 inner: Some(self.inner.chunks(width)),
664 }
665 }
666 }
667
668 /// Get an iterator over used cells only
669 pub fn used_cells(&self) -> UsedCells<'_, T> {
670 UsedCells {
671 width: self.width(),
672 inner: self.inner.iter().enumerate(),
673 }
674 }
675
676 /// Get an iterator over all cells in this range
677 pub fn cells(&self) -> Cells<'_, T> {
678 Cells {
679 width: self.width(),
680 inner: self.inner.iter().enumerate(),
681 }
682 }
683
684 /// Build a `RangeDeserializer` from this configuration.
685 ///
686 /// # Example
687 ///
688 /// ```
689 /// # use calamine::{Reader, Error, open_workbook, Xlsx, RangeDeserializerBuilder};
690 /// fn main() -> Result<(), Error> {
691 /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
692 /// let mut workbook: Xlsx<_> = open_workbook(path)?;
693 /// let mut sheet = workbook.worksheet_range("Sheet1")?;
694 /// let mut iter = sheet.deserialize()?;
695 ///
696 /// if let Some(result) = iter.next() {
697 /// let (label, value): (String, f64) = result?;
698 /// assert_eq!(label, "celsius");
699 /// assert_eq!(value, 22.2222);
700 ///
701 /// Ok(())
702 /// } else {
703 /// return Err(From::from("expected at least one record but got none"));
704 /// }
705 /// }
706 /// ```
707 pub fn deserialize<'a, D>(&'a self) -> Result<RangeDeserializer<'a, T, D>, DeError>
708 where
709 T: ToCellDeserializer<'a>,
710 D: DeserializeOwned,
711 {
712 RangeDeserializerBuilder::new().from_range(self)
713 }
714
715 /// Build a new `Range` out of this range
716 ///
717 /// # Remarks
718 ///
719 /// Cells within this range will be cloned, cells out of it will be set to Empty
720 ///
721 /// # Example
722 ///
723 /// ```
724 /// # use calamine::{Range, Data};
725 /// let mut a = Range::new((1, 1), (3, 3));
726 /// a.set_value((1, 1), Data::Bool(true));
727 /// a.set_value((2, 2), Data::Bool(true));
728 ///
729 /// let b = a.range((2, 2), (5, 5));
730 /// assert_eq!(b.get_value((2, 2)), Some(&Data::Bool(true)));
731 /// assert_eq!(b.get_value((3, 3)), Some(&Data::Empty));
732 ///
733 /// let c = a.range((0, 0), (2, 2));
734 /// assert_eq!(c.get_value((0, 0)), Some(&Data::Empty));
735 /// assert_eq!(c.get_value((1, 1)), Some(&Data::Bool(true)));
736 /// assert_eq!(c.get_value((2, 2)), Some(&Data::Bool(true)));
737 /// ```
738 pub fn range(&self, start: (u32, u32), end: (u32, u32)) -> Range<T> {
739 let mut other = Range::new(start, end);
740 let (self_start_row, self_start_col) = self.start;
741 let (self_end_row, self_end_col) = self.end;
742 let (other_start_row, other_start_col) = other.start;
743 let (other_end_row, other_end_col) = other.end;
744
745 // copy data from self to other
746 let start_row = max(self_start_row, other_start_row);
747 let end_row = min(self_end_row, other_end_row);
748 let start_col = max(self_start_col, other_start_col);
749 let end_col = min(self_end_col, other_end_col);
750
751 if start_row > end_row || start_col > end_col {
752 return other;
753 }
754
755 let self_width = self.width();
756 let other_width = other.width();
757
758 // change referential
759 //
760 // we want to copy range: start_row..(end_row + 1)
761 // In self referential it is (start_row - self_start_row)..(end_row + 1 - self_start_row)
762 let self_row_start = (start_row - self_start_row) as usize;
763 let self_row_end = (end_row + 1 - self_start_row) as usize;
764 let self_col_start = (start_col - self_start_col) as usize;
765 let self_col_end = (end_col + 1 - self_start_col) as usize;
766
767 let other_row_start = (start_row - other_start_row) as usize;
768 let other_row_end = (end_row + 1 - other_start_row) as usize;
769 let other_col_start = (start_col - other_start_col) as usize;
770 let other_col_end = (end_col + 1 - other_start_col) as usize;
771
772 {
773 let self_rows = self
774 .inner
775 .chunks(self_width)
776 .take(self_row_end)
777 .skip(self_row_start);
778
779 let other_rows = other
780 .inner
781 .chunks_mut(other_width)
782 .take(other_row_end)
783 .skip(other_row_start);
784
785 for (self_row, other_row) in self_rows.zip(other_rows) {
786 let self_cols = &self_row[self_col_start..self_col_end];
787 let other_cols = &mut other_row[other_col_start..other_col_end];
788 other_cols.clone_from_slice(self_cols);
789 }
790 }
791
792 other
793 }
794}
795
796impl<T: CellType + fmt::Display> Range<T> {
797 /// Get range headers.
798 ///
799 /// # Examples
800 /// ```
801 /// use calamine::{Range, Data};
802 ///
803 /// let mut range = Range::new((0, 0), (5, 2));
804 /// range.set_value((0, 0), Data::String(String::from("a")));
805 /// range.set_value((0, 1), Data::Int(1));
806 /// range.set_value((0, 2), Data::Bool(true));
807 /// let headers = range.headers();
808 /// assert_eq!(
809 /// headers,
810 /// Some(vec![
811 /// String::from("a"),
812 /// String::from("1"),
813 /// String::from("true")
814 /// ])
815 /// );
816 /// ```
817 pub fn headers(&self) -> Option<Vec<String>> {
818 self.rows()
819 .next()
820 .map(|row| row.iter().map(ToString::to_string).collect())
821 }
822}
823
824impl<T: CellType> Index<usize> for Range<T> {
825 type Output = [T];
826 fn index(&self, index: usize) -> &[T] {
827 let width = self.width();
828 &self.inner[index * width..(index + 1) * width]
829 }
830}
831
832impl<T: CellType> Index<(usize, usize)> for Range<T> {
833 type Output = T;
834 fn index(&self, index: (usize, usize)) -> &T {
835 let (height, width) = self.get_size();
836 assert!(index.1 < width && index.0 < height, "index out of bounds");
837 &self.inner[index.0 * width + index.1]
838 }
839}
840
841impl<T: CellType> IndexMut<usize> for Range<T> {
842 fn index_mut(&mut self, index: usize) -> &mut [T] {
843 let width = self.width();
844 &mut self.inner[index * width..(index + 1) * width]
845 }
846}
847
848impl<T: CellType> IndexMut<(usize, usize)> for Range<T> {
849 fn index_mut(&mut self, index: (usize, usize)) -> &mut T {
850 let (height, width) = self.get_size();
851 assert!(index.1 < width && index.0 < height, "index out of bounds");
852 &mut self.inner[index.0 * width + index.1]
853 }
854}
855
856/// A struct to iterate over all cells
857#[derive(Clone, Debug)]
858pub struct Cells<'a, T: CellType> {
859 width: usize,
860 inner: std::iter::Enumerate<std::slice::Iter<'a, T>>,
861}
862
863impl<'a, T: 'a + CellType> Iterator for Cells<'a, T> {
864 type Item = (usize, usize, &'a T);
865 fn next(&mut self) -> Option<Self::Item> {
866 self.inner.next().map(|(i, v)| {
867 let row = i / self.width;
868 let col = i % self.width;
869 (row, col, v)
870 })
871 }
872 fn size_hint(&self) -> (usize, Option<usize>) {
873 self.inner.size_hint()
874 }
875}
876
877impl<'a, T: 'a + CellType> DoubleEndedIterator for Cells<'a, T> {
878 fn next_back(&mut self) -> Option<Self::Item> {
879 self.inner.next_back().map(|(i, v)| {
880 let row = i / self.width;
881 let col = i % self.width;
882 (row, col, v)
883 })
884 }
885}
886
887impl<'a, T: 'a + CellType> ExactSizeIterator for Cells<'a, T> {}
888
889/// A struct to iterate over used cells
890#[derive(Clone, Debug)]
891pub struct UsedCells<'a, T: CellType> {
892 width: usize,
893 inner: std::iter::Enumerate<std::slice::Iter<'a, T>>,
894}
895
896impl<'a, T: 'a + CellType> Iterator for UsedCells<'a, T> {
897 type Item = (usize, usize, &'a T);
898 fn next(&mut self) -> Option<Self::Item> {
899 self.inner
900 .by_ref()
901 .find(|&(_, v)| v != &T::default())
902 .map(|(i, v)| {
903 let row = i / self.width;
904 let col = i % self.width;
905 (row, col, v)
906 })
907 }
908 fn size_hint(&self) -> (usize, Option<usize>) {
909 let (_, up) = self.inner.size_hint();
910 (0, up)
911 }
912}
913
914impl<'a, T: 'a + CellType> DoubleEndedIterator for UsedCells<'a, T> {
915 fn next_back(&mut self) -> Option<Self::Item> {
916 self.inner
917 .by_ref()
918 .rfind(|&(_, v)| v != &T::default())
919 .map(|(i, v)| {
920 let row = i / self.width;
921 let col = i % self.width;
922 (row, col, v)
923 })
924 }
925}
926
927/// An iterator to read `Range` struct row by row
928#[derive(Clone, Debug)]
929pub struct Rows<'a, T: CellType> {
930 inner: Option<std::slice::Chunks<'a, T>>,
931}
932
933impl<'a, T: 'a + CellType> Iterator for Rows<'a, T> {
934 type Item = &'a [T];
935 fn next(&mut self) -> Option<Self::Item> {
936 self.inner.as_mut().and_then(std::iter::Iterator::next)
937 }
938 fn size_hint(&self) -> (usize, Option<usize>) {
939 self.inner
940 .as_ref()
941 .map_or((0, Some(0)), std::iter::Iterator::size_hint)
942 }
943}
944
945impl<'a, T: 'a + CellType> DoubleEndedIterator for Rows<'a, T> {
946 fn next_back(&mut self) -> Option<Self::Item> {
947 self.inner
948 .as_mut()
949 .and_then(std::iter::DoubleEndedIterator::next_back)
950 }
951}
952
953impl<'a, T: 'a + CellType> ExactSizeIterator for Rows<'a, T> {}
954
955/// Struct with the key elements of a table
956pub struct Table<T> {
957 pub(crate) name: String,
958 pub(crate) sheet_name: String,
959 pub(crate) columns: Vec<String>,
960 pub(crate) data: Range<T>,
961}
962impl<T> Table<T> {
963 /// Get the name of the table
964 pub fn name(&self) -> &str {
965 &self.name
966 }
967 /// Get the name of the sheet that table exists within
968 pub fn sheet_name(&self) -> &str {
969 &self.sheet_name
970 }
971 /// Get the names of the columns in the order they occur
972 pub fn columns(&self) -> &[String] {
973 &self.columns
974 }
975 /// Get a range representing the data from the table (excludes column headers)
976 pub fn data(&self) -> &Range<T> {
977 &self.data
978 }
979}
980
981impl<T: CellType> From<Table<T>> for Range<T> {
982 fn from(table: Table<T>) -> Range<T> {
983 table.data
984 }
985}
986
987/// A helper function to deserialize cell values as `i64`,
988/// useful when cells may also contain invalid values (i.e. strings).
989/// It applies the [`as_i64`] method to the cell value, and returns
990/// `Ok(Some(value_as_i64))` if successful or `Ok(None)` if unsuccessful,
991/// therefore never failing. This function is intended to be used with Serde's
992/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
993pub fn deserialize_as_i64_or_none<'de, D>(deserializer: D) -> Result<Option<i64>, D::Error>
994where
995 D: Deserializer<'de>,
996{
997 let data = Data::deserialize(deserializer)?;
998 Ok(data.as_i64())
999}
1000
1001/// A helper function to deserialize cell values as `i64`,
1002/// useful when cells may also contain invalid values (i.e. strings).
1003/// It applies the [`as_i64`] method to the cell value, and returns
1004/// `Ok(Ok(value_as_i64))` if successful or `Ok(Err(value_to_string))` if unsuccessful,
1005/// therefore never failing. This function is intended to be used with Serde's
1006/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1007pub fn deserialize_as_i64_or_string<'de, D>(
1008 deserializer: D,
1009) -> Result<Result<i64, String>, D::Error>
1010where
1011 D: Deserializer<'de>,
1012{
1013 let data = Data::deserialize(deserializer)?;
1014 Ok(data.as_i64().ok_or_else(|| data.to_string()))
1015}
1016
1017/// A helper function to deserialize cell values as `f64`,
1018/// useful when cells may also contain invalid values (i.e. strings).
1019/// It applies the [`as_f64`] method to the cell value, and returns
1020/// `Ok(Some(value_as_f64))` if successful or `Ok(None)` if unsuccessful,
1021/// therefore never failing. This function is intended to be used with Serde's
1022/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1023pub fn deserialize_as_f64_or_none<'de, D>(deserializer: D) -> Result<Option<f64>, D::Error>
1024where
1025 D: Deserializer<'de>,
1026{
1027 let data = Data::deserialize(deserializer)?;
1028 Ok(data.as_f64())
1029}
1030
1031/// A helper function to deserialize cell values as `f64`,
1032/// useful when cells may also contain invalid values (i.e. strings).
1033/// It applies the [`as_f64`] method to the cell value, and returns
1034/// `Ok(Ok(value_as_f64))` if successful or `Ok(Err(value_to_string))` if unsuccessful,
1035/// therefore never failing. This function is intended to be used with Serde's
1036/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
1037pub fn deserialize_as_f64_or_string<'de, D>(
1038 deserializer: D,
1039) -> Result<Result<f64, String>, D::Error>
1040where
1041 D: Deserializer<'de>,
1042{
1043 let data = Data::deserialize(deserializer)?;
1044 Ok(data.as_f64().ok_or_else(|| data.to_string()))
1045}
1046
/// Reads a cell as an optional `chrono::NaiveDate`.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_date`
/// method: a successful conversion gives `Ok(Some(date))`, a failed one
/// gives `Ok(None)`. An `Err` is returned only when deserializing the cell
/// itself fails. This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_date_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveDate>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_date())
}
1063
/// Reads a cell as a `chrono::NaiveDate`, keeping the cell's text when it
/// cannot be converted.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_date`
/// method: a successful conversion gives `Ok(Ok(date))`, a failed one gives
/// `Ok(Err(text))`, where `text` is the cell rendered with `to_string`.
/// An outer `Err` is returned only when deserializing the cell itself fails.
/// This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_date_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveDate, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    Ok(match cell.as_date() {
        Some(date) => Ok(date),
        // Conversion failed: hand back the cell's textual form instead.
        None => Err(cell.to_string()),
    })
}
1080
/// Reads a cell as an optional `chrono::NaiveTime`.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_time`
/// method: a successful conversion gives `Ok(Some(time))`, a failed one
/// gives `Ok(None)`. An `Err` is returned only when deserializing the cell
/// itself fails. This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_time_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveTime>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_time())
}
1097
/// Reads a cell as a `chrono::NaiveTime`, keeping the cell's text when it
/// cannot be converted.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_time`
/// method: a successful conversion gives `Ok(Ok(time))`, a failed one gives
/// `Ok(Err(text))`, where `text` is the cell rendered with `to_string`.
/// An outer `Err` is returned only when deserializing the cell itself fails.
/// This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_time_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveTime, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    Ok(match cell.as_time() {
        Some(time) => Ok(time),
        // Conversion failed: hand back the cell's textual form instead.
        None => Err(cell.to_string()),
    })
}
1114
/// Reads a cell as an optional `chrono::Duration`.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_duration`
/// method: a successful conversion gives `Ok(Some(duration))`, a failed one
/// gives `Ok(None)`. An `Err` is returned only when deserializing the cell
/// itself fails. This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_duration_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::Duration>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_duration())
}
1131
/// Reads a cell as a `chrono::Duration`, keeping the cell's text when it
/// cannot be converted.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_duration`
/// method: a successful conversion gives `Ok(Ok(duration))`, a failed one
/// gives `Ok(Err(text))`, where `text` is the cell rendered with `to_string`.
/// An outer `Err` is returned only when deserializing the cell itself fails.
/// This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_duration_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::Duration, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    Ok(match cell.as_duration() {
        Some(duration) => Ok(duration),
        // Conversion failed: hand back the cell's textual form instead.
        None => Err(cell.to_string()),
    })
}
1148
/// Reads a cell as an optional `chrono::NaiveDateTime`.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_datetime`
/// method: a successful conversion gives `Ok(Some(datetime))`, a failed one
/// gives `Ok(None)`. An `Err` is returned only when deserializing the cell
/// itself fails. This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_datetime_or_none<'de, D>(
    deserializer: D,
) -> Result<Option<chrono::NaiveDateTime>, D::Error>
where
    D: Deserializer<'de>,
{
    Data::deserialize(deserializer).map(|cell| cell.as_datetime())
}
1165
/// Reads a cell as a `chrono::NaiveDateTime`, keeping the cell's text when it
/// cannot be converted.
///
/// Useful when cells may also contain invalid values (e.g. strings).
/// The cell is deserialized as [`Data`] and converted with its `as_datetime`
/// method: a successful conversion gives `Ok(Ok(datetime))`, a failed one
/// gives `Ok(Err(text))`, where `text` is the cell rendered with `to_string`.
/// An outer `Err` is returned only when deserializing the cell itself fails.
/// This function is intended to be used with Serde's
/// [`deserialize_with`](https://serde.rs/field-attrs.html) field attribute.
#[cfg(feature = "dates")]
pub fn deserialize_as_datetime_or_string<'de, D>(
    deserializer: D,
) -> Result<Result<chrono::NaiveDateTime, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let cell = Data::deserialize(deserializer)?;
    Ok(match cell.as_datetime() {
        Some(datetime) => Ok(datetime),
        // Conversion failed: hand back the cell's textual form instead.
        None => Err(cell.to_string()),
    })
}