Program Listing for File archive.h

Return to documentation for file (include/zim/archive.h)

/*
 * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
 * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
 * Copyright (C) 2020 Veloman Yunkan
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
 * NON-INFRINGEMENT.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 */

#ifndef ZIM_ARCHIVE_H
#define ZIM_ARCHIVE_H

#include "zim.h"
#include "entry.h"
#include "illustration.h"
#include "uuid.h"

#include <bitset>
#include <cstddef>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <vector>

namespace zim
{
  // Forward declaration: this header only refers to FileImpl through
  // std::shared_ptr<FileImpl> and const FileImpl&, so its definition is not
  // needed here.
  class FileImpl;

  /**
   * Selects the ordering in which entries are indexed and iterated.
   *
   * It is used as the template argument of `Archive::EntryRange`,
   * `Archive::iterator` and `_toPathOrder`.
   */
  enum class EntryOrder {
    /// Order of the ranges returned by `Archive::iterByPath()` and `Archive::findByPath()`.
    pathOrder,
    /// Order of the ranges returned by `Archive::iterByTitle()` and `Archive::findByTitle()`.
    titleOrder,
    /// Order of the range returned by `Archive::iterEfficient()`.
    /// NOTE(review): presumably the order in which entries are stored in
    /// clusters (see `Archive::getEntryByClusterOrder()`) — confirm.
    efficientOrder
  };

  /**
   * Get the maximum size of the cluster cache.
   *
   * This is a namespace-level function, not an `Archive` member.
   * NOTE(review): presumably the cache is shared by every opened archive and
   * the size is expressed in bytes (cf. the `sizeInB` parameter of the
   * setter) — confirm against the implementation.
   *
   * @return The maximum size of the cluster cache.
   */
  size_t LIBZIM_API getClusterCacheMaxSize();

  /**
   * Get the current size of the cluster cache.
   *
   * NOTE(review): presumably expressed in the same unit as
   * `getClusterCacheMaxSize()` — confirm.
   *
   * @return The current size of the cluster cache.
   */
  size_t LIBZIM_API getClusterCacheCurrentSize();

  /**
   * Set the maximum size of the cluster cache.
   *
   * NOTE(review): what happens to already cached clusters when the limit is
   * lowered is not visible from this header — check the implementation.
   *
   * @param sizeInB The new maximum size (in bytes, going by the parameter name).
   */
  void LIBZIM_API setClusterCacheMaxSize(size_t sizeInB);


  /**
   * Public handle on a ZIM archive.
   *
   * The only data member is a `std::shared_ptr<FileImpl>` and no copy
   * operation is declared, so copies of an `Archive` share the same
   * `FileImpl`.
   *
   * Apart from the few inline members, everything is only declared here; the
   * exact semantics (exceptions thrown, validation done, ...) live in the
   * implementation and must be checked there.
   */
  class LIBZIM_API Archive
  {
    public:
      /// Range of entries in a given order (defined after this class).
      template<EntryOrder order> class EntryRange;
      /// Iterator over entries in a given order (defined after this class).
      template<EntryOrder order> class iterator;
      /// Collection type returned by `getIllustrationInfos()`.
      typedef std::vector<IllustrationInfo> IllustrationInfos;

      /**
       * Construct an archive from a file name.
       *
       * @param fname The name of the file to open.
       */
      explicit Archive(const std::string& fname);

      /**
       * Construct an archive from a file name, with an explicit configuration.
       *
       * @param fname The name of the file to open.
       * @param openConfig The configuration to use to open the archive.
       */
      Archive(const std::string& fname, OpenConfig openConfig);

      // The file-descriptor based constructors are not available on Windows.
#ifndef _WIN32
      /**
       * Construct an archive from a file descriptor.
       *
       * NOTE(review): whether the archive takes ownership of `fd` is not
       * visible from this header — confirm before closing it on the caller side.
       *
       * @param fd The file descriptor to read the archive from.
       */
      explicit Archive(int fd);

      /**
       * Construct an archive from a file descriptor, with an explicit configuration.
       *
       * @param fd The file descriptor to read the archive from.
       * @param openConfig The configuration to use to open the archive.
       */
      Archive(int fd, OpenConfig openConfig);

      /**
       * Construct an archive from a section of a file descriptor.
       *
       * NOTE(review): not `explicit`-relevant (three parameters); presumably
       * meant for an archive embedded inside a bigger file — confirm.
       *
       * @param fd The file descriptor to read from.
       * @param offset The offset of the archive (presumably relative to the
       *               beginning of the file).
       * @param size The size of the archive (presumably in bytes).
       */
       Archive(int fd, offset_type offset, size_type size);

      /**
       * Same as `Archive(int, offset_type, size_type)`, with an explicit
       * configuration.
       *
       * @param fd The file descriptor to read from.
       * @param offset The offset of the archive.
       * @param size The size of the archive.
       * @param openConfig The configuration to use to open the archive.
       */
       Archive(int fd, offset_type offset, size_type size, OpenConfig openConfig);

      /**
       * Construct an archive from a `FdInput`.
       *
       * @param fd The `FdInput` describing where to read the archive from.
       */
      explicit Archive(FdInput fd);

      /**
       * Construct an archive from a `FdInput`, with an explicit configuration.
       *
       * @param fd The `FdInput` describing where to read the archive from.
       * @param openConfig The configuration to use to open the archive.
       */
      Archive(FdInput fd, OpenConfig openConfig);

      /**
       * Construct an archive from several `FdInput`.
       *
       * NOTE(review): presumably each `FdInput` is one part of a split
       * archive (see `isMultiPart()`) — confirm.
       *
       * @param fds The `FdInput`s describing where to read the archive from.
       */
      explicit Archive(const std::vector<FdInput>& fds);

      /**
       * Construct an archive from several `FdInput`, with an explicit configuration.
       *
       * @param fds The `FdInput`s describing where to read the archive from.
       * @param openConfig The configuration to use to open the archive.
       */
      Archive(const std::vector<FdInput>& fds, OpenConfig openConfig);
#endif

      /**
       * Return the file name associated with the archive.
       *
       * The string is returned by reference: it is owned by the archive's
       * internals, not by the caller.
       */
      const std::string& getFilename() const;

      /// Return the size of the archive (presumably in bytes — confirm).
      size_type getFilesize() const;

      /**
       * Return a count of entries.
       *
       * NOTE(review): going by the name this counts every entry, whereas
       * `getEntryCount()` is more restrictive; the exact difference is not
       * visible from this header.
       */
      entry_index_type getAllEntryCount() const;

      /// Return a count of entries (see the note on `getAllEntryCount()`).
      entry_index_type getEntryCount() const;

      /// Return the number of articles (what qualifies as an article is
      /// defined by the implementation).
      entry_index_type getArticleCount() const;

      /// Return the number of media (what qualifies as a media is defined by
      /// the implementation).
      entry_index_type getMediaCount() const;

      /// Return the `Uuid` of the archive.
      Uuid getUuid() const;

      /**
       * Get the content of a metadata, as a string.
       *
       * NOTE(review): behaviour when `name` does not exist is not visible
       * here (presumably an exception) — confirm.
       *
       * @param name The name of the metadata.
       */
      std::string getMetadata(const std::string& name) const;

      /**
       * Get a metadata as an `Item`.
       *
       * @param name The name of the metadata.
       */
      Item getMetadataItem(const std::string& name) const;

      /// Return the names of the metadata (presumably usable with
      /// `getMetadata()` / `getMetadataItem()`).
      std::vector<std::string> getMetadataKeys() const;

      /**
       * Get the illustration item described by an `IllustrationInfo`.
       *
       * @param ii The description of the wanted illustration.
       */
      Item getIllustrationItem(const IllustrationInfo& ii) const;

      /**
       * Get the illustration item of a given size.
       *
       * @param size The wanted size; defaults to 48 (presumably pixels, for a
       *             square illustration — confirm).
       */
      Item getIllustrationItem(unsigned int size=48) const;

      /**
       * Return the set of available illustration sizes.
       *
       * Marked `DEPRECATED`. NOTE(review): `getIllustrationInfos()` looks
       * like the intended replacement — confirm.
       */
      DEPRECATED std::set<unsigned int> getIllustrationSizes() const;


      /// Return the description of the illustrations of the archive.
      IllustrationInfos getIllustrationInfos() const;

      /**
       * Return the description of illustrations selected by `w`, `h` and
       * `minScale`.
       *
       * NOTE(review): the selection rule is not visible from this header;
       * the parameter names suggest a width, a height and a minimal scale.
       *
       * @param w Width criterion.
       * @param h Height criterion.
       * @param minScale Minimal scale criterion.
       */
      IllustrationInfos getIllustrationInfos(uint32_t w, uint32_t h, float minScale) const;


      /**
       * Get an entry from an index.
       *
       * @param idx The index of the entry (presumably in path order).
       */
      Entry getEntryByPath(entry_index_type idx) const;

      /**
       * Get an entry from its path.
       *
       * `hasEntryByPath()` treats an exception thrown from here as
       * "no such entry".
       *
       * @param path The path of the entry.
       */
      Entry getEntryByPath(const std::string& path) const;

      /**
       * Get an entry from an index.
       *
       * @param idx The index of the entry (presumably in title order).
       */
      Entry getEntryByTitle(entry_index_type idx) const;

      /**
       * Get an entry from its title.
       *
       * `hasEntryByTitle()` treats an exception thrown from here as
       * "no such entry".
       *
       * @param title The title of the entry.
       */
      Entry getEntryByTitle(const std::string& title) const;

      /**
       * Get an entry from an index.
       *
       * @param idx The index of the entry (presumably in cluster order).
       */
      Entry getEntryByClusterOrder(entry_index_type idx) const;

      /// Get the main entry of the archive (see `hasMainEntry()`).
      Entry getMainEntry() const;

      /// Get a random entry of the archive (how it is picked is up to the
      /// implementation).
      Entry getRandomEntry() const;

      /**
       * Check whether `getEntryByPath(path)` succeeds.
       *
       * @param path The path to look for.
       * @return `true` if `getEntryByPath(path)` returned normally, `false`
       *         if it threw. Every exception type is caught, so any failure
       *         of the lookup is reported as `false`.
       */
      bool hasEntryByPath(const std::string& path) const {
        try{
          getEntryByPath(path);
          return true;
        } catch(...) { return false; }
      }

      /**
       * Check whether `getEntryByTitle(title)` succeeds.
       *
       * @param title The title to look for.
       * @return `true` if `getEntryByTitle(title)` returned normally, `false`
       *         if it threw. Every exception type is caught, so any failure
       *         of the lookup is reported as `false`.
       */
      bool hasEntryByTitle(const std::string& title) const {
        try{
          getEntryByTitle(title);
          return true;
        } catch(...) { return false; }
      }

      /// Tell whether the archive has a main entry.
      bool hasMainEntry() const;

      /**
       * Tell whether the archive has an illustration of a given size.
       *
       * @param size The size to look for; defaults to 48, like
       *             `getIllustrationItem(unsigned int)`.
       */
      bool hasIllustration(unsigned int size=48) const;

      /// Tell whether the archive has a fulltext index.
      bool hasFulltextIndex() const;

      /// Tell whether the archive has a title index.
      bool hasTitleIndex() const;


      /// Return a range over entries in `EntryOrder::pathOrder`.
      EntryRange<EntryOrder::pathOrder> iterByPath() const;

      /// Return a range over entries in `EntryOrder::titleOrder`.
      EntryRange<EntryOrder::titleOrder> iterByTitle() const;

      /// Return a range over entries in `EntryOrder::efficientOrder`.
      EntryRange<EntryOrder::efficientOrder> iterEfficient() const;

      /**
       * Return a range, in path order, of entries selected by `path`.
       *
       * NOTE(review): presumably the entries whose path starts with `path` —
       * confirm against the implementation.
       *
       * @param path The path to search for (taken by value).
       */
      EntryRange<EntryOrder::pathOrder>  findByPath(std::string path) const;

      /**
       * Return a range, in title order, of entries selected by `title`.
       *
       * NOTE(review): presumably the entries whose title starts with `title` —
       * confirm against the implementation.
       *
       * @param title The title to search for (taken by value).
       */
      EntryRange<EntryOrder::titleOrder> findByTitle(std::string title) const;

      /// Tell whether the archive has a checksum.
      bool hasChecksum() const;

      /// Return the checksum of the archive, as a string (see `hasChecksum()`).
      std::string getChecksum() const;

      /// Check the archive (presumably against its checksum — confirm).
      /// @return The result of the check.
      bool check() const;

      /**
       * Run one integrity check on the archive.
       *
       * Unlike `check()`, this member is not `const`.
       *
       * @param checkType The check to run.
       * @return The result of the check.
       */
      bool checkIntegrity(IntegrityCheck checkType);

      /// Tell whether the archive is made of several parts.
      bool isMultiPart() const;

      /// Tell whether the archive uses the new namespace scheme.
      bool hasNewNamespaceScheme() const;

      /// Return a copy of the shared pointer to the underlying `FileImpl`;
      /// the returned pointer shares ownership with this archive.
      std::shared_ptr<FileImpl> getImpl() const { return m_impl; }

      /// Return the maximum size of the dirent cache of this archive
      /// (presumably a number of dirents, cf. `setDirentCacheMaxSize()`).
      size_t getDirentCacheMaxSize() const;

      /// Return the current size of the dirent cache of this archive.
      size_t getDirentCacheCurrentSize() const;

      /**
       * Set the maximum size of the dirent cache of this archive.
       *
       * @param nbDirents The new maximum (a number of dirents, going by the
       *                  parameter name).
       */
      void setDirentCacheMaxSize(size_t nbDirents);

      // The following members are only declared when ZIM_PRIVATE is defined;
      // they are not part of the regular public API.
#ifdef ZIM_PRIVATE
      /// Return the number of clusters.
      cluster_index_type getClusterCount() const;
      /// Return the offset of the cluster at index `idx`.
      offset_type getClusterOffset(cluster_index_type idx) const;
      /// Return the index of the main entry.
      entry_index_type getMainEntryIndex() const;

      /// Get an entry from an explicit namespace character and a path.
      Entry getEntryByPathWithNamespace(char ns, const std::string& path) const;
#endif

    private:
      /// Shared implementation object; also handed to ranges/iterators via `getImpl()`.
      std::shared_ptr<FileImpl> m_impl;
  };

  /**
   * Convert an entry index expressed in `order` into the index that
   * `Archive::iterator::operator*` hands to the `Entry` constructor.
   *
   * NOTE(review): going by the name, the result is the index of the same
   * entry in path order — confirm against the definitions.
   *
   * The primary template is only declared here.
   *
   * @param file The archive implementation the index refers to.
   * @param idx The index, expressed in `order`.
   * @return The converted index.
   */
  template<EntryOrder order>
  LIBZIM_API entry_index_type _toPathOrder(const FileImpl& file, entry_index_type idx);

  // Declarations of the explicit specializations, one per EntryOrder value.
  // Their definitions are not part of this header.
  template<>
  LIBZIM_API entry_index_type _toPathOrder<EntryOrder::pathOrder>(const FileImpl& file, entry_index_type idx);
  template<>
  LIBZIM_API entry_index_type _toPathOrder<EntryOrder::titleOrder>(const FileImpl& file, entry_index_type idx);
  template<>
  LIBZIM_API entry_index_type _toPathOrder<EntryOrder::efficientOrder>(const FileImpl& file, entry_index_type idx);


  /**
   * A half-open range `[begin, end)` of entry indexes, expressed in `order`.
   *
   * The range keeps a shared pointer to the `FileImpl`, which it hands to the
   * iterators it creates. It can be used in a range-based `for` loop through
   * `begin()` / `end()`.
   */
  template<EntryOrder order>
  class LIBZIM_API Archive::EntryRange {
    public:
      /**
       * Build the range `[begin, end)`.
       *
       * @param file The archive implementation the indexes refer to.
       * @param begin Index of the first entry of the range.
       * @param end Index one past the last entry of the range. The
       *            constructor does not check that `begin <= end`.
       */
      explicit EntryRange(const std::shared_ptr<FileImpl> file, entry_index_type begin, entry_index_type end)
        : m_file(file),
          m_begin(begin),
          m_end(end)
      {}

      /// Iterator on the first entry of the range.
      iterator<order> begin() const
        { return iterator<order>(m_file, entry_index_type(m_begin)); }
      /// Past-the-end iterator of the range.
      iterator<order> end() const
        { return iterator<order>(m_file, entry_index_type(m_end)); }
      /// Number of entries in the range (`end - begin`), converted to `int`.
      int size() const
        { return m_end - m_begin; }

      /**
       * Return a sub-range of this range.
       *
       * The result is always contained in `[m_begin, m_end)`:
       *  - `start` entries are skipped from the beginning; a negative `start`
       *    is treated as 0 and a `start` past the end gives an empty range
       *    located at the end;
       *  - at most `maxResults` entries are kept; a `maxResults` that is
       *    negative or zero gives an empty range.
       *
       * The comparisons are made against the number of entries left rather
       * than by adding to an index: the indexes are assumed to be of an
       * unsigned type (the previous code already relied on that), so
       * `index + int` would wrap around for negative or very large arguments
       * and could produce a range starting before `m_begin` or ending before
       * its own beginning.
       *
       * @param start Number of entries to skip.
       * @param maxResults Maximum number of entries in the returned range.
       * @return The sub-range, sharing the same `FileImpl`.
       */
      EntryRange<order> offset(int start, int maxResults) const
      {
        // Empty (or inverted) range: nothing to offset into. This is what the
        // original clamping produced for such a range.
        if (m_begin >= m_end) {
          return EntryRange<order>(m_file, m_end, m_end);
        }

        const entry_index_type available = m_end - m_begin;
        entry_index_type begin = m_begin;
        if (start > 0) {
          const unsigned int skip = static_cast<unsigned int>(start);
          // `skip < available` guarantees that the addition cannot overflow.
          begin = (skip < available) ? entry_index_type(m_begin + skip) : m_end;
        }

        const entry_index_type remaining = m_end - begin;
        entry_index_type end = m_end;
        if (maxResults <= 0) {
          end = begin;
        } else if (static_cast<unsigned int>(maxResults) < remaining) {
          end = entry_index_type(begin + static_cast<unsigned int>(maxResults));
        }
        return EntryRange<order>(m_file, begin, end);
      }

    private:
      std::shared_ptr<FileImpl> m_file;  ///< Archive implementation shared with the iterators.
      entry_index_type m_begin;          ///< Index of the first entry.
      entry_index_type m_end;            ///< Index one past the last entry.
  };

  /**
   * Iterator over the entries of an archive, in the order given by `order`.
   *
   * The iterator stores an index and lazily creates the corresponding `Entry`
   * the first time it is dereferenced. The entry is cached until the iterator
   * is moved (`++` / `--`) or assigned to.
   */
  template<EntryOrder order>
  class LIBZIM_API Archive::iterator
  {
    public:
      /* This iterator is conceptually a bidirectional iterator.
       * But std *LegacyBidirectionalIterator* is also a *LegacyForwardIterator* and
       * it would impose on us that:
       * > Given a and b, dereferenceable iterators of type It:
       * >  If a and b compare equal (a == b is contextually convertible to true)
       * >  then either they are both non-dereferenceable or *a and *b are references bound to the same object.
       * and
       * > the LegacyForwardIterator requirements requires dereference to return a reference.
       * Which cannot be as we create the entry on demand.
       *
       * So we are stuck with declaring ourselves as `input_iterator`.
       */
      using iterator_category = std::input_iterator_tag;
      using value_type = Entry;
      // std::iterator_traits only exposes the member types when all five of
      // them (including difference_type) are declared.
      using difference_type = std::ptrdiff_t;
      using pointer = Entry*;
      using reference = Entry&;

      /**
       * Build an iterator pointing on the entry at index `idx`.
       *
       * @param file The archive implementation the index refers to.
       * @param idx The index of the entry, expressed in `order`.
       */
      explicit iterator(const std::shared_ptr<FileImpl> file, entry_index_type idx)
        : m_file(file),
          m_idx(idx),
          m_entry(nullptr)
      {}

      /// Copy constructor: the cached entry, if any, is duplicated.
      iterator(const iterator<order>& other)
        : m_file(other.m_file),
          m_idx(other.m_idx),
          m_entry(other.m_entry?new Entry(*other.m_entry):nullptr)
      {}

      /// Two iterators are equal when they share the same `FileImpl` and index.
      bool operator== (const iterator<order>& it) const
        { return m_file == it.m_file && m_idx == it.m_idx; }
      bool operator!= (const iterator<order>& it) const
        { return !operator==(it); }

      iterator<order>& operator=(iterator<order>&& it) = default;

      /**
       * Copy assignment.
       *
       * Takes a const reference so that const iterators can be assigned from.
       * The cached entry is dropped rather than copied; it is created again
       * on the next dereference.
       */
      iterator<order>& operator=(const iterator<order>& it)
      {
        m_entry.reset();
        m_idx = it.m_idx;
        m_file = it.m_file;
        return *this;
      }

      /// Move to the next entry; the cached entry is dropped.
      iterator<order>& operator++()
      {
        ++m_idx;
        m_entry.reset();
        return *this;
      }

      /// Post-increment: returns a copy of the iterator made before moving.
      iterator<order> operator++(int)
      {
        auto it = *this;
        operator++();
        return it;
      }

      /// Move to the previous entry; the cached entry is dropped.
      iterator<order>& operator--()
      {
        --m_idx;
        m_entry.reset();
        return *this;
      }

      /// Post-decrement: returns a copy of the iterator made before moving.
      iterator<order> operator--(int)
      {
        auto it = *this;
        operator--();
        return it;
      }

      /**
       * Return the entry the iterator points on.
       *
       * The entry is created on first use, from the index converted with
       * `_toPathOrder<order>`, and cached. The returned reference is bound to
       * that cache: it stays valid only until the iterator is moved,
       * assigned to or destroyed.
       */
      const Entry& operator*() const
      {
        if (!m_entry) {
          m_entry.reset(new Entry(m_file, _toPathOrder<order>(*m_file, m_idx)));
        }
        return *m_entry;
      }

      /// Member access on the entry; same lifetime rule as `operator*`.
      const Entry* operator->() const
      {
        operator*();
        return m_entry.get();
      }

    private:
      std::shared_ptr<FileImpl> m_file;        ///< Archive implementation the index refers to.
      entry_index_type m_idx;                  ///< Current index, expressed in `order`.
      mutable std::unique_ptr<Entry> m_entry;  ///< Lazily created entry; `mutable` as it is filled by the const `operator*`.
  };

  /**
   * Set of integrity checks, stored as a bitset of `IntegrityCheck::COUNT` bits.
   *
   * NOTE(review): presumably bit `i` selects the check whose `IntegrityCheck`
   * value is `i` — confirm against the implementation of `validate()`.
   */
  using IntegrityCheckList = std::bitset<static_cast<size_t>(IntegrityCheck::COUNT)>;

  /**
   * Validate the archive found at `zimPath`.
   *
   * @param zimPath The path of the archive to validate.
   * @param checksToRun The set of checks to run.
   * @return The outcome of the validation (presumably `true` when every
   *         requested check passes — confirm).
   */
  bool LIBZIM_API validate(const std::string& zimPath, IntegrityCheckList checksToRun);
}

#endif // ZIM_ARCHIVE_H