// Program listing for file archive.h
// (documentation page for include/zim/archive.h)
/*
* Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
* Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ARCHIVE_H
#define ZIM_ARCHIVE_H
#include "zim.h"
#include "entry.h"
#include "illustration.h"
#include "uuid.h"
#include <bitset>
#include <cstddef>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <vector>
namespace zim
{
class FileImpl;
/**
 * Selects the ordering an `Archive::EntryRange` / `Archive::iterator` walks
 * the entries in. It is used as a template argument by those two classes.
 */
enum class EntryOrder {
  pathOrder,       // ordering of the ranges returned by iterByPath()/findByPath()
  titleOrder,      // ordering of the ranges returned by iterByTitle()/findByTitle()
  efficientOrder   // ordering of the range returned by iterEfficient()
};
// Cluster cache configuration.
//
// These are free functions rather than members of `Archive`, so they do not
// take any particular archive as argument.
// NOTE(review): presumably the cluster cache is shared by all opened archives
// and the sizes are expressed in bytes (the setter's parameter is named
// `sizeInB`) -- confirm against the implementation.

// Returns the configured maximum size of the cluster cache.
size_t LIBZIM_API getClusterCacheMaxSize();
// Returns the current size of the cluster cache.
size_t LIBZIM_API getClusterCacheCurrentSize();
// Sets the maximum size of the cluster cache to `sizeInB`.
void LIBZIM_API setClusterCacheMaxSize(size_t sizeInB);
/**
 * Handle on a zim archive.
 *
 * The only data member is a `std::shared_ptr<FileImpl>`, and no copy operation
 * is declared here, so copies of an `Archive` share the same `FileImpl`.
 */
class LIBZIM_API Archive
{
  public:
    // Range / iterator types, parameterised by the ordering they walk.
    // Both are defined after this class.
    template<EntryOrder order> class EntryRange;
    template<EntryOrder order> class iterator;

    using IllustrationInfos = std::vector<IllustrationInfo>;

    // --- Construction from a file name ------------------------------------
    explicit Archive(const std::string& fname);
    Archive(const std::string& fname, OpenConfig openConfig);

#ifndef _WIN32
    // --- Construction from file descriptors -------------------------------
    // Only declared when `_WIN32` is not defined.
    // NOTE(review): `offset`/`size` presumably delimit the archive inside the
    // descriptor's file -- confirm against the implementation.
    explicit Archive(int fd);
    Archive(int fd, OpenConfig openConfig);
    Archive(int fd, offset_type offset, size_type size);
    Archive(int fd, offset_type offset, size_type size, OpenConfig openConfig);
    explicit Archive(FdInput fd);
    Archive(FdInput fd, OpenConfig openConfig);
    explicit Archive(const std::vector<FdInput>& fds);
    Archive(const std::vector<FdInput>& fds, OpenConfig openConfig);
#endif

    // --- General information ----------------------------------------------
    const std::string& getFilename() const;
    size_type getFilesize() const;

    // Entry counters. How the four counts differ is decided by the
    // implementation, which is not part of this header.
    entry_index_type getAllEntryCount() const;
    entry_index_type getEntryCount() const;
    entry_index_type getArticleCount() const;
    entry_index_type getMediaCount() const;

    Uuid getUuid() const;

    // --- Metadata ----------------------------------------------------------
    std::string getMetadata(const std::string& name) const;
    Item getMetadataItem(const std::string& name) const;
    std::vector<std::string> getMetadataKeys() const;

    // --- Illustrations -----------------------------------------------------
    // `size` defaults to 48 when omitted.
    Item getIllustrationItem(const IllustrationInfo& ii) const;
    Item getIllustrationItem(unsigned int size=48) const;
    // Marked with the project's DEPRECATED macro.
    DEPRECATED std::set<unsigned int> getIllustrationSizes() const;
    IllustrationInfos getIllustrationInfos() const;
    IllustrationInfos getIllustrationInfos(uint32_t w, uint32_t h, float minScale) const;

    // --- Entry lookup ------------------------------------------------------
    // Each lookup exists in an index-based and (except the cluster-order one)
    // a string-based flavour.
    Entry getEntryByPath(entry_index_type idx) const;
    Entry getEntryByPath(const std::string& path) const;
    Entry getEntryByTitle(entry_index_type idx) const;
    Entry getEntryByTitle(const std::string& title) const;
    Entry getEntryByClusterOrder(entry_index_type idx) const;
    Entry getMainEntry() const;
    Entry getRandomEntry() const;

    // Returns true when `getEntryByPath(path)` completes without throwing,
    // and false when it throws anything at all.
    bool hasEntryByPath(const std::string& path) const {
      try {
        getEntryByPath(path);
      } catch (...) {
        return false;
      }
      return true;
    }

    // Returns true when `getEntryByTitle(title)` completes without throwing,
    // and false when it throws anything at all.
    bool hasEntryByTitle(const std::string& title) const {
      try {
        getEntryByTitle(title);
      } catch (...) {
        return false;
      }
      return true;
    }

    bool hasMainEntry() const;
    bool hasIllustration(unsigned int size=48) const;
    bool hasFulltextIndex() const;
    bool hasTitleIndex() const;

    // --- Iteration ---------------------------------------------------------
    // The returned range type encodes the ordering in its template argument.
    EntryRange<EntryOrder::pathOrder> iterByPath() const;
    EntryRange<EntryOrder::titleOrder> iterByTitle() const;
    EntryRange<EntryOrder::efficientOrder> iterEfficient() const;
    EntryRange<EntryOrder::pathOrder> findByPath(std::string path) const;
    EntryRange<EntryOrder::titleOrder> findByTitle(std::string title) const;

    // --- Integrity ---------------------------------------------------------
    bool hasChecksum() const;
    std::string getChecksum() const;
    bool check() const;
    // Note: unlike its neighbours, this member is not declared `const`.
    bool checkIntegrity(IntegrityCheck checkType);

    bool isMultiPart() const;
    bool hasNewNamespaceScheme() const;

    // Hands out a copy of the shared pointer to the implementation object.
    std::shared_ptr<FileImpl> getImpl() const { return m_impl; }

    // --- Dirent cache ------------------------------------------------------
    // The setter's parameter is named `nbDirents`, i.e. a number of dirents.
    size_t getDirentCacheMaxSize() const;
    size_t getDirentCacheCurrentSize() const;
    void setDirentCacheMaxSize(size_t nbDirents);

#ifdef ZIM_PRIVATE
    // Only declared when `ZIM_PRIVATE` is defined.
    cluster_index_type getClusterCount() const;
    offset_type getClusterOffset(cluster_index_type idx) const;
    entry_index_type getMainEntryIndex() const;
    Entry getEntryByPathWithNamespace(char ns, const std::string& path) const;
#endif

  private:
    std::shared_ptr<FileImpl> m_impl;
};
// Index conversion helper used by `Archive::iterator<order>::operator*`:
// the iterator passes its current index `idx` through `_toPathOrder<order>`
// and gives the result to the `Entry` constructor.
// NOTE(review): as the name suggests, this presumably maps an index expressed
// in `order` to the corresponding path-order index -- confirm against the
// definitions, which are not in this header.
//
// Only declarations are provided here: the primary template plus one explicit
// specialization per `EntryOrder` value.
template<EntryOrder order>
LIBZIM_API entry_index_type _toPathOrder(const FileImpl& file, entry_index_type idx);
template<>
LIBZIM_API entry_index_type _toPathOrder<EntryOrder::pathOrder>(const FileImpl& file, entry_index_type idx);
template<>
LIBZIM_API entry_index_type _toPathOrder<EntryOrder::titleOrder>(const FileImpl& file, entry_index_type idx);
template<>
LIBZIM_API entry_index_type _toPathOrder<EntryOrder::efficientOrder>(const FileImpl& file, entry_index_type idx);
/**
 * A half-open range `[begin, end)` of entry indexes, expressed in the ordering
 * selected by `order`.
 *
 * The range keeps a `std::shared_ptr` to the `FileImpl` it was built from and
 * hands that pointer to every iterator it creates.
 */
template<EntryOrder order>
class LIBZIM_API Archive::EntryRange {
  public:
    /**
     * @param file  implementation object the indexes refer to.
     * @param begin index of the first entry of the range.
     * @param end   index one past the last entry of the range.
     */
    explicit EntryRange(const std::shared_ptr<FileImpl> file, entry_index_type begin, entry_index_type end)
      : m_file(file),
        m_begin(begin),
        m_end(end)
    {}

    /** Iterator positioned on the first index of the range. */
    iterator<order> begin() const
    { return iterator<order>(m_file, entry_index_type(m_begin)); }

    /** Iterator positioned one past the last index of the range. */
    iterator<order> end() const
    { return iterator<order>(m_file, entry_index_type(m_end)); }

    /** Number of indexes in the range (`end - begin`), converted to `int`. */
    int size() const
    { return m_end - m_begin; }

    /**
     * Build a sub-range of this range.
     *
     * @param start      number of leading entries to skip. Values larger than
     *                   the range size yield an empty range positioned at the
     *                   end; negative values are treated as 0.
     * @param maxResults maximum number of entries in the result. Zero or
     *                   negative values yield an empty range.
     * @return a range that always lies within `[begin, end)` of this range.
     *
     * The counts are clamped *before* being added to an index. The previous
     * implementation computed `m_begin + start` and `begin + maxResults`
     * directly; with negative arguments (or sums exceeding the index type)
     * that arithmetic could wrap and produce a range starting before
     * `m_begin` or ending before its own beginning.
     */
    EntryRange<order> offset(int start, int maxResults) const
    {
      // Number of entries available in this range (0 for a malformed range).
      const entry_index_type length = (m_begin < m_end) ? entry_index_type(m_end - m_begin)
                                                         : entry_index_type(0);

      // How many leading entries are actually skipped: clamp to [0, length].
      entry_index_type skipped = 0;
      if (start > 0) {
        const entry_index_type wanted = static_cast<entry_index_type>(start);
        skipped = (wanted < length) ? wanted : length;
      }

      // How many entries are actually kept: clamp to [0, length - skipped].
      const entry_index_type remaining = length - skipped;
      entry_index_type kept = 0;
      if (maxResults > 0) {
        const entry_index_type wanted = static_cast<entry_index_type>(maxResults);
        kept = (wanted < remaining) ? wanted : remaining;
      }

      // Both additions are bounded by m_end, so neither can wrap.
      const entry_index_type begin = m_begin + skipped;
      const entry_index_type end = begin + kept;
      return EntryRange<order>(m_file, begin, end);
    }

  private:
    std::shared_ptr<FileImpl> m_file;  // implementation shared with the iterators
    entry_index_type m_begin;          // first index of the range
    entry_index_type m_end;            // one past the last index of the range
};
template<EntryOrder order>
class LIBZIM_API Archive::iterator
{
public:
/* SuggestionIterator is conceptually a bidirectional iterator.
* But std *LegayBidirectionalIterator* is also a *LegacyForwardIterator* and
* it would impose us that :
* > Given a and b, dereferenceable iterators of type It:
* > If a and b compare equal (a == b is contextually convertible to true)
* > then either they are both non-dereferenceable or *a and *b are references bound to the same object.
* and
* > the LegacyForwardIterator requirements requires dereference to return a reference.
* Which cannot be as we create the entry on demand.
*
* So we are stick with declaring ourselves at `input_iterator`.
*/
using iterator_category = std::input_iterator_tag;
using value_type = Entry;
using pointer = Entry*;
using reference = Entry&;
explicit iterator(const std::shared_ptr<FileImpl> file, entry_index_type idx)
: m_file(file),
m_idx(idx),
m_entry(nullptr)
{}
iterator(const iterator<order>& other)
: m_file(other.m_file),
m_idx(other.m_idx),
m_entry(other.m_entry?new Entry(*other.m_entry):nullptr)
{}
bool operator== (const iterator<order>& it) const
{ return m_file == it.m_file && m_idx == it.m_idx; }
bool operator!= (const iterator<order>& it) const
{ return !operator==(it); }
iterator<order>& operator=(iterator<order>&& it) = default;
iterator<order>& operator=(iterator<order>& it)
{
m_entry.reset();
m_idx = it.m_idx;
m_file = it.m_file;
return *this;
}
iterator<order>& operator++()
{
++m_idx;
m_entry.reset();
return *this;
}
iterator<order> operator++(int)
{
auto it = *this;
operator++();
return it;
}
iterator<order>& operator--()
{
--m_idx;
m_entry.reset();
return *this;
}
iterator<order> operator--(int)
{
auto it = *this;
operator--();
return it;
}
const Entry& operator*() const
{
if (!m_entry) {
m_entry.reset(new Entry(m_file, _toPathOrder<order>(*m_file, m_idx)));
}
return *m_entry;
}
const Entry* operator->() const
{
operator*();
return m_entry.get();
}
private:
std::shared_ptr<FileImpl> m_file;
entry_index_type m_idx;
mutable std::unique_ptr<Entry> m_entry;
};
// Set of integrity checks: a bitset with `IntegrityCheck::COUNT` bits.
// NOTE(review): presumably each bit position corresponds to the integral value
// of one `IntegrityCheck` enumerator -- confirm against the implementation.
typedef std::bitset<size_t(IntegrityCheck::COUNT)> IntegrityCheckList;
// Takes the path of a zim file and the set of checks to run.
// NOTE(review): presumably returns true when all the requested checks pass --
// the definition is not in this header, confirm there.
bool LIBZIM_API validate(const std::string& zimPath, IntegrityCheckList checksToRun);
}
#endif // ZIM_ARCHIVE_H