# Copyright (C) 2015-2019 OpenIO SAS, as part of OpenIO SDS
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3.0 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library.
from oio.common import exceptions as exc
from oio.common.logger import get_logger
from oio.blob.client import BlobClient
from oio.container.client import ContainerClient, extract_chunk_qualities
from oio.common.constants import OIO_VERSION
from oio.common.fullpath import encode_fullpath
def compare_chunk_quality(current, candidate):
    """
    Compare the qualities of two chunks.

    Both arguments are "quality" dictionaries. Only the keys
    'final_dist', 'final_slot' and 'expected_slot' are read, and all of
    them are optional ('final_dist' defaults to 1).

    :param current: quality of the chunk currently in place.
    :param candidate: quality of the chunk proposed as a replacement.
    :returns: > 0 if the candidate is better quality,
        0 if they are equal, < 0 if the candidate is worse.
    """
    # Compare distance between chunks.
    balance = candidate.get('final_dist', 1) - current.get('final_dist', 1)

    # Compare use of fallback mechanisms. The reference slot is always
    # the one expected by the *current* chunk.
    expected_slot = current.get('expected_slot')
    current_ok = current.get('final_slot') == expected_slot
    candidate_ok = candidate.get('final_slot') == expected_slot
    if not current_ok and candidate_ok:
        # The current slot is not the expected one,
        # and the candidate slot is the expected one.
        balance += 1
    elif current_ok and not candidate_ok:
        # The current slot is the expected one,
        # but we are proposed to replace it with another one.
        # The final balance may still be positive if the distance
        # has been drastically increased.
        balance -= 1
    return balance
def ensure_better_chunk_qualities(current_chunks, candidates, threshold=1):
    """
    Ensure that the set of spare chunks is really an improvement over
    the set of current chunks, raise SpareChunkException if it is not.

    Current chunks and candidates are paired in iteration order; extra
    items on either side are ignored (``zip`` semantics).

    :param current_chunks: iterable of objects exposing a `quality`
        attribute (a quality dictionary).
    :param candidates: dictionary whose values are the quality
        dictionaries of the proposed spare chunks.
    :param threshold: minimum cumulated balance required to accept
        the candidates.
    :returns: the cumulated quality balance (>= threshold).
    :raises SpareChunkException: if the balance is below the threshold.
    """
    balance = sum(
        compare_chunk_quality(current.quality, candidate_quality)
        for current, candidate_quality in zip(current_chunks,
                                              candidates.values()))
    if balance < threshold:
        raise exc.SpareChunkException(
            "the spare chunks found would not improve the quality "
            "(balance=%d, threshold=%d)" % (balance, threshold))
    return balance
def cmp(x, y):
    """
    cmp function as a workaround for python3.

    :returns: -1 if x < y, 0 if x == y, 1 if x > y.
    """
    # Booleans are integers: True - False == 1, False - True == -1.
    return (x > y) - (x < y)
class Content(object):
    """
    Client-side view of an object stored in a container: its metadata,
    its chunks, and the clients required to manipulate them.

    `create`, `fetch` and `rebuild_chunk` are not implemented here and
    raise NotImplementedError.

    NOTE(review): several lines of this class were missing from the
    extracted source (call targets, trailing arguments, `try`/`else`
    keywords). They have been reconstructed and are individually marked
    with "NOTE(review)"; confirm them against the upstream file.
    """

    # FIXME: no need for container_id since we have account and container name
    def __init__(self, conf, container_id, metadata, chunks, storage_method,
                 account, container_name, blob_client=None,
                 container_client=None, logger=None):
        """
        :param conf: configuration, passed to the logger and the clients
            that are created when not provided.
        :param container_id: identifier of the container.
        :param metadata: dictionary of object metadata. The keys "id",
            "name", "length", "version", "hash" and "chunk_method" are
            required; "full_path" is optional.
        :param chunks: raw chunk descriptions, wrapped in a ChunksHelper.
        :param storage_method: stored as-is on the instance.
        :param account: name of the account.
        :param container_name: name of the container.
        :param blob_client: optional client used for chunk operations.
        :param container_client: optional client used for container
            operations.
        :param logger: optional logger.
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = logger or get_logger(self.conf)
        self.blob_client = (blob_client or BlobClient(conf))
        # NOTE(review): the `logger=` keyword is reconstructed, confirm.
        self.container_client = (container_client
                                 or ContainerClient(self.conf,
                                                    logger=self.logger))

        # FIXME: all these may be properties
        self.content_id = self.metadata["id"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.chunk_method = self.metadata["chunk_method"]
        self.account = account
        self.container_name = container_name
        if 'full_path' in self.metadata:
            self.full_path = metadata['full_path']
        else:
            # NOTE(review): the last argument is reconstructed, confirm.
            self.full_path = encode_fullpath(
                self.account, self.container_name, self.path, self.version,
                self.content_id)

    @property
    def mime_type(self):
        """MIME type of the object, read from the metadata."""
        return self.metadata["mime_type"]

    @mime_type.setter
    def mime_type(self, value):
        self.metadata["mime_type"] = value

    @property
    def policy(self):
        """Storage policy of the object, read from the metadata."""
        return self.metadata["policy"]

    @policy.setter
    def policy(self, value):
        self.metadata["policy"] = value

    @property
    def properties(self):
        """Properties of the object, or None if there are none."""
        return self.metadata.get('properties')

    @properties.setter
    def properties(self, value):
        if not isinstance(value, dict):
            raise ValueError("'value' must be a dict")
        self.metadata['properties'] = value

    def _get_spare_chunk(self, chunks_notin, chunks_broken,
                         max_attempts=3, check_quality=False,
                         fake_excluded_chunks=None, **kwargs):
        """
        Ask the container service for spare chunks.

        :param chunks_notin: chunks (Chunk objects) sent as "notin".
        :param chunks_broken: chunks (Chunk objects) sent as "broken".
        :param max_attempts: number of requests to try before giving up.
        :param check_quality: if True, require the spare chunks to be
            an improvement over `chunks_broken`.
        :param fake_excluded_chunks: optional raw chunk dictionaries.
            Each one is copied and given the hash, position and size of
            the first broken chunk.
        :returns: a tuple (list of spare chunk URLs, qualities).
        :raises SpareChunkException: when all attempts failed.
        """
        notin = ChunksHelper(chunks_notin, False).raw()
        broken = ChunksHelper(chunks_broken, False).raw()
        if fake_excluded_chunks:
            for fake_excluded_chunk in fake_excluded_chunks:
                chunk = fake_excluded_chunk.copy()
                chunk['hash'] = broken[0]['hash']
                chunk['pos'] = broken[0]['pos']
                chunk['size'] = broken[0]['size']
                # NOTE(review): reconstructed line, confirm the copy is
                # appended to the broken list.
                broken.append(chunk)
        spare_data = {
            "notin": notin,
            "broken": broken
        }
        last_exc = None
        bal = 0
        for attempt in range(max_attempts):
            try:
                spare_resp = self.container_client.content_spare(
                    cid=self.container_id, path=self.content_id,
                    data=spare_data, stgpol=self.policy, **kwargs)
                quals = extract_chunk_qualities(
                    spare_resp.get('properties', {}), raw=True)
                if check_quality:
                    bal = ensure_better_chunk_qualities(chunks_broken, quals)
                break
            except (exc.ClientException, exc.SpareChunkException) as err:
                # NOTE(review): log level reconstructed, confirm.
                self.logger.info(
                    "Failed to find spare chunk (attempt %d/%d): %s",
                    attempt + 1, max_attempts, err)
                last_exc = err
                # TODO(FVE): exponential backoff?
        else:
            # No attempt succeeded (the loop ended without `break`).
            if isinstance(last_exc, exc.SpareChunkException):
                exc.reraise(exc.SpareChunkException, last_exc)
            raise exc.SpareChunkException(
                "No spare chunk: %s" % str(last_exc))

        # NOTE(review): the "id" key is reconstructed, confirm.
        url_list = [chunk["id"] for chunk in spare_resp["chunks"]]

        if check_quality:
            self.logger.info("Found %d spare chunks, that will improve "
                             "metachunk quality by %d", len(url_list), bal)

        return url_list, quals

    def _add_raw_chunk(self, current_chunk, url, **kwargs):
        """
        Register a new chunk at `url`, with the same checksum, size and
        position as `current_chunk`.
        """
        data = {'type': 'chunk',
                'id': url,
                'hash': current_chunk.checksum,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}

        # NOTE(review): callee name reconstructed, confirm.
        self.container_client.container_raw_insert(
            data, cid=self.container_id, **kwargs)

    def _update_spare_chunk(self, current_chunk, new_url, **kwargs):
        """
        Replace the URL of `current_chunk` by `new_url` in the container
        (checksum, size and position are kept).
        """
        old = {'type': 'chunk',
               'id': current_chunk.url,
               'hash': current_chunk.checksum,
               'size': current_chunk.size,
               'pos': current_chunk.pos,
               'content': self.content_id}
        # Same description, only the identifier changes.
        new = dict(old, id=new_url)
        # NOTE(review): callee name reconstructed, confirm.
        self.container_client.container_raw_update(
            [old], [new], cid=self.container_id, **kwargs)

    def _generate_sysmeta(self):
        """
        Build the dictionary of system metadata of this object.

        :returns: a new dictionary.
        """
        return {
            'id': self.content_id,
            'version': self.version,
            'policy': self.policy,
            'mime_type': self.mime_type,
            'chunk_method': self.chunk_method,
            'chunk_size': self.metadata['chunk_size'],
            'oio_version': OIO_VERSION,
            'full_path': self.full_path,
            'content_path': self.path,
            'container_id': self.container_id,
        }

    def _create_object(self, **kwargs):
        """Declare the object, with its chunks and properties."""
        data = {'chunks': self.chunks.raw(),
                'properties': self.properties}
        # NOTE(review): callee name and trailing **kwargs reconstructed,
        # confirm.
        self.container_client.content_create(
            cid=self.container_id, path=self.path, content_id=self.content_id,
            stgpol=self.policy, size=self.length, checksum=self.checksum,
            version=self.version, chunk_method=self.chunk_method,
            mime_type=self.mime_type, data=data,
            **kwargs)

    # NOTE(review): the end of this signature was missing from the
    # extracted source; `allow_frozen_container` is reconstructed, confirm.
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                      allow_frozen_container=False):
        """Not implemented in this class."""
        raise NotImplementedError()

    def create(self, stream, **kwargs):
        """Not implemented in this class."""
        raise NotImplementedError()

    def fetch(self):
        """Not implemented in this class."""
        raise NotImplementedError()

    def delete(self, **kwargs):
        """Delete the object from its container."""
        # NOTE(review): callee name reconstructed, confirm.
        self.container_client.content_delete(
            cid=self.container_id, path=self.path, **kwargs)

    def move_chunk(self, chunk_id, check_quality=False, dry_run=False,
                   max_attempts=3, **kwargs):
        """
        Move a chunk to another place. Optionally ensure that the
        new place is an improvement over the current one.

        :param chunk_id: identifier of the chunk, or a Chunk object.
        :param check_quality: passed to the spare chunk lookup.
        :param dry_run: if True, only log what would be done. The
            in-memory chunk is updated in both cases.
        :param max_attempts: passed to the spare chunk lookup.
        :returns: the raw description of the moved chunk.
        :raises OrphanChunk: if the chunk does not belong to this object.
        """
        if isinstance(chunk_id, Chunk):
            current_chunk = chunk_id
            chunk_id = current_chunk.id
        else:
            current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None or current_chunk not in self.chunks:
            raise exc.OrphanChunk("Chunk not found in content")

        # NOTE(review): the filter/exclude criteria are reconstructed
        # (other chunks at the same metaposition), confirm.
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(
                id=chunk_id).all()

        spare_urls, qualities = self._get_spare_chunk(
            other_chunks, [current_chunk], check_quality=check_quality,
            max_attempts=max_attempts, **kwargs)

        if dry_run:
            self.logger.info("Dry-run: would copy chunk from %s to %s",
                             current_chunk.url, spare_urls[0])
        else:
            self.logger.info("Copying chunk from %s to %s",
                             current_chunk.url, spare_urls[0])
            # TODO(FVE): retry to copy (max_attempts times)
            # NOTE(review): callee name reconstructed, confirm.
            self.blob_client.chunk_copy(
                current_chunk.url, spare_urls[0], chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id, **kwargs)

            self._update_spare_chunk(current_chunk, spare_urls[0])

            # Best effort: the new copy is already referenced, a failure
            # to remove the old one is only logged.
            try:
                self.blob_client.chunk_delete(current_chunk.url, **kwargs)
            except Exception as err:
                self.logger.warning(
                    "Failed to delete chunk %s: %s", current_chunk.url, err)

        current_chunk.url = spare_urls[0]
        current_chunk.quality = qualities[current_chunk.url]

        return current_chunk.raw()

    def move_linked_chunk(self, chunk_id, from_url):
        """
        Link the chunk at `from_url` to a new location and reference the
        new location instead of the chunk identified by `chunk_id`.

        :returns: the raw description of the moved chunk.
        :raises OrphanChunk: if the chunk does not belong to this object.
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")

        _, to_url = self.blob_client.chunk_link(from_url, None, self.full_path)
        self.logger.debug("link chunk %s from %s to %s", chunk_id, from_url,
                          to_url)

        self._update_spare_chunk(current_chunk, to_url)

        # Best effort: a failure to remove the old chunk is only logged.
        try:
            # NOTE(review): this call is reconstructed, confirm.
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception as err:
            self.logger.warning(
                "Failed to delete chunk %s: %s", current_chunk.url, err)

        current_chunk.url = to_url

        return current_chunk.raw()
class Chunk(object):
    """
    Wrapper around the raw dictionary describing a chunk.

    The position ('pos' key) is either "<metapos>" or
    "<metapos>.<subpos>"; in the second case the chunk is flagged
    as `ec`.

    Chunks are ordered by metaposition, then by subposition (`ec` chunks)
    or by identifier (other chunks).
    """

    def __init__(self, chunk):
        """
        :param chunk: raw description of the chunk (dictionary). It is
            kept by reference, not copied. The 'pos' key is required.
        """
        self._data = chunk
        self._pos = chunk['pos']
        parts = self._pos.split('.', 1)
        self._metapos = int(parts[0])
        if len(parts) > 1:
            self._ec = True
            self._subpos = int(parts[1])
        else:
            self._ec = False
            # Always define the attribute, so that reading `subpos`
            # does not raise AttributeError.
            self._subpos = None

    @property
    def ec(self):
        """True if the position has a subposition."""
        return self._ec

    @property
    def url(self):
        """URL of the chunk ('url' key, falling back on 'id')."""
        return self._data.get('url') or self._data['id']

    @url.setter
    def url(self, new_url):
        self._data["url"] = new_url
        self._data["real_url"] = new_url

    @property
    def pos(self):
        """Position of the chunk, as found in the raw description."""
        return self._pos

    @property
    def metapos(self):
        """Metaposition of the chunk (integer)."""
        return self._metapos

    @property
    def subpos(self):
        """Subposition of the chunk (integer), None if there is none."""
        return self._subpos

    @property
    def size(self):
        """Size of the chunk ('size' key)."""
        return self._data["size"]

    @size.setter
    def size(self, new_size):
        self._data["size"] = new_size

    @property
    def id(self):
        """Identifier of the chunk: the last path component of its URL."""
        return self.url.split('/')[-1]

    @property
    def host(self):
        """Third '/'-separated component of the URL."""
        return self.url.split('/')[2]

    @property
    def checksum(self):
        """Checksum of the chunk ('hash' key), in upper case."""
        return self._data["hash"].upper()

    @checksum.setter
    def checksum(self, new_checksum):
        self._data["hash"] = new_checksum

    @property
    def data(self):
        """The raw description of the chunk."""
        return self._data

    @property
    def imperfections(self):
        """
        List imperfections of this chunk.
        Tell how much the quality of this chunk can be improved.

        :returns: a list of strings, one per criterion that can be
            improved (empty if all criteria are met).
        """
        qual = self.quality
        imperfections = list()
        if qual.get('final_slot') != qual.get('expected_slot'):
            imperfections.append('slot %s != %s' % (
                qual.get('final_slot'), qual.get('expected_slot')))
        if qual['final_dist'] < qual['expected_dist']:
            imperfections.append('dist %d < %d' % (
                qual['final_dist'], qual['expected_dist']))
        return imperfections

    @property
    def quality(self):
        """
        Get the "quality" of the chunk, i.e. a dictionary telling how it
        matched the request criteria when it has been selected.

        If the chunk has no quality yet, a default one is stored in the
        raw description and returned.
        """
        return self._data.setdefault('quality',
                                     {'final_dist': 0,
                                      'expected_dist': 1})

    @quality.setter
    def quality(self, value):
        self._data['quality'] = value

    def raw(self):
        """Get the raw description of the chunk."""
        return self._data

    def __str__(self):
        return "[Chunk %s (%s)]" % (self.url, self.pos)

    def __repr__(self):
        return str(self)

    def __cmp__(self, other):
        if self.metapos != other.metapos:
            return cmp(self.metapos, other.metapos)
        if not self.ec:
            return cmp(self.id, other.id)
        return cmp(self.subpos, other.subpos)

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    # NOTE: with Python 3, defining __eq__ without __hash__ makes
    # the instances unhashable.
    def __eq__(self, other):
        if not isinstance(other, Chunk):
            # Let Python fall back on its default (identity) comparison.
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Chunk):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0
class ChunksHelper(object):
    """
    Container for a list of chunks, with helpers to select some of them.
    """

    def __init__(self, chunks, raw_chunk=True):
        """
        :param chunks: list of chunks.
        :param raw_chunk: if True, `chunks` holds raw descriptions which
            are wrapped in Chunk objects; if False, `chunks` already
            holds Chunk objects and the list is used as-is (not copied).
        """
        if raw_chunk:
            self.chunks = [Chunk(c) for c in chunks]
        else:
            self.chunks = chunks
        # NOTE(review): this line is not visible in the extracted source
        # and has been reconstructed; confirm that chunks are sorted
        # (in place) at construction.
        self.chunks.sort()

    def filter(self, id=None, pos=None, metapos=None, subpos=None, host=None,
               url=None):
        """
        Select the chunks matching ALL the specified criteria
        (a criterion set to None is ignored).

        :param pos: converted to a string before being compared.
        :returns: a new ChunksHelper.
        """
        found = []
        for c in self.chunks:
            if id is not None and c.id != id:
                continue
            if pos is not None and c.pos != str(pos):
                continue
            if metapos is not None and c.metapos != metapos:
                continue
            if subpos is not None and c.subpos != subpos:
                continue
            if host is not None and c.host != host:
                continue
            if url is not None and c.url != url:
                continue
            found.append(c)
        return ChunksHelper(found, False)

    def exclude(self, id=None, pos=None, metapos=None, subpos=None, host=None,
                url=None):
        """
        Select the chunks matching NONE of the specified criteria
        (a criterion set to None is ignored).

        :param pos: converted to a string before being compared.
        :returns: a new ChunksHelper.
        """
        found = []
        for c in self.chunks:
            if id is not None and c.id == id:
                continue
            if pos is not None and c.pos == str(pos):
                continue
            if metapos is not None and c.metapos == metapos:
                continue
            if subpos is not None and c.subpos == subpos:
                continue
            if host is not None and c.host == host:
                continue
            if url is not None and c.url == url:
                continue
            found.append(c)
        return ChunksHelper(found, False)

    def one(self):
        """
        :returns: the chunk if there is exactly one, None otherwise.
        """
        if len(self.chunks) != 1:
            return None
        return self.chunks[0]

    def all(self):
        """
        :returns: the underlying list of chunks (not a copy).
        """
        return self.chunks

    def raw(self):
        """
        :returns: a new list with the raw description of each chunk.
        """
        return [c.raw() for c in self.chunks]

    def __len__(self):
        return len(self.chunks)

    def __iter__(self):
        return iter(self.chunks)

    def __getitem__(self, item):
        return self.chunks[item]