Source code for oio.blob.rebuilder

# Copyright (C) 2019 OpenIO SAS, as part of OpenIO SDS
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from datetime import datetime
from socket import gethostname

from oio.blob.operator import ChunkOperator
from oio.common.easy_value import float_value, int_value, true_value
from oio.common.exceptions import OioException, OrphanChunk, RetryLater
from oio.common.green import time
from oio.common.tool import Tool, ToolWorker
from oio.event.evob import EventTypes
from oio.rdir.client import RdirClient


class BlobRebuilder(Tool):
    """
    Rebuild chunks.
    """

    DEFAULT_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_DISTRIBUTED_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_RDIR_FETCH_LIMIT = 100
    DEFAULT_RDIR_TIMEOUT = 60.0
    DEFAULT_ALLOW_FROZEN_CT = False
    DEFAULT_ALLOW_SAME_RAWX = True
    DEFAULT_TRY_CHUNK_DELETE = False
    DEFAULT_DRY_RUN = False

    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(BlobRebuilder, self).__init__(conf, **kwargs)

        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0

        # input
        self.input_file = input_file
        self.rawx_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(
            self.conf.get('rdir_fetch_limit'), self.DEFAULT_RDIR_FETCH_LIMIT)
        self.rdir_shuffle_chunks = true_value(conf.get('rdir_shuffle_chunks'))
        self.rdir_timeout = float_value(
            conf.get('rdir_timeout'), self.DEFAULT_RDIR_TIMEOUT)
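
    # A minimal usage sketch, not part of the original module. The conf keys
    # and the rawx service id below are hypothetical illustration values:
    #
    #     conf = {'namespace': 'OPENIO', 'rdir_fetch_limit': 200}
    #     rebuilder = BlobRebuilder(conf, service_id='OPENIO-rawx-1')
    #     rebuilder.run()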

    @staticmethod
    def items_from_task_event(task_event):
        namespace = task_event['url']['ns']
        container_id = task_event['url']['id']
        content_id = task_event['url']['content']
        for chunk_id_or_pos in task_event['data']['missing_chunks']:
            yield namespace, container_id, content_id, str(chunk_id_or_pos)
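
    # Illustration (hypothetical identifiers): a CONTENT_BROKEN task event
    # whose 'data' holds {'missing_chunks': ['0', '1']} yields one item per
    # missing chunk:
    #     ('OPENIO', <container_id>, <content_id>, '0')
    #     ('OPENIO', <container_id>, <content_id>, '1')
    # task_event_from_item() below builds the inverse mapping.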

    @staticmethod
    def task_event_from_item(item):
        namespace, container_id, content_id, chunk_id_or_pos = item
        return \
            {
                'when': time.time(),
                'event': EventTypes.CONTENT_BROKEN,
                'url': {
                    'ns': namespace,
                    'id': container_id,
                    'content': content_id
                },
                'data': {
                    'missing_chunks': [
                        chunk_id_or_pos
                    ]
                }
            }

    @staticmethod
    def tasks_res_from_res_event(res_event):
        namespace = res_event['url']['ns']
        container_id = res_event['url']['id']
        content_id = res_event['url']['content']
        for chunk_rebuilt in res_event['data']['chunks_rebuilt']:
            yield (namespace, container_id, content_id,
                   str(chunk_rebuilt['chunk_id_or_pos'])), \
                chunk_rebuilt['bytes_processed'], chunk_rebuilt['error']

    @staticmethod
    def res_event_from_task_res(task_res):
        item, bytes_processed, error = task_res
        namespace, container_id, content_id, chunk_id_or_pos = item
        return \
            {
                'when': time.time(),
                'event': EventTypes.CONTENT_REBUILT,
                'url': {
                    'ns': namespace,
                    'id': container_id,
                    'content': content_id
                },
                'data': {
                    'chunks_rebuilt': [{
                        'chunk_id_or_pos': chunk_id_or_pos,
                        'bytes_processed': bytes_processed,
                        'error': error
                    }]
                }
            }

    @staticmethod
    def string_from_item(item):
        namespace, container_id, content_id, chunk_id_or_pos = item
        return '%s|%s|%s|%s' % (
            namespace, container_id, content_id, chunk_id_or_pos)
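
    # Input file format handled by _fetch_items_from_input_file() below:
    # one '|'-separated record per line; blank lines and lines starting
    # with '#' are skipped. Each record carries at least three fields:
    #     CONTAINER_ID|CONTENT_ID|CHUNK_ID_OR_POS
    # Any extra trailing field is ignored (split('|', 3)[:3]).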

    def _fetch_items_from_input_file(self):
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                container_id, content_id, chunk_id_or_pos = \
                    stripped.split('|', 3)[:3]
                yield self.namespace, container_id, content_id, \
                    chunk_id_or_pos

    def _fetch_items_from_rawx_id(self):
        lost_chunks = self.rdir_client.chunk_fetch(
            self.rawx_id, limit=self.rdir_fetch_limit, rebuild=True,
            shuffle=self.rdir_shuffle_chunks, timeout=self.rdir_timeout)
        for container_id, content_id, chunk_id, _ in lost_chunks:
            yield self.namespace, container_id, content_id, chunk_id

    def _fetch_items(self):
        if self.input_file:
            return self._fetch_items_from_input_file()
        if self.rawx_id:
            return self._fetch_items_from_rawx_id()

        def _empty_generator():
            return
            yield  # pylint: disable=unreachable
        return _empty_generator()

    def update_counters(self, task_res):
        super(BlobRebuilder, self).update_counters(task_res)
        _, bytes_processed, _ = task_res
        if bytes_processed is not None:
            self.bytes_processed += bytes_processed

    def _update_total_counters(self):
        chunks_processed, total_chunks_processed, errors, total_errors = \
            super(BlobRebuilder, self)._update_total_counters()
        bytes_processed = self.bytes_processed
        self.bytes_processed = 0
        self.total_bytes_processed += bytes_processed
        return chunks_processed, total_chunks_processed, \
            bytes_processed, self.total_bytes_processed, \
            errors, total_errors

    def _get_report(self, status, end_time, counters):
        chunks_processed, total_chunks_processed, \
            bytes_processed, total_bytes_processed, \
            errors, total_errors = counters
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks=%(chunks)d %(chunks_rate).2f/s '
            'bytes=%(bytes)d %(bytes_rate).2fB/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_chunks=%(total_chunks)d %(total_chunks_rate).2f/s '
            'total_bytes=%(total_bytes)d %(total_bytes_rate).2fB/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status': status,
                'last_report': datetime.fromtimestamp(
                    int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'chunks': chunks_processed,
                'chunks_rate': chunks_processed / time_since_last_report,
                'bytes': bytes_processed,
                'bytes_rate': bytes_processed / time_since_last_report,
                'errors': errors,
                'errors_rate': 100 * errors / float(chunks_processed or 1),
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'total_time': total_time,
                'total_chunks': total_chunks_processed,
                'total_chunks_rate': total_chunks_processed / total_time,
                'total_bytes': total_bytes_processed,
                'total_bytes_rate': total_bytes_processed / total_time,
                'total_errors': total_errors,
                'total_errors_rate': 100 * total_errors /
                    float(total_chunks_processed or 1)
            })
        if self.total_expected_items is not None:
            progress = 100 * total_chunks_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_chunks_processed, self.total_expected_items, progress)
        return report
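
    # Example of a single report line produced by _get_report() above; the
    # status and all numbers are hypothetical, chosen to be self-consistent
    # (120 chunks in 5.00s = 24.00/s, 2 errors out of 120 = 1.67%):
    #     RUN last_report=2019-01-01T12:00:00 5.00s chunks=120 24.00/s
    #     bytes=125829120 25165824.00B/s errors=2 1.67% ...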

    def create_worker(self, queue_workers, queue_reply):
        return BlobRebuilderWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        if self.rawx_id:
            try:
                info = self.rdir_client.status(
                    self.rawx_id, read_timeout=self.rdir_timeout)
                self.total_expected_items = info.get(
                    'chunk', dict()).get('to_rebuild', None)
            except Exception as exc:
                self.logger.warn(
                    'Failed to fetch the total chunks to rebuild: %s', exc)

    def run(self):
        if self.rawx_id:
            self.rdir_client.admin_lock(
                self.rawx_id, "rebuilder on %s" % gethostname(),
                timeout=self.rdir_timeout)
        success = super(BlobRebuilder, self).run()
        if self.rawx_id:
            self.rdir_client.admin_unlock(
                self.rawx_id, timeout=self.rdir_timeout)
        return success
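

# A minimal driver sketch, added here for illustration only: this helper is
# not part of the original module, and the conf keys expected by Tool are
# assumptions. It shows the intended flow: build a BlobRebuilder bound to a
# rawx service id, then let run() lock the rdir, rebuild, and unlock.
def _example_rebuild_rawx(conf, rawx_id):
    """Rebuild every chunk rdir flagged as lost on `rawx_id` (sketch)."""
    rebuilder = BlobRebuilder(conf, service_id=rawx_id)
    return rebuilder.run()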


class BlobRebuilderWorker(ToolWorker):

    def __init__(self, tool, queue_workers, queue_reply):
        super(BlobRebuilderWorker, self).__init__(
            tool, queue_workers, queue_reply)

        self.allow_frozen_container = true_value(self.tool.conf.get(
            'allow_frozen_container', self.tool.DEFAULT_ALLOW_FROZEN_CT))
        self.allow_same_rawx = true_value(self.tool.conf.get(
            'allow_same_rawx', self.tool.DEFAULT_ALLOW_SAME_RAWX))
        self.try_chunk_delete = true_value(self.tool.conf.get(
            'try_chunk_delete', self.tool.DEFAULT_TRY_CHUNK_DELETE))
        self.dry_run = true_value(self.tool.conf.get(
            'dry_run', self.tool.DEFAULT_DRY_RUN))

        self.chunk_operator = ChunkOperator(self.conf, logger=self.logger)

    def _process_item(self, item):
        namespace, container_id, content_id, chunk_id_or_pos = item
        if namespace != self.tool.namespace:
            raise ValueError('Invalid namespace (actual=%s, expected=%s)' % (
                namespace, self.tool.namespace))

        log_rebuilding = 'Rebuilding %s' % self.tool.string_from_item(item)
        if self.dry_run:
            self.logger.debug('[dryrun] %s', log_rebuilding)
            return None

        self.logger.debug(log_rebuilding)
        try:
            return self.chunk_operator.rebuild(
                container_id, content_id, chunk_id_or_pos,
                rawx_id=self.tool.rawx_id,
                try_chunk_delete=self.try_chunk_delete,
                allow_frozen_container=self.allow_frozen_container,
                allow_same_rawx=self.allow_same_rawx)
        except OioException as exc:
            if not isinstance(exc, OrphanChunk):
                raise RetryLater(exc)
            raise
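
# Note on _process_item() error handling: an OrphanChunk is re-raised as-is
# (the chunk no longer belongs to any content, so retrying is pointless),
# while any other OioException is wrapped in RetryLater so the tool can
# reschedule the rebuild attempt.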