diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0af2369517e282fb1265c5957ed189fc1d3a593c..08ab4cf2e82a0012bdb83c76c274b7c1e0166687 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,8 +38,10 @@ ostack-einfra_cz-project-migration: &migration-job
     - python3 -m pip install -r ci/requirements.pip > pymodules-install.log
     - apt -y update > apt-install.log
     - apt -y install $(cat ci/requirements.apt) >> apt-install.log
+    - ci/install-ssh-configuration.sh ci/ssh_configuration
+    - eval "$(ssh-agent)"
   script:
-    - ci/project-migrator.py --source-openrc="${SRC_CLOUD_OSTACK_RC_FILE}" --destination-openrc="${DST_CLOUD_OSTACK_RC_FILE}" --project-name="${PROJECT_NAME}" --validation-a-source-server-id="${VALIDATION_SERVER_ID}" --ceph-migrator-sshkeyfile="${MIGRATOR_SSH_KEY_FILE}" --explicit-server-names="${MIGRATE_EXPLICIT_SERVER_NAMES}" --explicit-volume-names="${MIGRATE_EXPLICIT_VOLUME_NAMES}" ${PROJECT_MIGRATOR_EXTRA_ARGS}
+    - ci/project-migrator.py --source-openrc="${SRC_CLOUD_OSTACK_RC_FILE}" --destination-openrc="${DST_CLOUD_OSTACK_RC_FILE}" --project-name="${PROJECT_NAME}" --validation-a-source-server-id="${VALIDATION_SERVER_ID}" --ceph-migrator-sshkeyfile="${MIGRATOR_SSH_KEY_FILE}" --hypervisor-sshkeyfile="${SSH_PRIVATE_KEY_HYPERVISORS}" --explicit-server-names="${MIGRATE_EXPLICIT_SERVER_NAMES}" --explicit-volume-names="${MIGRATE_EXPLICIT_VOLUME_NAMES}" ${PROJECT_MIGRATOR_EXTRA_ARGS}
   after_script:
     - zip -P ${MIGRATION_ARCHIVE_PASSWORD} migration-archive.zip *.dump *.log
     - rm -f *.dump *.log
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 716409e76ac5a4130b53b9db33ebb630a0e6a245..c5920dfe49a6761eca291264dcb1f804c86ad189 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.9.0] - 2024-11-15
+### Changed
+- Added possibility to migrate VMs with ephemeral disks.
+
 ## [1.8.1] - 2024-11-14
 ### Changed
 - flavor mapping, ensure std flavors are being mapped into e1 ones
diff --git a/ci/install-ssh-configuration.sh b/ci/install-ssh-configuration.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b10db6d56d3699bf116c45fe69a5fedc8480c2db
--- /dev/null
+++ b/ci/install-ssh-configuration.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# Install ssh configuration
+# install-ssh-configuration.sh <ssh-configuration-file>
+
+set -eo pipefail
+
+CONFIGURATION_FILE="$1"
+
+if [ ! -d "${HOME}/.ssh" ]; then
+    rm -f "${HOME}/.ssh"
+    mkdir "${HOME}/.ssh"
+fi
+if [ -s "${CONFIGURATION_FILE}" ]; then
+    cat "${CONFIGURATION_FILE}" > "${HOME}/.ssh/config"
+fi
+chmod 700 "${HOME}/.ssh"
+chmod 600 "${HOME}/.ssh/config"
+ls -la "${HOME}/.ssh/config"
diff --git a/ci/lib.py b/ci/lib.py
index 199686afff7ed58183fcd878e628d484639874ec..3fb2d06a4829617448f1a724c6e8a7e761ee99ec 100644
--- a/ci/lib.py
+++ b/ci/lib.py
@@ -8,6 +8,7 @@ import time
 
 import openstack
 import paramiko
+import subprocess
 from keystoneauth1 import session
 from keystoneauth1.identity import v3
 
@@ -242,3 +243,162 @@ def wait_for_ostack_volume_status(ostack_connection, volume_name_or_id, volume_s
             time.sleep(10)
 
     return int_volume_status
+
+def run_sshuttle(jump_host, user, target_subnet, exclude_subnets=None, ssh_key=None, port=22):
+    """
+    Run sshuttle to create a VPN over SSH.
+
+    :param jump_host: The SSH server to connect to (e.g., 'jump.example.com').
+    :param user: SSH username for connecting to the jump host.
+    :param target_subnet: The subnet to route through the VPN (e.g., '10.0.0.0/24').
+    :param exclude_subnets: Optional iterable of subnets, each passed to sshuttle with '--exclude' (default is None).
+    :param ssh_key: Path to the SSH private key file (default is None, which means use default SSH key).
+    :param port: SSH port on the jump host (default is 22).
+    """
+    sshuttle_command = [
+        'sshuttle', '-r', f'{user}@{jump_host}:{port}', target_subnet, '-v', '-N'
+    ]
+
+    if exclude_subnets:
+        for subnet in exclude_subnets:
+            sshuttle_command.extend(['--exclude', subnet])
+
+    if ssh_key:
+        sshuttle_command.extend(['--ssh-cmd', f'ssh -i {ssh_key}'])
+
+    sshuttle_command.append('-D')
+    try:
+        subprocess.run(
+            sshuttle_command,
+            check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error: sshuttle command failed with exit code {e.returncode}")
+    except Exception as e:
+        print(f"An exception occurred: {e}")
+
+def copy_ephemeral_disks_over_ssh(args, destination_project_conn, diskfile_paths,
+                                  i_destination_server, i_source_server_detail, i_source_server,
+                                  source_hypervisor_ip, destination_hypervisor_ip, restore_server_status_callback):
+    """Copy ephemeral disks over ssh"""
+    args.logger.info(f"F.40 Source OpenStack VM server (name:{i_source_server_detail.name}) have ephemeral disk connected")
+    i_destination_server_detail = destination_project_conn.compute.find_server(get_dst_resource_name(args, i_source_server_detail.name))
+    # destination VM stop, wait for SHUTOFF
+    if i_destination_server.status != 'SHUTOFF':
+        destination_project_conn.compute.stop_server(i_destination_server_detail)
+        args.logger.info(f"F.41 Destination OpenStack VM server (name:{i_destination_server_detail.name}) requested to stop")
+        log_or_assert(args, f"F.42 Destination OpenStack VM server (name:{i_destination_server_detail.name}) stopped (reached SHUTOFF state)",
+                      wait_for_ostack_server_status(destination_project_conn, i_destination_server.id, 'SHUTOFF') == "SHUTOFF")
+    # Switch to the hypervisor address in br-data (third octet + 16)
+    destination_hypervisor_ip = change_ip_address(destination_hypervisor_ip, 3, 16)
+
+    # Create tmp directory
+    mkdir_command = f'sudo mkdir {args.destination_hypervisor_ephemeral_tmp}'
+    chown_tmp = f'sudo chown {args.destination_hypervisor_login_user}:{args.destination_hypervisor_login_user} {args.destination_hypervisor_ephemeral_tmp}'
+    prepare_tmp_command = f'{mkdir_command}; {chown_tmp}'
+    _, _, reply_ecode = remote_cmd_exec(destination_hypervisor_ip, args.destination_hypervisor_login_user,
+                                        args.hypervisor_sshkeyfile.name, prepare_tmp_command)
+
+    for diskfile_path in diskfile_paths.values():
+        if not diskfile_path:
+            continue
+        diskfile_name = os.path.basename(diskfile_path)
+        scp_command = [
+            'scp', '-A',
+            '-o', f'IdentityFile={args.hypervisor_sshkeyfile.name}',
+            '-o', 'StrictHostKeyChecking=no',
+            f"{args.source_hypervisor_login_user}@{source_hypervisor_ip}:{diskfile_path}",
+            f"{args.destination_hypervisor_login_user}@{destination_hypervisor_ip}:{args.destination_hypervisor_ephemeral_tmp}{i_destination_server.id}{diskfile_name}"
+        ]
+        try:
+            # Execute the command
+            subprocess.run(scp_command, check=True)
+            print('Scp command executed successfully.')
+        except subprocess.CalledProcessError as e:
+            print(f'Scp command failed with return code: {e.returncode}')
+        except Exception as e:
+            print(f'An error occurred: {e}')
+
+        # Prepare to move the file from the temporary directory to the nova directory, where ephemeral disks are stored.
+        remote_path = f'{args.destination_hypervisor_ephemeral_path}{i_destination_server.id}/{diskfile_name}'
+        tmp_path = f'{args.destination_hypervisor_ephemeral_tmp}{i_destination_server.id}{diskfile_name}'
+
+        # Construct the command
+        chown_command = f'sudo chown --reference={remote_path} {tmp_path}'
+        move_command = f'sudo mv {tmp_path} {remote_path}'
+        complete_command = f'{chown_command}; {move_command}'
+
+        # Validate checksums
+        checks_result = compare_checksums(args, i_source_server['OS-EXT-SRV-ATTR:host'], destination_hypervisor_ip, diskfile_path, tmp_path)
+        if checks_result:
+            _, _, reply_ecode = remote_cmd_exec(destination_hypervisor_ip, args.destination_hypervisor_login_user,
+                                                args.hypervisor_sshkeyfile.name, complete_command)
+            log_or_assert(args, "F.43 checksum of migrated ephemeral disk is correct", reply_ecode == 0)
+
+        else:
+            remove_command = f"rm -f {tmp_path}"
+            _, _, reply_ecode = remote_cmd_exec(destination_hypervisor_ip, args.destination_hypervisor_login_user,
+                                                args.hypervisor_sshkeyfile.name, remove_command)
+            args.logger.warning(f"Migration of ephemeral disk {diskfile_path} was not sucessfull, due to different checksum. "
+                                f"File {tmp_path} on {destination_hypervisor_ip} hypervisor was removed!")
+
+    # Remove temporary directory
+    rmdir_command = f'sudo rmdir {args.destination_hypervisor_ephemeral_tmp}'
+    _, _, reply_ecode = remote_cmd_exec(destination_hypervisor_ip, args.destination_hypervisor_login_user,
+                                        args.hypervisor_sshkeyfile.name, rmdir_command)
+    # start server in source cloud (if necessary)
+    restore_server_status_callback['func'](**restore_server_status_callback['args'])
+    # destination VM start, wait for ACTIVE
+    destination_project_conn.compute.start_server(i_destination_server_detail)
+    args.logger.info(f"F.33 Destination OpenStack VM server (name:{i_destination_server_detail.name}) requested to start")
+    log_or_assert(args, f"F.33 Destination OpenStack VM server (name:{i_destination_server_detail.name}) started (reached ACTIVE state)",
+                  wait_for_ostack_server_status(destination_project_conn, i_destination_server.id, 'ACTIVE') == "ACTIVE")
+
+def change_ip_address(ip_address, byte, number):
+    """Takes IPv4 address and based on specified byte and number changes specified byte by the number in decimal form"""
+    # Split the IP address into its components and convert to integers
+    parts = list(map(int, ip_address.split('.')))
+
+    # Validate the input
+    if byte < 1 or byte > 4:
+        raise ValueError("Byte must be between 1 and 4.")
+
+    # Adjust the byte index to align with zero-based indexing
+    byte_index = byte - 1
+
+    # Change the specified byte
+    parts[byte_index] += number
+
+    # Handle any overflow for that byte and the subsequent bytes
+    for i in range(byte_index, -1, -1):
+        if parts[i] > 255:
+            parts[i] -= 256
+            if i > 0:
+                parts[i - 1] += 1
+        elif parts[i] < 0:
+            parts[i] += 256
+            if i > 0:
+                parts[i - 1] -= 1
+
+    # Convert the integer list back to a dot-separated IP address
+    new_ip_address = '.'.join(map(str, parts))
+
+    # Check if the resultant IP address is valid
+    if any(part > 255 or part < 0 for part in parts):
+        raise ValueError("Resulting IP address is invalid.")
+
+    return new_ip_address
+
+def compare_checksums(args, source_address, destination_address, source_file, destination_file):
+    """Compares checksums between source and destination ephemeral disks"""
+    source_command = f'sha256sum {source_file}'
+    destination_command = f'sha256sum {destination_file}'
+    reply_stdout, _, reply_ecode = remote_cmd_exec(source_address, args.ceph_migrator_user,
+                                                   args.hypervisor_sshkeyfile.name, source_command)
+    log_or_assert(args, "F.44 source checkum was obtained successfully", reply_ecode == 0)
+    source_checksum = reply_stdout.split()[0]
+    reply_stdout, _, reply_ecode = remote_cmd_exec(destination_address, args.destination_hypervisor_login_user,
+                                                   args.hypervisor_sshkeyfile.name, destination_command)
+    log_or_assert(args, "F.45 destination checkum was obtained successfully", reply_ecode == 0)
+    destination_checksum = reply_stdout.split()[0]
+    return source_checksum == destination_checksum
diff --git a/ci/olib.py b/ci/olib.py
index 2b099f721f73432b0fb3b0bda385f353de7dd126..a9edecb9b2b5c7a959f16119f30b5ad4d87b692f 100644
--- a/ci/olib.py
+++ b/ci/olib.py
@@ -72,7 +72,6 @@ def get_destination_flavor(source_flavor):
         'hpc.4core-4ram': 'e1.medium',  # nemusime resit
         'hpc.8core-128ram': 'c3.8core-120ram',  # OK
         'hpc.8core-16ram': 'c2.8core-16ram',  # ok
-        'hpc.8core-16ram-ssd-ephem': 'p3.8core-16ram',  # nemusime resit
         'hpc.8core-256ram': None,  # nemusime resit
         'hpc.8core-32ram-dukan': 'c2.8core-30ram',  # nemusime resit
         'hpc.8core-32ram-ssd-ephem': 'p3.8core-30ram',  # ok
@@ -114,12 +113,20 @@ def get_destination_flavor(source_flavor):
         'csirtmu.large8x32': 'g2.8core-30ram',  # ok
         'csirtmu.jumbo16x32': 'g2.2xlarge',  # ok
         'csirtmu.jumbo8x64': 'g2.8core-60ram',  # ok
-        'csirtmu.jumbo16x64': 'g2.3xlarge'  # ok
+        'csirtmu.jumbo16x64': 'g2.3xlarge',  # ok
+        'hpc.8core-16ram-ssd-ephem': 'r3.8core-16ram',  # ok
         }
     assert source_flavor in flavor_mapping, "Source flavor can be mapped to destination one"
     assert flavor_mapping[source_flavor], "Source flavor mapping is not valid"
     return flavor_mapping[source_flavor]
 
+def get_host_ip(ostack_connection: openstack.connection.Connection, hostname):
+    """ Return host_ip of the hypervisor whose hypervisor_hostname equals hostname, None when not found """
+    hypervisors = ostack_connection.list_hypervisors()
+    for hypervisor in hypervisors:
+        if hypervisor['hypervisor_hostname'] == hostname:
+            return hypervisor['host_ip']
+    return None
 
 def create_destination_networking(args, src_ostack_conn, dst_ostack_conn, src_project, dst_project, src_network_name):
     """ Create matching OpenStack networking (network, subnet, router) """
@@ -601,13 +608,16 @@ def create_server_block_device_mappings(args, src_ostack_conn, src_server, sourc
                                                                           i_source_server_volume_attachment,
                                                                           i_server_volume,
                                                                           src_server_root_device_name))
-
-    log_or_assert(args,
+    if server_block_device_mappings:
+        log_or_assert(args,
                   "F.26 Source OpenStack server - root partition detected",
                   server_block_device_mappings and server_block_device_mappings[0] and server_block_device_mappings[0]['source'])
-    log_or_assert(args,
+        log_or_assert(args,
                   "F.27 Destination OpenStack server - root partition details generated",
                   server_block_device_mappings and server_block_device_mappings[0] and server_block_device_mappings[0]['destination'])
+    else:
+        args.logger.info("F.26 Source OpenStack server - no connected volumes deteted")
+        return None
 
     return server_block_device_mappings
 
@@ -696,22 +706,27 @@ def create_dst_server(args, src_server, dst_ostack_conn, dst_project, flavor, ke
     # Note: argument network is not valid anymore, use networks
     server_args = {'name': get_dst_resource_name(args, src_server.name),
                    'flavorRef': flavor.id,
-                   'block_device_mapping_v2': [{'source_type': 'volume',
-                                                'destination_type': 'volume',
-                                                'uuid': i_block_device_mapping['destination']['volume_id'],
-                                                'device_name': i_block_device_mapping['destination']['device_name'],
-                                                'boot_index': 0 if i_block_device_mapping['destination']['volume_bootable'] else None}
-                                               for i_block_device_mapping in block_device_mappings],
-                   'boot_volume': block_device_mappings[0]['destination']['volume_id'],
                    'networks': [describe_server_network_connection(args,
                                                                    dst_ostack_conn, dst_project,
                                                                    i_netaddr) for i_netaddr in server_network_addresses]}
-    if keypair:
-        server_args['key_name'] = keypair["name"]
-    log_or_assert(args,
+    if block_device_mappings:
+        server_args['block_device_mapping_v2'] = [{'source_type': 'volume',
+                                                   'destination_type': 'volume',
+                                                   'uuid': i_block_device_mapping['destination']['volume_id'],
+                                                   'device_name': i_block_device_mapping['destination']['device_name'],
+                                                   'boot_index': 0 if i_block_device_mapping['destination']['volume_bootable'] else None}
+                                                  for i_block_device_mapping in block_device_mappings]
+        server_args['boot_volume'] = block_device_mappings[0]['destination']['volume_id']
+        log_or_assert(args,
                   "F.35 Destination OpenStack server arguments are generated with valid block-device-mapping",
                   server_args['block_device_mapping_v2'],
                   locals())
+    else:
+        server_args['imageRef'] = 'e9048b17-50db-4a01-94dc-bfe764a0c6ac'  # NOTE(review): hard-coded image UUID - confirm it exists in the destination cloud
+
+    if keypair:
+        server_args['key_name'] = keypair["name"]
+    log_or_assert(args,
                   "F.36 Destination OpenStack server arguments are generated with valid network configuration",
                   server_args['networks'],
                   locals())
@@ -886,3 +901,34 @@ def restore_source_server_status(args, source_project_conn, source_server_detail
     else:
         args.logger.warning(f"F.34 Source OpenStack VM server (name:{source_server_detail.name}) is not in expected state, "
                             f"but migrator does not know how to move to {source_server_detail.status} state")
+
+
+def get_ephemeral_disk_path(args, i_source_server_detail, source_hypervisor_ip):
+    """Return file-backed disk paths of the source server read from `virsh dumpxml`, None if there are none"""
+    reply_stdout, _, reply_ecode = remote_cmd_exec(source_hypervisor_ip, args.ceph_migrator_user,
+                                                   args.hypervisor_sshkeyfile.name, 'virsh dumpxml ' + i_source_server_detail.id)
+    assert reply_ecode == 0, "Virsh dumpxml obtained successfully"
+    d_dumpxml = xmltodict.parse(reply_stdout)
+    return get_disk_paths_from_xml(d_dumpxml)
+
+
+def get_disk_paths_from_xml(d_dumpxml):
+    """Return {'second_disk': <*.eph0 path>, 'root_disk': <*/disk path>} from a parsed domain XML, None if neither is found"""
+    disks = d_dumpxml['domain']['devices']['disk']
+    diskfile_path = {
+        'second_disk': None,
+        'root_disk': None
+    }
+    # a single <disk> element is parsed as one mapping (dict or OrderedDict), several as a list
+    if isinstance(disks, dict):
+        disks = [disks]
+    for disk in disks:
+        # a disk may lack a <source> element (e.g. a cdrom without media); such disks have no file path
+        if '@file' in (disk.get('source') or {}):
+            if '.eph0' in disk['source']['@file']:
+                diskfile_path['second_disk'] = disk['source']['@file']
+            if disk['source']['@file'].endswith('/disk'):
+                diskfile_path['root_disk'] = disk['source']['@file']
+    if diskfile_path['second_disk'] or diskfile_path['root_disk']:
+        return diskfile_path
+    return None
diff --git a/ci/project-migrator.py b/ci/project-migrator.py
index 9d17c4659deadff4c8a7ce03d6373582dac30790..617c7c19518f3ceb80751f04a7a3e0316edeea93 100755
--- a/ci/project-migrator.py
+++ b/ci/project-migrator.py
@@ -160,7 +160,7 @@ def main(args):
                          f"keypair: {i_source_server_detail.key_name}, flavor: {i_source_server_detail.flavor}, "
                          f"sec-groups:{i_source_server_detail.security_groups}, root_device_name: {i_source_server_detail.root_device_name}, "
                          f"block_device_mapping: {i_source_server_detail.block_device_mapping}, "
-                         f"attached-volumes: {i_source_server_detail.attached_volumes}"
+                         f"attached-volumes: {i_source_server_detail.attached_volumes}, "
                          f"addresses: {i_source_server_detail.addresses}")
 
         # network/subnet/router detection & creation
@@ -193,8 +193,9 @@ def main(args):
                                                      i_source_server_detail, source_rbd_images)
 
         # volume creation in destination cloud
-        i_server_block_device_mappings = \
-            olib.create_dst_server_volumes_update_block_device_mappings(args,
+        if i_server_block_device_mappings:
+            i_server_block_device_mappings = \
+                olib.create_dst_server_volumes_update_block_device_mappings(args,
                                                                         i_server_block_device_mappings,
                                                                         destination_project_conn,
                                                                         destination_image)
@@ -218,10 +219,17 @@ def main(args):
             }
 
         # volumes migration (browse i_server_block_device_mappings)
-        clib.migrate_rbd_images(args, i_server_block_device_mappings, restore_server_status_callback)
+        if i_server_block_device_mappings:
+            clib.migrate_rbd_images(args, i_server_block_device_mappings, restore_server_status_callback)
+
+        # detect whether the VM has ephemeral disks
+        source_server_hostname = i_source_server['OS-EXT-SRV-ATTR:host']
+        source_hypervisor_ip = olib.get_host_ip(source_project_conn, source_server_hostname)
+        diskfile_paths = olib.get_ephemeral_disk_path(args, i_source_server_detail, source_hypervisor_ip)
 
-        # start server in source cloud (if necessary)
-        restore_server_status_callback['func'](**restore_server_status_callback['args'])
+        if not diskfile_paths:
+            # start server in source cloud (if necessary)
+            restore_server_status_callback['func'](**restore_server_status_callback['args'])
 
         # start server in destination cloud
         i_destination_server = olib.create_dst_server(args,
@@ -233,6 +241,19 @@ def main(args):
                                                       i_server_block_device_mappings,
                                                       i_destination_server_network_addresses)
 
+        # copy ephemeral disks over ssh
+        if diskfile_paths:
+            destination_hypervisor_address = olib.get_host_ip(destination_project_conn, i_destination_server['OS-EXT-SRV-ATTR:host'])
+            lib.copy_ephemeral_disks_over_ssh(args,
+                                              destination_project_conn,
+                                              diskfile_paths,
+                                              i_destination_server,
+                                              i_source_server_detail,
+                                              i_source_server,
+                                              source_hypervisor_ip,
+                                              destination_hypervisor_address,
+                                              restore_server_status_callback)
+
         # add security groups to the destination server (if missing)
         dst_security_groups = {(i_destination_server_security_group.id, i_destination_server_security_group.name) for i_destination_server_security_group in i_destination_server_security_groups}
         for i_destination_server_security_group_id, i_destination_server_security_group_name in dst_security_groups:
@@ -348,6 +369,10 @@ if __name__ == "__main__":
                     help='Destination cloud entity name prefix (all except secgroups).')
     AP.add_argument('--destination-entity-description-suffix', default=', migrated(id:{})',
                     help='Destination cloud entity description suffix.')
+    AP.add_argument('--destination-hypervisor-ephemeral-path', default='/var/lib/nova/instances/',
+                    help='Destination hypervisor basepath where ephemeral disks are stored')
+    AP.add_argument('--destination-hypervisor-ephemeral-tmp', default='/var/lib/nova/instances/tmp/',
+                    help='Destination hypervisor basepath where emphemeral disks are temporary stored')
 
     AP.add_argument('--project-name', default=None, required=True,
                     help='OpenStack project name (identical name in both clouds required)')
@@ -371,6 +396,15 @@ if __name__ == "__main__":
                              clib.BLOCK_STORAGE_VOLUME_MIGRATION_MODE_VMOFF_SNAP_VMON_CLONE_FLATTEN_CLEANUP,
                              clib.BLOCK_STORAGE_VOLUME_MIGRATION_MODE_VMOFF_SNAP_CLONE_FLATTEN_CLEANUP_VMON],
                     help='(Optional) Mode which determines order of steps performed during volume migration (steps G.05-G.17, F34).')
+
+    AP.add_argument('--hypervisor-sshkeyfile', default=None, type=argparse.FileType('r'),
+                    help='OpenStack hypervisor SSH keyfile')
+
+    AP.add_argument('--source-hypervisor-login-user', default='root',
+                    help='Source OpenStack hypervisor login user')
+
+    AP.add_argument('--destination-hypervisor-login-user', default='ubuntu',
+                    help='Destination OpenStack hypervisor login user')
 
     AP.add_argument('--validation-a-source-server-id', default=None, required=True,
                     help='For validation any server ID from source OpenStack project')
diff --git a/ci/ssh_configuration b/ci/ssh_configuration
new file mode 100644
index 0000000000000000000000000000000000000000..2e74d691e7b84d62552d4357166557b6fe9d206c
--- /dev/null
+++ b/ci/ssh_configuration
@@ -0,0 +1,18 @@
+Host jump-cloud-muni
+    HostName jump.cloud.muni.cz
+    User root
+    StrictHostKeyChecking no
+Host 10.16.72.*
+    User ubuntu
+    StrictHostKeyChecking no
+    ProxyJump jump-cloud-muni
+    ServerAliveInterval 60
+    ServerAliveCountMax 10
+    ConnectTimeout 60
+Host 10.16.100.* 10.16.101.*
+    User root
+    StrictHostKeyChecking no
+    ProxyJump jump-cloud-muni
+    ServerAliveInterval 60
+    ServerAliveCountMax 10
+    ConnectTimeout 60