Microceph

Intro to Ceph & Concepts

Optimizing mds, mgr, mon, osd

Bootstrapping Microceph on LXD

https://github.com/canonical/microceph

https://microk8s.io/docs/how-to-ceph

https://canonical-microceph.readthedocs-hosted.com/en/latest/tutorial/multi-node/

Multi-node Cluster

Bootstrap

microceph cluster bootstrap
microceph.ceph status
microceph disk list
microceph disk add --wipe /dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1
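
To sanity-check the bootstrap before adding more nodes, a quick look at the cluster and OSD map (standard commands, via the snap's microceph.ceph alias used elsewhere on this page):

microceph status
microceph.ceph osd tree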

microceph cluster add microceph2   # run on an existing node; prints a join token
lxc shell microceph2
microceph cluster join <output from previous command>
microceph disk add --wipe /dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1

Add new node script

#!/bin/bash
set -eu

if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 <existing-cluster-node-name> <new-node-name>"
  echo "Example: $0 microceph1 microceph66 lxd66"
  exit
fi

existing_node_name=$1
name=$2
target=$3
#target=$(hostname)
echo Creating vm $name on target $target in 5 seconds.; sleep 5

create_vm(){
  sudo zfs create -s -V 1000G tank/$name-osd1
  lxc init ubuntu:22.04 $name --vm -c limits.cpu=12 -c limits.memory=24GB --target $target
  lxc config device override $name root size=64GB
  lxc config device add $name osd1 disk source=/dev/zvol/tank/$name-osd1
  lxc start $name
  while ! lxc exec $name -- hostname &>/dev/null; do
    sleep 10
  done
  while ! lxc exec $name -- snap install microceph --channel=reef/stable; do
    sleep 10
  done
}

join_vm(){
  token=$(lxc exec $existing_node_name -- microceph cluster add $name)  # request a join token for the new node from an existing member
  lxc exec $name -- microceph cluster join $token
  lxc exec $name -- microceph disk add --wipe "/dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1"

  sleep 5
  lxc exec $name -- microceph cluster list
  lxc exec $name -- microceph disk list
  lxc exec $name -- ceph osd status
}

create_vm
echo Sleeping 5 before joining cluster.; sleep 5
join_vm
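
Saved as e.g. add-node.sh (hypothetical filename), the script takes the existing node, the new node name, and the LXD target, matching its own usage example:

./add-node.sh microceph1 microceph66 lxd66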

Remove mon and other services

https://docs.ceph.com/en/latest/rados/operations/add-or-rm-mons/

lxc shell microceph66
systemctl stop snap.microceph.mon.service
systemctl disable snap.microceph.mon.service
ceph mon remove microceph66
ceph mon dump
# microceph status  # the mon may still be listed here after removal; possibly a bug
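
After removing a mon, it is worth confirming the remaining monitors still hold quorum (standard Ceph commands):

ceph quorum_status --format json-pretty
ceph -s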

Remove node

microceph disk list
microceph disk remove <OSD number from the disk list>
microceph cluster remove microceph66
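
For example, if the disk list shows the node's OSD as number 2 (hypothetical value), the removal looks like:

microceph disk remove 2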

Migrate services to new node

https://canonical-microceph.readthedocs-hosted.com/en/latest/how-to/migrate-auto-services/

sudo microceph cluster migrate <source> <destination>
sudo microceph status
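
For example, to move the automatically provisioned services off microceph66 onto microceph4 (node names from the sections above):

sudo microceph cluster migrate microceph66 microceph4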

Cluster Health

ceph health detail
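
A few other standard Ceph commands round out the picture:

ceph -s
ceph osd tree
ceph df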

Manually add a new node to the cluster using a sparse ZFS block device for the microceph VM

sudo zfs create -s -V 1000G tank/microceph4-osd1
lxc init ubuntu:22.04 microceph4 --vm -c limits.cpu=12 -c limits.memory=24GB --target <the target machine where you created the zfs volume>
lxc config device override microceph4 root size=64GB
lxc config device add microceph4 osd1 disk source=/dev/zvol/tank/microceph4-osd1
lxc start microceph4
lxc shell microceph4
snap install microceph --channel=reef/stable
# sudo microceph cluster add microceph4  # run on existing cluster node
microceph cluster join <your token eyJuYW1lIjoib...==>
microceph disk list
microceph disk add --wipe "/dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1"
microceph disk list
microceph.ceph status
microceph.ceph osd status
microceph cluster list

Enable Monitor on node

https://canonical-microceph.readthedocs-hosted.com/en/reef-stable/reference/commands/enable/

microceph enable mon --target <servername>  # defaults to the local server
# e.g. microceph enable mon --target microceph3
ceph mon dump
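
microceph status also lists the services running on each node, so it can confirm the new mon:

microceph status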

Add new node (microceph4) manually

Get token from existing cluster member

microceph cluster add microceph4

ZFS vs sparse image

sudo zfs create -V 1000G tank/microceph5-osd1
lxc config device add microceph5 osd1 disk source=/dev/zvol/tank/microceph5-osd1

sudo truncate -s 1000G /tank/microceph/microceph4.osd1.img
lxc config device add microceph4 osd1 disk source=/tank/microceph/microceph4.osd1.img
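
To compare how much space each approach actually allocates (paths from the examples above):

sudo zfs list tank/microceph5-osd1
ls -lsh /tank/microceph/microceph4.osd1.img  # first column is the real allocated size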

Status Example

root@microceph4:~# sudo microceph.ceph status
  cluster:
    id:     ee26b25c-f1d9-45e5-a653-be0f14d9cb33
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum microceph1,microceph2,microceph3 (age 6h)
    mgr: microceph1(active, since 5d), standbys: microceph3, microceph2
    osd: 4 osds: 4 up (since 94s), 4 in (since 97s)

  data:
    pools:   2 pools, 33 pgs
    objects: 7 objects, 577 KiB
    usage:   1.7 GiB used, 3.9 TiB / 3.9 TiB avail
    pgs:     33 active+clean


If using ZFS, let's look at resource usage:

$ sudo zfs list | grep microceph4
default/virtual-machines/microceph4                                                    6.95M  93.1M     6.96M  legacy
default/virtual-machines/microceph4.block

Removing OSDs

https://docs.ceph.com/en/latest/rados/operations/add-or-rm-osds/

Mark the OSD as out so its data can rebalance onto the other OSDs

sudo microceph.ceph osd status
sudo microceph.ceph osd out 2
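
Rebalancing takes a while; one way to wait is to poll safe-to-destroy until it succeeds (a sketch, using OSD 2 as in this example):

while ! sudo microceph.ceph osd safe-to-destroy 2; do sleep 30; done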

Destroy the OSD if needed or if it is in a failed state

sudo microceph.ceph osd safe-to-destroy 2
sudo microceph.ceph osd destroy 2 --yes-i-really-mean-it
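
destroy keeps the OSD id and CRUSH position for possible reuse; to drop the OSD from the cluster entirely, standard Ceph also offers purge:

sudo microceph.ceph osd purge 2 --yes-i-really-mean-it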