# Intro to Ceph & Concepts

- https://docs.ceph.com/en/latest/start/intro/

# Optimizing mds, mgr, mon, osd

- https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html/operations_guide/managing-the-storage-cluster-size
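The linked guide covers sizing the daemons; as a small, hedged illustration of where such tuning lands in practice, daemon settings can be inspected and overridden through the Ceph config database. These are standard `ceph config` commands, shown via the snap's `microceph.ceph` wrapper used elsewhere on this page; the 4 GiB value is only an example.
```
microceph.ceph config get osd osd_memory_target            # current per-OSD memory target, in bytes
microceph.ceph config set osd osd_memory_target 4294967296 # example only: 4 GiB per OSD
microceph.ceph config dump                                 # review all mon/mgr/mds/osd overrides
```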
 
# Bootstrapping Microceph on LXD

https://github.com/canonical/microceph

https://microk8s.io/docs/how-to-ceph

https://canonical-microceph.readthedocs-hosted.com/en/latest/tutorial/multi-node/

# Multi-node Cluster

## bootstrap
```
microceph cluster bootstrap
microceph.ceph status
microceph disk list
microceph disk add --wipe /dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1

microceph cluster add microceph2
lxc shell microceph2
microceph cluster join <output from previous command>
microceph disk add --wipe /dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1
```
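Once the second node has joined and its disk is added, a quick sanity check with the same commands used later on this page:
```
microceph status
microceph.ceph -s
```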

## Add new node script
```
#!/bin/bash
set -eu

if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 <existing-cluster-node-name> <new-node-name> <lxd-target>"
  echo "Example: $0 microceph1 microceph66 lxd66"
  exit 1
fi

existing_node_name=$1
name=$2
target=$3
#target=$(hostname)
echo Creating vm $name on target $target in 5 seconds.; sleep 5

create_vm(){
  # Sparse 1000G zvol that will back the new node's OSD
  sudo zfs create -s -V 1000G tank/$name-osd1
  lxc init ubuntu:22.04 $name --vm -c limits.cpu=12 -c limits.memory=24GB --target $target
  lxc config device override $name root size=64GB
  lxc config device add $name osd1 disk source=/dev/zvol/tank/$name-osd1
  lxc start $name
  # Wait for the guest agent to come up, then retry until snapd is ready and the install succeeds
  while ! lxc exec $name -- hostname &>/dev/null; do
    sleep 10
  done
  while ! lxc exec $name -- snap install microceph --channel=reef/stable; do
    sleep 10
  done
}

join_vm(){
  # Request a join token for the new node from an existing cluster member
  token=$(lxc exec $existing_node_name -- microceph cluster add $name)
  lxc exec $name -- microceph cluster join $token
  lxc exec $name -- microceph disk add --wipe "/dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1"

  sleep 5
  lxc exec $name -- microceph cluster list
  lxc exec $name -- microceph disk list
  lxc exec $name -- ceph osd status
}

create_vm
echo Sleeping 5 before joining cluster.; sleep 5
join_vm
```
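Saved as, say, `add-microceph-node.sh` (the filename is arbitrary), the script takes an existing cluster member, the new node's name, and the LXD cluster member that should host the VM, matching the usage example above:
```
chmod +x add-microceph-node.sh
./add-microceph-node.sh microceph1 microceph66 lxd66
```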

## Remove mon and other services
- https://docs.ceph.com/en/latest/rados/operations/add-or-rm-mons/
```
lxc shell microceph66
systemctl stop snap.microceph.mon.service
systemctl disable snap.microceph.mon.service
ceph mon remove microceph66
ceph mon dump
# microceph status  # still lists the mon afterwards; possibly a bug?
```
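After removing a monitor it is worth confirming the remaining mons still have quorum; a quick check with the same wrapper used elsewhere on this page:
```
microceph.ceph quorum_status -f json-pretty | grep -A 5 quorum_names
microceph.ceph mon stat
```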
  
## Remove node
```
microceph disk list
microceph disk remove <number id of your disk>
microceph cluster remove microceph66
```
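To confirm the node and its OSD are really gone from both MicroCeph's and Ceph's view:
```
microceph cluster list
microceph.ceph osd tree
```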

# Migrate services to new node
- https://canonical-microceph.readthedocs-hosted.com/en/latest/how-to/migrate-auto-services/
```
sudo microceph cluster migrate <source> <destination>
sudo microceph status
```
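For example, to move the automatically placed services off microceph66 (the node being retired above) onto microceph4; node names here are only illustrative:
```
sudo microceph cluster migrate microceph66 microceph4
sudo microceph status
```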

## Cluster Health
```
ceph health detail
```
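A few other views that help narrow down a warning (standard Ceph commands, shown with the plain `ceph` alias as in the block above):
```
ceph -s          # overall cluster summary
ceph osd tree    # OSD up/down state and placement
ceph osd df      # per-OSD utilisation
```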

## Add new node to cluster with a sparse ZFS block disk for MicroCeph (--vm, manual)
```
sudo zfs create -s -V 1000G tank/microceph4-osd1
lxc init ubuntu:22.04 microceph4 --vm -c limits.cpu=12 -c limits.memory=24GB --target <the LXD cluster member where you created the zvol>
lxc config device override microceph4 root size=64GB
lxc config device add microceph4 osd1 disk source=/dev/zvol/tank/microceph4-osd1
lxc start microceph4
lxc shell microceph4
snap install microceph --channel=reef/stable
# sudo microceph cluster add microceph4  # run on an existing cluster node to get a join token
microceph cluster join <your token eyJuYW1lIjoib...==>
microceph disk list
microceph disk add --wipe "/dev/disk/by-id/scsi-SQEMU_QEMU_HARDDISK_lxd_osd1"
microceph disk list
microceph.ceph status
microceph.ceph osd status
microceph cluster list
```

## Enable Monitor on node
https://canonical-microceph.readthedocs-hosted.com/en/reef-stable/reference/commands/enable/
```
microceph enable mon --target servername   # --target defaults to this server
# e.g. microceph enable mon --target microceph3
ceph mon dump
```
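Per the enable reference linked above, the same subcommand manages the other MicroCeph services as well; for example (the target node name is just an illustration):
```
microceph enable mgr --target microceph3
microceph enable mds --target microceph3
microceph enable rgw --target microceph3
```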

# Add new node 4 manually

## Get token from existing cluster member
```
microceph cluster add microceph4
```
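If driving this from the LXD host rather than inside the VMs, the token can be captured and handed straight to the joining node, following the same pattern as the script above (node names illustrative):
```
token=$(lxc exec microceph1 -- microceph cluster add microceph4)
lxc exec microceph4 -- microceph cluster join "$token"
```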

## ZFS vs sparse image
```
# ZFS zvol (add -s for a sparse volume, as in the sections above)
sudo zfs create -V 1000G tank/microceph5-osd1
lxc config device add microceph5 osd1 disk source=/dev/zvol/tank/microceph5-osd1

# Sparse image file
sudo truncate -s 1000G /tank/microceph/microceph4.osd1.img
lxc config device add microceph4 osd1 disk source=/tank/microceph/microceph4.osd1.img
```
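To compare how much space each backing store is actually consuming (standard ZFS and coreutils commands; the dataset and image names match the examples above):
```
sudo zfs list -o name,used,refer,volsize tank/microceph5-osd1
du -h --apparent-size /tank/microceph/microceph4.osd1.img   # nominal size
du -h /tank/microceph/microceph4.osd1.img                   # blocks actually allocated
```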

# Status Example
```
root@microceph4:~# sudo microceph.ceph status
  cluster:
    id:     ee26b25c-f1d9-45e5-a653-be0f14d9cb33
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum microceph1,microceph2,microceph3 (age 6h)
    mgr: microceph1(active, since 5d), standbys: microceph3, microceph2
    osd: 4 osds: 4 up (since 94s), 4 in (since 97s)

  data:
    pools:   2 pools, 33 pgs
    objects: 7 objects, 577 KiB
    usage:   1.7 GiB used, 3.9 TiB / 3.9 TiB avail
    pgs:     33 active+clean
```

If using ZFS, let's look at the resources:
```
$ sudo zfs list | grep microceph4
default/virtual-machines/microceph4          6.95M  93.1M  6.96M  legacy
default/virtual-machines/microceph4.block
```

# Removing OSDs
https://docs.ceph.com/en/latest/rados/operations/add-or-rm-osds/

## Mark OSD as out so its data can rebalance onto other OSDs
```
sudo microceph.ceph osd status
sudo microceph.ceph osd out 2
```

## Destroy if needed or if it is in a failed state
```
sudo microceph.ceph osd safe-to-destroy 2
sudo microceph.ceph osd destroy 2 --yes-i-really-mean-it
```
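If the OSD is being retired for good rather than rebuilt, it can also be purged from the CRUSH map and its disk dropped from MicroCeph; a sketch combining the standard `ceph osd purge` with the `microceph disk` commands shown earlier:
```
sudo microceph.ceph osd purge 2 --yes-i-really-mean-it
sudo microceph disk list
sudo microceph disk remove <number id of your disk>
```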
