bugfix: comprehensive stability audit and fixes

- discover/docker.py: handle comma-separated compose_file in labels
- discover/network.py: replace os.getlogin() with robust user detection
- target.py: add lsb_release fallback via hostnamectl, guard None compose_file,
  use container name (not cached CID) for docker logs
- main.py: call reset_state(mode='target') before target mode,
  improve EOF handling info message
- source.py: remove redundant set_stage('DONE') inside transfer_offer
- transfer.py: fix stage naming for resume after transfer
- add dry_run.py for local logic validation
This commit is contained in:
2026-05-22 20:58:37 +04:00
parent 6f3637795b
commit 97b3623915
5 changed files with 205 additions and 27 deletions

View File

@@ -74,6 +74,18 @@ def get_compose_file_from_container(inspect):
if compose_file and os.path.isfile(compose_file):
return compose_file
# Если labels содержит несколько файлов через запятую — берём первый существующий
if compose_file and ", " in compose_file:
for fp in compose_file.split(", "):
fp = fp.strip()
if os.path.isfile(fp):
return fp
elif compose_file and "," in compose_file:
for fp in compose_file.split(","):
fp = fp.strip()
if os.path.isfile(fp):
return fp
# Fallback: ищем docker-compose.yml / compose.yml рядом с working_dir
if working_dir and os.path.isdir(working_dir):
for fname in ("docker-compose.yml", "docker-compose.yaml", "compose.yml", "compose.yaml"):

View File

@@ -209,7 +209,18 @@ def gather_cron_jobs(user_hint=None):
"""
jobs = []
# crontab -l для текущего пользователя и root
for user in (os.getlogin(), "root"):
current_user = os.environ.get('USER') or os.environ.get('LOGNAME')
if not current_user:
try:
import pwd
current_user = pwd.getpwuid(os.getuid()).pw_name
except Exception:
current_user = None
users = []
if current_user:
users.append(current_user)
users.append("root")
for user in users:
try:
out = run(f"crontab -u {user} -l", check=False)
for ln in out.stdout.splitlines():

136
dry_run.py Normal file
View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
dry_run.py — Локальная проверка логики без Docker/SSH
"""
import sys
import os
# Добавляем корень проекта
_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
# Messages of every failed check, in the order they were reported.
errors = []


def check(msg, ok):
    """Print the outcome of one check and remember it if it failed.

    Args:
        msg: Human-readable description of the check.
        ok: Truthy when the check passed, falsy when it failed.
    """
    if ok:
        print(f" OK: {msg}")
        return
    errors.append(msg)
    print(f" FAIL: {msg}")
def _check_imports():
    """Import every project object the dry run uses and report each result.

    Returns:
        dict: Successfully imported objects keyed by name. Later sections
        look their dependencies up here, so a failed import leads to a
        skipped section instead of a NameError further down.
    """
    loaded = {}

    try:
        from core import state
    except Exception as e:
        check(f"state импорт: {e}", False)
    else:
        loaded["state"] = state
        check("state модуль импортируется", True)

    try:
        from core.fsm import FSM
    except Exception as e:
        check(f"FSM импорт: {e}", False)
    else:
        loaded["FSM"] = FSM
        check("FSM класс доступен", True)

    try:
        # Imported only to verify that the names exist.
        from core.color import menu, prompt  # noqa: F401
    except Exception as e:
        check(f"color импорт: {e}", False)
    else:
        check("color модуль доступен", True)

    try:
        # Imported only to verify that the names exist.
        from discover.docker import discover_docker, get_container_pid  # noqa: F401
    except Exception as e:
        check(f"discover.docker: {e}", False)
    else:
        check("discover.docker доступен", True)

    try:
        # Imported only to verify that the name exists.
        from discover.nginx import discover_nginx  # noqa: F401
    except Exception as e:
        check(f"discover.nginx: {e}", False)
    else:
        check("discover.nginx доступен", True)

    try:
        from transfer.transfer import do_transfer
    except Exception as e:
        check(f"transfer.transfer: {e}", False)
    else:
        loaded["do_transfer"] = do_transfer
        check("transfer.transfer доступен", True)

    try:
        from transfer.ssh import list_private_keys
    except Exception as e:
        check(f"transfer.ssh: {e}", False)
    else:
        loaded["list_private_keys"] = list_private_keys
        check("transfer.ssh доступен", True)

    try:
        # save_manifest is imported only to verify that the name exists.
        from manifest.manifest import build_manifest, save_manifest  # noqa: F401
    except Exception as e:
        check(f"manifest: {e}", False)
    else:
        loaded["build_manifest"] = build_manifest
        check("manifest доступен", True)

    return loaded


def _run_section(label, required, loaded, body, title=None):
    """Run one group of checks if all of its imports are available.

    Args:
        label: Short section name used in the failure message.
        required: Names that must be present in ``loaded``; the matching
            objects are passed to ``body`` positionally, in this order.
        loaded: Mapping returned by ``_check_imports``.
        body: Callable that performs the checks of the section.
        title: Optional header printed before the section runs.
    """
    if title:
        print(title)
    missing = [name for name in required if name not in loaded]
    if missing:
        # The import failure itself is already recorded in ``errors``.
        print(f" SKIP: не импортировано: {', '.join(missing)}")
        return
    try:
        body(*(loaded[name] for name in required))
    except Exception as e:
        # Record the crash as a failed check so the summary is still printed.
        check(f"{label}: неожиданная ошибка: {e}", False)


def _check_fsm(FSM):
    """Verify that both FSM modes expose the expected step names."""
    source_fsm = FSM(mode="source")
    check("SOURCE_STEPS содержит TRANSFER", "TRANSFER" in source_fsm.steps)
    check("SOURCE_STEPS содержит DONE", "DONE" in source_fsm.steps)
    target_fsm = FSM(mode="target")
    check("TARGET_STEPS содержит DONE", "DONE" in target_fsm.steps)


def _check_state(state):
    """Verify that state can be loaded and a stage can be set."""
    # NOTE(review): these calls go through the real ``state`` module; whether
    # they touch a persisted state file is not visible here -- confirm before
    # running this next to a migration that is in progress.
    state.load_state()
    check("State загружается без ошибок", True)
    state.set_stage("INIT", mode=None)
    check("State сохраняется", True)


def _check_reset_state(state):
    """Verify that reset_state clears completed steps and resets the stage."""
    state.mark_completed("SOURCE_DISCOVER")
    state.set_stage("SOURCE_PACK", archive_path="/tmp/test.tar.gz")
    state.reset_state(mode="source")
    st = state.load_state()
    check("reset_state очищает completed_steps", len(st.get("completed_steps", [])) == 0)
    check("reset_stage после reset = INIT", st.get("stage") == "INIT")


def _check_manifest(build_manifest):
    """Build a manifest from minimal data and verify two of its fields."""
    manifest = build_manifest(
        docker_data={
            "container_name": "test",
            "image": "img",
            "status": "running",
            "compose_file": "/tmp/compose.yml",
            "env_file": None,
            "mounts": [],
            "ports": {},
            "networks": [],
            "host_config": {},
            "labels": {},
        },
        nginx_data=[],
        sidecars=[],
        host_network={},
        systemd_units=[],
        cron_jobs=[],
        extra_hints=[],
    )
    check("Manifest содержит service.name", manifest["service"]["name"] == "test")
    check(
        "Manifest содержит docker.compose_file",
        manifest["docker"]["compose_file"] == "/tmp/compose.yml",
    )


def _check_transfer(state, do_transfer):
    """Verify that do_transfer raises RuntimeError when no archive is set."""
    st = state.load_state()
    state.save_state(
        {**st, "archive_path": None, "target_host": None, "target_user": None, "target_port": 22}
    )
    try:
        do_transfer()
    except RuntimeError as e:
        check("do_transfer без archive_path — RuntimeError", "Архив не найден" in str(e))
    except Exception as e:
        check(f"do_transfer без archive_path — неожиданная ошибка: {e}", False)
    else:
        # Reaching this branch means the expected error was NOT raised.
        check("do_transfer без archive_path — не бросил RuntimeError", False)


def _check_ssh_keys(list_private_keys):
    """Smoke-test SSH key discovery: the call must not raise."""
    try:
        list_private_keys()
    except Exception as e:
        check(f"list_private_keys: {e}", False)
    else:
        check("list_private_keys не падает", True)


def main():
    """Run every dry-run section and print a summary.

    Returns:
        int: 0 when every check passed, 1 when at least one failed.
    """
    print("=== DRY-RUN: Проверка импортов ===")
    loaded = _check_imports()

    _run_section("FSM", ("FSM",), loaded, _check_fsm,
                 title="\n=== DRY-RUN: Проверка FSM ===")
    _run_section("state.json", ("state",), loaded, _check_state,
                 title="\n=== DRY-RUN: Проверка state.json ===")
    _run_section("reset_state", ("state",), loaded, _check_reset_state,
                 title="\n=== DRY-RUN: Проверка reset_state ===")
    _run_section("manifest", ("build_manifest",), loaded, _check_manifest,
                 title="\n=== DRY-RUN: Проверка manifest ===")
    _run_section("transfer", ("state", "do_transfer"), loaded, _check_transfer,
                 title="\n=== DRY-RUN: Проверка transfer без host ===")
    # Runs under the transfer header, so it has no title of its own.
    _run_section("list_private_keys", ("list_private_keys",), loaded, _check_ssh_keys)

    print("\n=== DRY-RUN: ИТОГ ===")
    if errors:
        print(f"FAIL: {len(errors)} проверок не прошло:")
        for e in errors:
            print(f" - {e}")
        return 1
    print("Все проверки прошли успешно!")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -28,9 +28,17 @@ def do_preflight():
state.set_stage("TARGET_PREFLIGHT")
# Ubuntu version
out = run("lsb_release -ds", check=False).stdout.strip()
info(f"OS: {out}")
version_match = re.search(r'(\d+\.\d+)', out)
try:
out = run("lsb_release -ds", check=False).stdout.strip()
except RuntimeError:
out = ""
if not out and exists("hostnamectl"):
try:
out = run("hostnamectl | grep 'Operating System'", check=False).stdout.strip()
except RuntimeError:
out = ""
info(f"OS: {out or 'не определено'}")
version_match = re.search(r'(\d+\.\d+)', out) or re.search(r'Ubuntu\s+(\d+\.\d+)', out, re.I)
ubuntu_version = version_match.group(1) if version_match else None
if not ubuntu_version:
warn("Не удалось определить версию Ubuntu. Продолжаем на свой риск.")
@@ -346,23 +354,27 @@ def do_verify():
# Docker compose up
cf = manifest["docker"].get("compose_file")
if cf and os.path.isfile(cf):
info("Запускаем docker compose up ...")
compose_dir = os.path.dirname(cf)
r = run(f"cd {compose_dir} \u0026\u0026 {sudo} docker compose up -d", check=False)
if r.returncode != 0:
print(f"\n{r.stdout}")
print(f"\n{r.stderr}")
state.set_error(
step="docker_compose_up",
stdout=r.stdout,
stderr=r.stderr,
suggestion="Проверьте compose-файл, доступность image, volumes. После исправления запустите: ./migrate --resume"
)
raise RuntimeError("docker compose up failed")
success("Docker compose up выполнен")
if cf:
cf_path = cf
if os.path.isfile(cf_path):
info("Запускаем docker compose up ...")
compose_dir = os.path.dirname(cf_path)
r = run(f"cd {compose_dir} && {sudo} docker compose up -d", check=False)
if r.returncode != 0:
print(f"\n{r.stdout}")
print(f"\n{r.stderr}")
state.set_error(
step="docker_compose_up",
stdout=r.stdout,
stderr=r.stderr,
suggestion="Проверьте compose-файл, доступность image, volumes. После исправления запустите: docker-migrate --resume"
)
raise RuntimeError("docker compose up failed")
success("Docker compose up выполнен")
else:
warn(f"Compose-файл из манифеста не найден на target: {cf_path}")
elif manifest["docker"].get("container_name"):
info("Compose не найден, пробуем docker run по параметрам контейнера ...")
info("Compose не найден, запуск по docker run не реализован. Используйте docker run вручную.")
# Sidecar units start
for u in manifest.get("systemd_units", []):
@@ -371,12 +383,19 @@ def do_verify():
run(f"{sudo} systemctl start {uname}", check=False)
info(f"Запущен unit: {uname}")
# Логи
cid = manifest["docker"].get("container_id")
if cid:
info("Последние логи контейнера:")
log_out = run(f"docker logs --tail 30 {cid[:12]}", check=False)
print(log_out.stdout)
# Логи — ищем контейнер по имени, а не по ID (ID с другого сервера)
cname = manifest["docker"].get("container_name")
if cname:
try:
out = run(f"docker ps -q -f name={cname}", check=False)
if out.stdout.strip():
info("Последние логи контейнера:")
log_out = run(f"docker logs --tail 30 {cname}", check=False)
print(log_out.stdout)
else:
warn(f"Контейнер '{cname}' пока не запущен, логи недоступны")
except Exception as e:
warn(f"Не удалось получить логи: {e}")
success("Верификация завершена. Сервис должен работать.")
state.set_stage("DONE")

View File

@@ -99,7 +99,7 @@ def do_transfer():
raise RuntimeError(f"Распаковка на target не удалась: {r2.stderr}")
# Сохраняем remote_dir в state для target
state.set_stage("TRANSFER_DONE", target_remote_dir=remote_dir)
state.set_stage("TRANSFER", target_remote_dir=remote_dir)
# Предлагаем сразу запустить target-режим удалённо
if confirm("Сразу запустить восстановление на новом сервере (remote target mode)", default="y"):