additional params

This commit is contained in:
2026-02-23 17:14:17 +03:00
parent 90f87d8f40
commit 5bb50a842d
3 changed files with 79 additions and 27 deletions

View File

@@ -24,8 +24,22 @@ n = 2
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL # log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
log_level = "INFO" log_level = "INFO"
# used to pass arguments to the rsync binary running locally
# see rsync help/manpage for details
[rsync_parameters]
# --rsync-path
rsync_path = "/usr/bin/rsync"
# --exclude-from
exclude_from = ".rsync-exclude.txt"
``` ```
- remote_folder: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system. - `remote_folder`: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system.
- **do not forget to use the trailing slash!** - **do not forget to use the trailing slash!**
- local_folders: - `local_folders`: each folder to be copied over
- `n`: how many rsync processes to run in parallel (must be at least 1)
- `log_level`: logging verbosity; one of DEBUG | INFO | WARNING | ERROR | CRITICAL
- `rsync_parameters`: optional table of extra options passed to the local rsync binary
- `rsync_path`: path to the rsync binary to run on the remote machine (passed as `--rsync-path`)
- `exclude_from`: file to read exclude patterns from (passed as `--exclude-from`)

View File

@@ -13,3 +13,12 @@ n = 2
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL # log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
log_level = "INFO" log_level = "INFO"
# used to pass arguments to the rsync binary running locally
# see rsync help/manpage for details
[rsync_parameters]
# --rsync-path
rsync_path = "/usr/bin/rsync"
# --exclude-from
exclude_from = ".rsync-exclude.txt"

View File

@@ -5,9 +5,7 @@ author Yigid BALABAN <balaban@yigid.dev>
co-authored by Opus 4.6 co-authored by Opus 4.6
parallel rsync runner. reads config from TOML (preferred) or JSON. parallel rsync runner. reads config from TOML (preferred) or JSON.
local: foo bar baz --|many-rsync|--> remote/foo remote/bar remote/baz local: foo bar --|many-rsync|--> remote/foo remote/bar
optionally takes in an argument for configuration
""" """
import json import json
@@ -17,7 +15,21 @@ import sys
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any, TypedDict, Literal
class RsyncParameters(TypedDict, total=False):
rsync_path: str
exclude_from: str
class Config(TypedDict):
local_folders: list[Path]
remote_folder: str
n: int
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
rsync_parameters: RsyncParameters
try: try:
import tomllib # 3.11+ import tomllib # 3.11+
@@ -26,11 +38,7 @@ except ModuleNotFoundError:
HOME = Path.home() HOME = Path.home()
LOG_DIR = HOME / ".rsync-logs" LOG_DIR = HOME / ".rsync-logs"
RSYNC_CMD = ( RSYNC_BASE = ("rsync", "-avh", "--progress", "--delete", "--stats")
"rsync", "-avh", "--progress", "--delete", "--stats",
'--rsync-path=/bin/rsync', # TODO: this MUST be configurable! default: don't pass this option
"--exclude-from=.rsync-exclude.txt", # TODO: this should be configurable.
)
logger = logging.getLogger("sync") logger = logging.getLogger("sync")
@@ -48,12 +56,11 @@ def _load_raw(path: Path) -> dict[str, Any]:
sys.exit(f"FATAL: unsupported config format: {path.suffix}") sys.exit(f"FATAL: unsupported config format: {path.suffix}")
def load_config(path: Path) -> dict: def load_config(path: Path) -> Config:
raw = _load_raw(path) raw = _load_raw(path)
folders = raw.get("local_folders", []) folders: list[Any] = raw.get("local_folders", [])
if not folders or not isinstance(folders, list): if not folders:
sys.exit("FATAL: local_folders must be a non-empty list") sys.exit("FATAL: local_folders must be a non-empty list")
folders = [Path(f).expanduser() for f in folders] folders = [Path(f).expanduser() for f in folders]
for f in folders: for f in folders:
@@ -62,7 +69,8 @@ def load_config(path: Path) -> dict:
remote = raw.get("remote_folder") remote = raw.get("remote_folder")
if not remote: if not remote:
sys.exit("FATAL: remote_folder is required") sys.exit("FATAL: remote_folder is required")
n = int(raw.get("n", 2)) remote = str(remote) # keep as str — may be "host:/path", not a local path
n = int(raw.get("n", 1))
if n < 1: if n < 1:
sys.exit("FATAL: n must be >= 1") sys.exit("FATAL: n must be >= 1")
if n > len(folders): if n > len(folders):
@@ -72,17 +80,35 @@ def load_config(path: Path) -> dict:
if level not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"): if level not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
sys.exit(f"FATAL: invalid log_level: {level!r}") sys.exit(f"FATAL: invalid log_level: {level!r}")
cfg = {"local_folders": folders, "remote_folder": remote, "n": n, "log_level": level} rsync_params: RsyncParameters = {}
logger.debug("got config:", cfg) raw_params = raw.get("rsync_parameters", {})
return cfg if rsync_path := raw_params.get("rsync_path"):
rsync_params["rsync_path"] = str(rsync_path)
if exclude_from := raw_params.get("exclude_from"):
rsync_params["exclude_from"] = str(exclude_from)
return {
"local_folders": folders, "remote_folder": remote,
"n": n, "log_level": level, "rsync_parameters": rsync_params,
}
# ── sync ────────────────────────────────────────────────────────────────────── # ── sync ──────────────────────────────────────────────────────────────────────
def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]: def _build_rsync_cmd(params: RsyncParameters) -> tuple[str, ...]:
"""Extend RSYNC_BASE with optional flags from config."""
extra: list[str] = []
if rp := params.get("rsync_path"):
extra.append(f"--rsync-path={rp}")
if ef := params.get("exclude_from"):
extra.append(f"--exclude-from={ef}")
return (*RSYNC_BASE, *extra)
def sync_folder(folder: Path, remote: str, ts: str, params: RsyncParameters) -> tuple[Path, int]:
"""Run rsync for a single folder. Returns (folder, returncode).""" """Run rsync for a single folder. Returns (folder, returncode)."""
log_file = LOG_DIR / f"{folder}-{ts}.log" log_file = LOG_DIR / f"{folder.name}-{ts}.log"
cmd = [*RSYNC_CMD, f"{folder}", remote] cmd: list[str | Path] = [*_build_rsync_cmd(params), f"{folder}", remote]
logger.info("START %s%s (log: %s)", folder, remote, log_file) logger.info("START %s%s (log: %s)", folder, remote, log_file)
@@ -90,6 +116,7 @@ def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]:
proc = subprocess.run( proc = subprocess.run(
cmd, cmd,
cwd=HOME, cwd=HOME,
stdin=subprocess.DEVNULL, # prevent parallel processes from fighting over terminal input
stdout=fh, stdout=fh,
stderr=subprocess.STDOUT, # interleave; nothing goes silent stderr=subprocess.STDOUT, # interleave; nothing goes silent
text=True, text=True,
@@ -122,13 +149,15 @@ def main() -> None:
) )
folders, remote, n = cfg["local_folders"], cfg["remote_folder"], cfg["n"] folders, remote, n = cfg["local_folders"], cfg["remote_folder"], cfg["n"]
params = cfg["rsync_parameters"]
logger.info("log level is INFO by default, use config to change") logger.debug("got config: %s", cfg)
logger.info("log level is set to %s", cfg["log_level"])
logger.info("syncing %d folder(s), parallelism=%d", len(folders), n) logger.info("syncing %d folder(s), parallelism=%d", len(folders), n)
failed: list[tuple[str, int]] = [] failed: list[tuple[Path, int]] = []
with ThreadPoolExecutor(max_workers=n) as pool: with ThreadPoolExecutor(max_workers=n) as pool:
futures = {pool.submit(sync_folder, f, remote, ts): f for f in folders} futures = {pool.submit(sync_folder, f, remote, ts, params): f for f in folders}
for fut in as_completed(futures): for fut in as_completed(futures):
folder, rc = fut.result() folder, rc = fut.result()
if rc != 0: if rc != 0:
@@ -139,7 +168,7 @@ def main() -> None:
logger.critical("FAILED: %s (rc=%d)", f, rc) logger.critical("FAILED: %s (rc=%d)", f, rc)
sys.exit(1) sys.exit(1)
logger.info("all folders synced successfully") logger.info("SUCC all folders synced successfully")
if __name__ == "__main__": if __name__ == "__main__":