From 5bb50a842dc3cc64d89cc6c1a961b934f5c393c8 Mon Sep 17 00:00:00 2001 From: Yigid BALABAN Date: Mon, 23 Feb 2026 17:14:17 +0300 Subject: [PATCH] additional params --- many-rsync/README.md | 18 +++++++- many-rsync/example-sync.toml | 9 ++++ many-rsync/main.py | 79 ++++++++++++++++++++++++------------ 3 files changed, 79 insertions(+), 27 deletions(-) diff --git a/many-rsync/README.md b/many-rsync/README.md index 98eb39b..a184124 100644 --- a/many-rsync/README.md +++ b/many-rsync/README.md @@ -24,8 +24,22 @@ n = 2 # log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL log_level = "INFO" + +# use to pass arguments to the rsync binary running locally +# see rsync help/manpage for details +[rsync_parameters] +# --rsync-path +rsync_path = "/usr/bin/rsync" + +# --exclude-from +exclude_from = ".rsync-exclude.txt" ``` -- remote_folder: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system. +- `remote_folder`: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system. 
- **do not forget to use the trailing slash!** -- local_folders: +- `local_folders`: each folder to be copied over +- `n`: number of rsync processes to run in parallel +- `log_level`: logging verbosity, one of DEBUG | INFO | WARNING | ERROR | CRITICAL +- `rsync_parameters`: optional flags passed to the local rsync binary + - `rsync_path`: specify the rsync to run on remote machine + - `exclude_from`: read exclude patterns from FILE diff --git a/many-rsync/example-sync.toml b/many-rsync/example-sync.toml index 3ee1b56..fc36ad0 100644 --- a/many-rsync/example-sync.toml +++ b/many-rsync/example-sync.toml @@ -13,3 +13,12 @@ n = 2 # log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL log_level = "INFO" + +# use to pass arguments to the rsync binary running locally +# see rsync help/manpage for details +[rsync_parameters] +# --rsync-path +rsync_path = "/usr/bin/rsync" + +# --exclude-from +exclude_from = ".rsync-exclude.txt" diff --git a/many-rsync/main.py b/many-rsync/main.py index 38fca52..5d9569b 100644 --- a/many-rsync/main.py +++ b/many-rsync/main.py @@ -5,9 +5,7 @@ author Yigid BALABAN co-authored by Opus 4.6 parallel rsync runner. reads config from TOML (preferred) or JSON. 
-local: foo bar baz --|many-rsync|--> remote/foo remote/bar remote/baz - -optionally takes in an argument for configuration +local: foo bar --|many-rsync|--> remote/foo remote/bar """ import json @@ -17,7 +15,21 @@ import sys from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, TypedDict, Literal + + +class RsyncParameters(TypedDict, total=False): + rsync_path: str + exclude_from: str + + +class Config(TypedDict): + local_folders: list[Path] + remote_folder: str + n: int + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + rsync_parameters: RsyncParameters + try: import tomllib # 3.11+ @@ -26,11 +38,7 @@ except ModuleNotFoundError: HOME = Path.home() LOG_DIR = HOME / ".rsync-logs" -RSYNC_CMD = ( - "rsync", "-avh", "--progress", "--delete", "--stats", - '--rsync-path=/bin/rsync', # TODO: this MUST be configurable! default: don't pass this option - "--exclude-from=.rsync-exclude.txt", # TODO: this should be configurable. 
-) +RSYNC_BASE = ("rsync", "-avh", "--progress", "--delete", "--stats") logger = logging.getLogger("sync") @@ -48,12 +56,11 @@ def _load_raw(path: Path) -> dict[str, Any]: sys.exit(f"FATAL: unsupported config format: {path.suffix}") -def load_config(path: Path) -> dict: +def load_config(path: Path) -> Config: raw = _load_raw(path) - folders = raw.get("local_folders", []) - if not folders or not isinstance(folders, list): + folders: list[Any] = raw.get("local_folders", []) + if not folders: sys.exit("FATAL: local_folders must be a non-empty list") - folders = [Path(f).expanduser() for f in folders] for f in folders: @@ -62,7 +69,8 @@ def load_config(path: Path) -> dict: remote = raw.get("remote_folder") if not remote: sys.exit("FATAL: remote_folder is required") - n = int(raw.get("n", 2)) + remote = str(remote) # keep as str — may be "host:/path", not a local path + n = int(raw.get("n", 1)) if n < 1: sys.exit("FATAL: n must be >= 1") if n > len(folders): @@ -71,18 +79,36 @@ def load_config(path: Path) -> dict: level = raw.get("log_level", "INFO").upper() if level not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"): sys.exit(f"FATAL: invalid log_level: {level!r}") - - cfg = {"local_folders": folders, "remote_folder": remote, "n": n, "log_level": level} - logger.debug("got config:", cfg) - return cfg + + rsync_params: RsyncParameters = {} + raw_params = raw.get("rsync_parameters", {}) + if rsync_path := raw_params.get("rsync_path"): + rsync_params["rsync_path"] = str(rsync_path) + if exclude_from := raw_params.get("exclude_from"): + rsync_params["exclude_from"] = str(exclude_from) + + return { + "local_folders": folders, "remote_folder": remote, + "n": n, "log_level": level, "rsync_parameters": rsync_params, + } # ── sync ────────────────────────────────────────────────────────────────────── -def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]: +def _build_rsync_cmd(params: RsyncParameters) -> tuple[str, ...]: + """Extend RSYNC_BASE with 
optional flags from config.""" + extra: list[str] = [] + if rp := params.get("rsync_path"): + extra.append(f"--rsync-path={rp}") + if ef := params.get("exclude_from"): + extra.append(f"--exclude-from={ef}") + return (*RSYNC_BASE, *extra) + + +def sync_folder(folder: Path, remote: str, ts: str, params: RsyncParameters) -> tuple[Path, int]: """Run rsync for a single folder. Returns (folder, returncode).""" - log_file = LOG_DIR / f"{folder}-{ts}.log" - cmd = [*RSYNC_CMD, f"{folder}", remote] + log_file = LOG_DIR / f"{folder.name}-{ts}.log" + cmd: list[str | Path] = [*_build_rsync_cmd(params), f"{folder}", remote] logger.info("START %s → %s (log: %s)", folder, remote, log_file) @@ -90,6 +116,7 @@ def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]: proc = subprocess.run( cmd, cwd=HOME, + stdin=subprocess.DEVNULL, # prevent parallel processes from fighting over terminal input stdout=fh, stderr=subprocess.STDOUT, # interleave; nothing goes silent text=True, @@ -122,13 +149,15 @@ def main() -> None: ) folders, remote, n = cfg["local_folders"], cfg["remote_folder"], cfg["n"] + params = cfg["rsync_parameters"] - logger.info("log level is INFO by default, use config to change") + logger.debug("got config: %s", cfg) + logger.info("log level is set to %s", cfg["log_level"]) logger.info("syncing %d folder(s), parallelism=%d", len(folders), n) - failed: list[tuple[str, int]] = [] + failed: list[tuple[Path, int]] = [] with ThreadPoolExecutor(max_workers=n) as pool: - futures = {pool.submit(sync_folder, f, remote, ts): f for f in folders} + futures = {pool.submit(sync_folder, f, remote, ts, params): f for f in folders} for fut in as_completed(futures): folder, rc = fut.result() if rc != 0: @@ -139,7 +168,7 @@ def main() -> None: logger.critical("FAILED: %s (rc=%d)", f, rc) sys.exit(1) - logger.info("all folders synced successfully") + logger.info("SUCC all folders synced successfully") if __name__ == "__main__":