additional params
This commit is contained in:
@@ -24,8 +24,22 @@ n = 2
|
|||||||
|
|
||||||
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
|
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
|
||||||
log_level = "INFO"
|
log_level = "INFO"
|
||||||
|
|
||||||
|
# used to pass arguments to the rsync binary running locally
|
||||||
|
# see rsync help/manpage for details
|
||||||
|
[rsync_parameters]
|
||||||
|
# --rsync-path
|
||||||
|
rsync_path = "/usr/bin/rsync"
|
||||||
|
|
||||||
|
# --exclude-from
|
||||||
|
exclude_from = ".rsync-exclude.txt"
|
||||||
```
|
```
|
||||||
|
|
||||||
- remote_folder: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system.
|
- `remote_folder`: rsync-compatible full path to the target folder in remote. "remote" here means that it's the target of the operation, and could still reside on the local system.
|
||||||
- **do not forget to use the trailing slash!**
|
- **do not forget to use the trailing slash!**
|
||||||
- local_folders:
|
- `local_folders`: non-empty list of local folders to be copied over
|
||||||
|
- `n`: how many rsync processes to run in parallel (must be >= 1)
|
||||||
|
- `log_level`: logging verbosity; one of DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||||
|
- `rsync_parameters`: exposes local rsync binary's options
|
||||||
|
- `rsync_path`: path of the rsync binary to run on the remote machine (passed as `--rsync-path`)
|
||||||
|
- `exclude_from`: file to read exclude patterns from (passed as `--exclude-from`)
|
||||||
|
|||||||
@@ -13,3 +13,12 @@ n = 2
|
|||||||
|
|
||||||
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
|
# log_level: pick from DEBUG | INFO | WARNING | ERROR | CRITICAL
|
||||||
log_level = "INFO"
|
log_level = "INFO"
|
||||||
|
|
||||||
|
# used to pass arguments to the rsync binary running locally
|
||||||
|
# see rsync help/manpage for details
|
||||||
|
[rsync_parameters]
|
||||||
|
# --rsync-path
|
||||||
|
rsync_path = "/usr/bin/rsync"
|
||||||
|
|
||||||
|
# --exclude-from
|
||||||
|
exclude_from = ".rsync-exclude.txt"
|
||||||
|
|||||||
@@ -5,9 +5,7 @@ author Yigid BALABAN <balaban@yigid.dev>
|
|||||||
co-authored by Opus 4.6
|
co-authored by Opus 4.6
|
||||||
|
|
||||||
parallel rsync runner. reads config from TOML (preferred) or JSON.
|
parallel rsync runner. reads config from TOML (preferred) or JSON.
|
||||||
local: foo bar baz --|many-rsync|--> remote/foo remote/bar remote/baz
|
local: foo bar --|many-rsync|--> remote/foo remote/bar
|
||||||
|
|
||||||
optionally takes in an argument for configuration
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -17,7 +15,21 @@ import sys
|
|||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any, TypedDict, Literal
|
||||||
|
|
||||||
|
|
||||||
|
class RsyncParameters(TypedDict, total=False):
|
||||||
|
rsync_path: str
|
||||||
|
exclude_from: str
|
||||||
|
|
||||||
|
|
||||||
|
class Config(TypedDict):
|
||||||
|
local_folders: list[Path]
|
||||||
|
remote_folder: str
|
||||||
|
n: int
|
||||||
|
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
||||||
|
rsync_parameters: RsyncParameters
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tomllib # 3.11+
|
import tomllib # 3.11+
|
||||||
@@ -26,11 +38,7 @@ except ModuleNotFoundError:
|
|||||||
|
|
||||||
HOME = Path.home()
|
HOME = Path.home()
|
||||||
LOG_DIR = HOME / ".rsync-logs"
|
LOG_DIR = HOME / ".rsync-logs"
|
||||||
RSYNC_CMD = (
|
RSYNC_BASE = ("rsync", "-avh", "--progress", "--delete", "--stats")
|
||||||
"rsync", "-avh", "--progress", "--delete", "--stats",
|
|
||||||
'--rsync-path=/bin/rsync', # TODO: this MUST be configurable! default: don't pass this option
|
|
||||||
"--exclude-from=.rsync-exclude.txt", # TODO: this should be configurable.
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger("sync")
|
logger = logging.getLogger("sync")
|
||||||
|
|
||||||
@@ -48,12 +56,11 @@ def _load_raw(path: Path) -> dict[str, Any]:
|
|||||||
sys.exit(f"FATAL: unsupported config format: {path.suffix}")
|
sys.exit(f"FATAL: unsupported config format: {path.suffix}")
|
||||||
|
|
||||||
|
|
||||||
def load_config(path: Path) -> dict:
|
def load_config(path: Path) -> Config:
|
||||||
raw = _load_raw(path)
|
raw = _load_raw(path)
|
||||||
folders = raw.get("local_folders", [])
|
folders: list[Any] = raw.get("local_folders", [])
|
||||||
if not folders or not isinstance(folders, list):
|
if not folders:
|
||||||
sys.exit("FATAL: local_folders must be a non-empty list")
|
sys.exit("FATAL: local_folders must be a non-empty list")
|
||||||
|
|
||||||
folders = [Path(f).expanduser() for f in folders]
|
folders = [Path(f).expanduser() for f in folders]
|
||||||
|
|
||||||
for f in folders:
|
for f in folders:
|
||||||
@@ -62,7 +69,8 @@ def load_config(path: Path) -> dict:
|
|||||||
remote = raw.get("remote_folder")
|
remote = raw.get("remote_folder")
|
||||||
if not remote:
|
if not remote:
|
||||||
sys.exit("FATAL: remote_folder is required")
|
sys.exit("FATAL: remote_folder is required")
|
||||||
n = int(raw.get("n", 2))
|
remote = str(remote) # keep as str — may be "host:/path", not a local path
|
||||||
|
n = int(raw.get("n", 1))
|
||||||
if n < 1:
|
if n < 1:
|
||||||
sys.exit("FATAL: n must be >= 1")
|
sys.exit("FATAL: n must be >= 1")
|
||||||
if n > len(folders):
|
if n > len(folders):
|
||||||
@@ -72,17 +80,35 @@ def load_config(path: Path) -> dict:
|
|||||||
if level not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
|
if level not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
|
||||||
sys.exit(f"FATAL: invalid log_level: {level!r}")
|
sys.exit(f"FATAL: invalid log_level: {level!r}")
|
||||||
|
|
||||||
cfg = {"local_folders": folders, "remote_folder": remote, "n": n, "log_level": level}
|
rsync_params: RsyncParameters = {}
|
||||||
logger.debug("got config:", cfg)
|
raw_params = raw.get("rsync_parameters", {})
|
||||||
return cfg
|
if rsync_path := raw_params.get("rsync_path"):
|
||||||
|
rsync_params["rsync_path"] = str(rsync_path)
|
||||||
|
if exclude_from := raw_params.get("exclude_from"):
|
||||||
|
rsync_params["exclude_from"] = str(exclude_from)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"local_folders": folders, "remote_folder": remote,
|
||||||
|
"n": n, "log_level": level, "rsync_parameters": rsync_params,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ── sync ──────────────────────────────────────────────────────────────────────
|
# ── sync ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]:
|
def _build_rsync_cmd(params: RsyncParameters) -> tuple[str, ...]:
|
||||||
|
"""Extend RSYNC_BASE with optional flags from config."""
|
||||||
|
extra: list[str] = []
|
||||||
|
if rp := params.get("rsync_path"):
|
||||||
|
extra.append(f"--rsync-path={rp}")
|
||||||
|
if ef := params.get("exclude_from"):
|
||||||
|
extra.append(f"--exclude-from={ef}")
|
||||||
|
return (*RSYNC_BASE, *extra)
|
||||||
|
|
||||||
|
|
||||||
|
def sync_folder(folder: Path, remote: str, ts: str, params: RsyncParameters) -> tuple[Path, int]:
|
||||||
"""Run rsync for a single folder. Returns (folder, returncode)."""
|
"""Run rsync for a single folder. Returns (folder, returncode)."""
|
||||||
log_file = LOG_DIR / f"{folder}-{ts}.log"
|
log_file = LOG_DIR / f"{folder.name}-{ts}.log"
|
||||||
cmd = [*RSYNC_CMD, f"{folder}", remote]
|
cmd: list[str | Path] = [*_build_rsync_cmd(params), f"{folder}", remote]
|
||||||
|
|
||||||
logger.info("START %s → %s (log: %s)", folder, remote, log_file)
|
logger.info("START %s → %s (log: %s)", folder, remote, log_file)
|
||||||
|
|
||||||
@@ -90,6 +116,7 @@ def sync_folder(folder: str, remote: str, ts: str) -> tuple[str, int]:
|
|||||||
proc = subprocess.run(
|
proc = subprocess.run(
|
||||||
cmd,
|
cmd,
|
||||||
cwd=HOME,
|
cwd=HOME,
|
||||||
|
stdin=subprocess.DEVNULL, # prevent parallel processes from fighting over terminal input
|
||||||
stdout=fh,
|
stdout=fh,
|
||||||
stderr=subprocess.STDOUT, # interleave; nothing goes silent
|
stderr=subprocess.STDOUT, # interleave; nothing goes silent
|
||||||
text=True,
|
text=True,
|
||||||
@@ -122,13 +149,15 @@ def main() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
folders, remote, n = cfg["local_folders"], cfg["remote_folder"], cfg["n"]
|
folders, remote, n = cfg["local_folders"], cfg["remote_folder"], cfg["n"]
|
||||||
|
params = cfg["rsync_parameters"]
|
||||||
|
|
||||||
logger.info("log level is INFO by default, use config to change")
|
logger.debug("got config: %s", cfg)
|
||||||
|
logger.info("log level is set to %s", cfg["log_level"])
|
||||||
logger.info("syncing %d folder(s), parallelism=%d", len(folders), n)
|
logger.info("syncing %d folder(s), parallelism=%d", len(folders), n)
|
||||||
|
|
||||||
failed: list[tuple[str, int]] = []
|
failed: list[tuple[Path, int]] = []
|
||||||
with ThreadPoolExecutor(max_workers=n) as pool:
|
with ThreadPoolExecutor(max_workers=n) as pool:
|
||||||
futures = {pool.submit(sync_folder, f, remote, ts): f for f in folders}
|
futures = {pool.submit(sync_folder, f, remote, ts, params): f for f in folders}
|
||||||
for fut in as_completed(futures):
|
for fut in as_completed(futures):
|
||||||
folder, rc = fut.result()
|
folder, rc = fut.result()
|
||||||
if rc != 0:
|
if rc != 0:
|
||||||
@@ -139,7 +168,7 @@ def main() -> None:
|
|||||||
logger.critical("FAILED: %s (rc=%d)", f, rc)
|
logger.critical("FAILED: %s (rc=%d)", f, rc)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
logger.info("all folders synced successfully")
|
logger.info("SUCC all folders synced successfully")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user