Skip to content

txi

Sync

transcript-indexer

Sync¶

`transcript_indexer.sync` ¶

Idempotent sync engine.

Walks conversation sources (Otter .txt today), reads and hashes each one, and reconciles new, changed, renamed, and deleted conversations along with their turns and participants. The sync is hash-driven: a conversation whose hash changes is deleted and re-inserted so that cascades clear any derived rows.

Embedding runs as a post-pass over the touched conversations unless it is disabled via embed=False. Renamed conversations are not re-embedded, because their content hash is unchanged.

`sync(conn, cfg, *, only=None, strict=False, keep_orphans=False, embed=True, on_embed_progress=None)` ¶

Run a full sync against the given DB connection.

Source code in src/transcript_indexer/sync.py

def sync(
    conn: sqlite3.Connection,
    cfg: Config,
    *,
    only: Iterable[Path] | None = None,
    strict: bool = False,
    keep_orphans: bool = False,
    embed: bool = True,
    on_embed_progress: ProgressCallback | None = None,
) -> SyncReport:
    """Run a sync pass against the given DB connection and report on it.

    The work happens in three stages: conversations are synced via
    ``_sync_conversations``, people are resolved via ``resolve_people``,
    and then, optionally, the touched conversations are embedded via
    ``embed_conversations``.

    Args:
        conn: Open SQLite connection handed to every stage.
        cfg: Configuration; ``cfg.indexing.strict_format`` is consulted
            when ``strict`` is falsy.
        only: Optional iterable of paths forwarded to the conversation
            sync. It is materialised into a list up front, so a one-shot
            iterator is safe to pass. ``None`` is forwarded as ``None``.
        strict: When truthy it is used as-is. When falsy, the value of
            ``cfg.indexing.strict_format`` is used instead, so the config
            can switch strictness on but this argument cannot force it off.
        keep_orphans: Forwarded unchanged to the conversation sync.
        embed: When falsy, the embedding stage is skipped entirely.
        on_embed_progress: Optional callback passed to
            ``embed_conversations`` as ``on_progress``.

    Returns:
        The ``SyncReport`` handed to the conversation sync, with
        ``elapsed_seconds`` always set and the ``embedding_*`` counters
        filled in only when the embedding stage actually ran.
    """
    summary = SyncReport()
    selected = None if only is None else list(only)
    # Handed to the conversation sync; whatever ends up in here afterwards
    # is what gets embedded.
    touched_ids: list[int] = []
    began = datetime.now(UTC)

    # The explicit argument wins when truthy; otherwise defer to the config.
    effective_strict = strict or cfg.indexing.strict_format

    _sync_conversations(
        conn,
        cfg,
        only=selected,
        strict=effective_strict,
        keep_orphans=keep_orphans,
        report=summary,
        touched=touched_ids,
    )
    resolve_people(conn)

    # Embedding is a post-pass: skipped when disabled or when nothing was touched.
    if embed and touched_ids:
        stats = embed_conversations(
            conn,
            cfg,
            touched_ids,
            on_progress=on_embed_progress,
        )
        summary.embedding_chunks_total = stats.chunks_total
        summary.embedding_cache_hits = stats.cache_hits
        summary.embedding_cache_misses = stats.cache_misses
        summary.embedding_embedded = stats.embedded

    finished = datetime.now(UTC)
    summary.elapsed_seconds = (finished - began).total_seconds()
    return summary