csv ¶

Read and write csv files.

ScanCsv ¶

Bases: TypedDict

A struct used to type-check the parameters given to polars.scan_csv.

chr2length_into_lazyframe ¶

chr2length_into_lazyframe(input_path: Path) -> LazyFrame

Read a csv file with two columns, chr and length, and perform some precomputation.

Parameters:

input_path (Path) –

Path to csv.

Returns:

LazyFrame –

A lazyframe that associates each chromosome name with its length and offset information.

Source code in src/variantplaner/io/csv.py

def chr2length_into_lazyframe(input_path: pathlib.Path) -> polars.LazyFrame:
    """Lazily load a chromosome-length csv and add a per-row offset column.

    The file is scanned with a fixed two-column schema: `chr` (string) and
    `length` (unsigned 64-bit integer).

    Args:
        input_path: Path to the csv file.

    Returns:
        A lazyframe with the columns `chr` and `length`, plus an `offset`
        column holding the sum of the lengths of all preceding rows.
    """
    chromosomes = polars.scan_csv(
        input_path,
        schema={"chr": polars.Utf8, "length": polars.UInt64},
    )

    length = polars.col("length")
    # Running total minus the row's own length: the first row gets 0 and
    # every later row gets the total length of the rows before it.
    preceding_total = length.cum_sum() - length

    return chromosomes.with_columns(offset=preceding_total)

into_lazyframe ¶

into_lazyframe(input_path: Path, chr2len_path: Path, chromosome_col: str, position_col: str, reference_col: str, alternative_col: str, info_cols: list[str], /, **scan_csv_args: Unpack[ScanCsv]) -> LazyFrame

Read a csv file and convert it into a lazyframe.

Parameters:

input_path (Path) –

Path to csv.
chr2len_path (Path) –

Path to chr2length csv.
chromosome_col (str) –

Name of the column that holds the chromosomes.
position_col (str) –

Name of the column that holds the positions.
reference_col (str) –

Name of the column that holds the reference sequence.
alternative_col (str) –

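Name of the column that holds the alternative sequence.
info_cols (list[str]) –

Names of the extra columns to keep alongside chr, pos, ref and alt; when empty, every column of the csv is kept.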
scan_csv_args (Unpack[ScanCsv], default: {} ) –

polars.scan_csv parameter.

Returns:

LazyFrame –

A lazyframe that contains the csv information.

Source code in src/variantplaner/io/csv.py

def into_lazyframe(
    input_path: pathlib.Path,
    chr2len_path: pathlib.Path,
    chromosome_col: str,
    position_col: str,
    reference_col: str,
    alternative_col: str,
    info_cols: list[str],
    /,
    **scan_csv_args: Unpack[ScanCsv],
) -> polars.LazyFrame:
    """Scan a variant csv file lazily and normalise its column names.

    The four user-named columns are renamed to `chr`, `pos`, `ref` and `alt`,
    and `pos` is cast to an unsigned 64-bit integer. The result is then handed
    to `normalization.add_variant_id` together with the chromosome-length
    lazyframe.

    Args:
        input_path: Path to csv.
        chr2len_path: Path to chr2length csv.
        chromosome_col: Name of the column that holds the chromosomes.
        position_col: Name of the column that holds the positions.
        reference_col: Name of the column that holds the reference sequence.
        alternative_col: Name of the column that holds the alternative sequence.
        info_cols: Names of extra columns to keep next to `chr`, `pos`, `ref`
            and `alt`. When empty, no column selection is done and every
            column of the csv is kept.
        scan_csv_args: Keyword arguments forwarded to polars.scan_csv.

    Returns:
        The lazyframe returned by `normalization.add_variant_id`.

    """
    variants = polars.scan_csv(input_path, **scan_csv_args)
    chromosome_lengths = chr2length_into_lazyframe(chr2len_path)

    # Map the caller-supplied column names onto the canonical ones.
    canonical_names = {
        chromosome_col: "chr",
        position_col: "pos",
        reference_col: "ref",
        alternative_col: "alt",
    }
    variants = variants.rename(canonical_names).cast({"pos": polars.UInt64})

    # Only restrict the columns when the caller asked for specific info columns.
    if info_cols:
        kept_columns = ["chr", "pos", "ref", "alt"]
        kept_columns.extend(info_cols)
        variants = variants.select(kept_columns)

    return normalization.add_variant_id(variants, chromosome_lengths)