Skip to content

utils

Miscellaneous utility functions.

Functions:

  • add_id_part

    Compute the id_part column and add it to a polars.DataFrame.

  • add_recurrence

    Compute recurrence of variant.

  • fix_variants_path

    Adjust the variants path depending on whether the variants are split into several files or not.

  • get_list

    Replace each list by its value at index, or by null_value if index is out of bounds.

  • list2string

    Convert list columns to strings.

add_id_part

add_id_part(
    data: DataFrame, number_of_bits: int = 8
) -> DataFrame

Compute the id_part column and add it to a polars.DataFrame.

Source code in src/sake/utils.py
17
18
19
20
21
22
23
24
25
26
27
28
def add_id_part(data: polars.DataFrame, number_of_bits: int = 8) -> polars.DataFrame:
    """Add an ``id_part`` column derived from the ``id`` column.

    When ``id // 2**63`` equals 1, ``id_part`` is ``2**number_of_bits - 1``,
    the largest value that fits in ``number_of_bits`` bits. Otherwise it is
    ``id * 2 // 2**(64 - number_of_bits)``.

    NOTE(review): presumably ``id`` is an unsigned 64-bit integer, so that the
    first test checks its highest bit -- confirm against the callers.
    """
    variant_id = polars.col("id")

    # Integer division and multiplication by powers of two are used here in
    # place of bit-shift operators.
    high_bit_is_set = variant_id // 2**63 == 1
    last_part = 2**number_of_bits - 1
    shifted_id = variant_id * 2 // 2 ** (64 - number_of_bits)

    return data.with_columns(
        id_part=polars.when(high_bit_is_set).then(last_part).otherwise(shifted_id),
    )

add_recurrence

add_recurrence(data: DataFrame) -> DataFrame

Compute recurrence of variant.

Required columns: id (variant id) and gt (genotype).

Source code in src/sake/utils.py
31
32
33
34
35
36
37
38
39
40
41
42
def add_recurrence(data: polars.DataFrame) -> polars.DataFrame:
    """Attach to every row the sum of ``gt`` over all rows sharing its ``id``.

    Required columns:
    - id: variant id
    - gt: genotype

    The aggregated sum is named ``sake_AC``.
    """
    genotype_sum = polars.col("gt").sum().alias("sake_AC")
    per_variant = data.group_by("id").agg(genotype_sum)

    # Left join on "id": each input row is matched with the total of its variant.
    return data.join(per_variant, on="id", how="left")

fix_variants_path

fix_variants_path(
    path: Path, target: str, chrom: str | None = None
) -> str

Adjust the variants path depending on whether the variants are split into several files or not.

Source code in src/sake/utils.py
65
66
67
68
69
70
71
def fix_variants_path(path: pathlib.Path, target: str, chrom: str | None = None) -> str:
    """Fix variants path to match if variants are split or not."""
    if chrom is None and pathlib.Path(str(path).format(target=target)).is_dir():
        return str(path).format(target=target) + "/*.parquet"
    if pathlib.Path(str(path).format(target=target)).is_dir():
        return str(path).format(target=target) + f"/{chrom}.parquet"
    return str(path.with_suffix(".parquet")).format(target=target)

get_list

get_list(
    data: DataFrame,
    *,
    columns: list[str],
    index: int = 0,
    null_value: Any = 0
) -> DataFrame

Replace each list by its value at index, or by null_value if index is out of bounds.

Source code in src/sake/utils.py
52
53
54
55
56
57
58
59
60
61
62
def get_list(
    data: polars.DataFrame,
    *,
    columns: list[str],
    index: int = 0,
    null_value: typing.Any = 0,
) -> polars.DataFrame:
    """Replace each list column in ``columns`` by its element at ``index``.

    An out-of-bounds ``index`` yields null instead of an error
    (``null_on_oob=True``). Every null in the result is then replaced by
    ``null_value``. Each column keeps its original name.
    """
    picked = []
    for name in columns:
        element = polars.col(name).list.get(index, null_on_oob=True)
        picked.append(element.fill_null(null_value).alias(name))

    return data.with_columns(picked)

list2string

list2string(
    data: DataFrame,
    *,
    columns: list[str],
    separator: str = ","
) -> DataFrame

Convert list columns to strings.

Source code in src/sake/utils.py
45
46
47
48
49
def list2string(data: polars.DataFrame, *, columns: list[str], separator: str = ",") -> polars.DataFrame:
    """Replace each list column in ``columns`` by a single string column.

    Each column is first cast to a list of strings, then its elements are
    joined with ``separator``. Each column keeps its original name.
    """
    string_list = polars.List(polars.String)
    joined = [
        polars.col(name).cast(string_list).list.join(separator).alias(name)
        for name in columns
    ]
    return data.with_columns(joined)