Source code for aoiro.reader._io
import re
import warnings
from collections.abc import Iterable
from decimal import Decimal
from pathlib import Path
from typing import Any
import pandas as pd
from dateparser import parse
from .._ledger import GeneralLedgerLineImpl, LedgerElementImpl
[docs]
def read_all_csvs(path: Path, /, **kwargs: Any) -> pd.DataFrame:
    """
    Read all CSV files under a directory into one DataFrame.

    The directory is searched recursively for ``*.csv`` files. Files are
    read in sorted path order so that the row order of the result is
    reproducible regardless of the order the filesystem lists entries in.

    Parameters
    ----------
    path : Path
        The path to the directory containing CSV files.
    **kwargs : Any
        The keyword arguments for `pd.read_csv`.

    Returns
    -------
    pd.DataFrame
        The concatenated DataFrame with
        column "path" containing the relative path (POSIX style)
        of the CSV file each row came from.
        If no CSV file is found, an empty DataFrame whose only
        column is "path" is returned.
        The per-file row indices are kept as-is, so index values
        may repeat across files.

    """
    frames = []
    # rglob yields entries in an arbitrary, filesystem-dependent order;
    # sorting makes the concatenated row order deterministic.
    for csv_path in sorted(path.rglob("*.csv")):
        frame = pd.read_csv(csv_path, **kwargs)
        frame["path"] = csv_path.relative_to(path).as_posix()
        frames.append(frame)
    if not frames:
        return pd.DataFrame(columns=["path"])
    return pd.concat(frames)
def parse_date(s: str) -> pd.Timestamp:
    """
    Parse date.

    Prefer the last day of the month if the day is not provided.

    Parameters
    ----------
    s : str
        The string to parse.
        Non-string values (e.g. the ``None`` / NaN that pandas uses for
        missing cells) are accepted and treated as "no date".

    Returns
    -------
    pd.Timestamp
        The parsed date, or ``pd.NaT`` if `s` is not a string.

    """
    if not isinstance(s, str):
        # Missing cells arrive as None / float NaN even when the CSV is
        # read with dtype=str; the date parser only accepts strings, so
        # map them to NaT here instead of letting it raise.
        return pd.NaT
    parsed = parse(s, settings={"PREFER_DAY_OF_MONTH": "last"})
    # NOTE(review): presumably `parse` returns None for unparseable text,
    # which pd.Timestamp turns into NaT — confirm against dateparser docs.
    return pd.Timestamp(parsed)
def parse_money(
s: str, currency: str | None = None
) -> tuple[Decimal | None, str | None]:
"""
Parse money.
Parameters
----------
s : str
The string to parse.
currency : str | None, optional
The currency, by default None.
If provided, the currency
in the string would be ignored and replaced by this.
Returns
-------
tuple[Decimal | None, str | None]
The amount and the currency.
"""
match = re.search(r"-?[\d.]+", s)
if match is None:
return None, None
amount = Decimal(match.group())
if currency is None:
currency = re.sub(r"\s+", "", s[: match.start()] + s[match.end() :])
return amount, currency
[docs]
def read_simple_csvs(path: Path) -> pd.DataFrame:
    """
    Read all CSV files in the path.

    The CSV files are assumed to have columns
    ["発生日", "金額"]. A column that is absent from every file is
    added and filled with missing values.

    Every column whose name contains "日" is parsed as a date, and
    "金額" is split into the amount ("金額") and the currency ("通貨").
    Missing cells are tolerated: they become ``pd.NaT`` in date columns
    and ``None`` in "金額" / "通貨".

    Parameters
    ----------
    path : Path
        The path to the directory containing CSV files.

    Returns
    -------
    pd.DataFrame
        The concatenated DataFrame with columns
        ["発生日", "金額", "通貨", "path"], indexed by "発生日"
        (the column is also kept as a regular column).
        If nothing was read, the empty DataFrame from
        `read_all_csvs` is returned unchanged.

    """
    df = read_all_csvs(path, dtype=str)
    if df.empty:
        return df
    for required in ("発生日", "金額"):
        if required not in df.columns:
            df[required] = None

    # parse date
    # Missing cells are None / float NaN even with dtype=str, so only
    # strings are handed to the parser; anything else becomes NaT.
    for name in df.columns:
        if "日" in name:
            df[name] = df[name].map(
                lambda cell: parse_date(cell) if isinstance(cell, str) else pd.NaT
            )

    # parse money
    # Same guard as above: a non-string cell has neither amount nor currency.
    parsed = [
        parse_money(cell) if isinstance(cell, str) else (None, None)
        for cell in df["金額"]
    ]
    df[["金額", "通貨"]] = pd.DataFrame(parsed, index=df.index)

    # set date as index
    return df.set_index("発生日", drop=False)
[docs]
def read_general_ledger(path: Path) -> Iterable[GeneralLedgerLineImpl[Any, Any]]:
    """
    Read general ledger.

    All CSV files under ``path / "general"`` are read without a header
    row. The first column is assumed to be the date.
    For all n in N. the 2n-1-th column after it is assumed to be
    the account name, and the 2n-th column
    is assumed to be the amount.

    Rows may use fewer account/amount pairs than the widest row;
    pairs in which both cells are missing are skipped silently.
    A pair whose amount cannot be parsed is skipped with a warning.

    Parameters
    ----------
    path : Path
        The path to the directory that contains the ``general``
        subdirectory with the CSV files.

    Returns
    -------
    Iterable[GeneralLedgerLineImpl[Any, Any]]
        The general ledger, one line per CSV row.
        Nothing is yielded if no rows were read.

    Raises
    ------
    ValueError
        If the number of columns is even or less than 3.
        Because this is a generator, the error is raised when
        iteration starts, not when the function is called.

    """
    df = read_all_csvs(path / "general", header=None, dtype=str)
    df.drop(columns="path", inplace=True)
    if df.empty:
        return
    if len(df.columns) % 2 != 1:
        raise ValueError("The number of columns should be odd.")
    if len(df.columns) < 3:
        raise ValueError("The number of columns should be at least 3.")
    for _, row in df.iterrows():
        values: list[LedgerElementImpl[Any, Any]] = []
        for i in range(1, len(row), 2):
            account, raw_amount = row[i], row[i + 1]
            if pd.isna(account) and pd.isna(raw_amount):
                # Padding of a row that has fewer pairs than the widest
                # one; there is nothing to report here.
                continue
            # Missing cells are float NaN, which parse_money cannot search;
            # treat them like any other cell without an amount.
            if isinstance(raw_amount, str):
                amount, currency = parse_money(raw_amount)
            else:
                amount, currency = None, None
            if amount is None:
                warnings.warn(f"Amount not found in {raw_amount}", stacklevel=2)
                continue
            values.append(
                LedgerElementImpl(
                    account=account,
                    amount=amount,
                    currency=currency,
                )
            )
        yield GeneralLedgerLineImpl(
            values=values,
            date=parse_date(row[0]),
        )