Source code for nmdc_ms_metadata_gen.sheet_generator

import pandas as pd



[docs]
class ChangeSheetGenerator:
    """
    A class to assist in prgrammatically creating change sheets. More documentation can be found here https://docs.microbiomedata.org/runtime/howto-guides/author-changesheets/
    """


[docs]
    @staticmethod
    def initialize_empty_df() -> pd.DataFrame:
        """
        Create an empty DataFrame with required columns.
        Required columns are: id, action, attribute, value

        Parameters
        ----------
        None

        Returns
        -------
        pd.DataFrame
            The initialized empty DataFrame
        """
        df = pd.DataFrame(columns=["id", "action", "attribute", "value"])
        return df



[docs]
    @staticmethod
    def add_row(
        df: pd.DataFrame, id: str, action: str, attribute: str, value: str
    ) -> pd.DataFrame:
        """
        Add a new row to the DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to add the row to
        id : str
            The identifier
        action : str
            The action being performed
        attribute : str
            The attribute being modified
        value : str
            The value to set

        Returns
        -------
        pd.DataFrame
            The updated DataFrame
        """
        new_row = {"id": id, "action": action, "attribute": attribute, "value": value}

        # Changed self.df to df since we're using static method
        return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)





[docs]
class WorkflowSheetGenerator:
    """
    The workflow sheet provides the last processed sample id and the raw file name it will be associated with.
    This provides a reference for creating the associated data generation records.

    """


[docs]
    @staticmethod
    def initialize_empty_df() -> pd.DataFrame:
        """
        Create an empty DataFrame with required columns

        Parameters
        ----------
        None

        Returns
        -------
        pd.DataFrame
            The initialized empty DataFrame
        """
        df = pd.DataFrame(
            columns=["biosample_id", "raw_data_identifier", "last_processed_sample"]
        )
        return df



[docs]
    @staticmethod
    def add_row(
        df: pd.DataFrame,
        biosample_id: str,
        raw_data_identifier: str,
        last_processed_sample: str,
    ) -> pd.DataFrame:
        """
        Add a new row to the DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to add the row to
        biosample_id : str
            The biosample identifier
        raw_data_identifier : str
            The raw file identifier
        last_processed_sample : str
            The last processed sample made during material processing, to be used as input to data generation record

        Returns
        -------
        pd.DataFrame
            The updated DataFrame
        """
        new_row = {
            "biosample_id": biosample_id,
            "raw_data_identifier": raw_data_identifier,
            "last_processed_sample": last_processed_sample,
        }

        # Changed self.df to df since we're using static method
        return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)