Source code for src.lcms_lipid_metadata_generator

# -*- coding: utf-8 -*-
from typing import List, Optional

from src.lcms_metadata_generator import LCMSMetadataGenerator


class LCMSLipidomicsMetadataGenerator(LCMSMetadataGenerator):
    """
    A class for generating NMDC metadata objects using provided metadata files
    and configuration for LC-MS lipidomics data.

    This class processes input metadata files, generates various NMDC objects,
    and produces a database dump in JSON format.

    Parameters
    ----------
    metadata_file : str
        Path to the input CSV metadata file.
    database_dump_json_path : str
        Path where the output database dump JSON file will be saved.
    raw_data_url : str
        Base URL for the raw data files.
    process_data_url : str
        Base URL for the processed data files.
    minting_config_creds : str, optional
        Path to the configuration file containing the client ID and client
        secret for minting NMDC IDs. It can also include the bio ontology API
        key if generating biosample ids is needed. If not provided, the
        CLIENT_ID, CLIENT_SECRET, and BIO_API_KEY environment variables will
        be used.
    workflow_version : str, optional
        Version of the workflow to record. When omitted (or any falsy value
        such as an empty string), the version is obtained by calling
        ``get_workflow_version`` with the metaMS ``.bumpversion_lipid.cfg``
        URL.

    Attributes
    ----------
    unique_columns : List[str]
        List of unique columns in the metadata file.
    mass_spec_desc : str
        Description of the mass spectrometry analysis.
    mass_spec_eluent_intro : str
        Eluent introduction category for mass spectrometry.
    analyte_category : str
        Category of the analyte.
    raw_data_obj_type : str
        Type of the raw data object.
    raw_data_obj_desc : str
        Description of the raw data object.
    workflow_analysis_name : str
        Name of the workflow analysis.
    workflow_description : str
        Description of the workflow.
    workflow_git_url : str
        URL of the workflow's Git repository.
    workflow_version : str
        Version of the workflow. Declared on the class without a value and
        assigned per instance in ``__init__``.
    workflow_category : str
        Category of the workflow.
    wf_config_process_data_category : str
        Category of the workflow configuration process data.
    wf_config_process_data_obj_type : str
        Type of the workflow configuration process data object.
    wf_config_process_data_description : str
        Description of the workflow configuration process data.
    no_config_process_data_category : str
        Category for processed data without configuration.
    no_config_process_data_obj_type : str
        Type of processed data object without configuration.
    csv_process_data_description : str
        Description of CSV processed data.
    hdf5_process_data_obj_type : str
        Type of HDF5 processed data object.
    hdf5_process_data_description : str
        Description of HDF5 processed data.
    minting_config_creds : str or None
        The ``minting_config_creds`` constructor argument, stored unchanged.
    """

    unique_columns: List[str] = ["raw_data_file", "processed_data_directory"]

    # Data Generation attributes
    mass_spec_desc: str = (
        "Generation of mass spectrometry data for the analysis of lipids."
    )
    mass_spec_eluent_intro: str = "liquid_chromatography"
    analyte_category: str = "lipidome"
    raw_data_obj_type: str = "LC-DDA-MS/MS Raw Data"
    raw_data_obj_desc: str = "LC-DDA-MS/MS raw data for lipidomics data acquisition."

    # Workflow attributes
    workflow_analysis_name: str = "Lipidomics analysis"
    workflow_description: str = (
        "Analysis of raw mass spectrometry data for the annotation of lipids."
    )
    workflow_git_url: str = (
        "https://github.com/microbiomedata/metaMS/wdl/metaMS_lipidomics.wdl"
    )
    workflow_version: str
    workflow_category: str = "lc_ms_lipidomics"

    # Processed data attributes
    wf_config_process_data_category: str = "workflow_parameter_data"
    wf_config_process_data_obj_type: str = "Configuration toml"
    wf_config_process_data_description: str = (
        "CoreMS parameters used for Lipidomics workflow."
    )
    no_config_process_data_category: str = "processed_data"
    no_config_process_data_obj_type: str = "LC-MS Lipidomics Results"
    csv_process_data_description: str = (
        "Lipid annotations as a result of a lipidomics workflow activity."
    )
    hdf5_process_data_obj_type: str = "LC-MS Lipidomics Processed Data"
    hdf5_process_data_description: str = (
        "CoreMS hdf5 file representing a lipidomics data file including annotations."
    )

    def __init__(
        self,
        metadata_file: str,
        database_dump_json_path: str,
        raw_data_url: str,
        process_data_url: str,
        minting_config_creds: Optional[str] = None,
        workflow_version: Optional[str] = None,
    ):
        """
        Initialise the parent generator and record lipidomics-specific state.

        See the class docstring for the meaning of each parameter.
        """
        super().__init__(
            metadata_file=metadata_file,
            database_dump_json_path=database_dump_json_path,
            raw_data_url=raw_data_url,
            process_data_url=process_data_url,
        )
        # A caller-supplied version wins. Any falsy value (None or "") hands
        # the decision to get_workflow_version, which is given the metaMS
        # bumpversion config URL; no other fallback is applied here.
        self.workflow_version = workflow_version or self.get_workflow_version(
            workflow_version_git_url="https://github.com/microbiomedata/metaMS/blob/master/.bumpversion_lipid.cfg"
        )
        self.minting_config_creds = minting_config_creds

    def rerun(self):
        """
        Delegate to the parent class's ``rerun``.

        Returns
        -------
        object
            Whatever the parent implementation returns, passed through
            unchanged so the override does not hide it from callers.
        """
        return super().rerun()

    def run(self):
        """
        Delegate to the parent class's ``run``.

        Returns
        -------
        object
            Whatever the parent implementation returns, passed through
            unchanged so the override does not hide it from callers.
        """
        return super().run()