Source code for nmdc_ms_metadata_gen.data_classes

from dataclasses import dataclass
from typing import Optional

"""
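This module defines data classes for NMDC (National Microbiome Data Collaborative) type constants, mass spectrometry workflow metadata records, and the mapping from process names to generator method names.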
"""


@dataclass
class NmdcTypes:
    """
    Data class holding NMDC type constants.

    Every attribute is a string field whose default is the corresponding
    ``nmdc:``-prefixed type identifier, so the values can be read either
    from the class itself or from an instance.

    Attributes
    ----------
    Biosample : str
        NMDC type for Biosample.
    MassSpectrometry : str
        NMDC type for Mass Spectrometry.
    MetabolomicsAnalysis : str
        NMDC type for Metabolomics Analysis.
    DataObject : str
        NMDC type for Data Object.
    CalibrationInformation : str
        NMDC type for Calibration Information.
    MetaboliteIdentification : str
        NMDC type for Metabolite Identification.
    NomAnalysis : str
        NMDC type for NOM Analysis.
    OntologyClass : str
        NMDC type for Ontology Class.
    ControlledIdentifiedTermValue : str
        NMDC type for Controlled Identified Term Value.
    TextValue : str
        NMDC type for Text Value.
    GeolocationValue : str
        NMDC type for Geolocation Value.
    TimeStampValue : str
        NMDC type for Timestamp Value.
    QuantityValue : str
        NMDC type for Quantity Value.
    MassSpectrometryConfiguration : str
        NMDC type for Mass Spectrometry Configuration.
    PortionOfSubstance : str
        NMDC type for Portion of Substance.
    MobilePhaseSegment : str
        NMDC type for Mobile Phase Segment.
    ChromatographyConfiguration : str
        NMDC type for Chromatography Configuration.
    Instrument : str
        NMDC type for Instrument.
    Protocol : str
        NMDC type for Protocol.
    Manifest : str
        NMDC type for Manifest.
    ChemicalConversionProcess : str
        NMDC type for Chemical Conversion Process.
    ChromatographicSeparationProcess : str
        NMDC type for Chromatographic Separation Process.
    Pooling : str
        NMDC type for Pooling.
    SubSamplingProcess : str
        NMDC type for Sub Sampling Process.
    Extraction : str
        NMDC type for Extraction.
    ProcessedSample : str
        NMDC type for Processed Sample.
    DissolvingProcess : str
        NMDC type for Dissolving Process.
    """

    Biosample: str = "nmdc:Biosample"
    MassSpectrometry: str = "nmdc:MassSpectrometry"
    MetabolomicsAnalysis: str = "nmdc:MetabolomicsAnalysis"
    DataObject: str = "nmdc:DataObject"
    CalibrationInformation: str = "nmdc:CalibrationInformation"
    MetaboliteIdentification: str = "nmdc:MetaboliteIdentification"
    NomAnalysis: str = "nmdc:NomAnalysis"
    OntologyClass: str = "nmdc:OntologyClass"
    ControlledIdentifiedTermValue: str = "nmdc:ControlledIdentifiedTermValue"
    TextValue: str = "nmdc:TextValue"
    GeolocationValue: str = "nmdc:GeolocationValue"
    # The attribute is spelled "TimeStamp" while the value is "Timestamp";
    # both spellings are kept exactly as they were.
    TimeStampValue: str = "nmdc:TimestampValue"
    QuantityValue: str = "nmdc:QuantityValue"
    MassSpectrometryConfiguration: str = "nmdc:MassSpectrometryConfiguration"
    PortionOfSubstance: str = "nmdc:PortionOfSubstance"
    MobilePhaseSegment: str = "nmdc:MobilePhaseSegment"
    ChromatographyConfiguration: str = "nmdc:ChromatographyConfiguration"
    Instrument: str = "nmdc:Instrument"
    Protocol: str = "nmdc:Protocol"
    Manifest: str = "nmdc:Manifest"
    ChemicalConversionProcess: str = "nmdc:ChemicalConversionProcess"
    ChromatographicSeparationProcess: str = "nmdc:ChromatographicSeparationProcess"
    Pooling: str = "nmdc:Pooling"
    SubSamplingProcess: str = "nmdc:SubSamplingProcess"
    Extraction: str = "nmdc:Extraction"
    ProcessedSample: str = "nmdc:ProcessedSample"
    DissolvingProcess: str = "nmdc:DissolvingProcess"
@dataclass
class GCMSMetabWorkflowMetadata:
    """
    Data class for holding GCMS metabolomic workflow metadata information.

    The first eight fields are required; every remaining field defaults to
    ``None``.

    Attributes
    ----------
    biosample_id : str
        Identifier for the biosample.
    nmdc_study : str
        Identifier for the NMDC study.
    processed_data_file : str
        Path or name of the processed data file.
    raw_data_file : str
        Path or name of the raw data file.
    mass_spec_configuration_id : str
        Identifier for the mass spectrometry configuration used.
    lc_config_id : str
        Identifier for the liquid chromatography configuration used.
    instrument_id : str
        Identifier for the instrument used for analysis.
    calibration_id : str
        Identifier for the calibration information used.
    instrument_analysis_start_date : str, optional
        Start date of the instrument analysis.
    instrument_analysis_end_date : str, optional
        End date of the instrument analysis.
    processing_institution : str, optional
        Name of the processing institution. Must be a value from
        ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution_generation AND
        processing_institution_workflow ARE PROVIDED
    processing_institution_generation : str, optional
        Name of the processing institution where the data was generated.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    processing_institution_workflow : str, optional
        Name of the processing institution where the workflow was executed.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    execution_resource : str, optional
        Name of the execution resource. Must be a value from
        ExecutionResourceEnum.
    raw_data_url : str, optional
        Complete URL for the raw data file. If provided, this takes
        precedence over constructing the URL from base_url + filename.
    manifest_id : str, optional
        Identifier for the manifest associated with this workflow metadata.
    instrument_instance_specifier : str, optional
        Specifier for the instrument instance used in the analysis.
    """

    # Required fields (no default).
    biosample_id: str
    nmdc_study: str
    processed_data_file: str
    raw_data_file: str
    mass_spec_configuration_id: str
    lc_config_id: str
    instrument_id: str
    calibration_id: str
    # Optional fields: annotated Optional[str] because the default is None.
    instrument_analysis_start_date: Optional[str] = None
    instrument_analysis_end_date: Optional[str] = None
    processing_institution: Optional[str] = None
    processing_institution_generation: Optional[str] = None
    processing_institution_workflow: Optional[str] = None
    execution_resource: Optional[str] = None
    raw_data_url: Optional[str] = None
    manifest_id: Optional[str] = None
    instrument_instance_specifier: Optional[str] = None
@dataclass
class LCMSLipidWorkflowMetadata:
    """
    Data class for holding LC-MS lipidomics workflow metadata information.

    Also used for LC-MS Metabolomics workflows. The first five fields are
    required; every remaining field defaults to ``None``.

    Attributes
    ----------
    processed_data_dir : str
        Directory containing processed data files.
    raw_data_file : str
        Path or name of the raw data file.
    mass_spec_configuration_id : str
        Identifier for the mass spectrometry configuration used.
    lc_config_id : str
        Identifier for the liquid chromatography configuration used.
    instrument_id : str
        Identifier for the instrument used for analysis.
    processing_institution : str, optional
        Name of the processing institution. Must be a value from
        ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution_generation AND
        processing_institution_workflow ARE PROVIDED
    processing_institution_generation : str, optional
        Name of the processing institution where the data was generated.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    processing_institution_workflow : str, optional
        Name of the processing institution where the workflow was executed.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    execution_resource : str, optional
        Name of the execution resource. Must be a value from
        ExecutionResourceEnum.
    instrument_analysis_start_date : str, optional
        Start date of the instrument analysis.
    instrument_analysis_end_date : str, optional
        End date of the instrument analysis.
    raw_data_url : str, optional
        Complete URL for the raw data file. If provided, this takes
        precedence over constructing the URL from base_url + filename.
    manifest_id : str, optional
        Identifier for the manifest associated with this workflow metadata.
    instrument_instance_specifier : str, optional
        Specifier for the instrument instance used in the analysis.
    """

    # Required fields (no default).
    processed_data_dir: str
    raw_data_file: str
    mass_spec_configuration_id: str
    lc_config_id: str
    instrument_id: str
    # Optional fields: annotated Optional[str] because the default is None.
    processing_institution: Optional[str] = None
    processing_institution_generation: Optional[str] = None
    processing_institution_workflow: Optional[str] = None
    execution_resource: Optional[str] = None
    instrument_analysis_start_date: Optional[str] = None
    instrument_analysis_end_date: Optional[str] = None
    raw_data_url: Optional[str] = None
    manifest_id: Optional[str] = None
    instrument_instance_specifier: Optional[str] = None
@dataclass
class NOMMetadata:
    """
    Data class for holding NOM workflow metadata information.

    The first eight fields are required; every remaining field defaults to
    ``None``.

    Attributes
    ----------
    raw_data_file : str
        Path or name of the raw data file.
    processed_data_directory : str
        Directory containing processed data files.
    associated_studies : list
        List of associated study identifiers.
    biosample_id : str
        Identifier for the biosample.
    instrument_id : str
        Identifier for the instrument used for analysis.
    mass_spec_configuration_id : str
        Identifier for the mass spectrometry configuration used.
    lc_config_id : str
        Identifier for the liquid chromatography configuration used.
    manifest_id : str
        Identifier for the manifest associated with this workflow metadata.
    processing_institution : str, optional
        Name of the processing institution. Must be a value from
        ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution_generation AND
        processing_institution_workflow ARE PROVIDED
    processing_institution_generation : str, optional
        Name of the processing institution where the data was generated.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    processing_institution_workflow : str, optional
        Name of the processing institution where the workflow was executed.
        Must be a value from ProcessingInstitutionEnum. OPTIONAL IF
        processing_institution IS PROVIDED
    execution_resource : str, optional
        Name of the execution resource. Must be a value from
        ExecutionResourceEnum.
    instrument_instance_specifier : str, optional
        Specifier for the instrument instance used in the analysis.
    """

    # Required fields (no default).
    raw_data_file: str
    processed_data_directory: str
    associated_studies: list
    biosample_id: str
    instrument_id: str
    mass_spec_configuration_id: str
    lc_config_id: str
    manifest_id: str
    # Optional fields: annotated Optional[str] because the default is None.
    processing_institution: Optional[str] = None
    processing_institution_generation: Optional[str] = None
    processing_institution_workflow: Optional[str] = None
    execution_resource: Optional[str] = None
    instrument_instance_specifier: Optional[str] = None


@dataclass
class ProcessGeneratorMap:
    """
    Maps process names from YAML file to their corresponding generator methods.

    This mapping is used to dynamically call the appropriate generator method
    based on the process type found in the YAML file. Each attribute name is
    a process name and its default value is the generator method name.
    """

    SubSamplingProcess: str = "generate_subsampling_process"
    Extraction: str = "generate_extraction"
    ChemicalConversionProcess: str = "generate_chemical_conversion"
    ChromatographicSeparationProcess: str = "generate_chromatographic_separation"
    DissolvingProcess: str = "generate_dissolving_process"