Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions deeplc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
import sys
__all__ = ["DeepLC"]

from importlib.metadata import version

if sys.version_info >= (3,8):
from importlib.metadata import version
__version__ = version('deeplc')
else:
import pkg_resources
__version__ = pkg_resources.require("deeplc")[0].version
__version__ = version("deeplc")


from deeplc.deeplc import DeepLC
from deeplc.feat_extractor import FeatExtractor

122 changes: 74 additions & 48 deletions deeplc/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""Main command line interface to DeepLC."""

# Package metadata for the DeepLC command line interface module.
__author__ = ["Robbin Bouwmeester", "Ralf Gabriels"]
# Assigned once; an identical single-line assignment preceding this one was
# redundant.
__credits__ = [
    "Robbin Bouwmeester",
    "Ralf Gabriels",
    "Prof. Lennart Martens",
    "Sven Degroeve",
]
__license__ = "Apache License, Version 2.0"
__maintainer__ = ["Robbin Bouwmeester", "Ralf Gabriels"]
__email__ = ["Robbin.Bouwmeester@ugent.be", "Ralf.Gabriels@ugent.be"]
Expand All @@ -12,12 +17,12 @@
import warnings

import pandas as pd
from psm_utils.io import read_file
from psm_utils.io.peptide_record import peprec_to_proforma
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io import read_file

from deeplc import __version__, DeepLC, FeatExtractor
from deeplc import DeepLC, __version__
from deeplc._argument_parser import parse_arguments
from deeplc._exceptions import DeepLCError

Expand All @@ -26,27 +31,28 @@

def setup_logging(passed_level):
    """Configure root logging to stdout at the requested level.

    Parameters
    ----------
    passed_level : str
        Level name, matched case-insensitively against ``critical``,
        ``error``, ``warning``, ``info`` and ``debug``.

    Raises
    ------
    SystemExit
        With exit code 1, after printing the accepted names, when
        ``passed_level`` is not one of the known level names.
    """
    log_mapping = {
        "critical": logging.CRITICAL,
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
    }

    # Single lookup; ``None`` marks an unknown name (no level maps to None).
    level = log_mapping.get(passed_level.lower())
    if level is None:
        print(
            "Invalid log level. Should be one of the following: ",
            ", ".join(log_mapping.keys()),
        )
        # ``sys.exit`` is always available, whereas the bare ``exit`` builtin
        # is only injected by the ``site`` module.
        sys.exit(1)

    logging.basicConfig(
        stream=sys.stdout,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=level,
    )


def main(gui=False):
"""Main function for the CLI."""
argu = parse_arguments(gui=gui)
Expand All @@ -55,13 +61,13 @@ def main(gui=False):

# Reset logging levels if DEBUG (see deeplc.py)
if argu.log_level.lower() == "debug":
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
logging.getLogger('tensorflow').setLevel(logging.DEBUG)
warnings.filterwarnings('default', category=DeprecationWarning)
warnings.filterwarnings('default', category=FutureWarning)
warnings.filterwarnings('default', category=UserWarning)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
logging.getLogger("tensorflow").setLevel(logging.DEBUG)
warnings.filterwarnings("default", category=DeprecationWarning)
warnings.filterwarnings("default", category=FutureWarning)
warnings.filterwarnings("default", category=UserWarning)
else:
os.environ['KMP_WARNINGS'] = '0'
os.environ["KMP_WARNINGS"] = "0"

try:
run(**vars(argu))
Expand Down Expand Up @@ -101,13 +107,13 @@ def run(
for fm in file_model:
if len(sel_group) == 0:
sel_group = "_".join(fm.split("_")[:-1])
fm_dict[sel_group]= fm
fm_dict[sel_group] = fm
continue
m_group = "_".join(fm.split("_")[:-1])
if m_group == sel_group:
fm_dict[m_group] = fm
file_model = fm_dict

with open(file_pred) as f:
first_line_pred = f.readline().strip()
if file_cal:
Expand All @@ -118,53 +124,68 @@ def run(
# Read input files
df_pred = pd.read_csv(file_pred)
if len(df_pred.columns) < 2:
df_pred = pd.read_csv(file_pred,sep=" ")
df_pred = pd.read_csv(file_pred, sep=" ")
df_pred = df_pred.fillna("")
file_pred = ""

list_of_psms = []
for seq,mod,ident in zip(df_pred["seq"],df_pred["modifications"],df_pred.index):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident))
for seq, mod, ident in zip(df_pred["seq"], df_pred["modifications"], df_pred.index):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident))
psm_list_pred = PSMList(psm_list=list_of_psms)
df_pred = None
else:
psm_list_pred = read_file(file_pred)
if "msms" in file_pred and ".txt" in file_pred:
mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
mapper = pd.read_csv(
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"unimod/map_mq_file.csv",
),
index_col=0,
)["value"].to_dict()
psm_list_pred.rename_modifications(mapper)

# Calibration input is optional: psm_list_cal stays an empty list when no
# calibration file was passed.
psm_list_cal = []
# file_cal is tested first so that first_line_cal is only inspected when a
# calibration file was given.
# NOTE(review): first_line_cal looks to be assigned only under an earlier
# `if file_cal:` that is not fully visible here - confirm.
if (
    file_cal
    and "modifications" in first_line_cal.split(",")
    and "seq" in first_line_cal.split(",")
):
    # Header has "seq" and "modifications" columns: build PSMs from the table.
    df_cal = pd.read_csv(file_cal)
    if len(df_cal.columns) < 2:
        # Fewer than two columns after the comma-separated parse: re-read the
        # file itself (not the already-parsed DataFrame) as space-separated.
        df_cal = pd.read_csv(file_cal, sep=" ")
    df_cal = df_cal.fillna("")
    file_cal = ""

    list_of_psms = [
        PSM(
            peptidoform=peprec_to_proforma(seq, mod),
            spectrum_id=ident,
            retention_time=tr,
        )
        for seq, mod, ident, tr in zip(
            df_cal["seq"], df_cal["modifications"], df_cal.index, df_cal["tr"]
        )
    ]
    psm_list_cal = PSMList(psm_list=list_of_psms)
    df_cal = None
elif file_cal:
    # Any other calibration file is handed to psm_utils' read_file.
    psm_list_cal = read_file(file_cal)
    if "msms" in file_cal and ".txt" in file_cal:
        # File name contains "msms" and ".txt": rename modifications using the
        # mapping table shipped next to this module.
        mapper = pd.read_csv(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "unimod/map_mq_file.csv",
            ),
            index_col=0,
        )["value"].to_dict()
        psm_list_cal.rename_modifications(mapper)
# Make a feature extraction object; you can skip this if you do not want to
# use the default settings for DeepLC. Here we want to use a model that does
# not use RDKit features so we skip the chemical descriptor making
# procedure.
f_extractor = FeatExtractor(
cnn_feats=True,
verbose=verbose
)


# Make the DeepLC object that will handle making predictions and calibration
dlc = DeepLC(
path_model=file_model,
f_extractor=f_extractor,
cnn_model=True,
split_cal=split_cal,
dict_cal_divider=dict_divider,
Expand All @@ -173,9 +194,9 @@ def run(
batch_num=batch_num,
n_jobs=n_threads,
verbose=verbose,
deeplc_retrain=transfer_learning
deeplc_retrain=transfer_learning,
)

# Calibrate the original model based on the new retention times
if len(psm_list_cal) > 0:
logger.info("Selecting best model and calibrating predictions...")
Expand All @@ -185,16 +206,21 @@ def run(
# Make predictions; calibrated or uncalibrated
logger.info("Making predictions using model: %s", dlc.model)
if len(psm_list_cal) > 0:
    # Calibration PSMs were loaded: call with the default calibrate setting.
    preds = dlc._make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred)
else:
    # No calibration PSMs: explicitly request uncalibrated predictions.
    preds = dlc._make_preds(
        seq_df=df_pred,
        infile=file_pred,
        psm_list=psm_list_pred,
        calibrate=False,
    )

logger.info("Writing predictions to file: %s", file_pred_out)

# The context manager closes the file even when a write raises. The name
# file_pred_out is rebound to the (afterwards closed) file object, as before,
# so any code following this section sees the same state.
with open(file_pred_out, "w") as file_pred_out:
    file_pred_out.write("Sequence proforma,predicted retention time\n")
    for psm, tr in zip(psm_list_pred, preds):
        file_pred_out.write(f"{psm.peptidoform.proforma},{tr}\n")

Expand Down
21 changes: 5 additions & 16 deletions deeplc/_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,13 @@ def parse_arguments(gui=False):

parser = ArgumentParser(
prog="DeepLC",
description=(
"Retention time prediction for (modified) peptides using deep " "learning."
),
description=("Retention time prediction for (modified) peptides using deep learning."),
usage="deeplc [OPTIONS] --file_pred <peptide_file>",
formatter_class=lambda prog: HelpFormatter(prog, max_help_position=42),
add_help=False,
)

io_args = parser.add_argument_group(
"Input and output files", **gooey_args["io_args"]
)
io_args = parser.add_argument_group("Input and output files", **gooey_args["io_args"])
io_args.add_argument(
"--file_pred",
required=True,
Expand All @@ -97,9 +93,7 @@ def parse_arguments(gui=False):
type=str,
default=None,
metavar="Input peptides for calibration" if gui else "",
help=(
"path to peptide CSV file with retention times to use for " "calibration"
),
help=("path to peptide CSV file with retention times to use for calibration"),
**gooey_args["file_cal"],
)
io_args.add_argument(
Expand Down Expand Up @@ -166,10 +160,7 @@ def parse_arguments(gui=False):
dest="split_cal",
default=50,
metavar="split cal" if gui else "",
help=(
"number of splits in the chromatogram for piecewise linear "
"calibration fit"
),
help=("number of splits in the chromatogram for piecewise linear calibration fit"),
**gooey_args["split_cal"],
)
model_cal_args.add_argument(
Expand Down Expand Up @@ -265,8 +256,6 @@ def parse_arguments(gui=False):
results = parser.parse_args()

if not results.file_pred_out:
results.file_pred_out = (
os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv"
)
results.file_pred_out = os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv"

return results
1 change: 1 addition & 0 deletions deeplc/_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""DeepLC exceptions."""


class DeepLCError(Exception):
    """DeepLC-specific error."""

Expand Down
Loading
Loading