SysBioChalmers · edkerk · Dec 2, 2023 · Aug 19, 2023 · Aug 19, 2023 · Aug 21, 2023
diff --git a/doc/src/geckomat/change_model/applyKcatConstraints.html b/doc/src/geckomat/change_model/applyKcatConstraints.html
@@ -157,23 +157,25 @@ <h2><a name="_source"></a>SOURCE CODE <a href="#_top"><img alt="^" border="0" sr
 0095     modRxns     = extractAfter(model.ec.rxns,4);
 0096     <span class="comment">% Map ec-reactions to model.rxns</span>
 0097     [hasEc,~]   = ismember(model.rxns,modRxns);
-0098     [~,rxnIdx]  = ismember(modRxns,model.rxns);
-0099     hasEc = find(hasEc &amp; updateRxns);
+0098     hasEc       = find(hasEc &amp; updateRxns);
+0099     [~,rxnIdx]   = ismember(modRxns,model.rxns);
 0100     <span class="keyword">for</span> i = 1:numel(hasEc)
 0101         <span class="comment">% Get all isozymes per reaction</span>
 0102         ecIdx = find(rxnIdx == hasEc(i));
-0103         <span class="comment">% Multiply enzymes with their MW (they are then automatically</span>
-0104         <span class="comment">% summed per reaction), and divide by their kcat, to get a vector</span>
-0105         <span class="comment">% of MW/kcat values.</span>
-0106         MWkcat = (model.ec.rxnEnzMat(ecIdx,:) * model.ec.mw) ./ model.ec.kcat(ecIdx);
-0107         <span class="comment">% If kcat was zero, MWkcat is Inf. Correct back to zero</span>
-0108         MWkcat(abs(MWkcat)==Inf)=0;
-0109         <span class="comment">% Select the lowest MW/kcat (= most efficient), and convert to /hour</span>
-0110         model.S(prot_pool_idx, hasEc(i)) = -min(MWkcat/3600);
-0111     <span class="keyword">end</span>
-0112 <span class="keyword">end</span>
-0113 <span class="keyword">end</span>
-0114</pre></div>
+0103         <span class="comment">% Alternative (logical mask): ecIdx = strcmp(model.rxns(hasEc(i)),modRxns);</span>
+0104         <span class="comment">% Multiply enzymes with their MW (they are then automatically</span>
+0105         <span class="comment">% summed per reaction), and divide by their kcat, to get a vector</span>
+0106         <span class="comment">% of MW/kcat values.</span>
+0107         MWkcat = (model.ec.rxnEnzMat(ecIdx,:) * model.ec.mw) ./ model.ec.kcat(ecIdx);
+0108         <span class="comment">% If kcat was zero, MWkcat is Inf. If no enzyme info was found, MWkcat</span>
+0109         <span class="comment">% is NaN (e.g. 0/0: zero summed MW and zero kcat). Correct both back to zero</span>
+0110         MWkcat(isinf(MWkcat) | isnan(MWkcat)) = 0;
+0111         <span class="comment">% Select the lowest MW/kcat (= most efficient), and convert to /hour</span>
+0112         model.S(prot_pool_idx, hasEc(i)) = -min(MWkcat/3600);
+0113     <span class="keyword">end</span>
+0114 <span class="keyword">end</span>
+0115 <span class="keyword">end</span>
+0116</pre></div>
 <hr><address>Generated by <strong><a href="http://www.artefact.tk/software/matlab/m2html/" title="Matlab Documentation in HTML">m2html</a></strong> &copy; 2005</address>
 </body>
 </html>
diff --git a/doc/src/geckomat/gather_kcats/runDLKcat.html b/doc/src/geckomat/gather_kcats/runDLKcat.html
@@ -90,7 +90,7 @@ <h2><a name="_source"></a>SOURCE CODE <a href="#_top"><img alt="^" border="0" sr
 0035 <span class="keyword">end</span>
 0036 
 0037 disp(<span class="string">'Running DLKcat prediction, this may take many minutes, especially the first time.'</span>)
-0038 status = system([<span class="string">'docker run --rm -v '</span> fullfile(params.path,<span class="string">'/data'</span>) <span class="string">':/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c &quot;python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv&quot;'</span>]);
+0038 status = system([<span class="string">'docker run --rm -v &quot;'</span> fullfile(params.path,<span class="string">'/data'</span>) <span class="string">'&quot;:/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c &quot;python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv&quot;'</span>]);
 0039 
 0040 <span class="keyword">if</span> status == 0 &amp;&amp; exist(fullfile(params.path,<span class="string">'data/DLKcatOutput.tsv'</span>))
 0041     delete(fullfile(params.path,<span class="string">'/data/DLKcat.tsv'</span>));

diff --git a/doc/src/geckomat/get_enzyme_data/loadDatabases.html b/doc/src/geckomat/get_enzyme_data/loadDatabases.html
diff --git a/doc/src/geckomat/limit_proteins/calculateFfactor.html b/doc/src/geckomat/limit_proteins/calculateFfactor.html
@@ -114,7 +114,7 @@ <h2><a name="_source"></a>SOURCE CODE <a href="#_top"><img alt="^" border="0" sr
 0054     fileContent = textscan(fID,<span class="string">'%s %s %f'</span>,<span class="string">'delimiter'</span>,<span class="string">'\t'</span>,<span class="string">'HeaderLines'</span>,headerLines);
 0055     genes       = fileContent{2};
 0056     <span class="comment">%Remove internal geneIDs modifiers</span>
-0057     genes       = regexprep(genes,<span class="string">'(\d{4}).'</span>,<span class="string">''</span>);
+0057     genes       = regexprep(genes,<span class="string">'^\d+\.'</span>,<span class="string">''</span>);
 0058     level       = fileContent{3};
 0059     fclose(fID);
 0060     [a,b]       = ismember(genes,uniprotDB.genes);

diff --git a/src/geckomat/change_model/applyKcatConstraints.m b/src/geckomat/change_model/applyKcatConstraints.m
@@ -95,17 +95,19 @@
     modRxns     = extractAfter(model.ec.rxns,4);
     % Map ec-reactions to model.rxns
     [hasEc,~]   = ismember(model.rxns,modRxns);
-    [~,rxnIdx]  = ismember(modRxns,model.rxns);
-    hasEc = find(hasEc & updateRxns);
+    hasEc       = find(hasEc & updateRxns);
+    [~,rxnIdx]   = ismember(modRxns,model.rxns);
     for i = 1:numel(hasEc)
         % Get all isozymes per reaction
         ecIdx = find(rxnIdx == hasEc(i));
+        % Alternative (logical mask): ecIdx = strcmp(model.rxns(hasEc(i)),modRxns);
         % Multiply enzymes with their MW (they are then automatically
         % summed per reaction), and divide by their kcat, to get a vector
         % of MW/kcat values.
         MWkcat = (model.ec.rxnEnzMat(ecIdx,:) * model.ec.mw) ./ model.ec.kcat(ecIdx);
-        % If kcat was zero, MWkcat is Inf. Correct back to zero
-        MWkcat(abs(MWkcat)==Inf)=0;
+        % If kcat was zero, MWkcat is Inf. If no enzyme info was found, MWkcat
+        % is NaN (e.g. 0/0: zero summed MW and zero kcat). Correct both back to zero
+        MWkcat(isinf(MWkcat) | isnan(MWkcat)) = 0;
         % Select the lowest MW/kcat (= most efficient), and convert to /hour
         model.S(prot_pool_idx, hasEc(i)) = -min(MWkcat/3600);
     end

diff --git a/src/geckomat/gather_kcats/runDLKcat.m b/src/geckomat/gather_kcats/runDLKcat.m
@@ -35,7 +35,7 @@ function runDLKcat(modelAdapter)
 end
 
 disp('Running DLKcat prediction, this may take many minutes, especially the first time.')
-status = system(['docker run --rm -v ' fullfile(params.path,'/data') ':/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']);
+status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']);
 
 if status == 0 && exist(fullfile(params.path,'data/DLKcatOutput.tsv'))
     delete(fullfile(params.path,'/data/DLKcat.tsv'));

diff --git a/src/geckomat/get_enzyme_data/loadDatabases.m b/src/geckomat/get_enzyme_data/loadDatabases.m
@@ -79,6 +79,15 @@
     else
         databases.uniprot = [];
     end
+    if ~isempty(databases.uniprot)
+        [uniqueIDs,uniqueIdx] = unique(databases.uniprot.ID,'stable');
+        if numel(uniqueIDs) < numel(databases.uniprot.ID)
+            duplID = setdiff(1:numel(databases.uniprot.ID),uniqueIdx);
+            dispEM(['Duplicate entries are found for the following proteins. '...
+                    'Manually curate the ''uniprot.tsv'' file, or adjust the uniprot parameters '...
+                    'in the model adapter:'],true,databases.uniprot.ID(duplID));
+        end
+    end
 end
 
 %% KEGG

diff --git a/src/geckomat/limit_proteins/calculateFfactor.m b/src/geckomat/limit_proteins/calculateFfactor.m
@@ -54,7 +54,7 @@
     fileContent = textscan(fID,'%s %s %f','delimiter','\t','HeaderLines',headerLines);
     genes       = fileContent{2};
     %Remove internal geneIDs modifiers
-    genes       = regexprep(genes,'(\d{4}).','');
+    genes       = regexprep(genes,'^\d+\.','');
     level       = fileContent{3};
     fclose(fID);
     [a,b]       = ismember(genes,uniprotDB.genes);