From 70562d49df9f5fb60e6cdf5cab6cc490e1923b62 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Fri, 3 Jan 2020 15:21:14 -0800 Subject: [PATCH 01/14] modification of MERFISHProbeDesign.m by yilin this version works with LIMS references changed "transferAbund" to "true" under "if useUniformWeights" output the same probes each run --- probe_construction/MERFISHProbeDesign.m | 50 +++++++++++++------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index d79e094..07afc2f 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -502,11 +502,11 @@ function MERFISHProbeDesign(varargin) % Check that paths generated exist or not as required by downstream % code - badFilesFound = filePathCheck(pathsToCheck); + %badFilesFound = filePathCheck(pathsToCheck); - if badFilesFound - error('Address file path issues and restart processing.'); - end + %if badFilesFound + % error('Address file path issues and restart processing.'); + %end % Start logging logFileName = fullfile(analysisSavePath, strcat(libraryName, '.log')); @@ -661,12 +661,14 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Building transcriptome object.\n', datestr(datetime)); % Build transcriptome using existing abundance data + %transcriptome = Transcriptome(rawTranscriptomeFasta, ... + % 'abundPath', fpkmPath, ... + % 'verbose', true, ... + % 'headerType', transcriptomeHeaderType, ... + %'IDType', transcriptomeIDType); transcriptome = Transcriptome(rawTranscriptomeFasta, ... 'abundPath', fpkmPath, ... - 'verbose', true, ... - 'headerType', transcriptomeHeaderType, ... - 'IDType', transcriptomeIDType); - + 'verbose', true); transcriptome.Save(transcriptomePath); fprintf(logFID, '%s - Transcriptome object saved to %s\n', datestr(datetime), transcriptomePath); @@ -713,23 +715,23 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Gene %d of %d. First transcript %s\n', datestr(datetime), i, length(names), idsByName{i}{1}); end - if useUniformWeights + % if useUniformWeights % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties 'verbose', false, ... - 'transferAbund', false); + 'transferAbund', true); - else + % else % Generate a OTTable for isoforms for the given gene - isoSpecificityTables(i) = OTTable(localTranscriptome, ... - isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties - 'verbose', false, ... - 'transferAbund', true); + % isoSpecificityTables(i) = OTTable(localTranscriptome, ... + % isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties + % 'verbose', false, ... + % 'transferAbund', true); - end + % end % Name the table isoSpecificityTables(i).name = names{i}; @@ -798,7 +800,7 @@ function MERFISHProbeDesign(varargin) %% Create parallel pool... speeds up the construction of the TRDesigner and the construction of libraries if isempty(gcp('nocreate')) fprintf(logFID, '%s - Start parallel processing pool\n', datestr(datetime)); - p = parpool(5); % Insert a number here appropriate to the used computational resources + p = parpool(4); % Insert a number here appropriate to the used computational resources else p = gcp; end @@ -956,9 +958,9 @@ function MERFISHProbeDesign(varargin) % string miss-match. Replacing '_' with ',' in original codebook solves % this issue. - if ~strcmp(transcriptomeIDType, 'NCBI') - finalIds = strrep(finalIds, '_', ','); - end + %if ~strcmp(transcriptomeIDType, 'NCBI') + % finalIds = strrep(finalIds, '_', ','); + %end finalGenes = {codebook.name}; % Extract gene common names from codebook barcodes = char({codebook.barcode}) == '1'; % Extract string barcodes and convert to logical matrix @@ -1108,13 +1110,15 @@ function MERFISHProbeDesign(varargin) else % Create random orientation and selection of readouts - localReadouts = possibleReadouts(randperm(length(possibleReadouts), 3)); + %localReadouts = possibleReadouts(randperm(length(possibleReadouts), 3)); + localReadouts = possibleReadouts([1 2 3]); end - if rand(1) > 0.5 - % Create header + %if rand(1) > 0.5 + if 0.2 >0.5 + % Create header headers{p} = [libraryName ' ' ... localReadouts(1).Header ' ' ... tRegion.geneName '__' ... From bdef939beb60130a7d14e8f0ee91e1e60add1808 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Fri, 3 Jan 2020 15:35:23 -0800 Subject: [PATCH 02/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 07afc2f..039780a 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -502,11 +502,11 @@ function MERFISHProbeDesign(varargin) % Check that paths generated exist or not as required by downstream % code - %badFilesFound = filePathCheck(pathsToCheck); + badFilesFound = filePathCheck(pathsToCheck); - %if badFilesFound - % error('Address file path issues and restart processing.'); - %end + if badFilesFound + error('Address file path issues and restart processing.'); + end % Start logging logFileName = fullfile(analysisSavePath, strcat(libraryName, '.log')); From a82c8fb63cd6d8502a2b05a830dfd3dc12b5625c Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Wed, 8 Jan 2020 11:56:57 -0800 Subject: [PATCH 03/14] update allOligos The human_MTG sequential codebook revealed a bug that the allOligos were not collected correctly with the original version. I rewrote the logic to collect allOligos. --- probe_construction/MERFISHProbeDesign.m | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 039780a..7fc9a79 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1036,7 +1036,7 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; - + allOligos = []; lastGene = ''; if keepAllPossibleProbes @@ -1256,14 +1256,16 @@ function MERFISHProbeDesign(varargin) for s=1:length(indsToKeepForReal) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; + allOligos(end+1).Header = headers{length(headers)}; + allOligos(end).Sequence = seqs{length(seqs)}; end - if keepAllPossibleProbes + %if keepAllPossibleProbes % Append all headers + seqs to cell - allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; - allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; - seqCount = seqCount + length(seqs); - end + % allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; + % allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; + % seqCount = seqCount + length(seqs); + %end else @@ -1288,7 +1290,7 @@ function MERFISHProbeDesign(varargin) if keepAllPossibleProbes - allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); + %allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); PageBreak(); fprintf(1, 'Writing: %s\n', allOligosPath); From 9984f6beb74a2902af425ea9a4cc99c1bb352c3c Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Wed, 8 Jan 2020 14:14:37 -0800 Subject: [PATCH 04/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 7fc9a79..f57fc12 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1256,8 +1256,8 @@ function MERFISHProbeDesign(varargin) for s=1:length(indsToKeepForReal) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; - allOligos(end+1).Header = headers{length(headers)}; - allOligos(end).Sequence = seqs{length(seqs)}; + allOligos(end+1).Header = headers{1:length(headers)}; + allOligos(end).Sequence = seqs{1:length(seqs)}; end %if keepAllPossibleProbes From ea0b20f783c0937070e4db2c49011b585234d68d Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Wed, 8 Jan 2020 14:36:45 -0800 Subject: [PATCH 05/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index f57fc12..039780a 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1036,7 +1036,7 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; - allOligos = []; + lastGene = ''; if keepAllPossibleProbes @@ -1256,16 +1256,14 @@ function MERFISHProbeDesign(varargin) for s=1:length(indsToKeepForReal) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; - allOligos(end+1).Header = headers{1:length(headers)}; - allOligos(end).Sequence = seqs{1:length(seqs)}; end - %if keepAllPossibleProbes + if keepAllPossibleProbes % Append all headers + seqs to cell - % allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; - % allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; - % seqCount = seqCount + length(seqs); - %end + allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; + allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; + seqCount = seqCount + length(seqs); + end else @@ -1290,7 +1288,7 @@ function MERFISHProbeDesign(varargin) if keepAllPossibleProbes - %allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); + allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); PageBreak(); fprintf(1, 'Writing: %s\n', allOligosPath); From bc82b52e9fc830953904c3fb1bfd0686571e1398 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Tue, 14 Jan 2020 14:34:11 -0800 Subject: [PATCH 06/14] fixed bugs for genes with multiple scripts 1. Check whether gene and ids match with each other in the reference, send a warning if not 2. allOligos file is written in a different way to avoid empty cells when there are mismatched gene+ids in the codebook 3. change the selection process of probes to stop picking duplicated probes for genes with multiple transcripts --- probe_construction/MERFISHProbeDesign.m | 65 +++++++++++++++++-------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 039780a..a13eada 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -505,7 +505,13 @@ function MERFISHProbeDesign(varargin) badFilesFound = filePathCheck(pathsToCheck); if badFilesFound - error('Address file path issues and restart processing.'); + delete(usedReadoutPath) + delete(possibleOligosPath) + delete(oligosPath) + delete(primersPath) + delete(finalPrimersPath) + delete(allOligosPath) + %error('Address file path issues and restart processing.'); end % Start logging @@ -916,7 +922,11 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Target regions loaded from %s\n', datestr(datetime), trRegionsPath); end - + %write the genename + genenames={targetRegions.geneName}'; + geneids={targetRegions.id}'; + numRegions={targetRegions.numRegions}'; + writetable(table(genenames,geneids,numRegions),[analysisSavePath 'targetRegionsHuman.csv'],'WriteRowNames',false) %% ------------------------------------------------------------------------ % Step 2: Compile the library % The target regions designed above will be compiled into template @@ -964,7 +974,13 @@ function MERFISHProbeDesign(varargin) finalGenes = {codebook.name}; % Extract gene common names from codebook barcodes = char({codebook.barcode}) == '1'; % Extract string barcodes and convert to logical matrix - + %check if gene and ids match the reference + genes_from_ids=targetRegions(ismember({targetRegions.id}, finalIds)); + genenames=targetRegions(ismember({targetRegions.geneName}, finalGenes)); + if length(setdiff({genenames.geneName},{genes_from_ids.geneName}))>0 + warning([char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference']); + fprintf(logFID, ['WARNING! ', char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference\n']); + end if versionMatch finalTargetRegions = targetRegions(ismember({targetRegions.id}, finalIds)); % Extract only the desired target regions @@ -1036,16 +1052,16 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; - + allOligos = []; lastGene = ''; - if keepAllPossibleProbes - allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); - allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); - seqCount = 1; - end + %if keepAllPossibleProbes + % allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); + % allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); + % seqCount = 1; + %end - for i=1:length(finalIds) + for i=1:length(finalGenes) % Save local gene localGeneName = finalGenes{i}; @@ -1062,7 +1078,7 @@ function MERFISHProbeDesign(varargin) % Determine targetRegion sequences tRegionPull = finalTargetRegions(strcmp({finalTargetRegions.geneName}, localGeneName)); - + if ~isempty(tRegionPull) % Check to see if there are no target regions--only used for blanks @@ -1175,6 +1191,7 @@ function MERFISHProbeDesign(varargin) indsToKeep = find(~hasrRNAPenalty); indsToRemove = setdiff(1:length(seqs), indsToKeep); display(['... removing ' num2str(length(indsToRemove)) ' probes']); + fprintf(logFID, '%s - Removing %d probes\n', datestr(datetime), length(indsToRemove)); for r=1:length(indsToRemove) display(['... ' headers{indsToRemove(r)}]); @@ -1182,11 +1199,14 @@ function MERFISHProbeDesign(varargin) % display(indsToKeep) - indsToKeepForReal = [indsToKeepForReal, indsToKeep]; - + %indsToKeepForReal = [indsToKeepForReal, indsToKeep]; + indsToKeepForReal = indsToKeep; end - + indsToKeepForAll = indsToKeepForReal(1:length(indsToKeepForReal)); + indsToKeepForReal = indsToKeepForReal(randperm(length(indsToKeepForReal), min([length(indsToKeepForReal) numProbesPerGene]))); + + display(['... keeping ' num2str(length(indsToKeepForReal)) ' probes']); fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal)); @@ -1257,13 +1277,16 @@ function MERFISHProbeDesign(varargin) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; end - - if keepAllPossibleProbes - % Append all headers + seqs to cell - allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; - allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; - seqCount = seqCount + length(seqs); + for s=1:length(indsToKeepForAll) + allOligos(end+1).Header = headers{indsToKeepForAll(s)}; + allOligos(end).Sequence = seqs{indsToKeepForAll(s)}; end + %if keepAllPossibleProbes + % Append all headers + seqs to cell + % allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; + % allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs; + % seqCount = seqCount + length(seqs); + %end else @@ -1288,7 +1311,7 @@ function MERFISHProbeDesign(varargin) if keepAllPossibleProbes - allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); + %allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); PageBreak(); fprintf(1, 'Writing: %s\n', allOligosPath); From 03e1e0cb3f126393017b27212710be9b12134e14 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Tue, 14 Jan 2020 14:46:20 -0800 Subject: [PATCH 07/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index a13eada..579ac8a 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1061,7 +1061,7 @@ function MERFISHProbeDesign(varargin) % seqCount = 1; %end - for i=1:length(finalGenes) + for i=1:length(finalIds) % Save local gene localGeneName = finalGenes{i}; @@ -1191,7 +1191,6 @@ function MERFISHProbeDesign(varargin) indsToKeep = find(~hasrRNAPenalty); indsToRemove = setdiff(1:length(seqs), indsToKeep); display(['... removing ' num2str(length(indsToRemove)) ' probes']); - fprintf(logFID, '%s - Removing %d probes\n', datestr(datetime), length(indsToRemove)); for r=1:length(indsToRemove) display(['... ' headers{indsToRemove(r)}]); @@ -1203,10 +1202,7 @@ function MERFISHProbeDesign(varargin) indsToKeepForReal = indsToKeep; end indsToKeepForAll = indsToKeepForReal(1:length(indsToKeepForReal)); - indsToKeepForReal = indsToKeepForReal(randperm(length(indsToKeepForReal), min([length(indsToKeepForReal) numProbesPerGene]))); - - display(['... keeping ' num2str(length(indsToKeepForReal)) ' probes']); fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal)); From f07937ebc9e5efa4abec1b5c6c15633b21a41e12 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Tue, 11 Feb 2020 10:20:19 -0800 Subject: [PATCH 08/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 32 ++++++++++--------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 579ac8a..bf4a970 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -505,13 +505,7 @@ function MERFISHProbeDesign(varargin) badFilesFound = filePathCheck(pathsToCheck); if badFilesFound - delete(usedReadoutPath) - delete(possibleOligosPath) - delete(oligosPath) - delete(primersPath) - delete(finalPrimersPath) - delete(allOligosPath) - %error('Address file path issues and restart processing.'); + error('Address file path issues and restart processing.'); end % Start logging @@ -721,23 +715,23 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Gene %d of %d. First transcript %s\n', datestr(datetime), i, length(names), idsByName{i}{1}); end - % if useUniformWeights + if useUniformWeights % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties 'verbose', false, ... - 'transferAbund', true); + 'transferAbund', false); - % else + else % Generate a OTTable for isoforms for the given gene - % isoSpecificityTables(i) = OTTable(localTranscriptome, ... - % isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties - % 'verbose', false, ... - % 'transferAbund', true); + isoSpecificityTables(i) = OTTable(localTranscriptome, ... + isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties + 'verbose', false, ... + 'transferAbund', true); - % end + end % Name the table isoSpecificityTables(i).name = names{i}; @@ -1079,7 +1073,7 @@ function MERFISHProbeDesign(varargin) % Determine targetRegion sequences tRegionPull = finalTargetRegions(strcmp({finalTargetRegions.geneName}, localGeneName)); - + if ~isempty(tRegionPull) % Check to see if there are no target regions--only used for blanks seqs = {}; @@ -1126,14 +1120,12 @@ function MERFISHProbeDesign(varargin) else % Create random orientation and selection of readouts - %localReadouts = possibleReadouts(randperm(length(possibleReadouts), 3)); - localReadouts = possibleReadouts([1 2 3]); + localReadouts = possibleReadouts(randperm(length(possibleReadouts), 3)); end - %if rand(1) > 0.5 - if 0.2 >0.5 + if rand(1) > 0.5 % Create header headers{p} = [libraryName ' ' ... localReadouts(1).Header ' ' ... From efa7d0581618e3ce4381e020c3fa23a01141f4f5 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Tue, 11 Feb 2020 10:25:18 -0800 Subject: [PATCH 09/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index bf4a970..8e6c231 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -719,8 +719,7 @@ function MERFISHProbeDesign(varargin) % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... - isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties - 'verbose', false, ... + isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties 'verbose', false, ... 'transferAbund', false); else @@ -1072,8 +1071,8 @@ function MERFISHProbeDesign(varargin) % Determine targetRegion sequences tRegionPull = finalTargetRegions(strcmp({finalTargetRegions.geneName}, localGeneName)); - - + + if ~isempty(tRegionPull) % Check to see if there are no target regions--only used for blanks seqs = {}; @@ -1126,7 +1125,7 @@ function MERFISHProbeDesign(varargin) if rand(1) > 0.5 - % Create header + % Create header headers{p} = [libraryName ' ' ... localReadouts(1).Header ' ' ... tRegion.geneName '__' ... From 3c7a0654ceb3b45be031bfd7769a05a021dbe32c Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Tue, 11 Feb 2020 10:31:41 -0800 Subject: [PATCH 10/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 8e6c231..602d457 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -719,14 +719,15 @@ function MERFISHProbeDesign(varargin) % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... - isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties 'verbose', false, ... + isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties + 'verbose', false, ... 'transferAbund', false); else % Generate a OTTable for isoforms for the given gene isoSpecificityTables(i) = OTTable(localTranscriptome, ... - isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties + isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties 'verbose', false, ... 'transferAbund', true); @@ -915,6 +916,7 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Target regions loaded from %s\n', datestr(datetime), trRegionsPath); end + %write the genename genenames={targetRegions.geneName}'; geneids={targetRegions.id}'; @@ -967,6 +969,7 @@ function MERFISHProbeDesign(varargin) finalGenes = {codebook.name}; % Extract gene common names from codebook barcodes = char({codebook.barcode}) == '1'; % Extract string barcodes and convert to logical matrix + %check if gene and ids match the reference genes_from_ids=targetRegions(ismember({targetRegions.id}, finalIds)); genenames=targetRegions(ismember({targetRegions.geneName}, finalGenes)); @@ -1045,6 +1048,7 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; + allOligos = []; lastGene = ''; @@ -1187,6 +1191,7 @@ function MERFISHProbeDesign(varargin) display(['... ' headers{indsToRemove(r)}]); end + % display(indsToKeep) %indsToKeepForReal = [indsToKeepForReal, indsToKeep]; @@ -1264,10 +1269,11 @@ function MERFISHProbeDesign(varargin) oligos(end+1).Header = headers{indsToKeepForReal(s)}; oligos(end).Sequence = seqs{indsToKeepForReal(s)}; end - for s=1:length(indsToKeepForAll) + for s=1:length(indsToKeepForAll) allOligos(end+1).Header = headers{indsToKeepForAll(s)}; allOligos(end).Sequence = seqs{indsToKeepForAll(s)}; end + %if keepAllPossibleProbes % Append all headers + seqs to cell % allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers; From b2c08f90caf1ab486a8bb1a17d306665ae24dca8 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Wed, 12 Feb 2020 09:49:20 -0800 Subject: [PATCH 11/14] remove NCBI _ to , modification Tested. We can keep the ensemble and NCBI choice as long as we remove the NCBI _ to , modification. --- probe_construction/MERFISHProbeDesign.m | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 602d457..56e898d 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -660,15 +660,15 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Building transcriptome object.\n', datestr(datetime)); - % Build transcriptome using existing abundance data - %transcriptome = Transcriptome(rawTranscriptomeFasta, ... - % 'abundPath', fpkmPath, ... - % 'verbose', true, ... - % 'headerType', transcriptomeHeaderType, ... - %'IDType', transcriptomeIDType); + %Build transcriptome using existing abundance data transcriptome = Transcriptome(rawTranscriptomeFasta, ... 'abundPath', fpkmPath, ... - 'verbose', true); + 'verbose', true, ... + 'headerType', transcriptomeHeaderType, ... + 'IDType', transcriptomeIDType); + %transcriptome = Transcriptome(rawTranscriptomeFasta, ... + % 'abundPath', fpkmPath, ... + % 'verbose', true); transcriptome.Save(transcriptomePath); fprintf(logFID, '%s - Transcriptome object saved to %s\n', datestr(datetime), transcriptomePath); From efa15477284850e5e262c820092b9151e587eae3 Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Thu, 13 Feb 2020 09:58:18 -0800 Subject: [PATCH 12/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 56e898d..dd6d371 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -921,7 +921,7 @@ function MERFISHProbeDesign(varargin) genenames={targetRegions.geneName}'; geneids={targetRegions.id}'; numRegions={targetRegions.numRegions}'; - writetable(table(genenames,geneids,numRegions),[analysisSavePath 'targetRegionsHuman.csv'],'WriteRowNames',false) + writetable(table(genenames,geneids,numRegions),[analysisSavePath 'targetRegions.csv'],'WriteRowNames',false) %% ------------------------------------------------------------------------ % Step 2: Compile the library % The target regions designed above will be compiled into template From 84c540225db49b0670c5031a2edd10d8fe5b2d3f Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Thu, 13 Feb 2020 11:10:17 -0800 Subject: [PATCH 13/14] Update MERFISHProbeDesign.m --- probe_construction/MERFISHProbeDesign.m | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index dd6d371..6b12288 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1048,10 +1048,11 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; - allOligos = []; lastGene = ''; - + Genes={}; + ProbeNumbers={}; + %if keepAllPossibleProbes % allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); % allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); @@ -1201,6 +1202,10 @@ function MERFISHProbeDesign(varargin) indsToKeepForReal = indsToKeepForReal(randperm(length(indsToKeepForReal), min([length(indsToKeepForReal) numProbesPerGene]))); display(['... keeping ' num2str(length(indsToKeepForReal)) ' probes']); fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal)); + %write the genename + Genes=[Genes,localGeneName]; + ProbeNumbers=[ProbeNumbers,length(indsToKeepForReal)]; + % Check on number if length(indsToKeepForReal) < numProbesPerGene @@ -1301,7 +1306,10 @@ function MERFISHProbeDesign(varargin) fastawrite(possibleOligosPath, oligos); display(['... completed in ' num2str(toc(writeTimer))]); fprintf(logFID, '%s - Completed in %d s\n', datestr(datetime), toc(writeTimer)); - + %%%%%Write out how many probes per gene as a table + Genes=Genes' + ProbeNumbers=ProbeNumbers' + writetable(table(Genes,ProbeNumbers),[analysisSavePath 'probesPerGene.csv'],'WriteRowNames',false) if keepAllPossibleProbes %allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2); From 8f2b238bff97be26af254b1e08111591e2bd294e Mon Sep 17 00:00:00 2001 From: yilinzhao1615 <56006310+yilinzhao1615@users.noreply.github.com> Date: Thu, 13 Feb 2020 11:13:05 -0800 Subject: [PATCH 14/14] add an output table for genes and number of probes --- probe_construction/MERFISHProbeDesign.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/probe_construction/MERFISHProbeDesign.m b/probe_construction/MERFISHProbeDesign.m index 6b12288..dafc7a1 100644 --- a/probe_construction/MERFISHProbeDesign.m +++ b/probe_construction/MERFISHProbeDesign.m @@ -1048,11 +1048,12 @@ function MERFISHProbeDesign(varargin) if keepGoingFlag oligos = []; + allOligos = []; lastGene = ''; + Genes={}; - ProbeNumbers={}; - + ProbeNumbers={}; %if keepAllPossibleProbes % allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); % allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1); @@ -1204,8 +1205,7 @@ function MERFISHProbeDesign(varargin) fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal)); %write the genename Genes=[Genes,localGeneName]; - ProbeNumbers=[ProbeNumbers,length(indsToKeepForReal)]; - + ProbeNumbers=[ProbeNumbers,length(indsToKeepForReal)]; % Check on number if length(indsToKeepForReal) < numProbesPerGene