@@ -70,11 +70,13 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
# --- Configuration for the normalize step -------------------------------
# File mappings: where the aggregated profiles live and where the
# normalized outputs (and the optional single-file / image inputs) go.
normalize_input_files = config["files"]["aggregate_files"]
normalize_output_files = config["files"]["normalize_files"]
single_cell_file = config["files"]["single_file_only_output_file"]
image_file = config["files"]["image_file"]

# Single-cell options: whether the pipeline emits one combined
# single-cell file instead of per-split files.
sc_config = config["options"]["profile"]["single_cell"]
normalize_singlecell_from_single_file = sc_config["output_one_single_cell_file_only"]

# Normalize options: which data levels to process, how to group samples,
# which features to transform, and whether to fan out per-guide csvs.
normalize_args = config["options"]["profile"]["normalize"]
output_single_cell_by_guide = normalize_args["output_single_cell_by_guide"]
normalize_levels = normalize_args["levels"]
normalize_by_samples = normalize_args["by_samples"]
normalize_these_features = normalize_args["features"]
@@ -105,28 +107,107 @@ def handle_excepthook(exc_type, exc_value, exc_traceback):
105107 file_to_normalize .name .replace (".csv.gz" , f"_{ data_split_site } .csv.gz" ),
106108 )
107109
# Build the per-split output path: e.g. foo.csv.gz -> foo_<split>.csv.gz.
output_file = normalize_output_files[data_level]
output_file = pathlib.Path(
    normalize_output_files[data_level].parents[0],
    output_file.name.replace(".csv.gz", f"_{data_split_site}.csv.gz"),
)

# Skip splits that were already normalized unless --force was given.
# (The original code duplicated the whole normalize body in both the
# force and the file-missing branches; the common path is factored out.)
run_normalization = True
if os.path.exists(output_file):
    if force:
        print(f"Force overwriting {output_file}")
        logging.info(f"Force overwriting {output_file}")
    else:
        run_normalization = False

if run_normalization:
    print(
        f"Now normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
    )
    logging.info(
        f"Normalizing {data_level}...with operation: {normalize_method} for split {data_split_site}"
    )
    df = read_csvs_with_chunksize(file_to_normalize)

    # Don't normalize locations: treat Metadata columns plus any
    # object-location / area-shape center coordinates as metadata so the
    # normalization leaves them untouched.
    meta_cols = list(df.columns[df.columns.str.contains("Metadata")])
    remove_locs = [
        c for c in df.columns
        if "_Location_Center_X" in c or "_Location_Center_Y" in c
    ]
    remove_cents = [
        c for c in df.columns
        if "AreaShape_Center_X" in c or "AreaShape_Center_Y" in c
    ]
    meta_cols = meta_cols + remove_locs + remove_cents

    normalize(
        profiles=df,
        features=normalize_these_features,
        meta_features=meta_cols,
        samples=normalize_by_samples,
        method=normalize_method,
        output_file=output_file,
        compression_options=compression,
        float_format=float_format,
    )

# Optionally fan the normalized single-cell profiles out into one csv per
# guide, appending image alignment columns to each.
if data_level == "single_cell" and output_single_cell_by_guide:
    print(
        f"Now outputting normalized single cell profiles by guide for split {data_split_site}"
    )
    logging.info(
        f"Now outputting normalized single cell profiles by guide for split {data_split_site}"
    )
    # Load image alignment information for appending to single_cell_by_guide csvs.
    # NOTE(review): assumes the image file is tab-separated — confirm upstream.
    image_df = pd.read_csv(image_file, sep="\t")
    keep_columns = [col for col in image_df.columns if "Align_" in col]
    keep_columns.append("Metadata_site")
    image_df = image_df.loc[:, keep_columns]

    sc_by_guide_folder = os.path.join(single_cell_input_dir, "single_cell_by_guide")
    # makedirs(exist_ok=True) avoids the isdir/mkdir race of the original.
    os.makedirs(sc_by_guide_folder, exist_ok=True)

    df = read_csvs_with_chunksize(output_file)
    # groupby filters each guide once instead of boolean-masking df twice
    # per guide as the original did.
    for guide, guide_slice in df.groupby("Metadata_Foci_Barcode_MatchedTo_Barcode"):
        gene = guide_slice["Metadata_Foci_Barcode_MatchedTo_GeneCode"].tolist()[0]
        guide_file_name = (
            f"{str(output_file).split('__')[0].split('/')[-1]}__{guide}_{gene}.csv.gz"
        )
        guide_path = os.path.join(sc_by_guide_folder, guide_file_name)

        # Attach image alignment columns by matching the acquisition site.
        append_df = guide_slice.merge(
            image_df, left_on="Metadata_Foci_site", right_on="Metadata_site"
        )
        if os.path.exists(guide_path):
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent (original appended to the loaded csv).
            guide_df = pd.concat([read_csvs_with_chunksize(guide_path), append_df])
        else:
            guide_df = append_df
        guide_df.to_csv(guide_path, index=False)
# Announce completion on stdout and in the run log.
_finish_message = "Finished 2.normalize."
print(_finish_message)
logging.info(_finish_message)
0 commit comments