Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 67ff07f

Browse files
committed Dec 12, 2024 ·
Add paper DOI badge
1 parent 2781299 commit 67ff07f

File tree

1 file changed

+187
-196
lines changed

1 file changed

+187
-196
lines changed
 

‎pygad/helper/unique.py

Lines changed: 187 additions & 196 deletions
Original file line numberDiff line numberDiff line change
@@ -16,136 +16,127 @@ def solve_duplicate_genes_randomly(self,
1616
mutation_by_replacement,
1717
gene_type,
1818
num_trials=10):
19-
"""
20-
Resolves duplicates in a solution by randomly selecting new values for the duplicate genes.
21-
22-
Args:
23-
solution (list): A solution containing genes, potentially with duplicate values.
24-
min_val (int): The minimum value of the range to sample a number randomly.
25-
max_val (int): The maximum value of the range to sample a number randomly.
26-
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
27-
gene_type (type): The data type of the gene (e.g., int, float).
28-
num_trials (int): The maximum number of attempts to resolve duplicates by changing the gene values. Only works for floating-point gene types.
19+
"""
20+
Resolves duplicates in a solution by randomly selecting new values for the duplicate genes.
2921
30-
Returns:
31-
tuple:
32-
list: The updated solution after attempting to resolve duplicates. If no duplicates are resolved, the solution remains unchanged.
33-
list: The indices of genes that still have duplicate values.
34-
int: The number of duplicates that could not be resolved.
35-
"""
22+
Args:
23+
solution (list): A solution containing genes, potentially with duplicate values.
24+
min_val (int): The minimum value of the range to sample a number randomly.
25+
max_val (int): The maximum value of the range to sample a number randomly.
26+
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
27+
gene_type (type): The data type of the gene (e.g., int, float).
28+
num_trials (int): The maximum number of attempts to resolve duplicates by changing the gene values. Only works for floating-point gene types.
3629
37-
new_solution = solution.copy()
38-
39-
_, unique_gene_indices = numpy.unique(solution, return_index=True)
40-
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
30+
Returns:
31+
tuple:
32+
list: The updated solution after attempting to resolve duplicates. If no duplicates are resolved, the solution remains unchanged.
33+
list: The indices of genes that still have duplicate values.
34+
int: The number of duplicates that could not be resolved.
35+
"""
4136

42-
num_unsolved_duplicates = 0
43-
if len(not_unique_indices) > 0:
44-
for duplicate_index in not_unique_indices:
45-
if self.gene_type_single == True:
46-
dtype = gene_type
47-
else:
48-
dtype = gene_type[duplicate_index]
37+
new_solution = solution.copy()
4938

50-
if dtype[0] in pygad.GA.supported_int_types:
51-
temp_val = self.unique_int_gene_from_range(solution=new_solution,
52-
gene_index=duplicate_index,
53-
min_val=min_val,
54-
max_val=max_val,
55-
mutation_by_replacement=mutation_by_replacement,
56-
gene_type=gene_type)
57-
else:
58-
temp_val = self.unique_float_gene_from_range(solution=new_solution,
59-
gene_index=duplicate_index,
60-
min_val=min_val,
61-
max_val=max_val,
62-
mutation_by_replacement=mutation_by_replacement,
63-
gene_type=gene_type,
64-
num_trials=num_trials)
65-
"""
66-
temp_val = numpy.random.uniform(low=min_val,
67-
high=max_val,
68-
size=1)[0]
69-
if mutation_by_replacement:
70-
pass
71-
else:
72-
temp_val = new_solution[duplicate_index] + temp_val
73-
"""
39+
_, unique_gene_indices = numpy.unique(solution, return_index=True)
40+
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
7441

75-
if temp_val in new_solution:
76-
num_unsolved_duplicates = num_unsolved_duplicates + 1
77-
if not self.suppress_warnings: warnings.warn(f"Failed to find a unique value for gene with index {duplicate_index} whose value is {solution[duplicate_index]}. Consider adding more values in the gene space or use a wider range for initial population or random mutation.")
78-
else:
79-
# Unique gene value found.
80-
new_solution[duplicate_index] = temp_val
42+
num_unsolved_duplicates = 0
43+
if len(not_unique_indices) > 0:
44+
for duplicate_index in not_unique_indices:
45+
if self.gene_type_single == True:
46+
dtype = gene_type
47+
else:
48+
dtype = gene_type[duplicate_index]
49+
50+
if dtype[0] in pygad.GA.supported_int_types:
51+
temp_val = self.unique_int_gene_from_range(solution=new_solution,
52+
gene_index=duplicate_index,
53+
min_val=min_val,
54+
max_val=max_val,
55+
mutation_by_replacement=mutation_by_replacement,
56+
gene_type=gene_type)
57+
else:
58+
temp_val = self.unique_float_gene_from_range(solution=new_solution,
59+
gene_index=duplicate_index,
60+
min_val=min_val,
61+
max_val=max_val,
62+
mutation_by_replacement=mutation_by_replacement,
63+
gene_type=gene_type,
64+
num_trials=num_trials)
65+
66+
if temp_val in new_solution:
67+
num_unsolved_duplicates = num_unsolved_duplicates + 1
68+
if not self.suppress_warnings: warnings.warn(f"Failed to find a unique value for gene with index {duplicate_index} whose value is {solution[duplicate_index]}. Consider adding more values in the gene space or use a wider range for initial population or random mutation.")
69+
else:
70+
# Unique gene value found.
71+
new_solution[duplicate_index] = temp_val
8172

82-
# Update the list of duplicate indices after each iteration.
83-
_, unique_gene_indices = numpy.unique(new_solution, return_index=True)
84-
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
85-
# self.logger.info("not_unique_indices INSIDE", not_unique_indices)
73+
# Update the list of duplicate indices after each iteration.
74+
_, unique_gene_indices = numpy.unique(new_solution, return_index=True)
75+
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
76+
# self.logger.info("not_unique_indices INSIDE", not_unique_indices)
8677

87-
return new_solution, not_unique_indices, num_unsolved_duplicates
78+
return new_solution, not_unique_indices, num_unsolved_duplicates
8879

8980
def solve_duplicate_genes_by_space(self,
9081
solution,
9182
gene_type,
9283
num_trials=10,
9384
build_initial_pop=False):
9485

95-
"""
96-
Resolves duplicates in a solution by selecting new values for the duplicate genes from the gene space.
86+
"""
87+
Resolves duplicates in a solution by selecting new values for the duplicate genes from the gene space.
9788
98-
Args:
99-
solution (list): A solution containing genes, potentially with duplicate values.
100-
gene_type (type): The data type of the gene (e.g., int, float).
101-
num_trials (int): The maximum number of attempts to resolve duplicates by selecting values from the gene space.
89+
Args:
90+
solution (list): A solution containing genes, potentially with duplicate values.
91+
gene_type (type): The data type of the gene (e.g., int, float).
92+
num_trials (int): The maximum number of attempts to resolve duplicates by selecting values from the gene space.
10293
103-
Returns:
104-
tuple:
105-
list: The updated solution after attempting to resolve duplicates. If no duplicates are resolved, the solution remains unchanged.
106-
list: The indices of genes that still have duplicate values.
107-
int: The number of duplicates that could not be resolved.
108-
"""
94+
Returns:
95+
tuple:
96+
list: The updated solution after attempting to resolve duplicates. If no duplicates are resolved, the solution remains unchanged.
97+
list: The indices of genes that still have duplicate values.
98+
int: The number of duplicates that could not be resolved.
99+
"""
109100

110-
new_solution = solution.copy()
111-
112-
_, unique_gene_indices = numpy.unique(solution, return_index=True)
113-
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
114-
# self.logger.info("not_unique_indices OUTSIDE", not_unique_indices)
115-
116-
# First try to solve the duplicates.
117-
# For a solution like [3 2 0 0], the indices of the 2 duplicating genes are 2 and 3.
118-
# The next call to the find_unique_value() method tries to change the value of the gene with index 3 to solve the duplicate.
119-
if len(not_unique_indices) > 0:
120-
new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution,
121-
gene_type=gene_type,
122-
not_unique_indices=not_unique_indices,
123-
num_trials=10,
124-
build_initial_pop=build_initial_pop)
125-
else:
126-
return new_solution, not_unique_indices, len(not_unique_indices)
101+
new_solution = solution.copy()
102+
103+
_, unique_gene_indices = numpy.unique(solution, return_index=True)
104+
not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)
105+
# self.logger.info("not_unique_indices OUTSIDE", not_unique_indices)
106+
107+
# First try to solve the duplicates.
108+
# For a solution like [3 2 0 0], the indices of the 2 duplicating genes are 2 and 3.
109+
# The next call to the find_unique_value() method tries to change the value of the gene with index 3 to solve the duplicate.
110+
if len(not_unique_indices) > 0:
111+
new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution,
112+
gene_type=gene_type,
113+
not_unique_indices=not_unique_indices,
114+
num_trials=10,
115+
build_initial_pop=build_initial_pop)
116+
else:
117+
return new_solution, not_unique_indices, len(not_unique_indices)
127118

128-
# Do another try if there exist duplicate genes.
129-
# If there are no possible values for the gene with index 3 to solve the duplicate, try to change the value of the other gene with index 2.
130-
if len(not_unique_indices) > 0:
131-
not_unique_indices = set(numpy.where(new_solution == new_solution[list(not_unique_indices)[0]])[0]) - set([list(not_unique_indices)[0]])
132-
new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution,
133-
gene_type=gene_type,
134-
not_unique_indices=not_unique_indices,
135-
num_trials=10,
136-
build_initial_pop=build_initial_pop)
137-
else:
138-
# DEEP-DUPLICATE-REMOVAL-NEEDED
139-
# Search by this phrase to find where deep duplicates removal should be applied.
140-
141-
# If there exist duplicate genes, then changing either of the 2 duplicating genes (with indices 2 and 3) will not solve the problem.
142-
# This problem can be solved by randomly changing one of the non-duplicating genes, which may make room for a unique value in one of the 2 duplicating genes.
143-
# For example, if gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]] and the solution is [3 2 0 0], then the values of the last 2 genes duplicate.
144-
# There are no possible changes in the last 2 genes to solve the problem. But it could be solved by changing the second gene from 2 to 4.
145-
# As a result, any of the last 2 genes can take the value 2 and solve the duplicates.
146-
return new_solution, not_unique_indices, len(not_unique_indices)
119+
# Do another try if there exist duplicate genes.
120+
# If there are no possible values for the gene with index 3 to solve the duplicate, try to change the value of the other gene with index 2.
121+
if len(not_unique_indices) > 0:
122+
not_unique_indices = set(numpy.where(new_solution == new_solution[list(not_unique_indices)[0]])[0]) - set([list(not_unique_indices)[0]])
123+
new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution,
124+
gene_type=gene_type,
125+
not_unique_indices=not_unique_indices,
126+
num_trials=10,
127+
build_initial_pop=build_initial_pop)
128+
else:
129+
# DEEP-DUPLICATE-REMOVAL-NEEDED
130+
# Search by this phrase to find where deep duplicates removal should be applied.
131+
132+
# If there exist duplicate genes, then changing either of the 2 duplicating genes (with indices 2 and 3) will not solve the problem.
133+
# This problem can be solved by randomly changing one of the non-duplicating genes, which may make room for a unique value in one of the 2 duplicating genes.
134+
# For example, if gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]] and the solution is [3 2 0 0], then the values of the last 2 genes duplicate.
135+
# There are no possible changes in the last 2 genes to solve the problem. But it could be solved by changing the second gene from 2 to 4.
136+
# As a result, any of the last 2 genes can take the value 2 and solve the duplicates.
137+
return new_solution, not_unique_indices, len(not_unique_indices)
147138

148-
return new_solution, not_unique_indices, num_unsolved_duplicates
139+
return new_solution, not_unique_indices, num_unsolved_duplicates
149140

150141
def unique_int_gene_from_range(self,
151142
solution,
@@ -156,54 +147,54 @@ def unique_int_gene_from_range(self,
156147
gene_type,
157148
step=1):
158149

159-
"""
160-
Finds a unique integer value for a specific gene in a solution.
150+
"""
151+
Finds a unique integer value for a specific gene in a solution.
161152
162-
Args:
163-
solution (list): A solution containing genes, potentially with duplicate values.
164-
gene_index (int): The index of the gene for which to find a unique value.
165-
min_val (int): The minimum value of the range to sample an integer randomly.
166-
max_val (int): The maximum value of the range to sample an integer randomly.
167-
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
168-
gene_type (type): The data type of the gene (e.g., int, int8, uint16, etc).
169-
step (int, optional): The step size for generating candidate values. Defaults to 1.
153+
Args:
154+
solution (list): A solution containing genes, potentially with duplicate values.
155+
gene_index (int): The index of the gene for which to find a unique value.
156+
min_val (int): The minimum value of the range to sample an integer randomly.
157+
max_val (int): The maximum value of the range to sample an integer randomly.
158+
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
159+
gene_type (type): The data type of the gene (e.g., int, int8, uint16, etc).
160+
step (int, optional): The step size for generating candidate values. Defaults to 1.
170161
171-
Returns:
172-
int: The new integer value of the gene. If no unique value can be found, the original gene value is returned.
173-
"""
162+
Returns:
163+
int: The new integer value of the gene. If no unique value can be found, the original gene value is returned.
164+
"""
174165

175-
# The gene_type is of the form [type, precision]
176-
dtype = gene_type
166+
# The gene_type is of the form [type, precision]
167+
dtype = gene_type
177168

178-
# For non-integer steps, the numpy.arange() function returns zeros if the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0])
179-
# To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function.
180-
all_gene_values = numpy.asarray(numpy.arange(min_val,
181-
max_val,
182-
step),
183-
dtype=dtype[0])
169+
# For non-integer steps, the numpy.arange() function returns zeros if the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0])
170+
# To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function.
171+
all_gene_values = numpy.asarray(numpy.arange(min_val,
172+
max_val,
173+
step),
174+
dtype=dtype[0])
184175

185-
# If mutation is by replacement, do not add the current gene value into the list.
186-
# This is to avoid replacing the value by itself again. We are doing nothing in this case.
187-
if mutation_by_replacement:
188-
pass
189-
else:
190-
all_gene_values = all_gene_values + solution[gene_index]
176+
# If mutation is by replacement, do not add the current gene value into the list.
177+
# This is to avoid replacing the value by itself again. We are doing nothing in this case.
178+
if mutation_by_replacement:
179+
pass
180+
else:
181+
all_gene_values = all_gene_values + solution[gene_index]
191182

192-
# After adding solution[gene_index] to the list, we have to change the data type again.
193-
all_gene_values = numpy.asarray(all_gene_values,
194-
dtype[0])
183+
# After adding solution[gene_index] to the list, we have to change the data type again.
184+
all_gene_values = numpy.asarray(all_gene_values,
185+
dtype[0])
195186

196-
values_to_select_from = list(set(list(all_gene_values)) - set(solution))
187+
values_to_select_from = list(set(list(all_gene_values)) - set(solution))
197188

198-
if len(values_to_select_from) == 0:
199-
# If there are no values, then keep the current gene value.
200-
selected_value = solution[gene_index]
201-
else:
202-
selected_value = random.choice(values_to_select_from)
189+
if len(values_to_select_from) == 0:
190+
# If there are no values, then keep the current gene value.
191+
selected_value = solution[gene_index]
192+
else:
193+
selected_value = random.choice(values_to_select_from)
203194

204-
selected_value = dtype[0](selected_value)
195+
selected_value = dtype[0](selected_value)
205196

206-
return selected_value
197+
return selected_value
207198

208199
def unique_float_gene_from_range(self,
209200
solution,
@@ -214,60 +205,60 @@ def unique_float_gene_from_range(self,
214205
gene_type,
215206
num_trials=10):
216207

217-
"""
218-
Finds a unique floating-point value for a specific gene in a solution.
208+
"""
209+
Finds a unique floating-point value for a specific gene in a solution.
219210
220-
Args:
221-
solution (list): A solution containing genes, potentially with duplicate values.
222-
gene_index (int): The index of the gene for which to find a unique value.
223-
min_val (int): The minimum value of the range to sample a floating-point number randomly.
224-
max_val (int): The maximum value of the range to sample a floating-point number randomly.
225-
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
226-
gene_type (type): The data type of the gene (e.g., float, float16, float32, etc).
227-
num_trials (int): The maximum number of attempts to resolve duplicates by changing the gene values.
211+
Args:
212+
solution (list): A solution containing genes, potentially with duplicate values.
213+
gene_index (int): The index of the gene for which to find a unique value.
214+
min_val (int): The minimum value of the range to sample a floating-point number randomly.
215+
max_val (int): The maximum value of the range to sample a floating-point number randomly.
216+
mutation_by_replacement (bool): Indicates if mutation is performed by replacement.
217+
gene_type (type): The data type of the gene (e.g., float, float16, float32, etc).
218+
num_trials (int): The maximum number of attempts to resolve duplicates by changing the gene values.
228219
229-
Returns:
230-
float: The new floating-point value of the gene. If no unique value can be found, the original gene value is returned.
231-
"""
220+
Returns:
221+
float: The new floating-point value of the gene. If no unique value can be found, the original gene value is returned.
222+
"""
232223

233-
# The gene_type is of the form [type, precision]
234-
dtype = gene_type
224+
# The gene_type is of the form [type, precision]
225+
dtype = gene_type
235226

236-
for trial_index in range(num_trials):
237-
temp_val = numpy.random.uniform(low=min_val,
238-
high=max_val,
239-
size=1)[0]
227+
for trial_index in range(num_trials):
228+
temp_val = numpy.random.uniform(low=min_val,
229+
high=max_val,
230+
size=1)[0]
240231

241-
# If mutation is by replacement, do not add the current gene value into the list.
242-
# This is to avoid replacing the value by itself again. We are doing nothing in this case.
243-
if mutation_by_replacement:
244-
pass
245-
else:
246-
temp_val = temp_val + solution[gene_index]
232+
# If mutation is by replacement, do not add the current gene value into the list.
233+
# This is to avoid replacing the value by itself again. We are doing nothing in this case.
234+
if mutation_by_replacement:
235+
pass
236+
else:
237+
temp_val = temp_val + solution[gene_index]
247238

248-
if not dtype[1] is None:
249-
# Precision is available and we have to round the number.
250-
# Convert the data type and round the number.
251-
temp_val = numpy.round(dtype[0](temp_val),
252-
dtype[1])
253-
else:
254-
# There is no precision and rounding the number is not needed. The type is [type, None]
255-
# Just convert the data type.
256-
temp_val = dtype[0](temp_val)
257-
258-
if temp_val in solution and trial_index == (num_trials - 1):
259-
# If there are no values, then keep the current gene value.
260-
if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but cannot find a value to prevent duplicates.")
261-
selected_value = solution[gene_index]
262-
elif temp_val in solution:
263-
# Keep trying in the other remaining trials.
264-
continue
265-
else:
266-
# Unique gene value found.
267-
selected_value = temp_val
268-
break
239+
if not dtype[1] is None:
240+
# Precision is available and we have to round the number.
241+
# Convert the data type and round the number.
242+
temp_val = numpy.round(dtype[0](temp_val),
243+
dtype[1])
244+
else:
245+
# There is no precision and rounding the number is not needed. The type is [type, None]
246+
# Just convert the data type.
247+
temp_val = dtype[0](temp_val)
248+
249+
if temp_val in solution and trial_index == (num_trials - 1):
250+
# If there are no values, then keep the current gene value.
251+
if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but cannot find a value to prevent duplicates.")
252+
selected_value = solution[gene_index]
253+
elif temp_val in solution:
254+
# Keep trying in the other remaining trials.
255+
continue
256+
else:
257+
# Unique gene value found.
258+
selected_value = temp_val
259+
break
269260

270-
return selected_value
261+
return selected_value
271262

272263
def unique_genes_by_space(self,
273264
new_solution,

0 commit comments

Comments
 (0)
Please sign in to comment.