Skip to content

Commit e72e7d5

Browse files
committed
Added suggest_simpler_pattern
1 parent b6ae964 commit e72e7d5

File tree

5 files changed

+361
-0
lines changed

5 files changed

+361
-0
lines changed

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ t/generate.t
2323
t/pod-cm.t
2424
t/pod-synopsis.t
2525
t/pod.t
26+
t/suggest.t
2627
t/utility_methods.t

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,19 @@ Gets the random seed for reproducible generation
164164

165165
Sets the random seed for reproducible generation
166166

167+
## suggest\_simpler\_pattern()
168+
169+
Analyzes patterns and suggests improvements.
170+
171+
my $suggestion = $gen->suggest_simpler_pattern();
172+
173+
if ($suggestion) {
174+
print "Reason: $suggestion->{reason}\n";
175+
print "Better pattern: $suggestion->{pattern}\n" if $suggestion->{pattern};
176+
print "Tips:\n";
177+
print " - $_\n" for @{$suggestion->{tips}};
178+
}
179+
167180
## validate($string)
168181

169182
Checks if a string matches the pattern without generating.

lib/Data/Random/String/Matches.pm

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,151 @@ sub set_seed {
373373
return $self;
374374
}
375375

376+
=head2 suggest_simpler_pattern()
377+
378+
Analyzes patterns and suggests improvements.
379+
380+
my $suggestion = $gen->suggest_simpler_pattern();
381+
382+
if ($suggestion) {
383+
print "Reason: $suggestion->{reason}\n";
384+
print "Better pattern: $suggestion->{pattern}\n" if $suggestion->{pattern};
385+
print "Tips:\n";
386+
print " - $_\n" for @{$suggestion->{tips}};
387+
}
388+
389+
=cut
390+
391+
sub suggest_simpler_pattern {
392+
my $self = $_[0];
393+
394+
my $pattern = $self->{regex_str};
395+
my $info = $self->pattern_info();
396+
397+
# Check for patterns that are too complex
398+
if ($info->{complexity} eq 'very_complex') {
399+
return {
400+
pattern => undef,
401+
reason => 'Pattern is very complex. Consider breaking it into multiple simpler patterns.',
402+
tips => [
403+
'Split alternations into separate generators',
404+
'Avoid deeply nested groups',
405+
'Use fixed-length patterns when possible',
406+
],
407+
};
408+
}
409+
410+
# Suggest removing unnecessary backreferences
411+
if ($info->{features}{has_backreferences} && $pattern =~ /(\(\w+\)).*\\\d+/) {
412+
my $simpler = $pattern;
413+
# Can't automatically simplify backreferences, but can suggest
414+
return {
415+
pattern => undef,
416+
reason => 'Backreferences add complexity. Consider if you really need repeated groups.',
417+
tips => [
418+
'If the repeated part doesn\'t need to match, use two separate patterns',
419+
'For validation, backreferences are great; for generation, they limit variation',
420+
],
421+
};
422+
}
423+
424+
# Suggest fixed quantifiers instead of ranges
425+
if ($pattern =~ /\{(\d+),(\d+)\}/) {
426+
my ($min, $max) = ($1, $2);
427+
if ($max - $min > 10) {
428+
my $mid = int(($min + $max) / 2);
429+
my $simpler = $pattern;
430+
$simpler =~ s/\{\d+,\d+\}/\{$mid\}/;
431+
return {
432+
pattern => $simpler,
433+
reason => "Large quantifier range {$min,$max} creates high variability. Consider fixed length {$mid}.",
434+
tips => [
435+
'Fixed lengths are faster to generate',
436+
'If you need variety, generate multiple patterns with different fixed lengths',
437+
],
438+
};
439+
}
440+
}
441+
442+
# Suggest limiting alternations
443+
if ($info->{features}{has_alternation}) {
444+
my @alts = split /\|/, $pattern;
445+
if (@alts > 10) {
446+
return {
447+
pattern => undef,
448+
reason => 'Too many alternations (' . scalar(@alts) . '). Consider splitting into multiple patterns.',
449+
tips => [
450+
'Create separate generators for different alternatives',
451+
'Group similar patterns together',
452+
'Use character classes [abc] instead of (a|b|c)',
453+
],
454+
};
455+
}
456+
457+
# Check if alternations could be a character class
458+
if ($pattern =~ /\(([a-zA-Z])\|([a-zA-Z])\|([a-zA-Z])\)/) {
459+
my $chars = join('', $1, $2, $3);
460+
my $simpler = $pattern;
461+
$simpler =~ s/\([a-zA-Z]\|[a-zA-Z]\|[a-zA-Z]\)/[$chars]/;
462+
return {
463+
pattern => $simpler,
464+
reason => 'Single-character alternations can be simplified to character classes.',
465+
tips => [
466+
'Use [abc] instead of (a|b|c)',
467+
'Character classes are faster to process',
468+
],
469+
};
470+
}
471+
}
472+
473+
# Suggest removing lookaheads/lookbehinds for generation
474+
if ($info->{features}{has_lookahead} || $info->{features}{has_lookbehind}) {
475+
my $simpler = $pattern;
476+
$simpler =~ s/\(\?[=!].*?\)//g; # Remove lookaheads
477+
$simpler =~ s/\(\?<[=!].*?\)//g; # Remove lookbehinds
478+
479+
if ($simpler ne $pattern) {
480+
return {
481+
pattern => $simpler,
482+
reason => 'Lookaheads/lookbehinds add complexity but don\'t contribute to generated strings.',
483+
tips => [
484+
'Lookaheads are great for validation, not generation',
485+
'The simplified pattern generates the same strings',
486+
],
487+
};
488+
}
489+
}
490+
491+
# Check for Unicode when ASCII would work
492+
if ($info->{features}{has_unicode} && $pattern =~ /\\p\{L\}/) {
493+
my $simpler = $pattern;
494+
$simpler =~ s/\\p\{L\}/[A-Za-z]/g;
495+
return {
496+
pattern => $simpler,
497+
reason => 'Unicode \\p{L} can be simplified to [A-Za-z] if you only need ASCII letters.',
498+
tips => [
499+
'ASCII patterns are faster',
500+
'Only use Unicode if you need non-ASCII characters',
501+
],
502+
};
503+
}
504+
505+
# Check for overly long fixed strings
506+
if ($pattern =~ /([a-zA-Z]{20,})/) {
507+
return {
508+
pattern => undef,
509+
reason => 'Pattern contains very long fixed literal strings. Consider if you need such specific patterns.',
510+
tips => [
511+
'Use variables instead of long literals',
512+
'Break into smaller patterns',
513+
],
514+
};
515+
}
516+
517+
# Pattern seems reasonable
518+
return undef;
519+
}
520+
376521
=head2 validate($string)
377522
378523
Checks if a string matches the pattern without generating.

t/suggest.t

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#!/usr/bin/env perl
2+
3+
use strict;
4+
use warnings;
5+
use Test::More;
6+
7+
BEGIN {
8+
use_ok('Data::Random::String::Matches');
9+
}
10+
11+
# ===========================================================================
12+
# suggest_simpler_pattern() tests
13+
# ===========================================================================
14+
15+
subtest 'suggest_simpler_pattern - simple patterns need no changes' => sub {
16+
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
17+
my $suggestion = $gen->suggest_simpler_pattern();
18+
19+
is($suggestion, undef, 'Simple pattern returns undef');
20+
};
21+
22+
subtest 'suggest_simpler_pattern - very complex patterns' => sub {
23+
my $gen = Data::Random::String::Matches->new(
24+
qr/(?<id>\d{3})-(\w+)-\k<id>|[A-Z]{10}(?=\d)(?!X)/
25+
);
26+
my $suggestion = $gen->suggest_simpler_pattern();
27+
28+
ok(defined $suggestion, 'Returns suggestion for complex pattern');
29+
ok(exists $suggestion->{reason}, 'Has reason');
30+
ok(exists $suggestion->{tips}, 'Has tips');
31+
like($suggestion->{reason}, qr/complex/i, 'Mentions complexity');
32+
};
33+
34+
subtest 'suggest_simpler_pattern - large quantifier ranges' => sub {
35+
my $gen = Data::Random::String::Matches->new(qr/\d{5,25}/);
36+
my $suggestion = $gen->suggest_simpler_pattern();
37+
38+
ok(defined $suggestion, 'Suggests simplification for large range');
39+
ok(defined $suggestion->{pattern}, 'Provides alternative pattern');
40+
like($suggestion->{pattern}, qr/\{\d+\}/, 'Suggests fixed quantifier');
41+
like($suggestion->{reason}, qr/range/i, 'Explains about range');
42+
};
43+
44+
subtest 'suggest_simpler_pattern - small quantifier ranges ok' => sub {
45+
my $gen = Data::Random::String::Matches->new(qr/\d{3,5}/);
46+
my $suggestion = $gen->suggest_simpler_pattern();
47+
48+
# Small ranges might not trigger suggestion, or might for other reasons
49+
ok(1, 'Handles small ranges');
50+
};
51+
52+
subtest 'suggest_simpler_pattern - too many alternations' => sub {
53+
my $pattern = '(' . join('|', ('a'..'z')) . ')'; # 26 alternations
54+
my $gen = Data::Random::String::Matches->new(qr/$pattern/);
55+
my $suggestion = $gen->suggest_simpler_pattern();
56+
57+
ok(defined $suggestion, 'Suggests simplification for many alternations');
58+
like($suggestion->{reason}, qr/alternation/i, 'Mentions alternations');
59+
ok(ref($suggestion->{tips}) eq 'ARRAY', 'Provides tips array');
60+
};
61+
62+
subtest 'suggest_simpler_pattern - character class suggestion' => sub {
63+
my $gen = Data::Random::String::Matches->new(qr/(a|b|c)/);
64+
my $suggestion = $gen->suggest_simpler_pattern();
65+
66+
if (defined $suggestion) {
67+
like($suggestion->{pattern}, qr/\[abc\]/, 'Suggests character class');
68+
like($suggestion->{reason}, qr/character class/i, 'Explains character class benefit');
69+
} else {
70+
ok(1, 'Pattern acceptable as-is');
71+
}
72+
};
73+
74+
subtest 'suggest_simpler_pattern - backreferences' => sub {
75+
my $gen = Data::Random::String::Matches->new(qr/(\w{3})-\1/);
76+
my $suggestion = $gen->suggest_simpler_pattern();
77+
78+
if (defined $suggestion) {
79+
like($suggestion->{reason}, qr/backreference/i, 'Mentions backreferences');
80+
ok(ref($suggestion->{tips}) eq 'ARRAY', 'Provides tips');
81+
ok(scalar @{$suggestion->{tips}} > 0, 'Has at least one tip');
82+
} else {
83+
ok(1, 'Pattern acceptable as-is');
84+
}
85+
};
86+
87+
subtest 'suggest_simpler_pattern - lookaheads' => sub {
88+
my $gen = Data::Random::String::Matches->new(qr/\d{3}(?=[A-Z])/);
89+
my $suggestion = $gen->suggest_simpler_pattern();
90+
91+
ok(defined $suggestion, 'Suggests removing lookahead');
92+
is($suggestion->{pattern}, '(?^:\d{3})', 'Removes lookahead from pattern');
93+
like($suggestion->{reason}, qr/lookahead/i, 'Explains lookahead issue');
94+
};
95+
96+
subtest 'suggest_simpler_pattern - lookbehinds' => sub {
97+
my $gen = Data::Random::String::Matches->new(qr/(?<=PRE)\d{3}/);
98+
my $suggestion = $gen->suggest_simpler_pattern();
99+
100+
ok(defined $suggestion, 'Suggests removing lookbehind');
101+
is($suggestion->{pattern}, '(?^:\d{3})', 'Removes lookbehind from pattern');
102+
like($suggestion->{reason}, qr/lookbehind/i, 'Explains lookbehind issue');
103+
};
104+
105+
subtest 'suggest_simpler_pattern - unicode to ascii' => sub {
106+
my $gen = Data::Random::String::Matches->new(qr/\p{L}{5}/);
107+
my $suggestion = $gen->suggest_simpler_pattern();
108+
109+
ok(defined $suggestion, 'Suggests ASCII alternative');
110+
like($suggestion->{pattern}, qr/\[A-Za-z\]/, 'Suggests ASCII character class');
111+
like($suggestion->{reason}, qr/ASCII/i, 'Explains ASCII benefit');
112+
};
113+
114+
subtest 'suggest_simpler_pattern - return structure' => sub {
115+
my $gen = Data::Random::String::Matches->new(qr/\d{5,25}/);
116+
my $suggestion = $gen->suggest_simpler_pattern();
117+
118+
if (defined $suggestion) {
119+
is(ref($suggestion), 'HASH', 'Returns hashref');
120+
ok(exists $suggestion->{reason}, 'Has reason key');
121+
ok(exists $suggestion->{tips}, 'Has tips key');
122+
123+
is(ref($suggestion->{tips}), 'ARRAY', 'Tips is arrayref');
124+
ok(scalar @{$suggestion->{tips}} > 0, 'Tips not empty');
125+
}
126+
};
127+
128+
subtest 'suggest_simpler_pattern - multiple issues' => sub {
129+
my $gen = Data::Random::String::Matches->new(qr/\p{L}{5,50}(?=[A-Z])/);
130+
my $suggestion = $gen->suggest_simpler_pattern();
131+
132+
# Should catch at least one issue
133+
ok(defined $suggestion, 'Detects issues in pattern with multiple problems');
134+
};
135+
136+
subtest 'suggest_simpler_pattern - various simple patterns' => sub {
137+
my @simple = (
138+
qr/\d{4}/,
139+
qr/[A-Z]{3}/,
140+
qr/\w{5}/,
141+
qr/[a-z]{2,4}/,
142+
);
143+
144+
for my $pattern (@simple) {
145+
my $gen = Data::Random::String::Matches->new($pattern);
146+
my $suggestion = $gen->suggest_simpler_pattern();
147+
148+
# These should either return undef or have valid suggestions
149+
if (defined $suggestion) {
150+
ok(exists $suggestion->{reason}, "Pattern $pattern: has reason if suggesting");
151+
} else {
152+
pass("Pattern $pattern: no suggestion needed");
153+
}
154+
}
155+
};
156+
157+
subtest 'suggest_simpler_pattern - tip structure' => sub {
158+
my $gen = Data::Random::String::Matches->new(qr/\d{5,25}/);
159+
my $suggestion = $gen->suggest_simpler_pattern();
160+
161+
if (defined $suggestion && exists $suggestion->{tips}) {
162+
for my $tip (@{$suggestion->{tips}}) {
163+
ok(length($tip) > 0, 'Tip is not empty string');
164+
unlike($tip, qr/^\s*$/, 'Tip is not just whitespace');
165+
}
166+
}
167+
};
168+
169+
subtest 'suggest_simpler_pattern - pattern key validity' => sub {
170+
my @patterns_with_suggestions = (
171+
qr/\d{5,25}/,
172+
qr/\d{3}(?=[A-Z])/,
173+
qr/\p{L}{5}/,
174+
);
175+
176+
for my $pattern (@patterns_with_suggestions) {
177+
my $gen = Data::Random::String::Matches->new($pattern);
178+
my $suggestion = $gen->suggest_simpler_pattern();
179+
180+
if (defined $suggestion && defined $suggestion->{pattern}) {
181+
# Try to compile the suggested pattern
182+
eval { qr/$suggestion->{pattern}/ };
183+
ok(!$@, "Suggested pattern is valid regex: $suggestion->{pattern}");
184+
}
185+
}
186+
};
187+
188+
subtest 'suggest_simpler_pattern - integration with pattern_info' => sub {
189+
my $gen = Data::Random::String::Matches->new(qr/\d{10,50}/);
190+
191+
my $info = $gen->pattern_info();
192+
my $suggestion = $gen->suggest_simpler_pattern();
193+
194+
ok(defined $info, 'Can get pattern info');
195+
if (defined $suggestion) {
196+
ok(defined $suggestion->{reason}, 'Suggestion based on pattern analysis');
197+
}
198+
};
199+
200+
done_testing();

t/utility_methods.t

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ BEGIN {
1515
subtest 'set_seed - basic functionality' => sub {
1616
my $gen = Data::Random::String::Matches->new(qr/\d{10}/);
1717

18+
is($gen->get_seed(), undef, 'No seed initially');
19+
1820
# Set seed and generate
1921
$gen->set_seed(12345);
2022
my $str1 = $gen->generate();

0 commit comments

Comments
 (0)