-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathregen.go
269 lines (242 loc) · 6.72 KB
/
regen.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// Copyright 2016 Noel Cower. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE.txt file.
// regen is a tool that parses regular expressions and generates random strings from them.
//
// go get go.spiff.io/regen
//
// regen works by parsing a regular expression and walking its op tree. It is currently not guaranteed to produce
// entirely accurate results, but will at least try.
//
// Currently, word boundaries are not supported (until I decide how best to randomly insert a word boundary character).
// Using a word boundary op (\b or \B) will currently cause regen to panic. In addition, line endings are also poorly
// supported right now and EOT markers are treated as the end of string generation.
//
// Usage is simple, pass one or more regular expressions to regen on the command line and it will generate a string from
// each, printing them in the same order as on the command line (separated by newlines):
//
// $ regen 'foo(-(bar|baz|quux|woop)){4}'
// foo-woop-quux-bar-quux
//
// So, if you fancy yourself a Javascript weirdo of some variety, you can at least use regen to write code for eBay:
//
// $ regen '!{0,5}\[\](\[(!\[\](\+!{1,2}\[\]))\]|\+!{0,5}\[(\[\])?\]|\+\{\})+'
// ![]+!!![[]]+{}[![]+!![]][![]+!![]]+{}[![]+![]]+{}+{}[![]+![]][![]+!![]]+![[]]+{}
//
// A few command-line options are provided, which you can see by running regen -help.
package main
import (
"bytes"
"crypto/rand"
"flag"
"fmt"
"io"
"log"
"math/big"
"os"
"regexp/syntax"
"strings"
)
// CLI options.
var (
	// verbose is declared alongside the CLI options; nothing visible in this file sets or reads it.
	verbose bool

	// unboundMax is how many repetitions beyond the minimum GenString may emit for a repetition with no
	// upper bound (*, + and {n,}). It defaults to 32 and is bound to the -max flag in main.
	unboundMax = 32
)
// randint returns a uniformly distributed integer in [0, max) drawn from crypto/rand.
//
// A max of 0 or 1 always yields 0 without consuming any randomness. It panics if max is negative or if the
// system random source fails.
func randint(max int64) int64 {
	switch {
	case max < 0:
		panic("randint: max < 0")
	case max <= 1:
		// Only one possible result; rand.Int would also reject a bound of 0.
		return 0
	}

	limit := new(big.Int).SetInt64(max)
	n, err := rand.Int(rand.Reader, limit)
	if err != nil {
		panic(err)
	}
	return n.Int64()
}
// GenString appends to w a string that should, ideally, match rx, recursing into rx's sub-expressions where
// applicable.
//
// It returns io.EOF when generation reaches an end-of-text marker: OpEndText, or OpEndLine while w is still
// empty. This may not be entirely correct handling of those ops. io.EOF means "stop generating", so whatever
// is already in w is the result; it propagates out of every enclosing expression, including * and +
// repetitions. Otherwise GenString returns nil.
//
// Ops that cannot produce output (OpNoMatch, OpEmptyMatch, OpBeginText, an empty character class, an
// alternation with no branches) write nothing. Word boundary ops (\b and \B) are not supported and panic.
func GenString(w *bytes.Buffer, rx *syntax.Regexp) (err error) {
	switch rx.Op {
	case syntax.OpNoMatch, syntax.OpEmptyMatch, syntax.OpBeginText:
		// Nothing to emit.

	case syntax.OpLiteral:
		w.WriteString(string(rx.Rune))

	case syntax.OpCharClass:
		genCharClass(w, rx.Rune)

	case syntax.OpAnyCharNotNL:
		// One of the 95 printable ASCII characters, ' ' through '~'.
		w.WriteRune(rune(' ' + randint(95)))

	case syntax.OpAnyChar:
		// Printable ASCII plus newline: the slot after '~' (DEL) is remapped to '\n'.
		i := int(randint(96))
		ch := rune(' ' + i)
		if i == 95 {
			ch = '\n'
		}
		w.WriteRune(ch)

	case syntax.OpBeginLine:
		// At the very start of the output we are already at a line start.
		if w.Len() != 0 {
			w.WriteByte('\n')
		}

	case syntax.OpEndLine:
		if w.Len() == 0 {
			return io.EOF
		}
		w.WriteByte('\n')

	case syntax.OpEndText:
		return io.EOF

	case syntax.OpWordBoundary, syntax.OpNoWordBoundary:
		panic("regen: word boundaries not supported yet")

	case syntax.OpStar, syntax.OpPlus:
		lo := 0
		if rx.Op == syntax.OpPlus {
			lo = 1
		}
		return genRepeat(w, rx.Sub, lo, lo+unboundMax)

	case syntax.OpQuest:
		// Coin toss: emit the sub-expression roughly half of the time.
		if randint(0xFFFFFFFF) > 0x7FFFFFFF {
			return genSeq(w, rx.Sub)
		}

	case syntax.OpRepeat:
		hi := rx.Max
		if hi == -1 {
			// {n,} has no upper bound; cap it like * and +.
			hi = rx.Min + unboundMax
		}
		return genRepeat(w, rx.Sub, rx.Min, hi)

	case syntax.OpConcat, syntax.OpCapture:
		return genSeq(w, rx.Sub)

	case syntax.OpAlternate:
		if len(rx.Sub) == 0 {
			return nil
		}
		return GenString(w, rx.Sub[randint(int64(len(rx.Sub)))])
	}
	return nil
}

// genCharClass writes one rune chosen uniformly from ranges, a list of inclusive lo/hi rune pairs as found in
// syntax.Regexp.Rune for OpCharClass. An empty class matches nothing, so nothing is written.
func genCharClass(w *bytes.Buffer, ranges []rune) {
	total := 0
	for i := 0; i+1 < len(ranges); i += 2 {
		total += 1 + int(ranges[i+1]-ranges[i])
	}
	if total == 0 {
		return
	}

	// Pick the nth rune of the class, then walk the ranges to find which pair it falls in.
	nth := rune(randint(int64(total)))
	for i := 0; i+1 < len(ranges); i += 2 {
		lo, hi := ranges[i], ranges[i+1]
		span := hi - lo
		if nth <= span {
			w.WriteRune(lo + nth)
			return
		}
		nth -= span + 1
	}
	panic("unreachable")
}

// genRepeat generates subs a random number of times between lo and hi, inclusive. If hi is below lo (for
// example because unboundMax is negative), exactly lo repetitions are generated instead of handing randint a
// negative bound. It stops at, and returns, the first error.
func genRepeat(w *bytes.Buffer, subs []*syntax.Regexp, lo, hi int) error {
	if hi < lo {
		hi = lo
	}
	for count := lo + int(randint(int64(hi)-int64(lo)+1)); count > 0; count-- {
		if err := genSeq(w, subs); err != nil {
			return err
		}
	}
	return nil
}

// genSeq generates each expression in subs in order, stopping at and returning the first error.
func genSeq(w *bytes.Buffer, subs []*syntax.Regexp) error {
	for _, sub := range subs {
		if err := GenString(w, sub); err != nil {
			return err
		}
	}
	return nil
}
// usageText is the help banner shown by flag.Usage. main trims its surrounding whitespace and prints it to
// stderr, followed by the generated flag defaults (hence the trailing OPTIONS heading).
const usageText = `
regen [OPTIONS] <pattern>...
<pattern> must be a valid POSIX- or Perl-compatible RE2 regular expression pattern. RE2's
regular expression syntax is described at <https://github.com/google/re2/wiki/Syntax>.
Note that when passing -simplify, this can convert {m,n} repetitions into chains of zero-or-one
repetitions. This can produce less variance in result strings as zero-or-one repetitions are
essentially a coin toss and will skip nested sub-expressions if the toss fails.
OPTIONS
-------
`
// main parses the command-line flags, parses every pattern argument into a regexp/syntax tree, and prints
// generated strings to stdout separated by newlines.
//
// With -zip the patterns are interleaved (one string per pattern, repeated -n times); otherwise all -n strings
// for the first pattern are printed before moving on to the next. A trailing newline is added only when
// isTTY reports true. Usage errors (no pattern, negative -max, unparsable pattern) are logged to stderr and
// exit with status 1.
func main() {
	log.SetPrefix("regen: ")
	log.SetFlags(0)

	flag.Usage = func() {
		fmt.Fprintln(os.Stderr, strings.TrimSpace(usageText))
		flag.PrintDefaults()
	}

	simplify := flag.Bool("simplify", false, "Whether to simplify the parsed regular expressions.")
	posix := flag.Bool("posix", false, "Use POSIX syntax instead of Perl-like syntax.")
	zip := flag.Bool("zip", false, "Whether to interleave patterns or go pattern by pattern.")
	n := flag.Uint("n", 1, "The `number` of strings to generate per regexp.")
	flag.IntVar(&unboundMax, "max", unboundMax, "The max `repetitions` to use for unlimited repetitions/matches.")
	flag.Parse()

	if flag.NArg() == 0 {
		log.Println("no regexp given")
		os.Exit(1)
	}
	// A negative bound would otherwise surface later as a panic from the random number helper.
	if unboundMax < 0 {
		log.Printf("-max must not be negative (got %d)", unboundMax)
		os.Exit(1)
	}

	mode := syntax.Perl
	if *posix {
		mode = syntax.POSIX
	}

	regexen := make([]*syntax.Regexp, flag.NArg())
	for i, s := range flag.Args() {
		rx, err := syntax.Parse(s, mode)
		if err != nil {
			log.Printf("error parsing regular expression %q:\n%v", s, err)
			os.Exit(1)
		}
		if *simplify {
			rx = rx.Simplify()
		}
		regexen[i] = rx
	}

	var b bytes.Buffer
	first := true
	// emit generates one string for rx and prints it, writing the newline separator before every string
	// except the first so that the output never ends with an unconditional newline.
	emit := func(rx *syntax.Regexp) {
		if !first {
			fmt.Print("\n")
		}
		first = false
		b.Reset()
		// io.EOF only signals that generation hit an end-of-text marker; the buffer is still valid output.
		if err := GenString(&b, rx); err != nil && err != io.EOF {
			log.Printf("Error generating string: %v", err)
			os.Exit(1)
		}
		fmt.Print(b.String())
	}

	if *zip {
		for i := uint(0); i < *n; i++ {
			for _, rx := range regexen {
				emit(rx)
			}
		}
	} else {
		for _, rx := range regexen {
			for i := uint(0); i < *n; i++ {
				emit(rx)
			}
		}
	}

	if isTTY() {
		fmt.Print("\n")
	}
}
// isTTY attempts to determine whether the current stdout refers to a terminal.
//
// The check is a heuristic: stdout counts as a terminal unless its file mode marks it as a named pipe. If
// stdout cannot be stat'ed, the error is logged and true is returned so that output stays human readable.
func isTTY() bool {
	info, statErr := os.Stdout.Stat()
	if statErr == nil {
		return info.Mode()&os.ModeNamedPipe == 0
	}
	log.Println("Error getting Stat of os.Stdout:", statErr)
	return true // Assume human readable
}