From a94f9e3dc7022a1c7667c2393714ca021eb3a8cf Mon Sep 17 00:00:00 2001 From: M09Ic Date: Mon, 9 Jan 2023 14:47:58 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=9B=B8=E4=BC=BC=E5=BA=A6?= =?UTF-8?q?=E5=88=A4=E6=96=AD,=20=E5=B9=B6=E6=B7=BB=E5=8A=A0=E4=BA=86dista?= =?UTF-8?q?nce/sim=E5=AD=97=E6=AE=B5=E7=94=A8=E6=9D=A5=E8=8E=B7=E5=8F=96.?= =?UTF-8?q?=20=E4=BC=98=E5=8C=96fuzzybaseline=E7=9A=84=E9=80=BB=E8=BE=91,?= =?UTF-8?q?=20=E7=A7=BB=E5=8A=A8=E5=88=B0=E5=A4=84=E7=90=86=E7=BA=BF?= =?UTF-8?q?=E7=A8=8B=E4=B8=AD.=20=E4=BC=98=E5=8C=96expr=E7=9A=84=E6=80=A7?= =?UTF-8?q?=E8=83=BD=20=E4=BF=AE=E5=A4=8D--fuzzy=E6=B2=A1=E5=90=AF?= =?UTF-8?q?=E7=94=A8=E4=B9=9F=E4=BC=9A=E7=94=9F=E6=95=88=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/pool.go | 12 +++++++----- internal/runner.go | 5 +++-- pkg/baseline.go | 14 ++++++++++++-- pkg/utils.go | 3 +-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/internal/pool.go b/internal/pool.go index a5ff4df..903cbf9 100644 --- a/internal/pool.go +++ b/internal/pool.go @@ -27,6 +27,7 @@ var ( maxRedirect = 3 maxCrawl = 3 maxRecursion = 0 + nilBaseline = &pkg.Baseline{} ) func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { @@ -54,6 +55,10 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { // 挂起一个异步的处理结果线程, 不干扰主线程的请求并发 go func() { for bl := range pool.tempCh { + if bl.IsValid { + pool.addFuzzyBaseline(bl) + } + if _, ok := pool.Statistor.Counts[bl.Status]; ok { pool.Statistor.Counts[bl.Status]++ } else { @@ -71,7 +76,7 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { if bl, ok := pool.baselines[status]; ok { params["bl"+strconv.Itoa(status)] = bl } else { - params["bl"+strconv.Itoa(status)] = &pkg.Baseline{} + params["bl"+strconv.Itoa(status)] = nilBaseline } } } @@ -257,7 +262,7 @@ Loop: if pool.Mod == pkg.HostSpray { pool.reqPool.Invoke(newUnit(pkg.RandHost(), source)) } else if pool.Mod == pkg.PathSpray { - pool.reqPool.Invoke(newUnit(pkg.RandPath(), source)) + pool.reqPool.Invoke(newUnit(safePath(pool.BaseURL, pkg.RandPath()), source)) } case unit, ok := <-pool.additionCh: if !ok { @@ -323,7 +328,6 @@ func (pool *Pool) Invoke(v interface{}) { pool.wg.Add(1) pool.doRedirect(bl, unit.depth) } - pool.addFuzzyBaseline(bl) } else { bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error()) } @@ -643,11 +647,9 @@ func (pool *Pool) addAddition(u *Unit) { func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) { if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) { bl.Collect() - pool.locker.Lock() pool.wg.Add(1) pool.doCrawl(bl) pool.baselines[bl.Status] = bl - pool.locker.Unlock() logs.Log.Infof("[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) } } diff --git a/internal/runner.go b/internal/runner.go index e0c8203..6583d9c 100644 --- a/internal/runner.go +++ b/internal/runner.go @@ -380,7 +380,6 @@ func (r *Runner) Outputting() { } else { logs.Log.Debug(bl.String()) } - } } } @@ -410,7 +409,9 @@ func (r *Runner) Outputting() { if !ok { return } - fuzzySaveFunc(bl) + if r.Fuzzy { + fuzzySaveFunc(bl) + } } } }() diff --git a/pkg/baseline.go b/pkg/baseline.go index 17ca9cf..82e44ae 100644 --- a/pkg/baseline.go +++ b/pkg/baseline.go @@ -135,6 +135,7 @@ type Baseline struct { IsFuzzy bool `json:"fuzzy"` Source int `json:"source"` ReqDepth int `json:"depth"` + Distance uint8 `json:"distance"` Recu bool `json:"-"` RecuDepth int `json:"-"` URLs []string `json:"-"` @@ -233,10 +234,11 @@ func (bl *Baseline) Compare(other *Baseline) int { return -1 } -var Distance uint8 = 5 +var Distance uint8 = 5 // 数字越小越相似, 数字为0则为完全一致. func (bl *Baseline) FuzzyCompare(other *Baseline) bool { - if parsers.SimhashCompare(other.BodySimhash, bl.BodySimhash) < Distance { + // 这里使用rawsimhash, 是为了保证一定数量的字符串, 否则超短的body会导致simhash偏差指较大 + if other.Distance = parsers.SimhashCompare(other.RawSimhash, bl.RawSimhash); other.Distance < Distance { return true } return false @@ -278,6 +280,8 @@ func (bl *Baseline) Get(key string) string { return strconv.Itoa(int(bl.Spended)) + "ms" case "length": return strconv.Itoa(bl.BodyLength) + case "sim", "distance": + return "sim:" + strconv.Itoa(int(bl.Distance)) case "source": return GetSourceName(bl.Source) case "extract": @@ -366,6 +370,9 @@ func (bl *Baseline) ColorString() string { line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms")) line.WriteString(logs.YellowBold(" - " + GetSourceName(bl.Source))) line.WriteString(logs.GreenLine(bl.Additional("title"))) + if bl.Distance != 0 { + line.WriteString(logs.GreenLine(bl.Additional("sim"))) + } line.WriteString(logs.Cyan(bl.Frameworks.String())) line.WriteString(logs.Cyan(bl.Extracteds.String())) if bl.RedirectURL != "" { @@ -416,6 +423,9 @@ func (bl *Baseline) String() string { line.WriteString(" - ") line.WriteString(strconv.Itoa(int(bl.Spended)) + "ms") line.WriteString(bl.Additional("title")) + if bl.Distance != 0 { + line.WriteString(logs.GreenLine(bl.Additional("sim"))) + } line.WriteString(bl.Frameworks.String()) line.WriteString(bl.Extracteds.String()) if bl.RedirectURL != "" { diff --git a/pkg/utils.go b/pkg/utils.go index ae786f4..1549181 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -104,9 +104,8 @@ const ( func RandPath() string { n := 16 b := make([]byte, n) - b[0] = byte(0x2f) // A rand.Int63() generates 63 random bits, enough for letterIdMax letters! - for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 1; { + for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; { if remain == 0 { cache, remain = src.Int63(), letterIdMax }