From e2a31585ad070d0efa5bd51a90eac28f6f7905ef Mon Sep 17 00:00:00 2001 From: Biodog06 Date: Mon, 9 Mar 2026 10:54:34 +0800 Subject: [PATCH 1/4] Add predefined field whitelist validation and parallel processing for dump Additionally, implemented concurrent query processing in the `dump` command by introducing workers and rate limiting, significantly improving data extraction performance. Key changes: - Created official field whitelists (ValidFieldsAll and ValidFieldsNext) in fields.go. - Added validation logic (ValidateFieldsAll, ValidateFieldsNext) and integrated it into the search, dump, and random commands. - Enhanced the `dump` command with a worker pool for concurrency and thread-safe data writing mechanics. - Included comprehensive unit test coverage for the validation and concurrency logic in fields_test.go and dump_test.go. - Updated README.md and README_ZH.md to natively reference the official FOFA API documentation URLs for valid fields. --- README.md | 21 +++- README_ZH.md | 21 +++- cmd/fofa/cmd/dump.go | 97 +++++++++++++---- cmd/fofa/cmd/dump_test.go | 223 ++++++++++++++++++++++++++++++++++++++ cmd/fofa/cmd/random.go | 15 ++- cmd/fofa/cmd/search.go | 16 ++- fields.go | 148 +++++++++++++++++++++++++ fields_test.go | 100 +++++++++++++++++ 8 files changed, 601 insertions(+), 40 deletions(-) create mode 100644 cmd/fofa/cmd/dump_test.go create mode 100644 fields.go create mode 100644 fields_test.go diff --git a/README.md b/README.md index df01a01..abbbfce 100644 --- a/README.md +++ b/README.md @@ -136,10 +136,10 @@ categories: | Parameter | Abbreviation | Default Value | Description | |-------------|--------------|---------------|-----------------------------------------------------------| -| fields | f | ip,port | Fields returned by FOFA. [Learn More](https://fofa.info/vip) | +| fields | f | ip,port | Fields returned by FOFA, valid fields refer to https://en.fofa.info/api | | format | | csv | Output format: csv/json/xml | | outFile | o | | Output file. 
If not set, prints to terminal | -| size | s | 100 | Query size. Maximum is 10,000, subject to `deductMode` | +| size | s | 100 | Query size. Maximum is 10,000, subject to `deductMode` \*1 \*2 | | deductMode | | | Consumption of f-points. If not set, uses max free query limit | | fixUrl | | false | Combines URLs (e.g., 1.1.1.1,80 becomes http://1.1.1.1) | | urlPrefix | | http:// | URL prefix | @@ -156,22 +156,30 @@ categories: | headline | | false | Outputs CSV headers. Available only when format is CSV | | help | h | false | Displays usage information | +*1: When the requested fields include `cert` and `banner`, the maximum size is 2000 per page. +*2: When the requested fields include `body`, the maximum size is 500 per page. + ### `dump` | Parameter | Abbreviation | Default Value | Description | |-------------|--------------|---------------|-----------------------------------------------------------| -| fields | f | ip,port | Fields returned by FOFA. [Learn More](https://fofa.info/vip) | +| fields | f | ip,port | Fields returned by FOFA. For valid fields see https://en.fofa.info/api; for the dump command see https://en.fofa.info/api/batches_pages | | format | | csv | Output format: csv/json/xml | | outFile | o | | Output file. If not set, prints to terminal | | inFile | i | | Input file. If not set, reads from pipeline input | -| size | s | 100 | Query size. No upper limit but consumes f-points or free query quota | +| size | s | 100 | Query size. 
No upper limit but consumes f-points or free quota \*1 \*2 | | fixUrl | | false | Combines URLs (e.g., 1.1.1.1,80 becomes http://1.1.1.1) | | urlPrefix | | http:// | URL prefix | | full | | false | Retrieves full data | | batchSize | bs | 1000 | Number of records to fetch per batch | | batchType | bt | | Batch query type: ip/domain | +| workers | | 1 | Number of concurrent workers; raised to 10 when using -i unless set explicitly | +| rate | | 2 | Query rate per second | | help | h | false | Displays usage information | +*1: When the requested fields include `cert` and `banner`, the maximum size is 2000 per page. +*2: When the requested fields include `body`, the maximum size is 500 per page. + ### `jsRender` | Parameter | Abbreviation | Default Value | Description | @@ -254,13 +262,16 @@ categories: |-------------|--------------|--------------------------------------|------------------------------------------| | fields | f | ip,port,host,header,title,server,lastupdatetime | Fields returned by FOFA. [Learn More](https://en.fofa.info/vip) | | format | | json | Output format: csv/json/xml | -| size | s | 1 | Query count. `-1` for infinite queries | +| size | s | 1 | Query count. `-1` for infinite queries \*1 \*2 | | sleep | | 1000 | Interval between queries in milliseconds | | fixUrl | | false | Combines URLs (e.g., 1.1.1.1,80 becomes http://1.1.1.1) | | urlPrefix | | http:// | URL prefix | | full | | false | Retrieves full data | | help | h | false | Displays usage information | +*1: When the requested fields include `cert` and `banner`, the maximum size is 2000 per page. +*2: When the requested fields include `body`, the maximum size is 500 per page. 
+ ### `count` | Parameter | Abbreviation | Default Value | Description | diff --git a/README_ZH.md b/README_ZH.md index e581409..4698a71 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -139,10 +139,10 @@ categories: | 参数 | 参数简写 | 默认值 | 简介 | | ----------- | -------- | ------- | ------------------------------------------------- | -| fields | f | ip,port | FOFA返回的字段选择,[了解更多](https://fofa.info/vip) | +| fields | f | ip,port | FOFA返回的字段选择,有效字段参考https://fofa.info/api | | format | | csv | 输出格式,可以为csv/json/xml | | outFile | o | | 输出文件,如果不设置则终端打印 | -| size | s | 100 | 查询数量,最大为10000,受deductMode参数限制 | +| size | s | 100 | 查询数量,最大为10000,受deductMode参数限制 *1*2 | | deductMode | | | 消费f点数,不设置则读取用户最大免费数量 | | fixUrl | | false | 是否组合url,例如1.1.1.1,80组合为http://1.1.1.1 | | urlPrefix | | http:// | url前缀 | @@ -159,22 +159,30 @@ categories: | headline | | false | 是否输出csv头,只有在format为csv时可用 | | help | h | false | 使用方法 | +*1:当获取字段包含 `cert` 和 `banner` 时,单次查询 size 最大支持 2000。 +*2:当获取字段包含 `body` 时,单次查询 size 最大支持 500。 + ### dump | 参数 | 参数简写 | 默认值 | 简介 | | --------- | -------- | ------- | ----------------------------------------------------- | -| fields | f | ip,port | FOFA返回的字段选择,[了解更多](https://fofa.info/vip) | +| fields | f | ip,port | FOFA返回的字段选择,有效字段参考https://fofa.info/api,dump的参考https://fofa.info/api/batches_pages | | format | | csv | 输出格式,可以为csv/json/xml | | outFile | o | | 输出文件,如果不设置则终端打印 | | inFile | i | | 输入文件,如果不设置则读取管道输入 | -| size | s | 100 | 查询数量,无上限,但要扣除f点或免费数量 | +| size | s | 100 | 查询数量,无上限,但要扣除f点或免费数量 *1*2 | | fixUrl | | false | 是否组合url,例如1.1.1.1,80组合为http://1.1.1.1 | | urlPrefix | | http:// | url前缀 | | full | | false | 是否调取全量数据 | | batchSize | bs | 1000 | 每次拉取多少条数据 | | batchType | bt | | 批量查询,可以为ip/domain | +| workers | | 10 | 线程数量,当使用-i时默认10 | +| rate | | 2 | 每秒查询次数 | | help | h | false | 使用方法 | +*1:当获取字段包含 `cert` 和 `banner` 时,单次查询 size 最大支持 2000。 +*2:当获取字段包含 `body` 时,单次查询 size 最大支持 500。 + ### jsRender | 参数 | 参数简写 | 默认值 | 简介 | @@ -256,13 +264,16 @@ categories: | --------- | -------- 
| ----------------------------------------------- | ----------------------------------------------------- | | fields | f | ip,port,host,header,title,server,lastupdatetime | FOFA返回的字段选择,[了解更多](https://fofa.info/vip) | | format | | json | 输出格式,可以为csv/json/xml | -| size | s | 1 | 查询次数,-1表示永远不停 | +| size | s | 1 | 查询次数,-1表示永远不停 *1*2 | | sleep | | 1000 | 获取间隔,单位ms | | fixUrl | | false | 是否组合url,例如1.1.1.1,80组合为http://1.1.1.1 | | urlPrefix | | http:// | url前缀 | | full | | false | 是否调取全量数据 | | help | h | false | 使用方法 | +*1:当获取字段包含 `cert` 和 `banner` 时,单次查询 size 最大支持 2000。 +*2:当获取字段包含 `body` 时,单次查询 size 最大支持 500。 + ### count | 参数 | 参数简写 | 默认值 | 简介 | diff --git a/cmd/fofa/cmd/dump.go b/cmd/fofa/cmd/dump.go index 5bb9f76..9371ce5 100644 --- a/cmd/fofa/cmd/dump.go +++ b/cmd/fofa/cmd/dump.go @@ -2,16 +2,20 @@ package cmd import ( "bufio" + "context" "errors" "fmt" - "github.com/FofaInfo/GoFOFA" - "github.com/FofaInfo/GoFOFA/pkg/outformats" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" "io" "log" "os" "strings" + "sync" + + gofofa "github.com/FofaInfo/GoFOFA" + "github.com/FofaInfo/GoFOFA/pkg/outformats" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" + "golang.org/x/time/rate" ) var ( @@ -112,6 +116,18 @@ var dumpCmd = &cli.Command{ Usage: "use custom fields", Destination: &customFields, }, + &cli.IntFlag{ + Name: "workers", + Value: 1, + Usage: "number of workers", + Destination: &workers, + }, + &cli.IntFlag{ + Name: "rate", + Value: 2, + Usage: "fofa query per second", + Destination: &ratePerSecond, + }, }, Action: DumpAction, } @@ -171,6 +187,14 @@ func DumpAction(ctx *cli.Context) error { if err = scanner.Err(); err != nil { log.Fatal(err) } + + // 默认workers提升到10,rate提升到2 + if !ctx.IsSet("workers") { + workers = 10 + } + if !ctx.IsSet("rate") { + ratePerSecond = 2 + } } if len(queries) == 0 { @@ -190,6 +214,11 @@ func DumpAction(ctx *cli.Context) error { return errors.New("fofa fields cannot be empty") } + // 字段白名单前置强校验 (使用 ValidateFieldsNext) + if 
err := gofofa.ValidateFieldsNext(fields); err != nil { + return err + } + // headline只允许在format=csv的情况下使用 if headline && format != "csv" && len(outFile) > 0 { return errors.New("headline param is only allowed if format is csv, outFile not be empty") @@ -249,26 +278,52 @@ func DumpAction(ctx *cli.Context) error { } // do search - for _, query := range queries { - log.Println("dump data of query:", query) + var locker sync.Mutex + wg := sync.WaitGroup{} + queriesChan := make(chan string, len(queries)) + limiter := rate.NewLimiter(rate.Limit(ratePerSecond), 5) - fetchedSize := 0 - err := fofaCli.DumpSearch(query, size, batchSize, fields, func(res [][]string, allSize int) (err error) { - fetchedSize += len(res) - log.Printf("size: %d/%d, %.2f%%", fetchedSize, allSize, 100*float32(fetchedSize)/float32(allSize)) - // output - err = writer.WriteAll(res) - return err - }, gofofa.SearchOptions{ - FixUrl: fixUrl, - UrlPrefix: urlPrefix, - Full: full, - }) - if err != nil { - log.Println("fetch error:", err) - //return err + worker := func(qChan <-chan string, wg *sync.WaitGroup) { + for query := range qChan { + if err := limiter.Wait(context.Background()); err != nil { + fmt.Println("Error: ", err) + } + log.Println("dump data of query:", query) + + fetchedSize := 0 + err := fofaCli.DumpSearch(query, size, batchSize, fields, func(res [][]string, allSize int) (err error) { + fetchedSize += len(res) + log.Printf("size: %d/%d, %.2f%% for query: %s", fetchedSize, allSize, 100*float32(fetchedSize)/float32(allSize), query) + // output + locker.Lock() + defer locker.Unlock() + err = writer.WriteAll(res) + if err == nil { + writer.Flush() + } + return err + }, gofofa.SearchOptions{ + FixUrl: fixUrl, + UrlPrefix: urlPrefix, + Full: full, + }) + if err != nil { + log.Printf("fetch error for query %s: %v\n", query, err) + } + wg.Done() } } + for w := 0; w < workers; w++ { + go worker(queriesChan, &wg) + } + + for _, query := range queries { + wg.Add(1) + queriesChan <- query + } + 
close(queriesChan) + wg.Wait() + return nil } diff --git a/cmd/fofa/cmd/dump_test.go b/cmd/fofa/cmd/dump_test.go new file mode 100644 index 0000000..b51dc5d --- /dev/null +++ b/cmd/fofa/cmd/dump_test.go @@ -0,0 +1,223 @@ +package cmd + +import ( + stdjson "encoding/json" + "net/http" + "net/http/httptest" + "os" + "reflect" + "strings" + "sync" + "testing" + + gofofa "github.com/FofaInfo/GoFOFA" + "github.com/stretchr/testify/assert" + "github.com/urfave/cli/v2" +) + +type HostResults struct { + Error bool `json:"error"` + Size int `json:"size"` + Results []interface{} `json:"results"` +} + +func Test_constructQuery(t *testing.T) { + tests := []struct { + name string + queryType string + queries []string + want string + }{ + { + name: "single query", + queryType: "ip", + queries: []string{"1.1.1.1"}, + want: "ip=1.1.1.1", + }, + { + name: "multiple queries", + queryType: "domain", + queries: []string{"google.com", "baidu.com"}, + want: "domain=google.com || domain=baidu.com", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := constructQuery(tt.queryType, tt.queries); got != tt.want { + t.Errorf("constructQuery() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_batchProcess(t *testing.T) { + tests := []struct { + name string + queries []string + batchSize int + queryType string + want []string + }{ + { + name: "batch size 1", + queries: []string{"1.1.1.1", "2.2.2.2"}, + batchSize: 1, + queryType: "ip", + want: []string{"ip=1.1.1.1", "ip=2.2.2.2"}, + }, + { + name: "batch size 2", + queries: []string{"1.1.1.1", "2.2.2.2", "3.3.3.3"}, + batchSize: 2, + queryType: "ip", + want: []string{"ip=1.1.1.1 || ip=2.2.2.2", "ip=3.3.3.3"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := batchProcess(tt.queries, tt.batchSize, tt.queryType); !reflect.DeepEqual(got, tt.want) { + t.Errorf("batchProcess() = %v, want %v", got, tt.want) + } + }) + } +} + +// Mocking gofofa.Client is hard because it's 
a struct and many methods are on it. +// However, we can mock the network if we want, or in this case, we just want to test +// that DumpAction calls DumpSearch correctly. +// Since fofaCli is global, we can swap it. + +type mockDumpClient struct { + *gofofa.Client + dumpSearchFunc func(query string, size int, batchSize int, fields []string, callback func([][]string, int) error, options gofofa.SearchOptions) error +} + +func (m *mockDumpClient) DumpSearch(query string, size int, batchSize int, fields []string, callback func([][]string, int) error, options gofofa.SearchOptions) error { + return m.dumpSearchFunc(query, size, batchSize, fields, callback, options) +} + +// Note: DumpSearch is not an interface method, so mockDumpClient doesn't actually override gofofa.Client's method +// when called via a gofofa.Client pointer. We'd need an interface for fofaCli. +// Until gofofa.Client is refactored behind an interface, mockDumpClient cannot be injected into DumpAction. +// The tests above therefore cover the helper functions directly, and the DumpAction tests below +// point fofaCli at an httptest server instead of using the mock. + +func TestDumpAction_Concurrency(t *testing.T) { + // 1. Setup mock server + var mu sync.Mutex + queryCount := 0 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + queryCount++ + mu.Unlock() + + if strings.Contains(r.URL.Path, "/api/v1/info/my") { + w.Write([]byte(`{"error":false, "fcoin":100, "vip_level":2}`)) + return + } + + if strings.Contains(r.URL.Path, "search/next") { + // Simulate FOFA response + res := HostResults{ + Error: false, + Size: 1, + Results: []interface{}{ + []interface{}{"1.1.1.1", "80"}, + }, + } + data, _ := stdjson.Marshal(res) + w.Write(data) + return + } + })) + defer ts.Close() + + // 2. Mock fofaCli + origCli := fofaCli + defer func() { fofaCli = origCli }() + + var err error + fofaCli, err = gofofa.NewClient(gofofa.WithURL(ts.URL + "/?email=test&key=test")) + assert.Nil(t, err) + + // 3. 
Setup CLI App + app := &cli.App{ + Commands: []*cli.Command{ + dumpCmd, + }, + } + + // 4. Create inFile with multiple queries + tmpFile, _ := os.CreateTemp("", "queries.txt") + defer os.Remove(tmpFile.Name()) + tmpFile.WriteString("domain=google.com\ndomain=baidu.com\ndomain=bing.com\n") + tmpFile.Close() + + // 5. Run DumpAction with concurrency + // We need to capture stdout or just check if it doesn't fail + err = app.Run([]string{"fofa", "dump", "-i", tmpFile.Name(), "-workers", "3", "-s", "1"}) + assert.Nil(t, err) + + // 6. Verify all queries were processed + assert.Equal(t, 3, queryCount-1) // -1 for the initial info/my call in NewClient +} + +func TestDumpAction_DefaultsWithInFile(t *testing.T) { + // 1. Setup mock server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.Contains(r.URL.Path, "/api/v1/info/my") { + w.Write([]byte(`{"error":false, "fcoin":100, "vip_level":2}`)) + return + } + if strings.Contains(r.URL.Path, "search/next") { + // Simulate FOFA response + res := HostResults{ + Error: false, + Size: 1, + Results: []interface{}{ + []interface{}{"1.1.1.1", "80"}, + }, + } + data, _ := stdjson.Marshal(res) + w.Write(data) + return + } + })) + defer ts.Close() + + // 2. Mock fofaCli + origCli := fofaCli + defer func() { fofaCli = origCli }() + + var err error + fofaCli, err = gofofa.NewClient(gofofa.WithURL(ts.URL + "/?email=test&key=test")) + assert.Nil(t, err) + + // 3. Setup CLI App + app := &cli.App{ + Commands: []*cli.Command{ + dumpCmd, + }, + } + + // 4. Create inFile with multiple queries + tmpFile, _ := os.CreateTemp("", "queries.txt") + defer os.Remove(tmpFile.Name()) + tmpFile.WriteString("domain=google.com\n") + tmpFile.Close() + + // reset globals + workers = 1 + ratePerSecond = 2 // default is 2 anyway, but let's change it to test + origRate := ratePerSecond + ratePerSecond = 1 + defer func() { ratePerSecond = origRate }() + + // 5. 
Run DumpAction without workers and rate explicit flags + err = app.Run([]string{"fofa", "dump", "-i", tmpFile.Name(), "-s", "1"}) + assert.Nil(t, err) + + // 6. Verify workers and rate are updated to 10 and 2 + assert.Equal(t, 10, workers) + assert.Equal(t, 2, ratePerSecond) +} diff --git a/cmd/fofa/cmd/random.go b/cmd/fofa/cmd/random.go index ca6a108..31e3d7d 100644 --- a/cmd/fofa/cmd/random.go +++ b/cmd/fofa/cmd/random.go @@ -3,14 +3,15 @@ package cmd import ( "errors" "fmt" - "github.com/FofaInfo/GoFOFA" - "github.com/FofaInfo/GoFOFA/pkg/outformats" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" "math/rand" "os" "strings" "time" + + gofofa "github.com/FofaInfo/GoFOFA" + "github.com/FofaInfo/GoFOFA/pkg/outformats" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" ) var ( @@ -98,6 +99,12 @@ func randomAction(ctx *cli.Context) error { if len(fields) == 0 { return errors.New("fofa fields cannot be empty") } + + // 字段白名单前置强校验 (使用 ValidateFieldsAll) + if err := gofofa.ValidateFieldsAll(fields); err != nil { + return err + } + hostIndex := -1 if ctx.Bool("verbose") { if !hashField(fields, "host") { diff --git a/cmd/fofa/cmd/search.go b/cmd/fofa/cmd/search.go index 2db80ba..ed0fce8 100644 --- a/cmd/fofa/cmd/search.go +++ b/cmd/fofa/cmd/search.go @@ -5,16 +5,17 @@ import ( "context" "errors" "fmt" - "github.com/FofaInfo/GoFOFA" - "github.com/FofaInfo/GoFOFA/pkg/outformats" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" - "golang.org/x/time/rate" "io" "log" "os" "strings" "sync" + + gofofa "github.com/FofaInfo/GoFOFA" + "github.com/FofaInfo/GoFOFA/pkg/outformats" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" + "golang.org/x/time/rate" ) var ( @@ -273,6 +274,11 @@ func SearchAction(ctx *cli.Context) error { return errors.New("fofa fields cannot be empty") } + // 字段白名单前置强校验 (使用 ValidateFieldsAll) + if err := gofofa.ValidateFieldsAll(fields); err != nil { + return err + } + // headline只允许在format=csv的情况下使用 if headline && 
format != "csv" && len(outFile) > 0 { return errors.New("headline param is only allowed if format is csv, outFile not be empty") diff --git a/fields.go b/fields.go new file mode 100644 index 0000000..8fad605 --- /dev/null +++ b/fields.go @@ -0,0 +1,148 @@ +package gofofa + +import ( + "fmt" + "strings" +) + +// ValidFieldsAll is the whitelist of fields for the /search/all API endpoint. +// Based on https://fofa.info/api +var ValidFieldsAll = map[string]bool{ + "ip": true, // IP + "port": true, // Port + "protocol": true, // Protocol + "country": true, // Country code + "country_name": true, // Country Name + "region": true, // Region + "city": true, // City + "longitude": true, // Longitude of geographical location + "latitude": true, // Latitude of geographical location + "asn": true, // ASN Number + "org": true, // ASN Organization + "host": true, // Host + "domain": true, // Domain + "os": true, // OS + "server": true, // Server + "icp": true, // ICP Number Information + "title": true, // Website Title + "jarm": true, // JARM Fingerprint + "header": true, // Type is subdomain is header + "banner": true, // Type is service is banner + "cert": true, // Cert + "base_protocol": true, // Base protocol, e.g. 
tcp/udp + "link": true, // Asset URL + "cert.issuer.org": true, // SSL issuer organization + "cert.issuer.cn": true, // SSL issuer common name + "cert.subject.org": true, // SSL subject organization + "cert.subject.cn": true, // SSL subject common name + "tls.ja3s": true, // ja3s fingerprint + "tls.version": true, // TLS version + "cert.sn": true, // Certificate's serial number + "cert.not_before": true, // Certificate's validity start date + "cert.not_after": true, // Certificate's expired date + "cert.domain": true, // The domain name list in certificate + "header_hash": true, // http/https response hash value + "banner_hash": true, // banner response hash value + "banner_fid": true, // banner's structure hash value + "cname": true, // Domain's cname + "lastupdatetime": true, // FOFA last update time + "product": true, // Product name + "product_category": true, // Product category + "product.version": true, // Product version + "icon_hash": true, // Favicon's hash + "cert.is_valid": true, // Certificate is validity or not + "cname_domain": true, // Domain's cname + "body": true, // HTML Website Body + "cert.is_match": true, // Certificate matches the asset's domain + "cert.is_equal": true, // Certificate issuer equal the certificate subject + "icon": true, // Icon data + "fid": true, // FID + "structinfo": true, // Structure information (partial protocol support) +} + +// ValidFieldsNext is the whitelist of fields for the /search/next API endpoint. 
+// Based on https://fofa.info/api/batches_pages +var ValidFieldsNext = map[string]bool{ + "ip": true, // IP + "port": true, // Port + "protocol": true, // Protocol + "country": true, // Country code + "country_name": true, // Country Name + "region": true, // Region + "city": true, // City + "longitude": true, // Longitude of geographical location + "latitude": true, // Latitude of geographical location + "asn": true, // ASN Number + "org": true, // ASN Organization + "host": true, // Host + "domain": true, // Domain + "os": true, // OS + "server": true, // Server + "icp": true, // ICP Number Information + "title": true, // Website Title + "jarm": true, // JARM Fingerprint + "header": true, // Type is subdomain is header + "banner": true, // Type is service is banner + "cert": true, // Cert + "base_protocol": true, // Base protocol, e.g. tcp/udp + "link": true, // Asset URL + "cert.issuer.org": true, // SSL issuer organization + "cert.issuer.cn": true, // SSL issuer common name + "cert.subject.org": true, // SSL subject organization + "cert.subject.cn": true, // SSL subject common name + "tls.ja3s": true, // ja3s fingerprint + "tls.version": true, // TLS version + "cert.sn": true, // Certificate's serial number + "cert.not_before": true, // Certificate's validity start date + "cert.not_after": true, // Certificate's expired date + "cert.domain": true, // The domain name list in certificate + "header_hash": true, // http/https response hash value + "banner_hash": true, // banner response hash value + "banner_fid": true, // banner's structure hash value + "cname": true, // Domain's cname + "lastupdatetime": true, // FOFA last update time + "product": true, // Product name + "product_category": true, // Product category + "product.version": true, // Product version + "icon_hash": true, // Favicon's hash + "cert.is_valid": true, // Certificate is validity or not + "cname_domain": true, // Domain's cname + "body": true, // HTML Website Body + "cert.is_match": true, // 
Certificate matches the asset's domain + "cert.is_equal": true, // Certificate issuer equal the certificate subject + "icon": true, // Icon data + "fid": true, // FID + "structinfo": true, // Structure information (partial protocol support) +} + +// ValidateFieldsAll checks that all fields are in the ValidFieldsAll whitelist. +// Returns an error immediately if an unsupported field is detected. +func ValidateFieldsAll(fields []string) error { + for _, f := range fields { + f = strings.TrimSpace(f) + if f == "" { + continue + } + + if !ValidFieldsAll[f] { + return fmt.Errorf("[Error] Unsupported export field detected: %q. Execution aborted to prevent empty results", f) + } + } + return nil +} + +// ValidateFieldsNext checks that all fields are in the ValidFieldsNext whitelist. +// Returns an error immediately if an unsupported field is detected. +func ValidateFieldsNext(fields []string) error { + for _, f := range fields { + f = strings.TrimSpace(f) + if f == "" { + continue + } + + if !ValidFieldsNext[f] { + return fmt.Errorf("[Error] Unsupported export field detected: %q. 
Execution aborted to prevent empty results", f) + } + } + return nil +} diff --git a/fields_test.go b/fields_test.go new file mode 100644 index 0000000..43ff31c --- /dev/null +++ b/fields_test.go @@ -0,0 +1,100 @@ +package gofofa + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestValidateFields(t *testing.T) { + // All 50 official API fields should pass individually + allFields := []string{ + "ip", "port", "protocol", "country", "country_name", + "region", "city", "longitude", "latitude", "asn", + "org", "host", "domain", "os", "server", + "icp", "title", "jarm", "header", "banner", + "cert", "base_protocol", "link", + "cert.issuer.org", "cert.issuer.cn", "cert.subject.org", "cert.subject.cn", + "tls.ja3s", "tls.version", + "cert.sn", "cert.not_before", "cert.not_after", "cert.domain", + "header_hash", "banner_hash", "banner_fid", + "cname", "lastupdatetime", "product", "product_category", + "product.version", "icon_hash", "cert.is_valid", "cname_domain", + "body", "cert.is_match", "cert.is_equal", + "icon", "fid", "structinfo", + } + for _, isNext := range []bool{true, false} { + var err error + if isNext { + err = ValidateFieldsNext(allFields) + } else { + err = ValidateFieldsAll(allFields) + } + assert.Nil(t, err) + + // Verify we are testing all fields in the whitelist + if isNext { + assert.Equal(t, len(ValidFieldsNext), len(allFields), "Test must cover all fields in ValidFieldsNext") + } else { + assert.Equal(t, len(ValidFieldsAll), len(allFields), "Test must cover all fields in ValidFieldsAll") + } + + // Empty fields should pass + if isNext { + err = ValidateFieldsNext([]string{}) + } else { + err = ValidateFieldsAll([]string{}) + } + assert.Nil(t, err) + + // Invalid field should fail + if isNext { + err = ValidateFieldsNext([]string{"ip", "doamin"}) + } else { + err = ValidateFieldsAll([]string{"ip", "doamin"}) + } + assert.NotNil(t, err) + assert.Contains(t, err.Error(), "doamin") + assert.Contains(t, err.Error(), 
"Unsupported export field detected") + + // Another typo + if isNext { + err = ValidateFieldsNext([]string{"ttle", "port"}) + } else { + err = ValidateFieldsAll([]string{"ttle", "port"}) + } + assert.NotNil(t, err) + assert.Contains(t, err.Error(), "ttle") + + // structinfo prefix should be rejected (only exact "structinfo" is valid) + if isNext { + err = ValidateFieldsNext([]string{"structinfo.any_field"}) + } else { + err = ValidateFieldsAll([]string{"structinfo.any_field"}) + } + assert.NotNil(t, err) + assert.Contains(t, err.Error(), "structinfo.any_field") + + // Fields removed from old whitelist should now fail + if isNext { + err = ValidateFieldsNext([]string{"app"}) + } else { + err = ValidateFieldsAll([]string{"app"}) + } + assert.NotNil(t, err) + + if isNext { + err = ValidateFieldsNext([]string{"status_code"}) + } else { + err = ValidateFieldsAll([]string{"status_code"}) + } + assert.NotNil(t, err) + + if isNext { + err = ValidateFieldsNext([]string{"type"}) + } else { + err = ValidateFieldsAll([]string{"type"}) + } + assert.NotNil(t, err) + } +} From 5ce8eeab2facb8806a566e5b3c26ec452064cb5f Mon Sep 17 00:00:00 2001 From: Biodog06 Date: Mon, 9 Mar 2026 12:12:12 +0800 Subject: [PATCH 2/4] UPDATE HISTORY.md --- HISTORY.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 65dfdba..cc4b6f9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,11 @@ +## v0.2.29 add predefined field whitelist validation and parallel processing for dump + +- create official field whitelists (ValidFieldsAll and ValidFieldsNext) in fields.go. +- add validation logic (ValidateFieldsAll, ValidateFieldsNext) and integrated it into the search, dump, and random commands. +- enhance the `dump` command with a worker pool for concurrency and thread-safe data writing mechanics. +- include comprehensive unit test coverage for the validation and concurrency logic in fields_test.go and dump_test.go. 
+- update README.md and README_ZH.md to natively reference the official FOFA API documentation URLs for valid fields. + ## v0.2.28 fix dedup mode - fix dedup of multiple fields to remove duplicates From 8d76495c288b3314364118cbed372aa5b5ec5df4 Mon Sep 17 00:00:00 2001 From: Biodog06 Date: Mon, 9 Mar 2026 17:38:36 +0800 Subject: [PATCH 3/4] change domain active test --- httpcheck_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/httpcheck_test.go b/httpcheck_test.go index 4b2ed95..4111bcf 100644 --- a/httpcheck_test.go +++ b/httpcheck_test.go @@ -1,8 +1,9 @@ package gofofa import ( - "github.com/stretchr/testify/assert" "testing" + + "github.com/stretchr/testify/assert" ) func TestCheckActive(t *testing.T) { @@ -28,7 +29,7 @@ func TestCheckActive(t *testing.T) { }, { name: "Domain base", - fixedHostInfo: "baidu.com", + fixedHostInfo: "www.baidu.com", want: HttpResponse{IsActive: true, StatusCode: "200"}, }, { From 40546fafda2e30d3445b19cc104b1d7b67f8ea65 Mon Sep 17 00:00:00 2001 From: Biodog06 Date: Mon, 9 Mar 2026 18:45:34 +0800 Subject: [PATCH 4/4] change browser test --- browser.go | 22 +++++++++++++++------- browser_test.go | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/browser.go b/browser.go index 0d49c2e..b89a9d1 100644 --- a/browser.go +++ b/browser.go @@ -4,25 +4,29 @@ import ( "context" "errors" "fmt" + "log" + "strings" + "time" + "github.com/go-rod/rod" "github.com/go-rod/rod/lib/devices" "github.com/go-rod/rod/lib/launcher" + flags "github.com/go-rod/rod/lib/launcher/flags" "github.com/go-rod/rod/lib/proto" "golang.org/x/net/html" - "log" - "strings" - "time" ) type WorkerBrowser struct { - Url string - Retry int + Url string + Retry int + LauncherArgs []string } func NewWorkerBrowser(url string, retry int) *WorkerBrowser { return &WorkerBrowser{ - Url: url, - Retry: retry, + Url: url, + Retry: retry, + LauncherArgs: []string{}, } } @@ -143,6 +147,10 @@ func (wp *WorkerBrowser) renderScan(url 
string) (string, error) { l.Set("--ignore-certificate-errors") l.Set("disable-notifications", "true") + for _, arg := range wp.LauncherArgs { + l.Set(flags.Flag(arg), "true") + } + lurl := l.MustLaunch() b := rod.New().ControlURL(lurl). diff --git a/browser_test.go b/browser_test.go index 1824955..b672b3d 100644 --- a/browser_test.go +++ b/browser_test.go @@ -2,11 +2,12 @@ package gofofa import ( "fmt" - "github.com/stretchr/testify/assert" "net/http" "net/http/httptest" "testing" "time" + + "github.com/stretchr/testify/assert" ) func runningTime() func() { @@ -80,18 +81,21 @@ func TestWorkerBrowser_Run(t *testing.T) { // 错误url情况 b := NewWorkerBrowser("", 3) + b.LauncherArgs = []string{"no-sandbox"} body, err := b.Run() assert.NotNil(t, err) assert.Nil(t, body["body"]) // 常规js渲染 b = NewWorkerBrowser(ts.URL+"/js/normal", 3) + b.LauncherArgs = []string{"no-sandbox"} body, err = b.Run() assert.Nil(t, err) assert.Equal(t, "Updated Title", body["title"]) // 页面跳转 b = NewWorkerBrowser(ts.URL+"/js/redirect", 3) + b.LauncherArgs = []string{"no-sandbox"} body, err = b.Run() assert.Nil(t, err) assert.Equal(t, "Successfully Title", body["title"])