diff --git a/internal/inputs/htmlparser.go b/internal/inputs/htmlparser.go
index 529e5fb77..4b348c23d 100644
--- a/internal/inputs/htmlparser.go
+++ b/internal/inputs/htmlparser.go
@@ -12,6 +12,7 @@ import (
"strings"
"unicode"
+ "github.com/newrelic/nri-flex/internal/formatter"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -24,9 +25,9 @@ type Table struct {
}
// ParseToJSON parses a html fragment or whole document looking for HTML
-func ParseToJSON(s []byte) (string, error) {
+func ParseToJSON(s []byte, htmlAttributes map[string]string) (string, error) {
- tables, err := Parse(s)
+ tables, err := Parse(s, htmlAttributes)
if err != nil {
return "", err
}
@@ -87,13 +88,14 @@ func convertTable(t *Table, i int) string {
// Parse parses a html fragment or whole document looking for HTML
// tables. It converts all cells into text, stripping away any HTML content.
-func Parse(s []byte) ([]*Table, error) {
+func Parse(s []byte, htmlAttributes map[string]string) ([]*Table, error) {
node, err := html.Parse(bytes.NewReader(s))
if err != nil {
return nil, err
}
tables := []*Table{}
- parse(node, &tables)
+ var vThead = true
+ parse(node, &tables, vThead, htmlAttributes)
for kk, t := range tables {
tables[kk] = addMissingColumns(t)
@@ -102,14 +104,19 @@ func Parse(s []byte) ([]*Table, error) {
return tables, nil
}
-func innerText(n *html.Node) string {
+// innerText returns the text found under n, concatenated in document order.
+//
+// A text node yields its data passed through stripChars. Any other node
+// yields the concatenation of innerText over its children. When
+// parseAttribute is true, every element node visited (n itself and any
+// descendant element) first contributes the "key:value;" segments that
+// parseAttributes produces for the attributes selected by htmlAttributes,
+// so those segments appear in front of that element's text.
+//
+// Header cells are read with parseAttribute=false; data cells with true.
+func innerText(n *html.Node, parseAttribute bool, htmlAttributes map[string]string) string {
 	if n.Type == html.TextNode {
 		stripResult := stripChars(n.Data)
 		return stripResult
 	}
 	result := ""
+	// Only element nodes carry attributes worth matching.
+	if parseAttribute && n.Type == html.ElementNode {
+		result = parseAttributes(n.Attr, htmlAttributes)
+	}
 	for x := n.FirstChild; x != nil; x = x.NextSibling {
-		result += innerText(x)
+		result += innerText(x, parseAttribute, htmlAttributes)
 	}
 	return result
 }
@@ -143,7 +150,7 @@ func containTable(n *html.Node) bool {
return result
}
-func parse(n *html.Node, tables *[]*Table) {
+func parse(n *html.Node, tables *[]*Table, vThead bool, htmlAttributes map[string]string) {
strip := strings.TrimSpace
switch n.DataAtom {
case atom.Table:
@@ -155,9 +162,24 @@ func parse(n *html.Node, tables *[]*Table) {
t.Attributes[at.Key] = at.Val
}
*tables = append(*tables, t)
+ vThead = true
case atom.Th:
- t := (*tables)[len(*tables)-1]
- t.Headers = append(t.Headers, strip(innerText(n)))
+ if vThead {
+ t := (*tables)[len(*tables)-1]
+ t.Headers = append(t.Headers, strip(innerText(n, false, htmlAttributes)))
+ } else {
+ if !containTable(n) {
+ t := (*tables)[len(*tables)-1]
+ l := len(t.Rows) - 1
+ t.Rows[l] = append(t.Rows[l], strip(innerText(n, true, htmlAttributes)))
+ return
+ }
+ t := (*tables)[len(*tables)-1]
+ l := len(t.Rows) - 1
+ // If the <th> contains <table> element, set the <th> content to "TableElement"
+ t.Rows[l] = append(t.Rows[l], "TableElement")
+ }
+
case atom.Tr:
t := (*tables)[len(*tables)-1]
t.Rows = append(t.Rows, []string{})
@@ -165,7 +187,7 @@ func parse(n *html.Node, tables *[]*Table) {
if !containTable(n) {
t := (*tables)[len(*tables)-1]
l := len(t.Rows) - 1
- t.Rows[l] = append(t.Rows[l], strip(innerText(n)))
+ t.Rows[l] = append(t.Rows[l], strip(innerText(n, true, htmlAttributes)))
return
}
t := (*tables)[len(*tables)-1]
@@ -173,9 +195,13 @@ func parse(n *html.Node, tables *[]*Table) {
// If the <td> contains <table> element, set the <td> content to "TableElement"
t.Rows[l] = append(t.Rows[l], "TableElement")
+ case atom.Thead:
+ vThead = true
+ case atom.Tbody:
+ vThead = false
}
for child := n.FirstChild; child != nil; child = child.NextSibling {
- parse(child, tables)
+ parse(child, tables, vThead, htmlAttributes)
}
}
@@ -208,3 +234,18 @@ func addMissingColumns(t *Table) *Table {
t.Rows = rows
return t
}
+
+// parseAttributes renders the attributes in input that match the configured
+// patterns as a sequence of "key:value;" segments, in attribute order.
+//
+// htmlAttributes maps an attribute-name pattern to an attribute-value
+// pattern. An attribute is kept when formatter.KvFinder, called with mode
+// "regex", accepts both its key and its value for the same map entry. The
+// result is "" when nothing matches, including when htmlAttributes is nil
+// or empty.
+func parseAttributes(input []html.Attribute, htmlAttributes map[string]string) string {
+	var b strings.Builder
+	for _, attr := range input {
+		for keyPattern, valPattern := range htmlAttributes {
+			if formatter.KvFinder("regex", attr.Key, keyPattern) && formatter.KvFinder("regex", attr.Val, valPattern) {
+				b.WriteString(attr.Key + ":" + attr.Val + ";")
+				// One segment per attribute: stop at the first matching entry so
+				// overlapping patterns do not emit the same attribute twice.
+				break
+			}
+		}
+	}
+	return b.String()
+}
diff --git a/internal/inputs/htmlparser_test.go b/internal/inputs/htmlparser_test.go
index ac838f620..5c180bcd5 100644
--- a/internal/inputs/htmlparser_test.go
+++ b/internal/inputs/htmlparser_test.go
@@ -15,46 +15,61 @@ func TestParseToJSON(t *testing.T) {
}
testCases := map[string]struct {
- parseCfg load.API
- value string
- key string
- expected string
+ parseCfg load.API
+ value string
+ htmlAttributes map[string]string
+ key string
+ expected string
}{
"SingleTable": {
parseCfg: getConfig(
true),
value: `
+
| Heading 1 | Heading 11 | Heading 12 | Heading 13 | Heading 14 |
- | Data 11 | Data 12 |
+
+ | Data 11 | Data 12 |
| Data 21 | Data 22 |
| Data 31 | Data 32 |
| Data 41 | Data 42 |
+
`,
- expected: `[{"table":[{ "Heading 1": "Data 11", "Heading 11": "Data 12", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 21", "Heading 11": "Data 22", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 31", "Heading 11": "Data 32", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 41", "Heading 11": "Data 42", "Heading 12": "", "Heading 13": "", "Heading 14": ""}], "source": "myTestPage1","Index":0 }]`,
+ htmlAttributes: map[string]string{
+ "class": ".*",
+ },
+ expected: `[{"table":[{ "Heading 1": "class:city;Data 11", "Heading 11": "Data 12", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 21", "Heading 11": "Data 22", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 31", "Heading 11": "Data 32", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 41", "Heading 11": "Data 42", "Heading 12": "", "Heading 13": "", "Heading 14": ""}], "source": "myTestPage1","Index":0 }]`,
},
"TwoTableWithAttribute": {
parseCfg: getConfig(
true),
value: `
+
| Heading 1 | Heading 11 | Heading 12 | Heading 13 | Heading 14 |
+
| Data 11 | Data 12 |
| Data 21 | Data 22 |
| Data 31 | Data 32 |
| Data 41 | Data 42 |
+
Stuff in here
+
| Heading 21 | Heading 22 |
| Data 211 | Data 212 |
| Data 221 | Data 222 |
| Data 231 | Data 232 |
| Data 241 | Data 242 |
+
`,
+ htmlAttributes: map[string]string{
+ "class": ".*",
+ },
expected: `[{"table":[{ "Heading 1": "Data 11", "Heading 11": "Data 12", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 21", "Heading 11": "Data 22", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 31", "Heading 11": "Data 32", "Heading 12": "", "Heading 13": "", "Heading 14": ""},{ "Heading 1": "Data 41", "Heading 11": "Data 42", "Heading 12": "", "Heading 13": "", "Heading 14": ""}], "source": "myTestTable1","Index":0 },{"table":[{ "Heading 21": "Data 211", "Heading 22": "Data 212"},{ "Heading 21": "Data 221", "Heading 22": "Data 222"},{ "Heading 21": "Data 231", "Heading 22": "Data 232"},{ "Heading 21": "Data 241", "Heading 22": "Data 242"}], "source": "myTestTable2","Index":1 }]`,
},
}
@@ -62,7 +77,7 @@ func TestParseToJSON(t *testing.T) {
for testName, testCase := range testCases {
t.Run(testName, func(t *testing.T) {
- result, _ := ParseToJSON([]byte(testCase.value))
+ result, _ := ParseToJSON([]byte(testCase.value), testCase.htmlAttributes)
assert.Equal(t, testCase.expected, string(result))
})
}
diff --git a/internal/inputs/http.go b/internal/inputs/http.go
index 8e4faf872..493a9481c 100644
--- a/internal/inputs/http.go
+++ b/internal/inputs/http.go
@@ -105,9 +105,9 @@ func RunHTTP(dataStore *[]interface{}, doLoop *bool, yml *load.Config, api load.
handleJSON(dataStore, jsonBody.Bytes(), &resp, doLoop, reqURL, nextLink, api.ReturnHeaders)
}
}
- case (contentType == "text/html" || contentType == "text/html; charset=utf-8") && api.ParseHTML:
+ case (strings.Contains(strings.ToLower(contentType), "text/html")) && api.ParseHTML:
body, _ := ioutil.ReadAll(resp.Body)
- jsonBody, err := ParseToJSON(body)
+ jsonBody, err := ParseToJSON(body, api.ParseHtmlAttributes)
if err != nil {
load.Logrus.WithError(err).Errorf("http: URL %v failed to convert XML to Json resp.Body", *reqURL)
} else {
diff --git a/internal/load/load.go b/internal/load/load.go
index dd02d379a..a81228017 100644
--- a/internal/load/load.go
+++ b/internal/load/load.go
@@ -257,62 +257,63 @@ type SampleMerge struct {
// API YAML Struct
type API struct {
- Name string `yaml:"name"`
- EventType string `yaml:"event_type"` // override eventType
- Entity string `yaml:"entity"` // define a custom entity name
- EntityType string `yaml:"entity_type"` // define a custom entity type (namespace)
- Ingest bool `yaml:"ingest"`
- Inventory map[string]string `yaml:"inventory"` // set as inventory
- InventoryOnly bool `yaml:"inventory_only"` // only generate inventory data
- Events map[string]string `yaml:"events"` // set as events
- EventsOnly bool `yaml:"events_only"` // only generate events
- Merge string `yaml:"merge"` // merge into another eventType
- RunAsync bool `yaml:"run_async" ` // API block to run in Async mode when using with lookupstore
- AsyncRate int `yaml:"async_rate"` //Async Request Throttle Rate
- JoinKey string `yaml:"join_key"` // merge into another eventType
- Prefix string `yaml:"prefix"` // prefix attribute keys
- File string `yaml:"file"`
- URL string `yaml:"url"`
- Pagination Pagination `yaml:"pagination"`
- EscapeURL bool `yaml:"escape_url"`
- Prometheus Prometheus `yaml:"prometheus"`
- Cache string `yaml:"cache"` // read data from datastore
- Database string `yaml:"database"`
- DBDriver string `yaml:"db_driver"`
- DBConn string `yaml:"db_conn"`
- Shell string `yaml:"shell"`
- CommandsAsync bool `yaml:"commands_async"` // run commands async
- Commands []Command `yaml:"commands"`
- DBQueries []Command `yaml:"db_queries"`
- DBAsync bool `yaml:"db_async"` // perform db queries async
- Jq string `yaml:"jq"` // parse data using jq
- ParseHTML bool `yaml:"parse_html"` // parse text/html content type table element to JSON
- Jmx JMX `yaml:"jmx"`
- IgnoreLines []int // not implemented - idea is to ignore particular lines starting from 0 of the command output
- User, Pass string
- Proxy string
- TLSConfig TLSConfig `yaml:"tls_config"`
- Timeout int
- Method string
- Payload string
- Headers map[string]string `yaml:"headers"`
- DisableParentAttr bool `yaml:"disable_parent_attr"`
- StartKey []string `yaml:"start_key"` // start from a different section of the payload
- StoreLookups map[string]string `yaml:"store_lookups"`
- DedupeLookups []string `yaml:"dedupe_lookups"`
- StoreVariables map[string]string `yaml:"store_variables"`
- LazyFlatten []string `yaml:"lazy_flatten"`
- SampleKeys map[string]string `yaml:"sample_keys"`
- RenameSamples map[string]string `yaml:"rename_samples"` // using regex if sample has a key that matches, make that a different sample
- SkipProcessing []string `yaml:"skip_processing"` // skip processing particular keys using an array of regex strings
- InheritAttributes bool `yaml:"inherit_attributes"` // attempts to inherit attributes were possible
- CustomAttributes map[string]string `yaml:"custom_attributes"` // set additional custom attributes
- SplitObjects bool `yaml:"split_objects"` // convert object with nested objects to array
- SplitArray bool `yaml:"split_array"` // convert array to samples, use SetHeader to set attribute name
- LeafArray bool `yaml:"leaf_array"` // convert array element to samples when SplitArray, use SetHeader to set attribute name
- Scp SCP `yaml:"scp"`
- HWSigner HWSigner `yaml:"hw_signer"` // Huawei Cloud Service API signer
- AliyunSigner AliyunSigner `yaml:"aliyun_signer"` // Huawei Cloud Service API signer
+ Name string `yaml:"name"`
+ EventType string `yaml:"event_type"` // override eventType
+ Entity string `yaml:"entity"` // define a custom entity name
+ EntityType string `yaml:"entity_type"` // define a custom entity type (namespace)
+ Ingest bool `yaml:"ingest"`
+ Inventory map[string]string `yaml:"inventory"` // set as inventory
+ InventoryOnly bool `yaml:"inventory_only"` // only generate inventory data
+ Events map[string]string `yaml:"events"` // set as events
+ EventsOnly bool `yaml:"events_only"` // only generate events
+ Merge string `yaml:"merge"` // merge into another eventType
+ RunAsync bool `yaml:"run_async" ` // API block to run in Async mode when using with lookupstore
+ AsyncRate int `yaml:"async_rate"` //Async Request Throttle Rate
+ JoinKey string `yaml:"join_key"` // merge into another eventType
+ Prefix string `yaml:"prefix"` // prefix attribute keys
+ File string `yaml:"file"`
+ URL string `yaml:"url"`
+ Pagination Pagination `yaml:"pagination"`
+ EscapeURL bool `yaml:"escape_url"`
+ Prometheus Prometheus `yaml:"prometheus"`
+ Cache string `yaml:"cache"` // read data from datastore
+ Database string `yaml:"database"`
+ DBDriver string `yaml:"db_driver"`
+ DBConn string `yaml:"db_conn"`
+ Shell string `yaml:"shell"`
+ CommandsAsync bool `yaml:"commands_async"` // run commands async
+ Commands []Command `yaml:"commands"`
+ DBQueries []Command `yaml:"db_queries"`
+ DBAsync bool `yaml:"db_async"` // perform db queries async
+ Jq string `yaml:"jq"` // parse data using jq
+ ParseHTML bool `yaml:"parse_html"` // parse text/html content type table element to JSON
+ ParseHtmlAttributes map[string]string `yaml:"parse_html_attributes"` // regex map of attribute name -> attribute value; matching HTML attributes inside table cells are prepended to the cell text
+ Jmx JMX `yaml:"jmx"`
+ IgnoreLines []int // not implemented - idea is to ignore particular lines starting from 0 of the command output
+ User, Pass string
+ Proxy string
+ TLSConfig TLSConfig `yaml:"tls_config"`
+ Timeout int
+ Method string
+ Payload string
+ Headers map[string]string `yaml:"headers"`
+ DisableParentAttr bool `yaml:"disable_parent_attr"`
+ StartKey []string `yaml:"start_key"` // start from a different section of the payload
+ StoreLookups map[string]string `yaml:"store_lookups"`
+ DedupeLookups []string `yaml:"dedupe_lookups"`
+ StoreVariables map[string]string `yaml:"store_variables"`
+ LazyFlatten []string `yaml:"lazy_flatten"`
+ SampleKeys map[string]string `yaml:"sample_keys"`
+ RenameSamples map[string]string `yaml:"rename_samples"` // using regex if sample has a key that matches, make that a different sample
+ SkipProcessing []string `yaml:"skip_processing"` // skip processing particular keys using an array of regex strings
+ InheritAttributes bool `yaml:"inherit_attributes"` // attempts to inherit attributes where possible
+ CustomAttributes map[string]string `yaml:"custom_attributes"` // set additional custom attributes
+ SplitObjects bool `yaml:"split_objects"` // convert object with nested objects to array
+ SplitArray bool `yaml:"split_array"` // convert array to samples, use SetHeader to set attribute name
+ LeafArray bool `yaml:"leaf_array"` // convert array element to samples when SplitArray, use SetHeader to set attribute name
+ Scp SCP `yaml:"scp"`
+ HWSigner HWSigner `yaml:"hw_signer"` // Huawei Cloud Service API signer
+ AliyunSigner AliyunSigner `yaml:"aliyun_signer"` // Aliyun (Alibaba Cloud) API signer
// Key manipulation
ToLower bool `yaml:"to_lower"` // convert all unicode letters mapped to their lower case.
ConvertSpace string `yaml:"convert_space"` // convert spaces to another char
diff --git a/internal/processor/create.go b/internal/processor/create.go
index ac2bb8e83..32e84aae9 100644
--- a/internal/processor/create.go
+++ b/internal/processor/create.go
@@ -119,11 +119,9 @@ func CreateMetricSets(samples []interface{}, config *load.Config, i int, mergeMe
runSampleFilterExperimental := true
// check if we should ignore this output completely
// useful when requests are made to generate a lookup, but the data is not needed
- if api.IgnoreOutput {
- createSample = false
- currentSample["event_type"] = eventType
- load.IgnoredIntegrationData = append(load.IgnoredIntegrationData, currentSample)
- } else {
+
+ // else
+ {
// check if this contains any key pair values to filter out
excludeSample := true
// evalute sample_include_filter if sample_include_match_all_filter is not specified
@@ -150,6 +148,12 @@ func CreateMetricSets(samples []interface{}, config *load.Config, i int, mergeMe
}
}
+ if api.IgnoreOutput && createSample {
+ createSample = false
+ currentSample["event_type"] = eventType
+ load.IgnoredIntegrationData = append(load.IgnoredIntegrationData, currentSample)
+ }
+
if createSample {
RunMathCalculations(&api.Math, ¤tSample)
diff --git a/test/testbed/scenarios/fixtures/url_api.go b/test/testbed/scenarios/fixtures/url_api.go
index a15dbad1f..82f1e382d 100644
--- a/test/testbed/scenarios/fixtures/url_api.go
+++ b/test/testbed/scenarios/fixtures/url_api.go
@@ -50,4 +50,24 @@ integrations:
`,
ExpectedStdout: `{"name":"com.newrelic.nri-flex","protocol_version":"3","integration_version":"Unknown-SNAPSHOT","data":[{"metrics":[{"api.StatusCode":200,"event_type":"TestURL","id":"eca0338f4ea31566","integration_name":"com.newrelic.nri-flex","integration_version":"Unknown-SNAPSHOT","leader_info.abc.def":123,"leader_info.abc.hij":234,"leader_info.leader":"8a69d5f6b7814500","leader_info.start_time":"2014-10-24T13:15:51.186620747-07:00","leader_info.uptime":"10m59.322358947s","name":"node3"},{"event_type":"flexStatusSample","flex.Hostname":"d43822b4a811","flex.IntegrationVersion":"Unknown-SNAPSHOT","flex.counter.ConfigsProcessed":1,"flex.counter.EventCount":1,"flex.counter.EventDropCount":0,"flex.counter.HttpRequests":1,"flex.counter.TestURL":1,"flex.time.elapsedMs":7,"flex.time.endMs":1654770857187,"flex.time.startMs":1654770857180}],"inventory":{},"events":[]}]}`,
},
+ {
+ Name: "html parse attributes parse_html_attributes",
+ Endpoint: "parse_html_attributes",
+ Port: "8001",
+ Payload: `HTML Table| Name | Expenditure |
|---|
| Alex | 500 | | Sarah | 8000 | | Josh | 7500 |
| `,
+ Config: `
+---
+integrations:
+ - name: nri-flex
+ config:
+ name: gcpStatus
+ apis:
+ - event_type: gcpStatus
+ url: http://127.0.0.1:8001/parse_html_attributes
+ parse_html: true
+ parse_html_attributes:
+ class: .*
+`,
+ ExpectedStdout: `{ "name": "com.newrelic.nri-flex", "protocol_version": "3", "integration_version": "Unknown-SNAPSHOT", "data": [ { "metrics": [ { "Col 1": "TableElement", "Index": 0, "api.StatusCode": 200, "border": 1, "event_type": "gcpStatus", "integration_name": "com.newrelic.nri-flex", "integration_version": "Unknown-SNAPSHOT", "width": "100%" }, { "Expenditure": "class:standard abc;500", "Index": 1, "Name": "Alex", "api.StatusCode": 200, "border": 1, "event_type": "gcpStatus", "integration_name": "com.newrelic.nri-flex", "integration_version": "Unknown-SNAPSHOT", "width": "100%" }, { "Expenditure": "class:requireApproval xyz;8000", "Index": 1, "Name": "Sarah", "api.StatusCode": 200, "border": 1, "event_type": "gcpStatus", "integration_name": "com.newrelic.nri-flex", "integration_version": "Unknown-SNAPSHOT", "width": "100%" }, { "Expenditure": "class:requireApproval abc123;7500", "Index": 1, "Name": "Josh", "api.StatusCode": 200, "border": 1, "event_type": "gcpStatus", "integration_name": "com.newrelic.nri-flex", "integration_version": "Unknown-SNAPSHOT", "width": "100%" }, { "event_type": "flexStatusSample", "flex.Hostname": "C02FWCLFMD6M", "flex.IntegrationVersion": "Unknown-SNAPSHOT", "flex.counter.ConfigsProcessed": 1, "flex.counter.EventCount": 4, "flex.counter.EventDropCount": 0, "flex.counter.HttpRequests": 1, "flex.counter.gcpStatus": 4, "flex.time.elapsedMs": 10, "flex.time.endMs": 1673401968833, "flex.time.startMs": 1673401968823 } ], "inventory": {}, "events": [] } ] }`,
+ },
}
| | |