From c83955f49cd10f6389bcccdb3e31cb7671962e59 Mon Sep 17 00:00:00 2001
From: Visrut
Date: Thu, 27 Jul 2023 11:23:55 +0530
Subject: [PATCH] fix #163: parsed new HTML for input codeforces

---
 client/parse.go | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/client/parse.go b/client/parse.go
index 382b7345..60c6fcdf 100644
--- a/client/parse.go
+++ b/client/parse.go
@@ -19,23 +19,29 @@ import (
 )
 
 func findSample(body []byte) (input [][]byte, output [][]byte, err error) {
-	irg := regexp.MustCompile(`class="input"[\s\S]*?<pre>([\s\S]*?)</pre>`)
-	org := regexp.MustCompile(`class="output"[\s\S]*?<pre>([\s\S]*?)</pre>`)
+	irg := regexp.MustCompile(`<div class="input">[\s\S]*?<pre[^>]*>([\s\S]*?)</pre>`)
+	org := regexp.MustCompile(`<div class="output">[\s\S]*?<pre[^>]*>([\s\S]*?)</pre>`)
+
 	a := irg.FindAllSubmatch(body, -1)
 	b := org.FindAllSubmatch(body, -1)
+
 	if a == nil || b == nil || len(a) != len(b) {
 		return nil, nil, fmt.Errorf("Cannot parse sample with input %v and output %v", len(a), len(b))
 	}
-	newline := regexp.MustCompile(`<[\s/br]+?>`)
+
+	tagRegex := regexp.MustCompile(`<div[^>]*>|</div>`)
 	filter := func(src []byte) []byte {
-		src = newline.ReplaceAll(src, []byte("\n"))
+		src = tagRegex.ReplaceAll(src, []byte("\n"))
+		src = bytes.ReplaceAll(src, []byte("\n\n"), []byte("\n"))
 		s := html.UnescapeString(string(src))
-		return []byte(strings.TrimSpace(s) + "\n")
+		return []byte(strings.TrimSpace(s))
 	}
+
 	for i := 0; i < len(a); i++ {
 		input = append(input, filter(a[i][1]))
 		output = append(output, filter(b[i][1]))
 	}
+
 	return
 }
 