eisop · aryanneb · Mar 14, 2025 · Mar 17, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/report-generation/README.md b/report-generation/README.md
@@ -0,0 +1,54 @@
+JDK API Changes Report Generation
+
+Tina Xia, tzxia@uwaterloo.ca for issues or questions
+Wednesday, April 2, 2025
+
+This project automates the review process for methods newly introduced in each successive JDK version.
+
+-----------
+
+Configuration
+
+PERFORMING MINIMIZATION AND STORING RESULTS 
+1. Create a sub-directory called "results" in the "report-generation" directory 
+
+2. Update the absolute paths in config.py to match your environment:
+
+JDK_REPO_PATH: path to your local JDK repository.
+CHECKER_FRAMEWORK_REPO_PATH: path to your local Checker Framework repository.
+RESULTS_BASE_DIR: path to the "results" sub-directory you just created 
+
+3. Run the minimization script:
+- python3 perform-minimization.py
+- This will generate and store the minimized files for each JDK version in "results"
+
+GENERATE DIFF JSONS 
+4. Run the JSON generation script:
+- python3 generate-json.py
+- This compares the minimized files of consecutive versions and stores the differences in a sub-directory called "json_files" inside a directory called "report-files"
+- This script can be modified to generate JSONs for only a defined subset of the minimized files by editing the "versions" list near the end of the script
+
+GENERATE CSVs
+5. Run the CSV generation script:
+- python3 generate-csv.py
+- This will generate a CSV tracker for the review status of classes by iterating over the JSON files generated in the previous step
+- NOTE: when introducing a new JDK version, the "versions_to_process" list in this script must be changed to contain ONLY the newly introduced JDK version; otherwise the CSVs of past versions are regenerated with every review status reset to "False".
+
+GENERATE HTML Report
+6. Run the HTML report generation script:
+
+- python3 generate-report.py
+- This creates a report and opens it in your browser, loading information from the JSONs and CSVs generated in the previous steps
+
+-----------
+
+Use
+
+Once a method has been reviewed, update the final column ("checked") of its row in the corresponding CSV by changing the entry from "False" to "True". The next time the report generation script is run, a tag on the report will indicate the updated status. You must re-run the report generation script ("python3 generate-report.py") to see the changes.
+
+-----------
+
+Future Updates
+
+New JDK Versions:
+When a new JDK version is released, update the branch list in the scripts to include only the newly introduced versions. This ensures that processing a new branch does not overwrite the review progress recorded for previous versions.
diff --git a/report-generation/config.py b/report-generation/config.py
@@ -0,0 +1,4 @@
+# Machine-specific absolute paths: edit all three for your environment before running the scripts (the values below are one developer's local setup)
+JDK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/jdk"  # local JDK repository
+CHECKER_FRAMEWORK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/checker-framework"  # local Checker Framework repository
+RESULTS_BASE_DIR = "/Users/tinaxia/Desktop/eisop/jdk/report-generation/results"  # "results" sub-directory of report-generation
diff --git a/report-generation/generate-csv.py b/report-generation/generate-csv.py
@@ -0,0 +1,62 @@
+import json
+import os
+import csv
+
+csv_folder = "../report-files/csv_reports"  # output location, relative to the current working directory
+os.makedirs(csv_folder, exist_ok=True)
+
+data = {}  # version name -> per-file diff data loaded from that version's JSON
+json_folder = "../report-files/json_files"  # input location, relative to the current working directory
+os.makedirs(csv_folder, exist_ok=True)  # NOTE(review): repeats the call above; json_folder is not created here - confirm whether json_folder was intended
+
+json_files = [f for f in os.listdir(json_folder) if f.endswith(".json")]  # NOTE(review): not referenced again in the visible code
+
+#restrict this list to newly added jdk versions: the CSV of every listed version is rewritten from scratch, so past versions would be overwritten with every review status set to False
+versions_to_process = ['jdk-18', 'jdk-19', 'jdk-20', 'jdk-21', 'jdk-22', 'jdk-23', 'jdk-24']
+
+for filename in os.listdir(json_folder):  # load the JSON of each selected version into data
+    if filename.endswith(".json"):
+        version = os.path.splitext(filename)[0]  # file name without ".json" is used as the version name
+        #skips any version that is not in the versions_to_process list
+        if version not in versions_to_process:
+            continue
+        file_path = os.path.join(json_folder, filename)
+        with open(file_path, "r", encoding="utf-8") as f:
+            file_data = json.load(f)
+            data[version] = file_data[version]  # the JSON's top-level key must equal the file stem, otherwise this raises KeyError
+
+for version in data:
+    #path of this version's csv; opening it with "w" below replaces any existing file
+    current_csv = os.path.join(csv_folder, f"{version}_methods.csv")
+    with open(current_csv, "w", newline='', encoding="utf-8") as csvfile:
+
+        fieldnames = ["version", "file_url", "method", "checked"]
+
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+
+        #entries are grouped by file_url in the version's json
+        for file_url, file_data in data[version].items():
+            new_changes = file_data.get("new_changes", {})
+            new_classes = new_changes.get("new_classes", [])
+            new_methods = new_changes.get("new_methods", [])  # NOTE(review): read here but never written to the CSV
+
+            writer.writerow({  # file-level row: "method" is omitted, so DictWriter leaves that column empty
+                    "version": version, 
+                    "file_url": file_url,
+                    "checked": "False"
+                })
+
+            # One row per newly introduced class, with the class declaration stored in the "method" column
+            for cls in new_classes:
+                declaration = cls.get("declaration")  # NOTE(review): unused; the row below calls cls.get("declaration") again
+
+                #every row starts with checked set to "False"
+                writer.writerow({
+                    "version": version, 
+                    "file_url": file_url,
+                    "method": cls.get("declaration"),
+                    "checked": "False"
+                })
+
+    print(f"Created csv for {version}")
diff --git a/report-generation/generate-json.py b/report-generation/generate-json.py
@@ -0,0 +1,154 @@
+import os
+import json
+import difflib
+import re
+
+#lines to ignore: empty lines, lines starting with "import", lines containing "/*", lines starting with "*" (block-comment continuation or "*/"), and lines starting with a closing brace (}); "//" comments are not matched
+pattern = re.compile(r'^(?:\s*$|import\b|.*\/\*.*|\s*\*|\s*\})')
+
+def extract_new_changes(old_lines, new_lines):
+    # Diffs two versions of a file and returns {"new_classes": [{"declaration", "methods"}, ...], "new_methods": [...]}, built from the added lines that pattern does not filter out (any such line, not only method signatures).
+    #get list of differences
+    diff = list(difflib.ndiff(old_lines, new_lines))
+    new_changes = []  #list of tuples (new_line_index, content)
+    new_index = 0  #0-based position in new_lines of the diff line being visited
+
+    for line in diff:
+        if line.startswith('+ '):
+            content = line[2:].strip()
+            if not pattern.match(content):
+                new_changes.append((new_index, content))
+
+        #only unchanged ('  ') and added ('+ ') diff lines exist in new_lines, so only they advance the index
+        if line.startswith('  ') or line.startswith('+ '):
+            new_index += 1
+
+    #position of every line in the new file that contains the word "class"
+    class_positions = []
+    for i, line in enumerate(new_lines):
+
+        if re.search(r'\bclass\b', line):  # NOTE(review): also matches "class" inside comments or expressions such as Foo.class
+            class_positions.append((i, line.strip()))
+
+    new_classes = {}  # keyed by the declaration's index in new_lines
+    #added lines that do not fall under a newly added class declaration
+    new_methods = []
+
+    for idx, content in new_changes:  # first pass: register every added line containing "class" as a new class
+        if re.search(r'\bclass\b', content):
+            new_classes[idx] = {"declaration": content, "methods": []}
+
+    for idx, content in new_changes:  # second pass: assign the remaining added lines
+        if re.search(r'\bclass\b', content):
+            continue  #already handled
+
+        parent_class_idx = None  # ends up as the last class position at or before idx
+        for pos, decl in class_positions:
+            if pos <= idx:
+                parent_class_idx = pos
+            else:
+                break  # class_positions is in ascending order, so later entries lie past idx
+
+        if parent_class_idx is not None and parent_class_idx in new_classes:
+            new_classes[parent_class_idx]["methods"].append(content)
+        else:
+            new_methods.append(content)
+
+    return {
+        "new_classes": list(new_classes.values()),
+        "new_methods": new_methods
+    }
+
+#https://stackoverflow.com/questions/8625991/use-python-os-walk-to-identify-a-list-of-files
+#walks every subdirectory of the given directory and collects the path of each file found (the script passes a per-version directory under results)
+def list_java_files(directory):
+    java_files = []  # NOTE(review): no extension check is applied, so files other than .java are included too
+    for root, _, files in os.walk(directory):
+        for file in files:
+            full_path = os.path.join(root, file)
+            relative_path = os.path.relpath(full_path, directory)
+            java_files.append(relative_path)
+
+    #every returned path is relative to directory
+    return java_files
+
+def read_file_lines(filepath):
+    # Returns the file's lines as a list (line endings kept), decoding the file as UTF-8.
+    with open(filepath, 'r', encoding='utf-8') as f:
+        return f.readlines()
+
+#https://docs.python.org/3/library/difflib.html
+#uses python's difflib.ndiff to compare two lists of lines and returns the added lines that pass the pattern filter (NOTE(review): not called in the visible code)
+def extract_new_lines(old_lines, new_lines):
+
+    diff = difflib.ndiff(old_lines, new_lines)
+    newly_added = []
+
+    for line in diff:
+        #added lines start with "+ " in ndiff output
+        if line.startswith('+ '):
+            content = line[2:].strip()
+            #keeps the line only if it does not match the ignore pattern
+            if not pattern.match(content):
+                newly_added.append(content)
+    return newly_added
+
+#returns a dictionary {github_url: {"new_changes": ...}} covering the files under new_version_dir that gained classes or other lines relative to old_version_dir
+def compare_version_pair(old_version_dir, new_version_dir):
+    new_files = list_java_files(new_version_dir)  # only files present in the new version are visited
+    file_diff = {}
+
+    for rel_path in new_files:
+        #generates github URL; the branch name is the last path component of new_version_dir
+        branch = os.path.basename(new_version_dir)
+        url = f"https://github.com/eisop/jdk/tree/{branch}"
+        parts = rel_path.split(os.path.sep)
+        url = url + '/' + '/'.join(parts[1:])  # drops the first path component - presumably a wrapper directory in the results layout; TODO confirm
+
+        new_file_path = os.path.join(new_version_dir, rel_path)
+        old_file_path = os.path.join(old_version_dir, rel_path)
+
+        if os.path.exists(old_file_path):
+            old_lines = read_file_lines(old_file_path)
+        else:
+            old_lines = []  # no counterpart in the old version, so the diff treats every line as added
+
+        new_lines = read_file_lines(new_file_path)
+
+        changes = extract_new_changes(old_lines, new_lines)
+        if changes["new_classes"] or changes["new_methods"]:  # files without additions are left out of the result
+            file_diff[url] = {
+                "new_changes": changes
+            }
+    return file_diff
+
+#this list must be modified for newly added jdk versions; each entry is compared with the entry before it, so the order matters
+versions = ['master', 'jdk-18', 'jdk-19', 'jdk-20', 'jdk-21', 'jdk-22', 'jdk-23', 'jdk-24']
+
+base_path = './results/'  # relative to the current working directory
+
+json_folder = "../report-files/json_files"
+os.makedirs(json_folder, exist_ok=True)
+
+#compares each version with its predecessor in the list and creates a separate json file for each pair, named after the newer version
+for i in range(1, len(versions)):
+
+    results = {}
+
+    old_version = versions[i - 1]
+    new_version = versions[i]
+
+    version_pair_key = new_version  # the newer version's name is the top-level key of the json
+
+    old_dir = os.path.join(base_path, old_version)
+    new_dir = os.path.join(base_path, new_version)
+
+    current_diff = compare_version_pair(old_dir, new_dir)
+    results[version_pair_key] = current_diff
+
+    current_json = os.path.join(json_folder, f"{versions[i]}.json")
+
+    with open(current_json, 'w', encoding='utf-8') as out_f:
+        json.dump(results, out_f, indent=2)
+
+    print(f"Created json for {old_version} -> {new_version}")