# Source: ConferenceCode/V2_Sys_Eng.py at master · packers12345/ConferenceCode · GitHub
import pypyodbc as odbc  # pip install pypyodbc
import openai
import spacy
import re
from typing import Dict, List, Any
import PyPDF2  # Import the PyPDF2 library
from io import BytesIO
import base64
import os
import requests  # Added for image downloading

# For Graphormer integration and visualization
import networkx as nx
from pyvis.network import Network
import json
from pathlib import Path
import graphviz  # Add this import at the top with other imports

# Load spaCy NLP model
nlp = spacy.load("en_core_web_sm")

def initialize_api(api_key: str) -> bool:
    """Initialize the OpenAI API using the provided API key.

    Args:
        api_key: Secret key that is assigned to ``openai.api_key``.

    Returns:
        True when the key was stored on the ``openai`` module; False when no
        key was supplied or storing it raised an exception.
    """
    if not api_key:
        print("No API key provided.")
        return False
    try:
        openai.api_key = api_key
        # Never echo the full secret: stdout routinely ends up in logs.
        key_text = str(api_key)
        masked_key = f"...{key_text[-4:]}" if len(key_text) > 8 else "****"
        print(f"API initialized with key: {masked_key}")
        return True
    except Exception as e:
        print(f"Error initializing API: {e}")
        return False

def connect_to_db():
    """Open an ODBC connection to MS SQL Server using environment settings.

    The server, database, user and password are read from the DB_SERVER,
    DB_NAME, DB_USER and DB_PASSWORD environment variables (each falls back
    to "X" when unset).

    Returns:
        The connection object, or None after printing the error when the
        connection attempt raises.
    """
    try:
        # (connection-string keyword, environment variable), in emit order.
        env_backed_fields = (
            ("Server", "DB_SERVER"),
            ("Database", "DB_NAME"),
            ("Uid", "DB_USER"),
            ("Pwd", "DB_PASSWORD"),
        )
        segments = ["Driver={ODBC Driver 18 for SQL Server};"]
        for keyword, env_name in env_backed_fields:
            segments.append(f"{keyword}={os.environ.get(env_name, 'X')};")
        segments.append("TrustServerCertificate=yes;")
        segments.append("Connection Timeout=300;")
        return odbc.connect("".join(segments))
    except Exception as e:
        print(f"Database connection error: {e}")
        return None

def list_all_tables() -> List[str]:
    """Retrieve a list of all tables in the 'dbo' schema.

    Returns:
        The table names reported by INFORMATION_SCHEMA.TABLES for the 'dbo'
        schema, or an empty list when the connection cannot be opened or the
        query fails (the error is printed).
    """
    conn = connect_to_db()
    if not conn:
        return []
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'dbo';")
        tables = [row[0] for row in cursor.fetchall()]
    except Exception as e:
        print(f"Error fetching table list: {e}")
        return []
    finally:
        # Close on every path; previously a failed query leaked the connection.
        try:
            conn.close()
        except Exception as close_error:
            print(f"Error closing database connection: {close_error}")
    if tables:
        print(f"Tables have been retrieved successfully: {tables}")
    else:
        print("No tables found in the 'dbo' schema.")
    return tables

def fetch_table_structure() -> Dict[str, Dict[str, str]]:
    """Retrieve column details for all tables in the database.

    Returns:
        A mapping ``{table_name: {column_name: data_type}}`` for every table
        returned by ``list_all_tables()``, or an empty dict when the
        connection cannot be opened or any query fails (the error is printed).
    """
    conn = connect_to_db()
    if not conn:
        return {}
    table_structure: Dict[str, Dict[str, str]] = {}
    try:
        cursor = conn.cursor()
        for table in list_all_tables():
            # Bind the table name as a parameter instead of formatting it into
            # the SQL text, and restrict the lookup to the same 'dbo' schema
            # that list_all_tables() enumerates.
            cursor.execute(
                "SELECT COLUMN_NAME, DATA_TYPE "
                "FROM INFORMATION_SCHEMA.COLUMNS "
                "WHERE TABLE_SCHEMA = 'dbo' AND TABLE_NAME = ?;",
                (table,),
            )
            table_structure[table] = {col[0]: col[1] for col in cursor.fetchall()}
        return table_structure
    except Exception as e:
        print(f"Error fetching table structures: {e}")
        return {}
    finally:
        # Close on every path; previously a failed query leaked the connection.
        try:
            conn.close()
        except Exception as close_error:
            print(f"Error closing database connection: {close_error}")

def fetch_specific_table(table_name: str, limit: int = 5) -> List[Any]:
    """
    Fetch up to `limit` rows from the given table_name.

    Both arguments are validated before a connection is opened because they
    are formatted into the SQL text (identifiers and TOP cannot be bound as
    ordinary parameters here).

    Args:
        table_name: Name made only of word characters (letters, digits, '_').
        limit: Non-negative row cap; must be convertible with ``int()``.

    Returns:
        A list of rows (one tuple per row), or an empty list when validation,
        the connection, or the query fails (the error is printed).
    """
    try:
        # fullmatch: '^\w+$' would also accept a name with a trailing newline.
        if not isinstance(table_name, str) or not re.fullmatch(r'\w+', table_name):
            raise ValueError("Invalid table name format.")
        row_limit = int(limit)
        if row_limit < 0:
            raise ValueError("Row limit must be non-negative.")
    except (TypeError, ValueError) as e:
        print(f"Error fetching data from table '{table_name}': {e}")
        return []

    conn = connect_to_db()
    if not conn:
        return []
    try:
        cursor = conn.cursor()
        # Bracket-quote the validated identifier so reserved words still work.
        query = f"SELECT TOP {row_limit} * FROM [{table_name}];"
        cursor.execute(query)
        return cursor.fetchall()
    except Exception as e:
        print(f"Error fetching data from table '{table_name}': {e}")
        return []
    finally:
        # Close on every path; previously a failed query leaked the connection.
        try:
            conn.close()
        except Exception as close_error:
            print(f"Error closing database connection: {close_error}")

def detect_table_name(user_text: str) -> str:
    """
    Find the identifier that follows the word 'table' in user_text.

    Matching is case-insensitive and the identifier may contain letters,
    digits and underscores. For example, text containing
    'table system_requirements' yields 'system_requirements'.
    Returns an empty string when no such mention exists.
    """
    found = re.search(r'\btable\s+([a-zA-Z0-9_]+)', user_text, flags=re.IGNORECASE)
    return found.group(1) if found else ""

def enhance_user_requirements(user_text: str) -> str:
    """
    Process and enhance the free-form user input using NLP.

    Noun chunks and named entities found by the module-level spaCy pipeline
    are appended as a "Key concepts" line, and a note is added when the input
    has fewer than 20 whitespace-separated words. The result is printed and
    returned.

    Args:
        user_text: Raw requirement text supplied by the user.

    Returns:
        The stripped input followed by the optional annotations.
    """
    doc = nlp(user_text)
    candidates = [chunk.text.strip() for chunk in doc.noun_chunks]
    candidates.extend(ent.text.strip() for ent in doc.ents)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # generated prompt is identical from run to run (a set's iteration order
    # for strings changes with hash randomisation).
    key_phrases = [phrase for phrase in dict.fromkeys(candidates) if phrase]
    enhanced_text = user_text.strip()
    if key_phrases:
        enhanced_text += "\nKey concepts: " + ", ".join(key_phrases)
    if len(user_text.split()) < 20:
        enhanced_text += "\n[Note: The input is brief; more detail may yield a richer design.]"
    print("Enhanced User Requirements:")
    print(enhanced_text)
    return enhanced_text

def extract_text_from_pdf(pdf_file: BytesIO) -> str:
    """Extract text from the given PDF file.

    Args:
        pdf_file: Binary stream holding the PDF document.

    Returns:
        The concatenated text of all pages that were read. If reading fails
        the error is printed and whatever was collected so far (possibly an
        empty string) is returned.
    """
    page_texts = []
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            # Treat a None/empty result as "no text" so one such page cannot
            # raise TypeError and cut off the remaining pages.
            page_texts.append(page.extract_text() or "")
    except Exception as e:
        print(f"Error reading PDF: {e}")
    return "".join(page_texts)

def generate_system_designs(user_requirements: str, examples: Any = None, pdf_data: BytesIO = None) -> str:
    """Build a system-design prompt from the inputs and return the model's answer.

    The prompt combines the enhanced user requirements, the reference
    examples, the database structure, sample rows of any table mentioned in
    the requirements, and optional PDF text, then asks the chat model for a
    concise (500 words) design document.

    Args:
        user_requirements: Free-form requirement text.
        examples: Dict with "example_reqs" / "example_designs"; any non-dict
            value is replaced by built-in defaults (with a warning).
        pdf_data: Optional PDF stream whose text is appended to the requirements.

    Returns:
        The stripped model response, or an error message string if anything
        inside the generation step raises.
    """
    if not isinstance(examples, dict):
        print("Warning: 'examples' parameter is not a dictionary. Using default examples.")
        examples = dict(
            example_reqs="Example system requirements: [Default structured requirements].",
            example_designs="Example system designs: [Detailed design example].",
        )
    try:
        processed_requirements = enhance_user_requirements(user_requirements)
        table_structure = fetch_table_structure()

        # Pull sample rows only when the requirements name a table.
        referenced_table = detect_table_name(user_requirements)
        if not referenced_table:
            table_data_string = ""
        else:
            rows = fetch_specific_table(referenced_table, limit=5)
            if not rows:
                table_data_string = f"No data found for table '{referenced_table}'.\n"
            else:
                row_lines = [f"Row {i}: {row}\n" for i, row in enumerate(rows, start=1)]
                table_data_string = f"Sample rows from '{referenced_table}':\n" + "".join(row_lines)

        if not pdf_data:
            print("No PDF data provided; skipping PDF extraction.")
        else:
            pdf_text = extract_text_from_pdf(pdf_data)
            processed_requirements += f"\nPDF data: {pdf_text}"

        reference_reqs = examples.get("example_reqs", "")
        reference_designs = examples.get("example_designs", "")
        prompt = f"""
User Requirements (enhanced):
{processed_requirements}

Reference Requirements:
{reference_reqs}

Reference Designs:
{reference_designs}

Database Structure:
{table_structure}

{table_data_string}

Generate a concise system design document (500 words) that includes:
1. A mathematical description of the system requirements (use LaTeX for any math equations, e.g. $$E=mc^2$$, and include tables as regular HTML).
2. Acceptable system designs with formal proofs (using key properties and homomorphism).
3. Unacceptable designs with proofs outlining discrepancies.
4. Recommendations for improvement.
5. A formal proof of homomorphism demonstrating equivalence between requirements and designs.

IMPORTANT:
- Do NOT use any generic or fallback examples unless specified.
- Clearly label each section with appropriate headings.
- Ensure any defined mathematical expressions are formatted in LaTeX.
- Keep the response self-contained and data-driven.
        """
        print("Final Query Text for System Designs:")
        print(prompt)
        chat_messages = [{"role": "user", "content": prompt}]
        completion = openai.ChatCompletion.create(
            model="gpt-4.1-nano",
            messages=chat_messages,
            max_tokens=1500,
            temperature=0.7
        )
        answer = completion.choices[0].message["content"]
        return answer.strip()
    except Exception as e:
        return "Error in generating system designs: " + str(e)

def create_verification_requirements_models(system_requirements: str, examples: Any = None, pdf_data: BytesIO = None) -> str:
    """Build a verification-requirements prompt and return the model's answer.

    The prompt combines the enhanced system requirements (plus optional PDF
    text) with the reference examples and asks the chat model for a concise
    (500 words) verification requirements document.

    Args:
        system_requirements: Free-form requirement text.
        examples: Dict with "example_system_reqs", "example_verif_reqs" and
            "example_designs"; any non-dict value is replaced by built-in
            defaults (with a warning).
        pdf_data: Optional PDF stream whose text is appended to the requirements.

    Returns:
        The stripped model response, or an error message string if anything
        inside the generation step raises.
    """
    if not isinstance(examples, dict):
        print("Warning: 'examples' parameter is not a dictionary. Using default examples.")
        examples = dict(
            example_system_reqs="Example system requirements: [Structured requirements].",
            example_verif_reqs={"verification": {"details": [{"example": "verification structure"}]}},
            example_designs={"design": {"details": [{"example": "system design structure"}]}},
        )
    try:
        processed_requirements = enhance_user_requirements(system_requirements)
        if not pdf_data:
            print("No PDF data provided; skipping PDF extraction.")
        else:
            pdf_text = extract_text_from_pdf(pdf_data)
            processed_requirements += f"\nPDF data: {pdf_text}"

        reference_reqs = examples.get("example_system_reqs", "")
        reference_verifications = examples.get("example_verif_reqs", "")
        reference_designs = examples.get("example_designs", "")
        prompt = f"""
Enhanced System Requirements:
{processed_requirements}

Reference Requirements:
{reference_reqs}

Reference Verification Examples:
{reference_verifications}

Reference Designs:
{reference_designs}

Generate a concise verification requirements document (500 words) that includes:
1. Detailed verification problem spaces with proofs of morphism to the system requirements.
2. Verification models with proofs indicating adherence to these problem spaces.
3. A formal yes/no proof of homomorphism demonstrating equivalence between system designs and verification requirements.

IMPORTANT:
- Do NOT use any generic or fallback examples unless specified.
- Clearly label every section (for example, 'Verification Problem Spaces', 'Verification Models', etc.).
- Format any defined mathematical expressions correctly.
- Keep the response self-contained and data-driven.
        """
        print("Final Query Text for Verification Requirements:")
        print(prompt)
        chat_messages = [{"role": "user", "content": prompt}]
        completion = openai.ChatCompletion.create(
            model="gpt-4.1-nano",
            messages=chat_messages,
            max_tokens=1500,
            temperature=0.7
        )
        answer = completion.choices[0].message["content"]
        return answer.strip()
    except Exception as e:
        return "Error in generating verification requirements and models: " + str(e)

def get_traceability(system_requirements: str, example_system_requirements: str,
                      example_system_designs: Dict[str, Dict[str, List[Dict[str, str]]]]) -> str:
    """Ask the chat model for a traceability matrix and proof of traceability.

    Args:
        system_requirements: The requirements to trace.
        example_system_requirements: Example requirements, embedded in the
            prompt as a structure reference only.
        example_system_designs: Example designs, embedded in the prompt as a
            structure reference only.

    Returns:
        The stripped model response, or an error message string if building
        the prompt or calling the API raises.
    """
    try:
        prompt = f"""
Generate traceability and proof based on the given system requirements. The provided example system designs and their corresponding system requirements are for structure reference only. Do not use the example content directly.

Example System Requirements (for structure reference only):
{example_system_requirements}

Example System Designs (for structure reference only):
{example_system_designs}

System Requirements: {system_requirements}

Please provide your answer in clearly labeled sections. Include:
1. A traceability matrix formatted as a clean HTML table (with bold headers and no extraneous rows).
2. A short, spaced proof of traceability explanation that follows the table.

IMPORTANT:
- Do NOT use any generic or fallback examples unless specified.
- Format any defined mathematical expressions in LaTeX.
- Clearly label each section with headers (e.g., "Traceability Matrix", "Proof of Traceability").
- Ensure the table is neatly formatted, accounting for missing data.
        """
        chat_messages = [{"role": "user", "content": prompt}]
        completion = openai.ChatCompletion.create(
            model="gpt-4.1-nano",
            messages=chat_messages,
            max_tokens=1500,
            temperature=0.7
        )
        answer = completion.choices[0].message["content"]
        return answer.strip()
    except Exception as e:
        return "Error in generating traceability: " + str(e)

def get_verification_conditions(system_requirements: str, example_system_requirements: str,
                                example_verification_requirements: Dict[str, Dict[str, List[Dict[str, str]]]],
                                example_system_designs: Dict[str, Dict[str, List[Dict[str, str]]]]) -> str:
    """Ask the chat model for verification conditions for the requirements.

    Args:
        system_requirements: The requirements to derive conditions for.
        example_system_requirements: Example requirements, embedded in the
            prompt as a structure reference only.
        example_verification_requirements: Example verification requirements,
            embedded in the prompt as a structure reference only.
        example_system_designs: Example designs, embedded in the prompt as a
            structure reference only.

    Returns:
        The stripped model response, or an error message string if building
        the prompt or calling the API raises.
    """
    try:
        prompt = f"""
Generate verification conditions based on the given system requirements. The provided example system requirements, verification requirements, and system designs are for structure reference only. Do not use the example content directly.

Example System Requirements (for structure reference only):
{example_system_requirements}

Example Verification Requirements (for structure reference only):
{example_verification_requirements}

Example System Designs (for structure reference only):
{example_system_designs}

System Requirements: {system_requirements}

Please provide your answer in clearly labeled sections. Include:
1. A description of the type of homomorphism (e.g., Homomorphism, Isomorphism, Identity isomorphism, Parameter morphism) along with a clear explanation.
2. A discussion of the verification requirement problem space with clear definitions.
3. A proof of the type of homomorphism and the verification requirement problem space.

IMPORTANT:
- Do NOT use any generic or fallback examples unless specified.
- Format any defined mathematical expressions in LaTeX if needed.
- Clearly label each section with headers.
- Keep the response self-contained and data-driven.
        """
        chat_messages = [{"role": "user", "content": prompt}]
        completion = openai.ChatCompletion.create(
            model="gpt-4.1-nano",
            messages=chat_messages,
            max_tokens=1500,
            temperature=0.7
        )
        answer = completion.choices[0].message["content"]
        return answer.strip()
    except Exception as e:
        return "Error in generating verification conditions: " + str(e)

import torch
from diffusers import StableDiffusion3Pipeline

import itertools

def extract_important_phrases(text: str) -> list:
    """
    Extract important phrases from the requirements text.

    A phrase is kept when it is:
    - a named entity labelled QUANTITY, CARDINAL, PERCENT, TIME, DATE or
      ORDINAL, or a named entity containing a digit or a unit token;
    - a noun chunk or sentence containing one of the engineering keywords,
      a digit, or a unit token.

    Phrases shorter than four characters and a few generic words are dropped.

    Args:
        text: Requirement text to analyse with the module-level spaCy pipeline.

    Returns:
        The surviving phrases ordered by first appearance in ``text``; ties
        are broken by length and then alphabetically so the order is stable.
    """
    doc = nlp(text)
    keywords = (
        "constraint", "model", "equation", "state machine", "differential equation",
        "threshold", "limit", "performance", "acceleration", "speed", "force", "balance",
        "representation", "convert", "compare", "problem space",
    )
    numeric_labels = {"QUANTITY", "CARDINAL", "PERCENT", "TIME", "DATE", "ORDINAL"}
    digit_re = re.compile(r"\d")
    # '%' is matched outside the \b...\b group: '%' is not a word character,
    # so a trailing \b would demand a word character right after it and a
    # '%' ending a token (e.g. "a high % of users") would never match.
    unit_re = re.compile(r"\b(?:sec|second|mph|km/h|ms|g|kg|Hz)\b|%", re.I)

    def has_number_or_unit(candidate: str) -> bool:
        """Return True when the candidate contains a digit or a unit token."""
        return bool(digit_re.search(candidate) or unit_re.search(candidate))

    def is_relevant(candidate: str) -> bool:
        """Return True when the candidate mentions a keyword, digit or unit."""
        lowered = candidate.lower()
        return any(keyword in lowered for keyword in keywords) or has_number_or_unit(candidate)

    important_phrases = set()

    # 1. Entities that are numeric by label or by content.
    for ent in doc.ents:
        ent_text = ent.text.strip()
        if ent.label_ in numeric_labels or has_number_or_unit(ent_text):
            important_phrases.add(ent_text)

    # 2. Noun chunks and whole sentences that pass the same relevance test.
    for spans in (doc.noun_chunks, doc.sents):
        for span in spans:
            span_text = span.text.strip()
            if is_relevant(span_text):
                important_phrases.add(span_text)

    # 3. Remove trivial/short/generic phrases.
    generic_words = {"i", "am", "a", "the", "it", "these", "this", "that"}
    filtered = [
        phrase for phrase in important_phrases
        if len(phrase) >= 4 and phrase.lower() not in generic_words
    ]

    # 4. Sort by appearance in text; phrases that cannot be located go last.
    def sort_key(phrase: str):
        position = text.find(phrase)
        if position < 0:
            position = len(text)
        return (position, len(phrase), phrase)

    return sorted(filtered, key=sort_key)

def generate_network_visualization(graph_data, pdf_data=None):
    """
    Generate a SysML-inspired diagram using Graphviz.

    Key phrases extracted from the user requirements become ellipse nodes
    attached to a central "System Requirement" box. When a PDF is supplied
    and yields text, a dashed "PDF Context" note node is added as well.

    Args:
        graph_data: Mapping that may hold the requirement text under
            "user_requirements". None or an empty mapping is treated as
            "no requirements supplied".
        pdf_data: Optional PDF stream. Its first 500 characters of text are
            used as the requirements when graph_data provides none.

    Returns:
        The rendered diagram as an SVG string, or a small fallback SVG with
        an error message when Graphviz rendering raises.
    """
    # Parse the PDF once up front; previously it could be parsed twice (once
    # for the fallback requirements text and again for the context note).
    pdf_text = extract_text_from_pdf(pdf_data) if pdf_data else ""

    # Extract user requirements text
    requirements_source = graph_data if graph_data else {}
    if "user_requirements" in requirements_source and requirements_source["user_requirements"]:
        user_requirements = requirements_source["user_requirements"]
    elif pdf_data:
        user_requirements = pdf_text[:500]
    else:
        user_requirements = "No requirements provided."

    key_phrases = extract_important_phrases(user_requirements)
    if not key_phrases:
        key_phrases = ["No key requirements found."]

    # Create a Graphviz Digraph
    dot = graphviz.Digraph(comment="SysML-inspired System Requirement Diagram", format="svg")
    dot.attr(rankdir="LR", size="8,5")
    dot.node("REQ", "System Requirement", shape="box", style="filled", fillcolor="#b3c6ff")

    # Add key phrase nodes and connect to central requirement
    for idx, phrase in enumerate(key_phrases):
        node_id = f"KP{idx}"
        dot.node(node_id, phrase, shape="ellipse", style="filled", fillcolor="#e6ffe6")
        dot.edge("REQ", node_id)

    # Add PDF context as a note node only when the PDF actually yielded text
    if pdf_text:
        dot.node("PDF", "PDF Context", shape="note", style="filled", fillcolor="#fff2cc")
        dot.edge("REQ", "PDF", style="dashed")

    try:
        svg_bytes = dot.pipe(format="svg")
        return svg_bytes.decode("utf-8")
    except Exception as e:
        print(f"Error in Graphviz visualization: {e}")
        return '<svg xmlns="http://www.w3.org/2000/svg" width="400" height="60"><text x="10" y="35" fill="red">Graphviz visualization unavailable (backend error)</text></svg>'

if __name__ == "__main__":
    # Demo driver: exercises the database helpers, the four generation
    # functions and the Graphviz visualization with one sample requirement.
    # The key is read from the environment (as connect_to_db() does for its
    # settings) so no real secret has to be written into the source; the
    # placeholder is kept as the fallback when OPENAI_API_KEY is unset.
    test_key = os.environ.get("OPENAI_API_KEY", "XX")
    if not initialize_api(test_key):
        print("Failed to initialize API. Exiting.")
    else:
        tables = list_all_tables()
        print(f"Tables returned: {tables}")
        structure = fetch_table_structure()
        print("Table Structure:")
        print(structure)

        user_input = (
            "I need a system design for a smart home energy management system that handles sensor data, "
            "optimizes energy usage, and allows remote control. Please consider the information provided in the attached document."
        )
        # For demonstration, add the user requirements into the graph_data so the graph node can display it.
        graph_data = {
            "user_requirements": user_input
        }

        examples_design = {
            "example_reqs": "Example system requirements: [Structured requirements similar to those from the dissertation].",
            "example_designs": "Example system designs: [Detailed design example]."
        }
        # The PDF is optional: every step below also works with pdf_data=None.
        try:
            with open("C:\\Users\\X\\X\\Wach_PF_D_2023_main.pdf", "rb") as pdf_file:
                pdf_data = BytesIO(pdf_file.read())
        except FileNotFoundError:
            pdf_data = None
            print("PDF file not found! Example will run without PDF data.")

        design_output = generate_system_designs(user_input, examples_design, pdf_data)
        print("\nGenerated System Design Document:")
        print(design_output)

        examples_verif = {
            "example_system_reqs": "Example system requirements: [Structured requirements based on the dissertation].",
            "example_verif_reqs": {"verification": {"details": [{"example": "verification requirement structure"}]}},
            "example_designs": {"design": {"details": [{"example": "system design structure"}]}}
        }
        verification_output = create_verification_requirements_models(user_input, examples_verif, pdf_data)
        print("\nGenerated Verification Requirements and Models:")
        print(verification_output)

        example_system_requirements = "Example system requirements: [Structured requirements for traceability]."
        example_system_designs = {"design": {"details": [{"example": "system design structure for traceability"}]}}
        traceability_output = get_traceability(user_input, example_system_requirements, example_system_designs)
        print("\nGenerated Traceability and Proof:")
        print(traceability_output)

        example_verification_requirements = {
            "verification": {"details": [{"example": "verification requirement structure for conditions"}]}
        }
        verification_conditions_output = get_verification_conditions(
            user_input,
            example_system_requirements,
            example_verification_requirements,
            example_system_designs
        )
        print("\nGenerated Verification Conditions:")
        print(verification_conditions_output)

        # Generate Graphviz-based visualization using the updated graph_data.
        svg_output = generate_network_visualization(graph_data, pdf_data)
        print("\nGenerated Graphviz-based Visualization (SVG):")
        print(svg_output)