diff --git a/modmesh/pilot/_gui.py b/modmesh/pilot/_gui.py index 74a24a9cf..56439fd81 100644 --- a/modmesh/pilot/_gui.py +++ b/modmesh/pilot/_gui.py @@ -37,6 +37,7 @@ from . import _pilot_core as _pcore from . import airfoil +from . import vision if _pcore.enable: from PySide6.QtGui import QAction @@ -80,6 +81,7 @@ def __init__(self): self.burgers = None self.openprofiledata = None self.runprofiling = None + self.vision = None def __getattr__(self, name): return None if self._rmgr is None else getattr(self._rmgr, name) @@ -99,6 +101,7 @@ def launch(self, name="pilot", size=(1000, 600)): self.linear_wave = _linear_wave.LinearWave1DApp(mgr=self._rmgr) self.openprofiledata = _profiling.Profiling(mgr=self._rmgr) self.runprofiling = _profiling.RunProfiling(mgr=self._rmgr) + self.vision = vision.VisionGui(mgr=self._rmgr) self.populate_menu() self._rmgr.show() return self._rmgr.exec() @@ -131,6 +134,7 @@ def _addAction(menu, text, tip, func, checkable=False, checked=False): self.linear_wave.populate_menu() self.openprofiledata.populate_menu() self.runprofiling.populate_menu() + self.vision.populate_menu() if sys.platform != 'darwin': _addAction( @@ -149,7 +153,6 @@ def _addAction(menu, text, tip, func, checkable=False, checked=False): checked=True, ) - controller = _Controller() # vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4: diff --git a/modmesh/pilot/vision/__init__.py b/modmesh/pilot/vision/__init__.py new file mode 100644 index 000000000..e02f33dd4 --- /dev/null +++ b/modmesh/pilot/vision/__init__.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, Li-Hung Wang +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +""" +Vision features modules +""" + +from ._vision_gui import VisionGui + +__all__ = [ + 'VisionGui', +] + diff --git a/modmesh/pilot/vision/_vision_gui.py b/modmesh/pilot/vision/_vision_gui.py new file mode 100644 index 000000000..1fe1f3ee9 --- /dev/null +++ b/modmesh/pilot/vision/_vision_gui.py @@ -0,0 +1,200 @@ +# Copyright (c) 2025, Li-Hung Wang +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
class VisionGui(PilotFeature):
    """
    Dock widget that loads an image and overlays YOLO detections on it.

    The dock is added to the bottom area of the main window.  It holds a
    status row (icon and text), the image display, a "Load Image" button and
    a button that toggles the YOLO detector.  Detection only runs for images
    that are loaded while the detector is activated.
    """

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        self.widget = QDockWidget("Vision", self._mainWindow)
        self.widget.resize(400, 300)

        # The dock widget needs a plain widget to carry the layout.
        self.central_widget = QWidget()
        self.widget.setWidget(self.central_widget)

        self.layout = QVBoxLayout()
        self.central_widget.setLayout(self.layout)

        # Status row: left-aligned icon followed by a text label.
        self._status_layout = QHBoxLayout()
        self._status_layout.setSpacing(10)
        self._status_layout.setContentsMargins(0, 0, 0, 0)
        self._status_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
        self.layout.addLayout(self._status_layout)

        self.status_light_icon = QLabel()
        self._set_status_icon("media-record")
        self._status_layout.addWidget(self.status_light_icon)
        self.status_label = QLabel("Not Activated")
        self._status_layout.addWidget(self.status_label)

        self.image_instance = QImage()
        self.image_label = QLabel()
        # Stretch factor 1 lets the image take the remaining vertical space.
        self.layout.addWidget(self.image_label, 1)

        self.load_image_button = QPushButton("Load Image")
        self.load_image_button.clicked.connect(self.click_load_image)
        self.layout.addWidget(self.load_image_button)

        self.is_vision_active = False
        self.vision_button = QPushButton("Activate Vision")
        self.vision_button.clicked.connect(self.toggle_activation)
        self.layout.addWidget(self.vision_button)

        self._mainWindow.addDockWidget(
            Qt.DockWidgetArea.BottomDockWidgetArea, self.widget)

    def populate_menu(self):
        """
        Add the "Computer Vision" entry to the window menu.
        """
        self._add_menu_item(
            menu=self._mgr.windowMenu,
            text="Computer Vision",
            tip="Open / Close Computer Vision Window",
            func=self.toggle_visibility,
        )

    def click_load_image(self):
        """
        Ask for an image file and display it.

        When the detector is activated, detection is run on the loaded image
        and the bounding boxes are drawn on top of it.
        """
        file_name, _ = QFileDialog.getOpenFileName(
            self.widget,
            "Open Image",
            "",
            "Image Files (*.png *.jpg *.bmp);;All Files (*)"
        )
        if not file_name:
            # The dialog was cancelled.
            return

        # Load into a fresh QImage so that a failed load does not clobber
        # the image that is currently shown.
        loaded = QImage()
        if not loaded.load(file_name):
            self.status_label.setText(f"Failed to load image: {file_name}")
            return
        self.image_instance = loaded
        self._show_image(self.image_instance)

        if not self.is_vision_active:
            return

        image_array = self.qimage_to_numpy(self.image_instance)
        # ultralytics interprets numpy input as BGR, while the array holds
        # RGB pixels; hand the detector a channel-swapped copy and keep the
        # RGB array for drawing.
        bgr_array = np.ascontiguousarray(image_array[:, :, ::-1])
        detections = _yolo_detector.detect(bgr_array)

        self.draw_bboxes(image_array, detections)

    def draw_bboxes(self, image, detections):
        """
        Draw the detections on the image and display the result.

        :param image: C-contiguous ``(height, width, 3)`` uint8 RGB array.
            It is drawn on in place.
        :param detections: iterable of dicts with the keys ``'bbox'``
            (``x, y, width, height``), ``'label'`` and ``'score'``.
        """
        if not detections:
            # Nothing to draw; the plain image is already displayed.
            return

        height, width, _ = image.shape
        bytes_per_line = 3 * width
        # The QImage wraps the numpy buffer; "image" must outlive it, which
        # holds because both are only used inside this method.
        q_image = QImage(image.data, width, height, bytes_per_line,
                         QImage.Format.Format_RGB888)

        # One painter for all boxes instead of one image/painter per box.
        painter = QPainter(q_image)
        try:
            pen = QPen(Qt.GlobalColor.red)
            pen.setWidth(3)
            painter.setPen(pen)

            font = QFont()
            font.setPointSize(20)
            painter.setFont(font)
            # Keep the label baseline low enough for the text to stay
            # inside the image when a box touches the top edge.
            min_baseline = painter.fontMetrics().ascent()

            for det in detections:
                x1, y1, w, h = det['bbox']
                painter.drawRect(x1, y1, w, h)

                text = f"{det['label']}: {det['score']:.2f}"
                painter.drawText(x1, max(y1 - 5, min_baseline), text)
        finally:
            # An active painter must be ended before the image is used.
            painter.end()

        self._show_image(q_image)

    def qimage_to_numpy(self, qimage):
        """
        Convert a QImage to a ``(height, width, 3)`` uint8 RGB array.

        The returned array owns its data and is C-contiguous.

        :param qimage: the image to convert; it is not modified.
        :raises ValueError: if the image is null.
        """
        if qimage.isNull():
            raise ValueError("cannot convert a null QImage to a numpy array")

        qimage = qimage.convertToFormat(QImage.Format.Format_RGB888)
        width = qimage.width()
        height = qimage.height()
        # Scanlines are padded to a 4-byte boundary, so a row may be longer
        # than width * 3 bytes; strip the padding before reshaping.
        stride = qimage.bytesPerLine()

        raw = np.frombuffer(qimage.bits(), dtype=np.uint8)
        rows = raw[:height * stride].reshape((height, stride))
        # copy() detaches the result from the QImage buffer, which goes away
        # with the converted image.
        return rows[:, :width * 3].copy().reshape((height, width, 3))

    def toggle_activation(self):
        """
        Activate or deactivate the YOLO detector and update the status row.
        """
        if self.is_vision_active:
            _yolo_detector.deactivate()
            self.is_vision_active = False

            self.vision_button.setText("Activate Vision")
            self.status_label.setText("Vision Module Deactivated")
            self._set_status_icon("media-record")
            return

        # Activate first and flip the flag afterwards, so a failure does not
        # leave the GUI claiming that the detector is running.
        try:
            _yolo_detector.activate()
        except Exception as exc:
            self.status_label.setText(
                f"Vision Module Activation Failed: {exc}")
            return
        self.is_vision_active = True

        self.vision_button.setText("Deactivate Vision")
        self.status_label.setText("Vision Module Activated")
        self._set_status_icon("media-playback-start")

    def toggle_visibility(self):
        """
        Show the dock widget when it is hidden and hide it when it is shown.
        """
        self.widget.setVisible(not self.widget.isVisible())

    def _set_status_icon(self, theme_name):
        # Show the 16x16 pixmap of the named theme icon in the status row.
        # NOTE(review): the icon may be empty on platforms without an icon
        # theme -- confirm.
        icon = QIcon.fromTheme(theme_name)
        self.status_light_icon.setPixmap(icon.pixmap(16, 16))

    def _show_image(self, qimage):
        # Display the image scaled to the label size, keeping aspect ratio.
        scaled_image = qimage.scaled(
            self.image_label.size(),
            Qt.AspectRatioMode.KeepAspectRatio,
            Qt.TransformationMode.SmoothTransformation
        )
        self.image_label.setPixmap(QPixmap.fromImage(scaled_image))
class ConsoleHandler(logging.Handler):
    """
    Custom logging handler for sending logs to python console widget
    """

    def __init__(self, console_widget):
        super().__init__()
        # Object providing writeToHistory(str); a falsy value disables
        # output.
        self.console_widget = console_widget

    def emit(self, record):
        """
        Send log record to console widget
        """
        try:
            msg = self.format(record)
            if self.console_widget:
                self.console_widget.writeToHistory(msg + "\n")
        except Exception:
            # Standard logging contract: never raise out of emit().
            self.handleError(record)


def set_up_logger():
    """
    Route the "ultralytics" logger to the pilot Python console.

    On the first call ultralytics' own logging set-up is run and the logger
    is stored in the module-level name ``logger``.  The logger then gets
    exactly one handler: a :class:`ConsoleHandler` when the console widget
    is available, otherwise a plain stream handler.  Calling the function
    again replaces the handler instead of stacking another one.
    """
    global logger
    if "logger" not in globals():
        set_logging(name="ultralytics", verbose=True)
        logger = logging.getLogger("ultralytics")

    try:
        from .. import mgr

        console_widget = mgr.pycon
        if console_widget is None:
            raise RuntimeError("Python console widget is not available")

        # Write a message first: if the widget is unusable the failure shows
        # up here and the fallback below is taken.
        console_widget.writeToHistory(
            "[Vision] Logger initialized successfully\n"
        )
        handler = ConsoleHandler(console_widget)
    except Exception as e:
        # Best-effort fallback to the standard stream.  No in-memory sink is
        # attached: nothing ever read it, so it only accumulated log text.
        print(f"[Vision] Failed to set up console widget logger: {e}")
        handler = logging.StreamHandler()

    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter("[%(name)s] %(levelname)s: %(message)s")
    )

    # Drop handlers installed earlier (for example by a previous call) so
    # every record is emitted once.
    logger.handlers.clear()
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False  # Avoid duplicate output


class YoloDetector:
    """
    YOLO Detector class to manage model loading and detection

    The model is loaded on the first :meth:`activate` call and kept for the
    lifetime of the object; :meth:`deactivate` only clears ``is_active``.
    """

    def __init__(self):
        self.model = None
        self.logger = None
        self.is_active = False

    def activate(self, model_path="./thirdparty/yolo11n.pt"):
        """
        Load the model (first call only), set up logging and mark active.

        Args:
            model_path (str): Path of the weights handed to ``YOLO``.  The
                default is relative to the current working directory.
        """
        if self.model is None:
            self.model = YOLO(model_path)
        if self.logger is None:
            # Kept separate from the model check so that a model without a
            # logger cannot result from a partially failed activation.
            set_up_logger()
            self.logger = logging.getLogger("ultralytics")
        self.is_active = True

    def deactivate(self):
        """
        Mark the detector inactive; the loaded model is kept.
        """
        self.is_active = False

    def detect(self, np_img):
        """
        Perform detection on the input image

        Args:
            np_img (numpy.ndarray): Input image as a numpy array.
                NOTE(review): ultralytics documents numpy input as HWC
                uint8 in BGR channel order -- confirm that callers pass BGR.

        Returns:
            list of dict: Each dict contains 'bbox' (``[x, y, width,
            height]`` truncated to int), 'label', and 'score'.

        Raises:
            RuntimeError: If no model is loaded.
        """
        if self.model is None:
            raise RuntimeError(
                "YOLO model is not loaded. Please activate the detector first."
            )

        # A model attached without activate() leaves self.logger unset.
        log = self.logger
        if log is None:
            log = logging.getLogger("ultralytics")

        result = self.model(np_img)[0]
        boxes = []
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            label = result.names[int(box.cls[0])]
            score = float(box.conf[0])

            boxes.append({
                "bbox": [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],
                "label": label,
                "score": score,
            })
            # Lazy %-formatting: the message is only built when emitted.
            log.debug(
                "Detected %s with confidence %.2f at [%s, %s, %s, %s]",
                label, score, x1, y1, x2, y2,
            )

        return boxes


_yolo_detector = YoloDetector()
+ +Test Files +========== + +- cat.jpg (original source: https://unsplash.com/photos/orange-and-white-cat-on-yellow-surface-sR0cTmQHPug?utm_source=unsplash&utm_medium=referral&utm_content=creditShareLink) + +License +======= + +See the COPYING file in this directory for license information. \ No newline at end of file diff --git a/tests/data/jpg/cat.jpg b/tests/data/jpg/cat.jpg new file mode 100644 index 000000000..70b8ee4f8 Binary files /dev/null and b/tests/data/jpg/cat.jpg differ diff --git a/tests/test_pilot_vision.py b/tests/test_pilot_vision.py new file mode 100644 index 000000000..135d2eefe --- /dev/null +++ b/tests/test_pilot_vision.py @@ -0,0 +1,89 @@ +# Copyright (c) 2025, Li-Hung Wang +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
@unittest.skipUnless(modmesh.HAS_PILOT, "Qt pilot is not built")
class VisionTC(unittest.TestCase):
    """
    Tests for the module-level YOLO detector used by the pilot vision GUI.

    Both tests share the ``_yolo_detector`` singleton.  Deactivation is
    registered as a cleanup so that a failing assertion does not leave the
    detector active for the tests that follow.
    """

    def test_yolo_detector(self):
        self.assertIsNotNone(_yolo_detector)

        # State of the singleton before the first activation.
        self.assertIsNone(_yolo_detector.model)
        self.assertIsNone(_yolo_detector.logger)
        self.assertFalse(_yolo_detector.is_active)

        self.addCleanup(_yolo_detector.deactivate)
        _yolo_detector.activate()

        self.assertTrue(_yolo_detector.is_active)
        self.assertIsInstance(_yolo_detector.logger, logging.Logger)
        self.assertIsInstance(_yolo_detector.model, YOLO)

        _yolo_detector.deactivate()
        self.assertFalse(_yolo_detector.is_active)

    def test_yolo_inference(self):
        testdir = os.path.abspath(os.path.dirname(__file__))
        datadir = os.path.join(testdir, "data", "jpg")

        self.addCleanup(_yolo_detector.deactivate)
        _yolo_detector.activate()

        # Convert to numpy array
        image = Image.open(os.path.join(datadir, "cat.jpg")).convert("RGB")
        image_array = np.array(image)

        results = _yolo_detector.detect(image_array)

        self.assertIsNotNone(results)
        self.assertGreater(len(results), 0)

        first_result = results[0]
        # assertIn reports the dict content on failure.
        self.assertIn("bbox", first_result)
        self.assertIn("label", first_result)
        self.assertIn("score", first_result)
        self.assertEqual(len(first_result["bbox"]), 4)
ff=unix fenc=utf8 et sw=4 ts=4 sts=4: