diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-1.png b/src/.vuepress/public/img/timeseries-featured-analysis-1.png
new file mode 100644
index 000000000..78b04889b
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-1.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-10.png b/src/.vuepress/public/img/timeseries-featured-analysis-10.png
new file mode 100644
index 000000000..93c88b998
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-10.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-11.png b/src/.vuepress/public/img/timeseries-featured-analysis-11.png
new file mode 100644
index 000000000..95bfd761f
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-11.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-12.png b/src/.vuepress/public/img/timeseries-featured-analysis-12.png
new file mode 100644
index 000000000..913404eb2
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-12.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-13.png b/src/.vuepress/public/img/timeseries-featured-analysis-13.png
new file mode 100644
index 000000000..7581feeb0
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-13.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-14.png b/src/.vuepress/public/img/timeseries-featured-analysis-14.png
new file mode 100644
index 000000000..ba38b56e0
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-14.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-15.png b/src/.vuepress/public/img/timeseries-featured-analysis-15.png
new file mode 100644
index 000000000..e47168fe9
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-15.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-16.png b/src/.vuepress/public/img/timeseries-featured-analysis-16.png
new file mode 100644
index 000000000..7a7adce34
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-16.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-17.png b/src/.vuepress/public/img/timeseries-featured-analysis-17.png
new file mode 100644
index 000000000..40e4cbed4
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-17.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-18.png b/src/.vuepress/public/img/timeseries-featured-analysis-18.png
new file mode 100644
index 000000000..ca7138f22
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-18.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-19.png b/src/.vuepress/public/img/timeseries-featured-analysis-19.png
new file mode 100644
index 000000000..bfbab7e0a
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-19.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-2.png b/src/.vuepress/public/img/timeseries-featured-analysis-2.png
new file mode 100644
index 000000000..34f9a985c
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-2.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-20.png b/src/.vuepress/public/img/timeseries-featured-analysis-20.png
new file mode 100644
index 000000000..b883062d1
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-20.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-21.png b/src/.vuepress/public/img/timeseries-featured-analysis-21.png
new file mode 100644
index 000000000..98ffa2ed3
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-21.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-22-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-22-en.png
new file mode 100644
index 000000000..c71afbcf2
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-22-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-22.png b/src/.vuepress/public/img/timeseries-featured-analysis-22.png
new file mode 100644
index 000000000..e74c1aff3
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-22.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-23.png b/src/.vuepress/public/img/timeseries-featured-analysis-23.png
new file mode 100644
index 000000000..39fedbb37
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-23.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-3.png b/src/.vuepress/public/img/timeseries-featured-analysis-3.png
new file mode 100644
index 000000000..083f348cf
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-3.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-4-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-4-en.png
new file mode 100644
index 000000000..5d24ce241
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-4-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-4.png b/src/.vuepress/public/img/timeseries-featured-analysis-4.png
new file mode 100644
index 000000000..0e510f73f
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-4.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-5-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-5-en.png
new file mode 100644
index 000000000..d62c0164b
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-5-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-5.png b/src/.vuepress/public/img/timeseries-featured-analysis-5.png
new file mode 100644
index 000000000..b2f62fddd
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-5.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-6-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-6-en.png
new file mode 100644
index 000000000..81ffed319
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-6-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-6.png b/src/.vuepress/public/img/timeseries-featured-analysis-6.png
new file mode 100644
index 000000000..aa7d86662
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-6.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-7-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-7-en.png
new file mode 100644
index 000000000..277405901
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-7-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-7.png b/src/.vuepress/public/img/timeseries-featured-analysis-7.png
new file mode 100644
index 000000000..aee00dc48
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-7.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-8-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-8-en.png
new file mode 100644
index 000000000..3676e933e
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-8-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-8.png b/src/.vuepress/public/img/timeseries-featured-analysis-8.png
new file mode 100644
index 000000000..ce2db75f1
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-8.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-9-en.png b/src/.vuepress/public/img/timeseries-featured-analysis-9-en.png
new file mode 100644
index 000000000..1ef47271f
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-9-en.png differ
diff --git a/src/.vuepress/public/img/timeseries-featured-analysis-9.png b/src/.vuepress/public/img/timeseries-featured-analysis-9.png
new file mode 100644
index 000000000..924845cd3
Binary files /dev/null and b/src/.vuepress/public/img/timeseries-featured-analysis-9.png differ
diff --git a/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts b/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts
index d651f24d8..b293a4bcd 100644
--- a/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts
+++ b/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts
@@ -133,6 +133,7 @@ export const enSidebar = {
},
{ text: 'Tiered Storage', link: 'Tiered-Storage_timecho' },
{ text: 'Tree-to-Table Mapping', link: 'Tree-to-Table' },
+ { text: 'Timeseries Featured Analysis', link: 'Timeseries-Featured-Analysis_timecho' },
{
text: 'System Maintenance',
collapsible: true,
@@ -246,7 +247,7 @@ export const enSidebar = {
{ text: 'ORDER BY Clause', link: 'OrderBy-Clause' },
{ text: 'LIMIT&OFFSET Clause', link: 'Limit-Offset-Clause' },
{ text: 'Nested Queries', link: 'Nested-Queries' },
- { text: 'Row Pattern Recognition', link: 'Row-Pattern-Recognition' },
+ { text: 'Pattern Query', link: 'Row-Pattern-Recognition_timecho' },
],
},
{ text: 'Maintenance Statements', link: 'SQL-Maintenance-Statements' },
diff --git a/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts b/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts
index 58806cccd..8e758a1ec 100644
--- a/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts
+++ b/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts
@@ -122,6 +122,7 @@ export const zhSidebar = {
},
{ text: '多级存储', link: 'Tiered-Storage_timecho' },
{ text: '树转表视图', link: 'Tree-to-Table' },
+ { text: '时序特色分析', link: 'Timeseries-Featured-Analysis_timecho' },
{
text: '系统运维',
collapsible: true,
@@ -235,7 +236,7 @@ export const zhSidebar = {
{ text: 'ORDER BY子句', link: 'OrderBy-Clause' },
{ text: 'LIMIT&OFFSET子句', link: 'Limit-Offset-Clause' },
{ text: '嵌套查询', link: 'Nested-Queries' },
- { text: '行模式识别', link: 'Row-Pattern-Recognition' },
+ { text: '模式查询', link: 'Row-Pattern-Recognition_timecho' },
],
},
{ text: '运维语句', link: 'SQL-Maintenance-Statements' },
diff --git a/src/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md b/src/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
index 659cbfd73..0a0fdb1f3 100644
--- a/src/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
+++ b/src/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
@@ -40,11 +40,9 @@ The IoTDB table model query syntax supports the following clauses:
- **SELECT Clause**: Specifies the columns to be included in the result. Details: [SELECT Clause](../SQL-Manual/Select-Clause.md)
- **FROM Clause**: Indicates the data source for the query, which can be a single table, multiple tables joined using the `JOIN` clause, or a subquery. Details: [FROM & JOIN Clause](../SQL-Manual/From-Join-Clause.md)
-- **patternRecognition**: Row Pattern Recognition, which supports capturing a segment of continuous data by defining recognition logic for pattern variables and regular expressions, and performs analysis and calculation on each captured data segment. Details:[Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md)
- **WHERE Clause**: Filters rows based on specific conditions. Logically executed immediately after the `FROM` clause. Details: [WHERE Clause](../SQL-Manual/Where-Clause.md)
- **GROUP BY Clause**: Used for aggregating data, specifying the columns for grouping. Details: [GROUP BY Clause](../SQL-Manual/GroupBy-Clause.md)
- **HAVING Clause**: Applied after the `GROUP BY` clause to filter grouped data, similar to `WHERE` but operates after grouping. Details:[HAVING Clause](../SQL-Manual/Having-Clause.md)
-- **WINDOW FUNCTION**: Window Function, a special function that performs calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations, sorting, and definable calculation ranges to implement complex cross-row calculations without collapsing the original data rows. Details: [Window Function](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Function)
- **FILL Clause**: Handles missing values in query results by specifying fill methods (e.g., previous non-null value or linear interpolation) for better visualization and analysis. Details:[FILL Clause](../SQL-Manual/Fill-Clause.md)
- **ORDER BY Clause**: Sorts query results in ascending (`ASC`) or descending (`DESC`) order, with optional handling for null values (`NULLS FIRST` or `NULLS LAST`). Details: [ORDER BY Clause](../SQL-Manual/OrderBy-Clause.md)
- **OFFSET Clause**: Specifies the starting position for the query result, skipping the first `OFFSET` rows. Often used with the `LIMIT` clause. Details: [LIMIT and OFFSET Clause](../SQL-Manual/Limit-Offset-Clause.md)
@@ -590,77 +588,3 @@ IoTDB> SELECT time, temperature, humidity
Total line number = 10
It costs 0.093s
```
-
-### 3.9 Row Pattern Recognition
-
-**Example**: Segment data in table1 by time intervals of 24 hours or less, and query the total number of data entries in each segment, as well as the start and end times.
-
-```SQL
-SELECT start_time, end_time, cnt
-FROM table1
-MATCH_RECOGNIZE (
- ORDER BY time
- MEASURES
- RPR_FIRST(A.time) AS start_time,
- RPR_LAST(time) AS end_time,
- COUNT() AS cnt
- PATTERN (A B*)
- DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
-) AS m
-```
-
-**Result**:
-
-```SQL
-+-----------------------------+-----------------------------+---+
-| start_time| end_time|cnt|
-+-----------------------------+-----------------------------+---+
-|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
-|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
-+-----------------------------+-----------------------------+---+
-Total line number = 2
-```
-
-### 3.10 Window Functions
-
-**Example**: Query the cumulative power consumption values of different devices.
-
-The original data is as follows:
-
-```SQL
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-```
-
-**Query Statement**:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-After grouping, sorting, and calculation (steps are disassembled as shown in the figure below),
-
-
-
-**Result**:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
diff --git a/src/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md b/src/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
index a91a86dcb..a0b0942a3 100644
--- a/src/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
+++ b/src/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
@@ -62,7 +62,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu
2. Data Insertion & Updates: IoTDB provides multiple methods for inserting real-time data. For basic data insertion and updating operations, please see [Write&Updata Data](../Basic-Concept/Write-Updata-Data.md)
-3. Data Querying: IoTDB offers a rich set of data querying capabilities. For a basic introduction to data querying, please see [Query Data](../Basic-Concept/Query-Data_timecho.md). It includes [Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md) suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events, as well as [Window Functions](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Functions) and other featured functions often used in data analysis scenarios.
+3. Data Querying: IoTDB offers a rich set of data querying capabilities. For a basic introduction to data querying, please see [Query Data](../Basic-Concept/Query-Data_timecho.md). IoTDB also supports pattern queries and window functions for time-series featured analysis; for details, please refer to [Timeseries Featured Analysis](../User-Manual/Timeseries-Featured-Analysis_timecho.md).
4. Data Deletion: IoTDB supports two deletion methods: SQL-based deletion and automatic expiration deletion (TTL).
diff --git a/src/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md b/src/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
index 5afcc9592..aa0df8cd8 100644
--- a/src/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
+++ b/src/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
@@ -697,58 +697,7 @@ IoTDB> SELECT window_start, window_end, stock_id, avg(price) as avg FROM CUMULAT
## 4. Window Functions
-### 4.1 Function Overview
-
-The Window Functions supported by IoTDB are special functions that perform calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations (`PARTITION BY`), sorting (`ORDER BY`), and definable calculation ranges (window frame `FRAME`), enabling complex cross-row calculations without collapsing the original data rows. It is commonly used in data analysis scenarios such as ranking, cumulative sums, moving averages, etc.
-
-> Note: This feature is available starting from version V 2.0.5.
-
-For example, in a scenario where you need to query the cumulative power consumption values of different devices, you can achieve this using window functions.
-
-```SQL
--- Original data
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-
--- Create table and insert data
-CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
-insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
-
-
--- Execute window function query
-SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-After grouping, sorting, and calculation (steps are disassembled as shown in the figure below),
-
-
-
-the expected results can be obtained:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-### 4.2 Function Definition
-
-#### 4.2.1 SQL Definition
+### 4.1 SQL Definition
```SQL
windowDefinition
@@ -783,193 +732,56 @@ frameBound
;
```
-#### 4.2.2 Window Definition
-
-##### Partition
-
-`PARTITION BY` is used to divide data into multiple independent, unrelated "groups". Window functions can only access and operate on data within their respective groups, and cannot access data from other groups. This clause is optional; if not explicitly specified, all data is divided into the same group by default. It is worth noting that unlike `GROUP BY` which aggregates a group of data into a single row, the window function with `PARTITION BY` **does not affect the number of rows within the group.**
-
-* Example
-
-Query statement:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
-```
-
-Disassembly steps:
+For a more detailed introduction to this feature, please refer to [Window Functions](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_2-window-functions).
-
+### 4.2 Usage Examples
-Query result:
+The original data of the device_flow table is as follows:
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
-+-----------------------------+------+----+-----+
+```sql
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5|
+|1970-01-01T08:00:02.000+08:00| d0| 3|
+|1970-01-01T08:00:03.000+08:00| d0| 1|
+|1970-01-01T08:00:04.000+08:00| d1| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4|
++-----------------------------+------+-----+
```
-##### Ordering
-
-`ORDER BY` is used to sort data within a partition. After sorting, rows with equal values are called peers. Peers affect the behavior of window functions; for example, different rank functions handle peers differently, and different frame division methods also handle peers differently. This clause is optional.
+1. Query all columns from device_flow, group the data by the device dimension, sort the records within each device group by the value of the flow field, calculate the cumulative sum of the flow field, and finally return the cumulative sum as a column named sum.
-* Example
-
-Query statement:
+SQL:
```SQL
-IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
```
-Disassembly steps:
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+----+
-| time|device|flow|rank|
+| time|device|flow| sum|
+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
+-----------------------------+------+----+----+
```
+2. Query all original columns from the device_flow table, group the data by device, sort the records within each device group by the value of the flow field, count the rows in the frame formed by the current row's peer group (rows with the same flow value) plus the one peer group immediately before it, and finally return the count result as a column named count.
-##### Framing
-
-For each row in a partition, the window function evaluates on a corresponding set of rows called a Frame (i.e., the input domain of the Window Function on each row). The Frame can be specified manually, involving two attributes when specified, as detailed below.
-
-
-
-
- | Frame Attribute |
- Attribute Value |
- Value Description |
-
-
- | Type |
- ROWS |
- Divide the frame by row number |
-
-
- | GROUPS |
- Divide the frame by peers, i.e., rows with the same value are regarded as equivalent. All rows in peers are grouped into one group called a peer group |
-
-
- | RANGE |
- Divide the frame by value |
-
-
- | Start and End Position |
- UNBOUNDED PRECEDING |
- The first row of the entire partition |
-
-
- | offset PRECEDING |
- Represents the row with an "offset" distance from the current row in the preceding direction |
-
-
- | CURRENT ROW |
- The current row |
-
-
- | offset FOLLOWING |
- Represents the row with an "offset" distance from the current row in the following direction |
-
-
- | UNBOUNDED FOLLOWING |
- The last row of the entire partition |
-
-
-
-
-Among them, the meanings of `CURRENT ROW`, `PRECEDING N`, and `FOLLOWING N` vary with the type of frame, as shown in the following table:
-
-| | `ROWS` | `GROUPS` | `RANGE` |
-|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
-| `CURRENT ROW` | Current row | Since a peer group contains multiple rows, this option differs depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. | Same as GROUPS, differing depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. |
-| `offset PRECEDING` | The previous offset rows | The previous offset peer groups; | Rows whose value difference from the current row in the preceding direction is less than or equal to offset are grouped into one frame |
-| `offset FOLLOWING` | The following offset rows | The following offset peer groups. | Rows whose value difference from the current row in the following direction is less than or equal to offset are grouped into one frame |
-
-The syntax format is as follows:
-
-```SQL
--- Specify both frame_start and frame_end
-{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
--- Specify only frame_start, frame_end is CURRENT ROW
-{ RANGE | ROWS | GROUPS } frame_start
-```
-
-If the Frame is not specified manually, the default Frame division rules are as follows:
-
-* When the window function uses ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (i.e., from the first row of the window to the current row). For example: In RANK() OVER(PARTITION BY COL1 ORDER BY COL2), the Frame defaults to include the current row and all preceding rows in the partition.
-* When the window function does not use ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (i.e., all rows in the entire window). For example: In AVG(COL2) OVER(PARTITION BY col1), the Frame defaults to include all rows in the partition, calculating the average of the entire partition.
-
-It should be noted that when the Frame type is GROUPS or RANGE, `ORDER BY` must be specified. The difference is that ORDER BY in GROUPS can involve multiple fields, while RANGE requires calculation and thus can only specify one field.
-
-* Example
-
-1. Frame type is ROWS
-
-Query statement:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
-```
-
-Disassembly steps:
-
-* Take the previous row and the current row as the Frame
- * For the first row of the partition, since there is no previous row, the entire Frame has only this row, returning 1;
- * For other rows of the partition, the entire Frame includes the current row and its previous row, returning 2:
-
-
-
-Query result:
-
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
-+-----------------------------+------+----+-----+
-```
-
-2. Frame type is GROUPS
-
-Query statement:
+SQL:
```SQL
IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-Disassembly steps:
-
-* Take the previous peer group and the current peer group as the Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
- * For the peer group with flow 1, since there are no peer groups smaller than it, the entire Frame has only this row, returning 1;
- * For the peer group with flow 3, it itself contains 2 rows, and the previous peer group is the one with flow 1 (1 row), so the entire Frame has 3 rows, returning 3;
- * For the peer group with flow 5, it itself contains 1 row, and the previous peer group is the one with flow 3 (2 rows), so the entire Frame has 3 rows, returning 3.
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+-----+
@@ -984,24 +796,15 @@ Query result:
+-----------------------------+------+----+-----+
```
-3. Frame type is RANGE
+3. Query all original columns from the device_flow table, group the data by device, sort the records within each group in ascending order by the value of the flow field, count the rows whose flow value lies between "the flow value of the current row minus 2" and "the flow value of the current row" (both inclusive), and finally return the count result as a column named count.
-Query statement:
+SQL:
```SQL
IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-Disassembly steps:
-
-* Group rows whose data is **less than or equal to 2** compared to the current row into the same Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
- * For the row with flow 1, since it is the smallest row, the entire Frame has only this row, returning 1;
- * For the row with flow 3, note that CURRENT ROW exists as frame_end, so it is the last row of the entire peer group. There is 1 row smaller than it that meets the requirement, and the peer group has 2 rows, so the entire Frame has 3 rows, returning 3;
- * For the row with flow 5, it itself contains 1 row, and there are 2 rows smaller than it that meet the requirement, so the entire Frame has 3 rows, returning 3.
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+-----+
@@ -1015,419 +818,3 @@ Query result:
|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+-----------------------------+------+----+-----+
```
-
-### 4.3 Built-in Window Functions
-
-
-
-
- | Window Function Category |
- Window Function Name |
- Function Definition |
- Supports FRAME Clause |
-
-
- | Aggregate Function |
- All built-in aggregate functions |
- Aggregate a set of values to get a single aggregated result. |
- Yes |
-
-
- | Value Function |
- first_value |
- Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs |
- Yes |
-
-
- | last_value |
- Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs |
- Yes |
-
-
- | nth_value |
- Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs |
- Yes |
-
-
- | lead |
- Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default |
- No |
-
-
- | lag |
- Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default |
- No |
-
-
- | Rank Function |
- rank |
- Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers |
- No |
-
-
- | dense_rank |
- Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers |
- No |
-
-
- | row_number |
- Return the row number of the current row in the entire partition; note that the row number starts from 1 |
- No |
-
-
- | percent_rank |
- Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (rank() - 1) / (n - 1), where n is the number of rows in the entire partition |
- No |
-
-
- | cume_dist |
- Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (number of rows less than or equal to it) / n |
- No |
-
-
- | ntile |
- Specify n to number each row from 1 to n. |
- No |
-
-
-
-
-#### 4.3.1 Aggregate Function
-
-All built-in aggregate functions such as `sum()`, `avg()`, `min()`, `max()` can be used as Window Functions.
-
-> Note: Unlike GROUP BY, each row has a corresponding output in the Window Function
-
-Example:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.2 Value Function
-
-1. `first_value`
-
-* Function name: `first_value(value) [IGNORE NULLS]`
-* Definition: Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+-----------+
-| time|device|flow|first_value|
-+-----------------------------+------+----+-----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+-----------+
-```
-
-2. `last_value`
-
-* Function name: `last_value(value) [IGNORE NULLS]`
-* Definition: Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|last_value|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-3. `nth_value`
-
-* Function name: `nth_value(value, n) [IGNORE NULLS]`
-* Definition: Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|nth_values|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-4. lead
-
-* Function name: `lead(value[, offset[, default]]) [IGNORE NULLS]`
-* Definition: Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
-* The lead function requires an ORDER BY window clause
-* Example:
-
-```SQL
-IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
-+-----------------------------+------+----+----+
-| time|device|flow|lead|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4|null|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:03.000+08:00| d0| 1|null|
-+-----------------------------+------+----+----+
-```
-
-5. lag
-
-* Function name: `lag(value[, offset[, default]]) [IGNORE NULLS]`
-* Definition: Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
-* The lag function requires an ORDER BY window clause
-* Example:
-
-```SQL
-IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY device);
-+-----------------------------+------+----+----+
-| time|device|flow| lag|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2|null|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3|null|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.3 Rank Function
-
-1. rank
-
-* Function name: `rank()`
-* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers;
-* Example:
-
-```SQL
-IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----+
-| time|device|flow|rank|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----+
-```
-
-2. dense_rank
-
-* Function name: `dense_rank()`
-* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers.
-* Example:
-
-```SQL
-IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|dense_rank|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+----------+
-```
-
-3. row_number
-
-* Function name: `row_number()`
-* Definition: Return the row number of the current row in the entire partition; note that the row number starts from 1;
-* Example:
-
-```SQL
-IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|row_number|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----------+
-```
-
-4. percent_rank
-
-* Function name: `percent_rank()`
-* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(rank() - 1) / (n - 1)**, where n is the number of rows in the entire partition;
-* Example:
-
-```SQL
-IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+------------------+
-| time|device|flow| percent_rank|
-+-----------------------------+------+----+------------------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+------------------+
-```
-
-5. cume_dist
-
-* Function name: `cume_dist`
-* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(number of rows less than or equal to it) / n**.
-* Example:
-
-```SQL
-IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+---------+
-| time|device|flow|cume_dist|
-+-----------------------------+------+----+---------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+---------+
-```
-
-6. ntile
-
-* Function name: `ntile`
-* Definition: Specify n to number each row from 1 to n.
- * If the number of rows in the entire partition is less than n, the number is the row index;
- * If the number of rows in the entire partition is greater than n:
- * If the number of rows is divisible by n, it is perfect. For example, if the number of rows is 4 and n is 2, the numbers are 1, 1, 2, 2;
- * If the number of rows is not divisible by n, distribute to the first few groups. For example, if the number of rows is 5 and n is 3, the numbers are 1, 1, 2, 2, 3;
-* Example:
-
-```SQL
-IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+-----+
-| time|device|flow|ntile|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-+-----------------------------+------+----+-----+
-```
-
-### 4.4 Scenario Examples
-
-1. Multi-device diff function
-
-For each row of each device, calculate the difference from the previous row:
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-For each row of each device, calculate the difference from the next row:
-
-```SQL
-SELECT
- *,
- measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-For each row of a single device, calculate the difference from the previous row (same for the next row):
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (ORDER BY time)
-FROM data
-where device='d1'
-WHERE timeCondition;
-```
-
-2. Multi-device TOP_K/BOTTOM_K
-
-Use rank to get the sequence number, then retain the desired order in the outer query.
-
-(Note: The execution order of window functions is after the HAVING clause, so a subquery is needed here)
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY time DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-In addition to sorting by time, you can also sort by the value of the measurement point:
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY measurement DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-3. Multi-device CHANGE_POINTS
-
-This SQL is used to remove consecutive identical values in the input sequence, which can be achieved with lead + subquery:
-
-```SQL
-SELECT
- time,
- device,
- measurement
-FROM(
- SELECT
- time,
- device,
- measurement,
- LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
- FROM data
- WHERE timeCondition
-)
-WHERE measurement != next OR next IS NULL;
-```
diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
new file mode 100644
index 000000000..62cd4691e
--- /dev/null
+++ b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
@@ -0,0 +1,167 @@
+
+
+# Pattern Query
+
+## 1. Syntax Definition
+
+```SQL
+MATCH_RECOGNIZE (
+ [ PARTITION BY column [, ...] ]
+ [ ORDER BY column [, ...] ]
+ [ MEASURES measure_definition [, ...] ]
+ [ ROWS PER MATCH ]
+ [ AFTER MATCH skip_to ]
+ PATTERN ( row_pattern )
+ [ SUBSET subset_definition [, ...] ]
+ DEFINE variable_definition [, ...]
+)
+```
+
+**Note:**
+
+* PARTITION BY: Optional. Used to group the input table, and each group can perform pattern matching independently. If this clause is not specified, the entire input table will be processed as a single unit.
+* ORDER BY: Optional. Used to ensure that input data is processed in a specific order during matching.
+* MEASURES: Optional. Used to specify which information to extract from the matched segment of data.
+* ROWS PER MATCH: Optional. Used to specify the output method of the result set after successful pattern matching.
+* AFTER MATCH SKIP: Optional. Used to specify which row to resume from for the next pattern match after identifying a non-empty match.
+* PATTERN: Used to define the row pattern to be matched.
+* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set.
+* DEFINE: Used to define the basic pattern variables for the row pattern.
+
+For a more detailed introduction to this feature, please refer to: [Pattern Query](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_1-pattern-query)
+
+## 2. Usage Examples
+
+Using [Sample Data](../Reference/Sample-Data.md) as the source data
+
+1. Time Segment Query
+
+Segment the data in table1 so that the time gap between adjacent rows within a segment is less than or equal to 24 hours, and query the number of rows in each segment, as well as its start and end times.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table1
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
+) AS m
+```
+
+* Query Results
+
+SQL
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
+|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
++-----------------------------+-----------------------------+---+
+Total line number = 2
+```
+
+2. Difference Segment Query
+
+Segment the data in table2 so that the humidity difference between adjacent rows within a segment is less than or equal to 0.1, and query the number of rows in each segment, as well as its start and end times.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table2
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1
+) AS m;
+```
+
+* Query Results
+
+SQL
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2|
+|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2|
+|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2|
++-----------------------------+-----------------------------+---+
+Total line number = 3
+```
+
+3. Event Statistics Query
+
+Partition the data in table1 by device ID, find each run of consecutive rows where the region is Shanghai and the humidity is greater than 35, and return the start time, end time, and maximum humidity of each run.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
+FROM table1
+MATCH_RECOGNIZE (
+ PARTITION BY device_id
+ ORDER BY time
+ MEASURES
+ MATCH_NUMBER() AS match,
+ RPR_FIRST(A.time) AS event_start,
+ RPR_LAST(A.time) AS event_end,
+ MAX(A.humidity) AS max_humidity
+ ONE ROW PER MATCH
+ PATTERN (A+)
+ DEFINE
+ A AS A.region= 'Shanghai' AND A.humidity> 35
+) AS m
+```
+
+* Query Results
+
+SQL
+
+```SQL
++---------+-----+-----------------------------+-----------------------------+------------+
+|device_id|match| event_start| event_end|max_humidity|
++---------+-----+-----------------------------+-----------------------------+------------+
+| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
+| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
++---------+-----+-----------------------------+-----------------------------+------------+
+Total line number = 2
+```
diff --git a/src/UserGuide/Master/Table/SQL-Manual/overview_timecho.md b/src/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
index a7be3102b..19afdc1b8 100644
--- a/src/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
+++ b/src/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
@@ -40,11 +40,9 @@ The IoTDB table model query syntax supports the following clauses:
- **SELECT Clause**: Specifies the columns to be included in the result. Details: [SELECT Clause](../SQL-Manual/Select-Clause.md)
- **FROM Clause**: Indicates the data source for the query, which can be a single table, multiple tables joined using the `JOIN` clause, or a subquery. Details: [FROM & JOIN Clause](../SQL-Manual/From-Join-Clause.md)
-- **patternRecognition**: Row Pattern Recognition, which supports capturing a segment of continuous data by defining recognition logic for pattern variables and regular expressions, and performs analysis and calculation on each captured data segment. Details:[Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md)
- **WHERE Clause**: Filters rows based on specific conditions. Logically executed immediately after the `FROM` clause. Details: [WHERE Clause](../SQL-Manual/Where-Clause.md)
- **GROUP BY Clause**: Used for aggregating data, specifying the columns for grouping. Details: [GROUP BY Clause](../SQL-Manual/GroupBy-Clause.md)
- **HAVING Clause**: Applied after the `GROUP BY` clause to filter grouped data, similar to `WHERE` but operates after grouping. Details:[HAVING Clause](../SQL-Manual/Having-Clause.md)
-- **WINDOW FUNCTION**: Window Function, a special function that performs calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations, sorting, and definable calculation ranges to implement complex cross-row calculations without collapsing the original data rows. Details: [Window Function](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Function)
- **FILL Clause**: Handles missing values in query results by specifying fill methods (e.g., previous non-null value or linear interpolation) for better visualization and analysis. Details:[FILL Clause](../SQL-Manual/Fill-Clause.md)
- **ORDER BY Clause**: Sorts query results in ascending (`ASC`) or descending (`DESC`) order, with optional handling for null values (`NULLS FIRST` or `NULLS LAST`). Details: [ORDER BY Clause](../SQL-Manual/OrderBy-Clause.md)
- **OFFSET Clause**: Specifies the starting position for the query result, skipping the first `OFFSET` rows. Often used with the `LIMIT` clause. Details: [LIMIT and OFFSET Clause](../SQL-Manual/Limit-Offset-Clause.md)
diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
similarity index 55%
rename from src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md
rename to src/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
index e428fe52c..cb36c56f6 100644
--- a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md
+++ b/src/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
@@ -19,21 +19,22 @@
-->
-# Row Pattern Recognition
+# Timeseries Featured Analysis
-## 1. Overview
+For time-series data feature analysis scenarios, IoTDB provides two core capabilities: pattern query and window functions. These capabilities deliver a flexible and efficient solution for in-depth mining and complex computation of time-series data. The following sections will elaborate on the two features in detail.
-IoTDB supports Row Pattern Recognition. This feature enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events. If we regard Row Pattern Recognition as grouping processing of data, the core process is roughly as follows:
+## 1. Pattern Query
-* Perform group capture through the PATTERN, DEFINE, and SUBSET clauses
-* Conduct computational processing on the captured groups through the MEASURES clause
-* Set the output format of groups through the ROWS PER MATCH clause
-* Specify how to locate the start position of the next group through the AFTER MATCH SKIP clause
+### 1.1 Overview
+
+Pattern query enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data (as shown in the figure below) and detecting specific events.
+
+
> Note: This feature is available starting from version V2.0.5.
-## 2. Function Introduction
-### 2.1 Syntax Format
+### 1.2 Function Introduction
+#### 1.2.1 Syntax Format
```SQL
MATCH_RECOGNIZE (
@@ -59,7 +60,28 @@ MATCH_RECOGNIZE (
* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set.
* DEFINE: Used to define the basic pattern variables for the row pattern.
-### 2.2 DEFINE Clause
+**Original Data for Syntax Examples:**
+
+```SQL
+IoTDB:database3> select * from t
++-----------------------------+------+----------+
+| time|device|totalprice|
++-----------------------------+------+----------+
+|2025-01-01T00:01:00.000+08:00| d1| 90|
+|2025-01-01T00:02:00.000+08:00| d1| 80|
+|2025-01-01T00:03:00.000+08:00| d1| 70|
+|2025-01-01T00:04:00.000+08:00| d1| 80|
+|2025-01-01T00:05:00.000+08:00| d1| 70|
+|2025-01-01T00:06:00.000+08:00| d1| 80|
++-----------------------------+------+----------+
+
+-- Creation Statement
+create table t(device tag, totalprice int32 field)
+
+insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
+```
+
+#### 1.2.2 DEFINE Clause
Used to specify the judgment condition for each basic pattern variable in pattern recognition. These variables are usually represented by identifiers (e.g., `A`, `B`), and the Boolean expressions in this clause precisely define which rows meet the requirements of the variable.
@@ -72,7 +94,7 @@ DEFINE B AS totalprice < PREV(totalprice)
* Variables not **explicitly** defined in this clause have an implicitly set condition of always true (TRUE), meaning they can be successfully matched on any input row.
-### 2.3 SUBSET Clause
+#### 1.2.3 SUBSET Clause
Used to merge rows matched by multiple basic pattern variables (e.g., `A`, `B`) into a combined pattern variable (e.g., `U`), allowing these rows to be treated as a single logical set for operations. It can be used in the `MEASURES`, `DEFINE`, and `AFTER MATCH SKIP` clauses.
@@ -84,7 +106,7 @@ For example, for the pattern `PATTERN ((A | B){5} C+)`, it is impossible to dete
1. In the `MEASURES` clause, if you need to reference the last row matched in this phase, you can do so by defining the combined pattern variable `SUBSET U = (A, B)`. At this point, the expression `RPR_LAST(U.totalprice)` will directly return the `totalprice` value of the target row.
2. In the `AFTER MATCH SKIP` clause, if the matching result does not include the basic pattern variable A or B, executing `AFTER MATCH SKIP TO LAST B` or `AFTER MATCH SKIP TO LAST A` will fail to jump due to missing anchors. However, by introducing the combined pattern variable `SUBSET U = (A, B)`, using `AFTER MATCH SKIP TO LAST U` is always valid.
-### 2.4 PATTERN Clause
+#### 1.2.4 PATTERN Clause
Used to define the row pattern to be matched, whose basic building block is a row pattern variable.
@@ -92,7 +114,7 @@ Used to define the row pattern to be matched, whose basic building block is a ro
PATTERN ( row_pattern )
```
-#### 2.4.1 Pattern Types
+##### 1.2.4.1 Pattern Types
| Row Pattern | Syntax Format | Description |
|-----------------------|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -103,7 +125,7 @@ PATTERN ( row_pattern )
| Empty Pattern | `()` | Represents an empty match that does not contain any rows. |
| Pattern Exclusion | `{- row_pattern -}` | Used to specify the matched part to be excluded from the output. Usually used with the `ALL ROWS PER MATCH` option to output rows of interest. For example, `PATTERN (A {- B+ C+ -} D+)` with ALL ROWS PER MATCH will only output the first row `(corresponding to A)` and the trailing rows `(corresponding to D+)` of the match. |
-#### 2.4.2 Partition Start/End Anchor
+##### 1.2.4.2 Partition Start/End Anchor
* `^A` indicates matching a pattern that starts with A as the partition beginning
* When the value of the PATTERN clause is `^A`, the match must start from the first row of the partition, and this row must satisfy the definition of `A`.
@@ -112,152 +134,9 @@ PATTERN ( row_pattern )
* When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`.
* When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty.
-For example illustrations, see[Section 3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor)
-
-#### 2.4.3 Quantifiers
+**Examples**
-Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`).
-
-Common quantifiers are as follows:
-
-| Quantifier | Description |
-| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `*` | Zero or more repetitions |
-| `+` | One or more repetitions |
-| `?` | Zero or one repetition |
-| `{n}` | Exactly n repetitions |
-| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. |
-
-* The matching preference can be changed by adding `?` after the quantifier.
- * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times.
- * `?`: Prefers 1 time; `??`: Prefers 0 times.
-
-### 2.5 AFTER MATCH SKIP Clause
-
-Used to specify which row to start the next pattern match from after identifying a non-empty match.
-
-| Jump Strategy | Description | Allows Overlapping Matches? |
-| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- |
-| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No |
-| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes |
-| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | Jumps to start from the [ first row | last row ] of a pattern variable. | Yes |
-
-* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row.
-
-For example illustrations, see [Section 3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause)
-
-### 2.6 ROWS PER MATCH Clause
-
-Used to specify the output method of the result set after a successful pattern match, including the following two main options:
-
-| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** |
-| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| ONE ROW PER MATCH | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause\* Expressions defined in the MEASURES clause. | Outputs empty matches; skips unmatched rows. |
-| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause\* Columns in the ORDER BY clause\* Expressions defined in the MEASURES clause\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**: Outputs empty matches by default; skips unmatched rows.\* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**: Does not output empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**: Outputs empty matches and generates an additional output record for each unmatched row. |
-
-### 2.7 MEASURES Clause
-
-Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause.
-
-SQL
-
-```SQL
-MEASURES measure_expression AS measure_name [, ...]
-```
-
-* A `measure_expression` is a scalar value calculated from the matched set of data.
-
-| Usage Example | Description |
-| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. |
-| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). |
-| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] |
-
-* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`.
-
-### 2.8 Row Pattern Recognition Expressions
-
-Expressions used in the MEASURES and DEFINE clauses are **scalar expressions**, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition.
-
-#### 2.8.1 Pattern Variable References
-
-SQL
-
-```SQL
-A.totalprice
-U.orderdate
-orderstatus
-```
-
-* When a column name is prefixed with a **basic pattern variable** or a **combined pattern variable**, it refers to the corresponding column values of all rows matched by that variable.
-* If a column name has no prefix, it is equivalent to using the "**global combined pattern variable**" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match.
-
-> Using table names as column name prefixes in pattern recognition expressions is not allowed.
-
-#### 2.8.2 Extended Functions
-
-| Function Name | Function Syntax | Description |
-| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. |
-| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. |
-| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
-| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
-| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
-| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
-| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) |
-| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. |
-| Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. |
-
-For example illustrations, see [Section 3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions)
-
-#### 2.8.3 RUNNING and FINAL Semantics
-
-1. Definition
-
-* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row).
-* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group).
-
-2. Scope of Application
-
-* The DEFINE clause uses RUNNING semantics by default.
-* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics.
-
-3. Syntax Constraints
-
-* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.**
- * Valid: `RUNNING RPP_LAST(A.totalprice)`, `FINAL RPP_LAST(A.totalprice)`
- * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)`
-
-## 3. Syntax Examples
-
-Original Data
-
-SQL
-
-```SQL
-IoTDB:database3> select * from t
-+-----------------------------+------+----------+
-| time|device|totalprice|
-+-----------------------------+------+----------+
-|2025-01-01T00:01:00.000+08:00| d1| 90|
-|2025-01-01T00:02:00.000+08:00| d1| 80|
-|2025-01-01T00:03:00.000+08:00| d1| 70|
-|2025-01-01T00:04:00.000+08:00| d1| 80|
-|2025-01-01T00:05:00.000+08:00| d1| 70|
-|2025-01-01T00:06:00.000+08:00| d1| 80|
-+-----------------------------+------+----------+
-
--- Create Statement
-create table t(device tag, totalprice int32 field)
-
-insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
-```
-
-### 3.1 PATTERN Clause Partition Anchor
-
-* Query SQL
-
-SQL
+* Query SQL
```SQL
SELECT m.time, m.match, m.price, m.label
@@ -270,16 +149,17 @@ MATCH_RECOGNIZE (
CLASSIFIER() AS label
ALL ROWS PER MATCH
AFTER MATCH SKIP PAST LAST ROW
- PATTERN %s -- PATTERN Clause
+    PATTERN %s  -- PATTERN Clause
DEFINE A AS true
) AS m;
```
-* Query Results
-
- * When the PATTERN clause is PATTERN (^A)
+* Results
+ * When the PATTERN clause is specified as PATTERN (^A)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-----+-----+-----+
@@ -290,9 +170,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * When the PATTERN clause is PATTERN (^A^)
-
- SQL
+ * When the PATTERN clause is specified as PATTERN (^A^), the output result is empty. This is because it is impossible to match an A starting from the beginning of a partition and then return to the beginning of the partition again.
```SQL
+----+-----+-----+-----+
@@ -302,9 +180,11 @@ MATCH_RECOGNIZE (
Empty set.
```
- * When the PATTERN clause is PATTERN (A\$)
+ * When the PATTERN clause is specified as PATTERN (A\$)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-----+-----+-----+
@@ -315,9 +195,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * When the PATTERN clause is PATTERN (\$A\$)
-
- SQL
+ * When the PATTERN clause is specified as PATTERN (\$A\$), the output result is empty.
```SQL
+----+-----+-----+-----+
@@ -327,11 +205,40 @@ MATCH_RECOGNIZE (
Empty set.
```
-### 3.2 AFTER MATCH SKIP Clause
-* Query SQL
+##### 1.2.4.3 Quantifiers
-SQL
+Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`).
+
+Common quantifiers are as follows:
+
+| Quantifier | Description |
+| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `*` | Zero or more repetitions |
+| `+` | One or more repetitions |
+| `?` | Zero or one repetition |
+| `{n}` | Exactly n repetitions |
+| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. |
+
+* The matching preference can be changed by adding `?` after the quantifier.
+ * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times.
+ * `?`: Prefers 1 time; `??`: Prefers 0 times.
+
+#### 1.2.5 AFTER MATCH SKIP Clause
+
+Used to specify which row to start the next pattern match from after identifying a non-empty match.
+
+| Jump Strategy | Description | Allows Overlapping Matches? |
+| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- |
+| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No |
+| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes |
+| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable`    | Jumps to start from the [ first row \| last row ] of a pattern variable.       | Yes                         |
+
+* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row.
+
+**Examples**
+
+* Query SQL
```SQL
SELECT m.time, m.match, m.price, m.label
@@ -343,24 +250,25 @@ MATCH_RECOGNIZE (
RUNNING RPR_LAST(totalprice) AS price,
CLASSIFIER() AS label
ALL ROWS PER MATCH
- %s -- AFTER MATCH SKIP Clause
+    %s  -- AFTER MATCH SKIP Clause
PATTERN (A B+ C+ D?)
SUBSET U = (C, D)
DEFINE
B AS B.totalprice < PREV (B.totalprice),
C AS C.totalprice > PREV (C.totalprice),
- D AS false -- Never matches successfully
+        D AS false  -- Never matches successfully
) AS m;
```
-* Query Results
+* Results
+ * When AFTER MATCH SKIP PAST LAST ROW is specified
- * When AFTER MATCH SKIP PAST LAST ROW
- * First match: Rows 1, 2, 3, 4
- * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found
- * This pattern will never have overlapping matches
+ 
- SQL
+    * Matching process:
+ * First match: Rows 1, 2, 3, 4
+ * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found
+ * This pattern will never have overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -375,13 +283,15 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO NEXT ROW
- * First match: Rows 1, 2, 3, 4
- * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4
- * Third match: Attempts to start from row 3, fails
- * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+    * Matching process:
+ * First match: Rows 1, 2, 3, 4
+ * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4
+ * Third match: Attempts to start from row 3, fails
+ * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -402,11 +312,13 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO FIRST C
- * First match: Rows 1, 2, 3, 4
- * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+    * Matching process:
+ * First match: Rows 1, 2, 3, 4
+ * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -424,12 +336,14 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO LAST B or AFTER MATCH SKIP TO B
- * First match: Rows 1, 2, 3, 4
- * Second match: Attempts to start from the last B (i.e., row 3), fails
- * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+    * Matching process:
+ * First match: Rows 1, 2, 3, 4
+ * Second match: Attempts to start from the last B (i.e., row 3), fails
+ * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -447,11 +361,13 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO U
- * First match: Rows 1, 2, 3, 4
- * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+ *
+    * Matching process:
+ * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -470,28 +386,83 @@ MATCH_RECOGNIZE (
* When AFTER MATCH SKIP TO A, you cannot jump to the first row of the match, otherwise it will cause an infinite loop
- SQL
-
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match
```
* When AFTER MATCH SKIP TO D, you cannot jump to a pattern variable that does not exist in the match group (D is defined as `false`, so it never appears in a match)
- SQL
-
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match
```
-### 3.3 Row Pattern Expressions - Extended Functions
-#### 3.3.1 CLASSIFIER() Function
+#### 1.2.6 ROWS PER MATCH Clause
-* Query SQL
+Used to specify the output method of the result set after a successful pattern match, including the following two main options:
+
+| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** |
+| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ONE ROW PER MATCH | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause\* Expressions defined in the MEASURES clause. | Outputs empty matches; skips unmatched rows. |
+| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause\* Columns in the ORDER BY clause\* Expressions defined in the MEASURES clause\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**: Outputs empty matches by default; skips unmatched rows.\* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**: Does not output empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**: Outputs empty matches and generates an additional output record for each unmatched row. |
+
+#### 1.2.7 MEASURES Clause
+
+Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause.
SQL
+```SQL
+MEASURES measure_expression AS measure_name [, ...]
+```
+
+* A `measure_expression` is a scalar value calculated from the matched set of data.
+
+| Usage Example | Description |
+| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. |
+| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). |
+| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] |
+
+* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`.
+
+#### 1.2.8 Row Pattern Recognition Expressions
+
+Expressions used in the MEASURES and DEFINE clauses are **scalar expressions**, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition.
+
+##### 1.2.8.1 Pattern Variable References
+
+```SQL
+A.totalprice
+U.orderdate
+orderstatus
+```
+
+* When a column name is prefixed with a **basic pattern variable** or a **combined pattern variable**, it refers to the corresponding column values of all rows matched by that variable.
+* If a column name has no prefix, it is equivalent to using the "**global combined pattern variable**" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match.
+
+> Using table names as column name prefixes in pattern recognition expressions is not allowed.
+
+##### 1.2.8.2 Extended Functions
+
+| Function Name | Function Syntax | Description |
+| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. |
+| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. |
+| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
+| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
+| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
+| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
+| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) |
+| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. |
+| Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. |
+
+**Examples**
+
+1. CLASSIFIER Function
+
+* Query SQL
+
```SQL
SELECT m.time, m.match, m.price, m.lower_or_higher, m.label
FROM t
@@ -513,10 +484,11 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* Analysis
-* Query Results
+ 
-SQL
+* Result
```SQL
+-----------------------------+-----+-----+---------------+-----+
@@ -532,11 +504,9 @@ SQL
Total line number = 6
```
-#### 3.3.2 Logical Navigation Functions
+2. Logical Navigation Functions
-* Query SQL
-
-SQL
+* Query SQL
```SQL
SELECT m.time, m.measure
@@ -544,18 +514,19 @@ FROM t
MATCH_RECOGNIZE (
ORDER BY time
MEASURES
- %s AS measure -- MEASURES Clause
+        %s AS measure  -- MEASURES Clause
ALL ROWS PER MATCH
PATTERN (A+)
DEFINE A AS true
) AS m;
```
-* Query Results
-
+* Results
* When the value is totalprice, RPR\_LAST(totalprice), RUNNING RPR\_LAST(totalprice)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -573,7 +544,9 @@ MATCH_RECOGNIZE (
* When the value is FINAL RPR\_LAST(totalprice)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -591,7 +564,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_FIRST(totalprice), RUNNING RPR\_FIRST(totalprice), FINAL RPR\_FIRST(totalprice)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -609,7 +584,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_LAST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -627,7 +604,9 @@ MATCH_RECOGNIZE (
* When the value is FINAL RPR\_LAST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -645,7 +624,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_FIRST(totalprice, 2) and FINAL RPR\_FIRST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -661,11 +642,9 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.3 Physical Navigation Functions
-
-* Query SQL
+3. Physical Navigation Functions
-SQL
+* Query SQL
```SQL
SELECT m.time, m.measure
@@ -673,18 +652,19 @@ FROM t
MATCH_RECOGNIZE (
ORDER BY time
MEASURES
- %s AS measure -- MEASURES Clause
+    %s AS measure -- MEASURES Clause
ALL ROWS PER MATCH
PATTERN (B)
DEFINE B AS B.totalprice >= PREV(B.totalprice)
) AS m;
```
-* Query Results
-
+* Results
* When the value is `PREV(totalprice)`
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -698,7 +678,9 @@ MATCH_RECOGNIZE (
* When the value is `PREV(B.totalprice, 2)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -712,7 +694,9 @@ MATCH_RECOGNIZE (
* When the value is `PREV(B.totalprice, 4)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -726,7 +710,9 @@ MATCH_RECOGNIZE (
* When the value is `NEXT(totalprice)` or `NEXT(B.totalprice, 1)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -738,9 +724,11 @@ MATCH_RECOGNIZE (
Total line number = 2
```
- * When the value is `NEXT(B.totalprice, 2)`
+  * When the value is `NEXT(B.totalprice, 2)`
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -752,11 +740,9 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-#### 3.3.4 Aggregate Functions
-
-* Query SQL
+4. Aggregate Functions
-SQL
+* Query SQL
```SQL
SELECT m.time, m.count, m.avg, m.sum, m.min, m.max
@@ -774,10 +760,11 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* Analysis (Taking MIN(totalprice) as an Example)
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+-----+-----------------+-----+---+---+
@@ -793,13 +780,11 @@ SQL
Total line number = 6
```
-#### 3.3.5 Nested Functions
+5. Nested Functions
-1. Example 1
+Example 1
-* Query SQL
-
-SQL
+* Query SQL
```SQL
SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label
@@ -824,10 +809,11 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* Analysis
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+-----+-----+---------------+-----+----------+----------+
@@ -843,11 +829,9 @@ SQL
Total line number = 6
```
-2. Example 2
+Example 2
-* Query SQL
-
-SQL
+* Query SQL
```SQL
SELECT m.time, m.prev_last_price, m.next_first_price
@@ -862,10 +846,11 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* Analysis
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+---------------+----------------+
@@ -881,18 +866,34 @@ SQL
Total line number = 6
```
-## 4. Scenario Examples
+##### 1.2.8.3 RUNNING and FINAL Semantics
+
+1. Definition
+
+* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row).
+* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group).
+
+2. Scope of Application
+
+* The DEFINE clause uses RUNNING semantics by default.
+* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics.
+
+3. Syntax Constraints
+
+* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.**
+  * Valid: `RUNNING RPR_LAST(A.totalprice)`, `FINAL RPR_LAST(A.totalprice)`
+ * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)`
+
+### 1.3 Scenario Examples
Using [Sample Data](../Reference/Sample-Data.md) as the source data
-### 4.1 Time Segment Query
+#### 1.3.1 Time Segment Query
Segment the data in table1 by time intervals less than or equal to 24 hours, and query the total number of data entries in each segment, as well as the start and end times.
Query SQL
-SQL
-
```SQL
SELECT start_time, end_time, cnt
FROM table1
@@ -907,9 +908,7 @@ MATCH_RECOGNIZE (
) AS m
```
-Query Results
-
-SQL
+Results
```SQL
+-----------------------------+-----------------------------+---+
@@ -921,14 +920,12 @@ SQL
Total line number = 2
```
-### 4.2 Difference Segment Query
+#### 1.3.2 Difference Segment Query
Segment the data in table2 by humidity value differences less than 0.1, and query the total number of data entries in each segment, as well as the start and end times.
* Query SQL
-SQL
-
```SQL
SELECT start_time, end_time, cnt
FROM table2
@@ -943,9 +940,7 @@ MATCH_RECOGNIZE (
) AS m;
```
-* Query Results
-
-SQL
+* Results
```SQL
+-----------------------------+-----------------------------+---+
@@ -958,14 +953,12 @@ SQL
Total line number = 3
```
-### 4.3 Event Statistics Query
+#### 1.3.3 Event Statistics Query
Group the data in table1 by device ID, and count the start and end times and maximum humidity value where the humidity in the Shanghai area is greater than 35.
* Query SQL
-SQL
-
```SQL
SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
FROM table1
@@ -980,13 +973,11 @@ MATCH_RECOGNIZE (
ONE ROW PER MATCH
PATTERN (A+)
DEFINE
- A AS A.region= 'Shanghai' AND A.humidity> 35
+ A AS A.region= '上海' AND A.humidity> 35
) AS m
```
-* Query Results
-
-SQL
+* Results
```SQL
+---------+-----+-----------------------------+-----------------------------+------------+
@@ -997,3 +988,741 @@ SQL
+---------+-----+-----------------------------+-----------------------------+------------+
Total line number = 2
```
+
+
+## 2. Window Functions
+
+### 2.1 Function Overview
+
+Window functions perform calculations on each row based on a specific set of rows related to the current row (called a "window"). They combine grouping operations (`PARTITION BY`), sorting (`ORDER BY`), and definable calculation ranges (window frame `FRAME`), enabling complex cross-row calculations without collapsing the original data rows. They are commonly used in data analysis scenarios such as ranking, cumulative sums, and moving averages.
+
+> Note: This feature is available starting from version V 2.0.5.
+
+For example, in a scenario where you need to query the cumulative power consumption values of different devices, you can achieve this using window functions.
+
+```SQL
+-- Original data
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00|    d0|    5|
+|1970-01-01T08:00:02.000+08:00|    d0|    3|
+|1970-01-01T08:00:03.000+08:00|    d0|    1|
+|1970-01-01T08:00:04.000+08:00|    d1|    2|
+|1970-01-01T08:00:05.000+08:00|    d1|    4|
++-----------------------------+------+-----+
+
+-- Create table and insert data
+CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
+insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
+
+
+-- Execute window function query
+SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
+```
+
+After grouping, sorting, and calculation (the steps are broken down in the figure below),
+
+
+
+the expected results can be obtained:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+### 2.2 Function Definition
+
+#### 2.2.1 SQL Definition
+
+```SQL
+windowDefinition
+ : name=identifier AS '(' windowSpecification ')'
+ ;
+
+windowSpecification
+ : (existingWindowName=identifier)?
+ (PARTITION BY partition+=expression (',' partition+=expression)*)?
+ (ORDER BY sortItem (',' sortItem)*)?
+ windowFrame?
+ ;
+
+windowFrame
+ : frameExtent
+ ;
+
+frameExtent
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=GROUPS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ | frameType=GROUPS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound
+ : UNBOUNDED boundType=PRECEDING #unboundedFrame
+ | UNBOUNDED boundType=FOLLOWING #unboundedFrame
+ | CURRENT ROW #currentRowBound
+ | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame
+ ;
+```
+
+#### 2.2.2 Window Definition
+
+##### 2.2.2.1 Partition
+
+`PARTITION BY` is used to divide data into multiple independent, unrelated "groups". Window functions can only access and operate on data within their respective groups, and cannot access data from other groups. This clause is optional; if not explicitly specified, all data is divided into the same group by default. It is worth noting that unlike `GROUP BY` which aggregates a group of data into a single row, the window function with `PARTITION BY` **does not affect the number of rows within the group.**
+
+* Example
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
++-----------------------------+------+----+-----+
+```
+
+##### 2.2.2.2 Ordering
+
+`ORDER BY` is used to sort data within a partition. After sorting, rows with equal values are called peers. Peers affect the behavior of window functions; for example, different rank functions handle peers differently, and different frame division methods also handle peers differently. This clause is optional.
+
+* Example
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+```
+
+Disassembly steps:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+##### 2.2.2.3 Framing
+
+For each row in a partition, the window function evaluates on a corresponding set of rows called a Frame (i.e., the input domain of the Window Function on each row). The Frame can be specified manually, involving two attributes when specified, as detailed below.
+
+<table style="text-align: left;">
+  <tbody>
+    <tr>
+      <th>Frame Attribute</th>
+      <th>Attribute Value</th>
+      <th>Value Description</th>
+    </tr>
+    <tr>
+      <td rowspan="3">Type</td>
+      <td>ROWS</td>
+      <td>Divide the frame by row number</td>
+    </tr>
+    <tr>
+      <td>GROUPS</td>
+      <td>Divide the frame by peers, i.e., rows with the same value are regarded as equivalent. All rows in peers are grouped into one group called a peer group</td>
+    </tr>
+    <tr>
+      <td>RANGE</td>
+      <td>Divide the frame by value</td>
+    </tr>
+    <tr>
+      <td rowspan="5">Start and End Position</td>
+      <td>UNBOUNDED PRECEDING</td>
+      <td>The first row of the entire partition</td>
+    </tr>
+    <tr>
+      <td>offset PRECEDING</td>
+      <td>Represents the row with an "offset" distance from the current row in the preceding direction</td>
+    </tr>
+    <tr>
+      <td>CURRENT ROW</td>
+      <td>The current row</td>
+    </tr>
+    <tr>
+      <td>offset FOLLOWING</td>
+      <td>Represents the row with an "offset" distance from the current row in the following direction</td>
+    </tr>
+    <tr>
+      <td>UNBOUNDED FOLLOWING</td>
+      <td>The last row of the entire partition</td>
+    </tr>
+  </tbody>
+</table>
+
+Among them, the meanings of `CURRENT ROW`, `offset PRECEDING`, and `offset FOLLOWING` vary with the type of frame, as shown in the following table:
+
+| | `ROWS` | `GROUPS` | `RANGE` |
+|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
+| `CURRENT ROW` | Current row | Since a peer group contains multiple rows, this option differs depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. | Same as GROUPS, differing depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. |
+| `offset PRECEDING` | The previous offset rows | The previous offset peer groups; | Rows whose value difference from the current row in the preceding direction is less than or equal to offset are grouped into one frame |
+| `offset FOLLOWING` | The following offset rows | The following offset peer groups. | Rows whose value difference from the current row in the following direction is less than or equal to offset are grouped into one frame |
+
+The syntax format is as follows:
+
+```SQL
+-- Specify both frame_start and frame_end
+{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
+-- Specify only frame_start, frame_end is CURRENT ROW
+{ RANGE | ROWS | GROUPS } frame_start
+```
+
+If the Frame is not specified manually, the default Frame division rules are as follows:
+
+* When the window function uses ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (i.e., from the first row of the partition to the current row, including the current row's peers). For example: In SUM(COL2) OVER(PARTITION BY COL1 ORDER BY COL2), the Frame defaults to include all preceding rows in the partition, the current row, and the current row's peers.
+* When the window function does not use ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (i.e., all rows in the entire window). For example: In AVG(COL2) OVER(PARTITION BY col1), the Frame defaults to include all rows in the partition, calculating the average of the entire partition.
+
+It should be noted that when the Frame type is GROUPS or RANGE, `ORDER BY` must be specified. The difference is that ORDER BY in GROUPS can involve multiple fields, while RANGE requires calculation and thus can only specify one field.
+
+* Example
+
+1. Frame type is ROWS
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Take the previous row and the current row as the Frame
+ * For the first row of the partition, since there is no previous row, the entire Frame has only this row, returning 1;
+ * For other rows of the partition, the entire Frame includes the current row and its previous row, returning 2:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
++-----------------------------+------+----+-----+
+```
+
+2. Frame type is GROUPS
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Take the previous peer group and the current peer group as the Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
+ * For the peer group with flow 1, since there are no peer groups smaller than it, the entire Frame has only this row, returning 1;
+ * For the peer group with flow 3, it itself contains 2 rows, and the previous peer group is the one with flow 1 (1 row), so the entire Frame has 3 rows, returning 3;
+ * For the peer group with flow 5, it itself contains 1 row, and the previous peer group is the one with flow 3 (2 rows), so the entire Frame has 3 rows, returning 3.
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+3. Frame type is RANGE
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Group rows whose value differs from the current row's value by **less than or equal to 2** in the preceding direction into the same Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
+ * For the row with flow 1, since it is the smallest row, the entire Frame has only this row, returning 1;
+ * For the row with flow 3, note that CURRENT ROW exists as frame_end, so it is the last row of the entire peer group. There is 1 row smaller than it that meets the requirement, and the peer group has 2 rows, so the entire Frame has 3 rows, returning 3;
+ * For the row with flow 5, it itself contains 1 row, and there are 2 rows smaller than it that meet the requirement, so the entire Frame has 3 rows, returning 3.
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+### 2.3 Built-in Window Functions
+
+<table style="text-align: left;">
+  <tbody>
+    <tr>
+      <th>Window Function Category</th>
+      <th>Window Function Name</th>
+      <th>Function Definition</th>
+      <th>Supports FRAME Clause</th>
+    </tr>
+    <tr>
+      <td>Aggregate Function</td>
+      <td>All built-in aggregate functions</td>
+      <td>Aggregate a set of values to get a single aggregated result.</td>
+      <td>Yes</td>
+    </tr>
+    <tr>
+      <td rowspan="5">Value Function</td>
+      <td>first_value</td>
+      <td>Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs</td>
+      <td>Yes</td>
+    </tr>
+    <tr>
+      <td>last_value</td>
+      <td>Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs</td>
+      <td>Yes</td>
+    </tr>
+    <tr>
+      <td>nth_value</td>
+      <td>Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs</td>
+      <td>Yes</td>
+    </tr>
+    <tr>
+      <td>lead</td>
+      <td>Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>lag</td>
+      <td>Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td rowspan="6">Rank Function</td>
+      <td>rank</td>
+      <td>Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>dense_rank</td>
+      <td>Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>row_number</td>
+      <td>Return the row number of the current row in the entire partition; note that the row number starts from 1</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>percent_rank</td>
+      <td>Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (rank() - 1) / (n - 1), where n is the number of rows in the entire partition</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>cume_dist</td>
+      <td>Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (number of rows less than or equal to it) / n</td>
+      <td>No</td>
+    </tr>
+    <tr>
+      <td>ntile</td>
+      <td>Specify n to number each row from 1 to n.</td>
+      <td>No</td>
+    </tr>
+  </tbody>
+</table>
+
+#### 2.3.1 Aggregate Function
+
+All built-in aggregate functions such as `sum()`, `avg()`, `min()`, `max()` can be used as Window Functions.
+
+> Note: Unlike GROUP BY, each row has a corresponding output in the Window Function
+
+Example:
+
+```SQL
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.2 Value Function
+
+1. `first_value`
+
+* Function name: `first_value(value) [IGNORE NULLS]`
+* Definition: Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+-----------+
+| time|device|flow|first_value|
++-----------------------------+------+----+-----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----------+
+```
+
+2. `last_value`
+
+* Function name: `last_value(value) [IGNORE NULLS]`
+* Definition: Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|last_value|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+3. `nth_value`
+
+* Function name: `nth_value(value, n) [IGNORE NULLS]`
+* Definition: Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|nth_values|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+4. lead
+
+* Function name: `lead(value[, offset[, default]]) [IGNORE NULLS]`
+* Definition: Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
+* The lead function requires an ORDER BY window clause
+* Example:
+
+```SQL
+IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow|lead|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4|null|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:03.000+08:00| d0| 1|null|
++-----------------------------+------+----+----+
+```
+
+5. lag
+
+* Function name: `lag(value[, offset[, default]]) [IGNORE NULLS]`
+* Definition: Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
+* The lag function requires an ORDER BY window clause
+* Example:
+
+```SQL
+IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow| lag|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2|null|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3|null|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.3 Rank Function
+
+1. rank
+
+* Function name: `rank()`
+* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers;
+* Example:
+
+```SQL
+IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+2. dense_rank
+
+* Function name: `dense_rank()`
+* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers.
+* Example:
+
+```SQL
+IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|dense_rank|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+----------+
+```
+
+3. row_number
+
+* Function name: `row_number()`
+* Definition: Return the row number of the current row in the entire partition; note that the row number starts from 1;
+* Example:
+
+```SQL
+IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|row_number|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----------+
+```
+
+4. percent_rank
+
+* Function name: `percent_rank()`
+* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(rank() - 1) / (n - 1)**, where n is the number of rows in the entire partition;
+* Example:
+
+```SQL
+IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+------------------+
+| time|device|flow| percent_rank|
++-----------------------------+------+----+------------------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+------------------+
+```
+
+5. cume_dist
+
+* Function name: `cume_dist()`
+* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(number of rows less than or equal to it) / n**.
+* Example:
+
+```SQL
+IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+---------+
+| time|device|flow|cume_dist|
++-----------------------------+------+----+---------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+---------+
+```
+
+6. ntile
+
+* Function name: `ntile(n)`
+* Definition: Divide the rows of the partition into n buckets that are as equal in size as possible, and number each row with its bucket number, from 1 to n.
+    * If the number of rows in the entire partition is less than n, the number is the row index;
+    * If the number of rows in the entire partition is greater than n:
+        * If the number of rows is divisible by n, every bucket gets the same number of rows. For example, if the number of rows is 4 and n is 2, the numbers are 1, 1, 2, 2;
+        * If the number of rows is not divisible by n, the remaining rows are distributed one each to the first few buckets. For example, if the number of rows is 5 and n is 3, the numbers are 1, 1, 2, 2, 3;
+* Example:
+
+```SQL
+IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+-----+
+| time|device|flow|ntile|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
++-----------------------------+------+----+-----+
+```
+
+### 2.4 Scenario Examples
+
+1. Multi-device diff function
+
+For each row of each device, calculate the difference from the previous row:
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+For each row of each device, calculate the difference from the next row:
+
+```SQL
+SELECT
+ *,
+ measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+For each row of a single device, calculate the difference from the previous row (same for the next row):
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (ORDER BY time)
+FROM data
+WHERE device = 'd1'
+  AND timeCondition;
+```
+
+2. Multi-device TOP_K/BOTTOM_K
+
+Use rank to get the sequence number, then retain the desired order in the outer query.
+
+(Note: The execution order of window functions is after the HAVING clause, so a subquery is needed here)
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+        rank() OVER (PARTITION BY device ORDER BY time DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+In addition to sorting by time, you can also sort by the value of the measurement point:
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+        rank() OVER (PARTITION BY device ORDER BY measurement DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+3. Multi-device CHANGE_POINTS
+
+This SQL is used to remove consecutive identical values in the input sequence, which can be achieved with lead + subquery:
+
+```SQL
+SELECT
+ time,
+ device,
+ measurement
+FROM(
+ SELECT
+ time,
+ device,
+ measurement,
+ LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
+ FROM data
+ WHERE timeCondition
+)
+WHERE measurement != next OR next IS NULL;
+```
diff --git a/src/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md b/src/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
index 659cbfd73..0a0fdb1f3 100644
--- a/src/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
+++ b/src/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
@@ -40,11 +40,9 @@ The IoTDB table model query syntax supports the following clauses:
- **SELECT Clause**: Specifies the columns to be included in the result. Details: [SELECT Clause](../SQL-Manual/Select-Clause.md)
- **FROM Clause**: Indicates the data source for the query, which can be a single table, multiple tables joined using the `JOIN` clause, or a subquery. Details: [FROM & JOIN Clause](../SQL-Manual/From-Join-Clause.md)
-- **patternRecognition**: Row Pattern Recognition, which supports capturing a segment of continuous data by defining recognition logic for pattern variables and regular expressions, and performs analysis and calculation on each captured data segment. Details:[Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md)
- **WHERE Clause**: Filters rows based on specific conditions. Logically executed immediately after the `FROM` clause. Details: [WHERE Clause](../SQL-Manual/Where-Clause.md)
- **GROUP BY Clause**: Used for aggregating data, specifying the columns for grouping. Details: [GROUP BY Clause](../SQL-Manual/GroupBy-Clause.md)
- **HAVING Clause**: Applied after the `GROUP BY` clause to filter grouped data, similar to `WHERE` but operates after grouping. Details:[HAVING Clause](../SQL-Manual/Having-Clause.md)
-- **WINDOW FUNCTION**: Window Function, a special function that performs calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations, sorting, and definable calculation ranges to implement complex cross-row calculations without collapsing the original data rows. Details: [Window Function](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Function)
- **FILL Clause**: Handles missing values in query results by specifying fill methods (e.g., previous non-null value or linear interpolation) for better visualization and analysis. Details:[FILL Clause](../SQL-Manual/Fill-Clause.md)
- **ORDER BY Clause**: Sorts query results in ascending (`ASC`) or descending (`DESC`) order, with optional handling for null values (`NULLS FIRST` or `NULLS LAST`). Details: [ORDER BY Clause](../SQL-Manual/OrderBy-Clause.md)
- **OFFSET Clause**: Specifies the starting position for the query result, skipping the first `OFFSET` rows. Often used with the `LIMIT` clause. Details: [LIMIT and OFFSET Clause](../SQL-Manual/Limit-Offset-Clause.md)
@@ -590,77 +588,3 @@ IoTDB> SELECT time, temperature, humidity
Total line number = 10
It costs 0.093s
```
-
-### 3.9 Row Pattern Recognition
-
-**Example**: Segment data in table1 by time intervals of 24 hours or less, and query the total number of data entries in each segment, as well as the start and end times.
-
-```SQL
-SELECT start_time, end_time, cnt
-FROM table1
-MATCH_RECOGNIZE (
- ORDER BY time
- MEASURES
- RPR_FIRST(A.time) AS start_time,
- RPR_LAST(time) AS end_time,
- COUNT() AS cnt
- PATTERN (A B*)
- DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
-) AS m
-```
-
-**Result**:
-
-```SQL
-+-----------------------------+-----------------------------+---+
-| start_time| end_time|cnt|
-+-----------------------------+-----------------------------+---+
-|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
-|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
-+-----------------------------+-----------------------------+---+
-Total line number = 2
-```
-
-### 3.10 Window Functions
-
-**Example**: Query the cumulative power consumption values of different devices.
-
-The original data is as follows:
-
-```SQL
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-```
-
-**Query Statement**:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-After grouping, sorting, and calculation (steps are disassembled as shown in the figure below),
-
-
-
-**Result**:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
diff --git a/src/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md b/src/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
index a91a86dcb..a0b0942a3 100644
--- a/src/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
+++ b/src/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
@@ -62,7 +62,7 @@ This guide will assist you in quickly installing and deploying IoTDB. You can qu
2. Data Insertion & Updates: IoTDB provides multiple methods for inserting real-time data. For basic data insertion and updating operations, please see [Write&Updata Data](../Basic-Concept/Write-Updata-Data.md)
-3. Data Querying: IoTDB offers a rich set of data querying capabilities. For a basic introduction to data querying, please see [Query Data](../Basic-Concept/Query-Data_timecho.md). It includes [Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md) suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events, as well as [Window Functions](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Functions) and other featured functions often used in data analysis scenarios.
+3. Data Querying: IoTDB offers a rich set of data querying capabilities. For a basic introduction to data querying, please see [Query Data](../Basic-Concept/Query-Data_timecho.md). It includes pattern queries and window functions applicable to time-series featured analysis. For detailed introductions, please refer to [Timeseries Featured Analysis](../User-Manual/Timeseries-Featured-Analysis_timecho.md).
4. Data Deletion: IoTDB supports two deletion methods: SQL-based deletion and automatic expiration deletion (TTL).
diff --git a/src/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md b/src/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
index 5afcc9592..aa0df8cd8 100644
--- a/src/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
+++ b/src/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
@@ -697,58 +697,7 @@ IoTDB> SELECT window_start, window_end, stock_id, avg(price) as avg FROM CUMULAT
## 4. Window Functions
-### 4.1 Function Overview
-
-The Window Functions supported by IoTDB are special functions that perform calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations (`PARTITION BY`), sorting (`ORDER BY`), and definable calculation ranges (window frame `FRAME`), enabling complex cross-row calculations without collapsing the original data rows. It is commonly used in data analysis scenarios such as ranking, cumulative sums, moving averages, etc.
-
-> Note: This feature is available starting from version V 2.0.5.
-
-For example, in a scenario where you need to query the cumulative power consumption values of different devices, you can achieve this using window functions.
-
-```SQL
--- Original data
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-
--- Create table and insert data
-CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
-insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
-
-
--- Execute window function query
-SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-After grouping, sorting, and calculation (steps are disassembled as shown in the figure below),
-
-
-
-the expected results can be obtained:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-### 4.2 Function Definition
-
-#### 4.2.1 SQL Definition
+### 4.1 SQL Definition
```SQL
windowDefinition
@@ -783,193 +732,56 @@ frameBound
;
```
-#### 4.2.2 Window Definition
-
-##### Partition
-
-`PARTITION BY` is used to divide data into multiple independent, unrelated "groups". Window functions can only access and operate on data within their respective groups, and cannot access data from other groups. This clause is optional; if not explicitly specified, all data is divided into the same group by default. It is worth noting that unlike `GROUP BY` which aggregates a group of data into a single row, the window function with `PARTITION BY` **does not affect the number of rows within the group.**
-
-* Example
-
-Query statement:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
-```
-
-Disassembly steps:
+For more detailed introductions to the features, please refer to: [Window Functions](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_2-window-functions)
-
+### 4.2 Usage Examples
-Query result:
+The original data of the device_flow table is as follows:
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
-+-----------------------------+------+----+-----+
+```sql
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00|    d0|    5|
+|1970-01-01T08:00:02.000+08:00|    d0|    3|
+|1970-01-01T08:00:03.000+08:00|    d0|    1|
+|1970-01-01T08:00:04.000+08:00|    d1|    2|
+|1970-01-01T08:00:05.000+08:00|    d1|    4|
++-----------------------------+------+-----+
```
-##### Ordering
-
-`ORDER BY` is used to sort data within a partition. After sorting, rows with equal values are called peers. Peers affect the behavior of window functions; for example, different rank functions handle peers differently, and different frame division methods also handle peers differently. This clause is optional.
+1. Query all columns from device_flow, group the data by the device dimension, sort the records within each device group by the value of the flow field, calculate the cumulative sum of the flow field, and finally return the cumulative sum as a column named sum.
-* Example
-
-Query statement:
+SQL:
```SQL
-IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
```
-Disassembly steps:
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+----+
-| time|device|flow|rank|
+| time|device|flow| sum|
+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
+-----------------------------+------+----+----+
```
+2. Query all original columns from the device_flow table, group the data by the device dimension (device), sort the records within each device group by the value of the flow field, count the number of rows within the range of "the flow group of the current row + the previous 1 flow group", and finally return the count result as a column named count.
-##### Framing
-
-For each row in a partition, the window function evaluates on a corresponding set of rows called a Frame (i.e., the input domain of the Window Function on each row). The Frame can be specified manually, involving two attributes when specified, as detailed below.
-
-
-
-
- | Frame Attribute |
- Attribute Value |
- Value Description |
-
-
- | Type |
- ROWS |
- Divide the frame by row number |
-
-
- | GROUPS |
- Divide the frame by peers, i.e., rows with the same value are regarded as equivalent. All rows in peers are grouped into one group called a peer group |
-
-
- | RANGE |
- Divide the frame by value |
-
-
- | Start and End Position |
- UNBOUNDED PRECEDING |
- The first row of the entire partition |
-
-
- | offset PRECEDING |
- Represents the row with an "offset" distance from the current row in the preceding direction |
-
-
- | CURRENT ROW |
- The current row |
-
-
- | offset FOLLOWING |
- Represents the row with an "offset" distance from the current row in the following direction |
-
-
- | UNBOUNDED FOLLOWING |
- The last row of the entire partition |
-
-
-
-
-Among them, the meanings of `CURRENT ROW`, `PRECEDING N`, and `FOLLOWING N` vary with the type of frame, as shown in the following table:
-
-| | `ROWS` | `GROUPS` | `RANGE` |
-|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
-| `CURRENT ROW` | Current row | Since a peer group contains multiple rows, this option differs depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. | Same as GROUPS, differing depending on whether it acts on frame_start and frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. |
-| `offset PRECEDING` | The previous offset rows | The previous offset peer groups; | Rows whose value difference from the current row in the preceding direction is less than or equal to offset are grouped into one frame |
-| `offset FOLLOWING` | The following offset rows | The following offset peer groups. | Rows whose value difference from the current row in the following direction is less than or equal to offset are grouped into one frame |
-
-The syntax format is as follows:
-
-```SQL
--- Specify both frame_start and frame_end
-{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
--- Specify only frame_start, frame_end is CURRENT ROW
-{ RANGE | ROWS | GROUPS } frame_start
-```
-
-If the Frame is not specified manually, the default Frame division rules are as follows:
-
-* When the window function uses ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (i.e., from the first row of the window to the current row). For example: In RANK() OVER(PARTITION BY COL1 ORDER BY COL2), the Frame defaults to include the current row and all preceding rows in the partition.
-* When the window function does not use ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (i.e., all rows in the entire window). For example: In AVG(COL2) OVER(PARTITION BY col1), the Frame defaults to include all rows in the partition, calculating the average of the entire partition.
-
-It should be noted that when the Frame type is GROUPS or RANGE, `ORDER BY` must be specified. The difference is that ORDER BY in GROUPS can involve multiple fields, while RANGE requires calculation and thus can only specify one field.
-
-* Example
-
-1. Frame type is ROWS
-
-Query statement:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
-```
-
-Disassembly steps:
-
-* Take the previous row and the current row as the Frame
- * For the first row of the partition, since there is no previous row, the entire Frame has only this row, returning 1;
- * For other rows of the partition, the entire Frame includes the current row and its previous row, returning 2:
-
-
-
-Query result:
-
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
-+-----------------------------+------+----+-----+
-```
-
-2. Frame type is GROUPS
-
-Query statement:
+SQL:
```SQL
IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-Disassembly steps:
-
-* Take the previous peer group and the current peer group as the Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
- * For the peer group with flow 1, since there are no peer groups smaller than it, the entire Frame has only this row, returning 1;
- * For the peer group with flow 3, it itself contains 2 rows, and the previous peer group is the one with flow 1 (1 row), so the entire Frame has 3 rows, returning 3;
- * For the peer group with flow 5, it itself contains 1 row, and the previous peer group is the one with flow 3 (2 rows), so the entire Frame has 3 rows, returning 3.
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+-----+
@@ -984,24 +796,15 @@ Query result:
+-----------------------------+------+----+-----+
```
-3. Frame type is RANGE
+3. Query all original columns from the device_flow table, group the data by device, sort the records in ascending order by the value of the flow field within each group, count the number of all rows falling within the numeric range of "the flow value of the current row minus 2" to "the flow value of the current row", and finally return the count result as a column named count.
-Query statement:
+SQL:
```SQL
IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-Disassembly steps:
-
-* Group rows whose data is **less than or equal to 2** compared to the current row into the same Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
- * For the row with flow 1, since it is the smallest row, the entire Frame has only this row, returning 1;
- * For the row with flow 3, note that CURRENT ROW exists as frame_end, so it is the last row of the entire peer group. There is 1 row smaller than it that meets the requirement, and the peer group has 2 rows, so the entire Frame has 3 rows, returning 3;
- * For the row with flow 5, it itself contains 1 row, and there are 2 rows smaller than it that meet the requirement, so the entire Frame has 3 rows, returning 3.
-
-
-
-Query result:
+Result:
```SQL
+-----------------------------+------+----+-----+
@@ -1015,419 +818,3 @@ Query result:
|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+-----------------------------+------+----+-----+
```
-
-### 4.3 Built-in Window Functions
-
-
-
-
- | Window Function Category |
- Window Function Name |
- Function Definition |
- Supports FRAME Clause |
-
-
- | Aggregate Function |
- All built-in aggregate functions |
- Aggregate a set of values to get a single aggregated result. |
- Yes |
-
-
- | Value Function |
- first_value |
- Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs |
- Yes |
-
-
- | last_value |
- Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs |
- Yes |
-
-
- | nth_value |
- Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs |
- Yes |
-
-
- | lead |
- Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default |
- No |
-
-
- | lag |
- Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default |
- No |
-
-
- | Rank Function |
- rank |
- Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers |
- No |
-
-
- | dense_rank |
- Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers |
- No |
-
-
- | row_number |
- Return the row number of the current row in the entire partition; note that the row number starts from 1 |
- No |
-
-
- | percent_rank |
- Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (rank() - 1) / (n - 1), where n is the number of rows in the entire partition |
- No |
-
-
- | cume_dist |
- Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (number of rows less than or equal to it) / n |
- No |
-
-
- | ntile |
- Specify n to number each row from 1 to n. |
- No |
-
-
-
-
-#### 4.3.1 Aggregate Function
-
-All built-in aggregate functions such as `sum()`, `avg()`, `min()`, `max()` can be used as Window Functions.
-
-> Note: Unlike GROUP BY, each row has a corresponding output in the Window Function
-
-Example:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.2 Value Function
-
-1. `first_value`
-
-* Function name: `first_value(value) [IGNORE NULLS]`
-* Definition: Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+-----------+
-| time|device|flow|first_value|
-+-----------------------------+------+----+-----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+-----------+
-```
-
-2. `last_value`
-
-* Function name: `last_value(value) [IGNORE NULLS]`
-* Definition: Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|last_value|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-3. `nth_value`
-
-* Function name: `nth_value(value, n) [IGNORE NULLS]`
-* Definition: Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs;
-* Example:
-
-```SQL
-IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|nth_values|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-4. lead
-
-* Function name: `lead(value[, offset[, default]]) [IGNORE NULLS]`
-* Definition: Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
-* The lead function requires an ORDER BY window clause
-* Example:
-
-```SQL
-IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
-+-----------------------------+------+----+----+
-| time|device|flow|lead|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4|null|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:03.000+08:00| d0| 1|null|
-+-----------------------------+------+----+----+
-```
-
-5. lag
-
-* Function name: `lag(value[, offset[, default]]) [IGNORE NULLS]`
-* Definition: Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
-* The lag function requires an ORDER BY window clause
-* Example:
-
-```SQL
-IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY device);
-+-----------------------------+------+----+----+
-| time|device|flow| lag|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2|null|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3|null|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.3 Rank Function
-
-1. rank
-
-* Function name: `rank()`
-* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers;
-* Example:
-
-```SQL
-IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----+
-| time|device|flow|rank|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----+
-```
-
-2. dense_rank
-
-* Function name: `dense_rank()`
-* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers.
-* Example:
-
-```SQL
-IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|dense_rank|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+----------+
-```
-
-3. row_number
-
-* Function name: `row_number()`
-* Definition: Return the row number of the current row in the entire partition; note that the row number starts from 1;
-* Example:
-
-```SQL
-IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|row_number|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----------+
-```
-
-4. percent_rank
-
-* Function name: `percent_rank()`
-* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(rank() - 1) / (n - 1)**, where n is the number of rows in the entire partition;
-* Example:
-
-```SQL
-IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+------------------+
-| time|device|flow| percent_rank|
-+-----------------------------+------+----+------------------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+------------------+
-```
-
-5. cume_dist
-
-* Function name: `cume_dist`
-* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(number of rows less than or equal to it) / n**.
-* Example:
-
-```SQL
-IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+---------+
-| time|device|flow|cume_dist|
-+-----------------------------+------+----+---------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+---------+
-```
-
-6. ntile
-
-* Function name: `ntile`
-* Definition: Specify n to number each row from 1 to n.
- * If the number of rows in the entire partition is less than n, the number is the row index;
- * If the number of rows in the entire partition is greater than n:
- * If the number of rows is divisible by n, it is perfect. For example, if the number of rows is 4 and n is 2, the numbers are 1, 1, 2, 2;
- * If the number of rows is not divisible by n, distribute to the first few groups. For example, if the number of rows is 5 and n is 3, the numbers are 1, 1, 2, 2, 3;
-* Example:
-
-```SQL
-IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+-----+
-| time|device|flow|ntile|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-+-----------------------------+------+----+-----+
-```
-
-### 4.4 Scenario Examples
-
-1. Multi-device diff function
-
-For each row of each device, calculate the difference from the previous row:
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-For each row of each device, calculate the difference from the next row:
-
-```SQL
-SELECT
- *,
- measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-For each row of a single device, calculate the difference from the previous row (same for the next row):
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (ORDER BY time)
-FROM data
-where device='d1'
-WHERE timeCondition;
-```
-
-2. Multi-device TOP_K/BOTTOM_K
-
-Use rank to get the sequence number, then retain the desired order in the outer query.
-
-(Note: The execution order of window functions is after the HAVING clause, so a subquery is needed here)
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY time DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-In addition to sorting by time, you can also sort by the value of the measurement point:
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY measurement DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-3. Multi-device CHANGE_POINTS
-
-This SQL is used to remove consecutive identical values in the input sequence, which can be achieved with lead + subquery:
-
-```SQL
-SELECT
- time,
- device,
- measurement
-FROM(
- SELECT
- time,
- device,
- measurement,
- LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
- FROM data
- WHERE timeCondition
-)
-WHERE measurement != next OR next IS NULL;
-```
diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
new file mode 100644
index 000000000..62cd4691e
--- /dev/null
+++ b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
@@ -0,0 +1,167 @@
+
+
+# Pattern Query
+
+## 1. Syntax Definition
+
+```SQL
+MATCH_RECOGNIZE (
+ [ PARTITION BY column [, ...] ]
+ [ ORDER BY column [, ...] ]
+ [ MEASURES measure_definition [, ...] ]
+ [ ROWS PER MATCH ]
+ [ AFTER MATCH skip_to ]
+ PATTERN ( row_pattern )
+ [ SUBSET subset_definition [, ...] ]
+ DEFINE variable_definition [, ...]
+)
+```
+
+**Note:**
+
+* PARTITION BY: Optional. Used to group the input table, and each group can perform pattern matching independently. If this clause is not specified, the entire input table will be processed as a single unit.
+* ORDER BY: Optional. Used to ensure that input data is processed in a specific order during matching.
+* MEASURES: Optional. Used to specify which information to extract from the matched segment of data.
+* ROWS PER MATCH: Optional. Used to specify the output method of the result set after successful pattern matching.
+* AFTER MATCH SKIP: Optional. Used to specify which row to resume from for the next pattern match after identifying a non-empty match.
+* PATTERN: Used to define the row pattern to be matched.
+* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set.
+* DEFINE: Used to define the basic pattern variables for the row pattern.
+
+For a more detailed introduction to these features, please refer to: [Pattern Query](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_1-pattern-query)
+
+## 2. Usage Examples
+
+Using [Sample Data](../Reference/Sample-Data.md) as the source data
+
+1. Time Segment Query
+
+Split the data in table1 into segments in which the time gap between adjacent rows is less than or equal to 24 hours, and query the total number of rows in each segment, as well as its start and end times.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table1
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
+) AS m
+```
+
+* Query Results
+
+SQL
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
+|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
++-----------------------------+-----------------------------+---+
+Total line number = 2
+```
+
+2. Difference Segment Query
+
+Split the data in table2 into segments in which the humidity difference between adjacent rows is less than or equal to 0.1, and query the total number of rows in each segment, as well as its start and end times.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table2
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1
+) AS m;
+```
+
+* Query Results
+
+SQL
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2|
+|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2|
+|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2|
++-----------------------------+-----------------------------+---+
+Total line number = 3
+```
+
+3. Event Statistics Query
+
+Partition the data in table1 by device ID and, for each run of consecutive rows where the region is Shanghai and the humidity is greater than 35, query the start time, the end time, and the maximum humidity value.
+
+* Query SQL
+
+SQL
+
+```SQL
+SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
+FROM table1
+MATCH_RECOGNIZE (
+ PARTITION BY device_id
+ ORDER BY time
+ MEASURES
+ MATCH_NUMBER() AS match,
+ RPR_FIRST(A.time) AS event_start,
+ RPR_LAST(A.time) AS event_end,
+ MAX(A.humidity) AS max_humidity
+ ONE ROW PER MATCH
+ PATTERN (A+)
+ DEFINE
+ A AS A.region= 'Shanghai' AND A.humidity> 35
+) AS m
+```
+
+* Query Results
+
+SQL
+
+```SQL
++---------+-----+-----------------------------+-----------------------------+------------+
+|device_id|match| event_start| event_end|max_humidity|
++---------+-----+-----------------------------+-----------------------------+------------+
+| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
+| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
++---------+-----+-----------------------------+-----------------------------+------------+
+Total line number = 2
+```
diff --git a/src/UserGuide/latest-Table/SQL-Manual/overview_timecho.md b/src/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
index a7be3102b..19afdc1b8 100644
--- a/src/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
+++ b/src/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
@@ -40,11 +40,9 @@ The IoTDB table model query syntax supports the following clauses:
- **SELECT Clause**: Specifies the columns to be included in the result. Details: [SELECT Clause](../SQL-Manual/Select-Clause.md)
- **FROM Clause**: Indicates the data source for the query, which can be a single table, multiple tables joined using the `JOIN` clause, or a subquery. Details: [FROM & JOIN Clause](../SQL-Manual/From-Join-Clause.md)
-- **patternRecognition**: Row Pattern Recognition, which supports capturing a segment of continuous data by defining recognition logic for pattern variables and regular expressions, and performs analysis and calculation on each captured data segment. Details:[Row Pattern Recognition](../SQL-Manual/Row-Pattern-Recognition.md)
- **WHERE Clause**: Filters rows based on specific conditions. Logically executed immediately after the `FROM` clause. Details: [WHERE Clause](../SQL-Manual/Where-Clause.md)
- **GROUP BY Clause**: Used for aggregating data, specifying the columns for grouping. Details: [GROUP BY Clause](../SQL-Manual/GroupBy-Clause.md)
- **HAVING Clause**: Applied after the `GROUP BY` clause to filter grouped data, similar to `WHERE` but operates after grouping. Details:[HAVING Clause](../SQL-Manual/Having-Clause.md)
-- **WINDOW FUNCTION**: Window Function, a special function that performs calculations on each row based on a specific set of rows related to the current row (called a "window"). It combines grouping operations, sorting, and definable calculation ranges to implement complex cross-row calculations without collapsing the original data rows. Details: [Window Function](../SQL-Manual/Featured-Functions_timecho.md#_4-Window-Function)
- **FILL Clause**: Handles missing values in query results by specifying fill methods (e.g., previous non-null value or linear interpolation) for better visualization and analysis. Details:[FILL Clause](../SQL-Manual/Fill-Clause.md)
- **ORDER BY Clause**: Sorts query results in ascending (`ASC`) or descending (`DESC`) order, with optional handling for null values (`NULLS FIRST` or `NULLS LAST`). Details: [ORDER BY Clause](../SQL-Manual/OrderBy-Clause.md)
- **OFFSET Clause**: Specifies the starting position for the query result, skipping the first `OFFSET` rows. Often used with the `LIMIT` clause. Details: [LIMIT and OFFSET Clause](../SQL-Manual/Limit-Offset-Clause.md)
diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
similarity index 55%
rename from src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md
rename to src/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
index e428fe52c..cb36c56f6 100644
--- a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md
+++ b/src/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
@@ -19,21 +19,22 @@
-->
-# Row Pattern Recognition
+# Timeseries Featured Analysis
-## 1. Overview
+For time-series data feature analysis scenarios, IoTDB provides two core capabilities: pattern query and window functions. These capabilities deliver a flexible and efficient solution for in-depth mining and complex computation of time-series data. The following sections will elaborate on the two features in detail.
-IoTDB supports Row Pattern Recognition. This feature enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events. If we regard Row Pattern Recognition as grouping processing of data, the core process is roughly as follows:
+## 1. Pattern Query
-* Perform group capture through the PATTERN, DEFINE, and SUBSET clauses
-* Conduct computational processing on the captured groups through the MEASURES clause
-* Set the output format of groups through the ROWS PER MATCH clause
-* Specify how to locate the start position of the next group through the AFTER MATCH SKIP clause
+### 1.1 Overview
+
+Pattern query enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data (as shown in the figure below) and detecting specific events.
+
+
> Note: This feature is available starting from version V2.0.5.
-## 2. Function Introduction
-### 2.1 Syntax Format
+### 1.2 Function Introduction
+#### 1.2.1 Syntax Format
```SQL
MATCH_RECOGNIZE (
@@ -59,7 +60,28 @@ MATCH_RECOGNIZE (
* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set.
* DEFINE: Used to define the basic pattern variables for the row pattern.
-### 2.2 DEFINE Clause
+**Original Data for Syntax Examples:**
+
+```SQL
+IoTDB:database3> select * from t
++-----------------------------+------+----------+
+| time|device|totalprice|
++-----------------------------+------+----------+
+|2025-01-01T00:01:00.000+08:00| d1| 90|
+|2025-01-01T00:02:00.000+08:00| d1| 80|
+|2025-01-01T00:03:00.000+08:00| d1| 70|
+|2025-01-01T00:04:00.000+08:00| d1| 80|
+|2025-01-01T00:05:00.000+08:00| d1| 70|
+|2025-01-01T00:06:00.000+08:00| d1| 80|
++-----------------------------+------+----------+
+
+-- Creation Statement
+create table t(device tag, totalprice int32 field)
+
+insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
+```
+
+#### 1.2.2 DEFINE Clause
Used to specify the judgment condition for each basic pattern variable in pattern recognition. These variables are usually represented by identifiers (e.g., `A`, `B`), and the Boolean expressions in this clause precisely define which rows meet the requirements of the variable.
@@ -72,7 +94,7 @@ DEFINE B AS totalprice < PREV(totalprice)
* Variables not **explicitly** defined in this clause have an implicitly set condition of always true (TRUE), meaning they can be successfully matched on any input row.
-### 2.3 SUBSET Clause
+#### 1.2.3 SUBSET Clause
Used to merge rows matched by multiple basic pattern variables (e.g., `A`, `B`) into a combined pattern variable (e.g., `U`), allowing these rows to be treated as a single logical set for operations. It can be used in the `MEASURES`, `DEFINE`, and `AFTER MATCH SKIP` clauses.
@@ -84,7 +106,7 @@ For example, for the pattern `PATTERN ((A | B){5} C+)`, it is impossible to dete
1. In the `MEASURES` clause, if you need to reference the last row matched in this phase, you can do so by defining the combined pattern variable `SUBSET U = (A, B)`. At this point, the expression `RPR_LAST(U.totalprice)` will directly return the `totalprice` value of the target row.
2. In the `AFTER MATCH SKIP` clause, if the matching result does not include the basic pattern variable A or B, executing `AFTER MATCH SKIP TO LAST B` or `AFTER MATCH SKIP TO LAST A` will fail to jump due to missing anchors. However, by introducing the combined pattern variable `SUBSET U = (A, B)`, using `AFTER MATCH SKIP TO LAST U` is always valid.
-### 2.4 PATTERN Clause
+#### 1.2.4 PATTERN Clause
Used to define the row pattern to be matched, whose basic building block is a row pattern variable.
@@ -92,7 +114,7 @@ Used to define the row pattern to be matched, whose basic building block is a ro
PATTERN ( row_pattern )
```
-#### 2.4.1 Pattern Types
+##### 1.2.4.1 Pattern Types
| Row Pattern | Syntax Format | Description |
|-----------------------|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -103,7 +125,7 @@ PATTERN ( row_pattern )
| Empty Pattern | `()` | Represents an empty match that does not contain any rows. |
| Pattern Exclusion | `{- row_pattern -}` | Used to specify the matched part to be excluded from the output. Usually used with the `ALL ROWS PER MATCH` option to output rows of interest. For example, `PATTERN (A {- B+ C+ -} D+)` with ALL ROWS PER MATCH will only output the first row `(corresponding to A)` and the trailing rows `(corresponding to D+)` of the match. |
-#### 2.4.2 Partition Start/End Anchor
+##### 1.2.4.2 Partition Start/End Anchor
* `^A` indicates matching a pattern that starts with A as the partition beginning
* When the value of the PATTERN clause is `^A`, the match must start from the first row of the partition, and this row must satisfy the definition of `A`.
@@ -112,152 +134,9 @@ PATTERN ( row_pattern )
* When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`.
* When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty.
-For example illustrations, see[Section 3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor)
-
-#### 2.4.3 Quantifiers
+**Examples**
-Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`).
-
-Common quantifiers are as follows:
-
-| Quantifier | Description |
-| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `*` | Zero or more repetitions |
-| `+` | One or more repetitions |
-| `?` | Zero or one repetition |
-| `{n}` | Exactly n repetitions |
-| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. |
-
-* The matching preference can be changed by adding `?` after the quantifier.
- * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times.
- * `?`: Prefers 1 time; `??`: Prefers 0 times.
-
-### 2.5 AFTER MATCH SKIP Clause
-
-Used to specify which row to start the next pattern match from after identifying a non-empty match.
-
-| Jump Strategy | Description | Allows Overlapping Matches? |
-| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- |
-| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No |
-| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes |
-| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | Jumps to start from the [ first row | last row ] of a pattern variable. | Yes |
-
-* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row.
-
-For example illustrations, see [Section 3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause)
-
-### 2.6 ROWS PER MATCH Clause
-
-Used to specify the output method of the result set after a successful pattern match, including the following two main options:
-
-| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** |
-| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| ONE ROW PER MATCH | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause\* Expressions defined in the MEASURES clause. | Outputs empty matches; skips unmatched rows. |
-| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause\* Columns in the ORDER BY clause\* Expressions defined in the MEASURES clause\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**: Outputs empty matches by default; skips unmatched rows.\* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**: Does not output empty matches; skips unmatched rows.\* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**: Outputs empty matches and generates an additional output record for each unmatched row. |
-
-### 2.7 MEASURES Clause
-
-Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause.
-
-SQL
-
-```SQL
-MEASURES measure_expression AS measure_name [, ...]
-```
-
-* A `measure_expression` is a scalar value calculated from the matched set of data.
-
-| Usage Example | Description |
-| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. |
-| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). |
-| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] |
-
-* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`.
-
-### 2.8 Row Pattern Recognition Expressions
-
-Expressions used in the MEASURES and DEFINE clauses are **scalar expressions**, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition.
-
-#### 2.8.1 Pattern Variable References
-
-SQL
-
-```SQL
-A.totalprice
-U.orderdate
-orderstatus
-```
-
-* When a column name is prefixed with a **basic pattern variable** or a **combined pattern variable**, it refers to the corresponding column values of all rows matched by that variable.
-* If a column name has no prefix, it is equivalent to using the "**global combined pattern variable**" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match.
-
-> Using table names as column name prefixes in pattern recognition expressions is not allowed.
-
-#### 2.8.2 Extended Functions
-
-| Function Name | Function Syntax | Description |
-| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. |
-| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. |
-| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
-| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
-| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
-| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
-| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) |
-| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. |
-| Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. |
-
-For example illustrations, see [Section 3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions)
-
-#### 2.8.3 RUNNING and FINAL Semantics
-
-1. Definition
-
-* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row).
-* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group).
-
-2. Scope of Application
-
-* The DEFINE clause uses RUNNING semantics by default.
-* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics.
-
-3. Syntax Constraints
-
-* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.**
- * Valid: `RUNNING RPP_LAST(A.totalprice)`, `FINAL RPP_LAST(A.totalprice)`
- * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)`
-
-## 3. Syntax Examples
-
-Original Data
-
-SQL
-
-```SQL
-IoTDB:database3> select * from t
-+-----------------------------+------+----------+
-| time|device|totalprice|
-+-----------------------------+------+----------+
-|2025-01-01T00:01:00.000+08:00| d1| 90|
-|2025-01-01T00:02:00.000+08:00| d1| 80|
-|2025-01-01T00:03:00.000+08:00| d1| 70|
-|2025-01-01T00:04:00.000+08:00| d1| 80|
-|2025-01-01T00:05:00.000+08:00| d1| 70|
-|2025-01-01T00:06:00.000+08:00| d1| 80|
-+-----------------------------+------+----------+
-
--- Create Statement
-create table t(device tag, totalprice int32 field)
-
-insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
-```
-
-### 3.1 PATTERN Clause Partition Anchor
-
-* Query SQL
-
-SQL
+* Query SQL
```SQL
SELECT m.time, m.match, m.price, m.label
@@ -270,16 +149,17 @@ MATCH_RECOGNIZE (
CLASSIFIER() AS label
ALL ROWS PER MATCH
AFTER MATCH SKIP PAST LAST ROW
- PATTERN %s -- PATTERN Clause
+ PATTERN %s -- PATTERN Clause
DEFINE A AS true
) AS m;
```
-* Query Results
-
- * When the PATTERN clause is PATTERN (^A)
+* Results
+ * When the PATTERN clause is specified as PATTERN (^A)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-----+-----+-----+
@@ -290,9 +170,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * When the PATTERN clause is PATTERN (^A^)
-
- SQL
+ * When the PATTERN clause is specified as PATTERN (^A^), the output result is empty. This is because it is impossible to match an A starting from the beginning of a partition and then return to the beginning of the partition again.
```SQL
+----+-----+-----+-----+
@@ -302,9 +180,11 @@ MATCH_RECOGNIZE (
Empty set.
```
- * When the PATTERN clause is PATTERN (A\$)
+ * When the PATTERN clause is specified as PATTERN (A\$)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-----+-----+-----+
@@ -315,9 +195,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * When the PATTERN clause is PATTERN (\$A\$)
-
- SQL
+ * When the PATTERN clause is specified as PATTERN (\$A\$), the output result is empty.
```SQL
+----+-----+-----+-----+
@@ -327,11 +205,40 @@ MATCH_RECOGNIZE (
Empty set.
```
-### 3.2 AFTER MATCH SKIP Clause
-* Query SQL
+##### 1.2.4.3 Quantifiers
-SQL
+Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`).
+
+Common quantifiers are as follows:
+
+| Quantifier | Description |
+| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `*` | Zero or more repetitions |
+| `+` | One or more repetitions |
+| `?` | Zero or one repetition |
+| `{n}` | Exactly n repetitions |
+| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. |
+
+* The matching preference can be changed by adding `?` after the quantifier.
+ * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times.
+ * `?`: Prefers 1 time; `??`: Prefers 0 times.
+
+#### 1.2.5 AFTER MATCH SKIP Clause
+
+Used to specify which row to start the next pattern match from after identifying a non-empty match.
+
+| Jump Strategy | Description | Allows Overlapping Matches? |
+| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- |
+| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No |
+| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes |
+| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | Jumps to start from the [ first row \| last row ] of a pattern variable. | Yes |
+
+* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row.
+
+**Examples**
+
+* Query SQL
```SQL
SELECT m.time, m.match, m.price, m.label
@@ -343,24 +250,25 @@ MATCH_RECOGNIZE (
RUNNING RPR_LAST(totalprice) AS price,
CLASSIFIER() AS label
ALL ROWS PER MATCH
- %s -- AFTER MATCH SKIP Clause
+ %s -- AFTER MATCH SKIP Clause
PATTERN (A B+ C+ D?)
SUBSET U = (C, D)
DEFINE
B AS B.totalprice < PREV (B.totalprice),
C AS C.totalprice > PREV (C.totalprice),
- D AS false -- Never matches successfully
+ D AS false -- Never matches successfully
) AS m;
```
-* Query Results
+* Results
+ * When AFTER MATCH SKIP PAST LAST ROW is specified
- * When AFTER MATCH SKIP PAST LAST ROW
- * First match: Rows 1, 2, 3, 4
- * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found
- * This pattern will never have overlapping matches
+ 
- SQL
+ * Matching process:
+ * First match: Rows 1, 2, 3, 4
+ * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found
+ * This pattern will never have overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -375,13 +283,15 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO NEXT ROW
- * First match: Rows 1, 2, 3, 4
- * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4
- * Third match: Attempts to start from row 3, fails
- * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+ *
+ * First match: Rows 1, 2, 3, 4
+ * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4
+ * Third match: Attempts to start from row 3, fails
+ * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -402,11 +312,13 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO FIRST C
- * First match: Rows 1, 2, 3, 4
- * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+ *
+ * First match: Rows 1, 2, 3, 4
+ * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -424,12 +336,14 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO LAST B or AFTER MATCH SKIP TO B
- * First match: Rows 1, 2, 3, 4
- * Second match: Attempts to start from the last B (i.e., row 3), fails
- * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+ *
+ * First match: Rows 1, 2, 3, 4
+ * Second match: Attempts to start from the last B (i.e., row 3), fails
+ * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -447,11 +361,13 @@ MATCH_RECOGNIZE (
```
* When AFTER MATCH SKIP TO U
- * First match: Rows 1, 2, 3, 4
- * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6
- * This pattern allows overlapping matches
- SQL
+ 
+
+ *
+ * First match: Rows 1, 2, 3, 4
+ * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6
+ * This pattern allows overlapping matches
```SQL
+-----------------------------+-----+-----+-----+
@@ -470,28 +386,83 @@ MATCH_RECOGNIZE (
* When AFTER MATCH SKIP TO A, you cannot jump to the first row of the match, otherwise it will cause an infinite loop
- SQL
-
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match
```
* When AFTER MATCH SKIP TO B, you cannot jump to a pattern variable that does not exist in the match group
- SQL
-
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match
```
-### 3.3 Row Pattern Expressions - Extended Functions
-#### 3.3.1 CLASSIFIER() Function
+#### 1.2.6 ROWS PER MATCH Clause
-* Query SQL
+Used to specify the output method of the result set after a successful pattern match, including the following two main options:
+
+| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** |
+| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ONE ROW PER MATCH  | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause<br>\* Expressions defined in the MEASURES clause | Outputs empty matches; skips unmatched rows. |
+| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause<br>\* Columns in the ORDER BY clause<br>\* Expressions defined in the MEASURES clause<br>\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.<br>\* ALL ROWS PER MATCH **SHOW EMPTY MATCHES**: Outputs empty matches (same as the default); skips unmatched rows.<br>\* ALL ROWS PER MATCH **OMIT EMPTY MATCHES**: Does not output empty matches; skips unmatched rows.<br>\* ALL ROWS PER MATCH **WITH UNMATCHED ROWS**: Outputs empty matches and generates an additional output record for each unmatched row. |
+
+#### 1.2.7 MEASURES Clause
+
+Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause.
SQL
+```SQL
+MEASURES measure_expression AS measure_name [, ...]
+```
+
+* A `measure_expression` is a scalar value calculated from the matched set of data.
+
+| Usage Example | Description |
+| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. |
+| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). |
+| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] |
+
+* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`.
+
+#### 1.2.8 Row Pattern Recognition Expressions
+
+Expressions used in the MEASURES and DEFINE clauses are **scalar expressions**, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition.
+
+##### 1.2.8.1 Pattern Variable References
+
+```SQL
+A.totalprice
+U.orderdate
+orderstatus
+```
+
+* When a column name is prefixed with a **basic pattern variable** or a **combined pattern variable**, it refers to the corresponding column values of all rows matched by that variable.
+* If a column name has no prefix, it is equivalent to using the "**global combined pattern variable**" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match.
+
+> Using table names as column name prefixes in pattern recognition expressions is not allowed.
+
+##### 1.2.8.2 Extended Functions
+
+| Function Name | Function Syntax | Description |
+| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. |
+| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. |
+| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
+| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the **current match group**, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. |
+| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
+| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the **partition boundary**, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. |
+| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) |
+| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. |
+| Nested Functions              | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k))`      | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. |
+
+**Examples**
+
+1. CLASSIFIER Function
+
+* Query sql
+
```SQL
SELECT m.time, m.match, m.price, m.lower_or_higher, m.label
FROM t
@@ -513,10 +484,11 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* Analysis
-* Query Results
+ 
-SQL
+* Result
```SQL
+-----------------------------+-----+-----+---------------+-----+
@@ -532,11 +504,9 @@ SQL
Total line number = 6
```
-#### 3.3.2 Logical Navigation Functions
+2. Logical Navigation Functions
-* Query SQL
-
-SQL
+* Query sql
```SQL
SELECT m.time, m.measure
@@ -544,18 +514,19 @@ FROM t
MATCH_RECOGNIZE (
ORDER BY time
MEASURES
- %s AS measure -- MEASURES Clause
+        %s AS measure -- MEASURES clause
ALL ROWS PER MATCH
PATTERN (A+)
DEFINE A AS true
) AS m;
```
-* Query Results
-
+* Results
* When the value is totalprice, RPR\_LAST(totalprice), RUNNING RPR\_LAST(totalprice)
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -573,7 +544,9 @@ MATCH_RECOGNIZE (
* When the value is FINAL RPR\_LAST(totalprice)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -591,7 +564,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_FIRST(totalprice), RUNNING RPR\_FIRST(totalprice), FINAL RPR\_FIRST(totalprice)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -609,7 +584,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_LAST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -627,7 +604,9 @@ MATCH_RECOGNIZE (
   * When the value is FINAL RPR\_LAST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -645,7 +624,9 @@ MATCH_RECOGNIZE (
* When the value is RPR\_FIRST(totalprice, 2) and FINAL RPR\_FIRST(totalprice, 2)
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -661,11 +642,9 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.3 Physical Navigation Functions
-
-* Query SQL
+3. Physical Navigation Functions
-SQL
+* Query sql
```SQL
SELECT m.time, m.measure
@@ -673,18 +652,19 @@ FROM t
MATCH_RECOGNIZE (
ORDER BY time
MEASURES
- %s AS measure -- MEASURES Clause
+        %s AS measure -- MEASURES clause
ALL ROWS PER MATCH
PATTERN (B)
DEFINE B AS B.totalprice >= PREV(B.totalprice)
) AS m;
```
-* Query Results
-
+* Results
* When the value is `PREV(totalprice)`
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -698,7 +678,9 @@ MATCH_RECOGNIZE (
* When the value is `PREV(B.totalprice, 2)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -712,7 +694,9 @@ MATCH_RECOGNIZE (
* When the value is `PREV(B.totalprice, 4)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -726,7 +710,9 @@ MATCH_RECOGNIZE (
* When the value is `NEXT(totalprice)` or `NEXT(B.totalprice, 1)`
- SQL
+ 
+
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -738,9 +724,11 @@ MATCH_RECOGNIZE (
Total line number = 2
```
- * When the value is `NEXT(B.totalprice, 2)`
+  * When the value is `NEXT(B.totalprice, 2)`
+
+ 
- SQL
+ Actual Return
```SQL
+-----------------------------+-------+
@@ -752,11 +740,9 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-#### 3.3.4 Aggregate Functions
-
-* Query SQL
+4. Aggregate Functions
-SQL
+* Query sql
```SQL
SELECT m.time, m.count, m.avg, m.sum, m.min, m.max
@@ -774,10 +760,11 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* Analysis (Taking MIN(totalprice) as an Example)
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+-----+-----------------+-----+---+---+
@@ -793,13 +780,11 @@ SQL
Total line number = 6
```
-#### 3.3.5 Nested Functions
+5. Nested Functions
-1. Example 1
+Example 1
-* Query SQL
-
-SQL
+* Query sql
```SQL
SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label
@@ -824,10 +809,11 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* Analysis
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+-----+-----+---------------+-----+----------+----------+
@@ -843,11 +829,9 @@ SQL
Total line number = 6
```
-2. Example 2
+Example 2
-* Query SQL
-
-SQL
+* Query sql
```SQL
SELECT m.time, m.prev_last_price, m.next_first_price
@@ -862,10 +846,11 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* Analysis
-* Query Results
+
-SQL
+* Result
```SQL
+-----------------------------+---------------+----------------+
@@ -881,18 +866,34 @@ SQL
Total line number = 6
```
-## 4. Scenario Examples
+##### 1.2.8.3 RUNNING and FINAL Semantics
+
+1. Definition
+
+* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row).
+* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group).
+
+2. Scope of Application
+
+* The DEFINE clause uses RUNNING semantics by default.
+* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics.
+
+3. Syntax Constraints
+
+* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.**
+  * Valid: `RUNNING RPR_LAST(A.totalprice)`, `FINAL RPR_LAST(A.totalprice)`
+ * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)`
+
+### 1.3 Scenario Examples
Using [Sample Data](../Reference/Sample-Data.md) as the source data
-### 4.1 Time Segment Query
+#### 1.3.1 Time Segment Query
Segment the data in table1 by time intervals less than or equal to 24 hours, and query the total number of data entries in each segment, as well as the start and end times.
Query SQL
-SQL
-
```SQL
SELECT start_time, end_time, cnt
FROM table1
@@ -907,9 +908,7 @@ MATCH_RECOGNIZE (
) AS m
```
-Query Results
-
-SQL
+Results
```SQL
+-----------------------------+-----------------------------+---+
@@ -921,14 +920,12 @@ SQL
Total line number = 2
```
-### 4.2 Difference Segment Query
+#### 1.3.2 Difference Segment Query
Segment the data in table2 by humidity value differences less than 0.1, and query the total number of data entries in each segment, as well as the start and end times.
* Query SQL
-SQL
-
```SQL
SELECT start_time, end_time, cnt
FROM table2
@@ -943,9 +940,7 @@ MATCH_RECOGNIZE (
) AS m;
```
-* Query Results
-
-SQL
+* Results
```SQL
+-----------------------------+-----------------------------+---+
@@ -958,14 +953,12 @@ SQL
Total line number = 3
```
-### 4.3 Event Statistics Query
+#### 1.3.3 Event Statistics Query
Group the data in table1 by device ID, and count the start and end times and maximum humidity value where the humidity in the Shanghai area is greater than 35.
* Query SQL
-SQL
-
```SQL
SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
FROM table1
@@ -980,13 +973,11 @@ MATCH_RECOGNIZE (
ONE ROW PER MATCH
PATTERN (A+)
DEFINE
- A AS A.region= 'Shanghai' AND A.humidity> 35
+ A AS A.region= '上海' AND A.humidity> 35
) AS m
```
-* Query Results
-
-SQL
+* Results
```SQL
+---------+-----+-----------------------------+-----------------------------+------------+
@@ -997,3 +988,741 @@ SQL
+---------+-----+-----------------------------+-----------------------------+------------+
Total line number = 2
```
+
+
+## 2. Window Functions
+
+### 2.1 Function Overview
+
+Window functions perform calculations on each row based on a specific set of rows related to the current row (called a "window"). They combine grouping operations (`PARTITION BY`), sorting (`ORDER BY`), and definable calculation ranges (window frame `FRAME`), enabling complex cross-row calculations without collapsing the original data rows. They are commonly used in data analysis scenarios such as ranking, cumulative sums, and moving averages.
+
+> Note: This feature is available starting from version V2.0.5.
+
+For example, in a scenario where you need to query the cumulative power consumption values of different devices, you can achieve this using window functions.
+
+```SQL
+-- Original data
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00|    d0|    5|
+|1970-01-01T08:00:02.000+08:00|    d0|    3|
+|1970-01-01T08:00:03.000+08:00|    d0|    1|
+|1970-01-01T08:00:04.000+08:00|    d1|    2|
+|1970-01-01T08:00:05.000+08:00|    d1|    4|
++-----------------------------+------+-----+
+
+-- Create table and insert data
+CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
+insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
+
+
+-- Execute window function query
+SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
+```
+
+After grouping, sorting, and calculation (the steps are broken down in the figure below),
+
+
+
+the expected results can be obtained:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+### 2.2 Function Definition
+
+#### 2.2.1 SQL Definition
+
+```SQL
+windowDefinition
+ : name=identifier AS '(' windowSpecification ')'
+ ;
+
+windowSpecification
+ : (existingWindowName=identifier)?
+ (PARTITION BY partition+=expression (',' partition+=expression)*)?
+ (ORDER BY sortItem (',' sortItem)*)?
+ windowFrame?
+ ;
+
+windowFrame
+ : frameExtent
+ ;
+
+frameExtent
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=GROUPS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ | frameType=GROUPS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound
+ : UNBOUNDED boundType=PRECEDING #unboundedFrame
+ | UNBOUNDED boundType=FOLLOWING #unboundedFrame
+ | CURRENT ROW #currentRowBound
+ | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame
+ ;
+```
+
+#### 2.2.2 Window Definition
+
+##### 2.2.2.1 Partition
+
+`PARTITION BY` is used to divide data into multiple independent, unrelated "groups". Window functions can only access and operate on data within their respective groups, and cannot access data from other groups. This clause is optional; if not explicitly specified, all data is divided into the same group by default. It is worth noting that unlike `GROUP BY` which aggregates a group of data into a single row, the window function with `PARTITION BY` **does not affect the number of rows within the group.**
+
+* Example
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
++-----------------------------+------+----+-----+
+```
+
+##### 2.2.2.2 Ordering
+
+`ORDER BY` is used to sort data within a partition. After sorting, rows with equal values are called peers. Peers affect the behavior of window functions; for example, different rank functions handle peers differently, and different frame division methods also handle peers differently. This clause is optional.
+
+* Example
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+```
+
+Disassembly steps:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+##### 2.2.2.3 Framing
+
+For each row in a partition, the window function evaluates on a corresponding set of rows called a Frame (i.e., the input domain of the Window Function on each row). The Frame can be specified manually, involving two attributes when specified, as detailed below.
+
+
+
+
+ | Frame Attribute |
+ Attribute Value |
+ Value Description |
+
+
+ | Type |
+ ROWS |
+ Divide the frame by row number |
+
+
+ | GROUPS |
+ Divide the frame by peers, i.e., rows with the same value are regarded as equivalent. All rows in peers are grouped into one group called a peer group |
+
+
+ | RANGE |
+ Divide the frame by value |
+
+
+ | Start and End Position |
+ UNBOUNDED PRECEDING |
+ The first row of the entire partition |
+
+
+ | offset PRECEDING |
+ Represents the row with an "offset" distance from the current row in the preceding direction |
+
+
+ | CURRENT ROW |
+ The current row |
+
+
+ | offset FOLLOWING |
+ Represents the row with an "offset" distance from the current row in the following direction |
+
+
+ | UNBOUNDED FOLLOWING |
+ The last row of the entire partition |
+
+
+
+
+Among them, the meanings of `CURRENT ROW`, `offset PRECEDING`, and `offset FOLLOWING` vary with the type of frame, as shown in the following table:
+
+| | `ROWS` | `GROUPS` | `RANGE` |
+|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
+| `CURRENT ROW`      | Current row | Since a peer group contains multiple rows, the meaning depends on whether it is used as frame_start or frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. | Same as GROUPS, depending on whether it is used as frame_start or frame_end: * frame_start: the first row of the peer group; * frame_end: the last row of the peer group. |
+| `offset PRECEDING` | The previous offset rows | The previous offset peer groups; | Rows whose value difference from the current row in the preceding direction is less than or equal to offset are grouped into one frame |
+| `offset FOLLOWING` | The following offset rows | The following offset peer groups. | Rows whose value difference from the current row in the following direction is less than or equal to offset are grouped into one frame |
+
+The syntax format is as follows:
+
+```SQL
+-- Specify both frame_start and frame_end
+{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
+-- Specify only frame_start, frame_end is CURRENT ROW
+{ RANGE | ROWS | GROUPS } frame_start
+```
+
+If the Frame is not specified manually, the default Frame division rules are as follows:
+
+* When the window function uses ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (i.e., from the first row of the window to the current row). For example: In RANK() OVER(PARTITION BY COL1 ORDER BY COL2), the Frame defaults to include the current row and all preceding rows in the partition.
+* When the window function does not use ORDER BY: The default Frame is RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (i.e., all rows in the entire window). For example: In AVG(COL2) OVER(PARTITION BY col1), the Frame defaults to include all rows in the partition, calculating the average of the entire partition.
+
+It should be noted that when the Frame type is GROUPS or RANGE, `ORDER BY` must be specified. The difference is that ORDER BY in GROUPS can involve multiple fields, while RANGE requires calculation and thus can only specify one field.
+
+* Example
+
+1. Frame type is ROWS
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Take the previous row and the current row as the Frame
+ * For the first row of the partition, since there is no previous row, the entire Frame has only this row, returning 1;
+ * For other rows of the partition, the entire Frame includes the current row and its previous row, returning 2:
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
++-----------------------------+------+----+-----+
+```
+
+2. Frame type is GROUPS
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Take the previous peer group and the current peer group as the Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
+ * For the peer group with flow 1, since there are no peer groups smaller than it, the entire Frame has only this row, returning 1;
+ * For the peer group with flow 3, it itself contains 2 rows, and the previous peer group is the one with flow 1 (1 row), so the entire Frame has 3 rows, returning 3;
+ * For the peer group with flow 5, it itself contains 1 row, and the previous peer group is the one with flow 3 (2 rows), so the entire Frame has 3 rows, returning 3.
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+3. Frame type is RANGE
+
+Query statement:
+
+```SQL
+IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+Disassembly steps:
+
+* Group rows whose flow value is **at most 2 less than** that of the current row (up to and including the current row's peers) into the same Frame. Taking the partition with device d0 as an example (same for d1), for the count of rows:
+ * For the row with flow 1, since it is the smallest row, the entire Frame has only this row, returning 1;
+ * For the row with flow 3, note that CURRENT ROW exists as frame_end, so it is the last row of the entire peer group. There is 1 row smaller than it that meets the requirement, and the peer group has 2 rows, so the entire Frame has 3 rows, returning 3;
+ * For the row with flow 5, it itself contains 1 row, and there are 2 rows smaller than it that meet the requirement, so the entire Frame has 3 rows, returning 3.
+
+
+
+Query result:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+### 2.3 Built-in Window Functions
+
+| Window Function Category | Window Function Name | Function Definition | Supports FRAME Clause |
+|--------------------------|----------------------|---------------------|-----------------------|
+| Aggregate Function | All built-in aggregate functions | Aggregate a set of values to get a single aggregated result. | Yes |
+| Value Function | first_value | Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs | Yes |
+| Value Function | last_value | Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs | Yes |
+| Value Function | nth_value | Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs | Yes |
+| Value Function | lead | Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default | No |
+| Value Function | lag | Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default | No |
+| Rank Function | rank | Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers | No |
+| Rank Function | dense_rank | Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers | No |
+| Rank Function | row_number | Return the row number of the current row in the entire partition; note that the row number starts from 1 | No |
+| Rank Function | percent_rank | Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (rank() - 1) / (n - 1), where n is the number of rows in the entire partition | No |
+| Rank Function | cume_dist | Return the sequence number of the current row's value in the entire partition as a percentage; i.e., (number of rows less than or equal to it) / n | No |
+| Rank Function | ntile | Specify n to number each row from 1 to n. | No |
+
+#### 2.3.1 Aggregate Function
+
+All built-in aggregate functions such as `sum()`, `avg()`, `min()`, `max()` can be used as Window Functions.
+
+> Note: Unlike GROUP BY, each row has a corresponding output in the Window Function
+
+Example:
+
+```SQL
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.2 Value Function
+
+1. `first_value`
+
+* Function name: `first_value(value) [IGNORE NULLS]`
+* Definition: Return the first value of the frame; if IGNORE NULLS is specified, skip leading NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+-----------+
+| time|device|flow|first_value|
++-----------------------------+------+----+-----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----------+
+```
+
+2. `last_value`
+
+* Function name: `last_value(value) [IGNORE NULLS]`
+* Definition: Return the last value of the frame; if IGNORE NULLS is specified, skip trailing NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|last_value|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+3. `nth_value`
+
+* Function name: `nth_value(value, n) [IGNORE NULLS]`
+* Definition: Return the nth element of the frame (note that n starts from 1); if IGNORE NULLS is specified, skip NULLs;
+* Example:
+
+```SQL
+IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|nth_values|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+4. lead
+
+* Function name: `lead(value[, offset[, default]]) [IGNORE NULLS]`
+* Definition: Return the element offset rows after the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
+* The lead function requires an ORDER BY window clause
+* Example:
+
+```SQL
+IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow|lead|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4|null|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:03.000+08:00| d0| 1|null|
++-----------------------------+------+----+----+
+```
+
+5. lag
+
+* Function name: `lag(value[, offset[, default]]) [IGNORE NULLS]`
+* Definition: Return the element offset rows before the current row (if IGNORE NULLS is specified, NULLs are not considered); if no such element exists (exceeding the partition range), return default; the default value of offset is 1, and the default value of default is NULL.
+* The lag function requires an ORDER BY window clause
+* Example:
+
+```SQL
+IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow| lag|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2|null|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3|null|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.3 Rank Function
+
+1. rank
+
+* Function name: `rank()`
+* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there may be gaps between sequence numbers;
+* Example:
+
+```SQL
+IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+2. dense_rank
+
+* Function name: `dense_rank()`
+* Definition: Return the sequence number of the current row in the entire partition; rows with the same value have the same sequence number, and there are no gaps between sequence numbers.
+* Example:
+
+```SQL
+IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|dense_rank|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+----------+
+```
+
+3. row_number
+
+* Function name: `row_number()`
+* Definition: Return the row number of the current row in the entire partition; note that the row number starts from 1;
+* Example:
+
+```SQL
+IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|row_number|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----------+
+```
+
+4. percent_rank
+
+* Function name: `percent_rank()`
+* Definition: Return the sequence number of the current row's value in the entire partition as a percentage; i.e., **(rank() - 1) / (n - 1)**, where n is the number of rows in the entire partition;
+* Example:
+
+```SQL
+IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+------------------+
+| time|device|flow| percent_rank|
++-----------------------------+------+----+------------------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+------------------+
+```
+
+5. cume_dist
+
+* Function name: `cume_dist()`
+* Definition: Return the cumulative distribution of the current row's value within the entire partition; i.e., **(number of rows less than or equal to it) / n**, where n is the number of rows in the entire partition.
+* Example:
+
+```SQL
+IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+---------+
+| time|device|flow|cume_dist|
++-----------------------------+------+----+---------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+---------+
+```
+
+6. ntile
+
+* Function name: `ntile(n)`
+* Definition: Divide the rows of each partition into n buckets as evenly as possible and number each row with its bucket number, from 1 to n.
+  * If the number of rows in the entire partition is less than n, the number is the row index;
+  * If the number of rows in the entire partition is greater than or equal to n:
+    * If the number of rows is divisible by n, every bucket gets the same number of rows. For example, if the number of rows is 4 and n is 2, the numbers are 1, 1, 2, 2;
+    * If the number of rows is not divisible by n, the extra rows are assigned one each to the first few buckets. For example, if the number of rows is 5 and n is 3, the numbers are 1, 1, 2, 2, 3;
+* Example:
+
+```SQL
+IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+-----+
+| time|device|flow|ntile|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
++-----------------------------+------+----+-----+
+```
+
+### 2.4 Scenario Examples
+
+1. Multi-device diff function
+
+For each row of each device, calculate the difference from the previous row:
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+For each row of each device, calculate the difference from the next row:
+
+```SQL
+SELECT
+ *,
+ measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+For each row of a single device, calculate the difference from the previous row (same for the next row):
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (ORDER BY time)
+FROM data
+WHERE device='d1'
+  AND timeCondition;
+```
+
+2. Multi-device TOP_K/BOTTOM_K
+
+Use rank to get the sequence number, then retain the desired order in the outer query.
+
+(Note: The execution order of window functions is after the HAVING clause, so a subquery is needed here)
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+        rank() OVER (PARTITION BY device ORDER BY time DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+In addition to sorting by time, you can also sort by the value of the measurement point:
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+        rank() OVER (PARTITION BY device ORDER BY measurement DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+3. Multi-device CHANGE_POINTS
+
+This SQL is used to remove consecutive identical values in the input sequence, which can be achieved with lead + subquery:
+
+```SQL
+SELECT
+ time,
+ device,
+ measurement
+FROM(
+ SELECT
+ time,
+ device,
+ measurement,
+ LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
+ FROM data
+ WHERE timeCondition
+)
+WHERE measurement != next OR next IS NULL;
+```
diff --git a/src/zh/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md b/src/zh/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
index 0392c6119..53309d6aa 100644
--- a/src/zh/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
+++ b/src/zh/UserGuide/Master/Table/Basic-Concept/Query-Data_timecho.md
@@ -40,11 +40,9 @@ IoTDB 查询语法提供以下子句:
- SELECT 子句:查询结果应包含的列。详细语法见:[SELECT子句](../SQL-Manual/Select-Clause.md)
- FROM 子句:指出查询的数据源,可以是单个表、多个通过 `JOIN` 子句连接的表,或者是一个子查询。详细语法见:[FROM & JOIN 子句](../SQL-Manual/From-Join-Clause.md)
-- patternRecognition:行模式识别,支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算。详细语法见:[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md)
- WHERE 子句:用于过滤数据,只选择满足特定条件的数据行。这个子句在逻辑上紧跟在 FROM 子句之后执行。详细语法见:[WHERE 子句](../SQL-Manual/Where-Clause.md)
- GROUP BY 子句:当需要对数据进行聚合时使用,指定了用于分组的列。详细语法见:[GROUP BY 子句](../SQL-Manual/GroupBy-Clause.md)
- HAVING 子句:在 GROUP BY 子句之后使用,用于对已经分组的数据进行过滤。与 WHERE 子句类似,但 HAVING 子句在分组后执行。详细语法见:[HAVING 子句](../SQL-Manual/Having-Clause.md)
-- WINDOW FUNCTION:窗口函数,是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作、排序与可定义的计算范围结合,在不折叠原始数据行的前提下实现复杂的跨行计算。详细语法见:[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)
- FILL 子句:用于处理查询结果中的空值,用户可以使用 FILL 子句来指定数据缺失时的填充模式(如前一个非空值或线性插值)来填充 null 值,以便于数据可视化和分析。 详细语法见:[FILL 子句](../SQL-Manual/Fill-Clause.md)
- ORDER BY 子句:对查询结果进行排序,可以指定升序(ASC)或降序(DESC),以及 NULL 值的处理方式(NULLS FIRST 或 NULLS LAST)。详细语法见:[ORDER BY 子句](../SQL-Manual/OrderBy-Clause.md)
- OFFSET 子句:用于指定查询结果的起始位置,即跳过前 OFFSET 行。与 LIMIT 子句配合使用。详细语法见:[LIMIT 和 OFFSET 子句](../SQL-Manual/Limit-Offset-Clause.md)
@@ -592,77 +590,3 @@ IoTDB> SELECT time, temperature, humidity
Total line number = 10
It costs 0.093s
```
-
-### 3.9 行模式识别
-
-**示例:将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。**
-
-```SQL
-SELECT start_time, end_time, cnt
-FROM table1
-MATCH_RECOGNIZE (
- ORDER BY time
- MEASURES
- RPR_FIRST(A.time) AS start_time,
- RPR_LAST(time) AS end_time,
- COUNT() AS cnt
- PATTERN (A B*)
- DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
-) AS m
-```
-
-执行结果如下:
-
-```SQL
-+-----------------------------+-----------------------------+---+
-| start_time| end_time|cnt|
-+-----------------------------+-----------------------------+---+
-|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
-|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
-+-----------------------------+-----------------------------+---+
-Total line number = 2
-```
-
-### 3.10 窗口函数
-
-**示例:查询不同设备的功耗累加值**
-
-原始数据如下:
-
-```SQL
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-```
-
-查询语句如下:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-经过分组、排序、计算(步骤拆解如下图所示),
-
-
-
-执行结果如下:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
diff --git a/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md b/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
index 08d1fac39..e91e163e4 100644
--- a/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
+++ b/src/zh/UserGuide/Master/Table/QuickStart/QuickStart_timecho.md
@@ -62,7 +62,7 @@
2. 数据写入&更新:在数据写入&更新方面,IoTDB 提供了多种方式来插入实时数据,基本的数据写入&更新操作请查看 [数据写入&更新](../Basic-Concept/Write-Updata-Data.md)
-3. 数据查询:IoTDB 提供了丰富的数据查询功能,数据查询的基本介绍请查看 [数据查询](../Basic-Concept/Query-Data_timecho.md),其中包含了适用于识别时序数据中的特定模式、检测特定事件等业务场景的[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md),以及常用于数据分析场景的[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)等特色函数
+3. 数据查询:IoTDB 提供了丰富的数据查询功能,数据查询的基本介绍请查看 [数据查询](../Basic-Concept/Query-Data.md),其中包含了适用于时序特色分析的模式查询和窗口函数,详细介绍请查看[时序特色分析](../User-Manual/Timeseries-Featured-Analysis_timecho.md)
4. 数据删除:IoTDB 提供了两种删除方式,分别为SQL语句删除与过期自动删除(TTL)
diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md b/src/zh/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
index 4a84655ce..96646c022 100644
--- a/src/zh/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
+++ b/src/zh/UserGuide/Master/Table/SQL-Manual/Featured-Functions_timecho.md
@@ -130,9 +130,9 @@ SELECT date_bin(1h, time) AS hour_time, avg(temperature) AS avg_temp
- **每个** **`GROUP BY`** **子句中只能使用一个** **`date_bin_gapfill`**。如果出现多个 `date_bin_gapfill`,会报错:multiple date_bin_gapfill calls not allowed
- **`date_bin_gapfill`** **的执行顺序**:GAPFILL 功能发生在 `HAVING` 子句执行之后,`FILL` 子句执行之前。
- **使用** **`date_bin_gapfill`** **时,****`WHERE`** **子句中的时间过滤条件必须是以下形式之一:**
- - `time >= XXX AND time <= XXX`
- - `time > XXX AND time < XXX`
- - `time BETWEEN XXX AND XXX`
+ - `time >= XXX AND time <= XXX`
+ - `time > XXX AND time < XXX`
+ - `time BETWEEN XXX AND XXX`
- **使用** **`date_bin_gapfill`** **时,如果出现其他时间过滤条件**,会报错。时间过滤条件与其他值过滤条件只能通过 `AND` 连接。
- **如果不能从 where 子句中推断出 startTime 和 endTime,则报错**:could not infer startTime or endTime from WHERE clause。
@@ -229,15 +229,15 @@ DIFF(numberic[, boolean]) -> Double
- 第一个参数:数值类型
- - **类型**:必须是数值类型(`INT32`、`INT64`、`FLOAT`、`DOUBLE`)
- - **作用**:指定要计算差值的列。
+ - **类型**:必须是数值类型(`INT32`、`INT64`、`FLOAT`、`DOUBLE`)
+ - **作用**:指定要计算差值的列。
- 第二个参数:布尔类型(可选)
- - **类型**:布尔类型(`true` 或 `false`)。
- - **默认值**:`true`。
- - **作用**:
- - **`true`**:忽略 `NULL` 值,向前找到第一个非 `NULL` 值进行差值计算。如果前面没有非 `NULL` 值,则返回 `NULL`。
- - **`false`**:不忽略 `NULL` 值,如果前一行为 `NULL`,则差值结果为 `NULL`。
+ - **类型**:布尔类型(`true` 或 `false`)。
+ - **默认值**:`true`。
+ - **作用**:
+ - **`true`**:忽略 `NULL` 值,向前找到第一个非 `NULL` 值进行差值计算。如果前面没有非 `NULL` 值,则返回 `NULL`。
+ - **`false`**:不忽略 `NULL` 值,如果前一行为 `NULL`,则差值结果为 `NULL`。
### 2.4 注意事项
@@ -697,57 +697,7 @@ IoTDB> SELECT window_start, window_end, stock_id, avg(price) as avg FROM CUMULAT
## 4. 窗口函数
-### 4.1 功能介绍
-
-IoTDB 支持的窗口函数(Window Function) 是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作(`PARTITION BY`)、排序(`ORDER BY`)与可定义的计算范围(窗口框架 `FRAME`)结合,在不折叠原始数据行的前提下实现复杂的跨行计算。常用于数据分析场景,比如排名、累计和、移动平均等操作。
-
-> 注意:该功能从 V 2.0.5 版本开始提供。
-
-例如,某场景下需要查询不同设备的功耗累加值,即可通过窗口函数来实现。
-
-```SQL
--- 原始数据
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-
--- 创建表并插入数据
-CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
-insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
-
-
---执行窗口函数查询
-SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-经过分组、排序、计算(步骤拆解如下图所示),
-
-
-
-即可得到期望结果:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-### 4.2 功能定义
-#### 4.2.1 SQL 定义
+### 4.1 语法定义
```SQL
windowDefinition
@@ -782,175 +732,48 @@ frameBound
;
```
-#### 4.2.2 窗口定义
-##### Partition
-
-`PARTITION BY` 用于将数据分为多个独立、不相关的「组」,窗口函数只能访问并操作其所属分组内的数据,无法访问其它分组。该子句是可选的;如果未显式指定,则默认将所有数据分到同一组。值得注意的是,与 `GROUP BY` 通过聚合函数将一组数据规约成一行不同,`PARTITION BY` 的窗口函数**并不会影响组内的行数。**
-
-* 示例
-
-查询语句:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
-```
-
-拆解步骤:
+更多详细功能介绍请参考:[窗口函数](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_2-窗口函数)
-
+### 4.2 使用示例
-查询结果:
+表 device_flow 原始数据如下
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
-+-----------------------------+------+----+-----+
+```sql
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5|
+|1970-01-01T08:00:02.000+08:00| d0| 3|
+|1970-01-01T08:00:03.000+08:00| d0| 1|
+|1970-01-01T08:00:04.000+08:00| d1| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4|
++-----------------------------+------+-----+
```
-##### Ordering
-
-`ORDER BY` 用于对 partition 内的数据进行排序。排序后,相等的行被称为 peers。peers 会影响窗口函数的行为,例如不同 rank function 对 peers 的处理不同;不同 frame 的划分方式对于 peers 的处理也不同。该子句是可选的。
-
-* 示例
+1. 从 device_flow 中查询所有列,并按 device 维度分组,在每个设备分组内按 flow 字段值排序,计算 flow 字段的累计求和,最终将累计和命名为 sum 列返回。
查询语句:
```SQL
-IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
```
-拆解步骤:
-
-
-
查询结果:
```SQL
+-----------------------------+------+----+----+
-| time|device|flow|rank|
+| time|device|flow| sum|
+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
+-----------------------------+------+----+----+
```
-
-##### Framing
-
-对于 partition 中的每一行,窗口函数都会在相应的一组行上求值,这些行称为 Frame(即 Window Function 在每一行上的输入域)。Frame 可以手动指定,指定时涉及两个属性,具体说明如下。
-
-
-
-
- | Frame 属性 |
- 属性值 |
- 值描述 |
-
-
- | 类型 |
- ROWS |
- 通过行号来划分 frame |
-
-
- | GROUPS |
- 通过 peers 来划分 frame,即值相同的行视为同等的存在。peers 中所有的行分为一个组,叫做 peer group |
-
-
- | RANGE |
- 通过值来划分 frame |
-
-
- | 起始和终止位置 |
- UNBOUNDED PRECEDING |
- 整个 partition 的第一行 |
-
-
- | offset PRECEDING |
- 代表前面和当前行「距离」为 offset 的行 |
-
-
- | CURRENT ROW |
- 当前行 |
-
-
- | offset FOLLOWING |
- 代表后面和当前行「距离」为 offset 的行 |
-
-
- | UNBOUNDED FOLLOWING |
- 整个 partition 的最后一行 |
-
-
-
-
-其中,`CURRENT ROW`、`PRECEDING N` 和 `FOLLOWING N` 的含义随着 frame 种类的不同而不同,如下表所示:
-
-| | `ROWS` | `GROUPS` | `RANGE` |
-|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
-| `CURRENT ROW` | 当前行 | 由于 peer group 包含多行,因此这个选项根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 | 和 GROUPS 相同,根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 |
-| `offset PRECEDING` | 前 offset 行 | 前 offset 个 peer group; | 前面与当前行的值之差小于等于 offset 就分为一个 frame |
-| `offset FOLLOWING` | 后 offset 行 | 后 offset 个 peer group。 | 后面与当前行的值之差小于等于 offset 就分为一个 frame |
-
-语法格式如下:
-
-```SQL
--- 同时指定 frame_start 和 frame_end
-{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
--- 仅指定 frame_start,frame_end 为 CURRENT ROW
-{ RANGE | ROWS | GROUPS } frame_start
-```
-
-若未手动指定 Frame,Frame 的默认划分规则如下:
-
-* 当窗口函数使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (即从窗口的第一行到当前行)。例如:RANK() OVER(PARTITION BY COL1 0RDER BY COL2) 中,Frame 默认包含分区内当前行及之前的所有行。
-* 当窗口函数不使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (即整个窗口的所有行)。例如:AVG(COL2) OVER(PARTITION BY col1) 中,Frame 默认包含分区内的所有行,计算整个分区的平均值。
-
-需要注意的是,当 Frame 类型为 GROUPS 或 RANGE 时,需要指定 `ORDER BY`,区别在于 GROUPS 中的 ORDER BY 可以涉及多个字段,而 RANGE 需要计算,所以只能指定一个字段。
-
-* 示例
-
-1. Frame 类型为 ROWS
-
-查询语句:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
-```
-
-拆解步骤:
-
-* 取前一行和当前行作为 Frame
- * 对于 partition 的第一行,由于没有前一行,所以整个 Frame 只有它一行,返回 1;
- * 对于 partition 的其他行,整个 Frame 包含当前行和它的前一行,返回 2:
-
-
-
-查询结果:
-
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
-+-----------------------------+------+----+-----+
-```
-
-2. Frame 类型为 GROUPS
+2. 从 device_flow 表查询所有原始列,按 device 设备分组,每个设备分组内按 flow 字段值排序,统计「当前行所在的 flow 分组 + 前 1 个 flow 分组」范围内的行数(计数),最终将计数结果命名为 count 列返回。
查询语句:
@@ -958,15 +781,6 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count
IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-拆解步骤:
-
-* 取前一个 peer group 和当前 peer group 作为 Frame,那么以 device 为 d0 的 partition 为例(d1同理),对于 count 行数:
- * 对于 flow 为 1 的 peer group,由于它也没比它小的 peer group 了,所以整个 Frame 就它一行,返回 1;
- * 对于 flow 为 3 的 peer group,它本身包含 2 行,前一个 peer group 就是 flow 为 1 的,就一行,因此整个 Frame 三行,返回 3;
- * 对于 flow 为 5 的 peer group,它本身包含 1 行,前一个 peer group 就是 flow 为 3 的,共两行,因此整个 Frame 三行,返回 3。
-
-
-
查询结果:
```SQL
@@ -982,7 +796,7 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWE
+-----------------------------+------+----+-----+
```
-3. Frame 类型为 RANGE
+3. 从 device_flow 表查询所有原始列,按 device 分组,每个分组内按 flow 字段值升序排序,统计「当前行 flow 值 - 2」到「当前行 flow 值」这个数值区间内的所有行的数量,最终将计数结果命名为 count 列返回。
查询语句:
@@ -990,15 +804,6 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWE
IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-拆解步骤:
-
-* 把比当前行数据**小于等于 2 **的分为同一个 Frame,那么以 device 为 d0 的 partition 为例(d1 同理),对于 count 行数:
- * 对于 flow 为 1 的行,由于它是最小的行了,所以整个 Frame 就它一行,返回 1;
- * 对于 flow 为 3 的行,注意 CURRENT ROW 是作为 frame\_end 存在,因此是整个 peer group 的最后一行,符合要求比它小的共 1 行,然后 peer group 有 2 行,所以整个 Frame 共 3 行,返回 3;
- * 对于 flow 为 5 的行,它本身包含 1 行,符合要求的比它小的共 2 行,所以整个 Frame 共 3 行,返回 3。
-
-
-
查询结果:
```SQL
@@ -1013,416 +818,3 @@ IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN
|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+-----------------------------+------+----+-----+
```
-
-### 4.3 内置的窗口函数
-
-
-
-
- | 窗口函数分类 |
- 窗口函数名 |
- 函数定义 |
- 是否支持 FRAME 子句 |
-
-
- | Aggregate Function |
- 所有内置聚合函数 |
- 对一组值进行聚合计算,得到单个聚合结果。 |
- 是 |
-
-
- | Value Function |
- first_value |
- 返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL |
- 是 |
-
-
- | last_value |
- 返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL |
- 是 |
-
-
- | nth_value |
- 返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL |
- 是 |
-
-
- | lead |
- 返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
- 否 |
-
-
- | lag |
- 返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
- 否 |
-
-
- | Rank Function |
- rank |
- 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap |
- 否 |
-
-
- | dense_rank |
- 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap |
- 否 |
-
-
- | row_number |
- 返回当前行在整个 partition 中的行号,注意行号从 1 开始 |
- 否 |
-
-
- | percent_rank |
- 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (rank() - 1) / (n - 1),其中 n 是整个 partition 的行数 |
- 否 |
-
-
- | cume_dist |
- 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (小于等于它的行数) / n |
- 否 |
-
-
- | ntile |
- 指定 n,给每一行进行 1~n 的编号。 |
- 否 |
-
-
-
-
-#### 4.3.1 Aggregate Function
-
-所有内置聚合函数,如 `sum()`、`avg()`、`min()`、`max()` 都能当作 Window Function 使用。
-
-> 注意:与 GROUP BY 不同,Window Function 中每一行都有相应的输出
-
-示例:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.2 Value Function
-1. `first_value`
-
-* 函数名:`first_value(value) [IGNORE NULLS]`
-* 定义:返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+-----------+
-| time|device|flow|first_value|
-+-----------------------------+------+----+-----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+-----------+
-```
-
-2. `last_value`
-
-* 函数名:`last_value(value) [IGNORE NULLS]`
-* 定义:返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|last_value|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-3. `nth_value`
-
-* 函数名:`nth_value(value, n) [IGNORE NULLS]`
-* 定义:返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|nth_values|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-4. lead
-
-* 函数名:`lead(value[, offset[, default]]) [IGNORE NULLS]`
-* 定义:返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
-* lead 函数需要需要一个 ORDER BY 窗口子句
-* 示例:
-
-```SQL
-IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
-+-----------------------------+------+----+----+
-| time|device|flow|lead|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4|null|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:03.000+08:00| d0| 1|null|
-+-----------------------------+------+----+----+
-```
-
-5. lag
-
-* 函数名:`lag(value[, offset[, default]]) [IGNORE NULLS]`
-* 定义:返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
-* lag 函数需要需要一个 ORDER BY 窗口子句
-* 示例:
-
-```SQL
-IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY device);
-+-----------------------------+------+----+----+
-| time|device|flow| lag|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2|null|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3|null|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.3 Rank Function
-1. rank
-
-* 函数名:`rank()`
-* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----+
-| time|device|flow|rank|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----+
-```
-
-2. dense\_rank
-
-* 函数名:`dense_rank()`
-* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap。
-* 示例:
-
-```SQL
-IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|dense_rank|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+----------+
-```
-
-3. row\_number
-
-* 函数名:`row_number()`
-* 定义:返回当前行在整个 partition 中的行号,注意行号从 1 开始;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|row_number|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----------+
-```
-
-4. percent\_rank
-
-* 函数名:`percent_rank()`
-* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(rank() - 1) / (n - 1)**,其中 n 是整个 partition 的行数;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+------------------+
-| time|device|flow| percent_rank|
-+-----------------------------+------+----+------------------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+------------------+
-```
-
-5. cume\_dist
-
-* 函数名:cume\_dist
-* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(小于等于它的行数) / n**。
-* 示例:
-
-```SQL
-IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+---------+
-| time|device|flow|cume_dist|
-+-----------------------------+------+----+---------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+---------+
-```
-
-6. ntile
-
-* 函数名:ntile
-* 定义:指定 n,给每一行进行 1~n 的编号。
- * 整个 partition 行数比 n 小,那么编号就是行号 index;
- * 整个 partition 行数比 n 大:
- * 如果行数能除尽 n,那么比较完美,比如行数为 4,n 为 2,那么编号为 1、1、2、2、;
- * 如果行数不能除尽 n,那么就分给开头几组,比如行数为 5,n 为 3,那么编号为 1、1、2、2、3;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+-----+
-| time|device|flow|ntile|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-+-----------------------------+------+----+-----+
-```
-
-### 4.4 场景示例
-1. 多设备 diff 函数
-
-对于每个设备的每一行,与前一行求差值:
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-对于每个设备的每一行,与后一行求差值:
-
-```SQL
-SELECT
- *,
- measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-对于单个设备的每一行,与前一行求差值(后一行同理):
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (ORDER BY time)
-FROM data
-where device='d1'
-WHERE timeCondition;
-```
-
-2. 多设备 TOP\_K/BOTTOM\_K
-
-利用 rank 获取序号,然后在外部的查询中保留想要的顺序。
-
-(注意, window function 的执行顺序在 HAVING 子句之后,所以这里需要子查询)
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY time DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-除了按照时间排序之外,还可以按照测点的值进行排序:
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY measurement DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-3. 多设备 CHANGE\_POINTS
-
-这个 sql 用来去除输入序列中连续相同值,可以用 lead + 子查询实现:
-
-```SQL
-SELECT
- time,
- device,
- measurement
-FROM(
- SELECT
- time,
- device,
- measurement,
- LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
- FROM data
- WHERE timeCondition
-)
-WHERE measurement != next OR next IS NULL;
-```
diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
new file mode 100644
index 000000000..876c1ee33
--- /dev/null
+++ b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
@@ -0,0 +1,155 @@
+
+
+# 模式查询
+
+## 1. 语法定义
+
+```SQL
+MATCH_RECOGNIZE (
+ [ PARTITION BY column [, ...] ]
+ [ ORDER BY column [, ...] ]
+ [ MEASURES measure_definition [, ...] ]
+ [ ROWS PER MATCH ]
+ [ AFTER MATCH skip_to ]
+ PATTERN ( row_pattern )
+ [ SUBSET subset_definition [, ...] ]
+ DEFINE variable_definition [, ...]
+)
+```
+
+**说明:**
+
+* PARTITION BY :可选,用于对输入表进行分组,每个分组能独立进行模式匹配。如果未声明该子句,则整个输入表将作为一个整体进行处理。
+* ORDER BY :可选,用于确保输入数据按某种顺序进行匹配处理。
+* MEASURES :可选,用于指定从匹配到的一段数据中提取哪些信息。
+* ROWS PER MATCH :可选,用于指定模式匹配成功后结果集的输出方式。
+* AFTER MATCH SKIP :可选,用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
+* PATTERN :用于定义需要匹配的行模式。
+* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。
+* DEFINE :用于定义行模式的基本模式变量。
+
+更多详细功能介绍请参考:[模式查询](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_1-模式查询)
+
+## 2. 使用示例
+
+以[示例数据](../Reference/Sample-Data.md)为源数据
+
+1. 时间分段查询
+
+将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。
+
+查询SQL
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table1
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
+) AS m
+```
+
+查询结果
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
+|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
++-----------------------------+-----------------------------+---+
+Total line number = 2
+```
+
+2. 差值分段查询
+
+将 table2 中的数据按照 humidity 湿度值差值小于等于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。
+
+* 查询sql
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table2
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1
+) AS m;
+```
+
+* 查询结果
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2|
+|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2|
+|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2|
++-----------------------------+-----------------------------+---+
+Total line number = 3
+```
+
+3. 事件统计查询
+
+将 table1 中的数据按照设备号分组,统计上海地区湿度持续大于 35 的每段连续数据的开始、结束时间及最大湿度值。
+
+* 查询sql
+
+```SQL
+SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
+FROM table1
+MATCH_RECOGNIZE (
+ PARTITION BY device_id
+ ORDER BY time
+ MEASURES
+ MATCH_NUMBER() AS match,
+ RPR_FIRST(A.time) AS event_start,
+ RPR_LAST(A.time) AS event_end,
+ MAX(A.humidity) AS max_humidity
+ ONE ROW PER MATCH
+ PATTERN (A+)
+ DEFINE
+ A AS A.region= '上海' AND A.humidity> 35
+) AS m
+```
+
+* 查询结果
+
+```SQL
++---------+-----+-----------------------------+-----------------------------+------------+
+|device_id|match| event_start| event_end|max_humidity|
++---------+-----+-----------------------------+-----------------------------+------------+
+| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
+| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
++---------+-----+-----------------------------+-----------------------------+------------+
+Total line number = 2
+```
diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/overview_timecho.md b/src/zh/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
index 581a106eb..7b6fcb458 100644
--- a/src/zh/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
+++ b/src/zh/UserGuide/Master/Table/SQL-Manual/overview_timecho.md
@@ -40,11 +40,9 @@ IoTDB 查询语法提供以下子句:
- SELECT 子句:查询结果应包含的列。详细语法见:[SELECT子句](../SQL-Manual/Select-Clause.md)
- FROM 子句:指出查询的数据源,可以是单个表、多个通过 `JOIN` 子句连接的表,或者是一个子查询。详细语法见:[FROM & JOIN 子句](../SQL-Manual/From-Join-Clause.md)
-- patternRecognition:行模式识别,支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算。详细语法见:[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md)
- WHERE 子句:用于过滤数据,只选择满足特定条件的数据行。这个子句在逻辑上紧跟在 FROM 子句之后执行。详细语法见:[WHERE 子句](../SQL-Manual/Where-Clause.md)
- GROUP BY 子句:当需要对数据进行聚合时使用,指定了用于分组的列。详细语法见:[GROUP BY 子句](../SQL-Manual/GroupBy-Clause.md)
- HAVING 子句:在 GROUP BY 子句之后使用,用于对已经分组的数据进行过滤。与 WHERE 子句类似,但 HAVING 子句在分组后执行。详细语法见:[HAVING 子句](../SQL-Manual/Having-Clause.md)
-- WINDOW FUNCTION:窗口函数,是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作、排序与可定义的计算范围结合,在不折叠原始数据行的前提下实现复杂的跨行计算。详细语法见:[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)
- FILL 子句:用于处理查询结果中的空值,用户可以使用 FILL 子句来指定数据缺失时的填充模式(如前一个非空值或线性插值)来填充 null 值,以便于数据可视化和分析。 详细语法见:[FILL 子句](../SQL-Manual/Fill-Clause.md)
- ORDER BY 子句:对查询结果进行排序,可以指定升序(ASC)或降序(DESC),以及 NULL 值的处理方式(NULLS FIRST 或 NULLS LAST)。详细语法见:[ORDER BY 子句](../SQL-Manual/OrderBy-Clause.md)
- OFFSET 子句:用于指定查询结果的起始位置,即跳过前 OFFSET 行。与 LIMIT 子句配合使用。详细语法见:[LIMIT 和 OFFSET 子句](../SQL-Manual/Limit-Offset-Clause.md)
diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
similarity index 56%
rename from src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md
rename to src/zh/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
index acd684051..003fdcedb 100644
--- a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md
+++ b/src/zh/UserGuide/Master/Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
@@ -19,21 +19,22 @@
-->
-# 行模式识别
+# 时序特色分析
-## 1. 概述
+IoTDB 针对时序数据的特色分析场景,提供了模式查询与窗口函数两大核心能力,为时序数据的深度挖掘与复杂计算提供了灵活高效的解决方案。下文将对两大功能进行详细的介绍。
-IoTDB 支持行模式识别,该功能支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式、检测特定事件等业务场景。如果将行模式识别看作对数据进行分组处理,则核心流程大致如下:
+## 1. 模式查询
-* 通过 PATTERN、DEFINE、SUBSET 子句进行分组捕获
-* 通过 MEASURES 子句对捕获的分组进行计算处理
-* 通过 ROWS PER MATCH 子句设定分组的输出形式
-* 通过 AFTER MATCH SKIP 子句设定如何定位下一个分组的开始位置
+### 1.1 概述
+
+模式查询支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式(如下图所示)、检测特定事件等业务场景。
+
+
> 注意:该功能从 V 2.0.5 版本开始提供。
-## 2. 功能介绍
-### 2.1 语法格式
+### 1.2 功能介绍
+#### 1.2.1 语法格式
```SQL
MATCH_RECOGNIZE (
@@ -59,7 +60,28 @@ MATCH_RECOGNIZE (
* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。
* DEFINE :用于定义行模式的基本模式变量。
-### 2.2 DEFINE 子句
+**语法示例原始数据:**
+
+```SQL
+IoTDB:database3> select * from t
++-----------------------------+------+----------+
+| time|device|totalprice|
++-----------------------------+------+----------+
+|2025-01-01T00:01:00.000+08:00| d1| 90|
+|2025-01-01T00:02:00.000+08:00| d1| 80|
+|2025-01-01T00:03:00.000+08:00| d1| 70|
+|2025-01-01T00:04:00.000+08:00| d1| 80|
+|2025-01-01T00:05:00.000+08:00| d1| 70|
+|2025-01-01T00:06:00.000+08:00| d1| 80|
++-----------------------------+------+----------+
+
+-- 创建语句
+create table t(device tag, totalprice int32 field)
+
+insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
+```
+
+#### 1.2.2 DEFINE 子句
用于为模式识别中的每个基本模式变量指定其判断条件。这些变量通常由标识符(如 `A`, `B`)代表,并通过该子句中的布尔表达式精确定义哪些行符合该变量的要求。
@@ -72,7 +94,7 @@ DEFINE B AS totalprice < PREV(totalprice)
* **未**在子句中**显式**定义的变量,其匹配条件隐含为恒真(TRUE),即可在任何输入行上成功匹配。
-### 2.3 SUBSET 子句
+#### 1.2.3 SUBSET 子句
用于将多个基本模式变量(如 `A`、`B`)匹配到的行合并成一个联合模式变量(如 `U`),使这些行可以被视为同一个逻辑集合进行操作。可用于`MEASURES`、`DEFINE `和`AFTER MATCH SKIP`子句。
@@ -85,7 +107,7 @@ SUBSET U = (A, B)
1. 在 `MEASURES `子句中,若需要引用该阶段最后一次匹配到的行,则可通过定义联合模式变量 `SUBSET U = (A, B)`实现。此时表达式 `RPR_LAST(U.totalprice)` 将直接返回该目标行的 `totalprice` 值。
2. 在 `AFTER MATCH SKIP` 子句中,若匹配结果中未包含基本模式变量 A 或 B 时,执行 `AFTER MATCH SKIP TO LAST B` 或 `AFTER MATCH SKIP TO LAST A` 会因锚点缺失跳转失败;而通过引入联合模式变量 `SUBSET U = (A, B)`,使用 `AFTER MATCH SKIP TO LAST U` 则始终有效。
-### 2.4 PATTERN 子句
+#### 1.2.4 PATTERN 子句
用于定义需要匹配的行模式,其基本构成单元是**基本模式变量。**
@@ -93,7 +115,7 @@ SUBSET U = (A, B)
PATTERN ( row_pattern )
```
-#### 2.4.1 模式种类
+##### 1.2.4.1 模式种类
| 行模式 | 语法格式 | 描述 |
| ----------------------------------- |---------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
@@ -104,7 +126,7 @@ PATTERN ( row_pattern )
| 空模式(Empty Pattern) | `()` | 表示一个不包含任何行的空匹配 |
| 模式排除(Pattern Exclusion) | `{- row_pattern -}` | 用于指定在输出中需要排除的匹配部分。通常与`ALL ROWS PER MATCH`选项结合使用,用于输出感兴趣的行。如`PATTERN (A {- B+ C+ -} D+)`,并使用`ALL ROWS PER MATCH`时,输出将仅包含匹配的首行(`A`对应行)与尾部行(`D+`对应行)。 |
-#### 2.4.2 分区起始/结束锚点(Partition Start/End Anchor)
+##### 1.2.4.2 分区起始/结束锚点(Partition Start/End Anchor)
* `^A` 表示匹配以 A 为分区开始的模式
* 当 PATTERN 子句的取值为 `^A` 时,要求匹配必须从分区的首行开始,且这一行要满足 `A` 的定义
@@ -113,141 +135,7 @@ PATTERN ( row_pattern )
* 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义
* 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空
-示例介绍可见 [3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter-子句分区锚点)
-
-#### 2.4.3 量词(Quantifiers)
-
-量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。
-
-常用量词如下:
-
-| 量词 | 描述 |
-| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `*` | 零次或多次重复 |
-| `+` | 一次或多次重复 |
-| `?` | 零次或一次重复 |
-| `{n}` | 恰好重复 n 次 |
-| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 |
-
-* 可通过在量词后加 `?` 改变匹配偏好。
- * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次
- * `?`:偏好 1 次;`??`:偏好 0 次
-
-### 2.5 AFTER MATCH SKIP 子句
-
-用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
-
-| 跳转策略 | 描述 | 是否允许识别重叠匹配项 |
-| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ |
-| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 |
-| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 |
-| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 | 最后一行 ] 开始。 | 是 |
-
-* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。
-
-示例介绍可见 [3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-子句)
-
-### 2.6 ROWS PER MATCH 子句
-
-用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项:
-
-| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 |
-| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 |
-| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**:输出空匹配,并为每一条未匹配行额外生成一条输出记录|
-
-### 2.7 MEASURES 子句
-
-用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。
-
-```SQL
-MEASURES measure_expression AS measure_name [, ...]
-```
-
-* `measure_expression` 是根据匹配的一段数据计算出的标量值。
-
-| 用法示例 | 说明 |
-| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
-| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 |
-| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 |
-| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 |
-
-* 每个 `measure_expression `都会定义一个输出列,该列可通过其指定的 `measure_name `进行引用。
-
-### 2.8 行模式识别表达式
-
-在 MEASURES 与 DEFINE 子句中使用的表达式为**标量表达式**,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对行模式识别的特殊扩展函数。
-
-#### 2.8.1 模式变量引用
-
-```SQL
-A.totalprice
-U.orderdate
-orderstatus
-```
-
-* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。
-* 若列名不带前缀,则等同于使用“**全局联合模式变量**”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。
-
-> 不允许在模式识别表达式中使用表名作列名前缀。
-
-#### 2.8.2 扩展函数
-
-| 函数名 | 函数式 | 描述 |
-|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 |
-| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 |
-| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 |
-| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 |
-| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
-| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
-| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) |
-| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 |
-| 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 |
-
-示例介绍可见 [3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数)
-
-#### 2.8.3 RUNNING 和 FINAL 语义
-1. 定义
-
-* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。
-* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。
-
-2. 作用范围
-
-* DEFINE 子句默认采用 RUNNING 语义。
-* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。
-
-3. 语法约束
-
-* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。**
- * 合法:`RUNNING RPP_LAST(A.totalprice)`、`FINAL RPP_LAST(A.totalprice)`
- * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)`
-
-## 3. 语法示例
-
-原始数据
-
-```SQL
-IoTDB:database3> select * from t
-+-----------------------------+------+----------+
-| time|device|totalprice|
-+-----------------------------+------+----------+
-|2025-01-01T00:01:00.000+08:00| d1| 90|
-|2025-01-01T00:02:00.000+08:00| d1| 80|
-|2025-01-01T00:03:00.000+08:00| d1| 70|
-|2025-01-01T00:04:00.000+08:00| d1| 80|
-|2025-01-01T00:05:00.000+08:00| d1| 70|
-|2025-01-01T00:06:00.000+08:00| d1| 80|
-+-----------------------------+------+----------+
-
--- 创建语句
-create table t(device tag, totalprice int32 field)
-
-insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
-```
-
-### 3.1 Patter 子句分区锚点
+**示例说明**
* 查询 sql
@@ -269,6 +157,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当 PATTERN 子句为 PATTERN (^A) 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-----+-----+-----+
@@ -279,7 +171,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * 当 PATTERN 子句为 PATTERN (^A^) 时
+ * 当 PATTERN 子句为 PATTERN (^A^) 时,输出的结果为空,因为不可能从分区的起始位置开始匹配了一个 A 之后,又回到分区的起始位置
```SQL
+----+-----+-----+-----+
@@ -291,6 +183,10 @@ MATCH_RECOGNIZE (
* 当 PATTERN 子句为 PATTERN (A\$) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-----+-----+-----+
| time|match|price|label|
@@ -300,7 +196,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * 当 PATTERN 子句为 PATTERN (\$A\$) 时
+ * 当 PATTERN 子句为 PATTERN (\$A\$) 时,输出的结果为空
```SQL
+----+-----+-----+-----+
@@ -310,7 +206,38 @@ MATCH_RECOGNIZE (
Empty set.
```
-### 3.2 AFTER MATCH SKIP 子句
+
+##### 1.2.4.3 量词(Quantifiers)
+
+量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。
+
+常用量词如下:
+
+| 量词 | 描述 |
+| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `*` | 零次或多次重复 |
+| `+` | 一次或多次重复 |
+| `?` | 零次或一次重复 |
+| `{n}` | 恰好重复 n 次 |
+| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 |
+
+* 可通过在量词后加 `?` 改变匹配偏好。
+ * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次
+ * `?`:偏好 1 次;`??`:偏好 0 次
+
+#### 1.2.5 AFTER MATCH SKIP 子句
+
+用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
+
+| 跳转策略 | 描述 | 是否允许识别重叠匹配项 |
+| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ |
+| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 |
+| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 |
+| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 | 最后一行 ] 开始。 | 是 |
+
+* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。
+
+**示例说明**
* 查询 sql
@@ -336,6 +263,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当 AFTER MATCH SKIP PAST LAST ROW 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:根据 `AFTER MATCH SKIP PAST LAST ROW` 语义,从第 5 行开始,无法再找寻到一个合法匹配
* 此模式一定不会出现重叠匹配
@@ -353,6 +284,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO NEXT ROW 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:根据 `AFTER MATCH SKIP TO NEXT ROW` 语义,从第 2 行开始,匹配:第 2、3、4 行
* 第三次匹配:尝试从第 3 行开始,失败
@@ -378,6 +313,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO FIRST C 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:从第一个 C (也就是第 4 行)处开始,匹配第4、5、6行
* 此模式允许出现重叠匹配
@@ -398,6 +337,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO LAST B 或 AFTER MATCH SKIP TO B 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:尝试从最后一个 B (也就是第 3 行)处开始,失败
* 第二次匹配:尝试从第 4 行开始,成功匹配第4、5、6行
@@ -419,6 +362,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO U 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:`SKIP TO U` 表示跳转到最后一个 C 或 D,D 永远不可能匹配成功,所以就是跳转到最后一个 C(也就是第 4 行),成功匹配第4、5、6行
* 此模式允许出现重叠匹配
@@ -438,20 +385,80 @@ MATCH_RECOGNIZE (
Total line number = 7
```
- * 当 AFTER MATCH SKIP TO A 时,不能跳转到匹配的第一行, 否则会造成死循环
+ * 当 AFTER MATCH SKIP TO A 时,报错。因为不能跳转到匹配的第一行, 否则会造成死循环。
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match
```
- * 当 AFTER MATCH SKIP TO B 时,不能跳转到匹配分组中不存在的模式变量
+ * 当 AFTER MATCH SKIP TO B 时,报错。因为不能跳转到匹配分组中不存在的模式变量。
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match
```
-### 3.3 行模式表达式-扩展函数
-#### 3.3.1 CLASSIFIER()函数
+
+#### 1.2.6 ROWS PER MATCH 子句
+
+用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项:
+
+| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 |
+| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 |
+| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**:输出空匹配,并为每一条未匹配行额外生成一条输出记录|
+
+#### 1.2.7 MEASURES 子句
+
+用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。
+
+```SQL
+MEASURES measure_expression AS measure_name [, ...]
+```
+
+* `measure_expression` 是根据匹配的一段数据计算出的标量值。
+
+| 用法示例 | 说明 |
+| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
+| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 |
+| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 |
+| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 |
+
+* 每个 `measure_expression` 都会定义一个输出列,该列可通过其指定的 `measure_name` 进行引用。
+
+#### 1.2.8 模式查询表达式
+
+在 MEASURES 与 DEFINE 子句中使用的表达式为**标量表达式**,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对模式查询的特殊扩展函数。
+
+##### 1.2.8.1 模式变量引用
+
+```SQL
+A.totalprice
+U.orderdate
+orderstatus
+```
+
+* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。
+* 若列名不带前缀,则等同于使用“**全局联合模式变量**”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。
+
+> 不允许在模式识别表达式中使用表名作列名前缀。
+
+##### 1.2.8.2 扩展函数
+
+| 函数名 | 函数式 | 描述 |
+|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 |
+| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 |
+| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 |
+| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 |
+| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
+| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
+| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) |
+| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 |
+| 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 |
+
+**示例说明**
+
+1. CLASSIFIER 函数
* 查询 sql
@@ -476,6 +483,9 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* 分析过程
+
+ 
* 查询结果
@@ -493,7 +503,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.2 逻辑导航函数
+2. 逻辑导航函数
* 查询 sql
@@ -512,6 +522,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当取值为 totalprice、RPR\_LAST(totalprice)、RUNNING RPR\_LAST(totalprice) 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-------+
@@ -529,6 +543,10 @@ MATCH_RECOGNIZE (
* 当取值为 FINAL RPR\_LAST(totalprice) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -545,6 +563,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_FIRST(totalprice)、 RUNNING RPR\_FIRST(totalprice)、FINAL RPR\_FIRST(totalprice)时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -561,6 +583,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_LAST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -577,6 +603,10 @@ MATCH_RECOGNIZE (
* 当取值为 FINAL RPP\_LAST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -593,6 +623,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_FIRST(totalprice, 2) 和 FINAL RPR\_FIRST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -607,7 +641,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.3 物理导航函数
+3. 物理导航函数
* 查询 sql
@@ -626,6 +660,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当取值为 `PREV(totalprice)` 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-------+
@@ -639,6 +677,10 @@ MATCH_RECOGNIZE (
* 当取值为 `PREV(B.totalprice, 2)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -651,6 +693,10 @@ MATCH_RECOGNIZE (
* 当取值为 `PREV(B.totalprice, 4)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -663,6 +709,10 @@ MATCH_RECOGNIZE (
* 当取值为 `NEXT(totalprice)` 或 `NEXT(B.totalprice, 1)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -675,6 +725,10 @@ MATCH_RECOGNIZE (
* `当取值为 NEXT(B.totalprice, 2)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -685,7 +739,7 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-#### 3.3.4 聚合函数
+4. 聚合函数
* 查询 sql
@@ -705,6 +759,9 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* 分析过程(以 MIN(totalprice)为例)
+
+
* 查询结果
@@ -722,8 +779,9 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.5 嵌套函数
-1. 示例一
+5. 嵌套函数
+
+示例一
* 查询 sql
@@ -750,6 +808,9 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* 分析过程
+
+
* 查询结果
@@ -767,7 +828,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-2. 示例二
+示例二
* 查询 sql
@@ -784,6 +845,9 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* 分析过程
+
+
* 查询结果
@@ -801,11 +865,28 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-## 4. 场景示例
+##### 1.2.8.3 RUNNING 和 FINAL 语义
+1. 定义
+
+* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。
+* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。
+
+2. 作用范围
+
+* DEFINE 子句默认采用 RUNNING 语义。
+* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。
+
+3. 语法约束
+
+* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。**
+  * 合法:`RUNNING RPR_LAST(A.totalprice)`、`FINAL RPR_LAST(A.totalprice)`
+ * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)`
+
+### 1.3 场景示例
以[示例数据](../Reference/Sample-Data.md)为源数据
-### 4.1 时间分段查询
+#### 1.3.1 时间分段查询
将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。
@@ -837,7 +918,7 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-### 4.2 差值分段查询
+#### 1.3.2 差值分段查询
将 table2 中的数据按照 humidity 湿度值差值小于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。
@@ -870,7 +951,7 @@ MATCH_RECOGNIZE (
Total line number = 3
```
-### 4.3 事件统计查询
+#### 1.3.3 事件统计查询
将 table1 中数据按照设备号分组,统计上海地区湿度大于 35 的开始、结束时间及最大湿度值。
@@ -903,5 +984,738 @@ MATCH_RECOGNIZE (
| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
+---------+-----+-----------------------------+-----------------------------+------------+
-Total line number = 2****
+Total line number = 2
+```
+
+
+## 2. 窗口函数
+
+### 2.1 功能介绍
+
+窗口函数(Window Function) 是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作(`PARTITION BY`)、排序(`ORDER BY`)与可定义的计算范围(窗口框架 `FRAME`)结合,在不折叠原始数据行的前提下实现复杂的跨行计算。常用于数据分析场景,比如排名、累计和、移动平均等操作。
+
+> 注意:该功能从 V 2.0.5 版本开始提供。
+
+例如,某场景下需要查询不同设备的功耗累加值,即可通过窗口函数来实现。
+
+```SQL
+-- 原始数据
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00|    d0|    5|
+|1970-01-01T08:00:02.000+08:00|    d0|    3|
+|1970-01-01T08:00:03.000+08:00|    d0|    1|
+|1970-01-01T08:00:04.000+08:00|    d1|    2|
+|1970-01-01T08:00:05.000+08:00|    d1|    4|
++-----------------------------+------+-----+
+
+-- 创建表并插入数据
+CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
+insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
+
+
+--执行窗口函数查询
+SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
+```
+
+经过分组、排序、计算(步骤拆解如下图所示),
+
+
+
+即可得到期望结果:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+### 2.2 功能定义
+#### 2.2.1 SQL 定义
+
+```SQL
+windowDefinition
+ : name=identifier AS '(' windowSpecification ')'
+ ;
+
+windowSpecification
+ : (existingWindowName=identifier)?
+ (PARTITION BY partition+=expression (',' partition+=expression)*)?
+ (ORDER BY sortItem (',' sortItem)*)?
+ windowFrame?
+ ;
+
+windowFrame
+ : frameExtent
+ ;
+
+frameExtent
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=GROUPS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ | frameType=GROUPS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound
+ : UNBOUNDED boundType=PRECEDING #unboundedFrame
+ | UNBOUNDED boundType=FOLLOWING #unboundedFrame
+ | CURRENT ROW #currentRowBound
+ | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame
+ ;
+```
+
+#### 2.2.2 窗口定义
+##### 2.2.2.1 Partition
+
+`PARTITION BY` 用于将数据分为多个独立、不相关的「组」,窗口函数只能访问并操作其所属分组内的数据,无法访问其它分组。该子句是可选的;如果未显式指定,则默认将所有数据分到同一组。值得注意的是,与 `GROUP BY` 通过聚合函数将一组数据规约成一行不同,`PARTITION BY` 的窗口函数**并不会影响组内的行数。**
+
+* 示例
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
+```
+
+拆解步骤:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
++-----------------------------+------+----+-----+
+```
+
+##### 2.2.2.2 Ordering
+
+`ORDER BY` 用于对 partition 内的数据进行排序。排序后,相等的行被称为 peers。peers 会影响窗口函数的行为,例如不同 rank function 对 peers 的处理不同;不同 frame 的划分方式对于 peers 的处理也不同。该子句是可选的。
+
+* 示例
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+```
+
+拆解步骤:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+##### 2.2.2.3 Framing
+
+对于 partition 中的每一行,窗口函数都会在相应的一组行上求值,这些行称为 Frame(即 Window Function 在每一行上的输入域)。Frame 可以手动指定,指定时涉及两个属性,具体说明如下。
+
+
+
+
+ | Frame 属性 |
+ 属性值 |
+ 值描述 |
+
+
+ | 类型 |
+ ROWS |
+ 通过行号来划分 frame |
+
+
+ | GROUPS |
+ 通过 peers 来划分 frame,即值相同的行视为同等的存在。peers 中所有的行分为一个组,叫做 peer group |
+
+
+ | RANGE |
+ 通过值来划分 frame |
+
+
+ | 起始和终止位置 |
+ UNBOUNDED PRECEDING |
+ 整个 partition 的第一行 |
+
+
+ | offset PRECEDING |
+ 代表前面和当前行「距离」为 offset 的行 |
+
+
+ | CURRENT ROW |
+ 当前行 |
+
+
+ | offset FOLLOWING |
+ 代表后面和当前行「距离」为 offset 的行 |
+
+
+ | UNBOUNDED FOLLOWING |
+ 整个 partition 的最后一行 |
+
+
+
+
+其中,`CURRENT ROW`、`offset PRECEDING` 和 `offset FOLLOWING` 的含义随着 frame 种类的不同而不同,如下表所示:
+
+| | `ROWS` | `GROUPS` | `RANGE` |
+|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
+| `CURRENT ROW` | 当前行 | 由于 peer group 包含多行,因此这个选项根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 | 和 GROUPS 相同,根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 |
+| `offset PRECEDING` | 前 offset 行 | 前 offset 个 peer group; | 前面与当前行的值之差小于等于 offset 就分为一个 frame |
+| `offset FOLLOWING` | 后 offset 行 | 后 offset 个 peer group。 | 后面与当前行的值之差小于等于 offset 就分为一个 frame |
+
+语法格式如下:
+
+```SQL
+-- 同时指定 frame_start 和 frame_end
+{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
+-- 仅指定 frame_start,frame_end 为 CURRENT ROW
+{ RANGE | ROWS | GROUPS } frame_start
+```
+
+若未手动指定 Frame,Frame 的默认划分规则如下:
+
+* 当窗口函数使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (即从窗口的第一行到当前行)。例如:RANK() OVER(PARTITION BY COL1 ORDER BY COL2) 中,Frame 默认包含分区内当前行及之前的所有行。
+* 当窗口函数不使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (即整个窗口的所有行)。例如:AVG(COL2) OVER(PARTITION BY col1) 中,Frame 默认包含分区内的所有行,计算整个分区的平均值。
+
+需要注意的是,当 Frame 类型为 GROUPS 或 RANGE 时,需要指定 `ORDER BY`,区别在于 GROUPS 中的 ORDER BY 可以涉及多个字段,而 RANGE 需要计算,所以只能指定一个字段。
+
+* 示例
+
+1. Frame 类型为 ROWS
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 取前一行和当前行作为 Frame
+ * 对于 partition 的第一行,由于没有前一行,所以整个 Frame 只有它一行,返回 1;
+ * 对于 partition 的其他行,整个 Frame 包含当前行和它的前一行,返回 2:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
++-----------------------------+------+----+-----+
+```
+
+2. Frame 类型为 GROUPS
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 取前一个 peer group 和当前 peer group 作为 Frame,那么以 device 为 d0 的 partition 为例(d1同理),对于 count 行数:
+ * 对于 flow 为 1 的 peer group,由于它也没比它小的 peer group 了,所以整个 Frame 就它一行,返回 1;
+ * 对于 flow 为 3 的 peer group,它本身包含 2 行,前一个 peer group 就是 flow 为 1 的,就一行,因此整个 Frame 三行,返回 3;
+ * 对于 flow 为 5 的 peer group,它本身包含 1 行,前一个 peer group 就是 flow 为 3 的,共两行,因此整个 Frame 三行,返回 3。
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+3. Frame 类型为 RANGE
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 把值不大于当前行、且与当前行的差值**小于等于 2** 的行分为同一个 Frame,那么以 device 为 d0 的 partition 为例(d1 同理),对于 count 行数:
+ * 对于 flow 为 1 的行,由于它是最小的行了,所以整个 Frame 就它一行,返回 1;
+ * 对于 flow 为 3 的行,注意 CURRENT ROW 是作为 frame\_end 存在,因此是整个 peer group 的最后一行,符合要求比它小的共 1 行,然后 peer group 有 2 行,所以整个 Frame 共 3 行,返回 3;
+ * 对于 flow 为 5 的行,它本身包含 1 行,符合要求的比它小的共 2 行,所以整个 Frame 共 3 行,返回 3。
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+### 2.3 内置的窗口函数
+
+
+
+
+ | 窗口函数分类 |
+ 窗口函数名 |
+ 函数定义 |
+ 是否支持 FRAME 子句 |
+
+
+ | Aggregate Function |
+ 所有内置聚合函数 |
+ 对一组值进行聚合计算,得到单个聚合结果。 |
+ 是 |
+
+
+ | Value Function |
+ first_value |
+ 返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL |
+ 是 |
+
+
+ | last_value |
+ 返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL |
+ 是 |
+
+
+ | nth_value |
+ 返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL |
+ 是 |
+
+
+ | lead |
+ 返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
+ 否 |
+
+
+ | lag |
+ 返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
+ 否 |
+
+
+ | Rank Function |
+ rank |
+ 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap |
+ 否 |
+
+
+ | dense_rank |
+ 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap |
+ 否 |
+
+
+ | row_number |
+ 返回当前行在整个 partition 中的行号,注意行号从 1 开始 |
+ 否 |
+
+
+ | percent_rank |
+ 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (rank() - 1) / (n - 1),其中 n 是整个 partition 的行数 |
+ 否 |
+
+
+ | cume_dist |
+ 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (小于等于它的行数) / n |
+ 否 |
+
+
+ | ntile |
+ 指定 n,给每一行进行 1~n 的编号。 |
+ 否 |
+
+
+
+
+#### 2.3.1 Aggregate Function
+
+所有内置聚合函数,如 `sum()`、`avg()`、`min()`、`max()` 都能当作 Window Function 使用。
+
+> 注意:与 GROUP BY 不同,Window Function 中每一行都有相应的输出
+
+示例:
+
+```SQL
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.2 Value Function
+1. `first_value`
+
+* 函数名:`first_value(value) [IGNORE NULLS]`
+* 定义:返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+-----------+
+| time|device|flow|first_value|
++-----------------------------+------+----+-----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----------+
+```
+
+2. `last_value`
+
+* 函数名:`last_value(value) [IGNORE NULLS]`
+* 定义:返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|last_value|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+3. `nth_value`
+
+* 函数名:`nth_value(value, n) [IGNORE NULLS]`
+* 定义:返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|nth_values|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+4. lead
+
+* 函数名:`lead(value[, offset[, default]]) [IGNORE NULLS]`
+* 定义:返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
+* lead 函数需要一个 ORDER BY 窗口子句
+* 示例:
+
+```SQL
+IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow|lead|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4|null|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:03.000+08:00| d0| 1|null|
++-----------------------------+------+----+----+
+```
+
+5. lag
+
+* 函数名:`lag(value[, offset[, default]]) [IGNORE NULLS]`
+* 定义:返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
+* lag 函数需要一个 ORDER BY 窗口子句
+* 示例:
+
+```SQL
+IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow| lag|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2|null|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3|null|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.3 Rank Function
+1. rank
+
+* 函数名:`rank()`
+* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+2. dense\_rank
+
+* 函数名:`dense_rank()`
+* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap。
+* 示例:
+
+```SQL
+IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|dense_rank|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+----------+
+```
+
+3. row\_number
+
+* 函数名:`row_number()`
+* 定义:返回当前行在整个 partition 中的行号,注意行号从 1 开始;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|row_number|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----------+
+```
+
+4. percent\_rank
+
+* 函数名:`percent_rank()`
+* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(rank() - 1) / (n - 1)**,其中 n 是整个 partition 的行数;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+------------------+
+| time|device|flow| percent_rank|
++-----------------------------+------+----+------------------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+------------------+
+```
+
+5. cume\_dist
+
+* 函数名:`cume_dist()`
+* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(小于等于它的行数) / n**。
+* 示例:
+
+```SQL
+IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+---------+
+| time|device|flow|cume_dist|
++-----------------------------+------+----+---------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+---------+
+```
+
+6. ntile
+
+* 函数名:`ntile(n)`
+* 定义:指定 n,给每一行进行 1~n 的编号。
+ * 整个 partition 行数比 n 小,那么编号就是行号 index;
+ * 整个 partition 行数比 n 大:
+    * 如果行数能被 n 整除,则各组行数相同,比如行数为 4,n 为 2,那么编号为 1、1、2、2;
+    * 如果行数不能被 n 整除,则多出的行依次分给开头几组,比如行数为 5,n 为 3,那么编号为 1、1、2、2、3;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+-----+
+| time|device|flow|ntile|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
++-----------------------------+------+----+-----+
+```
+
+### 2.4 场景示例
+1. 多设备 diff 函数
+
+对于每个设备的每一行,与前一行求差值:
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+对于每个设备的每一行,与后一行求差值:
+
+```SQL
+SELECT
+ *,
+ measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+对于单个设备的每一行,与前一行求差值(后一行同理):
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (ORDER BY time)
+FROM data
+WHERE device='d1'
+  AND timeCondition;
+```
+
+2. 多设备 TOP\_K/BOTTOM\_K
+
+利用 rank 获取序号,然后在外部的查询中保留想要的顺序。
+
+(注意, window function 的执行顺序在 HAVING 子句之后,所以这里需要子查询)
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+    rank() OVER (PARTITION BY device ORDER BY time DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+除了按照时间排序之外,还可以按照测点的值进行排序:
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+    rank() OVER (PARTITION BY device ORDER BY measurement DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+3. 多设备 CHANGE\_POINTS
+
+这个 sql 用来去除输入序列中连续相同值,可以用 lead + 子查询实现:
+
+```SQL
+SELECT
+ time,
+ device,
+ measurement
+FROM(
+ SELECT
+ time,
+ device,
+ measurement,
+ LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
+ FROM data
+ WHERE timeCondition
+)
+WHERE measurement != next OR next IS NULL;
```
diff --git a/src/zh/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md b/src/zh/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
index 0392c6119..53309d6aa 100644
--- a/src/zh/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
+++ b/src/zh/UserGuide/latest-Table/Basic-Concept/Query-Data_timecho.md
@@ -40,11 +40,9 @@ IoTDB 查询语法提供以下子句:
- SELECT 子句:查询结果应包含的列。详细语法见:[SELECT子句](../SQL-Manual/Select-Clause.md)
- FROM 子句:指出查询的数据源,可以是单个表、多个通过 `JOIN` 子句连接的表,或者是一个子查询。详细语法见:[FROM & JOIN 子句](../SQL-Manual/From-Join-Clause.md)
-- patternRecognition:行模式识别,支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算。详细语法见:[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md)
- WHERE 子句:用于过滤数据,只选择满足特定条件的数据行。这个子句在逻辑上紧跟在 FROM 子句之后执行。详细语法见:[WHERE 子句](../SQL-Manual/Where-Clause.md)
- GROUP BY 子句:当需要对数据进行聚合时使用,指定了用于分组的列。详细语法见:[GROUP BY 子句](../SQL-Manual/GroupBy-Clause.md)
- HAVING 子句:在 GROUP BY 子句之后使用,用于对已经分组的数据进行过滤。与 WHERE 子句类似,但 HAVING 子句在分组后执行。详细语法见:[HAVING 子句](../SQL-Manual/Having-Clause.md)
-- WINDOW FUNCTION:窗口函数,是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作、排序与可定义的计算范围结合,在不折叠原始数据行的前提下实现复杂的跨行计算。详细语法见:[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)
- FILL 子句:用于处理查询结果中的空值,用户可以使用 FILL 子句来指定数据缺失时的填充模式(如前一个非空值或线性插值)来填充 null 值,以便于数据可视化和分析。 详细语法见:[FILL 子句](../SQL-Manual/Fill-Clause.md)
- ORDER BY 子句:对查询结果进行排序,可以指定升序(ASC)或降序(DESC),以及 NULL 值的处理方式(NULLS FIRST 或 NULLS LAST)。详细语法见:[ORDER BY 子句](../SQL-Manual/OrderBy-Clause.md)
- OFFSET 子句:用于指定查询结果的起始位置,即跳过前 OFFSET 行。与 LIMIT 子句配合使用。详细语法见:[LIMIT 和 OFFSET 子句](../SQL-Manual/Limit-Offset-Clause.md)
@@ -592,77 +590,3 @@ IoTDB> SELECT time, temperature, humidity
Total line number = 10
It costs 0.093s
```
-
-### 3.9 行模式识别
-
-**示例:将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。**
-
-```SQL
-SELECT start_time, end_time, cnt
-FROM table1
-MATCH_RECOGNIZE (
- ORDER BY time
- MEASURES
- RPR_FIRST(A.time) AS start_time,
- RPR_LAST(time) AS end_time,
- COUNT() AS cnt
- PATTERN (A B*)
- DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
-) AS m
-```
-
-执行结果如下:
-
-```SQL
-+-----------------------------+-----------------------------+---+
-| start_time| end_time|cnt|
-+-----------------------------+-----------------------------+---+
-|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
-|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
-+-----------------------------+-----------------------------+---+
-Total line number = 2
-```
-
-### 3.10 窗口函数
-
-**示例:查询不同设备的功耗累加值**
-
-原始数据如下:
-
-```SQL
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-```
-
-查询语句如下:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-经过分组、排序、计算(步骤拆解如下图所示),
-
-
-
-执行结果如下:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
diff --git a/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md b/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
index 08d1fac39..e91e163e4 100644
--- a/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
+++ b/src/zh/UserGuide/latest-Table/QuickStart/QuickStart_timecho.md
@@ -62,7 +62,7 @@
2. 数据写入&更新:在数据写入&更新方面,IoTDB 提供了多种方式来插入实时数据,基本的数据写入&更新操作请查看 [数据写入&更新](../Basic-Concept/Write-Updata-Data.md)
-3. 数据查询:IoTDB 提供了丰富的数据查询功能,数据查询的基本介绍请查看 [数据查询](../Basic-Concept/Query-Data_timecho.md),其中包含了适用于识别时序数据中的特定模式、检测特定事件等业务场景的[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md),以及常用于数据分析场景的[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)等特色函数
+3. 数据查询:IoTDB 提供了丰富的数据查询功能,数据查询的基本介绍请查看 [数据查询](../Basic-Concept/Query-Data.md),其中包含了适用于时序特色分析的模式查询和窗口函数,详细介绍请查看[时序特色分析](../User-Manual/Timeseries-Featured-Analysis_timecho.md)
4. 数据删除:IoTDB 提供了两种删除方式,分别为SQL语句删除与过期自动删除(TTL)
diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md b/src/zh/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
index 2b3a5ddec..96646c022 100644
--- a/src/zh/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
+++ b/src/zh/UserGuide/latest-Table/SQL-Manual/Featured-Functions_timecho.md
@@ -697,57 +697,7 @@ IoTDB> SELECT window_start, window_end, stock_id, avg(price) as avg FROM CUMULAT
## 4. 窗口函数
-### 4.1 功能介绍
-
-IoTDB 支持的窗口函数(Window Function) 是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作(`PARTITION BY`)、排序(`ORDER BY`)与可定义的计算范围(窗口框架 `FRAME`)结合,在不折叠原始数据行的前提下实现复杂的跨行计算。常用于数据分析场景,比如排名、累计和、移动平均等操作。
-
-> 注意:该功能从 V 2.0.5 版本开始提供。
-
-例如,某场景下需要查询不同设备的功耗累加值,即可通过窗口函数来实现。
-
-```SQL
--- 原始数据
-+-----------------------------+------+-----+
-| time|device| flow|
-+-----------------------------+------+-----+
-|1970-01-01T08:00:00.000+08:00| d0| 3|
-|1970-01-01T08:00:00.001+08:00| d0| 5|
-|1970-01-01T08:00:00.002+08:00| d0| 3|
-|1970-01-01T08:00:00.003+08:00| d0| 1|
-|1970-01-01T08:00:00.004+08:00| d1| 2|
-|1970-01-01T08:00:00.005+08:00| d1| 4|
-+-----------------------------+------+-----+
-
--- 创建表并插入数据
-CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
-insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
-
-
---执行窗口函数查询
-SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-```
-
-经过分组、排序、计算(步骤拆解如下图所示),
-
-
-
-即可得到期望结果:
-
-```SQL
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-### 4.2 功能定义
-#### 4.2.1 SQL 定义
+### 4.1 语法定义
```SQL
windowDefinition
@@ -782,175 +732,48 @@ frameBound
;
```
-#### 4.2.2 窗口定义
-##### Partition
-
-`PARTITION BY` 用于将数据分为多个独立、不相关的「组」,窗口函数只能访问并操作其所属分组内的数据,无法访问其它分组。该子句是可选的;如果未显式指定,则默认将所有数据分到同一组。值得注意的是,与 `GROUP BY` 通过聚合函数将一组数据规约成一行不同,`PARTITION BY` 的窗口函数**并不会影响组内的行数。**
-
-* 示例
-
-查询语句:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
-```
-
-拆解步骤:
+更多详细功能介绍请参考:[窗口函数](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_2-窗口函数)
-
+### 4.2 使用示例
-查询结果:
+表 device_flow 原始数据如下:
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
-+-----------------------------+------+----+-----+
+```sql
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5|
+|1970-01-01T08:00:02.000+08:00| d0| 3|
+|1970-01-01T08:00:03.000+08:00| d0| 1|
+|1970-01-01T08:00:04.000+08:00| d1| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4|
++-----------------------------+------+-----+
```
-##### Ordering
-
-`ORDER BY` 用于对 partition 内的数据进行排序。排序后,相等的行被称为 peers。peers 会影响窗口函数的行为,例如不同 rank function 对 peers 的处理不同;不同 frame 的划分方式对于 peers 的处理也不同。该子句是可选的。
-
-* 示例
+1. 从 device_flow 中查询所有列,并按 device 维度分组,在每个设备分组内按 flow 字段值排序,计算 flow 字段的累计求和,最终将累计和命名为 sum 列返回。
查询语句:
```SQL
-IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
```
-拆解步骤:
-
-
-
查询结果:
```SQL
+-----------------------------+------+----+----+
-| time|device|flow|rank|
+| time|device|flow| sum|
+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
+-----------------------------+------+----+----+
```
-
-##### Framing
-
-对于 partition 中的每一行,窗口函数都会在相应的一组行上求值,这些行称为 Frame(即 Window Function 在每一行上的输入域)。Frame 可以手动指定,指定时涉及两个属性,具体说明如下。
-
-
-
-
- | Frame 属性 |
- 属性值 |
- 值描述 |
-
-
- | 类型 |
- ROWS |
- 通过行号来划分 frame |
-
-
- | GROUPS |
- 通过 peers 来划分 frame,即值相同的行视为同等的存在。peers 中所有的行分为一个组,叫做 peer group |
-
-
- | RANGE |
- 通过值来划分 frame |
-
-
- | 起始和终止位置 |
- UNBOUNDED PRECEDING |
- 整个 partition 的第一行 |
-
-
- | offset PRECEDING |
- 代表前面和当前行「距离」为 offset 的行 |
-
-
- | CURRENT ROW |
- 当前行 |
-
-
- | offset FOLLOWING |
- 代表后面和当前行「距离」为 offset 的行 |
-
-
- | UNBOUNDED FOLLOWING |
- 整个 partition 的最后一行 |
-
-
-
-
-其中,`CURRENT ROW`、`PRECEDING N` 和 `FOLLOWING N` 的含义随着 frame 种类的不同而不同,如下表所示:
-
-| | `ROWS` | `GROUPS` | `RANGE` |
-|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
-| `CURRENT ROW` | 当前行 | 由于 peer group 包含多行,因此这个选项根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 | 和 GROUPS 相同,根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 |
-| `offset PRECEDING` | 前 offset 行 | 前 offset 个 peer group; | 前面与当前行的值之差小于等于 offset 就分为一个 frame |
-| `offset FOLLOWING` | 后 offset 行 | 后 offset 个 peer group。 | 后面与当前行的值之差小于等于 offset 就分为一个 frame |
-
-语法格式如下:
-
-```SQL
--- 同时指定 frame_start 和 frame_end
-{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
--- 仅指定 frame_start,frame_end 为 CURRENT ROW
-{ RANGE | ROWS | GROUPS } frame_start
-```
-
-若未手动指定 Frame,Frame 的默认划分规则如下:
-
-* 当窗口函数使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (即从窗口的第一行到当前行)。例如:RANK() OVER(PARTITION BY COL1 0RDER BY COL2) 中,Frame 默认包含分区内当前行及之前的所有行。
-* 当窗口函数不使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (即整个窗口的所有行)。例如:AVG(COL2) OVER(PARTITION BY col1) 中,Frame 默认包含分区内的所有行,计算整个分区的平均值。
-
-需要注意的是,当 Frame 类型为 GROUPS 或 RANGE 时,需要指定 `ORDER BY`,区别在于 GROUPS 中的 ORDER BY 可以涉及多个字段,而 RANGE 需要计算,所以只能指定一个字段。
-
-* 示例
-
-1. Frame 类型为 ROWS
-
-查询语句:
-
-```SQL
-IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
-```
-
-拆解步骤:
-
-* 取前一行和当前行作为 Frame
- * 对于 partition 的第一行,由于没有前一行,所以整个 Frame 只有它一行,返回 1;
- * 对于 partition 的其他行,整个 Frame 包含当前行和它的前一行,返回 2:
-
-
-
-查询结果:
-
-```SQL
-+-----------------------------+------+----+-----+
-| time|device|flow|count|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
-+-----------------------------+------+----+-----+
-```
-
-2. Frame 类型为 GROUPS
+2. 从 device_flow 表查询所有原始列,按 device 设备分组,每个设备分组内按 flow 字段值排序,统计「当前行所在的 flow 分组 + 前 1 个 flow 分组」范围内的行数(计数),最终将计数结果命名为 count 列返回。
查询语句:
@@ -958,15 +781,6 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count
IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-拆解步骤:
-
-* 取前一个 peer group 和当前 peer group 作为 Frame,那么以 device 为 d0 的 partition 为例(d1同理),对于 count 行数:
- * 对于 flow 为 1 的 peer group,由于它也没比它小的 peer group 了,所以整个 Frame 就它一行,返回 1;
- * 对于 flow 为 3 的 peer group,它本身包含 2 行,前一个 peer group 就是 flow 为 1 的,就一行,因此整个 Frame 三行,返回 3;
- * 对于 flow 为 5 的 peer group,它本身包含 1 行,前一个 peer group 就是 flow 为 3 的,共两行,因此整个 Frame 三行,返回 3。
-
-
-
查询结果:
```SQL
@@ -982,7 +796,7 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWE
+-----------------------------+------+----+-----+
```
-3. Frame 类型为 RANGE
+3. 从 device_flow 表查询所有原始列,按 device 分组,每个分组内按 flow 字段值升序排序,统计「当前行 flow 值 - 2」到「当前行 flow 值」这个数值区间内的所有行的数量,最终将计数结果命名为 count 列返回。
查询语句:
@@ -990,15 +804,6 @@ IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWE
IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
```
-拆解步骤:
-
-* 把比当前行数据**小于等于 2 **的分为同一个 Frame,那么以 device 为 d0 的 partition 为例(d1 同理),对于 count 行数:
- * 对于 flow 为 1 的行,由于它是最小的行了,所以整个 Frame 就它一行,返回 1;
- * 对于 flow 为 3 的行,注意 CURRENT ROW 是作为 frame\_end 存在,因此是整个 peer group 的最后一行,符合要求比它小的共 1 行,然后 peer group 有 2 行,所以整个 Frame 共 3 行,返回 3;
- * 对于 flow 为 5 的行,它本身包含 1 行,符合要求的比它小的共 2 行,所以整个 Frame 共 3 行,返回 3。
-
-
-
查询结果:
```SQL
@@ -1013,416 +818,3 @@ IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN
|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+-----------------------------+------+----+-----+
```
-
-### 4.3 内置的窗口函数
-
-
-
-
- | 窗口函数分类 |
- 窗口函数名 |
- 函数定义 |
- 是否支持 FRAME 子句 |
-
-
- | Aggregate Function |
- 所有内置聚合函数 |
- 对一组值进行聚合计算,得到单个聚合结果。 |
- 是 |
-
-
- | Value Function |
- first_value |
- 返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL |
- 是 |
-
-
- | last_value |
- 返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL |
- 是 |
-
-
- | nth_value |
- 返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL |
- 是 |
-
-
- | lead |
- 返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
- 否 |
-
-
- | lag |
- 返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
- 否 |
-
-
- | Rank Function |
- rank |
- 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap |
- 否 |
-
-
- | dense_rank |
- 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap |
- 否 |
-
-
- | row_number |
- 返回当前行在整个 partition 中的行号,注意行号从 1 开始 |
- 否 |
-
-
- | percent_rank |
- 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (rank() - 1) / (n - 1),其中 n 是整个 partition 的行数 |
- 否 |
-
-
- | cume_dist |
- 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (小于等于它的行数) / n |
- 否 |
-
-
- | ntile |
- 指定 n,给每一行进行 1~n 的编号。 |
- 否 |
-
-
-
-
-#### 4.3.1 Aggregate Function
-
-所有内置聚合函数,如 `sum()`、`avg()`、`min()`、`max()` 都能当作 Window Function 使用。
-
-> 注意:与 GROUP BY 不同,Window Function 中每一行都有相应的输出
-
-示例:
-
-```SQL
-IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
-+-----------------------------+------+----+----+
-| time|device|flow| sum|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
-|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.2 Value Function
-1. `first_value`
-
-* 函数名:`first_value(value) [IGNORE NULLS]`
-* 定义:返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+-----------+
-| time|device|flow|first_value|
-+-----------------------------+------+----+-----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+-----------+
-```
-
-2. `last_value`
-
-* 函数名:`last_value(value) [IGNORE NULLS]`
-* 定义:返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|last_value|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-3. `nth_value`
-
-* 函数名:`nth_value(value, n) [IGNORE NULLS]`
-* 定义:返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
-+-----------------------------+------+----+----------+
-| time|device|flow|nth_values|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
-+-----------------------------+------+----+----------+
-```
-
-4. lead
-
-* 函数名:`lead(value[, offset[, default]]) [IGNORE NULLS]`
-* 定义:返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
-* lead 函数需要需要一个 ORDER BY 窗口子句
-* 示例:
-
-```SQL
-IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
-+-----------------------------+------+----+----+
-| time|device|flow|lead|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
-|1970-01-01T08:00:05.000+08:00| d1| 4|null|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:03.000+08:00| d0| 1|null|
-+-----------------------------+------+----+----+
-```
-
-5. lag
-
-* 函数名:`lag(value[, offset[, default]]) [IGNORE NULLS]`
-* 定义:返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
-* lag 函数需要需要一个 ORDER BY 窗口子句
-* 示例:
-
-```SQL
-IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY device);
-+-----------------------------+------+----+----+
-| time|device|flow| lag|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2|null|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:00.000+08:00| d0| 3|null|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
-+-----------------------------+------+----+----+
-```
-
-#### 4.3.3 Rank Function
-1. rank
-
-* 函数名:`rank()`
-* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----+
-| time|device|flow|rank|
-+-----------------------------+------+----+----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----+
-```
-
-2. dense\_rank
-
-* 函数名:`dense_rank()`
-* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap。
-* 示例:
-
-```SQL
-IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|dense_rank|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
-+-----------------------------+------+----+----------+
-```
-
-3. row\_number
-
-* 函数名:`row_number()`
-* 定义:返回当前行在整个 partition 中的行号,注意行号从 1 开始;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+----------+
-| time|device|flow|row_number|
-+-----------------------------+------+----+----------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
-+-----------------------------+------+----+----------+
-```
-
-4. percent\_rank
-
-* 函数名:`percent_rank()`
-* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(rank() - 1) / (n - 1)**,其中 n 是整个 partition 的行数;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+------------------+
-| time|device|flow| percent_rank|
-+-----------------------------+------+----+------------------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
-|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+------------------+
-```
-
-5. cume\_dist
-
-* 函数名:cume\_dist
-* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(小于等于它的行数) / n**。
-* 示例:
-
-```SQL
-IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+---------+
-| time|device|flow|cume_dist|
-+-----------------------------+------+----+---------+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
-+-----------------------------+------+----+---------+
-```
-
-6. ntile
-
-* 函数名:ntile
-* 定义:指定 n,给每一行进行 1~n 的编号。
- * 整个 partition 行数比 n 小,那么编号就是行号 index;
- * 整个 partition 行数比 n 大:
- * 如果行数能除尽 n,那么比较完美,比如行数为 4,n 为 2,那么编号为 1、1、2、2、;
- * 如果行数不能除尽 n,那么就分给开头几组,比如行数为 5,n 为 3,那么编号为 1、1、2、2、3;
-* 示例:
-
-```SQL
-IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
-+-----------------------------+------+----+-----+
-| time|device|flow|ntile|
-+-----------------------------+------+----+-----+
-|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
-|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
-|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
-|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
-|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
-|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
-+-----------------------------+------+----+-----+
-```
-
-### 4.4 场景示例
-1. 多设备 diff 函数
-
-对于每个设备的每一行,与前一行求差值:
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-对于每个设备的每一行,与后一行求差值:
-
-```SQL
-SELECT
- *,
- measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
-FROM data
-WHERE timeCondition;
-```
-
-对于单个设备的每一行,与前一行求差值(后一行同理):
-
-```SQL
-SELECT
- *,
- measurement - lag(measurement) OVER (ORDER BY time)
-FROM data
-where device='d1'
-WHERE timeCondition;
-```
-
-2. 多设备 TOP\_K/BOTTOM\_K
-
-利用 rank 获取序号,然后在外部的查询中保留想要的顺序。
-
-(注意, window function 的执行顺序在 HAVING 子句之后,所以这里需要子查询)
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY time DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-除了按照时间排序之外,还可以按照测点的值进行排序:
-
-```SQL
-SELECT *
-FROM(
- SELECT
- *,
- rank() OVER (PARTITION BY device ORDER BY measurement DESC)
- FROM data
- WHERE timeCondition
-)
-WHERE rank <= 3;
-```
-
-3. 多设备 CHANGE\_POINTS
-
-这个 sql 用来去除输入序列中连续相同值,可以用 lead + 子查询实现:
-
-```SQL
-SELECT
- time,
- device,
- measurement
-FROM(
- SELECT
- time,
- device,
- measurement,
- LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
- FROM data
- WHERE timeCondition
-)
-WHERE measurement != next OR next IS NULL;
-```
diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
new file mode 100644
index 000000000..876c1ee33
--- /dev/null
+++ b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition_timecho.md
@@ -0,0 +1,155 @@
+
+
+# 模式查询
+
+## 1. 语法定义
+
+```SQL
+MATCH_RECOGNIZE (
+ [ PARTITION BY column [, ...] ]
+ [ ORDER BY column [, ...] ]
+ [ MEASURES measure_definition [, ...] ]
+ [ ROWS PER MATCH ]
+ [ AFTER MATCH skip_to ]
+ PATTERN ( row_pattern )
+ [ SUBSET subset_definition [, ...] ]
+ DEFINE variable_definition [, ...]
+)
+```
+
+**说明:**
+
+* PARTITION BY : 可选,用于对输入表进行分组,每个分组能独立进行模式匹配。如果未声明该子句,则整个输入表将作为一个整体进行处理。
+* ORDER BY :可选,用于确保输入数据按某种顺序进行匹配处理。
+* MEASURES :可选,用于指定从匹配到的一段数据中提取哪些信息。
+* ROWS PER MATCH :可选,用于指定模式匹配成功后结果集的输出方式。
+* AFTER MATCH SKIP :可选,用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
+* PATTERN :用于定义需要匹配的行模式。
+* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。
+* DEFINE :用于定义行模式的基本模式变量。
+
+更多详细功能介绍请参考:[模式查询](../User-Manual/Timeseries-Featured-Analysis_timecho.md#_1-模式查询)
+
+## 2. 使用示例
+
+以[示例数据](../Reference/Sample-Data.md)为源数据
+
+1. 时间分段查询
+
+将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。
+
+* 查询sql
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table1
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000
+) AS m
+```
+
+* 查询结果
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2|
+|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16|
++-----------------------------+-----------------------------+---+
+Total line number = 2
+```
+
+2. 差值分段查询
+
+将 table2 中的数据按照 humidity 湿度值差值小于等于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。
+
+* 查询sql
+
+```SQL
+SELECT start_time, end_time, cnt
+FROM table2
+MATCH_RECOGNIZE (
+ ORDER BY time
+ MEASURES
+ RPR_FIRST(A.time) AS start_time,
+ RPR_LAST(time) AS end_time,
+ COUNT() AS cnt
+ PATTERN (A B*)
+ DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1
+) AS m;
+```
+
+* 查询结果
+
+```SQL
++-----------------------------+-----------------------------+---+
+| start_time| end_time|cnt|
++-----------------------------+-----------------------------+---+
+|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2|
+|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2|
+|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2|
++-----------------------------+-----------------------------+---+
+Total line number = 3
+```
+
+3. 事件统计查询
+
+将 table1 中数据按照设备号分组,统计上海地区湿度大于 35 的开始、结束时间及最大湿度值。
+
+* 查询sql
+
+```SQL
+SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity
+FROM table1
+MATCH_RECOGNIZE (
+ PARTITION BY device_id
+ ORDER BY time
+ MEASURES
+ MATCH_NUMBER() AS match,
+ RPR_FIRST(A.time) AS event_start,
+ RPR_LAST(A.time) AS event_end,
+ MAX(A.humidity) AS max_humidity
+ ONE ROW PER MATCH
+ PATTERN (A+)
+ DEFINE
+ A AS A.region= '上海' AND A.humidity> 35
+) AS m
+```
+
+* 查询结果
+
+```SQL
++---------+-----+-----------------------------+-----------------------------+------------+
+|device_id|match| event_start| event_end|max_humidity|
++---------+-----+-----------------------------+-----------------------------+------------+
+| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
+| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
++---------+-----+-----------------------------+-----------------------------+------------+
+Total line number = 2
+```
diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/overview_timecho.md b/src/zh/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
index 581a106eb..7b6fcb458 100644
--- a/src/zh/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
+++ b/src/zh/UserGuide/latest-Table/SQL-Manual/overview_timecho.md
@@ -40,11 +40,9 @@ IoTDB 查询语法提供以下子句:
- SELECT 子句:查询结果应包含的列。详细语法见:[SELECT子句](../SQL-Manual/Select-Clause.md)
- FROM 子句:指出查询的数据源,可以是单个表、多个通过 `JOIN` 子句连接的表,或者是一个子查询。详细语法见:[FROM & JOIN 子句](../SQL-Manual/From-Join-Clause.md)
-- patternRecognition:行模式识别,支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算。详细语法见:[行模式识别](../SQL-Manual/Row-Pattern-Recognition.md)
- WHERE 子句:用于过滤数据,只选择满足特定条件的数据行。这个子句在逻辑上紧跟在 FROM 子句之后执行。详细语法见:[WHERE 子句](../SQL-Manual/Where-Clause.md)
- GROUP BY 子句:当需要对数据进行聚合时使用,指定了用于分组的列。详细语法见:[GROUP BY 子句](../SQL-Manual/GroupBy-Clause.md)
- HAVING 子句:在 GROUP BY 子句之后使用,用于对已经分组的数据进行过滤。与 WHERE 子句类似,但 HAVING 子句在分组后执行。详细语法见:[HAVING 子句](../SQL-Manual/Having-Clause.md)
-- WINDOW FUNCTION:窗口函数,是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作、排序与可定义的计算范围结合,在不折叠原始数据行的前提下实现复杂的跨行计算。详细语法见:[窗口函数](../SQL-Manual/Featured-Functions_timecho.md#_4-窗口函数)
- FILL 子句:用于处理查询结果中的空值,用户可以使用 FILL 子句来指定数据缺失时的填充模式(如前一个非空值或线性插值)来填充 null 值,以便于数据可视化和分析。 详细语法见:[FILL 子句](../SQL-Manual/Fill-Clause.md)
- ORDER BY 子句:对查询结果进行排序,可以指定升序(ASC)或降序(DESC),以及 NULL 值的处理方式(NULLS FIRST 或 NULLS LAST)。详细语法见:[ORDER BY 子句](../SQL-Manual/OrderBy-Clause.md)
- OFFSET 子句:用于指定查询结果的起始位置,即跳过前 OFFSET 行。与 LIMIT 子句配合使用。详细语法见:[LIMIT 和 OFFSET 子句](../SQL-Manual/Limit-Offset-Clause.md)
diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
similarity index 56%
rename from src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md
rename to src/zh/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
index acd684051..003fdcedb 100644
--- a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md
+++ b/src/zh/UserGuide/latest-Table/User-Manual/Timeseries-Featured-Analysis_timecho.md
@@ -19,21 +19,22 @@
-->
-# 行模式识别
+# 时序特色分析
-## 1. 概述
+IoTDB 针对时序数据的特色分析场景,提供了模式查询与窗口函数两大核心能力,为时序数据的深度挖掘与复杂计算提供了灵活高效的解决方案。下文将对两大功能进行详细的介绍。
-IoTDB 支持行模式识别,该功能支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式、检测特定事件等业务场景。如果将行模式识别看作对数据进行分组处理,则核心流程大致如下:
+## 1. 模式查询
-* 通过 PATTERN、DEFINE、SUBSET 子句进行分组捕获
-* 通过 MEASURES 子句对捕获的分组进行计算处理
-* 通过 ROWS PER MATCH 子句设定分组的输出形式
-* 通过 AFTER MATCH SKIP 子句设定如何定位下一个分组的开始位置
+### 1.1 概述
+
+模式查询支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式(如下图所示)、检测特定事件等业务场景。
+
+
> 注意:该功能从 V 2.0.5 版本开始提供。
-## 2. 功能介绍
-### 2.1 语法格式
+### 1.2 功能介绍
+#### 1.2.1 语法格式
```SQL
MATCH_RECOGNIZE (
@@ -59,7 +60,28 @@ MATCH_RECOGNIZE (
* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。
* DEFINE :用于定义行模式的基本模式变量。
-### 2.2 DEFINE 子句
+**语法示例原始数据:**
+
+```SQL
+IoTDB:database3> select * from t
++-----------------------------+------+----------+
+| time|device|totalprice|
++-----------------------------+------+----------+
+|2025-01-01T00:01:00.000+08:00| d1| 90|
+|2025-01-01T00:02:00.000+08:00| d1| 80|
+|2025-01-01T00:03:00.000+08:00| d1| 70|
+|2025-01-01T00:04:00.000+08:00| d1| 80|
+|2025-01-01T00:05:00.000+08:00| d1| 70|
+|2025-01-01T00:06:00.000+08:00| d1| 80|
++-----------------------------+------+----------+
+
+-- 创建语句
+create table t(device tag, totalprice int32 field)
+
+insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
+```
+
+#### 1.2.2 DEFINE 子句
用于为模式识别中的每个基本模式变量指定其判断条件。这些变量通常由标识符(如 `A`, `B`)代表,并通过该子句中的布尔表达式精确定义哪些行符合该变量的要求。
@@ -72,7 +94,7 @@ DEFINE B AS totalprice < PREV(totalprice)
* **未**在子句中**显式**定义的变量,其匹配条件隐含为恒真(TRUE),即可在任何输入行上成功匹配。
-### 2.3 SUBSET 子句
+#### 1.2.3 SUBSET 子句
用于将多个基本模式变量(如 `A`、`B`)匹配到的行合并成一个联合模式变量(如 `U`),使这些行可以被视为同一个逻辑集合进行操作。可用于`MEASURES`、`DEFINE `和`AFTER MATCH SKIP`子句。
@@ -85,7 +107,7 @@ SUBSET U = (A, B)
1. 在 `MEASURES `子句中,若需要引用该阶段最后一次匹配到的行,则可通过定义联合模式变量 `SUBSET U = (A, B)`实现。此时表达式 `RPR_LAST(U.totalprice)` 将直接返回该目标行的 `totalprice` 值。
2. 在 `AFTER MATCH SKIP` 子句中,若匹配结果中未包含基本模式变量 A 或 B 时,执行 `AFTER MATCH SKIP TO LAST B` 或 `AFTER MATCH SKIP TO LAST A` 会因锚点缺失跳转失败;而通过引入联合模式变量 `SUBSET U = (A, B)`,使用 `AFTER MATCH SKIP TO LAST U` 则始终有效。
-### 2.4 PATTERN 子句
+#### 1.2.4 PATTERN 子句
用于定义需要匹配的行模式,其基本构成单元是**基本模式变量。**
@@ -93,7 +115,7 @@ SUBSET U = (A, B)
PATTERN ( row_pattern )
```
-#### 2.4.1 模式种类
+##### 1.2.4.1 模式种类
| 行模式 | 语法格式 | 描述 |
| ----------------------------------- |---------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
@@ -104,7 +126,7 @@ PATTERN ( row_pattern )
| 空模式(Empty Pattern) | `()` | 表示一个不包含任何行的空匹配 |
| 模式排除(Pattern Exclusion) | `{- row_pattern -}` | 用于指定在输出中需要排除的匹配部分。通常与`ALL ROWS PER MATCH`选项结合使用,用于输出感兴趣的行。如`PATTERN (A {- B+ C+ -} D+)`,并使用`ALL ROWS PER MATCH`时,输出将仅包含匹配的首行(`A`对应行)与尾部行(`D+`对应行)。 |
-#### 2.4.2 分区起始/结束锚点(Partition Start/End Anchor)
+##### 1.2.4.2 分区起始/结束锚点(Partition Start/End Anchor)
* `^A` 表示匹配以 A 为分区开始的模式
* 当 PATTERN 子句的取值为 `^A` 时,要求匹配必须从分区的首行开始,且这一行要满足 `A` 的定义
@@ -113,141 +135,7 @@ PATTERN ( row_pattern )
* 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义
* 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空
-示例介绍可见 [3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter-子句分区锚点)
-
-#### 2.4.3 量词(Quantifiers)
-
-量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。
-
-常用量词如下:
-
-| 量词 | 描述 |
-| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `*` | 零次或多次重复 |
-| `+` | 一次或多次重复 |
-| `?` | 零次或一次重复 |
-| `{n}` | 恰好重复 n 次 |
-| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 |
-
-* 可通过在量词后加 `?` 改变匹配偏好。
- * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次
- * `?`:偏好 1 次;`??`:偏好 0 次
-
-### 2.5 AFTER MATCH SKIP 子句
-
-用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
-
-| 跳转策略 | 描述 | 是否允许识别重叠匹配项 |
-| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ |
-| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 |
-| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 |
-| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 | 最后一行 ] 开始。 | 是 |
-
-* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。
-
-示例介绍可见 [3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-子句)
-
-### 2.6 ROWS PER MATCH 子句
-
-用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项:
-
-| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 |
-| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 |
-| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**:输出空匹配,并为每一条未匹配行额外生成一条输出记录|
-
-### 2.7 MEASURES 子句
-
-用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。
-
-```SQL
-MEASURES measure_expression AS measure_name [, ...]
-```
-
-* `measure_expression` 是根据匹配的一段数据计算出的标量值。
-
-| 用法示例 | 说明 |
-| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
-| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 |
-| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 |
-| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 |
-
-* 每个 `measure_expression `都会定义一个输出列,该列可通过其指定的 `measure_name `进行引用。
-
-### 2.8 行模式识别表达式
-
-在 MEASURES 与 DEFINE 子句中使用的表达式为**标量表达式**,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对行模式识别的特殊扩展函数。
-
-#### 2.8.1 模式变量引用
-
-```SQL
-A.totalprice
-U.orderdate
-orderstatus
-```
-
-* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。
-* 若列名不带前缀,则等同于使用“**全局联合模式变量**”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。
-
-> 不允许在模式识别表达式中使用表名作列名前缀。
-
-#### 2.8.2 扩展函数
-
-| 函数名 | 函数式 | 描述 |
-|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 |
-| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 |
-| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 |
-| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 |
-| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
-| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
-| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) |
-| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 |
-| 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 |
-
-示例介绍可见 [3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数)
-
-#### 2.8.3 RUNNING 和 FINAL 语义
-1. 定义
-
-* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。
-* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。
-
-2. 作用范围
-
-* DEFINE 子句默认采用 RUNNING 语义。
-* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。
-
-3. 语法约束
-
-* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。**
- * 合法:`RUNNING RPP_LAST(A.totalprice)`、`FINAL RPP_LAST(A.totalprice)`
- * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)`
-
-## 3. 语法示例
-
-原始数据
-
-```SQL
-IoTDB:database3> select * from t
-+-----------------------------+------+----------+
-| time|device|totalprice|
-+-----------------------------+------+----------+
-|2025-01-01T00:01:00.000+08:00| d1| 90|
-|2025-01-01T00:02:00.000+08:00| d1| 80|
-|2025-01-01T00:03:00.000+08:00| d1| 70|
-|2025-01-01T00:04:00.000+08:00| d1| 80|
-|2025-01-01T00:05:00.000+08:00| d1| 70|
-|2025-01-01T00:06:00.000+08:00| d1| 80|
-+-----------------------------+------+----------+
-
--- 创建语句
-create table t(device tag, totalprice int32 field)
-
-insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80)
-```
-
-### 3.1 Patter 子句分区锚点
+**示例说明**
* 查询 sql
@@ -269,6 +157,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当 PATTERN 子句为 PATTERN (^A) 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-----+-----+-----+
@@ -279,7 +171,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * 当 PATTERN 子句为 PATTERN (^A^) 时
+ * 当 PATTERN 子句为 PATTERN (^A^) 时,输出的结果为空,因为不可能从分区的起始位置开始匹配了一个 A 之后,又回到分区的起始位置
```SQL
+----+-----+-----+-----+
@@ -291,6 +183,10 @@ MATCH_RECOGNIZE (
* 当 PATTERN 子句为 PATTERN (A\$) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-----+-----+-----+
| time|match|price|label|
@@ -300,7 +196,7 @@ MATCH_RECOGNIZE (
Total line number = 1
```
- * 当 PATTERN 子句为 PATTERN (\$A\$) 时
+ * 当 PATTERN 子句为 PATTERN (\$A\$) 时,输出的结果为空
```SQL
+----+-----+-----+-----+
@@ -310,7 +206,38 @@ MATCH_RECOGNIZE (
Empty set.
```
-### 3.2 AFTER MATCH SKIP 子句
+
+##### 1.2.4.3 量词(Quantifiers)
+
+量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。
+
+常用量词如下:
+
+| 量词 | 描述 |
+| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `*` | 零次或多次重复 |
+| `+` | 一次或多次重复 |
+| `?` | 零次或一次重复 |
+| `{n}` | 恰好重复 n 次 |
+| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 |
+
+* 可通过在量词后加 `?` 改变匹配偏好。
+ * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次
+ * `?`:偏好 1 次;`??`:偏好 0 次
+
+#### 1.2.5 AFTER MATCH SKIP 子句
+
+用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。
+
+| 跳转策略 | 描述 | 是否允许识别重叠匹配项 |
+| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ |
+| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 |
+| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 |
+| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 \| 最后一行 ] 开始。 | 是                     |
+
+* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。
+
+**示例说明**
* 查询 sql
@@ -336,6 +263,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当 AFTER MATCH SKIP PAST LAST ROW 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:根据 `AFTER MATCH SKIP PAST LAST ROW` 语义,从第 5 行开始,无法再找寻到一个合法匹配
* 此模式一定不会出现重叠匹配
@@ -353,6 +284,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO NEXT ROW 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:根据 `AFTER MATCH SKIP TO NEXT ROW` 语义,从第 2 行开始,匹配:第 2、3、4 行
* 第三次匹配:尝试从第 3 行开始,失败
@@ -378,6 +313,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO FIRST C 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:从第一个 C (也就是第 4 行)处开始,匹配第4、5、6行
* 此模式允许出现重叠匹配
@@ -398,6 +337,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO LAST B 或 AFTER MATCH SKIP TO B 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:尝试从最后一个 B (也就是第 3 行)处开始,失败
* 第二次匹配:尝试从第 4 行开始,成功匹配第4、5、6行
@@ -419,6 +362,10 @@ MATCH_RECOGNIZE (
```
* 当 AFTER MATCH SKIP TO U 时
+
+ 
+
+ *
* 第一次匹配:第 1、2、3、4 行
* 第二次匹配:`SKIP TO U` 表示跳转到最后一个 C 或 D,D 永远不可能匹配成功,所以就是跳转到最后一个 C(也就是第 4 行),成功匹配第4、5、6行
* 此模式允许出现重叠匹配
@@ -438,20 +385,80 @@ MATCH_RECOGNIZE (
Total line number = 7
```
- * 当 AFTER MATCH SKIP TO A 时,不能跳转到匹配的第一行, 否则会造成死循环
+ * 当 AFTER MATCH SKIP TO A 时,报错。因为不能跳转到匹配的第一行, 否则会造成死循环。
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match
```
- * 当 AFTER MATCH SKIP TO B 时,不能跳转到匹配分组中不存在的模式变量
+ * 当 AFTER MATCH SKIP TO B 时,报错。因为不能跳转到匹配分组中不存在的模式变量。
```SQL
Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match
```
-### 3.3 行模式表达式-扩展函数
-#### 3.3.1 CLASSIFIER()函数
+
+#### 1.2.6 ROWS PER MATCH 子句
+
+用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项:
+
+| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 |
+| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 |
+| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH**SHOW EMPTY MATCHES**:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**OMIT EMPTY MATCHES**:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH**WITH UNMATCHED ROWS**:输出空匹配,并为每一条未匹配行额外生成一条输出记录|
+
+#### 1.2.7 MEASURES 子句
+
+用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。
+
+```SQL
+MEASURES measure_expression AS measure_name [, ...]
+```
+
+* `measure_expression` 是根据匹配的一段数据计算出的标量值。
+
+| 用法示例 | 说明 |
+| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
+| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 |
+| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 |
+| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 |
+
+* 每个 `measure_expression `都会定义一个输出列,该列可通过其指定的 `measure_name `进行引用。
+
+#### 1.2.8 模式查询表达式
+
+在 MEASURES 与 DEFINE 子句中使用的表达式为**标量表达式**,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对模式查询的特殊扩展函数。
+
+##### 1.2.8.1 模式变量引用
+
+```SQL
+A.totalprice
+U.orderdate
+orderstatus
+```
+
+* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。
+* 若列名不带前缀,则等同于使用“**全局联合模式变量**”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。
+
+> 不允许在模式查询表达式中使用表名作列名前缀。
+
+##### 1.2.8.2 扩展函数
+
+| 函数名 | 函数式 | 描述 |
+|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 |
+| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 |
+| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 |
+| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 |
+| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
+| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出**分区边界**,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 |
+| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) |
+| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 |
+| 嵌套函数                     | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k))`                                                                                                                                                                                                                                                  | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。                                                                                                                                                                                                                                                |
+
+**示例说明**
+
+1. CLASSIFIER 函数
* 查询 sql
@@ -476,6 +483,9 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* 分析过程
+
+ 
* 查询结果
@@ -493,7 +503,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.2 逻辑导航函数
+2. 逻辑导航函数
* 查询 sql
@@ -512,6 +522,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当取值为 totalprice、RPR\_LAST(totalprice)、RUNNING RPR\_LAST(totalprice) 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-------+
@@ -529,6 +543,10 @@ MATCH_RECOGNIZE (
* 当取值为 FINAL RPR\_LAST(totalprice) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -545,6 +563,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_FIRST(totalprice)、 RUNNING RPR\_FIRST(totalprice)、FINAL RPR\_FIRST(totalprice)时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -561,6 +583,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_LAST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -577,6 +603,10 @@ MATCH_RECOGNIZE (
* 当取值为 FINAL RPP\_LAST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -593,6 +623,10 @@ MATCH_RECOGNIZE (
* 当取值为 RPR\_FIRST(totalprice, 2) 和 FINAL RPR\_FIRST(totalprice, 2) 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -607,7 +641,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.3 物理导航函数
+3. 物理导航函数
* 查询 sql
@@ -626,6 +660,10 @@ MATCH_RECOGNIZE (
* 查询结果
* 当取值为 `PREV(totalprice)` 时
+
+ 
+
+ 实际返回
```SQL
+-----------------------------+-------+
@@ -639,6 +677,10 @@ MATCH_RECOGNIZE (
* 当取值为 `PREV(B.totalprice, 2)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -651,6 +693,10 @@ MATCH_RECOGNIZE (
* 当取值为 `PREV(B.totalprice, 4)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -663,6 +709,10 @@ MATCH_RECOGNIZE (
* 当取值为 `NEXT(totalprice)` 或 `NEXT(B.totalprice, 1)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -675,6 +725,10 @@ MATCH_RECOGNIZE (
* `当取值为 NEXT(B.totalprice, 2)` 时
+ 
+
+ 实际返回
+
```SQL
+-----------------------------+-------+
| time|measure|
@@ -685,7 +739,7 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-#### 3.3.4 聚合函数
+4. 聚合函数
* 查询 sql
@@ -705,6 +759,9 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* 分析过程(以 MIN(totalprice)为例)
+
+
* 查询结果
@@ -722,8 +779,9 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-#### 3.3.5 嵌套函数
-1. 示例一
+5. 嵌套函数
+
+示例一
* 查询 sql
@@ -750,6 +808,9 @@ MATCH_RECOGNIZE (
H AS H.totalprice > 80
) AS m;
```
+* 分析过程
+
+
* 查询结果
@@ -767,7 +828,7 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-2. 示例二
+示例二
* 查询 sql
@@ -784,6 +845,9 @@ MATCH_RECOGNIZE (
DEFINE A AS true
) AS m;
```
+* 分析过程
+
+
* 查询结果
@@ -801,11 +865,28 @@ MATCH_RECOGNIZE (
Total line number = 6
```
-## 4. 场景示例
+##### 1.2.8.3 RUNNING 和 FINAL 语义
+1. 定义
+
+* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。
+* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。
+
+2. 作用范围
+
+* DEFINE 子句默认采用 RUNNING 语义。
+* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。
+
+3. 语法约束
+
+* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。**
+    * 合法:`RUNNING RPR_LAST(A.totalprice)`、`FINAL RPR_LAST(A.totalprice)`
+ * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)`
+
+### 1.3 场景示例
以[示例数据](../Reference/Sample-Data.md)为源数据
-### 4.1 时间分段查询
+#### 1.3.1 时间分段查询
将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。
@@ -837,7 +918,7 @@ MATCH_RECOGNIZE (
Total line number = 2
```
-### 4.2 差值分段查询
+#### 1.3.2 差值分段查询
将 table2 中的数据按照 humidity 湿度值差值小于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。
@@ -870,7 +951,7 @@ MATCH_RECOGNIZE (
Total line number = 3
```
-### 4.3 事件统计查询
+#### 1.3.3 事件统计查询
将 table1 中数据按照设备号分组,统计上海地区湿度大于 35 的开始、结束时间及最大湿度值。
@@ -903,5 +984,738 @@ MATCH_RECOGNIZE (
| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1|
| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2|
+---------+-----+-----------------------------+-----------------------------+------------+
-Total line number = 2****
+Total line number = 2
+```
+
+
+## 2. 窗口函数
+
+### 2.1 功能介绍
+
+窗口函数(Window Function) 是一种基于与当前行相关的特定行集合(称为“窗口”) 对每一行进行计算的特殊函数。它将分组操作(`PARTITION BY`)、排序(`ORDER BY`)与可定义的计算范围(窗口框架 `FRAME`)结合,在不折叠原始数据行的前提下实现复杂的跨行计算。常用于数据分析场景,比如排名、累计和、移动平均等操作。
+
+> 注意:该功能从 V 2.0.5 版本开始提供。
+
+例如,某场景下需要查询不同设备的功耗累加值,即可通过窗口函数来实现。
+
+```SQL
+-- 原始数据
++-----------------------------+------+-----+
+| time|device| flow|
++-----------------------------+------+-----+
+|1970-01-01T08:00:00.000+08:00| d0| 3|
+|1970-01-01T08:00:01.000+08:00|    d0|    5|
+|1970-01-01T08:00:02.000+08:00|    d0|    3|
+|1970-01-01T08:00:03.000+08:00|    d0|    1|
+|1970-01-01T08:00:04.000+08:00|    d1|    2|
+|1970-01-01T08:00:05.000+08:00|    d1|    4|
++-----------------------------+------+-----+
+
+-- 创建表并插入数据
+CREATE TABLE device_flow(device String tag, flow INT32 FIELD);
+insert into device_flow(time, device ,flow ) values ('1970-01-01T08:00:00.000+08:00','d0',3),('1970-01-01T08:00:01.000+08:00','d0',5),('1970-01-01T08:00:02.000+08:00','d0',3),('1970-01-01T08:00:03.000+08:00','d0',1),('1970-01-01T08:00:04.000+08:00','d1',2),('1970-01-01T08:00:05.000+08:00','d1',4);
+
+
+--执行窗口函数查询
+SELECT *, sum(flow) OVER(PARTITION BY device ORDER BY flow) as sum FROM device_flow;
+```
+
+经过分组、排序、计算(步骤拆解如下图所示),
+
+
+
+即可得到期望结果:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+### 2.2 功能定义
+#### 2.2.1 SQL 定义
+
+```SQL
+windowDefinition
+ : name=identifier AS '(' windowSpecification ')'
+ ;
+
+windowSpecification
+ : (existingWindowName=identifier)?
+ (PARTITION BY partition+=expression (',' partition+=expression)*)?
+ (ORDER BY sortItem (',' sortItem)*)?
+ windowFrame?
+ ;
+
+windowFrame
+ : frameExtent
+ ;
+
+frameExtent
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=GROUPS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ | frameType=GROUPS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound
+ : UNBOUNDED boundType=PRECEDING #unboundedFrame
+ | UNBOUNDED boundType=FOLLOWING #unboundedFrame
+ | CURRENT ROW #currentRowBound
+ | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame
+ ;
+```
+
+#### 2.2.2 窗口定义
+##### 2.2.2.1 Partition
+
+`PARTITION BY` 用于将数据分为多个独立、不相关的「组」,窗口函数只能访问并操作其所属分组内的数据,无法访问其它分组。该子句是可选的;如果未显式指定,则默认将所有数据分到同一组。值得注意的是,与 `GROUP BY` 通过聚合函数将一组数据规约成一行不同,`PARTITION BY` 的窗口函数**并不会影响组内的行数。**
+
+* 示例
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER (PARTITION BY device) as count FROM device_flow;
+```
+
+拆解步骤:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 4|
++-----------------------------+------+----+-----+
+```
+
+##### 2.2.2.2 Ordering
+
+`ORDER BY` 用于对 partition 内的数据进行排序。排序后,相等的行被称为 peers。peers 会影响窗口函数的行为,例如不同 rank function 对 peers 的处理不同;不同 frame 的划分方式对于 peers 的处理也不同。该子句是可选的。
+
+* 示例
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, rank() OVER (PARTITION BY device ORDER BY flow) as rank FROM device_flow;
+```
+
+拆解步骤:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+##### 2.2.2.3 Framing
+
+对于 partition 中的每一行,窗口函数都会在相应的一组行上求值,这些行称为 Frame(即 Window Function 在每一行上的输入域)。Frame 可以手动指定,指定时涉及两个属性,具体说明如下。
+
+
+
+
+ | Frame 属性 |
+ 属性值 |
+ 值描述 |
+
+
+ | 类型 |
+ ROWS |
+ 通过行号来划分 frame |
+
+
+ | GROUPS |
+ 通过 peers 来划分 frame,即值相同的行视为同等的存在。peers 中所有的行分为一个组,叫做 peer group |
+
+
+ | RANGE |
+ 通过值来划分 frame |
+
+
+ | 起始和终止位置 |
+ UNBOUNDED PRECEDING |
+ 整个 partition 的第一行 |
+
+
+ | offset PRECEDING |
+ 代表前面和当前行「距离」为 offset 的行 |
+
+
+ | CURRENT ROW |
+ 当前行 |
+
+
+ | offset FOLLOWING |
+ 代表后面和当前行「距离」为 offset 的行 |
+
+
+ | UNBOUNDED FOLLOWING |
+ 整个 partition 的最后一行 |
+
+
+
+
+其中,`CURRENT ROW`、`offset PRECEDING` 和 `offset FOLLOWING` 的含义随着 frame 种类的不同而不同,如下表所示:
+
+| | `ROWS` | `GROUPS` | `RANGE` |
+|--------------------|------------|------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|
+| `CURRENT ROW` | 当前行 | 由于 peer group 包含多行,因此这个选项根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 | 和 GROUPS 相同,根据作用于 frame\_start 和 frame\_end 而不同:* frame\_start:peer group 的第一行;* frame\_end:peer group 的最后一行。 |
+| `offset PRECEDING` | 前 offset 行 | 前 offset 个 peer group; | 前面与当前行的值之差小于等于 offset 就分为一个 frame |
+| `offset FOLLOWING` | 后 offset 行 | 后 offset 个 peer group。 | 后面与当前行的值之差小于等于 offset 就分为一个 frame |
+
+语法格式如下:
+
+```SQL
+-- 同时指定 frame_start 和 frame_end
+{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end
+-- 仅指定 frame_start,frame_end 为 CURRENT ROW
+{ RANGE | ROWS | GROUPS } frame_start
+```
+
+若未手动指定 Frame,Frame 的默认划分规则如下:
+
+* 当窗口函数使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (即从窗口的第一行到当前行)。例如:RANK() OVER(PARTITION BY col1 ORDER BY col2) 中,Frame 默认包含分区内当前行及之前的所有行。
+* 当窗口函数不使用 ORDER BY 时:默认 Frame 为 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING (即整个窗口的所有行)。例如:AVG(COL2) OVER(PARTITION BY col1) 中,Frame 默认包含分区内的所有行,计算整个分区的平均值。
+
+需要注意的是,当 Frame 类型为 GROUPS 或 RANGE 时,需要指定 `ORDER BY`,区别在于 GROUPS 中的 ORDER BY 可以涉及多个字段,而 RANGE 需要计算,所以只能指定一个字段。
+
+* 示例
+
+1. Frame 类型为 ROWS
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ROWS 1 PRECEDING) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 取前一行和当前行作为 Frame
+ * 对于 partition 的第一行,由于没有前一行,所以整个 Frame 只有它一行,返回 1;
+ * 对于 partition 的其他行,整个 Frame 包含当前行和它的前一行,返回 2:
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 2|
++-----------------------------+------+----+-----+
+```
+
+2. Frame 类型为 GROUPS
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *, count(flow) OVER(PARTITION BY device ORDER BY flow GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 取前一个 peer group 和当前 peer group 作为 Frame,那么以 device 为 d0 的 partition 为例(d1同理),对于 count 行数:
+  * 对于 flow 为 1 的 peer group,由于没有比它更小的 peer group,所以整个 Frame 只有它这一行,返回 1;
+ * 对于 flow 为 3 的 peer group,它本身包含 2 行,前一个 peer group 就是 flow 为 1 的,就一行,因此整个 Frame 三行,返回 3;
+ * 对于 flow 为 5 的 peer group,它本身包含 1 行,前一个 peer group 就是 flow 为 3 的,共两行,因此整个 Frame 三行,返回 3。
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+3. Frame 类型为 RANGE
+
+查询语句:
+
+```SQL
+IoTDB> SELECT *,count(flow) OVER(PARTITION BY device ORDER BY flow RANGE BETWEEN 2 PRECEDING AND CURRENT ROW) as count FROM device_flow;
+```
+
+拆解步骤:
+
+* 将值不大于当前行、且与当前行的差值**小于等于 2** 的行分为同一个 Frame,那么以 device 为 d0 的 partition 为例(d1 同理),对于 count 行数:
+ * 对于 flow 为 1 的行,由于它是最小的行了,所以整个 Frame 就它一行,返回 1;
+ * 对于 flow 为 3 的行,注意 CURRENT ROW 是作为 frame\_end 存在,因此是整个 peer group 的最后一行,符合要求比它小的共 1 行,然后 peer group 有 2 行,所以整个 Frame 共 3 行,返回 3;
+ * 对于 flow 为 5 的行,它本身包含 1 行,符合要求的比它小的共 2 行,所以整个 Frame 共 3 行,返回 3。
+
+
+
+查询结果:
+
+```SQL
++-----------------------------+------+----+-----+
+| time|device|flow|count|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----+
+```
+
+### 2.3 内置的窗口函数
+
+
+
+
+ | 窗口函数分类 |
+ 窗口函数名 |
+ 函数定义 |
+ 是否支持 FRAME 子句 |
+
+
+ | Aggregate Function |
+ 所有内置聚合函数 |
+ 对一组值进行聚合计算,得到单个聚合结果。 |
+ 是 |
+
+
+ | Value Function |
+ first_value |
+ 返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL |
+ 是 |
+
+
+ | last_value |
+ 返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL |
+ 是 |
+
+
+ | nth_value |
+ 返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL |
+ 是 |
+
+
+ | lead |
+ 返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
+ 否 |
+
+
+ | lag |
+ 返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default |
+ 否 |
+
+
+ | Rank Function |
+ rank |
+ 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap |
+ 否 |
+
+
+ | dense_rank |
+ 返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap |
+ 否 |
+
+
+ | row_number |
+ 返回当前行在整个 partition 中的行号,注意行号从 1 开始 |
+ 否 |
+
+
+ | percent_rank |
+ 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (rank() - 1) / (n - 1),其中 n 是整个 partition 的行数 |
+ 否 |
+
+
+ | cume_dist |
+ 以百分比的形式,返回当前行的值在整个 partition 中的序号;即 (小于等于它的行数) / n |
+ 否 |
+
+
+ | ntile |
+ 指定 n,给每一行进行 1~n 的编号。 |
+ 否 |
+
+
+
+
+#### 2.3.1 Aggregate Function
+
+所有内置聚合函数,如 `sum()`、`avg()`、`min()`、`max()` 都能当作 Window Function 使用。
+
+> 注意:与 GROUP BY 不同,Window Function 中每一行都有相应的输出
+
+示例:
+
+```SQL
+IoTDB> SELECT *, sum(flow) OVER (PARTITION BY device ORDER BY flow) as sum FROM device_flow;
++-----------------------------+------+----+----+
+| time|device|flow| sum|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 6.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 7.0|
+|1970-01-01T08:00:01.000+08:00| d0| 5|12.0|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.2 Value Function
+1. `first_value`
+
+* 函数名:`first_value(value) [IGNORE NULLS]`
+* 定义:返回 frame 的第一个值,如果指定了 IGNORE NULLS 需要跳过前缀的 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, first_value(flow) OVER w as first_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+-----------+
+| time|device|flow|first_value|
++-----------------------------+------+----+-----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 2|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+-----------+
+```
+
+2. `last_value`
+
+* 函数名:`last_value(value) [IGNORE NULLS]`
+* 定义:返回 frame 的最后一个值,如果指定了 IGNORE NULLS 需要跳过后缀的 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, last_value(flow) OVER w as last_value FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|last_value|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+3. `nth_value`
+
+* 函数名:`nth_value(value, n) [IGNORE NULLS]`
+* 定义:返回 frame 的第 n 个元素(注意 n 是从 1 开始),如果有 IGNORE NULLS 需要跳过 NULL;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, nth_value(flow, 2) OVER w as nth_values FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
++-----------------------------+------+----+----------+
+| time|device|flow|nth_values|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 4|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 5|
++-----------------------------+------+----+----------+
+```
+
+4. lead
+
+* 函数名:`lead(value[, offset[, default]]) [IGNORE NULLS]`
+* 定义:返回当前行的后 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
+* lead 函数需要一个 ORDER BY 窗口子句
+* 示例:
+
+```SQL
+IoTDB> SELECT *, lead(flow) OVER w as lead FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow|lead|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 4|
+|1970-01-01T08:00:05.000+08:00| d1| 4|null|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:03.000+08:00| d0| 1|null|
++-----------------------------+------+----+----+
+```
+
+5. lag
+
+* 函数名:`lag(value[, offset[, default]]) [IGNORE NULLS]`
+* 定义:返回当前行的前 offset 个元素(如果有 IGNORE NULLS 则 NULL 不考虑在内),如果没有这样的元素(超过 partition 范围),则返回 default;offset 的默认值为 1,default 的默认值为 NULL。
+* lag 函数需要一个 ORDER BY 窗口子句
+* 示例:
+
+```SQL
+IoTDB> SELECT *, lag(flow) OVER w as lag FROM device_flow WINDOW w AS(PARTITION BY device ORDER BY time);
++-----------------------------+------+----+----+
+| time|device|flow| lag|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2|null|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:00.000+08:00| d0| 3|null|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 5|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 3|
++-----------------------------+------+----+----+
+```
+
+#### 2.3.3 Rank Function
+1. rank
+
+* 函数名:`rank()`
+* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间可能有 gap;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, rank() OVER w as rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----+
+| time|device|flow|rank|
++-----------------------------+------+----+----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----+
+```
+
+2. dense\_rank
+
+* 函数名:`dense_rank()`
+* 定义:返回当前行在整个 partition 中的序号,值相同的行序号相同,序号之间没有 gap。
+* 示例:
+
+```SQL
+IoTDB> SELECT *, dense_rank() OVER w as dense_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|dense_rank|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 3|
++-----------------------------+------+----+----------+
+```
+
+3. row\_number
+
+* 函数名:`row_number()`
+* 定义:返回当前行在整个 partition 中的行号,注意行号从 1 开始;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, row_number() OVER w as row_number FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+----------+
+| time|device|flow|row_number|
++-----------------------------+------+----+----------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 3|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 4|
++-----------------------------+------+----+----------+
+```
+
+4. percent\_rank
+
+* 函数名:`percent_rank()`
+* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(rank() - 1) / (n - 1)**,其中 n 是整个 partition 的行数;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, percent_rank() OVER w as percent_rank FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+------------------+
+| time|device|flow| percent_rank|
++-----------------------------+------+----+------------------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.0|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.0|
+|1970-01-01T08:00:00.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:02.000+08:00| d0| 3|0.3333333333333333|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+------------------+
+```
+
+5. cume\_dist
+
+* 函数名:`cume_dist()`
+* 定义:以百分比的形式,返回当前行的值在整个 partition 中的序号;即 **(小于等于它的行数) / n**。
+* 示例:
+
+```SQL
+IoTDB> SELECT *, cume_dist() OVER w as cume_dist FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+---------+
+| time|device|flow|cume_dist|
++-----------------------------+------+----+---------+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 0.5|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 1.0|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 0.25|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 0.75|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 1.0|
++-----------------------------+------+----+---------+
+```
+
+6. ntile
+
+* 函数名:`ntile(n)`
+* 定义:指定 n,给每一行进行 1~n 的编号。
+ * 整个 partition 行数比 n 小,那么编号就是行号 index;
+ * 整个 partition 行数比 n 大:
+ * 如果行数能被 n 整除,那么每组行数相同,比如行数为 4,n 为 2,那么编号为 1、1、2、2;
+ * 如果行数不能被 n 整除,那么多出的行依次分给开头几组,比如行数为 5,n 为 3,那么编号为 1、1、2、2、3;
+* 示例:
+
+```SQL
+IoTDB> SELECT *, ntile(2) OVER w as ntile FROM device_flow WINDOW w AS (PARTITION BY device ORDER BY flow);
++-----------------------------+------+----+-----+
+| time|device|flow|ntile|
++-----------------------------+------+----+-----+
+|1970-01-01T08:00:04.000+08:00| d1| 2| 1|
+|1970-01-01T08:00:05.000+08:00| d1| 4| 2|
+|1970-01-01T08:00:03.000+08:00| d0| 1| 1|
+|1970-01-01T08:00:00.000+08:00| d0| 3| 1|
+|1970-01-01T08:00:02.000+08:00| d0| 3| 2|
+|1970-01-01T08:00:01.000+08:00| d0| 5| 2|
++-----------------------------+------+----+-----+
+```
+
+### 2.4 场景示例
+1. 多设备 diff 函数
+
+对于每个设备的每一行,与前一行求差值:
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+对于每个设备的每一行,与后一行求差值:
+
+```SQL
+SELECT
+ *,
+ measurement - lead(measurement) OVER (PARTITION BY device ORDER BY time)
+FROM data
+WHERE timeCondition;
+```
+
+对于单个设备的每一行,与前一行求差值(后一行同理):
+
+```SQL
+SELECT
+ *,
+ measurement - lag(measurement) OVER (ORDER BY time)
+FROM data
+WHERE device='d1'
+  AND timeCondition;
+```
+
+2. 多设备 TOP\_K/BOTTOM\_K
+
+利用 rank 获取序号,然后在外部的查询中保留想要的顺序。
+
+(注意,window function 的执行顺序在 HAVING 子句之后,因此无法在同一层查询的 WHERE 子句中直接按其结果过滤,这里需要使用子查询)
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+ rank() OVER (PARTITION BY device ORDER BY time DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+除了按照时间排序之外,还可以按照测点的值进行排序:
+
+```SQL
+SELECT *
+FROM(
+ SELECT
+ *,
+ rank() OVER (PARTITION BY device ORDER BY measurement DESC) AS rank
+ FROM data
+ WHERE timeCondition
+)
+WHERE rank <= 3;
+```
+
+3. 多设备 CHANGE\_POINTS
+
+这个 SQL 用来去除输入序列中连续的相同值(每段连续相同值只保留最后一行),可以用 lead + 子查询实现:
+
+```SQL
+SELECT
+ time,
+ device,
+ measurement
+FROM(
+ SELECT
+ time,
+ device,
+ measurement,
+ LEAD(measurement) OVER (PARTITION BY device ORDER BY time) AS next
+ FROM data
+ WHERE timeCondition
+)
+WHERE measurement != next OR next IS NULL;
```