Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,13 @@ APP_GIT_SHORT_SHA=localdev
# Monitoring
POSTHOG_API_KEY=""
POSTHOG_HOST="https://us.i.posthog.com"

# Qdrant
QDRANT_URL=http://qdrant:6333
QDRANT_COLLECTION=courses_bge_m3

# Embedding
EMBEDDING_MODEL=Xenova/bge-m3
EMBEDDING_DTYPE=q4
EMBEDDING_BATCH_SIZE=50
TRANSFORMERS_CACHE_DIR=/app/.cache/transformers
2 changes: 1 addition & 1 deletion .nvmrc
Original file line number Diff line number Diff line change
@@ -1 +1 @@
22
22.22.3
77 changes: 58 additions & 19 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,52 +1,91 @@
# syntax=docker/dockerfile:1

# Base dependencies
FROM node:22.22.3-alpine3.22 AS base
FROM node:22.22.3-bullseye-slim AS deps

WORKDIR /app
RUN apk add --no-cache openssl

RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
make \
g++ \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*

COPY package.json yarn.lock ./
RUN yarn install --frozen-lockfile --ignore-scripts

# Build
FROM base AS build
FROM node:22.22.3-bullseye-slim AS prod-deps

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
make \
g++ \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*

COPY package.json yarn.lock ./
RUN yarn install --frozen-lockfile --ignore-scripts --production

FROM node:22.22.3-bullseye-slim AS build

WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . ./

ENV NODE_ENV=production

RUN yarn prisma:generate
RUN yarn build

# Development
FROM base AS dev
FROM node:22.22.3-bullseye-slim AS dev

WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY package.json yarn.lock ./
COPY prisma ./prisma

RUN apt-get update && apt-get install -y --no-install-recommends \
openssl \
ca-certificates \
tzdata \
&& rm -rf /var/lib/apt/lists/*

RUN yarn prisma:generate

ENV NODE_ENV=development
ENV APP_ENV=development
ENV TZ=America/Toronto

EXPOSE 3001
CMD ["sh", "-c", "yarn prisma:generate && yarn start:dev"]

# Production
FROM node:22.22.3-alpine3.22 AS production
FROM node:22.22.3-bullseye-slim AS production

ARG APP_GIT_SHORT_SHA
ENV APP_GIT_SHORT_SHA=${APP_GIT_SHORT_SHA}
ENV NODE_ENV=production
ENV APP_ENV=production
ENV TZ=America/Toronto

WORKDIR /app
RUN apk add --no-cache tzdata openssl && \
cp /usr/share/zoneinfo/${TZ} /etc/localtime && \
echo "${TZ}" > /etc/timezone
COPY package.json yarn.lock ./
COPY prisma ./prisma
RUN yarn install --production --frozen-lockfile --ignore-scripts

COPY package.json ./
COPY --from=build /app/dist ./dist
COPY --from=prod-deps /app/node_modules ./node_modules
COPY --from=build /app/node_modules/.prisma ./node_modules/.prisma
COPY prisma ./prisma

# Generate Prisma Client
RUN yarn prisma:generate
RUN apt-get update && apt-get install -y --no-install-recommends \
openssl \
ca-certificates \
tzdata \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd --gid 10001 appgroup \
&& useradd --uid 10001 --gid 10001 --no-create-home appuser \
&& chown -R appuser:appgroup /app

EXPOSE 3001
USER 10001:10001

EXPOSE 3001
CMD ["yarn", "start:prod"]
32 changes: 30 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
context: .
target: production
ports:
- "${HOST_PORT:-3501}:${PORT:-3001}"
- "${PORT:-3501}:${PORT:-3001}"
environment:
DATABASE_URL: ${DATABASE_URL}
APP_ENV: production
Expand Down Expand Up @@ -35,19 +35,31 @@ services:
context: .
target: dev
ports:
- "${HOST_PORT:-3501}:${PORT:-3001}"
- "${PORT:-3501}:${PORT:-3001}"
volumes:
- .:/app
- /app/node_modules
- transformers-cache:/app/.cache/transformers
environment:
DATABASE_URL: ${DATABASE_URL}
APP_ENV: development
PORT: ${PORT:-3001}
LOG_LEVELS: ${LOG_LEVELS:-log,error,warn}
TZ: America/Toronto
QDRANT_URL: ${QDRANT_URL:-http://qdrant:6333}
QDRANT_COLLECTION: ${QDRANT_COLLECTION:-courses_bge_m3}
EMBEDDING_MODEL: ${EMBEDDING_MODEL:-Xenova/bge-m3}
EMBEDDING_DTYPE: ${EMBEDDING_DTYPE:-q4}
EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE:-50}
TRANSFORMERS_CACHE_DIR: ${TRANSFORMERS_CACHE_DIR:-/app/.cache/transformers}
CHOKIDAR_USEPOLLING: "true"
CHOKIDAR_INTERVAL: "100"
FORCE_POLLING: "true"
depends_on:
db:
condition: service_healthy
qdrant:
condition: service_started
networks:
- app-network
profiles:
Expand All @@ -72,9 +84,25 @@ services:
timeout: 5s
retries: 5

qdrant:
image: qdrant/qdrant:latest
ports:
- "6333:6333"
- "6334:6334"
volumes:
- qdrant-data:/qdrant/storage
networks:
- app-network
profiles:
- dev

volumes:
db-data:
driver: local
qdrant-data:
driver: local
transformers-cache:
driver: local

networks:
app-network:
Expand Down
11 changes: 10 additions & 1 deletion docs/onboarding.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

## Requirements

- Node.js 22
- Node.js 22.22.3 (the version pinned in `.nvmrc`)
- Yarn
- PostgreSQL 16+
- VS Code
- nvm (recommended for Node.js version management)

## Optional

Expand Down Expand Up @@ -75,6 +76,14 @@ Default body:

## Option B - Local setup

### 0. Set up Node.js with nvm

```bash
nvm install 22.22.3
nvm use 22.22.3
node --version # Verify: should show v22.22.3 (the version pinned in .nvmrc)
```

### 1. Clone the project

```bash
Expand Down
13 changes: 8 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
"scripts": {
"build": "nest build",
"start": "nest start",
"dev": "nest start --watch",
"start:dev": "nest start --watch",
"start:debug": "nest start --debug --watch",
"dev": "nest start --watch --preserveWatchOutput --no-shell",
"start:dev": "nest start --watch --preserveWatchOutput --no-shell",
"start:debug": "nest start --debug --watch --preserveWatchOutput --no-shell",
"start:prod": "yarn prisma:migrate-prod && yarn prisma:seed && node dist/main",
"docker:prod": "docker-compose -f docker-compose.yml up -d --build --force-recreate --remove-orphans",
"typecheck": "tsc --noEmit",
Expand All @@ -28,20 +28,23 @@
"test:all": "yarn test && yarn test:integration && yarn test:e2e",
"test:e2e": "jest --config ./jest/e2e.config.mjs",
"knip": "knip",
"unused": "knip --use-tsconfig-files"
"unused": "knip --use-tsconfig-files",
"job:index": "nest build && node dist/jobs/jobs-embeddings.js"
},
"prisma": {
"schema": "prisma/schema.prisma",
"seed": "node dist/prisma/seeds/seed.js"
},
"dependencies": {
"@huggingface/transformers": "^4.2.0",
"@nestjs/axios": "^3.1.1",
"@nestjs/common": "^10.4.10",
"@nestjs/core": "^10.4.10",
"@nestjs/platform-express": "^10.4.22",
"@nestjs/schedule": "^4.1.1",
"@nestjs/swagger": "^11.2.6",
"@prisma/client": "^5.22.0",
"@qdrant/js-client-rest": "^1.18.0",
"@types/unzipper": "^0.10.10",
"axios": "^1.13.5",
"cheerio": "^1.2.0",
Expand All @@ -52,6 +55,7 @@
"express": "^4.21.1",
"pdf2json": "^4.0.0",
"posthog-node": "^5.34.2",
"prisma": "^5.22.0",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1",
"unzipper": "^0.12.3"
Expand Down Expand Up @@ -83,7 +87,6 @@
"knip": "^5.86.0",
"prettier": "^3.3.3",
"prettier-eslint": "^16.3.0",
"prisma": "^5.22.0",
"source-map-support": "^0.5.21",
"supertest": "^7.0.0",
"ts-jest": "^29.4.6",
Expand Down
23 changes: 23 additions & 0 deletions src/common/utils/uuid/uuidUtil.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { createHash } from 'node:crypto';

/**
 * Builds a stable, reproducible identifier from a (name, namespace) pair.
 *
 * The namespace is decoded from hex after its dashes are removed, the name is
 * encoded as UTF-8, and both are fed (namespace first) into a SHA-1 hash. The
 * first 16 bytes of the digest are then stamped with version nibble `5` and
 * variant bits `10` and rendered as lowercase hex in 8-4-4-4-12 groups.
 *
 * The namespace is not validated here: it is handed to the hex decoder as-is
 * once the dashes are stripped.
 *
 * @param name - Arbitrary string to derive the identifier from.
 * @param namespace - Namespace UUID in hex, with or without dashes.
 * @returns The derived identifier as a dashed, lowercase hex string.
 */
export function uuidV5(name: string, namespace: string): string {
  const namespaceHex = namespace.replaceAll('-', '');

  // Feed the hash incrementally: namespace bytes first, then the name bytes.
  const sha1 = createHash('sha1');
  sha1.update(Buffer.from(namespaceHex, 'hex'));
  sha1.update(Buffer.from(name, 'utf8'));
  const digest = sha1.digest();

  // Byte 6, high nibble: version 5. Byte 8, top two bits: variant `10`.
  digest[6] = (digest[6] & 0x0f) | 0x50;
  digest[8] = (digest[8] & 0x3f) | 0x80;

  // Only the first 16 of SHA-1's 20 bytes are kept (32 hex characters).
  const hex = digest.toString('hex', 0, 16);

  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
}
6 changes: 6 additions & 0 deletions src/embedding/dtos/embedding-count.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { ApiProperty } from '@nestjs/swagger';

/**
 * DTO with a single numeric `count` field, annotated for Swagger/OpenAPI.
 *
 * NOTE(review): presumably the number of stored embeddings, judging by the
 * file's location under `src/embedding` — confirm against the endpoint that
 * returns it.
 */
export class EmbeddingCountDto {
// Swagger metadata: the field is documented with the sample value 42.
// The `!` marks the field as definitely assigned; this class declares it
// without an initializer.
@ApiProperty({ example: 42 })
public count!: number;
}
Loading
Loading