Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions .github/workflows/daily-scraper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,31 @@ jobs:
type = drive
scope = drive
service_account_file = /tmp/service-account-key.json
root_folder_id = https://drive.google.com/drive/folders/1nYUczTuBjUoaSa9cucpjQU8zkEojdHBp?usp=sharing
# Don't use root_folder_id - we'll specify full paths instead
EOF
echo '${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON }}' > /tmp/service-account-key.json


- name: Download only metadata files
run: |
mkdir -p data/moe/metadata
# Only download JSON/CSV metadata files, not PDFs
rclone copy gdrive:metadata data/moe/metadata -v --drive-shared-with-me --include "*.json" --include "*.csv"

# List accessible folders first
echo "Listing accessible Drive folders..."
rclone lsd gdrive: --drive-shared-with-me

# Try accessing by name path instead of ID
echo "Attempting to access moe_data/metadata..."
rclone copy "gdrive:moe_data/metadata" data/moe/metadata -v --drive-shared-with-me --include "*.json" --include "*.csv" || echo "No metadata files yet"

- name: Upload updated metadata
run: |
if [ -d "data/moe/metadata" ] && [ "$(ls -A data/moe/metadata)" ]; then
echo "Uploading metadata files..."
rclone copy data/moe/metadata "gdrive:moe_data/metadata" -v --drive-shared-with-me
else
echo "No metadata directory or files to upload"
fi


- name: Install Python dependencies
Expand All @@ -59,13 +75,19 @@ jobs:

- name: Run MoE scraper (scrape + upload directly to Drive)
env:
RCLONE_REMOTE:gdrive:
RCLONE_REMOTE: "gdrive:moe_data/"
GOOGLE_DRIVE_MASTER_FOLDER_ID: ${{ secrets.GOOGLE_DRIVE_MASTER_FOLDER_ID }}
run: |
python backend/services/moe_scraper_service.py --all

- name: Upload updated metadata
run: |
rclone copy data/moe/metadata gdrive:metadata -v --drive-shared-with-me
if [ -d "data/moe/metadata" ] && [ "$(ls -A data/moe/metadata)" ]; then
echo "Uploading metadata files..."
rclone copy data/moe/metadata "gdrive:moe_data/metadata" -v --drive-shared-with-me
else
echo "No metadata directory or files to upload"
fi

- name: Upload scrape report
if: always()
Expand Down
10 changes: 4 additions & 6 deletions backend/api/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from backend.core.config import get_settings
from backend.api.routers import collections, documents, search, chat, upload, sync, scraper
from backend.api.routers import collections, documents, search, chat, upload, sync, scraper, files_server # Add files import
from backend.api.routers.parse_marker import router as parse_marker_router

settings = get_settings()
Expand All @@ -13,13 +13,10 @@
)

# CORS: allow your frontend origins in development
# CORS: allow your frontend origins in development
# Add CORS middleware for team access
app.add_middleware(
CORSMiddleware,
allow_origins=[
"http://localhost:3000",
"http://127.0.0.1:3000",
],
allow_origins=["*"], # In production, specify team member IPs/domains
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
Expand All @@ -43,6 +40,7 @@
app.include_router(parse_marker_router, prefix="/api/marker", tags=["Marker"])
app.include_router(sync.router, prefix="/api", tags=["Sync"])
app.include_router(scraper.router, prefix="/api", tags=["Scraper"])
app.include_router(files_server.router) # Add this line

@app.get("/")
async def root():
Expand Down
Loading