Technical Documentation

System Architecture Details 🏗️

Component Specifications

flowchart TB
    subgraph Raw["Raw Data Layer"]
        RD["R Data Files"]
        EM["Excel/CSV Files"]
    end

    subgraph Control["Control & Metadata Layer"]
        CT["PROCESS_CONTROL Table"]
        MD["Metadata Definitions"]
        Config["Configuration Settings"]
        
        CT -->|Validates| MD
        MD -->|Configures| Config
    end

    subgraph Bronze["Bronze Layer (Landing)"]
        BT["Bronze Tables"]
        BV["Bronze Views"]
        
        BT -->|Transforms| BV
    end

    subgraph Silver["Silver Layer (Standardized)"]
        ST["Silver Tables"]
        SV["Silver Views"]
        
        ST -->|Transforms| SV
    end

    subgraph Gold["Gold Layer (Business)"]
        GT["Gold Tables"]
        GV["Gold Views"]
        
        GT -->|Transforms| GV
    end

    %% Control Table Interactions
    CT -->|Tracks Ingestion| RD
    CT -->|Monitors Loading| EM
    CT -->|Records Status| BT
    CT -->|Validates Rules| ST
    CT -->|Ensures Quality| GT

    %% Data Flow
    RD -->|Load| BT
    EM -->|Load| BT
    BV -->|Clean & Standardize| ST
    SV -->|Business Logic| GT

    %% Metadata Influence
    MD -->|Schema Definition| BT
    MD -->|Data Standards| ST
    MD -->|Business Rules| GT
    
    %% Configuration Flow
    Config -->|Processing Rules| BT
    Config -->|Transformation Logic| ST
    Config -->|Aggregation Rules| GT

    classDef controlNode fill:#f9f,stroke:#333,stroke-width:2px
    classDef dataNode fill:#bbf,stroke:#333,stroke-width:2px
    classDef metaNode fill:#ffb,stroke:#333,stroke-width:2px

    class CT,MD,Config controlNode
    class RD,EM,BT,BV,ST,SV,GT,GV dataNode
    class Bronze,Silver,Gold metaNode

Database Schema 💾

Table Definitions

CREATE TABLE BRONZE_CATCH_DATA (
      ID INTEGER,  -- DuckDB will auto-increment
      MONTH INTEGER,
      YEAR INTEGER,
      LICENCE_NO VARCHAR,  -- DuckDB doesn't need length specifications
      HOLDERS_NAME VARCHAR,
      LOG_NO INTEGER,
      PAGE_NO INTEGER,
      NON_COLLECTING_CODE INTEGER,
      DATE VARCHAR,
      LOCATION_NAME VARCHAR,
      COLLECTION_TIME_HOURS DOUBLE,  -- Using DOUBLE instead of NUMERIC for DuckDB
      SPECIES VARCHAR,
      NUMBER_OF_DEAD_SPECIMENS INTEGER,
      NUMBER_OF_LIVE_SPECIMENS INTEGER,
      WEATHER_CONDITION VARCHAR,
      DATA_DATE DATE,
      COUNTRY VARCHAR,
      LOADING_DATETIME TIMESTAMP,
      SOURCE_FILE VARCHAR,
      PROCESSING_STATUS VARCHAR DEFAULT 'UNPROCESSED',
      ERROR_MESSAGE VARCHAR
    );

CREATE TABLE Trips (
    trip_id INT PRIMARY KEY,
    vessel_id INT FOREIGN KEY REFERENCES Vessels(vessel_id),
    start_date DATE,
    end_date DATE
);

🔙 Back to Implementation