$ mkdir data
$ cp /path/to/Mall_Customers.csv ./data/   # copy the CSV file into the data folder
$ ls ./data
$ dvc add ./data/Mall_Customers.csv
$ cat ./data/Mall_Customers.csv.dvc
$ git add data/Mall_Customers.csv.dvc data/.gitignore
$ git commit -m "data: track"
$ git tag -a 'v1' -m "raw data"
$ git push origin main --tags
$ dvc push
Updating the Data Source [Removing lines from CSV]
$ dvc add ./data/Mall_Customers.csv
$ git add ./data/Mall_Customers.csv.dvc
$ git commit -m 'data: removed 50 lines'
$ git tag -a 'v2' -m 'removed 50 lines'
$ git push origin main --tags
$ dvc push
$ git add .
$ git commit -m 'adding second version tag of csv'
$ git push origin main --tags
$ dvc remote add -d dvc-PC-remote '../../Remote_Data'
$ dvc remote list
$ git add .dvc/config
$ git commit -m 'adding another DVC_REPO'
$ git push origin
RUN the Project
# Clone this project
$ git clone https://github.com/mohamedelmesawy/RAM_Project_DS_PROD.git
# Start the MLflow server
$ mlflow ui
# Run the Flask Linear Regression application
$ python ./main.py
# Run the ML pipeline [Git + DVC + MLflow]
$ python ./ML_Pipeline.py