sudo locale-gen en_US.UTF-8
- soft nofile 100000
NOTE: if you use an init system like systemd, you may also need to change /etc/systemd/user.conf and /etc/systemd/system.conf
sysctl -p
sudo apt-get install postgresql
sudo apt-get install apache2
sudo apt-get install git
sudo apt-get install nodejs
sudo apt-get install xfsprogs
sudo apt-get install g++
sudo apt-get install build-essential
sudo apt-get install libpqxx-dev
sudo apt-get install libasio-dev
sudo apt-get install rapidjson-dev
sudo apt-get install screen
sudo apt-get install npm
sudo apt-get install python3-pip
ls -l /dev/disk/by-uuid/
echo 'UUID=9c811e51-9e6e-441c-8508-80791411c468 /nvme xfs defaults 0 2' >> /etc/fstab
sudo mkdir /nvme
sudo mkfs.xfs /dev/nvme0n1
sudo mount /nvme
sudo mkdir /nvme/index
sudo chown compdeep:compdeep /nvme/index
sudo chmod 755 /nvme/index
sudo mkdir -p /nvme/postgresql/11/main/
sudo chown -R postgres:postgres /nvme/postgresql
sudo chmod -R 755 /nvme/postgresql
git clone https://github.com/cormacguerin/lepton.git
cd lepton
ln -s /nvme/index/ index
npm install
cd vue-app
Vue.prototype.$SERVER_URI = 'https://compdeep.customer.com'
npm install
npm run build
cd ..
make
- indexroot (will read the documents from postgres and convert them into a reverse index in the index directory (it's pretty slow))
these are standalone applications that will run, I have yet to create a management script to start and stop them.
screen ./indexroot
screen ./serveroot
sudo su - postgres
/usr/lib/postgresql/11/bin/initdb -D /nvme/postgresql/11/main/
createdb admin
createdb index
cat /home/compdeep/lepton/server/admin_schema.psql |psql -d admin
cat /home/compdeep/lepton/server/index_schema.psql |psql -d index
psql
ALTER USER postgres PASSWORD '0fi1hakfpmaac1zmcx9nfa';
\q
exit
echo '0fi1hakfpmaac1zmcx9nfa' > dbpassword
node lepton.js
We are almost complete at this stage and we can actually run the app now, but if we want to serve publicly
we need to route the node traffic. I'm using an Apache reverse proxy to do this in production, if you are running locally
sudo su -
a2enmod proxy
a2enmod proxy_http
a2enmod proxy_ajp
a2enmod rewrite
a2enmod deflate
a2enmod headers
a2enmod proxy_balancer
a2enmod proxy_connect
a2enmod proxy_html
cp /home/compdeep/lepton/server/apache2.conf /etc/apache2/
mkdir /etc/apache2/cas/
openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365
cp cert.pem /etc/apache2/cas/compdeep.pem
cp cert.pem /etc/apache2/cas/compdeep.crt
cp key.pem /etc/apache2/cas/compdeep.pem
Redirect permanent / https://34.67.102.230/
# ServerName www.compdeep.com
ProxyPass / http://127.0.0.1:3000/ Keepalive=On
ProxyPassReverse / http://127.0.0.1:3000/
https://www.digitalocean.com/community/tutorials/how-to-use-apache-http-server-as-reverse-proxy-using-mod_proxy-extension
service apache2 restart
sudo npm install pm2@latest -g
pm2 startup
sudo env PATH=$PATH:/usr/bin /usr/local/lib/node_modules/pm2/bin/pm2 startup systemd -u compdeep --hp /home/compdeep
-
Testing the indexing API:
curl -H "Content-Type: application/json" -X POST --data "@testdocs.json" '127.0.0.1:3000/addDocument?type=content'
-
Testing the query API:
curl 'https://35.239.29.200/search?query=test%20timing%202' -H 'accept: application/json' -H 'Connection: keep-alive' -H 'Accept-Encoding: gzip, deflate, br' -H 'Referer: https://35.239.29.200/' -H 'Accept-Language: en-US,en;q=0.9' -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36' --compressed --insecure -s -o /dev/null -w "%{time_starttransfer}\n"
-
There is also a web crawler; add your start URLs (start.urls) and start URL follow patterns (patterns.urls).
-
To start the crawler, type the below (ensure node lepton.js is running first!):
python crawler.py
Installing pytorch (the instructions differ depending on whether you have a GPU or not; see the PyTorch website for up-to-date instructions.)
pip3 install torch==1.5.1+cpu torchvision==0.6.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
-
Export the vocabulary:
./main
-
train the model on the exported vocab
-
English example (you need to build the unigram and word models for English; for CJK just unigram):
spm_train --input=rawvocab.txt --model_type=unigram --model_prefix=unigram --vocab_size=10000
spm_train --input=rawvocab.txt --model_type=word --model_prefix=word --vocab_size=10000
-
Make an alphabetically sorted copy of these:
sort word.vocab > word.vocab.sorted
sort unigram.vocab > unigram.vocab.sorted
-
Build synonyms with electron:
./electron word.vocab.sorted
g++ -o ngrams ngrams.cc $(/usr/bin/icu-config --ldflags --cppflags)
apt-get install libicu57
apt-get install libicu-dev
apt-get install postgresql postgresql-server-dev-10 postgresql-server-dev-all postgresql-client
git clone https://github.com/Tencent/rapidjson.git
cd rapidjson/
cmake .
make
make install
cd ..
git clone https://github.com/jtv/libpqxx.git
cd libpqxx/
cmake -DPostgreSQL_TYPE_INCLUDE_DIR=/usr/include/postgresql/
./configure
make
make install
cd ..
There is a react frontend in web-app; to build it, enter the directory and execute:
npm install
npm run build
you can then restart nodejs and access at 127.0.0.1:3000