In [ ]:
```
# SOURCE https://www.datacamp.com/community/tutorials/shell-commands-data-scientist
## Number of lines in a file
wc -l filename.txt
## Number of words in a file
wc -w filename.txt
## Number of entries (files and directories) in the current folder
# `ls -l` starts its listing with a "total N" summary line, which made the
# count one too high; `ls -1` prints exactly one entry per line.
# Hidden entries are not counted (add -A to include them).
ls -1 | wc -l
## Display the first 2 lines of a file
head -n 2 filename.txt
## Display the last 2 lines of a file
tail -n 2 filename.txt
## Extract lines 101 to 120 of a file
# NOTE: the new file will not contain the header row; the next two snippets
# show how to add one.
# sed selects the lines by number, so a file shorter than 120 lines yields
# only the lines that really fall in 101-120 (`head -n 120 | tail -n 20`
# would instead return the last 20 lines it saw). '120q' stops reading once
# line 120 has been printed.
sed -n '101,120p;120q' adult.csv > adult_sample.csv
## Write a header line into its own file
printf '%s\n' 'column1, column2, column3' > header.csv
## Concatenate two files into one
# Puts the header row in front of the contents of adult.data
cat header.csv adult.data > adult.csv
## Regular expression search
grep 'regex' filename
# For example: count the lines in which a field holds the " ?" marker,
# i.e. lines containing the text ", ?,".
# -F treats the pattern as a fixed string, so '?' can never be read as a
# regex quantifier; -c prints the number of matching lines directly instead
# of piping the matches to `wc -l`.
grep -c -F -- ', ?,' adult.csv
## Replace a string with a different string using sed
sed "s/<string to replace>/<string to replace it with>/g" source_filename > target_filename
# For example: blank out every ", ?," field.
# Never redirect the output back into the file being read
# (`sed ... f > f`): the shell truncates f before sed opens it, so the result
# is an empty file. Write to a temporary file and move it into place only
# when sed succeeded.
sed "s/, ?,/,,/g" adult.csv > adult.csv.tmp && mv adult.csv.tmp adult.csv
## Unique values
# uniq only compares adjacent lines, so the input is sorted first.
# uniq -c: prefixes each line with its repetition count
# uniq -d: outputs only lines that are repeated (one copy of each)
# uniq -u: outputs only lines that are never repeated
# Count how many distinct lines occur more than once
sort adult.csv | uniq -d | wc -l
# Show the 3 most repeated lines together with their counts.
# sort -n compares the leading count numerically and -r reverses the order,
# so the largest counts come first (a plain `sort -r` compares the counts as
# text, which can misorder them).
sort adult.csv | uniq -c | sort -nr | head -n 3
## Select columns with cut
cut -d delimiter -f column_number filename
# Example: the second comma-separated field of the first three lines
cut -d ',' -f 2 adult.csv | head -n 3
## Looping
# Template for an endless loop: replace the placeholder with a real command
while :; do
  _do something_
done
# Replace spaces in the names of .csv files with underscores
replace_source=' '
replace_target='_'
for filename in ./*.csv; do
  # When no .csv file exists the glob stays as the literal pattern; skip it.
  if [ ! -e "$filename" ]; then
    continue
  fi
  # The search text is quoted so it is matched literally, not as a pattern.
  new_filename=${filename//"$replace_source"/$replace_target}
  # Names without spaces are already fine; mv onto the same name would only
  # report an error.
  if [ "$filename" = "$new_filename" ]; then
    continue
  fi
  mv -- "$filename" "$new_filename"
done
## Variables
# An assignment must not have spaces around '=':
#   name= 'value'   runs the command 'value' with an empty variable set
#   name = 'value'  runs a command called "name"
filename='file-1'
new_filename='file_1'
printf '%s\n' "$filename"
# Use variables in commands (quoted, so a value with spaces stays one argument)
mv -- "$filename" "$new_filename"
# Open a CSV, overwrite one column with a constant value and save the result
# as a new CSV file.
# -F ","      : input field separator
# -v OFS="," : output field separator; assigning to a field makes awk rebuild
#               the record with OFS, which defaults to a single space, so
#               without it the output would no longer be comma-separated
# FNR > 1     : skip the first row of the file (it is not written out)
# $2 = 0      : set the second column to 0
# print       : print the whole rebuilt record
awk -F "," -v OFS="," 'FNR > 1 { $2 = 0; print }' fortune500.csv > new503.csv
# Take just the first column and save it in a new CSV
awk -F "," '{ print $1 }' fortune500.csv > new502.csv
```