CA 3 hive

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1of 4

Step 1: Define the Web Log Table

CREATE TABLE web_logs (


ip_address STRING,
timestamp STRING,
request_method STRING,
url STRING,
response_code INT,
user_agent STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t' -- Assuming tab-delimited log file, adjust if different
STORED AS TEXTFILE;

Step 2: Load Data into the Web Logs Table


LOAD DATA INPATH '/path/to/your/logfile' INTO TABLE web_logs;
file:///C:/Users/JEEVAN%20%20N/Desktop/web_server_logs.csv

Step 3: Find the Top 10 IPs by Number of Requests


SELECT ip_address, COUNT(*) AS request_count
FROM web_logs
GROUP BY ip_address
ORDER BY request_count DESC
LIMIT 10;

DATASET:

ip_address,timestamp,request_method,url,response_code,user_agent
192.168.1.1,2024-12-03 08:15:23,GET,/home,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
192.168.1.2,2024-12-03 08:16:12,POST,/login,302,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121
Safari/537.36"
192.168.1.3,2024-12-03 08:17:55,GET,/products,200,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/84.0"
192.168.1.4,2024-12-03 08:18:30,GET,/contact,200,"Mozilla/5.0 (Macintosh; Intel Mac
OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36"
192.168.1.1,2024-12-03 08:20:10,GET,/about,200,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82
Safari/537.36"
192.168.1.2,2024-12-03 08:21:05,GET,/home,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
192.168.1.5,2024-12-03 08:23:15,POST,/register,400,"Mozilla/5.0 (X11; Ubuntu; Linux
x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36"
192.168.1.3,2024-12-03 08:25:40,GET,/products/123,404,"Mozilla/5.0 (Windows NT
10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/85.0"
192.168.1.4,2024-12-03 08:27:00,GET,/home,200,"Mozilla/5.0 (Macintosh; Intel Mac OS
X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36"
192.168.1.2,2024-12-03 08:28:50,GET,/profile,403,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124
Safari/537.36"
192.168.1.1,2024-12-03 08:30:15,POST,/login,200,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97
Safari/537.36"
192.168.1.5,2024-12-03 08:31:45,GET,/home,500,"Mozilla/5.0 (X11; Ubuntu; Linux
x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36"
Save the file with the .csv extension (e.g., web_server_logs.csv).

to create a shared file in cloudera go to :


1 devices
2 shared clipboard

[cloudera@quickstart ~]$ su
Password: cloudera
[root@quickstart cloudera]# mount -t vboxsf shared Desktop/shared

CLOUDERA EXECUTION

cloudera@quickstart ~]$ hive


24/12/03 00:28:06 WARN : Your hostname, quickstart.cloudera resolves to a
loopback/non-reachable address: 127.0.0.1, but we couldn't find any external IP
address!
24/12/03 00:28:08 WARN : Your hostname, quickstart.cloudera resolves to a
loopback/non-reachable address: 127.0.0.1, but we couldn't find any external IP
address!

Logging initialized using configuration in jar:file:/usr/lib/hive/lib/hive-common-


1.1.0-cdh5.13.0.jar!/hive-log4j.properties
WARNING: Hive CLI is deprecated and migration to Beeline is recommended.
hive> show tables;
OK
web_logs
Time taken: 1.849 seconds, Fetched: 1 row(s)
hive> use web_logs;
FAILED: SemanticException [Error 10072]: Database does not exist: web_logs
hive> show databases;
OK
default
demo
student1
student_3032
Time taken: 0.038 seconds, Fetched: 4 row(s)
hive> use student_3032;
OK
Time taken: 0.116 seconds
hive> show tables;
OK
student
Time taken: 0.02 seconds, Fetched: 1 row(s)
hive> use student1;
OK
Time taken: 0.024 seconds
hive> show tables;
OK
Time taken: 0.019 seconds
hive> use demo;
OK
Time taken: 0.026 seconds
hive> show tables;
OK
student1
Time taken: 0.023 seconds, Fetched: 1 row(s)
hive> use default;
OK
Time taken: 0.033 seconds
hive> show tables;
OK
web_logs
Time taken: 0.022 seconds, Fetched: 1 row(s)
hive> load data local inpath "Desktop/web_server_logs.csv into table web_logs;
MismatchedTokenException(26!=307)
at
org.antlr.runtime.BaseRecognizer.recoverFromMismatchedToken(BaseRecognizer.java:617
)
at org.antlr.runtime.BaseRecognizer.match(BaseRecognizer.java:115)
at
org.apache.hadoop.hive.ql.parse.HiveParser.loadStatement(HiveParser.java:1738)
at
org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1544)
at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1065)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:201)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1356)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1473)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1285)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1275)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:226)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:175)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:389)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:781)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:699)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:634)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:4
3)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
FAILED: ParseException line 1:24 mismatched input 'Desktop' expecting StringLiteral
near 'inpath' in load statement
hive> load data local inpath "Desktop/web_server_logs.csv" into table web_logs;
Loading data to table default.web_logs
Table default.web_logs stats: [numFiles=1, totalSize=1958]
OK
Time taken: 1.798 seconds
hive> select * from web_logs;
OK
ip_address,timestamp,request_method,url,response_code,user_agent NULL NULL NULL
NULL NULL
192.168.1.1,03-12-2024 08:15,GET,/home,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"NULL
NULL NULL NULL NULL
192.168.1.2,03-12-2024 08:16,POST,/login,302,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
NULL NULL NULL NULL NULL
192.168.1.3,03-12-2024 08:17,GET,/products,200,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/84.0" NULL NULL NULL
NULL NULL
192.168.1.4,03-12-2024 08:18,GET,/contact,200,"Mozilla/5.0 (Macintosh; Intel Mac OS
X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36" NULL NULL NULL
NULL NULL
192.168.1.1,03-12-2024 08:20,GET,/about,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"NULL
NULL NULL NULL NULL
192.168.1.2,03-12-2024 08:21,GET,/home,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"NULL
NULL NULL NULL NULL
192.168.1.5,03-12-2024 08:23,POST,/register,400,"Mozilla/5.0 (X11; Ubuntu; Linux
x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36"
NULL NULL NULL NULL NULL
192.168.1.3,03-12-2024 08:25,GET,/products/123,404,"Mozilla/5.0 (Windows NT 10.0;
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/85.0" NULL NULL NULL
NULL NULL
192.168.1.4,03-12-2024 08:27,GET,/home,200,"Mozilla/5.0 (Macintosh; Intel Mac OS X
10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36" NULL NULL NULL
NULL NULL
192.168.1.2,03-12-2024 08:28,GET,/profile,403,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
NULL NULL NULL NULL NULL
192.168.1.1,03-12-2024 08:30,POST,/login,200,"Mozilla/5.0 (Windows NT 10.0; Win64;
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
NULL NULL NULL NULL NULL
192.168.1.5,03-12-2024 08:31,GET,/home,500,"Mozilla/5.0 (X11; Ubuntu; Linux x86_64)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36" NULL
NULL NULL NULL NULL
Time taken: 0.381 seconds, Fetched: 13 row(s)
hive>

You might also like