<?xml version="1.0" encoding="UTF-8"?>
<!--
	Atom 1.0 feed: revision history of the wiki page "Spark/DataFrame".
	The encoding is declared explicitly. UTF-8 is what an XML parser assumes
	when no encoding declaration or byte order mark is present, so stating it
	does not change how the document is read.
	NOTE(review): assumes the file is stored as UTF-8 (the visible content is
	plain ASCII) - confirm against the server's Content-Type charset.
-->
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<!-- Feed identifier; the same URL as the rel="self" link below. -->
	<id>https://air.imag.fr/index.php?action=history&amp;feed=atom&amp;title=Spark%2FDataFrame</id>
	<title>Spark/DataFrame - Revision history</title>
	<!-- rel="self" is the address of this feed; rel="alternate" is the HTML history page for the same wiki page. -->
	<link rel="self" type="application/atom+xml" href="https://air.imag.fr/index.php?action=history&amp;feed=atom&amp;title=Spark%2FDataFrame"/>
	<link rel="alternate" type="text/html" href="https://air.imag.fr/index.php?title=Spark/DataFrame&amp;action=history"/>
	<!-- Feed-level timestamp (UTC, "Z" suffix); separate from each entry's own updated value. Presumably the time the feed was generated - verify. -->
	<updated>2026-06-16T20:10:46Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<!-- Software that produced the feed, with its version in the text. -->
	<generator>MediaWiki 1.39.17</generator>
	<!--
		Single history entry: the edit by Donsez that created the page
		(diff id 30156, compared against the previous revision).
	-->
	<entry>
		<!-- Entry identifier; the same URL as the rel="alternate" link below. -->
		<id>https://air.imag.fr/index.php?title=Spark/DataFrame&amp;diff=30156&amp;oldid=prev</id>
		<!--
			No type attribute, so Atom treats the title as plain text: the author
			name followed by the truncated edit summary. The markup-like fragment
			inside it is escaped once and is meant to be read as literal characters.
		-->
		<title>Donsez: Created page with &quot; http://spark.apache.org/docs/latest/sql-programming-guide.html  &lt;source lang=&quot;scala&quot;&gt; val sc: SparkContext // An existing SparkContext. val sqlContext = new org.apache.spark....&quot;</title>
		<!-- HTML diff view for this revision. -->
		<link rel="alternate" type="text/html" href="https://air.imag.fr/index.php?title=Spark/DataFrame&amp;diff=30156&amp;oldid=prev"/>
		<!-- Timestamp of this entry (UTC, "Z" suffix). -->
		<updated>2016-05-25T19:25:10Z</updated>

		<!--
			type="html": the element text is escaped HTML. Tags that should render
			(p, b, div, br) are escaped once. The wiki "source" tag and the quotes
			of the page text are escaped twice so they display as literal characters
			after the HTML is rendered.
			The payload is the edit summary, a "New page" marker and the initial page
			text: a link to the Spark SQL programming guide and a Scala example using
			read.json, show, printSchema, select, filter and groupBy on a DataFrame.
			Do not re-indent or re-wrap the lines below: whitespace and line breaks
			inside this element are part of its content.
		-->
		<summary type="html">&lt;p&gt;Created page with &amp;quot; http://spark.apache.org/docs/latest/sql-programming-guide.html  &amp;lt;source lang=&amp;quot;scala&amp;quot;&amp;gt; val sc: SparkContext // An existing SparkContext. val sqlContext = new org.apache.spark....&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&lt;br /&gt;
http://spark.apache.org/docs/latest/sql-programming-guide.html&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;scala&amp;quot;&amp;gt;&lt;br /&gt;
val sc: SparkContext // An existing SparkContext.&lt;br /&gt;
val sqlContext = new org.apache.spark.sql.SQLContext(sc)&lt;br /&gt;
&lt;br /&gt;
// Create the DataFrame&lt;br /&gt;
val df = sqlContext.read.json(&amp;quot;examples/src/main/resources/people.json&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
// Show the content of the DataFrame&lt;br /&gt;
df.show()&lt;br /&gt;
// age  name&lt;br /&gt;
// null Michael&lt;br /&gt;
// 30   Andy&lt;br /&gt;
// 19   Justin&lt;br /&gt;
&lt;br /&gt;
// Print the schema in a tree format&lt;br /&gt;
df.printSchema()&lt;br /&gt;
// root&lt;br /&gt;
// |-- age: long (nullable = true)&lt;br /&gt;
// |-- name: string (nullable = true)&lt;br /&gt;
&lt;br /&gt;
// Select only the &amp;quot;name&amp;quot; column&lt;br /&gt;
df.select(&amp;quot;name&amp;quot;).show()&lt;br /&gt;
// name&lt;br /&gt;
// Michael&lt;br /&gt;
// Andy&lt;br /&gt;
// Justin&lt;br /&gt;
&lt;br /&gt;
// Select everybody, but increment the age by 1&lt;br /&gt;
df.select(df(&amp;quot;name&amp;quot;), df(&amp;quot;age&amp;quot;) + 1).show()&lt;br /&gt;
// name    (age + 1)&lt;br /&gt;
// Michael null&lt;br /&gt;
// Andy    31&lt;br /&gt;
// Justin  20&lt;br /&gt;
&lt;br /&gt;
// Select people older than 21&lt;br /&gt;
df.filter(df(&amp;quot;age&amp;quot;) &amp;gt; 21).show()&lt;br /&gt;
// age name&lt;br /&gt;
// 30  Andy&lt;br /&gt;
&lt;br /&gt;
// Count people by age&lt;br /&gt;
df.groupBy(&amp;quot;age&amp;quot;).count().show()&lt;br /&gt;
// age  count&lt;br /&gt;
// null 1&lt;br /&gt;
// 19   1&lt;br /&gt;
// 30   1&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;/div&gt;</summary>
		<!-- Wiki user who made the edit. -->
		<author><name>Donsez</name></author>
	</entry>
</feed>